python_weather/sobear/GetWhetherData.py

95 lines
3.1 KiB
Python

import pandas as pd
import csv
import requests
from fake_useragent import UserAgent
# Source of User-Agent strings for the requests sent by this script.
ua = UserAgent()

# Browser-like request headers shared by every request. The User-Agent value
# is drawn once, when this dict is built, and then reused as-is.
headers = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
    "Accept-Encoding": "gzip, deflate, sdch",
    "Accept-Language": "zh-CN,zh;q=0.8",
    "Connection": "keep-alive",
    "User-Agent": ua.random,
}
def get_response(url, timeout=10):
    """Send a GET request to *url* using the module-level ``headers``.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, so a single stalled
            connection would hang the whole script.

    Returns:
        The ``requests.Response`` object as returned by ``requests.get``;
        the HTTP status is not checked here.

    Raises:
        requests.exceptions.RequestException: On connection failures,
            including ``Timeout`` when *timeout* elapses.
    """
    return requests.get(url, headers=headers, timeout=timeout)
# Flat list of the URLs to fetch.
url_list = []
# Read cities_urls.csv and collect the first column of every data row.
with open('cities_urls.csv', newline='', encoding='utf-8') as csvfile:
    reader = csv.reader(csvfile)
    # Skip the header row (the default keeps an empty file from raising).
    next(reader, None)
    for row in reader:
        # csv.reader yields an empty list for blank lines; indexing it would
        # raise IndexError, so rows without a usable first cell are skipped.
        if not row:
            continue
        url = row[0].strip()
        if url:
            url_list.append(url)
# Rows for data_now.csv: one record of current readings per fetched URL.
weather_data = []
# Rows for data_hour.csv: records built from each response's 'passedchart'.
weather_hour = []
# Rows for data_next.csv: records built from each response's 'tempchart'.
weather_nexts = []
for url in url_list:
    print(url)
    data = get_response(url).json()['data']

    real = data['real']
    current = real['weather']
    wind = real['wind']
    # These two identifiers are repeated on every row of all three tables.
    station_code = real['station']['code']
    city_name = data['predict']['station']['city']
    # 'air' may be missing from the payload; an empty mapping makes both
    # air-quality columns fall back to None.
    air = data.get('air') or {}

    city_weather = {
        'city_index': station_code,
        'city_name': city_name,
        'time': real['publish_time'],
        'temperature': current['temperature'],
        'humidity': current['humidity'],
        'rain': current['rain'],
        'wind_direction': wind['direct'],
        'wind_speed': wind['speed'],
        'info': current['info'],
        'aqi': air.get('aqi'),
        # NOTE(review): 'sqiText' looks like a typo for 'aqiText'; kept as-is
        # because it is the column name written to the CSV.
        'sqiText': air.get('text'),
    }

    # Take at most the first 24 'passedchart' entries. Slicing instead of
    # indexing 0..23 avoids an IndexError when fewer entries are returned.
    for point in data['passedchart'][:24]:
        weather_hour.append({
            'city_index': station_code,
            'city_name': city_name,
            'time': point['time'],
            'temperature': point['temperature'],
            'humidity': point['humidity'],
            'rain1h': point['rain1h'],
            'pressure': point['pressure'],
            'windDirection': point['windDirection'],
            'windSpeed': point['windSpeed'],
        })

    # Take at most the first 14 'tempchart' entries, for the same reason.
    for day in data['tempchart'][:14]:
        weather_nexts.append({
            'city_index': station_code,
            'city_name': city_name,
            'time': day['time'],
            'maxtemp': day['max_temp'],
            'mintemp': day['min_temp'],
            'day_text': day['day_text'],
            'night_text': day['night_text'],
        })

    weather_data.append(city_weather)
# Turn each collected list of row dicts into a DataFrame.
df1 = pd.DataFrame(weather_data)
df2 = pd.DataFrame(weather_hour)
df3 = pd.DataFrame(weather_nexts)

# Write each table to its CSV file, with a header row and without the
# DataFrame index column.
for frame, csv_name in (
    (df1, 'data_now.csv'),
    (df2, 'data_hour.csv'),
    (df3, 'data_next.csv'),
):
    frame.to_csv(csv_name, index=False)