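# Weather scraper: reads URLs from cities_urls.csv, fetches the JSON payload
# behind each one, and writes data_now.csv, data_hour.csv and data_next.csv.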
import pandas as pd
|
|
import csv
|
|
import requests
|
|
from fake_useragent import UserAgent
|
|
# Source of User-Agent strings (fake_useragent).
ua = UserAgent()

# HTTP headers sent with every request made by this script.
headers = {
    'Accept': (
        'text/html,application/xhtml+xml,application/xml;q=0.9,'
        'image/webp,*/*;q=0.8'
    ),
    'Accept-Encoding': 'gzip, deflate, sdch',
    'Accept-Language': 'zh-CN,zh;q=0.8',
    'Connection': 'keep-alive',
}
# ua.random is read once here, so the same User-Agent value is reused for
# every request that passes this dict.
headers['User-Agent'] = ua.random
|
|
|
|
|
|
|
|
def get_response(url, timeout=10):
    """Send a GET request to *url* using the module-level ``headers``.

    Args:
        url: Address to fetch.
        timeout: Seconds to wait for the server before giving up; passed
            unchanged to ``requests.get``.

    Returns:
        The response object returned by ``requests.get``. The HTTP status
        code is not checked here; that is left to the caller.

    Raises:
        requests.exceptions.Timeout: If the server does not answer within
            ``timeout`` seconds.
    """
    # requests applies no timeout by default, so a stalled server would
    # otherwise block the whole script indefinitely.
    return requests.get(url, headers=headers, timeout=timeout)
|
|
|
|
# Flat list of the URLs to fetch.
url_list = []

# Read the CSV file and collect the first column of every data row.
with open('cities_urls.csv', newline='', encoding='utf-8') as csvfile:
    reader = csv.reader(csvfile)
    # Skip the header row (the default keeps an empty file from raising).
    next(reader, None)
    # csv.reader yields an empty list for a blank line, so row[0] would
    # raise IndexError; rows with no usable first cell are skipped instead.
    url_list.extend(row[0] for row in reader if row and row[0].strip())
|
|
|
|
# Upper bounds on how many chart entries are exported per city.
MAX_HOURLY_ROWS = 24
MAX_DAILY_ROWS = 14

# Rows for data_now.csv: one dict of current readings per fetched URL.
weather_data = []

# Rows for data_hour.csv: one dict per 'passedchart' entry.
weather_hour = []

# Rows for data_next.csv: one dict per 'tempchart' entry.
weather_nexts = []

for index in url_list:
    # Each element of url_list is a URL; echo it to show progress.
    print(index)
    response = get_response(index)
    data = response.json()['data']

    # Look up the shared sub-objects once instead of on every field.
    real = data['real']
    weather = real['weather']
    wind = real['wind']
    city_index = real['station']['code']
    city_name = data['predict']['station']['city']

    # 'air' is optional: a missing, null or non-dict value gives None for
    # both air-quality fields instead of raising.
    air = data.get('air')
    if not isinstance(air, dict):
        air = {}

    city_weather = {
        'city_index': city_index,
        'city_name': city_name,
        'time': real['publish_time'],
        'temperature': weather['temperature'],
        'humidity': weather['humidity'],
        'rain': weather['rain'],
        'wind_direction': wind['direct'],
        'wind_speed': wind['speed'],
        'info': weather['info'],
        'aqi': air.get('aqi'),
        # Key spelling kept as-is: it becomes the CSV column name.
        'sqiText': air.get('text'),
    }

    # Slicing (rather than indexing 0..23) avoids an IndexError when the
    # payload carries fewer entries than the limit.
    for entry in data['passedchart'][:MAX_HOURLY_ROWS]:
        weather_hour.append({
            'city_index': city_index,
            'city_name': city_name,
            'time': entry['time'],
            'temperature': entry['temperature'],
            'humidity': entry['humidity'],
            'rain1h': entry['rain1h'],
            'pressure': entry['pressure'],
            'windDirection': entry['windDirection'],
            'windSpeed': entry['windSpeed'],
        })

    # Same reasoning as above for the 'tempchart' entries.
    for entry in data['tempchart'][:MAX_DAILY_ROWS]:
        weather_nexts.append({
            'city_index': city_index,
            'city_name': city_name,
            'time': entry['time'],
            'maxtemp': entry['max_temp'],
            'mintemp': entry['min_temp'],
            'day_text': entry['day_text'],
            'night_text': entry['night_text'],
        })

    weather_data.append(city_weather)
|
|
def _export(rows, filename):
    """Build a DataFrame from *rows*, write it to *filename*, and return it.

    The CSV is written with a header row and without the index column.
    """
    frame = pd.DataFrame(rows)
    frame.to_csv(filename, index=False)
    return frame


# Each list of row dicts is converted and written in turn.
df1 = _export(weather_data, 'data_now.csv')
df2 = _export(weather_hour, 'data_hour.csv')
df3 = _export(weather_nexts, 'data_next.csv')