# Weather scraper: reads city API URLs from cities_urls.csv, fetches the JSON
# payload for each one, and writes three CSV files:
#   data_now.csv  - one row per city with the current conditions
#   data_hour.csv - up to 24 rows per city taken from the 'passedchart' list
#   data_next.csv - up to 14 rows per city taken from the 'tempchart' list
import csv

import pandas as pd
import requests
from fake_useragent import UserAgent

# Seconds to wait on the server before giving up; without a timeout a single
# stalled connection would block the whole run indefinitely.
REQUEST_TIMEOUT = 10

# Upper bounds on how many chart entries are exported per city.
MAX_HOURLY_ROWS = 24
MAX_FORECAST_ROWS = 14

ua = UserAgent()
headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate, sdch',
    'Accept-Language': 'zh-CN,zh;q=0.8',
    'Connection': 'keep-alive',
    'User-Agent': ua.random
}


def get_response(url, timeout=REQUEST_TIMEOUT):
    """Send a GET request to *url* with the shared headers.

    Args:
        url: Address to fetch.
        timeout: Seconds to wait for the server before requests raises a
            timeout error. Defaults to REQUEST_TIMEOUT.

    Returns:
        The requests.Response object, unmodified (status is not checked here).
    """
    return requests.get(url, headers=headers, timeout=timeout)


def _air_field(data, key):
    """Return data['air'][key], or None when the air section or key is absent."""
    air = data.get('air')
    # The air section may be missing or null; only index it when it is a dict.
    if isinstance(air, dict):
        return air.get(key)
    return None


def _current_row(data):
    """Build the data_now.csv row (current conditions) for one city payload."""
    real = data['real']
    weather = real['weather']
    wind = real['wind']
    return {
        'city_index': real['station']['code'],
        'city_name': data['predict']['station']['city'],
        'time': real['publish_time'],
        'temperature': weather['temperature'],
        'humidity': weather['humidity'],
        'rain': weather['rain'],
        'wind_direction': wind['direct'],
        'wind_speed': wind['speed'],
        'info': weather['info'],
        'aqi': _air_field(data, 'aqi'),
        # NOTE(review): 'sqiText' looks like a typo for 'aqiText'; kept as-is
        # because it is the column name written to data_now.csv.
        'sqiText': _air_field(data, 'text'),
    }


def _hourly_rows(data, limit=MAX_HOURLY_ROWS):
    """Build data_hour.csv rows from the first *limit* 'passedchart' entries."""
    code = data['real']['station']['code']
    city = data['predict']['station']['city']
    # Slicing (rather than indexing 0..limit-1) tolerates shorter lists.
    return [
        {
            'city_index': code,
            'city_name': city,
            'time': entry['time'],
            'temperature': entry['temperature'],
            'humidity': entry['humidity'],
            'rain1h': entry['rain1h'],
            'pressure': entry['pressure'],
            'windDirection': entry['windDirection'],
            'windSpeed': entry['windSpeed'],
        }
        for entry in data['passedchart'][:limit]
    ]


def _forecast_rows(data, limit=MAX_FORECAST_ROWS):
    """Build data_next.csv rows from the first *limit* 'tempchart' entries."""
    code = data['real']['station']['code']
    city = data['predict']['station']['city']
    # Slicing (rather than indexing 0..limit-1) tolerates shorter lists.
    return [
        {
            'city_index': code,
            'city_name': city,
            'time': entry['time'],
            'maxtemp': entry['max_temp'],
            'mintemp': entry['min_temp'],
            'day_text': entry['day_text'],
            'night_text': entry['night_text'],
        }
        for entry in data['tempchart'][:limit]
    ]


# Flat list of the URLs to scrape.
url_list = []

# Read the CSV file and collect the URL from the first column of each row.
with open('cities_urls.csv', newline='', encoding='utf-8') as csvfile:
    reader = csv.reader(csvfile)
    # Skip the header row.
    next(reader, None)
    for row in reader:
        # csv.reader yields [] for blank lines; skip those and empty cells
        # instead of failing on row[0] or requesting an empty URL.
        if row and row[0].strip():
            url_list.append(row[0])

# Rows destined for data_now.csv.
weather_data = []
# Rows destined for data_hour.csv.
weather_hour = []
# Rows destined for data_next.csv.
weather_nexts = []

for url in url_list:
    print(url)
    response = get_response(url)
    data = response.json()['data']

    city_weather = _current_row(data)
    weather_hour.extend(_hourly_rows(data))
    weather_nexts.extend(_forecast_rows(data))
    weather_data.append(city_weather)

# Convert the collected rows to DataFrames and write them to CSV with a
# header row and without the DataFrame index.
df1 = pd.DataFrame(weather_data)
df1.to_csv('data_now.csv', index=False)

df2 = pd.DataFrame(weather_hour)
df2.to_csv('data_hour.csv', index=False)

df3 = pd.DataFrame(weather_nexts)
df3.to_csv('data_next.csv', index=False)