"""Scrape city weather-data URLs from NMC (China National Meteorological Centre).

Pipeline: build a proxy pool from ips.csv, fetch the province/city lists from
www.nmc.cn through a randomly chosen authenticated proxy, write
(city_index, city_name) rows to cities_data.csv, then derive per-station
weather API URLs and save them to cities_urls.csv.
"""
import csv
import random

import pandas as pd
import requests
from fake_useragent import UserAgent

# --- proxy pool construction ---
# NOTE(review): credentials are hard-coded in source; move them to environment
# variables or a config file before sharing/committing this script.
username = 'd3347396121'
password = 'ufinmek6'

# "ip:port" strings read from ips.csv.
ip_port_list = []

with open('ips.csv', newline='') as csvfile:
    reader = csv.reader(csvfile)
    next(reader)  # skip the header row
    for row in reader:
        # Join the IP address and port columns into a single "ip:port" string.
        ip_port_list.append(':'.join(row))

# Pick one proxy at random for this run; both schemes tunnel through it.
random_proxy = random.choice(ip_port_list)
proxies_dict = {
    "http": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": random_proxy},
    "https": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": random_proxy},
}

# --- scrape city links ---
ua = UserAgent()
headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate, sdch',
    'Accept-Language': 'zh-CN,zh;q=0.8',
    'Connection': 'keep-alive',
    'User-Agent': ua.random,  # random UA per run to look less bot-like
}

url = 'http://www.nmc.cn/f/rest/province'  # replace with the actual URL if needed


def get_response(url):
    """Send a GET request via the proxy pool and return the raw response."""
    return requests.get(url, headers=headers, proxies=proxies_dict)


def find_code_by_name(json_data, target_name):
    """Return the 'code' of the first item whose 'name' == target_name, else None."""
    for item in json_data:
        if item['name'] == target_name:
            return item['code']
    return None


def get_cityCode(target_name, url):
    """Fetch the province JSON from *url* and return target_name's code.

    Prints a diagnostic and returns None when the name is missing or the
    request fails.
    """
    response = get_response(url)
    if response.status_code == 200:
        json_data = response.json()
        code = find_code_by_name(json_data, target_name)
        if code:
            print(f"The code for {target_name} is {code}\n-----------citycode获取完成-----------\n")
            return code
        print(f"Code not found for {target_name}")
    else:
        print("Failed to retrieve JSON data")


def get_cities_data(url):
    """Fetch all districts/cities and write (city_index, city_name) to cities_data.csv.

    FIX(review): this function was originally also named get_citys and was
    silently shadowed (dead code) by the later zero-argument get_citys below;
    renamed so both are callable.
    """
    response = get_response(url)
    data = response.json()
    with open('cities_data.csv', 'w', newline='', encoding='utf-8') as csvfile:
        fieldnames = ['city_index', 'city_name']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for index, city_data in enumerate(data, start=1):
            writer.writerow({
                'city_index': city_data['code'],
                'city_name': city_data['city'],
            })
    print("\n-----------cityindex-cityname获取完成-----------\n")


# city_index values loaded from cities_data.csv (filled by get_cityIndex).
city_index_list = []


def get_cityIndex():
    """Append every city_index value in cities_data.csv to city_index_list."""
    with open('cities_data.csv', 'r', newline='', encoding='utf-8') as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            city_index_list.append(row['city_index'])


def get_citys():
    """Build one weather-API URL per station id and save them to cities_urls.csv."""
    citys_urllist = []
    get_cityIndex()
    for index in city_index_list:
        acityurl = 'http://www.nmc.cn/rest/weather?stationid=' + index
        acityurl = acityurl.replace(' ', '')  # station ids may carry stray spaces
        citys_urllist.append(acityurl)
    df = pd.DataFrame(citys_urllist, columns=['city_url'])
    df.to_csv('cities_urls.csv', index=False)


get_citys()