python_weather/sobear/GetCitiesUrl.py
2024-05-31 15:53:27 +08:00

122 lines
3.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import csv
import requests
from fake_useragent import UserAgent
import pandas as pd
import random
# Build the proxy pool: each data row of ips.csv becomes one "ip:port" entry.
ip_port_list = []
# NOTE(review): the proxy credentials are hard-coded in source; consider
# loading them from the environment or an untracked config file.
username = 'd3347396121'
password = 'ufinmek6'
# Read the CSV file; its first row is a header and is skipped.
with open('ips.csv', newline='') as csvfile:
    reader = csv.reader(csvfile)
    # A default avoids a bare StopIteration when the file is completely empty.
    next(reader, None)
    for row in reader:
        cells = [cell.strip() for cell in row]
        # A blank line would otherwise add an empty entry, which produces a
        # malformed proxy URL if that entry happens to be picked.
        if not any(cells):
            continue
        # Join the row's cells (IP address, port) with ":"; no scheme is
        # added here, it is prepended when the proxy URL is built below.
        ip_port = ':'.join(cells)
        ip_port_list.append(ip_port)
if not ip_port_list:
    # Same exception type random.choice would raise, with a usable message.
    raise IndexError("ips.csv contains no proxy entries")
# One proxy is chosen at import time and used for both URL schemes.
random_proxy = random.choice(ip_port_list)
proxy_url = f"http://{username}:{password}@{random_proxy}/"
proxies_dict = {
    "http": proxy_url,
    "https": proxy_url,
}
# ---------------- Crawl the link for each city ----------------
ua = UserAgent()
# Fixed request headers sent with every request.
headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate, sdch',
    'Accept-Language': 'zh-CN,zh;q=0.8',
    'Connection': 'keep-alive',
}
# The User-Agent is drawn once from fake_useragent when the module loads.
headers['User-Agent'] = ua.random
# Province list endpoint; replace with the actual URL if it differs.
url = 'http://www.nmc.cn/f/rest/province'
# Shared helper: send a GET request and hand back the response object.
def get_response(url, timeout=10):
    """Send a GET request to *url* through the configured proxy.

    The module-level ``headers`` and ``proxies_dict`` are applied to the
    request.

    Args:
        url: Address to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, requests waits indefinitely on an unresponsive proxy
            or server.

    Returns:
        The ``requests.Response`` object; the status code is not checked
        here.

    Raises:
        requests.exceptions.Timeout: If the server does not answer within
            *timeout* seconds.
    """
    response = requests.get(
        url,
        headers=headers,
        proxies=proxies_dict,
        timeout=timeout,
    )
    return response
# City-code lookup.
def find_code_by_name(json_data, target_name):
    """Return the 'code' of the first entry whose 'name' equals *target_name*.

    Args:
        json_data: Iterable of mappings that each carry 'name' and 'code'.
        target_name: Name to look for.

    Returns:
        The matching entry's 'code', or None when no entry matches.
    """
    matching_codes = (
        entry['code'] for entry in json_data if entry['name'] == target_name
    )
    # The generator is lazy, so scanning stops at the first match.
    return next(matching_codes, None)
def get_cityCode(target_name, url):
    """Fetch the JSON list at *url* and return the code for *target_name*.

    Prints a status message in every case. Returns the code when one is
    found; returns None when the request does not answer 200 or when no
    (truthy) code matches the name.
    """
    resp = get_response(url)
    if resp.status_code != 200:
        print("Failed to retrieve JSON data")
        return None

    entries = resp.json()
    found = find_code_by_name(entries, target_name)
    if not found:
        print(f"Code not found for {target_name}")
        return None

    print(f"The code for {target_name} is {found}\n-----------citycode获取完成-----------\n")
    return found
# Save the 'city_index' and 'city_name' of every entry returned by the API.
def get_citys(url):
    """Fetch the city list at *url* and write it to ``cities_data.csv``.

    The response body is parsed as JSON and iterated; each entry's 'code'
    and 'city' values become the ``city_index`` and ``city_name`` columns.

    Args:
        url: Address that returns the JSON list of cities.

    Returns:
        None. When the server does not answer 200, a message is printed and
        the CSV file is not touched.
    """
    response = get_response(url)
    if response.status_code != 200:
        # Same failure handling as get_cityCode: report it instead of
        # trying to parse an error page as JSON.
        print("Failed to retrieve JSON data")
        return
    data = response.json()
    # Write the CSV file.
    with open('cities_data.csv', 'w', newline='', encoding='utf-8') as csvfile:
        fieldnames = ['city_index', 'city_name']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(
            {'city_index': city_data['code'], 'city_name': city_data['city']}
            for city_data in data
        )
    print("\n-----------cityindex-cityname获取完成-----------\n")


# A later definition in this file reuses the name ``get_citys``, which makes
# the function above unreachable under that name once the module has loaded.
# Keep a second handle so it can still be called.
save_city_list = get_citys
# Module-level list holding the city_index column of cities_data.csv.
city_index_list = []


def get_cityIndex():
    """Reload ``city_index_list`` from the city_index column of ``cities_data.csv``.

    The list is refilled in place on every call, so calling this more than
    once does not accumulate duplicate entries, and existing references to
    the list stay valid.

    Returns:
        The module-level ``city_index_list``.

    Raises:
        FileNotFoundError: If ``cities_data.csv`` does not exist.
        KeyError: If the file has no ``city_index`` column.
    """
    with open('cities_data.csv', 'r', newline='', encoding='utf-8') as csvfile:
        reader = csv.DictReader(csvfile)
        # Slice assignment replaces the contents only after every row has
        # been read successfully.
        city_index_list[:] = [row['city_index'] for row in reader]
    return city_index_list
# Build the weather-data URL of every city and store them in cities_urls.csv.
def get_citys():
    """Write one weather URL per entry of ``city_index_list`` to ``cities_urls.csv``.

    Calls ``get_cityIndex()`` first, then appends each index to the
    station endpoint, removes any spaces from the result, and saves the
    URLs as a single ``city_url`` column.
    """
    get_cityIndex()
    endpoint = 'http://www.nmc.cn/rest/weather?stationid='
    station_urls = [
        (endpoint + station_id).replace(' ', '')
        for station_id in city_index_list
    ]
    frame = pd.DataFrame(station_urls, columns=['city_url'])
    frame.to_csv('cities_urls.csv', index=False)


# Runs whenever the module is loaded.
get_citys()