python_weather/sobear/GetCitiesUrl.py

import csv
import requests
from fake_useragent import UserAgent
import pandas as pd
# ---------------- Crawl the link for each city
ua = UserAgent()
headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate, sdch',
    'Accept-Language': 'zh-CN,zh;q=0.8',
    'Connection': 'keep-alive',
    'User-Agent': ua.random
}
url = 'http://www.nmc.cn/f/rest/province'  # replace with the actual URL
# public

# Send a GET request and return the response object
def get_response(url):
    response = requests.get(url, headers=headers)
    return response

# Get the city code:
# find the 'code' whose 'name' matches target_name in the given JSON data
def find_code_by_name(json_data, target_name):
    for item in json_data:
        if item['name'] == target_name:
            return item['code']
    return None
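
# A minimal usage sketch (hypothetical data, just to illustrate the expected shape
# of the province JSON: a list of objects carrying 'code' and 'name' keys):
#     find_code_by_name([{'code': '001', 'name': 'ExampleProvince'}], 'ExampleProvince')  # -> '001'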

def get_cityCode(target_name, url):
    # Send a GET request to fetch the JSON data
    response = get_response(url)
    if response.status_code == 200:
        json_data = response.json()
        code = find_code_by_name(json_data, target_name)
        if code:
            print(f"The code for {target_name} is {code}\n----------- city code retrieved -----------\n")
            return code
        else:
            print(f"Code not found for {target_name}")
    else:
        print("Failed to retrieve JSON data")

# Get 'city_index' and 'city_name' for every district/county and write them
# to cities_data.csv (the 'city_url' column is built later from this file)
def get_citys(url):
    # Send a GET request to fetch the JSON data
    response = get_response(url)
    data = response.json()
    # Write the results to a CSV file
    with open('cities_data.csv', 'w', newline='', encoding='utf-8') as csvfile:
        fieldnames = ['city_index', 'city_name']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for index, city_data in enumerate(data, start=1):
            writer.writerow({
                'city_index': city_data['code'],
                'city_name': city_data['city'],
            })
    print("\n----------- city_index / city_name retrieved -----------\n")

# List storing the values of the city_index column
city_index_list = []

def get_cityIndex():
    # Open the CSV file for reading
    with open('cities_data.csv', 'r', newline='', encoding='utf-8') as csvfile:
        reader = csv.DictReader(csvfile)
        # Read every row
        for row in reader:
            # Extract the city_index value and append it to the list
            city_index_list.append(row['city_index'])

# Build the weather-data URL for each city and write them to cities_urls.csv
def get_city_urls():
    # List storing the cities_urls column data
    citys_urllist = []
    get_cityIndex()
    for index in city_index_list:
        # URL that returns the weather data for this station
        acityurl = 'http://www.nmc.cn/rest/weather?stationid=' + index
        acityurl = acityurl.replace(' ', '')
        citys_urllist.append(acityurl)
    df = pd.DataFrame(citys_urllist, columns=['city_url'])
    df.to_csv('cities_urls.csv', index=False)

get_city_urls()
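
# A sketch of the intended end-to-end flow (an assumption pieced together from
# the functions above; only the last step actually runs when this script executes,
# and 'per_region_city_list_url' is hypothetical - this file never constructs a
# per-region endpoint):
#     code = get_cityCode('SomeProvinceName', url)  # 1. look up the region code
#     get_citys(per_region_city_list_url)           # 2. write city_index/city_name to cities_data.csv
#     get_city_urls()                               # 3. turn cities_data.csv into cities_urls.csv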