99 lines
2.9 KiB
Python
99 lines
2.9 KiB
Python
|
import csv
|
||
|
|
||
|
import requests
|
||
|
from fake_useragent import UserAgent
|
||
|
import pandas as pd
|
||
|
|
||
|
|
||
|
# ---------------- Scrape the per-city links ----------------
# A single UserAgent helper is created at import time; ``ua.random`` is read
# once below, so every request made through ``headers`` sends the same value.
ua = UserAgent()
headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate, sdch',
    'Accept-Language': 'zh-CN,zh;q=0.8',
    'Connection': 'keep-alive',
    'User-Agent': ua.random,
}

url = 'http://www.nmc.cn/f/rest/province'  # placeholder: replace with the actual URL
|
||
|
|
||
|
# public
def get_response(url, timeout=10):
    """Send a GET request to *url* using the module-level ``headers``.

    Args:
        url: Address to fetch.
        timeout: Seconds ``requests`` waits for the server before raising a
            timeout error. Without it a stalled server would block the
            script indefinitely, which is what the previous version did.

    Returns:
        The ``requests.Response`` object. The status code is not checked
        here; callers decide how to treat non-200 replies.
    """
    return requests.get(url, headers=headers, timeout=timeout)
|
||
|
|
||
|
|
||
|
|
||
|
# City-code lookup
def find_code_by_name(json_data, target_name):
    """Return the ``'code'`` of the first entry whose ``'name'`` is *target_name*.

    *json_data* is iterated as a sequence of mappings that each carry
    ``'name'`` and ``'code'`` keys. ``None`` is returned when nothing matches.
    """
    matching_codes = (
        entry['code'] for entry in json_data if entry['name'] == target_name
    )
    # next() stops at the first match, mirroring an early return from a loop.
    return next(matching_codes, None)
|
||
|
|
||
|
def get_cityCode(target_name, url):
    """Fetch the JSON list at *url* and look up the code for *target_name*.

    A message is printed for each outcome. The code is returned when the
    request answers with status 200 and a truthy code is found; in every
    other case the function returns ``None``.
    """
    reply = get_response(url)
    if reply.status_code != 200:
        print("Failed to retrieve JSON data")
        return None

    found = find_code_by_name(reply.json(), target_name)
    if not found:
        print(f"Code not found for {target_name}")
        return None

    print(f"The code for {target_name} is {found}\n-----------citycode获取完成-----------\n")
    return found
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
# Collect every district/county as 'city_index', 'city_name' rows.
# NOTE(review): a second, zero-argument ``get_citys`` is defined later in this
# file and rebinds the name at import time, so this version cannot be reached
# by name once the module has finished loading.
def get_citys(url):
    """Download the JSON list at *url* and dump it to ``cities_data.csv``.

    Each entry's ``'code'`` is written to the ``city_index`` column and its
    ``'city'`` to ``city_name``. The file is overwritten on every call and a
    completion banner is printed afterwards.
    """
    records = get_response(url).json()

    columns = ['city_index', 'city_name']
    with open('cities_data.csv', 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=columns)
        writer.writeheader()
        writer.writerows(
            {'city_index': record['code'], 'city_name': record['city']}
            for record in records
        )

    print("\n-----------cityindex-cityname获取完成-----------\n")
|
||
|
|
||
|
|
||
|
# Module-level list holding the city_index column read from cities_data.csv.
city_index_list = []


def get_cityIndex():
    """Load the ``city_index`` column of ``cities_data.csv`` into ``city_index_list``.

    The shared list is refilled rather than appended to, so calling this
    function more than once no longer duplicates every entry. The file is
    read completely before the list is touched, so a failed read leaves the
    previous contents intact.

    Raises:
        FileNotFoundError: if ``cities_data.csv`` does not exist.
        KeyError: if a row has no ``city_index`` column.
    """
    with open('cities_data.csv', 'r', newline='', encoding='utf-8') as csvfile:
        codes = [row['city_index'] for row in csv.DictReader(csvfile)]

    # Slice assignment keeps the same list object, so code that already holds
    # a reference to city_index_list sees the refreshed contents.
    city_index_list[:] = codes
|
||
|
|
||
|
# Build the per-city weather URLs and store them in cities_urls.csv.
# NOTE(review): this definition reuses the name ``get_citys`` and therefore
# replaces the one-argument ``get_citys(url)`` defined earlier in this file.
def get_citys():
    """Write one weather URL per known city index to ``cities_urls.csv``.

    The city indexes are first loaded from ``cities_data.csv`` through
    ``get_cityIndex``. Each URL is the station endpoint followed by the
    index, with any spaces removed, and the result is saved as a
    single-column CSV named ``city_url``.
    """
    get_cityIndex()

    endpoint = 'http://www.nmc.cn/rest/weather?stationid='
    citys_urllist = [
        (endpoint + index).replace(' ', '') for index in city_index_list
    ]

    frame = pd.DataFrame(citys_urllist, columns=['city_url'])
    frame.to_csv('cities_urls.csv', index=False)
|
||
|
|
||
|
# Script entry point: runs whenever this module is executed or imported.
# It calls the zero-argument get_citys, which reads cities_data.csv and writes
# cities_urls.csv.
get_citys()
|