From cebbbc86d2d21747cbaeb49f36bf18a9c94ae1ee Mon Sep 17 00:00:00 2001 From: MingerMinger <106759770+MingerMinger@users.noreply.github.com> Date: Sun, 26 May 2024 22:13:52 +0800 Subject: [PATCH] =?UTF-8?q?=E7=88=AC=E5=8F=96http://www.weather.com.cn/=20?= =?UTF-8?q?2023-2024=E6=AF=8F=E6=9C=88=E6=95=B0=E6=8D=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- main.py | 93 ++++++++++++++++++++++++++++++++++++++++++++++++++------- test.py | 40 +++++++++++++++++++++++++ 2 files changed, 123 insertions(+), 10 deletions(-) create mode 100644 test.py diff --git a/main.py b/main.py index 5596b44..48ebef4 100644 --- a/main.py +++ b/main.py @@ -1,16 +1,89 @@ -# This is a sample Python script. +import json +from datetime import datetime, timedelta -# Press Shift+F10 to execute it or replace it with your code. -# Press Double Shift to search everywhere for classes, files, tool windows, actions, and settings. +import httpx + +response = httpx.get('https://j.i8tq.com/weather2020/search/city.js') +citys_str = "\n".join(response.text.split("\n")[1:]) +citys_json = json.loads(citys_str) +citys_dict = {} +for city, provinces in citys_json.items(): + city_list = [] + for _, province_mes in provinces.items(): + + for province_name in province_mes: + area_id = citys_json[city][_][province_name]['AREAID'] + city_list.append({province_name: area_id}) + + citys_dict[city] = city_list + +root_url = "http://d1.weather.com.cn/calendar_new" -def print_hi(name): - # Use a breakpoint in the code line below to debug your script. - print(f'Hi, {name}') # Press Ctrl+F8 to toggle the breakpoint. +def generate_year_and_month_list(start_year, start_month): + current_date = datetime.now() # 获取当前日期 + start_date = datetime(start_year, start_month, 1) # 构建起始日期 + + year_and_month_list = [] # 存储年份和月份的列表 + + # 从起始日期开始,逐月生成年份和月份,直到当前日期 + while start_date <= current_date: + year = start_date.year + month = start_date.month + year_and_month_str = f"{year}{month:02d}" # 构建年份和月份的字符串,月份部分始终为两位数 + year_and_month_list.append(year_and_month_str) # 将年份和月份添加到列表中 + + # 增加一个月 + if start_date.month == 12: # 如果当前月份是12月,增加一年,月份重置为1月 + start_date = datetime(start_date.year + 1, 1, 1) + else: # 否则,增加一个月 + start_date += timedelta(days=31) # 这里简化处理,每次增加31天,不考虑月份天数的不同 + + return year_and_month_list -# Press the green button in the gutter to run the script. -if __name__ == '__main__': - print_hi('PyCharm') +all_dates_str = generate_year_and_month_list(2023, 1) +years = [2023, 2024] +for year in years: + for city, provinces in citys_dict.items(): + for province in provinces: + for area_id in province.values(): + for date in all_dates_str: + uri = f"/{year}/{area_id}_{date}.html" + headers = { + 'Accept': '*/*', + 'Accept-Language': 'zh-CN,zh;q=0.9', + 'Connection': 'keep-alive', + 'Referer': 'http://www.weather.com.cn/', + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36', + } + timestamp = str(int(datetime.timestamp(datetime.now()))) + params = { + '_': timestamp, + } + + response = httpx.get( + root_url+uri, + params=params, + headers=headers, + verify=False, + ) + + content = response.text + encoding = response.encoding + decoded_html_content = content.encode(encoding).decode('utf-8') + json_str = decoded_html_content[len("var fc40 = "):] + + # 解析 JSON 字符串为字典 + data_dict = json.loads(json_str) + ls = [] + for item in data_dict: + d = dict() + d["date"] = item["date"] + d["hgl"] = item["hgl"] + d["hmax"] = item["hmax"] + d["hmin"] = item["hmin"] + ls.append(d) + print(province) + print(ls) -# See PyCharm help at https://www.jetbrains.com/help/pycharm/ diff --git a/test.py b/test.py new file mode 100644 index 0000000..57941d4 --- /dev/null +++ b/test.py @@ -0,0 +1,40 @@ +import json +from datetime import datetime + +import requests + +headers = { + 'Accept': '*/*', + 'Accept-Language': 'zh-CN,zh;q=0.9', + 'Connection': 'keep-alive', + 'Referer': 'http://www.weather.com.cn/', + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36', +} +timestamp = str(int(datetime.timestamp(datetime.now()))) +params = { + '_': timestamp, +} + +response = requests.get( + 'http://d1.weather.com.cn/calendar_new/2024/101040200_202405.html', + params=params, + headers=headers, + verify=False, +) + +content = response.text +encoding = response.encoding +decoded_html_content = content.encode(encoding).decode('utf-8') +json_str = decoded_html_content[len("var fc40 = "):] + +# 解析 JSON 字符串为字典 +data_dict = json.loads(json_str) +ls = [] +for item in data_dict: + d = dict() + d["date"] = item["date"] + d["hgl"] = item["hgl"] + d["hmax"] = item["hmax"] + d["hmin"] = item["hmin"] + ls.append(d) +print(ls)