🐛 Fix Honkai web page parsing

This commit is contained in:
xtaodada 2023-06-11 10:40:09 +08:00
parent 475e962089
commit b673997f95
Signed by: xtaodada
GPG Key ID: 4CBB3F4FA8C85659

View File

@ -1,3 +1,4 @@
import re
from datetime import datetime from datetime import datetime
from typing import List from typing import List
@ -24,14 +25,17 @@ def parse_reward(reward: List[str]) -> Reward:
try: try:
name = reward_map.get(reward[0]) name = reward_map.get(reward[0])
if not name: if not name:
# 判断是否为中文
if not re.search("[\u4e00-\u9fa5]", reward[0]):
print("Unknown reward: ", reward[0]) print("Unknown reward: ", reward[0])
name = reward[0] name = reward[0]
return Reward( return Reward(
name=name, name=name,
cnt=int(reward[1]), cnt=int(reward[1]),
) )
except ValueError: except Exception as e:
print("Bad reward data: ", reward) print("Bad reward data: ", reward)
raise e
def parse_code(tr: Tag) -> Code: def parse_code(tr: Tag) -> Code:
@ -57,11 +61,18 @@ def parse_code(tr: Tag) -> Code:
rewards = [] rewards = []
for reward in tds[1].find_all("div", {"class": "flex"}): for reward in tds[1].find_all("div", {"class": "flex"}):
reward_div = reward.text.strip().split("\xa0x ") reward_div = reward.text.strip().split("\xa0x ")
if len(reward_div) < 2:
print("Bad td data: ", tds[1])
continue
parsed_reward = parse_reward(reward_div) parsed_reward = parse_reward(reward_div)
if parsed_reward: if parsed_reward:
rewards.append(parsed_reward) rewards.append(parsed_reward)
if not rewards:
for reward in tds[1].find_all("a"): for reward in tds[1].find_all("a"):
reward_a = reward.text.strip().split(" x ") reward_a = reward.text.strip().split(" x ")
if len(reward_a) < 2:
print("Bad a data: ", tds[1])
continue
parsed_reward = parse_reward(reward_a) parsed_reward = parse_reward(reward_a)
if parsed_reward: if parsed_reward:
rewards.append(parsed_reward) rewards.append(parsed_reward)