🐛 Fix handling of <strong> and <h1>–<h6> tags

This commit is contained in:
xtaodada 2023-08-26 13:23:30 +08:00
parent 3ceddde101
commit 4ea59cb031
Signed by: xtaodada
GPG Key ID: 4CBB3F4FA8C85659
2 changed files with 31 additions and 2 deletions

View File

@ -1,6 +1,7 @@
import json
from datetime import datetime from datetime import datetime
from enum import Enum from enum import Enum
from typing import Any, List, Optional from typing import Any, List, Optional, Dict
from pydantic import BaseModel, PrivateAttr, Field, AliasChoices from pydantic import BaseModel, PrivateAttr, Field, AliasChoices
@ -145,6 +146,24 @@ class PostInfo(BaseModel):
return f"https://www.miyoushe.com/{self.game_id_str}/accountCenter/postList?id={author['uid']}" return f"https://www.miyoushe.com/{self.game_id_str}/accountCenter/postList?id={author['uid']}"
return f"https://www.hoyolab.com/accountCenter/postList?id={author['uid']}" return f"https://www.hoyolab.com/accountCenter/postList?id={author['uid']}"
@staticmethod
def parse_structured_content(data: List[Dict]) -> str:
    """Render a list of structured-content operations as simple HTML.

    Each entry in ``data`` is expected to be a mapping with an ``"insert"``
    key and, optionally, an ``"attributes"`` mapping:

    * a string ``insert`` becomes ``<p>text</p>``, or
      ``<p><a href="link">text</a></p>`` when ``attributes["link"]`` is set;
    * a mapping ``insert`` with an ``"image"`` key becomes
      ``<img src="image" />``;
    * anything else (empty entries, missing/``None`` inserts, unknown
      embeds, non-mapping items) is skipped.

    Text and URLs are HTML-escaped before interpolation so that characters
    such as ``<``, ``&`` or ``"`` coming from the remote payload cannot break
    the generated markup or inject tags/attributes.

    Args:
        data: Decoded structured-content operations. ``None`` or an empty
            list yields an empty string.

    Returns:
        The rendered fragments joined with newlines.
    """
    # Local import keeps this method self-contained without touching the
    # module-level import block.
    from html import escape

    fragments = []
    for item in data or []:
        # Ignore malformed entries rather than failing the whole post.
        if not item or not isinstance(item, dict):
            continue
        insert = item.get("insert")
        if insert is None:
            continue
        if isinstance(insert, str):
            # Element content only needs &, < and > escaped.
            text = escape(insert, quote=False)
            attributes = item.get("attributes")
            link = attributes.get("link") if isinstance(attributes, dict) else None
            if link:
                # Attribute values additionally need quotes escaped.
                fragments.append(f'<p><a href="{escape(str(link))}">{text}</a></p>')
            else:
                fragments.append(f"<p>{text}</p>")
        elif isinstance(insert, dict):
            if image := insert.get("image"):
                fragments.append(f'<img src="{escape(str(image))}" />')
    return "\n".join(fragments)
@classmethod @classmethod
def paste_data(cls, data: dict, hoyolab: bool = False) -> "PostInfo": def paste_data(cls, data: dict, hoyolab: bool = False) -> "PostInfo":
_data_post = data["post"] _data_post = data["post"]
@ -163,6 +182,12 @@ class PostInfo(BaseModel):
user = _data_post["user"] # 用户数据 user = _data_post["user"] # 用户数据
user_uid = user["uid"] # 用户ID user_uid = user["uid"] # 用户ID
content = post["content"] content = post["content"]
if (
hoyolab
and ("<" not in content)
and (structured_content := post.get("structured_content"))
):
content = PostInfo.parse_structured_content(json.loads(structured_content))
cover = post["cover"] cover = post["cover"]
cover_list = _data_post.get("cover_list", []) cover_list = _data_post.get("cover_list", [])
if (not cover) and cover_list: if (not cover) and cover_list:

View File

@ -67,6 +67,10 @@ def parse_tag(tag: Union[Tag, PageElement], post_info: PostInfo) -> str:
): ):
return format_image_url(src) return format_image_url(src)
return "" return ""
elif tag.name in ["h1", "h2", "h3", "h4", "h5", "h6"]:
return f"<{tag.name}>{tag.get_text()}</{tag.name}>"
elif tag.name == "strong":
return f"<strong>{tag.get_text()}</strong>"
elif tag.name == "p": elif tag.name == "p":
t = tag.get_text() t = tag.get_text()
if not t: if not t:
@ -75,7 +79,7 @@ def parse_tag(tag: Union[Tag, PageElement], post_info: PostInfo) -> str:
for tag_ in tag.children: for tag_ in tag.children:
if text := parse_tag(tag_, post_info): if text := parse_tag(tag_, post_info):
post_text.append(text) post_text.append(text)
return "<p>" + "\n".join(post_text) + "</p>" return "<p>" + "\n".join(post_text) + "</p>\n"
elif tag.name == "iframe": elif tag.name == "iframe":
src = tag.get("src") src = tag.get("src")
if src and "https://www.youtube.com" in src: if src and "https://www.youtube.com" in src: