🐛 Fix strong h1~h6 tag

This commit is contained in:
xtaodada 2023-08-26 13:23:30 +08:00
parent 3ceddde101
commit 4ea59cb031
Signed by: xtaodada
GPG Key ID: 4CBB3F4FA8C85659
2 changed files with 31 additions and 2 deletions

View File

@ -1,6 +1,7 @@
import json
from datetime import datetime
from enum import Enum
from typing import Any, List, Optional
from typing import Any, List, Optional, Dict
from pydantic import BaseModel, PrivateAttr, Field, AliasChoices
@ -145,6 +146,24 @@ class PostInfo(BaseModel):
return f"https://www.miyoushe.com/{self.game_id_str}/accountCenter/postList?id={author['uid']}"
return f"https://www.hoyolab.com/accountCenter/postList?id={author['uid']}"
@staticmethod
def parse_structured_content(data: List[Dict]) -> str:
content = []
for item in data:
if not item or item.get("insert") is None:
continue
insert = item["insert"]
if isinstance(insert, str):
if attr := item.get("attributes"):
if link := attr.get("link"):
content.append(f'<p><a href="{link}">{insert}</a></p>')
continue
content.append(f"<p>{insert}</p>")
elif isinstance(insert, dict):
if image := insert.get("image"):
content.append(f'<img src="{image}" />')
return "\n".join(content)
@classmethod
def paste_data(cls, data: dict, hoyolab: bool = False) -> "PostInfo":
_data_post = data["post"]
@ -163,6 +182,12 @@ class PostInfo(BaseModel):
user = _data_post["user"] # 用户数据
user_uid = user["uid"] # 用户ID
content = post["content"]
if (
hoyolab
and ("<" not in content)
and (structured_content := post.get("structured_content"))
):
content = PostInfo.parse_structured_content(json.loads(structured_content))
cover = post["cover"]
cover_list = _data_post.get("cover_list", [])
if (not cover) and cover_list:

View File

@ -67,6 +67,10 @@ def parse_tag(tag: Union[Tag, PageElement], post_info: PostInfo) -> str:
):
return format_image_url(src)
return ""
elif tag.name in ["h1", "h2", "h3", "h4", "h5", "h6"]:
return f"<{tag.name}>{tag.get_text()}</{tag.name}>"
elif tag.name == "strong":
return f"<strong>{tag.get_text()}</strong>"
elif tag.name == "p":
t = tag.get_text()
if not t:
@ -75,7 +79,7 @@ def parse_tag(tag: Union[Tag, PageElement], post_info: PostInfo) -> str:
for tag_ in tag.children:
if text := parse_tag(tag_, post_info):
post_text.append(text)
return "<p>" + "\n".join(post_text) + "</p>"
return "<p>" + "\n".join(post_text) + "</p>\n"
elif tag.name == "iframe":
src = tag.get("src")
if src and "https://www.youtube.com" in src: