From 4be1f7c455f60b6db1feac422873508cb00bd813 Mon Sep 17 00:00:00 2001 From: xtaodada Date: Thu, 24 Aug 2023 11:33:16 +0800 Subject: [PATCH] :bug: Fix photo invalid dimensions --- src/api/models.py | 6 +++++- src/render/article.py | 23 ++++++++++++----------- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/src/api/models.py b/src/api/models.py index 83f9e39..e8d18aa 100644 --- a/src/api/models.py +++ b/src/api/models.py @@ -57,7 +57,11 @@ class PostInfo(BaseModel): post_id = post["post_id"] subject = post["subject"] image_list = _data_post["image_list"] - image_urls = [image["url"] for image in image_list] + image_urls = [ + image["url"] + for image in image_list + if abs(image["width"] - image["height"]) < 1300 + ] vod_list = _data_post["vod_list"] video_urls = [vod["resolutions"][-1]["url"] for vod in vod_list] created_at = post["created_at"] diff --git a/src/render/article.py b/src/render/article.py index 1eceef8..258c68f 100644 --- a/src/render/article.py +++ b/src/render/article.py @@ -43,7 +43,7 @@ def format_image_url(url: str) -> str: return f'' -def parse_tag(tag: Union[Tag, PageElement]) -> str: +def parse_tag(tag: Union[Tag, PageElement], post_info: PostInfo) -> str: if tag.name == "a": href = tag.get("href") if href and href.startswith("/"): @@ -52,7 +52,7 @@ def parse_tag(tag: Union[Tag, PageElement]) -> str: return f'{tag.get_text()}' elif tag.name == "img": src = tag.get("src") - if src and "upload-bbs.miyoushe.com" in src: + if src and "upload-bbs.miyoushe.com" in src and src in post_info.image_urls: return format_image_url(src) return "" elif tag.name == "p": @@ -61,25 +61,25 @@ def parse_tag(tag: Union[Tag, PageElement]) -> str: return "" post_text = [] for tag_ in tag.children: - if text := parse_tag(tag_): + if text := parse_tag(tag_, post_info): post_text.append(text) return "

" + "\n".join(post_text) + "

" elif tag.name == "div": post_text = [] for tag_ in tag.children: - if text := parse_tag(tag_): + if text := parse_tag(tag_, post_info): post_text.append(text) return "\n".join(post_text) return replace_br(tag.get_text().strip()) -def parse_content(soup: BeautifulSoup, title: str, video_urls: List[str]) -> str: - post_text = f"

{title}

\n" - if video_urls: - for url in video_urls: +def parse_content(soup: BeautifulSoup, post_info: PostInfo) -> str: + post_text = f"

{post_info.subject}

\n" + if post_info.video_urls: + for url in post_info.video_urls: post_text += f'\n' for tag in soup.find("body").children: - if text := parse_tag(tag): + if text := parse_tag(tag, post_info): post_text += f"{text}\n" return post_text @@ -127,7 +127,7 @@ async def process_article_text(game_id: str, post_id: int, post_info: PostInfo) post_soup = BeautifulSoup(post_info.content, features="lxml") return template.render( description=get_description(post_soup), - article=parse_content(post_soup, post_info.subject, post_info.video_urls), + article=parse_content(post_soup, post_info), **get_public_data(game_id, post_id, post_info), ) @@ -137,7 +137,8 @@ async def process_article_image(game_id: str, post_id: int, post_info: PostInfo) description = json_data.get("describe", "") article = "" for image in json_data.get("imgs", []): - article += format_image_url(image) + if image in post_info.image_urls: + article += format_image_url(image) if description: article += f"

{description}

\n" return template.render(