🐛 Fix parsing of image articles

This commit is contained in:
xtaodada 2023-08-23 20:30:18 +08:00
parent d0f72dde79
commit 42d6108b48
Signed by: xtaodada
GPG Key ID: 4CBB3F4FA8C85659
3 changed files with 68 additions and 19 deletions

View File

@ -1,9 +1,11 @@
from typing import Any, List
from enum import Enum
from typing import Any, List, Optional
from pydantic import BaseModel, PrivateAttr
__all__ = (
"PostStat",
"PostType",
"PostInfo",
)
@ -16,6 +18,14 @@ class PostStat(BaseModel):
bookmark_num: int = 0
class PostType(int, Enum):
    """Post type, mirroring the integer ``view_type`` field of the Hyperion API."""

    TEXT = 1   # plain text/article post (content is HTML)
    IMAGE = 2  # image-album post — content appears to be a JSON payload; see image renderer
    VIDEO = 5  # video post (rendered through the same path as TEXT)
class PostInfo(BaseModel):
_data: dict = PrivateAttr()
post_id: int
@ -24,6 +34,10 @@ class PostInfo(BaseModel):
image_urls: List[str]
created_at: int
video_urls: List[str]
content: str
cover: Optional[str]
view_type: PostType
stat: PostStat
def __init__(self, _data: dict, **data: Any):
super().__init__(**data)
@ -42,6 +56,10 @@ class PostInfo(BaseModel):
created_at = post["created_at"]
user = _data_post["user"] # 用户数据
user_uid = user["uid"] # 用户ID
content = post["content"]
cover = post["cover"]
view_type = PostType(post["view_type"])
stat = PostStat(**_data_post["stat"])
return PostInfo(
_data=data,
post_id=post_id,
@ -50,6 +68,10 @@ class PostInfo(BaseModel):
image_urls=image_urls,
video_urls=video_urls,
created_at=created_at,
content=content,
cover=cover,
view_type=view_type,
stat=stat,
)
def __getitem__(self, item):

View File

@ -1,11 +1,12 @@
import json
from datetime import datetime
from typing import Union, List
from typing import Union, List, Dict
from bs4 import BeautifulSoup, Tag, PageElement
from src import template_env
from src.api.hyperion import Hyperion
from src.api.models import PostStat
from src.api.models import PostStat, PostInfo, PostType
from src.env import DEBUG
from src.error import ArticleNotFoundError
from src.services.cache import (
@ -93,6 +94,43 @@ def parse_stat(stat: PostStat):
)
def get_public_data(game_id: str, post_id: int, post_info: PostInfo) -> Dict:
    """Build the template context shared by every article renderer.

    Args:
        game_id: game channel slug used in the canonical miyoushe URL.
        post_id: numeric id of the post.
        post_info: parsed post payload.

    Returns:
        Dict of keyword arguments passed to ``template.render``.
    """
    from datetime import timezone  # local import: module only imports the `datetime` class

    # Interpret the epoch timestamp as UTC so the literal "Z" suffix in the
    # strftime format is truthful.  A naive fromtimestamp() would convert to
    # the server's local zone while still labelling the result as UTC.
    published = datetime.fromtimestamp(post_info.created_at, tz=timezone.utc)
    return {
        "url": f"https://www.miyoushe.com/{game_id}/article/{post_id}",
        "published_time": published.strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
        "channel": CHANNEL_MAP.get(game_id, "HSRCN"),  # default presumably Star Rail CN — confirm
        "stat": parse_stat(post_info.stat),
        "post": post_info,
        "author": post_info["post"]["user"],
    }
async def process_article_text(game_id: str, post_id: int, post_info: PostInfo) -> str:
    """Render a TEXT or VIDEO post: parse the HTML content and fill the template."""
    soup = BeautifulSoup(post_info.content, features="lxml")
    description = get_description(soup)
    article = parse_content(soup, post_info.subject, post_info.video_urls)
    context = get_public_data(game_id, post_id, post_info)
    return template.render(description=description, article=article, **context)
async def process_article_image(game_id: str, post_id: int, post_info: PostInfo) -> str:
    """Render an IMAGE post: its content is a JSON payload with ``imgs`` and ``describe``."""
    payload = json.loads(post_info.content)
    description = payload.get("describe", "")
    # One <img> tag per picture, followed by the optional description paragraph.
    fragments = [f'<img src="{image}"/>\n' for image in payload.get("imgs", [])]
    if description:
        fragments.append(f"<p>{description}</p>\n")
    article = "".join(fragments)
    context = get_public_data(game_id, post_id, post_info)
    return template.render(description=description, article=article, **context)
async def process_article(game_id: str, post_id: int) -> str:
path = get_article_cache_file_path(game_id, post_id)
if content := await get_article_cache_file(path):
@ -105,21 +143,10 @@ async def process_article(game_id: str, post_id: int) -> str:
post_info = await hyperion.get_post_info(gids=gids, post_id=post_id)
finally:
await hyperion.close()
post_data = post_info["post"]["post"]
post_soup = BeautifulSoup(post_data["content"], features="lxml")
author_data = post_info["post"]["user"]
content = template.render(
url=f"https://www.miyoushe.com/{game_id}/article/{post_id}",
description=get_description(post_soup),
published_time=datetime.fromtimestamp(post_info.created_at).strftime(
"%Y-%m-%dT%H:%M:%S.%fZ"
),
channel=CHANNEL_MAP.get(game_id, "HSRCN"),
article=parse_content(post_soup, post_info.subject, post_info.video_urls),
stat=parse_stat(PostStat(**post_info["post"]["stat"])),
post=post_data,
author=author_data,
)
if post_info.view_type in [PostType.TEXT, PostType.VIDEO]:
content = await process_article_text(game_id, post_id, post_info)
elif post_info.view_type == PostType.IMAGE:
content = await process_article_image(game_id, post_id, post_info)
if not DEBUG:
await write_article_cache_file(path, content)
add_delete_file_job(path)

View File

@ -19,5 +19,5 @@ async def parse_article(game_id: str, post_id: int, request: Request):
logger.warning(e.msg)
return get_redirect_response(request)
except Exception as _:
logger.exception("Failed to get article.")
logger.exception(f"Failed to get article {game_id} {post_id}")
return get_redirect_response(request)