mirror of
https://github.com/PaiGramTeam/FixMiYouShe.git
synced 2024-11-16 04:45:40 +00:00
🐛 Fix parse img article
This commit is contained in:
parent
d0f72dde79
commit
42d6108b48
@ -1,9 +1,11 @@
|
||||
from typing import Any, List
|
||||
from enum import Enum
|
||||
from typing import Any, List, Optional
|
||||
|
||||
from pydantic import BaseModel, PrivateAttr
|
||||
|
||||
__all__ = (
|
||||
"PostStat",
|
||||
"PostType",
|
||||
"PostInfo",
|
||||
)
|
||||
|
||||
@ -16,6 +18,14 @@ class PostStat(BaseModel):
|
||||
bookmark_num: int = 0
|
||||
|
||||
|
||||
class PostType(int, Enum):
    """Kind of post, as an integer-valued enumeration.

    Members are real ``int`` instances, so they compare equal to the raw
    integer codes; ``PostInfo`` parsing builds one from ``post["view_type"]``.
    """

    # Code 1
    TEXT = 1
    # Code 2
    IMAGE = 2
    # Code 5 (3 and 4 are not defined here)
    VIDEO = 5
|
||||
|
||||
|
||||
class PostInfo(BaseModel):
|
||||
_data: dict = PrivateAttr()
|
||||
post_id: int
|
||||
@ -24,6 +34,10 @@ class PostInfo(BaseModel):
|
||||
image_urls: List[str]
|
||||
created_at: int
|
||||
video_urls: List[str]
|
||||
content: str
|
||||
cover: Optional[str]
|
||||
view_type: PostType
|
||||
stat: PostStat
|
||||
|
||||
def __init__(self, _data: dict, **data: Any):
|
||||
super().__init__(**data)
|
||||
@ -42,6 +56,10 @@ class PostInfo(BaseModel):
|
||||
created_at = post["created_at"]
|
||||
user = _data_post["user"] # 用户数据
|
||||
user_uid = user["uid"] # 用户ID
|
||||
content = post["content"]
|
||||
cover = post["cover"]
|
||||
view_type = PostType(post["view_type"])
|
||||
stat = PostStat(**_data_post["stat"])
|
||||
return PostInfo(
|
||||
_data=data,
|
||||
post_id=post_id,
|
||||
@ -50,6 +68,10 @@ class PostInfo(BaseModel):
|
||||
image_urls=image_urls,
|
||||
video_urls=video_urls,
|
||||
created_at=created_at,
|
||||
content=content,
|
||||
cover=cover,
|
||||
view_type=view_type,
|
||||
stat=stat,
|
||||
)
|
||||
|
||||
def __getitem__(self, item):
|
||||
|
@ -1,11 +1,12 @@
|
||||
import json
|
||||
from datetime import datetime
|
||||
from typing import Union, List
|
||||
from typing import Union, List, Dict
|
||||
|
||||
from bs4 import BeautifulSoup, Tag, PageElement
|
||||
|
||||
from src import template_env
|
||||
from src.api.hyperion import Hyperion
|
||||
from src.api.models import PostStat
|
||||
from src.api.models import PostStat, PostInfo, PostType
|
||||
from src.env import DEBUG
|
||||
from src.error import ArticleNotFoundError
|
||||
from src.services.cache import (
|
||||
@ -93,6 +94,43 @@ def parse_stat(stat: PostStat):
|
||||
)
|
||||
|
||||
|
||||
def get_public_data(game_id: str, post_id: int, post_info: PostInfo) -> Dict:
    """Build the template variables shared by every article layout.

    Args:
        game_id: Game identifier; placed in the article URL and used as the
            ``CHANNEL_MAP`` lookup key (falls back to ``"HSRCN"``).
        post_id: Post identifier placed in the article URL.
        post_info: Parsed post supplying the creation timestamp, the
            statistics and the author record.

    Returns:
        A dict with the keys ``url``, ``published_time``, ``channel``,
        ``stat``, ``post`` and ``author``.
    """
    # Imported locally because the module header only pulls in ``datetime``.
    from datetime import timezone

    # The format string ends with a literal "Z" (the UTC designator), so the
    # epoch value must be converted in UTC; a bare fromtimestamp() would use
    # the server's local zone and mislabel the result on non-UTC hosts.
    published_at = datetime.fromtimestamp(post_info.created_at, tz=timezone.utc)

    return {
        "url": f"https://www.miyoushe.com/{game_id}/article/{post_id}",
        "published_time": published_at.strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
        "channel": CHANNEL_MAP.get(game_id, "HSRCN"),
        "stat": parse_stat(post_info.stat),
        "post": post_info,
        # PostInfo supports item access into the raw payload it wraps.
        "author": post_info["post"]["user"],
    }
|
||||
|
||||
|
||||
async def process_article_text(game_id: str, post_id: int, post_info: PostInfo) -> str:
    """Render a post whose content is HTML through the article template.

    Args:
        game_id: Game identifier forwarded to ``get_public_data``.
        post_id: Post identifier forwarded to ``get_public_data``.
        post_info: Parsed post; its ``content`` is parsed with the lxml
            parser, and ``subject`` / ``video_urls`` feed ``parse_content``.

    Returns:
        The rendered template output.
    """
    soup = BeautifulSoup(post_info.content, features="lxml")

    # Keep this order: the description is taken from the soup before
    # parse_content() works on the same tree.
    summary = get_description(soup)
    body = parse_content(soup, post_info.subject, post_info.video_urls)
    shared_fields = get_public_data(game_id, post_id, post_info)

    return template.render(description=summary, article=body, **shared_fields)
|
||||
|
||||
|
||||
async def process_article_image(game_id: str, post_id: int, post_info: PostInfo) -> str:
    """Render an image post, whose content is a JSON document.

    The JSON is read for two keys: ``imgs`` (iterated as image URLs) and
    ``describe`` (caption text). Each image becomes an ``<img>`` tag and a
    non-empty caption becomes a trailing ``<p>`` paragraph.

    Args:
        game_id: Game identifier forwarded to ``get_public_data``.
        post_id: Post identifier forwarded to ``get_public_data``.
        post_info: Parsed post; ``content`` must be a JSON object string.

    Returns:
        The rendered template output.

    Raises:
        json.JSONDecodeError: If ``post_info.content`` is not valid JSON.
    """
    # Imported locally so the block does not depend on a new module-level import.
    from html import escape

    json_data = json.loads(post_info.content)
    description = json_data.get("describe", "")
    # ``or []`` also covers an explicit JSON null, which .get()'s default
    # would not replace.
    images = json_data.get("imgs") or []

    # Both values come from the remote post payload and are placed into raw
    # HTML, so escape them to keep quotes/angle brackets from breaking out
    # of the attribute or injecting markup.
    # NOTE(review): assumes ``describe`` is plain text, not intentional HTML - confirm.
    fragments = [
        f'<img src="{escape(str(image), quote=True)}"/>\n' for image in images
    ]
    if description:
        fragments.append(f"<p>{escape(str(description))}</p>\n")
    article = "".join(fragments)

    return template.render(
        description=description,
        article=article,
        **get_public_data(game_id, post_id, post_info),
    )
|
||||
|
||||
|
||||
async def process_article(game_id: str, post_id: int) -> str:
|
||||
path = get_article_cache_file_path(game_id, post_id)
|
||||
if content := await get_article_cache_file(path):
|
||||
@ -105,21 +143,10 @@ async def process_article(game_id: str, post_id: int) -> str:
|
||||
post_info = await hyperion.get_post_info(gids=gids, post_id=post_id)
|
||||
finally:
|
||||
await hyperion.close()
|
||||
post_data = post_info["post"]["post"]
|
||||
post_soup = BeautifulSoup(post_data["content"], features="lxml")
|
||||
author_data = post_info["post"]["user"]
|
||||
content = template.render(
|
||||
url=f"https://www.miyoushe.com/{game_id}/article/{post_id}",
|
||||
description=get_description(post_soup),
|
||||
published_time=datetime.fromtimestamp(post_info.created_at).strftime(
|
||||
"%Y-%m-%dT%H:%M:%S.%fZ"
|
||||
),
|
||||
channel=CHANNEL_MAP.get(game_id, "HSRCN"),
|
||||
article=parse_content(post_soup, post_info.subject, post_info.video_urls),
|
||||
stat=parse_stat(PostStat(**post_info["post"]["stat"])),
|
||||
post=post_data,
|
||||
author=author_data,
|
||||
)
|
||||
if post_info.view_type in [PostType.TEXT, PostType.VIDEO]:
|
||||
content = await process_article_text(game_id, post_id, post_info)
|
||||
elif post_info.view_type == PostType.IMAGE:
|
||||
content = await process_article_image(game_id, post_id, post_info)
|
||||
if not DEBUG:
|
||||
await write_article_cache_file(path, content)
|
||||
add_delete_file_job(path)
|
||||
|
@ -19,5 +19,5 @@ async def parse_article(game_id: str, post_id: int, request: Request):
|
||||
logger.warning(e.msg)
|
||||
return get_redirect_response(request)
|
||||
except Exception as _:
|
||||
logger.exception("Failed to get article.")
|
||||
logger.exception(f"Failed to get article {game_id} {post_id}")
|
||||
return get_redirect_response(request)
|
||||
|
Loading…
Reference in New Issue
Block a user