From ac2e61a58d5619601bf028114d4c6e0e22f0cef8 Mon Sep 17 00:00:00 2001 From: xtaodada Date: Sat, 17 Feb 2024 13:50:20 +0800 Subject: [PATCH] feat: support nitter --- defs/feed.py | 52 ++++++++++++++++++++++++++++++++++++++---- defs/glover.py.example | 1 + defs/models.py | 11 +++++++-- plugins/update.py | 4 ++-- 4 files changed, 59 insertions(+), 9 deletions(-) diff --git a/defs/feed.py b/defs/feed.py index cbdb05b..5ef4d15 100644 --- a/defs/feed.py +++ b/defs/feed.py @@ -1,14 +1,17 @@ import traceback +from asyncio import Lock from datetime import datetime from typing import List, Optional from bs4 import BeautifulSoup from init import request -from defs.glover import rss_hub_host +from defs.glover import rss_hub_host, nitter_host from defs.models import Tweet, User from feedparser import parse, FeedParserDict +LOCK = Lock() + class UsernameNotFound(Exception): pass @@ -32,7 +35,7 @@ def retry(func): @retry -async def get(username: str, host: str) -> Optional[FeedParserDict]: +async def rsshub_get(username: str, host: str) -> Optional[FeedParserDict]: url = f"{host}/twitter/user/{username}" response = await request.get(url) if response.status_code == 200: @@ -42,6 +45,18 @@ async def get(username: str, host: str) -> Optional[FeedParserDict]: raise HostNeedChange +@retry +async def nitter_get(username: str, host: str) -> Optional[FeedParserDict]: + url = f"{host}/{username}/rss" + async with LOCK: + response = await request.get(url) + if response.status_code == 200: + return parse(response.text) + elif response.status_code == 404: + raise UsernameNotFound + raise HostNeedChange + + async def parse_tweets(data: List[FeedParserDict]) -> List[Tweet]: tweets = [] for tweet in data: @@ -56,7 +71,7 @@ async def parse_tweets(data: List[FeedParserDict]) -> List[Tweet]: tweets.append( Tweet( content=content, - url=url, + old_url=url, time=time, images=images ) @@ -74,13 +89,40 @@ async def parse_user(username: str, data: FeedParserDict) -> User: async def get_user(username: str) -> Optional[User]: + data = None + try: + data = await get_user_rsshub(username) + except HostNeedChange: + pass + if not data: + try: + data = await get_user_nitter(username) + except HostNeedChange: + raise UsernameNotFound + return data + + +async def get_user_rsshub(username: str) -> Optional[User]: for host in rss_hub_host: try: - data = await get(username, host) + data = await rsshub_get(username, host) if data: return await parse_user(username, data) except HostNeedChange: if host == rss_hub_host[-1]: - raise UsernameNotFound + raise HostNeedChange + continue + return None + + +async def get_user_nitter(username: str) -> Optional[User]: + for host in nitter_host: + try: + data = await nitter_get(username, host) + if data: + return await parse_user(username, data) + except HostNeedChange: + if host == nitter_host[-1]: + raise HostNeedChange continue return None diff --git a/defs/glover.py.example b/defs/glover.py.example index 8122d5e..161e2ff 100644 --- a/defs/glover.py.example +++ b/defs/glover.py.example @@ -1,6 +1,7 @@ api_id = 1 api_hash = "a" rss_hub_host = ["https://rsshub.app"] +nitter_host = [] cid = 11 tid = None owner = 11 diff --git a/defs/models.py b/defs/models.py index b963f10..a7c20ac 100644 --- a/defs/models.py +++ b/defs/models.py @@ -2,23 +2,30 @@ from datetime import datetime, timedelta from typing import List from pydantic import BaseModel +from httpx import URL class Tweet(BaseModel): content: str - url: str + old_url: str time: datetime images: List[str] @property def id(self) -> int: - return int(self.url.split("/")[-1]) + tid = self.url.split("/")[-1].replace("#m", "") + return int(tid) @property def time_str(self) -> str: utc_8_time = self.time + timedelta(hours=8) return utc_8_time.strftime("%Y-%m-%d %H:%M:%S") + @property + def url(self) -> str: + u = URL(self.old_url) + return self.old_url.replace(u.host, "twitter.com") + class User(BaseModel): username: str diff --git a/plugins/update.py b/plugins/update.py index 84ebd94..e9d806d 100644 --- a/plugins/update.py +++ b/plugins/update.py @@ -24,8 +24,8 @@ async def update_all(_, message: Message): await msg.edit("检查更新完毕!") -@scheduler.scheduled_job("cron", minute="*/15", id="update_all") -async def update_all_15_minutes(): +@scheduler.scheduled_job("cron", hour="*", minute="0", id="update_all") +async def update_all_60_minutes(): if _lock.locked(): return async with _lock: