feat: support nitter

This commit is contained in:
xtaodada 2024-02-17 13:50:20 +08:00
parent 432af14c59
commit ac2e61a58d
Signed by: xtaodada
GPG Key ID: 4CBB3F4FA8C85659
4 changed files with 59 additions and 9 deletions

View File

@@ -1,14 +1,17 @@
import traceback import traceback
from asyncio import Lock
from datetime import datetime from datetime import datetime
from typing import List, Optional from typing import List, Optional
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from init import request from init import request
from defs.glover import rss_hub_host from defs.glover import rss_hub_host, nitter_host
from defs.models import Tweet, User from defs.models import Tweet, User
from feedparser import parse, FeedParserDict from feedparser import parse, FeedParserDict
LOCK = Lock()
class UsernameNotFound(Exception):
    """Raised when a username cannot be resolved to a feed.

    Used when a feed host answers 404 for the username, and also when
    every fallback host has failed.
    """
@@ -32,7 +35,7 @@ def retry(func):
@retry @retry
async def get(username: str, host: str) -> Optional[FeedParserDict]: async def rsshub_get(username: str, host: str) -> Optional[FeedParserDict]:
url = f"{host}/twitter/user/{username}" url = f"{host}/twitter/user/{username}"
response = await request.get(url) response = await request.get(url)
if response.status_code == 200: if response.status_code == 200:
@@ -42,6 +45,18 @@ async def get(username: str, host: str) -> Optional[FeedParserDict]:
raise HostNeedChange raise HostNeedChange
@retry
async def nitter_get(username: str, host: str) -> Optional[FeedParserDict]:
    """Fetch and parse the RSS feed of ``username`` from one Nitter host.

    The request to ``{host}/{username}/rss`` is made while holding the
    module-level ``LOCK``, which serialises requests issued through this
    function.

    Args:
        username: Account name placed in the feed path.
        host: Base URL of the Nitter instance to query.

    Returns:
        The feed parsed from the response body when the host answers 200.

    Raises:
        UsernameNotFound: The host answered 404.
        HostNeedChange: The host answered with any other status code.
    """
    feed_url = f"{host}/{username}/rss"
    async with LOCK:
        response = await request.get(feed_url)

    status = response.status_code
    if status == 404:
        raise UsernameNotFound
    if status != 200:
        raise HostNeedChange
    return parse(response.text)
async def parse_tweets(data: List[FeedParserDict]) -> List[Tweet]: async def parse_tweets(data: List[FeedParserDict]) -> List[Tweet]:
tweets = [] tweets = []
for tweet in data: for tweet in data:
@@ -56,7 +71,7 @@ async def parse_tweets(data: List[FeedParserDict]) -> List[Tweet]:
tweets.append( tweets.append(
Tweet( Tweet(
content=content, content=content,
url=url, old_url=url,
time=time, time=time,
images=images images=images
) )
@@ -74,13 +89,40 @@ async def parse_user(username: str, data: FeedParserDict) -> User:
async def get_user(username: str) -> Optional[User]:
    """Look up ``username``, trying RSSHub first and Nitter as a fallback.

    A ``HostNeedChange`` from the RSSHub lookup is treated the same as an
    empty result: the Nitter lookup is attempted next.

    Returns:
        The user from the first backend that yields one, or whatever the
        Nitter lookup returns (possibly ``None``) when RSSHub yields nothing.

    Raises:
        UsernameNotFound: The Nitter lookup also ended in ``HostNeedChange``.
    """
    try:
        user = await get_user_rsshub(username)
    except HostNeedChange:
        user = None
    if user:
        return user

    try:
        return await get_user_nitter(username)
    except HostNeedChange:
        raise UsernameNotFound
async def get_user_rsshub(username: str) -> Optional[User]:
    """Look up ``username`` on each configured RSSHub host in order.

    Returns:
        The parsed user from the first host that returns feed data, or
        ``None`` when the list is exhausted without data (including when
        ``rss_hub_host`` is empty).

    Raises:
        HostNeedChange: The final host in ``rss_hub_host`` failed with
            ``HostNeedChange``. Failures on earlier hosts just move on to
            the next one. Any other exception propagates unchanged.
    """
    last_index = len(rss_hub_host) - 1
    for index, host in enumerate(rss_hub_host):
        try:
            data = await rsshub_get(username, host)
            if data:
                return await parse_user(username, data)
        except HostNeedChange:
            # Detect the final host by position, not by value, so a host
            # listed twice cannot end the loop before the rest are tried.
            if index == last_index:
                raise
            continue
    return None
async def get_user_nitter(username: str) -> Optional[User]:
    """Look up ``username`` on each configured Nitter host in order.

    Returns:
        The parsed user from the first host that returns feed data, or
        ``None`` when the list is exhausted without data (including when
        ``nitter_host`` is empty).

    Raises:
        HostNeedChange: The final host in ``nitter_host`` failed with
            ``HostNeedChange``. Failures on earlier hosts just move on to
            the next one. Any other exception propagates unchanged.
    """
    last_index = len(nitter_host) - 1
    for index, host in enumerate(nitter_host):
        try:
            data = await nitter_get(username, host)
            if data:
                return await parse_user(username, data)
        except HostNeedChange:
            # Detect the final host by position, not by value, so a host
            # listed twice cannot end the loop before the rest are tried.
            if index == last_index:
                raise
            continue
    return None

View File

@@ -1,6 +1,7 @@
api_id = 1 api_id = 1
api_hash = "a" api_hash = "a"
rss_hub_host = ["https://rsshub.app"] rss_hub_host = ["https://rsshub.app"]
nitter_host = []
cid = 11 cid = 11
tid = None tid = None
owner = 11 owner = 11

View File

@@ -2,23 +2,30 @@ from datetime import datetime, timedelta
from typing import List from typing import List
from pydantic import BaseModel from pydantic import BaseModel
from httpx import URL
class Tweet(BaseModel):
    """One tweet, with helpers derived from its stored link and timestamp."""

    content: str
    # Link as given at construction; ``url`` derives the twitter.com form.
    old_url: str
    time: datetime
    images: List[str]

    @property
    def id(self) -> int:
        """Return the integer in the last path segment of ``url``.

        A ``#m`` marker is removed before conversion (presumably the
        fragment Nitter appends to its links; confirm).

        Raises:
            ValueError: The last segment is not an integer.
        """
        tid = self.url.split("/")[-1].replace("#m", "")
        return int(tid)

    @property
    def time_str(self) -> str:
        """Return ``time`` shifted by +8 hours as ``YYYY-MM-DD HH:MM:SS``."""
        # NOTE(review): the +8h shift presumably assumes ``time`` is UTC; confirm.
        utc_8_time = self.time + timedelta(hours=8)
        return utc_8_time.strftime("%Y-%m-%d %H:%M:%S")

    @property
    def url(self) -> str:
        """Return ``old_url`` with its host replaced by ``twitter.com``."""
        host = URL(self.old_url).host
        if not host:
            # No host to swap. Replacing "" would insert the new host
            # between every character of the link.
            return self.old_url
        # Replace only the first occurrence so a later match in the path
        # or query is left untouched.
        return self.old_url.replace(host, "twitter.com", 1)
class User(BaseModel): class User(BaseModel):
username: str username: str

View File

@@ -24,8 +24,8 @@ async def update_all(_, message: Message):
await msg.edit("检查更新完毕!") await msg.edit("检查更新完毕!")
@scheduler.scheduled_job("cron", minute="*/15", id="update_all") @scheduler.scheduled_job("cron", hour="*", minute="0", id="update_all")
async def update_all_15_minutes(): async def update_all_60_minutes():
if _lock.locked(): if _lock.locked():
return return
async with _lock: async with _lock: