# twitter2telegram/defs/feed.py
import functools
import traceback
from asyncio import Lock
from datetime import datetime
from typing import List, Optional

from bs4 import BeautifulSoup
from feedparser import FeedParserDict, parse

from defs.glover import nitter_host, rss_hub_host
from defs.models import Tweet, User
from init import request
# Module-level lock acquired around nitter HTTP requests (see nitter_get),
# serializing them — presumably to avoid rate-limiting a single nitter
# instance; TODO confirm. rsshub_get deliberately does not take it.
LOCK = Lock()
class UsernameNotFound(Exception):
    """Raised when a feed endpoint returns HTTP 404 for the requested username."""
    pass
class HostNeedChange(Exception):
    """Raised when a host returns an unexpected status and the next host (or a retry) should be tried."""
    pass
def retry(func):
    """Decorator: retry an async *func* up to 3 times on HostNeedChange.

    Any other exception propagates immediately. If the final attempt also
    raises HostNeedChange, it is re-raised unchanged (bare ``raise``) so
    the original traceback is preserved — the original code raised a fresh
    ``HostNeedChange``, discarding the failure context.
    """
    attempts = 3  # was a magic number inline

    @functools.wraps(func)  # preserve __name__/__doc__ of the wrapped coroutine
    async def wrapper(*args, **kwargs):
        for attempt in range(attempts):
            try:
                return await func(*args, **kwargs)
            except HostNeedChange:
                if attempt == attempts - 1:
                    raise  # last attempt: keep original traceback
                # otherwise retry
        return None  # unreachable: loop always returns or raises

    return wrapper
@retry
async def rsshub_get(username: str, host: str) -> Optional[FeedParserDict]:
    """Fetch *username*'s tweet feed from one RSSHub instance.

    Returns the parsed feed on HTTP 200. Raises UsernameNotFound on 404,
    and HostNeedChange on any other status (handled by @retry).
    """
    response = await request.get(f"{host}/twitter/user/{username}")
    if response.status_code == 404:
        raise UsernameNotFound
    if response.status_code != 200:
        raise HostNeedChange
    return parse(response.text)
@retry
async def nitter_get(username: str, host: str) -> Optional[FeedParserDict]:
    """Fetch *username*'s RSS feed from one nitter instance.

    The work is serialized through the module-level LOCK. Returns the
    parsed feed on HTTP 200; raises UsernameNotFound on 404 and
    HostNeedChange on any other status (handled by @retry).
    """
    feed_url = f"{host}/{username}/rss"
    async with LOCK:
        response = await request.get(feed_url)
        status = response.status_code
        if status == 200:
            return parse(response.text)
        if status == 404:
            raise UsernameNotFound
        raise HostNeedChange
async def parse_tweets(data: List[FeedParserDict]) -> List[Tweet]:
    """Convert raw feed entries into Tweet models, best effort.

    Entries that fail to parse (missing/odd "published" field, bad HTML)
    are logged via traceback and skipped rather than aborting the batch.
    """
    parsed: List[Tweet] = []
    for entry in data:
        try:
            soup = BeautifulSoup(entry.get("description", ""), "lxml")
            image_urls = [
                tag.get("src") for tag in soup.find_all("img") if tag.get("src")
            ]
            published_at = datetime.strptime(
                entry.get("published", ""), "%a, %d %b %Y %H:%M:%S %Z"
            )
            parsed.append(
                Tweet(
                    content=soup.get_text(),
                    old_url=entry.get("link", ""),
                    time=published_at,
                    images=image_urls,
                )
            )
        except Exception:
            # Deliberate best-effort: report and move on to the next entry.
            traceback.print_exc()
    return parsed
async def parse_user(username: str, data: FeedParserDict) -> User:
    """Build a User model (name + parsed tweets) from a parsed feed."""
    feed_title = data.get("feed", {}).get("title", "")
    # Feed titles look like "Twitter @<name>"; strip the prefix.
    display_name = feed_title.replace("Twitter @", "")
    return User(
        username=username,
        name=display_name,
        tweets=await parse_tweets(data.get("entries", [])),
    )
async def get_user(username: str) -> Optional[User]:
    """Resolve *username*, trying RSSHub hosts first, then nitter hosts.

    Raises UsernameNotFound when a host reports 404, or when every
    nitter host has failed after RSSHub produced nothing.
    """
    try:
        user = await get_user_rsshub(username)
    except HostNeedChange:
        user = None  # all RSSHub hosts failed; fall through to nitter
    if user:
        return user
    try:
        return await get_user_nitter(username)
    except HostNeedChange:
        # Every fallback exhausted: surface as "not found" to the caller.
        raise UsernameNotFound
async def get_user_rsshub(username: str) -> Optional[User]:
    """Try each configured RSSHub host in order.

    Returns the parsed User from the first host that yields a feed,
    None if every host responded but produced no feed, and raises
    HostNeedChange only after the last host has failed.
    """
    for host in rss_hub_host:
        try:
            feed = await rsshub_get(username, host)
        except HostNeedChange:
            if host == rss_hub_host[-1]:
                raise HostNeedChange
            continue  # try the next host
        if feed:
            return await parse_user(username, feed)
    return None
async def get_user_nitter(username: str) -> Optional[User]:
    """Try each configured nitter host in order.

    Mirrors get_user_rsshub: first successful feed wins, None when no
    host yields a feed, HostNeedChange only after the last host fails.
    """
    for host in nitter_host:
        try:
            feed = await nitter_get(username, host)
        except HostNeedChange:
            if host == nitter_host[-1]:
                raise HostNeedChange
            continue  # try the next host
        if feed:
            return await parse_user(username, feed)
    return None