diff --git a/pyrogram/extensions/html.py b/pyrogram/extensions/html.py index 99aa36ad..aa6c3fc2 100644 --- a/pyrogram/extensions/html.py +++ b/pyrogram/extensions/html.py @@ -7,23 +7,15 @@ from pyrogram.api.types import ( MessageEntityCode as Code, MessageEntityTextUrl as Url, MessageEntityPre as Pre, - InputMessageEntityMentionName as Mention + MessageEntityMentionName as MentionInvalid, + InputMessageEntityMentionName as Mention, ) class HTML: SMP_RE = re.compile(r"[\U00010000-\U0010FFFF]") - - BOLD_RE = r"(?P(?P.*?))" - STRONG_RE = r"(?P(?P.*?))" - ITALIC_RE = r"(?P(?P.*?))" - EMPATHIZE_RE = r"(?P(?P.*?))" - CODE_RE = r"(?P(?P.*?))" - PRE_RE = r"(?P
(?P.*?)
)" - MENTION_RE = r"(?P\d+?)\">(?P.*?))" - URL_RE = r"(?P.*?)\">(?P.*?))" - - HTML_RE = re.compile("|".join([BOLD_RE, STRONG_RE, ITALIC_RE, EMPATHIZE_RE, CODE_RE, PRE_RE, MENTION_RE, URL_RE])) + HTML_RE = re.compile(r"<(\w+)(?: href=\"(.*)\")?>(.*)") + MENTION_RE = re.compile(r"tg://user\?id=(\d+)") @classmethod def add_surrogates(cls, text): @@ -45,57 +37,38 @@ class HTML: text = self.add_surrogates(text) offset = 0 - # TODO: Beautify ifs for match in self.HTML_RE.finditer(text): start = match.start() - offset + style, url, body = match.groups() - if match.group("b"): - pattern = match.group("b") - body = match.group("b_body") - entity = Bold(start, len(body)) - offset += 7 - elif match.group("strong"): - pattern = match.group("strong") - body = match.group("strong_body") - entity = Bold(start, len(body)) - offset += 17 - elif match.group("i"): - pattern = match.group("i") - body = match.group("i_body") - entity = Italic(start, len(body)) - offset += 7 - elif match.group("em"): - pattern = match.group("em") - body = match.group("em_body") - entity = Italic(start, len(body)) - offset += 9 - elif match.group("code"): - pattern = match.group("code") - body = match.group("code_body") - entity = Code(start, len(body)) - offset += 13 - elif match.group("pre"): - pattern = match.group("pre") - body = match.group("pre_body") - entity = Pre(start, len(body), "") - offset += 11 - elif match.group("mention"): - pattern = match.group("mention") - body = match.group("mention_text") - user_id = match.group("user_id") - entity = Mention(start, len(body), self.peers_by_id[int(user_id)]) - offset += len(user_id) + 28 - elif match.group("url"): - pattern = match.group("url") - body = match.group("url_text") - path = match.group("url_path") - entity = Url(start, len(body), path) - offset += len(path) + 15 + if url: + mention = self.MENTION_RE.match(url) + + if mention: + user_id = int(mention.group(1)) + input_user = self.peers_by_id.get(user_id, None) + + entity = ( + Mention(start, len(body), input_user) + if input_user else MentionInvalid(start, len(body), user_id) + ) + else: + entity = Url(start, len(body), url) else: - continue + if style == "b" or style == "strong": + entity = Bold(start, len(body)) + elif style == "i" or style == "em": + entity = Italic(start, len(body)) + elif style == "code": + entity = Code(start, len(body)) + elif style == "pre": + entity = Pre(start, len(body), "") + else: + continue entities.append(entity) - text = text.replace(pattern, body) + text = text.replace(match.group(), body) + offset += len(style) * 2 + 5 + (len(url) + 8 if url else 0) return dict( message=self.remove_surrogates(text),