diff --git a/pyrogram/parser/markdown.py b/pyrogram/parser/markdown.py index 4a65b64b..1388ffae 100644 --- a/pyrogram/parser/markdown.py +++ b/pyrogram/parser/markdown.py @@ -32,6 +32,7 @@ STRIKE_DELIM = "~~" SPOILER_DELIM = "||" CODE_DELIM = "`" PRE_DELIM = "```" +BLOCKQUOTE_DELIM = ">" MARKDOWN_RE = re.compile(r"({d})|\[(.+?)\]\((.+?)\)".format( d="|".join( @@ -59,10 +60,35 @@ class Markdown: def __init__(self, client: Optional["pyrogram.Client"]): self.html = HTML(client) + def _parse_blockquotes(self, text: str): + text = html.unescape(text) + lines = text.split('\n') + result = [] + in_blockquote = False + current_blockquote = [] + + for line in lines: + if line.startswith(BLOCKQUOTE_DELIM): + in_blockquote = True + current_blockquote.append(line[1:].strip()) + else: + if in_blockquote: + in_blockquote = False + result.append(OPENING_TAG.format("blockquote") + '\n'.join(current_blockquote) + CLOSING_TAG.format("blockquote")) + current_blockquote = [] + result.append(line) + + if in_blockquote: + result.append(OPENING_TAG.format("blockquote") + '\n'.join(current_blockquote) + CLOSING_TAG.format("blockquote")) + + return '\n'.join(result) + async def parse(self, text: str, strict: bool = False): if strict: text = html.escape(text) + text = self._parse_blockquotes(text) + delims = set() is_fixed_width = False @@ -140,6 +166,22 @@ class Markdown: language = getattr(entity, "language", "") or "" start_tag = f"{PRE_DELIM}{language}\n" end_tag = f"\n{PRE_DELIM}" + elif entity_type == MessageEntityType.BLOCKQUOTE: + start_tag = BLOCKQUOTE_DELIM + " " + end_tag = "" + blockquote_text = text[start:end] + lines = blockquote_text.split("\n") + last_length = 0 + for line in lines: + if len(line) == 0 and last_length == end: + continue + start_offset = start+last_length + last_length = last_length+len(line) + end_offset = start_offset+last_length + entities_offsets.append((start_tag, start_offset,)) + entities_offsets.append((end_tag, end_offset,)) + last_length = last_length+1 + continue elif entity_type == MessageEntityType.SPOILER: start_tag = end_tag = SPOILER_DELIM elif entity_type == MessageEntityType.TEXT_LINK: diff --git a/tests/parser/test_markdown.py b/tests/parser/test_markdown.py index 81aaacb3..403673c9 100644 --- a/tests/parser/test_markdown.py +++ b/tests/parser/test_markdown.py @@ -93,6 +93,21 @@ for i in range(10): assert Markdown.unparse(text=text, entities=entities) == expected +def test_markdown_unparse_blockquote(): + expected = """> Hello +> from + +> pyrogram!""" + + text = """Hello\nfrom\n\npyrogram!""" + + entities = pyrogram.types.List( + [pyrogram.types.MessageEntity(type=pyrogram.enums.MessageEntityType.BLOCKQUOTE, offset=0, length=10), + pyrogram.types.MessageEntity(type=pyrogram.enums.MessageEntityType.BLOCKQUOTE, offset=12, length=9)]) + + assert Markdown.unparse(text=text, entities=entities) == expected + + def test_markdown_unparse_mixed(): expected = "**aaaaaaa__aaabbb**__~~dddddddd||ddeee~~||||eeeeeeefff||ffff`fffggggggg`ggghhhhhhhhhh" text = "aaaaaaaaaabbbddddddddddeeeeeeeeeeffffffffffgggggggggghhhhhhhhhh"