# Pyrogram - Telegram MTProto API Client Library for Python # Copyright (C) 2017-2018 Dan Tès # # This file is part of Pyrogram. # # Pyrogram is free software: you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published # by the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # Pyrogram is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public License # along with Pyrogram. If not, see . import re from struct import unpack from pyrogram.api.types import ( MessageEntityBold as Bold, MessageEntityItalic as Italic, MessageEntityCode as Code, MessageEntityTextUrl as Url, MessageEntityPre as Pre, InputMessageEntityMentionName as Mention ) class Markdown: INLINE_DELIMITERS = { "**": Bold, "__": Italic, "`": Code } # SMP = Supplementary Multilingual Plane: https://en.wikipedia.org/wiki/Plane_(Unicode)#Overview SMP_RE = re.compile(r"[\U00010000-\U0010FFFF]") # ``` python # for i in range(10): # print(i) # ``` PRE_RE = r"(?P
```(?P.*)\n(?P(.|\n)*)\n```)"

    # NOTE(review): in URL_RE, MENTION_RE and INLINE_RE below, the "(?P" group
    # openers are not followed by a "<name>", which Python's re module does
    # not accept. The group names look like they were stripped at some point;
    # restore them from the code that reads the match groups and confirm that
    # MARKDOWN_RE compiles.

    # [url](github.com)
    # Inline link: bracketed text immediately followed by a parenthesised
    # target. Both parts are matched lazily (.+?).
    URL_RE = r"(?P(\[(?P.+?)\]\((?P.+?)\)))"

    # [name](tg://user?id=123456789)
    # Same shape as a link, but the target has to be "tg://user?id=" followed
    # by digits.
    MENTION_RE = r"(?P(\[(?P.+?)\]\(tg:\/\/user\?id=(?P\d+?)\)))"

    # **bold**
    # __italic__
    # `code`
    # Both {d} placeholders receive the same alternation of every key of
    # INLINE_DELIMITERS, with each character backslash-escaped. Delimiters are
    # sorted longest first because a regex alternation tries its branches from
    # left to right. The pattern itself does not tie the closing delimiter to
    # the opening one.
    INLINE_RE = r"(?P(?P{d})(?P.+?)(?P{d}))".format(
        d="|".join(
            # "\\" + ch builds the same text the former "\{}" literal did,
            # without relying on an invalid string escape sequence.
            "".join("\\" + ch for ch in delimiter)
            for delimiter in sorted(INLINE_DELIMITERS, key=len, reverse=True)
        )
    )

    # One combined pattern; the alternatives are tried in the listed order.
    # MENTION_RE precedes URL_RE because mention syntax also satisfies the
    # generic link pattern.
    MARKDOWN_RE = re.compile("|".join([PRE_RE, MENTION_RE, URL_RE, INLINE_RE]))

    @classmethod
    def add_surrogates(cls, text):
        # Replace each SMP code point with a surrogate pair
        return cls.SMP_RE.sub(
            lambda match:  # Split SMP in two surrogates
            "".join(chr(i) for i in unpack("