From 0e3d08ae7581a84749b4d2cb3d0a0858c43f5601 Mon Sep 17 00:00:00 2001 From: Dan <14043624+delivrance@users.noreply.github.com> Date: Mon, 22 Jan 2018 00:26:43 +0100 Subject: [PATCH] Add HTML style parse mode --- pyrogram/client/client.py | 3 +- pyrogram/extensions/__init__.py | 1 + pyrogram/extensions/html.py | 103 ++++++++++++++++++++++++++++++++ 3 files changed, 106 insertions(+), 1 deletion(-) create mode 100644 pyrogram/extensions/html.py diff --git a/pyrogram/client/client.py b/pyrogram/client/client.py index 131ce037..d325cc01 100644 --- a/pyrogram/client/client.py +++ b/pyrogram/client/client.py @@ -47,7 +47,7 @@ from pyrogram.api.types import ( InputPeerUser, InputPeerChat, InputPeerChannel ) from pyrogram.crypto import CTR -from pyrogram.extensions import Markdown +from pyrogram.extensions import Markdown, HTML from pyrogram.session import Auth, Session log = logging.getLogger(__name__) @@ -89,6 +89,7 @@ class Client: self.peers_by_username = {} self.markdown = Markdown(self.peers_by_id) + self.html = HTML(self.peers_by_id) self.config = None self.proxy = None diff --git a/pyrogram/extensions/__init__.py b/pyrogram/extensions/__init__.py index cd918a09..e60b4da1 100644 --- a/pyrogram/extensions/__init__.py +++ b/pyrogram/extensions/__init__.py @@ -16,4 +16,5 @@ # You should have received a copy of the GNU Lesser General Public License # along with Pyrogram. If not, see <http://www.gnu.org/licenses/>. 
+from .html import HTML from .markdown import Markdown diff --git a/pyrogram/extensions/html.py b/pyrogram/extensions/html.py new file mode 100644 index 00000000..99aa36ad --- /dev/null +++ b/pyrogram/extensions/html.py @@ -0,0 +1,103 @@ +import re +from struct import unpack + +from pyrogram.api.types import ( + MessageEntityBold as Bold, + MessageEntityItalic as Italic, + MessageEntityCode as Code, + MessageEntityTextUrl as Url, + MessageEntityPre as Pre, + InputMessageEntityMentionName as Mention +) + + +class HTML: + SMP_RE = re.compile(r"[\U00010000-\U0010FFFF]") + + BOLD_RE = r"(?P(?P.*?))" + STRONG_RE = r"(?P(?P.*?))" + ITALIC_RE = r"(?P(?P.*?))" + EMPATHIZE_RE = r"(?P(?P.*?))" + CODE_RE = r"(?P(?P.*?))" + PRE_RE = r"(?P
(?P.*?)
)" + MENTION_RE = r"(?P\d+?)\">(?P.*?))" + URL_RE = r"(?P.*?)\">(?P.*?))" + + HTML_RE = re.compile("|".join([BOLD_RE, STRONG_RE, ITALIC_RE, EMPATHIZE_RE, CODE_RE, PRE_RE, MENTION_RE, URL_RE])) + + @classmethod + def add_surrogates(cls, text): + return cls.SMP_RE.sub( + lambda match: # Split SMP in two surrogates + "".join(chr(i) for i in unpack("