Move formatting classes inside the Client sub-package

This commit is contained in:
Dan 2018-01-23 15:17:48 +01:00
parent ae92c92c06
commit c39bf3043d
4 changed files with 61 additions and 35 deletions

View File

@ -1,5 +1,22 @@
# Pyrogram - Telegram MTProto API Client Library for Python
# Copyright (C) 2017-2018 Dan Tès <https://github.com/delivrance>
#
# This file is part of Pyrogram.
#
# Pyrogram is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Pyrogram is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with Pyrogram. If not, see <http://www.gnu.org/licenses/>.
import re import re
from struct import unpack
from pyrogram.api.types import ( from pyrogram.api.types import (
MessageEntityBold as Bold, MessageEntityBold as Bold,
@ -10,6 +27,7 @@ from pyrogram.api.types import (
MessageEntityMentionName as MentionInvalid, MessageEntityMentionName as MentionInvalid,
InputMessageEntityMentionName as Mention, InputMessageEntityMentionName as Mention,
) )
from . import utils
class HTML: class HTML:
@ -17,24 +35,12 @@ class HTML:
HTML_RE = re.compile(r"<(\w+)(?: href=\"(.*)\")?>(.*)</\1>") HTML_RE = re.compile(r"<(\w+)(?: href=\"(.*)\")?>(.*)</\1>")
MENTION_RE = re.compile(r"tg://user\?id=(\d+)") MENTION_RE = re.compile(r"tg://user\?id=(\d+)")
@classmethod
def add_surrogates(cls, text):
return cls.SMP_RE.sub(
lambda match: # Split SMP in two surrogates
"".join(chr(i) for i in unpack("<HH", match.group().encode("utf-16le"))),
text
)
@staticmethod
def remove_surrogates(text):
return text.encode("utf-16", "surrogatepass").decode("utf-16")
def __init__(self, peers_by_id): def __init__(self, peers_by_id):
self.peers_by_id = peers_by_id self.peers_by_id = peers_by_id
def parse(self, text): def parse(self, text):
entities = [] entities = []
text = self.add_surrogates(text) text = utils.add_surrogates(text)
offset = 0 offset = 0
for match in self.HTML_RE.finditer(text): for match in self.HTML_RE.finditer(text):
@ -71,6 +77,6 @@ class HTML:
offset += len(style) * 2 + 5 + (len(url) + 8 if url else 0) offset += len(style) * 2 + 5 + (len(url) + 8 if url else 0)
return dict( return dict(
message=self.remove_surrogates(text), message=utils.remove_surrogates(text),
entities=entities entities=entities
) )

View File

@ -17,7 +17,6 @@
# along with Pyrogram. If not, see <http://www.gnu.org/licenses/>. # along with Pyrogram. If not, see <http://www.gnu.org/licenses/>.
import re import re
from struct import unpack
from pyrogram.api.types import ( from pyrogram.api.types import (
MessageEntityBold as Bold, MessageEntityBold as Bold,
@ -27,6 +26,7 @@ from pyrogram.api.types import (
MessageEntityPre as Pre, MessageEntityPre as Pre,
InputMessageEntityMentionName as Mention InputMessageEntityMentionName as Mention
) )
from . import utils
class Markdown: class Markdown:
@ -36,9 +36,6 @@ class Markdown:
"`": Code "`": Code
} }
# SMP = Supplementary Multilingual Plane: https://en.wikipedia.org/wiki/Plane_(Unicode)#Overview
SMP_RE = re.compile(r"[\U00010000-\U0010FFFF]")
# ``` python # ``` python
# for i in range(10): # for i in range(10):
# print(i) # print(i)
@ -69,26 +66,12 @@ class Markdown:
MARKDOWN_RE = re.compile("|".join([PRE_RE, MENTION_RE, URL_RE, INLINE_RE])) MARKDOWN_RE = re.compile("|".join([PRE_RE, MENTION_RE, URL_RE, INLINE_RE]))
@classmethod
def add_surrogates(cls, text):
# Replace each SMP code point with a surrogate pair
return cls.SMP_RE.sub(
lambda match: # Split SMP in two surrogates
"".join(chr(i) for i in unpack("<HH", match.group().encode("utf-16le"))),
text
)
@staticmethod
def remove_surrogates(text):
# Replace each surrogate pair with a SMP code point
return text.encode("utf-16", "surrogatepass").decode("utf-16")
def __init__(self, peers_by_id): def __init__(self, peers_by_id):
self.peers_by_id = peers_by_id self.peers_by_id = peers_by_id
def parse(self, text): def parse(self, text):
entities = [] entities = []
text = self.add_surrogates(text) text = utils.add_surrogates(text)
offset = 0 offset = 0
for match in self.MARKDOWN_RE.finditer(text): for match in self.MARKDOWN_RE.finditer(text):
@ -130,6 +113,6 @@ class Markdown:
text = text.replace(pattern, replace) text = text.replace(pattern, replace)
return dict( return dict(
message=self.remove_surrogates(text), message=utils.remove_surrogates(text),
entities=entities entities=entities
) )

View File

@ -0,0 +1,37 @@
# Pyrogram - Telegram MTProto API Client Library for Python
# Copyright (C) 2017-2018 Dan Tès <https://github.com/delivrance>
#
# This file is part of Pyrogram.
#
# Pyrogram is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Pyrogram is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with Pyrogram. If not, see <http://www.gnu.org/licenses/>.
import re
from struct import unpack
# Matches any single code point from U+10000 through U+10FFFF, i.e. the
# Supplementary Multilingual Plane (SMP) and the planes above it.
# Background: https://en.wikipedia.org/wiki/Plane_(Unicode)#Overview
SMP_RE = re.compile(r"[\U00010000-\U0010FFFF]")


def add_surrogates(text):
    """Return *text* with each code point above U+FFFF split into two surrogates.

    Every character matched by ``SMP_RE`` is replaced by the two 16-bit code
    units of its UTF-16 encoding, each stored as a separate character.
    All other characters are left untouched.
    """

    def _to_surrogate_pair(found):
        # UTF-16LE encodes one matched code point as exactly two 16-bit
        # units; read them back as integers and emit one character per unit.
        high, low = unpack("<HH", found.group().encode("utf-16le"))
        return chr(high) + chr(low)

    return SMP_RE.sub(_to_surrogate_pair, text)
def remove_surrogates(text):
    """Return *text* with each surrogate pair merged back into one code point.

    The string is round-tripped through UTF-16: the ``surrogatepass`` error
    handler lets surrogate characters be encoded as raw 16-bit units, and
    decoding then reads each high/low pair as a single character.
    """
    utf16_bytes = text.encode("utf-16", "surrogatepass")
    return utf16_bytes.decode("utf-16")