mirror of
https://github.com/TeamPGM/pyrogram.git
synced 2024-11-16 20:59:29 +00:00
Delete style utils.py and move its content inside html.py
The HTML parser is now the only one that makes use of those util methods
This commit is contained in:
parent
e7457de947
commit
cd1e41b130
@@ -20,11 +20,11 @@ import html
|
||||
import re
|
||||
from collections import OrderedDict
|
||||
from html.parser import HTMLParser
|
||||
from struct import unpack
|
||||
|
||||
import pyrogram
|
||||
from pyrogram.api import types
|
||||
from pyrogram.errors import PeerIdInvalid
|
||||
from . import utils
|
||||
|
||||
|
||||
class Parser(HTMLParser):
|
||||
@@ -111,11 +111,28 @@ class Parser(HTMLParser):
|
||||
|
||||
|
||||
class HTML:
|
||||
# SMP = Supplementary Multilingual Plane: https://en.wikipedia.org/wiki/Plane_(Unicode)#Overview
|
||||
SMP_RE = re.compile(r"[\U00010000-\U0010FFFF]")
|
||||
|
||||
def __init__(self, client: "pyrogram.BaseClient" = None):
    """Store the optional client; ``parse`` passes it on to the ``Parser`` it creates."""
    self.client = client
|
||||
|
||||
@staticmethod
def add_surrogates(text):
    """Return ``text`` with every code point matched by ``HTML.SMP_RE`` split into two surrogates."""

    def _as_surrogate_pair(found):
        # A code point above U+FFFF encodes to four UTF-16LE bytes,
        # i.e. two 16-bit units: the high and the low surrogate.
        high, low = unpack("<HH", found.group().encode("utf-16le"))
        return chr(high) + chr(low)

    return HTML.SMP_RE.sub(_as_surrogate_pair, text)
|
||||
|
||||
@staticmethod
|
||||
def remove_surrogates(text):
|
||||
# Replace each surrogate pair with a SMP code point
|
||||
return text.encode("utf-16", "surrogatepass").decode("utf-16")
|
||||
|
||||
def parse(self, text: str):
|
||||
text = utils.add_surrogates(str(text or "").strip())
|
||||
text = HTML.add_surrogates(str(text or "").strip())
|
||||
|
||||
parser = Parser(self.client)
|
||||
parser.feed(text)
|
||||
@@ -123,6 +140,6 @@ class HTML:
|
||||
|
||||
# TODO: OrderedDict to be removed in Python 3.6
|
||||
return OrderedDict([
|
||||
("message", utils.remove_surrogates(parser.text)),
|
||||
("message", HTML.remove_surrogates(parser.text)),
|
||||
("entities", parser.entities)
|
||||
])
|
||||
|
@@ -1,37 +0,0 @@
|
||||
# Pyrogram - Telegram MTProto API Client Library for Python
|
||||
# Copyright (C) 2017-2019 Dan Tès <https://github.com/delivrance>
|
||||
#
|
||||
# This file is part of Pyrogram.
|
||||
#
|
||||
# Pyrogram is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Lesser General Public License as published
|
||||
# by the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# Pyrogram is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Lesser General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Lesser General Public License
|
||||
# along with Pyrogram. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import re
|
||||
from struct import unpack
|
||||
|
||||
# SMP = Supplementary Multilingual Plane: https://en.wikipedia.org/wiki/Plane_(Unicode)#Overview
# The pattern matches any single code point in the range U+10000..U+10FFFF.
SMP_RE = re.compile(r"[\U00010000-\U0010FFFF]")


def add_surrogates(text):
    """Return ``text`` with every code point matched by ``SMP_RE`` split into two surrogates."""

    def _as_surrogate_pair(found):
        # A code point above U+FFFF encodes to four UTF-16LE bytes,
        # i.e. two 16-bit units: the high and the low surrogate.
        high, low = unpack("<HH", found.group().encode("utf-16le"))
        return chr(high) + chr(low)

    return SMP_RE.sub(_as_surrogate_pair, text)
|
||||
|
||||
|
||||
def remove_surrogates(text):
    """Return ``text`` with each surrogate pair merged back into a single code point."""
    # "surrogatepass" lets the surrogate code points through the encoder;
    # decoding the resulting UTF-16 bytes then recombines each pair.
    utf16_bytes = text.encode("utf-16", "surrogatepass")
    return utf16_bytes.decode("utf-16")
|
Loading…
Reference in New Issue
Block a user