2017-12-05 11:42:57 +00:00
|
|
|
# Pyrogram - Telegram MTProto API Client Library for Python
|
2019-01-01 11:36:16 +00:00
|
|
|
# Copyright (C) 2017-2019 Dan Tès <https://github.com/delivrance>
|
2017-12-05 11:42:57 +00:00
|
|
|
#
|
|
|
|
# This file is part of Pyrogram.
|
|
|
|
#
|
|
|
|
# Pyrogram is free software: you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU Lesser General Public License as published
|
|
|
|
# by the Free Software Foundation, either version 3 of the License, or
|
|
|
|
# (at your option) any later version.
|
|
|
|
#
|
|
|
|
# Pyrogram is distributed in the hope that it will be useful,
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
# GNU Lesser General Public License for more details.
|
|
|
|
#
|
|
|
|
# You should have received a copy of the GNU Lesser General Public License
|
|
|
|
# along with Pyrogram. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
|
2019-06-24 08:54:58 +00:00
|
|
|
import html
|
2017-12-05 11:42:57 +00:00
|
|
|
import re
|
|
|
|
|
2019-03-26 12:32:30 +00:00
|
|
|
import pyrogram
|
2018-01-23 14:17:48 +00:00
|
|
|
from . import utils
|
2019-06-24 08:07:28 +00:00
|
|
|
from .html import HTML
|
2017-12-05 11:42:57 +00:00
|
|
|
|
2019-06-25 08:24:19 +00:00
|
|
|
# Markdown-style delimiters recognised by the parser. Each delimiter toggles
# its style: the first occurrence opens it, the next occurrence closes it.
BOLD_DELIM = "**"
ITALIC_DELIM = "__"
UNDERLINE_DELIM = "--"
STRIKE_DELIM = "~~"
CODE_DELIM = "`"
PRE_DELIM = "```"


class Markdown:
    """Translate between Markdown-style markup and message entities.

    ``parse`` rewrites the Markdown delimiters and ``[text](url)`` links of a
    string into HTML markup and hands the result to the wrapped ``HTML``
    parser. ``unparse`` does the opposite: it wraps the ranges described by a
    list of entities with the matching Markdown markup.
    """

    # One capturing alternation over every delimiter. PRE_DELIM is listed
    # before CODE_DELIM so that "```" is not consumed as three single "`".
    MARKDOWN_RE = re.compile("({})".format(
        "|".join(
            re.escape(delim)
            for delim in (
                PRE_DELIM,
                CODE_DELIM,
                STRIKE_DELIM,
                UNDERLINE_DELIM,
                ITALIC_DELIM,
                BOLD_DELIM,
            )
        )
    ))

    # [body](url). The URL group excludes ")" as well as "(" so that it stops
    # at the first closing parenthesis instead of greedily running up to a
    # later, unrelated ")" in the same text.
    URL_RE = re.compile(r"\[([^[]+)]\(([^()]+)\)")

    OPENING_TAG = "<{}>"
    CLOSING_TAG = "</{}>"
    URL_MARKUP = '<a href="{}">{}</a>'
    FIXED_WIDTH_DELIMS = [CODE_DELIM, PRE_DELIM]

    # Markdown delimiter -> HTML tag name emitted by parse().
    _DELIM_TAGS = {
        BOLD_DELIM: "b",
        ITALIC_DELIM: "i",
        UNDERLINE_DELIM: "u",
        STRIKE_DELIM: "s",
        CODE_DELIM: "code",
        PRE_DELIM: "pre",
    }

    # Entity type -> Markdown delimiter emitted by unparse().
    # TODO: Blockquote for MD
    _ENTITY_DELIMS = {
        "bold": BOLD_DELIM,
        "italic": ITALIC_DELIM,
        "underline": UNDERLINE_DELIM,
        "strike": STRIKE_DELIM,
        "code": CODE_DELIM,
        "pre": PRE_DELIM,
    }

    def __init__(self, client: "pyrogram.BaseClient"):
        """Create the HTML parser that ``parse`` delegates to.

        Parameters:
            client: Passed through unchanged to ``HTML``.
        """
        self.html = HTML(client)

    async def parse(self, text: str):
        """Convert Markdown markup in *text* to HTML and parse that.

        The text is HTML-escaped first, so literal ``<``, ``>``, ``&`` and
        quotes in the input never turn into markup. Delimiters are then
        replaced by opening/closing tags and ``[body](url)`` spans by anchor
        tags.

        Parameters:
            text: The Markdown-formatted string.

        Returns:
            Whatever ``self.html.parse`` returns for the generated HTML.
        """
        text = html.escape(text)

        pieces = []
        cursor = 0
        open_delims = set()

        for match in Markdown.MARKDOWN_RE.finditer(text):
            delim = match.group(1)
            tag_name = Markdown._DELIM_TAGS.get(delim)

            if tag_name is None:
                continue

            # Inside an open code/pre span only code/pre delimiters are
            # honoured; every other delimiter is left as literal text.
            if delim not in Markdown.FIXED_WIDTH_DELIMS and any(
                fixed in open_delims for fixed in Markdown.FIXED_WIDTH_DELIMS
            ):
                continue

            if delim in open_delims:
                open_delims.remove(delim)
                markup = Markdown.CLOSING_TAG.format(tag_name)
            else:
                open_delims.add(delim)
                markup = Markdown.OPENING_TAG.format(tag_name)

            pieces.append(text[cursor:match.start()])
            pieces.append(markup)
            cursor = match.end()

        pieces.append(text[cursor:])
        text = "".join(pieces)

        # A callable replacement is used so that backslashes in the body or
        # URL are never interpreted as regex template escapes.
        text = Markdown.URL_RE.sub(
            lambda m: Markdown.URL_MARKUP.format(m.group(2), m.group(1)),
            text
        )

        return await self.html.parse(text)

    @staticmethod
    def unparse(text: str, entities: list):
        """Re-create Markdown markup for *text* from its *entities*.

        Every insertion point is computed from the entity offsets against the
        original text and all insertions are applied in a single pass, so the
        markup added for one entity can never shift or be mistaken for the
        range of another one.

        Parameters:
            text: The plain message text.
            entities: Objects exposing ``type``, ``offset`` and ``length``;
                ``url`` is read for "text_link" and ``user.id`` for
                "text_mention". Entities of any other unknown type are
                ignored.

        Returns:
            The text with Markdown markup inserted.
        """
        # NOTE(review): presumably this expands the text so that entity
        # offsets index it directly - confirm against utils.add_surrogates.
        text = utils.add_surrogates(text)

        # Each item is (sort_key, markup). At a shared offset, closing markup
        # sorts before opening markup, inner spans close before outer ones
        # and outer spans open before inner ones.
        insertions = []

        for index, entity in enumerate(entities):
            kind = entity.type
            start = entity.offset
            end = start + entity.length

            if kind in Markdown._ENTITY_DELIMS:
                opening = closing = Markdown._ENTITY_DELIMS[kind]
            elif kind == "text_link":
                opening, closing = "[", "]({})".format(entity.url)
            elif kind == "text_mention":
                opening, closing = "[", "](tg://user?id={})".format(entity.user.id)
            else:
                continue

            if start == end:
                # Empty span: keep its opening and closing markup together.
                insertions.append(((start, 1, -end, index), opening + closing))
                continue

            insertions.append(((end, 0, -start, -index), closing))
            insertions.append(((start, 1, -end, index), opening))

        insertions.sort(key=lambda item: item[0])

        pieces = []
        cursor = 0

        for (offset, _, _, _), markup in insertions:
            pieces.append(text[cursor:offset])
            pieces.append(markup)
            cursor = offset

        pieces.append(text[cursor:])

        return utils.remove_surrogates("".join(pieces))
|