MTPyroger/pyrogram/client/style/markdown.py

158 lines
4.7 KiB
Python
Raw Normal View History

# Pyrogram - Telegram MTProto API Client Library for Python
# Copyright (C) 2017-2019 Dan Tès <https://github.com/delivrance>
#
# This file is part of Pyrogram.
#
# Pyrogram is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Pyrogram is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with Pyrogram. If not, see <http://www.gnu.org/licenses/>.
import html
2017-12-05 11:42:57 +00:00
import re
import pyrogram
from . import utils
from .html import HTML
2017-12-05 11:42:57 +00:00
# Markdown delimiters understood by the Markdown class below. Each style is
# opened and closed by the same marker; the trailing comments name the HTML
# tag that Markdown.parse substitutes for the marker.
BOLD_DELIM = "**"  # <b>
ITALIC_DELIM = "__"  # <i>
UNDERLINE_DELIM = "--"  # <u>
STRIKE_DELIM = "~~"  # <s>
CODE_DELIM = "`"  # <code>
PRE_DELIM = "```"  # <pre>
2017-12-05 11:42:57 +00:00
class Markdown:
    """Translate between Markdown-style markup and text entities.

    ``parse`` rewrites the supported Markdown delimiters and inline links
    into HTML tags and hands the result to the wrapped ``HTML`` parser.
    ``unparse`` goes the other way: it re-inserts Markdown markup into a plain
    text according to a list of entities.
    """

    # One alternation of every delimiter, each character backslash-escaped.
    # PRE_DELIM is listed before CODE_DELIM so that "```" is matched as a
    # whole instead of as three separate "`".
    MARKDOWN_RE = re.compile(r"({d})".format(
        d="|".join(
            "".join(r"\{}".format(char) for char in delim)
            for delim in [
                PRE_DELIM,
                CODE_DELIM,
                STRIKE_DELIM,
                UNDERLINE_DELIM,
                ITALIC_DELIM,
                BOLD_DELIM
            ]
        )
    ))

    # Inline link: [body](url). The "[" inside the first character set is
    # escaped so it can never be read as the start of a nested set.
    URL_RE = re.compile(r"\[([^\[]+)]\(([^(]+)\)")

    OPENING_TAG = "<{}>"
    CLOSING_TAG = "</{}>"
    URL_MARKUP = '<a href="{}">{}</a>'
    FIXED_WIDTH_DELIMS = [CODE_DELIM, PRE_DELIM]

    # Delimiter -> HTML tag name used by parse().
    _DELIM_TAGS = {
        BOLD_DELIM: "b",
        ITALIC_DELIM: "i",
        UNDERLINE_DELIM: "u",
        STRIKE_DELIM: "s",
        CODE_DELIM: "code",
        PRE_DELIM: "pre",
    }

    # Entity type -> delimiter used by unparse().
    # TODO: Blockquote for MD
    _ENTITY_DELIMS = {
        "bold": BOLD_DELIM,
        "italic": ITALIC_DELIM,
        "underline": UNDERLINE_DELIM,
        "strike": STRIKE_DELIM,
        "code": CODE_DELIM,
        "pre": PRE_DELIM,
    }

    def __init__(self, client: "pyrogram.BaseClient"):
        """Create the HTML parser that ``parse`` delegates to.

        Args:
            client: Passed through unchanged to ``HTML``.
        """
        self.html = HTML(client)

    async def parse(self, text: str):
        """Convert Markdown markup in *text* to HTML and parse that HTML.

        The text is HTML-escaped first, so only tags produced here reach the
        HTML parser. Every delimiter occurrence toggles its style: the first
        occurrence opens the tag, the next one closes it. While a fixed-width
        style (code / pre) is open, all other delimiters are left untouched.

        Args:
            text: Markdown-formatted text.

        Returns:
            Whatever ``self.html.parse`` returns for the generated HTML.
        """
        text = html.escape(text)

        pieces = []
        cursor = 0  # end of the last delimiter already copied into `pieces`
        open_delims = set()

        for match in Markdown.MARKDOWN_RE.finditer(text):
            delim = match.group(1)
            tag_name = Markdown._DELIM_TAGS.get(delim)

            if tag_name is None:
                continue

            # Inside code / pre, other delimiters are literal text.
            if delim not in Markdown.FIXED_WIDTH_DELIMS and any(
                fixed in open_delims for fixed in Markdown.FIXED_WIDTH_DELIMS
            ):
                continue

            if delim in open_delims:
                open_delims.remove(delim)
                markup = Markdown.CLOSING_TAG.format(tag_name)
            else:
                open_delims.add(delim)
                markup = Markdown.OPENING_TAG.format(tag_name)

            pieces.append(text[cursor:match.start()])
            pieces.append(markup)
            cursor = match.end()

        pieces.append(text[cursor:])
        text = "".join(pieces)

        # [body](url) -> <a href="url">body</a>
        text = Markdown.URL_RE.sub(
            lambda link: Markdown.URL_MARKUP.format(link.group(2), link.group(1)),
            text
        )

        return await self.html.parse(text)

    @staticmethod
    def unparse(text: str, entities: list):
        """Re-insert Markdown markup into *text* according to *entities*.

        All markup is positioned against the original text and applied in a
        single pass. Earlier insertions therefore cannot shift later entities,
        and repeated substrings cannot be confused with each other.

        Args:
            text: Plain text without markup.
            entities: Objects exposing ``offset``, ``length`` and ``type``;
                ``url`` is read for "text_link" and ``user.id`` for
                "text_mention". Offsets index the text returned by
                ``utils.add_surrogates`` (presumably UTF-16 code units;
                confirm against ``utils``). Unknown types are ignored.

        Returns:
            The text with Markdown delimiters and links inserted, after
            ``utils.remove_surrogates``.
        """
        text = utils.add_surrogates(text)

        # Each item is (sort_key, markup). The key is
        # (position, phase, tie_1, tie_2): at one position closers (phase 0)
        # come before openers (phase 1) so adjacent entities do not interleave.
        insertions = []

        for index, entity in enumerate(entities):
            start = entity.offset
            end = start + entity.length
            entity_type = entity.type

            if entity_type in Markdown._ENTITY_DELIMS:
                opening = closing = Markdown._ENTITY_DELIMS[entity_type]
            elif entity_type == "text_link":
                opening = "["
                closing = "]({})".format(entity.url)
            elif entity_type == "text_mention":
                opening = "["
                closing = "](tg://user?id={})".format(entity.user.id)
            else:
                continue

            if end <= start:
                # Empty span: keep opener and closer glued together so the
                # "closers first" rule cannot swap them.
                insertions.append(((start, 1, -start, index), opening + closing))
                continue

            # Openers sharing a position: the entity ending last is outermost.
            insertions.append(((start, 1, -end, index), opening))
            # Closers sharing a position: the entity starting last is
            # innermost and closes first; identical spans close in reverse
            # list order to mirror their openers.
            insertions.append(((end, 0, -start, -index), closing))

        insertions.sort(key=lambda item: item[0])

        pieces = []
        cursor = 0

        for (position, _, _, _), markup in insertions:
            pieces.append(text[cursor:position])
            pieces.append(markup)
            cursor = position

        pieces.append(text[cursor:])

        return utils.remove_surrogates("".join(pieces))