Filter out empty entities internally (#1041)

* Filter out empty entities internally 

I guess it's fine to handle empty entities internally to avoid ENTITY_BOUNDS_INVALID, so the client won't send empty entities

* revert utils and apply changes to parser/html.py

* Update utils.py

* Update utils.py

* Update utils.py

* Update html.py

* Update utils.py

* Update utils.py

Co-authored-by: Dan <14043624+delivrance@users.noreply.github.com>
This commit is contained in:
Harsh 2022-07-22 20:45:18 +05:30 committed by GitHub
parent d9c8e0450b
commit ed748952b5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 32 additions and 10 deletions

View File

@ -140,6 +140,9 @@ class HTML:
entities.append(entity) entities.append(entity)
# Remove zero-length entities
entities = list(filter(lambda x: x.length > 0, entities))
return { return {
"message": utils.remove_surrogates(parser.text), "message": utils.remove_surrogates(parser.text),
"entities": sorted(entities, key=lambda e: e.offset) "entities": sorted(entities, key=lambda e: e.offset)
@ -156,13 +159,21 @@ class HTML:
start = entity.offset start = entity.offset
end = start + entity.length end = start + entity.length
if entity_type in (MessageEntityType.BOLD, MessageEntityType.ITALIC, MessageEntityType.UNDERLINE, if entity_type in (
MessageEntityType.STRIKETHROUGH): MessageEntityType.BOLD,
MessageEntityType.ITALIC,
MessageEntityType.UNDERLINE,
MessageEntityType.STRIKETHROUGH,
):
name = entity_type.name[0].lower() name = entity_type.name[0].lower()
start_tag = f"<{name}>" start_tag = f"<{name}>"
end_tag = f"</{name}>" end_tag = f"</{name}>"
elif entity_type in (MessageEntityType.CODE, MessageEntityType.PRE, MessageEntityType.BLOCKQUOTE, elif entity_type in (
MessageEntityType.SPOILER): MessageEntityType.CODE,
MessageEntityType.PRE,
MessageEntityType.BLOCKQUOTE,
MessageEntityType.SPOILER,
):
name = entity_type.name.lower() name = entity_type.name.lower()
start_tag = f"<{name}>" start_tag = f"<{name}>"
end_tag = f"</{name}>" end_tag = f"</{name}>"

View File

@ -48,8 +48,10 @@ def get_input_media_from_file_id(
try: try:
decoded = FileId.decode(file_id) decoded = FileId.decode(file_id)
except Exception: except Exception:
raise ValueError(f'Failed to decode "{file_id}". The value does not represent an existing local file, ' raise ValueError(
f'HTTP URL, or valid file id.') f'Failed to decode "{file_id}". The value does not represent an existing local file, '
f"HTTP URL, or valid file id."
)
file_type = decoded.file_type file_type = decoded.file_type
@ -82,7 +84,11 @@ def get_input_media_from_file_id(
raise ValueError(f"Unknown file id: {file_id}") raise ValueError(f"Unknown file id: {file_id}")
async def parse_messages(client, messages: "raw.types.messages.Messages", replies: int = 1) -> List["types.Message"]: async def parse_messages(
client,
messages: "raw.types.messages.Messages",
replies: int = 1
) -> List["types.Message"]:
users = {i.id: i for i in messages.users} users = {i.id: i for i in messages.users}
chats = {i.id: i for i in messages.chats} chats = {i.id: i for i in messages.chats}
@ -260,8 +266,10 @@ def xor(a: bytes, b: bytes) -> bytes:
return bytes(i ^ j for i, j in zip(a, b)) return bytes(i ^ j for i, j in zip(a, b))
def compute_password_hash(algo: raw.types.PasswordKdfAlgoSHA256SHA256PBKDF2HMACSHA512iter100000SHA256ModPow, def compute_password_hash(
password: str) -> bytes: algo: raw.types.PasswordKdfAlgoSHA256SHA256PBKDF2HMACSHA512iter100000SHA256ModPow,
password: str
) -> bytes:
hash1 = sha256(algo.salt1 + password.encode() + algo.salt1) hash1 = sha256(algo.salt1 + password.encode() + algo.salt1)
hash2 = sha256(algo.salt2 + hash1 + algo.salt2) hash2 = sha256(algo.salt2 + hash1 + algo.salt2)
hash3 = hashlib.pbkdf2_hmac("sha512", hash2, algo.salt1, 100000) hash3 = hashlib.pbkdf2_hmac("sha512", hash2, algo.salt1, 100000)
@ -270,7 +278,10 @@ def compute_password_hash(algo: raw.types.PasswordKdfAlgoSHA256SHA256PBKDF2HMACS
# noinspection PyPep8Naming # noinspection PyPep8Naming
def compute_password_check(r: raw.types.account.Password, password: str) -> raw.types.InputCheckPasswordSRP: def compute_password_check(
r: raw.types.account.Password,
password: str
) -> raw.types.InputCheckPasswordSRP:
algo = r.current_algo algo = r.current_algo
p_bytes = algo.p p_bytes = algo.p