From 301a13edd2294f8bb4654d31a3d071cb9d8903c5 Mon Sep 17 00:00:00 2001 From: Dan <14043624+delivrance@users.noreply.github.com> Date: Fri, 20 Nov 2020 01:22:01 +0100 Subject: [PATCH] Add file_id.py A module to deal with Telegram file ids --- pyrogram/file_id.py | 478 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 478 insertions(+) create mode 100644 pyrogram/file_id.py diff --git a/pyrogram/file_id.py b/pyrogram/file_id.py new file mode 100644 index 00000000..b64de423 --- /dev/null +++ b/pyrogram/file_id.py @@ -0,0 +1,478 @@ +# Pyrogram - Telegram MTProto API Client Library for Python +# Copyright (C) 2017-2020 Dan +# +# This file is part of Pyrogram. +# +# Pyrogram is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Pyrogram is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with Pyrogram. If not, see <http://www.gnu.org/licenses/>.
import base64
import logging
import struct
from enum import IntEnum
from io import BytesIO

from pyrogram.raw.core import Bytes, String

log = logging.getLogger(__name__)


def b64_encode(s: bytes) -> str:
    """Encode bytes into a URL-safe Base64 string without padding

    Parameters:
        s (``bytes``):
            Bytes to encode

    Returns:
        ``str``: The encoded bytes, with the trailing ``=`` padding removed
    """
    # Padding only ever appears at the end of Base64 text, so strip just that side
    return base64.urlsafe_b64encode(s).decode().rstrip("=")


def b64_decode(s: str) -> bytes:
    """Decode a URL-safe Base64 string without padding to bytes

    Parameters:
        s (``str``):
            String to decode

    Returns:
        ``bytes``: The decoded string
    """
    # Re-add the padding removed by b64_encode: the length must be a multiple of 4
    return base64.urlsafe_b64decode(s + "=" * (-len(s) % 4))


def rle_encode(s: bytes) -> bytes:
    """Zero-value RLE encoder

    Every run of zero bytes is replaced by a ``(0, count)`` pair; non-zero bytes are copied as they are.
    The count is stored in a single byte, so runs longer than 255 zeros are split into several
    consecutive pairs.

    Parameters:
        s (``bytes``):
            Bytes to encode

    Returns:
        ``bytes``: The encoded bytes
    """
    max_run = 255  # Largest run length that fits in the single count byte

    r = bytearray()
    n = 0  # Length of the zero run currently being collected

    for b in s:
        if b == 0:
            n += 1

            # Flush a full run right away, otherwise the count would overflow its byte
            if n == max_run:
                r += bytes((0, n))
                n = 0

            continue

        if n > 0:
            r += bytes((0, n))
            n = 0

        r.append(b)

    # Flush a run that reaches the end of the input
    if n > 0:
        r += bytes((0, n))

    return bytes(r)


def rle_decode(s: bytes) -> bytes:
    """Zero-value RLE decoder

    Every ``(0, count)`` pair is expanded to ``count`` zero bytes; non-zero bytes are copied as they are.

    Parameters:
        s (``bytes``):
            Bytes to decode

    Returns:
        ``bytes``: The decoded bytes

    Raises:
        ``IndexError``: In case the input ends with a zero marker that is not followed by its count byte
    """
    r = bytearray()
    i = 0

    while i < len(s):
        if s[i] != 0:
            r.append(s[i])
        else:
            # The byte following a zero marker is the run length
            r += b"\x00" * s[i + 1]
            i += 1  # Skip the count byte

        i += 1

    return bytes(r)


class FileType(IntEnum):
    """Known file types"""
    THUMBNAIL = 0
    CHAT_PHOTO = 1  # ProfilePhoto
    PHOTO = 2
    VOICE = 3  # VoiceNote
    VIDEO = 4
    DOCUMENT = 5
    ENCRYPTED = 6
    TEMP = 7
    STICKER = 8
    AUDIO = 9
    ANIMATION = 10
    ENCRYPTED_THUMBNAIL = 11
    WALLPAPER = 12
    VIDEO_NOTE = 13
    SECURE_RAW = 14
    SECURE = 15
    BACKGROUND = 16
    DOCUMENT_AS_FILE = 17


class ThumbnailSource(IntEnum):
    """Known thumbnail sources"""
    LEGACY = 0
    THUMBNAIL = 1
    CHAT_PHOTO_SMALL = 2  # DialogPhotoSmall
    CHAT_PHOTO_BIG = 3  # DialogPhotoBig
    STICKER_SET_THUMBNAIL = 4


# Photo-like file ids are longer and contain extra info, the
rest are all documents +PHOTO_TYPES = {FileType.THUMBNAIL, FileType.CHAT_PHOTO, FileType.PHOTO, FileType.WALLPAPER, + FileType.ENCRYPTED_THUMBNAIL} +DOCUMENT_TYPES = set(FileType) - PHOTO_TYPES + +# Since the file type values are small enough to fit them in few bits, Telegram thought it would be a good idea to +# encode extra information about web url and file reference existence as flag inside the 4 bytes allocated for the field +WEB_LOCATION_FLAG = 1 << 24 +FILE_REFERENCE_FLAG = 1 << 25 + + +class FileId: + MAJOR = 4 + MINOR = 30 + + def __init__( + self, *, + major: int = MAJOR, + minor: int = MINOR, + file_type: FileType, + dc_id: int, + file_reference: bytes = None, + url: str = None, + media_id: int = None, + access_hash: int = None, + volume_id: int = None, + thumbnail_source: ThumbnailSource = None, + thumbnail_file_type: str = None, + thumbnail_size: str = None, + secret: int = None, + local_id: str = None, + chat_id: int = None, + chat_access_hash: int = None, + sticker_set_id: int = None, + sticker_set_access_hash: int = None + ): + self.major = major + self.minor = minor + self.file_type = file_type + self.dc_id = dc_id + self.file_reference = file_reference + self.url = url + self.media_id = media_id + self.access_hash = access_hash + self.volume_id = volume_id + self.thumbnail_source = thumbnail_source + self.thumbnail_file_type = thumbnail_file_type + self.thumbnail_size = thumbnail_size + self.secret = secret + self.local_id = local_id + self.chat_id = chat_id + self.chat_access_hash = chat_access_hash + self.sticker_set_id = sticker_set_id + self.sticker_set_access_hash = sticker_set_access_hash + + @staticmethod + def decode(file_id: str): + decoded = rle_decode(b64_decode(file_id)) + + # region read version + # File id versioning. 
Major versions lower than 4 don't have a minor version + major = decoded[-1] + + if major < 4: + minor = 0 + buffer = BytesIO(decoded[:-1]) + else: + minor = decoded[-2] + buffer = BytesIO(decoded[:-2]) + # endregion + + file_type, dc_id = struct.unpack("= 4: + buffer.write(struct.pack("