enkanetwork.py-data/utils.py

117 lines
3.4 KiB
Python
Raw Normal View History

2022-07-06 19:35:44 +00:00
import os
import json
import logging
import aiohttp
import copy
2022-07-07 08:53:31 +00:00
import asyncio
2022-07-06 19:35:44 +00:00
2022-07-07 09:04:32 +00:00
from git import Repo
2022-07-06 19:35:44 +00:00
# Size of each download chunk: 5 MiB.
CHUNK_SIZE = 5 * 2**20

# Maximum number of retries for a failing HTTP request.
RETRY_MAX = 10

LOGGER = logging.getLogger(__name__)

# Default headers sent with every request (desktop Edge user agent).
HEADER = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.66 Safari/537.36 Edg/103.0.1264.44"
}

# Optional GitHub token (needed for private repos), read from the environment.
GITHUB_TOKEN = os.getenv('GITHUB_TOKEN')
if GITHUB_TOKEN:
    HEADER["Authorization"] = f"token {GITHUB_TOKEN}"
2022-07-06 19:35:44 +00:00
async def request(url: str, method: str = "GET", headers: dict = None, body: str = None) -> dict:
    """Perform an HTTP request and decode the response body as JSON.

    Retries up to ``RETRY_MAX`` times (sleeping 3 s between attempts) when
    the server answers with a 4xx/5xx status.

    BUGFIX: the original looped on ``response.status`` of the *same*
    response object without ever re-issuing the request, so the status
    could never change and every "retry" was a no-op sleep. Each retry now
    performs a fresh request.

    Args:
        url: Target URL (leading/trailing spaces are stripped).
        method: HTTP method, default ``GET``.
        headers: Extra headers merged over the module-level ``HEADER``.
        body: Optional request payload (defaults to an empty string).

    Returns:
        The parsed JSON document as a dict.

    Raises:
        Exception: when all attempts fail, or when the payload is not
            valid JSON.
    """
    _url = url.strip(" ")
    merged_headers = {**HEADER, **(headers or {})}
    payload = body if body is not None else ""

    async with aiohttp.ClientSession() as session:
        for retry in range(RETRY_MAX + 1):
            async with session.request(method, _url, headers=merged_headers, data=payload) as response:
                if response.status >= 400:
                    LOGGER.warning(f"Failure to fetch {_url} ({response.status}) Retry {retry} / {RETRY_MAX}")
                    await asyncio.sleep(3)
                    continue  # re-issue the request on the next loop iteration

                data = await response.read()
                try:
                    return json.loads(data)
                except Exception:
                    # Log enough context to diagnose a bad payload, then re-raise
                    # (the original used bare print() calls for this).
                    LOGGER.error(f"Invalid JSON from {url} (status {response.status}): {data!r}")
                    raise

    raise Exception(f"Failed to download {url}")
2022-07-06 19:35:44 +00:00
2022-12-14 03:20:47 +00:00
2022-07-06 19:35:44 +00:00
async def download_json(url: str, filename: str, path: str = ".") -> None:
    """Fetch a JSON document from *url* and save it to *path*/*filename*.

    The file is written pretty-printed (indent=4) with non-ASCII characters
    preserved.

    BUGFIX: both log messages contained the literal ``(unknown)`` instead
    of an interpolated value (the f-strings had no placeholder), so the log
    never said which file was being processed.

    Args:
        url: URL of the JSON document (e.g. a raw GitHub file URL).
        filename: Name of the output file.
        path: Destination directory, defaults to the current directory.
    """
    LOGGER.debug(f"Fetching {filename} from GitHub...")
    response = await request(url)
    with open(os.path.join(path, filename), "w", encoding="utf-8") as f:
        f.write(json.dumps(response, ensure_ascii=False, indent=4))
    LOGGER.debug(f"{filename} saved")
2022-12-14 03:20:47 +00:00
2022-07-06 19:35:44 +00:00
async def load_commit_local():
    """Return the commit id stored in ``last_commit.txt``, or "" if the file is absent."""
    try:
        with open("last_commit.txt", "r") as handle:
            return handle.read()
    except FileNotFoundError:
        return ""
2022-12-14 03:20:47 +00:00
2022-07-06 19:35:44 +00:00
async def save_commit_local(commit_id: str):
    """Persist *commit_id* into ``last_commit.txt`` for the next run to compare against."""
    with open("last_commit.txt", "w") as handle:
        handle.write(commit_id)
2022-12-14 03:20:47 +00:00
2022-07-06 19:35:44 +00:00
async def save_data(data: dict, filename: str, delete_key: list = None) -> None:
    """Write *data* to ``exports/data/<filename>`` as pretty-printed JSON.

    A deep copy is modified, so the caller's *data* is left untouched.

    BUGFIXES: ``delete_key`` previously used the mutable default ``[]``
    (shared across calls — classic Python pitfall); ``None`` is the
    backward-compatible replacement. Also ``del _data[key][_del]`` raised
    ``KeyError`` whenever an entry lacked one of the keys; ``pop`` with a
    default tolerates that. The log message interpolated nothing
    (literal ``(unknown)``); it now names the file.

    Args:
        data: Mapping of entries to export.
        filename: Output file name under ``exports/data``.
        delete_key: Optional list of keys to strip from every entry
            before writing.
    """
    _data = copy.deepcopy(data)
    for key in _data:
        for _del in delete_key or []:
            _data[key].pop(_del, None)

    with open(os.path.join("exports", "data", filename), "w", encoding="utf-8") as f:
        f.write(json.dumps(_data, ensure_ascii=False, indent=4))
    LOGGER.debug(f"{filename} saved")
2022-07-07 09:04:32 +00:00
2022-12-14 03:20:47 +00:00
2022-07-07 09:04:32 +00:00
async def push_to_github(commit: str = "") -> None:
    """Stage every exported JSON file, commit with *commit* as the message, and push to ``origin``."""
    repository = Repo("./")
    repository.git.add(["./exports/**/*.json"])
    repository.index.commit(commit)
    repository.remote(name='origin').push()
    LOGGER.info("Pushed to GitHub")