diff --git a/ytdlbot/limit.py b/ytdlbot/limit.py
index 6f3771e..087db12 100644
--- a/ytdlbot/limit.py
+++ b/ytdlbot/limit.py
@@ -8,6 +8,7 @@
 __author__ = "Benny "
 
 import hashlib
+import http
 import logging
 import math
 import os
@@ -109,6 +110,18 @@ class VIP(Redis, MySQL):
         props = ["canonical", "alternate", "shortlinkUrl"]
         headers = {
             "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36"}
+        # Send a HEAD request first so the GET below can be skipped for non-HTML responses.
+        # requests.head() does not follow redirects by default, so opt in explicitly; the
+        # timeout matches the GET below so a stalled server cannot block indefinitely.
+        r = requests.head(url, headers=headers, allow_redirects=True, timeout=5)
+        # Content-Type usually carries parameters (e.g. "text/html; charset=utf-8"), so
+        # compare only the media type, case-insensitively. A missing header yields "".
+        content_type = r.headers.get("content-type", "").split(";")[0].strip().lower()
+        if r.status_code != http.HTTPStatus.METHOD_NOT_ALLOWED and content_type != "text/html":
+            # HEAD was accepted and the content is not text/html: no need to issue a GET request.
+            logging.warning("%s Content-type is not text/html, no need to GET for extract_canonical_link", url)
+            return url
+
         html_doc = requests.get(url, headers=headers, timeout=5).text
         soup = BeautifulSoup(html_doc, "html.parser")
         for prop in props: