use cached 302 if present, in web request

This commit is contained in:
mein Name 2025-02-28 06:03:33 -05:00 committed by Henri Dickson
parent ac37f9ce43
commit d8d07c3777
2 changed files with 5 additions and 3 deletions

View file

@ -125,7 +125,7 @@ def fetch_item(request, url: str):
Some sites may take ~90 seconds to fetch. Some sites may take ~90 seconds to fetch.
If the item is not returned after 120 seconds, please stop and consider the URL unavailable. If the item is not returned after 120 seconds, please stop and consider the URL unavailable.
""" """
site = SiteManager.get_site_by_url(url) site = SiteManager.get_site_by_url(url, detect_redirection=False)
if not site: if not site:
return 404, {"message": "URL not supported"} return 404, {"message": "URL not supported"}
item = site.get_item() item = site.get_item()

View file

@ -309,13 +309,15 @@ class SiteManager:
raise ValueError(f"Site for {typ} not found") raise ValueError(f"Site for {typ} not found")
@staticmethod @staticmethod
def get_redirected_url(url: str) -> str: def get_redirected_url(url: str, allow_head: bool = True) -> str:
k = "_redir_" + md5(url.encode()).hexdigest() k = "_redir_" + md5(url.encode()).hexdigest()
u = cache.get(k, default=None) u = cache.get(k, default=None)
if u == "": if u == "":
return url return url
elif u: elif u:
return u return u
elif not allow_head:
return url
try: try:
u = requests.head(url, allow_redirects=True, timeout=2).url u = requests.head(url, allow_redirects=True, timeout=2).url
except requests.RequestException: except requests.RequestException:
@ -351,7 +353,7 @@ class SiteManager:
strict_query=False, strict_query=False,
): ):
return None return None
u = SiteManager.get_redirected_url(url) if detect_redirection else url u = SiteManager.get_redirected_url(url, allow_head=detect_redirection)
cls = SiteManager.get_class_by_url(u) cls = SiteManager.get_class_by_url(u)
if cls is None and u != url: if cls is None and u != url:
cls = SiteManager.get_fallback_class_by_url(url) cls = SiteManager.get_fallback_class_by_url(url)