From ac37f9ce43d28972fc34427f79be86335ae7eb8c Mon Sep 17 00:00:00 2001
From: mein Name
Date: Fri, 28 Feb 2025 05:01:33 -0500
Subject: [PATCH] fix search url timeout

Raise the timeout of the HEAD request used for redirect detection from
1s to 2s, and add a detect_redirection flag (default True) to
SiteManager.get_site_by_url so callers can skip that request entirely.
The search and refetch views pass detect_redirection=False to avoid
timing out while handling the request.

The repeated next(filter(...)) lookups are extracted into
SiteManager.get_class_by_url and SiteManager.get_fallback_class_by_url.

fetch() no longer resolves the site itself: it takes the resolved site
as a required argument, and refetch() raises BadRequest when no site
matches the URL.
---
Review note: the helper calls in get_site_by_url keep the lookup order of
the code being removed: regular match on the (possibly redirected) URL,
regular match on the original URL, fallback match on the redirected URL,
fallback match on the original URL. The blob ids on the "index" lines are
carried over unchanged.

 catalog/common/sites.py | 51 +++++++++++++++++++----------------------
 catalog/search/views.py | 16 ++++++-------
 2 files changed, 32 insertions(+), 35 deletions(-)

diff --git a/catalog/common/sites.py b/catalog/common/sites.py
index d7502894..b683b090 100644
--- a/catalog/common/sites.py
+++ b/catalog/common/sites.py
@@ -317,7 +317,7 @@ class SiteManager:
         elif u:
             return u
         try:
-            u = requests.head(url, allow_redirects=True, timeout=1).url
+            u = requests.head(url, allow_redirects=True, timeout=2).url
         except requests.RequestException:
             logger.warning(f"HEAD timeout: {url}")
             u = url
@@ -325,7 +325,24 @@ class SiteManager:
         return u
 
     @staticmethod
-    def get_site_by_url(url: str) -> AbstractSite | None:
+    def get_class_by_url(url: str) -> Type[AbstractSite] | None:
+        return next(
+            filter(lambda p: p.validate_url(url), SiteManager.registry.values()), None
+        )
+
+    @staticmethod
+    def get_fallback_class_by_url(url: str) -> Type[AbstractSite] | None:
+        return next(
+            filter(
+                lambda p: p.validate_url_fallback(url), SiteManager.registry.values()
+            ),
+            None,
+        )
+
+    @staticmethod
+    def get_site_by_url(
+        url: str, detect_redirection: bool = True
+    ) -> AbstractSite | None:
         if not url or not url_validate(
             url,
             skip_ipv6_addr=True,
@@ -334,36 +351,16 @@ class SiteManager:
             strict_query=False,
         ):
             return None
-        u = SiteManager.get_redirected_url(url)
-        cls = next(
-            filter(lambda p: p.validate_url(u), SiteManager.registry.values()), None
-        )
+        u = SiteManager.get_redirected_url(url) if detect_redirection else url
+        cls = SiteManager.get_class_by_url(u)
         if cls is None and u != url:
-            cls = next(
-                filter(
-                    lambda p: p.validate_url(url),
-                    SiteManager.registry.values(),
-                ),
-                None,
-            )
+            cls = SiteManager.get_class_by_url(url)
             if cls:
                 u = url
         if cls is None:
-            cls = next(
-                filter(
-                    lambda p: p.validate_url_fallback(u),
-                    SiteManager.registry.values(),
-                ),
-                None,
-            )
+            cls = SiteManager.get_fallback_class_by_url(u)
             if cls is None and u != url:
-                cls = next(
-                    filter(
-                        lambda p: p.validate_url_fallback(url),
-                        SiteManager.registry.values(),
-                    ),
-                    None,
-                )
+                cls = SiteManager.get_fallback_class_by_url(url)
                 if cls:
                     u = url
         return cls(u) if cls else None
diff --git a/catalog/search/views.py b/catalog/search/views.py
index 201154bc..67d5eabe 100644
--- a/catalog/search/views.py
+++ b/catalog/search/views.py
@@ -48,11 +48,7 @@ def fetch_refresh(request, job_id):
     )
 
 
-def fetch(request, url, is_refetch: bool = False, site: AbstractSite | None = None):
-    if not site:
-        site = SiteManager.get_site_by_url(url)
-    if not site:
-        raise BadRequest(_("Invalid URL"))
+def fetch(request, url, site: AbstractSite, is_refetch: bool = False):
     item = site.get_item()
     if item and not is_refetch:
         return redirect(item.url)
@@ -131,9 +127,10 @@ def search(request):
         host = keywords.split("://")[1].split("/")[0]
         if host in settings.SITE_DOMAINS:
             return redirect(keywords)
-        site = SiteManager.get_site_by_url(keywords)
+        # skip detecting redirection to avoid timeout
+        site = SiteManager.get_site_by_url(keywords, detect_redirection=False)
         if site:
-            return fetch(request, keywords, False, site)
+            return fetch(request, keywords, site, False)
         if request.GET.get("r"):
             return redirect(keywords)
 
@@ -173,4 +170,7 @@ def refetch(request):
     url = request.POST.get("url")
     if not url:
         raise BadRequest(_("Invalid URL"))
-    return fetch(request, url, True)
+    site = SiteManager.get_site_by_url(url, detect_redirection=False)
+    if not site:
+        raise BadRequest(_("Unsupported URL"))
+    return fetch(request, url, site, True)