Fix timeout when searching by URL

This commit is contained in:
mein Name 2025-02-28 05:01:33 -05:00 committed by Henri Dickson
parent 5ac83a6ba8
commit ac37f9ce43
2 changed files with 32 additions and 35 deletions

View file

@ -317,7 +317,7 @@ class SiteManager:
elif u: elif u:
return u return u
try: try:
u = requests.head(url, allow_redirects=True, timeout=1).url u = requests.head(url, allow_redirects=True, timeout=2).url
except requests.RequestException: except requests.RequestException:
logger.warning(f"HEAD timeout: {url}") logger.warning(f"HEAD timeout: {url}")
u = url u = url
@ -325,7 +325,24 @@ class SiteManager:
return u return u
@staticmethod @staticmethod
def get_site_by_url(url: str) -> AbstractSite | None: def get_class_by_url(url: str) -> Type[AbstractSite] | None:
return next(
filter(lambda p: p.validate_url(url), SiteManager.registry.values()), None
)
@staticmethod
def get_fallback_class_by_url(url: str) -> Type[AbstractSite] | None:
    """Return the first registered site class whose fallback validator accepts ``url``.

    Walks ``SiteManager.registry.values()`` in iteration order and calls
    ``validate_url_fallback(url)`` on each entry. The first entry that
    returns a truthy result is returned; ``None`` is returned when no
    entry accepts the URL.
    """
    for candidate in SiteManager.registry.values():
        if candidate.validate_url_fallback(url):
            return candidate
    return None
@staticmethod
def get_site_by_url(
url: str, detect_redirection: bool = True
) -> AbstractSite | None:
if not url or not url_validate( if not url or not url_validate(
url, url,
skip_ipv6_addr=True, skip_ipv6_addr=True,
@ -334,36 +351,16 @@ class SiteManager:
strict_query=False, strict_query=False,
): ):
return None return None
u = SiteManager.get_redirected_url(url) u = SiteManager.get_redirected_url(url) if detect_redirection else url
cls = next( cls = SiteManager.get_class_by_url(u)
filter(lambda p: p.validate_url(u), SiteManager.registry.values()), None
)
if cls is None and u != url: if cls is None and u != url:
cls = next( cls = SiteManager.get_fallback_class_by_url(url)
filter(
lambda p: p.validate_url(url),
SiteManager.registry.values(),
),
None,
)
if cls: if cls:
u = url u = url
if cls is None: if cls is None:
cls = next( cls = SiteManager.get_class_by_url(u)
filter(
lambda p: p.validate_url_fallback(u),
SiteManager.registry.values(),
),
None,
)
if cls is None and u != url: if cls is None and u != url:
cls = next( cls = SiteManager.get_fallback_class_by_url(url)
filter(
lambda p: p.validate_url_fallback(url),
SiteManager.registry.values(),
),
None,
)
if cls: if cls:
u = url u = url
return cls(u) if cls else None return cls(u) if cls else None

View file

@ -48,11 +48,7 @@ def fetch_refresh(request, job_id):
) )
def fetch(request, url, is_refetch: bool = False, site: AbstractSite | None = None): def fetch(request, url, site: AbstractSite, is_refetch: bool = False):
if not site:
site = SiteManager.get_site_by_url(url)
if not site:
raise BadRequest(_("Invalid URL"))
item = site.get_item() item = site.get_item()
if item and not is_refetch: if item and not is_refetch:
return redirect(item.url) return redirect(item.url)
@ -131,9 +127,10 @@ def search(request):
host = keywords.split("://")[1].split("/")[0] host = keywords.split("://")[1].split("/")[0]
if host in settings.SITE_DOMAINS: if host in settings.SITE_DOMAINS:
return redirect(keywords) return redirect(keywords)
site = SiteManager.get_site_by_url(keywords) # skip detecting redirection to avoid timeout
site = SiteManager.get_site_by_url(keywords, detect_redirection=False)
if site: if site:
return fetch(request, keywords, False, site) return fetch(request, keywords, site, False)
if request.GET.get("r"): if request.GET.get("r"):
return redirect(keywords) return redirect(keywords)
@ -173,4 +170,7 @@ def refetch(request):
url = request.POST.get("url") url = request.POST.get("url")
if not url: if not url:
raise BadRequest(_("Invalid URL")) raise BadRequest(_("Invalid URL"))
return fetch(request, url, True) site = SiteManager.get_site_by_url(url, detect_redirection=False)
if not site:
raise BadRequest(_("Unsupported URL"))
return fetch(request, url, site, True)