fix search url timeout

This commit is contained in:
mein Name 2025-02-28 05:01:33 -05:00 committed by Henri Dickson
parent 5ac83a6ba8
commit ac37f9ce43
2 changed files with 32 additions and 35 deletions

View file

@ -317,7 +317,7 @@ class SiteManager:
elif u:
return u
try:
u = requests.head(url, allow_redirects=True, timeout=1).url
u = requests.head(url, allow_redirects=True, timeout=2).url
except requests.RequestException:
logger.warning(f"HEAD timeout: {url}")
u = url
@ -325,7 +325,24 @@ class SiteManager:
return u
@staticmethod
def get_site_by_url(url: str) -> AbstractSite | None:
def get_class_by_url(url: str) -> Type[AbstractSite] | None:
    """Return the first registered site class that accepts *url*.

    Walks ``SiteManager.registry.values()`` in order and returns the first
    class whose ``validate_url(url)`` is truthy; returns ``None`` when no
    registered class matches.
    """
    for site_cls in SiteManager.registry.values():
        if site_cls.validate_url(url):
            return site_cls
    return None
@staticmethod
def get_fallback_class_by_url(url: str) -> Type[AbstractSite] | None:
    """Return the first registered site class whose fallback check accepts *url*.

    Walks ``SiteManager.registry.values()`` in order and returns the first
    class whose ``validate_url_fallback(url)`` is truthy; returns ``None``
    when no registered class matches.
    """
    for site_cls in SiteManager.registry.values():
        if site_cls.validate_url_fallback(url):
            return site_cls
    return None
@staticmethod
def get_site_by_url(
url: str, detect_redirection: bool = True
) -> AbstractSite | None:
if not url or not url_validate(
url,
skip_ipv6_addr=True,
@ -334,36 +351,16 @@ class SiteManager:
strict_query=False,
):
return None
u = SiteManager.get_redirected_url(url)
cls = next(
filter(lambda p: p.validate_url(u), SiteManager.registry.values()), None
)
u = SiteManager.get_redirected_url(url) if detect_redirection else url
cls = SiteManager.get_class_by_url(u)
if cls is None and u != url:
cls = next(
filter(
lambda p: p.validate_url(url),
SiteManager.registry.values(),
),
None,
)
cls = SiteManager.get_fallback_class_by_url(url)
if cls:
u = url
if cls is None:
cls = next(
filter(
lambda p: p.validate_url_fallback(u),
SiteManager.registry.values(),
),
None,
)
cls = SiteManager.get_class_by_url(u)
if cls is None and u != url:
cls = next(
filter(
lambda p: p.validate_url_fallback(url),
SiteManager.registry.values(),
),
None,
)
cls = SiteManager.get_fallback_class_by_url(url)
if cls:
u = url
return cls(u) if cls else None

View file

@ -48,11 +48,7 @@ def fetch_refresh(request, job_id):
)
def fetch(request, url, is_refetch: bool = False, site: AbstractSite | None = None):
if not site:
site = SiteManager.get_site_by_url(url)
if not site:
raise BadRequest(_("Invalid URL"))
def fetch(request, url, site: AbstractSite, is_refetch: bool = False):
item = site.get_item()
if item and not is_refetch:
return redirect(item.url)
@ -131,9 +127,10 @@ def search(request):
host = keywords.split("://")[1].split("/")[0]
if host in settings.SITE_DOMAINS:
return redirect(keywords)
site = SiteManager.get_site_by_url(keywords)
# skip detecting redirection to avoid timeout
site = SiteManager.get_site_by_url(keywords, detect_redirection=False)
if site:
return fetch(request, keywords, False, site)
return fetch(request, keywords, site, False)
if request.GET.get("r"):
return redirect(keywords)
@ -173,4 +170,7 @@ def refetch(request):
url = request.POST.get("url")
if not url:
raise BadRequest(_("Invalid URL"))
return fetch(request, url, True)
site = SiteManager.get_site_by_url(url, detect_redirection=False)
if not site:
raise BadRequest(_("Unsupported URL"))
return fetch(request, url, site, True)