limit frequent fetch

This commit is contained in:
Her Email 2023-12-04 09:11:32 -05:00 committed by Henri Dickson
parent 0a104c920f
commit 294cf6c624
3 changed files with 17 additions and 8 deletions

View file

@ -67,8 +67,9 @@ def fetch_item(request, url: str):
Convert a URL from a supported site (e.g. https://m.imdb.com/title/tt2852400/) to an item.
If the item is not available in the catalog, HTTP 202 will be returned.
Wait 10 seconds or longer, call with same input again, it may return the actual fetched item.
Wait 15 seconds or longer, call with same input again, it may return the actual fetched item.
Some sites may take ~90 seconds to fetch.
If the item is still not returned after 120 seconds, please stop retrying and consider the URL unavailable.
"""
site = SiteManager.get_site_by_url(url)
if not site:
@ -76,7 +77,7 @@ def fetch_item(request, url: str):
item = site.get_item()
if item:
return 200, item
if get_fetch_lock():
if get_fetch_lock(request.user, url):
enqueue_fetch(url, False)
return 202, {"message": "Fetch in progress"}

View file

@ -109,11 +109,19 @@ def query_index(keywords, categories=None, tag=None, page=1, prepare_external=Tr
return items, result.num_pages, result.count, duplicated_items
_fetch_lock_key = "_fetch_lock"
_fetch_lock_ttl = 2
def get_fetch_lock():
def get_fetch_lock(user, url):
if user and user.is_authenticated:
_fetch_lock_key = f"_fetch_lock:{user.id}"
_fetch_lock_ttl = 1 if settings.DEBUG else 3
else:
_fetch_lock_key = "_fetch_lock"
_fetch_lock_ttl = 1 if settings.DEBUG else 15
if cache.get(_fetch_lock_key):
return False
cache.set(_fetch_lock_key, 1, timeout=_fetch_lock_ttl)
# do not fetch the same url twice in 2 hours
_fetch_lock_key = f"_fetch_lock:{url}"
_fetch_lock_ttl = 1 if settings.DEBUG else 7200
if cache.get(_fetch_lock_key):
return False
cache.set(_fetch_lock_key, 1, timeout=_fetch_lock_ttl)

View file

@ -61,7 +61,7 @@ def fetch(request, url, is_refetch: bool = False, site: AbstractSite | None = No
}
)
job_id = None
if is_refetch or get_fetch_lock():
if is_refetch or get_fetch_lock(request.user, url):
job_id = enqueue_fetch(url, is_refetch, request.user)
return render(
request,