diff --git a/catalog/common/sites.py b/catalog/common/sites.py index ddb81050..ecfddaa5 100644 --- a/catalog/common/sites.py +++ b/catalog/common/sites.py @@ -95,7 +95,7 @@ class AbstractSite: # add this method to subclass to enable external search # @classmethod # async def search_task( - # cls, query: str, page: int, category: str + # cls, query: str, page: int, category: str, page_size: int # ) -> list[ExternalSearchResultItem]: # return [] diff --git a/catalog/search/external.py b/catalog/search/external.py index 4a7d8514..6ed2cf20 100644 --- a/catalog/search/external.py +++ b/catalog/search/external.py @@ -1,30 +1,46 @@ import asyncio -import logging + +from django.core.cache import cache from catalog.common import SiteManager +from catalog.common.models import ItemCategory from catalog.search.models import ExternalSearchResultItem from catalog.sites.fedi import FediverseInstance -SEARCH_PAGE_SIZE = 5 # not all apis support page size -logger = logging.getLogger(__name__) - class ExternalSources: @classmethod def search( - cls, query: str, page: int = 1, category: str | None = None + cls, + query: str, + page: int = 1, + category: str | None = None, + visible_categories: list[ItemCategory] = [], ) -> list[ExternalSearchResultItem]: - if not query or page < 1 or page > 10: + if not query or page < 1 or page > 10 or len(query) > 100: return [] if category in ["", None]: category = "all" - tasks = FediverseInstance.search_tasks(query, page, category) - for site in SiteManager.get_sites_for_search(): - tasks.append(site.search_task(query, page, category)) - # loop = asyncio.get_event_loop() - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - results = [] - for r in loop.run_until_complete(asyncio.gather(*tasks)): - results.extend(r) + page_size = 5 if category == "all" else 10 + match category: + case "all": + cache_key = f"search_{','.join(visible_categories)}_{query}" + case "movietv": + cache_key = f"search_movie,tv_{query}" + case _: + 
cache_key = f"search_{category}_{query}" + results = cache.get("ext_" + cache_key, None) + if results is None: + tasks = FediverseInstance.search_tasks(query, page, category, page_size) + for site in SiteManager.get_sites_for_search(): + tasks.append(site.search_task(query, page, category, page_size)) + # loop = asyncio.get_event_loop() + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + results = [] + for r in loop.run_until_complete(asyncio.gather(*tasks)): + results.extend(r) + cache.set("ext_" + cache_key, results, 300) + dedupe_urls = cache.get(cache_key, []) + results = [i for i in results if i.source_url not in dedupe_urls] return results diff --git a/catalog/search/models.py b/catalog/search/models.py index 3bf7d668..433754b7 100644 --- a/catalog/search/models.py +++ b/catalog/search/models.py @@ -108,6 +108,7 @@ def query_index(keywords, categories=None, tag=None, page=1, prepare_external=Tr page < 1 or page > 99 or (not tag and isinstance(keywords, str) and len(keywords) < 2) + or len(keywords) > 100 ): return [], 0, 0, [] result = Indexer.search(keywords, page=page, categories=categories, tag=tag) diff --git a/catalog/search/views.py b/catalog/search/views.py index 05e20e18..5368bd4e 100644 --- a/catalog/search/views.py +++ b/catalog/search/views.py @@ -3,7 +3,6 @@ import re import django_rq from django.conf import settings from django.contrib.auth.decorators import login_required -from django.core.cache import cache from django.core.exceptions import BadRequest from django.shortcuts import redirect, render from django.utils.translation import gettext as _ @@ -155,15 +154,15 @@ def search(request): @login_required def external_search(request): category = request.GET.get("c", default="all").strip().lower() - if category == "all": - category = None keywords = request.GET.get("q", default="").strip() page_number = int_(request.GET.get("page"), 1) - items = ExternalSources.search(keywords, page_number, category) if keywords else [] - cache_key 
= f"search_{category if category != 'movietv' else 'movie,tv'}_{keywords}" - dedupe_urls = cache.get(cache_key, []) - items = [i for i in items if i.source_url not in dedupe_urls] - + items = ( + ExternalSources.search( + keywords, page_number, category, visible_categories(request) + ) + if keywords + else [] + ) return render(request, "external_search_results.html", {"external_items": items}) diff --git a/catalog/sites/apple_podcast.py b/catalog/sites/apple_podcast.py index d1f53a35..c0f5e1e8 100644 --- a/catalog/sites/apple_podcast.py +++ b/catalog/sites/apple_podcast.py @@ -42,18 +42,17 @@ class ApplePodcast(AbstractSite): @classmethod async def search_task( - cls, q: str, page: int, category: str + cls, q: str, page: int, category: str, page_size: int ) -> list[ExternalSearchResultItem]: if category != "podcast": return [] - SEARCH_PAGE_SIZE = 5 if category == "all" else 10 results = [] - search_url = f"https://itunes.apple.com/search?entity=podcast&limit={page * SEARCH_PAGE_SIZE}&term={quote_plus(q)}" + search_url = f"https://itunes.apple.com/search?entity=podcast&limit={page * page_size}&term={quote_plus(q)}" async with httpx.AsyncClient() as client: try: response = await client.get(search_url, timeout=2) r = response.json() - for p in r["results"][(page - 1) * SEARCH_PAGE_SIZE :]: + for p in r["results"][(page - 1) * page_size :]: if p.get("feedUrl"): results.append( ExternalSearchResultItem( diff --git a/catalog/sites/bandcamp.py b/catalog/sites/bandcamp.py index 88e9df74..9b8ef23d 100644 --- a/catalog/sites/bandcamp.py +++ b/catalog/sites/bandcamp.py @@ -109,13 +109,12 @@ class Bandcamp(AbstractSite): @classmethod async def search_task( - cls, q: str, page: int, category: str + cls, q: str, page: int, category: str, page_size: int ) -> list[ExternalSearchResultItem]: if category != "music": return [] - SEARCH_PAGE_SIZE = 5 - p = (page - 1) * SEARCH_PAGE_SIZE // 18 + 1 - offset = (page - 1) * SEARCH_PAGE_SIZE % 18 + p = (page - 1) * page_size // 18 + 1 + 
offset = (page - 1) * page_size % 18 results = [] search_url = f"https://bandcamp.com/search?from=results&item_type=a&page={p}&q={urllib.parse.quote_plus(q)}" async with httpx.AsyncClient() as client: @@ -147,4 +146,4 @@ class Bandcamp(AbstractSite): logger.error( "Bandcamp search error", extra={"query": q, "exception": e} ) - return results[offset : offset + SEARCH_PAGE_SIZE] + return results[offset : offset + page_size] diff --git a/catalog/sites/fedi.py b/catalog/sites/fedi.py index 2e3101b7..c63a517a 100644 --- a/catalog/sites/fedi.py +++ b/catalog/sites/fedi.py @@ -125,10 +125,9 @@ class FediverseInstance(AbstractSite): return d @classmethod - async def peer_search_task(cls, host, q, page, category=None): - SEARCH_PAGE_SIZE = 5 - p = (page - 1) * SEARCH_PAGE_SIZE // 20 + 1 - offset = (page - 1) * SEARCH_PAGE_SIZE % 20 + async def peer_search_task(cls, host, q, page, category=None, page_size=5): + p = (page - 1) * page_size // 20 + 1 + offset = (page - 1) * page_size % 20 api_url = f"https://{host}/api/catalog/search?query={quote_plus(q)}&page={p}{'&category=' + category if category and category != 'all' else ''}" async with httpx.AsyncClient() as client: results = [] @@ -167,12 +166,14 @@ class FediverseInstance(AbstractSite): item["cover_image_url"], ) ) - return results[offset : offset + SEARCH_PAGE_SIZE] + return results[offset : offset + page_size] @classmethod - def search_tasks(cls, q: str, page: int = 1, category: str | None = None): + def search_tasks( + cls, q: str, page: int = 1, category: str | None = None, page_size=5 + ): from takahe.utils import Takahe peers = Takahe.get_neodb_peers() c = category if category != "movietv" else "movie,tv" - return [cls.peer_search_task(host, q, page, c) for host in peers] + return [cls.peer_search_task(host, q, page, c, page_size) for host in peers] diff --git a/catalog/sites/goodreads.py b/catalog/sites/goodreads.py index ee31b4da..4efbac27 100644 --- a/catalog/sites/goodreads.py +++ b/catalog/sites/goodreads.py 
@@ -123,13 +123,12 @@ class Goodreads(AbstractSite): @classmethod async def search_task( - cls, q: str, page: int, category: str + cls, q: str, page: int, category: str, page_size: int ) -> list[ExternalSearchResultItem]: if category not in ["all", "book"]: return [] - SEARCH_PAGE_SIZE = 5 - p = (page - 1) * SEARCH_PAGE_SIZE // 20 + 1 - offset = (page - 1) * SEARCH_PAGE_SIZE % 20 + p = (page - 1) * page_size // 20 + 1 + offset = (page - 1) * page_size % 20 results = [] search_url = f"https://www.goodreads.com/search?page={p}&q={quote_plus(q)}" async with httpx.AsyncClient() as client: @@ -195,7 +194,7 @@ class Goodreads(AbstractSite): logger.error( "Goodreads search error", extra={"query": q, "exception": e} ) - return results[offset : offset + SEARCH_PAGE_SIZE] + return results[offset : offset + page_size] @SiteManager.register diff --git a/catalog/sites/google_books.py b/catalog/sites/google_books.py index 3498e910..8ce1e00b 100644 --- a/catalog/sites/google_books.py +++ b/catalog/sites/google_books.py @@ -1,4 +1,3 @@ -import logging import re from urllib.parse import quote_plus @@ -10,8 +9,6 @@ from catalog.book.utils import isbn_10_to_13 from catalog.common import * from catalog.models import * -_logger = logging.getLogger(__name__) - @SiteManager.register class GoogleBooks(AbstractSite): @@ -122,13 +119,12 @@ class GoogleBooks(AbstractSite): @classmethod async def search_task( - cls, q: str, page: int, category: str + cls, q: str, page: int, category: str, page_size: int ) -> list[ExternalSearchResultItem]: if category not in ["all", "book"]: return [] - SEARCH_PAGE_SIZE = 5 results = [] - api_url = f"https://www.googleapis.com/books/v1/volumes?country=us&q={quote_plus(q)}&startIndex={SEARCH_PAGE_SIZE * (page - 1)}&maxResults={SEARCH_PAGE_SIZE}&maxAllowedMaturityRating=MATURE" + api_url = f"https://www.googleapis.com/books/v1/volumes?country=us&q={quote_plus(q)}&startIndex={page_size * (page - 1)}&maxResults={page_size}&maxAllowedMaturityRating=MATURE" async 
with httpx.AsyncClient() as client: try: response = await client.get(api_url, timeout=2) diff --git a/catalog/sites/igdb.py b/catalog/sites/igdb.py index 8f0b1894..31b57a2b 100644 --- a/catalog/sites/igdb.py +++ b/catalog/sites/igdb.py @@ -166,12 +166,11 @@ class IGDB(AbstractSite): @classmethod async def search_task( - cls, q: str, page: int, category: str + cls, q: str, page: int, category: str, page_size: int ) -> list[ExternalSearchResultItem]: if category != "game": return [] - SEARCH_PAGE_SIZE = 5 if category == "all" else 10 - limit = SEARCH_PAGE_SIZE + limit = page_size offset = (page - 1) * limit q = f'fields *, cover.url, genres.name, platforms.name, involved_companies.*, involved_companies.company.name; search "{quote_plus(q)}"; limit {limit}; offset {offset};' _wrapper = IGDBWrapper(settings.IGDB_CLIENT_ID, _igdb_access_token()) diff --git a/catalog/sites/spotify.py b/catalog/sites/spotify.py index f5f6c3d9..27a39f7a 100644 --- a/catalog/sites/spotify.py +++ b/catalog/sites/spotify.py @@ -111,13 +111,12 @@ class Spotify(AbstractSite): @classmethod async def search_task( - cls, q: str, page: int, category: str + cls, q: str, page: int, category: str, page_size: int ) -> list[ExternalSearchResultItem]: if category not in ["music", "all"]: return [] - SEARCH_PAGE_SIZE = 5 results = [] - api_url = f"https://api.spotify.com/v1/search?q={q}&type=album&limit={SEARCH_PAGE_SIZE}&offset={page * SEARCH_PAGE_SIZE}" + api_url = f"https://api.spotify.com/v1/search?q={q}&type=album&limit={page_size}&offset={page * page_size}" async with httpx.AsyncClient() as client: try: headers = {"Authorization": f"Bearer {get_spotify_token()}"} diff --git a/catalog/sites/tmdb.py b/catalog/sites/tmdb.py index 61257220..f3105dca 100644 --- a/catalog/sites/tmdb.py +++ b/catalog/sites/tmdb.py @@ -180,13 +180,12 @@ class TMDB_Movie(AbstractSite): @classmethod async def search_task( - cls, q: str, page: int, category: str + cls, q: str, page: int, category: str, page_size: int ) -> 
list[ExternalSearchResultItem]: if category not in ["movietv", "all", "movie", "tv"]: return [] - SEARCH_PAGE_SIZE = 5 if category == "all" else 10 - p = (page - 1) * SEARCH_PAGE_SIZE // 20 + 1 - offset = (page - 1) * SEARCH_PAGE_SIZE % 20 + p = (page - 1) * page_size // 20 + 1 + offset = (page - 1) * page_size % 20 results = [] api_url = f"https://api.themoviedb.org/3/search/multi?query={quote_plus(q)}&page={p}&api_key={settings.TMDB_API3_KEY}&language={TMDB_DEFAULT_LANG}&include_adult=true" async with httpx.AsyncClient() as client: @@ -225,7 +224,7 @@ class TMDB_Movie(AbstractSite): logger.warning(f"TMDB search '{q}' no results found.") except Exception as e: logger.error("TMDb search error", extra={"query": q, "exception": e}) - return results[offset : offset + SEARCH_PAGE_SIZE] + return results[offset : offset + page_size] @SiteManager.register