diff --git a/catalog/api.py b/catalog/api.py
index c7d6e77a..cba5d5ec 100644
--- a/catalog/api.py
+++ b/catalog/api.py
@@ -1,17 +1,18 @@
+from django.utils.translation import gettext_lazy as _
+from django.http import HttpResponse
+from django.http import Http404
+from ninja import Schema
+from common.api import *
 from .models import *
 from .common import *
 from .sites import *
-from ninja import Schema
-from django.http import Http404
-from common.api import *
-from .search.views import enqueue_fetch
-from django.utils.translation import gettext_lazy as _
-from django.db import models
-from django.http import HttpRequest, HttpResponse
+from .search.models import enqueue_fetch, query_index


 class SearchResult(Schema):
-    items: list[ItemSchema]
+    data: List[ItemSchema]
+    pages: int
+    count: int


 @api.get(
@@ -20,17 +21,29 @@ class SearchResult(Schema):
     summary="Search items in catalog",
     auth=None,
 )
-def search_item(request, query: str, category: AvailableItemCategory | None = None):
+def search_item(
+    request, query: str, category: AvailableItemCategory | None = None, page: int = 1
+):
+    """
+    Search items in catalog.
+
+    `count` and `pages` are estimates; the actual number of results may be lower.
+
+    Unlike the web search, this endpoint does not include external results,
+    nor does it parse a URL to fetch an item; to do that, use /catalog/fetch.
+    """
     query = query.strip()
     if not query:
         return 400, {"message": "Invalid query"}
-    result = Indexer.search(query, page=1, category=category)
-    return 200, {"items": result.items}
+    items, num_pages, count = query_index(
+        query, page=page, category=category, prepare_external=False
+    )
+    return 200, {"data": items, "pages": num_pages, "count": count}


 @api.get(
     "/catalog/fetch",
-    response={200: ItemSchema, 202: Result},
+    response={200: ItemSchema, 202: Result, 404: Result},
     summary="Fetch item from URL of a supported site",
     auth=None,
 )
@@ -44,7 +57,7 @@ def fetch_item(request, url: str):
     """
     site = SiteManager.get_site_by_url(url)
     if not site:
-        raise Http404(url)
+        return 404, {"message": "URL not supported"}
     item = site.get_item()
     if item:
         return 200, item
@@ -130,6 +143,11 @@ def get_game(request, uuid: str, response: HttpResponse):
 # Legacy API will be removed soon


+class SearchResultLegacy(Schema):
+    items: List[ItemSchema]
+    pages: int
+
+
 @api.post(
     "/catalog/search",
     response={200: SearchResult, 400: Result},
diff --git a/catalog/models.py b/catalog/models.py
index 5b4f458d..f3803aa8 100644
--- a/catalog/models.py
+++ b/catalog/models.py
@@ -15,31 +15,13 @@ from .game.models import Game, GameSchema, GameInSchema
 from .podcast.models import Podcast, PodcastSchema, PodcastInSchema, PodcastEpisode
 from .performance.models import Performance
 from .collection.models import Collection as CatalogCollection
+from .search.models import Indexer
 from django.contrib.contenttypes.models import ContentType
 from django.conf import settings
 import logging

 _logger = logging.getLogger(__name__)

-# if settings.SEARCH_BACKEND == "MEILISEARCH":
-#     from .search.meilisearch import Indexer
-# el
-if settings.SEARCH_BACKEND == "TYPESENSE":
-    from .search.typesense import Indexer
-else:
-
-    class Indexer:
-        @classmethod
-        def search(cls, q, page=1, category=None, tag=None, sort=None):
-            result = lambda: None
-            result.items = Item.objects.filter(title__contains=q)[:10]
-            result.num_pages = 1
-            return result
-
-        @classmethod
-        def update_model_indexable(cls, model):
-            pass
-

 # class Exhibition(Item):
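Reviewer note on the API change above: the search endpoint now accepts a `page` parameter and returns `{"data": [...], "pages": n, "count": n}` instead of `{"items": [...]}`. A minimal client sketch of the new envelope, assuming the route is `/api/catalog/search` (mirroring the legacy POST path) and a local instance at `http://localhost:8000` — both assumptions, not part of this patch:

```python
import requests

# Page through catalog search results using the new response envelope.
base = "http://localhost:8000/api/catalog/search"
page = 1
while True:
    r = requests.get(base, params={"query": "dune", "page": page})
    r.raise_for_status()
    body = r.json()
    for item in body["data"]:
        print(item.get("title"))
    # "pages" and "count" are estimates; the last page may be short or empty.
    if page >= body["pages"]:
        break
    page += 1
```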
diff --git a/catalog/search/models.py b/catalog/search/models.py
new file mode 100644
index 00000000..32b9119e
--- /dev/null
+++ b/catalog/search/models.py
@@ -0,0 +1,104 @@
+import logging
+from django.utils.translation import gettext_lazy as _
+from catalog.common.sites import SiteManager
+from ..models import TVSeason, Item
+from django.conf import settings
+import django_rq
+from rq.job import Job
+from django.core.cache import cache
+import hashlib
+from .typesense import Indexer as TypeSenseIndexer
+
+# from .meilisearch import Indexer as MeiliSearchIndexer
+
+_logger = logging.getLogger(__name__)
+
+
+class DbIndexer:
+    @classmethod
+    def search(cls, q, page=1, category=None, tag=None, sort=None):
+        result = lambda: None
+        result.items = Item.objects.filter(title__contains=q)[:10]
+        result.num_pages = 1
+        result.count = len(result.items)
+        return result
+
+    @classmethod
+    def update_model_indexable(cls, model):
+        pass
+
+
+# if settings.SEARCH_BACKEND == "MEILISEARCH":
+
+# el
+if settings.SEARCH_BACKEND == "TYPESENSE":
+    Indexer = TypeSenseIndexer
+else:
+    Indexer = DbIndexer
+
+
+def query_index(keywords, category=None, tag=None, page=1, prepare_external=True):
+    result = Indexer.search(keywords, page=page, category=category, tag=tag)
+    keys = []
+    items = []
+    urls = []
+    for i in result.items:
+        key = (
+            i.isbn
+            if hasattr(i, "isbn")
+            else (i.imdb_code if hasattr(i, "imdb_code") else None)
+        )
+        if key is None:
+            items.append(i)
+        elif key not in keys:  # skip dup with same imdb or isbn
+            keys.append(key)
+            items.append(i)
+        for res in i.external_resources.all():
+            urls.append(res.url)
+
+    if prepare_external:
+        # store site url to avoid dups in external search
+        cache_key = f"search_{category}_{keywords}"
+        urls = list(set(cache.get(cache_key, []) + urls))
+        cache.set(cache_key, urls, timeout=300)
+
+    # hide show if its season exists
+    seasons = [i for i in items if i.__class__ == TVSeason]
+    for season in seasons:
+        if season.show in items:
+            items.remove(season.show)
+
+    return items, result.num_pages, result.count
+
+
+def enqueue_fetch(url, is_refetch):
+    job_id = "fetch_" + hashlib.md5(url.encode()).hexdigest()
+    in_progress = False
+    try:
+        job = Job.fetch(id=job_id, connection=django_rq.get_connection("fetch"))
+        in_progress = job.get_status() in ["queued", "started"]
+    except Exception:
+        in_progress = False
+    if not in_progress:
+        django_rq.get_queue("fetch").enqueue(
+            _fetch_task, url, is_refetch, job_id=job_id
+        )
+    return job_id
+
+
+def _fetch_task(url, is_refetch):
+    item_url = "-"
+    try:
+        site = SiteManager.get_site_by_url(url)
+        if not site:
+            return None
+        site.get_resource_ready(ignore_existing_content=is_refetch)
+        item = site.get_item()
+        if item:
+            _logger.info(f"fetched {url} {item.url} {item}")
+            item_url = item.url
+        else:
+            _logger.error(f"fetch {url} failed")
+    except Exception as e:
+        _logger.error(f"fetch {url} error {e}")
+    return item_url
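A side note on `enqueue_fetch` above: keying the RQ job to an md5 of the URL means concurrent requests to fetch the same URL collapse into a single job, and any caller can re-derive the id to poll progress. A sketch under that assumption (`fetch_status` is a hypothetical helper, not part of this patch):

```python
import hashlib

import django_rq
from rq.exceptions import NoSuchJobError
from rq.job import Job


def fetch_status(url: str) -> str:
    # Re-derive the deterministic job id used by enqueue_fetch.
    job_id = "fetch_" + hashlib.md5(url.encode()).hexdigest()
    try:
        job = Job.fetch(id=job_id, connection=django_rq.get_connection("fetch"))
    except NoSuchJobError:
        return "unknown"
    # One of: queued, started, deferred, finished, failed, ...
    return job.get_status()
```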
diff --git a/catalog/search/typesense.py b/catalog/search/typesense.py
index 9c94e6cd..32995c0c 100644
--- a/catalog/search/typesense.py
+++ b/catalog/search/typesense.py
@@ -132,20 +132,28 @@ class Indexer:
     @classmethod
     def init(cls):
-        # cls.instance().collections[INDEX_NAME].delete()
-        cls.instance().collections.create(cls.config())
+        idx = cls.instance().collections[INDEX_NAME]
+        if idx:
+            # idx.delete()
+            idx.create(cls.config())

     @classmethod
     def delete_index(cls):
-        cls.instance().collections[INDEX_NAME].delete()
+        idx = cls.instance().collections[INDEX_NAME]
+        if idx:
+            idx.delete()

     @classmethod
     def update_settings(cls):
-        cls.instance().collections[INDEX_NAME].update(cls.config())
+        idx = cls.instance().collections[INDEX_NAME]
+        if idx:
+            idx.update(cls.config())

     @classmethod
     def get_stats(cls):
-        return cls.instance().collections[INDEX_NAME].retrieve()
+        idx = cls.instance().collections[INDEX_NAME]
+        if idx:
+            return idx.retrieve()

     @classmethod
     def busy(cls):
@@ -267,9 +275,11 @@ class Indexer:
                     if x is not None
                 ]
             )
+            results.count = r["found"]
             results.num_pages = (r["found"] + SEARCH_PAGE_SIZE - 1) // SEARCH_PAGE_SIZE
         except ObjectNotFound:
             results.items = []
+            results.count = 0
             results.num_pages = 1
         return results
diff --git a/catalog/search/views.py b/catalog/search/views.py
index 8a4c3648..42bd404e 100644
--- a/catalog/search/views.py
+++ b/catalog/search/views.py
@@ -16,6 +16,7 @@ from rq.job import Job
 from .external import ExternalSources
 from django.core.cache import cache
 import hashlib
+from .models import query_index, enqueue_fetch

 _logger = logging.getLogger(__name__)

@@ -53,19 +54,6 @@ def fetch_refresh(request, job_id):
     )


-def enqueue_fetch(url, is_refetch):
-    job_id = "fetch_" + hashlib.md5(url.encode()).hexdigest()
-    in_progress = False
-    try:
-        job = Job.fetch(id=job_id, connection=django_rq.get_connection("fetch"))
-        in_progress = job.get_status() in ["queued", "started"]
-    except:
-        in_progress = False
-    if not in_progress:
-        django_rq.get_queue("fetch").enqueue(fetch_task, url, is_refetch, job_id=job_id)
-    return job_id
-
-
 def fetch(request, url, is_refetch: bool = False, site: AbstractSite | None = None):
     if not site:
         site = SiteManager.get_site_by_url(url)
@@ -93,7 +81,7 @@ def search(request):
     keywords = request.GET.get("q", default="").strip()
     tag = request.GET.get("tag", default="").strip()
     p = request.GET.get("page", default="1")
-    page_number = int(p) if p.isdigit() else 1
+    p = int(p) if p.isdigit() else 1
     if not (keywords or tag):
         return render(
             request,
@@ -108,48 +96,14 @@ def search(request):
     site = SiteManager.get_site_by_url(keywords)
     if site:
         return fetch(request, keywords, False, site)
-    if settings.SEARCH_BACKEND is None:
-        # return limited results if no SEARCH_BACKEND
-        result = lambda: None
-        result.items = Item.objects.filter(title__contains=keywords)[:10]
-        result.num_pages = 1
-    else:
-        result = Indexer.search(keywords, page=page_number, category=category, tag=tag)
-    keys = []
-    items = []
-    urls = []
-    for i in result.items:
-        key = (
-            i.isbn
-            if hasattr(i, "isbn")
-            else (i.imdb_code if hasattr(i, "imdb_code") else None)
-        )
-        if key is None:
-            items.append(i)
-        elif key not in keys:
-            keys.append(key)
-            items.append(i)
-        for res in i.external_resources.all():
-            urls.append(res.url)
-    cache_key = f"search_{category}_{keywords}"
-    urls = list(set(cache.get(cache_key, []) + urls))
-    cache.set(cache_key, urls, timeout=300)
-
-    # hide show if its season exists
-    seasons = [i for i in items if i.__class__ == TVSeason]
-    for season in seasons:
-        if season.show in items:
-            items.remove(season.show)
+    items, num_pages, _ = query_index(keywords, category, tag, p)
     return render(
         request,
         "search_results.html",
         {
             "items": items,
-            "pagination": PageLinksGenerator(
-                PAGE_LINK_NUMBER, page_number, result.num_pages
-            ),
-            "categories": ["book", "movie", "music", "game"],
+            "pagination": PageLinksGenerator(PAGE_LINK_NUMBER, p, num_pages),
             "sites": SiteName.labels,
             "hide_category": category is not None and category != "movietv",
         },
@@ -185,19 +139,3 @@ def refetch(request):
     if not url:
         raise BadRequest()
     return fetch(request, url, True)
-
-
-def fetch_task(url, is_refetch):
-    item_url = "-"
-    try:
-        site = SiteManager.get_site_by_url(url)
-        site.get_resource_ready(ignore_existing_content=is_refetch)
-        item = site.get_item()
-        if item:
-            _logger.info(f"fetched {url} {item.url} {item}")
-            item_url = item.url
-        else:
-            _logger.error(f"fetch {url} failed")
-    except Exception as e:
-        _logger.error(f"fetch {url} error {e}")
-    return item_url
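One caveat on the new `if idx:` guards in typesense.py: in the typesense Python client, subscripting `collections` builds a client-side proxy without contacting the server, so `idx` is always truthy and the guard never detects a missing collection; note also that `create` is a method of the `collections` container rather than of an individual collection, so the guarded `idx.create(...)` in `init` looks like it would raise AttributeError. A sketch of an explicit existence check, reusing the `ObjectNotFound` exception this module already catches in `search` (the helper name is hypothetical):

```python
import typesense
from typesense.exceptions import ObjectNotFound


def collection_exists(client: typesense.Client, name: str) -> bool:
    # retrieve() issues a real request; a missing collection raises
    # ObjectNotFound rather than yielding a falsy proxy object.
    try:
        client.collections[name].retrieve()
        return True
    except ObjectNotFound:
        return False
```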
diff --git a/catalog/templates/search_results.html b/catalog/templates/search_results.html
index 59694584..189015d2 100644
--- a/catalog/templates/search_results.html
+++ b/catalog/templates/search_results.html
@@ -26,6 +26,12 @@
             “{{ request.GET.q }}” {% trans '的搜索结果' %}
+            {% if request.GET.c and request.GET.c != 'all' %}
+              全部
+            {% else %}
+              全部
+            {% endif %}
+            |
             {% if request.GET.c != 'book' %}
               书籍
             {% else %}
               书籍
             {% endif %}
@@ -55,12 +61,6 @@
             {% else %}
               游戏
             {% endif %}
-            |
-            {% if request.GET.c and request.GET.c != 'all' %}
-              全部
-            {% else %}
-              全部
-            {% endif %}
             {% endif %}
diff --git a/common/api.py b/common/api.py
index 233434ca..ea37d559 100644
--- a/common/api.py
+++ b/common/api.py
@@ -57,11 +57,13 @@ class PageNumberPagination(NinjaPageNumberPagination):
         queryset: QuerySet,
         pagination: NinjaPageNumberPagination.Input,
         **params: Any,
-    ) -> Output:
+    ):
         val = super().paginate_queryset(queryset, pagination, **params)
-        val["data"] = val["items"]
-        val["pages"] = (val["count"] + self.page_size - 1) // self.page_size
-        return val
+        return {
+            "data": val["items"],
+            "count": val["count"],
+            "pages": (val["count"] + self.page_size - 1) // self.page_size,
+        }


 api = NinjaAPI(
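Closing note on the common/api.py change: the override now returns a fresh dict instead of mutating ninja's result, renaming the default `items` key to `data` and precomputing `pages` by ceiling division — e.g. count=41 with page_size=20 gives pages=3. A minimal schema sketch matching the new envelope (`PagedResult` is a hypothetical name, not part of this patch):

```python
from typing import Any

from ninja import Schema


class PagedResult(Schema):
    # Mirrors the envelope built in paginate_queryset:
    # pages == (count + page_size - 1) // page_size  (ceiling division)
    data: list[Any]
    count: int
    pages: int
```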