refresh api schema

Your Name 2023-06-03 11:10:48 -04:00 committed by Henri Dickson
parent 1c082f8308
commit 7ade7ec0a5
7 changed files with 167 additions and 113 deletions

View file

@@ -1,17 +1,18 @@
-from django.utils.translation import gettext_lazy as _
-from django.http import HttpResponse
-from django.http import Http404
-from ninja import Schema
-from common.api import *
 from .models import *
 from .common import *
 from .sites import *
-from .search.views import enqueue_fetch
+from ninja import Schema
+from django.http import Http404
+from common.api import *
+from django.utils.translation import gettext_lazy as _
+from django.db import models
+from django.http import HttpRequest, HttpResponse
+from .search.models import enqueue_fetch, query_index

 class SearchResult(Schema):
-    items: list[ItemSchema]
+    data: List[ItemSchema]
     pages: int
+    count: int

 @api.get(
@@ -20,17 +21,29 @@ class SearchResult(Schema):
     summary="Search items in catalog",
     auth=None,
 )
-def search_item(request, query: str, category: AvailableItemCategory | None = None):
+def search_item(
+    request, query: str, category: AvailableItemCategory | None = None, page: int = 1
+):
     """
     Search items in catalog

    count and pages are estimated, the actual data may be less

    unlike the web search, this does not show external results,
    nor does it parse a url to fetch an item. to do that, use /catalog/fetch.
     """
     query = query.strip()
     if not query:
         return 400, {"message": "Invalid query"}
-    result = Indexer.search(query, page=1, category=category)
-    return 200, {"items": result.items}
+    items, num_pages, count = query_index(
+        query, page=page, category=category, prepare_external=False
+    )
+    return 200, {"data": items, "pages": num_pages, "count": count}

 @api.get(
     "/catalog/fetch",
-    response={200: ItemSchema, 202: Result},
+    response={200: ItemSchema, 202: Result, 404: Result},
     summary="Fetch item from URL of a supported site",
     auth=None,
 )
@@ -44,7 +57,7 @@ def fetch_item(request, url: str):
     """
     site = SiteManager.get_site_by_url(url)
     if not site:
-        raise Http404(url)
+        return 404, {"message": "URL not supported"}
     item = site.get_item()
     if item:
         return 200, item
@@ -130,6 +143,11 @@ def get_game(request, uuid: str, response: HttpResponse):
 # Legacy API will be removed soon

+class SearchResultLegacy(Schema):
+    items: List[ItemSchema]
+    pages: int

 @api.post(
     "/catalog/search",
     response={200: SearchResult, 400: Result},
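
Note: a minimal client call against the reworked endpoint would look roughly like the sketch below. The /api/catalog/search path and the host are assumptions, since the GET route string falls outside the visible hunks; only the query/category/page parameters and the response keys appear in this diff.

    import requests

    # Assumed route and host; the actual path is defined outside this diff.
    resp = requests.get(
        "https://neodb.example/api/catalog/search",
        params={"query": "dune", "category": "book", "page": 2},
    )
    payload = resp.json()
    # The body now uses "data" instead of "items", plus estimated "pages" and "count".
    print(payload["count"], payload["pages"], len(payload["data"]))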

View file

@@ -15,31 +15,13 @@ from .game.models import Game, GameSchema, GameInSchema
 from .podcast.models import Podcast, PodcastSchema, PodcastInSchema, PodcastEpisode
 from .performance.models import Performance
 from .collection.models import Collection as CatalogCollection
+from .search.models import Indexer
 from django.contrib.contenttypes.models import ContentType
 from django.conf import settings
 import logging

 _logger = logging.getLogger(__name__)

-# if settings.SEARCH_BACKEND == "MEILISEARCH":
-#     from .search.meilisearch import Indexer
-# el
-if settings.SEARCH_BACKEND == "TYPESENSE":
-    from .search.typesense import Indexer
-else:
-
-    class Indexer:
-        @classmethod
-        def search(cls, q, page=1, category=None, tag=None, sort=None):
-            result = lambda: None
-            result.items = Item.objects.filter(title__contains=q)[:10]
-            result.num_pages = 1
-            return result
-
-        @classmethod
-        def update_model_indexable(cls, model):
-            pass

 # class Exhibition(Item):

catalog/search/models.py (new file, 104 lines)
View file

@ -0,0 +1,104 @@
import logging
from django.utils.translation import gettext_lazy as _
from catalog.common.sites import SiteManager
from ..models import TVSeason, Item
from django.conf import settings
import django_rq
from rq.job import Job
from django.core.cache import cache
import hashlib
from .typesense import Indexer as TypeSenseIndexer
# from .meilisearch import Indexer as MeiliSearchIndexer
_logger = logging.getLogger(__name__)
class DbIndexer:
@classmethod
def search(cls, q, page=1, category=None, tag=None, sort=None):
result = lambda: None
result.items = Item.objects.filter(title__contains=q)[:10]
result.num_pages = 1
result.count = len(result.items)
return result
@classmethod
def update_model_indexable(cls, model):
pass
# if settings.SEARCH_BACKEND == "MEILISEARCH":
#
# el
if settings.SEARCH_BACKEND == "TYPESENSE":
Indexer = TypeSenseIndexer
else:
Indexer = DbIndexer
def query_index(keywords, category=None, tag=None, page=1, prepare_external=True):
result = Indexer.search(keywords, page=page, category=category, tag=tag)
keys = []
items = []
urls = []
for i in result.items:
key = (
i.isbn
if hasattr(i, "isbn")
else (i.imdb_code if hasattr(i, "imdb_code") else None)
)
if key is None:
items.append(i)
elif key not in keys: # skip dup with same imdb or isbn
keys.append(key)
items.append(i)
for res in i.external_resources.all():
urls.append(res.url)
if prepare_external:
# store site url to avoid dups in external search
cache_key = f"search_{category}_{keywords}"
urls = list(set(cache.get(cache_key, []) + urls))
cache.set(cache_key, urls, timeout=300)
# hide show if its season exists
seasons = [i for i in items if i.__class__ == TVSeason]
for season in seasons:
if season.show in items:
items.remove(season.show)
return items, result.num_pages, result.count
def enqueue_fetch(url, is_refetch):
job_id = "fetch_" + hashlib.md5(url.encode()).hexdigest()
in_progress = False
try:
job = Job.fetch(id=job_id, connection=django_rq.get_connection("fetch"))
in_progress = job.get_status() in ["queued", "started"]
except:
in_progress = False
if not in_progress:
django_rq.get_queue("fetch").enqueue(
_fetch_task, url, is_refetch, job_id=job_id
)
return job_id
def _fetch_task(url, is_refetch):
item_url = "-"
try:
site = SiteManager.get_site_by_url(url)
if not site:
return None
site.get_resource_ready(ignore_existing_content=is_refetch)
item = site.get_item()
if item:
_logger.info(f"fetched {url} {item.url} {item}")
item_url = item.url
else:
_logger.error(f"fetch {url} failed")
except Exception as e:
_logger.error(f"fetch {url} error {e}")
return item_url
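
Note: a usage sketch for the two new helpers (Django shell context assumed; the function names and signatures are from the file above, the query strings and URL are made up):

    # Search goes through whichever Indexer is active (TypeSense, or the
    # DbIndexer fallback when SEARCH_BACKEND is not TYPESENSE).
    items, num_pages, count = query_index(
        "the martian", category="book", page=1, prepare_external=False
    )

    # The job id is a hash of the URL, so re-enqueueing the same URL while a
    # fetch job is still queued or started is a no-op.
    job_id = enqueue_fetch("https://example.com/book/123", is_refetch=False)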

View file

@@ -132,20 +132,28 @@ class Indexer:
     @classmethod
     def init(cls):
-        # cls.instance().collections[INDEX_NAME].delete()
-        cls.instance().collections.create(cls.config())
+        idx = cls.instance().collections[INDEX_NAME]
+        if idx:
+            # idx.delete()
+            idx.create(cls.config())

     @classmethod
     def delete_index(cls):
-        cls.instance().collections[INDEX_NAME].delete()
+        idx = cls.instance().collections[INDEX_NAME]
+        if idx:
+            idx.delete()

     @classmethod
     def update_settings(cls):
-        cls.instance().collections[INDEX_NAME].update(cls.config())
+        idx = cls.instance().collections[INDEX_NAME]
+        if idx:
+            idx.update(cls.config())

     @classmethod
     def get_stats(cls):
-        return cls.instance().collections[INDEX_NAME].retrieve()
+        idx = cls.instance().collections[INDEX_NAME]
+        if idx:
+            return idx.retrieve()

     @classmethod
     def busy(cls):

@@ -267,9 +275,11 @@ class Indexer:
                 if x is not None
             ]
         )
+        results.count = r["found"]
         results.num_pages = (r["found"] + SEARCH_PAGE_SIZE - 1) // SEARCH_PAGE_SIZE
     except ObjectNotFound:
         results.items = []
+        results.count = 0
         results.num_pages = 1
     return results
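
Note: the pages calculation is the usual integer ceiling-division idiom; a quick sanity check (the page size value is assumed for illustration):

    SEARCH_PAGE_SIZE = 20
    found = 41
    pages = (found + SEARCH_PAGE_SIZE - 1) // SEARCH_PAGE_SIZE
    assert pages == 3  # same result as math.ceil(41 / 20), with no float math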

View file

@@ -16,6 +16,7 @@ from rq.job import Job
 from .external import ExternalSources
 from django.core.cache import cache
 import hashlib
+from .models import query_index, enqueue_fetch

 _logger = logging.getLogger(__name__)

@@ -53,19 +54,6 @@ def fetch_refresh(request, job_id):
     )

-def enqueue_fetch(url, is_refetch):
-    job_id = "fetch_" + hashlib.md5(url.encode()).hexdigest()
-    in_progress = False
-    try:
-        job = Job.fetch(id=job_id, connection=django_rq.get_connection("fetch"))
-        in_progress = job.get_status() in ["queued", "started"]
-    except:
-        in_progress = False
-    if not in_progress:
-        django_rq.get_queue("fetch").enqueue(fetch_task, url, is_refetch, job_id=job_id)
-    return job_id

 def fetch(request, url, is_refetch: bool = False, site: AbstractSite | None = None):
     if not site:
         site = SiteManager.get_site_by_url(url)
@@ -93,7 +81,7 @@ def search(request):
     keywords = request.GET.get("q", default="").strip()
     tag = request.GET.get("tag", default="").strip()
     p = request.GET.get("page", default="1")
-    page_number = int(p) if p.isdigit() else 1
+    p = int(p) if p.isdigit() else 1
     if not (keywords or tag):
         return render(
             request,
@@ -108,48 +96,14 @@ def search(request):
     site = SiteManager.get_site_by_url(keywords)
     if site:
         return fetch(request, keywords, False, site)
-    if settings.SEARCH_BACKEND is None:
-        # return limited results if no SEARCH_BACKEND
-        result = lambda: None
-        result.items = Item.objects.filter(title__contains=keywords)[:10]
-        result.num_pages = 1
-    else:
-        result = Indexer.search(keywords, page=page_number, category=category, tag=tag)
-    keys = []
-    items = []
-    urls = []
-    for i in result.items:
-        key = (
-            i.isbn
-            if hasattr(i, "isbn")
-            else (i.imdb_code if hasattr(i, "imdb_code") else None)
-        )
-        if key is None:
-            items.append(i)
-        elif key not in keys:
-            keys.append(key)
-            items.append(i)
-        for res in i.external_resources.all():
-            urls.append(res.url)
-    cache_key = f"search_{category}_{keywords}"
-    urls = list(set(cache.get(cache_key, []) + urls))
-    cache.set(cache_key, urls, timeout=300)
-    # hide show if its season exists
-    seasons = [i for i in items if i.__class__ == TVSeason]
-    for season in seasons:
-        if season.show in items:
-            items.remove(season.show)
+    items, num_pages, _ = query_index(keywords, category, tag, p)
     return render(
         request,
         "search_results.html",
         {
             "items": items,
-            "pagination": PageLinksGenerator(
-                PAGE_LINK_NUMBER, page_number, result.num_pages
-            ),
+            "categories": ["book", "movie", "music", "game"],
+            "pagination": PageLinksGenerator(PAGE_LINK_NUMBER, p, num_pages),
             "sites": SiteName.labels,
             "hide_category": category is not None and category != "movietv",
         },
@@ -185,19 +139,3 @@ def refetch(request):
     if not url:
         raise BadRequest()
     return fetch(request, url, True)

-def fetch_task(url, is_refetch):
-    item_url = "-"
-    try:
-        site = SiteManager.get_site_by_url(url)
-        site.get_resource_ready(ignore_existing_content=is_refetch)
-        item = site.get_item()
-        if item:
-            _logger.info(f"fetched {url} {item.url} {item}")
-            item_url = item.url
-        else:
-            _logger.error(f"fetch {url} failed")
-    except Exception as e:
-        _logger.error(f"fetch {url} error {e}")
-    return item_url
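
Note: with the dedup and cache logic centralized, the two callers of query_index now differ only in one flag (both calls appear verbatim in this diff):

    # Web search view: positional args map to category, tag, page;
    # prepare_external keeps its default of True, priming the cache
    # that external search consults.
    items, num_pages, _ = query_index(keywords, category, tag, p)

    # API endpoint (first file in this commit): cache priming disabled.
    items, num_pages, count = query_index(
        query, page=page, category=category, prepare_external=False
    )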

View file

@@ -26,6 +26,12 @@
       <hgroup>
         <h5>“{{ request.GET.q }}” {% trans '的搜索结果' %}</h5>
         <div>
+          {% if request.GET.c and request.GET.c != 'all' %}
+            <a href="?q={{ request.GET.q }}&c=all">全部</a>
+          {% else %}
+            全部
+          {% endif %}
+          |
           {% if request.GET.c != 'book' %}
             <a href="?q={{ request.GET.q }}&c=book">书籍</a>
           {% else %}

@@ -55,12 +61,6 @@
           {% else %}
             游戏
          {% endif %}
-          |
-          {% if request.GET.c and request.GET.c != 'all' %}
-            <a href="?q={{ request.GET.q }}&c=all">全部</a>
-          {% else %}
-            全部
-          {% endif %}
         </div>
       </hgroup>
     {% endif %}

(Template strings: 的搜索结果 = "search results for", 全部 = "All", 书籍 = "Books", 游戏 = "Games". The two hunks move the "All" filter link from the end of the category list to the front.)

View file

@@ -57,11 +57,13 @@ class PageNumberPagination(NinjaPageNumberPagination):
         queryset: QuerySet,
         pagination: NinjaPageNumberPagination.Input,
         **params: Any,
-    ) -> Output:
+    ):
         val = super().paginate_queryset(queryset, pagination, **params)
-        val["data"] = val["items"]
-        val["pages"] = (val["count"] + self.page_size - 1) // self.page_size
-        return val
+        return {
+            "data": val["items"],
+            "count": val["count"],
+            "pages": (val["count"] + self.page_size - 1) // self.page_size,
+        }
api = NinjaAPI(
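
Note: the -> Output annotation is dropped, presumably because the reshaped dict no longer matches ninja's stock pagination output (which pairs "items" with "count"). A sketch of the net effect, with illustrative numbers:

    page_size, count = 20, 45  # assumed page size, 45 matching rows
    pages = (count + page_size - 1) // page_size
    assert pages == 3
    # A page now serializes as {"data": [...], "count": 45, "pages": 3},
    # and the old "items" key is no longer present in paginated responses.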