new data model: fix search

This commit is contained in:
Your Name 2022-12-31 17:16:47 -05:00
parent 6f58627d9f
commit 8aa78cf297
7 changed files with 769 additions and 109 deletions

View file

@ -10,3 +10,6 @@ class CatalogConfig(AppConfig):
from catalog import models
from catalog import sites
from journal import models as journal_models
from catalog.models import init_catalog_search_models
init_catalog_search_models()

View file

@ -0,0 +1,43 @@
from django.core.management.base import BaseCommand
from django.conf import settings
from catalog.models import *
import pprint
class Command(BaseCommand):
    """Management command to create and inspect the catalog search index."""

    help = "Manage the search index"

    def add_arguments(self, parser):
        parser.add_argument(
            "--init",
            help="initialize index",
            action="store_true",
        )
        parser.add_argument(
            "--stat",
            help="show index statistics",
            action="store_true",
        )

    def init_index(self):
        # Create the collection/schema on the configured search backend.
        self.stdout.write("Connecting to search server")
        Indexer.init()
        self.stdout.write(self.style.SUCCESS("Index created."))

    def stat(self, *args, **options):
        # Fetch index statistics from the backend and pretty-print them.
        self.stdout.write("Connecting to search server")
        stats = Indexer.get_stats()
        pprint.pp(stats)

    def handle(self, *args, **options):
        if options["init"]:
            self.init_index()
        elif options["stat"]:
            self.stat()

View file

@ -8,6 +8,19 @@ from .podcast.models import Podcast
from .performance.models import Performance
from .collection.models import Collection as CatalogCollection
from django.contrib.contenttypes.models import ContentType
from django.conf import settings

# Select the search indexer implementation from settings at import time.
# When no (known) backend is configured, fall back to a no-op stub so the
# catalog models can still be registered without a search server running.
if settings.SEARCH_BACKEND == "MEILISEARCH":
    from .search.meilisearch import Indexer
elif settings.SEARCH_BACKEND == "TYPESENSE":
    from .search.typesense import Indexer
else:

    class Indexer:
        @classmethod
        def update_model_indexable(cls, model):
            # No search backend configured: registering a model is a no-op.
            pass
# class Exhibition(Item):
@ -54,3 +67,13 @@ def all_categories():
else:
_CATEGORY_LIST[c].append(cls)
return _CATEGORY_LIST
def init_catalog_search_models():
    """Register every searchable catalog model with the active Indexer."""
    searchable_models = (
        Edition,
        Work,
        Movie,
        TVShow,
        TVSeason,
        Album,
        Game,
    )
    for model in searchable_models:
        Indexer.update_model_indexable(model)

257
catalog/search/typesense.py Normal file
View file

@ -0,0 +1,257 @@
import types
import logging
import typesense
from typesense.exceptions import ObjectNotFound
from django.conf import settings
from django.db.models.signals import post_save, post_delete
# Name of the single typesense collection holding all catalog items.
INDEX_NAME = "catalog"

# Fields joined into the typesense `query_by` parameter for full-text search.
SEARCHABLE_ATTRIBUTES = [
    "title",
    "orig_title",
    "other_title",
    "subtitle",
    "artist",
    "author",
    "translator",
    "developer",
    "director",
    "actor",
    "playwright",
    "pub_house",
    "company",
    "publisher",
    "isbn",
    "imdb_code",
]
# Fields usable in `filter_by` expressions (faceted in the schema).
FILTERABLE_ATTRIBUTES = ["category", "tags", "class_name"]
# Django field internal types copied into the document as-is.
INDEXABLE_DIRECT_TYPES = [
    "BigAutoField",
    "BooleanField",
    "CharField",
    "PositiveIntegerField",
    "PositiveSmallIntegerField",
    "TextField",
    "ArrayField",
]
# Django field internal types stored as POSIX timestamps.
INDEXABLE_TIME_TYPES = ["DateTimeField"]
# Django field internal types whose dict content is merged into the document.
INDEXABLE_DICT_TYPES = ["JSONField"]
# Django field internal types coerced to float.
INDEXABLE_FLOAT_TYPES = ["DecimalField"]
SORTING_ATTRIBUTE = None
# NONINDEXABLE_TYPES = ['ForeignKey', 'FileField',]
SEARCH_PAGE_SIZE = 20

logger = logging.getLogger(__name__)
def item_post_save_handler(sender, instance, created, **kwargs):
    """post_save hook: push the saved item into the search index."""
    # Skip re-indexing of existing items when configured to index new only.
    skip = settings.SEARCH_INDEX_NEW_ONLY and not created
    if not skip:
        Indexer.replace_item(instance)
def item_post_delete_handler(sender, instance, **kwargs):
    # post_delete hook: remove the deleted item from the search index.
    Indexer.delete_item(instance)
def tag_post_save_handler(sender, instance, **kwargs):
    # Placeholder: tag changes do not trigger re-indexing yet.
    pass
def tag_post_delete_handler(sender, instance, **kwargs):
    # Placeholder: tag deletions do not trigger re-indexing yet.
    pass
class Indexer:
    """Typesense-backed search indexer for catalog items.

    All items live in one collection (INDEX_NAME); each document's primary
    key ("id") is the item's uuid (see obj_to_dict).
    """

    # class_name (lowercased model name) -> model class, used to map
    # search hits back to database objects in item_to_obj().
    class_map = {}
    _instance = None

    @classmethod
    def instance(cls):
        # Lazily create and cache a single typesense client.
        if cls._instance is None:
            cls._instance = typesense.Client(settings.TYPESENSE_CONNECTION)
        return cls._instance

    @classmethod
    def init(cls):
        """Create the collection with an explicit field schema."""
        # cls.instance().collections[INDEX_NAME].delete()
        # fields = [
        #     {"name": "_class", "type": "string", "facet": True},
        #     {"name": "source_site", "type": "string", "facet": True},
        #     {"name": ".*", "type": "auto", "locale": "zh"},
        # ]
        # use dumb schema below before typesense fix a bug
        fields = [
            {"name": "id", "type": "string"},
            {"name": "category", "type": "string", "facet": True},
            {"name": "class_name", "type": "string", "facet": True},
            {"name": "rating_count", "optional": True, "type": "int32", "facet": True},
            {"name": "isbn", "optional": True, "type": "string"},
            {"name": "imdb_code", "optional": True, "type": "string"},
            {"name": "author", "optional": True, "locale": "zh", "type": "string[]"},
            {"name": "orig_title", "optional": True, "locale": "zh", "type": "string"},
            {"name": "pub_house", "optional": True, "locale": "zh", "type": "string"},
            {"name": "title", "optional": True, "locale": "zh", "type": "string"},
            {
                "name": "translator",
                "optional": True,
                "locale": "zh",
                "type": "string[]",
            },
            {"name": "subtitle", "optional": True, "locale": "zh", "type": "string"},
            {"name": "artist", "optional": True, "locale": "zh", "type": "string[]"},
            {"name": "company", "optional": True, "locale": "zh", "type": "string[]"},
            {"name": "developer", "optional": True, "locale": "zh", "type": "string[]"},
            {
                "name": "other_title",
                "optional": True,
                "locale": "zh",
                "type": "string[]",
            },
            {"name": "publisher", "optional": True, "locale": "zh", "type": "string[]"},
            {"name": "actor", "optional": True, "locale": "zh", "type": "string[]"},
            {"name": "director", "optional": True, "locale": "zh", "type": "string[]"},
            {
                "name": "playwright",
                "optional": True,
                "locale": "zh",
                "type": "string[]",
            },
            {"name": "tags", "optional": True, "locale": "zh", "type": "string[]"},
            {"name": ".*", "optional": True, "locale": "zh", "type": "auto"},
        ]
        cls.instance().collections.create({"name": INDEX_NAME, "fields": fields})

    @classmethod
    def update_settings(cls):
        # https://github.com/typesense/typesense/issues/96
        # FIXME
        pass

    @classmethod
    def get_stats(cls):
        """Return collection metadata/statistics from the server."""
        return cls.instance().collections[INDEX_NAME].retrieve()

    @classmethod
    def busy(cls):
        # Typesense applies writes synchronously; there is no pending queue.
        return False

    @classmethod
    def update_model_indexable(cls, model):
        """Register a model: record its indexable fields and connect signals."""
        cls.class_map[model.__name__.lower()] = model
        model.indexable_fields = ["tags"]
        model.indexable_fields_time = []
        model.indexable_fields_dict = []
        model.indexable_fields_float = []
        for field in model._meta.get_fields():
            # renamed from `type` to avoid shadowing the builtin
            field_type = field.get_internal_type()
            if field_type in INDEXABLE_DIRECT_TYPES:
                model.indexable_fields.append(field.name)
            elif field_type in INDEXABLE_TIME_TYPES:
                model.indexable_fields_time.append(field.name)
            elif field_type in INDEXABLE_DICT_TYPES:
                model.indexable_fields_dict.append(field.name)
            elif field_type in INDEXABLE_FLOAT_TYPES:
                model.indexable_fields_float.append(field.name)
        post_save.connect(item_post_save_handler, sender=model)
        post_delete.connect(item_post_delete_handler, sender=model)

    @classmethod
    def obj_to_dict(cls, obj):
        """Convert a model instance into a typesense document dict."""
        item = {}
        for field in obj.__class__.indexable_fields:
            item[field] = getattr(obj, field)
        for field in obj.__class__.indexable_fields_time:
            item[field] = getattr(obj, field).timestamp()
        for field in obj.__class__.indexable_fields_float:
            value = getattr(obj, field)
            item[field] = float(value) if value else None
        for field in obj.__class__.indexable_fields_dict:
            d = getattr(obj, field)
            if d.__class__ is dict:
                item.update(d)
        # typesense requires the primary key to be named 'id', type string
        item["id"] = obj.uuid
        item["category"] = obj.category
        item["class_name"] = obj.class_name
        # Drop empty values and anything not searchable/filterable.
        item = {
            k: v
            for k, v in item.items()
            if v
            and (k in SEARCHABLE_ATTRIBUTES or k in FILTERABLE_ATTRIBUTES or k == "id")
        }
        # rating_count is added after the filter so it is always present.
        item["rating_count"] = obj.rating_count
        return item

    @classmethod
    def replace_item(cls, obj):
        """Insert or update the document for obj; failures are logged."""
        try:
            cls.instance().collections[INDEX_NAME].documents.upsert(
                cls.obj_to_dict(obj), {"dirty_values": "coerce_or_drop"}
            )
        except Exception as e:
            logger.error(f"replace item error: \n{e}")

    @classmethod
    def replace_batch(cls, objects):
        """Bulk upsert pre-built document dicts; failures are logged."""
        try:
            cls.instance().collections[INDEX_NAME].documents.import_(
                objects, {"action": "upsert"}
            )
        except Exception as e:
            logger.error(f"replace batch error: \n{e}")

    @classmethod
    def delete_item(cls, obj):
        """Remove obj's document from the index; failures are logged."""
        # Documents are keyed by the item uuid (set in obj_to_dict), not by
        # "<ClassName>-<pk>"; the old key never matched, so deletes were
        # silently failing.
        pk = obj.uuid
        try:
            cls.instance().collections[INDEX_NAME].documents[pk].delete()
        except Exception as e:
            logger.error(f"delete item error: \n{e}")

    @classmethod
    def search(cls, q, page=1, category=None, tag=None, sort=None):
        """Run a full-text search; returns a namespace with items/num_pages."""
        # Build the filter_by expression from the optional facets.
        f = []
        if category:
            f.append("category:= " + category)
        if tag:
            f.append(f"tags:= '{tag}'")
        filters = " && ".join(f)
        options = {
            "q": q,
            "page": page,
            "per_page": SEARCH_PAGE_SIZE,
            "query_by": ",".join(SEARCHABLE_ATTRIBUTES),
            "filter_by": filters,
            # 'facetsDistribution': ['_class'],
            # 'sort_by': None,
        }
        results = types.SimpleNamespace()
        try:
            r = cls.instance().collections[INDEX_NAME].documents.search(options)
            # Resolve hits to DB objects, dropping any that fail to load.
            results.items = [
                x
                for x in map(lambda i: cls.item_to_obj(i["document"]), r["hits"])
                if x is not None
            ]
            results.num_pages = (r["found"] + SEARCH_PAGE_SIZE - 1) // SEARCH_PAGE_SIZE
        except ObjectNotFound:
            # Collection does not exist yet: behave as an empty result set.
            results.items = []
            results.num_pages = 1
        return results

    @classmethod
    def item_to_obj(cls, item):
        """Map a search hit back to its database object; None on failure."""
        try:
            return cls.class_map[item["class_name"]].get_by_url(item["id"])
        except Exception as e:
            # include the exception so failures are diagnosable
            logger.error(f"unable to load search result item from db:\n{item}\n{e}")
            return None

View file

@ -0,0 +1,245 @@
{% load static %}
{% load i18n %}
{% load l10n %}
{% load humanize %}
{% load admin_url %}
{% load mastodon %}
{% load oauth_token %}
{% load truncate %}
{% load highlight %}
{% load thumb %}
{# Search results page: lists local index hits, lazily loads external-site results via htmx, and offers manual-creation shortcuts. #}
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{{ site_name }} - {% trans '搜索结果' %}</title>
<script src="https://cdn.staticfile.org/jquery/3.6.1/jquery.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/htmx/1.8.4/htmx.min.js"></script>
<script src="{% static 'lib/js/rating-star.js' %}"></script>
<script src="{% static 'js/rating-star-readonly.js' %}"></script>
<link rel="stylesheet" href="{% static 'css/boofilsic.min.css' %}">
<link rel="stylesheet" href="{% static 'lib/css/rating-star.css' %}">
<link rel="stylesheet" href="{% static 'lib/css/collection.css' %}">
</head>
<body>
<div id="page-wrapper">
<div id="content-wrapper">
{% include 'partial/_navbar.html' %}
<section id="content">
<div class="grid">
<div class="grid__main">
<div class="main-section-wrapper">
<div class="entity-list">
{# Heading reflects whether this was a keyword search or a tag search #}
{% if request.GET.q %}
<h5 class="entity-list__title">“{{ request.GET.q }}” {% trans '的搜索结果' %}</h5>
{% endif %}
{% if request.GET.tag %}
<h5 class="entity-list__title">{% trans '含有标签' %} “{{ request.GET.tag }}” {% trans '的结果' %}</h5>
{% endif %}
<ul class="entity-list__entities">
{# Each item renders via a per-class partial, e.g. list_item_edition.html #}
{% for item in items %}
{% with "list_item_"|add:item.class_name|add:".html" as template %}
{% include template %}
{% endwith %}
{% empty %}
<li class="entity-list__entity">
{% trans '无站内条目匹配' %}
</li>
{% endfor %}
{# htmx fetches external-site results on load and swaps them in place of this spinner #}
{% if request.GET.q and user.is_authenticated %}
<li class="entity-list__entity" hx-get="{% url 'common:external_search' %}?q={{ request.GET.q }}&c={{ request.GET.c }}&page={% if pagination.current_page %}{{ pagination.current_page }}{% else %}1{% endif %}" hx-trigger="load" hx-swap="outerHTML">
{% trans '正在实时搜索站外条目' %}
<div id="spinner">
<div class="spinner">
<div></div>
<div></div>
<div></div>
<div></div>
<div></div>
<div></div>
<div></div>
<div></div>
<div></div>
<div></div>
<div></div>
<div></div>
</div>
</div>
</li>
{% endif %}
</ul>
</div>
{# Pagination links preserve the current q/tag/c query parameters #}
<div class="pagination" >
{% if pagination.has_prev %}
<a href="?page=1&{% if request.GET.q %}q={{ request.GET.q }}{% elif request.GET.tag %}tag={{ request.GET.tag }}{% endif %}{% if request.GET.c %}&c={{ request.GET.c }}{% endif %}" class="pagination__nav-link pagination__nav-link">&laquo;</a>
<a href="?page={{ pagination.previous_page }}&{% if request.GET.q %}q={{ request.GET.q }}{% elif request.GET.tag %}tag={{ request.GET.tag }}{% endif %}{% if request.GET.c %}&c={{ request.GET.c }}{% endif %}" class="pagination__nav-link pagination__nav-link--right-margin pagination__nav-link">&lsaquo;</a>
{% endif %}
{% for page in pagination.page_range %}
{% if page == pagination.current_page %}
<a href="?page={{ page }}&{% if request.GET.q %}q={{ request.GET.q }}{% elif request.GET.tag %}tag={{ request.GET.tag }}{% endif %}{% if request.GET.c %}&c={{ request.GET.c }}{% endif %}" class="pagination__page-link pagination__page-link--current">{{ page }}</a>
{% else %}
<a href="?page={{ page }}&{% if request.GET.q %}q={{ request.GET.q }}{% elif request.GET.tag %}tag={{ request.GET.tag }}{% endif %}{% if request.GET.c %}&c={{ request.GET.c }}{% endif %}" class="pagination__page-link">{{ page }}</a>
{% endif %}
{% endfor %}
{% if pagination.has_next %}
<a href="?page={{ pagination.next_page }}&{% if request.GET.q %}q={{ request.GET.q }}{% elif request.GET.tag %}tag={{ request.GET.tag }}{% endif %}{% if request.GET.c %}&c={{ request.GET.c }}{% endif %}" class="pagination__nav-link pagination__nav-link--left-margin">&rsaquo;</a>
<a href="?page={{ pagination.last_page }}&{% if request.GET.q %}q={{ request.GET.q }}{% elif request.GET.tag %}tag={{ request.GET.tag }}{% endif %}{% if request.GET.c %}&c={{ request.GET.c }}{% endif %}" class="pagination__nav-link">&raquo;</a>
{% endif %}
</div>
</div>
</div>
{# Sidebar: shortcuts to manually create an entry when nothing matched #}
<div class="grid__aside">
<div class="aside-section-wrapper">
<div class="add-entity-entries">
<div class="add-entity-entries__entry">
<div class="add-entity-entries__label">
{% trans '没有想要的结果?' %}
</div>
{% if request.GET.c and request.GET.c in categories %}
{% if request.GET.c|lower == 'book' %}
<a href="{% url 'books:create' %}">
<button class="add-entity-entries__button">{% trans '添加书' %}</button>
</a>
{% elif request.GET.c|lower == 'movie' %}
<a href="{% url 'movies:create' %}">
<button class="add-entity-entries__button">{% trans '添加电影/剧集' %}</button>
</a>
{% elif request.GET.c|lower == 'music' %}
<a href="{% url 'music:create_album' %}">
<button class="add-entity-entries__button">{% trans '添加专辑' %}</button>
</a>
<a href="{% url 'music:create_song' %}">
<button class="add-entity-entries__button">{% trans '添加单曲' %}</button>
</a>
{% elif request.GET.c|lower == 'game' %}
<a href="{% url 'games:create' %}">
<button class="add-entity-entries__button">{% trans '添加游戏' %}</button>
</a>
{% endif %}
{% else %}
<a href="{% url 'books:create' %}">
<button class="add-entity-entries__button">{% trans '添加书' %}</button>
</a>
<a href="{% url 'movies:create' %}">
<button class="add-entity-entries__button">{% trans '添加电影/剧集' %}</button>
</a>
<a href="{% url 'music:create_album' %}">
<button class="add-entity-entries__button">{% trans '添加专辑' %}</button>
</a>
<a href="{% url 'music:create_song' %}">
<button class="add-entity-entries__button">{% trans '添加单曲' %}</button>
</a>
<a href="{% url 'games:create' %}">
<button class="add-entity-entries__button">{% trans '添加游戏' %}</button>
</a>
{% endif %}
</div>
{# Legacy scrape shortcuts, kept disabled inside an HTML comment #}
<!-- div class="add-entity-entries__entry">
{% if request.GET.c and request.GET.c in categories %}
{% if request.GET.c|lower == 'book' %}
<div class="add-entity-entries__label">
{% trans '或者(≖ ◡ ≖)✧' %}
</div>
<a href="{% url 'books:scrape' %}{% if request.GET.q %}?q={{ request.GET.q }}{% endif %}">
<button class="add-entity-entries__button">{% trans '从表瓣剽取数据' %}</button>
</a>
{% elif request.GET.c|lower == 'movie' %}
<div class="add-entity-entries__label">
{% trans '或者(≖ ◡ ≖)✧' %}
</div>
<a href="{% url 'movies:scrape' %}{% if request.GET.q %}?q={{ request.GET.q }}{% endif %}">
<button class="add-entity-entries__button">{% trans '从表瓣剽取数据' %}</button>
</a>
{% elif request.GET.c|lower == 'game' %}
<div class="add-entity-entries__label">
{% trans '或者(≖ ◡ ≖)✧' %}
</div>
<a href="{% url 'games:scrape' %}{% if request.GET.q %}?q={{ request.GET.q }}{% endif %}">
<button class="add-entity-entries__button">{% trans '从表瓣剽取数据' %}</button>
</a>
{% elif request.GET.c|lower == 'music' %}
<div class="add-entity-entries__label">
{% trans '或者(≖ ◡ ≖)✧' %}
</div>
<a href="{% url 'music:scrape_album' %}{% if request.GET.q %}?q={{ request.GET.q }}{% endif %}">
<button class="add-entity-entries__button">{% trans '从表瓣剽取数据' %}</button>
</a>
{% endif %}
{% else %}
<div class="add-entity-entries__label">
{% trans '或从表瓣剽取' %}
</div>
<a href="{% url 'books:scrape' %}{% if request.GET.q %}?q={{ request.GET.q }}{% endif %}">
<button class="add-entity-entries__button">{% trans '书' %}</button>
</a>
<a href="{% url 'movies:scrape' %}{% if request.GET.q %}?q={{ request.GET.q }}{% endif %}">
<button class="add-entity-entries__button">{% trans '电影/剧集' %}</button>
</a>
<a href="{% url 'music:scrape_album' %}{% if request.GET.q %}?q={{ request.GET.q }}{% endif %}">
<button class="add-entity-entries__button">{% trans '专辑' %}</button>
</a>
<a href="{% url 'games:scrape' %}{% if request.GET.q %}?q={{ request.GET.q }}{% endif %}">
<button class="add-entity-entries__button">{% trans '游戏' %}</button>
</a>
{% endif %}
</div -->
</div>
</div>
</div>
</div>
</section>
</div>
{% include 'partial/_footer.html' %}
</div>
{# Attach the CSRF token to every htmx request so POSTs pass Django's check #}
<script>
document.body.addEventListener('htmx:configRequest', (event) => {
event.detail.headers['X-CSRFToken'] = '{{ csrf_token }}';
})
</script>
</body>
</html>

View file

@ -167,8 +167,6 @@ def fetch_task(url):
def fetch_refresh(request, job_id):
retry = request.GET
job = Job.fetch(id=job_id, connection=django_rq.get_connection("fetch"))
print(job_id)
print(job)
item_url = job.result if job else "-" # FIXME job.return_value() in rq 1.12
if item_url:
if item_url == "-":
@ -250,9 +248,8 @@ def search(request):
elif key not in keys:
keys.append(key)
items.append(i)
urls.append(i.source_url)
i.tag_list = i.all_tag_list[:TAG_NUMBER_ON_LIST]
for res in i.external_resources.all():
urls.append(res.url)
if request.path.endswith(".json/"):
return JsonResponse(
{
@ -260,11 +257,10 @@ def search(request):
"items": list(map(lambda i: i.get_json(), items)),
}
)
request.session["search_dedupe_urls"] = urls
return render(
request,
"common/search_result.html",
"search_results.html",
{
"items": items,
"pagination": PageLinksGenerator(

View file

@ -43,13 +43,13 @@ def home(request):
@login_required
def external_search(request):
category = request.GET.get("c", default='all').strip().lower()
if category == 'all':
category = request.GET.get("c", default="all").strip().lower()
if category == "all":
category = None
keywords = request.GET.get("q", default='').strip()
page_number = int(request.GET.get('page', default=1))
keywords = request.GET.get("q", default="").strip()
page_number = int(request.GET.get("page", default=1))
items = ExternalSources.search(category, keywords, page_number) if keywords else []
dedupe_urls = request.session.get('search_dedupe_urls', [])
dedupe_urls = request.session.get("search_dedupe_urls", [])
items = [i for i in items if i.source_url not in dedupe_urls]
return render(
@ -57,19 +57,24 @@ def external_search(request):
"common/external_search_result.html",
{
"external_items": items,
}
},
)
def search(request):
if settings.ENABLE_NEW_MODEL:
from catalog.views import search as new_search
return new_search(request)
if settings.SEARCH_BACKEND is None:
return search2(request)
category = request.GET.get("c", default='all').strip().lower()
if category == 'all':
category = request.GET.get("c", default="all").strip().lower()
if category == "all":
category = None
keywords = request.GET.get("q", default='').strip()
tag = request.GET.get("tag", default='').strip()
p = request.GET.get('page', default='1')
keywords = request.GET.get("q", default="").strip()
tag = request.GET.get("tag", default="").strip()
p = request.GET.get("page", default="1")
page_number = int(p) if p.isdigit() else 1
if not (keywords or tag):
return render(
@ -77,7 +82,7 @@ def search(request):
"common/search_result.html",
{
"items": None,
}
},
)
if request.user.is_authenticated:
url_validator = URLValidator()
@ -87,13 +92,17 @@ def search(request):
return jump_or_scrape(request, keywords)
except ValidationError as e:
pass
result = Indexer.search(keywords, page=page_number, category=category, tag=tag)
keys = []
items = []
urls = []
for i in result.items:
key = i.isbn if hasattr(i, 'isbn') else (i.imdb_code if hasattr(i, 'imdb_code') else None)
key = (
i.isbn
if hasattr(i, "isbn")
else (i.imdb_code if hasattr(i, "imdb_code") else None)
)
if key is None:
items.append(i)
elif key not in keys:
@ -102,35 +111,39 @@ def search(request):
urls.append(i.source_url)
i.tag_list = i.all_tag_list[:TAG_NUMBER_ON_LIST]
if request.path.endswith('.json/'):
return JsonResponse({
'num_pages': result.num_pages,
'items':list(map(lambda i:i.get_json(), items))
})
if request.path.endswith(".json/"):
return JsonResponse(
{
"num_pages": result.num_pages,
"items": list(map(lambda i: i.get_json(), items)),
}
)
request.session['search_dedupe_urls'] = urls
request.session["search_dedupe_urls"] = urls
return render(
request,
"common/search_result.html",
{
"items": items,
"pagination": PageLinksGenerator(PAGE_LINK_NUMBER, page_number, result.num_pages),
"categories": ['book', 'movie', 'music', 'game'],
}
"pagination": PageLinksGenerator(
PAGE_LINK_NUMBER, page_number, result.num_pages
),
"categories": ["book", "movie", "music", "game"],
},
)
def search2(request):
if request.method == 'GET':
if request.method == "GET":
# test if input serach string is empty or not excluding param ?c=
empty_querystring_criteria = {k: v for k, v in request.GET.items() if k != 'c'}
empty_querystring_criteria = {k: v for k, v in request.GET.items() if k != "c"}
if not len(empty_querystring_criteria):
return HttpResponseBadRequest()
# test if user input an URL, if so jump to URL handling function
url_validator = URLValidator()
input_string = request.GET.get('q', default='').strip()
input_string = request.GET.get("q", default="").strip()
try:
url_validator(input_string)
# validation success
@ -139,13 +152,13 @@ def search2(request):
pass
# category, book/movie/music etc
category = request.GET.get("c", default='').strip().lower()
category = request.GET.get("c", default="").strip().lower()
# keywords, seperated by blank space
# it is better not to split the keywords
keywords = request.GET.get("q", default='').strip()
keywords = [keywords] if keywords else ''
keywords = request.GET.get("q", default="").strip()
keywords = [keywords] if keywords else ""
# tag, when tag is provided there should be no keywords , for now
tag = request.GET.get("tag", default='')
tag = request.GET.get("tag", default="")
# white space string, empty query
if not (keywords or tag):
@ -154,14 +167,14 @@ def search2(request):
"common/search_result.html",
{
"items": None,
}
},
)
def book_param_handler(**kwargs):
# keywords
keywords = kwargs.get('keywords')
keywords = kwargs.get("keywords")
# tag
tag = kwargs.get('tag')
tag = kwargs.get("tag")
query_args = []
q = Q()
@ -181,29 +194,42 @@ def search2(request):
# search by keywords
similarity, n = 0, 0
for keyword in keywords:
similarity += 1/2 * SequenceMatcher(None, keyword, book.title).quick_ratio()
+ 1/3 * SequenceMatcher(None, keyword, book.orig_title).quick_ratio()
+ 1/6 * SequenceMatcher(None, keyword, book.subtitle).quick_ratio()
similarity += (
1
/ 2
* SequenceMatcher(None, keyword, book.title).quick_ratio()
)
+1 / 3 * SequenceMatcher(
None, keyword, book.orig_title
).quick_ratio()
+1 / 6 * SequenceMatcher(
None, keyword, book.subtitle
).quick_ratio()
n += 1
book.similarity = similarity / n
elif tag:
# search by single tag
book.similarity = 0 if book.rating_number is None else book.rating_number
book.similarity = (
0 if book.rating_number is None else book.rating_number
)
else:
book.similarity = 0
return book.similarity
if len(queryset) > 0:
ordered_queryset = sorted(queryset, key=calculate_similarity, reverse=True)
ordered_queryset = sorted(
queryset, key=calculate_similarity, reverse=True
)
else:
ordered_queryset = list(queryset)
return ordered_queryset
def movie_param_handler(**kwargs):
# keywords
keywords = kwargs.get('keywords')
keywords = kwargs.get("keywords")
# tag
tag = kwargs.get('tag')
tag = kwargs.get("tag")
query_args = []
q = Q()
@ -223,28 +249,41 @@ def search2(request):
# search by name
similarity, n = 0, 0
for keyword in keywords:
similarity += 1/2 * SequenceMatcher(None, keyword, movie.title).quick_ratio()
+ 1/4 * SequenceMatcher(None, keyword, movie.orig_title).quick_ratio()
+ 1/4 * SequenceMatcher(None, keyword, movie.other_title).quick_ratio()
similarity += (
1
/ 2
* SequenceMatcher(None, keyword, movie.title).quick_ratio()
)
+1 / 4 * SequenceMatcher(
None, keyword, movie.orig_title
).quick_ratio()
+1 / 4 * SequenceMatcher(
None, keyword, movie.other_title
).quick_ratio()
n += 1
movie.similarity = similarity / n
elif tag:
# search by single tag
movie.similarity = 0 if movie.rating_number is None else movie.rating_number
movie.similarity = (
0 if movie.rating_number is None else movie.rating_number
)
else:
movie.similarity = 0
return movie.similarity
if len(queryset) > 0:
ordered_queryset = sorted(queryset, key=calculate_similarity, reverse=True)
ordered_queryset = sorted(
queryset, key=calculate_similarity, reverse=True
)
else:
ordered_queryset = list(queryset)
return ordered_queryset
def game_param_handler(**kwargs):
# keywords
keywords = kwargs.get('keywords')
keywords = kwargs.get("keywords")
# tag
tag = kwargs.get('tag')
tag = kwargs.get("tag")
query_args = []
q = Q()
@ -263,33 +302,48 @@ def search2(request):
def calculate_similarity(game):
if keywords:
# search by name
developer_dump = ' '.join(game.developer)
publisher_dump = ' '.join(game.publisher)
developer_dump = " ".join(game.developer)
publisher_dump = " ".join(game.publisher)
similarity, n = 0, 0
for keyword in keywords:
similarity += 1/2 * SequenceMatcher(None, keyword, game.title).quick_ratio()
+ 1/4 * SequenceMatcher(None, keyword, game.other_title).quick_ratio()
+ 1/16 * SequenceMatcher(None, keyword, developer_dump).quick_ratio()
+ 1/16 * SequenceMatcher(None, keyword, publisher_dump).quick_ratio()
similarity += (
1
/ 2
* SequenceMatcher(None, keyword, game.title).quick_ratio()
)
+1 / 4 * SequenceMatcher(
None, keyword, game.other_title
).quick_ratio()
+1 / 16 * SequenceMatcher(
None, keyword, developer_dump
).quick_ratio()
+1 / 16 * SequenceMatcher(
None, keyword, publisher_dump
).quick_ratio()
n += 1
game.similarity = similarity / n
elif tag:
# search by single tag
game.similarity = 0 if game.rating_number is None else game.rating_number
game.similarity = (
0 if game.rating_number is None else game.rating_number
)
else:
game.similarity = 0
return game.similarity
if len(queryset) > 0:
ordered_queryset = sorted(queryset, key=calculate_similarity, reverse=True)
ordered_queryset = sorted(
queryset, key=calculate_similarity, reverse=True
)
else:
ordered_queryset = list(queryset)
return ordered_queryset
def music_param_handler(**kwargs):
# keywords
keywords = kwargs.get('keywords')
keywords = kwargs.get("keywords")
# tag
tag = kwargs.get('tag')
tag = kwargs.get("tag")
query_args = []
q = Q()
@ -321,25 +375,58 @@ def search2(request):
if keywords:
# search by name
similarity, n = 0, 0
artist_dump = ' '.join(music.artist)
artist_dump = " ".join(music.artist)
for keyword in keywords:
if music.__class__ == Album:
similarity += 1/2 * SequenceMatcher(None, keyword, music.title).quick_ratio() \
+ 1/2 * SequenceMatcher(None, keyword, artist_dump).quick_ratio()
similarity += (
1
/ 2
* SequenceMatcher(
None, keyword, music.title
).quick_ratio()
+ 1
/ 2
* SequenceMatcher(
None, keyword, artist_dump
).quick_ratio()
)
elif music.__class__ == Song:
similarity += 1/2 * SequenceMatcher(None, keyword, music.title).quick_ratio() \
+ 1/6 * SequenceMatcher(None, keyword, artist_dump).quick_ratio() \
+ 1/6 * (SequenceMatcher(None, keyword, music.album.title).quick_ratio() if music.album is not None else 0)
similarity += (
1
/ 2
* SequenceMatcher(
None, keyword, music.title
).quick_ratio()
+ 1
/ 6
* SequenceMatcher(
None, keyword, artist_dump
).quick_ratio()
+ 1
/ 6
* (
SequenceMatcher(
None, keyword, music.album.title
).quick_ratio()
if music.album is not None
else 0
)
)
n += 1
music.similarity = similarity / n
elif tag:
# search by single tag
music.similarity = 0 if music.rating_number is None else music.rating_number
music.similarity = (
0 if music.rating_number is None else music.rating_number
)
else:
music.similarity = 0
return music.similarity
if len(queryset) > 0:
ordered_queryset = sorted(queryset, key=calculate_similarity, reverse=True)
ordered_queryset = sorted(
queryset, key=calculate_similarity, reverse=True
)
else:
ordered_queryset = list(queryset)
return ordered_queryset
@ -350,40 +437,40 @@ def search2(request):
music_queryset = music_param_handler(**kwargs)
game_queryset = game_param_handler(**kwargs)
ordered_queryset = sorted(
book_queryset + movie_queryset + music_queryset + game_queryset,
key=operator.attrgetter('similarity'),
reverse=True
book_queryset + movie_queryset + music_queryset + game_queryset,
key=operator.attrgetter("similarity"),
reverse=True,
)
return ordered_queryset
param_handler = {
'book': book_param_handler,
'movie': movie_param_handler,
'music': music_param_handler,
'game': game_param_handler,
'all': all_param_handler,
'': all_param_handler
"book": book_param_handler,
"movie": movie_param_handler,
"music": music_param_handler,
"game": game_param_handler,
"all": all_param_handler,
"": all_param_handler,
}
categories = [k for k in param_handler.keys() if not k in ['all', '']]
categories = [k for k in param_handler.keys() if not k in ["all", ""]]
try:
queryset = param_handler[category](
keywords=keywords,
tag=tag
)
queryset = param_handler[category](keywords=keywords, tag=tag)
except KeyError as e:
queryset = param_handler['all'](
keywords=keywords,
tag=tag
)
queryset = param_handler["all"](keywords=keywords, tag=tag)
paginator = Paginator(queryset, ITEMS_PER_PAGE)
page_number = request.GET.get('page', default=1)
page_number = request.GET.get("page", default=1)
items = paginator.get_page(page_number)
items.pagination = PageLinksGenerator(PAGE_LINK_NUMBER, page_number, paginator.num_pages)
items.pagination = PageLinksGenerator(
PAGE_LINK_NUMBER, page_number, paginator.num_pages
)
for item in items:
item.tag_list = item.get_tags_manager().values('content').annotate(
tag_frequency=Count('content')).order_by('-tag_frequency')[:TAG_NUMBER_ON_LIST]
item.tag_list = (
item.get_tags_manager()
.values("content")
.annotate(tag_frequency=Count("content"))
.order_by("-tag_frequency")[:TAG_NUMBER_ON_LIST]
)
return render(
request,
@ -391,7 +478,7 @@ def search2(request):
{
"items": items,
"categories": categories,
}
},
)
else:
@ -415,21 +502,18 @@ def jump_or_scrape(request, url):
scraper = get_scraper_by_url(url)
if scraper is None:
# invalid url
return render(request, 'common/error.html', {'msg': _("链接无效,查询失败")})
return render(request, "common/error.html", {"msg": _("链接无效,查询失败")})
else:
try:
effective_url = scraper.get_effective_url(url)
except ValueError:
return render(request, 'common/error.html', {'msg': _("链接无效,查询失败")})
return render(request, "common/error.html", {"msg": _("链接无效,查询失败")})
try:
# raise ObjectDoesNotExist
entity = scraper.data_class.objects.get(source_url=effective_url)
# if exists then jump to detail page
if request.path.endswith('.json/'):
return JsonResponse({
'num_pages': 1,
'items': [entity.get_json()]
})
if request.path.endswith(".json/"):
return JsonResponse({"num_pages": 1, "items": [entity.get_json()]})
return redirect(entity)
except ObjectDoesNotExist:
# scrape if not exists
@ -443,18 +527,27 @@ def jump_or_scrape(request, url):
except Exception as e:
logger.error(f"Scrape Failed URL: {url}\n{e}")
if settings.DEBUG:
logger.error("Expections during saving scraped data:", exc_info=e)
return render(request, 'common/error.html', {'msg': _("爬取数据失败😫")})
logger.error(
"Expections during saving scraped data:", exc_info=e
)
return render(request, "common/error.html", {"msg": _("爬取数据失败😫")})
except Exception as e:
logger.error(f"Scrape Failed URL: {url}\n{e}")
if settings.DEBUG:
logger.error("Expections during saving scraped data:", exc_info=e)
return render(request, 'common/error.html', {'msg': _("爬取数据失败😫")})
return render(request, "common/error.html", {"msg": _("爬取数据失败😫")})
return redirect(form.instance)
def go_relogin(request):
return render(request, 'common/error.html', {
'url': reverse("users:connect") + '?domain=' + request.user.mastodon_site,
'msg': _("信息已保存,但是未能分享到联邦网络"),
'secondary_msg': _("可能是你在联邦网络(Mastodon/Pleroma/...)的登录状态过期了,正在跳转到联邦网络重新登录😼")})
return render(
request,
"common/error.html",
{
"url": reverse("users:connect") + "?domain=" + request.user.mastodon_site,
"msg": _("信息已保存,但是未能分享到联邦网络"),
"secondary_msg": _(
"可能是你在联邦网络(Mastodon/Pleroma/...)的登录状态过期了,正在跳转到联邦网络重新登录😼"
),
},
)