support sort parameter in journal search

This commit is contained in:
Your Name 2025-01-01 09:32:24 -05:00 committed by Henri Dickson
parent 0781bcdcf1
commit 4b9eeabfd7
13 changed files with 355 additions and 309 deletions

View file

@ -1,5 +1,5 @@
from .cron import BaseJob, JobManager
from .index import Index, SearchResult
from .index import Index, QueryParser, SearchResult
from .lang import (
LANGUAGE_CHOICES,
LOCALE_CHOICES,
@ -26,5 +26,6 @@ __all__ = [
"uniq",
"int_",
"Index",
"QueryParser",
"SearchResult",
]

View file

@ -1,3 +1,4 @@
import re
from functools import cached_property
from time import sleep
from typing import Iterable, Self, TypeVar
@ -5,10 +6,98 @@ from typing import Iterable, Self, TypeVar
import typesense
from django.conf import settings
from loguru import logger
from ninja import Query
from typesense.collection import Collection
from typesense.exceptions import ObjectNotFound
class QueryParser:
fields = ["sort"]
default_search_params = {
"q": "",
"filter_by": "",
"query_by": "",
"sort_by": "",
"per_page": 20,
"include_fields": "id",
"highlight_fields": "",
} # https://typesense.org/docs/latest/api/search.html#search-parameters
max_pages = 100
@classmethod
def re(cls):
return re.compile(
r"\b(?P<field>" + "|".join(cls.fields) + r")\s*:(?P<value>[^ ]+)", re.I
)
def __init__(self, query: str, page: int = 1, page_size: int = 0):
"""Parse fields from a query string, subclass should define and use these fields"""
self.raw_query = str(query) if query else ""
if self.fields:
r = self.re()
self.q = r.sub("", query).strip()
self.parsed_fields = {
m.group("field").strip().lower(): m.group("value").strip().lower()
for m in r.finditer(query)
}
else:
self.q = query.strip()
self.parsed_fields = {}
self.page = page
self.page_size = page_size
self.filter_by = {}
self.query_by = []
self.sort_by = []
def is_valid(self):
"""Check if the parsed query is valid"""
print(self.page, self.max_pages, self.q, self.filter_by)
return (
self.page > 0
and self.page <= self.max_pages
and bool(self.q or self.filter_by)
)
def __bool__(self):
return self.is_valid()
def filter(self, field: str, value: list[int | str] | int | str):
"""Override a specific filter"""
self.filter_by[field] = value if isinstance(value, list) else [value]
def sort(self, fields: list[str]):
"""Override the default sort fields"""
self.sort_by = fields
def to_search_params(self) -> dict:
"""Convert the parsed query to search parameters"""
params = self.default_search_params.copy()
params["q"] = self.q
params["page"] = (
self.page if self.page > 0 and self.page <= self.max_pages else 1
)
if self.page_size:
params["per_page"] = self.page_size
if self.filter_by:
filters = []
for field, values in self.filter_by.items():
if field == "_":
filters += values
elif values:
v = (
f"[{','.join(map(str, values))}]"
if len(values) > 1
else str(values[0])
)
filters.append(f"{field}:{v}")
params["filter_by"] = " && ".join(filters)
if self.query_by:
params["query_by"] = ",".join(self.query_by)
if self.sort_by:
params["sort_by"] = ",".join(self.sort_by)
return params
class SearchResult:
def __init__(self, index: "Index", response: dict):
self.index = index
@ -49,19 +138,10 @@ class SearchResult:
return item in self.response["hits"]
SearchResultClass = TypeVar("SearchResultClass", bound=SearchResult)
class Index:
name = "" # must be set in subclass
schema = {"fields": []} # must be set in subclass
max_pages = 100
default_search_params = {
# "query_by": ...,
"per_page": 20,
"highlight_fields": "",
"include_fields": "id",
}
search_result_class = SearchResult
_instance = None
_client: typesense.Client
@ -185,39 +265,13 @@ class Index:
def search(
self,
q: str,
page: int = 1,
page_size: int = 0,
query_by: list[str] = [],
sort_by: str = "",
filter_by: dict[str, list[str | int]] = {},
facet_by: list[str] = [],
result_class: type[SearchResultClass] = SearchResult,
) -> SearchResultClass:
params = self.default_search_params.copy()
params["q"] = q
params["page"] = page if page > 0 and page <= self.max_pages else 1
if page_size:
params["per_page"] = page_size
filters = []
for field, values in filter_by.items():
if field == "_":
filters += values
elif values:
v = f"[{','.join(map(str, values))}]" if len(values) > 1 else values[0]
filters.append(f"{field}:{v}")
if filters:
params["filter_by"] = " && ".join(filters)
if facet_by:
params["facet_by"] = ",".join(facet_by)
if query_by:
params["query_by"] = ",".join(query_by)
if sort_by:
params["sort_by"] = sort_by
query: QueryParser,
) -> SearchResult:
params = query.to_search_params()
if settings.DEBUG:
logger.debug(f"Typesense: search {self.name} {params}")
r = self.read_collection.documents.search(params)
sr = result_class(self, r)
sr = self.search_result_class(self, r)
if settings.DEBUG:
logger.debug(f"Typesense: search result {sr}")
return sr

View file

@ -8,7 +8,7 @@ def uniq(ls: list) -> list:
def int_(x, default=0):
return (
int(x)
if isinstance(x, str) and x.isdigit()
else (x if isinstance(x, int) else default)
x
if isinstance(x, int)
else (int(x) if (isinstance(x, str) and x.isdigit()) else default)
)

View file

@ -1,4 +1,5 @@
<div class="pagination">
{% if pagination %}
{% if pagination.has_prev %}
<a href="?{{ pagination.query_string }}page=1" class="s">&laquo;</a>
<a href="?{{ pagination.query_string }}page={{ pagination.previous_page }}"
@ -17,4 +18,5 @@
<a href="?{{ pagination.query_string }}page={{ pagination.last_page }}"
class="s">&raquo;</a>
{% endif %}
{% endif %}
</div>

View file

@ -17,6 +17,7 @@ from journal.models import (
ShelfMember,
update_journal_for_merged_item,
)
from journal.models.index import JournalQueryParser
from journal.models.itemlist import ListMember
from takahe.models import Post
from users.models import APIdentity, User
@ -237,15 +238,14 @@ class Command(BaseCommand):
# self.stdout.write(self.style.SUCCESS(f"indexed {c} posts."))
case "idx-search":
r = index.search(
"" if query == "-" else query,
filter_by={
"owner_id": owners,
"piece_class": piece_class,
"item_class": item_class,
},
page_size=100,
)
q = JournalQueryParser("" if query == "-" else query, page_size=100)
if owners:
q.filter("owner_id", owners)
if item_class:
q.filter("item_class", item_class)
if piece_class:
q.filter("piece_class", piece_class)
r = index.search(q)
self.stdout.write(self.style.SUCCESS(str(r)))
self.stdout.write(f"{r.facet_by_item_class}")
self.stdout.write(f"{r.facet_by_piece_class}")

View file

@ -12,7 +12,7 @@ from .common import (
q_piece_in_home_feed_of_user,
q_piece_visible_to_user,
)
from .index import JournalIndex, QueryParser
from .index import JournalIndex, JournalQueryParser
from .like import Like
from .mark import Mark
from .mixins import UserOwnedObjectMixin
@ -49,7 +49,7 @@ __all__ = [
"Like",
"Mark",
"Note",
"QueryParser",
"JournalQueryParser",
"Rating",
"render_md",
"Review",

View file

@ -8,7 +8,7 @@ from django.db.models import QuerySet
from catalog.common.models import item_categories
from catalog.models import Item
from common.models import Index, SearchResult, int_, uniq
from common.models import Index, QueryParser, SearchResult, int_, uniq
from takahe.models import Post
from takahe.utils import Takahe
@ -28,6 +28,131 @@ def _get_item_ids(doc):
)
class JournalQueryParser(QueryParser):
    """QueryParser for journal search: translates ``status:``, ``rating:``,
    ``tag:``, ``category:``, ``type:``, ``date:`` and ``sort:`` tokens into
    Typesense filter/sort parameters for the journal index."""

    fields = ["status", "rating", "tag", "category", "type", "date", "sort"]
    # recognized values for "status:" (mapped to the shelf_type filter)
    status_values = {"wishlist", "progress", "complete", "dropped"}
    # recognized values for "type:" (piece_class filter); "mark" is aliased
    # to "shelfmember" in __init__
    type_values = {"shelfmember", "rating", "comment", "review", "collection", "note"}
    # "sort:" keyword -> Typesense sort expression
    sort_values = {"date": "created:desc", "rating": "rating:desc"}
    default_search_params = {
        "query_by": "content, item_title, tag",
        "per_page": 20,
        "highlight_fields": "",
        "include_fields": "post_id, piece_id, item_id, owner_id, piece_class",
        "facet_by": "item_class, piece_class",
    }

    def __init__(self, query: str, page: int = 1, page_size: int = 0):
        super().__init__(query, page, page_size)
        # sort: only the first recognized keyword (set order) is applied
        v = list(
            set(self.parsed_fields.get("sort", "").split(",")) & self.sort_values.keys()
        )
        if v:
            self.sort_by = [self.sort_values[v[0]]]
        # status: comma-separated list intersected with the allowed values
        v = list(
            set(self.parsed_fields.get("status", "").split(",")) & self.status_values
        )
        if v:
            self.filter_by["shelf_type"] = v
        # type: "mark" is accepted as an alias for "shelfmember"
        v = list(
            set(
                self.parsed_fields.get("type", "")
                .replace("mark", "shelfmember")
                .split(",")
            )
            & self.type_values
        )
        if v:
            self.filter_by["piece_class"] = v
        # else:
        #     # hide collection by default unless specified
        #     self.filter_by["piece_class"] = ["!collection"]
        v = [i for i in set(self.parsed_fields.get("tag", "").split(",")) if i]
        if v:
            self.filter_by["tag"] = v
            # NOTE(review): presumably this drops "tag" from text matching
            # once an exact tag filter is set — confirm intended placement
            self.query_by = ["content", "item_title"]
        # rating: either a range "a..b" or a single value, each within 0..10
        v = self.parsed_fields.get("rating", "").split("..")
        if len(v) == 2:
            v = list(map(int_, v))
            if all([i >= 0 and i <= 10 for i in v]):
                self.filter_by["rating"] = ["..".join(map(str, v))]
        elif len(v) == 1:
            v = int_(v[0], -1)
            if v >= 0 and v <= 10:
                self.filter_by["rating"] = [v]
        # v = self.filters.get("category", "").split(",")
        # date: "start..end" range, or a single YYYY / YYYY-MM / YYYY-MM-DD
        # prefix expanded to the whole year/month/day
        v = self.parsed_fields.get("date", "").split("..")
        if len(v) == 2:
            start = self.start_date_to_int(v[0])
            end = self.end_date_to_int(v[1])
        elif len(v) == 1:
            start, end = self.date_to_int_range(v[0])
        else:
            start, end = 0, 0
        if start and end:
            self.filter_by["created"] = [f"{start}..{end}"]
        # category: map category values to their item class names via the
        # catalog registry, flattening into one item_class filter list
        v = [i for i in set(self.parsed_fields.get("category", "").split(",")) if i]
        if v:
            cats = {
                c.value: [ic.__name__ for ic in cl]
                for c, cl in item_categories().items()
            }
            v = list(set(v) & cats.keys())
            v = reduce(lambda a, b: a + b, [cats[i] for i in v], [])
            self.filter_by["item_class"] = v

    def start_date_to_int(self, date: str) -> int:
        """Parse YYYY[-MM[-DD]] to the unix timestamp of its first instant;
        return 0 for unparseable input."""
        try:
            if re.match(r"\d{4}-\d{1,2}-\d{1,2}", date):
                d = datetime.strptime(date, "%Y-%m-%d")
            elif re.match(r"\d{4}-\d{1,2}", date):
                d = datetime.strptime(date, "%Y-%m")
            elif re.match(r"\d{4}", date):
                d = datetime.strptime(date, "%Y")
            else:
                return 0
            return int(d.timestamp())
        except ValueError:
            return 0

    def end_date_to_int(self, date: str) -> int:
        """Parse YYYY[-MM[-DD]] to the unix timestamp of its last second
        (start of the next day/month/year minus 1); return 0 if invalid."""
        try:
            if re.match(r"\d{4}-\d{1,2}-\d{1,2}", date):
                d = datetime.strptime(date, "%Y-%m-%d") + relativedelta(days=1)
            elif re.match(r"\d{4}-\d{1,2}", date):
                d = datetime.strptime(date, "%Y-%m") + relativedelta(months=1)
            elif re.match(r"\d{4}", date):
                d = datetime.strptime(date, "%Y") + relativedelta(years=1)
            else:
                return 0
            return int(d.timestamp()) - 1
        except ValueError:
            return 0

    def date_to_int_range(self, date: str) -> tuple[int, int]:
        """Expand a single YYYY[-MM[-DD]] prefix to an inclusive
        (start, end) unix-timestamp range; (0, 0) if invalid."""
        try:
            if re.match(r"\d{4}-\d{1,2}-\d{1,2}", date):
                start = datetime.strptime(date, "%Y-%m-%d")
                end = start + relativedelta(days=1)
            elif re.match(r"\d{4}-\d{1,2}", date):
                start = datetime.strptime(date, "%Y-%m")
                end = start + relativedelta(months=1)
            elif re.match(r"\d{4}", date):
                start = datetime.strptime(date, "%Y")
                end = start + relativedelta(years=1)
            else:
                return 0, 0
            return int(start.timestamp()), int(end.timestamp()) - 1
        except ValueError:
            return 0, 0
class JournalSearchResult(SearchResult):
@cached_property
def items(self):
@ -188,14 +313,7 @@ class JournalIndex(Index):
},
]
}
default_search_params = {
"query_by": "content, item_title, tag",
"sort_by": "created:desc",
"per_page": 20,
"highlight_fields": "",
"include_fields": "post_id, piece_id, item_id, owner_id, piece_class",
"facet_by": "item_class, piece_class",
}
search_result_class = JournalSearchResult
@classmethod
def piece_to_doc(cls, piece: "Piece") -> dict:
@ -285,144 +403,7 @@ class JournalIndex(Index):
def search(
self,
q: str,
page: int = 1,
page_size: int = 0,
query_by: list[str] = [],
sort_by: str = "",
filter_by: dict[str, list[str | int]] = {},
facet_by: list[str] = [],
result_class=JournalSearchResult,
query,
) -> JournalSearchResult:
r = super().search(
q=q,
page=page,
page_size=page_size,
query_by=query_by,
sort_by=sort_by,
filter_by=filter_by,
facet_by=facet_by,
result_class=result_class,
)
return r
class QueryParser:
fields = ["status", "rating", "tag", "category", "type", "date"]
@classmethod
def re(cls):
return re.compile(
r"\b(?P<field>" + "|".join(cls.fields) + r"):(?P<value>[^ ]+)", re.I
)
def __init__(self, query: str):
self.query = str(query) if query else ""
r = self.re()
self.filters = {
m.group("field").strip().lower(): m.group("value").strip().lower()
for m in r.finditer(query)
}
self.q = r.sub("", query).strip()
self.filter_by = {}
self.query_by = ["content", "item_title", "tag"]
v = list(
set(self.filters.get("status", "").split(","))
& {"wishlist", "progress", "complete", "dropped"}
)
if v:
self.filter_by["shelf_type"] = v
v = list(
set(self.filters.get("type", "").replace("mark", "shelfmember").split(","))
& {"shelfmember", "rating", "comment", "review", "collection", "note"}
)
if v:
self.filter_by["piece_class"] = v
# else:
# # hide collection by default unless specified
# self.filter_by["piece_class"] = ["!collection"]
v = [i for i in set(self.filters.get("tag", "").split(",")) if i]
if v:
self.filter_by["tag"] = v
self.query_by.remove("tag")
v = self.filters.get("rating", "").split("..")
if len(v) == 2:
v = map(int_, v)
if all([i >= 0 and i <= 10 for i in v]):
self.filter_by["rating"] = ["..".join(map(str, v))]
elif len(v) == 1:
v = int_(v[0], -1)
if v >= 0 and v <= 10:
self.filter_by["rating"] = [v]
# v = self.filters.get("category", "").split(",")
v = self.filters.get("date", "").split("..")
if len(v) == 2:
start = self.start_date_to_int(v[0])
end = self.end_date_to_int(v[1])
elif len(v) == 1:
start, end = self.date_to_int_range(v[0])
else:
start, end = 0, 0
if start and end:
self.filter_by["created"] = [f"{start}..{end}"]
v = self.filters.get("category", "").split(",")
if v:
cats = {
c.value: [ic.__name__ for ic in cl]
for c, cl in item_categories().items()
}
v = list(set(v) & cats.keys())
v = reduce(lambda a, b: a + b, [cats[i] for i in v], [])
self.filter_by["item_class"] = v
def start_date_to_int(self, date: str) -> int:
try:
if re.match(r"\d{4}-\d{1,2}-\d{1,2}", date):
d = datetime.strptime(date, "%Y-%m-%d")
elif re.match(r"\d{4}-\d{1,2}", date):
d = datetime.strptime(date, "%Y-%m")
elif re.match(r"\d{4}", date):
d = datetime.strptime(date, "%Y")
else:
return 0
return int(d.timestamp())
except ValueError:
return 0
def end_date_to_int(self, date: str) -> int:
try:
if re.match(r"\d{4}-\d{1,2}-\d{1,2}", date):
d = datetime.strptime(date, "%Y-%m-%d") + relativedelta(days=1)
elif re.match(r"\d{4}-\d{1,2}", date):
d = datetime.strptime(date, "%Y-%m") + relativedelta(months=1)
elif re.match(r"\d{4}", date):
d = datetime.strptime(date, "%Y") + relativedelta(years=1)
else:
return 0
return int(d.timestamp()) - 1
except ValueError:
return 0
def date_to_int_range(self, date: str) -> tuple[int, int]:
try:
if re.match(r"\d{4}-\d{1,2}-\d{1,2}", date):
start = datetime.strptime(date, "%Y-%m-%d")
end = start + relativedelta(days=1)
elif re.match(r"\d{4}-\d{1,2}", date):
start = datetime.strptime(date, "%Y-%m")
end = start + relativedelta(months=1)
elif re.match(r"\d{4}", date):
start = datetime.strptime(date, "%Y")
end = start + relativedelta(years=1)
else:
return 0, 0
return int(start.timestamp()), int(end.timestamp()) - 1
except ValueError:
return 0, 0
r = super().search(query)
return r # type:ignore

View file

@ -3,24 +3,19 @@ from django.shortcuts import render
from common.models.misc import int_
from common.utils import PageLinksGenerator
from journal.models import JournalIndex, QueryParser
from journal.models import JournalIndex, JournalQueryParser
@login_required
def search(request):
identity_id = request.user.identity.pk
page = int_(request.GET.get("page"))
q = QueryParser(request.GET.get("q", default=""))
q.filter_by["owner_id"] = [identity_id] # only search for current user
q.filter_by["item_id"] = [">0"] # only search for records with items
page = int_(request.GET.get("page"), 1)
q = JournalQueryParser(request.GET.get("q", default=""), page)
q.filter("item_id", ">0")
q.filter("owner_id", identity_id)
if q:
index = JournalIndex.instance()
r = index.search(
q.q,
filter_by=q.filter_by,
query_by=q.query_by,
sort_by="_text_match:desc",
page=page,
)
r = index.search(q)
return render(
request,
"search_journal.html",
@ -29,3 +24,5 @@ def search(request):
"pagination": PageLinksGenerator(r.page, r.pages, request.GET),
},
)
else:
return render(request, "search_journal.html", {"items": []})

View file

@ -31,7 +31,7 @@
</small>
</h5>
<div class="feed">
<div hx-get="{% url 'social:data' %}?typ={{ feed_type }}&amp;q={{ request.GET.q }}"
<div hx-get="{% url 'social:data' %}?typ={{ feed_type }}"
hx-trigger="intersect once delay:0.1s"
hx-swap="outerHTML">
<i class="fa-solid fa-compact-disc fa-spin loading"></i>

View file

@ -125,7 +125,7 @@
{% if forloop.last %}
<div class="htmx-indicator"
style="margin-left: 60px"
{% if request.GET.q %} hx-get="{% url 'social:data' %}?q={{ request.GET.q }}&amp;lastpage={{ page }}" {% else %} hx-get="{% url 'social:data' %}?last={{ event.pk }}&amp;typ={{ feed_type }}" {% endif %}
{% if request.GET.q %} hx-get="{% url 'social:search_data' %}?q={{ request.GET.q }}&amp;lastpage={{ page }}" {% else %} hx-get="{% url 'social:data' %}?last={{ event.pk }}&amp;typ={{ feed_type }}" {% endif %}
hx-trigger="revealed"
hx-swap="outerHTML">
<i class="fa-solid fa-compact-disc fa-spin loading"></i>

View file

@ -21,7 +21,7 @@
<div class="grid__main">
{% include 'search_header.html' %}
<div class="feed">
<div hx-get="{% url 'social:data' %}?q={{ request.GET.q }}"
<div hx-get="{% url 'social:search_data' %}?q={{ request.GET.q }}"
hx-trigger="intersect once delay:0.1s"
hx-swap="outerHTML">
<i class="fa-solid fa-compact-disc fa-spin loading"></i>

View file

@ -7,6 +7,7 @@ urlpatterns = [
path("", feed, name="feed"),
path("focus", focus, name="focus"),
path("data", data, name="data"),
path("search_data", search_data, name="search_data"),
path("notification", notification, name="notification"),
path("events", events, name="events"),
]

View file

@ -5,7 +5,7 @@ from django.views.decorators.http import require_http_methods
from catalog.models import Edition, Item, ItemCategory, PodcastEpisode
from common.models.misc import int_
from journal.models import JournalIndex, Piece, QueryParser, ShelfType
from journal.models import JournalIndex, JournalQueryParser, Piece, ShelfType
from takahe.models import Post, PostInteraction, TimelineEvent
from takahe.utils import Takahe
from users.models import APIdentity
@ -46,6 +46,25 @@ def _sidebar_context(user):
}
def _add_interaction_to_events(events, identity_id):
    """Annotate each event's subject_post with whether the viewing identity
    has liked/boosted it (``liked_by_current_user`` / ``boosted_by_current_user``).

    Mutates the posts in place; events without a subject_post_id are skipped.
    """
    # Materialize the (post_id, type) pairs into a set: membership tests on a
    # lazy values_list queryset issue a separate DB query per `in` check
    # (two per event); a set makes it one query total with O(1) lookups.
    interactions = set(
        PostInteraction.objects.filter(
            identity_id=identity_id,
            post_id__in=[event.subject_post_id for event in events],
            type__in=["like", "boost"],
            state__in=["new", "fanned_out"],
        ).values_list("post_id", "type")
    )
    for event in events:
        if event.subject_post_id:
            event.subject_post.liked_by_current_user = (  # type: ignore
                event.subject_post_id,
                "like",
            ) in interactions
            event.subject_post.boosted_by_current_user = (  # type: ignore
                event.subject_post_id,
                "boost",
            ) in interactions
@require_http_methods(["GET"])
@login_required
def feed(request, typ=0):
@ -73,32 +92,38 @@ def search(request):
@login_required
@require_http_methods(["GET"])
def data(request):
since_id = int_(request.GET.get("last", 0))
typ = int_(request.GET.get("typ", 0))
q = request.GET.get("q")
def search_data(request):
identity_id = request.user.identity.pk
page = int_(request.GET.get("lastpage")) + 1
if q:
q = QueryParser(request.GET.get("q", default=""))
q = JournalQueryParser(request.GET.get("q", default=""), page, page_size=PAGE_SIZE)
index = JournalIndex.instance()
q.filter_by["owner_id"] = [identity_id]
q.filter_by["post_id"] = [">0"]
r = index.search(
q.q,
filter_by=q.filter_by,
query_by=q.query_by,
sort_by="created:desc",
page=page,
page_size=PAGE_SIZE,
)
q.filter("post_id", ">0")
q.filter("owner_id", identity_id)
q.sort(["created:desc"])
if q:
r = index.search(q)
events = [
SearchResultEvent(p)
for p in r.posts.select_related("author")
.prefetch_related("attachments")
.order_by("-id")
]
_add_interaction_to_events(events, identity_id)
else:
events = []
return render(
request,
"feed_events.html",
{"events": events, "page": page},
)
@login_required
@require_http_methods(["GET"])
def data(request):
since_id = int_(request.GET.get("last", 0))
typ = int_(request.GET.get("typ", 0))
identity_id = request.user.identity.pk
events = TimelineEvent.objects.filter(
identity_id=identity_id,
type__in=[TimelineEvent.Types.post, TimelineEvent.Types.boost],
@ -130,26 +155,11 @@ def data(request):
)
.order_by("-id")[:PAGE_SIZE]
)
interactions = PostInteraction.objects.filter(
identity_id=identity_id,
post_id__in=[event.subject_post_id for event in events],
type__in=["like", "boost"],
state__in=["new", "fanned_out"],
).values_list("post_id", "type")
for event in events:
if event.subject_post_id:
event.subject_post.liked_by_current_user = ( # type: ignore
event.subject_post_id,
"like",
) in interactions
event.subject_post.boosted_by_current_user = ( # type: ignore
event.subject_post_id,
"boost",
) in interactions
_add_interaction_to_events(events, identity_id)
return render(
request,
"feed_events.html",
{"feed_type": typ, "events": events, "page": page},
{"feed_type": typ, "events": events},
)