From c4b7a760c696c817c6c42e584cef19a288ce9ce2 Mon Sep 17 00:00:00 2001 From: Your Name Date: Mon, 30 Dec 2024 09:35:58 -0500 Subject: [PATCH] fix null title when index --- catalog/book/models.py | 4 +-- catalog/common/models.py | 2 +- catalog/movie/models.py | 2 +- catalog/tv/models.py | 4 +-- journal/management/commands/journal.py | 44 ++++++++++++-------------- journal/models/index.py | 4 ++- 6 files changed, 29 insertions(+), 31 deletions(-) diff --git a/catalog/book/models.py b/catalog/book/models.py index c1caae34..d3864d72 100644 --- a/catalog/book/models.py +++ b/catalog/book/models.py @@ -217,8 +217,8 @@ class Edition(Item): return self.get_localized_subtitle() def to_indexable_titles(self) -> list[str]: - titles = [t["text"] for t in self.localized_title if t] - titles += [t["text"] for t in self.localized_subtitle if t] + titles = [t["text"] for t in self.localized_title if t["text"]] + titles += [t["text"] for t in self.localized_subtitle if t["text"]] titles += [self.orig_title] if self.orig_title else [] return list(set(titles)) diff --git a/catalog/common/models.py b/catalog/common/models.py index db8fa015..82380136 100644 --- a/catalog/common/models.py +++ b/catalog/common/models.py @@ -664,7 +664,7 @@ class Item(PolymorphicModel): return (str(self.display_description) or "")[:155] def to_indexable_titles(self) -> list[str]: - titles = [t["text"] for t in self.localized_title if t] + titles = [t["text"] for t in self.localized_title if t["text"]] if self.parent_item: titles += self.parent_item.to_indexable_titles() return list(set(titles)) diff --git a/catalog/movie/models.py b/catalog/movie/models.py index 5955bf16..bfe4b66c 100644 --- a/catalog/movie/models.py +++ b/catalog/movie/models.py @@ -175,6 +175,6 @@ class Movie(Item): return super().lookup_id_cleanup(lookup_id_type, lookup_id_value) def to_indexable_titles(self) -> list[str]: - titles = [t["text"] for t in self.localized_title if t] + titles = [t["text"] for t in self.localized_title if t["text"]] titles += [self.orig_title] if self.orig_title else [] return list(set(titles)) diff --git a/catalog/tv/models.py b/catalog/tv/models.py index a94f7f16..0677a7d2 100644 --- a/catalog/tv/models.py +++ b/catalog/tv/models.py @@ -251,7 +251,7 @@ class TVShow(Item): return self.season_count or self.seasons.all().count() def to_indexable_titles(self) -> list[str]: - titles = [t["text"] for t in self.localized_title if t] + titles = [t["text"] for t in self.localized_title if t["text"]] titles += [self.orig_title] if self.orig_title else [] return list(set(titles)) @@ -440,7 +440,7 @@ class TVSeason(Item): ] def to_indexable_titles(self) -> list[str]: - titles = [t["text"] for t in self.localized_title if t] + titles = [t["text"] for t in self.localized_title if t["text"]] titles += [self.orig_title] if self.orig_title else [] titles += self.parent_item.to_indexable_titles() if self.parent_item else [] return list(set(titles)) diff --git a/journal/management/commands/journal.py b/journal/management/commands/journal.py index d652103f..00f5c921 100644 --- a/journal/management/commands/journal.py +++ b/journal/management/commands/journal.py @@ -5,14 +5,10 @@ from django.core.paginator import Paginator from tqdm import tqdm from catalog.models import Item -from journal.importers.douban import DoubanImporter -from journal.models import * -from journal.models import JournalIndex, Piece -from journal.models.common import Content +from journal.models import Content, JournalIndex, Piece, update_journal_for_merged_item from journal.models.itemlist import ListMember from takahe.models import Post -from users.models import * -from users.models import User +from users.models import APIdentity _CONFIRM = "confirm deleting collection? [Y/N] " @@ -98,21 +94,6 @@ class Command(BaseCommand): if self.fix: update_journal_for_merged_item(i.url) - def batch_index(self, index, typ, qs): - c = 0 - pg = Paginator(qs.order_by("id"), self.batch_size) - for p in tqdm(pg.page_range): - if typ == "post": - docs = index.posts_to_docs(pg.get_page(p).object_list) - else: - pieces = [ - p for p in pg.get_page(p).object_list if p.latest_post is None - ] - docs = index.pieces_to_docs(pieces) - c += len(docs) - index.replace_docs(docs) - self.stdout.write(self.style.SUCCESS(f"indexed {c} docs.")) - def handle( self, action, @@ -188,10 +169,25 @@ class Command(BaseCommand): if owners: pieces = pieces.filter(owner_id__in=owners) posts = posts.filter(author_id__in=owners) - # index all posts - self.batch_index(index, "post", posts) + # index all posts first + c = 0 + pg = Paginator(posts.order_by("id"), self.batch_size) + for p in tqdm(pg.page_range): + docs = index.posts_to_docs(pg.get_page(p).object_list) + c += len(docs) + index.replace_docs(docs) + self.stdout.write(self.style.SUCCESS(f"indexed {c} docs.")) # index remaining pieces without posts - self.batch_index(index, "piece", pieces) + c = 0 + pg = Paginator(pieces.order_by("id"), self.batch_size) + for p in tqdm(pg.page_range): + pieces = [ + p for p in pg.get_page(p).object_list if p.latest_post is None + ] + docs = index.pieces_to_docs(pieces) + c += len(docs) + index.replace_docs(docs) + self.stdout.write(self.style.SUCCESS(f"indexed {c} docs.")) # posts = posts.exclude(type_data__object__has_key="relatedWith") # docs = index.posts_to_docs(posts) # c = len(docs) diff --git a/journal/models/index.py b/journal/models/index.py index ea4f6ed2..449d36be 100644 --- a/journal/models/index.py +++ b/journal/models/index.py @@ -228,6 +228,8 @@ class JournalIndex(Index): pc = post.piece doc = {} if pc: + pc.latest_post = post + pc.latest_post_id = post.pk doc = cls.piece_to_doc(pc) if not doc: doc = { @@ -245,7 +247,7 @@ class JournalIndex(Index): return doc @classmethod - def posts_to_docs(cls, posts: QuerySet[Post]) -> list[dict]: + def posts_to_docs(cls, posts: Iterable[Post]) -> list[dict]: return [cls.post_to_doc(p) for p in posts] def delete_all(self):