fix null title when indexing

This commit is contained in:
Your Name 2024-12-30 09:35:58 -05:00 committed by Henri Dickson
parent 9cb681b9fc
commit c4b7a760c6
6 changed files with 29 additions and 31 deletions

View file

@ -217,8 +217,8 @@ class Edition(Item):
return self.get_localized_subtitle() return self.get_localized_subtitle()
def to_indexable_titles(self) -> list[str]: def to_indexable_titles(self) -> list[str]:
titles = [t["text"] for t in self.localized_title if t] titles = [t["text"] for t in self.localized_title if t["text"]]
titles += [t["text"] for t in self.localized_subtitle if t] titles += [t["text"] for t in self.localized_subtitle if t["text"]]
titles += [self.orig_title] if self.orig_title else [] titles += [self.orig_title] if self.orig_title else []
return list(set(titles)) return list(set(titles))

View file

@ -664,7 +664,7 @@ class Item(PolymorphicModel):
return (str(self.display_description) or "")[:155] return (str(self.display_description) or "")[:155]
def to_indexable_titles(self) -> list[str]: def to_indexable_titles(self) -> list[str]:
titles = [t["text"] for t in self.localized_title if t] titles = [t["text"] for t in self.localized_title if t["text"]]
if self.parent_item: if self.parent_item:
titles += self.parent_item.to_indexable_titles() titles += self.parent_item.to_indexable_titles()
return list(set(titles)) return list(set(titles))

View file

@ -175,6 +175,6 @@ class Movie(Item):
return super().lookup_id_cleanup(lookup_id_type, lookup_id_value) return super().lookup_id_cleanup(lookup_id_type, lookup_id_value)
def to_indexable_titles(self) -> list[str]: def to_indexable_titles(self) -> list[str]:
titles = [t["text"] for t in self.localized_title if t] titles = [t["text"] for t in self.localized_title if t["text"]]
titles += [self.orig_title] if self.orig_title else [] titles += [self.orig_title] if self.orig_title else []
return list(set(titles)) return list(set(titles))

View file

@ -251,7 +251,7 @@ class TVShow(Item):
return self.season_count or self.seasons.all().count() return self.season_count or self.seasons.all().count()
def to_indexable_titles(self) -> list[str]: def to_indexable_titles(self) -> list[str]:
titles = [t["text"] for t in self.localized_title if t] titles = [t["text"] for t in self.localized_title if t["text"]]
titles += [self.orig_title] if self.orig_title else [] titles += [self.orig_title] if self.orig_title else []
return list(set(titles)) return list(set(titles))
@ -440,7 +440,7 @@ class TVSeason(Item):
] ]
def to_indexable_titles(self) -> list[str]: def to_indexable_titles(self) -> list[str]:
titles = [t["text"] for t in self.localized_title if t] titles = [t["text"] for t in self.localized_title if t["text"]]
titles += [self.orig_title] if self.orig_title else [] titles += [self.orig_title] if self.orig_title else []
titles += self.parent_item.to_indexable_titles() if self.parent_item else [] titles += self.parent_item.to_indexable_titles() if self.parent_item else []
return list(set(titles)) return list(set(titles))

View file

@ -5,14 +5,10 @@ from django.core.paginator import Paginator
from tqdm import tqdm from tqdm import tqdm
from catalog.models import Item from catalog.models import Item
from journal.importers.douban import DoubanImporter from journal.models import Content, JournalIndex, Piece, update_journal_for_merged_item
from journal.models import *
from journal.models import JournalIndex, Piece
from journal.models.common import Content
from journal.models.itemlist import ListMember from journal.models.itemlist import ListMember
from takahe.models import Post from takahe.models import Post
from users.models import * from users.models import APIdentity
from users.models import User
_CONFIRM = "confirm deleting collection? [Y/N] " _CONFIRM = "confirm deleting collection? [Y/N] "
@ -98,21 +94,6 @@ class Command(BaseCommand):
if self.fix: if self.fix:
update_journal_for_merged_item(i.url) update_journal_for_merged_item(i.url)
def batch_index(self, index, typ, qs):
c = 0
pg = Paginator(qs.order_by("id"), self.batch_size)
for p in tqdm(pg.page_range):
if typ == "post":
docs = index.posts_to_docs(pg.get_page(p).object_list)
else:
pieces = [
p for p in pg.get_page(p).object_list if p.latest_post is None
]
docs = index.pieces_to_docs(pieces)
c += len(docs)
index.replace_docs(docs)
self.stdout.write(self.style.SUCCESS(f"indexed {c} docs."))
def handle( def handle(
self, self,
action, action,
@ -188,10 +169,25 @@ class Command(BaseCommand):
if owners: if owners:
pieces = pieces.filter(owner_id__in=owners) pieces = pieces.filter(owner_id__in=owners)
posts = posts.filter(author_id__in=owners) posts = posts.filter(author_id__in=owners)
# index all posts # index all posts first
self.batch_index(index, "post", posts) c = 0
pg = Paginator(posts.order_by("id"), self.batch_size)
for p in tqdm(pg.page_range):
docs = index.posts_to_docs(pg.get_page(p).object_list)
c += len(docs)
index.replace_docs(docs)
self.stdout.write(self.style.SUCCESS(f"indexed {c} docs."))
# index remaining pieces without posts # index remaining pieces without posts
self.batch_index(index, "piece", pieces) c = 0
pg = Paginator(pieces.order_by("id"), self.batch_size)
for p in tqdm(pg.page_range):
pieces = [
p for p in pg.get_page(p).object_list if p.latest_post is None
]
docs = index.pieces_to_docs(pieces)
c += len(docs)
index.replace_docs(docs)
self.stdout.write(self.style.SUCCESS(f"indexed {c} docs."))
# posts = posts.exclude(type_data__object__has_key="relatedWith") # posts = posts.exclude(type_data__object__has_key="relatedWith")
# docs = index.posts_to_docs(posts) # docs = index.posts_to_docs(posts)
# c = len(docs) # c = len(docs)

View file

@ -228,6 +228,8 @@ class JournalIndex(Index):
pc = post.piece pc = post.piece
doc = {} doc = {}
if pc: if pc:
pc.latest_post = post
pc.latest_post_id = post.pk
doc = cls.piece_to_doc(pc) doc = cls.piece_to_doc(pc)
if not doc: if not doc:
doc = { doc = {
@ -245,7 +247,7 @@ class JournalIndex(Index):
return doc return doc
@classmethod @classmethod
def posts_to_docs(cls, posts: QuerySet[Post]) -> list[dict]: def posts_to_docs(cls, posts: Iterable[Post]) -> list[dict]:
return [cls.post_to_doc(p) for p in posts] return [cls.post_to_doc(p) for p in posts]
def delete_all(self): def delete_all(self):