fix null title when indexing

This commit is contained in:
Your Name 2024-12-30 09:35:58 -05:00 committed by Henri Dickson
parent 9cb681b9fc
commit c4b7a760c6
6 changed files with 29 additions and 31 deletions

View file

@ -217,8 +217,8 @@ class Edition(Item):
return self.get_localized_subtitle()
def to_indexable_titles(self) -> list[str]:
    """Return the unique, non-empty title strings to index for this edition.

    Gathers the ``text`` of every entry in ``localized_title`` and
    ``localized_subtitle`` and adds ``orig_title`` when it is set.

    Entries that are falsy (``None``, ``{}``) or whose ``text`` is missing,
    ``None`` or empty are skipped, so no null title is produced.

    Returns:
        The de-duplicated titles; ordering is not guaranteed because a set is
        used for de-duplication.
    """
    # Guard both the entry and its text: a null entry would raise on
    # subscripting, and a null text must not be indexed.
    titles = [t["text"] for t in self.localized_title if t and t.get("text")]
    titles += [t["text"] for t in self.localized_subtitle if t and t.get("text")]
    if self.orig_title:
        titles.append(self.orig_title)
    return list(set(titles))

View file

@ -664,7 +664,7 @@ class Item(PolymorphicModel):
return (str(self.display_description) or "")[:155]
def to_indexable_titles(self) -> list[str]:
    """Return the unique, non-empty title strings to index for this item.

    Gathers the ``text`` of every entry in ``localized_title`` and, when
    ``parent_item`` is set, appends the parent's own indexable titles.

    Entries that are falsy (``None``, ``{}``) or whose ``text`` is missing,
    ``None`` or empty are skipped, so no null title is produced.

    Returns:
        The de-duplicated titles; ordering is not guaranteed because a set is
        used for de-duplication.
    """
    # Guard both the entry and its text: a null entry would raise on
    # subscripting, and a null text must not be indexed.
    titles = [t["text"] for t in self.localized_title if t and t.get("text")]
    if self.parent_item:
        titles += self.parent_item.to_indexable_titles()
    return list(set(titles))

View file

@ -175,6 +175,6 @@ class Movie(Item):
return super().lookup_id_cleanup(lookup_id_type, lookup_id_value)
def to_indexable_titles(self) -> list[str]:
    """Return the unique, non-empty title strings to index for this movie.

    Gathers the ``text`` of every entry in ``localized_title`` and adds
    ``orig_title`` when it is set.

    Entries that are falsy (``None``, ``{}``) or whose ``text`` is missing,
    ``None`` or empty are skipped, so no null title is produced.

    Returns:
        The de-duplicated titles; ordering is not guaranteed because a set is
        used for de-duplication.
    """
    # Guard both the entry and its text: a null entry would raise on
    # subscripting, and a null text must not be indexed.
    titles = [t["text"] for t in self.localized_title if t and t.get("text")]
    if self.orig_title:
        titles.append(self.orig_title)
    return list(set(titles))

View file

@ -251,7 +251,7 @@ class TVShow(Item):
return self.season_count or self.seasons.all().count()
def to_indexable_titles(self) -> list[str]:
    """Return the unique, non-empty title strings to index for this show.

    Gathers the ``text`` of every entry in ``localized_title`` and adds
    ``orig_title`` when it is set.

    Entries that are falsy (``None``, ``{}``) or whose ``text`` is missing,
    ``None`` or empty are skipped, so no null title is produced.

    Returns:
        The de-duplicated titles; ordering is not guaranteed because a set is
        used for de-duplication.
    """
    # Guard both the entry and its text: a null entry would raise on
    # subscripting, and a null text must not be indexed.
    titles = [t["text"] for t in self.localized_title if t and t.get("text")]
    if self.orig_title:
        titles.append(self.orig_title)
    return list(set(titles))
@ -440,7 +440,7 @@ class TVSeason(Item):
]
def to_indexable_titles(self) -> list[str]:
    """Return the unique, non-empty title strings to index for this season.

    Gathers the ``text`` of every entry in ``localized_title``, adds
    ``orig_title`` when it is set, and appends the indexable titles of
    ``parent_item`` when there is one.

    Entries that are falsy (``None``, ``{}``) or whose ``text`` is missing,
    ``None`` or empty are skipped, so no null title is produced.

    Returns:
        The de-duplicated titles; ordering is not guaranteed because a set is
        used for de-duplication.
    """
    # Guard both the entry and its text: a null entry would raise on
    # subscripting, and a null text must not be indexed.
    titles = [t["text"] for t in self.localized_title if t and t.get("text")]
    if self.orig_title:
        titles.append(self.orig_title)
    if self.parent_item:
        titles += self.parent_item.to_indexable_titles()
    return list(set(titles))

View file

@ -5,14 +5,10 @@ from django.core.paginator import Paginator
from tqdm import tqdm
from catalog.models import Item
from journal.importers.douban import DoubanImporter
from journal.models import *
from journal.models import JournalIndex, Piece
from journal.models.common import Content
from journal.models import Content, JournalIndex, Piece, update_journal_for_merged_item
from journal.models.itemlist import ListMember
from takahe.models import Post
from users.models import *
from users.models import User
from users.models import APIdentity
_CONFIRM = "confirm deleting collection? [Y/N] "
@ -98,21 +94,6 @@ class Command(BaseCommand):
if self.fix:
update_journal_for_merged_item(i.url)
def batch_index(self, index, typ, qs):
    """Index ``qs`` in pages of ``self.batch_size`` and report the doc count.

    Args:
        index: object providing ``posts_to_docs``, ``pieces_to_docs`` and
            ``replace_docs``.
        typ: ``"post"`` to treat the page objects as posts; any other value
            treats them as pieces.
        qs: queryset to index; it is ordered by ``id`` before pagination.
    """
    total = 0
    paginator = Paginator(qs.order_by("id"), self.batch_size)
    for page_number in tqdm(paginator.page_range):
        objects = paginator.get_page(page_number).object_list
        if typ == "post":
            docs = index.posts_to_docs(objects)
        else:
            # Only pieces without a latest post are converted here.
            postless = [piece for piece in objects if piece.latest_post is None]
            docs = index.pieces_to_docs(postless)
        total += len(docs)
        index.replace_docs(docs)
    self.stdout.write(self.style.SUCCESS(f"indexed {total} docs."))
def handle(
self,
action,
@ -188,10 +169,25 @@ class Command(BaseCommand):
if owners:
pieces = pieces.filter(owner_id__in=owners)
posts = posts.filter(author_id__in=owners)
# index all posts
self.batch_index(index, "post", posts)
# index all posts first
c = 0
pg = Paginator(posts.order_by("id"), self.batch_size)
for p in tqdm(pg.page_range):
docs = index.posts_to_docs(pg.get_page(p).object_list)
c += len(docs)
index.replace_docs(docs)
self.stdout.write(self.style.SUCCESS(f"indexed {c} docs."))
# index remaining pieces without posts
self.batch_index(index, "piece", pieces)
c = 0
pg = Paginator(pieces.order_by("id"), self.batch_size)
for p in tqdm(pg.page_range):
pieces = [
p for p in pg.get_page(p).object_list if p.latest_post is None
]
docs = index.pieces_to_docs(pieces)
c += len(docs)
index.replace_docs(docs)
self.stdout.write(self.style.SUCCESS(f"indexed {c} docs."))
# posts = posts.exclude(type_data__object__has_key="relatedWith")
# docs = index.posts_to_docs(posts)
# c = len(docs)

View file

@ -228,6 +228,8 @@ class JournalIndex(Index):
pc = post.piece
doc = {}
if pc:
pc.latest_post = post
pc.latest_post_id = post.pk
doc = cls.piece_to_doc(pc)
if not doc:
doc = {
@ -245,7 +247,7 @@ class JournalIndex(Index):
return doc
@classmethod
def posts_to_docs(cls, posts: Iterable[Post]) -> list[dict]:
    """Build one index document per post.

    Args:
        posts: any iterable of posts (for example a queryset or the object
            list of a paginator page); it is consumed once.

    Returns:
        The results of ``cls.post_to_doc`` for each post, in iteration order.
    """
    return [cls.post_to_doc(post) for post in posts]
def delete_all(self):