index faster
This commit is contained in:
parent
bee21c6d14
commit
0af7032282
3 changed files with 70 additions and 23 deletions
|
@ -1,14 +1,25 @@
|
||||||
from argparse import RawTextHelpFormatter
|
from argparse import RawTextHelpFormatter
|
||||||
|
from datetime import timedelta
|
||||||
|
|
||||||
from django.core.management.base import BaseCommand
|
from django.core.management.base import BaseCommand
|
||||||
from django.core.paginator import Paginator
|
from django.core.paginator import Paginator
|
||||||
|
from django.db.models import Q
|
||||||
|
from django.utils import timezone
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
from catalog.models import Item
|
from catalog.models import Item
|
||||||
from journal.models import Content, JournalIndex, Piece, update_journal_for_merged_item
|
from journal.models import (
|
||||||
|
Collection,
|
||||||
|
Content,
|
||||||
|
JournalIndex,
|
||||||
|
Piece,
|
||||||
|
Review,
|
||||||
|
ShelfMember,
|
||||||
|
update_journal_for_merged_item,
|
||||||
|
)
|
||||||
from journal.models.itemlist import ListMember
|
from journal.models.itemlist import ListMember
|
||||||
from takahe.models import Post
|
from takahe.models import Post
|
||||||
from users.models import APIdentity
|
from users.models import APIdentity, User
|
||||||
|
|
||||||
_CONFIRM = "confirm deleting collection? [Y/N] "
|
_CONFIRM = "confirm deleting collection? [Y/N] "
|
||||||
|
|
||||||
|
@ -20,7 +31,7 @@ idx-init: check and create index if not exists
|
||||||
idx-destroy: delete index
|
idx-destroy: delete index
|
||||||
idx-alt: update index schema
|
idx-alt: update index schema
|
||||||
idx-delete: delete docs in index
|
idx-delete: delete docs in index
|
||||||
idx-update: reindex docs
|
idx-reindex: reindex docs
|
||||||
idx-search: search docs in index
|
idx-search: search docs in index
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
@ -43,7 +54,7 @@ class Command(BaseCommand):
|
||||||
"idx-init",
|
"idx-init",
|
||||||
"idx-alt",
|
"idx-alt",
|
||||||
"idx-destroy",
|
"idx-destroy",
|
||||||
"idx-update",
|
"idx-reindex",
|
||||||
"idx-delete",
|
"idx-delete",
|
||||||
"idx-search",
|
"idx-search",
|
||||||
],
|
],
|
||||||
|
@ -80,6 +91,11 @@ class Command(BaseCommand):
|
||||||
"--yes",
|
"--yes",
|
||||||
action="store_true",
|
action="store_true",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--fast",
|
||||||
|
action="store_true",
|
||||||
|
help="skip some inactive users and rare cases to speed up index",
|
||||||
|
)
|
||||||
|
|
||||||
def integrity(self):
|
def integrity(self):
|
||||||
self.stdout.write(f"Checking deleted items with remaining journals...")
|
self.stdout.write(f"Checking deleted items with remaining journals...")
|
||||||
|
@ -105,6 +121,7 @@ class Command(BaseCommand):
|
||||||
verbose,
|
verbose,
|
||||||
fix,
|
fix,
|
||||||
batch_size,
|
batch_size,
|
||||||
|
fast,
|
||||||
*args,
|
*args,
|
||||||
**kwargs,
|
**kwargs,
|
||||||
):
|
):
|
||||||
|
@ -161,15 +178,26 @@ class Command(BaseCommand):
|
||||||
c = index.delete_all()
|
c = index.delete_all()
|
||||||
self.stdout.write(self.style.SUCCESS(f"deleted {c} documents."))
|
self.stdout.write(self.style.SUCCESS(f"deleted {c} documents."))
|
||||||
|
|
||||||
case "idx-update":
|
case "idx-reindex":
|
||||||
pieces = Piece.objects.all()
|
if fast and not owners:
|
||||||
|
q = Q(social_accounts__type="mastodon.mastodonaccount") | Q(
|
||||||
|
social_accounts__last_reachable__gt=timezone.now()
|
||||||
|
- timedelta(days=365)
|
||||||
|
)
|
||||||
|
owners = list(
|
||||||
|
User.objects.filter(is_active=True)
|
||||||
|
.filter(q)
|
||||||
|
.values_list("identity", flat=True)
|
||||||
|
)
|
||||||
|
# index all posts first
|
||||||
posts = Post.objects.filter(local=True).exclude(
|
posts = Post.objects.filter(local=True).exclude(
|
||||||
state__in=["deleted", "deleted_fanned_out"]
|
state__in=["deleted", "deleted_fanned_out"]
|
||||||
)
|
)
|
||||||
if owners:
|
if owners:
|
||||||
pieces = pieces.filter(owner_id__in=owners)
|
self.stdout.write(
|
||||||
|
self.style.SUCCESS(f"indexing for {len(owners)} users.")
|
||||||
|
)
|
||||||
posts = posts.filter(author_id__in=owners)
|
posts = posts.filter(author_id__in=owners)
|
||||||
# index all posts first
|
|
||||||
c = 0
|
c = 0
|
||||||
pg = Paginator(posts.order_by("id"), self.batch_size)
|
pg = Paginator(posts.order_by("id"), self.batch_size)
|
||||||
for p in tqdm(pg.page_range):
|
for p in tqdm(pg.page_range):
|
||||||
|
@ -178,15 +206,29 @@ class Command(BaseCommand):
|
||||||
index.replace_docs(docs)
|
index.replace_docs(docs)
|
||||||
self.stdout.write(self.style.SUCCESS(f"indexed {c} docs."))
|
self.stdout.write(self.style.SUCCESS(f"indexed {c} docs."))
|
||||||
# index remaining pieces without posts
|
# index remaining pieces without posts
|
||||||
c = 0
|
for cls in (
|
||||||
pg = Paginator(pieces.order_by("id"), self.batch_size)
|
[
|
||||||
for p in tqdm(pg.page_range):
|
ShelfMember,
|
||||||
pieces = [
|
Review,
|
||||||
p for p in pg.get_page(p).object_list if p.latest_post is None
|
Collection,
|
||||||
]
|
]
|
||||||
docs = index.pieces_to_docs(pieces)
|
if fast
|
||||||
c += len(docs)
|
else [Piece]
|
||||||
index.replace_docs(docs)
|
):
|
||||||
|
pieces = cls.objects.filter(local=True)
|
||||||
|
if owners:
|
||||||
|
pieces = pieces.filter(owner_id__in=owners)
|
||||||
|
c = 0
|
||||||
|
pg = Paginator(pieces.order_by("id"), self.batch_size)
|
||||||
|
for p in tqdm(pg.page_range):
|
||||||
|
pieces = [
|
||||||
|
p
|
||||||
|
for p in pg.get_page(p).object_list
|
||||||
|
if p.latest_post is None
|
||||||
|
]
|
||||||
|
docs = index.pieces_to_docs(pieces)
|
||||||
|
c += len(docs)
|
||||||
|
index.replace_docs(docs)
|
||||||
self.stdout.write(self.style.SUCCESS(f"indexed {c} docs."))
|
self.stdout.write(self.style.SUCCESS(f"indexed {c} docs."))
|
||||||
# posts = posts.exclude(type_data__object__has_key="relatedWith")
|
# posts = posts.exclude(type_data__object__has_key="relatedWith")
|
||||||
# docs = index.posts_to_docs(posts)
|
# docs = index.posts_to_docs(posts)
|
||||||
|
|
|
@ -211,3 +211,6 @@ class FeaturedCollection(Piece):
|
||||||
@cached_property
|
@cached_property
|
||||||
def progress(self):
|
def progress(self):
|
||||||
return self.target.get_progress(self.owner)
|
return self.target.get_progress(self.owner)
|
||||||
|
|
||||||
|
def to_indexable_doc(self) -> dict[str, Any]:
|
||||||
|
return {}
|
||||||
|
|
|
@ -212,9 +212,10 @@ class JournalIndex(Index):
|
||||||
if piece.latest_post:
|
if piece.latest_post:
|
||||||
# fk is not enforced, so post might be deleted
|
# fk is not enforced, so post might be deleted
|
||||||
doc["post_id"] = [piece.latest_post_id]
|
doc["post_id"] = [piece.latest_post_id]
|
||||||
doc["viewer_id"] = list(
|
# enable this in future when we support search other users
|
||||||
piece.latest_post.interactions.values_list("identity_id", flat=True)
|
# doc["viewer_id"] = list(
|
||||||
)
|
# piece.latest_post.interactions.values_list("identity_id", flat=True)
|
||||||
|
# )
|
||||||
doc.update(d)
|
doc.update(d)
|
||||||
return doc
|
return doc
|
||||||
|
|
||||||
|
@ -238,11 +239,12 @@ class JournalIndex(Index):
|
||||||
"piece_class": ["Post"],
|
"piece_class": ["Post"],
|
||||||
"content": [post.content],
|
"content": [post.content],
|
||||||
"created": int(post.created.timestamp()),
|
"created": int(post.created.timestamp()),
|
||||||
"owner_id": post.author_id,
|
|
||||||
"viewer_id": list(
|
|
||||||
post.interactions.values_list("identity_id", flat=True)
|
|
||||||
),
|
|
||||||
"visibility": Takahe.visibility_t2n(post.visibility),
|
"visibility": Takahe.visibility_t2n(post.visibility),
|
||||||
|
"owner_id": post.author_id,
|
||||||
|
# enable this in future when we support search other users
|
||||||
|
# "viewer_id": list(
|
||||||
|
# post.interactions.values_list("identity_id", flat=True)
|
||||||
|
# ),
|
||||||
}
|
}
|
||||||
return doc
|
return doc
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue