2024-12-30 01:51:19 -05:00
|
|
|
from argparse import RawTextHelpFormatter
|
2024-12-30 11:37:38 -05:00
|
|
|
from datetime import timedelta
|
2024-12-30 01:51:19 -05:00
|
|
|
|
2023-08-10 11:27:31 -04:00
|
|
|
from django.core.management.base import BaseCommand
|
2024-12-30 01:51:19 -05:00
|
|
|
from django.core.paginator import Paginator
|
2024-12-30 11:37:38 -05:00
|
|
|
from django.db.models import Q
|
|
|
|
from django.utils import timezone
|
2024-12-30 01:51:19 -05:00
|
|
|
from tqdm import tqdm
|
2023-08-10 11:27:31 -04:00
|
|
|
|
2023-07-20 21:59:49 -04:00
|
|
|
from catalog.models import Item
|
2025-01-28 21:38:02 -05:00
|
|
|
from journal.exporters.ndjson import NdjsonExporter
|
2024-12-30 11:37:38 -05:00
|
|
|
from journal.models import (
|
|
|
|
Collection,
|
|
|
|
Content,
|
|
|
|
JournalIndex,
|
|
|
|
Piece,
|
|
|
|
Review,
|
|
|
|
ShelfMember,
|
|
|
|
update_journal_for_merged_item,
|
|
|
|
)
|
2025-01-01 09:32:24 -05:00
|
|
|
from journal.models.index import JournalQueryParser
|
2023-07-20 21:59:49 -04:00
|
|
|
from journal.models.itemlist import ListMember
|
2024-12-30 01:51:19 -05:00
|
|
|
from takahe.models import Post
|
2024-12-30 11:37:38 -05:00
|
|
|
from users.models import APIdentity, User
|
2023-01-08 20:00:55 -05:00
|
|
|
|
2024-12-30 01:51:19 -05:00
|
|
|
# Prompt shown before the index collection is destroyed (skipped with --yes).
_CONFIRM = "confirm deleting collection? [Y/N] "

# Per-action help shown for the positional ``action`` argument; each name
# must match one of the ``choices`` registered in ``Command.add_arguments``.
_HELP_TEXT = """
integrity: check and fix remaining journal for merged and deleted items
purge: delete invalid data (visibility=99)
export: run export task
idx-info: show index information
idx-init: check and create index if not exists
idx-destroy: delete index
idx-alt: update index schema
idx-delete: delete docs in index
idx-reindex: reindex docs
idx-search: search docs in index
"""
|
|
|
|
|
2023-01-08 20:00:55 -05:00
|
|
|
|
|
|
|
class Command(BaseCommand):
|
2023-01-08 20:28:18 -05:00
|
|
|
help = "journal app utilities"
|
2023-01-08 20:00:55 -05:00
|
|
|
|
2024-12-30 01:51:19 -05:00
|
|
|
def create_parser(self, *args, **kwargs):
|
|
|
|
parser = super(Command, self).create_parser(*args, **kwargs)
|
|
|
|
parser.formatter_class = RawTextHelpFormatter
|
|
|
|
return parser
|
|
|
|
|
2023-01-08 20:00:55 -05:00
|
|
|
def add_arguments(self, parser):
|
2024-12-30 01:51:19 -05:00
|
|
|
parser.add_argument(
|
|
|
|
"action",
|
|
|
|
choices=[
|
|
|
|
"integrity",
|
|
|
|
"purge",
|
2025-01-28 21:38:02 -05:00
|
|
|
"export",
|
2024-12-30 01:51:19 -05:00
|
|
|
"idx-info",
|
|
|
|
"idx-init",
|
|
|
|
"idx-alt",
|
|
|
|
"idx-destroy",
|
2024-12-30 11:37:38 -05:00
|
|
|
"idx-reindex",
|
2024-12-30 01:51:19 -05:00
|
|
|
"idx-delete",
|
|
|
|
"idx-search",
|
|
|
|
],
|
|
|
|
help=_HELP_TEXT,
|
|
|
|
)
|
2023-01-08 20:00:55 -05:00
|
|
|
parser.add_argument(
|
2023-07-01 19:47:01 -04:00
|
|
|
"--verbose",
|
|
|
|
action="store_true",
|
|
|
|
)
|
|
|
|
parser.add_argument(
|
|
|
|
"--fix",
|
|
|
|
action="store_true",
|
|
|
|
)
|
|
|
|
parser.add_argument(
|
2024-12-30 01:51:19 -05:00
|
|
|
"--owner",
|
|
|
|
action="append",
|
|
|
|
)
|
|
|
|
parser.add_argument(
|
|
|
|
"--query",
|
|
|
|
)
|
|
|
|
parser.add_argument(
|
|
|
|
"--batch-size",
|
|
|
|
default=1000,
|
|
|
|
)
|
|
|
|
parser.add_argument(
|
|
|
|
"--item-class",
|
|
|
|
action="append",
|
2023-01-08 20:00:55 -05:00
|
|
|
)
|
2023-07-01 19:47:01 -04:00
|
|
|
parser.add_argument(
|
2024-12-30 01:51:19 -05:00
|
|
|
"--piece-class",
|
|
|
|
action="append",
|
|
|
|
)
|
|
|
|
parser.add_argument(
|
|
|
|
"--yes",
|
2023-07-01 19:47:01 -04:00
|
|
|
action="store_true",
|
|
|
|
)
|
2024-12-30 11:37:38 -05:00
|
|
|
parser.add_argument(
|
|
|
|
"--fast",
|
|
|
|
action="store_true",
|
|
|
|
help="skip some inactive users and rare cases to speed up index",
|
|
|
|
)
|
2023-07-01 19:47:01 -04:00
|
|
|
|
|
|
|
def integrity(self):
|
2025-01-11 17:20:02 -05:00
|
|
|
self.stdout.write("Checking deleted items with remaining journals...")
|
2023-07-01 19:47:01 -04:00
|
|
|
for i in Item.objects.filter(is_deleted=True):
|
|
|
|
if i.journal_exists():
|
|
|
|
self.stdout.write(f"! {i} : {i.absolute_url}?skipcheck=1")
|
|
|
|
|
2025-01-11 17:20:02 -05:00
|
|
|
self.stdout.write("Checking merged items with remaining journals...")
|
2023-07-01 19:47:01 -04:00
|
|
|
for i in Item.objects.filter(merged_to_item__isnull=False):
|
|
|
|
if i.journal_exists():
|
|
|
|
self.stdout.write(f"! {i} : {i.absolute_url}?skipcheck=1")
|
|
|
|
if self.fix:
|
|
|
|
update_journal_for_merged_item(i.url)
|
2023-01-08 20:00:55 -05:00
|
|
|
|
2025-01-28 21:38:02 -05:00
|
|
|
def export(self, owner_ids):
|
|
|
|
users = User.objects.filter(identity__in=owner_ids)
|
|
|
|
for user in users:
|
|
|
|
task = NdjsonExporter.create(user=user)
|
|
|
|
self.stdout.write(f"exporting for {user} (task {task.pk})...")
|
|
|
|
ok = task._run()
|
|
|
|
if ok:
|
|
|
|
self.stdout.write(f"complete {task.metadata['file']}")
|
|
|
|
else:
|
|
|
|
self.stdout.write("failed")
|
|
|
|
task.delete()
|
|
|
|
|
2024-12-30 01:51:19 -05:00
|
|
|
def handle(
|
|
|
|
self,
|
|
|
|
action,
|
|
|
|
yes,
|
|
|
|
query,
|
|
|
|
owner,
|
|
|
|
piece_class,
|
|
|
|
item_class,
|
|
|
|
verbose,
|
|
|
|
fix,
|
|
|
|
batch_size,
|
2024-12-30 11:37:38 -05:00
|
|
|
fast,
|
2024-12-30 01:51:19 -05:00
|
|
|
*args,
|
|
|
|
**kwargs,
|
|
|
|
):
|
|
|
|
self.verbose = verbose
|
|
|
|
self.fix = fix
|
|
|
|
self.batch_size = batch_size
|
|
|
|
index = JournalIndex.instance()
|
|
|
|
|
|
|
|
if owner:
|
|
|
|
owners = list(
|
|
|
|
APIdentity.objects.filter(username__in=owner, local=True).values_list(
|
|
|
|
"id", flat=True
|
|
|
|
)
|
|
|
|
)
|
|
|
|
else:
|
|
|
|
owners = []
|
|
|
|
|
|
|
|
match action:
|
|
|
|
case "integrity":
|
|
|
|
self.integrity()
|
2025-01-11 17:20:02 -05:00
|
|
|
self.stdout.write(self.style.SUCCESS("Done."))
|
2024-12-30 01:51:19 -05:00
|
|
|
|
|
|
|
case "purge":
|
|
|
|
for pcls in [Content, ListMember]:
|
|
|
|
for cls in pcls.__subclasses__():
|
|
|
|
self.stdout.write(f"Cleaning up {cls}...")
|
|
|
|
cls.objects.filter(visibility=99).delete()
|
2025-01-11 17:20:02 -05:00
|
|
|
self.stdout.write(self.style.SUCCESS("Done."))
|
2024-12-30 01:51:19 -05:00
|
|
|
|
2025-01-28 21:38:02 -05:00
|
|
|
case "export":
|
|
|
|
self.export(owners)
|
|
|
|
|
2024-12-30 01:51:19 -05:00
|
|
|
case "idx-destroy":
|
|
|
|
if yes or input(_CONFIRM).upper().startswith("Y"):
|
|
|
|
index.delete_collection()
|
|
|
|
self.stdout.write(self.style.SUCCESS("deleted."))
|
|
|
|
|
|
|
|
case "idx-alt":
|
2024-12-30 15:34:18 -05:00
|
|
|
# index.update_schema()
|
|
|
|
self.stdout.write(self.style.SUCCESS("not implemented."))
|
2024-12-30 01:51:19 -05:00
|
|
|
|
|
|
|
case "idx-init":
|
|
|
|
index.initialize_collection()
|
|
|
|
self.stdout.write(self.style.SUCCESS("initialized."))
|
|
|
|
|
|
|
|
case "idx-info":
|
|
|
|
try:
|
|
|
|
r = index.check()
|
|
|
|
self.stdout.write(str(r))
|
|
|
|
except Exception as e:
|
|
|
|
self.stdout.write(self.style.ERROR(str(e)))
|
|
|
|
|
|
|
|
case "idx-delete":
|
|
|
|
if owners:
|
|
|
|
c = index.delete_by_owner(owners)
|
|
|
|
else:
|
|
|
|
c = index.delete_all()
|
|
|
|
self.stdout.write(self.style.SUCCESS(f"deleted {c} documents."))
|
|
|
|
|
2024-12-30 11:37:38 -05:00
|
|
|
case "idx-reindex":
|
|
|
|
if fast and not owners:
|
|
|
|
q = Q(social_accounts__type="mastodon.mastodonaccount") | Q(
|
|
|
|
social_accounts__last_reachable__gt=timezone.now()
|
|
|
|
- timedelta(days=365)
|
|
|
|
)
|
|
|
|
owners = list(
|
|
|
|
User.objects.filter(is_active=True)
|
|
|
|
.filter(q)
|
|
|
|
.values_list("identity", flat=True)
|
|
|
|
)
|
|
|
|
# index all posts first
|
2024-12-30 01:51:19 -05:00
|
|
|
posts = Post.objects.filter(local=True).exclude(
|
|
|
|
state__in=["deleted", "deleted_fanned_out"]
|
|
|
|
)
|
|
|
|
if owners:
|
2024-12-30 11:37:38 -05:00
|
|
|
self.stdout.write(
|
|
|
|
self.style.SUCCESS(f"indexing for {len(owners)} users.")
|
|
|
|
)
|
2024-12-30 01:51:19 -05:00
|
|
|
posts = posts.filter(author_id__in=owners)
|
2024-12-30 09:35:58 -05:00
|
|
|
c = 0
|
|
|
|
pg = Paginator(posts.order_by("id"), self.batch_size)
|
|
|
|
for p in tqdm(pg.page_range):
|
|
|
|
docs = index.posts_to_docs(pg.get_page(p).object_list)
|
|
|
|
c += len(docs)
|
|
|
|
index.replace_docs(docs)
|
|
|
|
self.stdout.write(self.style.SUCCESS(f"indexed {c} docs."))
|
2024-12-30 01:51:19 -05:00
|
|
|
# index remaining pieces without posts
|
2024-12-30 11:37:38 -05:00
|
|
|
for cls in (
|
|
|
|
[
|
|
|
|
ShelfMember,
|
|
|
|
Review,
|
|
|
|
Collection,
|
2024-12-30 09:35:58 -05:00
|
|
|
]
|
2024-12-30 11:37:38 -05:00
|
|
|
if fast
|
|
|
|
else [Piece]
|
|
|
|
):
|
|
|
|
pieces = cls.objects.filter(local=True)
|
|
|
|
if owners:
|
|
|
|
pieces = pieces.filter(owner_id__in=owners)
|
|
|
|
c = 0
|
|
|
|
pg = Paginator(pieces.order_by("id"), self.batch_size)
|
|
|
|
for p in tqdm(pg.page_range):
|
|
|
|
pieces = [
|
|
|
|
p
|
|
|
|
for p in pg.get_page(p).object_list
|
|
|
|
if p.latest_post is None
|
|
|
|
]
|
|
|
|
docs = index.pieces_to_docs(pieces)
|
|
|
|
c += len(docs)
|
|
|
|
index.replace_docs(docs)
|
2024-12-30 09:35:58 -05:00
|
|
|
self.stdout.write(self.style.SUCCESS(f"indexed {c} docs."))
|
2024-12-30 01:51:19 -05:00
|
|
|
# posts = posts.exclude(type_data__object__has_key="relatedWith")
|
|
|
|
# docs = index.posts_to_docs(posts)
|
|
|
|
# c = len(docs)
|
|
|
|
# index.insert_docs(docs)
|
|
|
|
# self.stdout.write(self.style.SUCCESS(f"indexed {c} posts."))
|
2023-07-01 19:47:01 -04:00
|
|
|
|
2024-12-30 01:51:19 -05:00
|
|
|
case "idx-search":
|
2025-01-01 09:32:24 -05:00
|
|
|
q = JournalQueryParser("" if query == "-" else query, page_size=100)
|
|
|
|
if owners:
|
|
|
|
q.filter("owner_id", owners)
|
|
|
|
if item_class:
|
|
|
|
q.filter("item_class", item_class)
|
|
|
|
if piece_class:
|
|
|
|
q.filter("piece_class", piece_class)
|
|
|
|
r = index.search(q)
|
2024-12-30 01:51:19 -05:00
|
|
|
self.stdout.write(self.style.SUCCESS(str(r)))
|
|
|
|
self.stdout.write(f"{r.facet_by_item_class}")
|
|
|
|
self.stdout.write(f"{r.facet_by_piece_class}")
|
|
|
|
self.stdout.write(self.style.SUCCESS("matched posts:"))
|
|
|
|
for post in r:
|
|
|
|
self.stdout.write(str(post))
|
|
|
|
self.stdout.write(self.style.SUCCESS("matched pieces:"))
|
|
|
|
for pc in r.pieces:
|
|
|
|
self.stdout.write(str(pc))
|
|
|
|
self.stdout.write(self.style.SUCCESS("matched items:"))
|
|
|
|
for i in r.items:
|
|
|
|
self.stdout.write(str(i))
|
2023-01-08 20:00:55 -05:00
|
|
|
|
2024-12-30 01:51:19 -05:00
|
|
|
case _:
|
|
|
|
self.stdout.write(self.style.ERROR("action not found."))
|