export ndjson

2025-01-28 21:38:02 -05:00 · 2025-01-28 21:38:02 -05:00 · 26858ee905
commit 26858ee905
parent 409ba0a6fd
17 changed files with 277 additions and 335 deletions
--- a/catalog/book/models.py
+++ b/catalog/book/models.py
@ -39,6 +39,7 @@ from catalog.common import (
 from catalog.common.models import (
    LIST_OF_ONE_PLUS_STR_SCHEMA,
    LOCALE_CHOICES_JSONFORM,
    ItemType,
    LanguageListField,
 )
 from common.models import uniq
@ -119,6 +120,8 @@ class Edition(Item):
        WEB = "web", _("Web Fiction")
        OTHER = "other", _("Other")
    schema = EditionSchema
    type = ItemType.Edition
    category = ItemCategory.Book
    url_path = "book"
--- a/catalog/common/models.py
+++ b/catalog/common/models.py
@ -120,7 +120,7 @@ IdealIdTypes = [
 class ItemType(models.TextChoices):
-    Book = "book", _("Book")  # type:ignore[reportCallIssue]
+    Edition = "edition", _("Edition")  # type:ignore[reportCallIssue]
    TVShow = "tvshow", _("TV Serie")  # type:ignore[reportCallIssue]
    TVSeason = "tvseason", _("TV Season")  # type:ignore[reportCallIssue]
    TVEpisode = "tvepisode", _("TV Episode")  # type:ignore[reportCallIssue]
@ -346,6 +346,7 @@ class Item(PolymorphicModel):
        collections: QuerySet["Collection"]
        merged_from_items: QuerySet["Item"]
        merged_to_item_id: int
    schema = ItemSchema
    category: ItemCategory  # subclass must specify this
    url_path = "item"  # subclass must specify this
    child_class = None  # subclass may specify this to allow link to parent item
@ -514,15 +515,19 @@ class Item(PolymorphicModel):
    def ap_object_type(self) -> str:
        return self.get_ap_object_type()
    @property
    def ap_object(self):
        """Return this item serialized through its ``schema`` as a plain dump."""
        # Build the schema instance from the ORM object first, then dump it.
        schema_instance = self.schema.from_orm(self)
        return schema_instance.model_dump()
    @property
    def ap_object_ref(self) -> dict[str, Any]:
        o = {
            "type": self.get_ap_object_type(),
            "href": self.absolute_url,
-            "name": self.title,
+            "name": self.display_title,
        }
        if self.has_cover():
-            o["image"] = self.cover_image_url
+            o["image"] = self.cover_image_url or ""
        return o
    def log_action(self, changes: dict[str, Any]):
--- a/catalog/game/models.py
+++ b/catalog/game/models.py
@ -44,6 +44,7 @@ class GameSchema(GameInSchema, BaseSchema):
 class Game(Item):
    type = ItemType.Game
    schema = GameSchema
    category = ItemCategory.Game
    url_path = "game"
    igdb = PrimaryLookupIdDescriptor(IdType.IGDB)
--- a/catalog/movie/models.py
+++ b/catalog/movie/models.py
@ -35,6 +35,7 @@ class MovieSchema(MovieInSchema, BaseSchema):
 class Movie(Item):
    type = ItemType.Movie
    schema = MovieSchema
    category = ItemCategory.Movie
    url_path = "movie"
    imdb = PrimaryLookupIdDescriptor(IdType.IMDB)
--- a/catalog/music/models.py
+++ b/catalog/music/models.py
@ -33,6 +33,7 @@ class AlbumSchema(AlbumInSchema, BaseSchema):
 class Album(Item):
    schema = AlbumSchema
    type = ItemType.Album
    url_path = "album"
    category = ItemCategory.Music
--- a/catalog/performance/models.py
+++ b/catalog/performance/models.py
@ -104,6 +104,7 @@ def _crew_by_role(crew):
 class Performance(Item):
    if TYPE_CHECKING:
        productions: models.QuerySet["PerformanceProduction"]
    schema = PerformanceSchema
    type = ItemType.Performance
    child_class = "PerformanceProduction"
    category = ItemCategory.Performance
@ -247,6 +248,7 @@ class Performance(Item):
 class PerformanceProduction(Item):
    schema = PerformanceProductionSchema
    type = ItemType.PerformanceProduction
    category = ItemCategory.Performance
    url_path = "performance/production"
--- a/catalog/podcast/models.py
+++ b/catalog/podcast/models.py
@ -13,7 +13,11 @@ from catalog.common import (
    ItemInSchema,
    jsondata,
 )
-from catalog.common.models import LIST_OF_ONE_PLUS_STR_SCHEMA, LanguageListField
+from catalog.common.models import (
    LIST_OF_ONE_PLUS_STR_SCHEMA,
    ItemType,
    LanguageListField,
 )
 class PodcastInSchema(ItemInSchema):
@ -44,6 +48,8 @@ class PodcastEpisodeSchema(PodcastEpisodeInSchema, BaseSchema):
 class Podcast(Item):
    if TYPE_CHECKING:
        episodes: models.QuerySet["PodcastEpisode"]
    type = ItemType.Podcast
    schema = PodcastSchema
    category = ItemCategory.Podcast
    child_class = "PodcastEpisode"
    url_path = "podcast"
@ -118,6 +124,8 @@ class Podcast(Item):
 class PodcastEpisode(Item):
    schema = PodcastEpisodeSchema
    type = ItemType.PodcastEpisode
    category = ItemCategory.Podcast
    url_path = "podcast/episode"
    # uid = models.UUIDField(default=uuid.uuid4, editable=False, db_index=True)
--- a/catalog/tv/models.py
+++ b/catalog/tv/models.py
@ -97,6 +97,7 @@ class TVEpisodeSchema(ItemSchema):
 class TVShow(Item):
    if TYPE_CHECKING:
        seasons: QuerySet["TVSeason"]
    schema = TVShowSchema
    type = ItemType.TVShow
    child_class = "TVSeason"
    category = ItemCategory.TV
@ -261,6 +262,7 @@ class TVShow(Item):
 class TVSeason(Item):
    if TYPE_CHECKING:
        episodes: models.QuerySet["TVEpisode"]
    schema = TVSeasonSchema
    type = ItemType.TVSeason
    category = ItemCategory.TV
    url_path = "tv/season"
@ -480,6 +482,8 @@ class TVSeason(Item):
 class TVEpisode(Item):
    schema = TVEpisodeSchema
    type = ItemType.TVEpisode
    category = ItemCategory.TV
    url_path = "tv/episode"
    season = models.ForeignKey(
--- a/journal/exporters/csv.py
+++ b/journal/exporters/csv.py
@ -87,6 +87,7 @@ class CsvExporter(Task):
        temp_dir = tempfile.mkdtemp()
        temp_folder_path = os.path.join(temp_dir, self.filename)
        os.makedirs(temp_folder_path)
        total = 0
        for category in [
            ItemCategory.Movie,
            ItemCategory.TV,
@ -107,6 +108,7 @@ class CsvExporter(Task):
                writer = csv.writer(csvfile)
                writer.writerow(_mark_heading)
                for mark in marks:
                    total += 1
                    item = mark.item
                    line = [
                        item.display_title,
@ -128,6 +130,7 @@ class CsvExporter(Task):
                writer = csv.writer(csvfile)
                writer.writerow(_review_heading)
                for review in reviews:
                    total += 1
                    item = review.item
                    line = [
                        item.display_title,
@ -147,6 +150,7 @@ class CsvExporter(Task):
                    .order_by("created_time")
                )
                for note in notes:
                    total += 1
                    item = note.item
                    line = [
                        item.display_title,
@ -166,5 +170,6 @@ class CsvExporter(Task):
            os.makedirs(os.path.dirname(filename))
        shutil.make_archive(filename[:-4], "zip", temp_folder_path)
        self.metadata["file"] = filename
        self.metadata["total"] = total
        self.message = "Export complete."
        self.save()
--- a/journal/exporters/ndjson.py
+++ b/journal/exporters/ndjson.py
@ -1,36 +1,24 @@
 import json
 import os
 import re
 import shutil
 import tempfile
 from django.conf import settings
-from openpyxl import Workbook
+from django.utils import timezone
-from catalog.models import IdType, ItemCategory, TVEpisode
+from catalog.common.downloaders import ProxiedImageDownloader
 from common.utils import GenerateDateUUIDMediaFilePath
-from journal.models import Review, ShelfType, q_item_in_category
+from journal.models import ShelfMember
 from journal.models.collection import Collection
 from journal.models.common import Content
 from journal.models.note import Note
 from journal.models.review import Review
 from journal.models.shelf import ShelfLogEntry
 from takahe.models import Post
 from users.models import Task
 def _get_source_url(item):
    """Return the URL of the external resource to cite as the item's source.

    A resource whose ``id_type`` is one of the Douban types is preferred;
    otherwise the first external resource of the item is used. An empty
    string is returned when the item has no external resource at all.
    """
    douban_types = [
        IdType.DoubanBook,
        IdType.DoubanMovie,
        IdType.DoubanMusic,
        IdType.DoubanGame,
        IdType.DoubanDrama,
    ]
    douban_resources = item.external_resources.all().filter(
        id_type__in=douban_types
    )
    # Fall back to any external resource only when no Douban one exists.
    resource = douban_resources.first() or item.external_resources.all().first()
    if resource:
        return resource.url
    return ""
 # def export_marks_task(user):
 #     user.preference.export_status["marks_pending"] = True
 #     user.preference.save(update_fields=["export_status"])
 class NdjsonExporter(Task):
    class Meta:
        app_label = "journal"  # workaround bug in TypedModel
@ -40,302 +28,149 @@ class NdjsonExporter(Task):
        "file": None,
        "total": 0,
    }
    ref_items = []
    @property
    def filename(self) -> str:
        """Base name (without extension) used for this export.

        Combines the owner's username with the task creation time formatted
        as ``YYYYmmddHHMMSS``.
        """
        stamp = self.created_time.strftime("%Y%m%d%H%M%S")
        owner_name = self.user.username
        return f"neodb_{owner_name}_{stamp}_ndjson"
    def ref(self, item) -> str:
        """Record ``item`` as referenced by this export and return its URL.

        Every referenced item is remembered once in ``self.ref_items`` so it
        can later be written to the catalog file.

        Args:
            item: catalog item referenced by a journal entry.

        Returns:
            The item's ``absolute_url``.
        """
        # ``ref_items`` is declared as a class-level list. Appending to it
        # directly would share one list between every exporter created in the
        # same process, so items referenced by one user's export would show up
        # in the next user's catalog. Give each instance its own list on first
        # use instead.
        if "ref_items" not in vars(self):
            self.ref_items = []
        if item not in self.ref_items:
            self.ref_items.append(item)
        return item.absolute_url
    def get_header(self):
        """Build the metadata record describing this export.

        The record identifies the server and software version, the exporting
        user and their actor URI, when the task was created and when this
        header was generated.
        """
        owner = self.user
        header = {
            "server": settings.SITE_DOMAIN,
            "neodb_version": settings.NEODB_VERSION,
        }
        header["username"] = owner.username
        header["actor"] = owner.identity.actor_uri
        # request_time is when the task was created; created_time is "now".
        header["request_time"] = self.created_time.isoformat()
        header["created_time"] = timezone.now().isoformat()
        return header
    def run(self):
        user = self.user
        temp_dir = tempfile.mkdtemp()
        temp_folder_path = os.path.join(temp_dir, self.filename)
        os.makedirs(temp_folder_path)
        attachment_path = os.path.join(temp_folder_path, "attachments")
        os.makedirs(attachment_path, exist_ok=True)
        def _save_image(url):
            """Save the image behind ``url`` into the export's attachments folder.

            Remote images (URLs starting with ``http``) are fetched through
            ``ProxiedImageDownloader`` and written to a new file. Site-local
            images (URLs under ``settings.MEDIA_URL``) are copied from
            ``settings.MEDIA_ROOT``.

            Returns:
                The new file's path for a remote image, the resolved source
                path for a media URL (whether or not it was copied), or
                ``url`` unchanged for anything else.
            """
            if url.startswith("http"):
                imgdl = ProxiedImageDownloader(url)
                raw_img = imgdl.download().content
                ext = imgdl.extention
                file = GenerateDateUUIDMediaFilePath(f"x.{ext}", attachment_path)
                with open(file, "wb") as binary_file:
                    binary_file.write(raw_img)
                return file
            # Only URLs that really live under MEDIA_URL map onto MEDIA_ROOT;
            # stripping the prefix from any other "/..." URL yields a bogus path.
            if url.startswith("/") and url.startswith(settings.MEDIA_URL):
                media_root = os.path.abspath(settings.MEDIA_ROOT)
                p = os.path.abspath(
                    os.path.join(media_root, url[len(settings.MEDIA_URL) :])
                )
                # The URL comes from user-written markdown, so compare whole
                # path components: a plain string-prefix test would accept
                # siblings such as "<MEDIA_ROOT>-other/...".
                try:
                    inside_media = os.path.commonpath([media_root, p]) == media_root
                except ValueError:  # e.g. paths on different drives
                    inside_media = False
                # Skip dangling references instead of aborting the whole export.
                if inside_media and os.path.isfile(p):
                    shutil.copy2(p, attachment_path)
                return p
            return url
        filename = os.path.join(temp_folder_path, "journal.ndjson")
        total = 0
        with open(filename, "w") as f:
            f.write(json.dumps(self.get_header()) + "\n")
            for cls in list(Content.__subclasses__()):
                pieces = cls.objects.filter(owner=user.identity)
                for p in pieces:
                    total += 1
                    self.ref(p.item)
                    o = {
                        "type": p.__class__.__name__,
                        "content": p.ap_object,
                        "visibility": p.visibility,
                        "metadata": p.metadata,
                    }
                    f.write(json.dumps(o, default=str) + "\n")
                    if cls == Review:
                        re.sub(
                            r"(?<=!\[\]\()([^)]+)(?=\))",
                            lambda x: _save_image(x[1]),
                            p.body,  # type: ignore
                        )
                    elif cls == Note and p.latest_post:
                        for a in p.latest_post.attachments.all():
                            dest = os.path.join(
                                attachment_path, os.path.basename(a.file.name)
                            )
                            shutil.copy2(a.file.path, dest)
            collections = Collection.objects.filter(owner=user.identity)
            for c in collections:
                total += 1
                o = {
                    "type": "Collection",
                    "content": c.ap_object,
                    "visibility": c.visibility,
                    "metadata": c.metadata,
                    "items": [
                        {"item": self.ref(m.item), "metadata": m.metadata}
                        for m in c.ordered_members
                    ],
                }
                f.write(json.dumps(o, default=str) + "\n")
            marks = ShelfMember.objects.filter(owner=user.identity)
            for m in marks:
                total += 1
                o = {
                    "type": "ShelfMember",
                    "item": self.ref(m.item),
                    "status": m.shelf_type,
                    "visibility": m.visibility,
                    "metadata": m.metadata,
                    "published": self.created_time.isoformat(),
                }
                f.write(json.dumps(o, default=str) + "\n")
            logs = ShelfLogEntry.objects.filter(owner=user.identity)
            for log in logs:
                total += 1
                o = {
                    "type": "ShelfLog",
                    "item": self.ref(log.item),
                    "posts": list(log.all_post_ids()),
                    "timestamp": log.created_time,
                }
                f.write(json.dumps(o, default=str) + "\n")
            posts = Post.objects.filter(author_id=user.identity.pk).exclude(
                type_data__has_key="object"
            )
            for p in posts:
                total += 1
                o = {"type": "post", "post": p.to_mastodon_json()}
                for a in p.attachments.all():
                    dest = os.path.join(attachment_path, os.path.basename(a.file.name))
                    shutil.copy2(a.file.path, dest)
                f.write(json.dumps(o, default=str) + "\n")
        filename = os.path.join(temp_folder_path, "catalog.ndjson")
        with open(filename, "w") as f:
            f.write(json.dumps(self.get_header()) + "\n")
            for item in self.ref_items:
                f.write(json.dumps(item.ap_object, default=str) + "\n")
        filename = GenerateDateUUIDMediaFilePath(
-            "f.xlsx", settings.MEDIA_ROOT + "/" + settings.EXPORT_FILE_PATH_ROOT
+            "f.zip", settings.MEDIA_ROOT + "/" + settings.EXPORT_FILE_PATH_ROOT
        )
        if not os.path.exists(os.path.dirname(filename)):
            os.makedirs(os.path.dirname(filename))
-        heading = [
+        shutil.make_archive(filename[:-4], "zip", temp_folder_path)
            "标题",
            "简介",
            "豆瓣评分",
            "链接",
            "创建时间",
            "我的评分",
            "标签",
            "评论",
            "NeoDB链接",
            "其它ID",
        ]
        wb = Workbook()
        # adding write_only=True will speed up but corrupt the xlsx and won't be importable
        for status, label in [
            (ShelfType.COMPLETE, "看过"),
            (ShelfType.PROGRESS, "在看"),
            (ShelfType.WISHLIST, "想看"),
        ]:
            ws = wb.create_sheet(title=label)
            shelf = user.shelf_manager.get_shelf(status)
            q = q_item_in_category(ItemCategory.Movie) | q_item_in_category(
                ItemCategory.TV
            )
            marks = shelf.members.all().filter(q).order_by("created_time")
            ws.append(heading)
            for mm in marks:
                mark = mm.mark
                movie = mark.item
                title = movie.display_title
                if movie.__class__ == TVEpisode:
                    season_number = movie.season.season_number if movie.season else 0
                    summary = f"S{season_number:02d}E{movie.episode_number:02d}"
                else:
                    summary = (
                        str(movie.year or "")
                        + " / "
                        + ",".join(movie.area or [])
                        + " / "
                        + ",".join(movie.genre or [])
                        + " / "
                        + ",".join(movie.director or [])
                        + " / "
                        + ",".join(movie.actor or [])
                    )
                tags = ",".join(mark.tags)
                world_rating = (movie.rating / 2) if movie.rating else None
                timestamp = mark.created_time.strftime("%Y-%m-%d %H:%M:%S")
                my_rating = (mark.rating_grade / 2) if mark.rating_grade else None
                text = mark.comment_text
                source_url = _get_source_url(movie)
                url = movie.absolute_url
                line = [
                    title,
                    summary,
                    world_rating,
                    source_url,
                    timestamp,
                    my_rating,
                    tags,
                    text,
                    url,
                    movie.imdb,
                ]
                ws.append(line)
        for status, label in [
            (ShelfType.COMPLETE, "听过"),
            (ShelfType.PROGRESS, "在听"),
            (ShelfType.WISHLIST, "想听"),
        ]:
            ws = wb.create_sheet(title=label)
            shelf = user.shelf_manager.get_shelf(status)
            q = q_item_in_category(ItemCategory.Music)
            marks = shelf.members.all().filter(q).order_by("created_time")
            ws.append(heading)
            for mm in marks:
                mark = mm.mark
                album = mark.item
                title = album.display_title
                summary = (
                    ",".join(album.artist)
                    + " / "
                    + (album.release_date.strftime("%Y") if album.release_date else "")
                )
                tags = ",".join(mark.tags)
                world_rating = (album.rating / 2) if album.rating else None
                timestamp = mark.created_time.strftime("%Y-%m-%d %H:%M:%S")
                my_rating = (mark.rating_grade / 2) if mark.rating_grade else None
                text = mark.comment_text
                source_url = _get_source_url(album)
                url = album.absolute_url
                line = [
                    title,
                    summary,
                    world_rating,
                    source_url,
                    timestamp,
                    my_rating,
                    tags,
                    text,
                    url,
                    album.barcode,
                ]
                ws.append(line)
        for status, label in [
            (ShelfType.COMPLETE, "读过"),
            (ShelfType.PROGRESS, "在读"),
            (ShelfType.WISHLIST, "想读"),
        ]:
            ws = wb.create_sheet(title=label)
            shelf = user.shelf_manager.get_shelf(status)
            q = q_item_in_category(ItemCategory.Book)
            marks = shelf.members.all().filter(q).order_by("created_time")
            ws.append(heading)
            for mm in marks:
                mark = mm.mark
                book = mark.item
                title = book.display_title
                summary = (
                    ",".join(book.author or [])
                    + " / "
                    + str(book.pub_year or "")
                    + " / "
                    + (book.pub_house or "")
                )
                tags = ",".join(mark.tags)
                world_rating = (book.rating / 2) if book.rating else None
                timestamp = mark.created_time.strftime("%Y-%m-%d %H:%M:%S")
                my_rating = (mark.rating_grade / 2) if mark.rating_grade else None
                text = mark.comment_text
                source_url = _get_source_url(book)
                url = book.absolute_url
                line = [
                    title,
                    summary,
                    world_rating,
                    source_url,
                    timestamp,
                    my_rating,
                    tags,
                    text,
                    url,
                    book.isbn,
                ]
                ws.append(line)
        for status, label in [
            (ShelfType.COMPLETE, "玩过"),
            (ShelfType.PROGRESS, "在玩"),
            (ShelfType.WISHLIST, "想玩"),
        ]:
            ws = wb.create_sheet(title=label)
            shelf = user.shelf_manager.get_shelf(status)
            q = q_item_in_category(ItemCategory.Game)
            marks = shelf.members.all().filter(q).order_by("created_time")
            ws.append(heading)
            for mm in marks:
                mark = mm.mark
                game = mark.item
                title = game.display_title
                summary = (
                    ",".join(game.genre or [])
                    + " / "
                    + ",".join(game.platform or [])
                    + " / "
                    + (
                        game.release_date.strftime("%Y-%m-%d")
                        if game.release_date
                        else ""
                    )
                )
                tags = ",".join(mark.tags)
                world_rating = (game.rating / 2) if game.rating else None
                timestamp = mark.created_time.strftime("%Y-%m-%d %H:%M:%S")
                my_rating = (mark.rating_grade / 2) if mark.rating_grade else None
                text = mark.comment_text
                source_url = _get_source_url(game)
                url = game.absolute_url
                line = [
                    title,
                    summary,
                    world_rating,
                    source_url,
                    timestamp,
                    my_rating,
                    tags,
                    text,
                    url,
                    "",
                ]
                ws.append(line)
        for status, label in [
            (ShelfType.COMPLETE, "听过的播客"),
            (ShelfType.PROGRESS, "在听的播客"),
            (ShelfType.WISHLIST, "想听的播客"),
        ]:
            ws = wb.create_sheet(title=label)
            shelf = user.shelf_manager.get_shelf(status)
            q = q_item_in_category(ItemCategory.Podcast)
            marks = shelf.members.all().filter(q).order_by("created_time")
            ws.append(heading)
            for mm in marks:
                mark = mm.mark
                podcast = mark.item
                title = podcast.display_title
                summary = ",".join(podcast.host or [])
                tags = ",".join(mark.tags)
                world_rating = (podcast.rating / 2) if podcast.rating else None
                timestamp = mark.created_time.strftime("%Y-%m-%d %H:%M:%S")
                my_rating = (mark.rating_grade / 2) if mark.rating_grade else None
                text = mark.comment_text
                source_url = _get_source_url(podcast)
                url = podcast.absolute_url
                line = [
                    title,
                    summary,
                    world_rating,
                    source_url,
                    timestamp,
                    my_rating,
                    tags,
                    text,
                    url,
                    "",
                ]
                ws.append(line)
        review_heading = [
            "标题",
            "评论对象",
            "链接",
            "创建时间",
            "我的评分",
            "类型",
            "内容",
            "评论对象原始链接",
            "评论对象NeoDB链接",
        ]
        for category, label in [
            (ItemCategory.Movie, "影评"),
            (ItemCategory.Book, "书评"),
            (ItemCategory.Music, "乐评"),
            (ItemCategory.Game, "游戏评论"),
            (ItemCategory.Podcast, "播客评论"),
        ]:
            ws = wb.create_sheet(title=label)
            q = q_item_in_category(category)
            reviews = (
                Review.objects.filter(owner=user.identity)
                .filter(q)
                .order_by("created_time")
            )
            ws.append(review_heading)
            for review in reviews:
                title = review.title
                target = "《" + review.item.display_title + "》"
                url = review.absolute_url
                timestamp = review.created_time.strftime("%Y-%m-%d %H:%M:%S")
                my_rating = (
                    None  # (mark.rating_grade / 2) if mark.rating_grade else None
                )
                content = review.body
                target_source_url = _get_source_url(review.item)
                target_url = review.item.absolute_url
                line = [
                    title,
                    target,
                    url,
                    timestamp,
                    my_rating,
                    label,
                    content,
                    target_source_url,
                    target_url,
                ]
                ws.append(line)
        wb.save(filename=filename)
        self.metadata["file"] = filename
        self.metadata["total"] = total
        self.message = "Export complete."
        self.save()
        # user.preference.export_status["marks_pending"] = False
        # user.preference.export_status["marks_file"] = filename
        # user.preference.export_status["marks_date"] = datetime.now().strftime(
        #     "%Y-%m-%d %H:%M"
        # )
        # user.preference.save(update_fields=["export_status"])
--- a/journal/management/commands/journal.py
+++ b/journal/management/commands/journal.py
@ -8,6 +8,7 @@ from django.utils import timezone
 from tqdm import tqdm
 from catalog.models import Item
 from journal.exporters.ndjson import NdjsonExporter
 from journal.models import (
    Collection,
    Content,
@ -27,6 +28,7 @@ _CONFIRM = "confirm deleting collection? [Y/N] "
 _HELP_TEXT = """
 integrity:      check and fix remaining journal for merged and deleted items
 purge:          delete invalid data (visibility=99)
 export:         run export task
 idx-info:       show index information
 idx-init:       check and create index if not exists
 idx-destroy:    delete index
@ -51,6 +53,7 @@ class Command(BaseCommand):
            choices=[
                "integrity",
                "purge",
                "export",
                "idx-info",
                "idx-init",
                "idx-alt",
@ -111,6 +114,18 @@ class Command(BaseCommand):
                if self.fix:
                    update_journal_for_merged_item(i.url)
    def export(self, owner_ids):
        """Run an NDJSON export synchronously for each matching user.

        For every user whose identity is in ``owner_ids`` an ``NdjsonExporter``
        task is created, run in-process, reported on stdout, and then deleted.
        """
        for account in User.objects.filter(identity__in=owner_ids):
            job = NdjsonExporter.create(user=account)
            self.stdout.write(f"exporting for {account} (task {job.pk})...")
            if job._run():
                outcome = f"complete {job.metadata['file']}"
            else:
                outcome = "failed"
            self.stdout.write(outcome)
            # The task row only exists to drive this run; remove it afterwards.
            job.delete()
    def handle(
        self,
        action,
@ -152,6 +167,9 @@ class Command(BaseCommand):
                        cls.objects.filter(visibility=99).delete()
                self.stdout.write(self.style.SUCCESS("Done."))
            case "export":
                self.export(owners)
            case "idx-destroy":
                if yes or input(_CONFIRM).upper().startswith("Y"):
                    index.delete_collection()
--- a/journal/models/utils.py
+++ b/journal/models/utils.py
@ -10,6 +10,7 @@ from .collection import Collection, CollectionMember, FeaturedCollection
 from .comment import Comment
 from .common import Content, Debris
 from .itemlist import ListMember
 from .note import Note
 from .rating import Rating
 from .review import Review
 from .shelf import ShelfLogEntry, ShelfMember
@ -31,6 +32,7 @@ def remove_data_by_user(owner: APIdentity):
    Review.objects.filter(owner=owner).delete()
    TagMember.objects.filter(owner=owner).delete()
    Tag.objects.filter(owner=owner).delete()
    Note.objects.filter(owner=owner).delete()
    CollectionMember.objects.filter(owner=owner).delete()
    Collection.objects.filter(owner=owner).delete()
    FeaturedCollection.objects.filter(owner=owner).delete()
--- a/users/management/commands/task.py
+++ b/users/management/commands/task.py
@ -52,7 +52,7 @@ class Command(BaseCommand):
            for task in tqdm(tasks):
                task.state = Task.States.pending
                task.save(update_fields=["state"])
-                Task._run(task.pk)
+                Task._execute(task.pk)
        if options["requeue"]:
            for task in tqdm(tasks):
                task.state = Task.States.pending
--- a/users/models/task.py
+++ b/users/models/task.py
@ -1,3 +1,5 @@
 from typing import Self
 import django_rq
 from auditlog.context import set_actor
 from django.db import models
@ -46,14 +48,27 @@ class Task(TypedModel):
        return cls.objects.filter(user=user).order_by("-created_time").first()
    @classmethod
-    def create(cls, user: User, **kwargs) -> "Task":
+    def create(cls, user: User, **kwargs) -> Self:
        d = cls.DefaultMetadata.copy()
        d.update(kwargs)
        t = cls.objects.create(user=user, metadata=d)
        return t
    def _run(self) -> bool:
        """Execute ``run()`` on behalf of the task's user.

        Activates the user's language, sets the user as the audit-log actor
        for the duration of the run, and logs any exception raised.

        Returns:
            ``True`` when ``run()`` finished without raising, ``False`` otherwise.
        """
        activate_language_for_user(self.user)
        with set_actor(self.user):
            try:
                self.run()
            except Exception as err:
                logger.exception(
                    f"error running {self.__class__}",
                    extra={"exception": err, "task": self.pk},
                )
                succeeded = False
            else:
                succeeded = True
            return succeeded
    @classmethod
-    def _run(cls, task_id: int):
+    def _execute(cls, task_id: int):
        task = cls.objects.get(pk=task_id)
        logger.info(f"running {task}")
        if task.state != cls.States.pending:
@ -63,17 +78,7 @@ class Task(TypedModel):
            return
        task.state = cls.States.started
        task.save()
-        activate_language_for_user(task.user)
+        ok = task._run()
        with set_actor(task.user):
            try:
                task.run()
                ok = True
            except Exception as e:
                logger.exception(
                    f"error running {cls.__name__}",
                    extra={"exception": e, "task": task_id},
                )
                ok = False
        task.refresh_from_db()
        task.state = cls.States.complete if ok else cls.States.failed
        task.save()
@ -81,7 +86,7 @@ class Task(TypedModel):
    def enqueue(self):
        return django_rq.get_queue(self.TaskQueue).enqueue(
-            self._run, self.pk, job_id=self.job_id
+            self._execute, self.pk, job_id=self.job_id
        )
    def notify(self) -> None:
--- a/users/templates/users/data.html
+++ b/users/templates/users/data.html
@ -250,6 +250,23 @@
                {% endif %}
              {% endif %}
            </form>
            <hr>
            <form action="{% url 'users:export_ndjson' %}"
                  method="post"
                  enctype="multipart/form-data">
              {% csrf_token %}
              <input type="submit" value="{% trans 'Export everything in NDJSON' %}" />
              {% if ndjson_export_task %}
                <br>
                {% trans 'Last export' %}: {{ ndjson_export_task.created_time }}
                {% trans 'Status' %}: {{ ndjson_export_task.get_state_display }}
                <br>
                {{ ndjson_export_task.message }}
                {% if ndjson_export_task.metadata.file %}
                  <a href="{% url 'users:export_ndjson' %}" download>{% trans 'Download' %}</a>
                {% endif %}
              {% endif %}
            </form>
          </details>
        </article>
        <article>
--- a/users/urls.py
+++ b/users/urls.py
@ -18,6 +18,7 @@ urlpatterns = [
    path("data/export/reviews", export_reviews, name="export_reviews"),
    path("data/export/marks", export_marks, name="export_marks"),
    path("data/export/csv", export_csv, name="export_csv"),
    path("data/export/ndjson", export_ndjson, name="export_ndjson"),
    path("data/sync_mastodon", sync_mastodon, name="sync_mastodon"),
    path(
        "data/sync_mastodon_preference",
--- a/users/views/data.py
+++ b/users/views/data.py
@ -14,6 +14,7 @@ from django.utils.translation import gettext_lazy as _
 from common.utils import GenerateDateUUIDMediaFilePath
 from journal.exporters import DoufenExporter
 from journal.exporters.csv import CsvExporter
 from journal.exporters.ndjson import NdjsonExporter
 from journal.importers import (
    DoubanImporter,
    GoodreadsImporter,
@ -99,6 +100,7 @@ def data(request):
            "import_task": DoubanImporter.latest_task(request.user),
            "export_task": DoufenExporter.latest_task(request.user),
            "csv_export_task": CsvExporter.latest_task(request.user),
            "ndjson_export_task": NdjsonExporter.latest_task(request.user),
            "letterboxd_task": LetterboxdImporter.latest_task(request.user),
            "goodreads_task": GoodreadsImporter.latest_task(request.user),
            "years": years,
@ -183,6 +185,38 @@ def export_csv(request):
        return response
@login_required
 def export_ndjson(request):
    """Start an NDJSON export (POST) or serve the latest finished one (any other method).

    POST: enqueue a new ``NdjsonExporter`` task unless the latest one is
    neither complete nor failed and was created within the last hour, then
    redirect to the data page with a status message.

    Otherwise: if the latest task is complete, answer with an
    ``X-Accel-Redirect`` response pointing at its zip file; if not, redirect
    to the data page with an error message.
    """
    task = NdjsonExporter.latest_task(request.user)

    if request.method != "POST":
        if not (task and task.state == Task.States.complete):
            messages.add_message(
                request, messages.ERROR, _("Export file not available.")
            )
            return redirect(reverse("users:data"))
        response = HttpResponse()
        # Translate the stored filesystem path into its URL under MEDIA_URL.
        path_under_media = task.metadata["file"][len(settings.MEDIA_ROOT) :]
        response["X-Accel-Redirect"] = settings.MEDIA_URL + path_under_media
        response["Content-Type"] = "application/zip"
        response["Content-Disposition"] = f'attachment; filename="{task.filename}.zip"'
        return response

    unfinished_states_excluded = [Task.States.complete, Task.States.failed]
    if (
        task
        and task.state not in unfinished_states_excluded
        and task.created_time > (timezone.now() - datetime.timedelta(hours=1))
    ):
        notice = _("Recent export still in progress.")
    else:
        NdjsonExporter.create(request.user).enqueue()
        notice = _("Generating exports.")
    messages.add_message(request, messages.INFO, notice)
    return redirect(reverse("users:data"))
@login_required
 def sync_mastodon(request):
    if request.method == "POST":