export ndjson

This commit is contained in:
Your Name 2025-01-28 21:38:02 -05:00 committed by Henri Dickson
parent 409ba0a6fd
commit 26858ee905
17 changed files with 277 additions and 335 deletions

View file

@ -39,6 +39,7 @@ from catalog.common import (
from catalog.common.models import (
LIST_OF_ONE_PLUS_STR_SCHEMA,
LOCALE_CHOICES_JSONFORM,
ItemType,
LanguageListField,
)
from common.models import uniq
@ -119,6 +120,8 @@ class Edition(Item):
WEB = "web", _("Web Fiction")
OTHER = "other", _("Other")
schema = EditionSchema
type = ItemType.Edition
category = ItemCategory.Book
url_path = "book"

View file

@ -120,7 +120,7 @@ IdealIdTypes = [
class ItemType(models.TextChoices):
Book = "book", _("Book") # type:ignore[reportCallIssue]
Edition = "edition", _("Edition") # type:ignore[reportCallIssue]
TVShow = "tvshow", _("TV Serie") # type:ignore[reportCallIssue]
TVSeason = "tvseason", _("TV Season") # type:ignore[reportCallIssue]
TVEpisode = "tvepisode", _("TV Episode") # type:ignore[reportCallIssue]
@ -346,6 +346,7 @@ class Item(PolymorphicModel):
collections: QuerySet["Collection"]
merged_from_items: QuerySet["Item"]
merged_to_item_id: int
schema = ItemSchema
category: ItemCategory # subclass must specify this
url_path = "item" # subclass must specify this
child_class = None # subclass may specify this to allow link to parent item
@ -514,15 +515,19 @@ class Item(PolymorphicModel):
def ap_object_type(self) -> str:
return self.get_ap_object_type()
@property
def ap_object(self):
return self.schema.from_orm(self).model_dump()
@property
def ap_object_ref(self) -> dict[str, Any]:
o = {
"type": self.get_ap_object_type(),
"href": self.absolute_url,
"name": self.title,
"name": self.display_title,
}
if self.has_cover():
o["image"] = self.cover_image_url
o["image"] = self.cover_image_url or ""
return o
def log_action(self, changes: dict[str, Any]):

View file

@ -44,6 +44,7 @@ class GameSchema(GameInSchema, BaseSchema):
class Game(Item):
type = ItemType.Game
schema = GameSchema
category = ItemCategory.Game
url_path = "game"
igdb = PrimaryLookupIdDescriptor(IdType.IGDB)

View file

@ -35,6 +35,7 @@ class MovieSchema(MovieInSchema, BaseSchema):
class Movie(Item):
type = ItemType.Movie
schema = MovieSchema
category = ItemCategory.Movie
url_path = "movie"
imdb = PrimaryLookupIdDescriptor(IdType.IMDB)

View file

@ -33,6 +33,7 @@ class AlbumSchema(AlbumInSchema, BaseSchema):
class Album(Item):
schema = AlbumSchema
type = ItemType.Album
url_path = "album"
category = ItemCategory.Music

View file

@ -104,6 +104,7 @@ def _crew_by_role(crew):
class Performance(Item):
if TYPE_CHECKING:
productions: models.QuerySet["PerformanceProduction"]
schema = PerformanceSchema
type = ItemType.Performance
child_class = "PerformanceProduction"
category = ItemCategory.Performance
@ -247,6 +248,7 @@ class Performance(Item):
class PerformanceProduction(Item):
schema = PerformanceProductionSchema
type = ItemType.PerformanceProduction
category = ItemCategory.Performance
url_path = "performance/production"

View file

@ -13,7 +13,11 @@ from catalog.common import (
ItemInSchema,
jsondata,
)
from catalog.common.models import LIST_OF_ONE_PLUS_STR_SCHEMA, LanguageListField
from catalog.common.models import (
LIST_OF_ONE_PLUS_STR_SCHEMA,
ItemType,
LanguageListField,
)
class PodcastInSchema(ItemInSchema):
@ -44,6 +48,8 @@ class PodcastEpisodeSchema(PodcastEpisodeInSchema, BaseSchema):
class Podcast(Item):
if TYPE_CHECKING:
episodes: models.QuerySet["PodcastEpisode"]
type = ItemType.Podcast
schema = PodcastSchema
category = ItemCategory.Podcast
child_class = "PodcastEpisode"
url_path = "podcast"
@ -118,6 +124,8 @@ class Podcast(Item):
class PodcastEpisode(Item):
schema = PodcastEpisodeSchema
type = ItemType.PodcastEpisode
category = ItemCategory.Podcast
url_path = "podcast/episode"
# uid = models.UUIDField(default=uuid.uuid4, editable=False, db_index=True)

View file

@ -97,6 +97,7 @@ class TVEpisodeSchema(ItemSchema):
class TVShow(Item):
if TYPE_CHECKING:
seasons: QuerySet["TVSeason"]
schema = TVShowSchema
type = ItemType.TVShow
child_class = "TVSeason"
category = ItemCategory.TV
@ -261,6 +262,7 @@ class TVShow(Item):
class TVSeason(Item):
if TYPE_CHECKING:
episodes: models.QuerySet["TVEpisode"]
schema = TVSeasonSchema
type = ItemType.TVSeason
category = ItemCategory.TV
url_path = "tv/season"
@ -480,6 +482,8 @@ class TVSeason(Item):
class TVEpisode(Item):
schema = TVEpisodeSchema
type = ItemType.TVEpisode
category = ItemCategory.TV
url_path = "tv/episode"
season = models.ForeignKey(

View file

@ -87,6 +87,7 @@ class CsvExporter(Task):
temp_dir = tempfile.mkdtemp()
temp_folder_path = os.path.join(temp_dir, self.filename)
os.makedirs(temp_folder_path)
total = 0
for category in [
ItemCategory.Movie,
ItemCategory.TV,
@ -107,6 +108,7 @@ class CsvExporter(Task):
writer = csv.writer(csvfile)
writer.writerow(_mark_heading)
for mark in marks:
total += 1
item = mark.item
line = [
item.display_title,
@ -128,6 +130,7 @@ class CsvExporter(Task):
writer = csv.writer(csvfile)
writer.writerow(_review_heading)
for review in reviews:
total += 1
item = review.item
line = [
item.display_title,
@ -147,6 +150,7 @@ class CsvExporter(Task):
.order_by("created_time")
)
for note in notes:
total += 1
item = note.item
line = [
item.display_title,
@ -166,5 +170,6 @@ class CsvExporter(Task):
os.makedirs(os.path.dirname(filename))
shutil.make_archive(filename[:-4], "zip", temp_folder_path)
self.metadata["file"] = filename
self.metadata["total"] = total
self.message = "Export complete."
self.save()

View file

@ -1,36 +1,24 @@
import json
import os
import re
import shutil
import tempfile
from django.conf import settings
from openpyxl import Workbook
from django.utils import timezone
from catalog.models import IdType, ItemCategory, TVEpisode
from catalog.common.downloaders import ProxiedImageDownloader
from common.utils import GenerateDateUUIDMediaFilePath
from journal.models import Review, ShelfType, q_item_in_category
from journal.models import ShelfMember
from journal.models.collection import Collection
from journal.models.common import Content
from journal.models.note import Note
from journal.models.review import Review
from journal.models.shelf import ShelfLogEntry
from takahe.models import Post
from users.models import Task
def _get_source_url(item):
    """Return the URL of the item's preferred external resource.

    Douban-sourced resources take priority; otherwise fall back to any
    external resource. Returns "" when the item has none.
    """
    douban_types = [
        IdType.DoubanBook,
        IdType.DoubanMovie,
        IdType.DoubanMusic,
        IdType.DoubanGame,
        IdType.DoubanDrama,
    ]
    resource = (
        item.external_resources.all().filter(id_type__in=douban_types).first()
    )
    if resource is None:
        resource = item.external_resources.all().first()
    return resource.url if resource else ""
# def export_marks_task(user):
# user.preference.export_status["marks_pending"] = True
# user.preference.save(update_fields=["export_status"])
class NdjsonExporter(Task):
class Meta:
app_label = "journal" # workaround bug in TypedModel
@ -40,302 +28,149 @@ class NdjsonExporter(Task):
"file": None,
"total": 0,
}
ref_items = []
@property
def filename(self) -> str:
d = self.created_time.strftime("%Y%m%d%H%M%S")
return f"neodb_{self.user.username}_{d}_ndjson"
def ref(self, item) -> str:
if item not in self.ref_items:
self.ref_items.append(item)
return item.absolute_url
def get_header(self):
return {
"server": settings.SITE_DOMAIN,
"neodb_version": settings.NEODB_VERSION,
"username": self.user.username,
"actor": self.user.identity.actor_uri,
"request_time": self.created_time.isoformat(),
"created_time": timezone.now().isoformat(),
}
def run(self):
user = self.user
temp_dir = tempfile.mkdtemp()
temp_folder_path = os.path.join(temp_dir, self.filename)
os.makedirs(temp_folder_path)
attachment_path = os.path.join(temp_folder_path, "attachments")
os.makedirs(attachment_path, exist_ok=True)
def _save_image(url):
if url.startswith("http"):
imgdl = ProxiedImageDownloader(url)
raw_img = imgdl.download().content
ext = imgdl.extention
file = GenerateDateUUIDMediaFilePath(f"x.{ext}", attachment_path)
with open(file, "wb") as binary_file:
binary_file.write(raw_img)
return file
elif url.startswith("/"):
p = os.path.abspath(
os.path.join(settings.MEDIA_ROOT, url[len(settings.MEDIA_URL) :])
)
if p.startswith(settings.MEDIA_ROOT):
shutil.copy2(p, attachment_path)
return p
return url
filename = os.path.join(temp_folder_path, "journal.ndjson")
total = 0
with open(filename, "w") as f:
f.write(json.dumps(self.get_header()) + "\n")
for cls in list(Content.__subclasses__()):
pieces = cls.objects.filter(owner=user.identity)
for p in pieces:
total += 1
self.ref(p.item)
o = {
"type": p.__class__.__name__,
"content": p.ap_object,
"visibility": p.visibility,
"metadata": p.metadata,
}
f.write(json.dumps(o, default=str) + "\n")
if cls == Review:
re.sub(
r"(?<=!\[\]\()([^)]+)(?=\))",
lambda x: _save_image(x[1]),
p.body, # type: ignore
)
elif cls == Note and p.latest_post:
for a in p.latest_post.attachments.all():
dest = os.path.join(
attachment_path, os.path.basename(a.file.name)
)
shutil.copy2(a.file.path, dest)
collections = Collection.objects.filter(owner=user.identity)
for c in collections:
total += 1
o = {
"type": "Collection",
"content": c.ap_object,
"visibility": c.visibility,
"metadata": c.metadata,
"items": [
{"item": self.ref(m.item), "metadata": m.metadata}
for m in c.ordered_members
],
}
f.write(json.dumps(o, default=str) + "\n")
marks = ShelfMember.objects.filter(owner=user.identity)
for m in marks:
total += 1
o = {
"type": "ShelfMember",
"item": self.ref(m.item),
"status": m.shelf_type,
"visibility": m.visibility,
"metadata": m.metadata,
"published": self.created_time.isoformat(),
}
f.write(json.dumps(o, default=str) + "\n")
logs = ShelfLogEntry.objects.filter(owner=user.identity)
for log in logs:
total += 1
o = {
"type": "ShelfLog",
"item": self.ref(log.item),
"posts": list(log.all_post_ids()),
"timestamp": log.created_time,
}
f.write(json.dumps(o, default=str) + "\n")
posts = Post.objects.filter(author_id=user.identity.pk).exclude(
type_data__has_key="object"
)
for p in posts:
total += 1
o = {"type": "post", "post": p.to_mastodon_json()}
for a in p.attachments.all():
dest = os.path.join(attachment_path, os.path.basename(a.file.name))
shutil.copy2(a.file.path, dest)
f.write(json.dumps(o, default=str) + "\n")
filename = os.path.join(temp_folder_path, "catalog.ndjson")
with open(filename, "w") as f:
f.write(json.dumps(self.get_header()) + "\n")
for item in self.ref_items:
f.write(json.dumps(item.ap_object, default=str) + "\n")
filename = GenerateDateUUIDMediaFilePath(
"f.xlsx", settings.MEDIA_ROOT + "/" + settings.EXPORT_FILE_PATH_ROOT
"f.zip", settings.MEDIA_ROOT + "/" + settings.EXPORT_FILE_PATH_ROOT
)
if not os.path.exists(os.path.dirname(filename)):
os.makedirs(os.path.dirname(filename))
heading = [
"标题",
"简介",
"豆瓣评分",
"链接",
"创建时间",
"我的评分",
"标签",
"评论",
"NeoDB链接",
"其它ID",
]
wb = Workbook()
# adding write_only=True will speed up but corrupt the xlsx and won't be importable
for status, label in [
(ShelfType.COMPLETE, "看过"),
(ShelfType.PROGRESS, "在看"),
(ShelfType.WISHLIST, "想看"),
]:
ws = wb.create_sheet(title=label)
shelf = user.shelf_manager.get_shelf(status)
q = q_item_in_category(ItemCategory.Movie) | q_item_in_category(
ItemCategory.TV
)
marks = shelf.members.all().filter(q).order_by("created_time")
ws.append(heading)
for mm in marks:
mark = mm.mark
movie = mark.item
title = movie.display_title
if movie.__class__ == TVEpisode:
season_number = movie.season.season_number if movie.season else 0
summary = f"S{season_number:02d}E{movie.episode_number:02d}"
else:
summary = (
str(movie.year or "")
+ " / "
+ ",".join(movie.area or [])
+ " / "
+ ",".join(movie.genre or [])
+ " / "
+ ",".join(movie.director or [])
+ " / "
+ ",".join(movie.actor or [])
)
tags = ",".join(mark.tags)
world_rating = (movie.rating / 2) if movie.rating else None
timestamp = mark.created_time.strftime("%Y-%m-%d %H:%M:%S")
my_rating = (mark.rating_grade / 2) if mark.rating_grade else None
text = mark.comment_text
source_url = _get_source_url(movie)
url = movie.absolute_url
line = [
title,
summary,
world_rating,
source_url,
timestamp,
my_rating,
tags,
text,
url,
movie.imdb,
]
ws.append(line)
shutil.make_archive(filename[:-4], "zip", temp_folder_path)
for status, label in [
(ShelfType.COMPLETE, "听过"),
(ShelfType.PROGRESS, "在听"),
(ShelfType.WISHLIST, "想听"),
]:
ws = wb.create_sheet(title=label)
shelf = user.shelf_manager.get_shelf(status)
q = q_item_in_category(ItemCategory.Music)
marks = shelf.members.all().filter(q).order_by("created_time")
ws.append(heading)
for mm in marks:
mark = mm.mark
album = mark.item
title = album.display_title
summary = (
",".join(album.artist)
+ " / "
+ (album.release_date.strftime("%Y") if album.release_date else "")
)
tags = ",".join(mark.tags)
world_rating = (album.rating / 2) if album.rating else None
timestamp = mark.created_time.strftime("%Y-%m-%d %H:%M:%S")
my_rating = (mark.rating_grade / 2) if mark.rating_grade else None
text = mark.comment_text
source_url = _get_source_url(album)
url = album.absolute_url
line = [
title,
summary,
world_rating,
source_url,
timestamp,
my_rating,
tags,
text,
url,
album.barcode,
]
ws.append(line)
for status, label in [
(ShelfType.COMPLETE, "读过"),
(ShelfType.PROGRESS, "在读"),
(ShelfType.WISHLIST, "想读"),
]:
ws = wb.create_sheet(title=label)
shelf = user.shelf_manager.get_shelf(status)
q = q_item_in_category(ItemCategory.Book)
marks = shelf.members.all().filter(q).order_by("created_time")
ws.append(heading)
for mm in marks:
mark = mm.mark
book = mark.item
title = book.display_title
summary = (
",".join(book.author or [])
+ " / "
+ str(book.pub_year or "")
+ " / "
+ (book.pub_house or "")
)
tags = ",".join(mark.tags)
world_rating = (book.rating / 2) if book.rating else None
timestamp = mark.created_time.strftime("%Y-%m-%d %H:%M:%S")
my_rating = (mark.rating_grade / 2) if mark.rating_grade else None
text = mark.comment_text
source_url = _get_source_url(book)
url = book.absolute_url
line = [
title,
summary,
world_rating,
source_url,
timestamp,
my_rating,
tags,
text,
url,
book.isbn,
]
ws.append(line)
for status, label in [
(ShelfType.COMPLETE, "玩过"),
(ShelfType.PROGRESS, "在玩"),
(ShelfType.WISHLIST, "想玩"),
]:
ws = wb.create_sheet(title=label)
shelf = user.shelf_manager.get_shelf(status)
q = q_item_in_category(ItemCategory.Game)
marks = shelf.members.all().filter(q).order_by("created_time")
ws.append(heading)
for mm in marks:
mark = mm.mark
game = mark.item
title = game.display_title
summary = (
",".join(game.genre or [])
+ " / "
+ ",".join(game.platform or [])
+ " / "
+ (
game.release_date.strftime("%Y-%m-%d")
if game.release_date
else ""
)
)
tags = ",".join(mark.tags)
world_rating = (game.rating / 2) if game.rating else None
timestamp = mark.created_time.strftime("%Y-%m-%d %H:%M:%S")
my_rating = (mark.rating_grade / 2) if mark.rating_grade else None
text = mark.comment_text
source_url = _get_source_url(game)
url = game.absolute_url
line = [
title,
summary,
world_rating,
source_url,
timestamp,
my_rating,
tags,
text,
url,
"",
]
ws.append(line)
for status, label in [
(ShelfType.COMPLETE, "听过的播客"),
(ShelfType.PROGRESS, "在听的播客"),
(ShelfType.WISHLIST, "想听的播客"),
]:
ws = wb.create_sheet(title=label)
shelf = user.shelf_manager.get_shelf(status)
q = q_item_in_category(ItemCategory.Podcast)
marks = shelf.members.all().filter(q).order_by("created_time")
ws.append(heading)
for mm in marks:
mark = mm.mark
podcast = mark.item
title = podcast.display_title
summary = ",".join(podcast.host or [])
tags = ",".join(mark.tags)
world_rating = (podcast.rating / 2) if podcast.rating else None
timestamp = mark.created_time.strftime("%Y-%m-%d %H:%M:%S")
my_rating = (mark.rating_grade / 2) if mark.rating_grade else None
text = mark.comment_text
source_url = _get_source_url(podcast)
url = podcast.absolute_url
line = [
title,
summary,
world_rating,
source_url,
timestamp,
my_rating,
tags,
text,
url,
"",
]
ws.append(line)
review_heading = [
"标题",
"评论对象",
"链接",
"创建时间",
"我的评分",
"类型",
"内容",
"评论对象原始链接",
"评论对象NeoDB链接",
]
for category, label in [
(ItemCategory.Movie, "影评"),
(ItemCategory.Book, "书评"),
(ItemCategory.Music, "乐评"),
(ItemCategory.Game, "游戏评论"),
(ItemCategory.Podcast, "播客评论"),
]:
ws = wb.create_sheet(title=label)
q = q_item_in_category(category)
reviews = (
Review.objects.filter(owner=user.identity)
.filter(q)
.order_by("created_time")
)
ws.append(review_heading)
for review in reviews:
title = review.title
target = "" + review.item.display_title + ""
url = review.absolute_url
timestamp = review.created_time.strftime("%Y-%m-%d %H:%M:%S")
my_rating = (
None # (mark.rating_grade / 2) if mark.rating_grade else None
)
content = review.body
target_source_url = _get_source_url(review.item)
target_url = review.item.absolute_url
line = [
title,
target,
url,
timestamp,
my_rating,
label,
content,
target_source_url,
target_url,
]
ws.append(line)
wb.save(filename=filename)
self.metadata["file"] = filename
self.metadata["total"] = total
self.message = "Export complete."
self.save()
# user.preference.export_status["marks_pending"] = False
# user.preference.export_status["marks_file"] = filename
# user.preference.export_status["marks_date"] = datetime.now().strftime(
# "%Y-%m-%d %H:%M"
# )
# user.preference.save(update_fields=["export_status"])

View file

@ -8,6 +8,7 @@ from django.utils import timezone
from tqdm import tqdm
from catalog.models import Item
from journal.exporters.ndjson import NdjsonExporter
from journal.models import (
Collection,
Content,
@ -27,6 +28,7 @@ _CONFIRM = "confirm deleting collection? [Y/N] "
_HELP_TEXT = """
integrity: check and fix remaining journal for merged and deleted items
purge: delete invalid data (visibility=99)
export: run export task
idx-info: show index information
idx-init: check and create index if not exists
idx-destroy: delete index
@ -51,6 +53,7 @@ class Command(BaseCommand):
choices=[
"integrity",
"purge",
"export",
"idx-info",
"idx-init",
"idx-alt",
@ -111,6 +114,18 @@ class Command(BaseCommand):
if self.fix:
update_journal_for_merged_item(i.url)
def export(self, owner_ids):
users = User.objects.filter(identity__in=owner_ids)
for user in users:
task = NdjsonExporter.create(user=user)
self.stdout.write(f"exporting for {user} (task {task.pk})...")
ok = task._run()
if ok:
self.stdout.write(f"complete {task.metadata['file']}")
else:
self.stdout.write("failed")
task.delete()
def handle(
self,
action,
@ -152,6 +167,9 @@ class Command(BaseCommand):
cls.objects.filter(visibility=99).delete()
self.stdout.write(self.style.SUCCESS("Done."))
case "export":
self.export(owners)
case "idx-destroy":
if yes or input(_CONFIRM).upper().startswith("Y"):
index.delete_collection()

View file

@ -10,6 +10,7 @@ from .collection import Collection, CollectionMember, FeaturedCollection
from .comment import Comment
from .common import Content, Debris
from .itemlist import ListMember
from .note import Note
from .rating import Rating
from .review import Review
from .shelf import ShelfLogEntry, ShelfMember
@ -31,6 +32,7 @@ def remove_data_by_user(owner: APIdentity):
Review.objects.filter(owner=owner).delete()
TagMember.objects.filter(owner=owner).delete()
Tag.objects.filter(owner=owner).delete()
Note.objects.filter(owner=owner).delete()
CollectionMember.objects.filter(owner=owner).delete()
Collection.objects.filter(owner=owner).delete()
FeaturedCollection.objects.filter(owner=owner).delete()

View file

@ -52,7 +52,7 @@ class Command(BaseCommand):
for task in tqdm(tasks):
task.state = Task.States.pending
task.save(update_fields=["state"])
Task._run(task.pk)
Task._execute(task.pk)
if options["requeue"]:
for task in tqdm(tasks):
task.state = Task.States.pending

View file

@ -1,3 +1,5 @@
from typing import Self
import django_rq
from auditlog.context import set_actor
from django.db import models
@ -46,14 +48,27 @@ class Task(TypedModel):
return cls.objects.filter(user=user).order_by("-created_time").first()
@classmethod
def create(cls, user: User, **kwargs) -> "Task":
def create(cls, user: User, **kwargs) -> Self:
d = cls.DefaultMetadata.copy()
d.update(kwargs)
t = cls.objects.create(user=user, metadata=d)
return t
def _run(self) -> bool:
activate_language_for_user(self.user)
with set_actor(self.user):
try:
self.run()
return True
except Exception as e:
logger.exception(
f"error running {self.__class__}",
extra={"exception": e, "task": self.pk},
)
return False
@classmethod
def _run(cls, task_id: int):
def _execute(cls, task_id: int):
task = cls.objects.get(pk=task_id)
logger.info(f"running {task}")
if task.state != cls.States.pending:
@ -63,17 +78,7 @@ class Task(TypedModel):
return
task.state = cls.States.started
task.save()
activate_language_for_user(task.user)
with set_actor(task.user):
try:
task.run()
ok = True
except Exception as e:
logger.exception(
f"error running {cls.__name__}",
extra={"exception": e, "task": task_id},
)
ok = False
ok = task._run()
task.refresh_from_db()
task.state = cls.States.complete if ok else cls.States.failed
task.save()
@ -81,7 +86,7 @@ class Task(TypedModel):
def enqueue(self):
return django_rq.get_queue(self.TaskQueue).enqueue(
self._run, self.pk, job_id=self.job_id
self._execute, self.pk, job_id=self.job_id
)
def notify(self) -> None:

View file

@ -250,6 +250,23 @@
{% endif %}
{% endif %}
</form>
<hr>
<form action="{% url 'users:export_ndjson' %}"
method="post"
enctype="multipart/form-data">
{% csrf_token %}
<input type="submit" value="{% trans 'Export everything in NDJSON' %}" />
{% if ndjson_export_task %}
<br>
{% trans 'Last export' %}: {{ ndjson_export_task.created_time }}
{% trans 'Status' %}: {{ ndjson_export_task.get_state_display }}
<br>
{{ ndjson_export_task.message }}
{% if ndjson_export_task.metadata.file %}
<a href="{% url 'users:export_ndjson' %}" download>{% trans 'Download' %}</a>
{% endif %}
{% endif %}
</form>
</details>
</article>
<article>

View file

@ -18,6 +18,7 @@ urlpatterns = [
path("data/export/reviews", export_reviews, name="export_reviews"),
path("data/export/marks", export_marks, name="export_marks"),
path("data/export/csv", export_csv, name="export_csv"),
path("data/export/ndjson", export_ndjson, name="export_ndjson"),
path("data/sync_mastodon", sync_mastodon, name="sync_mastodon"),
path(
"data/sync_mastodon_preference",

View file

@ -14,6 +14,7 @@ from django.utils.translation import gettext_lazy as _
from common.utils import GenerateDateUUIDMediaFilePath
from journal.exporters import DoufenExporter
from journal.exporters.csv import CsvExporter
from journal.exporters.ndjson import NdjsonExporter
from journal.importers import (
DoubanImporter,
GoodreadsImporter,
@ -99,6 +100,7 @@ def data(request):
"import_task": DoubanImporter.latest_task(request.user),
"export_task": DoufenExporter.latest_task(request.user),
"csv_export_task": CsvExporter.latest_task(request.user),
"ndjson_export_task": NdjsonExporter.latest_task(request.user),
"letterboxd_task": LetterboxdImporter.latest_task(request.user),
"goodreads_task": GoodreadsImporter.latest_task(request.user),
"years": years,
@ -183,6 +185,38 @@ def export_csv(request):
return response
@login_required
def export_ndjson(request):
    """POST: queue a full NDJSON export, throttled to one unfinished task per hour.

    GET: serve the most recent completed export as a zip download via
    X-Accel-Redirect (nginx internal redirect).
    """
    task = NdjsonExporter.latest_task(request.user)
    if request.method == "POST":
        one_hour_ago = timezone.now() - datetime.timedelta(hours=1)
        still_running = (
            task
            and task.state not in [Task.States.complete, Task.States.failed]
            and task.created_time > one_hour_ago
        )
        if still_running:
            messages.add_message(
                request, messages.INFO, _("Recent export still in progress.")
            )
            return redirect(reverse("users:data"))
        NdjsonExporter.create(request.user).enqueue()
        messages.add_message(request, messages.INFO, _("Generating exports."))
        return redirect(reverse("users:data"))
    if not task or task.state != Task.States.complete:
        messages.add_message(
            request, messages.ERROR, _("Export file not available.")
        )
        return redirect(reverse("users:data"))
    response = HttpResponse()
    # strip MEDIA_ROOT prefix so nginx resolves the file under MEDIA_URL
    response["X-Accel-Redirect"] = (
        settings.MEDIA_URL + task.metadata["file"][len(settings.MEDIA_ROOT) :]
    )
    response["Content-Type"] = "application/zip"
    response["Content-Disposition"] = f'attachment; filename="{task.filename}.zip"'
    return response
@login_required
def sync_mastodon(request):
if request.method == "POST":