diff --git a/catalog/book/models.py b/catalog/book/models.py index c204e626..ec208e8a 100644 --- a/catalog/book/models.py +++ b/catalog/book/models.py @@ -39,6 +39,7 @@ from catalog.common import ( from catalog.common.models import ( LIST_OF_ONE_PLUS_STR_SCHEMA, LOCALE_CHOICES_JSONFORM, + ItemType, LanguageListField, ) from common.models import uniq @@ -119,6 +120,8 @@ class Edition(Item): WEB = "web", _("Web Fiction") OTHER = "other", _("Other") + schema = EditionSchema + type = ItemType.Edition category = ItemCategory.Book url_path = "book" diff --git a/catalog/common/models.py b/catalog/common/models.py index 98180034..990277ee 100644 --- a/catalog/common/models.py +++ b/catalog/common/models.py @@ -120,7 +120,7 @@ IdealIdTypes = [ class ItemType(models.TextChoices): - Book = "book", _("Book") # type:ignore[reportCallIssue] + Edition = "edition", _("Edition") # type:ignore[reportCallIssue] TVShow = "tvshow", _("TV Serie") # type:ignore[reportCallIssue] TVSeason = "tvseason", _("TV Season") # type:ignore[reportCallIssue] TVEpisode = "tvepisode", _("TV Episode") # type:ignore[reportCallIssue] @@ -346,6 +346,7 @@ class Item(PolymorphicModel): collections: QuerySet["Collection"] merged_from_items: QuerySet["Item"] merged_to_item_id: int + schema = ItemSchema category: ItemCategory # subclass must specify this url_path = "item" # subclass must specify this child_class = None # subclass may specify this to allow link to parent item @@ -514,15 +515,19 @@ class Item(PolymorphicModel): def ap_object_type(self) -> str: return self.get_ap_object_type() + @property + def ap_object(self): + return self.schema.from_orm(self).model_dump() + @property def ap_object_ref(self) -> dict[str, Any]: o = { "type": self.get_ap_object_type(), "href": self.absolute_url, - "name": self.title, + "name": self.display_title, } if self.has_cover(): - o["image"] = self.cover_image_url + o["image"] = self.cover_image_url or "" return o def log_action(self, changes: dict[str, Any]): diff --git a/catalog/game/models.py b/catalog/game/models.py index 28f030be..9290ed4a 100644 --- a/catalog/game/models.py +++ b/catalog/game/models.py @@ -44,6 +44,7 @@ class GameSchema(GameInSchema, BaseSchema): class Game(Item): type = ItemType.Game + schema = GameSchema category = ItemCategory.Game url_path = "game" igdb = PrimaryLookupIdDescriptor(IdType.IGDB) diff --git a/catalog/movie/models.py b/catalog/movie/models.py index db09fff4..03b4db13 100644 --- a/catalog/movie/models.py +++ b/catalog/movie/models.py @@ -35,6 +35,7 @@ class MovieSchema(MovieInSchema, BaseSchema): class Movie(Item): type = ItemType.Movie + schema = MovieSchema category = ItemCategory.Movie url_path = "movie" imdb = PrimaryLookupIdDescriptor(IdType.IMDB) diff --git a/catalog/music/models.py b/catalog/music/models.py index c629d972..f3ec4f33 100644 --- a/catalog/music/models.py +++ b/catalog/music/models.py @@ -33,6 +33,7 @@ class AlbumSchema(AlbumInSchema, BaseSchema): class Album(Item): + schema = AlbumSchema type = ItemType.Album url_path = "album" category = ItemCategory.Music diff --git a/catalog/performance/models.py b/catalog/performance/models.py index 11ba735d..91dc0189 100644 --- a/catalog/performance/models.py +++ b/catalog/performance/models.py @@ -104,6 +104,7 @@ def _crew_by_role(crew): class Performance(Item): if TYPE_CHECKING: productions: models.QuerySet["PerformanceProduction"] + schema = PerformanceSchema type = ItemType.Performance child_class = "PerformanceProduction" category = ItemCategory.Performance @@ -247,6 +248,7 @@ class Performance(Item): class PerformanceProduction(Item): + schema = PerformanceProductionSchema type = ItemType.PerformanceProduction category = ItemCategory.Performance url_path = "performance/production" diff --git a/catalog/podcast/models.py b/catalog/podcast/models.py index 75ed55a6..4061131c 100644 --- a/catalog/podcast/models.py +++ b/catalog/podcast/models.py @@ -13,7 +13,11 @@ from catalog.common import ( ItemInSchema, jsondata, ) -from catalog.common.models import LIST_OF_ONE_PLUS_STR_SCHEMA, LanguageListField +from catalog.common.models import ( + LIST_OF_ONE_PLUS_STR_SCHEMA, + ItemType, + LanguageListField, +) class PodcastInSchema(ItemInSchema): @@ -44,6 +48,8 @@ class PodcastEpisodeSchema(PodcastEpisodeInSchema, BaseSchema): class Podcast(Item): if TYPE_CHECKING: episodes: models.QuerySet["PodcastEpisode"] + type = ItemType.Podcast + schema = PodcastSchema category = ItemCategory.Podcast child_class = "PodcastEpisode" url_path = "podcast" @@ -118,6 +124,8 @@ class Podcast(Item): class PodcastEpisode(Item): + schema = PodcastEpisodeSchema + type = ItemType.PodcastEpisode category = ItemCategory.Podcast url_path = "podcast/episode" # uid = models.UUIDField(default=uuid.uuid4, editable=False, db_index=True) diff --git a/catalog/tv/models.py b/catalog/tv/models.py index 8dd1140d..1c85a1bd 100644 --- a/catalog/tv/models.py +++ b/catalog/tv/models.py @@ -97,6 +97,7 @@ class TVEpisodeSchema(ItemSchema): class TVShow(Item): if TYPE_CHECKING: seasons: QuerySet["TVSeason"] + schema = TVShowSchema type = ItemType.TVShow child_class = "TVSeason" category = ItemCategory.TV @@ -261,6 +262,7 @@ class TVShow(Item): class TVSeason(Item): if TYPE_CHECKING: episodes: models.QuerySet["TVEpisode"] + schema = TVSeasonSchema type = ItemType.TVSeason category = ItemCategory.TV url_path = "tv/season" @@ -480,6 +482,8 @@ class TVSeason(Item): class TVEpisode(Item): + schema = TVEpisodeSchema + type = ItemType.TVEpisode category = ItemCategory.TV url_path = "tv/episode" season = models.ForeignKey( diff --git a/journal/exporters/csv.py b/journal/exporters/csv.py index 23e7e0dc..5bbd9d78 100644 --- a/journal/exporters/csv.py +++ b/journal/exporters/csv.py @@ -87,6 +87,7 @@ class CsvExporter(Task): temp_dir = tempfile.mkdtemp() temp_folder_path = os.path.join(temp_dir, self.filename) os.makedirs(temp_folder_path) + total = 0 for category in [ ItemCategory.Movie, ItemCategory.TV, @@ -107,6 +108,7 @@ class CsvExporter(Task): writer = csv.writer(csvfile) writer.writerow(_mark_heading) for mark in marks: + total += 1 item = mark.item line = [ item.display_title, @@ -128,6 +130,7 @@ class CsvExporter(Task): writer = csv.writer(csvfile) writer.writerow(_review_heading) for review in reviews: + total += 1 item = review.item line = [ item.display_title, @@ -147,6 +150,7 @@ class CsvExporter(Task): .order_by("created_time") ) for note in notes: + total += 1 item = note.item line = [ item.display_title, @@ -166,5 +170,6 @@ class CsvExporter(Task): os.makedirs(os.path.dirname(filename)) shutil.make_archive(filename[:-4], "zip", temp_folder_path) self.metadata["file"] = filename + self.metadata["total"] = total self.message = "Export complete." self.save() diff --git a/journal/exporters/ndjson.py b/journal/exporters/ndjson.py index 6289989c..3415694b 100644 --- a/journal/exporters/ndjson.py +++ b/journal/exporters/ndjson.py @@ -1,36 +1,24 @@ +import json import os +import re +import shutil +import tempfile from django.conf import settings -from openpyxl import Workbook +from django.utils import timezone -from catalog.models import IdType, ItemCategory, TVEpisode +from catalog.common.downloaders import ProxiedImageDownloader from common.utils import GenerateDateUUIDMediaFilePath -from journal.models import Review, ShelfType, q_item_in_category +from journal.models import ShelfMember +from journal.models.collection import Collection +from journal.models.common import Content +from journal.models.note import Note +from journal.models.review import Review +from journal.models.shelf import ShelfLogEntry +from takahe.models import Post from users.models import Task -def _get_source_url(item): - res = ( - item.external_resources.all() - .filter( - id_type__in=[ - IdType.DoubanBook, - IdType.DoubanMovie, - IdType.DoubanMusic, - IdType.DoubanGame, - IdType.DoubanDrama, - ] - ) - .first() - ) - if not res: - res = item.external_resources.all().first() - return res.url if res else "" - - -# def export_marks_task(user): -# user.preference.export_status["marks_pending"] = True -# user.preference.save(update_fields=["export_status"]) class NdjsonExporter(Task): class Meta: app_label = "journal" # workaround bug in TypedModel @@ -40,302 +28,149 @@ class NdjsonExporter(Task): "file": None, "total": 0, } + ref_items = [] + + @property + def filename(self) -> str: + d = self.created_time.strftime("%Y%m%d%H%M%S") + return f"neodb_{self.user.username}_{d}_ndjson" + + def ref(self, item) -> str: + if item not in self.ref_items: + self.ref_items.append(item) + return item.absolute_url + + def get_header(self): + return { + "server": settings.SITE_DOMAIN, + "neodb_version": settings.NEODB_VERSION, + "username": self.user.username, + "actor": self.user.identity.actor_uri, + "request_time": self.created_time.isoformat(), + "created_time": timezone.now().isoformat(), + } def run(self): user = self.user + temp_dir = tempfile.mkdtemp() + temp_folder_path = os.path.join(temp_dir, self.filename) + os.makedirs(temp_folder_path) + attachment_path = os.path.join(temp_folder_path, "attachments") + os.makedirs(attachment_path, exist_ok=True) + + def _save_image(url): + if url.startswith("http"): + imgdl = ProxiedImageDownloader(url) + raw_img = imgdl.download().content + ext = imgdl.extention + file = GenerateDateUUIDMediaFilePath(f"x.{ext}", attachment_path) + with open(file, "wb") as binary_file: + binary_file.write(raw_img) + return file + elif url.startswith("/"): + p = os.path.abspath( + os.path.join(settings.MEDIA_ROOT, url[len(settings.MEDIA_URL) :]) + ) + if p.startswith(settings.MEDIA_ROOT): + shutil.copy2(p, attachment_path) + return p + return url + + filename = os.path.join(temp_folder_path, "journal.ndjson") + total = 0 + with open(filename, "w") as f: + f.write(json.dumps(self.get_header()) + "\n") + + for cls in list(Content.__subclasses__()): + pieces = cls.objects.filter(owner=user.identity) + for p in pieces: + total += 1 + self.ref(p.item) + o = { + "type": p.__class__.__name__, + "content": p.ap_object, + "visibility": p.visibility, + "metadata": p.metadata, + } + f.write(json.dumps(o, default=str) + "\n") + if cls == Review: + re.sub( + r"(?<=!\[\]\()([^)]+)(?=\))", + lambda x: _save_image(x[1]), + p.body, # type: ignore + ) + elif cls == Note and p.latest_post: + for a in p.latest_post.attachments.all(): + dest = os.path.join( + attachment_path, os.path.basename(a.file.name) + ) + shutil.copy2(a.file.path, dest) + + collections = Collection.objects.filter(owner=user.identity) + for c in collections: + total += 1 + o = { + "type": "Collection", + "content": c.ap_object, + "visibility": c.visibility, + "metadata": c.metadata, + "items": [ + {"item": self.ref(m.item), "metadata": m.metadata} + for m in c.ordered_members + ], + } + f.write(json.dumps(o, default=str) + "\n") + + marks = ShelfMember.objects.filter(owner=user.identity) + for m in marks: + total += 1 + o = { + "type": "ShelfMember", + "item": self.ref(m.item), + "status": m.shelf_type, + "visibility": m.visibility, + "metadata": m.metadata, + "published": self.created_time.isoformat(), + } + f.write(json.dumps(o, default=str) + "\n") + + logs = ShelfLogEntry.objects.filter(owner=user.identity) + for log in logs: + total += 1 + o = { + "type": "ShelfLog", + "item": self.ref(log.item), + "posts": list(log.all_post_ids()), + "timestamp": log.created_time, + } + f.write(json.dumps(o, default=str) + "\n") + + posts = Post.objects.filter(author_id=user.identity.pk).exclude( + type_data__has_key="object" + ) + + for p in posts: + total += 1 + o = {"type": "post", "post": p.to_mastodon_json()} + for a in p.attachments.all(): + dest = os.path.join(attachment_path, os.path.basename(a.file.name)) + shutil.copy2(a.file.path, dest) + f.write(json.dumps(o, default=str) + "\n") + + filename = os.path.join(temp_folder_path, "catalog.ndjson") + with open(filename, "w") as f: + f.write(json.dumps(self.get_header()) + "\n") + for item in self.ref_items: + f.write(json.dumps(item.ap_object, default=str) + "\n") filename = GenerateDateUUIDMediaFilePath( - "f.xlsx", settings.MEDIA_ROOT + "/" + settings.EXPORT_FILE_PATH_ROOT + "f.zip", settings.MEDIA_ROOT + "/" + settings.EXPORT_FILE_PATH_ROOT ) if not os.path.exists(os.path.dirname(filename)): os.makedirs(os.path.dirname(filename)) - heading = [ - "标题", - "简介", - "豆瓣评分", - "链接", - "创建时间", - "我的评分", - "标签", - "评论", - "NeoDB链接", - "其它ID", - ] - wb = Workbook() - # adding write_only=True will speed up but corrupt the xlsx and won't be importable - for status, label in [ - (ShelfType.COMPLETE, "看过"), - (ShelfType.PROGRESS, "在看"), - (ShelfType.WISHLIST, "想看"), - ]: - ws = wb.create_sheet(title=label) - shelf = user.shelf_manager.get_shelf(status) - q = q_item_in_category(ItemCategory.Movie) | q_item_in_category( - ItemCategory.TV - ) - marks = shelf.members.all().filter(q).order_by("created_time") - ws.append(heading) - for mm in marks: - mark = mm.mark - movie = mark.item - title = movie.display_title - if movie.__class__ == TVEpisode: - season_number = movie.season.season_number if movie.season else 0 - summary = f"S{season_number:02d}E{movie.episode_number:02d}" - else: - summary = ( - str(movie.year or "") - + " / " - + ",".join(movie.area or []) - + " / " - + ",".join(movie.genre or []) - + " / " - + ",".join(movie.director or []) - + " / " - + ",".join(movie.actor or []) - ) - tags = ",".join(mark.tags) - world_rating = (movie.rating / 2) if movie.rating else None - timestamp = mark.created_time.strftime("%Y-%m-%d %H:%M:%S") - my_rating = (mark.rating_grade / 2) if mark.rating_grade else None - text = mark.comment_text - source_url = _get_source_url(movie) - url = movie.absolute_url - line = [ - title, - summary, - world_rating, - source_url, - timestamp, - my_rating, - tags, - text, - url, - movie.imdb, - ] - ws.append(line) + shutil.make_archive(filename[:-4], "zip", temp_folder_path) - for status, label in [ - (ShelfType.COMPLETE, "听过"), - (ShelfType.PROGRESS, "在听"), - (ShelfType.WISHLIST, "想听"), - ]: - ws = wb.create_sheet(title=label) - shelf = user.shelf_manager.get_shelf(status) - q = q_item_in_category(ItemCategory.Music) - marks = shelf.members.all().filter(q).order_by("created_time") - ws.append(heading) - for mm in marks: - mark = mm.mark - album = mark.item - title = album.display_title - summary = ( - ",".join(album.artist) - + " / " - + (album.release_date.strftime("%Y") if album.release_date else "") - ) - tags = ",".join(mark.tags) - world_rating = (album.rating / 2) if album.rating else None - timestamp = mark.created_time.strftime("%Y-%m-%d %H:%M:%S") - my_rating = (mark.rating_grade / 2) if mark.rating_grade else None - text = mark.comment_text - source_url = _get_source_url(album) - url = album.absolute_url - line = [ - title, - summary, - world_rating, - source_url, - timestamp, - my_rating, - tags, - text, - url, - album.barcode, - ] - ws.append(line) - - for status, label in [ - (ShelfType.COMPLETE, "读过"), - (ShelfType.PROGRESS, "在读"), - (ShelfType.WISHLIST, "想读"), - ]: - ws = wb.create_sheet(title=label) - shelf = user.shelf_manager.get_shelf(status) - q = q_item_in_category(ItemCategory.Book) - marks = shelf.members.all().filter(q).order_by("created_time") - ws.append(heading) - for mm in marks: - mark = mm.mark - book = mark.item - title = book.display_title - summary = ( - ",".join(book.author or []) - + " / " - + str(book.pub_year or "") - + " / " - + (book.pub_house or "") - ) - tags = ",".join(mark.tags) - world_rating = (book.rating / 2) if book.rating else None - timestamp = mark.created_time.strftime("%Y-%m-%d %H:%M:%S") - my_rating = (mark.rating_grade / 2) if mark.rating_grade else None - text = mark.comment_text - source_url = _get_source_url(book) - url = book.absolute_url - line = [ - title, - summary, - world_rating, - source_url, - timestamp, - my_rating, - tags, - text, - url, - book.isbn, - ] - ws.append(line) - - for status, label in [ - (ShelfType.COMPLETE, "玩过"), - (ShelfType.PROGRESS, "在玩"), - (ShelfType.WISHLIST, "想玩"), - ]: - ws = wb.create_sheet(title=label) - shelf = user.shelf_manager.get_shelf(status) - q = q_item_in_category(ItemCategory.Game) - marks = shelf.members.all().filter(q).order_by("created_time") - ws.append(heading) - for mm in marks: - mark = mm.mark - game = mark.item - title = game.display_title - summary = ( - ",".join(game.genre or []) - + " / " - + ",".join(game.platform or []) - + " / " - + ( - game.release_date.strftime("%Y-%m-%d") - if game.release_date - else "" - ) - ) - tags = ",".join(mark.tags) - world_rating = (game.rating / 2) if game.rating else None - timestamp = mark.created_time.strftime("%Y-%m-%d %H:%M:%S") - my_rating = (mark.rating_grade / 2) if mark.rating_grade else None - text = mark.comment_text - source_url = _get_source_url(game) - url = game.absolute_url - line = [ - title, - summary, - world_rating, - source_url, - timestamp, - my_rating, - tags, - text, - url, - "", - ] - ws.append(line) - - for status, label in [ - (ShelfType.COMPLETE, "听过的播客"), - (ShelfType.PROGRESS, "在听的播客"), - (ShelfType.WISHLIST, "想听的播客"), - ]: - ws = wb.create_sheet(title=label) - shelf = user.shelf_manager.get_shelf(status) - q = q_item_in_category(ItemCategory.Podcast) - marks = shelf.members.all().filter(q).order_by("created_time") - ws.append(heading) - for mm in marks: - mark = mm.mark - podcast = mark.item - title = podcast.display_title - summary = ",".join(podcast.host or []) - tags = ",".join(mark.tags) - world_rating = (podcast.rating / 2) if podcast.rating else None - timestamp = mark.created_time.strftime("%Y-%m-%d %H:%M:%S") - my_rating = (mark.rating_grade / 2) if mark.rating_grade else None - text = mark.comment_text - source_url = _get_source_url(podcast) - url = podcast.absolute_url - line = [ - title, - summary, - world_rating, - source_url, - timestamp, - my_rating, - tags, - text, - url, - "", - ] - ws.append(line) - - review_heading = [ - "标题", - "评论对象", - "链接", - "创建时间", - "我的评分", - "类型", - "内容", - "评论对象原始链接", - "评论对象NeoDB链接", - ] - for category, label in [ - (ItemCategory.Movie, "影评"), - (ItemCategory.Book, "书评"), - (ItemCategory.Music, "乐评"), - (ItemCategory.Game, "游戏评论"), - (ItemCategory.Podcast, "播客评论"), - ]: - ws = wb.create_sheet(title=label) - q = q_item_in_category(category) - reviews = ( - Review.objects.filter(owner=user.identity) - .filter(q) - .order_by("created_time") - ) - ws.append(review_heading) - for review in reviews: - title = review.title - target = "《" + review.item.display_title + "》" - url = review.absolute_url - timestamp = review.created_time.strftime("%Y-%m-%d %H:%M:%S") - my_rating = ( - None # (mark.rating_grade / 2) if mark.rating_grade else None - ) - content = review.body - target_source_url = _get_source_url(review.item) - target_url = review.item.absolute_url - line = [ - title, - target, - url, - timestamp, - my_rating, - label, - content, - target_source_url, - target_url, - ] - ws.append(line) - - wb.save(filename=filename) self.metadata["file"] = filename + self.metadata["total"] = total self.message = "Export complete." self.save() - # user.preference.export_status["marks_pending"] = False - # user.preference.export_status["marks_file"] = filename - # user.preference.export_status["marks_date"] = datetime.now().strftime( - # "%Y-%m-%d %H:%M" - # ) - # user.preference.save(update_fields=["export_status"]) diff --git a/journal/management/commands/journal.py b/journal/management/commands/journal.py index 76800bda..69f3ec15 100644 --- a/journal/management/commands/journal.py +++ b/journal/management/commands/journal.py @@ -8,6 +8,7 @@ from django.utils import timezone from tqdm import tqdm from catalog.models import Item +from journal.exporters.ndjson import NdjsonExporter from journal.models import ( Collection, Content, @@ -27,6 +28,7 @@ _CONFIRM = "confirm deleting collection? [Y/N] " _HELP_TEXT = """ intergrity: check and fix remaining journal for merged and deleted items purge: delete invalid data (visibility=99) +export: run export task idx-info: show index information idx-init: check and create index if not exists idx-destroy: delete index @@ -51,6 +53,7 @@ class Command(BaseCommand): choices=[ "integrity", "purge", + "export", "idx-info", "idx-init", "idx-alt", @@ -111,6 +114,18 @@ class Command(BaseCommand): if self.fix: update_journal_for_merged_item(i.url) + def export(self, owner_ids): + users = User.objects.filter(identity__in=owner_ids) + for user in users: + task = NdjsonExporter.create(user=user) + self.stdout.write(f"exporting for {user} (task {task.pk})...") + ok = task._run() + if ok: + self.stdout.write(f"complete {task.metadata['file']}") + else: + self.stdout.write("failed") + task.delete() + def handle( self, action, @@ -152,6 +167,9 @@ class Command(BaseCommand): cls.objects.filter(visibility=99).delete() self.stdout.write(self.style.SUCCESS("Done.")) + case "export": + self.export(owners) + case "idx-destroy": if yes or input(_CONFIRM).upper().startswith("Y"): index.delete_collection() diff --git a/journal/models/utils.py b/journal/models/utils.py index 2faf9f24..8ae35d0a 100644 --- a/journal/models/utils.py +++ b/journal/models/utils.py @@ -10,6 +10,7 @@ from .collection import Collection, CollectionMember, FeaturedCollection from .comment import Comment from .common import Content, Debris from .itemlist import ListMember +from .note import Note from .rating import Rating from .review import Review from .shelf import ShelfLogEntry, ShelfMember @@ -31,6 +32,7 @@ def remove_data_by_user(owner: APIdentity): Review.objects.filter(owner=owner).delete() TagMember.objects.filter(owner=owner).delete() Tag.objects.filter(owner=owner).delete() + Note.objects.filter(owner=owner).delete() CollectionMember.objects.filter(owner=owner).delete() Collection.objects.filter(owner=owner).delete() FeaturedCollection.objects.filter(owner=owner).delete() diff --git a/users/management/commands/task.py b/users/management/commands/task.py index 8384e51d..bd93fc58 100644 --- a/users/management/commands/task.py +++ b/users/management/commands/task.py @@ -52,7 +52,7 @@ class Command(BaseCommand): for task in tqdm(tasks): task.state = Task.States.pending task.save(update_fields=["state"]) - Task._run(task.pk) + Task._execute(task.pk) if options["requeue"]: for task in tqdm(tasks): task.state = Task.States.pending diff --git a/users/models/task.py b/users/models/task.py index 0fb2a2a0..85fab411 100644 --- a/users/models/task.py +++ b/users/models/task.py @@ -1,3 +1,5 @@ +from typing import Self + import django_rq from auditlog.context import set_actor from django.db import models @@ -46,14 +48,27 @@ class Task(TypedModel): return cls.objects.filter(user=user).order_by("-created_time").first() @classmethod - def create(cls, user: User, **kwargs) -> "Task": + def create(cls, user: User, **kwargs) -> Self: d = cls.DefaultMetadata.copy() d.update(kwargs) t = cls.objects.create(user=user, metadata=d) return t + def _run(self) -> bool: + activate_language_for_user(self.user) + with set_actor(self.user): + try: + self.run() + return True + except Exception as e: + logger.exception( + f"error running {self.__class__}", + extra={"exception": e, "task": self.pk}, + ) + return False + @classmethod - def _run(cls, task_id: int): + def _execute(cls, task_id: int): task = cls.objects.get(pk=task_id) logger.info(f"running {task}") if task.state != cls.States.pending: @@ -63,25 +78,15 @@ class Task(TypedModel): return task.state = cls.States.started task.save() - activate_language_for_user(task.user) - with set_actor(task.user): - try: - task.run() - ok = True - except Exception as e: - logger.exception( - f"error running {cls.__name__}", - extra={"exception": e, "task": task_id}, - ) - ok = False - task.refresh_from_db() - task.state = cls.States.complete if ok else cls.States.failed - task.save() - task.notify() + ok = task._run() + task.refresh_from_db() + task.state = cls.States.complete if ok else cls.States.failed + task.save() + task.notify() def enqueue(self): return django_rq.get_queue(self.TaskQueue).enqueue( - self._run, self.pk, job_id=self.job_id + self._execute, self.pk, job_id=self.job_id ) def notify(self) -> None: diff --git a/users/templates/users/data.html b/users/templates/users/data.html index 40b2665c..0652d741 100644 --- a/users/templates/users/data.html +++ b/users/templates/users/data.html @@ -250,6 +250,23 @@ {% endif %} {% endif %} +
+
+ {% csrf_token %} + + {% if ndjson_export_task %} +
+ {% trans 'Last export' %}: {{ ndjson_export_task.created_time }} + {% trans 'Status' %}: {{ ndjson_export_task.get_state_display }} +
+ {{ ndjson_export_task.message }} + {% if ndjson_export_task.metadata.file %} + {% trans 'Download' %} + {% endif %} + {% endif %} +
diff --git a/users/urls.py b/users/urls.py index 9e53a57b..e6ad7af2 100644 --- a/users/urls.py +++ b/users/urls.py @@ -18,6 +18,7 @@ urlpatterns = [ path("data/export/reviews", export_reviews, name="export_reviews"), path("data/export/marks", export_marks, name="export_marks"), path("data/export/csv", export_csv, name="export_csv"), + path("data/export/ndjson", export_ndjson, name="export_ndjson"), path("data/sync_mastodon", sync_mastodon, name="sync_mastodon"), path( "data/sync_mastodon_preference", diff --git a/users/views/data.py b/users/views/data.py index 6ac555f0..146b0551 100644 --- a/users/views/data.py +++ b/users/views/data.py @@ -14,6 +14,7 @@ from django.utils.translation import gettext_lazy as _ from common.utils import GenerateDateUUIDMediaFilePath from journal.exporters import DoufenExporter from journal.exporters.csv import CsvExporter +from journal.exporters.ndjson import NdjsonExporter from journal.importers import ( DoubanImporter, GoodreadsImporter, @@ -99,6 +100,7 @@ def data(request): "import_task": DoubanImporter.latest_task(request.user), "export_task": DoufenExporter.latest_task(request.user), "csv_export_task": CsvExporter.latest_task(request.user), + "ndjson_export_task": NdjsonExporter.latest_task(request.user), "letterboxd_task": LetterboxdImporter.latest_task(request.user), "goodreads_task": GoodreadsImporter.latest_task(request.user), "years": years, @@ -183,6 +185,38 @@ def export_csv(request): return response +@login_required +def export_ndjson(request): + if request.method == "POST": + task = NdjsonExporter.latest_task(request.user) + if ( + task + and task.state not in [Task.States.complete, Task.States.failed] + and task.created_time > (timezone.now() - datetime.timedelta(hours=1)) + ): + messages.add_message( + request, messages.INFO, _("Recent export still in progress.") + ) + return redirect(reverse("users:data")) + NdjsonExporter.create(request.user).enqueue() + messages.add_message(request, messages.INFO, _("Generating exports.")) + return redirect(reverse("users:data")) + else: + task = NdjsonExporter.latest_task(request.user) + if not task or task.state != Task.States.complete: + messages.add_message( + request, messages.ERROR, _("Export file not available.") + ) + return redirect(reverse("users:data")) + response = HttpResponse() + response["X-Accel-Redirect"] = ( + settings.MEDIA_URL + task.metadata["file"][len(settings.MEDIA_ROOT) :] + ) + response["Content-Type"] = "application/zip" + response["Content-Disposition"] = f'attachment; filename="{task.filename}.zip"' + return response + + @login_required def sync_mastodon(request): if request.method == "POST":