export ndjson

This commit is contained in:
Your Name 2025-01-28 21:38:02 -05:00 committed by Henri Dickson
parent 409ba0a6fd
commit 26858ee905
17 changed files with 277 additions and 335 deletions

View file

@ -39,6 +39,7 @@ from catalog.common import (
from catalog.common.models import ( from catalog.common.models import (
LIST_OF_ONE_PLUS_STR_SCHEMA, LIST_OF_ONE_PLUS_STR_SCHEMA,
LOCALE_CHOICES_JSONFORM, LOCALE_CHOICES_JSONFORM,
ItemType,
LanguageListField, LanguageListField,
) )
from common.models import uniq from common.models import uniq
@ -119,6 +120,8 @@ class Edition(Item):
WEB = "web", _("Web Fiction") WEB = "web", _("Web Fiction")
OTHER = "other", _("Other") OTHER = "other", _("Other")
schema = EditionSchema
type = ItemType.Edition
category = ItemCategory.Book category = ItemCategory.Book
url_path = "book" url_path = "book"

View file

@ -120,7 +120,7 @@ IdealIdTypes = [
class ItemType(models.TextChoices): class ItemType(models.TextChoices):
Book = "book", _("Book") # type:ignore[reportCallIssue] Edition = "edition", _("Edition") # type:ignore[reportCallIssue]
TVShow = "tvshow", _("TV Serie") # type:ignore[reportCallIssue] TVShow = "tvshow", _("TV Serie") # type:ignore[reportCallIssue]
TVSeason = "tvseason", _("TV Season") # type:ignore[reportCallIssue] TVSeason = "tvseason", _("TV Season") # type:ignore[reportCallIssue]
TVEpisode = "tvepisode", _("TV Episode") # type:ignore[reportCallIssue] TVEpisode = "tvepisode", _("TV Episode") # type:ignore[reportCallIssue]
@ -346,6 +346,7 @@ class Item(PolymorphicModel):
collections: QuerySet["Collection"] collections: QuerySet["Collection"]
merged_from_items: QuerySet["Item"] merged_from_items: QuerySet["Item"]
merged_to_item_id: int merged_to_item_id: int
schema = ItemSchema
category: ItemCategory # subclass must specify this category: ItemCategory # subclass must specify this
url_path = "item" # subclass must specify this url_path = "item" # subclass must specify this
child_class = None # subclass may specify this to allow link to parent item child_class = None # subclass may specify this to allow link to parent item
@ -514,15 +515,19 @@ class Item(PolymorphicModel):
def ap_object_type(self) -> str: def ap_object_type(self) -> str:
return self.get_ap_object_type() return self.get_ap_object_type()
@property
def ap_object(self):
return self.schema.from_orm(self).model_dump()
@property @property
def ap_object_ref(self) -> dict[str, Any]: def ap_object_ref(self) -> dict[str, Any]:
o = { o = {
"type": self.get_ap_object_type(), "type": self.get_ap_object_type(),
"href": self.absolute_url, "href": self.absolute_url,
"name": self.title, "name": self.display_title,
} }
if self.has_cover(): if self.has_cover():
o["image"] = self.cover_image_url o["image"] = self.cover_image_url or ""
return o return o
def log_action(self, changes: dict[str, Any]): def log_action(self, changes: dict[str, Any]):

View file

@ -44,6 +44,7 @@ class GameSchema(GameInSchema, BaseSchema):
class Game(Item): class Game(Item):
type = ItemType.Game type = ItemType.Game
schema = GameSchema
category = ItemCategory.Game category = ItemCategory.Game
url_path = "game" url_path = "game"
igdb = PrimaryLookupIdDescriptor(IdType.IGDB) igdb = PrimaryLookupIdDescriptor(IdType.IGDB)

View file

@ -35,6 +35,7 @@ class MovieSchema(MovieInSchema, BaseSchema):
class Movie(Item): class Movie(Item):
type = ItemType.Movie type = ItemType.Movie
schema = MovieSchema
category = ItemCategory.Movie category = ItemCategory.Movie
url_path = "movie" url_path = "movie"
imdb = PrimaryLookupIdDescriptor(IdType.IMDB) imdb = PrimaryLookupIdDescriptor(IdType.IMDB)

View file

@ -33,6 +33,7 @@ class AlbumSchema(AlbumInSchema, BaseSchema):
class Album(Item): class Album(Item):
schema = AlbumSchema
type = ItemType.Album type = ItemType.Album
url_path = "album" url_path = "album"
category = ItemCategory.Music category = ItemCategory.Music

View file

@ -104,6 +104,7 @@ def _crew_by_role(crew):
class Performance(Item): class Performance(Item):
if TYPE_CHECKING: if TYPE_CHECKING:
productions: models.QuerySet["PerformanceProduction"] productions: models.QuerySet["PerformanceProduction"]
schema = PerformanceSchema
type = ItemType.Performance type = ItemType.Performance
child_class = "PerformanceProduction" child_class = "PerformanceProduction"
category = ItemCategory.Performance category = ItemCategory.Performance
@ -247,6 +248,7 @@ class Performance(Item):
class PerformanceProduction(Item): class PerformanceProduction(Item):
schema = PerformanceProductionSchema
type = ItemType.PerformanceProduction type = ItemType.PerformanceProduction
category = ItemCategory.Performance category = ItemCategory.Performance
url_path = "performance/production" url_path = "performance/production"

View file

@ -13,7 +13,11 @@ from catalog.common import (
ItemInSchema, ItemInSchema,
jsondata, jsondata,
) )
from catalog.common.models import LIST_OF_ONE_PLUS_STR_SCHEMA, LanguageListField from catalog.common.models import (
LIST_OF_ONE_PLUS_STR_SCHEMA,
ItemType,
LanguageListField,
)
class PodcastInSchema(ItemInSchema): class PodcastInSchema(ItemInSchema):
@ -44,6 +48,8 @@ class PodcastEpisodeSchema(PodcastEpisodeInSchema, BaseSchema):
class Podcast(Item): class Podcast(Item):
if TYPE_CHECKING: if TYPE_CHECKING:
episodes: models.QuerySet["PodcastEpisode"] episodes: models.QuerySet["PodcastEpisode"]
type = ItemType.Podcast
schema = PodcastSchema
category = ItemCategory.Podcast category = ItemCategory.Podcast
child_class = "PodcastEpisode" child_class = "PodcastEpisode"
url_path = "podcast" url_path = "podcast"
@ -118,6 +124,8 @@ class Podcast(Item):
class PodcastEpisode(Item): class PodcastEpisode(Item):
schema = PodcastEpisodeSchema
type = ItemType.PodcastEpisode
category = ItemCategory.Podcast category = ItemCategory.Podcast
url_path = "podcast/episode" url_path = "podcast/episode"
# uid = models.UUIDField(default=uuid.uuid4, editable=False, db_index=True) # uid = models.UUIDField(default=uuid.uuid4, editable=False, db_index=True)

View file

@ -97,6 +97,7 @@ class TVEpisodeSchema(ItemSchema):
class TVShow(Item): class TVShow(Item):
if TYPE_CHECKING: if TYPE_CHECKING:
seasons: QuerySet["TVSeason"] seasons: QuerySet["TVSeason"]
schema = TVShowSchema
type = ItemType.TVShow type = ItemType.TVShow
child_class = "TVSeason" child_class = "TVSeason"
category = ItemCategory.TV category = ItemCategory.TV
@ -261,6 +262,7 @@ class TVShow(Item):
class TVSeason(Item): class TVSeason(Item):
if TYPE_CHECKING: if TYPE_CHECKING:
episodes: models.QuerySet["TVEpisode"] episodes: models.QuerySet["TVEpisode"]
schema = TVSeasonSchema
type = ItemType.TVSeason type = ItemType.TVSeason
category = ItemCategory.TV category = ItemCategory.TV
url_path = "tv/season" url_path = "tv/season"
@ -480,6 +482,8 @@ class TVSeason(Item):
class TVEpisode(Item): class TVEpisode(Item):
schema = TVEpisodeSchema
type = ItemType.TVEpisode
category = ItemCategory.TV category = ItemCategory.TV
url_path = "tv/episode" url_path = "tv/episode"
season = models.ForeignKey( season = models.ForeignKey(

View file

@ -87,6 +87,7 @@ class CsvExporter(Task):
temp_dir = tempfile.mkdtemp() temp_dir = tempfile.mkdtemp()
temp_folder_path = os.path.join(temp_dir, self.filename) temp_folder_path = os.path.join(temp_dir, self.filename)
os.makedirs(temp_folder_path) os.makedirs(temp_folder_path)
total = 0
for category in [ for category in [
ItemCategory.Movie, ItemCategory.Movie,
ItemCategory.TV, ItemCategory.TV,
@ -107,6 +108,7 @@ class CsvExporter(Task):
writer = csv.writer(csvfile) writer = csv.writer(csvfile)
writer.writerow(_mark_heading) writer.writerow(_mark_heading)
for mark in marks: for mark in marks:
total += 1
item = mark.item item = mark.item
line = [ line = [
item.display_title, item.display_title,
@ -128,6 +130,7 @@ class CsvExporter(Task):
writer = csv.writer(csvfile) writer = csv.writer(csvfile)
writer.writerow(_review_heading) writer.writerow(_review_heading)
for review in reviews: for review in reviews:
total += 1
item = review.item item = review.item
line = [ line = [
item.display_title, item.display_title,
@ -147,6 +150,7 @@ class CsvExporter(Task):
.order_by("created_time") .order_by("created_time")
) )
for note in notes: for note in notes:
total += 1
item = note.item item = note.item
line = [ line = [
item.display_title, item.display_title,
@ -166,5 +170,6 @@ class CsvExporter(Task):
os.makedirs(os.path.dirname(filename)) os.makedirs(os.path.dirname(filename))
shutil.make_archive(filename[:-4], "zip", temp_folder_path) shutil.make_archive(filename[:-4], "zip", temp_folder_path)
self.metadata["file"] = filename self.metadata["file"] = filename
self.metadata["total"] = total
self.message = "Export complete." self.message = "Export complete."
self.save() self.save()

View file

@ -1,36 +1,24 @@
import json
import os import os
import re
import shutil
import tempfile
from django.conf import settings from django.conf import settings
from openpyxl import Workbook from django.utils import timezone
from catalog.models import IdType, ItemCategory, TVEpisode from catalog.common.downloaders import ProxiedImageDownloader
from common.utils import GenerateDateUUIDMediaFilePath from common.utils import GenerateDateUUIDMediaFilePath
from journal.models import Review, ShelfType, q_item_in_category from journal.models import ShelfMember
from journal.models.collection import Collection
from journal.models.common import Content
from journal.models.note import Note
from journal.models.review import Review
from journal.models.shelf import ShelfLogEntry
from takahe.models import Post
from users.models import Task from users.models import Task
def _get_source_url(item):
res = (
item.external_resources.all()
.filter(
id_type__in=[
IdType.DoubanBook,
IdType.DoubanMovie,
IdType.DoubanMusic,
IdType.DoubanGame,
IdType.DoubanDrama,
]
)
.first()
)
if not res:
res = item.external_resources.all().first()
return res.url if res else ""
# def export_marks_task(user):
# user.preference.export_status["marks_pending"] = True
# user.preference.save(update_fields=["export_status"])
class NdjsonExporter(Task): class NdjsonExporter(Task):
class Meta: class Meta:
app_label = "journal" # workaround bug in TypedModel app_label = "journal" # workaround bug in TypedModel
@ -40,302 +28,149 @@ class NdjsonExporter(Task):
"file": None, "file": None,
"total": 0, "total": 0,
} }
ref_items = []
@property
def filename(self) -> str:
    """Base name (without extension) for this export.

    Built from the owner's username and the task's creation time formatted
    as ``YYYYmmddHHMMSS``. It names the temporary export folder and, with
    ``.zip`` appended, the file offered for download.
    """
    stamp = self.created_time.strftime("%Y%m%d%H%M%S")
    username = self.user.username
    return f"neodb_{username}_{stamp}_ndjson"
def ref(self, item) -> str:
    """Remember ``item`` as referenced by this export and return its URL.

    Each distinct item is recorded once, in first-seen order, in
    ``self.ref_items``; ``run`` later writes every recorded item to
    ``catalog.ndjson``.

    Args:
        item: catalog item being referenced; must expose ``absolute_url``.

    Returns:
        The item's ``absolute_url``.
    """
    # ``ref_items`` is declared on the class as one list object. Appending to
    # it through ``self`` would grow that shared list, so items referenced by
    # one exporter would leak into every other exporter created in the same
    # process (e.g. the management command exporting several users in a
    # loop). Give each instance its own list the first time it is needed.
    items = self.__dict__.get("ref_items")
    if items is None:
        items = self.ref_items = []
    if item not in items:
        items.append(item)
    return item.absolute_url
def get_header(self):
    """Build the metadata record written as the first line of each NDJSON file.

    Returns:
        A dict naming the server and NeoDB version, the exporting user and
        their actor URI, when the export was requested (task creation time)
        and when this header was generated (current time), both ISO 8601.
    """
    header = {}
    header["server"] = settings.SITE_DOMAIN
    header["neodb_version"] = settings.NEODB_VERSION
    header["username"] = self.user.username
    header["actor"] = self.user.identity.actor_uri
    header["request_time"] = self.created_time.isoformat()
    header["created_time"] = timezone.now().isoformat()
    return header
def run(self): def run(self):
user = self.user user = self.user
temp_dir = tempfile.mkdtemp()
temp_folder_path = os.path.join(temp_dir, self.filename)
os.makedirs(temp_folder_path)
attachment_path = os.path.join(temp_folder_path, "attachments")
os.makedirs(attachment_path, exist_ok=True)
def _save_image(url):
    """Store a copy of the image behind ``url`` in the attachments folder.

    Remote (``http...``) images are downloaded through
    ``ProxiedImageDownloader``; site-local URLs under ``MEDIA_URL`` are
    copied from ``MEDIA_ROOT``. Anything else is left alone.

    Args:
        url: image URL taken from user-written content.

    Returns:
        The path of the downloaded file, the resolved local source path, or
        ``url`` itself when nothing could be resolved.
    """
    if url.startswith("http"):
        imgdl = ProxiedImageDownloader(url)
        raw_img = imgdl.download().content
        ext = imgdl.extention
        file = GenerateDateUUIDMediaFilePath(f"x.{ext}", attachment_path)
        # The generated path may point into a folder that does not exist
        # yet; run() guards the archive path from the same helper this way.
        folder = os.path.dirname(file)
        if folder:
            os.makedirs(folder, exist_ok=True)
        with open(file, "wb") as binary_file:
            binary_file.write(raw_img)
        return file
    elif url.startswith("/"):
        # Only URLs that really live under MEDIA_URL map onto MEDIA_ROOT;
        # slicing any other path by len(MEDIA_URL) yields a meaningless name.
        if not url.startswith(settings.MEDIA_URL):
            return url
        media_root = os.path.abspath(settings.MEDIA_ROOT)
        p = os.path.abspath(
            os.path.join(settings.MEDIA_ROOT, url[len(settings.MEDIA_URL) :])
        )
        # Compare against the root *with* a trailing separator so a sibling
        # such as "<root>_private/..." cannot pass a bare prefix test.
        inside_media = p.startswith(os.path.join(media_root, ""))
        # A dangling link in old content must not abort the whole export.
        if inside_media and os.path.isfile(p):
            shutil.copy2(p, attachment_path)
        return p
    return url
filename = os.path.join(temp_folder_path, "journal.ndjson")
total = 0
with open(filename, "w") as f:
f.write(json.dumps(self.get_header()) + "\n")
for cls in list(Content.__subclasses__()):
pieces = cls.objects.filter(owner=user.identity)
for p in pieces:
total += 1
self.ref(p.item)
o = {
"type": p.__class__.__name__,
"content": p.ap_object,
"visibility": p.visibility,
"metadata": p.metadata,
}
f.write(json.dumps(o, default=str) + "\n")
if cls == Review:
re.sub(
r"(?<=!\[\]\()([^)]+)(?=\))",
lambda x: _save_image(x[1]),
p.body, # type: ignore
)
elif cls == Note and p.latest_post:
for a in p.latest_post.attachments.all():
dest = os.path.join(
attachment_path, os.path.basename(a.file.name)
)
shutil.copy2(a.file.path, dest)
collections = Collection.objects.filter(owner=user.identity)
for c in collections:
total += 1
o = {
"type": "Collection",
"content": c.ap_object,
"visibility": c.visibility,
"metadata": c.metadata,
"items": [
{"item": self.ref(m.item), "metadata": m.metadata}
for m in c.ordered_members
],
}
f.write(json.dumps(o, default=str) + "\n")
marks = ShelfMember.objects.filter(owner=user.identity)
for m in marks:
total += 1
o = {
"type": "ShelfMember",
"item": self.ref(m.item),
"status": m.shelf_type,
"visibility": m.visibility,
"metadata": m.metadata,
"published": self.created_time.isoformat(),
}
f.write(json.dumps(o, default=str) + "\n")
logs = ShelfLogEntry.objects.filter(owner=user.identity)
for log in logs:
total += 1
o = {
"type": "ShelfLog",
"item": self.ref(log.item),
"posts": list(log.all_post_ids()),
"timestamp": log.created_time,
}
f.write(json.dumps(o, default=str) + "\n")
posts = Post.objects.filter(author_id=user.identity.pk).exclude(
type_data__has_key="object"
)
for p in posts:
total += 1
o = {"type": "post", "post": p.to_mastodon_json()}
for a in p.attachments.all():
dest = os.path.join(attachment_path, os.path.basename(a.file.name))
shutil.copy2(a.file.path, dest)
f.write(json.dumps(o, default=str) + "\n")
filename = os.path.join(temp_folder_path, "catalog.ndjson")
with open(filename, "w") as f:
f.write(json.dumps(self.get_header()) + "\n")
for item in self.ref_items:
f.write(json.dumps(item.ap_object, default=str) + "\n")
filename = GenerateDateUUIDMediaFilePath( filename = GenerateDateUUIDMediaFilePath(
"f.xlsx", settings.MEDIA_ROOT + "/" + settings.EXPORT_FILE_PATH_ROOT "f.zip", settings.MEDIA_ROOT + "/" + settings.EXPORT_FILE_PATH_ROOT
) )
if not os.path.exists(os.path.dirname(filename)): if not os.path.exists(os.path.dirname(filename)):
os.makedirs(os.path.dirname(filename)) os.makedirs(os.path.dirname(filename))
heading = [ shutil.make_archive(filename[:-4], "zip", temp_folder_path)
"标题",
"简介",
"豆瓣评分",
"链接",
"创建时间",
"我的评分",
"标签",
"评论",
"NeoDB链接",
"其它ID",
]
wb = Workbook()
# adding write_only=True will speed up but corrupt the xlsx and won't be importable
for status, label in [
(ShelfType.COMPLETE, "看过"),
(ShelfType.PROGRESS, "在看"),
(ShelfType.WISHLIST, "想看"),
]:
ws = wb.create_sheet(title=label)
shelf = user.shelf_manager.get_shelf(status)
q = q_item_in_category(ItemCategory.Movie) | q_item_in_category(
ItemCategory.TV
)
marks = shelf.members.all().filter(q).order_by("created_time")
ws.append(heading)
for mm in marks:
mark = mm.mark
movie = mark.item
title = movie.display_title
if movie.__class__ == TVEpisode:
season_number = movie.season.season_number if movie.season else 0
summary = f"S{season_number:02d}E{movie.episode_number:02d}"
else:
summary = (
str(movie.year or "")
+ " / "
+ ",".join(movie.area or [])
+ " / "
+ ",".join(movie.genre or [])
+ " / "
+ ",".join(movie.director or [])
+ " / "
+ ",".join(movie.actor or [])
)
tags = ",".join(mark.tags)
world_rating = (movie.rating / 2) if movie.rating else None
timestamp = mark.created_time.strftime("%Y-%m-%d %H:%M:%S")
my_rating = (mark.rating_grade / 2) if mark.rating_grade else None
text = mark.comment_text
source_url = _get_source_url(movie)
url = movie.absolute_url
line = [
title,
summary,
world_rating,
source_url,
timestamp,
my_rating,
tags,
text,
url,
movie.imdb,
]
ws.append(line)
for status, label in [
(ShelfType.COMPLETE, "听过"),
(ShelfType.PROGRESS, "在听"),
(ShelfType.WISHLIST, "想听"),
]:
ws = wb.create_sheet(title=label)
shelf = user.shelf_manager.get_shelf(status)
q = q_item_in_category(ItemCategory.Music)
marks = shelf.members.all().filter(q).order_by("created_time")
ws.append(heading)
for mm in marks:
mark = mm.mark
album = mark.item
title = album.display_title
summary = (
",".join(album.artist)
+ " / "
+ (album.release_date.strftime("%Y") if album.release_date else "")
)
tags = ",".join(mark.tags)
world_rating = (album.rating / 2) if album.rating else None
timestamp = mark.created_time.strftime("%Y-%m-%d %H:%M:%S")
my_rating = (mark.rating_grade / 2) if mark.rating_grade else None
text = mark.comment_text
source_url = _get_source_url(album)
url = album.absolute_url
line = [
title,
summary,
world_rating,
source_url,
timestamp,
my_rating,
tags,
text,
url,
album.barcode,
]
ws.append(line)
for status, label in [
(ShelfType.COMPLETE, "读过"),
(ShelfType.PROGRESS, "在读"),
(ShelfType.WISHLIST, "想读"),
]:
ws = wb.create_sheet(title=label)
shelf = user.shelf_manager.get_shelf(status)
q = q_item_in_category(ItemCategory.Book)
marks = shelf.members.all().filter(q).order_by("created_time")
ws.append(heading)
for mm in marks:
mark = mm.mark
book = mark.item
title = book.display_title
summary = (
",".join(book.author or [])
+ " / "
+ str(book.pub_year or "")
+ " / "
+ (book.pub_house or "")
)
tags = ",".join(mark.tags)
world_rating = (book.rating / 2) if book.rating else None
timestamp = mark.created_time.strftime("%Y-%m-%d %H:%M:%S")
my_rating = (mark.rating_grade / 2) if mark.rating_grade else None
text = mark.comment_text
source_url = _get_source_url(book)
url = book.absolute_url
line = [
title,
summary,
world_rating,
source_url,
timestamp,
my_rating,
tags,
text,
url,
book.isbn,
]
ws.append(line)
for status, label in [
(ShelfType.COMPLETE, "玩过"),
(ShelfType.PROGRESS, "在玩"),
(ShelfType.WISHLIST, "想玩"),
]:
ws = wb.create_sheet(title=label)
shelf = user.shelf_manager.get_shelf(status)
q = q_item_in_category(ItemCategory.Game)
marks = shelf.members.all().filter(q).order_by("created_time")
ws.append(heading)
for mm in marks:
mark = mm.mark
game = mark.item
title = game.display_title
summary = (
",".join(game.genre or [])
+ " / "
+ ",".join(game.platform or [])
+ " / "
+ (
game.release_date.strftime("%Y-%m-%d")
if game.release_date
else ""
)
)
tags = ",".join(mark.tags)
world_rating = (game.rating / 2) if game.rating else None
timestamp = mark.created_time.strftime("%Y-%m-%d %H:%M:%S")
my_rating = (mark.rating_grade / 2) if mark.rating_grade else None
text = mark.comment_text
source_url = _get_source_url(game)
url = game.absolute_url
line = [
title,
summary,
world_rating,
source_url,
timestamp,
my_rating,
tags,
text,
url,
"",
]
ws.append(line)
for status, label in [
(ShelfType.COMPLETE, "听过的播客"),
(ShelfType.PROGRESS, "在听的播客"),
(ShelfType.WISHLIST, "想听的播客"),
]:
ws = wb.create_sheet(title=label)
shelf = user.shelf_manager.get_shelf(status)
q = q_item_in_category(ItemCategory.Podcast)
marks = shelf.members.all().filter(q).order_by("created_time")
ws.append(heading)
for mm in marks:
mark = mm.mark
podcast = mark.item
title = podcast.display_title
summary = ",".join(podcast.host or [])
tags = ",".join(mark.tags)
world_rating = (podcast.rating / 2) if podcast.rating else None
timestamp = mark.created_time.strftime("%Y-%m-%d %H:%M:%S")
my_rating = (mark.rating_grade / 2) if mark.rating_grade else None
text = mark.comment_text
source_url = _get_source_url(podcast)
url = podcast.absolute_url
line = [
title,
summary,
world_rating,
source_url,
timestamp,
my_rating,
tags,
text,
url,
"",
]
ws.append(line)
review_heading = [
"标题",
"评论对象",
"链接",
"创建时间",
"我的评分",
"类型",
"内容",
"评论对象原始链接",
"评论对象NeoDB链接",
]
for category, label in [
(ItemCategory.Movie, "影评"),
(ItemCategory.Book, "书评"),
(ItemCategory.Music, "乐评"),
(ItemCategory.Game, "游戏评论"),
(ItemCategory.Podcast, "播客评论"),
]:
ws = wb.create_sheet(title=label)
q = q_item_in_category(category)
reviews = (
Review.objects.filter(owner=user.identity)
.filter(q)
.order_by("created_time")
)
ws.append(review_heading)
for review in reviews:
title = review.title
target = "" + review.item.display_title + ""
url = review.absolute_url
timestamp = review.created_time.strftime("%Y-%m-%d %H:%M:%S")
my_rating = (
None # (mark.rating_grade / 2) if mark.rating_grade else None
)
content = review.body
target_source_url = _get_source_url(review.item)
target_url = review.item.absolute_url
line = [
title,
target,
url,
timestamp,
my_rating,
label,
content,
target_source_url,
target_url,
]
ws.append(line)
wb.save(filename=filename)
self.metadata["file"] = filename self.metadata["file"] = filename
self.metadata["total"] = total
self.message = "Export complete." self.message = "Export complete."
self.save() self.save()
# user.preference.export_status["marks_pending"] = False
# user.preference.export_status["marks_file"] = filename
# user.preference.export_status["marks_date"] = datetime.now().strftime(
# "%Y-%m-%d %H:%M"
# )
# user.preference.save(update_fields=["export_status"])

View file

@ -8,6 +8,7 @@ from django.utils import timezone
from tqdm import tqdm from tqdm import tqdm
from catalog.models import Item from catalog.models import Item
from journal.exporters.ndjson import NdjsonExporter
from journal.models import ( from journal.models import (
Collection, Collection,
Content, Content,
@ -27,6 +28,7 @@ _CONFIRM = "confirm deleting collection? [Y/N] "
_HELP_TEXT = """ _HELP_TEXT = """
intergrity: check and fix remaining journal for merged and deleted items intergrity: check and fix remaining journal for merged and deleted items
purge: delete invalid data (visibility=99) purge: delete invalid data (visibility=99)
export: run export task
idx-info: show index information idx-info: show index information
idx-init: check and create index if not exists idx-init: check and create index if not exists
idx-destroy: delete index idx-destroy: delete index
@ -51,6 +53,7 @@ class Command(BaseCommand):
choices=[ choices=[
"integrity", "integrity",
"purge", "purge",
"export",
"idx-info", "idx-info",
"idx-init", "idx-init",
"idx-alt", "idx-alt",
@ -111,6 +114,18 @@ class Command(BaseCommand):
if self.fix: if self.fix:
update_journal_for_merged_item(i.url) update_journal_for_merged_item(i.url)
# Management-command action "export": run an NDJSON export synchronously, in
# this process, for every user whose identity is listed in ``owner_ids`` and
# print the outcome of each run to stdout.
def export(self, owner_ids):
users = User.objects.filter(identity__in=owner_ids)
for user in users:
# A fresh task row is created per user so the exporter has somewhere to
# store its metadata (file path, total).
task = NdjsonExporter.create(user=user)
self.stdout.write(f"exporting for {user} (task {task.pk})...")
# _run() is called directly instead of enqueue(), so nothing goes through
# the task queue; it returns True on success and False if run() raised.
ok = task._run()
if ok:
# metadata["file"] is the archive path the exporter stored on success.
self.stdout.write(f"complete {task.metadata['file']}")
else:
self.stdout.write("failed")
# Removes the task row created above.
# NOTE(review): this flattened view does not show whether the delete sits in
# the else-branch or at loop level — confirm against the real file.
task.delete()
def handle( def handle(
self, self,
action, action,
@ -152,6 +167,9 @@ class Command(BaseCommand):
cls.objects.filter(visibility=99).delete() cls.objects.filter(visibility=99).delete()
self.stdout.write(self.style.SUCCESS("Done.")) self.stdout.write(self.style.SUCCESS("Done."))
case "export":
self.export(owners)
case "idx-destroy": case "idx-destroy":
if yes or input(_CONFIRM).upper().startswith("Y"): if yes or input(_CONFIRM).upper().startswith("Y"):
index.delete_collection() index.delete_collection()

View file

@ -10,6 +10,7 @@ from .collection import Collection, CollectionMember, FeaturedCollection
from .comment import Comment from .comment import Comment
from .common import Content, Debris from .common import Content, Debris
from .itemlist import ListMember from .itemlist import ListMember
from .note import Note
from .rating import Rating from .rating import Rating
from .review import Review from .review import Review
from .shelf import ShelfLogEntry, ShelfMember from .shelf import ShelfLogEntry, ShelfMember
@ -31,6 +32,7 @@ def remove_data_by_user(owner: APIdentity):
Review.objects.filter(owner=owner).delete() Review.objects.filter(owner=owner).delete()
TagMember.objects.filter(owner=owner).delete() TagMember.objects.filter(owner=owner).delete()
Tag.objects.filter(owner=owner).delete() Tag.objects.filter(owner=owner).delete()
Note.objects.filter(owner=owner).delete()
CollectionMember.objects.filter(owner=owner).delete() CollectionMember.objects.filter(owner=owner).delete()
Collection.objects.filter(owner=owner).delete() Collection.objects.filter(owner=owner).delete()
FeaturedCollection.objects.filter(owner=owner).delete() FeaturedCollection.objects.filter(owner=owner).delete()

View file

@ -52,7 +52,7 @@ class Command(BaseCommand):
for task in tqdm(tasks): for task in tqdm(tasks):
task.state = Task.States.pending task.state = Task.States.pending
task.save(update_fields=["state"]) task.save(update_fields=["state"])
Task._run(task.pk) Task._execute(task.pk)
if options["requeue"]: if options["requeue"]:
for task in tqdm(tasks): for task in tqdm(tasks):
task.state = Task.States.pending task.state = Task.States.pending

View file

@ -1,3 +1,5 @@
from typing import Self
import django_rq import django_rq
from auditlog.context import set_actor from auditlog.context import set_actor
from django.db import models from django.db import models
@ -46,14 +48,27 @@ class Task(TypedModel):
return cls.objects.filter(user=user).order_by("-created_time").first() return cls.objects.filter(user=user).order_by("-created_time").first()
@classmethod @classmethod
def create(cls, user: User, **kwargs) -> "Task": def create(cls, user: User, **kwargs) -> Self:
d = cls.DefaultMetadata.copy() d = cls.DefaultMetadata.copy()
d.update(kwargs) d.update(kwargs)
t = cls.objects.create(user=user, metadata=d) t = cls.objects.create(user=user, metadata=d)
return t return t
def _run(self) -> bool:
    """Execute ``run()`` for this task and report whether it succeeded.

    The user's language is activated and the user is set as the audit-log
    actor for the duration of the call. Any ``Exception`` raised by ``run()``
    is logged with the task's primary key and swallowed.

    Returns:
        ``True`` if ``run()`` completed, ``False`` if it raised.
    """
    activate_language_for_user(self.user)
    succeeded = False
    with set_actor(self.user):
        try:
            self.run()
        except Exception as e:
            logger.exception(
                f"error running {self.__class__}",
                extra={"exception": e, "task": self.pk},
            )
        else:
            succeeded = True
    return succeeded
@classmethod @classmethod
def _run(cls, task_id: int): def _execute(cls, task_id: int):
task = cls.objects.get(pk=task_id) task = cls.objects.get(pk=task_id)
logger.info(f"running {task}") logger.info(f"running {task}")
if task.state != cls.States.pending: if task.state != cls.States.pending:
@ -63,17 +78,7 @@ class Task(TypedModel):
return return
task.state = cls.States.started task.state = cls.States.started
task.save() task.save()
activate_language_for_user(task.user) ok = task._run()
with set_actor(task.user):
try:
task.run()
ok = True
except Exception as e:
logger.exception(
f"error running {cls.__name__}",
extra={"exception": e, "task": task_id},
)
ok = False
task.refresh_from_db() task.refresh_from_db()
task.state = cls.States.complete if ok else cls.States.failed task.state = cls.States.complete if ok else cls.States.failed
task.save() task.save()
@ -81,7 +86,7 @@ class Task(TypedModel):
def enqueue(self): def enqueue(self):
return django_rq.get_queue(self.TaskQueue).enqueue( return django_rq.get_queue(self.TaskQueue).enqueue(
self._run, self.pk, job_id=self.job_id self._execute, self.pk, job_id=self.job_id
) )
def notify(self) -> None: def notify(self) -> None:

View file

@ -250,6 +250,23 @@
{% endif %} {% endif %}
{% endif %} {% endif %}
</form> </form>
<hr>
<form action="{% url 'users:export_ndjson' %}"
method="post"
enctype="multipart/form-data">
{% csrf_token %}
<input type="submit" value="{% trans 'Export everything in NDJSON' %}" />
{% if ndjson_export_task %}
<br>
{% trans 'Last export' %}: {{ ndjson_export_task.created_time }}
{% trans 'Status' %}: {{ ndjson_export_task.get_state_display }}
<br>
{{ ndjson_export_task.message }}
{% if ndjson_export_task.metadata.file %}
<a href="{% url 'users:export_ndjson' %}" download>{% trans 'Download' %}</a>
{% endif %}
{% endif %}
</form>
</details> </details>
</article> </article>
<article> <article>

View file

@ -18,6 +18,7 @@ urlpatterns = [
path("data/export/reviews", export_reviews, name="export_reviews"), path("data/export/reviews", export_reviews, name="export_reviews"),
path("data/export/marks", export_marks, name="export_marks"), path("data/export/marks", export_marks, name="export_marks"),
path("data/export/csv", export_csv, name="export_csv"), path("data/export/csv", export_csv, name="export_csv"),
path("data/export/ndjson", export_ndjson, name="export_ndjson"),
path("data/sync_mastodon", sync_mastodon, name="sync_mastodon"), path("data/sync_mastodon", sync_mastodon, name="sync_mastodon"),
path( path(
"data/sync_mastodon_preference", "data/sync_mastodon_preference",

View file

@ -14,6 +14,7 @@ from django.utils.translation import gettext_lazy as _
from common.utils import GenerateDateUUIDMediaFilePath from common.utils import GenerateDateUUIDMediaFilePath
from journal.exporters import DoufenExporter from journal.exporters import DoufenExporter
from journal.exporters.csv import CsvExporter from journal.exporters.csv import CsvExporter
from journal.exporters.ndjson import NdjsonExporter
from journal.importers import ( from journal.importers import (
DoubanImporter, DoubanImporter,
GoodreadsImporter, GoodreadsImporter,
@ -99,6 +100,7 @@ def data(request):
"import_task": DoubanImporter.latest_task(request.user), "import_task": DoubanImporter.latest_task(request.user),
"export_task": DoufenExporter.latest_task(request.user), "export_task": DoufenExporter.latest_task(request.user),
"csv_export_task": CsvExporter.latest_task(request.user), "csv_export_task": CsvExporter.latest_task(request.user),
"ndjson_export_task": NdjsonExporter.latest_task(request.user),
"letterboxd_task": LetterboxdImporter.latest_task(request.user), "letterboxd_task": LetterboxdImporter.latest_task(request.user),
"goodreads_task": GoodreadsImporter.latest_task(request.user), "goodreads_task": GoodreadsImporter.latest_task(request.user),
"years": years, "years": years,
@ -183,6 +185,38 @@ def export_csv(request):
return response return response
@login_required
def export_ndjson(request):
if request.method == "POST":
task = NdjsonExporter.latest_task(request.user)
if (
task
and task.state not in [Task.States.complete, Task.States.failed]
and task.created_time > (timezone.now() - datetime.timedelta(hours=1))
):
messages.add_message(
request, messages.INFO, _("Recent export still in progress.")
)
return redirect(reverse("users:data"))
NdjsonExporter.create(request.user).enqueue()
messages.add_message(request, messages.INFO, _("Generating exports."))
return redirect(reverse("users:data"))
else:
task = NdjsonExporter.latest_task(request.user)
if not task or task.state != Task.States.complete:
messages.add_message(
request, messages.ERROR, _("Export file not available.")
)
return redirect(reverse("users:data"))
response = HttpResponse()
response["X-Accel-Redirect"] = (
settings.MEDIA_URL + task.metadata["file"][len(settings.MEDIA_ROOT) :]
)
response["Content-Type"] = "application/zip"
response["Content-Disposition"] = f'attachment; filename="{task.filename}.zip"'
return response
@login_required @login_required
def sync_mastodon(request): def sync_mastodon(request):
if request.method == "POST": if request.method == "POST":