diff --git a/journal/importers/__init__.py b/journal/importers/__init__.py index 2d77e688..ab794d2f 100644 --- a/journal/importers/__init__.py +++ b/journal/importers/__init__.py @@ -1,13 +1,34 @@ +import os +import zipfile + from .csv import CsvImporter from .douban import DoubanImporter from .goodreads import GoodreadsImporter from .letterboxd import LetterboxdImporter from .opml import OPMLImporter + +def get_neodb_importer(filename: str) -> type[CsvImporter] | None: + if not os.path.exists(filename) or not zipfile.is_zipfile(filename): + return None + with zipfile.ZipFile(filename, "r") as z: + files = z.namelist() + if any(f == "journal.ndjson" for f in files): + return None + if any( + f.endswith("_mark.csv") + or f.endswith("_review.csv") + or f.endswith("_note.csv") + for f in files + ): + return CsvImporter + + __all__ = [ "CsvImporter", "LetterboxdImporter", "OPMLImporter", "DoubanImporter", "GoodreadsImporter", + "get_neodb_importer", ] diff --git a/journal/importers/csv.py b/journal/importers/csv.py index 73372404..650671d7 100644 --- a/journal/importers/csv.py +++ b/journal/importers/csv.py @@ -12,10 +12,20 @@ from django.utils.translation import gettext as _ from loguru import logger from catalog.common.sites import SiteManager -from catalog.models import Edition, IdType, Item, ItemCategory +from catalog.models import Edition, IdType, Item, ItemCategory, SiteName from journal.models import Mark, Note, Review, ShelfType from users.models import Task +_PREFERRED_SITES = [ + SiteName.Fediverse, + SiteName.RSS, + SiteName.TMDB, + SiteName.IMDB, + SiteName.GoogleBooks, + SiteName.Goodreads, + SiteName.IGDB, +] + class CsvImporter(Task): class Meta: @@ -49,18 +59,39 @@ class CsvImporter(Task): site_url = settings.SITE_INFO["site_url"] + "/" links = links_str.strip().split() + # look for local items first for link in links: if link.startswith("/") or link.startswith(site_url): item = Item.get_by_url(link) if item: return item - for link in links: - site = SiteManager.get_site_by_url(link) - if site: + + sites = [SiteManager.get_site_by_url(link) for link in links] + sites = [site for site in sites if site] + sites.sort( + key=lambda x: _PREFERRED_SITES.index(x.SITE_NAME) + if x.SITE_NAME in _PREFERRED_SITES + else 99 + ) + + # look for external items that already matched + for site in sites: + logger.debug(f"matching {site.url}") + item = site.get_item() + if item: + return item + + # fetch external item if possible + for site in sites: + try: + logger.debug(f"fetching {site.url}") site.get_resource_ready() item = site.get_item() if item: return item + except Exception as e: + logger.error(f"Error fetching item: {e}") + # Try using the info string if info_str: info_dict = {} @@ -304,8 +335,6 @@ class CsvImporter(Task): return True except Exception as e: logger.error(f"Error importing note: {e}") - if "failed_items" not in self.metadata: - self.metadata["failed_items"] = [] self.metadata["failed_items"].append( f"Error importing note for {row.get('title', '')}: {str(e)}" ) @@ -333,32 +362,11 @@ class CsvImporter(Task): success = import_function(row) self.progress(success) - @classmethod - def validate_file(cls, filename: str) -> bool: - """Validate that the given file is a valid CSV export ZIP file. - - Args: - filename: Path to the file to validate - - Returns: - bool: True if the file is valid, False otherwise - """ - return os.path.exists(filename) and zipfile.is_zipfile(filename) - def run(self) -> None: """Run the CSV import.""" - # Ensure failed_items is initialized - if "failed_items" not in self.metadata: - self.metadata["failed_items"] = [] - filename = self.metadata["file"] logger.debug(f"Importing {filename}") - # Validate the file before processing - if not self.validate_file(filename): - self.save() - return - with zipfile.ZipFile(filename, "r") as zipref: with tempfile.TemporaryDirectory() as tmpdirname: logger.debug(f"Extracting {filename} to {tmpdirname}") diff --git a/journal/migrations/0006_csvimporter.py b/journal/migrations/0006_csvimporter.py new file mode 100644 index 00000000..7b6f45c6 --- /dev/null +++ b/journal/migrations/0006_csvimporter.py @@ -0,0 +1,23 @@ +# Generated by Django 4.2.18 on 2025-03-03 23:16 + +from django.db import migrations + + +class Migration(migrations.Migration): + dependencies = [ + ("users", "0008_alter_task_type"), + ("journal", "0005_csvexporter"), + ] + + operations = [ + migrations.CreateModel( + name="CsvImporter", + fields=[], + options={ + "proxy": True, + "indexes": [], + "constraints": [], + }, + bases=("users.task",), + ), + ] diff --git a/journal/tests/csv.py b/journal/tests/csv.py index 29b49714..22f3ace7 100644 --- a/journal/tests/csv.py +++ b/journal/tests/csv.py @@ -9,7 +9,7 @@ from loguru import logger from catalog.models import Edition, IdType, Movie, TVEpisode, TVSeason, TVShow from journal.exporters import CsvExporter -from journal.importers import CsvImporter +from journal.importers import CsvImporter, get_neodb_importer from users.models import User from ..models import * @@ -219,6 +219,7 @@ class CsvExportImportTest(TestCase): f"Expected file {filename} with {expected_data_count} data rows, but file not found" ) + self.assertEqual(get_neodb_importer(export_path), CsvImporter) importer = CsvImporter.create(user=self.user2, file=export_path, visibility=2) importer.run() self.assertEqual(importer.message, "Import complete") diff --git a/requirements-dev.lock b/requirements-dev.lock index f6ac65fb..aa871438 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -28,6 +28,8 @@ attrs==25.1.0 # via aiohttp babel==2.17.0 # via mkdocs-material +backrefs==5.8 + # via mkdocs-material beautifulsoup4==4.13.3 # via markdownify bleach==5.0.1 @@ -54,7 +56,7 @@ click==8.1.8 colorama==0.4.6 # via djlint # via mkdocs-material -cryptography==44.0.1 +cryptography==44.0.2 # via atproto cssbeautifier==1.15.4 # via djlint @@ -169,7 +171,7 @@ mkdocs==1.6.1 # via mkdocs-material mkdocs-get-deps==0.2.0 # via mkdocs -mkdocs-material==9.6.5 +mkdocs-material==9.6.7 mkdocs-material-extensions==1.3.1 # via mkdocs-material multidict==6.1.0 @@ -213,7 +215,7 @@ pygments==2.19.1 # via mkdocs-material pymdown-extensions==10.14.3 # via mkdocs-material -pyright==1.1.395 +pyright==1.1.396 python-dateutil==2.9.0.post0 # via dateparser # via django-auditlog @@ -241,7 +243,6 @@ redis==5.2.1 regex==2024.11.6 # via dateparser # via djlint - # via mkdocs-material requests==2.32.3 # via django-anymail # via igdb-api-v4 @@ -251,7 +252,7 @@ rjsmin==1.2.2 # via django-compressor rq==2.1.0 # via django-rq -ruff==0.9.8 +ruff==0.9.9 sentry-sdk==2.22.0 setproctitle==1.3.5 six==1.17.0 diff --git a/requirements.lock b/requirements.lock index 2391a389..7bccec2a 100644 --- a/requirements.lock +++ b/requirements.lock @@ -44,7 +44,7 @@ charset-normalizer==3.4.1 click==8.1.8 # via atproto # via rq -cryptography==44.0.1 +cryptography==44.0.2 # via atproto dateparser==1.2.1 deepmerge==2.0 diff --git a/users/migrations/0008_alter_task_type.py b/users/migrations/0008_alter_task_type.py new file mode 100644 index 00000000..aba30336 --- /dev/null +++ b/users/migrations/0008_alter_task_type.py @@ -0,0 +1,29 @@ +# Generated by Django 4.2.18 on 2025-03-03 23:16 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("users", "0007_alter_task_type"), + ] + + operations = [ + migrations.AlterField( + model_name="task", + name="type", + field=models.CharField( + choices=[ + ("journal.csvexporter", "csv exporter"), + ("journal.csvimporter", "csv importer"), + ("journal.doubanimporter", "douban importer"), + ("journal.doufenexporter", "doufen exporter"), + ("journal.goodreadsimporter", "goodreads importer"), + ("journal.letterboxdimporter", "letterboxd importer"), + ("journal.ndjsonexporter", "ndjson exporter"), + ], + db_index=True, + max_length=255, + ), + ), + ] diff --git a/users/templates/users/data.html b/users/templates/users/data.html index 419a0529..74f5bbdb 100644 --- a/users/templates/users/data.html +++ b/users/templates/users/data.html @@ -15,6 +15,127 @@ {% include "_header.html" %}
+
+
+ {% trans 'Export Data' %} +
+ {% csrf_token %} + + {% if csv_export_task %} +
+ {% trans 'Last export' %}: {{ csv_export_task.created_time }} + {% trans 'Status' %}: {{ csv_export_task.get_state_display }} +
+ {{ csv_export_task.message }} + {% if csv_export_task.metadata.file %} + {% trans 'Download' %} + {% endif %} + {% endif %} +
+
+
+ {% csrf_token %} + + {% if ndjson_export_task %} +
+ {% trans 'Last export' %}: {{ ndjson_export_task.created_time }} + {% trans 'Status' %}: {{ ndjson_export_task.get_state_display }} +
+ {{ ndjson_export_task.message }} + {% if ndjson_export_task.metadata.file %} + {% trans 'Download' %} + {% endif %} + {% endif %} +
+
+
+ {% csrf_token %} + + exporting to this format will be deprecated soon. + {% if export_task %} +
+ {% trans 'Last export' %}: {{ export_task.created_time }} + {% trans 'Status' %}: {{ export_task.get_state_display }} +
+ {{ export_task.message }} + {% if export_task.metadata.file %} + {% trans 'Download' %} + {% endif %} + {% endif %} +
+
+
+
+
+ {% trans 'Import Data' %} +
+ {% csrf_token %} +
    +
  • + {% trans 'Upload a .zip file containing .csv or .ndjson files exported from NeoDB.' %} +
  • +
  • {% trans 'Existing marks and reviews with newer dates will be preserved.' %}
  • +
+
+ +

+ {% trans 'Visibility' %}: .csv only +
+ + + +

+ + + {% if csv_import_task %} +
+ {% trans 'Last import started' %}: {{ csv_import_task.created_time }} + {% trans 'Status' %}: {{ csv_import_task.get_state_display }}。 +
+ {{ csv_import_task.message }} + {% if csv_import_task.metadata.failed_items %} + {% trans 'Failed items' %}: +
+ + {% endif %} + {% endif %} +
+
+
+
{% trans 'Import Marks and Reviews from Douban' %} @@ -213,123 +334,6 @@
-
-
- {% trans 'Import marks, reviews and notes from CSV' %} -
- {% csrf_token %} -
    -
  • {% trans 'Upload a ZIP file containing CSV files exported from NeoDB.' %}
  • -
  • {% trans 'Existing marks and reviews with newer dates will be preserved.' %}
  • -
-
- -

- {% trans 'Visibility' %}: -
- - - -

- - - {% if csv_import_task %} -
- {% trans 'Last import started' %}: {{ csv_import_task.created_time }} - {% trans 'Status' %}: {{ csv_import_task.get_state_display }}。 -
- {{ csv_import_task.message }} - {% if csv_import_task.metadata.failed_items %} - {% trans 'Failed items' %}: -
- - {% endif %} - {% endif %} -
-
-
-
-
-
- {% trans 'Export Data' %} -
- {% csrf_token %} - - {% if export_task %} -
- {% trans 'Last export' %}: {{ export_task.created_time }} - {% trans 'Status' %}: {{ export_task.get_state_display }} -
- {{ export_task.message }} - {% if export_task.metadata.file %} - {% trans 'Download' %} - {% endif %} - {% endif %} -
-
-
- {% csrf_token %} - - {% if csv_export_task %} -
- {% trans 'Last export' %}: {{ csv_export_task.created_time }} - {% trans 'Status' %}: {{ csv_export_task.get_state_display }} -
- {{ csv_export_task.message }} - {% if csv_export_task.metadata.file %} - {% trans 'Download' %} - {% endif %} - {% endif %} -
-
-
- {% csrf_token %} - - {% if ndjson_export_task %} -
- {% trans 'Last export' %}: {{ ndjson_export_task.created_time }} - {% trans 'Status' %}: {{ ndjson_export_task.get_state_display }} -
- {{ ndjson_export_task.message }} - {% if ndjson_export_task.metadata.file %} - {% trans 'Download' %} - {% endif %} - {% endif %} -
-
-
{% trans 'View Annual Summary' %} diff --git a/users/urls.py b/users/urls.py index 689f478e..1f8d9857 100644 --- a/users/urls.py +++ b/users/urls.py @@ -15,7 +15,7 @@ urlpatterns = [ path("data/import/douban", import_douban, name="import_douban"), path("data/import/letterboxd", import_letterboxd, name="import_letterboxd"), path("data/import/opml", import_opml, name="import_opml"), - path("data/import/csv", import_csv, name="import_csv"), + path("data/import/neodb", import_neodb, name="import_neodb"), path("data/export/reviews", export_reviews, name="export_reviews"), path("data/export/marks", export_marks, name="export_marks"), path("data/export/csv", export_csv, name="export_csv"), diff --git a/users/views/data.py b/users/views/data.py index f8ae1be0..624e9a76 100644 --- a/users/views/data.py +++ b/users/views/data.py @@ -19,6 +19,7 @@ from journal.importers import ( GoodreadsImporter, LetterboxdImporter, OPMLImporter, + get_neodb_importer, ) from journal.models import ShelfType from takahe.utils import Takahe @@ -324,7 +325,7 @@ def import_opml(request): @login_required -def import_csv(request): +def import_neodb(request): if request.method == "POST": f = ( settings.MEDIA_ROOT @@ -335,10 +336,11 @@ def import_csv(request): with open(f, "wb+") as destination: for chunk in request.FILES["file"].chunks(): destination.write(chunk) - if not CsvImporter.validate_file(f): + importer = get_neodb_importer(f) + if not importer: messages.add_message(request, messages.ERROR, _("Invalid file.")) return redirect(reverse("users:data")) - CsvImporter.create( + importer.create( request.user, visibility=int(request.POST.get("visibility", 0)), file=f,