diff --git a/journal/importers/__init__.py b/journal/importers/__init__.py
index ab794d2f..67693cc3 100644
--- a/journal/importers/__init__.py
+++ b/journal/importers/__init__.py
@@ -5,16 +5,19 @@ from .csv import CsvImporter
from .douban import DoubanImporter
from .goodreads import GoodreadsImporter
from .letterboxd import LetterboxdImporter
+from .ndjson import NdjsonImporter
from .opml import OPMLImporter
-def get_neodb_importer(filename: str) -> type[CsvImporter] | None:
+def get_neodb_importer(
+ filename: str,
+) -> type[CsvImporter] | type[NdjsonImporter] | None:
if not os.path.exists(filename) or not zipfile.is_zipfile(filename):
return None
with zipfile.ZipFile(filename, "r") as z:
files = z.namelist()
if any(f == "journal.ndjson" for f in files):
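+            # an NDJSON export takes precedence over any CSV files in the archive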
- return None
+ return NdjsonImporter
if any(
f.endswith("_mark.csv")
or f.endswith("_review.csv")
@@ -26,6 +29,7 @@ def get_neodb_importer(filename: str) -> type[CsvImporter] | None:
__all__ = [
"CsvImporter",
+ "NdjsonImporter",
"LetterboxdImporter",
"OPMLImporter",
"DoubanImporter",
diff --git a/journal/importers/base.py b/journal/importers/base.py
new file mode 100644
index 00000000..8edde4f7
--- /dev/null
+++ b/journal/importers/base.py
@@ -0,0 +1,197 @@
+import datetime
+from typing import Dict, List, Literal, Optional
+
+from django.conf import settings
+from django.utils.dateparse import parse_datetime
+from loguru import logger
+
+from catalog.common.sites import SiteManager
+from catalog.models import Edition, IdType, Item, SiteName
+from journal.models import ShelfType
+from users.models import Task
+
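+# order of preference when matching external links to known sites; earlier wins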
+_PREFERRED_SITES = [
+ SiteName.Fediverse,
+ SiteName.RSS,
+ SiteName.TMDB,
+ SiteName.IMDB,
+ SiteName.GoogleBooks,
+ SiteName.Goodreads,
+ SiteName.IGDB,
+]
+
+
+class BaseImporter(Task):
+ class Meta:
+ app_label = "journal" # workaround bug in TypedModel
+
+ ImportResult = Literal["imported", "skipped", "failed"]
+ TaskQueue = "import"
+ DefaultMetadata = {
+ "total": 0,
+ "processed": 0,
+ "skipped": 0,
+ "imported": 0,
+ "failed": 0,
+ "failed_items": [],
+ "file": None,
+ "visibility": 0,
+ }
+
+ def progress(self, result: ImportResult) -> None:
+ """Update import progress.
+
+ Args:
+ result: The import result ('imported', 'skipped', or 'failed')
+ """
+ self.metadata["processed"] += 1
+ self.metadata[result] = self.metadata.get(result, 0) + 1
+
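+        # "total" may be 0 if the number of entries is not known up front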
+ if self.metadata["total"]:
+ progress_percentage = round(
+ self.metadata["processed"] / self.metadata["total"] * 100
+ )
+ self.message = f"Progress: {progress_percentage}% - "
+ else:
+ self.message = ""
+ self.message += (
+ f"{self.metadata['imported']} imported, "
+ f"{self.metadata['skipped']} skipped, "
+ f"{self.metadata['failed']} failed"
+ )
+ self.save(update_fields=["metadata", "message"])
+
+ def run(self) -> None:
+ raise NotImplementedError
+
+ def get_item_by_info_and_links(
+ self, title: str, info_str: str, links: list[str]
+ ) -> Optional[Item]:
+ """Find an item based on information from CSV export.
+
+ Args:
+ title: Item title
+ info_str: Item info string (space-separated key:value pairs)
+ links_str: Space-separated URLs
+
+ Returns:
+ Item if found, None otherwise
+ """
+ site_url = settings.SITE_INFO["site_url"] + "/"
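+        # resolution order: local URLs first, then remote URLs matched to
+        # known sites without network requests, then after resolving
+        # redirects, and finally by fetching from the remote site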
+ # look for local items first
+ for link in links:
+ if link.startswith("/") or link.startswith(site_url):
+ item = Item.get_by_url(link, resolve_merge=True)
+ if item and not item.is_deleted:
+ return item
+
+ sites = [
+ SiteManager.get_site_by_url(link, detect_redirection=False)
+ for link in links
+ ]
+ sites = [site for site in sites if site]
+ sites.sort(
+ key=lambda x: _PREFERRED_SITES.index(x.SITE_NAME)
+ if x.SITE_NAME in _PREFERRED_SITES
+ else 99
+ )
+
+ # match items without extra requests
+ for site in sites:
+ item = site.get_item()
+ if item:
+ return item
+
+ # match items after HEAD
+ sites = [
+ SiteManager.get_site_by_url(site.url) if site.url else site
+ for site in sites
+ ]
+ sites = [site for site in sites if site]
+ for site in sites:
+ item = site.get_item()
+ if item:
+ return item
+
+ # fetch from remote
+ for site in sites:
+ try:
+ logger.debug(f"fetching {site.url}")
+ site.get_resource_ready()
+ item = site.get_item()
+ if item:
+ return item
+ except Exception as e:
+ logger.error(f"Error fetching item: {e}")
+
+ # Try using the info string
+ if info_str:
+ info_dict = {}
+ for pair in info_str.strip().split():
+ if ":" in pair:
+ key, value = pair.split(":", 1)
+ info_dict[key] = value
+
+ # Check for ISBN, IMDB, etc.
+ item = None
+ for key, value in info_dict.items():
+ if key == "isbn" and value:
+ item = Edition.objects.filter(
+ primary_lookup_id_type=IdType.ISBN,
+ primary_lookup_id_value=value,
+ ).first()
+ elif key == "imdb" and value:
+ item = Item.objects.filter(
+ primary_lookup_id_type=IdType.IMDB,
+ primary_lookup_id_value=value,
+ ).first()
+ if item:
+ return item
+ return None
+
+ def parse_tags(self, tags_str: str) -> List[str]:
+ """Parse space-separated tags string into a list of tags."""
+ if not tags_str:
+ return []
+ return [tag.strip() for tag in tags_str.split() if tag.strip()]
+
+ def parse_info(self, info_str: str) -> Dict[str, str]:
+ """Parse info string into a dictionary."""
+ info_dict = {}
+ if not info_str:
+ return info_dict
+
+ for pair in info_str.split():
+ if ":" in pair:
+ key, value = pair.split(":", 1)
+ info_dict[key] = value
+
+ return info_dict
+
+ def parse_datetime(self, timestamp_str: str | None) -> Optional[datetime.datetime]:
+ """Parse ISO format timestamp into datetime."""
+ if not timestamp_str:
+ return None
+
+ try:
+ dt = parse_datetime(timestamp_str)
+ if dt and dt.tzinfo is None:
+ dt = dt.replace(tzinfo=datetime.UTC)
+ return dt
+ except Exception as e:
+ logger.error(f"Error parsing datetime {timestamp_str}: {e}")
+ return None
+
+ def parse_shelf_type(self, status_str: str) -> ShelfType:
+ """Parse shelf type string into ShelfType enum."""
+ if not status_str:
+ return ShelfType.WISHLIST
+
+ status_map = {
+ "wishlist": ShelfType.WISHLIST,
+ "progress": ShelfType.PROGRESS,
+ "complete": ShelfType.COMPLETE,
+ "dropped": ShelfType.DROPPED,
+ }
+
+ return status_map.get(status_str.lower(), ShelfType.WISHLIST)
diff --git a/journal/importers/csv.py b/journal/importers/csv.py
index 93656bba..f84bc98a 100644
--- a/journal/importers/csv.py
+++ b/journal/importers/csv.py
@@ -1,181 +1,20 @@
import csv
-import datetime
import os
import tempfile
import zipfile
-from typing import Dict, List, Optional
+from typing import Dict
-from django.conf import settings
from django.utils import timezone
-from django.utils.dateparse import parse_datetime
from django.utils.translation import gettext as _
from loguru import logger
-from catalog.common.sites import SiteManager
-from catalog.models import Edition, IdType, Item, ItemCategory, SiteName
-from journal.models import Mark, Note, Review, ShelfType
-from users.models import Task
+from catalog.models import ItemCategory
+from journal.models import Mark, Note, Review
-_PREFERRED_SITES = [
- SiteName.Fediverse,
- SiteName.RSS,
- SiteName.TMDB,
- SiteName.IMDB,
- SiteName.GoogleBooks,
- SiteName.Goodreads,
- SiteName.IGDB,
-]
+from .base import BaseImporter
-class CsvImporter(Task):
- class Meta:
- app_label = "journal" # workaround bug in TypedModel
-
- TaskQueue = "import"
- DefaultMetadata = {
- "total": 0,
- "processed": 0,
- "skipped": 0,
- "imported": 0,
- "failed": 0,
- "failed_items": [],
- "file": None,
- "visibility": 0,
- }
-
- def get_item_by_info_and_links(
- self, title: str, info_str: str, links_str: str
- ) -> Optional[Item]:
- """Find an item based on information from CSV export.
-
- Args:
- title: Item title
- info_str: Item info string (space-separated key:value pairs)
- links_str: Space-separated URLs
-
- Returns:
- Item if found, None otherwise
- """
- site_url = settings.SITE_INFO["site_url"] + "/"
- links = links_str.strip().split()
- # look for local items first
- for link in links:
- if link.startswith("/") or link.startswith(site_url):
- item = Item.get_by_url(link, resolve_merge=True)
- if item and not item.is_deleted:
- return item
-
- sites = [
- SiteManager.get_site_by_url(link, detect_redirection=False)
- for link in links
- ]
- sites = [site for site in sites if site]
- sites.sort(
- key=lambda x: _PREFERRED_SITES.index(x.SITE_NAME)
- if x.SITE_NAME in _PREFERRED_SITES
- else 99
- )
-
- # match items without extra requests
- for site in sites:
- item = site.get_item()
- if item:
- return item
-
- # match items after HEAD
- sites = [
- SiteManager.get_site_by_url(site.url) if site.url else site
- for site in sites
- ]
- sites = [site for site in sites if site]
- for site in sites:
- item = site.get_item()
- if item:
- return item
-
- # fetch from remote
- for site in sites:
- try:
- logger.debug(f"fetching {site.url}")
- site.get_resource_ready()
- item = site.get_item()
- if item:
- return item
- except Exception as e:
- logger.error(f"Error fetching item: {e}")
-
- # Try using the info string
- if info_str:
- info_dict = {}
- for pair in info_str.strip().split():
- if ":" in pair:
- key, value = pair.split(":", 1)
- info_dict[key] = value
-
- # Check for ISBN, IMDB, etc.
- item = None
- for key, value in info_dict.items():
- if key == "isbn" and value:
- item = Edition.objects.filter(
- primary_lookup_id_type=IdType.ISBN,
- primary_lookup_id_value=value,
- ).first()
- elif key == "imdb" and value:
- item = Item.objects.filter(
- primary_lookup_id_type=IdType.IMDB,
- primary_lookup_id_value=value,
- ).first()
- if item:
- return item
- return None
-
- def parse_tags(self, tags_str: str) -> List[str]:
- """Parse space-separated tags string into a list of tags."""
- if not tags_str:
- return []
- return [tag.strip() for tag in tags_str.split() if tag.strip()]
-
- def parse_info(self, info_str: str) -> Dict[str, str]:
- """Parse info string into a dictionary."""
- info_dict = {}
- if not info_str:
- return info_dict
-
- for pair in info_str.split():
- if ":" in pair:
- key, value = pair.split(":", 1)
- info_dict[key] = value
-
- return info_dict
-
- def parse_datetime(self, timestamp_str: str) -> Optional[datetime.datetime]:
- """Parse ISO format timestamp into datetime."""
- if not timestamp_str:
- return None
-
- try:
- dt = parse_datetime(timestamp_str)
- if dt and dt.tzinfo is None:
- dt = dt.replace(tzinfo=datetime.UTC)
- return dt
- except Exception as e:
- logger.error(f"Error parsing datetime {timestamp_str}: {e}")
- return None
-
- def parse_shelf_type(self, status_str: str) -> ShelfType:
- """Parse shelf type string into ShelfType enum."""
- if not status_str:
- return ShelfType.WISHLIST
-
- status_map = {
- "wishlist": ShelfType.WISHLIST,
- "progress": ShelfType.PROGRESS,
- "complete": ShelfType.COMPLETE,
- "dropped": ShelfType.DROPPED,
- }
-
- return status_map.get(status_str.lower(), ShelfType.WISHLIST)
-
+class CsvImporter(BaseImporter):
def import_mark(self, row: Dict[str, str]) -> str:
"""Import a mark from a CSV row.
@@ -184,7 +23,9 @@ class CsvImporter(Task):
"""
try:
item = self.get_item_by_info_and_links(
- row.get("title", ""), row.get("info", ""), row.get("links", "")
+ row.get("title", ""),
+ row.get("info", ""),
+ row.get("links", "").strip().split(),
)
if not item:
@@ -246,7 +87,9 @@ class CsvImporter(Task):
"""
try:
item = self.get_item_by_info_and_links(
- row.get("title", ""), row.get("info", ""), row.get("links", "")
+ row.get("title", ""),
+ row.get("info", ""),
+ row.get("links", "").strip().split(),
)
if not item:
@@ -304,7 +147,9 @@ class CsvImporter(Task):
"""
try:
item = self.get_item_by_info_and_links(
- row.get("title", ""), row.get("info", ""), row.get("links", "")
+ row.get("title", ""),
+ row.get("info", ""),
+ row.get("links", "").strip().split(),
)
if not item:
@@ -361,26 +206,6 @@ class CsvImporter(Task):
)
return "failed"
- def progress(self, result: str) -> None:
- """Update import progress.
-
- Args:
- result: The import result ('imported', 'skipped', or 'failed')
- """
- self.metadata["processed"] += 1
- self.metadata[result] = self.metadata.get(result, 0) + 1
-
- progress_percentage = round(
- self.metadata["processed"] / self.metadata["total"] * 100
- )
- self.message = (
- f"Progress: {progress_percentage}% - "
- f"{self.metadata['imported']} imported, "
- f"{self.metadata['skipped']} skipped, "
- f"{self.metadata['failed']} failed"
- )
- self.save(update_fields=["metadata", "message"])
-
def process_csv_file(self, file_path: str, import_function) -> None:
"""Process a CSV file using the specified import function."""
logger.debug(f"Processing {file_path}")
diff --git a/journal/importers/ndjson.py b/journal/importers/ndjson.py
new file mode 100644
index 00000000..bb243923
--- /dev/null
+++ b/journal/importers/ndjson.py
@@ -0,0 +1,447 @@
+import json
+import os
+import tempfile
+import zipfile
+from typing import Any, Dict
+
+from django.utils.translation import gettext as _
+from loguru import logger
+
+from journal.models import (
+ Collection,
+ Comment,
+ Mark,
+ Note,
+ Rating,
+ Review,
+ ShelfLogEntry,
+ ShelfType,
+ Tag,
+ TagMember,
+)
+
+from .base import BaseImporter
+
+
+class NdjsonImporter(BaseImporter):
+ """Importer for NDJSON files exported from NeoDB."""
+
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
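+        # maps item URLs from catalog.ndjson to locally resolved Items (or None)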
+ self.items = {}
+
+ def import_collection(self, data: Dict[str, Any]) -> BaseImporter.ImportResult:
+ """Import a collection from NDJSON data."""
+ try:
+ owner = self.user.identity
+ visibility = data.get("visibility", self.metadata.get("visibility", 0))
+ content_data = data.get("content", {})
+ published_dt = self.parse_datetime(content_data.get("published"))
+ name = content_data.get("name", "")
+ content = content_data.get("content", "")
+ collection = Collection.objects.create(
+ owner=owner,
+ title=name,
+ brief=content,
+ visibility=visibility,
+ metadata=data.get("metadata", {}),
+ created_time=published_dt,
+ )
+ item_data = data.get("items", [])
+ for item_entry in item_data:
+ item_url = item_entry.get("item")
+ if not item_url:
+ continue
+ item = self.items.get(item_url)
+ if not item:
+ logger.warning(f"Could not find item for collection: {item_url}")
+ continue
+ metadata = item_entry.get("metadata", {})
+ collection.append_item(item, metadata=metadata)
+ return "imported"
+ except Exception as e:
+ logger.error(f"Error importing collection: {e}")
+ return "failed"
+
+ def import_shelf_member(self, data: Dict[str, Any]) -> BaseImporter.ImportResult:
+ """Import a shelf member (mark) from NDJSON data."""
+ try:
+ owner = self.user.identity
+ visibility = data.get("visibility", self.metadata.get("visibility", 0))
+ metadata = data.get("metadata", {})
+ content_data = data.get("content", {})
+ published_dt = self.parse_datetime(content_data.get("published"))
+ item = self.items.get(content_data.get("withRegardTo", ""))
+ if not item:
+ raise KeyError(f"Could not find item: {data.get('item', '')}")
+ shelf_type = content_data.get("status", ShelfType.WISHLIST)
+ mark = Mark(owner, item)
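+            # keep the existing local mark if it is not older than the import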
+ if mark.created_time and published_dt and mark.created_time >= published_dt:
+ return "skipped"
+ mark.update(
+ shelf_type=shelf_type,
+ visibility=visibility,
+ metadata=metadata,
+ created_time=published_dt,
+ )
+ return "imported"
+ except Exception as e:
+ logger.error(f"Error importing shelf member: {e}")
+ return "failed"
+
+ def import_shelf_log(self, data: Dict[str, Any]) -> BaseImporter.ImportResult:
+ """Import a shelf log entry from NDJSON data."""
+ try:
+ item = self.items.get(data.get("item", ""))
+ if not item:
+ raise KeyError(f"Could not find item: {data.get('item', '')}")
+ owner = self.user.identity
+ shelf_type = data.get("status", ShelfType.WISHLIST)
+            # TODO: also import attached posts ("posts" field); this will be tricky
+ timestamp = data.get("timestamp")
+ timestamp_dt = self.parse_datetime(timestamp) if timestamp else None
+            ShelfLogEntry.objects.update_or_create(
+                owner=owner,
+                item=item,
+                shelf_type=shelf_type,
+                timestamp=timestamp_dt,
+            )
+            # count updates of existing entries as imported rather than
+            # skipped, since reporting them as skipped may confuse users
+            return "imported"
+ except Exception as e:
+ logger.error(f"Error importing shelf log: {e}")
+ return "failed"
+
+ def import_post(self, data: Dict[str, Any]) -> BaseImporter.ImportResult:
+ """Import a post from NDJSON data."""
+ # TODO
+ return "skipped"
+
+ def import_review(self, data: Dict[str, Any]) -> BaseImporter.ImportResult:
+ """Import a review from NDJSON data."""
+ try:
+ owner = self.user.identity
+ visibility = data.get("visibility", self.metadata.get("visibility", 0))
+ metadata = data.get("metadata", {})
+ content_data = data.get("content", {})
+ published_dt = self.parse_datetime(content_data.get("published"))
+ item = self.items.get(content_data.get("withRegardTo", ""))
+ if not item:
+ raise KeyError(f"Could not find item: {data.get('item', '')}")
+ name = content_data.get("name", "")
+ content = content_data.get("content", "")
+ existing_review = Review.objects.filter(
+ owner=owner, item=item, title=name
+ ).first()
+ if (
+ existing_review
+ and existing_review.created_time
+ and published_dt
+ and existing_review.created_time >= published_dt
+ ):
+ return "skipped"
+ Review.objects.create(
+ owner=owner,
+ item=item,
+ title=name,
+ body=content,
+ created_time=published_dt,
+ visibility=visibility,
+ metadata=metadata,
+ )
+ return "imported"
+ except Exception as e:
+ logger.error(f"Error importing review: {e}")
+ return "failed"
+
+ def import_note(self, data: Dict[str, Any]) -> BaseImporter.ImportResult:
+ """Import a note from NDJSON data."""
+ try:
+ owner = self.user.identity
+ visibility = data.get("visibility", self.metadata.get("visibility", 0))
+ content_data = data.get("content", {})
+ published_dt = self.parse_datetime(content_data.get("published"))
+ item = self.items.get(content_data.get("withRegardTo", ""))
+ if not item:
+ raise KeyError(f"Could not find item: {data.get('item', '')}")
+ title = content_data.get("title", "")
+ content = content_data.get("content", "")
+ sensitive = content_data.get("sensitive", False)
+ progress = content_data.get("progress", {})
+ progress_type = progress.get("type", "")
+ progress_value = progress.get("value", "")
+ Note.objects.create(
+ item=item,
+ owner=owner,
+ title=title,
+ content=content,
+ sensitive=sensitive,
+ progress_type=progress_type,
+ progress_value=progress_value,
+ created_time=published_dt,
+ visibility=visibility,
+ metadata=data.get("metadata", {}),
+ )
+ return "imported"
+ except Exception as e:
+ logger.error(f"Error importing note: {e}")
+ return "failed"
+
+ def import_comment(self, data: Dict[str, Any]) -> BaseImporter.ImportResult:
+ """Import a comment from NDJSON data."""
+ try:
+ owner = self.user.identity
+ visibility = data.get("visibility", self.metadata.get("visibility", 0))
+ metadata = data.get("metadata", {})
+ content_data = data.get("content", {})
+ published_dt = self.parse_datetime(content_data.get("published"))
+ item = self.items.get(content_data.get("withRegardTo", ""))
+ if not item:
+ raise KeyError(f"Could not find item: {data.get('item', '')}")
+ content = content_data.get("content", "")
+ existing_comment = Comment.objects.filter(owner=owner, item=item).first()
+ if (
+ existing_comment
+ and existing_comment.created_time
+ and published_dt
+ and existing_comment.created_time >= published_dt
+ ):
+ return "skipped"
+ Comment.objects.create(
+ owner=owner,
+ item=item,
+ text=content,
+ created_time=published_dt,
+ visibility=visibility,
+ metadata=metadata,
+ )
+ return "imported"
+ except Exception as e:
+ logger.error(f"Error importing comment: {e}")
+ return "failed"
+
+ def import_rating(self, data: Dict[str, Any]) -> BaseImporter.ImportResult:
+ """Import a rating from NDJSON data."""
+ try:
+ owner = self.user.identity
+ visibility = data.get("visibility", self.metadata.get("visibility", 0))
+ metadata = data.get("metadata", {})
+ content_data = data.get("content", {})
+ published_dt = self.parse_datetime(content_data.get("published"))
+ item = self.items.get(content_data.get("withRegardTo", ""))
+ if not item:
+ raise KeyError(f"Could not find item: {data.get('item', '')}")
+ rating_grade = int(float(content_data.get("value", 0)))
+ existing_rating = Comment.objects.filter(owner=owner, item=item).first()
+ if (
+ existing_rating
+ and existing_rating.created_time
+ and published_dt
+ and existing_rating.created_time >= published_dt
+ ):
+ return "skipped"
+ Rating.objects.create(
+ owner=owner,
+ item=item,
+ grade=rating_grade,
+ created_time=published_dt,
+ visibility=visibility,
+ metadata=metadata,
+ )
+ return "imported"
+ except Exception as e:
+ logger.error(f"Error importing rating: {e}")
+ return "failed"
+
+ def import_tag(self, data: Dict[str, Any]) -> BaseImporter.ImportResult:
+ """Import tags from NDJSON data."""
+ try:
+ owner = self.user.identity
+ visibility = data.get("visibility", self.metadata.get("visibility", 0))
+ pinned = data.get("pinned", self.metadata.get("pinned", False))
+ tag_title = Tag.cleanup_title(data.get("name", ""))
+ _, created = Tag.objects.update_or_create(
+ owner=owner,
+ title=tag_title,
+ defaults={
+ "visibility": visibility,
+ "pinned": pinned,
+ },
+ )
+ return "imported" if created else "skipped"
+ except Exception as e:
+ logger.error(f"Error importing tag member: {e}")
+ return "failed"
+
+ def import_tag_member(self, data: Dict[str, Any]) -> BaseImporter.ImportResult:
+ """Import tags from NDJSON data."""
+ try:
+ owner = self.user.identity
+ visibility = data.get("visibility", self.metadata.get("visibility", 0))
+ metadata = data.get("metadata", {})
+ content_data = data.get("content", {})
+ published_dt = self.parse_datetime(content_data.get("published"))
+ item = self.items.get(content_data.get("withRegardTo", ""))
+ if not item:
+ raise KeyError(f"Could not find item: {data.get('item', '')}")
+ tag_title = Tag.cleanup_title(content_data.get("tag", ""))
+ tag, _ = Tag.objects.get_or_create(
+ owner=owner,
+ title=tag_title,
+ defaults={
+ "created_time": published_dt,
+ "visibility": visibility,
+ "pinned": False,
+ "metadata": metadata,
+ },
+ )
+ _, created = TagMember.objects.update_or_create(
+ owner=owner,
+ item=item,
+ parent=tag,
+ defaults={
+ "created_time": published_dt,
+ "visibility": visibility,
+ "metadata": metadata,
+ "position": 0,
+ },
+ )
+ return "imported" if created else "skipped"
+ except Exception as e:
+ logger.error(f"Error importing tag member: {e}")
+ return "failed"
+
+ def process_journal(self, file_path: str) -> None:
+ """Process a NDJSON file and import all items."""
+ logger.debug(f"Processing {file_path}")
+ lines_error = 0
+ import_funcs = {
+ "Tag": self.import_tag,
+ "TagMember": self.import_tag_member,
+ "Rating": self.import_rating,
+ "Comment": self.import_comment,
+ "ShelfMember": self.import_shelf_member,
+ "Review": self.import_review,
+ "Note": self.import_note,
+ "Collection": self.import_collection,
+ "ShelfLog": self.import_shelf_log,
+ "Post": self.import_post,
+ }
+ journal = {k: [] for k in import_funcs.keys()}
+ with open(file_path, "r") as jsonfile:
+ # Skip header line
+ next(jsonfile, None)
+
+ for line in jsonfile:
+ try:
+ data = json.loads(line)
+ except json.JSONDecodeError:
+ lines_error += 1
+ continue
+ data_type = data.get("type")
+ if not data_type:
+ continue
+                if data_type not in journal:
+                    # entries with no import function would inflate "total" and
+                    # keep progress below 100%, so skip them instead
+                    logger.warning(f"Skipping unknown entry type: {data_type}")
+                    continue
+                journal[data_type].append(data)
+
+ self.metadata["total"] = sum(len(items) for items in journal.values())
+ logger.debug(f"Processing {self.metadata['total']} entries")
+ if lines_error:
+ logger.error(f"Error processing journal.ndjson: {lines_error} lines")
+
+ for typ, func in import_funcs.items():
+ for data in journal.get(typ, []):
+ result = func(data)
+ self.progress(result)
+ logger.info(
+ f"Imported {self.metadata['imported']}, skipped {self.metadata['skipped']}, failed {self.metadata['failed']}"
+ )
+
+ def parse_catalog(self, file_path: str) -> None:
+ """Parse the catalog.ndjson file and build item lookup tables."""
+ logger.debug(f"Parsing catalog file: {file_path}")
+ item_count = 0
+ try:
+ with open(file_path, "r") as jsonfile:
+ for line in jsonfile:
+ try:
+ i = json.loads(line)
+                    except json.JSONDecodeError as e:
+                        logger.error(f"Error parsing catalog line: {e}")
+ continue
+ u = i.get("id")
+ if not u:
+ continue
+ # self.catalog_items[u] = i
+ item_count += 1
+ links = [u] + [r["url"] for r in i.get("external_resources", [])]
+ self.items[u] = self.get_item_by_info_and_links("", "", links)
+ logger.info(f"Loaded {item_count} items from catalog")
+ self.metadata["catalog_processed"] = item_count
+ except Exception as e:
+ logger.error(f"Error parsing catalog file: {e}")
+
+ def parse_header(self, file_path: str) -> Dict[str, Any]:
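+        """Parse the first line of an NDJSON file as its header; return {} if invalid."""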
+ try:
+ with open(file_path, "r") as jsonfile:
+ first_line = jsonfile.readline().strip()
+ if first_line:
+ header = json.loads(first_line)
+ if header.get("server"):
+ return header
+ except (json.JSONDecodeError, IOError) as e:
+ logger.error(f"Error parsing NDJSON header: {e}")
+ return {}
+
+ def run(self) -> None:
+ """Run the NDJSON import."""
+ filename = self.metadata["file"]
+ logger.debug(f"Importing {filename}")
+
+ with zipfile.ZipFile(filename, "r") as zipref:
+ with tempfile.TemporaryDirectory() as tmpdirname:
+ zipref.extractall(tmpdirname)
+
+ catalog_path = os.path.join(tmpdirname, "catalog.ndjson")
+ if os.path.exists(catalog_path):
+ catalog_header = self.parse_header(catalog_path)
+ logger.debug(f"Loading catalog.ndjson with {catalog_header}")
+ self.parse_catalog(catalog_path)
+ else:
+ logger.warning("catalog.ndjson file not found in the archive")
+
+ journal_path = os.path.join(tmpdirname, "journal.ndjson")
+ if not os.path.exists(journal_path):
+ logger.error("journal.ndjson file not found in the archive")
+ self.message = "Import failed: journal.ndjson file not found"
+ self.save()
+ return
+ header = self.parse_header(journal_path)
+ self.metadata["journal_header"] = header
+ logger.debug(f"Importing journal.ndjson with {header}")
+ self.process_journal(journal_path)
+
+ source_info = self.metadata.get("journal_header", {})
+ source_summary = f" from {source_info.get('username', 'unknown')}@{source_info.get('server', 'unknown')} ver:{source_info.get('neodb_version', 'unknown')}."
+ self.message = _("Import complete") + source_summary
+
+ metadata_stats = self.metadata.get("metadata_stats", {})
+ partial_updates = metadata_stats.get("partial_updates", 0)
+ if partial_updates > 0:
+ self.message += f", {partial_updates} items with partial metadata updates"
+
+ ratings = metadata_stats.get("ratings_updated", 0)
+ comments = metadata_stats.get("comments_updated", 0)
+ tags = metadata_stats.get("tags_updated", 0)
+
+ if ratings > 0 or comments > 0 or tags > 0:
+ self.message += (
+ f" ({ratings} ratings, {comments} comments, {tags} tag sets)"
+ )
+
+ if self.metadata.get("failed_items", []):
+ self.message += f": {self.metadata['failed']} items failed ({len(self.metadata['failed_items'])} unique items)"
+ self.save()
diff --git a/journal/tests/__init__.py b/journal/tests/__init__.py
index e94df8d2..f3bf6f07 100644
--- a/journal/tests/__init__.py
+++ b/journal/tests/__init__.py
@@ -1,3 +1,4 @@
from .csv import *
+from .ndjson import *
from .piece import *
from .search import *
diff --git a/journal/tests/ndjson.py b/journal/tests/ndjson.py
new file mode 100644
index 00000000..f882e6d0
--- /dev/null
+++ b/journal/tests/ndjson.py
@@ -0,0 +1,496 @@
+import json
+import os
+import zipfile
+from tempfile import TemporaryDirectory
+
+from django.test import TestCase
+from django.utils.dateparse import parse_datetime
+from loguru import logger
+
+from catalog.models import (
+ Edition,
+ IdType,
+ Movie,
+ Podcast,
+ PodcastEpisode,
+ TVEpisode,
+ TVSeason,
+ TVShow,
+)
+from journal.exporters import NdjsonExporter
+from journal.importers import NdjsonImporter, get_neodb_importer
+from users.models import User
+
+from ..models import *
+
+
+class NdjsonExportImportTest(TestCase):
+ databases = "__all__"
+ maxDiff = None
+
+ def setUp(self):
+ self.user1 = User.register(
+ email="ndjson_export@test.com", username="ndjson_exporter"
+ )
+ self.user2 = User.register(
+ email="ndjson_import@test.com", username="ndjson_importer"
+ )
+ self.tag1 = Tag.objects.create(
+ owner=self.user1.identity, title="favorite", pinned=True, visibility=2
+ )
+ self.dt = parse_datetime("2021-01-01T00:00:00Z")
+ self.dt2 = parse_datetime("2021-02-01T00:00:00Z")
+ self.dt3 = parse_datetime("2021-03-01T00:00:00Z")
+ self.book1 = Edition.objects.create(
+ localized_title=[{"lang": "en", "text": "Hyperion"}],
+ primary_lookup_id_type=IdType.ISBN,
+ primary_lookup_id_value="9780553283686",
+ author=["Dan Simmons"],
+ pub_year=1989,
+ )
+ self.book2 = Edition.objects.create(
+ localized_title=[{"lang": "en", "text": "Dune"}],
+ primary_lookup_id_type=IdType.ISBN,
+ primary_lookup_id_value="9780441172719",
+ author=["Frank Herbert"],
+ pub_year=1965,
+ )
+ self.movie1 = Movie.objects.create(
+ localized_title=[{"lang": "en", "text": "Inception"}],
+ primary_lookup_id_type=IdType.IMDB,
+ primary_lookup_id_value="tt1375666",
+ director=["Christopher Nolan"],
+ year=2010,
+ )
+ self.movie2 = Movie.objects.create(
+ localized_title=[{"lang": "en", "text": "The Matrix"}],
+ primary_lookup_id_type=IdType.IMDB,
+ primary_lookup_id_value="tt0133093",
+ director=["Lana Wachowski", "Lilly Wachowski"],
+ year=1999,
+ )
+ self.tvshow = TVShow.objects.create(
+ localized_title=[{"lang": "en", "text": "Breaking Bad"}],
+ primary_lookup_id_type=IdType.IMDB,
+ primary_lookup_id_value="tt0903747",
+ year=2008,
+ )
+ self.tvseason = TVSeason.objects.create(
+ localized_title=[{"lang": "en", "text": "Breaking Bad Season 1"}],
+ show=self.tvshow,
+ season_number=1,
+ )
+ self.tvepisode1 = TVEpisode.objects.create(
+ localized_title=[{"lang": "en", "text": "Pilot"}],
+ season=self.tvseason,
+ episode_number=1,
+ )
+ self.tvepisode2 = TVEpisode.objects.create(
+ localized_title=[{"lang": "en", "text": "Cat's in the Bag..."}],
+ season=self.tvseason,
+ episode_number=2,
+ )
+ # Create podcast test items
+ self.podcast = Podcast.objects.create(
+ localized_title=[{"lang": "en", "text": "Test Podcast"}],
+ primary_lookup_id_type=IdType.RSS,
+ primary_lookup_id_value="https://example.com/feed.xml",
+ host=["Test Host"],
+ )
+ self.podcastepisode = PodcastEpisode.objects.create(
+ localized_title=[{"lang": "en", "text": "Test Episode 1"}],
+ program=self.podcast,
+ guid="111",
+ pub_date=self.dt,
+ )
+
+ def test_ndjson_export_import(self):
+ # Create marks, reviews and notes for user1
+
+ # Book marks with ratings and tags
+ mark_book1 = Mark(self.user1.identity, self.book1)
+ mark_book1.update(
+ ShelfType.COMPLETE,
+ "Great sci-fi classic",
+ 10,
+ ["sci-fi", "favorite", "space"],
+ 1,
+ created_time=self.dt,
+ )
+ mark_book2 = Mark(self.user1.identity, self.book2)
+ mark_book2.update(
+ ShelfType.WISHLIST,
+ "Read it?",
+ None,
+ ["sci-fi", "desert"],
+ 1,
+ created_time=self.dt,
+ )
+ mark_book2.update(
+ ShelfType.PROGRESS,
+ "Reading!",
+ None,
+ ["sci-fi", "desert"],
+ 0,
+ created_time=self.dt2,
+ )
+ mark_book2.update(
+ ShelfType.COMPLETE,
+ "Read.",
+ None,
+ ["sci-fi", "desert"],
+ 0,
+ created_time=self.dt3,
+ )
+
+ # Movie marks with ratings
+ mark_movie1 = Mark(self.user1.identity, self.movie1)
+ mark_movie1.update(
+ ShelfType.COMPLETE,
+ "Mind-bending",
+ 8,
+ ["mindbender", "scifi"],
+ 1,
+ created_time=self.dt,
+ )
+
+ mark_movie2 = Mark(self.user1.identity, self.movie2)
+ mark_movie2.update(
+ ShelfType.WISHLIST, "Need to rewatch", None, [], 1, created_time=self.dt2
+ )
+
+ # TV show mark
+ mark_tvshow = Mark(self.user1.identity, self.tvshow)
+ mark_tvshow.update(
+ ShelfType.WISHLIST,
+ "Heard it's good",
+ None,
+ ["drama"],
+ 1,
+ created_time=self.dt,
+ )
+
+ # TV episode marks
+ mark_episode1 = Mark(self.user1.identity, self.tvepisode1)
+ mark_episode1.update(
+ ShelfType.COMPLETE,
+ "Great start",
+ 9,
+ ["pilot", "drama"],
+ 1,
+ created_time=self.dt2,
+ )
+
+ mark_episode2 = Mark(self.user1.identity, self.tvepisode2)
+ mark_episode2.update(
+ ShelfType.COMPLETE, "It gets better", 9, [], 1, created_time=self.dt3
+ )
+
+ # Podcast episode mark
+ mark_podcast = Mark(self.user1.identity, self.podcastepisode)
+ mark_podcast.update(
+ ShelfType.COMPLETE,
+ "Insightful episode",
+ 8,
+ ["tech", "interview"],
+ 1,
+ created_time=self.dt,
+ )
+
+ # Create reviews
+ Review.update_item_review(
+ self.book1,
+ self.user1.identity,
+ "My thoughts on Hyperion",
+ "A masterpiece of science fiction that weaves multiple storylines into a captivating narrative.",
+ visibility=1,
+ created_time=self.dt,
+ )
+
+ Review.update_item_review(
+ self.movie1,
+ self.user1.identity,
+ "Inception Review",
+ "Christopher Nolan at his best. The movie plays with reality and dreams in a fascinating way.",
+ visibility=1,
+ )
+
+ # Create notes
+ Note.objects.create(
+ item=self.book2,
+ owner=self.user1.identity,
+ title="Reading progress",
+ content="Just finished the first part. The world-building is incredible.\n\n - p 125",
+ progress_type=Note.ProgressType.PAGE,
+ progress_value="125",
+ visibility=1,
+ )
+
+ Note.objects.create(
+ item=self.tvshow,
+ owner=self.user1.identity,
+ title="Before watching",
+ content="Things to look out for according to friends:\n- Character development\n- Color symbolism\n\n - e 0",
+ progress_type=Note.ProgressType.EPISODE,
+ progress_value="2",
+ visibility=1,
+ )
+
+ # Create TV episode note
+ Note.objects.create(
+ item=self.tvepisode1,
+ owner=self.user1.identity,
+ title="Episode thoughts",
+ content="Great pilot episode. Sets up the character arcs really well.",
+ visibility=1,
+ )
+
+ # Create podcast episode note
+ Note.objects.create(
+ item=self.podcastepisode,
+ owner=self.user1.identity,
+ title="Podcast episode notes",
+ content="Interesting discussion about tech trends. Timestamp 23:45 has a good point about AI.",
+ progress_type=Note.ProgressType.TIMESTAMP,
+ progress_value="23:45",
+ visibility=1,
+ )
+
+ # Create collections
+ items = [self.book1, self.movie1]
+ collection = Collection.objects.create(
+ owner=self.user1.identity,
+ title="Favorites",
+ brief="My all-time favorites",
+ visibility=1,
+ )
+ for i in items:
+ collection.append_item(i)
+
+ # Create another collection with different items
+ items2 = [self.book2, self.movie2, self.tvshow]
+ collection2 = Collection.objects.create(
+ owner=self.user1.identity,
+ title="To Review",
+ brief="Items I need to review soon",
+ visibility=1,
+ )
+ for i in items2:
+ collection2.append_item(i)
+
+        # Collect the shelf log entries generated by the marks above
+ logs = ShelfLogEntry.objects.filter(owner=self.user1.identity).order_by(
+ "timestamp", "item_id"
+ )
+
+ # Export data to NDJSON
+ exporter = NdjsonExporter.create(user=self.user1)
+ exporter.run()
+ export_path = exporter.metadata["file"]
+ logger.debug(f"exported to {export_path}")
+ self.assertTrue(os.path.exists(export_path))
+
+ # Validate the NDJSON export file structure
+ with TemporaryDirectory() as extract_dir:
+ with zipfile.ZipFile(export_path, "r") as zip_ref:
+ zip_ref.extractall(extract_dir)
+ logger.debug(f"unzipped to {extract_dir}")
+
+ # Check journal.ndjson exists
+ journal_path = os.path.join(extract_dir, "journal.ndjson")
+ self.assertTrue(
+ os.path.exists(journal_path), "journal.ndjson file missing"
+ )
+
+ # Check catalog.ndjson exists
+ catalog_path = os.path.join(extract_dir, "catalog.ndjson")
+ self.assertTrue(
+ os.path.exists(catalog_path), "catalog.ndjson file missing"
+ )
+
+ # Check attachments directory exists
+ attachments_path = os.path.join(extract_dir, "attachments")
+ self.assertTrue(
+ os.path.exists(attachments_path), "attachments directory missing"
+ )
+
+ # Count the number of JSON objects in journal.ndjson
+ with open(journal_path, "r") as f:
+ lines = f.readlines()
+ # First line is header, rest are data
+ self.assertGreater(
+ len(lines), 1, "journal.ndjson has no data lines"
+ )
+
+ # Check the first line is a header
+ header = json.loads(lines[0])
+ self.assertIn("server", header, "Missing server in header")
+ self.assertIn("username", header, "Missing username in header")
+ self.assertEqual(
+ header["username"],
+ "ndjson_exporter",
+ "Wrong username in header",
+ )
+
+ # Count data objects by type
+ type_counts = {
+ "ShelfMember": 0,
+ "Review": 0,
+ "Note": 0,
+ "Collection": 0,
+ "ShelfLog": 0,
+ "post": 0,
+ }
+
+ for line in lines[1:]:
+ data = json.loads(line)
+ if "type" in data:
+ type_counts[data["type"]] = (
+ type_counts.get(data["type"], 0) + 1
+ )
+
+ # Verify counts
+ self.assertEqual(
+ type_counts["ShelfMember"], 8, "Expected 8 ShelfMember entries"
+ )
+ self.assertEqual(
+ type_counts["Review"], 2, "Expected 2 Review entries"
+ )
+ self.assertEqual(type_counts["Note"], 4, "Expected 4 Note entries")
+ self.assertEqual(
+ type_counts["Collection"], 2, "Expected 2 Collection entries"
+ )
+ self.assertEqual(type_counts["ShelfLog"], logs.count())
+
+ # Now import the export file into a different user account
+ self.assertEqual(get_neodb_importer(export_path), NdjsonImporter)
+ importer = NdjsonImporter.create(
+ user=self.user2, file=export_path, visibility=2
+ )
+ importer.run()
+ self.assertIn("Import complete", importer.message)
+
+ # Verify imported data
+
+ # Check marks
+ mark_book1_imported = Mark(self.user2.identity, self.book1)
+ self.assertEqual(mark_book1_imported.shelf_type, ShelfType.COMPLETE)
+ self.assertEqual(mark_book1_imported.comment_text, "Great sci-fi classic")
+ self.assertEqual(mark_book1_imported.rating_grade, 10)
+ self.assertEqual(mark_book1_imported.visibility, 1)
+ self.assertEqual(
+ set(mark_book1_imported.tags), set(["sci-fi", "favorite", "space"])
+ )
+
+ mark_book2_imported = Mark(self.user2.identity, self.book2)
+ self.assertEqual(mark_book2_imported.shelf_type, ShelfType.COMPLETE)
+ self.assertEqual(mark_book2_imported.comment_text, "Read.")
+ self.assertIsNone(mark_book2_imported.rating_grade)
+ self.assertEqual(set(mark_book2_imported.tags), set(["sci-fi", "desert"]))
+ self.assertEqual(mark_book2_imported.visibility, 0)
+
+ mark_movie1_imported = Mark(self.user2.identity, self.movie1)
+ self.assertEqual(mark_movie1_imported.shelf_type, ShelfType.COMPLETE)
+ self.assertEqual(mark_movie1_imported.comment_text, "Mind-bending")
+ self.assertEqual(mark_movie1_imported.rating_grade, 8)
+ self.assertEqual(set(mark_movie1_imported.tags), set(["mindbender", "scifi"]))
+
+ mark_episode1_imported = Mark(self.user2.identity, self.tvepisode1)
+ self.assertEqual(mark_episode1_imported.shelf_type, ShelfType.COMPLETE)
+ self.assertEqual(mark_episode1_imported.comment_text, "Great start")
+ self.assertEqual(mark_episode1_imported.rating_grade, 9)
+ self.assertEqual(set(mark_episode1_imported.tags), set(["pilot", "drama"]))
+
+ # Check podcast episode mark
+ mark_podcast_imported = Mark(self.user2.identity, self.podcastepisode)
+ self.assertEqual(mark_podcast_imported.shelf_type, ShelfType.COMPLETE)
+ self.assertEqual(mark_podcast_imported.comment_text, "Insightful episode")
+ self.assertEqual(mark_podcast_imported.rating_grade, 8)
+ self.assertEqual(set(mark_podcast_imported.tags), set(["tech", "interview"]))
+
+ # Check reviews
+ book1_reviews = Review.objects.filter(
+ owner=self.user2.identity, item=self.book1
+ )
+ self.assertEqual(book1_reviews.count(), 1)
+ self.assertEqual(book1_reviews[0].title, "My thoughts on Hyperion")
+ self.assertIn("masterpiece of science fiction", book1_reviews[0].body)
+
+ movie1_reviews = Review.objects.filter(
+ owner=self.user2.identity, item=self.movie1
+ )
+ self.assertEqual(movie1_reviews.count(), 1)
+ self.assertEqual(movie1_reviews[0].title, "Inception Review")
+ self.assertIn("Christopher Nolan", movie1_reviews[0].body)
+
+ # Check notes
+ book2_notes = Note.objects.filter(owner=self.user2.identity, item=self.book2)
+ self.assertEqual(book2_notes.count(), 1)
+ self.assertEqual(book2_notes[0].title, "Reading progress")
+ self.assertIn("world-building is incredible", book2_notes[0].content)
+ self.assertEqual(book2_notes[0].progress_type, Note.ProgressType.PAGE)
+ self.assertEqual(book2_notes[0].progress_value, "125")
+
+ tvshow_notes = Note.objects.filter(owner=self.user2.identity, item=self.tvshow)
+ self.assertEqual(tvshow_notes.count(), 1)
+ self.assertEqual(tvshow_notes[0].title, "Before watching")
+ self.assertIn("Character development", tvshow_notes[0].content)
+
+ # Check TV episode notes
+ tvepisode_notes = Note.objects.filter(
+ owner=self.user2.identity, item=self.tvepisode1
+ )
+ self.assertEqual(tvepisode_notes.count(), 1)
+ self.assertEqual(tvepisode_notes[0].title, "Episode thoughts")
+ self.assertIn("Sets up the character arcs", tvepisode_notes[0].content)
+
+ # Check podcast episode notes
+ podcast_notes = Note.objects.filter(
+ owner=self.user2.identity, item=self.podcastepisode
+ )
+ self.assertEqual(podcast_notes.count(), 1)
+ self.assertEqual(podcast_notes[0].title, "Podcast episode notes")
+ self.assertIn(
+ "Interesting discussion about tech trends", podcast_notes[0].content
+ )
+ self.assertEqual(podcast_notes[0].progress_type, Note.ProgressType.TIMESTAMP)
+ self.assertEqual(podcast_notes[0].progress_value, "23:45")
+
+ # Check first collection
+ collections = Collection.objects.filter(
+ owner=self.user2.identity, title="Favorites"
+ )
+ self.assertEqual(collections.count(), 1)
+ self.assertEqual(collections[0].brief, "My all-time favorites")
+ self.assertEqual(collections[0].visibility, 1)
+ collection_items = list(collections[0].ordered_items)
+ self.assertEqual([self.book1, self.movie1], collection_items)
+
+ # Check second collection
+ collections2 = Collection.objects.filter(
+ owner=self.user2.identity, title="To Review"
+ )
+ self.assertEqual(collections2.count(), 1)
+ self.assertEqual(collections2[0].brief, "Items I need to review soon")
+ self.assertEqual(collections2[0].visibility, 1)
+
+ # Check second collection items
+ collection2_items = [m.item for m in collections2[0].members.all()]
+ self.assertEqual(len(collection2_items), 3)
+ self.assertIn(self.book2, collection2_items)
+ self.assertIn(self.movie2, collection2_items)
+ self.assertIn(self.tvshow, collection2_items)
+
+ tag1 = Tag.objects.filter(owner=self.user2.identity, title="favorite").first()
+ self.assertIsNotNone(tag1)
+ if tag1:
+ self.assertTrue(tag1.pinned)
+ self.assertEqual(tag1.visibility, 2)
+
+ # Check shelf log entries
+ logs2 = ShelfLogEntry.objects.filter(owner=self.user2.identity).order_by(
+ "timestamp", "item_id"
+ )
+ l1 = [(log.item, log.shelf_type, log.timestamp) for log in logs]
+ l2 = [(log.item, log.shelf_type, log.timestamp) for log in logs2]
+ self.assertEqual(l1, l2)
diff --git a/users/templates/users/data.html b/users/templates/users/data.html
index 20279ea0..631a1fe9 100644
--- a/users/templates/users/data.html
+++ b/users/templates/users/data.html
@@ -48,7 +48,7 @@
{{ ndjson_export_task.message }}
{% if ndjson_export_task.metadata.file %}
- {% trans 'Download' %}
+ {% trans 'Download' %}
{% endif %}
{% endif %}
@@ -86,58 +86,148 @@
{% trans 'Upload a .zip
file containing .csv
or .ndjson
files exported from NeoDB.' %}
- {% trans 'Visibility' %}: .csv
only
-
-
-
-
-