import ndjson

mein Name 2025-03-06 11:21:01 -05:00 committed by Henri Dickson
parent 25e3536985
commit d343d6e71e
10 changed files with 1378 additions and 267 deletions


@@ -5,16 +5,19 @@ from .csv import CsvImporter
from .douban import DoubanImporter
from .goodreads import GoodreadsImporter
from .letterboxd import LetterboxdImporter
from .ndjson import NdjsonImporter
from .opml import OPMLImporter
def get_neodb_importer(filename: str) -> type[CsvImporter] | None:
def get_neodb_importer(
filename: str,
) -> type[CsvImporter] | type[NdjsonImporter] | None:
if not os.path.exists(filename) or not zipfile.is_zipfile(filename):
return None
with zipfile.ZipFile(filename, "r") as z:
files = z.namelist()
if any(f == "journal.ndjson" for f in files):
return None
return NdjsonImporter
if any(
f.endswith("_mark.csv")
or f.endswith("_review.csv")
@@ -26,6 +29,7 @@ def get_neodb_importer(filename: str) -> type[CsvImporter] | None:
__all__ = [
"CsvImporter",
"NdjsonImporter",
"LetterboxdImporter",
"OPMLImporter",
"DoubanImporter",

journal/importers/base.py Normal file (197 lines added)

@@ -0,0 +1,197 @@
import datetime
from typing import Dict, List, Literal, Optional
from django.conf import settings
from django.utils.dateparse import parse_datetime
from loguru import logger
from catalog.common.sites import SiteManager
from catalog.models import Edition, IdType, Item, SiteName
from journal.models import ShelfType
from users.models import Task
_PREFERRED_SITES = [
SiteName.Fediverse,
SiteName.RSS,
SiteName.TMDB,
SiteName.IMDB,
SiteName.GoogleBooks,
SiteName.Goodreads,
SiteName.IGDB,
]
class BaseImporter(Task):
class Meta:
app_label = "journal" # workaround bug in TypedModel
ImportResult = Literal["imported", "skipped", "failed"]
TaskQueue = "import"
DefaultMetadata = {
"total": 0,
"processed": 0,
"skipped": 0,
"imported": 0,
"failed": 0,
"failed_items": [],
"file": None,
"visibility": 0,
}
def progress(self, result: ImportResult) -> None:
"""Update import progress.
Args:
result: The import result ('imported', 'skipped', or 'failed')
"""
self.metadata["processed"] += 1
self.metadata[result] = self.metadata.get(result, 0) + 1
if self.metadata["total"]:
progress_percentage = round(
self.metadata["processed"] / self.metadata["total"] * 100
)
self.message = f"Progress: {progress_percentage}% - "
else:
self.message = ""
self.message += (
f"{self.metadata['imported']} imported, "
f"{self.metadata['skipped']} skipped, "
f"{self.metadata['failed']} failed"
)
self.save(update_fields=["metadata", "message"])
def run(self) -> None:
raise NotImplementedError
def get_item_by_info_and_links(
self, title: str, info_str: str, links: list[str]
) -> Optional[Item]:
"""Find an item based on information from CSV export.
Args:
title: Item title
info_str: Item info string (space-separated key:value pairs)
links_str: Space-separated URLs
Returns:
Item if found, None otherwise
"""
site_url = settings.SITE_INFO["site_url"] + "/"
# look for local items first
for link in links:
if link.startswith("/") or link.startswith(site_url):
item = Item.get_by_url(link, resolve_merge=True)
if item and not item.is_deleted:
return item
sites = [
SiteManager.get_site_by_url(link, detect_redirection=False)
for link in links
]
sites = [site for site in sites if site]
sites.sort(
key=lambda x: _PREFERRED_SITES.index(x.SITE_NAME)
if x.SITE_NAME in _PREFERRED_SITES
else 99
)
# match items without extra requests
for site in sites:
item = site.get_item()
if item:
return item
# match items after HEAD
sites = [
SiteManager.get_site_by_url(site.url) if site.url else site
for site in sites
]
sites = [site for site in sites if site]
for site in sites:
item = site.get_item()
if item:
return item
# fetch from remote
for site in sites:
try:
logger.debug(f"fetching {site.url}")
site.get_resource_ready()
item = site.get_item()
if item:
return item
except Exception as e:
logger.error(f"Error fetching item: {e}")
# Try using the info string
if info_str:
info_dict = {}
for pair in info_str.strip().split():
if ":" in pair:
key, value = pair.split(":", 1)
info_dict[key] = value
# Check for ISBN, IMDB, etc.
item = None
for key, value in info_dict.items():
if key == "isbn" and value:
item = Edition.objects.filter(
primary_lookup_id_type=IdType.ISBN,
primary_lookup_id_value=value,
).first()
elif key == "imdb" and value:
item = Item.objects.filter(
primary_lookup_id_type=IdType.IMDB,
primary_lookup_id_value=value,
).first()
if item:
return item
return None
def parse_tags(self, tags_str: str) -> List[str]:
"""Parse space-separated tags string into a list of tags."""
if not tags_str:
return []
return [tag.strip() for tag in tags_str.split() if tag.strip()]
def parse_info(self, info_str: str) -> Dict[str, str]:
"""Parse info string into a dictionary."""
info_dict = {}
if not info_str:
return info_dict
for pair in info_str.split():
if ":" in pair:
key, value = pair.split(":", 1)
info_dict[key] = value
return info_dict
def parse_datetime(self, timestamp_str: str | None) -> Optional[datetime.datetime]:
"""Parse ISO format timestamp into datetime."""
if not timestamp_str:
return None
try:
dt = parse_datetime(timestamp_str)
if dt and dt.tzinfo is None:
dt = dt.replace(tzinfo=datetime.UTC)
return dt
except Exception as e:
logger.error(f"Error parsing datetime {timestamp_str}: {e}")
return None
def parse_shelf_type(self, status_str: str) -> ShelfType:
"""Parse shelf type string into ShelfType enum."""
if not status_str:
return ShelfType.WISHLIST
status_map = {
"wishlist": ShelfType.WISHLIST,
"progress": ShelfType.PROGRESS,
"complete": ShelfType.COMPLETE,
"dropped": ShelfType.DROPPED,
}
return status_map.get(status_str.lower(), ShelfType.WISHLIST)
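For reference, a quick sketch of what these parsing helpers return (illustrative values only; "importer" stands for any BaseImporter instance, e.g. one obtained via create()):
importer.parse_tags("sci-fi favorite space")
# -> ["sci-fi", "favorite", "space"]
importer.parse_info("isbn:9780553283686 imdb:tt1375666")
# -> {"isbn": "9780553283686", "imdb": "tt1375666"}
importer.parse_datetime("2021-01-01T00:00:00Z")
# -> 2021-01-01 00:00:00+00:00 (UTC is assumed only when the input is naive)
importer.parse_shelf_type("complete")  # -> ShelfType.COMPLETE
importer.parse_shelf_type("unknown")   # -> ShelfType.WISHLIST (the default)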


@@ -1,181 +1,20 @@
import csv
import datetime
import os
import tempfile
import zipfile
from typing import Dict, List, Optional
from typing import Dict
from django.conf import settings
from django.utils import timezone
from django.utils.dateparse import parse_datetime
from django.utils.translation import gettext as _
from loguru import logger
from catalog.common.sites import SiteManager
from catalog.models import Edition, IdType, Item, ItemCategory, SiteName
from journal.models import Mark, Note, Review, ShelfType
from users.models import Task
from catalog.models import ItemCategory
from journal.models import Mark, Note, Review
_PREFERRED_SITES = [
SiteName.Fediverse,
SiteName.RSS,
SiteName.TMDB,
SiteName.IMDB,
SiteName.GoogleBooks,
SiteName.Goodreads,
SiteName.IGDB,
]
from .base import BaseImporter
class CsvImporter(Task):
class Meta:
app_label = "journal" # workaround bug in TypedModel
TaskQueue = "import"
DefaultMetadata = {
"total": 0,
"processed": 0,
"skipped": 0,
"imported": 0,
"failed": 0,
"failed_items": [],
"file": None,
"visibility": 0,
}
def get_item_by_info_and_links(
self, title: str, info_str: str, links_str: str
) -> Optional[Item]:
"""Find an item based on information from CSV export.
Args:
title: Item title
info_str: Item info string (space-separated key:value pairs)
links_str: Space-separated URLs
Returns:
Item if found, None otherwise
"""
site_url = settings.SITE_INFO["site_url"] + "/"
links = links_str.strip().split()
# look for local items first
for link in links:
if link.startswith("/") or link.startswith(site_url):
item = Item.get_by_url(link, resolve_merge=True)
if item and not item.is_deleted:
return item
sites = [
SiteManager.get_site_by_url(link, detect_redirection=False)
for link in links
]
sites = [site for site in sites if site]
sites.sort(
key=lambda x: _PREFERRED_SITES.index(x.SITE_NAME)
if x.SITE_NAME in _PREFERRED_SITES
else 99
)
# match items without extra requests
for site in sites:
item = site.get_item()
if item:
return item
# match items after HEAD
sites = [
SiteManager.get_site_by_url(site.url) if site.url else site
for site in sites
]
sites = [site for site in sites if site]
for site in sites:
item = site.get_item()
if item:
return item
# fetch from remote
for site in sites:
try:
logger.debug(f"fetching {site.url}")
site.get_resource_ready()
item = site.get_item()
if item:
return item
except Exception as e:
logger.error(f"Error fetching item: {e}")
# Try using the info string
if info_str:
info_dict = {}
for pair in info_str.strip().split():
if ":" in pair:
key, value = pair.split(":", 1)
info_dict[key] = value
# Check for ISBN, IMDB, etc.
item = None
for key, value in info_dict.items():
if key == "isbn" and value:
item = Edition.objects.filter(
primary_lookup_id_type=IdType.ISBN,
primary_lookup_id_value=value,
).first()
elif key == "imdb" and value:
item = Item.objects.filter(
primary_lookup_id_type=IdType.IMDB,
primary_lookup_id_value=value,
).first()
if item:
return item
return None
def parse_tags(self, tags_str: str) -> List[str]:
"""Parse space-separated tags string into a list of tags."""
if not tags_str:
return []
return [tag.strip() for tag in tags_str.split() if tag.strip()]
def parse_info(self, info_str: str) -> Dict[str, str]:
"""Parse info string into a dictionary."""
info_dict = {}
if not info_str:
return info_dict
for pair in info_str.split():
if ":" in pair:
key, value = pair.split(":", 1)
info_dict[key] = value
return info_dict
def parse_datetime(self, timestamp_str: str) -> Optional[datetime.datetime]:
"""Parse ISO format timestamp into datetime."""
if not timestamp_str:
return None
try:
dt = parse_datetime(timestamp_str)
if dt and dt.tzinfo is None:
dt = dt.replace(tzinfo=datetime.UTC)
return dt
except Exception as e:
logger.error(f"Error parsing datetime {timestamp_str}: {e}")
return None
def parse_shelf_type(self, status_str: str) -> ShelfType:
"""Parse shelf type string into ShelfType enum."""
if not status_str:
return ShelfType.WISHLIST
status_map = {
"wishlist": ShelfType.WISHLIST,
"progress": ShelfType.PROGRESS,
"complete": ShelfType.COMPLETE,
"dropped": ShelfType.DROPPED,
}
return status_map.get(status_str.lower(), ShelfType.WISHLIST)
class CsvImporter(BaseImporter):
def import_mark(self, row: Dict[str, str]) -> str:
"""Import a mark from a CSV row.
@@ -184,7 +23,9 @@ class CsvImporter(Task):
"""
try:
item = self.get_item_by_info_and_links(
row.get("title", ""), row.get("info", ""), row.get("links", "")
row.get("title", ""),
row.get("info", ""),
row.get("links", "").strip().split(),
)
if not item:
@@ -246,7 +87,9 @@ class CsvImporter(Task):
"""
try:
item = self.get_item_by_info_and_links(
row.get("title", ""), row.get("info", ""), row.get("links", "")
row.get("title", ""),
row.get("info", ""),
row.get("links", "").strip().split(),
)
if not item:
@@ -304,7 +147,9 @@ class CsvImporter(Task):
"""
try:
item = self.get_item_by_info_and_links(
row.get("title", ""), row.get("info", ""), row.get("links", "")
row.get("title", ""),
row.get("info", ""),
row.get("links", "").strip().split(),
)
if not item:
@@ -361,26 +206,6 @@ class CsvImporter(Task):
)
return "failed"
def progress(self, result: str) -> None:
"""Update import progress.
Args:
result: The import result ('imported', 'skipped', or 'failed')
"""
self.metadata["processed"] += 1
self.metadata[result] = self.metadata.get(result, 0) + 1
progress_percentage = round(
self.metadata["processed"] / self.metadata["total"] * 100
)
self.message = (
f"Progress: {progress_percentage}% - "
f"{self.metadata['imported']} imported, "
f"{self.metadata['skipped']} skipped, "
f"{self.metadata['failed']} failed"
)
self.save(update_fields=["metadata", "message"])
def process_csv_file(self, file_path: str, import_function) -> None:
"""Process a CSV file using the specified import function."""
logger.debug(f"Processing {file_path}")

journal/importers/ndjson.py Normal file (447 lines added)

@@ -0,0 +1,447 @@
import json
import os
import tempfile
import zipfile
from typing import Any, Dict
from django.utils.translation import gettext as _
from loguru import logger
from journal.models import (
Collection,
Comment,
Mark,
Note,
Rating,
Review,
ShelfLogEntry,
ShelfType,
Tag,
TagMember,
)
from .base import BaseImporter
class NdjsonImporter(BaseImporter):
"""Importer for NDJSON files exported from NeoDB."""
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.items = {}
def import_collection(self, data: Dict[str, Any]) -> BaseImporter.ImportResult:
"""Import a collection from NDJSON data."""
try:
owner = self.user.identity
visibility = data.get("visibility", self.metadata.get("visibility", 0))
metadata = data.get("metadata", {})
content_data = data.get("content", {})
published_dt = self.parse_datetime(content_data.get("published"))
name = content_data.get("name", "")
content = content_data.get("content", "")
collection = Collection.objects.create(
owner=owner,
title=name,
brief=content,
visibility=visibility,
metadata=metadata,
created_time=published_dt,
)
item_data = data.get("items", [])
for item_entry in item_data:
item_url = item_entry.get("item")
if not item_url:
continue
item = self.items.get(item_url)
if not item:
logger.warning(f"Could not find item for collection: {item_url}")
continue
metadata = item_entry.get("metadata", {})
collection.append_item(item, metadata=metadata)
return "imported"
except Exception as e:
logger.error(f"Error importing collection: {e}")
return "failed"
def import_shelf_member(self, data: Dict[str, Any]) -> BaseImporter.ImportResult:
"""Import a shelf member (mark) from NDJSON data."""
try:
owner = self.user.identity
visibility = data.get("visibility", self.metadata.get("visibility", 0))
metadata = data.get("metadata", {})
content_data = data.get("content", {})
published_dt = self.parse_datetime(content_data.get("published"))
item = self.items.get(content_data.get("withRegardTo", ""))
if not item:
raise KeyError(f"Could not find item: {content_data.get('withRegardTo', '')}")
shelf_type = content_data.get("status", ShelfType.WISHLIST)
mark = Mark(owner, item)
if mark.created_time and published_dt and mark.created_time >= published_dt:
return "skipped"
mark.update(
shelf_type=shelf_type,
visibility=visibility,
metadata=metadata,
created_time=published_dt,
)
return "imported"
except Exception as e:
logger.error(f"Error importing shelf member: {e}")
return "failed"
def import_shelf_log(self, data: Dict[str, Any]) -> BaseImporter.ImportResult:
"""Import a shelf log entry from NDJSON data."""
try:
item = self.items.get(data.get("item", ""))
if not item:
raise KeyError(f"Could not find item: {data.get('item', '')}")
owner = self.user.identity
shelf_type = data.get("status", ShelfType.WISHLIST)
# posts = data.get("posts", []) # TODO but will be tricky
timestamp = data.get("timestamp")
timestamp_dt = self.parse_datetime(timestamp) if timestamp else None
_, created = ShelfLogEntry.objects.update_or_create(
owner=owner,
item=item,
shelf_type=shelf_type,
timestamp=timestamp_dt,
)
# return "imported" if created else "skipped"
# count skips as success, otherwise it may confuse the user
return "imported"
except Exception as e:
logger.error(f"Error importing shelf log: {e}")
return "failed"
def import_post(self, data: Dict[str, Any]) -> BaseImporter.ImportResult:
"""Import a post from NDJSON data."""
# TODO
return "skipped"
def import_review(self, data: Dict[str, Any]) -> BaseImporter.ImportResult:
"""Import a review from NDJSON data."""
try:
owner = self.user.identity
visibility = data.get("visibility", self.metadata.get("visibility", 0))
metadata = data.get("metadata", {})
content_data = data.get("content", {})
published_dt = self.parse_datetime(content_data.get("published"))
item = self.items.get(content_data.get("withRegardTo", ""))
if not item:
raise KeyError(f"Could not find item: {content_data.get('withRegardTo', '')}")
name = content_data.get("name", "")
content = content_data.get("content", "")
existing_review = Review.objects.filter(
owner=owner, item=item, title=name
).first()
if (
existing_review
and existing_review.created_time
and published_dt
and existing_review.created_time >= published_dt
):
return "skipped"
Review.objects.create(
owner=owner,
item=item,
title=name,
body=content,
created_time=published_dt,
visibility=visibility,
metadata=metadata,
)
return "imported"
except Exception as e:
logger.error(f"Error importing review: {e}")
return "failed"
def import_note(self, data: Dict[str, Any]) -> BaseImporter.ImportResult:
"""Import a note from NDJSON data."""
try:
owner = self.user.identity
visibility = data.get("visibility", self.metadata.get("visibility", 0))
content_data = data.get("content", {})
published_dt = self.parse_datetime(content_data.get("published"))
item = self.items.get(content_data.get("withRegardTo", ""))
if not item:
raise KeyError(f"Could not find item: {content_data.get('withRegardTo', '')}")
title = content_data.get("title", "")
content = content_data.get("content", "")
sensitive = content_data.get("sensitive", False)
progress = content_data.get("progress", {})
progress_type = progress.get("type", "")
progress_value = progress.get("value", "")
Note.objects.create(
item=item,
owner=owner,
title=title,
content=content,
sensitive=sensitive,
progress_type=progress_type,
progress_value=progress_value,
created_time=published_dt,
visibility=visibility,
metadata=data.get("metadata", {}),
)
return "imported"
except Exception as e:
logger.error(f"Error importing note: {e}")
return "failed"
def import_comment(self, data: Dict[str, Any]) -> BaseImporter.ImportResult:
"""Import a comment from NDJSON data."""
try:
owner = self.user.identity
visibility = data.get("visibility", self.metadata.get("visibility", 0))
metadata = data.get("metadata", {})
content_data = data.get("content", {})
published_dt = self.parse_datetime(content_data.get("published"))
item = self.items.get(content_data.get("withRegardTo", ""))
if not item:
raise KeyError(f"Could not find item: {content_data.get('withRegardTo', '')}")
content = content_data.get("content", "")
existing_comment = Comment.objects.filter(owner=owner, item=item).first()
if (
existing_comment
and existing_comment.created_time
and published_dt
and existing_comment.created_time >= published_dt
):
return "skipped"
Comment.objects.create(
owner=owner,
item=item,
text=content,
created_time=published_dt,
visibility=visibility,
metadata=metadata,
)
return "imported"
except Exception as e:
logger.error(f"Error importing comment: {e}")
return "failed"
def import_rating(self, data: Dict[str, Any]) -> BaseImporter.ImportResult:
"""Import a rating from NDJSON data."""
try:
owner = self.user.identity
visibility = data.get("visibility", self.metadata.get("visibility", 0))
metadata = data.get("metadata", {})
content_data = data.get("content", {})
published_dt = self.parse_datetime(content_data.get("published"))
item = self.items.get(content_data.get("withRegardTo", ""))
if not item:
raise KeyError(f"Could not find item: {content_data.get('withRegardTo', '')}")
rating_grade = int(float(content_data.get("value", 0)))
existing_rating = Rating.objects.filter(owner=owner, item=item).first()
if (
existing_rating
and existing_rating.created_time
and published_dt
and existing_rating.created_time >= published_dt
):
return "skipped"
Rating.objects.create(
owner=owner,
item=item,
grade=rating_grade,
created_time=published_dt,
visibility=visibility,
metadata=metadata,
)
return "imported"
except Exception as e:
logger.error(f"Error importing rating: {e}")
return "failed"
def import_tag(self, data: Dict[str, Any]) -> BaseImporter.ImportResult:
"""Import tags from NDJSON data."""
try:
owner = self.user.identity
visibility = data.get("visibility", self.metadata.get("visibility", 0))
pinned = data.get("pinned", self.metadata.get("pinned", False))
tag_title = Tag.cleanup_title(data.get("name", ""))
_, created = Tag.objects.update_or_create(
owner=owner,
title=tag_title,
defaults={
"visibility": visibility,
"pinned": pinned,
},
)
return "imported" if created else "skipped"
except Exception as e:
logger.error(f"Error importing tag member: {e}")
return "failed"
def import_tag_member(self, data: Dict[str, Any]) -> BaseImporter.ImportResult:
"""Import tags from NDJSON data."""
try:
owner = self.user.identity
visibility = data.get("visibility", self.metadata.get("visibility", 0))
metadata = data.get("metadata", {})
content_data = data.get("content", {})
published_dt = self.parse_datetime(content_data.get("published"))
item = self.items.get(content_data.get("withRegardTo", ""))
if not item:
raise KeyError(f"Could not find item: {content_data.get('withRegardTo', '')}")
tag_title = Tag.cleanup_title(content_data.get("tag", ""))
tag, _ = Tag.objects.get_or_create(
owner=owner,
title=tag_title,
defaults={
"created_time": published_dt,
"visibility": visibility,
"pinned": False,
"metadata": metadata,
},
)
_, created = TagMember.objects.update_or_create(
owner=owner,
item=item,
parent=tag,
defaults={
"created_time": published_dt,
"visibility": visibility,
"metadata": metadata,
"position": 0,
},
)
return "imported" if created else "skipped"
except Exception as e:
logger.error(f"Error importing tag member: {e}")
return "failed"
def process_journal(self, file_path: str) -> None:
"""Process a NDJSON file and import all items."""
logger.debug(f"Processing {file_path}")
lines_error = 0
import_funcs = {
"Tag": self.import_tag,
"TagMember": self.import_tag_member,
"Rating": self.import_rating,
"Comment": self.import_comment,
"ShelfMember": self.import_shelf_member,
"Review": self.import_review,
"Note": self.import_note,
"Collection": self.import_collection,
"ShelfLog": self.import_shelf_log,
"Post": self.import_post,
}
journal = {k: [] for k in import_funcs.keys()}
with open(file_path, "r") as jsonfile:
# Skip header line
next(jsonfile, None)
for line in jsonfile:
try:
data = json.loads(line)
except json.JSONDecodeError:
lines_error += 1
continue
data_type = data.get("type")
if not data_type:
continue
if data_type not in journal:
journal[data_type] = []
journal[data_type].append(data)
self.metadata["total"] = sum(len(items) for items in journal.values())
logger.debug(f"Processing {self.metadata['total']} entries")
if lines_error:
logger.error(f"Error processing journal.ndjson: {lines_error} lines")
for typ, func in import_funcs.items():
for data in journal.get(typ, []):
result = func(data)
self.progress(result)
logger.info(
f"Imported {self.metadata['imported']}, skipped {self.metadata['skipped']}, failed {self.metadata['failed']}"
)
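For orientation, the line shapes process_journal dispatches on, inferred from the import_* handlers above; every field value below is hypothetical:
# A ShelfMember (mark) line, per import_shelf_member:
{
    "type": "ShelfMember",
    "visibility": 1,
    "metadata": {},
    "content": {
        "withRegardTo": "https://neodb.example/book/1",  # resolved via self.items
        "status": "complete",
        "published": "2021-01-01T00:00:00Z",
    },
}
# A Tag line, per import_tag:
{"type": "Tag", "name": "favorite", "pinned": True, "visibility": 2}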
def parse_catalog(self, file_path: str) -> None:
"""Parse the catalog.ndjson file and build item lookup tables."""
logger.debug(f"Parsing catalog file: {file_path}")
item_count = 0
try:
with open(file_path, "r") as jsonfile:
for line in jsonfile:
try:
i = json.loads(line)
except Exception as e:
logger.error(f"Error processing catalog item: {e}")
continue
u = i.get("id")
if not u:
continue
# self.catalog_items[u] = i
item_count += 1
links = [u] + [r["url"] for r in i.get("external_resources", [])]
self.items[u] = self.get_item_by_info_and_links("", "", links)
logger.info(f"Loaded {item_count} items from catalog")
self.metadata["catalog_processed"] = item_count
except Exception as e:
logger.error(f"Error parsing catalog file: {e}")
def parse_header(self, file_path: str) -> Dict[str, Any]:
try:
with open(file_path, "r") as jsonfile:
first_line = jsonfile.readline().strip()
if first_line:
header = json.loads(first_line)
if header.get("server"):
return header
except (json.JSONDecodeError, IOError) as e:
logger.error(f"Error parsing NDJSON header: {e}")
return {}
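Judging from the fields read here and in run() below (and the header assertions in the tests), the first line of each .ndjson file is a JSON header roughly like this (values illustrative):
header = importer.parse_header("journal.ndjson")
# header -> {"server": "neodb.example", "username": "exporter", "neodb_version": "0.x"}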
def run(self) -> None:
"""Run the NDJSON import."""
filename = self.metadata["file"]
logger.debug(f"Importing {filename}")
with zipfile.ZipFile(filename, "r") as zipref:
with tempfile.TemporaryDirectory() as tmpdirname:
zipref.extractall(tmpdirname)
catalog_path = os.path.join(tmpdirname, "catalog.ndjson")
if os.path.exists(catalog_path):
catalog_header = self.parse_header(catalog_path)
logger.debug(f"Loading catalog.ndjson with {catalog_header}")
self.parse_catalog(catalog_path)
else:
logger.warning("catalog.ndjson file not found in the archive")
journal_path = os.path.join(tmpdirname, "journal.ndjson")
if not os.path.exists(journal_path):
logger.error("journal.ndjson file not found in the archive")
self.message = "Import failed: journal.ndjson file not found"
self.save()
return
header = self.parse_header(journal_path)
self.metadata["journal_header"] = header
logger.debug(f"Importing journal.ndjson with {header}")
self.process_journal(journal_path)
source_info = self.metadata.get("journal_header", {})
source_summary = f" from {source_info.get('username', 'unknown')}@{source_info.get('server', 'unknown')} ver:{source_info.get('neodb_version', 'unknown')}."
self.message = _("Import complete") + source_summary
metadata_stats = self.metadata.get("metadata_stats", {})
partial_updates = metadata_stats.get("partial_updates", 0)
if partial_updates > 0:
self.message += f", {partial_updates} items with partial metadata updates"
ratings = metadata_stats.get("ratings_updated", 0)
comments = metadata_stats.get("comments_updated", 0)
tags = metadata_stats.get("tags_updated", 0)
if ratings > 0 or comments > 0 or tags > 0:
self.message += (
f" ({ratings} ratings, {comments} comments, {tags} tag sets)"
)
if self.metadata.get("failed_items", []):
self.message += f": {self.metadata['failed']} items failed ({len(self.metadata['failed_items'])} unique items)"
self.save()
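End-to-end usage, mirroring the test and view code further below (the path and user are hypothetical):
importer_cls = get_neodb_importer("/tmp/neodb_export.zip")
if importer_cls is NdjsonImporter:
    task = NdjsonImporter.create(
        user=some_user, file="/tmp/neodb_export.zip", visibility=0
    )
    task.enqueue()  # queue it, or call task.run() synchronously as the tests do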


@@ -1,3 +1,4 @@
from .csv import *
from .ndjson import *
from .piece import *
from .search import *

journal/tests/ndjson.py Normal file (496 lines added)

@@ -0,0 +1,496 @@
import json
import os
import zipfile
from tempfile import TemporaryDirectory
from django.test import TestCase
from django.utils.dateparse import parse_datetime
from loguru import logger
from catalog.models import (
Edition,
IdType,
Movie,
Podcast,
PodcastEpisode,
TVEpisode,
TVSeason,
TVShow,
)
from journal.exporters import NdjsonExporter
from journal.importers import NdjsonImporter, get_neodb_importer
from users.models import User
from ..models import *
class NdjsonExportImportTest(TestCase):
databases = "__all__"
maxDiff = None
def setUp(self):
self.user1 = User.register(
email="ndjson_export@test.com", username="ndjson_exporter"
)
self.user2 = User.register(
email="ndjson_import@test.com", username="ndjson_importer"
)
self.tag1 = Tag.objects.create(
owner=self.user1.identity, title="favorite", pinned=True, visibility=2
)
self.dt = parse_datetime("2021-01-01T00:00:00Z")
self.dt2 = parse_datetime("2021-02-01T00:00:00Z")
self.dt3 = parse_datetime("2021-03-01T00:00:00Z")
self.book1 = Edition.objects.create(
localized_title=[{"lang": "en", "text": "Hyperion"}],
primary_lookup_id_type=IdType.ISBN,
primary_lookup_id_value="9780553283686",
author=["Dan Simmons"],
pub_year=1989,
)
self.book2 = Edition.objects.create(
localized_title=[{"lang": "en", "text": "Dune"}],
primary_lookup_id_type=IdType.ISBN,
primary_lookup_id_value="9780441172719",
author=["Frank Herbert"],
pub_year=1965,
)
self.movie1 = Movie.objects.create(
localized_title=[{"lang": "en", "text": "Inception"}],
primary_lookup_id_type=IdType.IMDB,
primary_lookup_id_value="tt1375666",
director=["Christopher Nolan"],
year=2010,
)
self.movie2 = Movie.objects.create(
localized_title=[{"lang": "en", "text": "The Matrix"}],
primary_lookup_id_type=IdType.IMDB,
primary_lookup_id_value="tt0133093",
director=["Lana Wachowski", "Lilly Wachowski"],
year=1999,
)
self.tvshow = TVShow.objects.create(
localized_title=[{"lang": "en", "text": "Breaking Bad"}],
primary_lookup_id_type=IdType.IMDB,
primary_lookup_id_value="tt0903747",
year=2008,
)
self.tvseason = TVSeason.objects.create(
localized_title=[{"lang": "en", "text": "Breaking Bad Season 1"}],
show=self.tvshow,
season_number=1,
)
self.tvepisode1 = TVEpisode.objects.create(
localized_title=[{"lang": "en", "text": "Pilot"}],
season=self.tvseason,
episode_number=1,
)
self.tvepisode2 = TVEpisode.objects.create(
localized_title=[{"lang": "en", "text": "Cat's in the Bag..."}],
season=self.tvseason,
episode_number=2,
)
# Create podcast test items
self.podcast = Podcast.objects.create(
localized_title=[{"lang": "en", "text": "Test Podcast"}],
primary_lookup_id_type=IdType.RSS,
primary_lookup_id_value="https://example.com/feed.xml",
host=["Test Host"],
)
self.podcastepisode = PodcastEpisode.objects.create(
localized_title=[{"lang": "en", "text": "Test Episode 1"}],
program=self.podcast,
guid="111",
pub_date=self.dt,
)
def test_ndjson_export_import(self):
# Create marks, reviews and notes for user1
# Book marks with ratings and tags
mark_book1 = Mark(self.user1.identity, self.book1)
mark_book1.update(
ShelfType.COMPLETE,
"Great sci-fi classic",
10,
["sci-fi", "favorite", "space"],
1,
created_time=self.dt,
)
mark_book2 = Mark(self.user1.identity, self.book2)
mark_book2.update(
ShelfType.WISHLIST,
"Read it?",
None,
["sci-fi", "desert"],
1,
created_time=self.dt,
)
mark_book2.update(
ShelfType.PROGRESS,
"Reading!",
None,
["sci-fi", "desert"],
0,
created_time=self.dt2,
)
mark_book2.update(
ShelfType.COMPLETE,
"Read.",
None,
["sci-fi", "desert"],
0,
created_time=self.dt3,
)
# Movie marks with ratings
mark_movie1 = Mark(self.user1.identity, self.movie1)
mark_movie1.update(
ShelfType.COMPLETE,
"Mind-bending",
8,
["mindbender", "scifi"],
1,
created_time=self.dt,
)
mark_movie2 = Mark(self.user1.identity, self.movie2)
mark_movie2.update(
ShelfType.WISHLIST, "Need to rewatch", None, [], 1, created_time=self.dt2
)
# TV show mark
mark_tvshow = Mark(self.user1.identity, self.tvshow)
mark_tvshow.update(
ShelfType.WISHLIST,
"Heard it's good",
None,
["drama"],
1,
created_time=self.dt,
)
# TV episode marks
mark_episode1 = Mark(self.user1.identity, self.tvepisode1)
mark_episode1.update(
ShelfType.COMPLETE,
"Great start",
9,
["pilot", "drama"],
1,
created_time=self.dt2,
)
mark_episode2 = Mark(self.user1.identity, self.tvepisode2)
mark_episode2.update(
ShelfType.COMPLETE, "It gets better", 9, [], 1, created_time=self.dt3
)
# Podcast episode mark
mark_podcast = Mark(self.user1.identity, self.podcastepisode)
mark_podcast.update(
ShelfType.COMPLETE,
"Insightful episode",
8,
["tech", "interview"],
1,
created_time=self.dt,
)
# Create reviews
Review.update_item_review(
self.book1,
self.user1.identity,
"My thoughts on Hyperion",
"A masterpiece of science fiction that weaves multiple storylines into a captivating narrative.",
visibility=1,
created_time=self.dt,
)
Review.update_item_review(
self.movie1,
self.user1.identity,
"Inception Review",
"Christopher Nolan at his best. The movie plays with reality and dreams in a fascinating way.",
visibility=1,
)
# Create notes
Note.objects.create(
item=self.book2,
owner=self.user1.identity,
title="Reading progress",
content="Just finished the first part. The world-building is incredible.\n\n - p 125",
progress_type=Note.ProgressType.PAGE,
progress_value="125",
visibility=1,
)
Note.objects.create(
item=self.tvshow,
owner=self.user1.identity,
title="Before watching",
content="Things to look out for according to friends:\n- Character development\n- Color symbolism\n\n - e 0",
progress_type=Note.ProgressType.EPISODE,
progress_value="2",
visibility=1,
)
# Create TV episode note
Note.objects.create(
item=self.tvepisode1,
owner=self.user1.identity,
title="Episode thoughts",
content="Great pilot episode. Sets up the character arcs really well.",
visibility=1,
)
# Create podcast episode note
Note.objects.create(
item=self.podcastepisode,
owner=self.user1.identity,
title="Podcast episode notes",
content="Interesting discussion about tech trends. Timestamp 23:45 has a good point about AI.",
progress_type=Note.ProgressType.TIMESTAMP,
progress_value="23:45",
visibility=1,
)
# Create collections
items = [self.book1, self.movie1]
collection = Collection.objects.create(
owner=self.user1.identity,
title="Favorites",
brief="My all-time favorites",
visibility=1,
)
for i in items:
collection.append_item(i)
# Create another collection with different items
items2 = [self.book2, self.movie2, self.tvshow]
collection2 = Collection.objects.create(
owner=self.user1.identity,
title="To Review",
brief="Items I need to review soon",
visibility=1,
)
for i in items2:
collection2.append_item(i)
# Create shelf log entries
logs = ShelfLogEntry.objects.filter(owner=self.user1.identity).order_by(
"timestamp", "item_id"
)
# Export data to NDJSON
exporter = NdjsonExporter.create(user=self.user1)
exporter.run()
export_path = exporter.metadata["file"]
logger.debug(f"exported to {export_path}")
self.assertTrue(os.path.exists(export_path))
# Validate the NDJSON export file structure
with TemporaryDirectory() as extract_dir:
with zipfile.ZipFile(export_path, "r") as zip_ref:
zip_ref.extractall(extract_dir)
logger.debug(f"unzipped to {extract_dir}")
# Check journal.ndjson exists
journal_path = os.path.join(extract_dir, "journal.ndjson")
self.assertTrue(
os.path.exists(journal_path), "journal.ndjson file missing"
)
# Check catalog.ndjson exists
catalog_path = os.path.join(extract_dir, "catalog.ndjson")
self.assertTrue(
os.path.exists(catalog_path), "catalog.ndjson file missing"
)
# Check attachments directory exists
attachments_path = os.path.join(extract_dir, "attachments")
self.assertTrue(
os.path.exists(attachments_path), "attachments directory missing"
)
# Count the number of JSON objects in journal.ndjson
with open(journal_path, "r") as f:
lines = f.readlines()
# First line is header, rest are data
self.assertGreater(
len(lines), 1, "journal.ndjson has no data lines"
)
# Check the first line is a header
header = json.loads(lines[0])
self.assertIn("server", header, "Missing server in header")
self.assertIn("username", header, "Missing username in header")
self.assertEqual(
header["username"],
"ndjson_exporter",
"Wrong username in header",
)
# Count data objects by type
type_counts = {
"ShelfMember": 0,
"Review": 0,
"Note": 0,
"Collection": 0,
"ShelfLog": 0,
"post": 0,
}
for line in lines[1:]:
data = json.loads(line)
if "type" in data:
type_counts[data["type"]] = (
type_counts.get(data["type"], 0) + 1
)
# Verify counts
self.assertEqual(
type_counts["ShelfMember"], 8, "Expected 8 ShelfMember entries"
)
self.assertEqual(
type_counts["Review"], 2, "Expected 2 Review entries"
)
self.assertEqual(type_counts["Note"], 4, "Expected 4 Note entries")
self.assertEqual(
type_counts["Collection"], 2, "Expected 2 Collection entries"
)
self.assertEqual(type_counts["ShelfLog"], logs.count())
# Now import the export file into a different user account
self.assertEqual(get_neodb_importer(export_path), NdjsonImporter)
importer = NdjsonImporter.create(
user=self.user2, file=export_path, visibility=2
)
importer.run()
self.assertIn("Import complete", importer.message)
# Verify imported data
# Check marks
mark_book1_imported = Mark(self.user2.identity, self.book1)
self.assertEqual(mark_book1_imported.shelf_type, ShelfType.COMPLETE)
self.assertEqual(mark_book1_imported.comment_text, "Great sci-fi classic")
self.assertEqual(mark_book1_imported.rating_grade, 10)
self.assertEqual(mark_book1_imported.visibility, 1)
self.assertEqual(
set(mark_book1_imported.tags), set(["sci-fi", "favorite", "space"])
)
mark_book2_imported = Mark(self.user2.identity, self.book2)
self.assertEqual(mark_book2_imported.shelf_type, ShelfType.COMPLETE)
self.assertEqual(mark_book2_imported.comment_text, "Read.")
self.assertIsNone(mark_book2_imported.rating_grade)
self.assertEqual(set(mark_book2_imported.tags), set(["sci-fi", "desert"]))
self.assertEqual(mark_book2_imported.visibility, 0)
mark_movie1_imported = Mark(self.user2.identity, self.movie1)
self.assertEqual(mark_movie1_imported.shelf_type, ShelfType.COMPLETE)
self.assertEqual(mark_movie1_imported.comment_text, "Mind-bending")
self.assertEqual(mark_movie1_imported.rating_grade, 8)
self.assertEqual(set(mark_movie1_imported.tags), set(["mindbender", "scifi"]))
mark_episode1_imported = Mark(self.user2.identity, self.tvepisode1)
self.assertEqual(mark_episode1_imported.shelf_type, ShelfType.COMPLETE)
self.assertEqual(mark_episode1_imported.comment_text, "Great start")
self.assertEqual(mark_episode1_imported.rating_grade, 9)
self.assertEqual(set(mark_episode1_imported.tags), set(["pilot", "drama"]))
# Check podcast episode mark
mark_podcast_imported = Mark(self.user2.identity, self.podcastepisode)
self.assertEqual(mark_podcast_imported.shelf_type, ShelfType.COMPLETE)
self.assertEqual(mark_podcast_imported.comment_text, "Insightful episode")
self.assertEqual(mark_podcast_imported.rating_grade, 8)
self.assertEqual(set(mark_podcast_imported.tags), set(["tech", "interview"]))
# Check reviews
book1_reviews = Review.objects.filter(
owner=self.user2.identity, item=self.book1
)
self.assertEqual(book1_reviews.count(), 1)
self.assertEqual(book1_reviews[0].title, "My thoughts on Hyperion")
self.assertIn("masterpiece of science fiction", book1_reviews[0].body)
movie1_reviews = Review.objects.filter(
owner=self.user2.identity, item=self.movie1
)
self.assertEqual(movie1_reviews.count(), 1)
self.assertEqual(movie1_reviews[0].title, "Inception Review")
self.assertIn("Christopher Nolan", movie1_reviews[0].body)
# Check notes
book2_notes = Note.objects.filter(owner=self.user2.identity, item=self.book2)
self.assertEqual(book2_notes.count(), 1)
self.assertEqual(book2_notes[0].title, "Reading progress")
self.assertIn("world-building is incredible", book2_notes[0].content)
self.assertEqual(book2_notes[0].progress_type, Note.ProgressType.PAGE)
self.assertEqual(book2_notes[0].progress_value, "125")
tvshow_notes = Note.objects.filter(owner=self.user2.identity, item=self.tvshow)
self.assertEqual(tvshow_notes.count(), 1)
self.assertEqual(tvshow_notes[0].title, "Before watching")
self.assertIn("Character development", tvshow_notes[0].content)
# Check TV episode notes
tvepisode_notes = Note.objects.filter(
owner=self.user2.identity, item=self.tvepisode1
)
self.assertEqual(tvepisode_notes.count(), 1)
self.assertEqual(tvepisode_notes[0].title, "Episode thoughts")
self.assertIn("Sets up the character arcs", tvepisode_notes[0].content)
# Check podcast episode notes
podcast_notes = Note.objects.filter(
owner=self.user2.identity, item=self.podcastepisode
)
self.assertEqual(podcast_notes.count(), 1)
self.assertEqual(podcast_notes[0].title, "Podcast episode notes")
self.assertIn(
"Interesting discussion about tech trends", podcast_notes[0].content
)
self.assertEqual(podcast_notes[0].progress_type, Note.ProgressType.TIMESTAMP)
self.assertEqual(podcast_notes[0].progress_value, "23:45")
# Check first collection
collections = Collection.objects.filter(
owner=self.user2.identity, title="Favorites"
)
self.assertEqual(collections.count(), 1)
self.assertEqual(collections[0].brief, "My all-time favorites")
self.assertEqual(collections[0].visibility, 1)
collection_items = list(collections[0].ordered_items)
self.assertEqual([self.book1, self.movie1], collection_items)
# Check second collection
collections2 = Collection.objects.filter(
owner=self.user2.identity, title="To Review"
)
self.assertEqual(collections2.count(), 1)
self.assertEqual(collections2[0].brief, "Items I need to review soon")
self.assertEqual(collections2[0].visibility, 1)
# Check second collection items
collection2_items = [m.item for m in collections2[0].members.all()]
self.assertEqual(len(collection2_items), 3)
self.assertIn(self.book2, collection2_items)
self.assertIn(self.movie2, collection2_items)
self.assertIn(self.tvshow, collection2_items)
tag1 = Tag.objects.filter(owner=self.user2.identity, title="favorite").first()
self.assertIsNotNone(tag1)
if tag1:
self.assertTrue(tag1.pinned)
self.assertEqual(tag1.visibility, 2)
# Check shelf log entries
logs2 = ShelfLogEntry.objects.filter(owner=self.user2.identity).order_by(
"timestamp", "item_id"
)
l1 = [(log.item, log.shelf_type, log.timestamp) for log in logs]
l2 = [(log.item, log.shelf_type, log.timestamp) for log in logs2]
self.assertEqual(l1, l2)


@@ -48,7 +48,7 @@
<br>
{{ ndjson_export_task.message }}
{% if ndjson_export_task.metadata.file %}
<a href="{% url 'users:export_ndjson' %}" download>{% trans 'Download' %}</a>
<a href="{% url 'users:export_ndjson' %}" download><i class="fa fa-file-code"></i> {% trans 'Download' %}</a>
{% endif %}
{% endif %}
</form>
@@ -86,11 +86,24 @@
{% trans 'Upload a <code>.zip</code> file containing <code>.csv</code> or <code>.ndjson</code> files exported from NeoDB.' %}
</li>
<li>{% trans 'Existing marks and reviews with newer dates will be preserved.' %}</li>
<li>
{% trans 'Both CSV and NDJSON formats exported from NeoDB are supported. NDJSON format includes more data, like collections.' %}
</li>
</ul>
<br>
<input type="file" name="file" required accept=".zip">
<input type="file" name="file" id="neodb_import_file" required accept=".zip">
<div id="detected_format_info"
style="display: none;
margin: 10px 0;
padding: 8px 12px;
border-radius: 4px;
background-color: var(--card-background-color, #f8f9fa);
border: 1px solid var(--card-border-color, #dee2e6)">
<i class="fa fa-info-circle"></i> {% trans 'Detected format' %}: <strong id="detected_format">-</strong>
</div>
<div id="visibility_settings" style="display: none;">
<p>
{% trans 'Visibility' %}: <small><code>.csv</code> only</small>
{% trans 'Visibility' %}:
<br>
<label for="csv_visibility_0">
<input type="radio"
@@ -118,26 +131,103 @@
{% trans 'Mentioned Only' %}
</label>
</p>
</div>
<input type="hidden" name="format_type" id="format_type" value="">
<input type="submit" value="{% trans 'Import' %}" />
<small>
{% if csv_import_task %}
{% trans 'Last import started' %}: {{ csv_import_task.created_time }}
{% if csv_import_task.state == 0 or csv_import_task.state == 1 %}
<div hx-get="{% url 'users:user_task_status' 'csv_import' %}"
hx-target="this"
hx-trigger="load delay:2s, every 10s"
hx-swap="outerHTML"></div>
{% else %}
{% trans 'Status' %}: {{ csv_import_task.get_state_display }}。
{{ csv_import_task.message }}
<script src="{{ cdn_url }}/npm/jszip@3.10.1/dist/jszip.min.js"></script>
<script>
document.addEventListener('DOMContentLoaded', function() {
const fileInput = document.getElementById('neodb_import_file');
if (!fileInput) return;
fileInput.addEventListener('change', async function(event) {
const file = event.target.files[0];
if (!file) {
document.getElementById('detected_format_info').style.display = 'none';
document.getElementById('visibility_settings').style.display = 'none';
document.getElementById('format_type').value = '';
return;
}
// Check if it's a zip file
if (file.type !== 'application/zip' &&
file.type !== 'application/x-zip-compressed' &&
!file.name.toLowerCase().endsWith('.zip')) {
document.getElementById('detected_format_info').style.display = 'none';
document.getElementById('visibility_settings').style.display = 'none';
document.getElementById('format_type').value = '';
return;
}
// Update UI to show "Detecting format..." with a spinner
document.getElementById('detected_format').innerHTML = '{% trans "Detecting format..." %} <i class="fa fa-spinner fa-spin"></i>';
document.getElementById('detected_format_info').style.display = 'block';
try {
// Use JSZip to examine the actual contents of the ZIP file
const zip = new JSZip();
const zipContents = await zip.loadAsync(file);
const fileNames = Object.keys(zipContents.files);
// Check for specific files that indicate format type
const hasNdjson = fileNames.some(name => name === 'journal.ndjson' || name === 'catalog.ndjson');
const hasCsv = fileNames.some(name => name.endsWith('_mark.csv') ||
name.endsWith('_review.csv') ||
name.endsWith('_note.csv'));
let format = '';
let formatValue = '';
if (hasNdjson) {
format = 'NDJSON';
formatValue = 'ndjson';
} else if (hasCsv) {
format = 'CSV';
formatValue = 'csv';
} else {
// Unable to detect format from contents
format = '{% trans "Unknown format" %}';
formatValue = '';
}
// Update UI with detected format and appropriate icon
let formatIcon = '';
if (formatValue === 'ndjson') {
formatIcon = '<i class="fa fa-file-code"></i> ';
} else if (formatValue === 'csv') {
formatIcon = '<i class="fa fa-file-csv"></i> ';
} else {
formatIcon = '<i class="fa fa-question-circle"></i> ';
}
document.getElementById('detected_format').innerHTML = formatIcon + format;
document.getElementById('format_type').value = formatValue;
// Show visibility settings only for NDJSON format
if (formatValue === 'ndjson') {
document.getElementById('visibility_settings').style.display = 'block';
} else {
document.getElementById('visibility_settings').style.display = 'none';
}
} catch (error) {
console.error('Error examining ZIP file:', error);
document.getElementById('detected_format').innerHTML = '<i class="fa fa-exclamation-triangle"></i> {% trans "Error detecting format" %}';
document.getElementById('format_type').value = '';
// Make the error more visible
document.getElementById('detected_format_info').style.backgroundColor = 'var(--form-element-invalid-active-border-color, #d9534f)';
document.getElementById('detected_format_info').style.color = 'white';
// Hide visibility settings on error
document.getElementById('visibility_settings').style.display = 'none';
}
});
});
</script>
{% if neodb_import_task %}
{% include "users/user_task_status.html" with task=neodb_import_task %}
{% endif %}
{% if csv_import_task.metadata.failed_items %}
{% trans 'Failed items' %}:
<br>
<textarea readonly>{% for item in csv_import_task.metadata.failed_items %}{{item}}&#10;{% endfor %}</textarea>
{% endif %}
{% endif %}
</small>
</form>
</details>
</article>


@@ -1,19 +1,20 @@
{% load i18n %}
<div hx-get="{% url 'users:user_task_status' 'csv_import' %}"
{% if task.state == 0 or task.state == 1 %}hx-target="this" hx-trigger="every 30s"{% endif %}
<div hx-target="this"
{% if task.state == 0 or task.state == 1 %} hx-get="{% url 'users:user_task_status' task.type %}" hx-trigger="intersect once, every 30s"{% endif %}
hx-swap="outerHTML">
{% trans 'Status' %}: {{ task.get_state_display }}。
{% trans 'Requested' %}: {{ task.created_time }}
({{ task.get_state_display }})
{{ task.message }}
<br>
{% if task.state == 0 or task.state == 1 %}
{% if task.metadata.total and task.metadata.processed %}
<div class="progress-container">
<div>
<progress value="{{ task.metadata.processed }}" max="{{ task.metadata.total }}"></progress>
<div class="progress-text">
{{ task.metadata.processed }} / {{ task.metadata.total }}
({{ task.metadata.imported }} imported,
{{ task.metadata.skipped }} skipped,
{{ task.metadata.failed }} failed)
</div>
</div>
{% endif %}
{% endif %}
{% if task.metadata.failed_items %}
{% trans 'Failed items' %}:
<br>
<textarea readonly>{% for item in task.metadata.failed_items %}{{item}}&#10;{% endfor %}</textarea>
{% endif %}
</div>


@@ -10,7 +10,7 @@ urlpatterns = [
path("data", data, name="data"),
path("info", account_info, name="info"),
path("profile", account_profile, name="profile"),
path("task/<str:task_name>/status", user_task_status, name="user_task_status"),
path("task/<str:task_type>/status", user_task_status, name="user_task_status"),
path("data/import/status", data_import_status, name="import_status"),
path("data/import/goodreads", import_goodreads, name="import_goodreads"),
path("data/import/douban", import_douban, name="import_douban"),


@@ -18,6 +18,7 @@ from journal.importers import (
DoubanImporter,
GoodreadsImporter,
LetterboxdImporter,
NdjsonImporter,
OPMLImporter,
get_neodb_importer,
)
@@ -92,6 +93,20 @@ def data(request):
start_date = queryset.aggregate(Min("created_time"))["created_time__min"]
start_year = start_date.year if start_date else current_year
years = reversed(range(start_year, current_year + 1))
# Import tasks - check for both CSV and NDJSON importers
csv_import_task = CsvImporter.latest_task(request.user)
ndjson_import_task = NdjsonImporter.latest_task(request.user)
# Use the most recent import task for display
if ndjson_import_task and (
not csv_import_task
or ndjson_import_task.created_time > csv_import_task.created_time
):
neodb_import_task = ndjson_import_task
else:
neodb_import_task = csv_import_task
return render(
request,
"users/data.html",
@@ -100,7 +115,7 @@ def data(request):
"import_task": DoubanImporter.latest_task(request.user),
"export_task": DoufenExporter.latest_task(request.user),
"csv_export_task": CsvExporter.latest_task(request.user),
"csv_import_task": CsvImporter.latest_task(request.user),
"neodb_import_task": neodb_import_task, # Use the most recent import task
"ndjson_export_task": NdjsonExporter.latest_task(request.user),
"letterboxd_task": LetterboxdImporter.latest_task(request.user),
"goodreads_task": GoodreadsImporter.latest_task(request.user),
@@ -121,19 +136,21 @@ def data_import_status(request):
@login_required
def user_task_status(request, task_name: str):
match task_name:
case "csv_import":
def user_task_status(request, task_type: str):
match task_type:
case "journal.csvimporter":
task_cls = CsvImporter
case "csv_export":
case "journal.ndjsonimporter":
task_cls = NdjsonImporter
case "journal.csvexporter":
task_cls = CsvExporter
case "ndjson_export":
case "journal.ndjsonexporter":
task_cls = NdjsonExporter
case "letterboxd":
case "journal.letterboxdimporter":
task_cls = LetterboxdImporter
case "goodreads":
case "journal.goodreadsimporter":
task_cls = GoodreadsImporter
case "douban":
case "journal.doubanimporter":
task_cls = DoubanImporter
case _:
return redirect(reverse("users:data"))
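The task_type strings here look like TypedModel's "app_label.modelname" identifiers, which is presumably what task.type yields in the status template; a dict-based sketch of the same dispatch (an assumption, not part of the diff):
TASK_CLASSES = {
    "journal.csvimporter": CsvImporter,
    "journal.ndjsonimporter": NdjsonImporter,
    "journal.csvexporter": CsvExporter,
    "journal.ndjsonexporter": NdjsonExporter,
}
task_cls = TASK_CLASSES.get(task_type)  # fall back to a redirect when None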
@@ -357,16 +374,49 @@ def import_neodb(request):
with open(f, "wb+") as destination:
for chunk in request.FILES["file"].chunks():
destination.write(chunk)
# Get format type hint from frontend, if provided
format_type_hint = request.POST.get("format_type", "").lower()
# Import appropriate class based on format type or auto-detect
from journal.importers import CsvImporter, NdjsonImporter
if format_type_hint == "csv":
importer = CsvImporter
format_type = "CSV"
elif format_type_hint == "ndjson":
importer = NdjsonImporter
format_type = "NDJSON"
else:
# Fall back to auto-detection if no hint provided
importer = get_neodb_importer(f)
if importer == CsvImporter:
format_type = "CSV"
elif importer == NdjsonImporter:
format_type = "NDJSON"
else:
format_type = ""
importer = None # Make sure importer is None if auto-detection fails
if not importer:
messages.add_message(request, messages.ERROR, _("Invalid file."))
messages.add_message(
request,
messages.ERROR,
_(
"Invalid file. Expected a ZIP containing either CSV or NDJSON files exported from NeoDB."
),
)
return redirect(reverse("users:data"))
importer.create(
request.user,
visibility=int(request.POST.get("visibility", 0)),
file=f,
).enqueue()
messages.add_message(
request, messages.INFO, _("File is uploaded and will be imported soon.")
request,
messages.INFO,
_(f"{format_type} file is uploaded and will be imported soon."),
)
return redirect(reverse("users:data"))