import ndjson

commit d343d6e71e (parent 25e3536985)
10 changed files with 1378 additions and 267 deletions
@@ -5,16 +5,19 @@ from .csv import CsvImporter
 from .douban import DoubanImporter
 from .goodreads import GoodreadsImporter
 from .letterboxd import LetterboxdImporter
+from .ndjson import NdjsonImporter
 from .opml import OPMLImporter


-def get_neodb_importer(filename: str) -> type[CsvImporter] | None:
+def get_neodb_importer(
+    filename: str,
+) -> type[CsvImporter] | type[NdjsonImporter] | None:
     if not os.path.exists(filename) or not zipfile.is_zipfile(filename):
         return None
     with zipfile.ZipFile(filename, "r") as z:
         files = z.namelist()
         if any(f == "journal.ndjson" for f in files):
-            return None
+            return NdjsonImporter
         if any(
             f.endswith("_mark.csv")
             or f.endswith("_review.csv")
@@ -26,6 +29,7 @@ def get_neodb_importer(filename: str) -> type[CsvImporter] | None:

 __all__ = [
     "CsvImporter",
+    "NdjsonImporter",
     "LetterboxdImporter",
     "OPMLImporter",
     "DoubanImporter",
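For context, a rough usage sketch of the detection helper after this change (not part of the diff): the zip path, the `user` object and the surrounding view code are assumptions, while the create()/run() calls mirror the Task-based importers used elsewhere in this commit.

    importer_cls = get_neodb_importer("neodb_export.zip")  # hypothetical file
    if importer_cls is NdjsonImporter:
        # NDJSON zip exports are now routed to the new importer instead of returning None
        task = importer_cls.create(user=user, file="neodb_export.zip", visibility=0)
        task.run()
    elif importer_cls is CsvImporter:
        pass  # CSV zip exports still go through CsvImporter
    else:
        pass  # not a recognizable NeoDB export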
journal/importers/base.py (new file)
@@ -0,0 +1,197 @@
import datetime
from typing import Dict, List, Literal, Optional

from django.conf import settings
from django.utils.dateparse import parse_datetime
from loguru import logger

from catalog.common.sites import SiteManager
from catalog.models import Edition, IdType, Item, SiteName
from journal.models import ShelfType
from users.models import Task

_PREFERRED_SITES = [
    SiteName.Fediverse,
    SiteName.RSS,
    SiteName.TMDB,
    SiteName.IMDB,
    SiteName.GoogleBooks,
    SiteName.Goodreads,
    SiteName.IGDB,
]


class BaseImporter(Task):
    class Meta:
        app_label = "journal"  # workaround bug in TypedModel

    ImportResult = Literal["imported", "skipped", "failed"]
    TaskQueue = "import"
    DefaultMetadata = {
        "total": 0,
        "processed": 0,
        "skipped": 0,
        "imported": 0,
        "failed": 0,
        "failed_items": [],
        "file": None,
        "visibility": 0,
    }

    def progress(self, result: ImportResult) -> None:
        """Update import progress.

        Args:
            result: The import result ('imported', 'skipped', or 'failed')
        """
        self.metadata["processed"] += 1
        self.metadata[result] = self.metadata.get(result, 0) + 1

        if self.metadata["total"]:
            progress_percentage = round(
                self.metadata["processed"] / self.metadata["total"] * 100
            )
            self.message = f"Progress: {progress_percentage}% - "
        else:
            self.message = ""
        self.message += (
            f"{self.metadata['imported']} imported, "
            f"{self.metadata['skipped']} skipped, "
            f"{self.metadata['failed']} failed"
        )
        self.save(update_fields=["metadata", "message"])

    def run(self) -> None:
        raise NotImplementedError

    def get_item_by_info_and_links(
        self, title: str, info_str: str, links: list[str]
    ) -> Optional[Item]:
        """Find an item based on information from CSV export.

        Args:
            title: Item title
            info_str: Item info string (space-separated key:value pairs)
            links_str: Space-separated URLs

        Returns:
            Item if found, None otherwise
        """
        site_url = settings.SITE_INFO["site_url"] + "/"
        # look for local items first
        for link in links:
            if link.startswith("/") or link.startswith(site_url):
                item = Item.get_by_url(link, resolve_merge=True)
                if item and not item.is_deleted:
                    return item

        sites = [
            SiteManager.get_site_by_url(link, detect_redirection=False)
            for link in links
        ]
        sites = [site for site in sites if site]
        sites.sort(
            key=lambda x: _PREFERRED_SITES.index(x.SITE_NAME)
            if x.SITE_NAME in _PREFERRED_SITES
            else 99
        )

        # match items without extra requests
        for site in sites:
            item = site.get_item()
            if item:
                return item

        # match items after HEAD
        sites = [
            SiteManager.get_site_by_url(site.url) if site.url else site
            for site in sites
        ]
        sites = [site for site in sites if site]
        for site in sites:
            item = site.get_item()
            if item:
                return item

        # fetch from remote
        for site in sites:
            try:
                logger.debug(f"fetching {site.url}")
                site.get_resource_ready()
                item = site.get_item()
                if item:
                    return item
            except Exception as e:
                logger.error(f"Error fetching item: {e}")

        # Try using the info string
        if info_str:
            info_dict = {}
            for pair in info_str.strip().split():
                if ":" in pair:
                    key, value = pair.split(":", 1)
                    info_dict[key] = value

            # Check for ISBN, IMDB, etc.
            item = None
            for key, value in info_dict.items():
                if key == "isbn" and value:
                    item = Edition.objects.filter(
                        primary_lookup_id_type=IdType.ISBN,
                        primary_lookup_id_value=value,
                    ).first()
                elif key == "imdb" and value:
                    item = Item.objects.filter(
                        primary_lookup_id_type=IdType.IMDB,
                        primary_lookup_id_value=value,
                    ).first()
                if item:
                    return item
        return None

    def parse_tags(self, tags_str: str) -> List[str]:
        """Parse space-separated tags string into a list of tags."""
        if not tags_str:
            return []
        return [tag.strip() for tag in tags_str.split() if tag.strip()]

    def parse_info(self, info_str: str) -> Dict[str, str]:
        """Parse info string into a dictionary."""
        info_dict = {}
        if not info_str:
            return info_dict

        for pair in info_str.split():
            if ":" in pair:
                key, value = pair.split(":", 1)
                info_dict[key] = value

        return info_dict

    def parse_datetime(self, timestamp_str: str | None) -> Optional[datetime.datetime]:
        """Parse ISO format timestamp into datetime."""
        if not timestamp_str:
            return None

        try:
            dt = parse_datetime(timestamp_str)
            if dt and dt.tzinfo is None:
                dt = dt.replace(tzinfo=datetime.UTC)
            return dt
        except Exception as e:
            logger.error(f"Error parsing datetime {timestamp_str}: {e}")
            return None

    def parse_shelf_type(self, status_str: str) -> ShelfType:
        """Parse shelf type string into ShelfType enum."""
        if not status_str:
            return ShelfType.WISHLIST

        status_map = {
            "wishlist": ShelfType.WISHLIST,
            "progress": ShelfType.PROGRESS,
            "complete": ShelfType.COMPLETE,
            "dropped": ShelfType.DROPPED,
        }

        return status_map.get(status_str.lower(), ShelfType.WISHLIST)
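As a rough illustration of the contract BaseImporter gives its subclasses (a sketch, not part of the commit): set metadata["total"], do the work in run(), and report each record through progress(); the DemoImporter class and its stand-in rows below are invented for this example.

    class DemoImporter(BaseImporter):
        class Meta:
            app_label = "journal"  # same TypedModel workaround as BaseImporter.Meta

        def run(self) -> None:
            rows = [{"title": "Hyperion"}, {"title": ""}]  # stand-in records
            self.metadata["total"] = len(rows)
            for row in rows:
                # placeholder logic; a real importer would resolve and save items here
                result = "imported" if row.get("title") else "skipped"
                self.progress(result)  # updates counts and self.message, then saves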
@@ -1,181 +1,20 @@
 import csv
-import datetime
 import os
 import tempfile
 import zipfile
-from typing import Dict, List, Optional
+from typing import Dict

-from django.conf import settings
 from django.utils import timezone
-from django.utils.dateparse import parse_datetime
 from django.utils.translation import gettext as _
 from loguru import logger

-from catalog.common.sites import SiteManager
-from catalog.models import Edition, IdType, Item, ItemCategory, SiteName
-from journal.models import Mark, Note, Review, ShelfType
-from users.models import Task
+from catalog.models import ItemCategory
+from journal.models import Mark, Note, Review

-_PREFERRED_SITES = [
-    SiteName.Fediverse,
-    SiteName.RSS,
-    SiteName.TMDB,
-    SiteName.IMDB,
-    SiteName.GoogleBooks,
-    SiteName.Goodreads,
-    SiteName.IGDB,
-]
+from .base import BaseImporter


-class CsvImporter(Task):
-    class Meta:
-        app_label = "journal"  # workaround bug in TypedModel
-
-    TaskQueue = "import"
-    DefaultMetadata = {
-        "total": 0,
-        "processed": 0,
-        "skipped": 0,
-        "imported": 0,
-        "failed": 0,
-        "failed_items": [],
-        "file": None,
-        "visibility": 0,
-    }
-
 [ ... further removed lines: the get_item_by_info_and_links, parse_tags, parse_info,
   parse_datetime and parse_shelf_type methods, moved into BaseImporter in
   journal/importers/base.py above; the only behavioral change is that
   get_item_by_info_and_links now takes a list of links instead of a space-separated
   links_str ... ]
+class CsvImporter(BaseImporter):
     def import_mark(self, row: Dict[str, str]) -> str:
         """Import a mark from a CSV row.

@@ -184,7 +23,9 @@
         """
         try:
             item = self.get_item_by_info_and_links(
-                row.get("title", ""), row.get("info", ""), row.get("links", "")
+                row.get("title", ""),
+                row.get("info", ""),
+                row.get("links", "").strip().split(),
             )

             if not item:
@@ -246,7 +87,9 @@
         """
         try:
             item = self.get_item_by_info_and_links(
-                row.get("title", ""), row.get("info", ""), row.get("links", "")
+                row.get("title", ""),
+                row.get("info", ""),
+                row.get("links", "").strip().split(),
             )

             if not item:
@@ -304,7 +147,9 @@
         """
         try:
             item = self.get_item_by_info_and_links(
-                row.get("title", ""), row.get("info", ""), row.get("links", "")
+                row.get("title", ""),
+                row.get("info", ""),
+                row.get("links", "").strip().split(),
             )

             if not item:
@@ -361,26 +206,6 @@
             )
             return "failed"

-    def progress(self, result: str) -> None:
-        """Update import progress.
-
-        Args:
-            result: The import result ('imported', 'skipped', or 'failed')
-        """
-        self.metadata["processed"] += 1
-        self.metadata[result] = self.metadata.get(result, 0) + 1
-
-        progress_percentage = round(
-            self.metadata["processed"] / self.metadata["total"] * 100
-        )
-        self.message = (
-            f"Progress: {progress_percentage}% - "
-            f"{self.metadata['imported']} imported, "
-            f"{self.metadata['skipped']} skipped, "
-            f"{self.metadata['failed']} failed"
-        )
-        self.save(update_fields=["metadata", "message"])
-
     def process_csv_file(self, file_path: str, import_function) -> None:
         """Process a CSV file using the specified import function."""
         logger.debug(f"Processing {file_path}")
journal/importers/ndjson.py (new file)
@@ -0,0 +1,447 @@
import json
import os
import tempfile
import zipfile
from typing import Any, Dict

from django.utils.translation import gettext as _
from loguru import logger

from journal.models import (
    Collection,
    Comment,
    Mark,
    Note,
    Rating,
    Review,
    ShelfLogEntry,
    ShelfType,
    Tag,
    TagMember,
)

from .base import BaseImporter


class NdjsonImporter(BaseImporter):
    """Importer for NDJSON files exported from NeoDB."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.items = {}

    def import_collection(self, data: Dict[str, Any]) -> BaseImporter.ImportResult:
        """Import a collection from NDJSON data."""
        try:
            owner = self.user.identity
            visibility = data.get("visibility", self.metadata.get("visibility", 0))
            metadata = data.get("metadata", {})
            content_data = data.get("content", {})
            published_dt = self.parse_datetime(content_data.get("published"))
            name = content_data.get("name", "")
            content = content_data.get("content", "")
            collection = Collection.objects.create(
                owner=owner,
                title=name,
                brief=content,
                visibility=visibility,
                metadata=data.get("metadata", {}),
                created_time=published_dt,
            )
            item_data = data.get("items", [])
            for item_entry in item_data:
                item_url = item_entry.get("item")
                if not item_url:
                    continue
                item = self.items.get(item_url)
                if not item:
                    logger.warning(f"Could not find item for collection: {item_url}")
                    continue
                metadata = item_entry.get("metadata", {})
                collection.append_item(item, metadata=metadata)
            return "imported"
        except Exception as e:
            logger.error(f"Error importing collection: {e}")
            return "failed"

    def import_shelf_member(self, data: Dict[str, Any]) -> BaseImporter.ImportResult:
        """Import a shelf member (mark) from NDJSON data."""
        try:
            owner = self.user.identity
            visibility = data.get("visibility", self.metadata.get("visibility", 0))
            metadata = data.get("metadata", {})
            content_data = data.get("content", {})
            published_dt = self.parse_datetime(content_data.get("published"))
            item = self.items.get(content_data.get("withRegardTo", ""))
            if not item:
                raise KeyError(f"Could not find item: {data.get('item', '')}")
            shelf_type = content_data.get("status", ShelfType.WISHLIST)
            mark = Mark(owner, item)
            if mark.created_time and published_dt and mark.created_time >= published_dt:
                return "skipped"
            mark.update(
                shelf_type=shelf_type,
                visibility=visibility,
                metadata=metadata,
                created_time=published_dt,
            )
            return "imported"
        except Exception as e:
            logger.error(f"Error importing shelf member: {e}")
            return "failed"

    def import_shelf_log(self, data: Dict[str, Any]) -> BaseImporter.ImportResult:
        """Import a shelf log entry from NDJSON data."""
        try:
            item = self.items.get(data.get("item", ""))
            if not item:
                raise KeyError(f"Could not find item: {data.get('item', '')}")
            owner = self.user.identity
            shelf_type = data.get("status", ShelfType.WISHLIST)
            # posts = data.get("posts", [])  # TODO but will be tricky
            timestamp = data.get("timestamp")
            timestamp_dt = self.parse_datetime(timestamp) if timestamp else None
            _, created = ShelfLogEntry.objects.update_or_create(
                owner=owner,
                item=item,
                shelf_type=shelf_type,
                timestamp=timestamp_dt,
            )
            # return "imported" if created else "skipped"
            # count skip as success otherwise it may confuse user
            return "imported"
        except Exception as e:
            logger.error(f"Error importing shelf log: {e}")
            return "failed"

    def import_post(self, data: Dict[str, Any]) -> BaseImporter.ImportResult:
        """Import a post from NDJSON data."""
        # TODO
        return "skipped"

    def import_review(self, data: Dict[str, Any]) -> BaseImporter.ImportResult:
        """Import a review from NDJSON data."""
        try:
            owner = self.user.identity
            visibility = data.get("visibility", self.metadata.get("visibility", 0))
            metadata = data.get("metadata", {})
            content_data = data.get("content", {})
            published_dt = self.parse_datetime(content_data.get("published"))
            item = self.items.get(content_data.get("withRegardTo", ""))
            if not item:
                raise KeyError(f"Could not find item: {data.get('item', '')}")
            name = content_data.get("name", "")
            content = content_data.get("content", "")
            existing_review = Review.objects.filter(
                owner=owner, item=item, title=name
            ).first()
            if (
                existing_review
                and existing_review.created_time
                and published_dt
                and existing_review.created_time >= published_dt
            ):
                return "skipped"
            Review.objects.create(
                owner=owner,
                item=item,
                title=name,
                body=content,
                created_time=published_dt,
                visibility=visibility,
                metadata=metadata,
            )
            return "imported"
        except Exception as e:
            logger.error(f"Error importing review: {e}")
            return "failed"

    def import_note(self, data: Dict[str, Any]) -> BaseImporter.ImportResult:
        """Import a note from NDJSON data."""
        try:
            owner = self.user.identity
            visibility = data.get("visibility", self.metadata.get("visibility", 0))
            content_data = data.get("content", {})
            published_dt = self.parse_datetime(content_data.get("published"))
            item = self.items.get(content_data.get("withRegardTo", ""))
            if not item:
                raise KeyError(f"Could not find item: {data.get('item', '')}")
            title = content_data.get("title", "")
            content = content_data.get("content", "")
            sensitive = content_data.get("sensitive", False)
            progress = content_data.get("progress", {})
            progress_type = progress.get("type", "")
            progress_value = progress.get("value", "")
            Note.objects.create(
                item=item,
                owner=owner,
                title=title,
                content=content,
                sensitive=sensitive,
                progress_type=progress_type,
                progress_value=progress_value,
                created_time=published_dt,
                visibility=visibility,
                metadata=data.get("metadata", {}),
            )
            return "imported"
        except Exception as e:
            logger.error(f"Error importing note: {e}")
            return "failed"

    def import_comment(self, data: Dict[str, Any]) -> BaseImporter.ImportResult:
        """Import a comment from NDJSON data."""
        try:
            owner = self.user.identity
            visibility = data.get("visibility", self.metadata.get("visibility", 0))
            metadata = data.get("metadata", {})
            content_data = data.get("content", {})
            published_dt = self.parse_datetime(content_data.get("published"))
            item = self.items.get(content_data.get("withRegardTo", ""))
            if not item:
                raise KeyError(f"Could not find item: {data.get('item', '')}")
            content = content_data.get("content", "")
            existing_comment = Comment.objects.filter(owner=owner, item=item).first()
            if (
                existing_comment
                and existing_comment.created_time
                and published_dt
                and existing_comment.created_time >= published_dt
            ):
                return "skipped"
            Comment.objects.create(
                owner=owner,
                item=item,
                text=content,
                created_time=published_dt,
                visibility=visibility,
                metadata=metadata,
            )
            return "imported"
        except Exception as e:
            logger.error(f"Error importing comment: {e}")
            return "failed"

    def import_rating(self, data: Dict[str, Any]) -> BaseImporter.ImportResult:
        """Import a rating from NDJSON data."""
        try:
            owner = self.user.identity
            visibility = data.get("visibility", self.metadata.get("visibility", 0))
            metadata = data.get("metadata", {})
            content_data = data.get("content", {})
            published_dt = self.parse_datetime(content_data.get("published"))
            item = self.items.get(content_data.get("withRegardTo", ""))
            if not item:
                raise KeyError(f"Could not find item: {data.get('item', '')}")
            rating_grade = int(float(content_data.get("value", 0)))
            existing_rating = Comment.objects.filter(owner=owner, item=item).first()
            if (
                existing_rating
                and existing_rating.created_time
                and published_dt
                and existing_rating.created_time >= published_dt
            ):
                return "skipped"
            Rating.objects.create(
                owner=owner,
                item=item,
                grade=rating_grade,
                created_time=published_dt,
                visibility=visibility,
                metadata=metadata,
            )
            return "imported"
        except Exception as e:
            logger.error(f"Error importing rating: {e}")
            return "failed"

    def import_tag(self, data: Dict[str, Any]) -> BaseImporter.ImportResult:
        """Import tags from NDJSON data."""
        try:
            owner = self.user.identity
            visibility = data.get("visibility", self.metadata.get("visibility", 0))
            pinned = data.get("pinned", self.metadata.get("pinned", False))
            tag_title = Tag.cleanup_title(data.get("name", ""))
            _, created = Tag.objects.update_or_create(
                owner=owner,
                title=tag_title,
                defaults={
                    "visibility": visibility,
                    "pinned": pinned,
                },
            )
            return "imported" if created else "skipped"
        except Exception as e:
            logger.error(f"Error importing tag member: {e}")
            return "failed"

    def import_tag_member(self, data: Dict[str, Any]) -> BaseImporter.ImportResult:
        """Import tags from NDJSON data."""
        try:
            owner = self.user.identity
            visibility = data.get("visibility", self.metadata.get("visibility", 0))
            metadata = data.get("metadata", {})
            content_data = data.get("content", {})
            published_dt = self.parse_datetime(content_data.get("published"))
            item = self.items.get(content_data.get("withRegardTo", ""))
            if not item:
                raise KeyError(f"Could not find item: {data.get('item', '')}")
            tag_title = Tag.cleanup_title(content_data.get("tag", ""))
            tag, _ = Tag.objects.get_or_create(
                owner=owner,
                title=tag_title,
                defaults={
                    "created_time": published_dt,
                    "visibility": visibility,
                    "pinned": False,
                    "metadata": metadata,
                },
            )
            _, created = TagMember.objects.update_or_create(
                owner=owner,
                item=item,
                parent=tag,
                defaults={
                    "created_time": published_dt,
                    "visibility": visibility,
                    "metadata": metadata,
                    "position": 0,
                },
            )
            return "imported" if created else "skipped"
        except Exception as e:
            logger.error(f"Error importing tag member: {e}")
            return "failed"

    def process_journal(self, file_path: str) -> None:
        """Process a NDJSON file and import all items."""
        logger.debug(f"Processing {file_path}")
        lines_error = 0
        import_funcs = {
            "Tag": self.import_tag,
            "TagMember": self.import_tag_member,
            "Rating": self.import_rating,
            "Comment": self.import_comment,
            "ShelfMember": self.import_shelf_member,
            "Review": self.import_review,
            "Note": self.import_note,
            "Collection": self.import_collection,
            "ShelfLog": self.import_shelf_log,
            "Post": self.import_post,
        }
        journal = {k: [] for k in import_funcs.keys()}
        with open(file_path, "r") as jsonfile:
            # Skip header line
            next(jsonfile, None)

            for line in jsonfile:
                try:
                    data = json.loads(line)
                except json.JSONDecodeError:
                    lines_error += 1
                    continue
                data_type = data.get("type")
                if not data_type:
                    continue
                if data_type not in journal:
                    journal[data_type] = []
                journal[data_type].append(data)

        self.metadata["total"] = sum(len(items) for items in journal.values())
        logger.debug(f"Processing {self.metadata['total']} entries")
        if lines_error:
            logger.error(f"Error processing journal.ndjson: {lines_error} lines")

        for typ, func in import_funcs.items():
            for data in journal.get(typ, []):
                result = func(data)
                self.progress(result)
        logger.info(
            f"Imported {self.metadata['imported']}, skipped {self.metadata['skipped']}, failed {self.metadata['failed']}"
        )

    def parse_catalog(self, file_path: str) -> None:
        """Parse the catalog.ndjson file and build item lookup tables."""
        logger.debug(f"Parsing catalog file: {file_path}")
        item_count = 0
        try:
            with open(file_path, "r") as jsonfile:
                for line in jsonfile:
                    try:
                        i = json.loads(line)
                    except (json.JSONDecodeError, Exception) as e:
                        logger.error(f"Error processing catalog item: {e}")
                        continue
                    u = i.get("id")
                    if not u:
                        continue
                    # self.catalog_items[u] = i
                    item_count += 1
                    links = [u] + [r["url"] for r in i.get("external_resources", [])]
                    self.items[u] = self.get_item_by_info_and_links("", "", links)
            logger.info(f"Loaded {item_count} items from catalog")
            self.metadata["catalog_processed"] = item_count
        except Exception as e:
            logger.error(f"Error parsing catalog file: {e}")

    def parse_header(self, file_path: str) -> Dict[str, Any]:
        try:
            with open(file_path, "r") as jsonfile:
                first_line = jsonfile.readline().strip()
                if first_line:
                    header = json.loads(first_line)
                    if header.get("server"):
                        return header
        except (json.JSONDecodeError, IOError) as e:
            logger.error(f"Error parsing NDJSON header: {e}")
        return {}

    def run(self) -> None:
        """Run the NDJSON import."""
        filename = self.metadata["file"]
        logger.debug(f"Importing {filename}")

        with zipfile.ZipFile(filename, "r") as zipref:
            with tempfile.TemporaryDirectory() as tmpdirname:
                zipref.extractall(tmpdirname)

                catalog_path = os.path.join(tmpdirname, "catalog.ndjson")
                if os.path.exists(catalog_path):
                    catalog_header = self.parse_header(catalog_path)
                    logger.debug(f"Loading catalog.ndjson with {catalog_header}")
                    self.parse_catalog(catalog_path)
                else:
                    logger.warning("catalog.ndjson file not found in the archive")

                journal_path = os.path.join(tmpdirname, "journal.ndjson")
                if not os.path.exists(journal_path):
                    logger.error("journal.ndjson file not found in the archive")
                    self.message = "Import failed: journal.ndjson file not found"
                    self.save()
                    return
                header = self.parse_header(journal_path)
                self.metadata["journal_header"] = header
                logger.debug(f"Importing journal.ndjson with {header}")
                self.process_journal(journal_path)

        source_info = self.metadata.get("journal_header", {})
        source_summary = f" from {source_info.get('username', 'unknown')}@{source_info.get('server', 'unknown')} ver:{source_info.get('neodb_version', 'unknown')}."
        self.message = _("Import complete") + source_summary

        metadata_stats = self.metadata.get("metadata_stats", {})
        partial_updates = metadata_stats.get("partial_updates", 0)
        if partial_updates > 0:
            self.message += f", {partial_updates} items with partial metadata updates"

        ratings = metadata_stats.get("ratings_updated", 0)
        comments = metadata_stats.get("comments_updated", 0)
        tags = metadata_stats.get("tags_updated", 0)

        if ratings > 0 or comments > 0 or tags > 0:
            self.message += (
                f" ({ratings} ratings, {comments} comments, {tags} tag sets)"
            )

        if self.metadata.get("failed_items", []):
            self.message += f": {self.metadata['failed']} items failed ({len(self.metadata['failed_items'])} unique items)"
        self.save()
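For orientation, a sketch of the shape of a journal.ndjson entry as the methods above read it, written as the dict json.loads() would yield; the field names (type, visibility, content.withRegardTo, content.status, content.published) are taken from the code above, while the concrete values and URL are invented. process_journal() buckets records by type and would hand this one to import_shelf_member() after parse_catalog() has mapped the withRegardTo URL to a local Item.

    record = {  # hypothetical ShelfMember line from journal.ndjson
        "type": "ShelfMember",
        "visibility": 1,
        "content": {
            "withRegardTo": "https://neodb.example/book/abc123",  # looked up in self.items
            "status": "complete",  # parsed as a ShelfType
            "published": "2021-01-01T00:00:00Z",
        },
    }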
@@ -1,3 +1,4 @@
 from .csv import *
+from .ndjson import *
 from .piece import *
 from .search import *
journal/tests/ndjson.py (new file)
@@ -0,0 +1,496 @@
import json
import os
import zipfile
from tempfile import TemporaryDirectory

from django.test import TestCase
from django.utils.dateparse import parse_datetime
from loguru import logger

from catalog.models import (
    Edition,
    IdType,
    Movie,
    Podcast,
    PodcastEpisode,
    TVEpisode,
    TVSeason,
    TVShow,
)
from journal.exporters import NdjsonExporter
from journal.importers import NdjsonImporter, get_neodb_importer
from users.models import User

from ..models import *


class NdjsonExportImportTest(TestCase):
    databases = "__all__"
    maxDiff = None

    def setUp(self):
        self.user1 = User.register(
            email="ndjson_export@test.com", username="ndjson_exporter"
        )
        self.user2 = User.register(
            email="ndjson_import@test.com", username="ndjson_importer"
        )
        self.tag1 = Tag.objects.create(
            owner=self.user1.identity, title="favorite", pinned=True, visibility=2
        )
        self.dt = parse_datetime("2021-01-01T00:00:00Z")
        self.dt2 = parse_datetime("2021-02-01T00:00:00Z")
        self.dt3 = parse_datetime("2021-03-01T00:00:00Z")
        self.book1 = Edition.objects.create(
            localized_title=[{"lang": "en", "text": "Hyperion"}],
            primary_lookup_id_type=IdType.ISBN,
            primary_lookup_id_value="9780553283686",
            author=["Dan Simmons"],
            pub_year=1989,
        )
        self.book2 = Edition.objects.create(
            localized_title=[{"lang": "en", "text": "Dune"}],
            primary_lookup_id_type=IdType.ISBN,
            primary_lookup_id_value="9780441172719",
            author=["Frank Herbert"],
            pub_year=1965,
        )
        self.movie1 = Movie.objects.create(
            localized_title=[{"lang": "en", "text": "Inception"}],
            primary_lookup_id_type=IdType.IMDB,
            primary_lookup_id_value="tt1375666",
            director=["Christopher Nolan"],
            year=2010,
        )
        self.movie2 = Movie.objects.create(
            localized_title=[{"lang": "en", "text": "The Matrix"}],
            primary_lookup_id_type=IdType.IMDB,
            primary_lookup_id_value="tt0133093",
            director=["Lana Wachowski", "Lilly Wachowski"],
            year=1999,
        )
        self.tvshow = TVShow.objects.create(
            localized_title=[{"lang": "en", "text": "Breaking Bad"}],
            primary_lookup_id_type=IdType.IMDB,
            primary_lookup_id_value="tt0903747",
            year=2008,
        )
        self.tvseason = TVSeason.objects.create(
            localized_title=[{"lang": "en", "text": "Breaking Bad Season 1"}],
            show=self.tvshow,
            season_number=1,
        )
        self.tvepisode1 = TVEpisode.objects.create(
            localized_title=[{"lang": "en", "text": "Pilot"}],
            season=self.tvseason,
            episode_number=1,
        )
        self.tvepisode2 = TVEpisode.objects.create(
            localized_title=[{"lang": "en", "text": "Cat's in the Bag..."}],
            season=self.tvseason,
            episode_number=2,
        )
        # Create podcast test items
        self.podcast = Podcast.objects.create(
            localized_title=[{"lang": "en", "text": "Test Podcast"}],
            primary_lookup_id_type=IdType.RSS,
            primary_lookup_id_value="https://example.com/feed.xml",
            host=["Test Host"],
        )
        self.podcastepisode = PodcastEpisode.objects.create(
            localized_title=[{"lang": "en", "text": "Test Episode 1"}],
            program=self.podcast,
            guid="111",
            pub_date=self.dt,
        )

    def test_ndjson_export_import(self):
        # Create marks, reviews and notes for user1

        # Book marks with ratings and tags
        mark_book1 = Mark(self.user1.identity, self.book1)
        mark_book1.update(
            ShelfType.COMPLETE,
            "Great sci-fi classic",
            10,
            ["sci-fi", "favorite", "space"],
            1,
            created_time=self.dt,
        )
        mark_book2 = Mark(self.user1.identity, self.book2)
        mark_book2.update(
            ShelfType.WISHLIST,
            "Read it?",
            None,
            ["sci-fi", "desert"],
            1,
            created_time=self.dt,
        )
        mark_book2.update(
            ShelfType.PROGRESS,
            "Reading!",
            None,
            ["sci-fi", "desert"],
            0,
            created_time=self.dt2,
        )
        mark_book2.update(
            ShelfType.COMPLETE,
            "Read.",
            None,
            ["sci-fi", "desert"],
            0,
            created_time=self.dt3,
        )

        # Movie marks with ratings
        mark_movie1 = Mark(self.user1.identity, self.movie1)
        mark_movie1.update(
            ShelfType.COMPLETE,
            "Mind-bending",
            8,
            ["mindbender", "scifi"],
            1,
            created_time=self.dt,
        )

        mark_movie2 = Mark(self.user1.identity, self.movie2)
        mark_movie2.update(
            ShelfType.WISHLIST, "Need to rewatch", None, [], 1, created_time=self.dt2
        )

        # TV show mark
        mark_tvshow = Mark(self.user1.identity, self.tvshow)
        mark_tvshow.update(
            ShelfType.WISHLIST,
            "Heard it's good",
            None,
            ["drama"],
            1,
            created_time=self.dt,
        )

        # TV episode marks
        mark_episode1 = Mark(self.user1.identity, self.tvepisode1)
        mark_episode1.update(
            ShelfType.COMPLETE,
            "Great start",
            9,
            ["pilot", "drama"],
            1,
            created_time=self.dt2,
        )

        mark_episode2 = Mark(self.user1.identity, self.tvepisode2)
        mark_episode2.update(
            ShelfType.COMPLETE, "It gets better", 9, [], 1, created_time=self.dt3
        )

        # Podcast episode mark
        mark_podcast = Mark(self.user1.identity, self.podcastepisode)
        mark_podcast.update(
            ShelfType.COMPLETE,
            "Insightful episode",
            8,
            ["tech", "interview"],
            1,
            created_time=self.dt,
        )

        # Create reviews
        Review.update_item_review(
            self.book1,
            self.user1.identity,
            "My thoughts on Hyperion",
            "A masterpiece of science fiction that weaves multiple storylines into a captivating narrative.",
            visibility=1,
            created_time=self.dt,
        )

        Review.update_item_review(
            self.movie1,
            self.user1.identity,
            "Inception Review",
            "Christopher Nolan at his best. The movie plays with reality and dreams in a fascinating way.",
            visibility=1,
        )

        # Create notes
        Note.objects.create(
            item=self.book2,
            owner=self.user1.identity,
            title="Reading progress",
            content="Just finished the first part. The world-building is incredible.\n\n - p 125",
            progress_type=Note.ProgressType.PAGE,
            progress_value="125",
            visibility=1,
        )

        Note.objects.create(
            item=self.tvshow,
            owner=self.user1.identity,
            title="Before watching",
            content="Things to look out for according to friends:\n- Character development\n- Color symbolism\n\n - e 0",
            progress_type=Note.ProgressType.EPISODE,
            progress_value="2",
            visibility=1,
        )

        # Create TV episode note
        Note.objects.create(
            item=self.tvepisode1,
            owner=self.user1.identity,
            title="Episode thoughts",
            content="Great pilot episode. Sets up the character arcs really well.",
            visibility=1,
        )

        # Create podcast episode note
        Note.objects.create(
            item=self.podcastepisode,
            owner=self.user1.identity,
            title="Podcast episode notes",
            content="Interesting discussion about tech trends. Timestamp 23:45 has a good point about AI.",
            progress_type=Note.ProgressType.TIMESTAMP,
            progress_value="23:45",
            visibility=1,
        )

        # Create collections
        items = [self.book1, self.movie1]
        collection = Collection.objects.create(
            owner=self.user1.identity,
            title="Favorites",
            brief="My all-time favorites",
            visibility=1,
        )
        for i in items:
            collection.append_item(i)

        # Create another collection with different items
        items2 = [self.book2, self.movie2, self.tvshow]
        collection2 = Collection.objects.create(
            owner=self.user1.identity,
            title="To Review",
            brief="Items I need to review soon",
            visibility=1,
        )
        for i in items2:
            collection2.append_item(i)

        # Create shelf log entries
        logs = ShelfLogEntry.objects.filter(owner=self.user1.identity).order_by(
            "timestamp", "item_id"
        )

        # Export data to NDJSON
        exporter = NdjsonExporter.create(user=self.user1)
        exporter.run()
        export_path = exporter.metadata["file"]
        logger.debug(f"exported to {export_path}")
        self.assertTrue(os.path.exists(export_path))

        # Validate the NDJSON export file structure
        with TemporaryDirectory() as extract_dir:
            with zipfile.ZipFile(export_path, "r") as zip_ref:
                zip_ref.extractall(extract_dir)
                logger.debug(f"unzipped to {extract_dir}")

            # Check journal.ndjson exists
            journal_path = os.path.join(extract_dir, "journal.ndjson")
            self.assertTrue(
                os.path.exists(journal_path), "journal.ndjson file missing"
            )

            # Check catalog.ndjson exists
            catalog_path = os.path.join(extract_dir, "catalog.ndjson")
            self.assertTrue(
                os.path.exists(catalog_path), "catalog.ndjson file missing"
            )

            # Check attachments directory exists
            attachments_path = os.path.join(extract_dir, "attachments")
            self.assertTrue(
                os.path.exists(attachments_path), "attachments directory missing"
            )

            # Count the number of JSON objects in journal.ndjson
            with open(journal_path, "r") as f:
                lines = f.readlines()
                # First line is header, rest are data
                self.assertGreater(
                    len(lines), 1, "journal.ndjson has no data lines"
                )

                # Check the first line is a header
                header = json.loads(lines[0])
                self.assertIn("server", header, "Missing server in header")
                self.assertIn("username", header, "Missing username in header")
                self.assertEqual(
                    header["username"],
                    "ndjson_exporter",
                    "Wrong username in header",
                )

                # Count data objects by type
                type_counts = {
                    "ShelfMember": 0,
                    "Review": 0,
                    "Note": 0,
                    "Collection": 0,
                    "ShelfLog": 0,
                    "post": 0,
                }

                for line in lines[1:]:
                    data = json.loads(line)
                    if "type" in data:
                        type_counts[data["type"]] = (
                            type_counts.get(data["type"], 0) + 1
                        )

                # Verify counts
                self.assertEqual(
                    type_counts["ShelfMember"], 8, "Expected 8 ShelfMember entries"
                )
                self.assertEqual(
                    type_counts["Review"], 2, "Expected 2 Review entries"
                )
                self.assertEqual(type_counts["Note"], 4, "Expected 4 Note entries")
                self.assertEqual(
                    type_counts["Collection"], 2, "Expected 2 Collection entries"
                )
                self.assertEqual(type_counts["ShelfLog"], logs.count())

        # Now import the export file into a different user account
        self.assertEqual(get_neodb_importer(export_path), NdjsonImporter)
        importer = NdjsonImporter.create(
            user=self.user2, file=export_path, visibility=2
        )
        importer.run()
        self.assertIn("Import complete", importer.message)

        # Verify imported data

        # Check marks
        mark_book1_imported = Mark(self.user2.identity, self.book1)
        self.assertEqual(mark_book1_imported.shelf_type, ShelfType.COMPLETE)
        self.assertEqual(mark_book1_imported.comment_text, "Great sci-fi classic")
        self.assertEqual(mark_book1_imported.rating_grade, 10)
        self.assertEqual(mark_book1_imported.visibility, 1)
        self.assertEqual(
            set(mark_book1_imported.tags), set(["sci-fi", "favorite", "space"])
        )

        mark_book2_imported = Mark(self.user2.identity, self.book2)
        self.assertEqual(mark_book2_imported.shelf_type, ShelfType.COMPLETE)
        self.assertEqual(mark_book2_imported.comment_text, "Read.")
        self.assertIsNone(mark_book2_imported.rating_grade)
        self.assertEqual(set(mark_book2_imported.tags), set(["sci-fi", "desert"]))
        self.assertEqual(mark_book2_imported.visibility, 0)

        mark_movie1_imported = Mark(self.user2.identity, self.movie1)
        self.assertEqual(mark_movie1_imported.shelf_type, ShelfType.COMPLETE)
        self.assertEqual(mark_movie1_imported.comment_text, "Mind-bending")
        self.assertEqual(mark_movie1_imported.rating_grade, 8)
        self.assertEqual(set(mark_movie1_imported.tags), set(["mindbender", "scifi"]))

        mark_episode1_imported = Mark(self.user2.identity, self.tvepisode1)
        self.assertEqual(mark_episode1_imported.shelf_type, ShelfType.COMPLETE)
        self.assertEqual(mark_episode1_imported.comment_text, "Great start")
        self.assertEqual(mark_episode1_imported.rating_grade, 9)
        self.assertEqual(set(mark_episode1_imported.tags), set(["pilot", "drama"]))

        # Check podcast episode mark
        mark_podcast_imported = Mark(self.user2.identity, self.podcastepisode)
        self.assertEqual(mark_podcast_imported.shelf_type, ShelfType.COMPLETE)
        self.assertEqual(mark_podcast_imported.comment_text, "Insightful episode")
        self.assertEqual(mark_podcast_imported.rating_grade, 8)
        self.assertEqual(set(mark_podcast_imported.tags), set(["tech", "interview"]))

        # Check reviews
        book1_reviews = Review.objects.filter(
            owner=self.user2.identity, item=self.book1
        )
        self.assertEqual(book1_reviews.count(), 1)
        self.assertEqual(book1_reviews[0].title, "My thoughts on Hyperion")
        self.assertIn("masterpiece of science fiction", book1_reviews[0].body)

        movie1_reviews = Review.objects.filter(
            owner=self.user2.identity, item=self.movie1
        )
        self.assertEqual(movie1_reviews.count(), 1)
        self.assertEqual(movie1_reviews[0].title, "Inception Review")
        self.assertIn("Christopher Nolan", movie1_reviews[0].body)

        # Check notes
        book2_notes = Note.objects.filter(owner=self.user2.identity, item=self.book2)
        self.assertEqual(book2_notes.count(), 1)
        self.assertEqual(book2_notes[0].title, "Reading progress")
        self.assertIn("world-building is incredible", book2_notes[0].content)
        self.assertEqual(book2_notes[0].progress_type, Note.ProgressType.PAGE)
        self.assertEqual(book2_notes[0].progress_value, "125")

        tvshow_notes = Note.objects.filter(owner=self.user2.identity, item=self.tvshow)
        self.assertEqual(tvshow_notes.count(), 1)
        self.assertEqual(tvshow_notes[0].title, "Before watching")
        self.assertIn("Character development", tvshow_notes[0].content)

        # Check TV episode notes
        tvepisode_notes = Note.objects.filter(
            owner=self.user2.identity, item=self.tvepisode1
        )
        self.assertEqual(tvepisode_notes.count(), 1)
        self.assertEqual(tvepisode_notes[0].title, "Episode thoughts")
        self.assertIn("Sets up the character arcs", tvepisode_notes[0].content)

        # Check podcast episode notes
        podcast_notes = Note.objects.filter(
            owner=self.user2.identity, item=self.podcastepisode
        )
        self.assertEqual(podcast_notes.count(), 1)
        self.assertEqual(podcast_notes[0].title, "Podcast episode notes")
        self.assertIn(
            "Interesting discussion about tech trends", podcast_notes[0].content
        )
        self.assertEqual(podcast_notes[0].progress_type, Note.ProgressType.TIMESTAMP)
        self.assertEqual(podcast_notes[0].progress_value, "23:45")

        # Check first collection
        collections = Collection.objects.filter(
            owner=self.user2.identity, title="Favorites"
        )
        self.assertEqual(collections.count(), 1)
        self.assertEqual(collections[0].brief, "My all-time favorites")
        self.assertEqual(collections[0].visibility, 1)
        collection_items = list(collections[0].ordered_items)
        self.assertEqual([self.book1, self.movie1], collection_items)

        # Check second collection
        collections2 = Collection.objects.filter(
            owner=self.user2.identity, title="To Review"
        )
        self.assertEqual(collections2.count(), 1)
        self.assertEqual(collections2[0].brief, "Items I need to review soon")
        self.assertEqual(collections2[0].visibility, 1)

        # Check second collection items
        collection2_items = [m.item for m in collections2[0].members.all()]
        self.assertEqual(len(collection2_items), 3)
        self.assertIn(self.book2, collection2_items)
        self.assertIn(self.movie2, collection2_items)
        self.assertIn(self.tvshow, collection2_items)

        tag1 = Tag.objects.filter(owner=self.user2.identity, title="favorite").first()
        self.assertIsNotNone(tag1)
        if tag1:
            self.assertTrue(tag1.pinned)
            self.assertEqual(tag1.visibility, 2)

        # Check shelf log entries
        logs2 = ShelfLogEntry.objects.filter(owner=self.user2.identity).order_by(
            "timestamp", "item_id"
        )
        l1 = [(log.item, log.shelf_type, log.timestamp) for log in logs]
        l2 = [(log.item, log.shelf_type, log.timestamp) for log in logs2]
        self.assertEqual(l1, l2)
@ -48,7 +48,7 @@
|
||||||
<br>
|
<br>
|
||||||
{{ ndjson_export_task.message }}
|
{{ ndjson_export_task.message }}
|
||||||
{% if ndjson_export_task.metadata.file %}
|
{% if ndjson_export_task.metadata.file %}
|
||||||
<a href="{% url 'users:export_ndjson' %}" download>{% trans 'Download' %}</a>
|
<a href="{% url 'users:export_ndjson' %}" download><i class="fa fa-file-code"></i> {% trans 'Download' %}</a>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
{% endif %}
|
{% endif %}
|
||||||
</form>
|
</form>
|
||||||
|
@@ -86,58 +86,148 @@
  {% trans 'Upload a <code>.zip</code> file containing <code>.csv</code> or <code>.ndjson</code> files exported from NeoDB.' %}
</li>
<li>{% trans 'Existing marks and reviews with newer dates will be preserved.' %}</li>
+<li>
+  {% trans 'Both CSV and NDJSON formats exported from NeoDB are supported. NDJSON format includes more data, like collections.' %}
+</li>
</ul>
<br>
-<input type="file" name="file" required accept=".zip">
-<p>
-  {% trans 'Visibility' %}: <small><code>.csv</code> only</small>
-  <br>
-  <label for="csv_visibility_0">
-    <input type="radio"
-           name="visibility"
-           value="0"
-           required=""
-           id="csv_visibility_0"
-           checked>
-    {% trans 'Public' %}
-  </label>
-  <label for="csv_visibility_1">
-    <input type="radio"
-           name="visibility"
-           value="1"
-           required=""
-           id="csv_visibility_1">
-    {% trans 'Followers Only' %}
-  </label>
-  <label for="csv_visibility_2">
-    <input type="radio"
-           name="visibility"
-           value="2"
-           required=""
-           id="csv_visibility_2">
-    {% trans 'Mentioned Only' %}
-  </label>
-</p>
+<input type="file" name="file" id="neodb_import_file" required accept=".zip">
+<div id="detected_format_info"
+     style="display: none;
+            margin: 10px 0;
+            padding: 8px 12px;
+            border-radius: 4px;
+            background-color: var(--card-background-color, #f8f9fa);
+            border: 1px solid var(--card-border-color, #dee2e6)">
+  <i class="fa fa-info-circle"></i> {% trans 'Detected format' %}: <strong id="detected_format">-</strong>
+</div>
+<div id="visibility_settings" style="display: none;">
+  <p>
+    {% trans 'Visibility' %}:
+    <br>
+    <label for="csv_visibility_0">
+      <input type="radio"
+             name="visibility"
+             value="0"
+             required=""
+             id="csv_visibility_0"
+             checked>
+      {% trans 'Public' %}
+    </label>
+    <label for="csv_visibility_1">
+      <input type="radio"
+             name="visibility"
+             value="1"
+             required=""
+             id="csv_visibility_1">
+      {% trans 'Followers Only' %}
+    </label>
+    <label for="csv_visibility_2">
+      <input type="radio"
+             name="visibility"
+             value="2"
+             required=""
+             id="csv_visibility_2">
+      {% trans 'Mentioned Only' %}
+    </label>
+  </p>
+</div>
+<input type="hidden" name="format_type" id="format_type" value="">
<input type="submit" value="{% trans 'Import' %}" />
-<small>
-  {% if csv_import_task %}
-    {% trans 'Last import started' %}: {{ csv_import_task.created_time }}
-    {% if csv_import_task.state == 0 or csv_import_task.state == 1 %}
-      <div hx-get="{% url 'users:user_task_status' 'csv_import' %}"
-           hx-target="this"
-           hx-trigger="load delay:2s, every 10s"
-           hx-swap="outerHTML"></div>
-    {% else %}
-      {% trans 'Status' %}: {{ csv_import_task.get_state_display }}。
-      {{ csv_import_task.message }}
-    {% endif %}
-    {% if csv_import_task.metadata.failed_items %}
-      {% trans 'Failed items' %}:
-      <br>
-      <textarea readonly>{% for item in csv_import_task.metadata.failed_items %}{{item}} {% endfor %}</textarea>
-    {% endif %}
-  {% endif %}
-</small>
+<script src="{{ cdn_url }}/npm/jszip@3.10.1/dist/jszip.min.js"></script>
+<script>
+  document.addEventListener('DOMContentLoaded', function() {
+    const fileInput = document.getElementById('neodb_import_file');
+    if (!fileInput) return;
+
+    fileInput.addEventListener('change', async function(event) {
+      const file = event.target.files[0];
+      if (!file) {
+        document.getElementById('detected_format_info').style.display = 'none';
+        document.getElementById('visibility_settings').style.display = 'none';
+        document.getElementById('format_type').value = '';
+        return;
+      }
+
+      // Check if it's a zip file
+      if (file.type !== 'application/zip' &&
+          file.type !== 'application/x-zip-compressed' &&
+          !file.name.toLowerCase().endsWith('.zip')) {
+        document.getElementById('detected_format_info').style.display = 'none';
+        document.getElementById('visibility_settings').style.display = 'none';
+        document.getElementById('format_type').value = '';
+        return;
+      }
+
+      // Update UI to show "Detecting format..." with a spinner
+      document.getElementById('detected_format').innerHTML = '{% trans "Detecting format..." %} <i class="fa fa-spinner fa-spin"></i>';
+      document.getElementById('detected_format_info').style.display = 'block';
+
+      try {
+        // Use JSZip to examine the actual contents of the ZIP file
+        const zip = new JSZip();
+        const zipContents = await zip.loadAsync(file);
+        const fileNames = Object.keys(zipContents.files);
+
+        // Check for specific files that indicate format type
+        const hasNdjson = fileNames.some(name => name === 'journal.ndjson' || name === 'catalog.ndjson');
+        const hasCsv = fileNames.some(name => name.endsWith('_mark.csv') ||
+                                              name.endsWith('_review.csv') ||
+                                              name.endsWith('_note.csv'));
+
+        let format = '';
+        let formatValue = '';
+
+        if (hasNdjson) {
+          format = 'NDJSON';
+          formatValue = 'ndjson';
+        } else if (hasCsv) {
+          format = 'CSV';
+          formatValue = 'csv';
+        } else {
+          // Unable to detect format from contents
+          format = '{% trans "Unknown format" %}';
+          formatValue = '';
+        }
+
+        // Update UI with detected format and appropriate icon
+        let formatIcon = '';
+        if (formatValue === 'ndjson') {
+          formatIcon = '<i class="fa fa-file-code"></i> ';
+        } else if (formatValue === 'csv') {
+          formatIcon = '<i class="fa fa-file-csv"></i> ';
+        } else {
+          formatIcon = '<i class="fa fa-question-circle"></i> ';
+        }
+
+        document.getElementById('detected_format').innerHTML = formatIcon + format;
+        document.getElementById('format_type').value = formatValue;
+
+        // Show visibility settings only for NDJSON format
+        if (formatValue === 'ndjson') {
+          document.getElementById('visibility_settings').style.display = 'block';
+        } else {
+          document.getElementById('visibility_settings').style.display = 'none';
+        }
+
+      } catch (error) {
+        console.error('Error examining ZIP file:', error);
+        document.getElementById('detected_format').innerHTML = '<i class="fa fa-exclamation-triangle"></i> {% trans "Error detecting format" %}';
+        document.getElementById('format_type').value = '';
+
+        // Make the error more visible
+        document.getElementById('detected_format_info').style.backgroundColor = 'var(--form-element-invalid-active-border-color, #d9534f)';
+        document.getElementById('detected_format_info').style.color = 'white';
+
+        // Hide visibility settings on error
+        document.getElementById('visibility_settings').style.display = 'none';
+      }
+    });
+  });
+</script>
+{% if neodb_import_task %}
+  {% include "users/user_task_status.html" with task=neodb_import_task %}
+{% endif %}
</form>
</details>
</article>
@@ -1,19 +1,20 @@
{% load i18n %}
-<div hx-get="{% url 'users:user_task_status' 'csv_import' %}"
-     {% if task.state == 0 or task.state == 1 %}hx-target="this" hx-trigger="every 30s"{% endif %}
+<div hx-target="this"
+     {% if task.state == 0 or task.state == 1 %} hx-get="{% url 'users:user_task_status' task.type %}" hx-trigger="intersect once, every 30s"{% endif %}
     hx-swap="outerHTML">
-  {% trans 'Status' %}: {{ task.get_state_display }}。
+  {% trans 'Requested' %}: {{ task.created_time }}
+  ({{ task.get_state_display }})
  {{ task.message }}
-  <br>
+  {% if task.state == 0 or task.state == 1 %}
  {% if task.metadata.total and task.metadata.processed %}
-  <div class="progress-container">
+  <div>
    <progress value="{{ task.metadata.processed }}" max="{{ task.metadata.total }}"></progress>
-    <div class="progress-text">
-      {{ task.metadata.processed }} / {{ task.metadata.total }}
-      ({{ task.metadata.imported }} imported,
-      {{ task.metadata.skipped }} skipped,
-      {{ task.metadata.failed }} failed)
-    </div>
  </div>
-  </div>
+  {% endif %}
+  {% endif %}
+  {% if task.metadata.failed_items %}
+    {% trans 'Failed items' %}:
+    <br>
+    <textarea readonly>{% for item in task.metadata.failed_items %}{{item}} {% endfor %}</textarea>
  {% endif %}
</div>
@@ -10,7 +10,7 @@ urlpatterns = [
    path("data", data, name="data"),
    path("info", account_info, name="info"),
    path("profile", account_profile, name="profile"),
-    path("task/<str:task_name>/status", user_task_status, name="user_task_status"),
+    path("task/<str:task_type>/status", user_task_status, name="user_task_status"),
    path("data/import/status", data_import_status, name="import_status"),
    path("data/import/goodreads", import_goodreads, name="import_goodreads"),
    path("data/import/douban", import_douban, name="import_douban"),
@@ -18,6 +18,7 @@ from journal.importers import (
    DoubanImporter,
    GoodreadsImporter,
    LetterboxdImporter,
+    NdjsonImporter,
    OPMLImporter,
    get_neodb_importer,
)
@@ -92,6 +93,20 @@ def data(request):
    start_date = queryset.aggregate(Min("created_time"))["created_time__min"]
    start_year = start_date.year if start_date else current_year
    years = reversed(range(start_year, current_year + 1))
+
+    # Import tasks - check for both CSV and NDJSON importers
+    csv_import_task = CsvImporter.latest_task(request.user)
+    ndjson_import_task = NdjsonImporter.latest_task(request.user)
+
+    # Use the most recent import task for display
+    if ndjson_import_task and (
+        not csv_import_task
+        or ndjson_import_task.created_time > csv_import_task.created_time
+    ):
+        neodb_import_task = ndjson_import_task
+    else:
+        neodb_import_task = csv_import_task
+
    return render(
        request,
        "users/data.html",
@@ -100,7 +115,7 @@ def data(request):
            "import_task": DoubanImporter.latest_task(request.user),
            "export_task": DoufenExporter.latest_task(request.user),
            "csv_export_task": CsvExporter.latest_task(request.user),
-            "csv_import_task": CsvImporter.latest_task(request.user),
+            "neodb_import_task": neodb_import_task,  # Use the most recent import task
            "ndjson_export_task": NdjsonExporter.latest_task(request.user),
            "letterboxd_task": LetterboxdImporter.latest_task(request.user),
            "goodreads_task": GoodreadsImporter.latest_task(request.user),
@@ -121,19 +136,21 @@ def data_import_status(request):


@login_required
-def user_task_status(request, task_name: str):
-    match task_name:
-        case "csv_import":
+def user_task_status(request, task_type: str):
+    match task_type:
+        case "journal.csvimporter":
            task_cls = CsvImporter
-        case "csv_export":
+        case "journal.ndjsonimporter":
+            task_cls = NdjsonImporter
+        case "journal.csvexporter":
            task_cls = CsvExporter
-        case "ndjson_export":
+        case "journal.ndjsonexporter":
            task_cls = NdjsonExporter
-        case "letterboxd":
+        case "journal.letterboxdimporter":
            task_cls = LetterboxdImporter
-        case "goodreads":
+        case "journal.goodreadsimporter":
            task_cls = GoodreadsImporter
-        case "douban":
+        case "journal.doubanimporter":
            task_cls = DoubanImporter
        case _:
            return redirect(reverse("users:data"))
@@ -357,16 +374,49 @@ def import_neodb(request):
    with open(f, "wb+") as destination:
        for chunk in request.FILES["file"].chunks():
            destination.write(chunk)
-    importer = get_neodb_importer(f)
+
+    # Get format type hint from frontend, if provided
+    format_type_hint = request.POST.get("format_type", "").lower()
+
+    # Import appropriate class based on format type or auto-detect
+    from journal.importers import CsvImporter, NdjsonImporter
+
+    if format_type_hint == "csv":
+        importer = CsvImporter
+        format_type = "CSV"
+    elif format_type_hint == "ndjson":
+        importer = NdjsonImporter
+        format_type = "NDJSON"
+    else:
+        # Fall back to auto-detection if no hint provided
+        importer = get_neodb_importer(f)
+        if importer == CsvImporter:
+            format_type = "CSV"
+        elif importer == NdjsonImporter:
+            format_type = "NDJSON"
+        else:
+            format_type = ""
+            importer = None  # Make sure importer is None if auto-detection fails
+
    if not importer:
-        messages.add_message(request, messages.ERROR, _("Invalid file."))
+        messages.add_message(
+            request,
+            messages.ERROR,
+            _(
+                "Invalid file. Expected a ZIP containing either CSV or NDJSON files exported from NeoDB."
+            ),
+        )
        return redirect(reverse("users:data"))

    importer.create(
        request.user,
        visibility=int(request.POST.get("visibility", 0)),
        file=f,
    ).enqueue()

    messages.add_message(
-        request, messages.INFO, _("File is uploaded and will be imported soon.")
+        request,
+        messages.INFO,
+        _(f"{format_type} file is uploaded and will be imported soon."),
    )
    return redirect(reverse("users:data"))