import datetime
from typing import Dict, List, Literal, Optional

from django.conf import settings
from django.utils.dateparse import parse_datetime
from loguru import logger

from catalog.common.sites import SiteManager
from catalog.models import Edition, IdType, Item, SiteName
from journal.models import ShelfType
from users.models import Task

_PREFERRED_SITES = [
    SiteName.Fediverse,
    SiteName.RSS,
    SiteName.TMDB,
    SiteName.IMDB,
    SiteName.GoogleBooks,
    SiteName.Goodreads,
    SiteName.IGDB,
]

class BaseImporter(Task):
    class Meta:
        app_label = "journal"  # workaround bug in TypedModel

    ImportResult = Literal["imported", "skipped", "failed"]
    TaskQueue = "import"
    DefaultMetadata = {
        "total": 0,
        "processed": 0,
        "skipped": 0,
        "imported": 0,
        "failed": 0,
        "failed_items": [],
        "file": None,
        "visibility": 0,
    }

    def progress(self, result: ImportResult) -> None:
        """Update import progress.

        Increments the processed counter and the counter named by result,
        then refreshes the status message, e.g.
        "Progress: 50% - 3 imported, 1 skipped, 1 failed".

        Args:
            result: The import result ('imported', 'skipped', or 'failed')
        """
        self.metadata["processed"] += 1
        self.metadata[result] = self.metadata.get(result, 0) + 1

        if self.metadata["total"]:
            progress_percentage = round(
                self.metadata["processed"] / self.metadata["total"] * 100
            )
            self.message = f"Progress: {progress_percentage}% - "
        else:
            self.message = ""
        self.message += (
            f"{self.metadata['imported']} imported, "
            f"{self.metadata['skipped']} skipped, "
            f"{self.metadata['failed']} failed"
        )
        self.save(update_fields=["metadata", "message"])

    def run(self) -> None:
        """Perform the actual import; subclasses must implement this."""
        raise NotImplementedError

    def get_item_by_info_and_links(
        self, title: str, info_str: str, links: list[str]
    ) -> Optional[Item]:
        """Find an item based on information from a CSV export.

        Args:
            title: Item title
            info_str: Item info string (space-separated key:value pairs)
            links: List of URLs

        Returns:
            Item if found, None otherwise
        """
        site_url = settings.SITE_INFO["site_url"] + "/"
        # look for local items first
        for link in links:
            if link.startswith("/") or link.startswith(site_url):
                item = Item.get_by_url(link, resolve_merge=True)
                if item and not item.is_deleted:
                    return item

        sites = [
            SiteManager.get_site_by_url(link, detect_redirection=False)
            for link in links
        ]
        sites = [site for site in sites if site]
        sites.sort(
            key=lambda x: _PREFERRED_SITES.index(x.SITE_NAME)
            if x.SITE_NAME in _PREFERRED_SITES
            else 99
        )

        # match items without extra requests
        for site in sites:
            item = site.get_item()
            if item:
                return item

        # match items after HEAD, which may resolve redirected URLs
        sites = [
            SiteManager.get_site_by_url(site.url) if site.url else site
            for site in sites
        ]
        sites = [site for site in sites if site]
        for site in sites:
            item = site.get_item()
            if item:
                return item

        # fetch from remote
        for site in sites:
            try:
                logger.debug(f"fetching {site.url}")
                site.get_resource_ready()
                item = site.get_item()
                if item:
                    return item
            except Exception as e:
                logger.error(f"Error fetching item: {e}")

        # fall back to IDs (ISBN, IMDB, etc.) parsed from the info string
        if info_str:
            info_dict = self.parse_info(info_str)
            item = None
            for key, value in info_dict.items():
                if key == "isbn" and value:
                    item = Edition.objects.filter(
                        primary_lookup_id_type=IdType.ISBN,
                        primary_lookup_id_value=value,
                    ).first()
                elif key == "imdb" and value:
                    item = Item.objects.filter(
                        primary_lookup_id_type=IdType.IMDB,
                        primary_lookup_id_value=value,
                    ).first()
                if item:
                    return item
        return None

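    # Illustrative lookup with hypothetical values (the cascade above tries
    # the URL first: local item, cached site match, HEAD-resolved match,
    # then a remote fetch, before falling back to IDs in the info string):
    #
    #   importer.get_item_by_info_and_links(
    #       "The Shawshank Redemption",
    #       "imdb:tt0111161",
    #       ["https://www.imdb.com/title/tt0111161/"],
    #   )
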
    def parse_tags(self, tags_str: str) -> List[str]:
        """Parse a space-separated tags string into a list of tags."""
        if not tags_str:
            return []
        return [tag.strip() for tag in tags_str.split() if tag.strip()]

    def parse_info(self, info_str: str) -> Dict[str, str]:
        """Parse an info string (space-separated key:value pairs) into a dict."""
        info_dict = {}
        if not info_str:
            return info_dict

        for pair in info_str.split():
            if ":" in pair:
                key, value = pair.split(":", 1)
                info_dict[key] = value

        return info_dict

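    # For example (hypothetical values):
    #   parse_info("isbn:9780316769488 imdb:tt0111161")
    #   -> {"isbn": "9780316769488", "imdb": "tt0111161"}
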
    def parse_datetime(self, timestamp_str: str | None) -> Optional[datetime.datetime]:
        """Parse an ISO format timestamp into an aware datetime (UTC if naive)."""
        if not timestamp_str:
            return None

        try:
            # resolves to django.utils.dateparse.parse_datetime
            # (module-level import), not to this method
            dt = parse_datetime(timestamp_str)
            if dt and dt.tzinfo is None:
                dt = dt.replace(tzinfo=datetime.UTC)
            return dt
        except Exception as e:
            logger.error(f"Error parsing datetime {timestamp_str}: {e}")
            return None

    def parse_shelf_type(self, status_str: str) -> ShelfType:
        """Parse shelf type string into ShelfType enum, defaulting to WISHLIST."""
        if not status_str:
            return ShelfType.WISHLIST

        status_map = {
            "wishlist": ShelfType.WISHLIST,
            "progress": ShelfType.PROGRESS,
            "complete": ShelfType.COMPLETE,
            "dropped": ShelfType.DROPPED,
        }

        return status_map.get(status_str.lower(), ShelfType.WISHLIST)
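

# A minimal sketch of the intended subclass contract (hypothetical, not part
# of this module): set metadata["total"], do the real parsing in run(), and
# report each row through progress(). Helper names such as _rows() are
# illustrative only.
#
# class ExampleCSVImporter(BaseImporter):
#     class Meta:
#         app_label = "journal"  # same TypedModel workaround as the base
#
#     def run(self) -> None:
#         rows = self._rows()  # hypothetical: rows parsed from metadata["file"]
#         self.metadata["total"] = len(rows)
#         self.save(update_fields=["metadata"])
#         for row in rows:
#             item = self.get_item_by_info_and_links(
#                 row.get("title", ""),
#                 row.get("info", ""),
#                 row.get("links", "").split(),
#             )
#             if not item:
#                 self.metadata["failed_items"].append(row.get("title", ""))
#                 self.progress("failed")
#                 continue
#             shelf_type = self.parse_shelf_type(row.get("status", ""))
#             tags = self.parse_tags(row.get("tags", ""))
#             # ... create the mark / shelf entry for item here ...
#             self.progress("imported")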