tweak importer to prefer fedi item link
This commit is contained in:
parent
1d7816d9d7
commit
584cc9516a
10 changed files with 244 additions and 155 deletions
|
@ -1,13 +1,34 @@
|
|||
import os
|
||||
import zipfile
|
||||
|
||||
from .csv import CsvImporter
|
||||
from .douban import DoubanImporter
|
||||
from .goodreads import GoodreadsImporter
|
||||
from .letterboxd import LetterboxdImporter
|
||||
from .opml import OPMLImporter
|
||||
|
||||
|
||||
def get_neodb_importer(filename: str) -> type[CsvImporter] | None:
    """Pick the importer class able to handle a NeoDB export archive.

    Args:
        filename: Path to the candidate archive on disk.

    Returns:
        ``CsvImporter`` when the archive holds at least one member whose name
        ends with ``_mark.csv``, ``_review.csv`` or ``_note.csv``. ``None``
        when the path does not exist, is not a readable ZIP archive, contains
        a member named ``journal.ndjson``, or contains none of the recognised
        CSV members.
    """
    if not os.path.exists(filename) or not zipfile.is_zipfile(filename):
        return None
    try:
        with zipfile.ZipFile(filename, "r") as z:
            files = z.namelist()
    except zipfile.BadZipFile:
        # is_zipfile() does not validate the whole archive, so a truncated or
        # corrupt upload can pass the check above and still fail to open.
        # Report it as "no importer" instead of letting the error escape.
        return None
    if "journal.ndjson" in files:
        return None
    if any(f.endswith(("_mark.csv", "_review.csv", "_note.csv")) for f in files):
        return CsvImporter
    return None
|
||||
|
||||
|
||||
__all__ = [
|
||||
"CsvImporter",
|
||||
"LetterboxdImporter",
|
||||
"OPMLImporter",
|
||||
"DoubanImporter",
|
||||
"GoodreadsImporter",
|
||||
"get_neodb_importer",
|
||||
]
|
||||
|
|
|
@ -12,10 +12,20 @@ from django.utils.translation import gettext as _
|
|||
from loguru import logger
|
||||
|
||||
from catalog.common.sites import SiteManager
|
||||
from catalog.models import Edition, IdType, Item, ItemCategory
|
||||
from catalog.models import Edition, IdType, Item, ItemCategory, SiteName
|
||||
from journal.models import Mark, Note, Review, ShelfType
|
||||
from users.models import Task
|
||||
|
||||
# Ranking applied when several links of one row resolve to known sites: a site
# whose SITE_NAME appears earlier in this list is tried first, and any site
# not listed here is ranked after all of them.
_PREFERRED_SITES = [
    SiteName.Fediverse,
    SiteName.RSS,
    SiteName.TMDB,
    SiteName.IMDB,
    SiteName.GoogleBooks,
    SiteName.Goodreads,
    SiteName.IGDB,
]
|
||||
|
||||
|
||||
class CsvImporter(Task):
|
||||
class Meta:
|
||||
|
@ -49,18 +59,39 @@ class CsvImporter(Task):
|
|||
site_url = settings.SITE_INFO["site_url"] + "/"
|
||||
|
||||
links = links_str.strip().split()
|
||||
# look for local items first
|
||||
for link in links:
|
||||
if link.startswith("/") or link.startswith(site_url):
|
||||
item = Item.get_by_url(link)
|
||||
if item:
|
||||
return item
|
||||
for link in links:
|
||||
site = SiteManager.get_site_by_url(link)
|
||||
if site:
|
||||
|
||||
sites = [SiteManager.get_site_by_url(link) for link in links]
|
||||
sites = [site for site in sites if site]
|
||||
sites.sort(
|
||||
key=lambda x: _PREFERRED_SITES.index(x.SITE_NAME)
|
||||
if x.SITE_NAME in _PREFERRED_SITES
|
||||
else 99
|
||||
)
|
||||
|
||||
# look for external items that already matched
|
||||
for site in sites:
|
||||
logger.debug(f"matching {site.url}")
|
||||
item = site.get_item()
|
||||
if item:
|
||||
return item
|
||||
|
||||
# fetch external item if possible
|
||||
for site in sites:
|
||||
try:
|
||||
logger.debug(f"fetching {site.url}")
|
||||
site.get_resource_ready()
|
||||
item = site.get_item()
|
||||
if item:
|
||||
return item
|
||||
except Exception as e:
|
||||
logger.error(f"Error fetching item: {e}")
|
||||
|
||||
# Try using the info string
|
||||
if info_str:
|
||||
info_dict = {}
|
||||
|
@ -304,8 +335,6 @@ class CsvImporter(Task):
|
|||
return True
|
||||
except Exception as e:
|
||||
logger.error(f"Error importing note: {e}")
|
||||
if "failed_items" not in self.metadata:
|
||||
self.metadata["failed_items"] = []
|
||||
self.metadata["failed_items"].append(
|
||||
f"Error importing note for {row.get('title', '')}: {str(e)}"
|
||||
)
|
||||
|
@ -333,32 +362,11 @@ class CsvImporter(Task):
|
|||
success = import_function(row)
|
||||
self.progress(success)
|
||||
|
||||
@classmethod
def validate_file(cls, filename: str) -> bool:
    """Check whether *filename* points at an existing ZIP archive.

    Args:
        filename: Path of the file to inspect.

    Returns:
        bool: ``True`` only when the path exists and ``zipfile.is_zipfile``
        accepts it; ``False`` otherwise.
    """
    if not os.path.exists(filename):
        return False
    return zipfile.is_zipfile(filename)
|
||||
|
||||
def run(self) -> None:
|
||||
"""Run the CSV import."""
|
||||
# Ensure failed_items is initialized
|
||||
if "failed_items" not in self.metadata:
|
||||
self.metadata["failed_items"] = []
|
||||
|
||||
filename = self.metadata["file"]
|
||||
logger.debug(f"Importing {filename}")
|
||||
|
||||
# Validate the file before processing
|
||||
if not self.validate_file(filename):
|
||||
self.save()
|
||||
return
|
||||
|
||||
with zipfile.ZipFile(filename, "r") as zipref:
|
||||
with tempfile.TemporaryDirectory() as tmpdirname:
|
||||
logger.debug(f"Extracting {filename} to {tmpdirname}")
|
||||
|
|
23
journal/migrations/0006_csvimporter.py
Normal file
23
journal/migrations/0006_csvimporter.py
Normal file
|
@ -0,0 +1,23 @@
|
|||
# Generated by Django 4.2.18 on 2025-03-03 23:16
|
||||
|
||||
from django.db import migrations
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Declare ``CsvImporter`` as a proxy model based on ``users.task``."""

    # Applied after users 0008_alter_task_type and journal 0005_csvexporter.
    dependencies = [
        ("users", "0008_alter_task_type"),
        ("journal", "0005_csvexporter"),
    ]

    operations = [
        migrations.CreateModel(
            name="CsvImporter",
            fields=[],
            options={
                "proxy": True,
                "indexes": [],
                "constraints": [],
            },
            bases=("users.task",),
        ),
    ]
|
|
@ -9,7 +9,7 @@ from loguru import logger
|
|||
|
||||
from catalog.models import Edition, IdType, Movie, TVEpisode, TVSeason, TVShow
|
||||
from journal.exporters import CsvExporter
|
||||
from journal.importers import CsvImporter
|
||||
from journal.importers import CsvImporter, get_neodb_importer
|
||||
from users.models import User
|
||||
|
||||
from ..models import *
|
||||
|
@ -219,6 +219,7 @@ class CsvExportImportTest(TestCase):
|
|||
f"Expected file {filename} with {expected_data_count} data rows, but file not found"
|
||||
)
|
||||
|
||||
self.assertEqual(get_neodb_importer(export_path), CsvImporter)
|
||||
importer = CsvImporter.create(user=self.user2, file=export_path, visibility=2)
|
||||
importer.run()
|
||||
self.assertEqual(importer.message, "Import complete")
|
||||
|
|
|
@ -28,6 +28,8 @@ attrs==25.1.0
|
|||
# via aiohttp
|
||||
babel==2.17.0
|
||||
# via mkdocs-material
|
||||
backrefs==5.8
|
||||
# via mkdocs-material
|
||||
beautifulsoup4==4.13.3
|
||||
# via markdownify
|
||||
bleach==5.0.1
|
||||
|
@ -54,7 +56,7 @@ click==8.1.8
|
|||
colorama==0.4.6
|
||||
# via djlint
|
||||
# via mkdocs-material
|
||||
cryptography==44.0.1
|
||||
cryptography==44.0.2
|
||||
# via atproto
|
||||
cssbeautifier==1.15.4
|
||||
# via djlint
|
||||
|
@ -169,7 +171,7 @@ mkdocs==1.6.1
|
|||
# via mkdocs-material
|
||||
mkdocs-get-deps==0.2.0
|
||||
# via mkdocs
|
||||
mkdocs-material==9.6.5
|
||||
mkdocs-material==9.6.7
|
||||
mkdocs-material-extensions==1.3.1
|
||||
# via mkdocs-material
|
||||
multidict==6.1.0
|
||||
|
@ -213,7 +215,7 @@ pygments==2.19.1
|
|||
# via mkdocs-material
|
||||
pymdown-extensions==10.14.3
|
||||
# via mkdocs-material
|
||||
pyright==1.1.395
|
||||
pyright==1.1.396
|
||||
python-dateutil==2.9.0.post0
|
||||
# via dateparser
|
||||
# via django-auditlog
|
||||
|
@ -241,7 +243,6 @@ redis==5.2.1
|
|||
regex==2024.11.6
|
||||
# via dateparser
|
||||
# via djlint
|
||||
# via mkdocs-material
|
||||
requests==2.32.3
|
||||
# via django-anymail
|
||||
# via igdb-api-v4
|
||||
|
@ -251,7 +252,7 @@ rjsmin==1.2.2
|
|||
# via django-compressor
|
||||
rq==2.1.0
|
||||
# via django-rq
|
||||
ruff==0.9.8
|
||||
ruff==0.9.9
|
||||
sentry-sdk==2.22.0
|
||||
setproctitle==1.3.5
|
||||
six==1.17.0
|
||||
|
|
|
@ -44,7 +44,7 @@ charset-normalizer==3.4.1
|
|||
click==8.1.8
|
||||
# via atproto
|
||||
# via rq
|
||||
cryptography==44.0.1
|
||||
cryptography==44.0.2
|
||||
# via atproto
|
||||
dateparser==1.2.1
|
||||
deepmerge==2.0
|
||||
|
|
29
users/migrations/0008_alter_task_type.py
Normal file
29
users/migrations/0008_alter_task_type.py
Normal file
|
@ -0,0 +1,29 @@
|
|||
# Generated by Django 4.2.18 on 2025-03-03 23:16
|
||||
|
||||
from django.db import migrations, models
|
||||
|
||||
|
||||
class Migration(migrations.Migration):
    """Redefine ``Task.type`` with a choice list that includes the CSV importer."""

    # Applied after users 0007_alter_task_type.
    dependencies = [
        ("users", "0007_alter_task_type"),
    ]

    operations = [
        migrations.AlterField(
            model_name="task",
            name="type",
            field=models.CharField(
                choices=[
                    ("journal.csvexporter", "csv exporter"),
                    ("journal.csvimporter", "csv importer"),
                    ("journal.doubanimporter", "douban importer"),
                    ("journal.doufenexporter", "doufen exporter"),
                    ("journal.goodreadsimporter", "goodreads importer"),
                    ("journal.letterboxdimporter", "letterboxd importer"),
                    ("journal.ndjsonexporter", "ndjson exporter"),
                ],
                db_index=True,
                max_length=255,
            ),
        ),
    ]
|
|
@ -15,6 +15,127 @@
|
|||
{% include "_header.html" %}
|
||||
<main>
|
||||
<div class="grid__main">
|
||||
<article>
|
||||
<details>
|
||||
<summary>{% trans 'Export Data' %}</summary>
|
||||
<form action="{% url 'users:export_csv' %}"
|
||||
method="post"
|
||||
enctype="multipart/form-data">
|
||||
{% csrf_token %}
|
||||
<input type="submit"
|
||||
value="{% trans 'Export marks, reviews and notes in CSV' %}" />
|
||||
{% if csv_export_task %}
|
||||
<br>
|
||||
{% trans 'Last export' %}: {{ csv_export_task.created_time }}
|
||||
{% trans 'Status' %}: {{ csv_export_task.get_state_display }}
|
||||
<br>
|
||||
{{ csv_export_task.message }}
|
||||
{% if csv_export_task.metadata.file %}
|
||||
<a href="{% url 'users:export_csv' %}" download>{% trans 'Download' %}</a>
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
</form>
|
||||
<hr>
|
||||
<form action="{% url 'users:export_ndjson' %}"
|
||||
method="post"
|
||||
enctype="multipart/form-data">
|
||||
{% csrf_token %}
|
||||
<input type="submit" value="{% trans 'Export everything in NDJSON' %}" />
|
||||
{% if ndjson_export_task %}
|
||||
<br>
|
||||
{% trans 'Last export' %}: {{ ndjson_export_task.created_time }}
|
||||
{% trans 'Status' %}: {{ ndjson_export_task.get_state_display }}
|
||||
<br>
|
||||
{{ ndjson_export_task.message }}
|
||||
{% if ndjson_export_task.metadata.file %}
|
||||
<a href="{% url 'users:export_ndjson' %}" download>{% trans 'Download' %}</a>
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
</form>
|
||||
<hr>
|
||||
<form action="{% url 'users:export_marks' %}"
|
||||
method="post"
|
||||
enctype="multipart/form-data">
|
||||
{% csrf_token %}
|
||||
<input type="submit"
|
||||
class="secondary"
|
||||
value="{% trans 'Export marks and reviews in XLSX (Doufen format)' %}" />
|
||||
<small>exporting to this format will be deprecated soon.</small>
|
||||
{% if export_task %}
|
||||
<br>
|
||||
{% trans 'Last export' %}: {{ export_task.created_time }}
|
||||
{% trans 'Status' %}: {{ export_task.get_state_display }}
|
||||
<br>
|
||||
{{ export_task.message }}
|
||||
{% if export_task.metadata.file %}
|
||||
<a href="{% url 'users:export_marks' %}" download>{% trans 'Download' %}</a>
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
</form>
|
||||
</details>
|
||||
</article>
|
||||
<article>
|
||||
<details>
|
||||
<summary>{% trans 'Import Data' %}</summary>
|
||||
<form action="{% url 'users:import_neodb' %}"
|
||||
method="post"
|
||||
enctype="multipart/form-data">
|
||||
{% csrf_token %}
|
||||
<ul>
|
||||
<li>
|
||||
{% trans 'Upload a <code>.zip</code> file containing <code>.csv</code> or <code>.ndjson</code> files exported from NeoDB.' %}
|
||||
</li>
|
||||
<li>{% trans 'Existing marks and reviews with newer dates will be preserved.' %}</li>
|
||||
</ul>
|
||||
<br>
|
||||
<input type="file" name="file" required accept=".zip">
|
||||
<p>
|
||||
{% trans 'Visibility' %}: <small><code>.csv</code> only</small>
|
||||
<br>
|
||||
<label for="csv_visibility_0">
|
||||
<input type="radio"
|
||||
name="visibility"
|
||||
value="0"
|
||||
required=""
|
||||
id="csv_visibility_0"
|
||||
checked>
|
||||
{% trans 'Public' %}
|
||||
</label>
|
||||
<label for="csv_visibility_1">
|
||||
<input type="radio"
|
||||
name="visibility"
|
||||
value="1"
|
||||
required=""
|
||||
id="csv_visibility_1">
|
||||
{% trans 'Followers Only' %}
|
||||
</label>
|
||||
<label for="csv_visibility_2">
|
||||
<input type="radio"
|
||||
name="visibility"
|
||||
value="2"
|
||||
required=""
|
||||
id="csv_visibility_2">
|
||||
{% trans 'Mentioned Only' %}
|
||||
</label>
|
||||
</p>
|
||||
<input type="submit" value="{% trans 'Import' %}" />
|
||||
<small>
|
||||
{% if csv_import_task %}
|
||||
<br>
|
||||
{% trans 'Last import started' %}: {{ csv_import_task.created_time }}
|
||||
{% trans 'Status' %}: {{ csv_import_task.get_state_display }}。
|
||||
<br>
|
||||
{{ csv_import_task.message }}
|
||||
{% if csv_import_task.metadata.failed_items %}
|
||||
{% trans 'Failed items' %}:
|
||||
<br>
|
||||
<textarea readonly>{% for item in csv_import_task.metadata.failed_items %}{{item}} {% endfor %}</textarea>
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
</small>
|
||||
</form>
|
||||
</details>
|
||||
</article>
|
||||
<article>
|
||||
<details>
|
||||
<summary>{% trans 'Import Marks and Reviews from Douban' %}</summary>
|
||||
|
@ -213,123 +334,6 @@
|
|||
</form>
|
||||
</details>
|
||||
</article>
|
||||
<article>
|
||||
<details>
|
||||
<summary>{% trans 'Import marks, reviews and notes from CSV' %}</summary>
|
||||
<form action="{% url 'users:import_csv' %}"
|
||||
method="post"
|
||||
enctype="multipart/form-data">
|
||||
{% csrf_token %}
|
||||
<ul>
|
||||
<li>{% trans 'Upload a ZIP file containing CSV files exported from NeoDB.' %}</li>
|
||||
<li>{% trans 'Existing marks and reviews with newer dates will be preserved.' %}</li>
|
||||
</ul>
|
||||
<br>
|
||||
<input type="file" name="file" required accept=".zip">
|
||||
<p>
|
||||
{% trans 'Visibility' %}:
|
||||
<br>
|
||||
<label for="csv_visibility_0">
|
||||
<input type="radio"
|
||||
name="visibility"
|
||||
value="0"
|
||||
required=""
|
||||
id="csv_visibility_0"
|
||||
checked>
|
||||
{% trans 'Public' %}
|
||||
</label>
|
||||
<label for="csv_visibility_1">
|
||||
<input type="radio"
|
||||
name="visibility"
|
||||
value="1"
|
||||
required=""
|
||||
id="csv_visibility_1">
|
||||
{% trans 'Followers Only' %}
|
||||
</label>
|
||||
<label for="csv_visibility_2">
|
||||
<input type="radio"
|
||||
name="visibility"
|
||||
value="2"
|
||||
required=""
|
||||
id="csv_visibility_2">
|
||||
{% trans 'Mentioned Only' %}
|
||||
</label>
|
||||
</p>
|
||||
<input type="submit" value="{% trans 'Import' %}" />
|
||||
<small>
|
||||
{% if csv_import_task %}
|
||||
<br>
|
||||
{% trans 'Last import started' %}: {{ csv_import_task.created_time }}
|
||||
{% trans 'Status' %}: {{ csv_import_task.get_state_display }}。
|
||||
<br>
|
||||
{{ csv_import_task.message }}
|
||||
{% if csv_import_task.metadata.failed_items %}
|
||||
{% trans 'Failed items' %}:
|
||||
<br>
|
||||
<textarea readonly>{% for item in csv_import_task.metadata.failed_items %}{{item}} {% endfor %}</textarea>
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
</small>
|
||||
</form>
|
||||
</details>
|
||||
</article>
|
||||
<article>
|
||||
<details>
|
||||
<summary>{% trans 'Export Data' %}</summary>
|
||||
<form action="{% url 'users:export_marks' %}"
|
||||
method="post"
|
||||
enctype="multipart/form-data">
|
||||
{% csrf_token %}
|
||||
<input type="submit"
|
||||
value="{% trans 'Export marks and reviews in XLSX (Doufen format)' %}" />
|
||||
{% if export_task %}
|
||||
<br>
|
||||
{% trans 'Last export' %}: {{ export_task.created_time }}
|
||||
{% trans 'Status' %}: {{ export_task.get_state_display }}
|
||||
<br>
|
||||
{{ export_task.message }}
|
||||
{% if export_task.metadata.file %}
|
||||
<a href="{% url 'users:export_marks' %}" download>{% trans 'Download' %}</a>
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
</form>
|
||||
<hr>
|
||||
<form action="{% url 'users:export_csv' %}"
|
||||
method="post"
|
||||
enctype="multipart/form-data">
|
||||
{% csrf_token %}
|
||||
<input type="submit"
|
||||
value="{% trans 'Export marks, reviews and notes in CSV' %}" />
|
||||
{% if csv_export_task %}
|
||||
<br>
|
||||
{% trans 'Last export' %}: {{ csv_export_task.created_time }}
|
||||
{% trans 'Status' %}: {{ csv_export_task.get_state_display }}
|
||||
<br>
|
||||
{{ csv_export_task.message }}
|
||||
{% if csv_export_task.metadata.file %}
|
||||
<a href="{% url 'users:export_csv' %}" download>{% trans 'Download' %}</a>
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
</form>
|
||||
<hr>
|
||||
<form action="{% url 'users:export_ndjson' %}"
|
||||
method="post"
|
||||
enctype="multipart/form-data">
|
||||
{% csrf_token %}
|
||||
<input type="submit" value="{% trans 'Export everything in NDJSON' %}" />
|
||||
{% if ndjson_export_task %}
|
||||
<br>
|
||||
{% trans 'Last export' %}: {{ ndjson_export_task.created_time }}
|
||||
{% trans 'Status' %}: {{ ndjson_export_task.get_state_display }}
|
||||
<br>
|
||||
{{ ndjson_export_task.message }}
|
||||
{% if ndjson_export_task.metadata.file %}
|
||||
<a href="{% url 'users:export_ndjson' %}" download>{% trans 'Download' %}</a>
|
||||
{% endif %}
|
||||
{% endif %}
|
||||
</form>
|
||||
</details>
|
||||
</article>
|
||||
<article>
|
||||
<details>
|
||||
<summary>{% trans 'View Annual Summary' %}</summary>
|
||||
|
|
|
@ -15,7 +15,7 @@ urlpatterns = [
|
|||
path("data/import/douban", import_douban, name="import_douban"),
|
||||
path("data/import/letterboxd", import_letterboxd, name="import_letterboxd"),
|
||||
path("data/import/opml", import_opml, name="import_opml"),
|
||||
path("data/import/csv", import_csv, name="import_csv"),
|
||||
path("data/import/neodb", import_neodb, name="import_neodb"),
|
||||
path("data/export/reviews", export_reviews, name="export_reviews"),
|
||||
path("data/export/marks", export_marks, name="export_marks"),
|
||||
path("data/export/csv", export_csv, name="export_csv"),
|
||||
|
|
|
@ -19,6 +19,7 @@ from journal.importers import (
|
|||
GoodreadsImporter,
|
||||
LetterboxdImporter,
|
||||
OPMLImporter,
|
||||
get_neodb_importer,
|
||||
)
|
||||
from journal.models import ShelfType
|
||||
from takahe.utils import Takahe
|
||||
|
@ -324,7 +325,7 @@ def import_opml(request):
|
|||
|
||||
|
||||
@login_required
|
||||
def import_csv(request):
|
||||
def import_neodb(request):
|
||||
if request.method == "POST":
|
||||
f = (
|
||||
settings.MEDIA_ROOT
|
||||
|
@ -335,10 +336,11 @@ def import_csv(request):
|
|||
with open(f, "wb+") as destination:
|
||||
for chunk in request.FILES["file"].chunks():
|
||||
destination.write(chunk)
|
||||
if not CsvImporter.validate_file(f):
|
||||
importer = get_neodb_importer(f)
|
||||
if not importer:
|
||||
messages.add_message(request, messages.ERROR, _("Invalid file."))
|
||||
return redirect(reverse("users:data"))
|
||||
CsvImporter.create(
|
||||
importer.create(
|
||||
request.user,
|
||||
visibility=int(request.POST.get("visibility", 0)),
|
||||
file=f,
|
||||
|
|
Loading…
Add table
Reference in a new issue