ndjson: include actor info

mein Name 2025-03-07 19:07:44 -05:00 committed by Henri Dickson
parent d6d360025f
commit 953791c84f
4 changed files with 102 additions and 17 deletions

View file

@@ -205,6 +205,7 @@ class BasicDownloader:
                 )
                 return resp, response_type
         except RequestException as e:
+            # logger.debug(f"RequestException: {e}")
             self.logs.append(
                 {"response_type": RESPONSE_NETWORK_ERROR, "url": url, "exception": e}
             )
@@ -340,16 +341,19 @@ class ImageDownloaderMixin:
     def validate_response(self, response):
         if response and response.status_code == 200:
             try:
-                raw_img = response.content
-                img = Image.open(BytesIO(raw_img))
-                img.load()  # corrupted image will trigger exception
-                content_type = response.headers.get("Content-Type")
+                content_type = response.headers["content-type"]
+                if content_type.startswith("image/svg+xml"):
+                    self.extention = "svg"
+                    return RESPONSE_OK
                 file_type = filetype.get_type(
                     mime=content_type.partition(";")[0].strip()
                 )
                 if file_type is None:
                     return RESPONSE_NETWORK_ERROR
                 self.extention = file_type.extension
+                raw_img = response.content
+                img = Image.open(BytesIO(raw_img))
+                img.load()  # corrupted image will trigger exception
                 return RESPONSE_OK
             except Exception:
                 return RESPONSE_NETWORK_ERROR
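Note on the reordering above: Pillow cannot decode SVG, so under the old order an SVG response always raised during Image.open and was reported as a network error; checking the content type first lets SVG pass through without being decoded. (The switch to headers["content-type"] is equivalent to the old .get("Content-Type") lookup, since requests uses case-insensitive headers.) A minimal standalone sketch of the new flow, with placeholder constants standing in for the mixin's; this is an illustration, not the project code:

from io import BytesIO

import filetype
from PIL import Image

RESPONSE_OK, RESPONSE_NETWORK_ERROR = 0, -1  # placeholders for the real module constants


def probe_image(response):
    # Mirrors the new validate_response order: content type first, Pillow decode last.
    if not (response and response.status_code == 200):
        return RESPONSE_NETWORK_ERROR, None
    try:
        content_type = response.headers["content-type"]
        if content_type.startswith("image/svg+xml"):
            return RESPONSE_OK, "svg"  # accepted without decoding
        file_type = filetype.get_type(mime=content_type.partition(";")[0].strip())
        if file_type is None:
            return RESPONSE_NETWORK_ERROR, None
        img = Image.open(BytesIO(response.content))
        img.load()  # a corrupted image raises here
        return RESPONSE_OK, file_type.extension
    except Exception:
        return RESPONSE_NETWORK_ERROR, None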

View file

@@ -3,6 +3,7 @@ import os
 import re
 import shutil
 import tempfile
+import uuid
 
 from django.conf import settings
 from django.utils import timezone
@@ -65,13 +66,15 @@ class NdjsonExporter(Task):
 
         def _save_image(url):
            if url.startswith("http"):
-                imgdl = ProxiedImageDownloader(url)
-                raw_img = imgdl.download().content
-                ext = imgdl.extention
-                file = GenerateDateUUIDMediaFilePath(f"x.{ext}", attachment_path)
-                with open(file, "wb") as binary_file:
-                    binary_file.write(raw_img)
-                return file
+                try:
+                    raw_img, ext = ProxiedImageDownloader.download_image(url, "")
+                    if raw_img:
+                        file = "%s/%s.%s" % (attachment_path, uuid.uuid4(), ext)
+                        with open(file, "wb") as binary_file:
+                            binary_file.write(raw_img)
+                        return file
+                except Exception:
+                    logger.debug(f"error downloading {url}")
             elif url.startswith("/"):
                 p = os.path.abspath(
                     os.path.join(settings.MEDIA_ROOT, url[len(settings.MEDIA_URL) :])
@@ -79,11 +82,8 @@ class NdjsonExporter(Task):
                 if p.startswith(settings.MEDIA_ROOT):
                     try:
                         shutil.copy2(p, attachment_path)
-                    except Exception as e:
-                        logger.error(
-                            f"error copying {p} to {attachment_path}",
-                            extra={"exception": e},
-                        )
+                    except Exception:
+                        logger.error(f"error copying {p} to {attachment_path}")
                     return p
             return url
 
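A small side effect of the exporter change: downloaded attachments are now written as flat, UUID-named files directly under attachment_path instead of going through GenerateDateUUIDMediaFilePath, and a failed download is logged and skipped rather than raising. A rough illustration of the new naming; the path and extension here are made up:

import uuid

attachment_path = "/tmp/neodb-export/attachments"  # hypothetical working folder
ext = "jpg"
file = "%s/%s.%s" % (attachment_path, uuid.uuid4(), ext)
# e.g. /tmp/neodb-export/attachments/5f0a9c1e-7c2d-4b4e-9a1c-0d2f3a4b5c6d.jpg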
@@ -206,6 +206,25 @@ class NdjsonExporter(Task):
             for item in self.ref_items:
                 f.write(json.dumps(item.ap_object, default=str) + "\n")
 
+        # Export actor.ndjson with Takahe identity data
+        filename = os.path.join(temp_folder_path, "actor.ndjson")
+        with open(filename, "w") as f:
+            f.write(json.dumps(self.get_header()) + "\n")
+            takahe_identity = self.user.identity.takahe_identity
+            identity_data = {
+                "type": "Identity",
+                "username": takahe_identity.username,
+                "domain": takahe_identity.domain_id,
+                "actor_uri": takahe_identity.actor_uri,
+                "name": takahe_identity.name,
+                "summary": takahe_identity.summary,
+                "metadata": takahe_identity.metadata,
+                "private_key": takahe_identity.private_key,
+                "public_key": takahe_identity.public_key,
+                "public_key_id": takahe_identity.public_key_id,
+            }
+            f.write(json.dumps(identity_data, default=str) + "\n")
+
         filename = GenerateDateUUIDMediaFilePath(
             "f.zip", settings.MEDIA_ROOT + "/" + settings.EXPORT_FILE_PATH_ROOT
         )
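The resulting actor.ndjson has two JSON lines: the standard export header from get_header() (its fields are not part of this diff), followed by one Identity record. An illustrative Identity line with made-up values; only the keys are taken from the code above:

{"type": "Identity", "username": "alice", "domain": "example.org", "actor_uri": "https://example.org/@alice/", "name": "Alice", "summary": "hello", "metadata": [], "private_key": "redacted", "public_key": "redacted", "public_key_id": "https://example.org/@alice/#main-key"}

Since the record includes the identity's private key, the exported archive should be treated as sensitive.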

View file

@@ -18,6 +18,7 @@ from journal.models import (
     Tag,
     TagMember,
 )
 
+from takahe.utils import Takahe
 from .base import BaseImporter
 
@@ -401,6 +402,47 @@ class NdjsonImporter(BaseImporter):
             logger.exception("Error parsing header")
             return {}
 
+    def process_actor(self, file_path: str) -> None:
+        """Process the actor.ndjson file to update user identity information."""
+        logger.debug(f"Processing actor data from {file_path}")
+        try:
+            with open(file_path, "r") as jsonfile:
+                next(jsonfile, None)
+                for line in jsonfile:
+                    try:
+                        data = json.loads(line)
+                    except json.JSONDecodeError:
+                        logger.error("Error parsing actor data line")
+                        continue
+
+                    if data.get("type") == "Identity":
+                        logger.debug("Found identity data in actor.ndjson")
+                        takahe_identity = self.user.identity.takahe_identity
+                        updated = False
+                        if (
+                            data.get("name")
+                            and data.get("name") != takahe_identity.name
+                        ):
+                            logger.debug(
+                                f"Updating identity name from {takahe_identity.name} to {data.get('name')}"
+                            )
+                            takahe_identity.name = data.get("name")
+                            updated = True
+                        if (
+                            data.get("summary")
+                            and data.get("summary") != takahe_identity.summary
+                        ):
+                            logger.debug("Updating identity summary")
+                            takahe_identity.summary = data.get("summary")
+                            updated = True
+                        if updated:
+                            takahe_identity.save()
+                            Takahe.update_state(takahe_identity, "edited")
+                            logger.info("Updated identity")
+                        return
+        except Exception as e:
+            logger.exception(f"Error processing actor file: {e}")
+
     def run(self) -> None:
         """Run the NDJSON import."""
         filename = self.metadata["file"]
@@ -410,6 +452,15 @@
         with tempfile.TemporaryDirectory() as tmpdirname:
             zipref.extractall(tmpdirname)
 
+            # Process actor data first if available
+            actor_path = os.path.join(tmpdirname, "actor.ndjson")
+            if os.path.exists(actor_path):
+                actor_header = self.parse_header(actor_path)
+                logger.debug(f"Found actor.ndjson with {actor_header}")
+                self.process_actor(actor_path)
+            else:
+                logger.debug("No actor.ndjson file found in the archive")
+
             catalog_path = os.path.join(tmpdirname, "catalog.ndjson")
             if os.path.exists(catalog_path):
                 catalog_header = self.parse_header(catalog_path)
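Taken together, these two importer hunks make the run restore profile data before any catalog or journal content is touched. A minimal sketch of that ordering outside the class, assuming an already-extracted archive directory; process_catalog is a hypothetical stand-in for whatever the importer does next with catalog.ndjson, which is not shown in this diff:

import os


def import_archive(tmpdirname, importer):
    # Actor (profile) data first, mirroring the new run() order.
    actor_path = os.path.join(tmpdirname, "actor.ndjson")
    if os.path.exists(actor_path):
        importer.process_actor(actor_path)
    # Then catalog items, as in the existing code path.
    catalog_path = os.path.join(tmpdirname, "catalog.ndjson")
    if os.path.exists(catalog_path):
        importer.process_catalog(catalog_path)  # hypothetical name, not from this diff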

View file

@@ -105,7 +105,12 @@ class NdjsonExportImportTest(TestCase):
         )
 
     def test_ndjson_export_import(self):
-        # Create marks, reviews and notes for user1
+        # set name and summary for user1
+        identity1 = self.user1.identity
+        takahe_identity1 = identity1.takahe_identity
+        takahe_identity1.name = "Test User"
+        takahe_identity1.summary = "Test summary"
+        takahe_identity1.save()
 
         # Book marks with ratings and tags
         mark_book1 = Mark(self.user1.identity, self.book1)
@@ -289,6 +294,7 @@
         export_path = exporter.metadata["file"]
         logger.debug(f"exported to {export_path}")
         self.assertTrue(os.path.exists(export_path))
+        self.assertEqual(exporter.metadata["total"], 61)
 
         # Validate the NDJSON export file structure
         with TemporaryDirectory() as extract_dir:
@@ -370,7 +376,12 @@
         self.assertIn("61 items imported, 0 skipped, 0 failed.", importer.message)
 
         # Verify imported data
+        identity2 = self.user2.identity
+        takahe_identity2 = identity2.takahe_identity
+        # Check that name and summary were updated
+        self.assertEqual(takahe_identity2.name, "Test User")
+        self.assertEqual(takahe_identity2.summary, "Test summary")
 
         # Check marks
         mark_book1_imported = Mark(self.user2.identity, self.book1)
         self.assertEqual(mark_book1_imported.shelf_type, ShelfType.COMPLETE)