Compare commits

...

13 commits

Author SHA1 Message Date
a5919c8b9a disable filter 2025-03-08 20:40:23 +01:00
7b2d835916 typo 2025-03-08 20:40:23 +01:00
246a96ab7e fix typo 2025-03-08 20:40:22 +01:00
00297a761a add libgen+li source to shadow libraries 2025-03-08 20:40:22 +01:00
e857e58938 only let registered user see dufs.itmens link 2025-03-08 20:40:22 +01:00
a890cc4a87 fix stupid typos 2025-03-08 20:40:21 +01:00
8a9da6ff0c revert mathjax script 2025-03-08 20:40:21 +01:00
dcf9e7b4e2 use compose.override.yml instead of the original one 2025-03-08 20:40:21 +01:00
d2833f6e25 let compose.yml use the custom template 2025-03-08 20:40:20 +01:00
b42a28d511 custom template for books that add shadow library links 2025-03-08 20:40:20 +01:00
mein Name 953791c84f ndjson: include actor info 2025-03-07 20:23:11 -05:00
mein Name d6d360025f ndjson: auto refresh progress for all import and export 2025-03-07 20:23:11 -05:00
mein Name d343d6e71e import ndjson 2025-03-07 20:23:11 -05:00
26 changed files with 2532 additions and 643 deletions

3
.gitignore vendored
View file

@ -2,8 +2,9 @@
.venv
/.env
/neodb.env
/compose.override.yml
/typings
/upgrade
.compose.override.yml
# Byte-compiled / optimized / DLL files
__pycache__/

View file

@ -205,6 +205,7 @@ class BasicDownloader:
)
return resp, response_type
except RequestException as e:
# logger.debug(f"RequestException: {e}")
self.logs.append(
{"response_type": RESPONSE_NETWORK_ERROR, "url": url, "exception": e}
)
@ -340,16 +341,19 @@ class ImageDownloaderMixin:
def validate_response(self, response):
if response and response.status_code == 200:
try:
raw_img = response.content
img = Image.open(BytesIO(raw_img))
img.load() # corrupted image will trigger exception
content_type = response.headers.get("Content-Type")
content_type = response.headers["content-type"]
if content_type.startswith("image/svg+xml"):
self.extention = "svg"
return RESPONSE_OK
file_type = filetype.get_type(
mime=content_type.partition(";")[0].strip()
)
if file_type is None:
return RESPONSE_NETWORK_ERROR
self.extention = file_type.extension
raw_img = response.content
img = Image.open(BytesIO(raw_img))
img.load() # corrupted image will trigger exception
return RESPONSE_OK
except Exception:
return RESPONSE_NETWORK_ERROR
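
A standalone sketch of the reordered validation above: resolve the file extension from the Content-Type header first (with the SVG shortcut), then let Pillow verify the payload. The constants and the return shape are simplified assumptions, not the project's exact API.

from io import BytesIO

import filetype
from PIL import Image

RESPONSE_OK, RESPONSE_NETWORK_ERROR = "ok", "network_error"  # placeholder constants

def validate_image_response(response):
    # Returns (status, extension); mirrors the header-first flow in the diff above.
    if not response or response.status_code != 200:
        return RESPONSE_NETWORK_ERROR, None
    try:
        content_type = response.headers["content-type"]
        if content_type.startswith("image/svg+xml"):
            return RESPONSE_OK, "svg"
        file_type = filetype.get_type(mime=content_type.partition(";")[0].strip())
        if file_type is None:
            return RESPONSE_NETWORK_ERROR, None
        img = Image.open(BytesIO(response.content))
        img.load()  # a truncated or corrupted image raises here
        return RESPONSE_OK, file_type.extension
    except Exception:
        return RESPONSE_NETWORK_ERROR, None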

View file

@ -86,7 +86,7 @@ def retrieve(request, item_path, item_uuid):
if request.method == "HEAD":
return HttpResponse()
if request.headers.get("Accept", "").endswith("json"):
return JsonResponse(item.ap_object)
return JsonResponse(item.ap_object, content_type="application/activity+json")
focus_item = None
if request.GET.get("focus"):
focus_item = get_object_or_404(
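
With the change above, requesting an item with a JSON Accept header now returns its ActivityPub object served as application/activity+json. A hedged client-side sketch (the URL is hypothetical; requests is assumed to be available):

import requests

resp = requests.get(
    "https://neodb.example/book/some-item-uuid",  # hypothetical item URL
    headers={"Accept": "application/activity+json"},
)
assert resp.headers["Content-Type"].startswith("application/activity+json")
ap_object = resp.json()  # the item's ActivityPub representation (item.ap_object)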

View file

@ -2,7 +2,7 @@ from django.conf import settings
from django.contrib.auth.decorators import login_required
from django.core.cache import cache
from django.core.exceptions import DisallowedHost
from django.http import HttpRequest, JsonResponse
from django.http import HttpRequest, HttpResponse, JsonResponse
from django.shortcuts import redirect, render
from django.urls import reverse
@ -81,36 +81,41 @@ def nodeinfo2(request):
)
def _is_json_request(request) -> bool:
return request.headers.get("HTTP_ACCEPT", "").endswith("json")
def _error_response(request, status: int, exception=None, default_message=""):
message = str(exception) if exception else default_message
if request.headers.get("HTTP_ACCEPT").endswith("json"):
return JsonResponse({"error": message}, status=status)
if (
request.headers.get("HTTP_HX_REQUEST") is not None
and request.headers.get("HTTP_HX_BOOSTED") is None
):
return HttpResponse(message, status=status)
return render(
request,
f"{status}.html",
status=status,
context={"message": message, "exception": exception},
)
def error_400(request, exception=None):
if isinstance(exception, DisallowedHost):
url = settings.SITE_INFO["site_url"] + request.get_full_path()
return redirect(url, permanent=True)
if _is_json_request(request):
return JsonResponse({"error": "invalid request"}, status=400)
return render(request, "400.html", status=400, context={"exception": exception})
return _error_response(request, 400, exception, "invalid request")
def error_403(request, exception=None):
if _is_json_request(request):
return JsonResponse({"error": "forbidden"}, status=403)
return render(request, "403.html", status=403, context={"exception": exception})
return _error_response(request, 403, exception, "forbidden")
def error_404(request, exception=None):
if _is_json_request(request):
return JsonResponse({"error": "not found"}, status=404)
request.session.pop("next_url", None)
return render(request, "404.html", status=404, context={"exception": exception})
return _error_response(request, 404, exception, "not found")
def error_500(request, exception=None):
if _is_json_request(request):
return JsonResponse({"error": "something wrong"}, status=500)
return render(request, "500.html", status=500, context={"exception": exception})
return _error_response(request, 500, exception, "something wrong")
def console(request):
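
The four handlers above now delegate to a single _error_response helper that picks a JSON body, a bare HTMX fragment, or a rendered error template. For context, a minimal sketch of how Django error handlers like these are usually wired up (the module path is illustrative, not confirmed by this diff):

# urls.py (illustrative) -- point Django's global error handlers at these views.
handler400 = "common.views.error_400"
handler403 = "common.views.error_403"
handler404 = "common.views.error_404"
handler500 = "common.views.error_500"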

322
compose.override.yml Normal file
View file

@ -0,0 +1,322 @@
# NEODB Docker Compose File
#
# Note: may not be secure for production usage, use at your own risk
#
# The following env variable are expected from .env or command line
# - NEODB_SECRET_KEY
# - NEODB_SITE_DOMAIN
# - NEODB_SITE_NAME
# - NEODB_DATA
x-shared:
neodb-service: &neodb-service
build: .
image: ${NEODB_IMAGE:-neodb/neodb:latest}
environment:
NEODB_DEBUG:
NEODB_SECRET_KEY:
NEODB_SITE_NAME:
NEODB_SITE_DOMAIN:
NEODB_SITE_INTRO:
NEODB_SITE_HEAD:
NEODB_SITE_LOGO:
NEODB_SITE_ICON:
NEODB_USER_ICON:
NEODB_SITE_COLOR:
NEODB_SITE_LINKS:
NEODB_SITE_DESCRIPTION:
NEODB_ALTERNATIVE_DOMAINS:
NEODB_PREFERRED_LANGUAGES:
NEODB_INVITE_ONLY:
NEODB_LOGIN_MASTODON_WHITELIST:
NEODB_MASTODON_CLIENT_SCOPE:
NEODB_DISABLE_DEFAULT_RELAY:
NEODB_DISABLE_CRON_JOBS:
NEODB_SEARCH_PEERS:
NEODB_SEARCH_SITES:
NEODB_MIN_MARKS_FOR_DISCOVER:
NEODB_DISCOVER_UPDATE_INTERVAL:
NEODB_DISCOVER_FILTER_LANGUAGE:
NEODB_DISCOVER_SHOW_LOCAL_ONLY:
NEODB_DISCOVER_SHOW_POPULAR_POSTS:
NEODB_SENTRY_DSN:
TAKAHE_SENTRY_DSN:
NEODB_SENTRY_SAMPLE_RATE:
NEODB_DB_URL: ${NEODB_DB_URL:-postgres://neodb:aubergine@neodb-db/neodb}
TAKAHE_DB_URL: ${TAKAHE_DB_URL:-postgres://takahe:aubergine@takahe-db/takahe}
NEODB_REDIS_URL: ${NEODB_REDIS_URL:-redis://redis:6379/0}
NEODB_SEARCH_URL: ${NEODB_SEARCH_URL:-typesense://user:eggplant@typesense:8108/catalog}
NEODB_EMAIL_URL:
NEODB_EMAIL_FROM: no-reply@${NEODB_SITE_DOMAIN}
NEODB_ENABLE_LOCAL_ONLY:
NEODB_ENABLE_LOGIN_BLUESKY:
NEODB_ENABLE_LOGIN_THREADS:
NEODB_EXTRA_APPS:
NEODB_FANOUT_LIMIT_DAYS:
TAKAHE_FANOUT_LIMIT_DAYS:
NEODB_DOWNLOADER_PROXY_LIST:
NEODB_DOWNLOADER_BACKUP_PROXY:
NEODB_DOWNLOADER_SAVE_DIR:
NEODB_MEDIA_ROOT: /www/m
NEODB_VENV: /neodb-venv
TAKAHE_ENVIRONMENT: ${ENVIRONMENT:-production}
TAKAHE_SECRET_KEY: ${NEODB_SECRET_KEY}
TAKAHE_MAIN_DOMAIN: ${NEODB_SITE_DOMAIN}
TAKAHE_MEDIA_URL: https://${NEODB_SITE_DOMAIN}/media/
TAKAHE_EMAIL_FROM: no-reply@${NEODB_SITE_DOMAIN}
TAKAHE_DATABASE_SERVER: ${TAKAHE_DATABASE_SERVER:-postgres://takahe:aubergine@takahe-db/takahe}
TAKAHE_CACHES_DEFAULT: ${TAKAHE_CACHES_DEFAULT:-redis://redis:6379/0}
TAKAHE_MEDIA_BACKEND: local://www/media/
TAKAHE_MEDIA_ROOT: /www/media
TAKAHE_USE_PROXY_HEADERS: true
TAKAHE_STATOR_CONCURRENCY: ${TAKAHE_STATOR_CONCURRENCY:-4}
TAKAHE_STATOR_CONCURRENCY_PER_MODEL: ${TAKAHE_STATOR_CONCURRENCY_PER_MODEL:-2}
TAKAHE_VAPID_PUBLIC_KEY:
TAKAHE_VAPID_PRIVATE_KEY:
TAKAHE_DEBUG: ${NEODB_DEBUG:-False}
TAKAHE_VENV: /takahe-venv
THREADS_APP_ID:
THREADS_APP_SECRET:
SPOTIFY_API_KEY:
TMDB_API_V3_KEY:
GOOGLE_API_KEY:
DISCOGS_API_KEY:
IGDB_API_CLIENT_ID:
IGDB_API_CLIENT_SECRET:
DISCORD_WEBHOOKS:
SLACK_API_TOKEN:
SSL_ONLY:
restart: "unless-stopped"
volumes:
- ${NEODB_DATA:-../data}/neodb-media:/www/m
- ${NEODB_DATA:-../data}/takahe-media:/www/media
- ${NEODB_DATA:-../data}/takahe-cache:/www/cache
- ${NEODB_DATA:-../data}/www-root:/www/root
- ./custom/edition.custom.html:/neodb/catalog/templates/edition.html
- ./custom/_header.scss:/neodb/common/static/scss/_header.scss
depends_on:
- redis
- neodb-db
- typesense
- takahe-db
profiles:
- production
dev-neodb-service: &dev-neodb-service
<<: *neodb-service
# environment:
# NEODB_DEBUG: True
volumes:
- ${NEODB_DATA:-../data}/www-root:/www/root
- ${NEODB_DATA:-../data}/neodb-media:/www/m
- ${NEODB_DATA:-../data}/takahe-media:/www/media
- ${NEODB_DATA:-../data}/takahe-cache:/www/cache
- ${NEODB_DATA:-../data}/nginx-log:/var/log/nginx
- ${NEODB_SRC:-.}:/neodb
- ${TAKAHE_SRC:-./neodb-takahe}:/takahe
profiles:
- dev
services:
redis:
image: redis:alpine
command: redis-server --save 60 1 --loglevel warning
volumes:
- ${NEODB_DATA:-../data}/redis:/data
typesense:
image: typesense/typesense:${TYPESENSE_VERSION:-0.25.2}
restart: "on-failure"
# healthcheck:
# test: ['CMD', 'curl', '-vf', 'http://127.0.0.1:8108/health']
# ports:
# - "18108:8108"
environment:
GLOG_minloglevel: 2
volumes:
- ${NEODB_DATA:-../data}/typesense:/data
command: "--data-dir /data --api-key=eggplant"
neodb-db:
image: postgres:${POSTGRES_VERSION:-14-alpine}
healthcheck:
test: ["CMD", "pg_isready", "-U", "neodb"]
volumes:
- ${NEODB_DATA:-../data}/neodb-db:/var/lib/postgresql/data
environment:
- POSTGRES_DB=neodb
- POSTGRES_USER=neodb
- POSTGRES_PASSWORD=aubergine
takahe-db:
image: postgres:${POSTGRES_VERSION:-14-alpine}
healthcheck:
test: ["CMD", "pg_isready", "-U", "takahe"]
volumes:
- ${NEODB_DATA:-../data}/takahe-db:/var/lib/postgresql/data
environment:
- POSTGRES_DB=takahe
- POSTGRES_USER=takahe
- POSTGRES_PASSWORD=aubergine
migration:
<<: *neodb-service
restart: "no"
command: /bin/neodb-init
depends_on:
neodb-db:
condition: service_healthy
typesense:
condition: service_started
redis:
condition: service_started
takahe-db:
condition: service_healthy
neodb-web:
<<: *neodb-service
command: ${NEODB_VENV:-/neodb-venv}/bin/gunicorn boofilsic.wsgi -w ${NEODB_WEB_WORKER_NUM:-8} --preload --max-requests 2000 --timeout 60 -b 0.0.0.0:8000
healthcheck:
test:
[
"CMD",
"wget",
"-qO/tmp/test",
"--header",
"X-Forwarded-Proto: https",
"http://127.0.0.1:8000/nodeinfo/2.0/",
]
depends_on:
migration:
condition: service_completed_successfully
neodb-web-api:
<<: *neodb-service
command: ${NEODB_VENV:-/neodb-venv}/bin/gunicorn boofilsic.wsgi -w ${NEODB_API_WORKER_NUM:-4} --preload --max-requests 2000 --timeout 30 -b 0.0.0.0:8000
healthcheck:
test:
[
"CMD",
"wget",
"-qO/tmp/test",
"--header",
"X-Forwarded-Proto: https",
"http://127.0.0.1:8000/nodeinfo/2.0/",
]
depends_on:
migration:
condition: service_completed_successfully
neodb-worker:
<<: *neodb-service
command: neodb-manage rqworker --with-scheduler import export mastodon fetch crawl ap cron
depends_on:
migration:
condition: service_completed_successfully
neodb-worker-extra:
<<: *neodb-service
command: neodb-manage rqworker-pool --num-workers ${NEODB_RQ_WORKER_NUM:-4} mastodon fetch crawl ap
depends_on:
migration:
condition: service_completed_successfully
takahe-web:
<<: *neodb-service
command: ${TAKAHE_VENV:-/takahe-venv}/bin/gunicorn --chdir /takahe takahe.wsgi -w ${TAKAHE_WEB_WORKER_NUM:-8} --max-requests 2000 --timeout 60 --preload -b 0.0.0.0:8000
healthcheck:
test:
[
"CMD",
"wget",
"-qO/tmp/test",
"--header",
"X-Forwarded-Proto: https",
"http://127.0.0.1:8000/api/v1/instance",
]
depends_on:
migration:
condition: service_completed_successfully
takahe-stator:
<<: *neodb-service
command: takahe-manage runstator
stop_signal: SIGINT
depends_on:
migration:
condition: service_completed_successfully
nginx:
<<: *neodb-service
user: "root:root"
command: nginx-start
environment:
NEODB_WEB_SERVER: neodb-web:8000
NEODB_API_SERVER: neodb-web-api:8000
TAKAHE_WEB_SERVER: takahe-web:8000
NGINX_CONF: /neodb/misc/nginx.conf.d/neodb.conf
depends_on:
takahe-web:
condition: service_started
neodb-web:
condition: service_started
ports:
- "${NEODB_PORT:-8000}:8000"
shell:
<<: *neodb-service
command: bash
profiles: ["tools"]
root:
<<: *neodb-service
command: bash
profiles: ["tools"]
user: "root:root"
dev-neodb-web:
<<: *dev-neodb-service
command: neodb-manage runserver 0.0.0.0:8000
stop_signal: SIGINT
dev-neodb-worker:
<<: *dev-neodb-service
# command: neodb-manage rqworker-pool --num-workers 4 import export mastodon fetch crawl ap cron
command: neodb-manage rqworker --with-scheduler import export mastodon fetch crawl crawl ap cron
dev-takahe-web:
<<: *dev-neodb-service
command: takahe-manage runserver 0.0.0.0:8000
stop_signal: SIGINT
dev-takahe-stator:
<<: *dev-neodb-service
command: takahe-manage runstator
stop_signal: SIGINT
dev-nginx:
<<: *dev-neodb-service
user: "root:root"
command: nginx-start
environment:
NEODB_WEB_SERVER: dev-neodb-web:8000
TAKAHE_WEB_SERVER: dev-takahe-web:8000
NGINX_CONF: /neodb/misc/nginx.conf.d/neodb-dev.conf
depends_on:
dev-takahe-web:
condition: service_started
dev-neodb-web:
condition: service_started
ports:
- "${NEODB_PORT:-8000}:8000"
dev-shell:
<<: *dev-neodb-service
command: bash
profiles: ["tools"]
dev-root:
<<: *dev-neodb-service
command: bash
profiles: ["tools"]
user: "root:root"
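
Per the header comment, this override file expects NEODB_SECRET_KEY, NEODB_SITE_DOMAIN, NEODB_SITE_NAME and NEODB_DATA to be supplied via .env or the command line. A small pre-flight check along these lines (purely illustrative, not part of the repository) catches a missing value before Compose interpolates it as an empty string:

import os
import sys

REQUIRED = ["NEODB_SECRET_KEY", "NEODB_SITE_DOMAIN", "NEODB_SITE_NAME", "NEODB_DATA"]

missing = [name for name in REQUIRED if not os.environ.get(name)]
if missing:
    sys.exit(f"missing required variables: {', '.join(missing)}")
print("environment looks complete; safe to run `docker compose up -d`")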

191
custom/_header.scss Normal file
View file

@ -0,0 +1,191 @@
body {
&>header.container-fluid {
padding: 0px;
&>nav {
ul.nav-logo {
margin-left: var(--pico-nav-link-spacing-horizontal);
}
.nav-search form {
margin-bottom: 0px;
}
details.dropdown summary:not([role]) {
border-color: transparent;
background-color: transparent;
}
ul {
min-width: -webkit-max-content;
min-width: -moz-max-content;
}
.nav-links a {
color: var(--pico-secondary);
}
font-weight: bold;
.nav-search,
.nav-search li {
width: 100%;
}
.nav-search select {
max-width: max-content;
}
.nav-search input[type="submit"] {
background-color: var(--pico-primary-background);
border-color: var(--pico-primary-background);
padding-left: calc(var(--pico-nav-link-spacing-horizontal)*2);
padding-right: calc(var(--pico-nav-link-spacing-horizontal)*2);
}
.nav-logo img {
max-height: calc(1rem * var(--pico-line-height) + var(--pico-form-element-spacing-vertical) * 2 - var(--pico-nav-link-spacing-horizontal) * 2);
}
/*
/* /* Dark color scheme (Auto) */
/* /* Automatically enabled if user has Dark mode enabled */
/* @media only screen and (prefers-color-scheme: dark) {
/* .nav-logo img {
/* filter: brightness(100%) grayscale(100%) invert(20%);
/* }
/* }
/*
/* /* Dark color scheme (Forced) */
/* /* Enabled if forced with data-theme="dark" */
/* .nav-logo img [data-theme="dark"] {
/* filter: brightness(100%) grayscale(100%) invert(20%);
/* }
*/
.unhide {
display: unset !important;
}
.nav-dropdown summary {
padding-top: 0px !important;
padding-bottom: 0px !important;
}
.nav-dropdown summary::after {
height: calc(1rem * var(--pico-line-height, 1.5) + 8px) !important;
}
.nav-dropdown {
margin-right: 0rem !important;
// this fixes positioning in iOS 14 and lower but will break in other browsers and iOS 15+
// details.dropdown {
// display: block !important;
// }
ul li a {
padding: var(--pico-form-element-spacing-vertical) var(--pico-form-element-spacing-horizontal) !important
}
}
.avatar {
display: inline;
img {
height: calc(1rem * var(--pico-line-height) + var(--pico-nav-link-spacing-vertical) * 2 - var(--pico-border-width) * 2);
width: calc(1rem * var(--pico-line-height) + var(--pico-nav-link-spacing-vertical) * 2 - var(--pico-border-width) * 2);
}
}
}
}
//pack for small phones like iPhone 5s
@media (max-width: 320px) {
.nav-dropdown {
li {
padding-left: 0;
padding-right: 0;
summary {
padding: 0 !important;
}
details.dropdown summary::after {
width: 0;
}
}
}
}
@media (max-width: 768px) {
body>header {
position: sticky;
top: 0px;
background: var(--pico-background-color);
z-index: 999;
box-shadow: var(--pico-box-shadow);
}
nav {
display: flex;
flex-flow: row wrap;
}
.nav-search {
order: 999;
display: none;
min-width: unset !important;
li {
padding: 0px;
padding-bottom: 4px;
select {
max-width: 6em !important;
}
input[type="submit"] {
padding-left: calc(var(--pico-nav-link-spacing-horizontal)*1);
padding-right: calc(var(--pico-nav-link-spacing-horizontal)*1);
}
}
}
.nav-links li.small-only {
a {
color: var(--pico-secondary);
}
}
.nav-dropdown {
li {
padding-left: 0;
padding-right: 0;
summary {
padding: 0 !important;
}
}
}
/*
.nav-dropdown::after {
flex-basis: 100%;
width: 0;
}
*/
}
}
.nav-page-discover .nav-links a.nav-link-discover {
color: var(--pico-primary) !important;
font-weight: bold;
}
.nav-page-feed .nav-links a.nav-link-feed {
color: var(--pico-primary) !important;
font-weight: bold;
}
.nav-page-home .nav-links a.nav-link-home {
color: var(--pico-primary) !important;
font-weight: bold;
}
.nav-page-search .nav-links a.nav-link-search {
color: var(--pico-primary) !important;
font-weight: bold;
}

190
custom/edition.custom.html Normal file
View file

@ -0,0 +1,190 @@
{% extends "item_base.html" %}
{% load static %}
{% load i18n %}
{% load l10n %}
{% load humanize %}
{% load mastodon %}
{% load strip_scheme %}
{% load thumb %}
{% block head %}
{% if item.author %}
<meta property="og:book:author"
content="{% for author in item.author %}{{ author }}{% if not forloop.last %},{% endif %}{% endfor %}">
{% endif %}
{% if item.isbn %}<meta property="og:book:isbn" content="{{ item.isbn }}">{% endif %}
{% endblock %}
{% block details %}
<div>
{% if item.isbn %}
{% trans 'ISBN' %}: {{ item.isbn }}
{% endif %}
</div>
<div>{% include '_people.html' with people=item.author role='author' max=5 %}</div>
<div>{% include '_people.html' with people=item.translator role='translator' max=5 %}</div>
<div>
{% if item.format %}
{% trans 'book format' %}: {{ item.get_format_display }}
{% endif %}
</div>
<div>{% include '_people.html' with people=item.additional_title role='other title' max=99 %}</div>
<div>
{% if item.pub_house %}
{% trans 'publishing house' %}: {{ item.pub_house }}
{% endif %}
</div>
<div>
{% if item.imprint %}
{% trans 'imprint' %}: {{ item.imprint }}
{% endif %}
</div>
<div>
{% if item.pub_year %}
{% trans 'publication date' %}: {{ item.pub_year }}
{% if item.pub_month %}-{{ item.pub_month }}{% endif %}
{% endif %}
</div>
<div>
{% if item.series %}
{% trans 'series' %}: {{ item.series }}
{% endif %}
</div>
<div>{% include '_people.html' with people=item.language role='language' max=10 %}</div>
<div>
{% if item.binding %}
{% trans 'binding' %}: {{ item.binding }}
{% endif %}
</div>
<div>
{% if item.price %}
{% trans 'price' %}: {{ item.price }}
{% endif %}
</div>
<div>
{% if item.pages %}
{% trans 'number of pages' %}: {{ item.pages }}
{% endif %}
</div>
{% if item.other_info %}
{% for k, v in item.other_info.items %}<div>{{ k }}: {{ v|urlizetrunc:24 }}</div>{% endfor %}
{% endif %}
{% endblock %}
{% block content %}
{% if item.contents %}
<h5>contents</h5>
<p class="tldr" _="on click toggle .tldr on me">{{ item.contents | linebreaksbr }}</p>
{% endif %}
{% with related_books=item.sibling_items %}
{% if related_books.count > 0 %}
<section class="entity-sort shelf" id="sibling">
<span class="action">
<span>
<a _="on click set el to the next <ul/> then call el.scrollBy({left:-el.offsetWidth, behavior:'smooth'})"><i class="fa-solid fa-circle-left"></i></a>
</span>
<span>
<a _="on click set el to the next <ul/> then call el.scrollBy({left:el.offsetWidth, behavior:'smooth'})"><i class="fa-solid fa-circle-right"></i></a>
</span>
</span>
<h5>{% trans 'other editions' %}</h5>
<ul class="cards">
{% for b in related_books %}
<li class="card">
<a href="{{ b.url }}" title="{{ b.display_title }} {{ b.title_deco }}">
<img src="{{ b.cover|thumb:'normal' }}"
alt="{{ b.display_title }} {{ b.title_deco }}"
loading="lazy">
<div>
<span>{{ b.display_title }}</span>
<small class="title_deco"><span>{{ b.pub_house }}</span>
<span>{{ b.pub_year }}</span></small>
</div>
</a>
</li>
{% endfor %}
</ul>
</section>
{% endif %}
{% endwith %}
{% endblock %}
{% block left_sidebar %}
<script>
$(function () {
function _sidebar_auto_collapse(mm){
if (mm.matches) {
$('.auto-collapse').removeAttr('open')
} else {
$('.auto-collapse').attr('open', '')
}
}
var mm = window.matchMedia("(max-width: 768px)")
mm.addListener(_sidebar_auto_collapse);
_sidebar_auto_collapse(mm);
});
</script>
{% if item.isbn %}
<section>
<details class="auto-collapse" open>
<summary>{% trans 'Borrow or Buy' %}</summary>
<div>
<div>
<a target="_blank"
rel="noopener"
href="https://www.worldcat.org/isbn/{{ item.isbn }}">WorldCat</a>
</div>
<div>
<a target="_blank"
rel="noopener"
href="https://openlibrary.org/search?isbn={{ item.isbn }}">Open Library</a>
</div>
<div>
<a target="_blank"
rel="noopener"
href="https://library.oapen.org/discover?filtertype_1=isbn&amp;filter_relational_operator_1=equals&amp;filter_1={{ item.isbn }}">
OAPEN</a>
</div>
</div>
</details>
</section>
{% endif %}
<section>
<details class="auto-collapse" open>
<summary>Shadow Libraries</summary>
{% if item.isbn %}
<div>
Search by ISBN:<br>
<a target="_blank"
rel="noopener"
href="https://annas-archive.org/search?q={{ item.isbn }}">Anna's Archive</a>
|
<a target="_blank"
rel="noopener"
href="https://z-lib.gs/s/{{ item.isbn }}">Z-Library</a>
|
<a target="_blank"
rel="noopener"
href="https://libgen.li/index.php?req={{ item.isbn }}">Libgen+LI</a>
</div>
{% endif %}
<div>
Search by title:<br>
<a target="_blank"
rel="noopener"
href="https://annas-archive.org/search?q={{ item.display_title }}">Anna's Archive</a>
|
<a target="_blank"
rel="noopener"
href="https://z-lib.gs/s/{{ item.display_title }}">Z-Library</a>
|
<a target="_blank"
rel="noopener"
href="https://libgen.li/index.php?req={{ item.display_title }}">Libgen+LI</a>
</div>
{% if request.user.is_authenticated %}
<div>
<br>
Registered user only:<br>
<a target="_blank"
rel="noopener"
href="https://dufs.itinerariummentis.org/book/?q={{ item.display_title }}">dufs.itmens</a>
</div>
{% endif %}
{% endblock %}
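
The template above interpolates item.isbn and item.display_title directly into the search URLs. A hedged sketch of building the same links with explicit URL encoding (the sites and paths are taken from the template; the helper itself is illustrative):

from urllib.parse import quote_plus

def shadow_library_links(query: str) -> dict[str, str]:
    # Same search endpoints as the template, with the query URL-encoded.
    q = quote_plus(query)
    return {
        "Anna's Archive": f"https://annas-archive.org/search?q={q}",
        "Z-Library": f"https://z-lib.gs/s/{q}",
        "Libgen+LI": f"https://libgen.li/index.php?req={q}",
    }

links = shadow_library_links("9780553283686")  # works for an ISBN or a title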

View file

@ -171,5 +171,5 @@ class CsvExporter(Task):
shutil.make_archive(filename[:-4], "zip", temp_folder_path)
self.metadata["file"] = filename
self.metadata["total"] = total
self.message = "Export complete."
self.message = f"{total} records exported."
self.save()

View file

@ -3,6 +3,7 @@ import os
import re
import shutil
import tempfile
import uuid
from django.conf import settings
from django.utils import timezone
@ -65,13 +66,15 @@ class NdjsonExporter(Task):
def _save_image(url):
if url.startswith("http"):
imgdl = ProxiedImageDownloader(url)
raw_img = imgdl.download().content
ext = imgdl.extention
file = GenerateDateUUIDMediaFilePath(f"x.{ext}", attachment_path)
with open(file, "wb") as binary_file:
binary_file.write(raw_img)
return file
try:
raw_img, ext = ProxiedImageDownloader.download_image(url, "")
if raw_img:
file = "%s/%s.%s" % (attachment_path, uuid.uuid4(), ext)
with open(file, "wb") as binary_file:
binary_file.write(raw_img)
return file
except Exception:
logger.debug(f"error downloading {url}")
elif url.startswith("/"):
p = os.path.abspath(
os.path.join(settings.MEDIA_ROOT, url[len(settings.MEDIA_URL) :])
@ -79,11 +82,8 @@ class NdjsonExporter(Task):
if p.startswith(settings.MEDIA_ROOT):
try:
shutil.copy2(p, attachment_path)
except Exception as e:
logger.error(
f"error copying {p} to {attachment_path}",
extra={"exception": e},
)
except Exception:
logger.error(f"error copying {p} to {attachment_path}")
return p
return url
@ -206,6 +206,25 @@ class NdjsonExporter(Task):
for item in self.ref_items:
f.write(json.dumps(item.ap_object, default=str) + "\n")
# Export actor.ndjson with Takahe identity data
filename = os.path.join(temp_folder_path, "actor.ndjson")
with open(filename, "w") as f:
f.write(json.dumps(self.get_header()) + "\n")
takahe_identity = self.user.identity.takahe_identity
identity_data = {
"type": "Identity",
"username": takahe_identity.username,
"domain": takahe_identity.domain_id,
"actor_uri": takahe_identity.actor_uri,
"name": takahe_identity.name,
"summary": takahe_identity.summary,
"metadata": takahe_identity.metadata,
"private_key": takahe_identity.private_key,
"public_key": takahe_identity.public_key,
"public_key_id": takahe_identity.public_key_id,
}
f.write(json.dumps(identity_data, default=str) + "\n")
filename = GenerateDateUUIDMediaFilePath(
"f.zip", settings.MEDIA_ROOT + "/" + settings.EXPORT_FILE_PATH_ROOT
)
@ -215,5 +234,5 @@ class NdjsonExporter(Task):
self.metadata["file"] = filename
self.metadata["total"] = total
self.message = "Export complete."
self.message = f"{total} records exported."
self.save()
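
Each .ndjson file written above starts with one header line followed by one JSON object per line. A minimal sketch of reading such a file back (the "server" key is the one the importer's parse_header looks for later in this diff; everything else is illustrative):

import json

def read_ndjson(path):
    with open(path) as f:
        header = json.loads(f.readline())  # first line: export metadata, e.g. {"server": ...}
        records = [json.loads(line) for line in f if line.strip()]
    return header, records

header, records = read_ndjson("actor.ndjson")
print(header.get("server"), len(records))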

View file

@ -1,34 +1,15 @@
import os
import zipfile
from .csv import CsvImporter
from .douban import DoubanImporter
from .goodreads import GoodreadsImporter
from .letterboxd import LetterboxdImporter
from .ndjson import NdjsonImporter
from .opml import OPMLImporter
def get_neodb_importer(filename: str) -> type[CsvImporter] | None:
if not os.path.exists(filename) or not zipfile.is_zipfile(filename):
return None
with zipfile.ZipFile(filename, "r") as z:
files = z.namelist()
if any(f == "journal.ndjson" for f in files):
return None
if any(
f.endswith("_mark.csv")
or f.endswith("_review.csv")
or f.endswith("_note.csv")
for f in files
):
return CsvImporter
__all__ = [
"CsvImporter",
"NdjsonImporter",
"LetterboxdImporter",
"OPMLImporter",
"DoubanImporter",
"GoodreadsImporter",
"get_neodb_importer",
]
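
The classes re-exported here are Task subclasses; the CSV test near the end of this diff suggests the usage pattern, sketched here with the NDJSON importer (arguments beyond user/file/visibility are assumptions):

from journal.importers import NdjsonImporter

# `user` is an existing users.models.User instance; file path is illustrative.
task = NdjsonImporter.create(user=user, file="/tmp/export.zip", visibility=2)
task.run()
print(task.message)  # e.g. "11 items imported, 0 skipped, 0 failed."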

197
journal/importers/base.py Normal file
View file

@ -0,0 +1,197 @@
import datetime
from typing import Dict, List, Literal, Optional
from django.conf import settings
from django.utils.dateparse import parse_datetime
from loguru import logger
from catalog.common.sites import SiteManager
from catalog.models import Edition, IdType, Item, SiteName
from journal.models import ShelfType
from users.models import Task
_PREFERRED_SITES = [
SiteName.Fediverse,
SiteName.RSS,
SiteName.TMDB,
SiteName.IMDB,
SiteName.GoogleBooks,
SiteName.Goodreads,
SiteName.IGDB,
]
class BaseImporter(Task):
class Meta:
app_label = "journal" # workaround bug in TypedModel
ImportResult = Literal["imported", "skipped", "failed"]
TaskQueue = "import"
DefaultMetadata = {
"total": 0,
"processed": 0,
"skipped": 0,
"imported": 0,
"failed": 0,
"failed_items": [],
"file": None,
"visibility": 0,
}
def progress(self, result: ImportResult) -> None:
"""Update import progress.
Args:
result: The import result ('imported', 'skipped', or 'failed')
"""
self.metadata["processed"] += 1
self.metadata[result] = self.metadata.get(result, 0) + 1
if self.metadata["total"]:
progress_percentage = round(
self.metadata["processed"] / self.metadata["total"] * 100
)
self.message = f"Progress: {progress_percentage}% - "
else:
self.message = ""
self.message += (
f"{self.metadata['imported']} imported, "
f"{self.metadata['skipped']} skipped, "
f"{self.metadata['failed']} failed"
)
self.save(update_fields=["metadata", "message"])
def run(self) -> None:
raise NotImplementedError
def get_item_by_info_and_links(
self, title: str, info_str: str, links: list[str]
) -> Optional[Item]:
"""Find an item based on information from CSV export.
Args:
title: Item title
info_str: Item info string (space-separated key:value pairs)
links_str: Space-separated URLs
Returns:
Item if found, None otherwise
"""
site_url = settings.SITE_INFO["site_url"] + "/"
# look for local items first
for link in links:
if link.startswith("/") or link.startswith(site_url):
item = Item.get_by_url(link, resolve_merge=True)
if item and not item.is_deleted:
return item
sites = [
SiteManager.get_site_by_url(link, detect_redirection=False)
for link in links
]
sites = [site for site in sites if site]
sites.sort(
key=lambda x: _PREFERRED_SITES.index(x.SITE_NAME)
if x.SITE_NAME in _PREFERRED_SITES
else 99
)
# match items without extra requests
for site in sites:
item = site.get_item()
if item:
return item
# match items after HEAD
sites = [
SiteManager.get_site_by_url(site.url) if site.url else site
for site in sites
]
sites = [site for site in sites if site]
for site in sites:
item = site.get_item()
if item:
return item
# fetch from remote
for site in sites:
try:
logger.debug(f"fetching {site.url}")
site.get_resource_ready()
item = site.get_item()
if item:
return item
except Exception as e:
logger.error(f"Error fetching item: {e}")
# Try using the info string
if info_str:
info_dict = {}
for pair in info_str.strip().split():
if ":" in pair:
key, value = pair.split(":", 1)
info_dict[key] = value
# Check for ISBN, IMDB, etc.
item = None
for key, value in info_dict.items():
if key == "isbn" and value:
item = Edition.objects.filter(
primary_lookup_id_type=IdType.ISBN,
primary_lookup_id_value=value,
).first()
elif key == "imdb" and value:
item = Item.objects.filter(
primary_lookup_id_type=IdType.IMDB,
primary_lookup_id_value=value,
).first()
if item:
return item
return None
def parse_tags(self, tags_str: str) -> List[str]:
"""Parse space-separated tags string into a list of tags."""
if not tags_str:
return []
return [tag.strip() for tag in tags_str.split() if tag.strip()]
def parse_info(self, info_str: str) -> Dict[str, str]:
"""Parse info string into a dictionary."""
info_dict = {}
if not info_str:
return info_dict
for pair in info_str.split():
if ":" in pair:
key, value = pair.split(":", 1)
info_dict[key] = value
return info_dict
def parse_datetime(self, timestamp_str: str | None) -> Optional[datetime.datetime]:
"""Parse ISO format timestamp into datetime."""
if not timestamp_str:
return None
try:
dt = parse_datetime(timestamp_str)
if dt and dt.tzinfo is None:
dt = dt.replace(tzinfo=datetime.UTC)
return dt
except Exception as e:
logger.error(f"Error parsing datetime {timestamp_str}: {e}")
return None
def parse_shelf_type(self, status_str: str) -> ShelfType:
"""Parse shelf type string into ShelfType enum."""
if not status_str:
return ShelfType.WISHLIST
status_map = {
"wishlist": ShelfType.WISHLIST,
"progress": ShelfType.PROGRESS,
"complete": ShelfType.COMPLETE,
"dropped": ShelfType.DROPPED,
}
return status_map.get(status_str.lower(), ShelfType.WISHLIST)
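
A quick illustration of what the parsing helpers above return (importer stands for any concrete BaseImporter instance; values are illustrative):

# Expected behaviour of the helpers defined above (illustrative values).
importer.parse_tags("sci-fi  space-opera")        # -> ["sci-fi", "space-opera"]
importer.parse_info("isbn:9780553283686 imdb:tt0056172")
                                                  # -> {"isbn": "9780553283686", "imdb": "tt0056172"}
importer.parse_shelf_type("complete")             # -> ShelfType.COMPLETE
importer.parse_shelf_type("unknown")              # -> ShelfType.WISHLIST (fallback)
importer.parse_datetime("2021-01-01T00:00:00Z")   # -> timezone-aware datetime in UTC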

View file

@ -1,181 +1,22 @@
import csv
import datetime
import os
import tempfile
import zipfile
from typing import Dict, List, Optional
from typing import Dict
from django.conf import settings
from django.utils import timezone
from django.utils.dateparse import parse_datetime
from django.utils.translation import gettext as _
from loguru import logger
from catalog.common.sites import SiteManager
from catalog.models import Edition, IdType, Item, ItemCategory, SiteName
from journal.models import Mark, Note, Review, ShelfType
from users.models import Task
from catalog.models import ItemCategory
from journal.models import Mark, Note, Review
_PREFERRED_SITES = [
SiteName.Fediverse,
SiteName.RSS,
SiteName.TMDB,
SiteName.IMDB,
SiteName.GoogleBooks,
SiteName.Goodreads,
SiteName.IGDB,
]
from .base import BaseImporter
class CsvImporter(Task):
class CsvImporter(BaseImporter):
class Meta:
app_label = "journal" # workaround bug in TypedModel
TaskQueue = "import"
DefaultMetadata = {
"total": 0,
"processed": 0,
"skipped": 0,
"imported": 0,
"failed": 0,
"failed_items": [],
"file": None,
"visibility": 0,
}
def get_item_by_info_and_links(
self, title: str, info_str: str, links_str: str
) -> Optional[Item]:
"""Find an item based on information from CSV export.
Args:
title: Item title
info_str: Item info string (space-separated key:value pairs)
links_str: Space-separated URLs
Returns:
Item if found, None otherwise
"""
site_url = settings.SITE_INFO["site_url"] + "/"
links = links_str.strip().split()
# look for local items first
for link in links:
if link.startswith("/") or link.startswith(site_url):
item = Item.get_by_url(link, resolve_merge=True)
if item and not item.is_deleted:
return item
sites = [
SiteManager.get_site_by_url(link, detect_redirection=False)
for link in links
]
sites = [site for site in sites if site]
sites.sort(
key=lambda x: _PREFERRED_SITES.index(x.SITE_NAME)
if x.SITE_NAME in _PREFERRED_SITES
else 99
)
# match items without extra requests
for site in sites:
item = site.get_item()
if item:
return item
# match items after HEAD
sites = [
SiteManager.get_site_by_url(site.url) if site.url else site
for site in sites
]
sites = [site for site in sites if site]
for site in sites:
item = site.get_item()
if item:
return item
# fetch from remote
for site in sites:
try:
logger.debug(f"fetching {site.url}")
site.get_resource_ready()
item = site.get_item()
if item:
return item
except Exception as e:
logger.error(f"Error fetching item: {e}")
# Try using the info string
if info_str:
info_dict = {}
for pair in info_str.strip().split():
if ":" in pair:
key, value = pair.split(":", 1)
info_dict[key] = value
# Check for ISBN, IMDB, etc.
item = None
for key, value in info_dict.items():
if key == "isbn" and value:
item = Edition.objects.filter(
primary_lookup_id_type=IdType.ISBN,
primary_lookup_id_value=value,
).first()
elif key == "imdb" and value:
item = Item.objects.filter(
primary_lookup_id_type=IdType.IMDB,
primary_lookup_id_value=value,
).first()
if item:
return item
return None
def parse_tags(self, tags_str: str) -> List[str]:
"""Parse space-separated tags string into a list of tags."""
if not tags_str:
return []
return [tag.strip() for tag in tags_str.split() if tag.strip()]
def parse_info(self, info_str: str) -> Dict[str, str]:
"""Parse info string into a dictionary."""
info_dict = {}
if not info_str:
return info_dict
for pair in info_str.split():
if ":" in pair:
key, value = pair.split(":", 1)
info_dict[key] = value
return info_dict
def parse_datetime(self, timestamp_str: str) -> Optional[datetime.datetime]:
"""Parse ISO format timestamp into datetime."""
if not timestamp_str:
return None
try:
dt = parse_datetime(timestamp_str)
if dt and dt.tzinfo is None:
dt = dt.replace(tzinfo=datetime.UTC)
return dt
except Exception as e:
logger.error(f"Error parsing datetime {timestamp_str}: {e}")
return None
def parse_shelf_type(self, status_str: str) -> ShelfType:
"""Parse shelf type string into ShelfType enum."""
if not status_str:
return ShelfType.WISHLIST
status_map = {
"wishlist": ShelfType.WISHLIST,
"progress": ShelfType.PROGRESS,
"complete": ShelfType.COMPLETE,
"dropped": ShelfType.DROPPED,
}
return status_map.get(status_str.lower(), ShelfType.WISHLIST)
def import_mark(self, row: Dict[str, str]) -> str:
"""Import a mark from a CSV row.
@ -184,7 +25,9 @@ class CsvImporter(Task):
"""
try:
item = self.get_item_by_info_and_links(
row.get("title", ""), row.get("info", ""), row.get("links", "")
row.get("title", ""),
row.get("info", ""),
row.get("links", "").strip().split(),
)
if not item:
@ -246,7 +89,9 @@ class CsvImporter(Task):
"""
try:
item = self.get_item_by_info_and_links(
row.get("title", ""), row.get("info", ""), row.get("links", "")
row.get("title", ""),
row.get("info", ""),
row.get("links", "").strip().split(),
)
if not item:
@ -304,7 +149,9 @@ class CsvImporter(Task):
"""
try:
item = self.get_item_by_info_and_links(
row.get("title", ""), row.get("info", ""), row.get("links", "")
row.get("title", ""),
row.get("info", ""),
row.get("links", "").strip().split(),
)
if not item:
@ -361,26 +208,6 @@ class CsvImporter(Task):
)
return "failed"
def progress(self, result: str) -> None:
"""Update import progress.
Args:
result: The import result ('imported', 'skipped', or 'failed')
"""
self.metadata["processed"] += 1
self.metadata[result] = self.metadata.get(result, 0) + 1
progress_percentage = round(
self.metadata["processed"] / self.metadata["total"] * 100
)
self.message = (
f"Progress: {progress_percentage}% - "
f"{self.metadata['imported']} imported, "
f"{self.metadata['skipped']} skipped, "
f"{self.metadata['failed']} failed"
)
self.save(update_fields=["metadata", "message"])
def process_csv_file(self, file_path: str, import_function) -> None:
"""Process a CSV file using the specified import function."""
logger.debug(f"Processing {file_path}")
@ -424,7 +251,7 @@ class CsvImporter(Task):
# Set the total count in metadata
self.metadata["total"] = total_rows
self.message = f"Found {total_rows} items to import"
self.message = f"found {total_rows} records to import"
self.save(update_fields=["metadata", "message"])
# Now process all files
@ -432,7 +259,5 @@ class CsvImporter(Task):
import_function = getattr(self, f"import_{file_type}")
self.process_csv_file(file_path, import_function)
self.message = _("Import complete")
if self.metadata.get("failed_items", []):
self.message += f": {self.metadata['failed']} items failed ({len(self.metadata['failed_items'])} unique items)"
self.message = f"{self.metadata['imported']} items imported, {self.metadata['skipped']} skipped, {self.metadata['failed']} failed."
self.save()

View file

@ -154,6 +154,8 @@ class DoubanImporter(Task):
def run(self):
logger.info(f"{self.user} import start")
self.load_sheets()
self.message = f"豆瓣标记和评论导入开始,共{self.metadata['total']}篇。"
self.save(update_fields=["message"])
logger.info(f"{self.user} sheet loaded, {self.metadata['total']} lines total")
for name, param in self.mark_sheet_config.items():
self.import_mark_sheet(self.mark_data[name], param[0], name)

View file

@ -1,4 +1,5 @@
import csv
import os
import tempfile
import zipfile
from datetime import timedelta
@ -35,6 +36,13 @@ class LetterboxdImporter(Task):
"file": None,
}
@classmethod
def validate_file(cls, uploaded_file):
try:
return zipfile.is_zipfile(uploaded_file)
except Exception:
return False
def get_item_by_url(self, url):
try:
h = BasicDownloader(url).download().html()
@ -121,7 +129,6 @@ class LetterboxdImporter(Task):
self.progress(1)
def progress(self, mark_state: int, url=None):
self.metadata["total"] += 1
self.metadata["processed"] += 1
match mark_state:
case 1:
@ -142,49 +149,56 @@ class LetterboxdImporter(Task):
with tempfile.TemporaryDirectory() as tmpdirname:
logger.debug(f"Extracting {filename} to {tmpdirname}")
zipref.extractall(tmpdirname)
with open(tmpdirname + "/reviews.csv") as f:
reader = csv.DictReader(f, delimiter=",")
for row in reader:
uris.add(row["Letterboxd URI"])
self.mark(
row["Letterboxd URI"],
ShelfType.COMPLETE,
row["Watched Date"],
row["Rating"],
row["Review"],
row["Tags"],
)
with open(tmpdirname + "/ratings.csv") as f:
reader = csv.DictReader(f, delimiter=",")
for row in reader:
if row["Letterboxd URI"] in uris:
continue
uris.add(row["Letterboxd URI"])
self.mark(
row["Letterboxd URI"],
ShelfType.COMPLETE,
row["Date"],
row["Rating"],
)
with open(tmpdirname + "/watched.csv") as f:
reader = csv.DictReader(f, delimiter=",")
for row in reader:
if row["Letterboxd URI"] in uris:
continue
uris.add(row["Letterboxd URI"])
self.mark(
row["Letterboxd URI"],
ShelfType.COMPLETE,
row["Date"],
)
with open(tmpdirname + "/watchlist.csv") as f:
reader = csv.DictReader(f, delimiter=",")
for row in reader:
if row["Letterboxd URI"] in uris:
continue
uris.add(row["Letterboxd URI"])
self.mark(
row["Letterboxd URI"],
ShelfType.WISHLIST,
row["Date"],
)
if os.path.exists(tmpdirname + "/reviews.csv"):
with open(tmpdirname + "/reviews.csv") as f:
reader = csv.DictReader(f, delimiter=",")
for row in reader:
uris.add(row["Letterboxd URI"])
self.mark(
row["Letterboxd URI"],
ShelfType.COMPLETE,
row["Watched Date"],
row["Rating"],
row["Review"],
row["Tags"],
)
if os.path.exists(tmpdirname + "/ratings.csv"):
with open(tmpdirname + "/ratings.csv") as f:
reader = csv.DictReader(f, delimiter=",")
for row in reader:
if row["Letterboxd URI"] in uris:
continue
uris.add(row["Letterboxd URI"])
self.mark(
row["Letterboxd URI"],
ShelfType.COMPLETE,
row["Date"],
row["Rating"],
)
if os.path.exists(tmpdirname + "/watched.csv"):
with open(tmpdirname + "/watched.csv") as f:
reader = csv.DictReader(f, delimiter=",")
for row in reader:
if row["Letterboxd URI"] in uris:
continue
uris.add(row["Letterboxd URI"])
self.mark(
row["Letterboxd URI"],
ShelfType.COMPLETE,
row["Date"],
)
if os.path.exists(tmpdirname + "/watchlist.csv"):
with open(tmpdirname + "/watchlist.csv") as f:
reader = csv.DictReader(f, delimiter=",")
for row in reader:
if row["Letterboxd URI"] in uris:
continue
uris.add(row["Letterboxd URI"])
self.mark(
row["Letterboxd URI"],
ShelfType.WISHLIST,
row["Date"],
)
self.metadata["total"] = self.metadata["processed"]
self.message = f"{self.metadata['imported']} imported, {self.metadata['skipped']} skipped, {self.metadata['failed']} failed"
self.save(update_fields=["metadata", "message"])
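
The three near-identical ratings/watched/watchlist blocks above follow the same guard-then-parse pattern; a compact sketch of that pattern as a loop (column names come from the diff, everything else is illustrative; reviews.csv has extra columns and would stay separate):

import csv
import os

# (file name, shelf type, date column)
SOURCES = [
    ("ratings.csv", "complete", "Date"),
    ("watched.csv", "complete", "Date"),
    ("watchlist.csv", "wishlist", "Date"),
]

def iter_letterboxd_rows(tmpdirname, seen_uris):
    for name, shelf, date_col in SOURCES:
        path = os.path.join(tmpdirname, name)
        if not os.path.exists(path):  # a Letterboxd export may omit any of these files
            continue
        with open(path) as f:
            for row in csv.DictReader(f):
                uri = row["Letterboxd URI"]
                if uri in seen_uris:
                    continue
                seen_uris.add(uri)
                yield uri, shelf, row[date_col]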

484
journal/importers/ndjson.py Normal file
View file

@ -0,0 +1,484 @@
import json
import os
import tempfile
import zipfile
from typing import Any, Dict
from loguru import logger
from journal.models import (
Collection,
Comment,
Mark,
Note,
Rating,
Review,
ShelfLogEntry,
ShelfType,
Tag,
TagMember,
)
from takahe.utils import Takahe
from .base import BaseImporter
class NdjsonImporter(BaseImporter):
"""Importer for NDJSON files exported from NeoDB."""
class Meta:
app_label = "journal" # workaround bug in TypedModel
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.items = {}
def import_collection(self, data: Dict[str, Any]) -> BaseImporter.ImportResult:
"""Import a collection from NDJSON data."""
try:
owner = self.user.identity
visibility = data.get("visibility", self.metadata.get("visibility", 0))
metadata = data.get("metadata", {})
content_data = data.get("content", {})
published_dt = self.parse_datetime(content_data.get("published"))
name = content_data.get("name", "")
content = content_data.get("content", "")
collection = Collection.objects.create(
owner=owner,
title=name,
brief=content,
visibility=visibility,
metadata=data.get("metadata", {}),
created_time=published_dt,
)
item_data = data.get("items", [])
for item_entry in item_data:
item_url = item_entry.get("item")
if not item_url:
continue
item = self.items.get(item_url)
if not item:
logger.warning(f"Could not find item for collection: {item_url}")
continue
metadata = item_entry.get("metadata", {})
collection.append_item(item, metadata=metadata)
return "imported"
except Exception:
logger.exception("Error importing collection")
return "failed"
def import_shelf_member(self, data: Dict[str, Any]) -> BaseImporter.ImportResult:
"""Import a shelf member (mark) from NDJSON data."""
try:
owner = self.user.identity
visibility = data.get("visibility", self.metadata.get("visibility", 0))
metadata = data.get("metadata", {})
content_data = data.get("content", {})
published_dt = self.parse_datetime(content_data.get("published"))
item = self.items.get(content_data.get("withRegardTo", ""))
if not item:
raise KeyError(f"Could not find item: {data.get('item', '')}")
shelf_type = content_data.get("status", ShelfType.WISHLIST)
mark = Mark(owner, item)
if mark.created_time and published_dt and mark.created_time >= published_dt:
return "skipped"
mark.update(
shelf_type=shelf_type,
visibility=visibility,
metadata=metadata,
created_time=published_dt,
)
return "imported"
except Exception:
logger.exception("Error importing shelf member")
return "failed"
def import_shelf_log(self, data: Dict[str, Any]) -> BaseImporter.ImportResult:
"""Import a shelf log entry from NDJSON data."""
try:
item = self.items.get(data.get("item", ""))
if not item:
raise KeyError(f"Could not find item: {data.get('item', '')}")
owner = self.user.identity
shelf_type = data.get("status", ShelfType.WISHLIST)
# posts = data.get("posts", []) # TODO but will be tricky
timestamp = data.get("timestamp")
timestamp_dt = self.parse_datetime(timestamp) if timestamp else None
_, created = ShelfLogEntry.objects.update_or_create(
owner=owner,
item=item,
shelf_type=shelf_type,
timestamp=timestamp_dt,
)
# return "imported" if created else "skipped"
# count skip as success otherwise it may confuse user
return "imported"
except Exception:
logger.exception("Error importing shelf log")
return "failed"
def import_post(self, data: Dict[str, Any]) -> BaseImporter.ImportResult:
"""Import a post from NDJSON data."""
# TODO
return "skipped"
def import_review(self, data: Dict[str, Any]) -> BaseImporter.ImportResult:
"""Import a review from NDJSON data."""
try:
owner = self.user.identity
visibility = data.get("visibility", self.metadata.get("visibility", 0))
metadata = data.get("metadata", {})
content_data = data.get("content", {})
published_dt = self.parse_datetime(content_data.get("published"))
item = self.items.get(content_data.get("withRegardTo", ""))
if not item:
raise KeyError(f"Could not find item: {data.get('item', '')}")
name = content_data.get("name", "")
content = content_data.get("content", "")
existing_review = Review.objects.filter(
owner=owner, item=item, title=name
).first()
if (
existing_review
and existing_review.created_time
and published_dt
and existing_review.created_time >= published_dt
):
return "skipped"
Review.objects.create(
owner=owner,
item=item,
title=name,
body=content,
created_time=published_dt,
visibility=visibility,
metadata=metadata,
)
return "imported"
except Exception:
logger.exception("Error importing review")
return "failed"
def import_note(self, data: Dict[str, Any]) -> BaseImporter.ImportResult:
"""Import a note from NDJSON data."""
try:
owner = self.user.identity
visibility = data.get("visibility", self.metadata.get("visibility", 0))
content_data = data.get("content", {})
published_dt = self.parse_datetime(content_data.get("published"))
item = self.items.get(content_data.get("withRegardTo", ""))
if not item:
raise KeyError(f"Could not find item: {data.get('item', '')}")
title = content_data.get("title", "")
content = content_data.get("content", "")
sensitive = content_data.get("sensitive", False)
progress = content_data.get("progress", {})
progress_type = progress.get("type", "")
progress_value = progress.get("value", "")
Note.objects.create(
item=item,
owner=owner,
title=title,
content=content,
sensitive=sensitive,
progress_type=progress_type,
progress_value=progress_value,
created_time=published_dt,
visibility=visibility,
metadata=data.get("metadata", {}),
)
return "imported"
except Exception:
logger.exception("Error importing note")
return "failed"
def import_comment(self, data: Dict[str, Any]) -> BaseImporter.ImportResult:
"""Import a comment from NDJSON data."""
try:
owner = self.user.identity
visibility = data.get("visibility", self.metadata.get("visibility", 0))
metadata = data.get("metadata", {})
content_data = data.get("content", {})
published_dt = self.parse_datetime(content_data.get("published"))
item = self.items.get(content_data.get("withRegardTo", ""))
if not item:
raise KeyError(f"Could not find item: {data.get('item', '')}")
content = content_data.get("content", "")
existing_comment = Comment.objects.filter(owner=owner, item=item).first()
if (
existing_comment
and existing_comment.created_time
and published_dt
and existing_comment.created_time >= published_dt
):
return "skipped"
Comment.objects.create(
owner=owner,
item=item,
text=content,
created_time=published_dt,
visibility=visibility,
metadata=metadata,
)
return "imported"
except Exception:
logger.exception("Error importing comment")
return "failed"
def import_rating(self, data: Dict[str, Any]) -> BaseImporter.ImportResult:
"""Import a rating from NDJSON data."""
try:
owner = self.user.identity
visibility = data.get("visibility", self.metadata.get("visibility", 0))
metadata = data.get("metadata", {})
content_data = data.get("content", {})
published_dt = self.parse_datetime(content_data.get("published"))
item = self.items.get(content_data.get("withRegardTo", ""))
if not item:
raise KeyError(f"Could not find item: {data.get('item', '')}")
rating_grade = int(float(content_data.get("value", 0)))
existing_rating = Comment.objects.filter(owner=owner, item=item).first()
if (
existing_rating
and existing_rating.created_time
and published_dt
and existing_rating.created_time >= published_dt
):
return "skipped"
Rating.objects.create(
owner=owner,
item=item,
grade=rating_grade,
created_time=published_dt,
visibility=visibility,
metadata=metadata,
)
return "imported"
except Exception:
logger.exception("Error importing rating")
return "failed"
def import_tag(self, data: Dict[str, Any]) -> BaseImporter.ImportResult:
"""Import tags from NDJSON data."""
try:
owner = self.user.identity
visibility = data.get("visibility", self.metadata.get("visibility", 0))
pinned = data.get("pinned", self.metadata.get("pinned", False))
tag_title = Tag.cleanup_title(data.get("name", ""))
_, created = Tag.objects.update_or_create(
owner=owner,
title=tag_title,
defaults={
"visibility": visibility,
"pinned": pinned,
},
)
return "imported" if created else "skipped"
except Exception:
logger.exception("Error importing tag member")
return "failed"
def import_tag_member(self, data: Dict[str, Any]) -> BaseImporter.ImportResult:
"""Import tags from NDJSON data."""
try:
owner = self.user.identity
visibility = data.get("visibility", self.metadata.get("visibility", 0))
metadata = data.get("metadata", {})
content_data = data.get("content", {})
published_dt = self.parse_datetime(content_data.get("published"))
item = self.items.get(content_data.get("withRegardTo", ""))
if not item:
raise KeyError(f"Could not find item: {data.get('item', '')}")
tag_title = Tag.cleanup_title(content_data.get("tag", ""))
tag, _ = Tag.objects.get_or_create(
owner=owner,
title=tag_title,
defaults={
"created_time": published_dt,
"visibility": visibility,
"pinned": False,
"metadata": metadata,
},
)
_, created = TagMember.objects.update_or_create(
owner=owner,
item=item,
parent=tag,
defaults={
"created_time": published_dt,
"visibility": visibility,
"metadata": metadata,
"position": 0,
},
)
return "imported" if created else "skipped"
except Exception:
logger.exception("Error importing tag member")
return "failed"
def process_journal(self, file_path: str) -> None:
"""Process a NDJSON file and import all items."""
logger.debug(f"Processing {file_path}")
lines_error = 0
import_funcs = {
"Tag": self.import_tag,
"TagMember": self.import_tag_member,
"Rating": self.import_rating,
"Comment": self.import_comment,
"ShelfMember": self.import_shelf_member,
"Review": self.import_review,
"Note": self.import_note,
"Collection": self.import_collection,
"ShelfLog": self.import_shelf_log,
"Post": self.import_post,
}
journal = {k: [] for k in import_funcs.keys()}
with open(file_path, "r") as jsonfile:
# Skip header line
next(jsonfile, None)
for line in jsonfile:
try:
data = json.loads(line)
except json.JSONDecodeError:
lines_error += 1
continue
data_type = data.get("type")
if not data_type:
continue
if data_type not in journal:
journal[data_type] = []
journal[data_type].append(data)
self.metadata["total"] = sum(len(items) for items in journal.values())
self.message = f"found {self.metadata['total']} records to import"
self.save(update_fields=["metadata", "message"])
logger.debug(f"Processing {self.metadata['total']} entries")
if lines_error:
logger.error(f"Error processing journal.ndjson: {lines_error} lines")
for typ, func in import_funcs.items():
for data in journal.get(typ, []):
result = func(data)
self.progress(result)
logger.info(
f"Imported {self.metadata['imported']}, skipped {self.metadata['skipped']}, failed {self.metadata['failed']}"
)
def parse_catalog(self, file_path: str) -> None:
"""Parse the catalog.ndjson file and build item lookup tables."""
logger.debug(f"Parsing catalog file: {file_path}")
item_count = 0
try:
with open(file_path, "r") as jsonfile:
for line in jsonfile:
try:
i = json.loads(line)
except (json.JSONDecodeError, Exception):
logger.exception("Error processing catalog item")
continue
u = i.get("id")
if not u:
continue
# self.catalog_items[u] = i
item_count += 1
links = [u] + [r["url"] for r in i.get("external_resources", [])]
self.items[u] = self.get_item_by_info_and_links("", "", links)
logger.info(f"Loaded {item_count} items from catalog")
self.metadata["catalog_processed"] = item_count
except Exception:
logger.exception("Error parsing catalog file")
def parse_header(self, file_path: str) -> Dict[str, Any]:
try:
with open(file_path, "r") as jsonfile:
first_line = jsonfile.readline().strip()
if first_line:
header = json.loads(first_line)
if header.get("server"):
return header
except (json.JSONDecodeError, IOError):
logger.exception("Error parsing header")
return {}
def process_actor(self, file_path: str) -> None:
"""Process the actor.ndjson file to update user identity information."""
logger.debug(f"Processing actor data from {file_path}")
try:
with open(file_path, "r") as jsonfile:
next(jsonfile, None)
for line in jsonfile:
try:
data = json.loads(line)
except json.JSONDecodeError:
logger.error("Error parsing actor data line")
continue
if data.get("type") == "Identity":
logger.debug("Found identity data in actor.ndjson")
takahe_identity = self.user.identity.takahe_identity
updated = False
if (
data.get("name")
and data.get("name") != takahe_identity.name
):
logger.debug(
f"Updating identity name from {takahe_identity.name} to {data.get('name')}"
)
takahe_identity.name = data.get("name")
updated = True
if (
data.get("summary")
and data.get("summary") != takahe_identity.summary
):
logger.debug("Updating identity summary")
takahe_identity.summary = data.get("summary")
updated = True
if updated:
takahe_identity.save()
Takahe.update_state(takahe_identity, "edited")
logger.info("Updated identity")
return
except Exception as e:
logger.exception(f"Error processing actor file: {e}")
def run(self) -> None:
"""Run the NDJSON import."""
filename = self.metadata["file"]
logger.debug(f"Importing {filename}")
with zipfile.ZipFile(filename, "r") as zipref:
with tempfile.TemporaryDirectory() as tmpdirname:
zipref.extractall(tmpdirname)
# Process actor data first if available
actor_path = os.path.join(tmpdirname, "actor.ndjson")
if os.path.exists(actor_path):
actor_header = self.parse_header(actor_path)
logger.debug(f"Found actor.ndjson with {actor_header}")
self.process_actor(actor_path)
else:
logger.debug("No actor.ndjson file found in the archive")
catalog_path = os.path.join(tmpdirname, "catalog.ndjson")
if os.path.exists(catalog_path):
catalog_header = self.parse_header(catalog_path)
logger.debug(f"Loading catalog.ndjson with {catalog_header}")
self.parse_catalog(catalog_path)
else:
logger.warning("catalog.ndjson file not found in the archive")
journal_path = os.path.join(tmpdirname, "journal.ndjson")
if not os.path.exists(journal_path):
logger.error("journal.ndjson file not found in the archive")
self.message = "Import failed: journal.ndjson file not found"
self.save()
return
header = self.parse_header(journal_path)
self.metadata["journal_header"] = header
logger.debug(f"Importing journal.ndjson with {header}")
self.process_journal(journal_path)
self.message = f"{self.metadata['imported']} items imported, {self.metadata['skipped']} skipped, {self.metadata['failed']} failed."
self.save()
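
Putting run() together, the importer expects a zip whose members look roughly like this; journal.ndjson is the only required file. A tiny illustrative archive (header keys other than "server" are assumptions):

import json
import zipfile

header = {"server": "neodb.example"}  # parse_header() above only checks for "server"
with zipfile.ZipFile("/tmp/export.zip", "w") as z:
    z.writestr("catalog.ndjson", json.dumps(header) + "\n")  # optional: item lookup data
    z.writestr("actor.ndjson", json.dumps(header) + "\n")    # optional: identity data
    z.writestr("journal.ndjson", json.dumps(header) + "\n")  # required: marks, reviews, notes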

View file

@ -1,43 +1,54 @@
import django_rq
import listparser
from auditlog.context import set_actor
from django.utils.translation import gettext as _
from loguru import logger
from user_messages import api as msg
from catalog.common import *
from catalog.common.downloaders import *
from catalog.sites.rss import RSS
from journal.models import *
from users.models.task import Task
class OPMLImporter:
def __init__(self, user, visibility, mode):
self.user = user
self.visibility = visibility
self.mode = mode
class OPMLImporter(Task):
class Meta:
app_label = "journal" # workaround bug in TypedModel
def parse_file(self, uploaded_file):
return listparser.parse(uploaded_file.read()).feeds
TaskQueue = "import"
DefaultMetadata = {
"total": 0,
"mode": 0,
"processed": 0,
"skipped": 0,
"imported": 0,
"failed": 0,
"visibility": 0,
"failed_urls": [],
"file": None,
}
def import_from_file(self, uploaded_file):
feeds = self.parse_file(uploaded_file)
if not feeds:
@classmethod
def validate_file(cls, f):
try:
return bool(listparser.parse(f.read()).feeds)
except Exception:
return False
django_rq.get_queue("import").enqueue(self.import_from_file_task, feeds)
return True
def import_from_file_task(self, feeds):
logger.info(f"{self.user} import opml start")
skip = 0
collection = None
with set_actor(self.user):
if self.mode == 1:
def run(self):
with open(self.metadata["file"], "r") as f:
feeds = listparser.parse(f.read()).feeds
self.metadata["total"] = len(feeds)
self.message = f"Processing {self.metadata['total']} feeds."
self.save(update_fields=["metadata", "message"])
collection = None
if self.metadata["mode"] == 1:
title = _("{username}'s podcast subscriptions").format(
username=self.user.display_name
)
collection = Collection.objects.create(
owner=self.user.identity, title=title
owner=self.user.identity,
title=title,
visibility=self.metadata["visibility"],
)
for feed in feeds:
logger.info(f"{self.user} import {feed.url}")
@ -47,21 +58,26 @@ class OPMLImporter:
res = None
if not res or not res.item:
logger.warning(f"{self.user} feed error {feed.url}")
self.metadata["failed"] += 1
continue
item = res.item
if self.mode == 0:
if self.metadata["mode"] == 0:
mark = Mark(self.user.identity, item)
if mark.shelfmember:
logger.info(f"{self.user} marked, skip {feed.url}")
skip += 1
self.metadata["skipped"] += 1
else:
self.metadata["imported"] += 1
mark.update(
ShelfType.PROGRESS, None, None, visibility=self.visibility
ShelfType.PROGRESS,
None,
None,
visibility=self.metadata["visibility"],
)
elif self.mode == 1 and collection:
elif self.metadata["mode"] == 1 and collection:
self.metadata["imported"] += 1
collection.append_item(item)
logger.info(f"{self.user} import opml end")
msg.success(
self.user,
f"OPML import complete, {len(feeds)} feeds processed, {skip} exisiting feeds skipped.",
)
self.metadata["processed"] += 1
self.save(update_fields=["metadata"])
self.message = f"{self.metadata['imported']} feeds imported, {self.metadata['skipped']} skipped, {self.metadata['failed']} failed."
self.save(update_fields=["message"])

View file

@ -10,6 +10,16 @@ class Migration(migrations.Migration):
]
operations = [
migrations.CreateModel(
name="BaseImporter",
fields=[],
options={
"proxy": True,
"indexes": [],
"constraints": [],
},
bases=("users.task",),
),
migrations.CreateModel(
name="CsvImporter",
fields=[],
@ -20,4 +30,24 @@ class Migration(migrations.Migration):
},
bases=("users.task",),
),
migrations.CreateModel(
name="OPMLImporter",
fields=[],
options={
"proxy": True,
"indexes": [],
"constraints": [],
},
bases=("users.task",),
),
migrations.CreateModel(
name="NdjsonImporter",
fields=[],
options={
"proxy": True,
"indexes": [],
"constraints": [],
},
bases=("journal.baseimporter",),
),
]

View file

@ -1,3 +1,4 @@
from .csv import *
from .ndjson import *
from .piece import *
from .search import *

View file

@ -9,7 +9,7 @@ from loguru import logger
from catalog.models import Edition, IdType, Movie, TVEpisode, TVSeason, TVShow
from journal.exporters import CsvExporter
from journal.importers import CsvImporter, get_neodb_importer
from journal.importers import CsvImporter
from users.models import User
from ..models import *
@ -219,10 +219,9 @@ class CsvExportImportTest(TestCase):
f"Expected file {filename} with {expected_data_count} data rows, but file not found"
)
self.assertEqual(get_neodb_importer(export_path), CsvImporter)
importer = CsvImporter.create(user=self.user2, file=export_path, visibility=2)
importer.run()
self.assertEqual(importer.message, "Import complete")
self.assertEqual(importer.message, "11 items imported, 0 skipped, 0 failed.")
# Verify imported data

506
journal/tests/ndjson.py Normal file
View file

@ -0,0 +1,506 @@
import json
import os
import zipfile
from tempfile import TemporaryDirectory
from django.test import TestCase
from django.utils.dateparse import parse_datetime
from loguru import logger
from catalog.models import (
Edition,
IdType,
Movie,
Podcast,
PodcastEpisode,
TVEpisode,
TVSeason,
TVShow,
)
from journal.exporters import NdjsonExporter
from journal.importers import NdjsonImporter
from users.models import User
from ..models import *
class NdjsonExportImportTest(TestCase):
databases = "__all__"
maxDiff = None
def setUp(self):
self.user1 = User.register(
email="ndjson_export@test.com", username="ndjson_exporter"
)
self.user2 = User.register(
email="ndjson_import@test.com", username="ndjson_importer"
)
self.tag1 = Tag.objects.create(
owner=self.user1.identity, title="favorite", pinned=True, visibility=2
)
self.dt = parse_datetime("2021-01-01T00:00:00Z")
self.dt2 = parse_datetime("2021-02-01T00:00:00Z")
self.dt3 = parse_datetime("2021-03-01T00:00:00Z")
self.book1 = Edition.objects.create(
localized_title=[{"lang": "en", "text": "Hyperion"}],
primary_lookup_id_type=IdType.ISBN,
primary_lookup_id_value="9780553283686",
author=["Dan Simmons"],
pub_year=1989,
)
self.book2 = Edition.objects.create(
localized_title=[{"lang": "en", "text": "Dune"}],
primary_lookup_id_type=IdType.ISBN,
primary_lookup_id_value="9780441172719",
author=["Frank Herbert"],
pub_year=1965,
)
self.movie1 = Movie.objects.create(
localized_title=[{"lang": "en", "text": "Inception"}],
primary_lookup_id_type=IdType.IMDB,
primary_lookup_id_value="tt1375666",
director=["Christopher Nolan"],
year=2010,
)
self.movie2 = Movie.objects.create(
localized_title=[{"lang": "en", "text": "The Matrix"}],
primary_lookup_id_type=IdType.IMDB,
primary_lookup_id_value="tt0133093",
director=["Lana Wachowski", "Lilly Wachowski"],
year=1999,
)
self.tvshow = TVShow.objects.create(
localized_title=[{"lang": "en", "text": "Breaking Bad"}],
primary_lookup_id_type=IdType.IMDB,
primary_lookup_id_value="tt0903747",
year=2008,
)
self.tvseason = TVSeason.objects.create(
localized_title=[{"lang": "en", "text": "Breaking Bad Season 1"}],
show=self.tvshow,
season_number=1,
)
self.tvepisode1 = TVEpisode.objects.create(
localized_title=[{"lang": "en", "text": "Pilot"}],
season=self.tvseason,
episode_number=1,
)
self.tvepisode2 = TVEpisode.objects.create(
localized_title=[{"lang": "en", "text": "Cat's in the Bag..."}],
season=self.tvseason,
episode_number=2,
)
# Create podcast test items
self.podcast = Podcast.objects.create(
localized_title=[{"lang": "en", "text": "Test Podcast"}],
primary_lookup_id_type=IdType.RSS,
primary_lookup_id_value="https://example.com/feed.xml",
host=["Test Host"],
)
self.podcastepisode = PodcastEpisode.objects.create(
localized_title=[{"lang": "en", "text": "Test Episode 1"}],
program=self.podcast,
guid="111",
pub_date=self.dt,
)
def test_ndjson_export_import(self):
# set name and summary for user1
identity1 = self.user1.identity
takahe_identity1 = identity1.takahe_identity
takahe_identity1.name = "Test User"
takahe_identity1.summary = "Test summary"
takahe_identity1.save()
# Book marks with ratings and tags
mark_book1 = Mark(self.user1.identity, self.book1)
mark_book1.update(
ShelfType.COMPLETE,
"Great sci-fi classic",
10,
["sci-fi", "favorite", "space"],
1,
created_time=self.dt,
)
mark_book2 = Mark(self.user1.identity, self.book2)
mark_book2.update(
ShelfType.WISHLIST,
"Read it?",
None,
["sci-fi", "desert"],
1,
created_time=self.dt,
)
mark_book2.update(
ShelfType.PROGRESS,
"Reading!",
None,
["sci-fi", "desert"],
0,
created_time=self.dt2,
)
mark_book2.update(
ShelfType.COMPLETE,
"Read.",
None,
["sci-fi", "desert"],
0,
created_time=self.dt3,
)
# Movie marks with ratings
mark_movie1 = Mark(self.user1.identity, self.movie1)
mark_movie1.update(
ShelfType.COMPLETE,
"Mind-bending",
8,
["mindbender", "scifi"],
1,
created_time=self.dt,
)
mark_movie2 = Mark(self.user1.identity, self.movie2)
mark_movie2.update(
ShelfType.WISHLIST, "Need to rewatch", None, [], 1, created_time=self.dt2
)
# TV show mark
mark_tvshow = Mark(self.user1.identity, self.tvshow)
mark_tvshow.update(
ShelfType.WISHLIST,
"Heard it's good",
None,
["drama"],
1,
created_time=self.dt,
)
# TV episode marks
mark_episode1 = Mark(self.user1.identity, self.tvepisode1)
mark_episode1.update(
ShelfType.COMPLETE,
"Great start",
9,
["pilot", "drama"],
1,
created_time=self.dt2,
)
mark_episode2 = Mark(self.user1.identity, self.tvepisode2)
mark_episode2.update(
ShelfType.COMPLETE, "It gets better", 9, [], 1, created_time=self.dt3
)
# Podcast episode mark
mark_podcast = Mark(self.user1.identity, self.podcastepisode)
mark_podcast.update(
ShelfType.COMPLETE,
"Insightful episode",
8,
["tech", "interview"],
1,
created_time=self.dt,
)
# Create reviews
Review.update_item_review(
self.book1,
self.user1.identity,
"My thoughts on Hyperion",
"A masterpiece of science fiction that weaves multiple storylines into a captivating narrative.",
visibility=1,
created_time=self.dt,
)
Review.update_item_review(
self.movie1,
self.user1.identity,
"Inception Review",
"Christopher Nolan at his best. The movie plays with reality and dreams in a fascinating way.",
visibility=1,
)
# Create notes
Note.objects.create(
item=self.book2,
owner=self.user1.identity,
title="Reading progress",
content="Just finished the first part. The world-building is incredible.\n\n - p 125",
progress_type=Note.ProgressType.PAGE,
progress_value="125",
visibility=1,
)
Note.objects.create(
item=self.tvshow,
owner=self.user1.identity,
title="Before watching",
content="Things to look out for according to friends:\n- Character development\n- Color symbolism\n\n - e 0",
progress_type=Note.ProgressType.EPISODE,
progress_value="2",
visibility=1,
)
# Create TV episode note
Note.objects.create(
item=self.tvepisode1,
owner=self.user1.identity,
title="Episode thoughts",
content="Great pilot episode. Sets up the character arcs really well.",
visibility=1,
)
# Create podcast episode note
Note.objects.create(
item=self.podcastepisode,
owner=self.user1.identity,
title="Podcast episode notes",
content="Interesting discussion about tech trends. Timestamp 23:45 has a good point about AI.",
progress_type=Note.ProgressType.TIMESTAMP,
progress_value="23:45",
visibility=1,
)
# Create collections
items = [self.book1, self.movie1]
collection = Collection.objects.create(
owner=self.user1.identity,
title="Favorites",
brief="My all-time favorites",
visibility=1,
)
for i in items:
collection.append_item(i)
# Create another collection with different items
items2 = [self.book2, self.movie2, self.tvshow]
collection2 = Collection.objects.create(
owner=self.user1.identity,
title="To Review",
brief="Items I need to review soon",
visibility=1,
)
for i in items2:
collection2.append_item(i)
# Create shelf log entries
logs = ShelfLogEntry.objects.filter(owner=self.user1.identity).order_by(
"timestamp", "item_id"
)
# Export data to NDJSON
exporter = NdjsonExporter.create(user=self.user1)
exporter.run()
export_path = exporter.metadata["file"]
logger.debug(f"exported to {export_path}")
self.assertTrue(os.path.exists(export_path))
self.assertEqual(exporter.metadata["total"], 61)
# Validate the NDJSON export file structure
with TemporaryDirectory() as extract_dir:
with zipfile.ZipFile(export_path, "r") as zip_ref:
zip_ref.extractall(extract_dir)
logger.debug(f"unzipped to {extract_dir}")
# Check journal.ndjson exists
journal_path = os.path.join(extract_dir, "journal.ndjson")
self.assertTrue(
os.path.exists(journal_path), "journal.ndjson file missing"
)
# Check catalog.ndjson exists
catalog_path = os.path.join(extract_dir, "catalog.ndjson")
self.assertTrue(
os.path.exists(catalog_path), "catalog.ndjson file missing"
)
# Check attachments directory exists
attachments_path = os.path.join(extract_dir, "attachments")
self.assertTrue(
os.path.exists(attachments_path), "attachments directory missing"
)
# Count the number of JSON objects in journal.ndjson
with open(journal_path, "r") as f:
lines = f.readlines()
# First line is header, rest are data
self.assertGreater(
len(lines), 1, "journal.ndjson has no data lines"
)
# Check the first line is a header
header = json.loads(lines[0])
self.assertIn("server", header, "Missing server in header")
self.assertIn("username", header, "Missing username in header")
self.assertEqual(
header["username"],
"ndjson_exporter",
"Wrong username in header",
)
# Count data objects by type
type_counts = {
"ShelfMember": 0,
"Review": 0,
"Note": 0,
"Collection": 0,
"ShelfLog": 0,
"post": 0,
}
for line in lines[1:]:
data = json.loads(line)
if "type" in data:
type_counts[data["type"]] = (
type_counts.get(data["type"], 0) + 1
)
# Verify counts
self.assertEqual(
type_counts["ShelfMember"], 8, "Expected 8 ShelfMember entries"
)
self.assertEqual(
type_counts["Review"], 2, "Expected 2 Review entries"
)
self.assertEqual(type_counts["Note"], 4, "Expected 4 Note entries")
self.assertEqual(
type_counts["Collection"], 2, "Expected 2 Collection entries"
)
self.assertEqual(type_counts["ShelfLog"], logs.count())
# Now import the export file into a different user account
importer = NdjsonImporter.create(
user=self.user2, file=export_path, visibility=2
)
importer.run()
self.assertIn("61 items imported, 0 skipped, 0 failed.", importer.message)
# Verify imported data
identity2 = self.user2.identity
takahe_identity2 = identity2.takahe_identity
# Check that name and summary were updated
self.assertEqual(takahe_identity2.name, "Test User")
self.assertEqual(takahe_identity2.summary, "Test summary")
# Check marks
mark_book1_imported = Mark(self.user2.identity, self.book1)
self.assertEqual(mark_book1_imported.shelf_type, ShelfType.COMPLETE)
self.assertEqual(mark_book1_imported.comment_text, "Great sci-fi classic")
self.assertEqual(mark_book1_imported.rating_grade, 10)
self.assertEqual(mark_book1_imported.visibility, 1)
self.assertEqual(
set(mark_book1_imported.tags), set(["sci-fi", "favorite", "space"])
)
mark_book2_imported = Mark(self.user2.identity, self.book2)
self.assertEqual(mark_book2_imported.shelf_type, ShelfType.COMPLETE)
self.assertEqual(mark_book2_imported.comment_text, "Read.")
self.assertIsNone(mark_book2_imported.rating_grade)
self.assertEqual(set(mark_book2_imported.tags), set(["sci-fi", "desert"]))
self.assertEqual(mark_book2_imported.visibility, 0)
mark_movie1_imported = Mark(self.user2.identity, self.movie1)
self.assertEqual(mark_movie1_imported.shelf_type, ShelfType.COMPLETE)
self.assertEqual(mark_movie1_imported.comment_text, "Mind-bending")
self.assertEqual(mark_movie1_imported.rating_grade, 8)
self.assertEqual(set(mark_movie1_imported.tags), set(["mindbender", "scifi"]))
mark_episode1_imported = Mark(self.user2.identity, self.tvepisode1)
self.assertEqual(mark_episode1_imported.shelf_type, ShelfType.COMPLETE)
self.assertEqual(mark_episode1_imported.comment_text, "Great start")
self.assertEqual(mark_episode1_imported.rating_grade, 9)
self.assertEqual(set(mark_episode1_imported.tags), set(["pilot", "drama"]))
# Check podcast episode mark
mark_podcast_imported = Mark(self.user2.identity, self.podcastepisode)
self.assertEqual(mark_podcast_imported.shelf_type, ShelfType.COMPLETE)
self.assertEqual(mark_podcast_imported.comment_text, "Insightful episode")
self.assertEqual(mark_podcast_imported.rating_grade, 8)
self.assertEqual(set(mark_podcast_imported.tags), set(["tech", "interview"]))
# Check reviews
book1_reviews = Review.objects.filter(
owner=self.user2.identity, item=self.book1
)
self.assertEqual(book1_reviews.count(), 1)
self.assertEqual(book1_reviews[0].title, "My thoughts on Hyperion")
self.assertIn("masterpiece of science fiction", book1_reviews[0].body)
movie1_reviews = Review.objects.filter(
owner=self.user2.identity, item=self.movie1
)
self.assertEqual(movie1_reviews.count(), 1)
self.assertEqual(movie1_reviews[0].title, "Inception Review")
self.assertIn("Christopher Nolan", movie1_reviews[0].body)
# Check notes
book2_notes = Note.objects.filter(owner=self.user2.identity, item=self.book2)
self.assertEqual(book2_notes.count(), 1)
self.assertEqual(book2_notes[0].title, "Reading progress")
self.assertIn("world-building is incredible", book2_notes[0].content)
self.assertEqual(book2_notes[0].progress_type, Note.ProgressType.PAGE)
self.assertEqual(book2_notes[0].progress_value, "125")
tvshow_notes = Note.objects.filter(owner=self.user2.identity, item=self.tvshow)
self.assertEqual(tvshow_notes.count(), 1)
self.assertEqual(tvshow_notes[0].title, "Before watching")
self.assertIn("Character development", tvshow_notes[0].content)
# Check TV episode notes
tvepisode_notes = Note.objects.filter(
owner=self.user2.identity, item=self.tvepisode1
)
self.assertEqual(tvepisode_notes.count(), 1)
self.assertEqual(tvepisode_notes[0].title, "Episode thoughts")
self.assertIn("Sets up the character arcs", tvepisode_notes[0].content)
# Check podcast episode notes
podcast_notes = Note.objects.filter(
owner=self.user2.identity, item=self.podcastepisode
)
self.assertEqual(podcast_notes.count(), 1)
self.assertEqual(podcast_notes[0].title, "Podcast episode notes")
self.assertIn(
"Interesting discussion about tech trends", podcast_notes[0].content
)
self.assertEqual(podcast_notes[0].progress_type, Note.ProgressType.TIMESTAMP)
self.assertEqual(podcast_notes[0].progress_value, "23:45")
# Check first collection
collections = Collection.objects.filter(
owner=self.user2.identity, title="Favorites"
)
self.assertEqual(collections.count(), 1)
self.assertEqual(collections[0].brief, "My all-time favorites")
self.assertEqual(collections[0].visibility, 1)
collection_items = list(collections[0].ordered_items)
self.assertEqual([self.book1, self.movie1], collection_items)
# Check second collection
collections2 = Collection.objects.filter(
owner=self.user2.identity, title="To Review"
)
self.assertEqual(collections2.count(), 1)
self.assertEqual(collections2[0].brief, "Items I need to review soon")
self.assertEqual(collections2[0].visibility, 1)
# Check second collection items
collection2_items = [m.item for m in collections2[0].members.all()]
self.assertEqual(len(collection2_items), 3)
self.assertIn(self.book2, collection2_items)
self.assertIn(self.movie2, collection2_items)
self.assertIn(self.tvshow, collection2_items)
tag1 = Tag.objects.filter(owner=self.user2.identity, title="favorite").first()
self.assertIsNotNone(tag1)
if tag1:
self.assertTrue(tag1.pinned)
self.assertEqual(tag1.visibility, 2)
# Check shelf log entries
logs2 = ShelfLogEntry.objects.filter(owner=self.user2.identity).order_by(
"timestamp", "item_id"
)
l1 = [(log.item, log.shelf_type, log.timestamp) for log in logs]
l2 = [(log.item, log.shelf_type, log.timestamp) for log in logs2]
self.assertEqual(l1, l2)

View file

@ -14,6 +14,7 @@ class Migration(migrations.Migration):
name="type",
field=models.CharField(
choices=[
("journal.baseimporter", "base importer"),
("journal.csvexporter", "csv exporter"),
("journal.csvimporter", "csv importer"),
("journal.doubanimporter", "douban importer"),
@ -21,6 +22,8 @@ class Migration(migrations.Migration):
("journal.goodreadsimporter", "goodreads importer"),
("journal.letterboxdimporter", "letterboxd importer"),
("journal.ndjsonexporter", "ndjson exporter"),
("journal.ndjsonimporter", "ndjson importer"),
("journal.opmlimporter", "opml importer"),
],
db_index=True,
max_length=255,

View file

@ -82,7 +82,6 @@ class Task(TypedModel):
task.refresh_from_db()
task.state = cls.States.complete if ok else cls.States.failed
task.save()
task.notify()
def enqueue(self):
return django_rq.get_queue(self.TaskQueue).enqueue(

View file

@ -10,137 +10,18 @@
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{{ site_name }} - {% trans 'Data Management' %}</title>
{% include "common_libs.html" %}
<script>
document.addEventListener('htmx:responseError', (event) => {
let response = event.detail.xhr.response;
let body = response ? response : `Request error: ${event.detail.xhr.statusText}`;
alert(body);
});
</script>
</head>
<body>
{% include "_header.html" %}
<main>
<div class="grid__main">
<article>
<details>
<summary>{% trans 'Export Data' %}</summary>
<form action="{% url 'users:export_csv' %}"
method="post"
enctype="multipart/form-data">
{% csrf_token %}
<input type="submit"
value="{% trans 'Export marks, reviews and notes in CSV' %}" />
{% if csv_export_task %}
<br>
{% trans 'Last export' %}: {{ csv_export_task.created_time }}
{% trans 'Status' %}: {{ csv_export_task.get_state_display }}
<br>
{{ csv_export_task.message }}
{% if csv_export_task.metadata.file %}
<a href="{% url 'users:export_csv' %}" download>{% trans 'Download' %}</a>
{% endif %}
{% endif %}
</form>
<hr>
<form action="{% url 'users:export_ndjson' %}"
method="post"
enctype="multipart/form-data">
{% csrf_token %}
<input type="submit" value="{% trans 'Export everything in NDJSON' %}" />
{% if ndjson_export_task %}
<br>
{% trans 'Last export' %}: {{ ndjson_export_task.created_time }}
{% trans 'Status' %}: {{ ndjson_export_task.get_state_display }}
<br>
{{ ndjson_export_task.message }}
{% if ndjson_export_task.metadata.file %}
<a href="{% url 'users:export_ndjson' %}" download>{% trans 'Download' %}</a>
{% endif %}
{% endif %}
</form>
<hr>
<form action="{% url 'users:export_marks' %}"
method="post"
enctype="multipart/form-data">
{% csrf_token %}
<input type="submit"
class="secondary"
value="{% trans 'Export marks and reviews in XLSX (Doufen format)' %}" />
<small>exporting to this format will be deprecated soon.</small>
{% if export_task %}
<br>
{% trans 'Last export' %}: {{ export_task.created_time }}
{% trans 'Status' %}: {{ export_task.get_state_display }}
<br>
{{ export_task.message }}
{% if export_task.metadata.file %}
<a href="{% url 'users:export_marks' %}" download>{% trans 'Download' %}</a>
{% endif %}
{% endif %}
</form>
</details>
</article>
<article>
<details>
<summary>{% trans 'Import Data' %}</summary>
<form action="{% url 'users:import_neodb' %}"
method="post"
enctype="multipart/form-data">
{% csrf_token %}
<ul>
<li>
{% trans 'Upload a <code>.zip</code> file containing <code>.csv</code> or <code>.ndjson</code> files exported from NeoDB.' %}
</li>
<li>{% trans 'Existing marks and reviews with newer dates will be preserved.' %}</li>
</ul>
<br>
<input type="file" name="file" required accept=".zip">
<p>
{% trans 'Visibility' %}: <small><code>.csv</code> only</small>
<br>
<label for="csv_visibility_0">
<input type="radio"
name="visibility"
value="0"
required=""
id="csv_visibility_0"
checked>
{% trans 'Public' %}
</label>
<label for="csv_visibility_1">
<input type="radio"
name="visibility"
value="1"
required=""
id="csv_visibility_1">
{% trans 'Followers Only' %}
</label>
<label for="csv_visibility_2">
<input type="radio"
name="visibility"
value="2"
required=""
id="csv_visibility_2">
{% trans 'Mentioned Only' %}
</label>
</p>
<input type="submit" value="{% trans 'Import' %}" />
<small>
{% if csv_import_task %}
{% trans 'Last import started' %}: {{ csv_import_task.created_time }}
{% if csv_import_task.state == 0 or csv_import_task.state == 1 %}
<div hx-get="{% url 'users:user_task_status' 'csv_import' %}"
hx-target="this"
hx-trigger="load delay:2s, every 10s"
hx-swap="outerHTML"></div>
{% else %}
{% trans 'Status' %}: {{ csv_import_task.get_state_display }}。
{{ csv_import_task.message }}
{% endif %}
{% if csv_import_task.metadata.failed_items %}
{% trans 'Failed items' %}:
<br>
<textarea readonly>{% for item in csv_import_task.metadata.failed_items %}{{item}}&#10;{% endfor %}</textarea>
{% endif %}
{% endif %}
</small>
</form>
</details>
</article>
<article>
<details>
<summary>{% trans 'Import Marks and Reviews from Douban' %}</summary>
@ -193,59 +74,50 @@
<input type="submit"
{% if import_task.status == "pending" %} onclick="return confirm('{% trans "Another import is in progress, starting a new import may cause issues, sure to import?" %}')" value="{% trans "Import in progress, please wait" %}" {% else %} value="{% trans 'Import' %}" {% endif %} />
</form>
<div hx-get="{% url 'users:import_status' %}"
hx-trigger="load delay:1s"
hx-swap="outerHTML"></div>
</details>
</article>
<article>
<details>
<summary>{% trans 'Import Shelf or List from Goodreads' %}</summary>
<form action="{% url 'users:import_goodreads' %}" method="post">
<form hx-post="{% url 'users:import_goodreads' %}">
{% csrf_token %}
<div>
{% trans 'Link to Goodreads Profile / Shelf / List' %}
<ul>
<li>
Profile <code>https://www.goodreads.com/user/show/12345-janedoe</code>
<br>
{% trans 'want-to-read / currently-reading / read books and their reviews will be imported.' %}
</li>
<li>
Shelf <code>https://www.goodreads.com/review/list/12345-janedoe?shelf=name</code>
<br>
{% trans 'Shelf will be imported as a new collection.' %}
</li>
<li>
List <code>https://www.goodreads.com/list/show/155086.Popular_Highlights</code>
<br>
{% trans 'List will be imported as a new collection.' %}
</li>
<li>
<mark>Who Can View My Profile</mark> must be set as <mark>anyone</mark> prior to import.
</li>
</ul>
<input type="url"
name="url"
value=""
placeholder="https://www.goodreads.com/user/show/12345-janedoe"
required>
<input type="submit" value="{% trans 'Import' %}" />
<small>
{% if goodreads_task %}
<br>
{% trans 'Last import started' %}: {{ goodreads_task.created_time }}
{% trans 'Status' %}: {{ goodreads_task.get_state_display }}。
<br>
{{ goodreads_task.message }}
{% endif %}
</small>
</div>
<ul>
<li>
Profile <code>https://www.goodreads.com/user/show/12345-janedoe</code>
{% trans 'want-to-read / currently-reading / read books and their reviews will be imported.' %}
</li>
<li>
Shelf <code>https://www.goodreads.com/review/list/12345-janedoe?shelf=name</code>
{% trans 'Shelf will be imported as a new collection.' %}
</li>
<li>
List <code>https://www.goodreads.com/list/show/155086.Popular_Highlights</code>
{% trans 'List will be imported as a new collection.' %}
</li>
<li>
<mark>Who Can View My Profile</mark> must be set as <mark>anyone</mark> prior to import.
</li>
</ul>
{% include "users/user_task_status.html" with task=goodreads_task %}
</form>
</details>
</article>
<article>
<details>
<summary>{% trans 'Import from Letterboxd' %}</summary>
<form action="{% url 'users:import_letterboxd' %}"
method="post"
<form hx-post="{% url 'users:import_letterboxd' %}"
enctype="multipart/form-data">
{% csrf_token %}
<ul>
@ -292,30 +164,15 @@
</label>
</p>
<input type="submit" value="{% trans 'Import' %}" />
<small>
{% trans 'Only forward changes(none->to-watch->watched) will be imported.' %}
{% if letterboxd_task %}
<br>
{% trans 'Last import started' %}: {{ letterboxd_task.created_time }}
{% trans 'Status' %}: {{ letterboxd_task.get_state_display }}。
<br>
{{ letterboxd_task.message }}
{% if letterboxd_task.metadata.failed_urls %}
{% trans 'Failed links, likely due to Letterboxd error, you may have to mark them manually' %}:
<br>
<textarea readonly>{% for url in letterboxd_task.metadata.failed_urls %}{{url}}&#10;{% endfor %}</textarea>
{% endif %}
{% endif %}
</small>
<small>{% trans 'Only forward changes (none->to-watch->watched) will be imported.' %}</small>
{% include "users/user_task_status.html" with task=letterboxd_task %}
</form>
</details>
</article>
<article>
<details>
<summary>{% trans 'Import Podcast Subscriptions' %}</summary>
<form action="{% url 'users:import_opml' %}"
method="post"
enctype="multipart/form-data">
<form hx-post="{% url 'users:import_opml' %}" enctype="multipart/form-data">
{% csrf_token %}
<div>
{% trans 'Import Method' %}
@ -331,11 +188,230 @@
<input id="opml_import_mode_1" type="radio" name="import_mode" value="1">
{% trans 'Import as a new collection' %}
</label>
{% trans 'Visibility' %}:
<label for="opml_visibility_0">
<input type="radio"
name="visibility"
value="0"
required=""
id="opml_visibility_0"
checked>
{% trans 'Public' %}
</label>
<label for="opml_visibility_1">
<input type="radio"
name="visibility"
value="1"
required=""
id="opml_visibility_1">
{% trans 'Followers Only' %}
</label>
<label for="opml_visibility_2">
<input type="radio"
name="visibility"
value="2"
required=""
id="opml_visibility_2">
{% trans 'Mentioned Only' %}
</label>
<br>
{% trans 'Select OPML file' %}
<input type="file" name="file" id="excel" required accept=".opml,.xml">
<input type="file" name="file" required accept=".opml,.xml">
<input type="submit" value="{% trans 'Import' %}" />
</div>
{% include "users/user_task_status.html" with task=opml_import_task %}
</form>
</details>
</article>
<article>
<details>
<summary>{% trans 'Import NeoDB Archive' %}</summary>
<form hx-post="{% url 'users:import_neodb' %}"
enctype="multipart/form-data">
{% csrf_token %}
<ul>
<li>
{% trans 'Upload a <code>.zip</code> file containing <code>.csv</code> or <code>.ndjson</code> files exported from NeoDB.' %}
</li>
<li>{% trans 'Existing data may be overwritten.' %}</li>
</ul>
<input type="file" name="file" id="neodb_import_file" required accept=".zip">
<div id="detected_format_info"
style="display: none;
margin: 10px 0;
padding: 8px 12px;
border-radius: 4px;
background-color: var(--card-background-color, #f8f9fa);
border: 1px solid var(--card-border-color, #dee2e6)">
<i class="fa fa-info-circle"></i> {% trans 'Detected format' %}: <strong id="detected_format">-</strong>
</div>
<div id="visibility_settings" style="display: none;">
<p>
{% trans 'Visibility' %}:
<br>
<label for="csv_visibility_0">
<input type="radio"
name="visibility"
value="0"
required=""
id="csv_visibility_0"
checked>
{% trans 'Public' %}
</label>
<label for="csv_visibility_1">
<input type="radio"
name="visibility"
value="1"
required=""
id="csv_visibility_1">
{% trans 'Followers Only' %}
</label>
<label for="csv_visibility_2">
<input type="radio"
name="visibility"
value="2"
required=""
id="csv_visibility_2">
{% trans 'Mentioned Only' %}
</label>
</p>
</div>
<input type="hidden" name="format_type" id="format_type" value="" required>
<input type="submit" value="{% trans 'Import' %}" id="import_submit" />
<script src="{{ cdn_url }}/npm/jszip@3.10.1/dist/jszip.min.js"></script>
<script>
document.addEventListener('DOMContentLoaded', function() {
const fileInput = document.getElementById('neodb_import_file');
if (!fileInput) return;
fileInput.addEventListener('change', async function(event) {
const file = event.target.files[0];
if (!file) {
document.getElementById('detected_format_info').style.display = 'none';
document.getElementById('visibility_settings').style.display = 'none';
document.getElementById('format_type').value = '';
return;
}
// Check if it's a zip file
if (file.type !== 'application/zip' &&
file.type !== 'application/x-zip-compressed' &&
!file.name.toLowerCase().endsWith('.zip')) {
document.getElementById('detected_format_info').style.display = 'none';
document.getElementById('visibility_settings').style.display = 'none';
document.getElementById('format_type').value = '';
return;
}
// Update UI to show "Detecting format..." with a spinner
document.getElementById('detected_format').innerHTML = '{% trans "Detecting format..." %} <i class="fa fa-spinner fa-spin"></i>';
document.getElementById('detected_format_info').style.display = 'block';
try {
// Use JSZip to examine the actual contents of the ZIP file
const zip = new JSZip();
const zipContents = await zip.loadAsync(file);
const fileNames = Object.keys(zipContents.files);
// Check for specific files that indicate format type
const hasNdjson = fileNames.some(name => name === 'journal.ndjson' || name === 'catalog.ndjson');
const hasCsv = fileNames.some(name => name.endsWith('_mark.csv') ||
name.endsWith('_review.csv') ||
name.endsWith('_note.csv'));
let format = '';
let formatValue = '';
if (hasNdjson) {
format = 'NDJSON';
formatValue = 'ndjson';
} else if (hasCsv) {
format = 'CSV';
formatValue = 'csv';
} else {
// Unable to detect format from contents
format = '{% trans "Unknown format" %}';
formatValue = '';
}
// Update UI with detected format and appropriate icon
let formatIcon = '';
if (formatValue === 'ndjson') {
formatIcon = '<i class="fa fa-file-code"></i> ';
} else if (formatValue === 'csv') {
formatIcon = '<i class="fa fa-file-csv"></i> ';
} else {
formatIcon = '<i class="fa fa-question-circle"></i> ';
}
document.getElementById('detected_format').innerHTML = formatIcon + format;
document.getElementById('format_type').value = formatValue;
if (formatValue === 'csv') {
document.getElementById('visibility_settings').style.display = 'block';
} else {
document.getElementById('visibility_settings').style.display = 'none';
}
} catch (error) {
console.error('Error examining ZIP file:', error);
document.getElementById('detected_format').innerHTML = '<i class="fa fa-exclamation-triangle"></i> {% trans "Error detecting format" %}';
document.getElementById('format_type').value = '';
// Make the error more visible
document.getElementById('detected_format_info').style.backgroundColor = 'var(--form-element-invalid-active-border-color, #d9534f)';
document.getElementById('detected_format_info').style.color = 'white';
// Hide visibility settings on error
document.getElementById('visibility_settings').style.display = 'none';
}
if (document.getElementById('format_type').value == '') {
document.getElementById('import_submit').setAttribute('disabled', '')
} else {
document.getElementById('import_submit').removeAttribute('disabled')
}
});
});
</script>
{% include "users/user_task_status.html" with task=neodb_import_task %}
</form>
</details>
</article>
<article>
<details>
<summary>{% trans 'Export NeoDB Archive' %}</summary>
<form hx-post="{% url 'users:export_csv' %}" enctype="multipart/form-data">
{% csrf_token %}
<input type="submit"
value="{% trans 'Export marks, reviews and notes in CSV' %}" />
{% include "users/user_task_status.html" with task=csv_export_task %}
</form>
<hr>
<form hx-post="{% url 'users:export_ndjson' %}"
enctype="multipart/form-data">
{% csrf_token %}
<input type="submit" value="{% trans 'Export everything in NDJSON' %}" />
{% include "users/user_task_status.html" with task=ndjson_export_task %}
</form>
<hr>
<form action="{% url 'users:export_marks' %}"
method="post"
enctype="multipart/form-data">
{% csrf_token %}
<b>Exporting to this format will be deprecated soon; please use CSV or NDJSON format.</b>
<input type="submit"
class="secondary"
value="{% trans 'Export marks and reviews in XLSX (Doufen format)' %}" />
{% if export_task %}
<br>
{% trans 'Last export' %}: {{ export_task.created_time }}
{% trans 'Status' %}: {{ export_task.get_state_display }}
<br>
{{ export_task.message }}
{% if export_task.metadata.file %}
<a href="{% url 'users:export_marks' %}" download>{% trans 'Download' %}</a>
{% endif %}
{% endif %}
</form>
</details>
</article>
@ -351,25 +427,6 @@
</div>
</details>
</article>
{% comment %}
<article>
<details>
<summary>{% trans 'Reset visibility for all marks' %}</summary>
<form action="{% url 'users:reset_visibility' %}" method="post">
{% csrf_token %}
<input type="submit" value="{% trans 'Reset' %}" />
<div>
<input type="radio" name="visibility" id="visPublic" value="0" checked>
<label for="visPublic">{% trans 'Public' %}</label>
<input type="radio" name="visibility" id="visFollower" value="1">
<label for="visFollower">{% trans 'Followers Only' %}</label>
<input type="radio" name="visibility" id="visSelf" value="2">
<label for="visSelf">{% trans 'Mentioned Only' %}</label>
</div>
</form>
</details>
</article>
{% endcomment %}
</div>
{% include "_sidebar.html" with show_profile=1 identity=request.user.identity %}
</main>

View file

@ -1,19 +1,33 @@
{% load i18n %}
<div hx-get="{% url 'users:user_task_status' 'csv_import' %}"
{% if task.state == 0 or task.state == 1 %}hx-target="this" hx-trigger="every 30s"{% endif %}
hx-swap="outerHTML">
{% trans 'Status' %}: {{ task.get_state_display }}。
{{ task.message }}
<br>
{% if task.metadata.total and task.metadata.processed %}
<div class="progress-container">
<progress value="{{ task.metadata.processed }}" max="{{ task.metadata.total }}"></progress>
<div class="progress-text">
{{ task.metadata.processed }} / {{ task.metadata.total }}
({{ task.metadata.imported }} imported,
{{ task.metadata.skipped }} skipped,
{{ task.metadata.failed }} failed)
</div>
{% if task %}
<div hx-target="this"
{% if task.state == 0 or task.state == 1 %} hx-get="{% url 'users:user_task_status' task.type %}" hx-trigger="every 30s"{% endif %}
hx-swap="outerHTML">
<div>
{% if task.state == 0 %}
<i class="fa-solid fa-spinner fa-spin"></i>
{% elif task.state == 1 %}
<i class="fa-solid fa-gear fa-spin"></i>
{% elif task.state == 3 %}
<i class="fa-solid fa-triangle-exclamation"></i>
{% elif 'exporter' in task.type %}
<a href="{% url 'users:user_task_download' task.type %}" download><i class="fa fa-download"></i></a>
{% else %}
<i class="fa-solid fa-check"></i>
{% endif %}
{{ task.created_time }}
{{ task.message }}
</div>
{% endif %}
</div>
{% if task.state == 0 or task.state == 1 %}
{% if task.metadata.total and task.metadata.processed %}
<div>
<progress value="{{ task.metadata.processed }}" max="{{ task.metadata.total }}"></progress>
</div>
{% endif %}
{% endif %}
{% if task.metadata.failed_items %}
{% trans 'Failed items' %}:
<textarea readonly>{% for item in task.metadata.failed_items %}{{item}}&#10;{% endfor %}</textarea>
{% endif %}
</div>
{% endif %}

View file

@ -10,7 +10,10 @@ urlpatterns = [
path("data", data, name="data"),
path("info", account_info, name="info"),
path("profile", account_profile, name="profile"),
path("task/<str:task_name>/status", user_task_status, name="user_task_status"),
path("task/<str:task_type>/status", user_task_status, name="user_task_status"),
path(
"task/<str:task_type>/download", user_task_download, name="user_task_download"
),
path("data/import/status", data_import_status, name="import_status"),
path("data/import/goodreads", import_goodreads, name="import_goodreads"),
path("data/import/douban", import_douban, name="import_douban"),

View file

@ -4,6 +4,7 @@ import os
from django.conf import settings
from django.contrib import messages
from django.contrib.auth.decorators import login_required
from django.core.exceptions import BadRequest
from django.db.models import Min
from django.http import HttpResponse
from django.shortcuts import redirect, render
@ -18,8 +19,8 @@ from journal.importers import (
DoubanImporter,
GoodreadsImporter,
LetterboxdImporter,
NdjsonImporter,
OPMLImporter,
get_neodb_importer,
)
from journal.models import ShelfType
from takahe.utils import Takahe
@ -92,6 +93,19 @@ def data(request):
start_date = queryset.aggregate(Min("created_time"))["created_time__min"]
start_year = start_date.year if start_date else current_year
years = reversed(range(start_year, current_year + 1))
# Import tasks - check for both CSV and NDJSON importers
csv_import_task = CsvImporter.latest_task(request.user)
ndjson_import_task = NdjsonImporter.latest_task(request.user)
# Use the most recent import task for display
if ndjson_import_task and (
not csv_import_task
or ndjson_import_task.created_time > csv_import_task.created_time
):
neodb_import_task = ndjson_import_task
else:
neodb_import_task = csv_import_task
return render(
request,
"users/data.html",
@ -100,10 +114,11 @@ def data(request):
"import_task": DoubanImporter.latest_task(request.user),
"export_task": DoufenExporter.latest_task(request.user),
"csv_export_task": CsvExporter.latest_task(request.user),
"csv_import_task": CsvImporter.latest_task(request.user),
"neodb_import_task": neodb_import_task, # Use the most recent import task
"ndjson_export_task": NdjsonExporter.latest_task(request.user),
"letterboxd_task": LetterboxdImporter.latest_task(request.user),
"goodreads_task": GoodreadsImporter.latest_task(request.user),
# "opml_task": OPMLImporter.latest_task(request.user),
"years": years,
},
)
@ -121,19 +136,23 @@ def data_import_status(request):
@login_required
def user_task_status(request, task_name: str):
match task_name:
case "csv_import":
def user_task_status(request, task_type: str):
match task_type:
case "journal.csvimporter":
task_cls = CsvImporter
case "csv_export":
case "journal.ndjsonimporter":
task_cls = NdjsonImporter
case "journal.csvexporter":
task_cls = CsvExporter
case "ndjson_export":
case "journal.ndjsonexporter":
task_cls = NdjsonExporter
case "letterboxd":
case "journal.letterboxdimporter":
task_cls = LetterboxdImporter
case "goodreads":
case "journal.goodreadsimporter":
task_cls = GoodreadsImporter
case "douban":
case "journal.opmlimporter":
task_cls = OPMLImporter
case "journal.doubanimporter":
task_cls = DoubanImporter
case _:
return redirect(reverse("users:data"))
@ -141,6 +160,28 @@ def user_task_status(request, task_name: str):
return render(request, "users/user_task_status.html", {"task": task})
@login_required
def user_task_download(request, task_type: str):
match task_type:
case "journal.csvexporter":
task_cls = CsvExporter
case "journal.ndjsonexporter":
task_cls = NdjsonExporter
case _:
return redirect(reverse("users:data"))
task = task_cls.latest_task(request.user)
if not task or task.state != Task.States.complete or not task.metadata.get("file"):
messages.add_message(request, messages.ERROR, _("Export file not available."))
return redirect(reverse("users:data"))
response = HttpResponse()
response["X-Accel-Redirect"] = (
settings.MEDIA_URL + task.metadata["file"][len(settings.MEDIA_ROOT) :]
)
response["Content-Type"] = "application/zip"
response["Content-Disposition"] = f'attachment; filename="{task.filename}.zip"'
return response
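The download view hands the actual file transfer to the front-end web server via X-Accel-Redirect. A worked example of the path rewrite above, using hypothetical settings values:
# Hypothetical values illustrating the MEDIA_ROOT -> MEDIA_URL rewrite performed above.
MEDIA_ROOT = "/data/media/"
MEDIA_URL = "/media/"
stored_file = "/data/media/export/2025/03/archive.zip"    # task.metadata["file"]
internal_path = MEDIA_URL + stored_file[len(MEDIA_ROOT):]
# internal_path == "/media/export/2025/03/archive.zip", served internally by the web server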
@login_required
def export_reviews(request):
if request.method != "POST":
@ -150,6 +191,7 @@ def export_reviews(request):
@login_required
def export_marks(request):
# TODO: deprecated
if request.method == "POST":
DoufenExporter.create(request.user).enqueue()
messages.add_message(request, messages.INFO, _("Generating exports."))
@ -189,22 +231,10 @@ def export_csv(request):
)
return redirect(reverse("users:data"))
CsvExporter.create(request.user).enqueue()
messages.add_message(request, messages.INFO, _("Generating exports."))
return redirect(reverse("users:data"))
else:
task = CsvExporter.latest_task(request.user)
if not task or task.state != Task.States.complete:
messages.add_message(
request, messages.ERROR, _("Export file not available.")
)
return redirect(reverse("users:data"))
response = HttpResponse()
response["X-Accel-Redirect"] = (
settings.MEDIA_URL + task.metadata["file"][len(settings.MEDIA_ROOT) :]
return redirect(
reverse("users:user_task_status", args=("journal.csvexporter",))
)
response["Content-Type"] = "application/zip"
response["Content-Disposition"] = f'attachment; filename="{task.filename}.zip"'
return response
return redirect(reverse("users:data"))
@login_required
@ -221,22 +251,10 @@ def export_ndjson(request):
)
return redirect(reverse("users:data"))
NdjsonExporter.create(request.user).enqueue()
messages.add_message(request, messages.INFO, _("Generating exports."))
return redirect(reverse("users:data"))
else:
task = NdjsonExporter.latest_task(request.user)
if not task or task.state != Task.States.complete:
messages.add_message(
request, messages.ERROR, _("Export file not available.")
)
return redirect(reverse("users:data"))
response = HttpResponse()
response["X-Accel-Redirect"] = (
settings.MEDIA_URL + task.metadata["file"][len(settings.MEDIA_ROOT) :]
return redirect(
reverse("users:user_task_status", args=("journal.ndjsonexporter",))
)
response["Content-Type"] = "application/zip"
response["Content-Disposition"] = f'attachment; filename="{task.filename}.zip"'
return response
return redirect(reverse("users:data"))
@login_required
@ -263,24 +281,26 @@ def sync_mastodon_preference(request):
@login_required
def import_goodreads(request):
if request.method == "POST":
raw_url = request.POST.get("url")
if GoodreadsImporter.validate_url(raw_url):
GoodreadsImporter.create(
request.user,
visibility=int(request.POST.get("visibility", 0)),
url=raw_url,
).enqueue()
messages.add_message(request, messages.INFO, _("Import in progress."))
else:
messages.add_message(request, messages.ERROR, _("Invalid URL."))
return redirect(reverse("users:data"))
if request.method != "POST":
return redirect(reverse("users:data"))
raw_url = request.POST.get("url")
if not GoodreadsImporter.validate_url(raw_url):
raise BadRequest(_("Invalid URL."))
task = GoodreadsImporter.create(
request.user,
visibility=int(request.POST.get("visibility", 0)),
url=raw_url,
)
task.enqueue()
return redirect(reverse("users:user_task_status", args=(task.type,)))
@login_required
def import_douban(request):
if request.method != "POST":
return redirect(reverse("users:data"))
if not DoubanImporter.validate_file(request.FILES["file"]):
raise BadRequest(_("Invalid file."))
f = (
settings.MEDIA_ROOT
+ "/"
@ -290,64 +310,75 @@ def import_douban(request):
with open(f, "wb+") as destination:
for chunk in request.FILES["file"].chunks():
destination.write(chunk)
if not DoubanImporter.validate_file(request.FILES["file"]):
messages.add_message(request, messages.ERROR, _("Invalid file."))
return redirect(reverse("users:data"))
DoubanImporter.create(
task = DoubanImporter.create(
request.user,
visibility=int(request.POST.get("visibility", 0)),
mode=int(request.POST.get("import_mode", 0)),
file=f,
).enqueue()
messages.add_message(
request, messages.INFO, _("File is uploaded and will be imported soon.")
)
return redirect(reverse("users:data"))
task.enqueue()
return redirect(reverse("users:user_task_status", args=(task.type,)))
@login_required
def import_letterboxd(request):
if request.method == "POST":
f = (
settings.MEDIA_ROOT
+ "/"
+ GenerateDateUUIDMediaFilePath("x.zip", settings.SYNC_FILE_PATH_ROOT)
)
os.makedirs(os.path.dirname(f), exist_ok=True)
with open(f, "wb+") as destination:
for chunk in request.FILES["file"].chunks():
destination.write(chunk)
LetterboxdImporter.create(
request.user,
visibility=int(request.POST.get("visibility", 0)),
file=f,
).enqueue()
messages.add_message(
request, messages.INFO, _("File is uploaded and will be imported soon.")
)
return redirect(reverse("users:data"))
if request.method != "POST":
return redirect(reverse("users:data"))
if not LetterboxdImporter.validate_file(request.FILES["file"]):
raise BadRequest(_("Invalid file."))
f = (
settings.MEDIA_ROOT
+ "/"
+ GenerateDateUUIDMediaFilePath("x.zip", settings.SYNC_FILE_PATH_ROOT)
)
os.makedirs(os.path.dirname(f), exist_ok=True)
with open(f, "wb+") as destination:
for chunk in request.FILES["file"].chunks():
destination.write(chunk)
task = LetterboxdImporter.create(
request.user,
visibility=int(request.POST.get("visibility", 0)),
file=f,
)
task.enqueue()
return redirect(reverse("users:user_task_status", args=(task.type,)))
@login_required
def import_opml(request):
if request.method == "POST":
importer = OPMLImporter(
request.user,
int(request.POST.get("visibility", 0)),
int(request.POST.get("import_mode", 0)),
)
if importer.import_from_file(request.FILES["file"]):
messages.add_message(
request, messages.INFO, _("File is uploaded and will be imported soon.")
)
else:
messages.add_message(request, messages.ERROR, _("Invalid file."))
return redirect(reverse("users:data"))
if request.method != "POST":
return redirect(reverse("users:data"))
if not OPMLImporter.validate_file(request.FILES["file"]):
raise BadRequest(_("Invalid file."))
f = (
settings.MEDIA_ROOT
+ "/"
+ GenerateDateUUIDMediaFilePath("x.zip", settings.SYNC_FILE_PATH_ROOT)
)
os.makedirs(os.path.dirname(f), exist_ok=True)
with open(f, "wb+") as destination:
for chunk in request.FILES["file"].chunks():
destination.write(chunk)
task = OPMLImporter.create(
request.user,
visibility=int(request.POST.get("visibility", 0)),
mode=int(request.POST.get("import_mode", 0)),
file=f,
)
task.enqueue()
return redirect(reverse("users:user_task_status", args=(task.type,)))
@login_required
def import_neodb(request):
if request.method == "POST":
format_type_hint = request.POST.get("format_type", "").lower()
if format_type_hint == "csv":
importer = CsvImporter
elif format_type_hint == "ndjson":
importer = NdjsonImporter
else:
raise BadRequest("Invalid file.")
f = (
settings.MEDIA_ROOT
+ "/"
@ -357,16 +388,11 @@ def import_neodb(request):
with open(f, "wb+") as destination:
for chunk in request.FILES["file"].chunks():
destination.write(chunk)
importer = get_neodb_importer(f)
if not importer:
messages.add_message(request, messages.ERROR, _("Invalid file."))
return redirect(reverse("users:data"))
importer.create(
task = importer.create(
request.user,
visibility=int(request.POST.get("visibility", 0)),
file=f,
).enqueue()
messages.add_message(
request, messages.INFO, _("File is uploaded and will be imported soon.")
)
task.enqueue()
return redirect(reverse("users:user_task_status", args=(task.type,)))
return redirect(reverse("users:data"))