merge
Some checks are pending
code check / lint (3.12) (push) Waiting to run
code check / type-checker (3.12) (push) Waiting to run
Mirror to Codeberg / to_codeberg (push) Waiting to run
unit test / django (3.12) (push) Waiting to run

This commit is contained in:
gesang 2025-03-09 21:10:31 +01:00
commit ccd2e76610
Signed by: gesang
GPG key ID: 6CE35141D31CEAFB
64 changed files with 3173 additions and 4195 deletions

View file

@ -3,22 +3,6 @@ name: code check
on: [push, pull_request]
jobs:
doc:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ['3.12']
steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
cache: pip
- name: Run pre-commit
run: |
python -m pip install mkdocs-material
mkdocs build -s -d /tmp/neodb-docs
lint:
runs-on: ubuntu-latest
strategy:

View file

@ -1,3 +1,3 @@
# Federation
see [doc](docs/internals/federation.md) for FEP-67ff related information.
see [doc](https://neodb.net/internals/federation.md) for FEP-67ff related information.

View file

@ -4,19 +4,21 @@
[![Docker Pulls](https://img.shields.io/docker/pulls/neodb/neodb?label=docker&color=3791E0&style=for-the-badge)](https://hub.docker.com/r/neodb/neodb)
[![GitHub License](https://img.shields.io/github/license/neodb-social/neodb?color=E69A48&style=for-the-badge)](https://github.com/neodb-social/neodb/blob/main/LICENSE)
# 🧩 NeoDB
_mark the things you love._
[NeoDB](https://neodb.net) (fka boofilsic) is an open source project and free service to help users manage, share and discover collections, reviews and ratings for culture products (e.g. books, movies, music, podcasts, games and performances) in Fediverse.
[NeoDB.social](https://neodb.social) is a free instance hosted by NeoDB developers, there are more [servers](https://neodb.net/servers/) and [apps](https://neodb.net/apps/) available. Your support is essential to keep them free and open-sourced.
[NeoDB.social](https://neodb.social) is a free instance hosted by NeoDB developers, there are more [servers](https://neodb.net/servers/) and [apps](https://neodb.net/apps/) available.
Follow us on [Fediverse](https://mastodon.online/@neodb), [Bluesky](https://bsky.app/profile/neodb.net) or join our [Discord community](https://discord.gg/QBHkrV8bxK) to share your ideas/questions/creations.
Follow us on [Fediverse](https://mastodon.online/@neodb), [Bluesky](https://bsky.app/profile/neodb.net) or join our [Discord community](https://discord.gg/QBHkrV8bxK) to share your ideas/questions/creations. Your support is essential to keep the services free and open-sourced.
[![Mastodon](https://img.shields.io/mastodon/follow/106919732872456302?style=for-the-badge&logo=mastodon&logoColor=fff&label=%40neodb%40mastodon.social&color=6D75D2)](https://mastodon.social/@neodb)
[![Discord](https://img.shields.io/discord/1041738638364528710?label=Discord&logo=discord&logoColor=fff&color=6D75D2&style=for-the-badge)](https://discord.gg/QBHkrV8bxK)
[![Kofi](https://img.shields.io/badge/Ko--Fi-Donate-orange?label=Support%20NeoDB%20on%20Ko-fi&style=for-the-badge&color=ff5f5f&logo=ko-fi)](https://ko-fi.com/neodb)
## Features
- Manage a shared catalog of books/movies/tv shows/music album/games/podcasts/performances
+ search or create catalog items in each category
@ -66,13 +68,17 @@ Follow us on [Fediverse](https://mastodon.online/@neodb), [Bluesky](https://bsky
- Other
+ i18n: English, Danish and Simp/Trad Chinese available; contribution for more languages welcomed
## Host your own instance
Please see [docs/install.md](docs/install.md)
Please see [installation guide](https://neodb.net/install/).
## Contribution
- To build application with NeoDB API, documentation is available in [NeoDB API Developer Console](https://neodb.social/developer/)
- To help develop NeoDB, please see [docs/development.md](docs/development.md) for some basics to start with
- To help develop NeoDB, please see [development guide](https://neodb.net/development/) for some basics to start with
- To translate NeoDB to more languages, please join [our project on Weblate](https://hosted.weblate.org/projects/neodb/neodb/)
- Source code for [NeoDB documentation](https://neodb.net) can be found [here](https://github.com/neodb-social/neodb-doc)
## Sponsor
If you like this project, please consider donating to [NeoDB.social on ko-fi](https://ko-fi.com/neodb), or [Takahē](https://www.patreon.com/takahe) and [NiceDB](https://patreon.com/tertius), without whom this project wouldn't be possible.

View file

@ -449,6 +449,7 @@ LANGUAGE_CODE, PREFERRED_LANGUAGES = _init_language_settings(
if TESTING: # force en if testing
LANGUAGE_CODE = "en"
PREFERRED_LANGUAGES = ["en"]
LOCALE_PATHS = [os.path.join(BASE_DIR, "locale")]
@ -580,7 +581,7 @@ SEARCH_INDEX_NEW_ONLY = False
INDEX_ALIASES = env("INDEX_ALIASES")
DOWNLOADER_SAVEDIR = env("NEODB_DOWNLOADER_SAVE_DIR", default="/tmp") # type: ignore
DOWNLOADER_SAVEDIR = env("NEODB_DOWNLOADER_SAVE_DIR", default="") # type: ignore
DISABLE_MODEL_SIGNAL = False # disable index and social feeds during importing/etc

View file

@ -205,6 +205,7 @@ class BasicDownloader:
)
return resp, response_type
except RequestException as e:
# logger.debug(f"RequestException: {e}")
self.logs.append(
{"response_type": RESPONSE_NETWORK_ERROR, "url": url, "exception": e}
)
@ -340,16 +341,19 @@ class ImageDownloaderMixin:
def validate_response(self, response):
if response and response.status_code == 200:
try:
raw_img = response.content
img = Image.open(BytesIO(raw_img))
img.load() # corrupted image will trigger exception
content_type = response.headers.get("Content-Type")
content_type = response.headers["content-type"]
if content_type.startswith("image/svg+xml"):
self.extention = "svg"
return RESPONSE_OK
file_type = filetype.get_type(
mime=content_type.partition(";")[0].strip()
)
if file_type is None:
return RESPONSE_NETWORK_ERROR
self.extention = file_type.extension
raw_img = response.content
img = Image.open(BytesIO(raw_img))
img.load() # corrupted image will trigger exception
return RESPONSE_OK
except Exception:
return RESPONSE_NETWORK_ERROR

View file

@ -771,22 +771,22 @@ class Item(PolymorphicModel):
return not self.is_deleted and self.merged_to_item is None
@cached_property
def rating(self):
def rating_info(self):
from journal.models import Rating
return Rating.get_rating_for_item(self)
return Rating.get_info_for_item(self)
@property
def rating(self):
return self.rating_info.get("average")
@cached_property
def rating_count(self):
from journal.models import Rating
return Rating.get_rating_count_for_item(self)
return self.rating_info.get("count")
@cached_property
def rating_distribution(self):
from journal.models import Rating
return Rating.get_rating_distribution_for_item(self)
return self.rating_info.get("distribution")
@cached_property
def tags(self):

View file

@ -6,6 +6,8 @@ from catalog.music.utils import *
class BasicMusicTest(TestCase):
databases = "__all__"
def test_gtin(self):
self.assertIsNone(upc_to_gtin_13("018771208112X"))
self.assertIsNone(upc_to_gtin_13("999018771208112"))
@ -15,6 +17,8 @@ class BasicMusicTest(TestCase):
class SpotifyTestCase(TestCase):
databases = "__all__"
def test_parse(self):
t_id_type = IdType.Spotify_Album
t_id_value = "65KwtzkJXw7oT819NFWmEP"
@ -48,6 +52,8 @@ class SpotifyTestCase(TestCase):
class DoubanMusicTestCase(TestCase):
databases = "__all__"
def test_parse(self):
t_id_type = IdType.DoubanMusic
t_id_value = "33551231"
@ -74,6 +80,8 @@ class DoubanMusicTestCase(TestCase):
class MultiMusicSitesTestCase(TestCase):
databases = "__all__"
@use_local_response
def test_albums(self):
url1 = "https://music.douban.com/subject/33551231/"
@ -92,6 +100,8 @@ class MultiMusicSitesTestCase(TestCase):
class BandcampTestCase(TestCase):
databases = "__all__"
def test_parse(self):
t_id_type = IdType.Bandcamp
t_id_value = "intlanthem.bandcamp.com/album/in-these-times"
@ -119,6 +129,8 @@ class BandcampTestCase(TestCase):
class DiscogsReleaseTestCase(TestCase):
databases = "__all__"
def test_parse(self):
t_id_type = IdType.Discogs_Release
t_id_value = "25829341"
@ -155,6 +167,8 @@ class DiscogsReleaseTestCase(TestCase):
class DiscogsMasterTestCase(TestCase):
databases = "__all__"
def test_parse(self):
t_id_type = IdType.Discogs_Master
t_id_value = "469004"
@ -182,6 +196,8 @@ class DiscogsMasterTestCase(TestCase):
class AppleMusicTestCase(TestCase):
databases = "__all__"
def test_parse(self):
t_id_type = IdType.AppleMusic
t_id_value = "1284391545"
@ -201,8 +217,10 @@ class AppleMusicTestCase(TestCase):
self.assertEqual(site.ready, False)
site.get_resource_ready()
self.assertEqual(site.ready, True)
self.assertEqual(site.resource.metadata["title"], "Kids Only")
self.assertEqual(
site.resource.metadata["localized_title"][0]["text"], "Kids Only"
)
self.assertEqual(site.resource.metadata["artist"], ["Leah Dou"])
self.assertIsInstance(site.resource.item, Album)
self.assertEqual(site.resource.item.genre, ["Pop"])
self.assertEqual(site.resource.item.duration, 2371628)
self.assertEqual(site.resource.item.genre, ["Pop", "Music"])
self.assertEqual(site.resource.item.duration, 2368000)

View file

@ -9,8 +9,9 @@ Scraping the website directly.
"""
import json
from datetime import timedelta
import dateparser
from django.utils.dateparse import parse_duration
from loguru import logger
from catalog.common import *
@ -18,7 +19,6 @@ from catalog.models import *
from common.models.lang import (
SITE_DEFAULT_LANGUAGE,
SITE_PREFERRED_LANGUAGES,
detect_language,
)
from common.models.misc import uniq
@ -39,7 +39,6 @@ class AppleMusic(AbstractSite):
headers = {
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:107.0) Gecko/20100101 Firefox/107.0",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Accept-Language": BasicDownloader.get_accept_language(),
"Accept-Encoding": "gzip, deflate",
"Connection": "keep-alive",
"DNT": "1",
@ -70,80 +69,63 @@ class AppleMusic(AbstractSite):
return locales
def scrape(self):
matched_content = None
matched_schema_data = None
localized_title = []
localized_desc = []
for lang, locales in self.get_locales().items():
for loc in locales: # waterfall thru all locales
url = f"https://music.apple.com/{loc}/album/{self.id_value}"
try:
tl = f"{lang}-{loc}" if lang == "zh" else lang
headers = {
"Accept-Language": tl,
}
headers.update(self.headers)
content = (
BasicDownloader(url, headers=self.headers).download().html()
)
logger.info(f"got localized content from {url}")
elem = content.xpath(
"//script[@id='serialized-server-data']/text()"
)
txt: str = elem[0] # type:ignore
page_data = json.loads(txt)[0]
album_data = page_data["data"]["sections"][0]["items"][0]
title = album_data["title"]
brief = album_data.get("modalPresentationDescriptor", {}).get(
"paragraphText", ""
)
tl = detect_language(title + " " + brief)
localized_title.append({"lang": tl, "text": title})
if brief:
localized_desc.append({"lang": tl, "text": brief})
if lang == SITE_DEFAULT_LANGUAGE or not matched_content:
matched_content = content
logger.debug(f"got localized content from {url}")
txt: str = content.xpath(
"//script[@id='schema:music-album']/text()"
)[0] # type:ignore
schema_data = json.loads(txt)
title = schema_data["name"]
if title:
localized_title.append({"lang": tl, "text": title})
try:
txt: str = content.xpath(
"//script[@id='serialized-server-data']/text()"
)[0] # type:ignore
server_data = json.loads(txt)
brief = server_data[0]["data"]["sections"][0]["items"][0][
"modalPresentationDescriptor"
]["paragraphText"]
if brief:
localized_desc.append({"lang": tl, "text": brief})
except Exception:
server_data = brief = None
if lang == SITE_DEFAULT_LANGUAGE or not matched_schema_data:
matched_schema_data = schema_data
break
except Exception:
pass
if matched_content is None:
if matched_schema_data is None: # no schema data found
raise ParseError(self, f"localized content for {self.url}")
elem = matched_content.xpath("//script[@id='serialized-server-data']/text()")
txt: str = elem[0] # type:ignore
page_data = json.loads(txt)[0]
album_data = page_data["data"]["sections"][0]["items"][0]
title = album_data["title"]
brief = album_data.get("modalPresentationDescriptor")
brief = brief.get("paragraphText") if brief else None
artist_list = album_data["subtitleLinks"]
artist = [item["title"] for item in artist_list]
track_data = page_data["data"]["seoData"]
date_elem = track_data.get("musicReleaseDate")
release_datetime = dateparser.parse(date_elem.strip()) if date_elem else None
release_date = (
release_datetime.strftime("%Y-%m-%d") if release_datetime else None
artist = [a["name"] for a in matched_schema_data.get("byArtist", [])]
release_date = matched_schema_data.get("datePublished", None)
genre = matched_schema_data.get("genre", [])
image_url = matched_schema_data.get("image", None)
track_list = [t["name"] for t in matched_schema_data.get("tracks", [])]
duration = round(
sum(
(parse_duration(t["duration"]) or timedelta()).total_seconds() * 1000
for t in matched_schema_data.get("tracks", [])
)
)
track_list = [
f"{i}. {track['attributes']['name']}"
for i, track in enumerate(track_data["ogSongs"], 1)
]
duration_list = [
track["attributes"].get("durationInMillis", 0)
for track in track_data["ogSongs"]
]
duration = int(sum(duration_list))
genre = track_data["schemaContent"].get("genre")
if genre:
genre = [
genre[0]
] # apple treat "Music" as a genre. Thus, only the first genre is obtained.
images = matched_content.xpath("//source[@type='image/jpeg']/@srcset")
image_elem: str = images[0] if images else "" # type:ignore
image_url = image_elem.split(" ")[0] if image_elem else None
pd = ResourceContent(
metadata={
"localized_title": uniq(localized_title),
"localized_description": uniq(localized_desc),
"title": title,
"brief": brief,
"artist": artist,
"genre": genre,
"release_date": release_date,

View file

@ -44,6 +44,7 @@ from catalog.common import (
)
from catalog.common.models import LANGUAGE_CHOICES_JSONFORM, LanguageListField
from common.models.lang import RE_LOCALIZED_SEASON_NUMBERS, localize_number
from common.models.misc import uniq
class TVShowInSchema(ItemInSchema):
@ -414,30 +415,35 @@ class TVSeason(Item):
- "Show Title Season X" with some localization
"""
s = super().display_title
if self.parent_item and (
RE_LOCALIZED_SEASON_NUMBERS.sub("", s) == ""
or s == self.parent_item.display_title
):
if self.parent_item.get_season_count() == 1:
return self.parent_item.display_title
elif self.season_number:
return _("{show_title} Season {season_number}").format(
show_title=self.parent_item.display_title,
season_number=localize_number(self.season_number),
)
else:
return f"{self.parent_item.display_title} {s}"
if self.parent_item:
if (
RE_LOCALIZED_SEASON_NUMBERS.sub("", s) == ""
or s == self.parent_item.display_title
):
if self.parent_item.get_season_count() == 1:
return self.parent_item.display_title
elif self.season_number:
return _("{show_title} Season {season_number}").format(
show_title=self.parent_item.display_title,
season_number=localize_number(self.season_number),
)
else:
return f"{self.parent_item.display_title} {s}"
elif self.parent_item.display_title not in s:
return f"{self.parent_item.display_title} ({s})"
return s
@cached_property
def additional_title(self) -> list[str]:
title = self.display_title
return [
t["text"]
for t in self.localized_title
if t["text"] != title
and RE_LOCALIZED_SEASON_NUMBERS.sub("", t["text"]) != ""
]
return uniq(
[
t["text"]
for t in self.localized_title
if t["text"] != title
and RE_LOCALIZED_SEASON_NUMBERS.sub("", t["text"]) != ""
]
)
def to_indexable_titles(self) -> list[str]:
titles = [t["text"] for t in self.localized_title if t["text"]]

View file

@ -86,7 +86,7 @@ def retrieve(request, item_path, item_uuid):
if request.method == "HEAD":
return HttpResponse()
if request.headers.get("Accept", "").endswith("json"):
return JsonResponse(item.ap_object)
return JsonResponse(item.ap_object, content_type="application/activity+json")
focus_item = None
if request.GET.get("focus"):
focus_item = get_object_or_404(

View file

@ -2,7 +2,7 @@ from django.conf import settings
from django.contrib.auth.decorators import login_required
from django.core.cache import cache
from django.core.exceptions import DisallowedHost
from django.http import HttpRequest, JsonResponse
from django.http import HttpRequest, HttpResponse, JsonResponse
from django.shortcuts import redirect, render
from django.urls import reverse
@ -81,36 +81,41 @@ def nodeinfo2(request):
)
def _is_json_request(request) -> bool:
return request.headers.get("HTTP_ACCEPT", "").endswith("json")
def _error_response(request, status: int, exception=None, default_message=""):
    """Build an error response in the format the client asked for.

    Args:
        request: the incoming request; only ``request.headers`` is inspected
            here before it is handed to ``render``.
        status: HTTP status code; also selects the ``"<status>.html"`` template.
        exception: optional exception whose ``str()`` becomes the message.
        default_message: message used when ``exception`` is falsy.

    Returns:
        A ``JsonResponse`` when the ``Accept`` header ends with ``json``, a
        plain ``HttpResponse`` carrying only the message when ``HX-Request``
        is present without ``HX-Boosted``, otherwise the rendered template.
    """
    message = str(exception) if exception else default_message
    # request.headers is keyed by real header names ("Accept", "HX-Request"),
    # not WSGI META keys ("HTTP_ACCEPT", "HTTP_HX_REQUEST").
    headers = request.headers
    if headers.get("Accept", "").endswith("json"):
        return JsonResponse({"error": message}, status=status)
    # HX-Request without HX-Boosted: presumably an htmx partial swap, so a
    # full HTML error page would be injected into the page fragment.
    if headers.get("HX-Request") is not None and headers.get("HX-Boosted") is None:
        return HttpResponse(message, status=status)
    return render(
        request,
        f"{status}.html",
        status=status,
        context={"message": message, "exception": exception},
    )
def error_400(request, exception=None):
if isinstance(exception, DisallowedHost):
url = settings.SITE_INFO["site_url"] + request.get_full_path()
return redirect(url, permanent=True)
if _is_json_request(request):
return JsonResponse({"error": "invalid request"}, status=400)
return render(request, "400.html", status=400, context={"exception": exception})
return _error_response(request, 400, exception, "invalid request")
def error_403(request, exception=None):
if _is_json_request(request):
return JsonResponse({"error": "forbidden"}, status=403)
return render(request, "403.html", status=403, context={"exception": exception})
return _error_response(request, 403, exception, "forbidden")
def error_404(request, exception=None):
if _is_json_request(request):
return JsonResponse({"error": "not found"}, status=404)
request.session.pop("next_url", None)
return render(request, "404.html", status=404, context={"exception": exception})
return _error_response(request, 404, exception, "not found")
def error_500(request, exception=None):
if _is_json_request(request):
return JsonResponse({"error": "something wrong"}, status=500)
return render(request, "500.html", status=500, context={"exception": exception})
return _error_response(request, 500, exception, "something wrong")
def console(request):

View file

@ -1,81 +0,0 @@
# API
## Endpoints
NeoDB has a set of API endpoints mapping to its functions like marking a book or listing collections, they can be found in swagger based API documentation at `/developer/` of your running instance, [a version of it](https://neodb.social/developer/) is available on our flagship instance.
NeoDB also supports a subset of Mastodon API, details can be found in [Mastodon API documentation](https://docs.joinmastodon.org/api/).
Both set of APIs can be accessed by the same access token.
## How to authorize
### Create an application
you must have at least one URL included in the Redirect URIs field, e.g. `https://example.org/callback`, or use `urn:ietf:wg:oauth:2.0:oob` if you don't have a callback URL.
```
curl https://neodb.social/api/v1/apps \
-d client_name=MyApp \
-d redirect_uris=https://example.org/callback \
-d website=https://my.site
```
and save the `client_id` and `client_secret` returned in the response:
```
{
"client_id": "CLIENT_ID",
"client_secret": "CLIENT_SECRET",
"name": "MyApp",
"redirect_uri": "https://example.org/callback",
"vapid_key": "PUSH_KEY",
"website": "https://my.site"
}
```
### Guide your user to open this URL
```
https://neodb.social/oauth/authorize?response_type=code&client_id=CLIENT_ID&redirect_uri=https://example.org/callback&scope=read+write
```
### Once authorized by the user, it will redirect to `https://example.org/callback` with a `code` parameter:
```
https://example.org/callback?code=AUTH_CODE
```
### Obtain access token with the following POST request:
```
curl https://neodb.social/oauth/token \
-d "client_id=CLIENT_ID" \
-d "client_secret=CLIENT_SECRET" \
-d "code=AUTH_CODE" \
-d "redirect_uri=https://example.org/callback" \
-d "grant_type=authorization_code"
```
and access token will be returned in the response:
```
{
"access_token": "ACCESS_TOKEN",
"token_type": "Bearer",
"scope": "read write"
}
```
### Use the access token to access protected endpoints like `/api/me`
```
curl -H "Authorization: Bearer ACCESS_TOKEN" -X GET https://neodb.social/api/me
```
and response will be returned accordingly:
```
{"url": "https://neodb.social/users/xxx/", "external_acct": "xxx@yyy.zzz", "display_name": "XYZ", "avatar": "https://yyy.zzz/xxx.gif"}
```

View file

@ -1,19 +0,0 @@
# Apps
NeoDB web version will provide the most features and experience, while some third-party apps are also available below.
## Apps for NeoDB
A few apps for NeoDB are being actively developed:
- [Piecelet](https://piecelet.app) by `@piecelet@mastodon.social` - [App Store](https://apps.apple.com/app/piecelet-for-neodb/id6739444863) / [Source Code](https://github.com/lcandy2/neodb-app)
- [Chihu](https://chihu.app) by `@chihu@mastodon.social` - [Test Flight](https://testflight.apple.com/join/WmbnP9Vx)
These apps are not affiliated with NeoDB, but they are being developed with the support of this community. If you are also developing an app for NeoDB, and wish to share that with the community, please [edit this file](https://github.com/neodb-social/neodb/edit/main/docs/apps.md) and submit a pull request.
## Mastodon apps
[Mastodon compatible mobile and native apps](https://joinmastodon.org/apps) can be used to log in and utilize the micro-blogging features in NeoDB server.
In addition to micro-blogging, Mastodon-compatible apps can also be used to take notes on books you are currently reading. Just head to the bookmark section in your app: your currently-reading books are listed there as bookmarked posts, and replying to any of them will create a note for that book.

File diff suppressed because one or more lines are too long

Before

Width:  |  Height:  |  Size: 42 KiB

View file

@ -1,210 +0,0 @@
# Configuration
## Important settings you may want to change first
Be sure to set these in `.env` before starting the instance for the first time:
- `NEODB_SECRET_KEY` - 50 characters of random string, no white space
- `NEODB_SITE_NAME` - the name of your site
- `NEODB_SITE_DOMAIN` - the domain name of your site
**`NEODB_SECRET_KEY` and `NEODB_SITE_DOMAIN` must not be changed later.**
if you are doing debug or development:
- `NEODB_DEBUG` - True will turn on debug for both neodb and takahe, turn off relay, and reveal self as debug mode in nodeinfo (so peers won't try to run fedi search on this node)
- `NEODB_IMAGE` - the docker image to use, `neodb/neodb:edge` for the main branch
## Settings for customization
- `NEODB_SITE_LOGO`
- `NEODB_SITE_ICON`
- `NEODB_USER_ICON`
- `NEODB_SITE_COLOR` - one of [these color schemes](https://picocss.com/docs/colors)
- `NEODB_SITE_INTRO`
- `NEODB_SITE_HEAD`
- `NEODB_SITE_DESCRIPTION`
- `NEODB_PREFERRED_LANGUAGES` - preferred languages when importing titles from 3rd party sites like TMDB and Steam, comma-separated list of ISO-639-1 two-letter codes, `en,zh` by default. It can include languages with no UI translations yet; e.g. if set to `ja,en,zh`, the NeoDB scraper will fetch catalog metadata in three languages if they are available from third-party sites, and Japanese users (i.e. those whose browser language is set to ja-JP) will see an English UI with Japanese metadata.
- `NEODB_DISCOVER_FILTER_LANGUAGE` - `False` by default; when set to `True`, `/discover/` will only show items with languages match one of `NEODB_PREFERRED_LANGUAGES`.
- `NEODB_DISCOVER_SHOW_LOCAL_ONLY` - `False` by default; when set to `True`, only show items marked by local users rather than entire network on `/discover/`
- `NEODB_DISCOVER_UPDATE_INTERVAL` - minutes between each update for popular items on `/discover/`
- `NEODB_SITE_LINKS` - a list of title and links to show in the footer, comma separated, e.g. `Feedback=https://discord.gg/8KweCuApaK,ToS=/pages/rules/`
- `NEODB_INVITE_ONLY` - `False` by default, set to `True` to require invite code(generated by `neodb-manage invite --create`) to register
- `NEODB_ENABLE_LOCAL_ONLY` - `False` by default, set to `True` to allow user to post marks as "local public"
- `NEODB_LOGIN_MASTODON_WHITELIST` - a list of Mastodon instances to allow login from, comma separated
- `NEODB_EMAIL_FROM` - the email address to send email from
- `NEODB_EMAIL_URL` - email sender configuration, e.g.
- `smtp://<username>:<password>@<host>:<port>`
- `smtp+tls://<username>:<password>@<host>:<port>`
- `smtp+ssl://<username>:<password>@<host>:<port>`
- `anymail://<anymail_backend_name>?<anymail_args>`, to send email via email service providers, see [anymail doc](https://anymail.dev/)
## Settings for administration
- `DISCORD_WEBHOOKS` - Discord channel to send notification about user submitted suggestion and changes, e.g. `suggest=https://discord.com/api/webhooks/123/abc,audit=https://discord.com/api/webhooks/123/def`. Both suggest and audit channels must be in forum mode.
- `NEODB_SENTRY_DSN` , `TAKAHE_SENTRY_DSN` - [Sentry](https://sentry.io/) DSN to log errors.
## Settings for Federation
- `NEODB_SEARCH_PEERS` is empty by default, which means NeoDB will search all known peers running a production version of NeoDB when users look for items. This can be set to a comma-separated list of host names, so that NeoDB will only search those servers; or search no other peers if set to just `-`.
- `NEODB_DISABLE_DEFAULT_RELAY` is set to `False` by default, the server will send and receive public posts from `relay.neodb.net`.
`relay.neodb.net` is [open sourced](https://github.com/neodb-social/neodb-relay) and operated by NeoDB developers, it works like most ActivityPub relays except it only relays between NeoDB instances, it helps public information like catalogs and trends flow between NeoDB instances. You may set it to `True` if you don't want to relay public posts with other NeoDB instances.
## Settings for external item sources
- `SPOTIFY_API_KEY` - base64('CLIENT_ID:SECRET'), see [spotify doc](https://developer.spotify.com/documentation/web-api/tutorials/client-credentials-flow)
- `TMDB_API_V3_KEY` - API v3 key from [TMDB](https://developer.themoviedb.org/)
- `GOOGLE_API_KEY` - API key for [Google Books](https://developers.google.com/books/docs/v1/using)
- `DISCOGS_API_KEY` - personal access token from [Discogs](https://www.discogs.com/settings/developers)
- `IGDB_API_CLIENT_ID`, `IGDB_API_CLIENT_SECRET` - IGDB [keys](https://api-docs.igdb.com/)
- `NEODB_SEARCH_SITES` is empty by default, which means NeoDB will search all available sources. This can be set to a comma-separated list of site names (e.g. `goodreads,googlebooks,spotify,tmdb,igdb,bandcamp,apple_podcast`), so that NeoDB will only search those sites; or not search any of them if set to just `-`.
## Other maintenance tasks
Add alias to your shell for easier access
```
alias neodb-manage='docker-compose --profile production run --rm shell neodb-manage'
```
Toggle user's active, staff and super user status
```
neodb-manage user --active <username>
neodb-manage user --staff <username>
neodb-manage user --super <username>
```
create a super user; delete a user / remote identity (`takahe-stator` and `neodb-worker` containers must be running to complete the deletion)
```
neodb-manage createsuperuser
neodb-manage user --delete username
neodb-manage user --delete username@remote.instance
```
Create an invite link
```
neodb-manage invite --create
```
Manage user tasks and cron jobs
```
neodb-manage task --list
neodb-manage cron --list
```
Manage search index
```
neodb-manage index --reindex
```
Crawl links
```
neodb-manage cat [--save] <url> # parse / save a supported link
neodb-manage crawl <url> # crawl all recognizable links from a page
```
## Run PostgreSQL/Redis/Typesense without Docker
It's currently possible but quite cumbersome to run without Docker, hence not recommended. However, it's possible to use Docker only to run the NeoDB server while reusing existing PostgreSQL/Redis/Typesense servers with `compose.override.yml`; an example for reference:
```
services:
redis:
profiles: ['disabled']
typesense:
profiles: ['disabled']
neodb-db:
profiles: ['disabled']
takahe-db:
profiles: ['disabled']
migration:
extra_hosts:
- "host.docker.internal:host-gateway"
depends_on: !reset []
neodb-web:
extra_hosts:
- "host.docker.internal:host-gateway"
depends_on: !reset []
healthcheck: !reset {}
neodb-web-api:
extra_hosts:
- "host.docker.internal:host-gateway"
depends_on: !reset []
healthcheck: !reset {}
neodb-worker:
extra_hosts:
- "host.docker.internal:host-gateway"
depends_on: !reset []
neodb-worker-extra:
extra_hosts:
- "host.docker.internal:host-gateway"
depends_on: !reset []
takahe-web:
extra_hosts:
- "host.docker.internal:host-gateway"
depends_on: !reset []
takahe-stator:
extra_hosts:
- "host.docker.internal:host-gateway"
depends_on: !reset []
shell:
extra_hosts:
- "host.docker.internal:host-gateway"
depends_on: !reset []
root:
extra_hosts:
- "host.docker.internal:host-gateway"
depends_on: !reset []
dev-neodb-web:
extra_hosts:
- "host.docker.internal:host-gateway"
depends_on: !reset []
dev-neodb-worker:
extra_hosts:
- "host.docker.internal:host-gateway"
depends_on: !reset []
dev-takahe-web:
extra_hosts:
- "host.docker.internal:host-gateway"
depends_on: !reset []
dev-takahe-stator:
extra_hosts:
- "host.docker.internal:host-gateway"
depends_on: !reset []
dev-shell:
extra_hosts:
- "host.docker.internal:host-gateway"
depends_on: !reset []
dev-root:
extra_hosts:
- "host.docker.internal:host-gateway"
depends_on: !reset []
```
(`extra_hosts` is only needed if PostgreSQL/Redis/Typesense is on your host server)
## Multiple instances on one server
It's possible to run multiple clusters in one host server with docker compose, as long as `NEODB_SITE_DOMAIN`, `NEODB_PORT` and `NEODB_DATA` are different.
## Scaling up
For a high-traffic instance, raise these settings to higher values, as long as the host server can handle them:
- `NEODB_WEB_WORKER_NUM`
- `NEODB_API_WORKER_NUM`
- `NEODB_RQ_WORKER_NUM`
- `TAKAHE_WEB_WORKER_NUM`
- `TAKAHE_STATOR_CONCURRENCY`
- `TAKAHE_STATOR_CONCURRENCY_PER_MODEL`
Further scaling up with multiple nodes (e.g. via Kubernetes) is beyond the scope of this document, but consider running db/redis/typesense separately, and then duplicating web/worker/stator containers as long as connections and mounts are properly configured; `migration` only runs once when starting or upgrading, and it should be kept that way.

View file

@ -1,187 +0,0 @@
Development
===========
Overview
--------
NeoDB is a Django project, and it runs side by side with a [modified version](https://github.com/neodb-social/neodb-takahe) of [Takahē](https://github.com/jointakahe/takahe) (a separate Django project, code in `neodb_takahe` folder of this repo as submodule). They communicate with each other mainly thru database and task queue, [the diagram](troubleshooting.md#containers) demonstrates a typical architecture. Currently the two are loosely coupled, so you may take either one offline without immediate impact on the other, which makes it very easy to conduct maintenance and troubleshooting separately. In the future, they may get combined but it's not decided and will not be decided very soon.
Prerequisite
------------
- Python 3.12.x
- Docker Compose v2 or newer
Prepare the code
----------------
When checking out NeoDB source code, make sure submodules are also checked out:
```
git clone https://github.com/neodb-social/neodb.git
cd neodb
git submodule update --init
```
Install [rye](http://rye.astral.sh) package manager, packages and pre-commit hooks:
```
curl -sSf https://rye.astral.sh/get | bash
rye sync
. .venv/bin/activate
pre-commit install
```
To develop Takahe, install requirements(-dev) and pre-commit hooks for `neodb-takahe` project as well, preferably using a different virtual environment.
Note: the virtual environments and packages installed in this step are mostly for linting, the actual virtual environments and packages are from NeoDB docker image, and they can be configured differently, more on this later in this document.
Start local instance for development
------------------------------------
Follow [install guide](install.md) to create `.env` in the root folder of NeoDB code, including at least these configurations:
```
NEODB_SITE_NAME="My Test"
NEODB_SITE_DOMAIN=mydomain.dev
NEODB_SECRET_KEY=_random_string__50_characters_of_length__no_whitespaces_
NEODB_IMAGE=neodb/neodb:edge
NEODB_DEBUG=True
```
Download docker images and start pgsql/redis/typesense before initializing database schema:
```
docker compose --profile dev pull
docker compose up -d
```
Initialize database schema:
```
docker compose --profile dev run --rm dev-shell takahe-manage collectstatic --noinput
docker compose --profile dev run --rm dev-shell neodb-init
```
Start the cluster:
```
docker compose --profile dev up -d
```
Watch the logs:
```
docker compose --profile dev logs -f
```
Now the local development instance is ready to serve at `http://localhost:8000`, but to develop or test anything related with ActivityPub, reverse proxying it from externally reachable https://`NEODB_SITE_DOMAIN`/ is required; https is optional theoretically but in reality required for various compatibility reasons.
Note: `dev` profile is for development only, and quite different from `production`, so always use `--profile dev` instead of `--profile production`, more on those differences later in this document.
Common development tasks
------------------------
Shutdown the cluster:
```
docker compose --profile dev down
```
Restart background tasks (unlike web servers, background tasks don't auto reload after code change):
```
docker compose --profile dev restart dev-neodb-worker dev-takahe-stator
```
When updating code, always update submodules:
```
git pull
git submodule update --init
```
With git 2.15 or newer, you may use `git pull --recurse-submodules` or `git config --global submodule.recurse true` to make it automatic.
To save some typing, consider adding some aliases to `~/.profile`:
```
alias neodb-logs='docker compose --profile dev logs'
alias neodb-shell='docker compose --profile dev run --rm dev-shell'
alias neodb-manage='docker compose --profile dev run --rm dev-shell neodb-manage'
```
Always use `neodb-init`, not `python3 manage.py migrate`, to update db schema after updating code:
```
neodb-shell neodb-init
```
Run unit test:
```
neodb-manage test
```
Update translations:
```
django-admin makemessages --no-wrap --no-obsolete -i .venv -i neodb-takahe --keep-pot -l zh_Hans -l zh_Hant
# edit .po files, run the following to make sure edits are correct
django-admin makemessages --no-wrap --no-obsolete -i .venv -i neodb-takahe --keep-pot -l zh_Hans -l zh_Hant
django-admin compilemessages -i .venv -i neodb-takahe
```
Preview documentation:
```
python -m mkdocs serve
```
Development in Docker Compose
-----------------------------
The `dev` profile is different from `production`:
- code in `NEODB_SRC` (default: .) and `TAKAHE_SRC` (default: ./neodb-takahe) will be mounted and used in the container instead of code in the image
- `runserver` with autoreload will be used instead of `gunicorn` for both neodb and takahe web server
- /static/ and /s/ URLs are not mapped to the pre-generated/collected static file path; `NEODB_DEBUG=True` is required to locate static files from source code
- one `rqworker` container will be started, instead of two
- use `dev-shell` and `dev-root` to invoke shells, instead of `shell` and `root`
- there's no automatic `migration` container, but it can be triggered manually via `docker compose run dev-shell neodb-init`
Note:
- Python virtual environments inside docker image, which are `/neodb-venv` and `/takahe-venv`, will be used by default. They can be changed to different locations with `TAKAHE_VENV` and `NEODB_VENV` if needed, usually in a case of development code using a package not in docker venv.
- Some packages inside python virtual environments are platform dependent, so mount venv built by macOS host into the Linux container will likely not work.
- Python servers are launched as `app` user, who has no write access to anywhere except /tmp and media path, that's by design.
- Database/redis/typesense used in the container cluster are not accessible from host directly, which is by design. Querying them can be done by one of the following:
- `neodb-manage dbshell`
- `neodb-shell redis-cli -h redis`
- or create `compose.override.yml` to uncomment `ports` section.
- To expose the neodb and takahe web server directly, in the folder for configuration, create `compose.override.yml` with the following content:
```
services:
dev-neodb-web:
ports:
- "8001:8000"
dev-takahe-web:
ports:
- "8002:8000"
```
Development with Github Codespace
---------------------------------
At the time of writing, docker compose will work in Github Codespace by adding this in `.env`:
```
NEODB_SITE_DOMAIN=${CODESPACE_NAME}-8000.${GITHUB_CODESPACES_PORT_FORWARDING_DOMAIN}
```
Applications
------------
Main Django apps for NeoDB:
- `users` manages user in typical Django fashion
- `mastodon` this leverages [Mastodon API](https://docs.joinmastodon.org/client/intro/), [Threads API](https://developers.facebook.com/docs/threads/) and [ATProto](https://atproto.com) for user login and data sync
- `catalog` manages different types of items user may collect, and scrapers to fetch from external resources, see [catalog.md](internals/catalog.md) for more details
- `journal` manages user created content(review/ratings) and lists(collection/shelf/tag/note), see [journal.md](internals/journal.md) for more details
- `social` present timeline and notification for local users
- `takahe` communicate with Takahe (a separate Django server, run side by side with this server, code in `neodb_takahe` as submodule), see [federation.md](internals/federation.md) for customization of ActivityPub protocol
- `legacy` this is only used by instances upgraded from 0.4.x and earlier, to provide a link mapping from old urls to new ones. If your journey starts with 0.5 and later, feel free to ignore it.
Miscellaneous notes
-------------------
If models in `takahe/models.py` are changed, instead of adding incremental migrations, just regenerate `takahe/migrations/0001_initial.py` instead, because these migrations will never be applied except for constructing a test database.
A `libsass` wheel is stored in the repo to speed up docker image building process in Github Action.

View file

@ -1,69 +0,0 @@
# Features
NeoDB has [various features](features.md), and you may imagine it as a mix of Mastodon, Goodreads, Letterboxd, RateYourMusic, Podchaser, and more.
## Public catalog
- a shared catalog of books/movies/tv shows/music album/games/podcasts/performances
- search or create catalog items in each category
- one click create item with links to 3rd party sites:
- Goodreads
- IMDB
- The Movie Database
- Douban
- Google Books
- Discogs
- Spotify
- Apple Music
- Bandcamp
- Steam
- IGDB
- Bangumi
- Board Game Geek
- any RSS link to a podcast
- ...[full list](sites.md)
## Personal collections
- mark an item as wishlist/in progress/complete/dropped
- rate and write reviews for an item
- write notes for an item with progress (e.g. reading notes at page 42)
- create tags for an item, either privately or publicly
- create and share list of items
- tracking progress of a list (e.g. personal reading challenges)
- Import and export full user data archive
- import list or archives from some 3rd party sites:
- Goodreads reading list
- Letterboxd watch list
- Douban archive (via [Doufen](https://doufen.org/))
## Social
- view home feed with friends' activities
- every activity can be set as viewable to self/follower-only/public
- eligible items, e.g. podcasts and albums, are playable in feed
- login with other Fediverse identity and import social graph
- supported servers: Mastodon/Pleroma/Firefish/GoToSocial/Pixelfed/friendica/Takahē
- login with Bluesky / ATProto identity and import social graph
- login with threads.net (requires app verification by Meta)
- share collections and reviews to Fediverse/Bluesky/Threads
- ActivityPub support
- NeoDB users can follow and interact with users on other ActivityPub services like Mastodon and Pleroma
- NeoDB instances communicate with each other via an extended version of ActivityPub
- NeoDB instances may share public rating and reviews with a default relay
- implementation is based on [Takahē](https://jointakahe.org/) server
## API
- Mastodon compatible API
- most mastodon compatible apps are compatible with NeoDB
- NeoDB API to manage reviews and collections
## Languages
- English
- Simplified Chinese
- Traditional Chinese
- more to come and your contributions are welcomed!

View file

@ -1,47 +0,0 @@
NeoDB is an open-source software and global community platform [since 2021](origin.md). It helps users to manage and explore collections, reviews, and ratings for various cultural products, including books, movies, music, podcasts, games, and performances. Additionally, users can share their collections, publish microblogs, and engage with others in the Fediverse. NeoDB integrates the functionalities of platforms like Goodreads, Letterboxd, RateYourMusic, and Podchaser, among others. It also supports self-hosting and interconnection through containerized deployment and the ActivityPub protocol.
This project is licensed under AGPLv3.
## How to use
Please [find a server](servers.md) to register with your email, Mastodon or Bluesky ID.
Besides the web version of your chosen server, you may also use [various apps](apps.md) to login and manage your collections.
## Host your own instance
You can [install and run your own instance of NeoDB](install.md). If you decide to share your server with the public, please inform us so that we can add it to the list above.
All instances interact with each other in the Fediverse via ActivityPub, allowing you to follow users from other NeoDB instances or instances running other ActivityPub software, such as Mastodon.
## API, Development and Translation
- NeoDB offers [APIs to manage user collections](api.md), and [Mastodon client compatible API](https://docs.joinmastodon.org/client/) to manage user posts.
- For those interested in developing for NeoDB, please refer to the [development](development.md) section for basic instructions to get started.
- To help translate NeoDB to more languages, please join [our project on Weblate](https://hosted.weblate.org/projects/neodb/)
## Copyleft
- NeoDB software code is licensed under AGPL, please check it if you plan to provide a service with modified code.
- If you publish a work (e.g. application, website or service) utilizing NeoDB software or API, you may use "NeoDB" and its visual identity in your work, but please refrain from any usage that may imply the affiliation with NeoDB software and team.
- If you publish such work to serve others, we would appreciate it if you clearly mention the usage of NeoDB in your work, provide descriptions and links to NeoDB software (neodb.net) and service providers (e.g. neodb.social), and guide your users to support them directly.
## Donation
If you appreciate this project, please help spread the word, and consider sponsoring the service providers. Your support is essential to keep these services free and open-sourced, and to help them stay committed to their mission. You may donate to the flagship instance, NeoDB.social, through the link below; or check the instance you use for their details about donations.
[![Kofi](https://img.shields.io/badge/Ko--Fi-Donate-orange?label=Support%20this%20project%20on%20Ko-fi&style=for-the-badge&color=ff5f5f&logo=ko-fi)](https://ko-fi.com/neodb)
## Feedback
Follow us on the Fediverse or join our Discord community to share your ideas, questions, or creations.
[![Mastodon](https://img.shields.io/mastodon/follow/106935434138389355?domain=https%3A%2F%2Fmastodon.online&style=for-the-badge&logo=mastodon&logoColor=fff&label=%40neodb%40mastodon.online&color=6D75D2)](https://mastodon.online/@neodb)
[![Mastodon](https://img.shields.io/mastodon/follow/106919732872456302?style=for-the-badge&logo=mastodon&logoColor=fff&label=%40neodb%40mastodon.social&color=6D75D2)](https://mastodon.social/@neodb)
[![Discord](https://img.shields.io/discord/1041738638364528710?label=Discord%20Community&logo=discord&logoColor=fff&color=6D75D2&style=for-the-badge&cacheSeconds=21600)](https://discord.gg/QBHkrV8bxK)

View file

@ -1,90 +0,0 @@
Install
=======
For small and medium NeoDB instances, it's recommended to deploy as a container cluster with Docker Compose. To run a large instance, please see [scaling up](configuration.md#scaling-up) for some tips.
## Install docker compose
Follow [official instructions](https://docs.docker.com/compose/install/) to install Docker Compose if not yet.
Please verify its version is 2.x or higher before next step:
```
docker compose version
```
The rest of this doc assumes you can run docker commands without `sudo`, to verify that:
```
docker run --rm hello-world
```
Follow [official instructions](https://docs.docker.com/engine/install/linux-postinstall/) if it's not enabled, or use `sudo` to run commands in this doc.
## Prepare configuration files
- create a folder for configuration, e.g. `~/mysite/config`
- grab `compose.yml` and `neodb.env.example` from [latest release](https://github.com/neodb-social/neodb/releases)
- rename `neodb.env.example` to `.env`
## Set up .env file and web root
Change essential options like `NEODB_SITE_DOMAIN` in `.env` before starting the cluster for the first time. Changing them later may have unintended consequences, please make sure they are correct before exposing the service externally.
- `NEODB_SITE_NAME` - name of your site
- `NEODB_SITE_DOMAIN` - domain name of your site
- `NEODB_SECRET_KEY` - encryption key of session data
- `NEODB_DATA` is the path to store db/media/cache, it's `../data` by default, but can be any path that's writable
- `NEODB_DEBUG` - set to `False` for production deployment
- `NEODB_PREFERRED_LANGUAGES` - preferred languages when importing titles from 3rd party sites like TMDB and Steam, comma-separated list of ISO-639-1 two-letter codes, 'en,zh' by default.
Optionally, `robots.txt` and `logo.png` may be placed under `$NEODB_DATA/www-root/`.
See [neodb.env.example](https://raw.githubusercontent.com/neodb-social/neodb/main/neodb.env.example) and [configuration](configuration.md) for more options
## Start container
in the folder with `compose.yml` and `.env`, execute as the user you just created:
```
docker compose --profile production pull
docker compose --profile production up -d
```
Starting up for the first time might take a few minutes, depending on download speed, use the following commands for status and logs:
```
docker compose ps
docker compose --profile production logs -f
```
In a few seconds, the site should be up at 127.0.0.1:8000 , you may check it with:
```
curl http://localhost:8000/nodeinfo/2.0/
```
JSON response will be returned if the server is up and running:
```
{"version": "2.0", "software": {"name": "neodb", "version": "0.8-dev"}, "protocols": ["activitypub", "neodb"], "services": {"outbound": [], "inbound": []}, "usage": {"users": {"total": 1}, "localPosts": 0}, "openRegistrations": true, "metadata": {}}
```
## Make the site available publicly
Next step is to expose `http://127.0.0.1:8000` to external network as `https://yourdomain.tld` (NeoDB requires `https`). There are many ways to do it, you may use nginx or caddy as a reverse proxy server with an SSL cert configured, or configure a tunnel provider like cloudflared to do the same. Once done, you may check it with:
```
curl https://yourdomain.tld/nodeinfo/2.0/
```
You should see the same JSON response as above, and the site is now accessible to the public.
## Register an account and make it admin
Open `https://yourdomain.tld` in your browser and register an account, assuming username `admin`, run the following command to make it super user
```
docker compose --profile production run --rm shell neodb-manage user --super admin
```
Now your instance should be ready to serve. More tweaks are available, see [configuration](configuration.md) for options.

View file

@ -1,114 +0,0 @@
Catalog
=======
Data Models
-----------
All types of catalog items inherit from `Item`, which is stored as a multi-table Django model.
One `Item` may have multiple `ExternalResource`s, each representing one page on an external site.
```mermaid
classDiagram
class Item {
<<abstract>>
}
Item <|-- Album
class Album {
+String barcode
+String Douban_ID
+String Spotify_ID
}
Item <|-- Game
class Game {
+String Steam_ID
}
Item <|-- Podcast
class Podcast {
+String feed_url
+String Apple_ID
}
Item <|-- Performance
Item <|-- Work
class Work {
+String Douban_Work_ID
+String Goodreads_Work_ID
}
Item <|-- Edition
Item <|-- Series
Series *-- Work
Work *-- Edition
class Series {
+String Goodreads_Series_ID
}
class Work {
+String Douban_ID
+String Goodreads_ID
}
class Edition{
+String ISBN
+String Douban_ID
+String Goodreads_ID
+String GoogleBooks_ID
}
Item <|-- Movie
Item <|-- TVShow
Item <|-- TVSeason
Item <|-- TVEpisode
TVShow *-- TVSeason
TVSeason *-- TVEpisode
class TVShow{
+String IMDB_ID
+String TMDB_ID
}
class TVSeason{
+String Douban_ID
+String TMDB_ID
}
class TVEpisode{
+String IMDB_ID
+String TMDB_ID
}
class Movie{
+String Douban_ID
+String IMDB_ID
+String TMDB_ID
}
Item <|-- Collection
ExternalResource --* Item
class ExternalResource {
+enum site
+url: string
}
```
Add a new site
--------------
- If official API is available for the site, it should be the preferred way to get data.
- add a new value to `IdType` and `SiteName` in `catalog/common/models.py`
- add a new file in `catalog/sites/`, a new class inherits `AbstractSite`, with:
* `SITE_NAME`
* `ID_TYPE`
* `URL_PATTERNS`
* `WIKI_PROPERTY_ID` (not used now)
* `DEFAULT_MODEL` (unless specified in `scrape()` return val)
* a classmethod `id_to_url()`
* a method `scrape()` returns a `ResourceContent` object
* `BasicDownloader` or `ProxiedDownloader` can be used to download website content or API data, e.g. `content = BasicDownloader(url).download().html()`
* check out existing files in `catalog/sites/` for more examples
- add an import in `catalog/sites/__init__.py`
- add some tests to `catalog/<folder>/tests.py` according to site type
+ adding `DOWNLOADER_SAVEDIR = '/tmp'` to `settings.py` saves all responses to /tmp
+ run `neodb-manage cat <url>` for debugging or saving response file to `/tmp`. Detailed code of `cat` is in `catalog/management/commands/cat.py`
+ move captured response file to `test_data/`, except large/images files. Or if have to, replace it with a smallest version (e.g. 1x1 pixel / 1s audio)
+ add `@use_local_response` decorator to test methods that should pick up these responses (if `BasicDownloader` or `ProxiedDownloader` is used)
- run all the tests and make sure they pass
- Command: `neodb-manage test [--keepdb]`.
- See [this issue](https://github.com/neodb-social/neodb/issues/5) if `lxml.etree.ParserError` occurs on macOS.
- add a site UI label style to `common/static/scss/_sitelabel.scss`
- update documentation in [sites.md](../sites.md)

View file

@ -1,131 +0,0 @@
# Federation
## Supported federation protocols and standards
- [ActivityPub](https://www.w3.org/TR/activitypub/) (Server-to-Server)
- [WebFinger](https://webfinger.net/)
- [Http Signatures](https://datatracker.ietf.org/doc/html/draft-cavage-http-signatures)
- [NodeInfo](https://nodeinfo.diaspora.software/)
## Supported FEPs
- [FEP-f1d5: NodeInfo in Fediverse Software](https://codeberg.org/fediverse/fep/src/branch/main/fep/f1d5/fep-f1d5.md)
## NodeInfo
NeoDB instances can be identified from user agent string (`NeoDB/x.x (+https://example.org)`) and `protocols` in its nodeinfo, e.g. https://neodb.social/nodeinfo/2.0/ :
```json
{
"version": "2.0",
"software": {
"name": "neodb",
"version": "0.10.4.13",
"repository": "https://github.com/neodb-social/neodb",
"homepage": "https://neodb.net/"
},
"protocols": ["activitypub", "neodb"],
}
```
## ActivityPub
NeoDB's ActivityPub implementation is based on [Takahē](https://jointakahe.org), with some change to enable interchange of additional information between NeoDB instances.
### Activity
NeoDB adds additional fields to the `Note` activity:
- `relatedWith` is a list of NeoDB specific activities which are associated with this `Note`. For each activity, `id` and `href` are both unique links to that activity, `withRegardTo` links to the catalog item, `attributedTo` links to the user, `type` is one of:
- `Status`, its `status` can be one of: `complete`, `progress`, `wishlist` and `dropped`
- `Rating`, its `value` is rating grade (int, 1-10), `worst` is always 1, `best` is always 10
- `Comment`, its `content` is comment text
- `Review`, its `name` is review title, `content` is its body, `mediaType` is always `text/markdown` for now
- `Note`, its `content` is note text
- `tag` is used to store list of NeoDB catalog items, which are related with this activity. `type` of NeoDB catalog item can be one of `Edition`, `Movie`, `TVShow`, `TVSeason`, `TVEpisode`, `Album`, `Game`, `Podcast`, `PodcastEpisode`, `Performance`, `PerformanceProduction`; href will be the link to that item.
Example:
```json
{
"@context": ["https://www.w3.org/ns/activitystreams", {
"blurhash": "toot:blurhash",
"Emoji": "toot:Emoji",
"focalPoint": {
"@container": "@list",
"@id": "toot:focalPoint"
},
"Hashtag": "as:Hashtag",
"manuallyApprovesFollowers": "as:manuallyApprovesFollowers",
"sensitive": "as:sensitive",
"toot": "http://joinmastodon.org/ns#",
"votersCount": "toot:votersCount",
"featured": {
"@id": "toot:featured",
"@type": "@id"
}
}, "https://w3id.org/security/v1"],
"id": "https://neodb.social/@april_long_face@neodb.social/posts/380919151408919488/",
"type": "Note",
"relatedWith": [{
"id": "https://neodb.social/p/5oyF0qRx96mKKmVpFzHtMM",
"type": "Status",
"status": "complete",
"withRegardTo": "https://neodb.social/movie/7hfF7d0aFMaqHpFjUpq4zR",
"attributedTo": "https://neodb.social/@april_long_face@neodb.social/",
"href": "https://neodb.social/p/5oyF0qRx96mKKmVpFzHtMM",
"published": "2024-11-17T10:16:42.745240+00:00",
"updated": "2024-11-17T10:16:42.750917+00:00"
}, {
"id": "https://neodb.social/p/47cJnbQTkbSSN2izLwQMjo",
"type": "Comment",
"withRegardTo": "https://neodb.social/movie/7hfF7d0aFMaqHpFjUpq4zR",
"attributedTo": "https://neodb.social/@april_long_face@neodb.social/",
"content": "Broadway cin\u00e9math\u00e8que, at least I laughed hard.",
"href": "https://neodb.social/p/47cJnbQTkbSSN2izLwQMjo",
"published": "2024-11-17T10:16:42.745240+00:00",
"updated": "2024-11-17T10:16:42.777276+00:00"
}, {
"id": "https://neodb.social/p/3AyYu974qo6OU09AAsPweQ",
"type": "Rating",
"best": 10,
"value": 7,
"withRegardTo": "https://neodb.social/movie/7hfF7d0aFMaqHpFjUpq4zR",
"worst": 1,
"attributedTo": "https://neodb.social/@april_long_face@neodb.social/",
"href": "https://neodb.social/p/3AyYu974qo6OU09AAsPweQ",
"published": "2024-11-17T10:16:42.784220+00:00",
"updated": "2024-11-17T10:16:42.786458+00:00"
}],
"attributedTo": "https://neodb.social/@april_long_face@neodb.social/",
"content": "<p>\u770b\u8fc7 <a href=\"https://neodb.social/~neodb~/movie/7hfF7d0aFMaqHpFjUpq4zR\" rel=\"nofollow\">\u963f\u8bfa\u62c9</a> \ud83c\udf15\ud83c\udf15\ud83c\udf15\ud83c\udf17\ud83c\udf11 <br>Broadway cin\u00e9math\u00e8que, at least I laughed hard.</p><p><a href=\"https://neodb.social/tags/\u6211\u770b\u6211\u542c\u6211\u8bfb/\" class=\"mention hashtag\" rel=\"tag\">#\u6211\u770b\u6211\u542c\u6211\u8bfb</a></p>",
"published": "2024-11-17T10:16:42.745Z",
"sensitive": false,
"tag": [{
"type": "Hashtag",
"href": "https://neodb.social/tags/\u6211\u770b\u6211\u542c\u6211\u8bfb/",
"name": "#\u6211\u770b\u6211\u542c\u6211\u8bfb"
}, {
"type": "Movie",
"href": "https://neodb.social/movie/7hfF7d0aFMaqHpFjUpq4zR",
"image": "https://neodb.social/m/item/doubanmovie/2024/09/13/a30bf2f3-4f79-43ef-b22f-58ebc3fd8aae.jpg",
"name": "Anora"
}],
"to": ["https://www.w3.org/ns/activitystreams#Public"],
"updated": "2024-11-17T10:16:42.750Z",
"url": "https://neodb.social/@april_long_face/posts/380919151408919488/"
}
```
This is not ideal but a practical manner to pass along additional information between NeoDB instances and other ActivityPub servers. We have some ideas for improvements, but are open to more suggestions.
### Relay
NeoDB instances may share public rating and reviews with a default relay, which is currently `https://relay.neodb.net`. This relay is used to propagate public activities and catalog information between NeoDB instances.
Owner of each instance may choose to turn this off in their admin settings.
## ATProto
NeoDB is not a PDS itself currently, but it can interact with a PDS to import a user's social graph and send status updates. So technically NeoDB does not do full federation in ATProto, but NeoDB will handle some side effects of federation: e.g. when a user logs in via an ATProto handle, NeoDB will resolve the user's DID and store it, attempt further operations with the DID, update the user's handle if it has changed, and use the corresponding PDS for that handle. The user may still have to log in to NeoDB again with their Bluesky app password, since a change of PDS may invalidate the previous app password.

View file

@ -1,99 +0,0 @@
Journal
=======
Data Model
----------
```mermaid
classDiagram
User .. Piece
class Piece {
+User owner
+int visibility
}
class Item {
+str title
+str brief
-enum type
}
Piece <|-- Content
Item .. Content
class Content {
+Item target
}
Content <|-- Rating
class Rating {
+int grade
}
Content <|-- Review
class Review {
+str title
+str body
}
Content <|-- Comment
class Comment {
+str text
}
Content <|-- Note
class Note {
+str title
+str content
+enum progress_type
+str progress_value
}
Content <|-- Reply
class Reply {
+Content reply_to
}
Piece <|-- List
class List{
+ListItem[] items
}
Item .. ListItem
List *-- ListItem
class ListItem {
+int position
+Item item
+Dict metadata
}
List <|-- Collection
Item .. Collection
class Collection {
+str title
+str brief
+Bool collabrative
}
List <|-- Tag
class Tag {
+str title
}
List <|-- Shelf
class Shelf {
+Enum type
}
User .. ShelfLogManager
class ShelfLogManager {
+User owner
+ShelfLogEntry[] logs
}
ShelfLogManager *-- ShelfLogEntry
class ShelfLogEntry {
+Item item
+Shelf shelf
+DateTime timestamp
}
ShelfLogEntry .. Item
ShelfLogEntry .. Shelf
Shelf *-- ShelfItem
ListItem <|-- ShelfItem
ListItem <|-- TagItem
ListItem <|-- CollectionItem
Tag *-- TagItem
Collection *-- CollectionItem
```

View file

@ -1,100 +0,0 @@
Social
======
Data Model
----------
```mermaid
classDiagram
User .. Piece
class Piece {
+User owner
+int visibility
}
User .. Activity
class Activity {
+User owner
+int visibility
+Piece action_object
}
Activity .. Piece
Activity .. Item
class Item {
+str title
+str brief
-enum type
}
```
Activities
----------
Activity data may be used for:
1. time line view of user and her friends
2. chronological view of user's action about an item
3. ActivityStreams `OrderedCollection` for ActivityPub
However, 2 is currently implemented separately via `ShelfLogManager` in `journal` app, because users may want to change these records manually.
Local Timeline
--------------
| Local Timeline Activities | action object class |
| ------------------------- | ------------------- |
| Add an Item to Shelf | ShelfMember |
| Create a Collection | Collection |
| Like a Collection | Like |
| Create a Review | Review |
Activity Streams
----------------
This is a list of activities that should be either shown on the site, delivered as ActivityStreams, or both:
- `Add` / `Remove` an *Item* to / from a *List*:
+ add / remove *Item* to / from a user *Collection*
+ mark *Item* as wishlist / progress / complete, which are essentially add to / remove from user's predefined *Collection*
- `Create` / `Update` / `Delete` a user *Collection*
- `Create` / `Update` / `Delete` a *Content* with an `Object Link` to *Item*
+ `Create` / `Update` / `Delete` a *Comment* or *Review*
+ `Create` / `Update` / `Delete` a *Quote* or *Note*
- `Create` / `Update` / `Delete` a *Reply* to another *Content*
- `Announce` / `Like` a *Content* / *Collection*, or `Undo` that
- Social Graph interaction
+ `Follow`/`Undo` `Follow`
+ `Accept`/`Reject`
+ `Block`/`Undo` `Block`
Supporting these activities above will be essential to a reasonable ActivityPub server implementation.
There are additional activities not made into ActivityPub MVP but technically possible to support in future:
- `Create` / `Update` / `Delete` a *Content* in different flavors, without link to *Item*
* `Note` or `Article` without link to *Item*
* DM (`Note`)
* `Question`
* `Article`
* `Page`
* `Image`
* `Audio`
* `Video`
* `Event`
- `Add` `Content` to / `Remove` `Content` from a user *Collection*
* *Pin* / *Unpin*
- `Move`/`Delete` account
ActivityPub
-----------
TBA
References:
- https://www.w3.org/TR/activitypub/
- https://www.w3.org/TR/activitystreams-core/
- https://www.w3.org/TR/activitystreams-vocabulary/
- https://www.w3.org/TR/json-ld/
- https://codeberg.org/fediverse/fep/src/branch/main/feps/fep-e232.md
- https://socialhub.activitypub.rocks/t/guide-for-new-activitypub-implementers/479
- https://docs.joinmastodon.org/spec/activitypub/
- https://docs.joinbookwyrm.com/activitypub.html
- https://github.com/Podcastindex-org/podcast-namespace/blob/main/proposal-docs/social/social.md#socialinteract-element
- https://dev.funkwhale.audio/funkwhale/funkwhale/-/tree/develop/docs/developer_documentation/federation
- https://github.com/inventaire/inventaire/issues/187
- https://github.com/inventaire/inventaire/issues/533

View file

@ -1,16 +0,0 @@
Our Story
=========
Established in September 2021, NeoDB is an [open source project](https://github.com/neodb-social/neodb) and [free service](https://neodb.social/) to help users around the world share and discover reviews and ratings of books, movies, music and games in Fediverse without concern of tracking or censorship.
The community that we serve today is mostly made up of those who previously used social networks in China to share their reviews and ratings for books, movies and music. By doing so for the last 15 years, they have made these reviews a rich form of archive that is the container of personal stories, intellectual debates, political opinions, and more. However, in recent years many of them got suppressed and banned due to increasing censorship and authoritarian laws in China. Many users have lost trust in proprietary software and services in China. NeoDB started as a humble effort to build open source software and services to help these internet refugees recreate their social network identity, regain control of their own content, and share their discoveries and opinions with their friends freely in Fediverse and, thanks to the openness of ActivityPub, the whole world.
As we've gained some [initial traction](https://blog.joinmastodon.org/2021/12/mastodon-recap-2021/), we expect to continue supporting this fast-growing community with better portability, security and user experience. We also hope to connect our community with broader Fediverse users with diverse backgrounds and serve both via enhanced a11y and i18n, decentralized architecture and interchangeable data format and protocols on catalogue information and user generated content. Your support and donation will help us operate continuously and develop towards this vision with more confidence.
NeoDB is a fork of [NiceDB](https://github.com/doubaniux/boofilsic), our code is actively being developed and is open-sourced under the AGPL-v3 license. We highly welcome contributions in code, design, and localization.
Please follow us on Fediverse([en](https://mastodon.online/@neodb)|[cn](https://mastodon.social/@neodb)) / [Bluesky](https://bsky.app/profile/neodb.net) / [Twitter](https://x.com/NeoDBsocial), and help spread the word. Thank you for the support!

View file

@ -1,67 +0,0 @@
{
"version": "1.0",
"servers": [
{
"host": "neodb.social",
"description": "Flagship instance, managed by NeoDB developers.",
"label": [
"flagship"
],
"language": [
"zh",
"en"
]
},
{
"name": "NeoDB experimental",
"host": "eggplant.place",
"description": "Instance running development version of NeoDB software, which may have newer features and occasionally bugs, managed by NeoDB developers.",
"label": [
"beta"
],
"language": [
"en"
]
},
{
"name": "ReviewDB",
"host": "neodb.social",
"admin": [
"@shlee@aus.social"
],
"language": [
"en"
]
},
{
"name": "Minreol",
"host": "minreol.dk",
"admin": [
"@pmakholm@norrebro.space"
],
"language": [
"da"
]
},
{
"name": "CasDB",
"host": "db.casually.cat",
"admin": [
"@casuallynoted@casually.cat"
],
"language": [
"en"
]
},
{
"name": "KevGa-NeoDB",
"host": "neodb.kevga.de",
"admin": [
"@lorker@mastodon.kevga.de"
],
"language": [
"de"
]
}
]
}

View file

@ -1,21 +0,0 @@
# Servers
## Community instances
NeoDB is not a single website. To use it, you need to sign up on an instance, that lets you connect with other people using NeoDB across Fediverse and Bluesky.
{servers}
JSON version of this list is also available [here](servers.json). If you are hosting a public instance of NeoDB and wish to share that with the community, please [edit this file](https://github.com/neodb-social/neodb/edit/main/docs/servers.json) and submit a pull request.
To host your own instance of NeoDB, see [installation guide](install.md).
## Public relay hosted by NeoDB developers
- `relay.neodb.net` - NeoDB instances may connect to this relay to send and receive public posts, this is to help share items, ratings and reviews in the network, more on this in [configuration doc](configuration.md).
## Honorable mention
- [NiceDB](https://nicedb.org) - the original instance, no longer open for registration.

View file

@ -1,27 +0,0 @@
# Supported Sites
the following sites are supported
| | import link for media type | import archive |
| ----------------------| ----------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------ |
| Apple Music | Music (Album) | |
| Archive of Our Own | Book (Edition) | |
| Bandcamp | Music (Album) | |
| Bangumi | Book (Edition)<br>Music (Album)<br>Movie<br>TV (Season)<br>Game | |
| Board Game Geek | Game | |
| BooksTW 博客來 | Book (Edition) | |
| Discogs | Music (Album) | |
| Douban 豆瓣 | Book (Edition, Work)<br>Music (Album)<br>Movie<br>TV (Show, Season, Episode)<br>Game<br>Performance (Performance, Production) | Yes, upload [doufen](https://doufen.org) archive |
| Goodreads | Book (Edition, Work) | Yes, submit profile or shelf link |
| Google Books | Book (Edition) | |
| IGDB | Game | |
| IMDB | Movie<br>TV (Show, Episode) | |
| jjwxc 晋江文学城 | Book (Edition) | |
| Letterboxd | not supported (link in archive is mapped to TMDB) | Yes, upload exported archive |
| Qidian 起点 | Book (Edition) | |
| Spotify | Music (Album) | |
| Steam | Game | |
| The Movie Database | Movie<br>TV (Show, Season, Episode) | |
| ypshuo 阅评说 | Book (Edition) | |
| RSS link to a podcast | Podcast | Yes, upload OPML |

View file

@ -1,14 +0,0 @@
:root {
--md-primary-fg-color: #0172ad;
--md-primary-fg-color--light: #0172ad;
--md-primary-fg-color--dark: #0172ad;
--md-accent-fg-color: #01aaff;
--md-accent-fg-color--light: #01aaff;
--md-accent-fg-color--dark: #01aaff;
font-family:system-ui, "Segoe UI", Roboto, Oxygen, Ubuntu, Cantarell, Helvetica, Arial, "Helvetica Neue", sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol", "Noto Color Emoji";
}
@media screen and (min-width: 76.25em) {
.md-nav__title {
display: none;
}
}

View file

@ -1,5 +0,0 @@
{% extends "base.html" %}
{% block extrahead %}
<link rel="me" href="https://mastodon.online/@neodb">
<link rel="me" href="https://mastodon.social/@neodb">
{% endblock %}

View file

@ -1,56 +0,0 @@
# Troubleshooting
## Tips
- `docker compose ps` to see if any service is down, (btw it's normal that `migration` is in `Exit 0` state)
- `docker compose run shell` to run a shell into the cluster; or `docker compose run root` for root shell, and `apt` is available if extra package needed
- see `Debug in Docker` in [development doc](development.md) for debugging tips
## Containers
a typical neodb cluster looks like:
```mermaid
flowchart TB
web[[Your reverse proxy server with SSL]] --- neodb-nginx[nginx listening on localhost:8000]
subgraph Containers managed by compose.yml
neodb-nginx --- neodb-web
neodb-nginx --- takahe-web
neodb-worker --- typesense[(typesense)]
neodb-worker --- neodb-db[(neodb-db)]
neodb-worker --- redis[(redis)]
neodb-web --- typesense
neodb-web --- neodb-db
neodb-web --- redis
neodb-web --- takahe-db[(takahe-db)]
migration([migration]) --- neodb-db
migration --- takahe-db
takahe-web --- takahe-db
takahe-web --- redis
takahe-stator --- takahe-db
takahe-stator --- redis
end
```
## Data Folders
a typical neodb folder after starting up should look like:
```
mysite
├── data # neodb data folder, location can be changed via NEODB_DATA in .env
│ ├── neodb-db # neodb database
│ ├── neodb-media # uid must be 1000 (app user in docker image), chown if not so
│ ├── redis # neodb/takahe cache
│ ├── takahe-cache # uid must be 33 (www-data user in docker image), chown if not so
│ ├── takahe-db # takahe database
│ ├── takahe-media # uid must be 1000 (app user in docker image), chown if not so
│ ├── typesense # neodb search index
│ └── www-root # neodb web root for robots.txt, logo.png and etc
└── config
├── compose.yml # copied from neodb release
└── .env # your configuration, see neodb.env.example
```

View file

@ -1,21 +0,0 @@
Upgrade
=======
Check the [release notes](https://github.com/neodb-social/neodb/releases), update `compose.yml` and `.env` as instructed.
If there is `compose.override.yml`, make sure it's compatible with the updated `compose.yml`.
Pull the latest container image
```bash
docker compose --profile production pull
```
Restart the entire cluster:
```bash
docker compose --profile production up -d
```
Optionally, clean up old images:
```bash
docker system prune -af --volumes
```

View file

@ -171,5 +171,5 @@ class CsvExporter(Task):
shutil.make_archive(filename[:-4], "zip", temp_folder_path)
self.metadata["file"] = filename
self.metadata["total"] = total
self.message = "Export complete."
self.message = f"{total} records exported."
self.save()

View file

@ -3,6 +3,7 @@ import os
import re
import shutil
import tempfile
import uuid
from django.conf import settings
from django.utils import timezone
@ -65,13 +66,15 @@ class NdjsonExporter(Task):
def _save_image(url):
if url.startswith("http"):
imgdl = ProxiedImageDownloader(url)
raw_img = imgdl.download().content
ext = imgdl.extention
file = GenerateDateUUIDMediaFilePath(f"x.{ext}", attachment_path)
with open(file, "wb") as binary_file:
binary_file.write(raw_img)
return file
try:
raw_img, ext = ProxiedImageDownloader.download_image(url, "")
if raw_img:
file = "%s/%s.%s" % (attachment_path, uuid.uuid4(), ext)
with open(file, "wb") as binary_file:
binary_file.write(raw_img)
return file
except Exception:
logger.debug(f"error downloading {url}")
elif url.startswith("/"):
p = os.path.abspath(
os.path.join(settings.MEDIA_ROOT, url[len(settings.MEDIA_URL) :])
@ -79,11 +82,8 @@ class NdjsonExporter(Task):
if p.startswith(settings.MEDIA_ROOT):
try:
shutil.copy2(p, attachment_path)
except Exception as e:
logger.error(
f"error copying {p} to {attachment_path}",
extra={"exception": e},
)
except Exception:
logger.error(f"error copying {p} to {attachment_path}")
return p
return url
@ -206,6 +206,25 @@ class NdjsonExporter(Task):
for item in self.ref_items:
f.write(json.dumps(item.ap_object, default=str) + "\n")
# Export actor.ndjson with Takahe identity data
filename = os.path.join(temp_folder_path, "actor.ndjson")
with open(filename, "w") as f:
f.write(json.dumps(self.get_header()) + "\n")
takahe_identity = self.user.identity.takahe_identity
identity_data = {
"type": "Identity",
"username": takahe_identity.username,
"domain": takahe_identity.domain_id,
"actor_uri": takahe_identity.actor_uri,
"name": takahe_identity.name,
"summary": takahe_identity.summary,
"metadata": takahe_identity.metadata,
"private_key": takahe_identity.private_key,
"public_key": takahe_identity.public_key,
"public_key_id": takahe_identity.public_key_id,
}
f.write(json.dumps(identity_data, default=str) + "\n")
filename = GenerateDateUUIDMediaFilePath(
"f.zip", settings.MEDIA_ROOT + "/" + settings.EXPORT_FILE_PATH_ROOT
)
@ -215,5 +234,5 @@ class NdjsonExporter(Task):
self.metadata["file"] = filename
self.metadata["total"] = total
self.message = "Export complete."
self.message = f"{total} records exported."
self.save()

View file

@ -1,34 +1,15 @@
import os
import zipfile
from .csv import CsvImporter
from .douban import DoubanImporter
from .goodreads import GoodreadsImporter
from .letterboxd import LetterboxdImporter
from .ndjson import NdjsonImporter
from .opml import OPMLImporter
def get_neodb_importer(filename: str) -> type[CsvImporter] | None:
if not os.path.exists(filename) or not zipfile.is_zipfile(filename):
return None
with zipfile.ZipFile(filename, "r") as z:
files = z.namelist()
if any(f == "journal.ndjson" for f in files):
return None
if any(
f.endswith("_mark.csv")
or f.endswith("_review.csv")
or f.endswith("_note.csv")
for f in files
):
return CsvImporter
__all__ = [
"CsvImporter",
"NdjsonImporter",
"LetterboxdImporter",
"OPMLImporter",
"DoubanImporter",
"GoodreadsImporter",
"get_neodb_importer",
]

197
journal/importers/base.py Normal file
View file

@ -0,0 +1,197 @@
import datetime
from typing import Dict, List, Literal, Optional
from django.conf import settings
from django.utils.dateparse import parse_datetime
from loguru import logger
from catalog.common.sites import SiteManager
from catalog.models import Edition, IdType, Item, SiteName
from journal.models import ShelfType
from users.models import Task
# Priority order applied when several links of one record resolve to known
# sites: candidate sites are sorted by their position in this list, and sites
# that are not listed here sort after all of them.
_PREFERRED_SITES = [
SiteName.Fediverse,
SiteName.RSS,
SiteName.TMDB,
SiteName.IMDB,
SiteName.GoogleBooks,
SiteName.Goodreads,
SiteName.IGDB,
]
class BaseImporter(Task):
class Meta:
app_label = "journal" # workaround bug in TypedModel
# Outcome of handling a single record; progress() also uses the value as the
# name of the metadata counter to increment.
ImportResult = Literal["imported", "skipped", "failed"]
# NOTE(review): presumably the name of the queue the Task machinery runs this
# job on -- confirm against users.models.Task.
TaskQueue = "import"
# Initial task metadata. progress() increments "processed" and the per-result
# counters and derives the percentage from "total".
DefaultMetadata = {
"total": 0,
"processed": 0,
"skipped": 0,
"imported": 0,
"failed": 0,
"failed_items": [],
"file": None,
"visibility": 0,
}
def progress(self, result: ImportResult) -> None:
    """Count one handled record and refresh the status message.

    Args:
        result: Outcome of the record just handled ('imported', 'skipped',
            or 'failed'); the metadata counter of the same name is bumped.
    """
    meta = self.metadata
    meta["processed"] += 1
    meta[result] = meta.get(result, 0) + 1

    # The percentage prefix is only shown once a non-zero total is known.
    prefix = ""
    if meta["total"]:
        percent = round(meta["processed"] / meta["total"] * 100)
        prefix = f"Progress: {percent}% - "

    counters = (
        f"{meta['imported']} imported, "
        f"{meta['skipped']} skipped, "
        f"{meta['failed']} failed"
    )
    self.message = prefix + counters
    self.save(update_fields=["metadata", "message"])
# Entry point of the import task. The base class provides no implementation
# and always raises NotImplementedError; concrete importers are expected to
# override this method.
def run(self) -> None:
raise NotImplementedError
def get_item_by_info_and_links(
    self, title: str, info_str: str, links: list[str]
) -> Optional[Item]:
    """Find the catalog item that an exported record refers to.

    The lookup goes from the cheapest strategy to the most expensive one and
    stops at the first hit: links to local items, known sites matched
    without extra requests, the same sites re-resolved without
    ``detect_redirection=False``, a remote fetch of each site, and finally
    the identifiers carried in the info string.

    Args:
        title: Item title (not used for matching at the moment).
        info_str: Item info string (space-separated key:value pairs).
        links: URLs that point to the item.

    Returns:
        Item if found, None otherwise
    """
    site_url = settings.SITE_INFO["site_url"] + "/"
    # look for local items first
    for link in links:
        if link.startswith(("/", site_url)):
            item = Item.get_by_url(link, resolve_merge=True)
            if item and not item.is_deleted:
                return item

    candidates = (
        SiteManager.get_site_by_url(link, detect_redirection=False)
        for link in links
    )
    sites = [site for site in candidates if site]
    # Preferred sites first, in list order; anything else sorts last.
    sites.sort(
        key=lambda x: _PREFERRED_SITES.index(x.SITE_NAME)
        if x.SITE_NAME in _PREFERRED_SITES
        else 99
    )

    # match items without extra requests
    for site in sites:
        item = site.get_item()
        if item:
            return item

    # match items after HEAD
    resolved = (
        SiteManager.get_site_by_url(site.url) if site.url else site
        for site in sites
    )
    sites = [site for site in resolved if site]
    for site in sites:
        item = site.get_item()
        if item:
            return item

    # fetch from remote
    for site in sites:
        try:
            logger.debug(f"fetching {site.url}")
            site.get_resource_ready()
            item = site.get_item()
            if item:
                return item
        except Exception as e:
            # Best effort: a failing site must not abort the other lookups.
            logger.error(f"Error fetching item: {e}")

    # Try using the info string (ISBN, IMDB); return on the first identifier
    # that matches so a later, non-matching key cannot discard the result.
    for key, value in self.parse_info(info_str).items():
        if not value:
            continue
        item = None
        if key == "isbn":
            item = Edition.objects.filter(
                primary_lookup_id_type=IdType.ISBN,
                primary_lookup_id_value=value,
            ).first()
        elif key == "imdb":
            item = Item.objects.filter(
                primary_lookup_id_type=IdType.IMDB,
                primary_lookup_id_value=value,
            ).first()
        if item:
            return item
    return None
def parse_tags(self, tags_str: str) -> List[str]:
    """Split a whitespace-separated tag string into its individual tags.

    An empty (or otherwise falsy) input yields an empty list.
    """
    if tags_str:
        # split() without a separator already trims each token and never
        # produces empty ones.
        return tags_str.split()
    return []
def parse_info(self, info_str: str) -> Dict[str, str]:
    """Turn a ``key:value key:value`` string into a dictionary.

    Tokens are separated by whitespace. A token without a colon is ignored,
    only the first colon of a token separates key from value, and a repeated
    key keeps its last value.
    """
    if not info_str:
        return {}
    pairs = (token.split(":", 1) for token in info_str.split() if ":" in token)
    return dict(pairs)
def parse_datetime(self, timestamp_str: str | None) -> Optional[datetime.datetime]:
"""Parse ISO format timestamp into datetime."""
if not timestamp_str:
return None
try:
dt = parse_datetime(timestamp_str)
if dt and dt.tzinfo is None:
dt = dt.replace(tzinfo=datetime.UTC)
return dt
except Exception as e:
logger.error(f"Error parsing datetime {timestamp_str}: {e}")
return None
def parse_shelf_type(self, status_str: str) -> ShelfType:
    """Map a status name to its ShelfType.

    Matching ignores case; an empty or unrecognised name falls back to
    ShelfType.WISHLIST.
    """
    fallback = ShelfType.WISHLIST
    if not status_str:
        return fallback
    by_name = {
        "wishlist": ShelfType.WISHLIST,
        "progress": ShelfType.PROGRESS,
        "complete": ShelfType.COMPLETE,
        "dropped": ShelfType.DROPPED,
    }
    normalized = status_str.lower()
    return by_name[normalized] if normalized in by_name else fallback

View file

@ -1,181 +1,22 @@
import csv
import datetime
import os
import tempfile
import zipfile
from typing import Dict, List, Optional
from typing import Dict
from django.conf import settings
from django.utils import timezone
from django.utils.dateparse import parse_datetime
from django.utils.translation import gettext as _
from loguru import logger
from catalog.common.sites import SiteManager
from catalog.models import Edition, IdType, Item, ItemCategory, SiteName
from journal.models import Mark, Note, Review, ShelfType
from users.models import Task
from catalog.models import ItemCategory
from journal.models import Mark, Note, Review
_PREFERRED_SITES = [
SiteName.Fediverse,
SiteName.RSS,
SiteName.TMDB,
SiteName.IMDB,
SiteName.GoogleBooks,
SiteName.Goodreads,
SiteName.IGDB,
]
from .base import BaseImporter
class CsvImporter(Task):
class CsvImporter(BaseImporter):
class Meta:
app_label = "journal" # workaround bug in TypedModel
TaskQueue = "import"
DefaultMetadata = {
"total": 0,
"processed": 0,
"skipped": 0,
"imported": 0,
"failed": 0,
"failed_items": [],
"file": None,
"visibility": 0,
}
def get_item_by_info_and_links(
self, title: str, info_str: str, links_str: str
) -> Optional[Item]:
"""Find an item based on information from CSV export.
Args:
title: Item title
info_str: Item info string (space-separated key:value pairs)
links_str: Space-separated URLs
Returns:
Item if found, None otherwise
"""
site_url = settings.SITE_INFO["site_url"] + "/"
links = links_str.strip().split()
# look for local items first
for link in links:
if link.startswith("/") or link.startswith(site_url):
item = Item.get_by_url(link, resolve_merge=True)
if item and not item.is_deleted:
return item
sites = [
SiteManager.get_site_by_url(link, detect_redirection=False)
for link in links
]
sites = [site for site in sites if site]
sites.sort(
key=lambda x: _PREFERRED_SITES.index(x.SITE_NAME)
if x.SITE_NAME in _PREFERRED_SITES
else 99
)
# match items without extra requests
for site in sites:
item = site.get_item()
if item:
return item
# match items after HEAD
sites = [
SiteManager.get_site_by_url(site.url) if site.url else site
for site in sites
]
sites = [site for site in sites if site]
for site in sites:
item = site.get_item()
if item:
return item
# fetch from remote
for site in sites:
try:
logger.debug(f"fetching {site.url}")
site.get_resource_ready()
item = site.get_item()
if item:
return item
except Exception as e:
logger.error(f"Error fetching item: {e}")
# Try using the info string
if info_str:
info_dict = {}
for pair in info_str.strip().split():
if ":" in pair:
key, value = pair.split(":", 1)
info_dict[key] = value
# Check for ISBN, IMDB, etc.
item = None
for key, value in info_dict.items():
if key == "isbn" and value:
item = Edition.objects.filter(
primary_lookup_id_type=IdType.ISBN,
primary_lookup_id_value=value,
).first()
elif key == "imdb" and value:
item = Item.objects.filter(
primary_lookup_id_type=IdType.IMDB,
primary_lookup_id_value=value,
).first()
if item:
return item
return None
def parse_tags(self, tags_str: str) -> List[str]:
"""Parse space-separated tags string into a list of tags."""
if not tags_str:
return []
return [tag.strip() for tag in tags_str.split() if tag.strip()]
def parse_info(self, info_str: str) -> Dict[str, str]:
"""Parse info string into a dictionary."""
info_dict = {}
if not info_str:
return info_dict
for pair in info_str.split():
if ":" in pair:
key, value = pair.split(":", 1)
info_dict[key] = value
return info_dict
def parse_datetime(self, timestamp_str: str) -> Optional[datetime.datetime]:
"""Parse ISO format timestamp into datetime."""
if not timestamp_str:
return None
try:
dt = parse_datetime(timestamp_str)
if dt and dt.tzinfo is None:
dt = dt.replace(tzinfo=datetime.UTC)
return dt
except Exception as e:
logger.error(f"Error parsing datetime {timestamp_str}: {e}")
return None
def parse_shelf_type(self, status_str: str) -> ShelfType:
"""Parse shelf type string into ShelfType enum."""
if not status_str:
return ShelfType.WISHLIST
status_map = {
"wishlist": ShelfType.WISHLIST,
"progress": ShelfType.PROGRESS,
"complete": ShelfType.COMPLETE,
"dropped": ShelfType.DROPPED,
}
return status_map.get(status_str.lower(), ShelfType.WISHLIST)
def import_mark(self, row: Dict[str, str]) -> str:
"""Import a mark from a CSV row.
@ -184,7 +25,9 @@ class CsvImporter(Task):
"""
try:
item = self.get_item_by_info_and_links(
row.get("title", ""), row.get("info", ""), row.get("links", "")
row.get("title", ""),
row.get("info", ""),
row.get("links", "").strip().split(),
)
if not item:
@ -246,7 +89,9 @@ class CsvImporter(Task):
"""
try:
item = self.get_item_by_info_and_links(
row.get("title", ""), row.get("info", ""), row.get("links", "")
row.get("title", ""),
row.get("info", ""),
row.get("links", "").strip().split(),
)
if not item:
@ -304,7 +149,9 @@ class CsvImporter(Task):
"""
try:
item = self.get_item_by_info_and_links(
row.get("title", ""), row.get("info", ""), row.get("links", "")
row.get("title", ""),
row.get("info", ""),
row.get("links", "").strip().split(),
)
if not item:
@ -361,26 +208,6 @@ class CsvImporter(Task):
)
return "failed"
def progress(self, result: str) -> None:
"""Update import progress.
Args:
result: The import result ('imported', 'skipped', or 'failed')
"""
self.metadata["processed"] += 1
self.metadata[result] = self.metadata.get(result, 0) + 1
progress_percentage = round(
self.metadata["processed"] / self.metadata["total"] * 100
)
self.message = (
f"Progress: {progress_percentage}% - "
f"{self.metadata['imported']} imported, "
f"{self.metadata['skipped']} skipped, "
f"{self.metadata['failed']} failed"
)
self.save(update_fields=["metadata", "message"])
def process_csv_file(self, file_path: str, import_function) -> None:
"""Process a CSV file using the specified import function."""
logger.debug(f"Processing {file_path}")
@ -424,7 +251,7 @@ class CsvImporter(Task):
# Set the total count in metadata
self.metadata["total"] = total_rows
self.message = f"Found {total_rows} items to import"
self.message = f"found {total_rows} records to import"
self.save(update_fields=["metadata", "message"])
# Now process all files
@ -432,7 +259,5 @@ class CsvImporter(Task):
import_function = getattr(self, f"import_{file_type}")
self.process_csv_file(file_path, import_function)
self.message = _("Import complete")
if self.metadata.get("failed_items", []):
self.message += f": {self.metadata['failed']} items failed ({len(self.metadata['failed_items'])} unique items)"
self.message = f"{self.metadata['imported']} items imported, {self.metadata['skipped']} skipped, {self.metadata['failed']} failed."
self.save()

View file

@ -154,6 +154,8 @@ class DoubanImporter(Task):
def run(self):
logger.info(f"{self.user} import start")
self.load_sheets()
self.message = f"豆瓣标记和评论导入开始,共{self.metadata['total']}篇。"
self.save(update_fields=["message"])
logger.info(f"{self.user} sheet loaded, {self.metadata['total']} lines total")
for name, param in self.mark_sheet_config.items():
self.import_mark_sheet(self.mark_data[name], param[0], name)

View file

@ -1,4 +1,5 @@
import csv
import os
import tempfile
import zipfile
from datetime import timedelta
@ -35,16 +36,24 @@ class LetterboxdImporter(Task):
"file": None,
}
def get_item_by_url(self, url):
@classmethod
def validate_file(cls, uploaded_file):
try:
return zipfile.is_zipfile(uploaded_file)
except Exception:
return False
@classmethod
def get_item_by_url(cls, url):
try:
h = BasicDownloader(url).download().html()
tu = h.xpath("//a[@data-track-action='TMDb']/@href")
tu = h.xpath("//a[@data-track-action='TMDB']/@href")
iu = h.xpath("//a[@data-track-action='IMDb']/@href")
if not tu:
i = h.xpath('//span[@class="film-title-wrapper"]/a/@href')
u2 = "https://letterboxd.com" + i[0] # type:ignore
h = BasicDownloader(u2).download().html()
tu = h.xpath("//a[@data-track-action='TMDb']/@href")
tu = h.xpath("//a[@data-track-action='TMDB']/@href")
iu = h.xpath("//a[@data-track-action='IMDb']/@href")
if not tu:
logger.error(f"Unknown TMDB for {url}")
@ -121,7 +130,6 @@ class LetterboxdImporter(Task):
self.progress(1)
def progress(self, mark_state: int, url=None):
self.metadata["total"] += 1
self.metadata["processed"] += 1
match mark_state:
case 1:
@ -142,49 +150,56 @@ class LetterboxdImporter(Task):
with tempfile.TemporaryDirectory() as tmpdirname:
logger.debug(f"Extracting {filename} to {tmpdirname}")
zipref.extractall(tmpdirname)
with open(tmpdirname + "/reviews.csv") as f:
reader = csv.DictReader(f, delimiter=",")
for row in reader:
uris.add(row["Letterboxd URI"])
self.mark(
row["Letterboxd URI"],
ShelfType.COMPLETE,
row["Watched Date"],
row["Rating"],
row["Review"],
row["Tags"],
)
with open(tmpdirname + "/ratings.csv") as f:
reader = csv.DictReader(f, delimiter=",")
for row in reader:
if row["Letterboxd URI"] in uris:
continue
uris.add(row["Letterboxd URI"])
self.mark(
row["Letterboxd URI"],
ShelfType.COMPLETE,
row["Date"],
row["Rating"],
)
with open(tmpdirname + "/watched.csv") as f:
reader = csv.DictReader(f, delimiter=",")
for row in reader:
if row["Letterboxd URI"] in uris:
continue
uris.add(row["Letterboxd URI"])
self.mark(
row["Letterboxd URI"],
ShelfType.COMPLETE,
row["Date"],
)
with open(tmpdirname + "/watchlist.csv") as f:
reader = csv.DictReader(f, delimiter=",")
for row in reader:
if row["Letterboxd URI"] in uris:
continue
uris.add(row["Letterboxd URI"])
self.mark(
row["Letterboxd URI"],
ShelfType.WISHLIST,
row["Date"],
)
if os.path.exists(tmpdirname + "/reviews.csv"):
with open(tmpdirname + "/reviews.csv") as f:
reader = csv.DictReader(f, delimiter=",")
for row in reader:
uris.add(row["Letterboxd URI"])
self.mark(
row["Letterboxd URI"],
ShelfType.COMPLETE,
row["Watched Date"],
row["Rating"],
row["Review"],
row["Tags"],
)
if os.path.exists(tmpdirname + "/ratings.csv"):
with open(tmpdirname + "/ratings.csv") as f:
reader = csv.DictReader(f, delimiter=",")
for row in reader:
if row["Letterboxd URI"] in uris:
continue
uris.add(row["Letterboxd URI"])
self.mark(
row["Letterboxd URI"],
ShelfType.COMPLETE,
row["Date"],
row["Rating"],
)
if os.path.exists(tmpdirname + "/watched.csv"):
with open(tmpdirname + "/watched.csv") as f:
reader = csv.DictReader(f, delimiter=",")
for row in reader:
if row["Letterboxd URI"] in uris:
continue
uris.add(row["Letterboxd URI"])
self.mark(
row["Letterboxd URI"],
ShelfType.COMPLETE,
row["Date"],
)
if os.path.exists(tmpdirname + "/watchlist.csv"):
with open(tmpdirname + "/watchlist.csv") as f:
reader = csv.DictReader(f, delimiter=",")
for row in reader:
if row["Letterboxd URI"] in uris:
continue
uris.add(row["Letterboxd URI"])
self.mark(
row["Letterboxd URI"],
ShelfType.WISHLIST,
row["Date"],
)
self.metadata["total"] = self.metadata["processed"]
self.message = f"{self.metadata['imported']} imported, {self.metadata['skipped']} skipped, {self.metadata['failed']} failed"
self.save(update_fields=["metadata", "message"])

484
journal/importers/ndjson.py Normal file
View file

@ -0,0 +1,484 @@
import json
import os
import tempfile
import zipfile
from typing import Any, Dict
from loguru import logger
from journal.models import (
Collection,
Comment,
Mark,
Note,
Rating,
Review,
ShelfLogEntry,
ShelfType,
Tag,
TagMember,
)
from takahe.utils import Takahe
from .base import BaseImporter
class NdjsonImporter(BaseImporter):
"""Importer for NDJSON files exported from NeoDB."""
class Meta:
app_label = "journal" # workaround bug in TypedModel
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.items = {}
def import_collection(self, data: Dict[str, Any]) -> BaseImporter.ImportResult:
"""Import a collection from NDJSON data."""
try:
owner = self.user.identity
visibility = data.get("visibility", self.metadata.get("visibility", 0))
metadata = data.get("metadata", {})
content_data = data.get("content", {})
published_dt = self.parse_datetime(content_data.get("published"))
name = content_data.get("name", "")
content = content_data.get("content", "")
collection = Collection.objects.create(
owner=owner,
title=name,
brief=content,
visibility=visibility,
metadata=data.get("metadata", {}),
created_time=published_dt,
)
item_data = data.get("items", [])
for item_entry in item_data:
item_url = item_entry.get("item")
if not item_url:
continue
item = self.items.get(item_url)
if not item:
logger.warning(f"Could not find item for collection: {item_url}")
continue
metadata = item_entry.get("metadata", {})
collection.append_item(item, metadata=metadata)
return "imported"
except Exception:
logger.exception("Error importing collection")
return "failed"
def import_shelf_member(self, data: Dict[str, Any]) -> BaseImporter.ImportResult:
"""Import a shelf member (mark) from NDJSON data."""
try:
owner = self.user.identity
visibility = data.get("visibility", self.metadata.get("visibility", 0))
metadata = data.get("metadata", {})
content_data = data.get("content", {})
published_dt = self.parse_datetime(content_data.get("published"))
item = self.items.get(content_data.get("withRegardTo", ""))
if not item:
raise KeyError(f"Could not find item: {data.get('item', '')}")
shelf_type = content_data.get("status", ShelfType.WISHLIST)
mark = Mark(owner, item)
if mark.created_time and published_dt and mark.created_time >= published_dt:
return "skipped"
mark.update(
shelf_type=shelf_type,
visibility=visibility,
metadata=metadata,
created_time=published_dt,
)
return "imported"
except Exception:
logger.exception("Error importing shelf member")
return "failed"
def import_shelf_log(self, data: Dict[str, Any]) -> BaseImporter.ImportResult:
"""Import a shelf log entry from NDJSON data."""
try:
item = self.items.get(data.get("item", ""))
if not item:
raise KeyError(f"Could not find item: {data.get('item', '')}")
owner = self.user.identity
shelf_type = data.get("status", ShelfType.WISHLIST)
# posts = data.get("posts", []) # TODO but will be tricky
timestamp = data.get("timestamp")
timestamp_dt = self.parse_datetime(timestamp) if timestamp else None
_, created = ShelfLogEntry.objects.update_or_create(
owner=owner,
item=item,
shelf_type=shelf_type,
timestamp=timestamp_dt,
)
# return "imported" if created else "skipped"
# count skip as success otherwise it may confuse user
return "imported"
except Exception:
logger.exception("Error importing shelf log")
return "failed"
def import_post(self, data: Dict[str, Any]) -> BaseImporter.ImportResult:
"""Import a post from NDJSON data."""
# TODO
return "skipped"
def import_review(self, data: Dict[str, Any]) -> BaseImporter.ImportResult:
"""Import a review from NDJSON data."""
try:
owner = self.user.identity
visibility = data.get("visibility", self.metadata.get("visibility", 0))
metadata = data.get("metadata", {})
content_data = data.get("content", {})
published_dt = self.parse_datetime(content_data.get("published"))
item = self.items.get(content_data.get("withRegardTo", ""))
if not item:
raise KeyError(f"Could not find item: {data.get('item', '')}")
name = content_data.get("name", "")
content = content_data.get("content", "")
existing_review = Review.objects.filter(
owner=owner, item=item, title=name
).first()
if (
existing_review
and existing_review.created_time
and published_dt
and existing_review.created_time >= published_dt
):
return "skipped"
Review.objects.create(
owner=owner,
item=item,
title=name,
body=content,
created_time=published_dt,
visibility=visibility,
metadata=metadata,
)
return "imported"
except Exception:
logger.exception("Error importing review")
return "failed"
def import_note(self, data: Dict[str, Any]) -> BaseImporter.ImportResult:
    """Import a note from NDJSON data.

    The record's ``content`` object supplies ``title``, ``content``,
    ``sensitive``, ``published``, an optional ``progress`` object
    (``type`` / ``value``) and the catalog key of the annotated item
    (``withRegardTo``). Visibility falls back to the importer-wide
    ``visibility`` metadata, then to 0.

    A new note is always created; no duplicate check is performed here.
    Returns ``"imported"`` on success and ``"failed"`` (with the exception
    logged) on any error.
    """
    try:
        owner = self.user.identity
        visibility = data.get("visibility", self.metadata.get("visibility", 0))
        content_data = data.get("content", {})
        published_dt = self.parse_datetime(content_data.get("published"))
        item_key = content_data.get("withRegardTo", "")
        item = self.items.get(item_key)
        if not item:
            # Report the key that was actually looked up; these records
            # carry the item under content.withRegardTo, not data["item"].
            raise KeyError(f"Could not find item: {item_key}")
        title = content_data.get("title", "")
        content = content_data.get("content", "")
        sensitive = content_data.get("sensitive", False)
        progress = content_data.get("progress", {})
        progress_type = progress.get("type", "")
        progress_value = progress.get("value", "")
        Note.objects.create(
            item=item,
            owner=owner,
            title=title,
            content=content,
            sensitive=sensitive,
            progress_type=progress_type,
            progress_value=progress_value,
            created_time=published_dt,
            visibility=visibility,
            metadata=data.get("metadata", {}),
        )
        return "imported"
    except Exception:
        logger.exception("Error importing note")
        return "failed"
def import_comment(self, data: Dict[str, Any]) -> BaseImporter.ImportResult:
    """Import a comment from NDJSON data.

    The record's ``content`` object supplies the comment text
    (``content``), publication time (``published``) and the catalog key of
    the commented item (``withRegardTo``). Visibility falls back to the
    importer-wide ``visibility`` metadata, then to 0.

    Returns ``"skipped"`` when the owner already has a comment on the item
    whose ``created_time`` is not older than the record's publication
    time, ``"imported"`` after creating the comment, and ``"failed"``
    (with the exception logged) on any error.
    """
    try:
        owner = self.user.identity
        visibility = data.get("visibility", self.metadata.get("visibility", 0))
        metadata = data.get("metadata", {})
        content_data = data.get("content", {})
        published_dt = self.parse_datetime(content_data.get("published"))
        item_key = content_data.get("withRegardTo", "")
        item = self.items.get(item_key)
        if not item:
            # Report the key that was actually looked up; these records
            # carry the item under content.withRegardTo, not data["item"].
            raise KeyError(f"Could not find item: {item_key}")
        content = content_data.get("content", "")
        existing_comment = Comment.objects.filter(owner=owner, item=item).first()
        if (
            existing_comment
            and existing_comment.created_time
            and published_dt
            and existing_comment.created_time >= published_dt
        ):
            return "skipped"
        Comment.objects.create(
            owner=owner,
            item=item,
            text=content,
            created_time=published_dt,
            visibility=visibility,
            metadata=metadata,
        )
        return "imported"
    except Exception:
        logger.exception("Error importing comment")
        return "failed"
def import_rating(self, data: Dict[str, Any]) -> BaseImporter.ImportResult:
    """Import a rating from NDJSON data.

    The record's ``content`` object supplies the grade (``value``, coerced
    through ``float`` then ``int``; 0 when absent), publication time
    (``published``) and the catalog key of the rated item
    (``withRegardTo``). Visibility falls back to the importer-wide
    ``visibility`` metadata, then to 0.

    Returns ``"skipped"`` when the owner already has a rating for the item
    whose ``created_time`` is not older than the record's publication
    time, ``"imported"`` after creating the rating, and ``"failed"`` (with
    the exception logged) on any error.
    """
    try:
        owner = self.user.identity
        visibility = data.get("visibility", self.metadata.get("visibility", 0))
        metadata = data.get("metadata", {})
        content_data = data.get("content", {})
        published_dt = self.parse_datetime(content_data.get("published"))
        item_key = content_data.get("withRegardTo", "")
        item = self.items.get(item_key)
        if not item:
            # Report the key that was actually looked up; these records
            # carry the item under content.withRegardTo, not data["item"].
            raise KeyError(f"Could not find item: {item_key}")
        rating_grade = int(float(content_data.get("value", 0)))
        # The duplicate check must look at existing ratings; it previously
        # queried Comment, so an unrelated comment decided whether the
        # rating was skipped.
        existing_rating = Rating.objects.filter(owner=owner, item=item).first()
        if (
            existing_rating
            and existing_rating.created_time
            and published_dt
            and existing_rating.created_time >= published_dt
        ):
            return "skipped"
        Rating.objects.create(
            owner=owner,
            item=item,
            grade=rating_grade,
            created_time=published_dt,
            visibility=visibility,
            metadata=metadata,
        )
        return "imported"
    except Exception:
        logger.exception("Error importing rating")
        return "failed"
def import_tag(self, data: Dict[str, Any]) -> BaseImporter.ImportResult:
    """Import a tag definition from NDJSON data.

    The tag title is taken from ``name`` and passed through
    ``Tag.cleanup_title``. ``visibility`` and ``pinned`` fall back to the
    importer-wide metadata values, then to 0 / False. An existing tag with
    the same owner and title has its visibility and pinned flag updated.

    Returns ``"imported"`` when a new tag was created, ``"skipped"`` when
    an existing tag was updated, and ``"failed"`` (with the exception
    logged) on any error.
    """
    try:
        owner = self.user.identity
        visibility = data.get("visibility", self.metadata.get("visibility", 0))
        pinned = data.get("pinned", self.metadata.get("pinned", False))
        tag_title = Tag.cleanup_title(data.get("name", ""))
        _, created = Tag.objects.update_or_create(
            owner=owner,
            title=tag_title,
            defaults={
                "visibility": visibility,
                "pinned": pinned,
            },
        )
        return "imported" if created else "skipped"
    except Exception:
        # This handler is for Tag records; the message previously said
        # "tag member", which made the two importers' failures look alike.
        logger.exception("Error importing tag")
        return "failed"
def import_tag_member(self, data: Dict[str, Any]) -> BaseImporter.ImportResult:
    """Import a tag membership (one item tagged with one tag) from NDJSON data.

    The record's ``content`` object supplies the tag title (``tag``,
    passed through ``Tag.cleanup_title``), publication time
    (``published``) and the catalog key of the tagged item
    (``withRegardTo``). The tag itself is created unpinned if the owner
    does not have it yet; an existing tag is left untouched.

    Returns ``"imported"`` when a new membership was created,
    ``"skipped"`` when an existing membership was updated, and
    ``"failed"`` (with the exception logged) on any error.
    """
    try:
        owner = self.user.identity
        visibility = data.get("visibility", self.metadata.get("visibility", 0))
        metadata = data.get("metadata", {})
        content_data = data.get("content", {})
        published_dt = self.parse_datetime(content_data.get("published"))
        item_key = content_data.get("withRegardTo", "")
        item = self.items.get(item_key)
        if not item:
            # Report the key that was actually looked up; these records
            # carry the item under content.withRegardTo, not data["item"].
            raise KeyError(f"Could not find item: {item_key}")
        tag_title = Tag.cleanup_title(content_data.get("tag", ""))
        tag, _ = Tag.objects.get_or_create(
            owner=owner,
            title=tag_title,
            defaults={
                "created_time": published_dt,
                "visibility": visibility,
                "pinned": False,
                "metadata": metadata,
            },
        )
        _, created = TagMember.objects.update_or_create(
            owner=owner,
            item=item,
            parent=tag,
            defaults={
                "created_time": published_dt,
                "visibility": visibility,
                "metadata": metadata,
                "position": 0,
            },
        )
        return "imported" if created else "skipped"
    except Exception:
        logger.exception("Error importing tag member")
        return "failed"
def process_journal(self, file_path: str) -> None:
    """Process a NDJSON file and import all items.

    The first line is treated as the header and skipped. Every following
    line is parsed as JSON and grouped by its ``type`` field. Records are
    then imported type by type in the order of the dispatch table below
    (so tags come before tag members, and so on), and each importer
    result is passed to ``self.progress``.

    ``metadata["total"]`` counts every record that has a ``type``,
    including types with no importer in the dispatch table; such records
    are never handed to an importer.
    """
    logger.debug(f"Processing {file_path}")
    lines_error = 0
    import_funcs = {
        "Tag": self.import_tag,
        "TagMember": self.import_tag_member,
        "Rating": self.import_rating,
        "Comment": self.import_comment,
        "ShelfMember": self.import_shelf_member,
        "Review": self.import_review,
        "Note": self.import_note,
        "Collection": self.import_collection,
        "ShelfLog": self.import_shelf_log,
        "Post": self.import_post,
    }
    journal = {k: [] for k in import_funcs}
    # Read explicitly as UTF-8 instead of relying on the locale default.
    with open(file_path, "r", encoding="utf-8") as jsonfile:
        # Skip header line
        next(jsonfile, None)
        for line in jsonfile:
            if not line.strip():
                # Blank lines (e.g. a trailing newline) are not errors.
                continue
            try:
                data = json.loads(line)
            except json.JSONDecodeError:
                lines_error += 1
                continue
            if not isinstance(data, dict):
                # Valid JSON that is not an object cannot be a record.
                lines_error += 1
                continue
            data_type = data.get("type")
            if not data_type:
                continue
            journal.setdefault(data_type, []).append(data)
    self.metadata["total"] = sum(len(items) for items in journal.values())
    self.message = f"found {self.metadata['total']} records to import"
    self.save(update_fields=["metadata", "message"])
    logger.debug(f"Processing {self.metadata['total']} entries")
    if lines_error:
        logger.error(f"Error processing journal.ndjson: {lines_error} lines")
    for typ, func in import_funcs.items():
        for data in journal.get(typ, []):
            result = func(data)
            self.progress(result)
    logger.info(
        f"Imported {self.metadata['imported']}, skipped {self.metadata['skipped']}, failed {self.metadata['failed']}"
    )
def parse_catalog(self, file_path: str) -> None:
    """Parse the catalog.ndjson file and build item lookup tables.

    Each line that decodes to a JSON object with an ``id`` is resolved
    through ``self.get_item_by_info_and_links`` using the id plus the
    ``url`` of every entry in ``external_resources``; the result is stored
    in ``self.items`` under the id. Lines that fail to decode, are not
    objects, or lack an ``id`` are skipped. On completion the number of
    entries seen is stored in ``metadata["catalog_processed"]``. Any other
    error is logged and stops the parse.
    """
    logger.debug(f"Parsing catalog file: {file_path}")
    item_count = 0
    try:
        # Read explicitly as UTF-8 instead of relying on the locale default.
        with open(file_path, "r", encoding="utf-8") as jsonfile:
            for line in jsonfile:
                try:
                    entry = json.loads(line)
                except Exception:
                    logger.exception("Error processing catalog item")
                    continue
                if not isinstance(entry, dict):
                    # Valid JSON that is not an object cannot be an item.
                    continue
                u = entry.get("id")
                if not u:
                    continue
                # self.catalog_items[u] = i
                item_count += 1
                # Tolerate a null resource list and resources without a
                # url so one odd entry does not abort the whole catalog.
                resources = entry.get("external_resources") or []
                links = [u] + [
                    r["url"]
                    for r in resources
                    if isinstance(r, dict) and r.get("url")
                ]
                self.items[u] = self.get_item_by_info_and_links("", "", links)
        logger.info(f"Loaded {item_count} items from catalog")
        self.metadata["catalog_processed"] = item_count
    except Exception:
        logger.exception("Error parsing catalog file")
def parse_header(self, file_path: str) -> Dict[str, Any]:
    """Return the header object stored on the first line of an NDJSON file.

    The first line is accepted as a header only when it decodes to a JSON
    object with a truthy ``server`` entry. In every other case (empty
    file, non-object first line, missing ``server``, unreadable or
    undecodable file) an empty dict is returned; read and decode errors
    are logged.
    """
    try:
        # Read explicitly as UTF-8 instead of relying on the locale default.
        with open(file_path, "r", encoding="utf-8") as jsonfile:
            first_line = jsonfile.readline().strip()
        if first_line:
            header = json.loads(first_line)
            # A JSON array/string/number has no .get(); treat as "no header".
            if isinstance(header, dict) and header.get("server"):
                return header
    except (ValueError, OSError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        logger.exception("Error parsing header")
    return {}
def process_actor(self, file_path: str) -> None:
    """Process the actor.ndjson file to update user identity information.

    The first line is skipped as the header. The first record whose
    ``type`` is ``"Identity"`` is applied: a non-empty ``name`` or
    ``summary`` that differs from the user's current takahe identity is
    copied over, and the identity is saved and marked "edited" when
    anything changed. Processing stops after that record. Undecodable or
    non-object lines are skipped; any other error is logged.
    """
    logger.debug(f"Processing actor data from {file_path}")
    try:
        # Read explicitly as UTF-8 instead of relying on the locale default.
        with open(file_path, "r", encoding="utf-8") as jsonfile:
            next(jsonfile, None)
            for line in jsonfile:
                try:
                    data = json.loads(line)
                except json.JSONDecodeError:
                    logger.error("Error parsing actor data line")
                    continue
                # Skip anything that is not an Identity object (including
                # valid JSON that is not an object at all).
                if not isinstance(data, dict) or data.get("type") != "Identity":
                    continue
                logger.debug("Found identity data in actor.ndjson")
                takahe_identity = self.user.identity.takahe_identity
                updated = False
                name = data.get("name")
                if name and name != takahe_identity.name:
                    logger.debug(
                        f"Updating identity name from {takahe_identity.name} to {name}"
                    )
                    takahe_identity.name = name
                    updated = True
                summary = data.get("summary")
                if summary and summary != takahe_identity.summary:
                    logger.debug("Updating identity summary")
                    takahe_identity.summary = summary
                    updated = True
                if updated:
                    takahe_identity.save()
                    Takahe.update_state(takahe_identity, "edited")
                    logger.info("Updated identity")
                # Only the first Identity record is considered.
                return
    except Exception as e:
        logger.exception(f"Error processing actor file: {e}")
def run(self) -> None:
    """Unpack the uploaded archive and import its contents.

    The zip named by ``metadata["file"]`` is extracted into a temporary
    directory. ``actor.ndjson`` (optional) is applied first, then
    ``catalog.ndjson`` (optional, a warning is logged when missing), then
    ``journal.ndjson``, which is required: without it the task message is
    set to a failure notice and the method returns early. On completion
    the message summarises the imported / skipped / failed counters.
    """
    archive = self.metadata["file"]
    logger.debug(f"Importing {archive}")
    with zipfile.ZipFile(archive, "r") as zipref, tempfile.TemporaryDirectory() as workdir:
        zipref.extractall(workdir)

        # Process actor data first if available
        actor_file = os.path.join(workdir, "actor.ndjson")
        if not os.path.exists(actor_file):
            logger.debug("No actor.ndjson file found in the archive")
        else:
            actor_header = self.parse_header(actor_file)
            logger.debug(f"Found actor.ndjson with {actor_header}")
            self.process_actor(actor_file)

        catalog_file = os.path.join(workdir, "catalog.ndjson")
        if not os.path.exists(catalog_file):
            logger.warning("catalog.ndjson file not found in the archive")
        else:
            catalog_header = self.parse_header(catalog_file)
            logger.debug(f"Loading catalog.ndjson with {catalog_header}")
            self.parse_catalog(catalog_file)

        journal_file = os.path.join(workdir, "journal.ndjson")
        if not os.path.exists(journal_file):
            # Nothing can be imported without the journal.
            logger.error("journal.ndjson file not found in the archive")
            self.message = "Import failed: journal.ndjson file not found"
            self.save()
            return
        journal_header = self.parse_header(journal_file)
        self.metadata["journal_header"] = journal_header
        logger.debug(f"Importing journal.ndjson with {journal_header}")
        self.process_journal(journal_file)

    self.message = f"{self.metadata['imported']} items imported, {self.metadata['skipped']} skipped, {self.metadata['failed']} failed."
    self.save()

View file

@ -1,43 +1,54 @@
import django_rq
import listparser
from auditlog.context import set_actor
from django.utils.translation import gettext as _
from loguru import logger
from user_messages import api as msg
from catalog.common import *
from catalog.common.downloaders import *
from catalog.sites.rss import RSS
from journal.models import *
from users.models.task import Task
class OPMLImporter:
def __init__(self, user, visibility, mode):
self.user = user
self.visibility = visibility
self.mode = mode
class OPMLImporter(Task):
class Meta:
app_label = "journal" # workaround bug in TypedModel
def parse_file(self, uploaded_file):
return listparser.parse(uploaded_file.read()).feeds
TaskQueue = "import"
DefaultMetadata = {
"total": 0,
"mode": 0,
"processed": 0,
"skipped": 0,
"imported": 0,
"failed": 0,
"visibility": 0,
"failed_urls": [],
"file": None,
}
def import_from_file(self, uploaded_file):
feeds = self.parse_file(uploaded_file)
if not feeds:
@classmethod
def validate_file(cls, f):
try:
return bool(listparser.parse(f.read()).feeds)
except Exception:
return False
django_rq.get_queue("import").enqueue(self.import_from_file_task, feeds)
return True
def import_from_file_task(self, feeds):
logger.info(f"{self.user} import opml start")
skip = 0
collection = None
with set_actor(self.user):
if self.mode == 1:
def run(self):
with open(self.metadata["file"], "r") as f:
feeds = listparser.parse(f.read()).feeds
self.metadata["total"] = len(feeds)
self.message = f"Processing {self.metadata['total']} feeds."
self.save(update_fields=["metadata", "message"])
collection = None
if self.metadata["mode"] == 1:
title = _("{username}'s podcast subscriptions").format(
username=self.user.display_name
)
collection = Collection.objects.create(
owner=self.user.identity, title=title
owner=self.user.identity,
title=title,
visibility=self.metadata["visibility"],
)
for feed in feeds:
logger.info(f"{self.user} import {feed.url}")
@ -47,21 +58,26 @@ class OPMLImporter:
res = None
if not res or not res.item:
logger.warning(f"{self.user} feed error {feed.url}")
self.metadata["failed"] += 1
continue
item = res.item
if self.mode == 0:
if self.metadata["mode"] == 0:
mark = Mark(self.user.identity, item)
if mark.shelfmember:
logger.info(f"{self.user} marked, skip {feed.url}")
skip += 1
self.metadata["skipped"] += 1
else:
self.metadata["imported"] += 1
mark.update(
ShelfType.PROGRESS, None, None, visibility=self.visibility
ShelfType.PROGRESS,
None,
None,
visibility=self.metadata["visibility"],
)
elif self.mode == 1 and collection:
elif self.metadata["mode"] == 1 and collection:
self.metadata["imported"] += 1
collection.append_item(item)
logger.info(f"{self.user} import opml end")
msg.success(
self.user,
f"OPML import complete, {len(feeds)} feeds processed, {skip} exisiting feeds skipped.",
)
self.metadata["processed"] += 1
self.save(update_fields=["metadata"])
self.message = f"{self.metadata['imported']} feeds imported, {self.metadata['skipped']} skipped, {self.metadata['failed']} failed."
self.save(update_fields=["message"])

View file

@ -10,6 +10,16 @@ class Migration(migrations.Migration):
]
operations = [
migrations.CreateModel(
name="BaseImporter",
fields=[],
options={
"proxy": True,
"indexes": [],
"constraints": [],
},
bases=("users.task",),
),
migrations.CreateModel(
name="CsvImporter",
fields=[],
@ -20,4 +30,24 @@ class Migration(migrations.Migration):
},
bases=("users.task",),
),
migrations.CreateModel(
name="OPMLImporter",
fields=[],
options={
"proxy": True,
"indexes": [],
"constraints": [],
},
bases=("users.task",),
),
migrations.CreateModel(
name="NdjsonImporter",
fields=[],
options={
"proxy": True,
"indexes": [],
"constraints": [],
},
bases=("journal.baseimporter",),
),
]

View file

@ -163,7 +163,7 @@ class Mark:
log entries
log entry will be created when item is added to shelf
log entry will be created when item is moved to another shelf
log entry will be created when item is removed from shelf (TODO change this to DEFERRED shelf)
log entry will be created when item is removed from shelf
timestamp of log entry will be updated whenever created_time of shelfmember is updated
any log entry can be deleted by user arbitrarily

View file

@ -5,14 +5,14 @@ from django.core.validators import MaxValueValidator, MinValueValidator
from django.db import models
from django.db.models import Avg, Count
from catalog.models import Item
from catalog.models import Item, Performance, TVShow
from takahe.utils import Takahe
from users.models import APIdentity
from .common import Content
MIN_RATING_COUNT = 5
RATING_INCLUDES_CHILD_ITEMS = ["tvshow", "performance"]
RATING_INCLUDES_CHILD_ITEMS = [TVShow, Performance]
class Rating(Content):
@ -73,10 +73,41 @@ class Rating(Content):
p.link_post_id(post.id)
return p
@classmethod
def get_info_for_item(cls, item: Item) -> dict:
    """Summarise the ratings of ``item`` in a single query.

    For item classes listed in ``RATING_INCLUDES_CHILD_ITEMS`` the ratings
    of the item's children are counted together with its own. Only grades
    from 1 to 10 contribute.

    Returns a dict with ``average`` (rounded to one decimal, or ``None``
    when there are fewer than ``MIN_RATING_COUNT`` votes), ``count`` (the
    number of votes) and ``distribution`` (five integer percentages,
    floored, for the grade pairs 1-2, 3-4, 5-6, 7-8 and 9-10; all zeros
    when the average is withheld).
    """
    ratings = Rating.objects.filter(grade__isnull=False)
    if item.__class__ in RATING_INCLUDES_CHILD_ITEMS:
        ratings = ratings.filter(item_id__in=item.child_item_ids + [item.pk])
    else:
        ratings = ratings.filter(item=item)
    per_grade = ratings.values("grade").annotate(count=Count("grade"))

    # Index 0 is unused so that a grade can index the list directly.
    histogram = [0] * 11
    votes = 0
    weighted_sum = 0
    for row in per_grade:
        grade = row["grade"]
        if not (grade and grade > 0 and grade < 11):
            continue
        n = row["count"]
        histogram[grade] = n
        weighted_sum += n * grade
        votes += n

    if votes < MIN_RATING_COUNT:
        return {"average": None, "count": votes, "distribution": [0] * 5}
    distribution = [
        100 * (histogram[low] + histogram[low + 1]) // votes
        for low in range(1, 11, 2)
    ]
    return {
        "average": round(weighted_sum / votes, 1),
        "count": votes,
        "distribution": distribution,
    }
@staticmethod
def get_rating_for_item(item: Item) -> float | None:
stat = Rating.objects.filter(grade__isnull=False)
if item.class_name in RATING_INCLUDES_CHILD_ITEMS:
if item.__class__ in RATING_INCLUDES_CHILD_ITEMS:
stat = stat.filter(item_id__in=item.child_item_ids + [item.pk])
else:
stat = stat.filter(item=item)
@ -86,7 +117,7 @@ class Rating(Content):
@staticmethod
def get_rating_count_for_item(item: Item) -> int:
stat = Rating.objects.filter(grade__isnull=False)
if item.class_name in RATING_INCLUDES_CHILD_ITEMS:
if item.__class__ in RATING_INCLUDES_CHILD_ITEMS:
stat = stat.filter(item_id__in=item.child_item_ids + [item.pk])
else:
stat = stat.filter(item=item)
@ -96,7 +127,7 @@ class Rating(Content):
@staticmethod
def get_rating_distribution_for_item(item: Item):
stat = Rating.objects.filter(grade__isnull=False)
if item.class_name in RATING_INCLUDES_CHILD_ITEMS:
if item.__class__ in RATING_INCLUDES_CHILD_ITEMS:
stat = stat.filter(item_id__in=item.child_item_ids + [item.pk])
else:
stat = stat.filter(item=item)

View file

@ -7,6 +7,7 @@ from django.db import connection, models
from django.utils import timezone
from django.utils.translation import gettext_lazy as _
from loguru import logger
from polymorphic.models import PolymorphicManager
from catalog.models import Item, ItemCategory
from takahe.utils import Takahe
@ -310,6 +311,28 @@ _SHELF_LABELS = [
# grammatically problematic, for translation only
class ShelfMemberManager(PolymorphicManager):
    """Manager whose querysets carry the member's shelf type, rating and comment.

    Every row is annotated with ``_shelf_type`` (the parent shelf's
    ``shelf_type``), ``_rating_grade`` and ``_comment_text`` (the grade /
    text of a rating / comment with the same owner and item, if any).
    """

    def get_queryset(self):
        # Imported here rather than at module level, as in the original.
        from .comment import Comment
        from .rating import Rating

        def first_value(model, field):
            # At most one value of ``field`` from ``model`` rows sharing the
            # outer row's owner and item.
            matching = model.objects.filter(
                owner_id=models.OuterRef("owner_id"),
                item_id=models.OuterRef("item_id"),
            )
            return matching.values(field)[:1]

        grade_query = first_value(Rating, "grade")
        text_query = first_value(Comment, "text")
        base = super().get_queryset()
        return base.annotate(
            _rating_grade=models.Subquery(grade_query),
            _comment_text=models.Subquery(text_query),
            _shelf_type=models.F("parent__shelf_type"),
        )
class ShelfMember(ListMember):
if TYPE_CHECKING:
parent: models.ForeignKey["ShelfMember", "Shelf"]
@ -318,6 +341,8 @@ class ShelfMember(ListMember):
"Shelf", related_name="members", on_delete=models.CASCADE
)
objects = ShelfMemberManager()
class Meta:
unique_together = [["owner", "item"]]
indexes = [
@ -448,6 +473,15 @@ class ShelfMember(ListMember):
"content": content,
}
def save(self, *args, **kwargs):
    """Drop the manager-provided annotation values, then save as usual.

    ``_shelf_type``, ``_rating_grade`` and ``_comment_text`` are removed
    from the instance before delegating to the parent ``save``.
    NOTE(review): presumably this is so the ``shelf_type`` /
    ``rating_grade`` / ``comment_text`` properties fall back to live
    lookups instead of values captured at query time; confirm.
    """
    # Remove each attribute independently: with a single try block, the
    # first missing attribute would leave the remaining ones in place.
    for annotated in ("_shelf_type", "_rating_grade", "_comment_text"):
        try:
            delattr(self, annotated)
        except AttributeError:
            pass
    return super().save(*args, **kwargs)
@cached_property
def sibling_comment(self) -> "Comment | None":
from .comment import Comment
@ -470,19 +504,28 @@ class ShelfMember(ListMember):
@property
def shelf_label(self) -> str | None:
return ShelfManager.get_label(self.parent.shelf_type, self.item.category)
return ShelfManager.get_label(self.shelf_type, self.item.category)
@property
def shelf_type(self):
return self.parent.shelf_type
try:
return getattr(self, "_shelf_type")
except AttributeError:
return self.parent.shelf_type
@property
def rating_grade(self):
return self.mark.rating_grade
try:
return getattr(self, "_rating_grade")
except AttributeError:
return self.mark.rating_grade
@property
def comment_text(self):
return self.mark.comment_text
try:
return getattr(self, "_comment_text")
except AttributeError:
return self.mark.comment_text
@property
def tags(self):

View file

@ -1,3 +1,5 @@
from .csv import *
from .ndjson import *
from .piece import *
from .rating import *
from .search import *

View file

@ -9,7 +9,7 @@ from loguru import logger
from catalog.models import Edition, IdType, Movie, TVEpisode, TVSeason, TVShow
from journal.exporters import CsvExporter
from journal.importers import CsvImporter, get_neodb_importer
from journal.importers import CsvImporter
from users.models import User
from ..models import *
@ -219,10 +219,9 @@ class CsvExportImportTest(TestCase):
f"Expected file {filename} with {expected_data_count} data rows, but file not found"
)
self.assertEqual(get_neodb_importer(export_path), CsvImporter)
importer = CsvImporter.create(user=self.user2, file=export_path, visibility=2)
importer.run()
self.assertEqual(importer.message, "Import complete")
self.assertEqual(importer.message, "11 items imported, 0 skipped, 0 failed.")
# Verify imported data

506
journal/tests/ndjson.py Normal file
View file

@ -0,0 +1,506 @@
import json
import os
import zipfile
from tempfile import TemporaryDirectory
from django.test import TestCase
from django.utils.dateparse import parse_datetime
from loguru import logger
from catalog.models import (
Edition,
IdType,
Movie,
Podcast,
PodcastEpisode,
TVEpisode,
TVSeason,
TVShow,
)
from journal.exporters import NdjsonExporter
from journal.importers import NdjsonImporter
from users.models import User
from ..models import *
class NdjsonExportImportTest(TestCase):
databases = "__all__"
maxDiff = None
def setUp(self):
    """Create two users, one pinned tag and a small catalog for the round trip.

    ``user1`` is the export source and ``user2`` the import target. The
    catalog holds two books, two movies, a TV show with one season and two
    episodes, and a podcast with one episode.
    """
    self.user1 = User.register(
        email="ndjson_export@test.com", username="ndjson_exporter"
    )
    self.user2 = User.register(
        email="ndjson_import@test.com", username="ndjson_importer"
    )
    # Pinned tag owned by the exporting user; the test later checks that
    # its pinned flag and visibility exist on the importing user's side.
    self.tag1 = Tag.objects.create(
        owner=self.user1.identity, title="favorite", pinned=True, visibility=2
    )
    # Three fixed timestamps used as created_time for marks and reviews.
    self.dt = parse_datetime("2021-01-01T00:00:00Z")
    self.dt2 = parse_datetime("2021-02-01T00:00:00Z")
    self.dt3 = parse_datetime("2021-03-01T00:00:00Z")
    self.book1 = Edition.objects.create(
        localized_title=[{"lang": "en", "text": "Hyperion"}],
        primary_lookup_id_type=IdType.ISBN,
        primary_lookup_id_value="9780553283686",
        author=["Dan Simmons"],
        pub_year=1989,
    )
    self.book2 = Edition.objects.create(
        localized_title=[{"lang": "en", "text": "Dune"}],
        primary_lookup_id_type=IdType.ISBN,
        primary_lookup_id_value="9780441172719",
        author=["Frank Herbert"],
        pub_year=1965,
    )
    self.movie1 = Movie.objects.create(
        localized_title=[{"lang": "en", "text": "Inception"}],
        primary_lookup_id_type=IdType.IMDB,
        primary_lookup_id_value="tt1375666",
        director=["Christopher Nolan"],
        year=2010,
    )
    self.movie2 = Movie.objects.create(
        localized_title=[{"lang": "en", "text": "The Matrix"}],
        primary_lookup_id_type=IdType.IMDB,
        primary_lookup_id_value="tt0133093",
        director=["Lana Wachowski", "Lilly Wachowski"],
        year=1999,
    )
    self.tvshow = TVShow.objects.create(
        localized_title=[{"lang": "en", "text": "Breaking Bad"}],
        primary_lookup_id_type=IdType.IMDB,
        primary_lookup_id_value="tt0903747",
        year=2008,
    )
    self.tvseason = TVSeason.objects.create(
        localized_title=[{"lang": "en", "text": "Breaking Bad Season 1"}],
        show=self.tvshow,
        season_number=1,
    )
    self.tvepisode1 = TVEpisode.objects.create(
        localized_title=[{"lang": "en", "text": "Pilot"}],
        season=self.tvseason,
        episode_number=1,
    )
    self.tvepisode2 = TVEpisode.objects.create(
        localized_title=[{"lang": "en", "text": "Cat's in the Bag..."}],
        season=self.tvseason,
        episode_number=2,
    )
    # Create podcast test items
    self.podcast = Podcast.objects.create(
        localized_title=[{"lang": "en", "text": "Test Podcast"}],
        primary_lookup_id_type=IdType.RSS,
        primary_lookup_id_value="https://example.com/feed.xml",
        host=["Test Host"],
    )
    self.podcastepisode = PodcastEpisode.objects.create(
        localized_title=[{"lang": "en", "text": "Test Episode 1"}],
        program=self.podcast,
        guid="111",
        pub_date=self.dt,
    )
def test_ndjson_export_import(self):
    """Round-trip user1's journal through NDJSON export and import into user2.

    Populates marks, reviews, notes and collections for user1, exports
    them, validates the archive layout and per-type record counts, imports
    the archive as user2, then checks that user2's identity, marks,
    reviews, notes, collections, tags and shelf log match the source.
    """
    # set name and summary for user1
    identity1 = self.user1.identity
    takahe_identity1 = identity1.takahe_identity
    takahe_identity1.name = "Test User"
    takahe_identity1.summary = "Test summary"
    takahe_identity1.save()
    # Book marks with ratings and tags
    mark_book1 = Mark(self.user1.identity, self.book1)
    mark_book1.update(
        ShelfType.COMPLETE,
        "Great sci-fi classic",
        10,
        ["sci-fi", "favorite", "space"],
        1,
        created_time=self.dt,
    )
    # book2 moves through three shelves at three timestamps.
    mark_book2 = Mark(self.user1.identity, self.book2)
    mark_book2.update(
        ShelfType.WISHLIST,
        "Read it?",
        None,
        ["sci-fi", "desert"],
        1,
        created_time=self.dt,
    )
    mark_book2.update(
        ShelfType.PROGRESS,
        "Reading!",
        None,
        ["sci-fi", "desert"],
        0,
        created_time=self.dt2,
    )
    mark_book2.update(
        ShelfType.COMPLETE,
        "Read.",
        None,
        ["sci-fi", "desert"],
        0,
        created_time=self.dt3,
    )
    # Movie marks with ratings
    mark_movie1 = Mark(self.user1.identity, self.movie1)
    mark_movie1.update(
        ShelfType.COMPLETE,
        "Mind-bending",
        8,
        ["mindbender", "scifi"],
        1,
        created_time=self.dt,
    )
    mark_movie2 = Mark(self.user1.identity, self.movie2)
    mark_movie2.update(
        ShelfType.WISHLIST, "Need to rewatch", None, [], 1, created_time=self.dt2
    )
    # TV show mark
    mark_tvshow = Mark(self.user1.identity, self.tvshow)
    mark_tvshow.update(
        ShelfType.WISHLIST,
        "Heard it's good",
        None,
        ["drama"],
        1,
        created_time=self.dt,
    )
    # TV episode marks
    mark_episode1 = Mark(self.user1.identity, self.tvepisode1)
    mark_episode1.update(
        ShelfType.COMPLETE,
        "Great start",
        9,
        ["pilot", "drama"],
        1,
        created_time=self.dt2,
    )
    mark_episode2 = Mark(self.user1.identity, self.tvepisode2)
    mark_episode2.update(
        ShelfType.COMPLETE, "It gets better", 9, [], 1, created_time=self.dt3
    )
    # Podcast episode mark
    mark_podcast = Mark(self.user1.identity, self.podcastepisode)
    mark_podcast.update(
        ShelfType.COMPLETE,
        "Insightful episode",
        8,
        ["tech", "interview"],
        1,
        created_time=self.dt,
    )
    # Create reviews
    Review.update_item_review(
        self.book1,
        self.user1.identity,
        "My thoughts on Hyperion",
        "A masterpiece of science fiction that weaves multiple storylines into a captivating narrative.",
        visibility=1,
        created_time=self.dt,
    )
    Review.update_item_review(
        self.movie1,
        self.user1.identity,
        "Inception Review",
        "Christopher Nolan at his best. The movie plays with reality and dreams in a fascinating way.",
        visibility=1,
    )
    # Create notes
    Note.objects.create(
        item=self.book2,
        owner=self.user1.identity,
        title="Reading progress",
        content="Just finished the first part. The world-building is incredible.\n\n - p 125",
        progress_type=Note.ProgressType.PAGE,
        progress_value="125",
        visibility=1,
    )
    Note.objects.create(
        item=self.tvshow,
        owner=self.user1.identity,
        title="Before watching",
        content="Things to look out for according to friends:\n- Character development\n- Color symbolism\n\n - e 0",
        progress_type=Note.ProgressType.EPISODE,
        progress_value="2",
        visibility=1,
    )
    # Create TV episode note
    Note.objects.create(
        item=self.tvepisode1,
        owner=self.user1.identity,
        title="Episode thoughts",
        content="Great pilot episode. Sets up the character arcs really well.",
        visibility=1,
    )
    # Create podcast episode note
    Note.objects.create(
        item=self.podcastepisode,
        owner=self.user1.identity,
        title="Podcast episode notes",
        content="Interesting discussion about tech trends. Timestamp 23:45 has a good point about AI.",
        progress_type=Note.ProgressType.TIMESTAMP,
        progress_value="23:45",
        visibility=1,
    )
    # Create collections
    items = [self.book1, self.movie1]
    collection = Collection.objects.create(
        owner=self.user1.identity,
        title="Favorites",
        brief="My all-time favorites",
        visibility=1,
    )
    for i in items:
        collection.append_item(i)
    # Create another collection with different items
    items2 = [self.book2, self.movie2, self.tvshow]
    collection2 = Collection.objects.create(
        owner=self.user1.identity,
        title="To Review",
        brief="Items I need to review soon",
        visibility=1,
    )
    for i in items2:
        collection2.append_item(i)
    # Query user1's shelf log entries (nothing is created here); the
    # queryset is used below for the export count and the final comparison.
    logs = ShelfLogEntry.objects.filter(owner=self.user1.identity).order_by(
        "timestamp", "item_id"
    )
    # Export data to NDJSON
    exporter = NdjsonExporter.create(user=self.user1)
    exporter.run()
    export_path = exporter.metadata["file"]
    logger.debug(f"exported to {export_path}")
    self.assertTrue(os.path.exists(export_path))
    self.assertEqual(exporter.metadata["total"], 61)
    # Validate the NDJSON export file structure
    with TemporaryDirectory() as extract_dir:
        with zipfile.ZipFile(export_path, "r") as zip_ref:
            zip_ref.extractall(extract_dir)
            logger.debug(f"unzipped to {extract_dir}")
            # Check journal.ndjson exists
            journal_path = os.path.join(extract_dir, "journal.ndjson")
            self.assertTrue(
                os.path.exists(journal_path), "journal.ndjson file missing"
            )
            # Check catalog.ndjson exists
            catalog_path = os.path.join(extract_dir, "catalog.ndjson")
            self.assertTrue(
                os.path.exists(catalog_path), "catalog.ndjson file missing"
            )
            # Check attachments directory exists
            attachments_path = os.path.join(extract_dir, "attachments")
            self.assertTrue(
                os.path.exists(attachments_path), "attachments directory missing"
            )
            # Count the number of JSON objects in journal.ndjson
            with open(journal_path, "r") as f:
                lines = f.readlines()
                # First line is header, rest are data
                self.assertGreater(
                    len(lines), 1, "journal.ndjson has no data lines"
                )
                # Check the first line is a header
                header = json.loads(lines[0])
                self.assertIn("server", header, "Missing server in header")
                self.assertIn("username", header, "Missing username in header")
                self.assertEqual(
                    header["username"],
                    "ndjson_exporter",
                    "Wrong username in header",
                )
                # Count data objects by type; types not pre-seeded here are
                # added on first sight by the .get(..., 0) below.
                type_counts = {
                    "ShelfMember": 0,
                    "Review": 0,
                    "Note": 0,
                    "Collection": 0,
                    "ShelfLog": 0,
                    "post": 0,
                }
                for line in lines[1:]:
                    data = json.loads(line)
                    if "type" in data:
                        type_counts[data["type"]] = (
                            type_counts.get(data["type"], 0) + 1
                        )
                # Verify counts
                self.assertEqual(
                    type_counts["ShelfMember"], 8, "Expected 8 ShelfMember entries"
                )
                self.assertEqual(
                    type_counts["Review"], 2, "Expected 2 Review entries"
                )
                self.assertEqual(type_counts["Note"], 4, "Expected 4 Note entries")
                self.assertEqual(
                    type_counts["Collection"], 2, "Expected 2 Collection entries"
                )
                self.assertEqual(type_counts["ShelfLog"], logs.count())
    # Now import the export file into a different user account
    importer = NdjsonImporter.create(
        user=self.user2, file=export_path, visibility=2
    )
    importer.run()
    self.assertIn("61 items imported, 0 skipped, 0 failed.", importer.message)
    # Verify imported data
    identity2 = self.user2.identity
    takahe_identity2 = identity2.takahe_identity
    # Check that name and summary were updated
    self.assertEqual(takahe_identity2.name, "Test User")
    self.assertEqual(takahe_identity2.summary, "Test summary")
    # Check marks
    mark_book1_imported = Mark(self.user2.identity, self.book1)
    self.assertEqual(mark_book1_imported.shelf_type, ShelfType.COMPLETE)
    self.assertEqual(mark_book1_imported.comment_text, "Great sci-fi classic")
    self.assertEqual(mark_book1_imported.rating_grade, 10)
    self.assertEqual(mark_book1_imported.visibility, 1)
    self.assertEqual(
        set(mark_book1_imported.tags), set(["sci-fi", "favorite", "space"])
    )
    # book2 must end in the state of its last update (COMPLETE, visibility 0).
    mark_book2_imported = Mark(self.user2.identity, self.book2)
    self.assertEqual(mark_book2_imported.shelf_type, ShelfType.COMPLETE)
    self.assertEqual(mark_book2_imported.comment_text, "Read.")
    self.assertIsNone(mark_book2_imported.rating_grade)
    self.assertEqual(set(mark_book2_imported.tags), set(["sci-fi", "desert"]))
    self.assertEqual(mark_book2_imported.visibility, 0)
    mark_movie1_imported = Mark(self.user2.identity, self.movie1)
    self.assertEqual(mark_movie1_imported.shelf_type, ShelfType.COMPLETE)
    self.assertEqual(mark_movie1_imported.comment_text, "Mind-bending")
    self.assertEqual(mark_movie1_imported.rating_grade, 8)
    self.assertEqual(set(mark_movie1_imported.tags), set(["mindbender", "scifi"]))
    mark_episode1_imported = Mark(self.user2.identity, self.tvepisode1)
    self.assertEqual(mark_episode1_imported.shelf_type, ShelfType.COMPLETE)
    self.assertEqual(mark_episode1_imported.comment_text, "Great start")
    self.assertEqual(mark_episode1_imported.rating_grade, 9)
    self.assertEqual(set(mark_episode1_imported.tags), set(["pilot", "drama"]))
    # Check podcast episode mark
    mark_podcast_imported = Mark(self.user2.identity, self.podcastepisode)
    self.assertEqual(mark_podcast_imported.shelf_type, ShelfType.COMPLETE)
    self.assertEqual(mark_podcast_imported.comment_text, "Insightful episode")
    self.assertEqual(mark_podcast_imported.rating_grade, 8)
    self.assertEqual(set(mark_podcast_imported.tags), set(["tech", "interview"]))
    # Check reviews
    book1_reviews = Review.objects.filter(
        owner=self.user2.identity, item=self.book1
    )
    self.assertEqual(book1_reviews.count(), 1)
    self.assertEqual(book1_reviews[0].title, "My thoughts on Hyperion")
    self.assertIn("masterpiece of science fiction", book1_reviews[0].body)
    movie1_reviews = Review.objects.filter(
        owner=self.user2.identity, item=self.movie1
    )
    self.assertEqual(movie1_reviews.count(), 1)
    self.assertEqual(movie1_reviews[0].title, "Inception Review")
    self.assertIn("Christopher Nolan", movie1_reviews[0].body)
    # Check notes
    book2_notes = Note.objects.filter(owner=self.user2.identity, item=self.book2)
    self.assertEqual(book2_notes.count(), 1)
    self.assertEqual(book2_notes[0].title, "Reading progress")
    self.assertIn("world-building is incredible", book2_notes[0].content)
    self.assertEqual(book2_notes[0].progress_type, Note.ProgressType.PAGE)
    self.assertEqual(book2_notes[0].progress_value, "125")
    tvshow_notes = Note.objects.filter(owner=self.user2.identity, item=self.tvshow)
    self.assertEqual(tvshow_notes.count(), 1)
    self.assertEqual(tvshow_notes[0].title, "Before watching")
    self.assertIn("Character development", tvshow_notes[0].content)
    # Check TV episode notes
    tvepisode_notes = Note.objects.filter(
        owner=self.user2.identity, item=self.tvepisode1
    )
    self.assertEqual(tvepisode_notes.count(), 1)
    self.assertEqual(tvepisode_notes[0].title, "Episode thoughts")
    self.assertIn("Sets up the character arcs", tvepisode_notes[0].content)
    # Check podcast episode notes
    podcast_notes = Note.objects.filter(
        owner=self.user2.identity, item=self.podcastepisode
    )
    self.assertEqual(podcast_notes.count(), 1)
    self.assertEqual(podcast_notes[0].title, "Podcast episode notes")
    self.assertIn(
        "Interesting discussion about tech trends", podcast_notes[0].content
    )
    self.assertEqual(podcast_notes[0].progress_type, Note.ProgressType.TIMESTAMP)
    self.assertEqual(podcast_notes[0].progress_value, "23:45")
    # Check first collection
    collections = Collection.objects.filter(
        owner=self.user2.identity, title="Favorites"
    )
    self.assertEqual(collections.count(), 1)
    self.assertEqual(collections[0].brief, "My all-time favorites")
    self.assertEqual(collections[0].visibility, 1)
    # The first collection is compared as an ordered list.
    collection_items = list(collections[0].ordered_items)
    self.assertEqual([self.book1, self.movie1], collection_items)
    # Check second collection
    collections2 = Collection.objects.filter(
        owner=self.user2.identity, title="To Review"
    )
    self.assertEqual(collections2.count(), 1)
    self.assertEqual(collections2[0].brief, "Items I need to review soon")
    self.assertEqual(collections2[0].visibility, 1)
    # Check second collection items (membership only, order not asserted)
    collection2_items = [m.item for m in collections2[0].members.all()]
    self.assertEqual(len(collection2_items), 3)
    self.assertIn(self.book2, collection2_items)
    self.assertIn(self.movie2, collection2_items)
    self.assertIn(self.tvshow, collection2_items)
    # The pinned "favorite" tag must exist for user2 with the same flags.
    tag1 = Tag.objects.filter(owner=self.user2.identity, title="favorite").first()
    self.assertIsNotNone(tag1)
    if tag1:
        self.assertTrue(tag1.pinned)
        self.assertEqual(tag1.visibility, 2)
    # Check shelf log entries: same (item, shelf, timestamp) sequence for
    # both users when ordered by timestamp then item id.
    logs2 = ShelfLogEntry.objects.filter(owner=self.user2.identity).order_by(
        "timestamp", "item_id"
    )
    l1 = [(log.item, log.shelf_type, log.timestamp) for log in logs]
    l2 = [(log.item, log.shelf_type, log.timestamp) for log in logs2]
    self.assertEqual(l1, l2)

View file

@ -65,23 +65,34 @@ class ShelfTest(TestCase):
self.assertEqual(q1.members.all().count(), 0)
self.assertEqual(q2.members.all().count(), 0)
Mark(user.identity, book1).update(ShelfType.WISHLIST)
time.sleep(0.001) # add a little delay to make sure the timestamp is different
Mark(user.identity, book2).update(ShelfType.WISHLIST)
log = [ll.shelf_type for ll in shelf_manager.get_log_for_item(book1)]
self.assertEqual(log, ["wishlist"])
log = [ll.shelf_type for ll in shelf_manager.get_log_for_item(book2)]
self.assertEqual(log, ["wishlist"])
time.sleep(0.001) # add a little delay to make sure the timestamp is different
Mark(user.identity, book1).update(ShelfType.WISHLIST)
log = [ll.shelf_type for ll in shelf_manager.get_log_for_item(book1)]
self.assertEqual(log, ["wishlist"])
time.sleep(0.001)
self.assertEqual(q1.members.all().count(), 2)
Mark(user.identity, book1).update(ShelfType.PROGRESS)
time.sleep(0.001)
self.assertEqual(q1.members.all().count(), 1)
self.assertEqual(q2.members.all().count(), 1)
time.sleep(0.001)
self.assertEqual(len(Mark(user.identity, book1).all_post_ids), 2)
log = shelf_manager.get_log_for_item(book1)
self.assertEqual(log.count(), 2)
log = [ll.shelf_type for ll in shelf_manager.get_log_for_item(book1)]
self.assertEqual(log, ["wishlist", "progress"])
Mark(user.identity, book1).update(ShelfType.PROGRESS, metadata={"progress": 1})
time.sleep(0.001)
self.assertEqual(q1.members.all().count(), 1)
self.assertEqual(q2.members.all().count(), 1)
log = shelf_manager.get_log_for_item(book1)
self.assertEqual(log.count(), 2)
log = [ll.shelf_type for ll in shelf_manager.get_log_for_item(book1)]
self.assertEqual(log, ["wishlist", "progress"])
self.assertEqual(len(Mark(user.identity, book1).all_post_ids), 2)
# these tests are no longer relevant because the log is not used to track metadata changes
@ -127,7 +138,8 @@ class ShelfTest(TestCase):
# test delete mark -> one more log
Mark(user.identity, book1).delete()
self.assertEqual(log.count(), 4)
log = [ll.shelf_type for ll in shelf_manager.get_log_for_item(book1)]
self.assertEqual(log, ["wishlist", "progress", "complete", None])
deleted_mark = Mark(user.identity, book1)
self.assertEqual(deleted_mark.shelf_type, None)
self.assertEqual(deleted_mark.tags, [])

201
journal/tests/rating.py Normal file
View file

@ -0,0 +1,201 @@
from django.test import TestCase
from catalog.common.models import Item
from catalog.models import Edition, IdType, Movie, TVEpisode, TVSeason, TVShow
from journal.models.rating import Rating
from users.models import User
class RatingTest(TestCase):
    """Tests for ``Rating``: aggregation, update/delete and the minimum-count rule."""

    databases = "__all__"

    @staticmethod
    def _en_title(text):
        """Return a one-entry English ``localized_title`` value for ``text``."""
        return [{"lang": "en", "text": text}]

    def _rate(self, item, raters, scores):
        """Store one rating per (rater, score) pair on ``item`` with visibility 1."""
        for rater, score in zip(raters, scores):
            Rating.update_item_rating(item, rater.identity, score, visibility=1)

    def setUp(self):
        """Register ten users and create a book, a movie and a show/season/episode."""
        self.users = [
            User.register(email=f"user{n}@example.com", username=f"user{n}")
            for n in range(10)
        ]
        self.book = Edition.objects.create(
            localized_title=self._en_title("Test Book"),
            primary_lookup_id_type=IdType.ISBN,
            primary_lookup_id_value="9780553283686",
            author=["Test Author"],
        )
        self.movie = Movie.objects.create(
            localized_title=self._en_title("Test Movie"),
            primary_lookup_id_type=IdType.IMDB,
            primary_lookup_id_value="tt1234567",
            director=["Test Director"],
            year=2020,
        )
        # The season is attached to the show, and the episode to the season.
        self.tvshow = TVShow.objects.create(
            localized_title=self._en_title("Test Show"),
            primary_lookup_id_type=IdType.IMDB,
            primary_lookup_id_value="tt9876543",
        )
        self.tvseason = TVSeason.objects.create(
            localized_title=self._en_title("Season 1"),
            show=self.tvshow,
            season_number=1,
        )
        self.tvepisode = TVEpisode.objects.create(
            localized_title=self._en_title("Episode 1"),
            season=self.tvseason,
            episode_number=1,
        )

    def test_rating_basic(self):
        """A single item rated by every user reports count, average and distribution."""
        scores = [7, 8, 9, 10, 8, 7, 6, 9, 10, 8]
        self._rate(self.book, self.users, scores)

        info = Rating.get_info_for_item(self.book)
        self.assertEqual(info["count"], 10)

        # 82 points over 10 ratings -> 8.2
        average = round(sum(scores) / len(scores), 1)
        self.assertEqual(info["average"], average)

        # Percentages per bucket: [1-2, 3-4, 5-6, 7-8, 9-10]
        distribution = [0, 0, 10, 50, 40]
        self.assertEqual(info["distribution"], distribution)

        # The first user's own rating is retrievable on its own.
        first_user = self.users[0]
        self.assertEqual(Rating.get_item_rating(self.book, first_user.identity), 7)

        # The same aggregates are exposed on the item loaded fresh from the database.
        stored = Item.objects.get(pk=self.book.pk)
        self.assertEqual(stored.rating, average)
        self.assertEqual(stored.rating_count, 10)
        self.assertEqual(stored.rating_distribution, distribution)

    def test_rating_multiple_items(self):
        """Ratings on different items are aggregated independently."""
        movie_scores = [3, 4, 5, 6, 7, 8, 9, 10, 2, 1]
        show_scores = [10, 9, 8, 9, 10, 9, 8, 10, 9, 8]
        self._rate(self.movie, self.users, movie_scores)
        self._rate(self.tvshow, self.users, show_scores)

        movie_info = Rating.get_info_for_item(self.movie)
        show_info = Rating.get_info_for_item(self.tvshow)

        # Counts
        self.assertEqual(movie_info["count"], 10)
        self.assertEqual(show_info["count"], 10)

        # Averages, rounded to one decimal place
        movie_average = sum(movie_scores) / len(movie_scores)
        show_average = sum(show_scores) / len(show_scores)
        self.assertEqual(movie_info["average"], round(movie_average, 1))
        self.assertEqual(show_info["average"], round(show_average, 1))

        # Buckets are [1-2, 3-4, 5-6, 7-8, 9-10]; the movie has two ratings in each.
        self.assertEqual(movie_info["distribution"], [20] * 5)
        # The show only received ratings of 8 and above.
        self.assertEqual(show_info["distribution"], [0, 0, 0, 30, 70])

    def test_rating_update_and_delete(self):
        """Re-rating replaces the stored value; rating with ``None`` removes it."""
        early_raters = self.users[:5]
        self._rate(self.tvepisode, early_raters, [8] * len(early_raters))
        self.assertEqual(Rating.get_rating_count_for_item(self.tvepisode), 5)

        # Overwrite the first user's rating.
        changer = self.users[0].identity
        Rating.update_item_rating(self.tvepisode, changer, 10, visibility=1)
        self.assertEqual(Rating.get_item_rating(self.tvepisode, changer), 10)

        # Passing None as the grade deletes the second user's rating.
        remover = self.users[1].identity
        Rating.update_item_rating(self.tvepisode, remover, None, visibility=1)
        self.assertEqual(Rating.get_rating_count_for_item(self.tvepisode), 4)
        self.assertIsNone(Rating.get_item_rating(self.tvepisode, remover))

    def test_rating_minimum_count(self):
        """No aggregate rating is reported until the fifth rating arrives."""
        # Four ratings: one short of the threshold this test expects (5).
        self._rate(self.book, self.users[:4], [10] * 4)
        self.assertIsNone(Rating.get_rating_for_item(self.book))

        # The fifth rating makes the aggregate available.
        Rating.update_item_rating(self.book, self.users[4].identity, 10, visibility=1)
        self.assertEqual(Rating.get_rating_for_item(self.book), 10.0)

    def test_tvshow_rating_includes_children(self):
        """Ratings given to a show's season count towards the show's rating info."""
        # One rating directly on the show.
        Rating.update_item_rating(self.tvshow, self.users[0].identity, 6, visibility=1)
        # Five ratings (users 1-5) on the season that belongs to the show.
        self._rate(self.tvseason, self.users[1:6], [10] * 5)

        show_info = Rating.get_info_for_item(self.tvshow)
        # 1 rating on the show + 5 on the season
        self.assertEqual(show_info["count"], 6)
        # (6 + 5 * 10) / 6 = 9.33..., reported as 9.3
        self.assertEqual(show_info["average"], 9.3)

View file

@ -1,55 +0,0 @@
site_name: 🧩 NeoDB
site_url: https://neodb.net
repo_url: http://github.com/neodb-social/neodb
edit_uri: blob/main/docs/
site_description: Mark the things you love.
nav:
- Overview: 'index.md'
- features.md
- servers.md
- apps.md
- install.md
- upgrade.md
- configuration.md
- troubleshooting.md
- development.md
- api.md
- origin.md
theme:
logo: assets/logo.svg
name: material
font:
text: Lato
palette:
primary: custom
accent: custom
features:
# - navigation.tabs
# - navigation.footer
custom_dir: docs/templates
extra:
social:
- icon: fontawesome/brands/mastodon
link: https://mastodon.online/@neodb/
- icon: fontawesome/brands/bluesky
link: https://bsky.app/profile/neodb.net
- icon: fontawesome/brands/x-twitter
link: https://twitter.com/NeoDBsocial
- icon: fontawesome/brands/threads
link: https://www.threads.net/@neodb.social
- icon: fontawesome/brands/discord
link: https://discord.gg/QBHkrV8bxK
- icon: fontawesome/brands/github
link: http://github.com/neodb-social/neodb
- icon: fontawesome/brands/docker
link: https://hub.docker.com/u/neodb
extra_css:
- stylesheets/extra.css
markdown_extensions:
- pymdownx.superfences:
custom_fences:
- name: mermaid
class: mermaid
format: !!python/name:pymdownx.superfences.fence_code_format
hooks:
- mkdocs_hook.py

View file

@ -1,26 +0,0 @@
import json
def on_page_markdown(markdown, page, config, **kwargs):
    """Expand the ``{servers}`` placeholder on the servers page.

    MkDocs hook. When ``page.url`` is ``"servers/"``, the file
    ``servers.json`` inside ``config.docs_dir`` is loaded and every
    ``{servers}`` occurrence in ``markdown`` is replaced by a bullet list
    built from the entries of its ``"servers"`` array. Each entry must have
    a ``"host"``; ``"name"`` (defaults to the host), ``"label"``,
    ``"language"``, ``"description"`` and ``"admin"`` are optional.

    Args:
        markdown: Markdown source of the page being processed.
        page: Page object; only its ``url`` attribute is read.
        config: Config object; only its ``docs_dir`` attribute is read.
        **kwargs: Extra hook arguments, ignored.

    Returns:
        The markdown with the placeholder substituted for the servers page,
        or ``None`` for any other page.

    Raises:
        OSError: If ``servers.json`` cannot be opened.
        KeyError: If the JSON has no ``"servers"`` key or an entry lacks
            ``"host"``.
    """
    if page.url != "servers/":
        return None

    # Decode explicitly as UTF-8 so non-ASCII names/descriptions do not
    # depend on the platform's default encoding.
    with open(config.docs_dir + "/servers.json", encoding="utf-8") as f:
        servers = json.load(f)

    entries = []
    for s in servers["servers"]:
        host = s["host"]
        name = s.get("name", host)
        admin = s.get("admin", [])
        label = s.get("label", [])
        language = s.get("language", [])
        description = s.get("description", "")

        # Collect the pieces of one bullet and join them once.
        parts = [f" - **[{name}](https://{host})**"]
        if label:
            parts.append(" " + " ".join(f"`{a}`" for a in label))
        if language:
            parts.append(" " + " ".join(f"`{a}`" for a in language))
        if description:
            parts.append(f" \n {description}")
        if admin:
            parts.append(" \n admin: " + ", ".join(f"`{a}`" for a in admin))
        parts.append("\n")
        entries.append("".join(parts))

    return markdown.replace("{servers}", "".join(entries))

View file

@ -9,7 +9,7 @@
# generate-hashes: false
# universal: false
aiohappyeyeballs==2.4.6
aiohappyeyeballs==2.5.0
# via aiohttp
aiohttp==3.11.13
# via discord-py
@ -62,10 +62,10 @@ cssbeautifier==1.15.4
# via djlint
dateparser==1.2.1
deepmerge==2.0
discord-py==2.5.0
discord-py==2.5.2
distlib==0.3.9
# via virtualenv
django==4.2.19
django==4.2.20
# via django-anymail
# via django-appconf
# via django-auditlog
@ -130,7 +130,7 @@ httpcore==1.0.7
# via httpx
httpx==0.28.1
# via atproto
identify==2.6.8
identify==2.6.9
# via pre-commit
idna==3.10
# via anyio
@ -138,7 +138,7 @@ idna==3.10
# via requests
# via yarl
igdb-api-v4==0.3.3
jinja2==3.1.5
jinja2==3.1.6
# via mkdocs
# via mkdocs-material
jsbeautifier==1.15.4
@ -159,7 +159,7 @@ markdown==3.7
# via mkdocs
# via mkdocs-material
# via pymdown-extensions
markdownify==1.0.0
markdownify==1.1.0
markupsafe==3.0.2
# via jinja2
# via mkdocs
@ -201,7 +201,7 @@ pre-commit==4.1.0
propcache==0.3.0
# via aiohttp
# via yarl
protobuf==5.29.3
protobuf==6.30.0
# via igdb-api-v4
psycopg2-binary==2.9.10
pycparser==2.22
@ -252,7 +252,7 @@ rjsmin==1.2.2
# via django-compressor
rq==2.1.0
# via django-rq
ruff==0.9.9
ruff==0.9.10
sentry-sdk==2.22.0
setproctitle==1.3.5
six==1.17.0
@ -285,7 +285,7 @@ typing-extensions==4.12.2
# via pydantic
# via pydantic-core
# via pyright
tzlocal==5.3
tzlocal==5.3.1
# via dateparser
urllib3==2.3.0
# via django-anymail
@ -293,7 +293,7 @@ urllib3==2.3.0
# via sentry-sdk
urlman==2.0.2
validators==0.34.0
virtualenv==20.29.2
virtualenv==20.29.3
# via pre-commit
watchdog==6.0.0
# via mkdocs

View file

@ -9,7 +9,7 @@
# generate-hashes: false
# universal: false
aiohappyeyeballs==2.4.6
aiohappyeyeballs==2.5.0
# via aiohttp
aiohttp==3.11.13
# via discord-py
@ -48,8 +48,8 @@ cryptography==44.0.2
# via atproto
dateparser==1.2.1
deepmerge==2.0
discord-py==2.5.0
django==4.2.19
discord-py==2.5.2
django==4.2.20
# via django-anymail
# via django-appconf
# via django-auditlog
@ -116,7 +116,7 @@ loguru==0.7.3
lxml==5.3.1
markdown==3.7
# via django-markdownx
markdownify==1.0.0
markdownify==1.1.0
mistune==3.1.2
multidict==6.1.0
# via aiohttp
@ -132,7 +132,7 @@ podcastparser==0.6.10
propcache==0.3.0
# via aiohttp
# via yarl
protobuf==5.29.3
protobuf==6.30.0
# via igdb-api-v4
psycopg2-binary==2.9.10
pycparser==2.22
@ -190,7 +190,7 @@ typing-extensions==4.12.2
# via beautifulsoup4
# via pydantic
# via pydantic-core
tzlocal==5.3
tzlocal==5.3.1
# via dateparser
urllib3==2.3.0
# via django-anymail

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View file

@ -14,6 +14,7 @@ class Migration(migrations.Migration):
name="type",
field=models.CharField(
choices=[
("journal.baseimporter", "base importer"),
("journal.csvexporter", "csv exporter"),
("journal.csvimporter", "csv importer"),
("journal.doubanimporter", "douban importer"),
@ -21,6 +22,8 @@ class Migration(migrations.Migration):
("journal.goodreadsimporter", "goodreads importer"),
("journal.letterboxdimporter", "letterboxd importer"),
("journal.ndjsonexporter", "ndjson exporter"),
("journal.ndjsonimporter", "ndjson importer"),
("journal.opmlimporter", "opml importer"),
],
db_index=True,
max_length=255,

View file

@ -82,7 +82,6 @@ class Task(TypedModel):
task.refresh_from_db()
task.state = cls.States.complete if ok else cls.States.failed
task.save()
task.notify()
def enqueue(self):
return django_rq.get_queue(self.TaskQueue).enqueue(

View file

@ -10,137 +10,18 @@
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{{ site_name }} - {% trans 'Data Management' %}</title>
{% include "common_libs.html" %}
<script>
document.addEventListener('htmx:responseError', (event) => {
let response = event.detail.xhr.response;
let body = response ? response : `Request error: ${event.detail.xhr.statusText}`;
alert(body);
});
</script>
</head>
<body>
{% include "_header.html" %}
<main>
<div class="grid__main">
<article>
<details>
<summary>{% trans 'Export Data' %}</summary>
<form action="{% url 'users:export_csv' %}"
method="post"
enctype="multipart/form-data">
{% csrf_token %}
<input type="submit"
value="{% trans 'Export marks, reviews and notes in CSV' %}" />
{% if csv_export_task %}
<br>
{% trans 'Last export' %}: {{ csv_export_task.created_time }}
{% trans 'Status' %}: {{ csv_export_task.get_state_display }}
<br>
{{ csv_export_task.message }}
{% if csv_export_task.metadata.file %}
<a href="{% url 'users:export_csv' %}" download>{% trans 'Download' %}</a>
{% endif %}
{% endif %}
</form>
<hr>
<form action="{% url 'users:export_ndjson' %}"
method="post"
enctype="multipart/form-data">
{% csrf_token %}
<input type="submit" value="{% trans 'Export everything in NDJSON' %}" />
{% if ndjson_export_task %}
<br>
{% trans 'Last export' %}: {{ ndjson_export_task.created_time }}
{% trans 'Status' %}: {{ ndjson_export_task.get_state_display }}
<br>
{{ ndjson_export_task.message }}
{% if ndjson_export_task.metadata.file %}
<a href="{% url 'users:export_ndjson' %}" download>{% trans 'Download' %}</a>
{% endif %}
{% endif %}
</form>
<hr>
<form action="{% url 'users:export_marks' %}"
method="post"
enctype="multipart/form-data">
{% csrf_token %}
<input type="submit"
class="secondary"
value="{% trans 'Export marks and reviews in XLSX (Doufen format)' %}" />
<small>exporting to this format will be deprecated soon.</small>
{% if export_task %}
<br>
{% trans 'Last export' %}: {{ export_task.created_time }}
{% trans 'Status' %}: {{ export_task.get_state_display }}
<br>
{{ export_task.message }}
{% if export_task.metadata.file %}
<a href="{% url 'users:export_marks' %}" download>{% trans 'Download' %}</a>
{% endif %}
{% endif %}
</form>
</details>
</article>
<article>
<details>
<summary>{% trans 'Import Data' %}</summary>
<form action="{% url 'users:import_neodb' %}"
method="post"
enctype="multipart/form-data">
{% csrf_token %}
<ul>
<li>
{% trans 'Upload a <code>.zip</code> file containing <code>.csv</code> or <code>.ndjson</code> files exported from NeoDB.' %}
</li>
<li>{% trans 'Existing marks and reviews with newer dates will be preserved.' %}</li>
</ul>
<br>
<input type="file" name="file" required accept=".zip">
<p>
{% trans 'Visibility' %}: <small><code>.csv</code> only</small>
<br>
<label for="csv_visibility_0">
<input type="radio"
name="visibility"
value="0"
required=""
id="csv_visibility_0"
checked>
{% trans 'Public' %}
</label>
<label for="csv_visibility_1">
<input type="radio"
name="visibility"
value="1"
required=""
id="csv_visibility_1">
{% trans 'Followers Only' %}
</label>
<label for="csv_visibility_2">
<input type="radio"
name="visibility"
value="2"
required=""
id="csv_visibility_2">
{% trans 'Mentioned Only' %}
</label>
</p>
<input type="submit" value="{% trans 'Import' %}" />
<small>
{% if csv_import_task %}
{% trans 'Last import started' %}: {{ csv_import_task.created_time }}
{% if csv_import_task.state == 0 or csv_import_task.state == 1 %}
<div hx-get="{% url 'users:user_task_status' 'csv_import' %}"
hx-target="this"
hx-trigger="load delay:2s, every 10s"
hx-swap="outerHTML"></div>
{% else %}
{% trans 'Status' %}: {{ csv_import_task.get_state_display }}。
{{ csv_import_task.message }}
{% endif %}
{% if csv_import_task.metadata.failed_items %}
{% trans 'Failed items' %}:
<br>
<textarea readonly>{% for item in csv_import_task.metadata.failed_items %}{{item}}&#10;{% endfor %}</textarea>
{% endif %}
{% endif %}
</small>
</form>
</details>
</article>
<article>
<details>
<summary>{% trans 'Import Marks and Reviews from Douban' %}</summary>
@ -193,59 +74,50 @@
<input type="submit"
{% if import_task.status == "pending" %} onclick="return confirm('{% trans "Another import is in progress, starting a new import may cause issues, sure to import?" %}')" value="{% trans "Import in progress, please wait" %}" {% else %} value="{% trans 'Import' %}" {% endif %} />
</form>
<div hx-get="{% url 'users:import_status' %}"
hx-trigger="load delay:1s"
hx-swap="outerHTML"></div>
</details>
</article>
<article>
<details>
<summary>{% trans 'Import Shelf or List from Goodreads' %}</summary>
<form action="{% url 'users:import_goodreads' %}" method="post">
<form hx-post="{% url 'users:import_goodreads' %}">
{% csrf_token %}
<div>
{% trans 'Link to Goodreads Profile / Shelf / List' %}
<ul>
<li>
Profile <code>https://www.goodreads.com/user/show/12345-janedoe</code>
<br>
{% trans 'want-to-read / currently-reading / read books and their reviews will be imported.' %}
</li>
<li>
Shelf <code>https://www.goodreads.com/review/list/12345-janedoe?shelf=name</code>
<br>
{% trans 'Shelf will be imported as a new collection.' %}
</li>
<li>
List <code>https://www.goodreads.com/list/show/155086.Popular_Highlights</code>
<br>
{% trans 'List will be imported as a new collection.' %}
</li>
<li>
<mark>Who Can View My Profile</mark> must be set as <mark>anyone</mark> prior to import.
</li>
</ul>
<input type="url"
name="url"
value=""
placeholder="https://www.goodreads.com/user/show/12345-janedoe"
required>
<input type="submit" value="{% trans 'Import' %}" />
<small>
{% if goodreads_task %}
<br>
{% trans 'Last import started' %}: {{ goodreads_task.created_time }}
{% trans 'Status' %}: {{ goodreads_task.get_state_display }}。
<br>
{{ goodreads_task.message }}
{% endif %}
</small>
</div>
<ul>
<li>
Profile <code>https://www.goodreads.com/user/show/12345-janedoe</code>
{% trans 'want-to-read / currently-reading / read books and their reviews will be imported.' %}
</li>
<li>
Shelf <code>https://www.goodreads.com/review/list/12345-janedoe?shelf=name</code>
{% trans 'Shelf will be imported as a new collection.' %}
</li>
<li>
List <code>https://www.goodreads.com/list/show/155086.Popular_Highlights</code>
{% trans 'List will be imported as a new collection.' %}
</li>
<li>
<mark>Who Can View My Profile</mark> must be set as <mark>anyone</mark> prior to import.
</li>
</ul>
{% include "users/user_task_status.html" with task=goodreads_task %}
</form>
</details>
</article>
<article>
<details>
<summary>{% trans 'Import from Letterboxd' %}</summary>
<form action="{% url 'users:import_letterboxd' %}"
method="post"
<form hx-post="{% url 'users:import_letterboxd' %}"
enctype="multipart/form-data">
{% csrf_token %}
<ul>
@ -292,30 +164,15 @@
</label>
</p>
<input type="submit" value="{% trans 'Import' %}" />
<small>
{% trans 'Only forward changes(none->to-watch->watched) will be imported.' %}
{% if letterboxd_task %}
<br>
{% trans 'Last import started' %}: {{ letterboxd_task.created_time }}
{% trans 'Status' %}: {{ letterboxd_task.get_state_display }}。
<br>
{{ letterboxd_task.message }}
{% if letterboxd_task.metadata.failed_urls %}
{% trans 'Failed links, likely due to Letterboxd error, you may have to mark them manually' %}:
<br>
<textarea readonly>{% for url in letterboxd_task.metadata.failed_urls %}{{url}}&#10;{% endfor %}</textarea>
{% endif %}
{% endif %}
</small>
<small>{% trans 'Only forward changes(none->to-watch->watched) will be imported.' %}</small>
{% include "users/user_task_status.html" with task=letterboxd_task %}
</form>
</details>
</article>
<article>
<details>
<summary>{% trans 'Import Podcast Subscriptions' %}</summary>
<form action="{% url 'users:import_opml' %}"
method="post"
enctype="multipart/form-data">
<form hx-post="{% url 'users:import_opml' %}" enctype="multipart/form-data">
{% csrf_token %}
<div>
{% trans 'Import Method' %}
@ -331,11 +188,230 @@
<input id="opml_import_mode_1" type="radio" name="import_mode" value="1">
{% trans 'Import as a new collection' %}
</label>
{% trans 'Visibility' %}:
<label for="opml_visibility_0">
<input type="radio"
name="visibility"
value="0"
required=""
id="opml_visibility_0"
checked>
{% trans 'Public' %}
</label>
<label for="opml_visibility_1">
<input type="radio"
name="visibility"
value="1"
required=""
id="opml_visibility_1">
{% trans 'Followers Only' %}
</label>
<label for="opml_visibility_2">
<input type="radio"
name="visibility"
value="2"
required=""
id="opml_visibility_2">
{% trans 'Mentioned Only' %}
</label>
<br>
{% trans 'Select OPML file' %}
<input type="file" name="file" id="excel" required accept=".opml,.xml">
<input type="file" name="file" required accept=".opml,.xml">
<input type="submit" value="{% trans 'Import' %}" />
</div>
{% include "users/user_task_status.html" with task=opml_import_task %}
</form>
</details>
</article>
<article>
<details>
<summary>{% trans 'Import NeoDB Archive' %}</summary>
<form hx-post="{% url 'users:import_neodb' %}"
enctype="multipart/form-data">
{% csrf_token %}
<ul>
<li>
{% trans 'Upload a <code>.zip</code> file containing <code>.csv</code> or <code>.ndjson</code> files exported from NeoDB.' %}
</li>
<li>{% trans 'Existing data may be overwritten.' %}</li>
</ul>
<input type="file" name="file" id="neodb_import_file" required accept=".zip">
<div id="detected_format_info"
style="display: none;
margin: 10px 0;
padding: 8px 12px;
border-radius: 4px;
background-color: var(--card-background-color, #f8f9fa);
border: 1px solid var(--card-border-color, #dee2e6)">
<i class="fa fa-info-circle"></i> {% trans 'Detected format' %}: <strong id="detected_format">-</strong>
</div>
<div id="visibility_settings" style="display: none;">
<p>
{% trans 'Visibility' %}:
<br>
<label for="csv_visibility_0">
<input type="radio"
name="visibility"
value="0"
required=""
id="csv_visibility_0"
checked>
{% trans 'Public' %}
</label>
<label for="csv_visibility_1">
<input type="radio"
name="visibility"
value="1"
required=""
id="csv_visibility_1">
{% trans 'Followers Only' %}
</label>
<label for="csv_visibility_2">
<input type="radio"
name="visibility"
value="2"
required=""
id="csv_visibility_2">
{% trans 'Mentioned Only' %}
</label>
</p>
</div>
<input type="hidden" name="format_type" id="format_type" value="" required>
<input type="submit" value="{% trans 'Import' %}" id="import_submit" />
<script src="{{ cdn_url }}/npm/jszip@3.10.1/dist/jszip.min.js"></script>
<script>
// Client-side format detection for the NeoDB archive import form.
// When a .zip is chosen, its file list is inspected with JSZip to decide
// whether it is an NDJSON or a CSV export. The result is written to the
// hidden #format_type field, the visibility options are shown for CSV only,
// and the submit button stays disabled while no format is known.
document.addEventListener('DOMContentLoaded', function() {
  const fileInput = document.getElementById('neodb_import_file');
  if (!fileInput) return;

  const byId = (id) => document.getElementById(id);

  // Remember the info box's initial inline colours so the error styling
  // applied after a failed detection can be undone on the next attempt.
  const initialInfo = byId('detected_format_info');
  const defaultBackground = initialInfo ? initialInfo.style.backgroundColor : '';
  const defaultColor = initialInfo ? initialInfo.style.color : '';

  // Enable the submit button only when a format has been detected.
  function syncSubmitButton() {
    const submit = byId('import_submit');
    if (byId('format_type').value === '') {
      submit.setAttribute('disabled', '');
    } else {
      submit.removeAttribute('disabled');
    }
  }

  // Hide all detection UI and forget any previously detected format.
  function resetDetection() {
    byId('detected_format_info').style.display = 'none';
    byId('visibility_settings').style.display = 'none';
    byId('format_type').value = '';
    syncSubmitButton();
  }

  fileInput.addEventListener('change', async function(event) {
    const file = event.target.files[0];
    // Accept either a zip MIME type or a .zip file name.
    const isZip = Boolean(file) && (
      file.type === 'application/zip' ||
      file.type === 'application/x-zip-compressed' ||
      file.name.toLowerCase().endsWith('.zip'));
    if (!isZip) {
      resetDetection();
      return;
    }

    const info = byId('detected_format_info');
    // Undo error colours left over from an earlier failed detection.
    info.style.backgroundColor = defaultBackground;
    info.style.color = defaultColor;
    // Drop the previous result so a stale format cannot be submitted
    // while the new archive is still being inspected.
    byId('format_type').value = '';
    syncSubmitButton();

    // Show "Detecting format..." with a spinner
    byId('detected_format').innerHTML = '{% trans "Detecting format..." %} <i class="fa fa-spinner fa-spin"></i>';
    info.style.display = 'block';

    try {
      // Use JSZip to examine the actual contents of the ZIP file
      const zip = new JSZip();
      const zipContents = await zip.loadAsync(file);
      const fileNames = Object.keys(zipContents.files);

      // Specific file names indicate the export format; NDJSON wins if both match.
      const hasNdjson = fileNames.some(name => name === 'journal.ndjson' || name === 'catalog.ndjson');
      const hasCsv = fileNames.some(name => name.endsWith('_mark.csv') ||
        name.endsWith('_review.csv') ||
        name.endsWith('_note.csv'));

      let format = '';
      let formatValue = '';
      let formatIcon = '';
      if (hasNdjson) {
        format = 'NDJSON';
        formatValue = 'ndjson';
        formatIcon = '<i class="fa fa-file-code"></i> ';
      } else if (hasCsv) {
        format = 'CSV';
        formatValue = 'csv';
        formatIcon = '<i class="fa fa-file-csv"></i> ';
      } else {
        // Unable to detect format from contents
        format = '{% trans "Unknown format" %}';
        formatValue = '';
        formatIcon = '<i class="fa fa-question-circle"></i> ';
      }

      byId('detected_format').innerHTML = formatIcon + format;
      byId('format_type').value = formatValue;
      // Visibility can only be chosen for CSV archives.
      byId('visibility_settings').style.display = formatValue === 'csv' ? 'block' : 'none';
    } catch (error) {
      console.error('Error examining ZIP file:', error);
      byId('detected_format').innerHTML = '<i class="fa fa-exclamation-triangle"></i> {% trans "Error detecting format" %}';
      byId('format_type').value = '';
      // Make the error more visible
      info.style.backgroundColor = 'var(--form-element-invalid-active-border-color, #d9534f)';
      info.style.color = 'white';
      // Hide visibility settings on error
      byId('visibility_settings').style.display = 'none';
    }

    syncSubmitButton();
  });
});
</script>
{% include "users/user_task_status.html" with task=neodb_import_task %}
</form>
</details>
</article>
<article>
<details>
<summary>{% trans 'Export NeoDB Archive' %}</summary>
<form hx-post="{% url 'users:export_csv' %}" enctype="multipart/form-data">
{% csrf_token %}
<input type="submit"
value="{% trans 'Export marks, reviews and notes in CSV' %}" />
{% include "users/user_task_status.html" with task=csv_export_task %}
</form>
<hr>
<form hx-post="{% url 'users:export_ndjson' %}"
enctype="multipart/form-data">
{% csrf_token %}
<input type="submit" value="{% trans 'Export everything in NDJSON' %}" />
{% include "users/user_task_status.html" with task=ndjson_export_task %}
</form>
<hr>
<form action="{% url 'users:export_marks' %}"
method="post"
enctype="multipart/form-data">
{% csrf_token %}
<b>exporting to this format will be deprecated soon, please use csv or ndjson format.</b>
<input type="submit"
class="secondary"
value="{% trans 'Export marks and reviews in XLSX (Doufen format)' %}" />
{% if export_task %}
<br>
{% trans 'Last export' %}: {{ export_task.created_time }}
{% trans 'Status' %}: {{ export_task.get_state_display }}
<br>
{{ export_task.message }}
{% if export_task.metadata.file %}
<a href="{% url 'users:export_marks' %}" download>{% trans 'Download' %}</a>
{% endif %}
{% endif %}
</form>
</details>
</article>
@ -351,25 +427,6 @@
</div>
</details>
</article>
{% comment %}
<article>
<details>
<summary>{% trans 'Reset visibility for all marks' %}</summary>
<form action="{% url 'users:reset_visibility' %}" method="post">
{% csrf_token %}
<input type="submit" value="{% trans 'Reset' %}" />
<div>
<input type="radio" name="visibility" id="visPublic" value="0" checked>
<label for="visPublic">{% trans 'Public' %}</label>
<input type="radio" name="visibility" id="visFollower" value="1">
<label for="visFollower">{% trans 'Followers Only' %}</label>
<input type="radio" name="visibility" id="visSelf" value="2">
<label for="visSelf">{% trans 'Mentioned Only' %}</label>
</div>
</form>
</details>
</article>
{% endcomment %}
</div>
{% include "_sidebar.html" with show_profile=1 identity=request.user.identity %}
</main>

View file

@ -1,19 +1,33 @@
{% load i18n %}
<div hx-get="{% url 'users:user_task_status' 'csv_import' %}"
{% if task.state == 0 or task.state == 1 %}hx-target="this" hx-trigger="every 30s"{% endif %}
hx-swap="outerHTML">
{% trans 'Status' %}: {{ task.get_state_display }}。
{{ task.message }}
<br>
{% if task.metadata.total and task.metadata.processed %}
<div class="progress-container">
<progress value="{{ task.metadata.processed }}" max="{{ task.metadata.total }}"></progress>
<div class="progress-text">
{{ task.metadata.processed }} / {{ task.metadata.total }}
({{ task.metadata.imported }} imported,
{{ task.metadata.skipped }} skipped,
{{ task.metadata.failed }} failed)
</div>
{% if task %}
<div hx-target="this"
{% if task.state == 0 or task.state == 1 %} hx-get="{% url 'users:user_task_status' task.type %}" hx-trigger="every 30s"{% endif %}
hx-swap="outerHTML">
<div>
{% if task.state == 0 %}
<i class="fa-solid fa-spinner fa-spin"></i>
{% elif task.state == 1 %}
<i class="fa-solid fa-gear fa-spin"></i>
{% elif task.state == 3 %}
<i class="fa-solid fa-triangle-exclamation"></i>
{% elif 'exporter' in task.type %}
<a href="{% url 'users:user_task_download' task.type %}" download><i class="fa fa-download"></i></a>
{% else %}
<i class="fa-solid fa-check"></i>
{% endif %}
{{ task.created_time }}
{{ task.message }}
</div>
{% endif %}
</div>
{% if task.state == 0 or task.state == 1 %}
{% if task.metadata.total and task.metadata.processed %}
<div>
<progress value="{{ task.metadata.processed }}" max="{{ task.metadata.total }}"></progress>
</div>
{% endif %}
{% endif %}
{% if task.metadata.failed_items %}
{% trans 'Failed items' %}:
<textarea readonly>{% for item in task.metadata.failed_items %}{{item}}&#10;{% endfor %}</textarea>
{% endif %}
</div>
{% endif %}

View file

@ -10,7 +10,10 @@ urlpatterns = [
path("data", data, name="data"),
path("info", account_info, name="info"),
path("profile", account_profile, name="profile"),
path("task/<str:task_name>/status", user_task_status, name="user_task_status"),
path("task/<str:task_type>/status", user_task_status, name="user_task_status"),
path(
"task/<str:task_type>/download", user_task_download, name="user_task_download"
),
path("data/import/status", data_import_status, name="import_status"),
path("data/import/goodreads", import_goodreads, name="import_goodreads"),
path("data/import/douban", import_douban, name="import_douban"),

View file

@ -4,6 +4,7 @@ import os
from django.conf import settings
from django.contrib import messages
from django.contrib.auth.decorators import login_required
from django.core.exceptions import BadRequest
from django.db.models import Min
from django.http import HttpResponse
from django.shortcuts import redirect, render
@ -18,8 +19,8 @@ from journal.importers import (
DoubanImporter,
GoodreadsImporter,
LetterboxdImporter,
NdjsonImporter,
OPMLImporter,
get_neodb_importer,
)
from journal.models import ShelfType
from takahe.utils import Takahe
@ -92,6 +93,19 @@ def data(request):
start_date = queryset.aggregate(Min("created_time"))["created_time__min"]
start_year = start_date.year if start_date else current_year
years = reversed(range(start_year, current_year + 1))
# Import tasks - check for both CSV and NDJSON importers
csv_import_task = CsvImporter.latest_task(request.user)
ndjson_import_task = NdjsonImporter.latest_task(request.user)
# Use the most recent import task for display
if ndjson_import_task and (
not csv_import_task
or ndjson_import_task.created_time > csv_import_task.created_time
):
neodb_import_task = ndjson_import_task
else:
neodb_import_task = csv_import_task
return render(
request,
"users/data.html",
@ -100,10 +114,11 @@ def data(request):
"import_task": DoubanImporter.latest_task(request.user),
"export_task": DoufenExporter.latest_task(request.user),
"csv_export_task": CsvExporter.latest_task(request.user),
"csv_import_task": CsvImporter.latest_task(request.user),
"neodb_import_task": neodb_import_task, # Use the most recent import task
"ndjson_export_task": NdjsonExporter.latest_task(request.user),
"letterboxd_task": LetterboxdImporter.latest_task(request.user),
"goodreads_task": GoodreadsImporter.latest_task(request.user),
# "opml_task": OPMLImporter.latest_task(request.user),
"years": years,
},
)
@ -121,19 +136,23 @@ def data_import_status(request):
@login_required
def user_task_status(request, task_name: str):
match task_name:
case "csv_import":
def user_task_status(request, task_type: str):
match task_type:
case "journal.csvimporter":
task_cls = CsvImporter
case "csv_export":
case "journal.ndjsonimporter":
task_cls = NdjsonImporter
case "journal.csvexporter":
task_cls = CsvExporter
case "ndjson_export":
case "journal.ndjsonexporter":
task_cls = NdjsonExporter
case "letterboxd":
case "journal.letterboxdimporter":
task_cls = LetterboxdImporter
case "goodreads":
case "journal.goodreadsimporter":
task_cls = GoodreadsImporter
case "douban":
case "journal.opmlimporter":
task_cls = OPMLImporter
case "journal.doubanimporter":
task_cls = DoubanImporter
case _:
return redirect(reverse("users:data"))
@ -141,6 +160,28 @@ def user_task_status(request, task_name: str):
return render(request, "users/user_task_status.html", {"task": task})
@login_required
def user_task_download(request, task_type: str):
    """Hand out the archive produced by the user's most recent export task.

    ``task_type`` selects the exporter; anything other than the CSV or
    NDJSON exporter sends the client back to the data page. When the latest
    task is missing, not complete, or has no ``file`` entry in its metadata,
    an error message is flashed and the client is redirected as well.
    Otherwise an empty response is returned whose headers delegate the
    actual file transfer via ``X-Accel-Redirect``.
    """
    exporters = {
        "journal.csvexporter": CsvExporter,
        "journal.ndjsonexporter": NdjsonExporter,
    }
    exporter_cls = exporters.get(task_type)
    if exporter_cls is None:
        return redirect(reverse("users:data"))

    task = exporter_cls.latest_task(request.user)
    if task and task.state == Task.States.complete and task.metadata.get("file"):
        # The stored path is absolute; drop the MEDIA_ROOT prefix so it can be
        # re-rooted under MEDIA_URL.
        # NOTE(review): presumably resolved by the front-end web server as an
        # internal redirect - confirm against the deployment config.
        relative_path = task.metadata["file"][len(settings.MEDIA_ROOT) :]
        response = HttpResponse()
        response["X-Accel-Redirect"] = settings.MEDIA_URL + relative_path
        response["Content-Type"] = "application/zip"
        response["Content-Disposition"] = f'attachment; filename="{task.filename}.zip"'
        return response

    messages.add_message(request, messages.ERROR, _("Export file not available."))
    return redirect(reverse("users:data"))
@login_required
def export_reviews(request):
if request.method != "POST":
@ -150,6 +191,7 @@ def export_reviews(request):
@login_required
def export_marks(request):
# TODO: deprecated
if request.method == "POST":
DoufenExporter.create(request.user).enqueue()
messages.add_message(request, messages.INFO, _("Generating exports."))
@ -189,22 +231,10 @@ def export_csv(request):
)
return redirect(reverse("users:data"))
CsvExporter.create(request.user).enqueue()
messages.add_message(request, messages.INFO, _("Generating exports."))
return redirect(reverse("users:data"))
else:
task = CsvExporter.latest_task(request.user)
if not task or task.state != Task.States.complete:
messages.add_message(
request, messages.ERROR, _("Export file not available.")
)
return redirect(reverse("users:data"))
response = HttpResponse()
response["X-Accel-Redirect"] = (
settings.MEDIA_URL + task.metadata["file"][len(settings.MEDIA_ROOT) :]
return redirect(
reverse("users:user_task_status", args=("journal.csvexporter",))
)
response["Content-Type"] = "application/zip"
response["Content-Disposition"] = f'attachment; filename="{task.filename}.zip"'
return response
return redirect(reverse("users:data"))
@login_required
@ -221,22 +251,10 @@ def export_ndjson(request):
)
return redirect(reverse("users:data"))
NdjsonExporter.create(request.user).enqueue()
messages.add_message(request, messages.INFO, _("Generating exports."))
return redirect(reverse("users:data"))
else:
task = NdjsonExporter.latest_task(request.user)
if not task or task.state != Task.States.complete:
messages.add_message(
request, messages.ERROR, _("Export file not available.")
)
return redirect(reverse("users:data"))
response = HttpResponse()
response["X-Accel-Redirect"] = (
settings.MEDIA_URL + task.metadata["file"][len(settings.MEDIA_ROOT) :]
return redirect(
reverse("users:user_task_status", args=("journal.ndjsonexporter",))
)
response["Content-Type"] = "application/zip"
response["Content-Disposition"] = f'attachment; filename="{task.filename}.zip"'
return response
return redirect(reverse("users:data"))
@login_required
@ -263,24 +281,26 @@ def sync_mastodon_preference(request):
@login_required
def import_goodreads(request):
    """Queue a Goodreads import for the URL submitted by the current user.

    Non-POST requests are redirected to the data page. A URL rejected by
    ``GoodreadsImporter.validate_url`` or a non-integer ``visibility`` field
    raises ``BadRequest``. On success the task is created, enqueued, and the
    client is redirected to that task's status view.
    """
    if request.method != "POST":
        return redirect(reverse("users:data"))
    raw_url = request.POST.get("url")
    if not GoodreadsImporter.validate_url(raw_url):
        raise BadRequest(_("Invalid URL."))
    try:
        visibility = int(request.POST.get("visibility", 0))
    except ValueError as e:
        # Form fields are client-controlled; a malformed number is a bad
        # request rather than a server error.
        raise BadRequest("Invalid visibility.") from e
    task = GoodreadsImporter.create(
        request.user,
        visibility=visibility,
        url=raw_url,
    )
    task.enqueue()
    return redirect(reverse("users:user_task_status", args=(task.type,)))
@login_required
def import_douban(request):
if request.method != "POST":
return redirect(reverse("users:data"))
if not DoubanImporter.validate_file(request.FILES["file"]):
raise BadRequest(_("Invalid file."))
f = (
settings.MEDIA_ROOT
+ "/"
@ -290,64 +310,75 @@ def import_douban(request):
with open(f, "wb+") as destination:
for chunk in request.FILES["file"].chunks():
destination.write(chunk)
if not DoubanImporter.validate_file(request.FILES["file"]):
messages.add_message(request, messages.ERROR, _("Invalid file."))
return redirect(reverse("users:data"))
DoubanImporter.create(
task = DoubanImporter.create(
request.user,
visibility=int(request.POST.get("visibility", 0)),
mode=int(request.POST.get("import_mode", 0)),
file=f,
).enqueue()
messages.add_message(
request, messages.INFO, _("File is uploaded and will be imported soon.")
)
return redirect(reverse("users:data"))
task.enqueue()
return redirect(reverse("users:user_task_status", args=(task.type,)))
@login_required
def import_letterboxd(request):
    """Store an uploaded Letterboxd export and queue it for import.

    Non-POST requests are redirected to the data page. A missing upload, a
    file rejected by ``LetterboxdImporter.validate_file``, or a non-integer
    ``visibility`` field raises ``BadRequest``. On success the upload is
    written under ``MEDIA_ROOT``, the task is enqueued, and the client is
    redirected to that task's status view.
    """
    if request.method != "POST":
        return redirect(reverse("users:data"))
    # .get() instead of indexing: a form posted without a file should be a
    # client error, not an unhandled key lookup failure.
    upload = request.FILES.get("file")
    if upload is None or not LetterboxdImporter.validate_file(upload):
        raise BadRequest(_("Invalid file."))
    # Parse form fields before touching the disk so a rejected request does
    # not leave an orphaned upload behind.
    try:
        visibility = int(request.POST.get("visibility", 0))
    except ValueError as e:
        raise BadRequest("Invalid visibility.") from e
    f = (
        settings.MEDIA_ROOT
        + "/"
        + GenerateDateUUIDMediaFilePath("x.zip", settings.SYNC_FILE_PATH_ROOT)
    )
    os.makedirs(os.path.dirname(f), exist_ok=True)
    with open(f, "wb+") as destination:
        for chunk in upload.chunks():
            destination.write(chunk)
    task = LetterboxdImporter.create(
        request.user,
        visibility=visibility,
        file=f,
    )
    task.enqueue()
    return redirect(reverse("users:user_task_status", args=(task.type,)))
@login_required
def import_opml(request):
    """Store an uploaded OPML file and queue it for import.

    Non-POST requests are redirected to the data page. A missing upload, a
    file rejected by ``OPMLImporter.validate_file``, or a non-integer
    ``visibility`` / ``import_mode`` field raises ``BadRequest``. On success
    the upload is written under ``MEDIA_ROOT``, the task is enqueued, and the
    client is redirected to that task's status view.
    """
    if request.method != "POST":
        return redirect(reverse("users:data"))
    # .get() instead of indexing: a form posted without a file should be a
    # client error, not an unhandled key lookup failure.
    upload = request.FILES.get("file")
    if upload is None or not OPMLImporter.validate_file(upload):
        raise BadRequest(_("Invalid file."))
    # Parse form fields before touching the disk so a rejected request does
    # not leave an orphaned upload behind.
    try:
        visibility = int(request.POST.get("visibility", 0))
        mode = int(request.POST.get("import_mode", 0))
    except ValueError as e:
        raise BadRequest("Invalid import options.") from e
    # NOTE(review): the stored name keeps a ".zip" suffix although the upload
    # is OPML - confirm whether the importer relies on it before renaming.
    f = (
        settings.MEDIA_ROOT
        + "/"
        + GenerateDateUUIDMediaFilePath("x.zip", settings.SYNC_FILE_PATH_ROOT)
    )
    os.makedirs(os.path.dirname(f), exist_ok=True)
    with open(f, "wb+") as destination:
        for chunk in upload.chunks():
            destination.write(chunk)
    task = OPMLImporter.create(
        request.user,
        visibility=visibility,
        mode=mode,
        file=f,
    )
    task.enqueue()
    return redirect(reverse("users:user_task_status", args=(task.type,)))
@login_required
def import_neodb(request):
if request.method == "POST":
format_type_hint = request.POST.get("format_type", "").lower()
if format_type_hint == "csv":
importer = CsvImporter
elif format_type_hint == "ndjson":
importer = NdjsonImporter
else:
raise BadRequest("Invalid file.")
f = (
settings.MEDIA_ROOT
+ "/"
@ -357,16 +388,11 @@ def import_neodb(request):
with open(f, "wb+") as destination:
for chunk in request.FILES["file"].chunks():
destination.write(chunk)
importer = get_neodb_importer(f)
if not importer:
messages.add_message(request, messages.ERROR, _("Invalid file."))
return redirect(reverse("users:data"))
importer.create(
task = importer.create(
request.user,
visibility=int(request.POST.get("visibility", 0)),
file=f,
).enqueue()
messages.add_message(
request, messages.INFO, _("File is uploaded and will be imported soon.")
)
task.enqueue()
return redirect(reverse("users:user_task_status", args=(task.type,)))
return redirect(reverse("users:data"))