diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml index 01e5ffd6..2b2443b2 100644 --- a/.github/workflows/check.yml +++ b/.github/workflows/check.yml @@ -3,22 +3,6 @@ name: code check on: [push, pull_request] jobs: - doc: - runs-on: ubuntu-latest - strategy: - matrix: - python-version: ['3.12'] - steps: - - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - cache: pip - - name: Run pre-commit - run: | - python -m pip install mkdocs-material - mkdocs build -s -d /tmp/neodb-docs lint: runs-on: ubuntu-latest strategy: diff --git a/FEDERATION.md b/FEDERATION.md index 8f93139f..22289e5b 100644 --- a/FEDERATION.md +++ b/FEDERATION.md @@ -1,3 +1,3 @@ # Federation -see [doc](docs/internals/federation.md) for FEP-67ff related information. +see [doc](https://neodb.net/internals/federation/) for FEP-67ff related information. diff --git a/README.md b/README.md index cfc1269c..5ad6b863 100644 --- a/README.md +++ b/README.md @@ -4,19 +4,21 @@ [![Docker Pulls](https://img.shields.io/docker/pulls/neodb/neodb?label=docker&color=3791E0&style=for-the-badge)](https://hub.docker.com/r/neodb/neodb) [![GitHub License](https://img.shields.io/github/license/neodb-social/neodb?color=E69A48&style=for-the-badge)](https://github.com/neodb-social/neodb/blob/main/LICENSE) + # 🧩 NeoDB _mark the things you love._ [NeoDB](https://neodb.net) (fka boofilsic) is an open source project and free service to help users manage, share and discover collections, reviews and ratings for culture products (e.g. books, movies, music, podcasts, games and performances) in Fediverse. -[NeoDB.social](https://neodb.social) is a free instance hosted by NeoDB developers, there are more [servers](https://neodb.net/servers/) and [apps](https://neodb.net/apps/) available. Your support is essential to keep them free and open-sourced. 
+[NeoDB.social](https://neodb.social) is a free instance hosted by NeoDB developers, there are more [servers](https://neodb.net/servers/) and [apps](https://neodb.net/apps/) available. -Follow us on [Fediverse](https://mastodon.online/@neodb), [Bluesky](https://bsky.app/profile/neodb.net) or join our [Discord community](https://discord.gg/QBHkrV8bxK) to share your ideas/questions/creations. +Follow us on [Fediverse](https://mastodon.online/@neodb), [Bluesky](https://bsky.app/profile/neodb.net) or join our [Discord community](https://discord.gg/QBHkrV8bxK) to share your ideas/questions/creations. Your support is essential to keep the services free and open-sourced. [![Mastodon](https://img.shields.io/mastodon/follow/106919732872456302?style=for-the-badge&logo=mastodon&logoColor=fff&label=%40neodb%40mastodon.social&color=6D75D2)](https://mastodon.social/@neodb) [![Discord](https://img.shields.io/discord/1041738638364528710?label=Discord&logo=discord&logoColor=fff&color=6D75D2&style=for-the-badge)](https://discord.gg/QBHkrV8bxK) [![Kofi](https://img.shields.io/badge/Ko--Fi-Donate-orange?label=Support%20NeoDB%20on%20Ko-fi&style=for-the-badge&color=ff5f5f&logo=ko-fi)](https://ko-fi.com/neodb) + ## Features - Manage a shared catalog of books/movies/tv shows/music album/games/podcasts/performances + search or create catalog items in each category @@ -66,13 +68,17 @@ Follow us on [Fediverse](https://mastodon.online/@neodb), [Bluesky](https://bsky - Other + i18n: English, Danish and Simp/Trad Chinese available; contribution for more languages welcomed + ## Host your own instance -Please see [docs/install.md](docs/install.md) +Please see [installation guide](https://neodb.net/install/). 
+ ## Contribution - To build application with NeoDB API, documentation is available in [NeoDB API Developer Console](https://neodb.social/developer/) - - To help develop NeoDB, please see [docs/development.md](docs/development.md) for some basics to start with + - To help develop NeoDB, please see [development guide](https://neodb.net/development/) for some basics to start with - To translate NeoDB to more languages, please join [our project on Weblate](https://hosted.weblate.org/projects/neodb/neodb/) + - Source code for [NeoDB documentation](https://neodb.net) can be found [here](https://github.com/neodb-social/neodb-doc) + ## Sponsor If you like this project, please consider donating to [NeoDB.social on ko-fi](https://ko-fi.com/neodb), or [Takahē](https://www.patreon.com/takahe) and [NiceDB](https://patreon.com/tertius) without whom this project won't be possible. diff --git a/boofilsic/settings.py b/boofilsic/settings.py index 04b1df5f..3f74a9cb 100644 --- a/boofilsic/settings.py +++ b/boofilsic/settings.py @@ -449,6 +449,7 @@ LANGUAGE_CODE, PREFERRED_LANGUAGES = _init_language_settings( if TESTING: # force en if testing LANGUAGE_CODE = "en" + PREFERRED_LANGUAGES = ["en"] LOCALE_PATHS = [os.path.join(BASE_DIR, "locale")] @@ -580,7 +581,7 @@ SEARCH_INDEX_NEW_ONLY = False INDEX_ALIASES = env("INDEX_ALIASES") -DOWNLOADER_SAVEDIR = env("NEODB_DOWNLOADER_SAVE_DIR", default="/tmp") # type: ignore +DOWNLOADER_SAVEDIR = env("NEODB_DOWNLOADER_SAVE_DIR", default="") # type: ignore DISABLE_MODEL_SIGNAL = False # disable index and social feeds during importing/etc diff --git a/catalog/common/downloaders.py b/catalog/common/downloaders.py index c4da7244..4a1c1b15 100644 --- a/catalog/common/downloaders.py +++ b/catalog/common/downloaders.py @@ -205,6 +205,7 @@ class BasicDownloader: ) return resp, response_type except RequestException as e: + # logger.debug(f"RequestException: {e}") self.logs.append( {"response_type": RESPONSE_NETWORK_ERROR, "url": url, "exception": e} ) @@ 
@cached_property
def rating_info(self):
    """Return the rating summary for this item.

    The summary is whatever ``Rating.get_info_for_item`` returns for this
    item; the accessors below read its ``"average"``, ``"count"`` and
    ``"distribution"`` entries.  The result is cached on the instance, so
    the three accessors share a single lookup.
    """
    # Imported at call time rather than module level
    # (presumably to avoid a circular import with journal -- confirm).
    from journal.models import Rating

    summary = Rating.get_info_for_item(self)
    return summary


@property
def rating(self):
    """Return the ``"average"`` entry of ``rating_info`` (``None`` if absent)."""
    summary = self.rating_info
    return summary.get("average")


@cached_property
def rating_count(self):
    """Return the ``"count"`` entry of ``rating_info`` (``None`` if absent)."""
    summary = self.rating_info
    return summary.get("count")


@cached_property
def rating_distribution(self):
    """Return the ``"distribution"`` entry of ``rating_info`` (``None`` if absent)."""
    summary = self.rating_info
    return summary.get("distribution")
self.assertIsNone(upc_to_gtin_13("018771208112X")) self.assertIsNone(upc_to_gtin_13("999018771208112")) @@ -15,6 +17,8 @@ class BasicMusicTest(TestCase): class SpotifyTestCase(TestCase): + databases = "__all__" + def test_parse(self): t_id_type = IdType.Spotify_Album t_id_value = "65KwtzkJXw7oT819NFWmEP" @@ -48,6 +52,8 @@ class SpotifyTestCase(TestCase): class DoubanMusicTestCase(TestCase): + databases = "__all__" + def test_parse(self): t_id_type = IdType.DoubanMusic t_id_value = "33551231" @@ -74,6 +80,8 @@ class DoubanMusicTestCase(TestCase): class MultiMusicSitesTestCase(TestCase): + databases = "__all__" + @use_local_response def test_albums(self): url1 = "https://music.douban.com/subject/33551231/" @@ -92,6 +100,8 @@ class MultiMusicSitesTestCase(TestCase): class BandcampTestCase(TestCase): + databases = "__all__" + def test_parse(self): t_id_type = IdType.Bandcamp t_id_value = "intlanthem.bandcamp.com/album/in-these-times" @@ -119,6 +129,8 @@ class BandcampTestCase(TestCase): class DiscogsReleaseTestCase(TestCase): + databases = "__all__" + def test_parse(self): t_id_type = IdType.Discogs_Release t_id_value = "25829341" @@ -155,6 +167,8 @@ class DiscogsReleaseTestCase(TestCase): class DiscogsMasterTestCase(TestCase): + databases = "__all__" + def test_parse(self): t_id_type = IdType.Discogs_Master t_id_value = "469004" @@ -182,6 +196,8 @@ class DiscogsMasterTestCase(TestCase): class AppleMusicTestCase(TestCase): + databases = "__all__" + def test_parse(self): t_id_type = IdType.AppleMusic t_id_value = "1284391545" @@ -201,8 +217,10 @@ class AppleMusicTestCase(TestCase): self.assertEqual(site.ready, False) site.get_resource_ready() self.assertEqual(site.ready, True) - self.assertEqual(site.resource.metadata["title"], "Kids Only") + self.assertEqual( + site.resource.metadata["localized_title"][0]["text"], "Kids Only" + ) self.assertEqual(site.resource.metadata["artist"], ["Leah Dou"]) self.assertIsInstance(site.resource.item, Album) - 
self.assertEqual(site.resource.item.genre, ["Pop"]) - self.assertEqual(site.resource.item.duration, 2371628) + self.assertEqual(site.resource.item.genre, ["Pop", "Music"]) + self.assertEqual(site.resource.item.duration, 2368000) diff --git a/catalog/sites/apple_music.py b/catalog/sites/apple_music.py index 19b89d98..194944d8 100644 --- a/catalog/sites/apple_music.py +++ b/catalog/sites/apple_music.py @@ -9,8 +9,9 @@ Scraping the website directly. """ import json +from datetime import timedelta -import dateparser +from django.utils.dateparse import parse_duration from loguru import logger from catalog.common import * @@ -18,7 +19,6 @@ from catalog.models import * from common.models.lang import ( SITE_DEFAULT_LANGUAGE, SITE_PREFERRED_LANGUAGES, - detect_language, ) from common.models.misc import uniq @@ -39,7 +39,6 @@ class AppleMusic(AbstractSite): headers = { "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:107.0) Gecko/20100101 Firefox/107.0", "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", - "Accept-Language": BasicDownloader.get_accept_language(), "Accept-Encoding": "gzip, deflate", "Connection": "keep-alive", "DNT": "1", @@ -70,80 +69,63 @@ class AppleMusic(AbstractSite): return locales def scrape(self): - matched_content = None + matched_schema_data = None localized_title = [] localized_desc = [] for lang, locales in self.get_locales().items(): for loc in locales: # waterfall thru all locales url = f"https://music.apple.com/{loc}/album/{self.id_value}" try: + tl = f"{lang}-{loc}" if lang == "zh" else lang + headers = { + "Accept-Language": tl, + } + headers.update(self.headers) content = ( BasicDownloader(url, headers=self.headers).download().html() ) - logger.info(f"got localized content from {url}") - elem = content.xpath( - "//script[@id='serialized-server-data']/text()" - ) - txt: str = elem[0] # type:ignore - page_data = json.loads(txt)[0] - album_data = page_data["data"]["sections"][0]["items"][0] - title = 
album_data["title"] - brief = album_data.get("modalPresentationDescriptor", {}).get( - "paragraphText", "" - ) - tl = detect_language(title + " " + brief) - localized_title.append({"lang": tl, "text": title}) - if brief: - localized_desc.append({"lang": tl, "text": brief}) - if lang == SITE_DEFAULT_LANGUAGE or not matched_content: - matched_content = content + logger.debug(f"got localized content from {url}") + txt: str = content.xpath( + "//script[@id='schema:music-album']/text()" + )[0] # type:ignore + schema_data = json.loads(txt) + title = schema_data["name"] + if title: + localized_title.append({"lang": tl, "text": title}) + try: + txt: str = content.xpath( + "//script[@id='serialized-server-data']/text()" + )[0] # type:ignore + server_data = json.loads(txt) + brief = server_data[0]["data"]["sections"][0]["items"][0][ + "modalPresentationDescriptor" + ]["paragraphText"] + if brief: + localized_desc.append({"lang": tl, "text": brief}) + except Exception: + server_data = brief = None + if lang == SITE_DEFAULT_LANGUAGE or not matched_schema_data: + matched_schema_data = schema_data break except Exception: pass - if matched_content is None: + if matched_schema_data is None: # no schema data found raise ParseError(self, f"localized content for {self.url}") - elem = matched_content.xpath("//script[@id='serialized-server-data']/text()") - txt: str = elem[0] # type:ignore - page_data = json.loads(txt)[0] - album_data = page_data["data"]["sections"][0]["items"][0] - title = album_data["title"] - brief = album_data.get("modalPresentationDescriptor") - brief = brief.get("paragraphText") if brief else None - artist_list = album_data["subtitleLinks"] - artist = [item["title"] for item in artist_list] - - track_data = page_data["data"]["seoData"] - date_elem = track_data.get("musicReleaseDate") - release_datetime = dateparser.parse(date_elem.strip()) if date_elem else None - release_date = ( - release_datetime.strftime("%Y-%m-%d") if release_datetime else None + artist = 
[a["name"] for a in matched_schema_data.get("byArtist", [])] + release_date = matched_schema_data.get("datePublished", None) + genre = matched_schema_data.get("genre", []) + image_url = matched_schema_data.get("image", None) + track_list = [t["name"] for t in matched_schema_data.get("tracks", [])] + duration = round( + sum( + (parse_duration(t["duration"]) or timedelta()).total_seconds() * 1000 + for t in matched_schema_data.get("tracks", []) + ) ) - - track_list = [ - f"{i}. {track['attributes']['name']}" - for i, track in enumerate(track_data["ogSongs"], 1) - ] - duration_list = [ - track["attributes"].get("durationInMillis", 0) - for track in track_data["ogSongs"] - ] - duration = int(sum(duration_list)) - genre = track_data["schemaContent"].get("genre") - if genre: - genre = [ - genre[0] - ] # apple treat "Music" as a genre. Thus, only the first genre is obtained. - - images = matched_content.xpath("//source[@type='image/jpeg']/@srcset") - image_elem: str = images[0] if images else "" # type:ignore - image_url = image_elem.split(" ")[0] if image_elem else None - pd = ResourceContent( metadata={ "localized_title": uniq(localized_title), "localized_description": uniq(localized_desc), - "title": title, - "brief": brief, "artist": artist, "genre": genre, "release_date": release_date, diff --git a/catalog/tv/models.py b/catalog/tv/models.py index c4aec7d9..a9422f9d 100644 --- a/catalog/tv/models.py +++ b/catalog/tv/models.py @@ -44,6 +44,7 @@ from catalog.common import ( ) from catalog.common.models import LANGUAGE_CHOICES_JSONFORM, LanguageListField from common.models.lang import RE_LOCALIZED_SEASON_NUMBERS, localize_number +from common.models.misc import uniq class TVShowInSchema(ItemInSchema): @@ -414,30 +415,35 @@ class TVSeason(Item): - "Show Title Season X" with some localization """ s = super().display_title - if self.parent_item and ( - RE_LOCALIZED_SEASON_NUMBERS.sub("", s) == "" - or s == self.parent_item.display_title - ): - if 
@cached_property
def additional_title(self) -> list[str]:
    """Return the localized titles worth showing next to the display title.

    A localized title is kept when it differs from ``display_title`` and is
    not emptied by ``RE_LOCALIZED_SEASON_NUMBERS`` (i.e. it is more than a
    bare season-number label).  The kept titles are passed through ``uniq``
    before being returned.
    """
    primary = self.display_title
    extras = []
    for entry in self.localized_title:
        text = entry["text"]
        if text == primary:
            # identical to what is already displayed
            continue
        if RE_LOCALIZED_SEASON_NUMBERS.sub("", text) == "":
            # nothing left once the season-number pattern is stripped
            continue
        extras.append(text)
    return uniq(extras)
def _error_response(request, status: int, exception=None, default_message=""):
    """Build an error response in the format the client asked for.

    Args:
        request: the incoming ``HttpRequest``.
        status: HTTP status code to send; also selects the ``{status}.html``
            template for the full-page case.
        exception: the exception that triggered the error, if any.  Its
            ``str()`` becomes the message when it is truthy.
        default_message: message used when ``exception`` is falsy.

    Returns:
        * a ``JsonResponse`` ``{"error": message}`` when the ``Accept``
          header ends with ``json``;
        * a bare ``HttpResponse`` carrying only the message when the request
          has an ``HX-Request`` header but no ``HX-Boosted`` header;
        * otherwise the rendered ``{status}.html`` template, with ``message``
          and ``exception`` in its context.
    """
    message = str(exception) if exception else default_message
    # ``request.headers`` is keyed by the real header name ("Accept"), not by
    # the ``request.META`` key ("HTTP_ACCEPT").  Looking up the META-style key
    # never matches, which left the JSON and HTMX branches unreachable.
    if request.headers.get("Accept", "").endswith("json"):
        return JsonResponse({"error": message}, status=status)
    if (
        request.headers.get("HX-Request") is not None
        and request.headers.get("HX-Boosted") is None
    ):
        return HttpResponse(message, status=status)
    return render(
        request,
        f"{status}.html",
        status=status,
        context={"message": message, "exception": exception},
    )


def error_400(request, exception=None):
    """Handle a 400 error.

    A ``DisallowedHost`` exception is answered with a permanent redirect to
    the same path under ``settings.SITE_INFO["site_url"]``; anything else
    gets a regular 400 error response.
    """
    if isinstance(exception, DisallowedHost):
        url = settings.SITE_INFO["site_url"] + request.get_full_path()
        return redirect(url, permanent=True)
    return _error_response(request, 400, exception, "invalid request")


def error_403(request, exception=None):
    """Handle a 403 error; the message defaults to "forbidden"."""
    return _error_response(request, 403, exception, "forbidden")


def error_404(request, exception=None):
    """Handle a 404 error; the message defaults to "not found".

    Any ``next_url`` stored in the session is dropped before responding.
    """
    request.session.pop("next_url", None)
    return _error_response(request, 404, exception, "not found")


def error_500(request, exception=None):
    """Handle a 500 error; the message defaults to "something wrong"."""
    return _error_response(request, 500, exception, "something wrong")
- -``` -curl https://neodb.social/api/v1/apps \ - -d client_name=MyApp \ - -d redirect_uris=https://example.org/callback \ - -d website=https://my.site -``` - -and save of the `client_id` and `client_secret` returned in the response: - -``` -{ - "client_id": "CLIENT_ID", - "client_secret": "CLIENT_SECRET", - "name": "MyApp", - "redirect_uri": "https://example.org/callback", - "vapid_key": "PUSH_KEY", - "website": "https://my.site" -} -``` - - -### Guide your user to open this URL - -``` -https://neodb.social/oauth/authorize?response_type=code&client_id=CLIENT_ID&redirect_uri=https://example.org/callback&scope=read+write -``` - -### Once authorizated by user, it will redirect to `https://example.org/callback` with a `code` parameter: - -``` -https://example.org/callback?code=AUTH_CODE -``` - -### Obtain access token with the following POST request: - -``` -curl https://neodb.social/oauth/token \ - -d "client_id=CLIENT_ID" \ - -d "client_secret=CLIENT_SECRET" \ - -d "code=AUTH_CODE" \ - -d "redirect_uri=https://example.org/callback" \ - -d "grant_type=authorization_code" -``` - -and access token will be returned in the response: - -``` -{ - "access_token": "ACCESS_TOKEN", - "token_type": "Bearer", - "scope": "read write" -} -``` - -### Use the access token to access protected endpoints like `/api/me` - -``` -curl -H "Authorization: Bearer ACCESS_TOKEN" -X GET https://neodb.social/api/me -``` - -and response will be returned accordingly: - -``` -{"url": "https://neodb.social/users/xxx/", "external_acct": "xxx@yyy.zzz", "display_name": "XYZ", "avatar": "https://yyy.zzz/xxx.gif"} -``` diff --git a/docs/apps.md b/docs/apps.md deleted file mode 100644 index e5175a28..00000000 --- a/docs/apps.md +++ /dev/null @@ -1,19 +0,0 @@ -# Apps - -NeoDB web version will provide the most features and experience, while some third-party apps are also available below. 
- -## Apps for NeoDB - -A few apps for NeoDB are being actively developed: - - - [Piecelet](https://piecelet.app) by `@piecelet@mastodon.social` - [App Store](https://apps.apple.com/app/piecelet-for-neodb/id6739444863) / [Source Code](https://github.com/lcandy2/neodb-app) - - [Chihu](https://chihu.app) by `@chihu@mastodon.social` - [Test Flight](https://testflight.apple.com/join/WmbnP9Vx) - -These apps are not affiliated with NeoDB, but they are being developed with the support of this community. If you are also developing an app for NeoDB, and wish to share that with the community, please [edit this file](https://github.com/neodb-social/neodb/edit/main/docs/apps.md) and submit a pull request. - - -## Mastodon apps - -[Mastodon compatible mobile and native apps](https://joinmastodon.org/apps) can be used to log in and utilize the micro-blogging features in NeoDB server. - -In addition to micro-blogging, Mastodon compatible can also be used to take note on book you are currently reading. Just head to bookmark section in your app, your currently reading books are listed there as bookmarked posts, replying any of them will make a note for that book. 
diff --git a/docs/assets/logo.svg b/docs/assets/logo.svg deleted file mode 100644 index 8a1418e3..00000000 --- a/docs/assets/logo.svg +++ /dev/null @@ -1,140 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/docs/configuration.md b/docs/configuration.md deleted file mode 100644 index b52a85af..00000000 --- a/docs/configuration.md +++ /dev/null @@ -1,210 +0,0 @@ -# Configuration - - -## Important settings you may want to change first - -absolutely set these in `.env` before start the instance for the first time: - - - `NEODB_SECRET_KEY` - 50 characters of random string, no white space - - `NEODB_SITE_NAME` - the name of your site - - `NEODB_SITE_DOMAIN` - the domain name of your site - -**`NEODB_SECRET_KEY` and `NEODB_SITE_DOMAIN` must not be changed later.** - -if you are doing debug or development: - - - `NEODB_DEBUG` - True will turn on debug for both neodb and takahe, turn off relay, and reveal self as debug mode in nodeinfo (so peers won't try to run fedi search on this node) - - `NEODB_IMAGE` - the docker image to use, `neodb/neodb:edge` for the main branch - -## Settings for customization - - - `NEODB_SITE_LOGO` - - `NEODB_SITE_ICON` - - `NEODB_USER_ICON` - - `NEODB_SITE_COLOR` - one of [these color schemes](https://picocss.com/docs/colors) - - `NEODB_SITE_INTRO` - - `NEODB_SITE_HEAD` - - `NEODB_SITE_DESCRIPTION` - - `NEODB_PREFERRED_LANGUAGES` - preferred languages when importing titles from 3rd party sites like TMDB and Steam, comma-separated list of ISO-639-1 two-letter codes, `en,zh` by default. It can includes languages with no UI translations yet, e.g. if set to `ja,en,zh`, NeoDB scraper will fetch catalog metadata in three languages if they are available from third party sites, Japanese users (= whose browser language set to ja-JP) will see English UI with Japanese metadata. 
- - `NEODB_DISCOVER_FILTER_LANGUAGE` - `False` by default; when set to `True`, `/discover/` will only show items with languages match one of `NEODB_PREFERRED_LANGUAGES`. - - `NEODB_DISCOVER_SHOW_LOCAL_ONLY` - `False` by default; when set to `True`, only show items marked by local users rather than entire network on `/discover/` - - `NEODB_DISCOVER_UPDATE_INTERVAL` - minutes between each update for popular items on `/discover/` - - `NEODB_SITE_LINKS` - a list of title and links to show in the footer, comma separated, e.g. `Feedback=https://discord.gg/8KweCuApaK,ToS=/pages/rules/` - - `NEODB_INVITE_ONLY` - `False` by default, set to `True` to require invite code(generated by `neodb-manage invite --create`) to register - - `NEODB_ENABLE_LOCAL_ONLY` - `False` by default, set to `True` to allow user to post marks as "local public" - - `NEODB_LOGIN_MASTODON_WHITELIST` - a list of Mastodon instances to allow login from, comma separated - - `NEODB_EMAIL_FROM` - the email address to send email from - - `NEODB_EMAIL_URL` - email sender configuration, e.g. - - `smtp://:@:` - - `smtp+tls://:@:` - - `smtp+ssl://:@:` - - `anymail://?`, to send email via email service providers, see [anymail doc](https://anymail.dev/) - -## Settings for administration - - `DISCORD_WEBHOOKS` - Discord channel to send notification about user submitted suggestion and changes, e.g. `suggest=https://discord.com/api/webhooks/123/abc,audit=https://discord.com/api/webhooks/123/def`. Both suggest and audit channels must be in forum mode. - - `NEODB_SENTRY_DSN` , `TAKAHE_SENTRY_DSN` - [Sentry](https://sentry.io/) DSN to log errors. - -## Settings for Federation - - - `NEODB_SEARCH_PEERS` is empty by default, which means NeoDB will search all known peers running production version of NeoDB when user look for items. This can be set to a comma-separated list of host names, so that NeoDB will only search those servers; or search no other peers if set to just `-`. 
- - - `NEODB_DISABLE_DEFAULT_RELAY` is set to `False` by default, the server will send and receive public posts from `relay.neodb.net`. - - `relay.neodb.net` is [open sourced](https://github.com/neodb-social/neodb-relay) and operated by NeoDB developers, it works like most ActivityPub relays except it only relays between NeoDB instances, it helps public information like catalogs and trends flow between NeoDB instances. You may set it to `True` if you don't want to relay public posts with other NeoDB instances. - - -## Settings for external item sources - -- `SPOTIFY_API_KEY` - base64('CLIENT_ID:SECRET'), see [spotify doc](https://developer.spotify.com/documentation/web-api/tutorials/client-credentials-flow) -- `TMDB_API_V3_KEY` - API v3 key from [TMDB](https://developer.themoviedb.org/) -- `GOOGLE_API_KEY` - API key for [Google Books](https://developers.google.com/books/docs/v1/using) -- `DISCOGS_API_KEY` - personal access token from [Discogs](https://www.discogs.com/settings/developers) -- `IGDB_API_CLIENT_ID`, `IGDB_API_CLIENT_SECRET` - IGDB [keys](https://api-docs.igdb.com/) -- `NEODB_SEARCH_SITES` is empty by default, which means NeoDB will search all available sources. This can be set to a comma-separated list of site names (e.g. `goodreads,googlebooks,spotify,tmdb,igdb,bandcamp,apple_podcast`), so that NeoDB will only search those sites; or not search any of them if set to just `-`. 
- - -## Other maintenance tasks - -Add alias to your shell for easier access - -``` -alias neodb-manage='docker-compose --profile production run --rm shell neodb-manage' -``` - -Toggle user's active, staff and super user status - -``` -neodb-manage user --active -neodb-manage user --staff -neodb-manage user --super -``` - -create a super user; delete a user / remote identity (`takahe-stator` and `neodb-worker` containers must be running to complete the deletion) -``` -neodb-manage createsuperuser -neodb-manage user --delete username -neodb-manage user --delete username@remote.instance -``` - -Create an invite link - -``` -neodb-manage invite --create -``` - -Manage user tasks and cron jobs - -``` -neodb-manage task --list -neodb-manage cron --list -``` - -Manage search index -``` -neo-manage index --reindex -``` - -Crawl links -``` -neodb-manage cat [--save] # parse / save a supported link -neodb-manage crawl # crawl all recognizable links from a page -``` - - -## Run PostgresQL/Redis/Typesense without Docker - -It's currently possible but quite cumbersome to run without Docker, hence not recommended. 
However it's possible to only use docker to run neodb server but reuse existing PostgresQL/Redis/Typesense servers with `compose.override.yml`, an example for reference: - -``` -services: - redis: - profiles: ['disabled'] - typesense: - profiles: ['disabled'] - neodb-db: - profiles: ['disabled'] - takahe-db: - profiles: ['disabled'] - migration: - extra_hosts: - - "host.docker.internal:host-gateway" - depends_on: !reset [] - neodb-web: - extra_hosts: - - "host.docker.internal:host-gateway" - depends_on: !reset [] - healthcheck: !reset {} - neodb-web-api: - extra_hosts: - - "host.docker.internal:host-gateway" - depends_on: !reset [] - healthcheck: !reset {} - neodb-worker: - extra_hosts: - - "host.docker.internal:host-gateway" - depends_on: !reset [] - neodb-worker-extra: - extra_hosts: - - "host.docker.internal:host-gateway" - depends_on: !reset [] - takahe-web: - extra_hosts: - - "host.docker.internal:host-gateway" - depends_on: !reset [] - takahe-stator: - extra_hosts: - - "host.docker.internal:host-gateway" - depends_on: !reset [] - shell: - extra_hosts: - - "host.docker.internal:host-gateway" - depends_on: !reset [] - root: - extra_hosts: - - "host.docker.internal:host-gateway" - depends_on: !reset [] - dev-neodb-web: - extra_hosts: - - "host.docker.internal:host-gateway" - depends_on: !reset [] - dev-neodb-worker: - extra_hosts: - - "host.docker.internal:host-gateway" - depends_on: !reset [] - dev-takahe-web: - extra_hosts: - - "host.docker.internal:host-gateway" - depends_on: !reset [] - dev-takahe-stator: - extra_hosts: - - "host.docker.internal:host-gateway" - depends_on: !reset [] - dev-shell: - extra_hosts: - - "host.docker.internal:host-gateway" - depends_on: !reset [] - dev-root: - extra_hosts: - - "host.docker.internal:host-gateway" - depends_on: !reset [] -``` -(`extra_hosts` is only needed if PostgresQL/Redis/Typesense is on your host server) - - -## Multiple instances on one server - -It's possible to run multiple clusters in one host server with 
docker compose, as long as `NEODB_SITE_DOMAIN`, `NEODB_PORT` and `NEODB_DATA` are different. - - -## Scaling up - -For high-traffic instance, spin up these configurations to a higher number, as long as the host server can handle them: - - - `NEODB_WEB_WORKER_NUM` - - `NEODB_API_WORKER_NUM` - - `NEODB_RQ_WORKER_NUM` - - `TAKAHE_WEB_WORKER_NUM` - - `TAKAHE_STATOR_CONCURRENCY` - - `TAKAHE_STATOR_CONCURRENCY_PER_MODEL` - -Further scaling up with multiple nodes (e.g. via Kubernetes) is beyond the scope of this document, but consider run db/redis/typesense separately, and then duplicate web/worker/stator containers as long as connections and mounts are properly configured; `migration` only runs once when start or upgrade, it should be kept that way. diff --git a/docs/development.md b/docs/development.md deleted file mode 100644 index 95d3f973..00000000 --- a/docs/development.md +++ /dev/null @@ -1,187 +0,0 @@ -Development -=========== - -Overview --------- -NeoDB is a Django project, and it runs side by side with a [modified version](https://github.com/neodb-social/neodb-takahe) of [Takahē](https://github.com/jointakahe/takahe) (a separate Django project, code in `neodb_takahe` folder of this repo as submodule). They communicate with each other mainly thru database and task queue, [the diagram](troubleshooting.md#containers) demonstrates a typical architecture. Currently the two are loosely coupled, so you may take either one offline without immediate impact on the other, which makes it very easy to conduct maintenance and troubleshooting separately. In the future, they may get combined but it's not decided and will not be decided very soon. 
- - -Prerequisite ------------- -- Python 3.12.x -- Docker Compose v2 or newer - - -Prepare the code ----------------- -When checking out NeoDB source code, make sure submodules are also checked out: -``` -git clone https://github.com/neodb-social/neodb.git -cd neodb -git submodule update --init -``` - -Install [rye](http://rye.astral.sh) package manager, packages and pre-commit hooks: -``` -curl -sSf https://rye.astral.sh/get | bash -rye sync -. .venv/bin/activate -pre-commit install -``` - -To develop Takahe, install requirements(-dev) and pre-commit hooks for `neodb-takahe` project as well, preferably using a different virtual environment. - -Note: the virtual environments and packages installed in this step are mostly for linting, the actual virtual environments and packages are from NeoDB docker image, and they can be configured differently, more on this later in this document. - - -Start local instance for development ------------------------------------- -Follow [install guide](install.md) to create `.env` in the root folder of NeoDB code, including at least these configuration: -``` -NEODB_SITE_NAME="My Test" -NEODB_SITE_DOMAIN=mydomain.dev -NEODB_SECRET_KEY=_random_string__50_characters_of_length__no_whitespaces_ -NEODB_IMAGE=neodb/neodb:edge -NEODB_DEBUG=True -``` - -Download docker images and start pgsql/redis/typesense before initializing database schema: -``` -docker compose --profile dev pull -docker compose up -d -``` - -Initialize database schema: -``` -docker compose --profile dev run --rm dev-shell takahe-manage collectstatic --noinput -docker compose --profile dev run --rm dev-shell neodb-init -``` - -Start the cluster: -``` -docker compose --profile dev up -d -``` - -Watch the logs: -``` -docker compose --profile dev logs -f -``` - -Now the local development instance is ready to serve at `http://localhost:8000`, but to develop or test anything related with ActivityPub, reverse proxying it from externally reachable https://`NEODB_SITE_DOMAIN`/ is 
required; https is optional theoretically but in reality required for various compatibility reasons. - -Note: `dev` profile is for development only, and quite different from `production`, so always use `--profile dev` instead of `--profile production`, more on those differences later in this document. - - -Common development tasks ------------------------- -Shutdown the cluster: -``` -docker compose --profile dev down -``` - -Restart background tasks (unlike web servers, background tasks don't auto reload after code change): -``` -docker-compose --profile dev restart dev-neodb-worker dev-takahe-stator -``` - -When updating code, always update submodules: -``` -git pull -git submodule update --init -``` -With newer git 2.15+, you may use `git pull --recurse-submodules` or `git config --global submodule.recurse true` to make it automatic. - - -To save some typing, consider adding some aliases to `~/.profile`: -``` -alias neodb-logs='docker compose --profile dev logs' -alias neodb-shell='docker compose --profile dev run --rm dev-shell' -alias neodb-manage='docker compose --profile dev run --rm dev-shell neodb-manage' -``` - -Always use `neodb-init`, not `python3 manage.py migrate`, to update db schema after updating code: -``` -neodb-shell neodb-init -``` - -Run unit test: -``` -neodb-manage test -``` - -Update translations: -``` -django-admin makemessages --no-wrap --no-obsolete -i .venv -i neodb-takahe --keep-pot -l zh_Hans -l zh_Hant - -# edit .po files, run the following to make sure edits are correct - -django-admin makemessages --no-wrap --no-obsolete -i .venv -i neodb-takahe --keep-pot -l zh_Hans -l zh_Hant -django-admin compilemessages -i .venv -i neodb-takahe -``` - -Preview documentation: -``` -python -m mkdocs serve -``` - -Development in Docker Compose ------------------------------ -The `dev` profile is different from `production`: - -- code in `NEODB_SRC` (default: .) 
and `TAKAHE_SRC` (default: ./neodb-takahe) will be mounted and used in the container instead of code in the image -- `runserver` with autoreload will be used instead of `gunicorn` for both neodb and takahe web server -- /static/ and /s/ urls are not mapped to the pre-generated/collected static file path, so `NEODB_DEBUG=True` is required to locate static files from source code -- one `rqworker` container will be started, instead of two -- use `dev-shell` and `dev-root` to invoke shells, instead of `shell` and `root` -- there's no automatic `migration` container, but it can be triggered manually via `docker compose run dev-shell neodb-init` - -Note: - -- Python virtual environments inside docker image, which are `/neodb-venv` and `/takahe-venv`, will be used by default. They can be changed to different locations with `TAKAHE_VENV` and `NEODB_VENV` if needed, usually in a case of development code using a package not in docker venv. -- Some packages inside python virtual environments are platform dependent, so mounting a venv built on a macOS host into the Linux container will likely not work. -- Python servers are launched as `app` user, who has no write access to anywhere except /tmp and media path, that's by design. -- Database/redis/typesense used in the container cluster are not accessible from host directly, which is by design. Querying them can be done by one of the following: - - `neodb-manage dbshell` - - `neodb-shell redis-cli -h redis` - - or create `compose.override.yml` to uncomment `ports` section. 
-- To expose the neodb and takahe web server directly, in the folder for configuration, create `compose.override.yml` with the following content: - -``` -services: - dev-neodb-web: - ports: - - "8001:8000" - - dev-takahe-web: - ports: - - "8002:8000" -``` - - -Development with Github Codespace ---------------------------------- -At the time of writing, docker compose will work in Github Codespace by adding this in `.env`: - -``` -NEODB_SITE_DOMAIN=${CODESPACE_NAME}-8000.${GITHUB_CODESPACES_PORT_FORWARDING_DOMAIN} -``` - - -Applications ------------- -Main Django apps for NeoDB: - - - `users` manages user in typical Django fashion - - `mastodon` this leverages [Mastodon API](https://docs.joinmastodon.org/client/intro/), [Threads API](https://developers.facebook.com/docs/threads/) and [ATProto](https://atproto.com) for user login and data sync - - `catalog` manages different types of items user may collect, and scrapers to fetch from external resources, see [catalog.md](internals/catalog.md) for more details - - `journal` manages user created content(review/ratings) and lists(collection/shelf/tag/note), see [journal.md](internals/journal.md) for more details - - `social` present timeline and notification for local users - - `takahe` communicate with Takahe (a separate Django server, run side by side with this server, code in `neodb_takahe` as submodule), see [federation.md](internals/federation.md) for customization of ActivityPub protocol - - `legacy` this is only used by instances upgraded from 0.4.x and earlier, to provide a link mapping from old urls to new ones. If your journey starts with 0.5 and later, feel free to ignore it. - - -Miscellaneous notes -------------------- -If models in `takahe/models.py` are changed, instead of adding incremental migrations, just regenerate `takahe/migrations/0001_initial.py` instead, because these migrations will never be applied except for constructing a test database. 
- -A `libsass` wheel is stored in the repo to speed up docker image building process in Github Action. diff --git a/docs/features.md b/docs/features.md deleted file mode 100644 index d054e7cc..00000000 --- a/docs/features.md +++ /dev/null @@ -1,69 +0,0 @@ -# Features - -NeoDB has [various features](features.md), and you may imagine it as a mix of Mastodon, Goodreads, Letterboxd, RateYourMusic, Podchaser, and more. - -## Public catalog - - - a shared catalog of books/movies/tv shows/music album/games/podcasts/performances - - search or create catalog items in each category - - one click create item with links to 3rd party sites: - - Goodreads - - IMDB - - The Movie Database - - Douban - - Google Books - - Discogs - - Spotify - - Apple Music - - Bandcamp - - Steam - - IGDB - - Bangumi - - Board Game Geek - - any RSS link to a podcast - - ...[full list](sites.md) - - -## Personal collections - - - mark an item as wishlist/in progress/complete/dropped - - rate and write reviews for an item - - write notes for an item with progress (e.g. reading notes at page 42) - - create tags for an item, either privately or publicly - - create and share list of items - - tracking progress of a list (e.g. personal reading challenges) - - Import and export full user data archive - - import list or archives from some 3rd party sites: - - Goodreads reading list - - Letterboxd watch list - - Douban archive (via [Doufen](https://doufen.org/)) - - -## Social - - - view home feed with friends' activities - - every activity can be set as viewable to self/follower-only/public - - eligible items, e.g. 
podcasts and albums, are playable in feed - - login with other Fediverse identity and import social graph - - supported servers: Mastodon/Pleroma/Firefish/GoToSocial/Pixelfed/friendica/Takahē - - login with Bluesky / ATProto identity and import social graph - - login with threads.net (requires app verification by Meta) - - share collections and reviews to Fediverse/Bluesky/Threads - - ActivityPub support - - NeoDB users can follow and interact with users on other ActivityPub services like Mastodon and Pleroma - - NeoDB instances communicate with each other via an extended version of ActivityPub - - NeoDB instances may share public rating and reviews with a default relay - - implementation is based on [Takahē](https://jointakahe.org/) server - - -## API - - Mastodon compatible API - - most mastodon compatible apps are compatible with NeoDB - - NeoDB API to manage reviews and collections - -## Languages - - - English - - Simplified Chinese - - Traditional Chinese - - more to come and your contributions are welcomed! diff --git a/docs/index.md b/docs/index.md deleted file mode 100644 index 7b504e75..00000000 --- a/docs/index.md +++ /dev/null @@ -1,47 +0,0 @@ -NeoDB is an open-source software and global community platform [since 2021](origin.md). It helps users to manage and explore collections, reviews, and ratings for various cultural products, including books, movies, music, podcasts, games, and performances. Additionally, users can share their collections, publish microblogs, and engage with others in the Fediverse. NeoDB integrates the functionalities of platforms like Goodreads, Letterboxd, RateYourMusic, and Podchaser, among others. It also supports self-hosting and interconnection through containerized deployment and the ActivityPub protocol. - -This project is licensed under AGPLv3. - - -## How to use - -Please [find a server](servers.md) to register with your email, Mastodon or Bluesky ID. 
- -Besides the web version of your chosen server, you may also use [various apps](apps.md) to login and manage your collections. - - -## Host your own instance - -You can [install and run your own instance of NeoDB](install.md). If you decide to share your server with the public, please inform us so that we can add it to the list above. - -All instances interact with each other in the Fediverse via ActivityPub, allowing you to follow users from other NeoDB instances or instances running other ActivityPub software, such as Mastodon. - - -## API, Development and Translation - - - NeoDB offers [APIs to manage user collections](api.md), and [Mastodon client compatible API](https://docs.joinmastodon.org/client/) to manage user posts. - - For those interested in developing for NeoDB, please refer to the [development](development.md) section for basic instructions to get started. - - To help translate NeoDB to more languages, please join [our project on Weblate](https://hosted.weblate.org/projects/neodb/) - - -## Copyleft - - - NeoDB software code is licensed under AGPL, please check it if you plan to provide a service with modified code. - - If you publish a work (e.g. application, website or service) utilizing NeoDB software or API, you may use "NeoDB" and its visual identity in your work, but please refrain from any usage that may imply the affiliation with NeoDB software and team. - - If you publish such work to serve others, we do appreciate you clearly mention the usage of NeoDB in your work, provide descriptions and links to NeoDB software (neodb.net) and service providers (e.g. neodb.social), and guide your users to support them directly. - - -## Donation - -If you appreciate this project, please help spread the words, and consider sponsoring the service providers. Your support is essential to keep these services free, open-sourced and stay committed to their mission. 
You may donate to the flagship instance, NeoDB.social, through the link below; or check the instance you use for their details about donations. - - [![Kofi](https://img.shields.io/badge/Ko--Fi-Donate-orange?label=Support%20this%20project%20on%20Ko-fi&style=for-the-badge&color=ff5f5f&logo=ko-fi)](https://ko-fi.com/neodb) - - -## Feedback - -Follow us on the Fediverse or join our Discord community to share your ideas, questions, or creations. - -[![Mastodon](https://img.shields.io/mastodon/follow/106935434138389355?domain=https%3A%2F%2Fmastodon.online&style=for-the-badge&logo=mastodon&logoColor=fff&label=%40neodb%40mastodon.online&color=6D75D2)](https://mastodon.online/@neodb) -[![Mastodon](https://img.shields.io/mastodon/follow/106919732872456302?style=for-the-badge&logo=mastodon&logoColor=fff&label=%40neodb%40mastodon.social&color=6D75D2)](https://mastodon.social/@neodb) -[![Discord](https://img.shields.io/discord/1041738638364528710?label=Discord%20Community&logo=discord&logoColor=fff&color=6D75D2&style=for-the-badge&cacheSeconds=21600)](https://discord.gg/QBHkrV8bxK) diff --git a/docs/install.md b/docs/install.md deleted file mode 100644 index 3ee14c42..00000000 --- a/docs/install.md +++ /dev/null @@ -1,90 +0,0 @@ -Install -======= - -For small and medium NeoDB instances, it's recommended to deploy as a container cluster with Docker Compose. To run a large instance, please see [scaling up](configuration.md#scaling-up) for some tips. - -## Install docker compose - -Follow [official instructions](https://docs.docker.com/compose/install/) to install Docker Compose if not yet. - -Please verify its version is 2.x or higher before next step: - -``` -docker compose version -``` - -The rest of this doc assumes you can run docker commands without `sudo`, to verify that: - -``` -docker run --rm hello-world -``` - -Follow [official instructions](https://docs.docker.com/engine/install/linux-postinstall/) if it's not enabled, or use `sudo` to run commands in this doc. 
- - -## Prepare configuration files - - create a folder for configuration, eg ~/mysite/config - - grab `compose.yml` and `neodb.env.example` from [latest release](https://github.com/neodb-social/neodb/releases) - - rename `neodb.env.example` to `.env` - - -## Set up .env file and web root -Change essential options like `NEODB_SITE_DOMAIN` in `.env` before starting the cluster for the first time. Changing them later may have unintended consequences, please make sure they are correct before exposing the service externally. - -- `NEODB_SITE_NAME` - name of your site -- `NEODB_SITE_DOMAIN` - domain name of your site -- `NEODB_SECRET_KEY` - encryption key of session data -- `NEODB_DATA` is the path to store db/media/cache, it's `../data` by default, but can be any path that's writable -- `NEODB_DEBUG` - set to `False` for production deployment -- `NEODB_PREFERRED_LANGUAGES` - preferred languages when importing titles from 3rd party sites like TMDB and Steam, comma-separated list of ISO-639-1 two-letter codes, 'en,zh' by default. - -Optionally, `robots.txt` and `logo.png` may be placed under `$NEODB_DATA/www-root/`. 
- -See [neodb.env.example](https://raw.githubusercontent.com/neodb-social/neodb/main/neodb.env.example) and [configuration](configuration.md) for more options - - -## Start container - -in the folder with `compose.yml` and `.env`, execute as the user you just created: -``` -docker compose --profile production pull -docker compose --profile production up -d -``` - -Starting up for the first time might take a few minutes, depending on download speed, use the following commands for status and logs: -``` -docker compose ps -docker compose --profile production logs -f -``` - -In a few seconds, the site should be up at 127.0.0.1:8000 , you may check it with: -``` -curl http://localhost:8000/nodeinfo/2.0/ -``` - -JSON response will be returned if the server is up and running: -``` -{"version": "2.0", "software": {"name": "neodb", "version": "0.8-dev"}, "protocols": ["activitypub", "neodb"], "services": {"outbound": [], "inbound": []}, "usage": {"users": {"total": 1}, "localPosts": 0}, "openRegistrations": true, "metadata": {}} -``` - - -## Make the site available publicly - -Next step is to expose `http://127.0.0.1:8000` to external network as `https://yourdomain.tld` (NeoDB requires `https`). There are many ways to do it, you may use nginx or caddy as a reverse proxy server with an SSL cert configured, or configure a tunnel provider like cloudflared to do the same. Once done, you may check it with: - -``` -curl https://yourdomain.tld/nodeinfo/2.0/ -``` - -You should see the same JSON response as above, and the site is now accessible to the public. - - -## Register an account and make it admin - -Open `https://yourdomain.tld` in your browser and register an account, assuming username `admin`, run the following command to make it super user - -``` -docker compose --profile production run --rm shell neodb-manage user --super admin -``` - -Now your instance should be ready to serve. More tweaks are available, see [configuration](configuration.md) for options. 
diff --git a/docs/internals/catalog.md b/docs/internals/catalog.md deleted file mode 100644 index 5d25a6c4..00000000 --- a/docs/internals/catalog.md +++ /dev/null @@ -1,114 +0,0 @@ -Catalog -======= - -Data Models ------------ -all types of catalog items inherits from `Item` which stores as multi-table django model. -one `Item` may have multiple `ExternalResource`s, each represents one page on an external site - -```mermaid -classDiagram - class Item { - <> - } - Item <|-- Album - class Album { - +String barcode - +String Douban_ID - +String Spotify_ID - } - Item <|-- Game - class Game { - +String Steam_ID - } - Item <|-- Podcast - class Podcast { - +String feed_url - +String Apple_ID - } - Item <|-- Performance - Item <|-- Work - class Work { - +String Douban_Work_ID - +String Goodreads_Work_ID - } - Item <|-- Edition - Item <|-- Series - - Series *-- Work - Work *-- Edition - - class Series { - +String Goodreads_Series_ID - } - class Work { - +String Douban_ID - +String Goodreads_ID - } - class Edition{ - +String ISBN - +String Douban_ID - +String Goodreads_ID - +String GoogleBooks_ID - } - - Item <|-- Movie - Item <|-- TVShow - Item <|-- TVSeason - Item <|-- TVEpisode - TVShow *-- TVSeason - TVSeason *-- TVEpisode - - class TVShow{ - +String IMDB_ID - +String TMDB_ID - } - class TVSeason{ - +String Douban_ID - +String TMDB_ID - } - class TVEpisode{ - +String IMDB_ID - +String TMDB_ID - } - class Movie{ - +String Douban_ID - +String IMDB_ID - +String TMDB_ID - } - - Item <|-- Collection - - ExternalResource --* Item - class ExternalResource { - +enum site - +url: string - } -``` - -Add a new site --------------- - - - If official API is available for the site, it should be the preferred way to get data. 
- - add a new value to `IdType` and `SiteName` in `catalog/common/models.py` - - add a new file in `catalog/sites/`, a new class inherits `AbstractSite`, with: - * `SITE_NAME` - * `ID_TYPE` - * `URL_PATTERNS` - * `WIKI_PROPERTY_ID` (not used now) - * `DEFAULT_MODEL` (unless specified in `scrape()` return val) - * a classmethod `id_to_url()` - * a method `scrape()` returns a `ResourceContent` object - * `BasicDownloader` or `ProxiedDownloader` can be used to download website content or API data. e.g. `content = BasicDownloader(url).download().html()` - * check out existing files in `catalog/sites/` for more examples - - add an import in `catalog/sites/__init__.py` - - add some tests to `catalog//tests.py` according to site type - + add `DOWNLOADER_SAVEDIR = '/tmp'` to `settings.py` can save all response to /tmp - + run `neodb-manage cat ` for debugging or saving response file to `/tmp`. Detailed code of `cat` is in `catalog/management/commands/cat.py` - + move captured response file to `test_data/`, except large/images files. Or if have to, replace it with a smallest version (e.g. 1x1 pixel / 1s audio) - + add `@use_local_response` decorator to test methods that should pick up these responses (if `BasicDownloader` or `ProxiedDownloader` is used) - - run all the tests and make sure they pass - - Command: `neodb-manage python3 manage.py test [--keepdb]`. - - See [this issue](https://github.com/neodb-social/neodb/issues/5) if `lxml.etree.ParserError` occurs on macOS. 
- - add a site UI label style to `common/static/scss/_sitelabel.scss` - - update documentation in [sites.md](../sites.md) diff --git a/docs/internals/federation.md b/docs/internals/federation.md deleted file mode 100644 index 496019cb..00000000 --- a/docs/internals/federation.md +++ /dev/null @@ -1,131 +0,0 @@ -# Federation - -## Supported federation protocols and standards - -- [ActivityPub](https://www.w3.org/TR/activitypub/) (Server-to-Server) -- [WebFinger](https://webfinger.net/) -- [Http Signatures](https://datatracker.ietf.org/doc/html/draft-cavage-http-signatures) -- [NodeInfo](https://nodeinfo.diaspora.software/) - -## Supported FEPs - -- [FEP-f1d5: NodeInfo in Fediverse Software](https://codeberg.org/fediverse/fep/src/branch/main/fep/f1d5/fep-f1d5.md) - -## NodeInfo - -NeoDB instances can be identified from user agent string (`NeoDB/x.x (+https://example.org)`) and `protocols` in its nodeinfo, e.g. https://neodb.social/nodeinfo/2.0/ : -```json -{ - "version": "2.0", - "software": { - "name": "neodb", - "version": "0.10.4.13", - "repository": "https://github.com/neodb-social/neodb", - "homepage": "https://neodb.net/" - }, - "protocols": ["activitypub", "neodb"], -} -``` - - -## ActivityPub - -NeoDB's ActivityPub implementation is based on [Takahē](https://jointakahe.org), with some change to enable interchange of additional information between NeoDB instances. - -### Activity - -NeoDB add additional fields to `Note` activity: - - - `relatedWith` is a list of NeoDB specific activities which are associated with this `Note`. 
For each activity, `id` and `href` are both unique links to that activity, `withRegardTo` links to the catalog item, `attributedTo` links to the user, `type` is one of: - - `Status`, its `status` can be one of: `complete`, `progress`, `wishlist` and `dropped` - - `Rating`, its `value` is rating grade (int, 1-10), `worst` is always 1, `best` is always 10 - - `Comment`, its `content` is comment text - - `Review`, its `name` is review title, `content` is its body, `mediaType` is always `text/markdown` for now - - `Note`, its `content` is note text - - `tag` is used to store list of NeoDB catalog items, which are related with this activity. `type` of NeoDB catalog item can be one of `Edition`, `Movie`, `TVShow`, `TVSeason`, `TVEpisode`, `Album`, `Game`, `Podcast`, `PodcastEpisode`, `Performance`, `PerformanceProduction`; href will be the link to that item. - -Example: -```json -{ - "@context": ["https://www.w3.org/ns/activitystreams", { - "blurhash": "toot:blurhash", - "Emoji": "toot:Emoji", - "focalPoint": { - "@container": "@list", - "@id": "toot:focalPoint" - }, - "Hashtag": "as:Hashtag", - "manuallyApprovesFollowers": "as:manuallyApprovesFollowers", - "sensitive": "as:sensitive", - "toot": "http://joinmastodon.org/ns#", - "votersCount": "toot:votersCount", - "featured": { - "@id": "toot:featured", - "@type": "@id" - } - }, "https://w3id.org/security/v1"], - "id": "https://neodb.social/@april_long_face@neodb.social/posts/380919151408919488/", - "type": "Note", - "relatedWith": [{ - "id": "https://neodb.social/p/5oyF0qRx96mKKmVpFzHtMM", - "type": "Status", - "status": "complete", - "withRegardTo": "https://neodb.social/movie/7hfF7d0aFMaqHpFjUpq4zR", - "attributedTo": "https://neodb.social/@april_long_face@neodb.social/", - "href": "https://neodb.social/p/5oyF0qRx96mKKmVpFzHtMM", - "published": "2024-11-17T10:16:42.745240+00:00", - "updated": "2024-11-17T10:16:42.750917+00:00" - }, { - "id": "https://neodb.social/p/47cJnbQTkbSSN2izLwQMjo", - "type": "Comment", - 
"withRegardTo": "https://neodb.social/movie/7hfF7d0aFMaqHpFjUpq4zR", - "attributedTo": "https://neodb.social/@april_long_face@neodb.social/", - "content": "Broadway cin\u00e9math\u00e8que, at least I laughed hard.", - "href": "https://neodb.social/p/47cJnbQTkbSSN2izLwQMjo", - "published": "2024-11-17T10:16:42.745240+00:00", - "updated": "2024-11-17T10:16:42.777276+00:00" - }, { - "id": "https://neodb.social/p/3AyYu974qo6OU09AAsPweQ", - "type": "Rating", - "best": 10, - "value": 7, - "withRegardTo": "https://neodb.social/movie/7hfF7d0aFMaqHpFjUpq4zR", - "worst": 1, - "attributedTo": "https://neodb.social/@april_long_face@neodb.social/", - "href": "https://neodb.social/p/3AyYu974qo6OU09AAsPweQ", - "published": "2024-11-17T10:16:42.784220+00:00", - "updated": "2024-11-17T10:16:42.786458+00:00" - }], - "attributedTo": "https://neodb.social/@april_long_face@neodb.social/", - "content": "

\u770b\u8fc7 \u963f\u8bfa\u62c9 \ud83c\udf15\ud83c\udf15\ud83c\udf15\ud83c\udf17\ud83c\udf11
Broadway cin\u00e9math\u00e8que, at least I laughed hard.

#\u6211\u770b\u6211\u542c\u6211\u8bfb

", - "published": "2024-11-17T10:16:42.745Z", - "sensitive": false, - "tag": [{ - "type": "Hashtag", - "href": "https://neodb.social/tags/\u6211\u770b\u6211\u542c\u6211\u8bfb/", - "name": "#\u6211\u770b\u6211\u542c\u6211\u8bfb" - }, { - "type": "Movie", - "href": "https://neodb.social/movie/7hfF7d0aFMaqHpFjUpq4zR", - "image": "https://neodb.social/m/item/doubanmovie/2024/09/13/a30bf2f3-4f79-43ef-b22f-58ebc3fd8aae.jpg", - "name": "Anora" - }], - "to": ["https://www.w3.org/ns/activitystreams#Public"], - "updated": "2024-11-17T10:16:42.750Z", - "url": "https://neodb.social/@april_long_face/posts/380919151408919488/" -} -``` - -This is not ideal but a practical manner to pass along additional information between NeoDB instances and other ActivityPub servers. We have some ideas for improvements, but are open to more suggestions. - - -### Relay - -NeoDB instances may share public rating and reviews with a default relay, which is currently `https://relay.neodb.net`. This relay is used to propagate public activities and catalog information between NeoDB instances. - -Owner of each instance may choose to turn this off in their admin settings. - - -## ATProto - -NeoDB is not a PDS itself currently, but can interact with PDS to import user's social graph, and send status updates. So technically NeoDB does not do full federation in ATProto, but NeoDB will handle some side effect from federation, e.g. when user logging in via ATProto handle, NeoDB will resolve user's DID and store it, and will attempt further operation with the DID, and update user's handle if that's changed, and use the corresponding PDS for that handle; user may still have to login NeoDB again with their Bluesky app password, since the change of PDS may invalidates previous app password. 
diff --git a/docs/internals/journal.md b/docs/internals/journal.md deleted file mode 100644 index d2785ac3..00000000 --- a/docs/internals/journal.md +++ /dev/null @@ -1,99 +0,0 @@ -Journal -======= - - -Data Model ----------- -```mermaid -classDiagram -User .. Piece - -class Piece { - +User owner - +int visibility -} - -class Item { - +str title - +str brief - -enum type -} -Piece <|-- Content -Item .. Content -class Content { - +Item target -} -Content <|-- Rating -class Rating { - +int grade -} -Content <|-- Review -class Review { - +str title - +str body -} -Content <|-- Comment -class Comment { - +str text -} -Content <|-- Note -class Note { - +str title - +str content - +enum progress_type - +str progress_value - -} -Content <|-- Reply -class Reply { - +Content reply_to -} -Piece <|-- List -class List{ - +ListItem[] items -} -Item .. ListItem -List *-- ListItem -class ListItem { - +int position - +Item item - +Dict metadata -} -List <|-- Collection -Item .. Collection -class Collection { - +str title - +str brief - +Bool collabrative -} -List <|-- Tag -class Tag { - +str title -} -List <|-- Shelf -class Shelf { - +Enum type -} -User .. ShelfLogManager -class ShelfLogManager { - +User owner - +ShelfLogEntry[] logs -} -ShelfLogManager *-- ShelfLogEntry -class ShelfLogEntry { - +Item item - +Shelf shelf - +DateTime timestamp -} -ShelfLogEntry .. Item -ShelfLogEntry .. Shelf - -Shelf *-- ShelfItem -ListItem <|-- ShelfItem - -ListItem <|-- TagItem -ListItem <|-- CollectionItem - -Tag *-- TagItem -Collection *-- CollectionItem -``` diff --git a/docs/internals/social.md b/docs/internals/social.md deleted file mode 100644 index 8784aeec..00000000 --- a/docs/internals/social.md +++ /dev/null @@ -1,100 +0,0 @@ -Social -====== - -Data Model ----------- -```mermaid -classDiagram -User .. Piece - -class Piece { - +User owner - +int visibility -} -User .. Activity -class Activity { - +User owner - +int visibility - +Piece action_object -} -Activity .. Piece -Activity .. 
Item -class Item { - +str title - +str brief - -enum type -} -``` - -Activities ----------- -Activity data may be used for: - 1. time line view of user and her friends - 2. chronological view of user's action about an item - 3. ActivityStreams `OrderedCollection` for ActivityPub - -However, 2 is currently implemented separately via `ShelfLogManager` in `journal` app, because users may want to change these records manually. - -Local Timeline --------------- -| Local Timeline Activities | action object class | -| ------------------------- | ------------------- | -| Add an Item to Shelf | ShelfMember | -| Create a Collection | Collection | -| Like a Collection | Like | -| Create a Review | Review | - - -Activity Streams ----------------- -This is a list of activities that should be either shown in the site or delivered as ActivityStreams or both: - - - `Add` / `Remove` an *Item* to / from a *List*: - + add / remove *Item* to / from a user *Collection* - + mark *Item* as wishlist / progress / complete, which are essentially add to / remove from user's predefined *Collection* - - `Create` / `Update` / `Delete` a user *Collection* - - `Create` / `Update` / `Delete` a *Content* with an `Object Link` to *Item* - + `Create` / `Update` / `Delete` a *Comment* or *Review* - + `Create` / `Update` / `Delete` a *Quote* or *Note* - - `Create` / `Update` / `Delete` a *Reply* to another *Content* - - `Announce` / `Like` a *Content* / *Collection*, or `Undo` that - - Social Graph interaction - + `Follow`/`Undo` `Follow` - + `Accept`/`Reject` - + `Block`/`Undo` `Block` - -Supporting these activities above will be essential to a reasonable ActivityPub server implementation. 
- -There are additional activities not made into ActivityPub MVP but technically possible to support in future: - - `Create` / `Update` / `Delete` a *Content* in different flavors, without link to *Item* - * `Note` or `Article` without link to *Item* - * DM (`Note`) - * `Question` - * `Article` - * `Page` - * `Image` - * `Audio` - * `Video` - * `Event` - - `Add` `Content` to / `Remove` `Content` from a user *Collection* - * *Pin* / *Unpin* - - `Move`/`Delete` account - -ActivityPub ------------ - -TBA - -References: - - https://www.w3.org/TR/activitypub/ - - https://www.w3.org/TR/activitystreams-core/ - - https://www.w3.org/TR/activitystreams-vocabulary/ - - https://www.w3.org/TR/json-ld/ - - https://codeberg.org/fediverse/fep/src/branch/main/feps/fep-e232.md - - https://socialhub.activitypub.rocks/t/guide-for-new-activitypub-implementers/479 - - https://docs.joinmastodon.org/spec/activitypub/ - - https://docs.joinbookwyrm.com/activitypub.html - - https://github.com/Podcastindex-org/podcast-namespace/blob/main/proposal-docs/social/social.md#socialinteract-element - - https://dev.funkwhale.audio/funkwhale/funkwhale/-/tree/develop/docs/developer_documentation/federation - - https://github.com/inventaire/inventaire/issues/187 - - https://github.com/inventaire/inventaire/issues/533 diff --git a/docs/origin.md b/docs/origin.md deleted file mode 100644 index 1e8a992e..00000000 --- a/docs/origin.md +++ /dev/null @@ -1,16 +0,0 @@ -Our Story -========= - -Established in September 2021, NeoDB is an [open source project](https://github.com/neodb-social/neodb) and [free service](https://neodb.social/) to help users around the world share and discover reviews and ratings of books, movies, music and games in Fediverse without concern of tracking or censorship. - - -The community that we serve today are mostly those previously used social networks in China to share their reviews and ratings for book, movie and music. 
By doing so for the last 15 years, they have made these reviews a rich form of archive that is the container of personal stories, intellectual debates, political opinions, and more. However, in recent years many of them got suppressed and banned due to increasing censorship and authoritarian laws in China. Many users have lost trust in proprietary software and services in China. NeoDB started as a humble effort to build open source software and services to help these internet refugees recreate their social network identity, regain control of their own content, and share their discoveries and opinions with their friends freely in Fediverse and, thanks to the openness of ActivityPub, the whole world. - - -As we’ve gained some [initial attraction](https://blog.joinmastodon.org/2021/12/mastodon-recap-2021/), we expect to continue supporting this fast growing community with better portability, security and user experience, we also hope to connect our community with broader Fediverse users with diverse backgrounds and serve both via enhanced a16y and i18n, decentralized architecture and interchangeable data format and protocols on catalogue information and user generated content. Your support and donation will help us operate continuously and develop towards this vision with more confidence. - - -NeoDB is a fork of [NiceDB](https://github.com/doubaniux/boofilsic), our code is actively being developed and is open-sourced under the AGPL-v3 license. We highly welcome contributions in code, design, and localization. - - -Please follow us on Fediverse([en](https://mastodon.online/@neodb)|[cn](https://mastodon.social/@neodb)) / [Bluesky](https://bsky.app/profile/neodb.net) / [Twitter](https://x.com/NeoDBsocial), and help spread the words. Thank you for the support! 
diff --git a/docs/servers.json b/docs/servers.json deleted file mode 100644 index 08290ce4..00000000 --- a/docs/servers.json +++ /dev/null @@ -1,67 +0,0 @@ -{ - "version": "1.0", - "servers": [ - { - "host": "neodb.social", - "description": "Flagship instance, managed by NeoDB developers.", - "label": [ - "flagship" - ], - "language": [ - "zh", - "en" - ] - }, - { - "name": "NeoDB experimental", - "host": "eggplant.place", - "description": "Instance running development version of NeoDB software, which may have newer features and occationally bugs, managed by NeoDB developers.", - "label": [ - "beta" - ], - "language": [ - "en" - ] - }, - { - "name": "ReviewDB", - "host": "neodb.social", - "admin": [ - "@shlee@aus.social" - ], - "language": [ - "en" - ] - }, - { - "name": "Minreol", - "host": "minreol.dk", - "admin": [ - "@pmakholm@norrebro.space" - ], - "language": [ - "da" - ] - }, - { - "name": "CasDB", - "host": "db.casually.cat", - "admin": [ - "@casuallynoted@casually.cat" - ], - "language": [ - "en" - ] - }, - { - "name": "KevGa-NeoDB", - "host": "neodb.kevga.de", - "admin": [ - "@lorker@mastodon.kevga.de" - ], - "language": [ - "de" - ] - } - ] -} diff --git a/docs/servers.md b/docs/servers.md deleted file mode 100644 index 82170f43..00000000 --- a/docs/servers.md +++ /dev/null @@ -1,21 +0,0 @@ -# Servers - - -## Community instances - -NeoDB is not a single website. To use it, you need to sign up on an instance, that lets you connect with other people using NeoDB across Fediverse and Bluesky. - -{servers} - -JSON version of this list is also available [here](servers.json). If you are hosting a public instance of NeoDB and wish to share that with the community, please [edit this file](https://github.com/neodb-social/neodb/edit/main/docs/servers.json) and submit a pull request. - -To host your own instance of NeoDB, see [installation guide](install.md). 
- - -## Public relay hosted by NeoDB developers - - - `relay.neodb.net` - NeoDB instances may connect to this relay to send and receive public posts, this is to help share items, ratings and reviews in the network, more on this in [configuration doc](configuration.md). - - -## Honorable mention - - [NiceDB](https://nicedb.org) - the original instance, no longer open for registration. diff --git a/docs/sites.md b/docs/sites.md deleted file mode 100644 index 78f981e6..00000000 --- a/docs/sites.md +++ /dev/null @@ -1,27 +0,0 @@ -# Supported Sites - -the following sites are supported - - -| | import link for media type | import archive | -| ----------------------| ----------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------ | -| Apple Music | Music (Album) | | -| Archive of Our Own | Book (Edition) | | -| Bandcamp | Music (Album) | | -| Bangumi | Book (Edition)
Music (Album)
Movie
TV (Season)
Game | | -| Board Game Geek | Game | | -| BooksTW 博客來 | Book (Edition) | | -| Discogs | Music (Album) | | -| Douban 豆瓣 | Book (Edition, Work)
Music (Album)
Movie
TV (Show, Season, Episode)
Game
Performance (Performance, Production) | Yes, upload [doufen](https://doufen.org) archive | -| Goodreads | Book (Edition, Work) | Yes, submit profile or shelf link | -| Google Books | Book (Edition) | | -| IGDB | Game | | -| IMDB | Movie
TV (Show, Episode) | | -| jjwxc 晋江文学城 | Book (Edition) | | -| Letterboxd | not supported (link in archive is mapped to TMDB) | Yes, upload exported archive | -| Qidian 起点 | Book (Edition) | | -| Spotify | Music (Album) | | -| Steam | Game | | -| The Movie Database | Movie
TV (Show, Season, Episode) | | -| ypshuo 阅评说 | Book (Edition) | | -| RSS link to a podcast | Podcast | Yes, upload OPML | diff --git a/docs/stylesheets/extra.css b/docs/stylesheets/extra.css deleted file mode 100644 index 283aadea..00000000 --- a/docs/stylesheets/extra.css +++ /dev/null @@ -1,14 +0,0 @@ -:root { - --md-primary-fg-color: #0172ad; - --md-primary-fg-color--light: #0172ad; - --md-primary-fg-color--dark: #0172ad; - --md-accent-fg-color: #01aaff; - --md-accent-fg-color--light: #01aaff; - --md-accent-fg-color--dark: #01aaff; - font-family:system-ui, "Segoe UI", Roboto, Oxygen, Ubuntu, Cantarell, Helvetica, Arial, "Helvetica Neue", sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol", "Noto Color Emoji"; -} -@media screen and (min-width: 76.25em) { - .md-nav__title { - display: none; - } -} diff --git a/docs/templates/main.html b/docs/templates/main.html deleted file mode 100644 index 869f169f..00000000 --- a/docs/templates/main.html +++ /dev/null @@ -1,5 +0,0 @@ -{% extends "base.html" %} -{% block extrahead %} - - -{% endblock %} diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md deleted file mode 100644 index 585d68e2..00000000 --- a/docs/troubleshooting.md +++ /dev/null @@ -1,56 +0,0 @@ -# Troubleshooting - - -## Tips - - - `docker compose ps` to see if any service is down, (btw it's normal that `migration` is in `Exit 0` state) - - `docker compose run shell` to run a shell into the cluster; or `docker compose run root` for root shell, and `apt` is available if extra package needed - - see `Debug in Docker` in [development doc](development.md) for debugging tips - - -## Containers - -a typical neodb cluster looks like: - -```mermaid -flowchart TB - web[[Your reverse proxy server with SSL]] --- neodb-nginx[nginx listening on localhost:8000] - subgraph Containers managed by compose.yml - neodb-nginx --- neodb-web - neodb-nginx --- takahe-web - neodb-worker --- typesense[(typesense)] - neodb-worker --- neodb-db[(neodb-db)] - 
neodb-worker --- redis[(redis)] - neodb-web --- typesense - neodb-web --- neodb-db - neodb-web --- redis - neodb-web --- takahe-db[(takahe-db)] - migration([migration]) --- neodb-db - migration --- takahe-db - takahe-web --- takahe-db - takahe-web --- redis - takahe-stator --- takahe-db - takahe-stator --- redis - end -``` - - -## Data Folders - -a typical neodb folder after starting up should look like: - -``` -mysite -├── data # neodb data folder, location can be changed via NEODB_DATA in .env -│ ├── neodb-db # neodb database -│ ├── neodb-media # uid must be 1000 (app user in docker image), chmod if not so -│ ├── redis # neodb/takahe cache -│ ├── takahe-cache # uid must be 33 (www-data user in docker image), chmod if not so -│ ├── takahe-db # neodb database -│ ├── takahe-media # uid must be 1000 (app user in docker image), chmod if not so -│ ├── typesense # neodb search index -│ └── www-root # neodb web root for robots.txt, logo.png and etc -└── config - ├── compose.yml # copied from neodb release - └── .env # your configuration, see neodb.env.example -``` diff --git a/docs/upgrade.md b/docs/upgrade.md deleted file mode 100644 index c6350205..00000000 --- a/docs/upgrade.md +++ /dev/null @@ -1,21 +0,0 @@ -Upgrade -======= - -Check the [release notes](https://github.com/neodb-social/neodb/releases), update `compose.yml` and `.env` as instructed. - -If there is `compose.override.yml`, make sure it's compatible with the updated `compose.yml`. 
- -Pull the latest container image -```bash -docker compose --profile production pull -``` - -Restart the entire cluster: -```bash -docker compose --profile production up -d -``` - -Optionally, clean up old images: -```bash -docker system prune -af --volumes -``` diff --git a/journal/exporters/csv.py b/journal/exporters/csv.py index 5bbd9d78..2cbffb62 100644 --- a/journal/exporters/csv.py +++ b/journal/exporters/csv.py @@ -171,5 +171,5 @@ class CsvExporter(Task): shutil.make_archive(filename[:-4], "zip", temp_folder_path) self.metadata["file"] = filename self.metadata["total"] = total - self.message = "Export complete." + self.message = f"{total} records exported." self.save() diff --git a/journal/exporters/ndjson.py b/journal/exporters/ndjson.py index 119ee136..a91ba09d 100644 --- a/journal/exporters/ndjson.py +++ b/journal/exporters/ndjson.py @@ -3,6 +3,7 @@ import os import re import shutil import tempfile +import uuid from django.conf import settings from django.utils import timezone @@ -65,13 +66,15 @@ class NdjsonExporter(Task): def _save_image(url): if url.startswith("http"): - imgdl = ProxiedImageDownloader(url) - raw_img = imgdl.download().content - ext = imgdl.extention - file = GenerateDateUUIDMediaFilePath(f"x.{ext}", attachment_path) - with open(file, "wb") as binary_file: - binary_file.write(raw_img) - return file + try: + raw_img, ext = ProxiedImageDownloader.download_image(url, "") + if raw_img: + file = "%s/%s.%s" % (attachment_path, uuid.uuid4(), ext) + with open(file, "wb") as binary_file: + binary_file.write(raw_img) + return file + except Exception: + logger.debug(f"error downloading {url}") elif url.startswith("/"): p = os.path.abspath( os.path.join(settings.MEDIA_ROOT, url[len(settings.MEDIA_URL) :]) @@ -79,11 +82,8 @@ class NdjsonExporter(Task): if p.startswith(settings.MEDIA_ROOT): try: shutil.copy2(p, attachment_path) - except Exception as e: - logger.error( - f"error copying {p} to {attachment_path}", - extra={"exception": e}, - ) + 
except Exception: + logger.error(f"error copying {p} to {attachment_path}") return p return url @@ -206,6 +206,25 @@ class NdjsonExporter(Task): for item in self.ref_items: f.write(json.dumps(item.ap_object, default=str) + "\n") + # Export actor.ndjson with Takahe identity data + filename = os.path.join(temp_folder_path, "actor.ndjson") + with open(filename, "w") as f: + f.write(json.dumps(self.get_header()) + "\n") + takahe_identity = self.user.identity.takahe_identity + identity_data = { + "type": "Identity", + "username": takahe_identity.username, + "domain": takahe_identity.domain_id, + "actor_uri": takahe_identity.actor_uri, + "name": takahe_identity.name, + "summary": takahe_identity.summary, + "metadata": takahe_identity.metadata, + "private_key": takahe_identity.private_key, + "public_key": takahe_identity.public_key, + "public_key_id": takahe_identity.public_key_id, + } + f.write(json.dumps(identity_data, default=str) + "\n") + filename = GenerateDateUUIDMediaFilePath( "f.zip", settings.MEDIA_ROOT + "/" + settings.EXPORT_FILE_PATH_ROOT ) @@ -215,5 +234,5 @@ class NdjsonExporter(Task): self.metadata["file"] = filename self.metadata["total"] = total - self.message = "Export complete." + self.message = f"{total} records exported." 
self.save() diff --git a/journal/importers/__init__.py b/journal/importers/__init__.py index ab794d2f..07075888 100644 --- a/journal/importers/__init__.py +++ b/journal/importers/__init__.py @@ -1,34 +1,15 @@ -import os -import zipfile - from .csv import CsvImporter from .douban import DoubanImporter from .goodreads import GoodreadsImporter from .letterboxd import LetterboxdImporter +from .ndjson import NdjsonImporter from .opml import OPMLImporter - -def get_neodb_importer(filename: str) -> type[CsvImporter] | None: - if not os.path.exists(filename) or not zipfile.is_zipfile(filename): - return None - with zipfile.ZipFile(filename, "r") as z: - files = z.namelist() - if any(f == "journal.ndjson" for f in files): - return None - if any( - f.endswith("_mark.csv") - or f.endswith("_review.csv") - or f.endswith("_note.csv") - for f in files - ): - return CsvImporter - - __all__ = [ "CsvImporter", + "NdjsonImporter", "LetterboxdImporter", "OPMLImporter", "DoubanImporter", "GoodreadsImporter", - "get_neodb_importer", ] diff --git a/journal/importers/base.py b/journal/importers/base.py new file mode 100644 index 00000000..8edde4f7 --- /dev/null +++ b/journal/importers/base.py @@ -0,0 +1,197 @@ +import datetime +from typing import Dict, List, Literal, Optional + +from django.conf import settings +from django.utils.dateparse import parse_datetime +from loguru import logger + +from catalog.common.sites import SiteManager +from catalog.models import Edition, IdType, Item, SiteName +from journal.models import ShelfType +from users.models import Task + +_PREFERRED_SITES = [ + SiteName.Fediverse, + SiteName.RSS, + SiteName.TMDB, + SiteName.IMDB, + SiteName.GoogleBooks, + SiteName.Goodreads, + SiteName.IGDB, +] + + +class BaseImporter(Task): + class Meta: + app_label = "journal" # workaround bug in TypedModel + + ImportResult = Literal["imported", "skipped", "failed"] + TaskQueue = "import" + DefaultMetadata = { + "total": 0, + "processed": 0, + "skipped": 0, + "imported": 0, + 
"failed": 0, + "failed_items": [], + "file": None, + "visibility": 0, + } + + def progress(self, result: ImportResult) -> None: + """Update import progress. + + Args: + result: The import result ('imported', 'skipped', or 'failed') + """ + self.metadata["processed"] += 1 + self.metadata[result] = self.metadata.get(result, 0) + 1 + + if self.metadata["total"]: + progress_percentage = round( + self.metadata["processed"] / self.metadata["total"] * 100 + ) + self.message = f"Progress: {progress_percentage}% - " + else: + self.message = "" + self.message += ( + f"{self.metadata['imported']} imported, " + f"{self.metadata['skipped']} skipped, " + f"{self.metadata['failed']} failed" + ) + self.save(update_fields=["metadata", "message"]) + + def run(self) -> None: + raise NotImplementedError + + def get_item_by_info_and_links( + self, title: str, info_str: str, links: list[str] + ) -> Optional[Item]: + """Find an item based on information from CSV export. + + Args: + title: Item title + info_str: Item info string (space-separated key:value pairs) + links: List of URLs + + Returns: + Item if found, None otherwise + """ + site_url = settings.SITE_INFO["site_url"] + "/" + # look for local items first + for link in links: + if link.startswith("/") or link.startswith(site_url): + item = Item.get_by_url(link, resolve_merge=True) + if item and not item.is_deleted: + return item + + sites = [ + SiteManager.get_site_by_url(link, detect_redirection=False) + for link in links + ] + sites = [site for site in sites if site] + sites.sort( + key=lambda x: _PREFERRED_SITES.index(x.SITE_NAME) + if x.SITE_NAME in _PREFERRED_SITES + else 99 + ) + + # match items without extra requests + for site in sites: + item = site.get_item() + if item: + return item + + # match items after HEAD + sites = [ + SiteManager.get_site_by_url(site.url) if site.url else site + for site in sites + ] + sites = [site for site in sites if site] + for site in sites: + item = site.get_item() + if item: +
return item + + # fetch from remote + for site in sites: + try: + logger.debug(f"fetching {site.url}") + site.get_resource_ready() + item = site.get_item() + if item: + return item + except Exception as e: + logger.error(f"Error fetching item: {e}") + + # Try using the info string + if info_str: + info_dict = {} + for pair in info_str.strip().split(): + if ":" in pair: + key, value = pair.split(":", 1) + info_dict[key] = value + + # Check for ISBN, IMDB, etc. + item = None + for key, value in info_dict.items(): + if key == "isbn" and value: + item = Edition.objects.filter( + primary_lookup_id_type=IdType.ISBN, + primary_lookup_id_value=value, + ).first() + elif key == "imdb" and value: + item = Item.objects.filter( + primary_lookup_id_type=IdType.IMDB, + primary_lookup_id_value=value, + ).first() + if item: + return item + return None + + def parse_tags(self, tags_str: str) -> List[str]: + """Parse space-separated tags string into a list of tags.""" + if not tags_str: + return [] + return [tag.strip() for tag in tags_str.split() if tag.strip()] + + def parse_info(self, info_str: str) -> Dict[str, str]: + """Parse info string into a dictionary.""" + info_dict = {} + if not info_str: + return info_dict + + for pair in info_str.split(): + if ":" in pair: + key, value = pair.split(":", 1) + info_dict[key] = value + + return info_dict + + def parse_datetime(self, timestamp_str: str | None) -> Optional[datetime.datetime]: + """Parse ISO format timestamp into datetime.""" + if not timestamp_str: + return None + + try: + dt = parse_datetime(timestamp_str) + if dt and dt.tzinfo is None: + dt = dt.replace(tzinfo=datetime.UTC) + return dt + except Exception as e: + logger.error(f"Error parsing datetime {timestamp_str}: {e}") + return None + + def parse_shelf_type(self, status_str: str) -> ShelfType: + """Parse shelf type string into ShelfType enum.""" + if not status_str: + return ShelfType.WISHLIST + + status_map = { + "wishlist": ShelfType.WISHLIST, + "progress": 
ShelfType.PROGRESS, + "complete": ShelfType.COMPLETE, + "dropped": ShelfType.DROPPED, + } + + return status_map.get(status_str.lower(), ShelfType.WISHLIST) diff --git a/journal/importers/csv.py b/journal/importers/csv.py index 93656bba..24401de3 100644 --- a/journal/importers/csv.py +++ b/journal/importers/csv.py @@ -1,181 +1,22 @@ import csv -import datetime import os import tempfile import zipfile -from typing import Dict, List, Optional +from typing import Dict -from django.conf import settings from django.utils import timezone -from django.utils.dateparse import parse_datetime -from django.utils.translation import gettext as _ from loguru import logger -from catalog.common.sites import SiteManager -from catalog.models import Edition, IdType, Item, ItemCategory, SiteName -from journal.models import Mark, Note, Review, ShelfType -from users.models import Task +from catalog.models import ItemCategory +from journal.models import Mark, Note, Review -_PREFERRED_SITES = [ - SiteName.Fediverse, - SiteName.RSS, - SiteName.TMDB, - SiteName.IMDB, - SiteName.GoogleBooks, - SiteName.Goodreads, - SiteName.IGDB, -] +from .base import BaseImporter -class CsvImporter(Task): +class CsvImporter(BaseImporter): class Meta: app_label = "journal" # workaround bug in TypedModel - TaskQueue = "import" - DefaultMetadata = { - "total": 0, - "processed": 0, - "skipped": 0, - "imported": 0, - "failed": 0, - "failed_items": [], - "file": None, - "visibility": 0, - } - - def get_item_by_info_and_links( - self, title: str, info_str: str, links_str: str - ) -> Optional[Item]: - """Find an item based on information from CSV export. 
- - Args: - title: Item title - info_str: Item info string (space-separated key:value pairs) - links_str: Space-separated URLs - - Returns: - Item if found, None otherwise - """ - site_url = settings.SITE_INFO["site_url"] + "/" - links = links_str.strip().split() - # look for local items first - for link in links: - if link.startswith("/") or link.startswith(site_url): - item = Item.get_by_url(link, resolve_merge=True) - if item and not item.is_deleted: - return item - - sites = [ - SiteManager.get_site_by_url(link, detect_redirection=False) - for link in links - ] - sites = [site for site in sites if site] - sites.sort( - key=lambda x: _PREFERRED_SITES.index(x.SITE_NAME) - if x.SITE_NAME in _PREFERRED_SITES - else 99 - ) - - # match items without extra requests - for site in sites: - item = site.get_item() - if item: - return item - - # match items after HEAD - sites = [ - SiteManager.get_site_by_url(site.url) if site.url else site - for site in sites - ] - sites = [site for site in sites if site] - for site in sites: - item = site.get_item() - if item: - return item - - # fetch from remote - for site in sites: - try: - logger.debug(f"fetching {site.url}") - site.get_resource_ready() - item = site.get_item() - if item: - return item - except Exception as e: - logger.error(f"Error fetching item: {e}") - - # Try using the info string - if info_str: - info_dict = {} - for pair in info_str.strip().split(): - if ":" in pair: - key, value = pair.split(":", 1) - info_dict[key] = value - - # Check for ISBN, IMDB, etc. 
- item = None - for key, value in info_dict.items(): - if key == "isbn" and value: - item = Edition.objects.filter( - primary_lookup_id_type=IdType.ISBN, - primary_lookup_id_value=value, - ).first() - elif key == "imdb" and value: - item = Item.objects.filter( - primary_lookup_id_type=IdType.IMDB, - primary_lookup_id_value=value, - ).first() - if item: - return item - return None - - def parse_tags(self, tags_str: str) -> List[str]: - """Parse space-separated tags string into a list of tags.""" - if not tags_str: - return [] - return [tag.strip() for tag in tags_str.split() if tag.strip()] - - def parse_info(self, info_str: str) -> Dict[str, str]: - """Parse info string into a dictionary.""" - info_dict = {} - if not info_str: - return info_dict - - for pair in info_str.split(): - if ":" in pair: - key, value = pair.split(":", 1) - info_dict[key] = value - - return info_dict - - def parse_datetime(self, timestamp_str: str) -> Optional[datetime.datetime]: - """Parse ISO format timestamp into datetime.""" - if not timestamp_str: - return None - - try: - dt = parse_datetime(timestamp_str) - if dt and dt.tzinfo is None: - dt = dt.replace(tzinfo=datetime.UTC) - return dt - except Exception as e: - logger.error(f"Error parsing datetime {timestamp_str}: {e}") - return None - - def parse_shelf_type(self, status_str: str) -> ShelfType: - """Parse shelf type string into ShelfType enum.""" - if not status_str: - return ShelfType.WISHLIST - - status_map = { - "wishlist": ShelfType.WISHLIST, - "progress": ShelfType.PROGRESS, - "complete": ShelfType.COMPLETE, - "dropped": ShelfType.DROPPED, - } - - return status_map.get(status_str.lower(), ShelfType.WISHLIST) - def import_mark(self, row: Dict[str, str]) -> str: """Import a mark from a CSV row. 
@@ -184,7 +25,9 @@ class CsvImporter(Task): """ try: item = self.get_item_by_info_and_links( - row.get("title", ""), row.get("info", ""), row.get("links", "") + row.get("title", ""), + row.get("info", ""), + row.get("links", "").strip().split(), ) if not item: @@ -246,7 +89,9 @@ class CsvImporter(Task): """ try: item = self.get_item_by_info_and_links( - row.get("title", ""), row.get("info", ""), row.get("links", "") + row.get("title", ""), + row.get("info", ""), + row.get("links", "").strip().split(), ) if not item: @@ -304,7 +149,9 @@ class CsvImporter(Task): """ try: item = self.get_item_by_info_and_links( - row.get("title", ""), row.get("info", ""), row.get("links", "") + row.get("title", ""), + row.get("info", ""), + row.get("links", "").strip().split(), ) if not item: @@ -361,26 +208,6 @@ class CsvImporter(Task): ) return "failed" - def progress(self, result: str) -> None: - """Update import progress. - - Args: - result: The import result ('imported', 'skipped', or 'failed') - """ - self.metadata["processed"] += 1 - self.metadata[result] = self.metadata.get(result, 0) + 1 - - progress_percentage = round( - self.metadata["processed"] / self.metadata["total"] * 100 - ) - self.message = ( - f"Progress: {progress_percentage}% - " - f"{self.metadata['imported']} imported, " - f"{self.metadata['skipped']} skipped, " - f"{self.metadata['failed']} failed" - ) - self.save(update_fields=["metadata", "message"]) - def process_csv_file(self, file_path: str, import_function) -> None: """Process a CSV file using the specified import function.""" logger.debug(f"Processing {file_path}") @@ -424,7 +251,7 @@ class CsvImporter(Task): # Set the total count in metadata self.metadata["total"] = total_rows - self.message = f"Found {total_rows} items to import" + self.message = f"found {total_rows} records to import" self.save(update_fields=["metadata", "message"]) # Now process all files @@ -432,7 +259,5 @@ class CsvImporter(Task): import_function = getattr(self, 
f"import_{file_type}") self.process_csv_file(file_path, import_function) - self.message = _("Import complete") - if self.metadata.get("failed_items", []): - self.message += f": {self.metadata['failed']} items failed ({len(self.metadata['failed_items'])} unique items)" + self.message = f"{self.metadata['imported']} items imported, {self.metadata['skipped']} skipped, {self.metadata['failed']} failed." self.save() diff --git a/journal/importers/douban.py b/journal/importers/douban.py index 1157671c..627fd999 100644 --- a/journal/importers/douban.py +++ b/journal/importers/douban.py @@ -154,6 +154,8 @@ class DoubanImporter(Task): def run(self): logger.info(f"{self.user} import start") self.load_sheets() + self.message = f"豆瓣标记和评论导入开始,共{self.metadata['total']}篇。" + self.save(update_fields=["message"]) logger.info(f"{self.user} sheet loaded, {self.metadata['total']} lines total") for name, param in self.mark_sheet_config.items(): self.import_mark_sheet(self.mark_data[name], param[0], name) diff --git a/journal/importers/letterboxd.py b/journal/importers/letterboxd.py index f37241f8..8fb42ea8 100644 --- a/journal/importers/letterboxd.py +++ b/journal/importers/letterboxd.py @@ -1,4 +1,5 @@ import csv +import os import tempfile import zipfile from datetime import timedelta @@ -35,16 +36,24 @@ class LetterboxdImporter(Task): "file": None, } - def get_item_by_url(self, url): + @classmethod + def validate_file(cls, uploaded_file): + try: + return zipfile.is_zipfile(uploaded_file) + except Exception: + return False + + @classmethod + def get_item_by_url(cls, url): try: h = BasicDownloader(url).download().html() - tu = h.xpath("//a[@data-track-action='TMDb']/@href") + tu = h.xpath("//a[@data-track-action='TMDB']/@href") iu = h.xpath("//a[@data-track-action='IMDb']/@href") if not tu: i = h.xpath('//span[@class="film-title-wrapper"]/a/@href') u2 = "https://letterboxd.com" + i[0] # type:ignore h = BasicDownloader(u2).download().html() - tu = 
h.xpath("//a[@data-track-action='TMDb']/@href") + tu = h.xpath("//a[@data-track-action='TMDB']/@href") iu = h.xpath("//a[@data-track-action='IMDb']/@href") if not tu: logger.error(f"Unknown TMDB for {url}") @@ -121,7 +130,6 @@ class LetterboxdImporter(Task): self.progress(1) def progress(self, mark_state: int, url=None): - self.metadata["total"] += 1 self.metadata["processed"] += 1 match mark_state: case 1: @@ -142,49 +150,56 @@ class LetterboxdImporter(Task): with tempfile.TemporaryDirectory() as tmpdirname: logger.debug(f"Extracting {filename} to {tmpdirname}") zipref.extractall(tmpdirname) - with open(tmpdirname + "/reviews.csv") as f: - reader = csv.DictReader(f, delimiter=",") - for row in reader: - uris.add(row["Letterboxd URI"]) - self.mark( - row["Letterboxd URI"], - ShelfType.COMPLETE, - row["Watched Date"], - row["Rating"], - row["Review"], - row["Tags"], - ) - with open(tmpdirname + "/ratings.csv") as f: - reader = csv.DictReader(f, delimiter=",") - for row in reader: - if row["Letterboxd URI"] in uris: - continue - uris.add(row["Letterboxd URI"]) - self.mark( - row["Letterboxd URI"], - ShelfType.COMPLETE, - row["Date"], - row["Rating"], - ) - with open(tmpdirname + "/watched.csv") as f: - reader = csv.DictReader(f, delimiter=",") - for row in reader: - if row["Letterboxd URI"] in uris: - continue - uris.add(row["Letterboxd URI"]) - self.mark( - row["Letterboxd URI"], - ShelfType.COMPLETE, - row["Date"], - ) - with open(tmpdirname + "/watchlist.csv") as f: - reader = csv.DictReader(f, delimiter=",") - for row in reader: - if row["Letterboxd URI"] in uris: - continue - uris.add(row["Letterboxd URI"]) - self.mark( - row["Letterboxd URI"], - ShelfType.WISHLIST, - row["Date"], - ) + if os.path.exists(tmpdirname + "/reviews.csv"): + with open(tmpdirname + "/reviews.csv") as f: + reader = csv.DictReader(f, delimiter=",") + for row in reader: + uris.add(row["Letterboxd URI"]) + self.mark( + row["Letterboxd URI"], + ShelfType.COMPLETE, + row["Watched Date"], + 
row["Rating"], + row["Review"], + row["Tags"], + ) + if os.path.exists(tmpdirname + "/ratings.csv"): + with open(tmpdirname + "/ratings.csv") as f: + reader = csv.DictReader(f, delimiter=",") + for row in reader: + if row["Letterboxd URI"] in uris: + continue + uris.add(row["Letterboxd URI"]) + self.mark( + row["Letterboxd URI"], + ShelfType.COMPLETE, + row["Date"], + row["Rating"], + ) + if os.path.exists(tmpdirname + "/watched.csv"): + with open(tmpdirname + "/watched.csv") as f: + reader = csv.DictReader(f, delimiter=",") + for row in reader: + if row["Letterboxd URI"] in uris: + continue + uris.add(row["Letterboxd URI"]) + self.mark( + row["Letterboxd URI"], + ShelfType.COMPLETE, + row["Date"], + ) + if os.path.exists(tmpdirname + "/watchlist.csv"): + with open(tmpdirname + "/watchlist.csv") as f: + reader = csv.DictReader(f, delimiter=",") + for row in reader: + if row["Letterboxd URI"] in uris: + continue + uris.add(row["Letterboxd URI"]) + self.mark( + row["Letterboxd URI"], + ShelfType.WISHLIST, + row["Date"], + ) + self.metadata["total"] = self.metadata["processed"] + self.message = f"{self.metadata['imported']} imported, {self.metadata['skipped']} skipped, {self.metadata['failed']} failed" + self.save(update_fields=["metadata", "message"]) diff --git a/journal/importers/ndjson.py b/journal/importers/ndjson.py new file mode 100644 index 00000000..0f0aa3dd --- /dev/null +++ b/journal/importers/ndjson.py @@ -0,0 +1,484 @@ +import json +import os +import tempfile +import zipfile +from typing import Any, Dict + +from loguru import logger + +from journal.models import ( + Collection, + Comment, + Mark, + Note, + Rating, + Review, + ShelfLogEntry, + ShelfType, + Tag, + TagMember, +) +from takahe.utils import Takahe + +from .base import BaseImporter + + +class NdjsonImporter(BaseImporter): + """Importer for NDJSON files exported from NeoDB.""" + + class Meta: + app_label = "journal" # workaround bug in TypedModel + + def __init__(self, *args, **kwargs): + 
super().__init__(*args, **kwargs) + self.items = {} + + def import_collection(self, data: Dict[str, Any]) -> BaseImporter.ImportResult: + """Import a collection from NDJSON data.""" + try: + owner = self.user.identity + visibility = data.get("visibility", self.metadata.get("visibility", 0)) + metadata = data.get("metadata", {}) + content_data = data.get("content", {}) + published_dt = self.parse_datetime(content_data.get("published")) + name = content_data.get("name", "") + content = content_data.get("content", "") + collection = Collection.objects.create( + owner=owner, + title=name, + brief=content, + visibility=visibility, + metadata=data.get("metadata", {}), + created_time=published_dt, + ) + item_data = data.get("items", []) + for item_entry in item_data: + item_url = item_entry.get("item") + if not item_url: + continue + item = self.items.get(item_url) + if not item: + logger.warning(f"Could not find item for collection: {item_url}") + continue + metadata = item_entry.get("metadata", {}) + collection.append_item(item, metadata=metadata) + return "imported" + except Exception: + logger.exception("Error importing collection") + return "failed" + + def import_shelf_member(self, data: Dict[str, Any]) -> BaseImporter.ImportResult: + """Import a shelf member (mark) from NDJSON data.""" + try: + owner = self.user.identity + visibility = data.get("visibility", self.metadata.get("visibility", 0)) + metadata = data.get("metadata", {}) + content_data = data.get("content", {}) + published_dt = self.parse_datetime(content_data.get("published")) + item = self.items.get(content_data.get("withRegardTo", "")) + if not item: + raise KeyError(f"Could not find item: {data.get('item', '')}") + shelf_type = content_data.get("status", ShelfType.WISHLIST) + mark = Mark(owner, item) + if mark.created_time and published_dt and mark.created_time >= published_dt: + return "skipped" + mark.update( + shelf_type=shelf_type, + visibility=visibility, + metadata=metadata, + 
created_time=published_dt, + ) + return "imported" + except Exception: + logger.exception("Error importing shelf member") + return "failed" + + def import_shelf_log(self, data: Dict[str, Any]) -> BaseImporter.ImportResult: + """Import a shelf log entry from NDJSON data.""" + try: + item = self.items.get(data.get("item", "")) + if not item: + raise KeyError(f"Could not find item: {data.get('item', '')}") + owner = self.user.identity + shelf_type = data.get("status", ShelfType.WISHLIST) + # posts = data.get("posts", []) # TODO but will be tricky + timestamp = data.get("timestamp") + timestamp_dt = self.parse_datetime(timestamp) if timestamp else None + _, created = ShelfLogEntry.objects.update_or_create( + owner=owner, + item=item, + shelf_type=shelf_type, + timestamp=timestamp_dt, + ) + # return "imported" if created else "skipped" + # count skip as success otherwise it may confuse user + return "imported" + except Exception: + logger.exception("Error importing shelf log") + return "failed" + + def import_post(self, data: Dict[str, Any]) -> BaseImporter.ImportResult: + """Import a post from NDJSON data.""" + # TODO + return "skipped" + + def import_review(self, data: Dict[str, Any]) -> BaseImporter.ImportResult: + """Import a review from NDJSON data.""" + try: + owner = self.user.identity + visibility = data.get("visibility", self.metadata.get("visibility", 0)) + metadata = data.get("metadata", {}) + content_data = data.get("content", {}) + published_dt = self.parse_datetime(content_data.get("published")) + item = self.items.get(content_data.get("withRegardTo", "")) + if not item: + raise KeyError(f"Could not find item: {data.get('item', '')}") + name = content_data.get("name", "") + content = content_data.get("content", "") + existing_review = Review.objects.filter( + owner=owner, item=item, title=name + ).first() + if ( + existing_review + and existing_review.created_time + and published_dt + and existing_review.created_time >= published_dt + ): + return "skipped" 
+ Review.objects.create( + owner=owner, + item=item, + title=name, + body=content, + created_time=published_dt, + visibility=visibility, + metadata=metadata, + ) + return "imported" + except Exception: + logger.exception("Error importing review") + return "failed" + + def import_note(self, data: Dict[str, Any]) -> BaseImporter.ImportResult: + """Import a note from NDJSON data.""" + try: + owner = self.user.identity + visibility = data.get("visibility", self.metadata.get("visibility", 0)) + content_data = data.get("content", {}) + published_dt = self.parse_datetime(content_data.get("published")) + item = self.items.get(content_data.get("withRegardTo", "")) + if not item: + raise KeyError(f"Could not find item: {data.get('item', '')}") + title = content_data.get("title", "") + content = content_data.get("content", "") + sensitive = content_data.get("sensitive", False) + progress = content_data.get("progress", {}) + progress_type = progress.get("type", "") + progress_value = progress.get("value", "") + Note.objects.create( + item=item, + owner=owner, + title=title, + content=content, + sensitive=sensitive, + progress_type=progress_type, + progress_value=progress_value, + created_time=published_dt, + visibility=visibility, + metadata=data.get("metadata", {}), + ) + return "imported" + except Exception: + logger.exception("Error importing note") + return "failed" + + def import_comment(self, data: Dict[str, Any]) -> BaseImporter.ImportResult: + """Import a comment from NDJSON data.""" + try: + owner = self.user.identity + visibility = data.get("visibility", self.metadata.get("visibility", 0)) + metadata = data.get("metadata", {}) + content_data = data.get("content", {}) + published_dt = self.parse_datetime(content_data.get("published")) + item = self.items.get(content_data.get("withRegardTo", "")) + if not item: + raise KeyError(f"Could not find item: {data.get('item', '')}") + content = content_data.get("content", "") + existing_comment = 
Comment.objects.filter(owner=owner, item=item).first() + if ( + existing_comment + and existing_comment.created_time + and published_dt + and existing_comment.created_time >= published_dt + ): + return "skipped" + Comment.objects.create( + owner=owner, + item=item, + text=content, + created_time=published_dt, + visibility=visibility, + metadata=metadata, + ) + return "imported" + except Exception: + logger.exception("Error importing comment") + return "failed" + + def import_rating(self, data: Dict[str, Any]) -> BaseImporter.ImportResult: + """Import a rating from NDJSON data.""" + try: + owner = self.user.identity + visibility = data.get("visibility", self.metadata.get("visibility", 0)) + metadata = data.get("metadata", {}) + content_data = data.get("content", {}) + published_dt = self.parse_datetime(content_data.get("published")) + item = self.items.get(content_data.get("withRegardTo", "")) + if not item: + raise KeyError(f"Could not find item: {data.get('item', '')}") + rating_grade = int(float(content_data.get("value", 0))) + existing_rating = Comment.objects.filter(owner=owner, item=item).first() + if ( + existing_rating + and existing_rating.created_time + and published_dt + and existing_rating.created_time >= published_dt + ): + return "skipped" + Rating.objects.create( + owner=owner, + item=item, + grade=rating_grade, + created_time=published_dt, + visibility=visibility, + metadata=metadata, + ) + return "imported" + except Exception: + logger.exception("Error importing rating") + return "failed" + + def import_tag(self, data: Dict[str, Any]) -> BaseImporter.ImportResult: + """Import tags from NDJSON data.""" + try: + owner = self.user.identity + visibility = data.get("visibility", self.metadata.get("visibility", 0)) + pinned = data.get("pinned", self.metadata.get("pinned", False)) + tag_title = Tag.cleanup_title(data.get("name", "")) + _, created = Tag.objects.update_or_create( + owner=owner, + title=tag_title, + defaults={ + "visibility": visibility, + 
"pinned": pinned, + }, + ) + return "imported" if created else "skipped" + except Exception: + logger.exception("Error importing tag member") + return "failed" + + def import_tag_member(self, data: Dict[str, Any]) -> BaseImporter.ImportResult: + """Import tags from NDJSON data.""" + try: + owner = self.user.identity + visibility = data.get("visibility", self.metadata.get("visibility", 0)) + metadata = data.get("metadata", {}) + content_data = data.get("content", {}) + published_dt = self.parse_datetime(content_data.get("published")) + item = self.items.get(content_data.get("withRegardTo", "")) + if not item: + raise KeyError(f"Could not find item: {data.get('item', '')}") + tag_title = Tag.cleanup_title(content_data.get("tag", "")) + tag, _ = Tag.objects.get_or_create( + owner=owner, + title=tag_title, + defaults={ + "created_time": published_dt, + "visibility": visibility, + "pinned": False, + "metadata": metadata, + }, + ) + _, created = TagMember.objects.update_or_create( + owner=owner, + item=item, + parent=tag, + defaults={ + "created_time": published_dt, + "visibility": visibility, + "metadata": metadata, + "position": 0, + }, + ) + return "imported" if created else "skipped" + except Exception: + logger.exception("Error importing tag member") + return "failed" + + def process_journal(self, file_path: str) -> None: + """Process a NDJSON file and import all items.""" + logger.debug(f"Processing {file_path}") + lines_error = 0 + import_funcs = { + "Tag": self.import_tag, + "TagMember": self.import_tag_member, + "Rating": self.import_rating, + "Comment": self.import_comment, + "ShelfMember": self.import_shelf_member, + "Review": self.import_review, + "Note": self.import_note, + "Collection": self.import_collection, + "ShelfLog": self.import_shelf_log, + "Post": self.import_post, + } + journal = {k: [] for k in import_funcs.keys()} + with open(file_path, "r") as jsonfile: + # Skip header line + next(jsonfile, None) + + for line in jsonfile: + try: + data = 
json.loads(line) + except json.JSONDecodeError: + lines_error += 1 + continue + data_type = data.get("type") + if not data_type: + continue + if data_type not in journal: + journal[data_type] = [] + journal[data_type].append(data) + + self.metadata["total"] = sum(len(items) for items in journal.values()) + self.message = f"found {self.metadata['total']} records to import" + self.save(update_fields=["metadata", "message"]) + + logger.debug(f"Processing {self.metadata['total']} entries") + if lines_error: + logger.error(f"Error processing journal.ndjson: {lines_error} lines") + + for typ, func in import_funcs.items(): + for data in journal.get(typ, []): + result = func(data) + self.progress(result) + logger.info( + f"Imported {self.metadata['imported']}, skipped {self.metadata['skipped']}, failed {self.metadata['failed']}" + ) + + def parse_catalog(self, file_path: str) -> None: + """Parse the catalog.ndjson file and build item lookup tables.""" + logger.debug(f"Parsing catalog file: {file_path}") + item_count = 0 + try: + with open(file_path, "r") as jsonfile: + for line in jsonfile: + try: + i = json.loads(line) + except (json.JSONDecodeError, Exception): + logger.exception("Error processing catalog item") + continue + u = i.get("id") + if not u: + continue + # self.catalog_items[u] = i + item_count += 1 + links = [u] + [r["url"] for r in i.get("external_resources", [])] + self.items[u] = self.get_item_by_info_and_links("", "", links) + logger.info(f"Loaded {item_count} items from catalog") + self.metadata["catalog_processed"] = item_count + except Exception: + logger.exception("Error parsing catalog file") + + def parse_header(self, file_path: str) -> Dict[str, Any]: + try: + with open(file_path, "r") as jsonfile: + first_line = jsonfile.readline().strip() + if first_line: + header = json.loads(first_line) + if header.get("server"): + return header + except (json.JSONDecodeError, IOError): + logger.exception("Error parsing header") + return {} + + def 
process_actor(self, file_path: str) -> None: + """Process the actor.ndjson file to update user identity information.""" + logger.debug(f"Processing actor data from {file_path}") + try: + with open(file_path, "r") as jsonfile: + next(jsonfile, None) + for line in jsonfile: + try: + data = json.loads(line) + except json.JSONDecodeError: + logger.error("Error parsing actor data line") + continue + + if data.get("type") == "Identity": + logger.debug("Found identity data in actor.ndjson") + takahe_identity = self.user.identity.takahe_identity + updated = False + if ( + data.get("name") + and data.get("name") != takahe_identity.name + ): + logger.debug( + f"Updating identity name from {takahe_identity.name} to {data.get('name')}" + ) + takahe_identity.name = data.get("name") + updated = True + if ( + data.get("summary") + and data.get("summary") != takahe_identity.summary + ): + logger.debug("Updating identity summary") + takahe_identity.summary = data.get("summary") + updated = True + if updated: + takahe_identity.save() + Takahe.update_state(takahe_identity, "edited") + logger.info("Updated identity") + return + except Exception as e: + logger.exception(f"Error processing actor file: {e}") + + def run(self) -> None: + """Run the NDJSON import.""" + filename = self.metadata["file"] + logger.debug(f"Importing {filename}") + + with zipfile.ZipFile(filename, "r") as zipref: + with tempfile.TemporaryDirectory() as tmpdirname: + zipref.extractall(tmpdirname) + + # Process actor data first if available + actor_path = os.path.join(tmpdirname, "actor.ndjson") + if os.path.exists(actor_path): + actor_header = self.parse_header(actor_path) + logger.debug(f"Found actor.ndjson with {actor_header}") + self.process_actor(actor_path) + else: + logger.debug("No actor.ndjson file found in the archive") + + catalog_path = os.path.join(tmpdirname, "catalog.ndjson") + if os.path.exists(catalog_path): + catalog_header = self.parse_header(catalog_path) + logger.debug(f"Loading catalog.ndjson 
with {catalog_header}") + self.parse_catalog(catalog_path) + else: + logger.warning("catalog.ndjson file not found in the archive") + + journal_path = os.path.join(tmpdirname, "journal.ndjson") + if not os.path.exists(journal_path): + logger.error("journal.ndjson file not found in the archive") + self.message = "Import failed: journal.ndjson file not found" + self.save() + return + header = self.parse_header(journal_path) + self.metadata["journal_header"] = header + logger.debug(f"Importing journal.ndjson with {header}") + self.process_journal(journal_path) + + self.message = f"{self.metadata['imported']} items imported, {self.metadata['skipped']} skipped, {self.metadata['failed']} failed." + self.save() diff --git a/journal/importers/opml.py b/journal/importers/opml.py index 184b8151..d1685d61 100644 --- a/journal/importers/opml.py +++ b/journal/importers/opml.py @@ -1,43 +1,54 @@ -import django_rq import listparser -from auditlog.context import set_actor from django.utils.translation import gettext as _ from loguru import logger -from user_messages import api as msg from catalog.common import * from catalog.common.downloaders import * from catalog.sites.rss import RSS from journal.models import * +from users.models.task import Task -class OPMLImporter: - def __init__(self, user, visibility, mode): - self.user = user - self.visibility = visibility - self.mode = mode +class OPMLImporter(Task): + class Meta: + app_label = "journal" # workaround bug in TypedModel - def parse_file(self, uploaded_file): - return listparser.parse(uploaded_file.read()).feeds + TaskQueue = "import" + DefaultMetadata = { + "total": 0, + "mode": 0, + "processed": 0, + "skipped": 0, + "imported": 0, + "failed": 0, + "visibility": 0, + "failed_urls": [], + "file": None, + } - def import_from_file(self, uploaded_file): - feeds = self.parse_file(uploaded_file) - if not feeds: + @classmethod + def validate_file(cls, f): + try: + return bool(listparser.parse(f.read()).feeds) + except Exception: 
return False - django_rq.get_queue("import").enqueue(self.import_from_file_task, feeds) - return True - def import_from_file_task(self, feeds): - logger.info(f"{self.user} import opml start") - skip = 0 - collection = None - with set_actor(self.user): - if self.mode == 1: + def run(self): + with open(self.metadata["file"], "r") as f: + feeds = listparser.parse(f.read()).feeds + self.metadata["total"] = len(feeds) + self.message = f"Processing {self.metadata['total']} feeds." + self.save(update_fields=["metadata", "message"]) + + collection = None + if self.metadata["mode"] == 1: title = _("{username}'s podcast subscriptions").format( username=self.user.display_name ) collection = Collection.objects.create( - owner=self.user.identity, title=title + owner=self.user.identity, + title=title, + visibility=self.metadata["visibility"], ) for feed in feeds: logger.info(f"{self.user} import {feed.url}") @@ -47,21 +58,26 @@ class OPMLImporter: res = None if not res or not res.item: logger.warning(f"{self.user} feed error {feed.url}") + self.metadata["failed"] += 1 continue item = res.item - if self.mode == 0: + if self.metadata["mode"] == 0: mark = Mark(self.user.identity, item) if mark.shelfmember: logger.info(f"{self.user} marked, skip {feed.url}") - skip += 1 + self.metadata["skipped"] += 1 else: + self.metadata["imported"] += 1 mark.update( - ShelfType.PROGRESS, None, None, visibility=self.visibility + ShelfType.PROGRESS, + None, + None, + visibility=self.metadata["visibility"], ) - elif self.mode == 1 and collection: + elif self.metadata["mode"] == 1 and collection: + self.metadata["imported"] += 1 collection.append_item(item) - logger.info(f"{self.user} import opml end") - msg.success( - self.user, - f"OPML import complete, {len(feeds)} feeds processed, {skip} exisiting feeds skipped.", - ) + self.metadata["processed"] += 1 + self.save(update_fields=["metadata"]) + self.message = f"{self.metadata['imported']} feeds imported, {self.metadata['skipped']} skipped, 
{self.metadata['failed']} failed." + self.save(update_fields=["message"]) diff --git a/journal/migrations/0006_csvimporter.py b/journal/migrations/0006_csvimporter.py index 7b6f45c6..ceaa90b9 100644 --- a/journal/migrations/0006_csvimporter.py +++ b/journal/migrations/0006_csvimporter.py @@ -10,6 +10,16 @@ class Migration(migrations.Migration): ] operations = [ + migrations.CreateModel( + name="BaseImporter", + fields=[], + options={ + "proxy": True, + "indexes": [], + "constraints": [], + }, + bases=("users.task",), + ), migrations.CreateModel( name="CsvImporter", fields=[], @@ -20,4 +30,24 @@ class Migration(migrations.Migration): }, bases=("users.task",), ), + migrations.CreateModel( + name="OPMLImporter", + fields=[], + options={ + "proxy": True, + "indexes": [], + "constraints": [], + }, + bases=("users.task",), + ), + migrations.CreateModel( + name="NdjsonImporter", + fields=[], + options={ + "proxy": True, + "indexes": [], + "constraints": [], + }, + bases=("journal.baseimporter",), + ), ] diff --git a/journal/models/mark.py b/journal/models/mark.py index d91a1c67..4cce6614 100644 --- a/journal/models/mark.py +++ b/journal/models/mark.py @@ -163,7 +163,7 @@ class Mark: log entries log entry will be created when item is added to shelf log entry will be created when item is moved to another shelf - log entry will be created when item is removed from shelf (TODO change this to DEFERRED shelf) + log entry will be created when item is removed from shelf timestamp of log entry will be updated whenever created_time of shelfmember is updated any log entry can be deleted by user arbitrarily diff --git a/journal/models/rating.py b/journal/models/rating.py index 4ccfbe98..d0319de9 100644 --- a/journal/models/rating.py +++ b/journal/models/rating.py @@ -5,14 +5,14 @@ from django.core.validators import MaxValueValidator, MinValueValidator from django.db import models from django.db.models import Avg, Count -from catalog.models import Item +from catalog.models import 
Item, Performance, TVShow from takahe.utils import Takahe from users.models import APIdentity from .common import Content MIN_RATING_COUNT = 5 -RATING_INCLUDES_CHILD_ITEMS = ["tvshow", "performance"] +RATING_INCLUDES_CHILD_ITEMS = [TVShow, Performance] class Rating(Content): @@ -73,10 +73,41 @@ class Rating(Content): p.link_post_id(post.id) return p + @classmethod + def get_info_for_item(cls, item: Item) -> dict: + stat = Rating.objects.filter(grade__isnull=False) + if item.__class__ in RATING_INCLUDES_CHILD_ITEMS: + stat = stat.filter(item_id__in=item.child_item_ids + [item.pk]) + else: + stat = stat.filter(item=item) + stat = stat.values("grade").annotate(count=Count("grade")) + grades = [0] * 11 + votes = 0 + total = 0 + for s in stat: + if s["grade"] and s["grade"] > 0 and s["grade"] < 11: + grades[s["grade"]] = s["count"] + total += s["count"] * s["grade"] + votes += s["count"] + if votes < MIN_RATING_COUNT: + return {"average": None, "count": votes, "distribution": [0] * 5} + else: + return { + "average": round(total / votes, 1), + "count": votes, + "distribution": [ + 100 * (grades[1] + grades[2]) // votes, + 100 * (grades[3] + grades[4]) // votes, + 100 * (grades[5] + grades[6]) // votes, + 100 * (grades[7] + grades[8]) // votes, + 100 * (grades[9] + grades[10]) // votes, + ], + } + @staticmethod def get_rating_for_item(item: Item) -> float | None: stat = Rating.objects.filter(grade__isnull=False) - if item.class_name in RATING_INCLUDES_CHILD_ITEMS: + if item.__class__ in RATING_INCLUDES_CHILD_ITEMS: stat = stat.filter(item_id__in=item.child_item_ids + [item.pk]) else: stat = stat.filter(item=item) @@ -86,7 +117,7 @@ class Rating(Content): @staticmethod def get_rating_count_for_item(item: Item) -> int: stat = Rating.objects.filter(grade__isnull=False) - if item.class_name in RATING_INCLUDES_CHILD_ITEMS: + if item.__class__ in RATING_INCLUDES_CHILD_ITEMS: stat = stat.filter(item_id__in=item.child_item_ids + [item.pk]) else: stat = stat.filter(item=item) @@ 
-96,7 +127,7 @@ class Rating(Content): @staticmethod def get_rating_distribution_for_item(item: Item): stat = Rating.objects.filter(grade__isnull=False) - if item.class_name in RATING_INCLUDES_CHILD_ITEMS: + if item.__class__ in RATING_INCLUDES_CHILD_ITEMS: stat = stat.filter(item_id__in=item.child_item_ids + [item.pk]) else: stat = stat.filter(item=item) diff --git a/journal/models/shelf.py b/journal/models/shelf.py index 68eb4074..71535e92 100644 --- a/journal/models/shelf.py +++ b/journal/models/shelf.py @@ -7,6 +7,7 @@ from django.db import connection, models from django.utils import timezone from django.utils.translation import gettext_lazy as _ from loguru import logger +from polymorphic.models import PolymorphicManager from catalog.models import Item, ItemCategory from takahe.utils import Takahe @@ -310,6 +311,28 @@ _SHELF_LABELS = [ # grammatically problematic, for translation only +class ShelfMemberManager(PolymorphicManager): + def get_queryset(self): + from .comment import Comment + from .rating import Rating + + rating_subquery = Rating.objects.filter( + owner_id=models.OuterRef("owner_id"), item_id=models.OuterRef("item_id") + ).values("grade")[:1] + comment_subquery = Comment.objects.filter( + owner_id=models.OuterRef("owner_id"), item_id=models.OuterRef("item_id") + ).values("text")[:1] + return ( + super() + .get_queryset() + .annotate( + _rating_grade=models.Subquery(rating_subquery), + _comment_text=models.Subquery(comment_subquery), + _shelf_type=models.F("parent__shelf_type"), + ) + ) + + class ShelfMember(ListMember): if TYPE_CHECKING: parent: models.ForeignKey["ShelfMember", "Shelf"] @@ -318,6 +341,8 @@ class ShelfMember(ListMember): "Shelf", related_name="members", on_delete=models.CASCADE ) + objects = ShelfMemberManager() + class Meta: unique_together = [["owner", "item"]] indexes = [ @@ -448,6 +473,15 @@ class ShelfMember(ListMember): "content": content, } + def save(self, *args, **kwargs): + try: + del self._shelf_type # type:ignore + del 
self._rating_grade # type:ignore + del self._comment_text # type:ignore + except AttributeError: + pass + return super().save(*args, **kwargs) + @cached_property def sibling_comment(self) -> "Comment | None": from .comment import Comment @@ -470,19 +504,28 @@ class ShelfMember(ListMember): @property def shelf_label(self) -> str | None: - return ShelfManager.get_label(self.parent.shelf_type, self.item.category) + return ShelfManager.get_label(self.shelf_type, self.item.category) @property def shelf_type(self): - return self.parent.shelf_type + try: + return getattr(self, "_shelf_type") + except AttributeError: + return self.parent.shelf_type @property def rating_grade(self): - return self.mark.rating_grade + try: + return getattr(self, "_rating_grade") + except AttributeError: + return self.mark.rating_grade @property def comment_text(self): - return self.mark.comment_text + try: + return getattr(self, "_comment_text") + except AttributeError: + return self.mark.comment_text @property def tags(self): diff --git a/journal/tests/__init__.py b/journal/tests/__init__.py index e94df8d2..a9a9f732 100644 --- a/journal/tests/__init__.py +++ b/journal/tests/__init__.py @@ -1,3 +1,5 @@ from .csv import * +from .ndjson import * from .piece import * +from .rating import * from .search import * diff --git a/journal/tests/csv.py b/journal/tests/csv.py index 22f3ace7..d8fb45bd 100644 --- a/journal/tests/csv.py +++ b/journal/tests/csv.py @@ -9,7 +9,7 @@ from loguru import logger from catalog.models import Edition, IdType, Movie, TVEpisode, TVSeason, TVShow from journal.exporters import CsvExporter -from journal.importers import CsvImporter, get_neodb_importer +from journal.importers import CsvImporter from users.models import User from ..models import * @@ -219,10 +219,9 @@ class CsvExportImportTest(TestCase): f"Expected file {filename} with {expected_data_count} data rows, but file not found" ) - self.assertEqual(get_neodb_importer(export_path), CsvImporter) importer = 
CsvImporter.create(user=self.user2, file=export_path, visibility=2) importer.run() - self.assertEqual(importer.message, "Import complete") + self.assertEqual(importer.message, "11 items imported, 0 skipped, 0 failed.") # Verify imported data diff --git a/journal/tests/ndjson.py b/journal/tests/ndjson.py new file mode 100644 index 00000000..431faf5e --- /dev/null +++ b/journal/tests/ndjson.py @@ -0,0 +1,506 @@ +import json +import os +import zipfile +from tempfile import TemporaryDirectory + +from django.test import TestCase +from django.utils.dateparse import parse_datetime +from loguru import logger + +from catalog.models import ( + Edition, + IdType, + Movie, + Podcast, + PodcastEpisode, + TVEpisode, + TVSeason, + TVShow, +) +from journal.exporters import NdjsonExporter +from journal.importers import NdjsonImporter +from users.models import User + +from ..models import * + + +class NdjsonExportImportTest(TestCase): + databases = "__all__" + maxDiff = None + + def setUp(self): + self.user1 = User.register( + email="ndjson_export@test.com", username="ndjson_exporter" + ) + self.user2 = User.register( + email="ndjson_import@test.com", username="ndjson_importer" + ) + self.tag1 = Tag.objects.create( + owner=self.user1.identity, title="favorite", pinned=True, visibility=2 + ) + self.dt = parse_datetime("2021-01-01T00:00:00Z") + self.dt2 = parse_datetime("2021-02-01T00:00:00Z") + self.dt3 = parse_datetime("2021-03-01T00:00:00Z") + self.book1 = Edition.objects.create( + localized_title=[{"lang": "en", "text": "Hyperion"}], + primary_lookup_id_type=IdType.ISBN, + primary_lookup_id_value="9780553283686", + author=["Dan Simmons"], + pub_year=1989, + ) + self.book2 = Edition.objects.create( + localized_title=[{"lang": "en", "text": "Dune"}], + primary_lookup_id_type=IdType.ISBN, + primary_lookup_id_value="9780441172719", + author=["Frank Herbert"], + pub_year=1965, + ) + self.movie1 = Movie.objects.create( + localized_title=[{"lang": "en", "text": "Inception"}], + 
primary_lookup_id_type=IdType.IMDB, + primary_lookup_id_value="tt1375666", + director=["Christopher Nolan"], + year=2010, + ) + self.movie2 = Movie.objects.create( + localized_title=[{"lang": "en", "text": "The Matrix"}], + primary_lookup_id_type=IdType.IMDB, + primary_lookup_id_value="tt0133093", + director=["Lana Wachowski", "Lilly Wachowski"], + year=1999, + ) + self.tvshow = TVShow.objects.create( + localized_title=[{"lang": "en", "text": "Breaking Bad"}], + primary_lookup_id_type=IdType.IMDB, + primary_lookup_id_value="tt0903747", + year=2008, + ) + self.tvseason = TVSeason.objects.create( + localized_title=[{"lang": "en", "text": "Breaking Bad Season 1"}], + show=self.tvshow, + season_number=1, + ) + self.tvepisode1 = TVEpisode.objects.create( + localized_title=[{"lang": "en", "text": "Pilot"}], + season=self.tvseason, + episode_number=1, + ) + self.tvepisode2 = TVEpisode.objects.create( + localized_title=[{"lang": "en", "text": "Cat's in the Bag..."}], + season=self.tvseason, + episode_number=2, + ) + # Create podcast test items + self.podcast = Podcast.objects.create( + localized_title=[{"lang": "en", "text": "Test Podcast"}], + primary_lookup_id_type=IdType.RSS, + primary_lookup_id_value="https://example.com/feed.xml", + host=["Test Host"], + ) + self.podcastepisode = PodcastEpisode.objects.create( + localized_title=[{"lang": "en", "text": "Test Episode 1"}], + program=self.podcast, + guid="111", + pub_date=self.dt, + ) + + def test_ndjson_export_import(self): + # set name and summary for user1 + identity1 = self.user1.identity + takahe_identity1 = identity1.takahe_identity + takahe_identity1.name = "Test User" + takahe_identity1.summary = "Test summary" + takahe_identity1.save() + + # Book marks with ratings and tags + mark_book1 = Mark(self.user1.identity, self.book1) + mark_book1.update( + ShelfType.COMPLETE, + "Great sci-fi classic", + 10, + ["sci-fi", "favorite", "space"], + 1, + created_time=self.dt, + ) + mark_book2 = Mark(self.user1.identity, 
self.book2) + mark_book2.update( + ShelfType.WISHLIST, + "Read it?", + None, + ["sci-fi", "desert"], + 1, + created_time=self.dt, + ) + mark_book2.update( + ShelfType.PROGRESS, + "Reading!", + None, + ["sci-fi", "desert"], + 0, + created_time=self.dt2, + ) + mark_book2.update( + ShelfType.COMPLETE, + "Read.", + None, + ["sci-fi", "desert"], + 0, + created_time=self.dt3, + ) + + # Movie marks with ratings + mark_movie1 = Mark(self.user1.identity, self.movie1) + mark_movie1.update( + ShelfType.COMPLETE, + "Mind-bending", + 8, + ["mindbender", "scifi"], + 1, + created_time=self.dt, + ) + + mark_movie2 = Mark(self.user1.identity, self.movie2) + mark_movie2.update( + ShelfType.WISHLIST, "Need to rewatch", None, [], 1, created_time=self.dt2 + ) + + # TV show mark + mark_tvshow = Mark(self.user1.identity, self.tvshow) + mark_tvshow.update( + ShelfType.WISHLIST, + "Heard it's good", + None, + ["drama"], + 1, + created_time=self.dt, + ) + + # TV episode marks + mark_episode1 = Mark(self.user1.identity, self.tvepisode1) + mark_episode1.update( + ShelfType.COMPLETE, + "Great start", + 9, + ["pilot", "drama"], + 1, + created_time=self.dt2, + ) + + mark_episode2 = Mark(self.user1.identity, self.tvepisode2) + mark_episode2.update( + ShelfType.COMPLETE, "It gets better", 9, [], 1, created_time=self.dt3 + ) + + # Podcast episode mark + mark_podcast = Mark(self.user1.identity, self.podcastepisode) + mark_podcast.update( + ShelfType.COMPLETE, + "Insightful episode", + 8, + ["tech", "interview"], + 1, + created_time=self.dt, + ) + + # Create reviews + Review.update_item_review( + self.book1, + self.user1.identity, + "My thoughts on Hyperion", + "A masterpiece of science fiction that weaves multiple storylines into a captivating narrative.", + visibility=1, + created_time=self.dt, + ) + + Review.update_item_review( + self.movie1, + self.user1.identity, + "Inception Review", + "Christopher Nolan at his best. 
The movie plays with reality and dreams in a fascinating way.", + visibility=1, + ) + + # Create notes + Note.objects.create( + item=self.book2, + owner=self.user1.identity, + title="Reading progress", + content="Just finished the first part. The world-building is incredible.\n\n - p 125", + progress_type=Note.ProgressType.PAGE, + progress_value="125", + visibility=1, + ) + + Note.objects.create( + item=self.tvshow, + owner=self.user1.identity, + title="Before watching", + content="Things to look out for according to friends:\n- Character development\n- Color symbolism\n\n - e 0", + progress_type=Note.ProgressType.EPISODE, + progress_value="2", + visibility=1, + ) + + # Create TV episode note + Note.objects.create( + item=self.tvepisode1, + owner=self.user1.identity, + title="Episode thoughts", + content="Great pilot episode. Sets up the character arcs really well.", + visibility=1, + ) + + # Create podcast episode note + Note.objects.create( + item=self.podcastepisode, + owner=self.user1.identity, + title="Podcast episode notes", + content="Interesting discussion about tech trends. 
Timestamp 23:45 has a good point about AI.", + progress_type=Note.ProgressType.TIMESTAMP, + progress_value="23:45", + visibility=1, + ) + + # Create collections + items = [self.book1, self.movie1] + collection = Collection.objects.create( + owner=self.user1.identity, + title="Favorites", + brief="My all-time favorites", + visibility=1, + ) + for i in items: + collection.append_item(i) + + # Create another collection with different items + items2 = [self.book2, self.movie2, self.tvshow] + collection2 = Collection.objects.create( + owner=self.user1.identity, + title="To Review", + brief="Items I need to review soon", + visibility=1, + ) + for i in items2: + collection2.append_item(i) + + # Create shelf log entries + logs = ShelfLogEntry.objects.filter(owner=self.user1.identity).order_by( + "timestamp", "item_id" + ) + + # Export data to NDJSON + exporter = NdjsonExporter.create(user=self.user1) + exporter.run() + export_path = exporter.metadata["file"] + logger.debug(f"exported to {export_path}") + self.assertTrue(os.path.exists(export_path)) + self.assertEqual(exporter.metadata["total"], 61) + + # Validate the NDJSON export file structure + with TemporaryDirectory() as extract_dir: + with zipfile.ZipFile(export_path, "r") as zip_ref: + zip_ref.extractall(extract_dir) + logger.debug(f"unzipped to {extract_dir}") + + # Check journal.ndjson exists + journal_path = os.path.join(extract_dir, "journal.ndjson") + self.assertTrue( + os.path.exists(journal_path), "journal.ndjson file missing" + ) + + # Check catalog.ndjson exists + catalog_path = os.path.join(extract_dir, "catalog.ndjson") + self.assertTrue( + os.path.exists(catalog_path), "catalog.ndjson file missing" + ) + + # Check attachments directory exists + attachments_path = os.path.join(extract_dir, "attachments") + self.assertTrue( + os.path.exists(attachments_path), "attachments directory missing" + ) + + # Count the number of JSON objects in journal.ndjson + with open(journal_path, "r") as f: + lines = 
f.readlines() + # First line is header, rest are data + self.assertGreater( + len(lines), 1, "journal.ndjson has no data lines" + ) + + # Check the first line is a header + header = json.loads(lines[0]) + self.assertIn("server", header, "Missing server in header") + self.assertIn("username", header, "Missing username in header") + self.assertEqual( + header["username"], + "ndjson_exporter", + "Wrong username in header", + ) + + # Count data objects by type + type_counts = { + "ShelfMember": 0, + "Review": 0, + "Note": 0, + "Collection": 0, + "ShelfLog": 0, + "post": 0, + } + + for line in lines[1:]: + data = json.loads(line) + if "type" in data: + type_counts[data["type"]] = ( + type_counts.get(data["type"], 0) + 1 + ) + + # Verify counts + self.assertEqual( + type_counts["ShelfMember"], 8, "Expected 8 ShelfMember entries" + ) + self.assertEqual( + type_counts["Review"], 2, "Expected 2 Review entries" + ) + self.assertEqual(type_counts["Note"], 4, "Expected 4 Note entries") + self.assertEqual( + type_counts["Collection"], 2, "Expected 2 Collection entries" + ) + self.assertEqual(type_counts["ShelfLog"], logs.count()) + + # Now import the export file into a different user account + importer = NdjsonImporter.create( + user=self.user2, file=export_path, visibility=2 + ) + importer.run() + self.assertIn("61 items imported, 0 skipped, 0 failed.", importer.message) + + # Verify imported data + identity2 = self.user2.identity + takahe_identity2 = identity2.takahe_identity + + # Check that name and summary were updated + self.assertEqual(takahe_identity2.name, "Test User") + self.assertEqual(takahe_identity2.summary, "Test summary") + # Check marks + mark_book1_imported = Mark(self.user2.identity, self.book1) + self.assertEqual(mark_book1_imported.shelf_type, ShelfType.COMPLETE) + self.assertEqual(mark_book1_imported.comment_text, "Great sci-fi classic") + self.assertEqual(mark_book1_imported.rating_grade, 10) + self.assertEqual(mark_book1_imported.visibility, 1) + 
self.assertEqual( + set(mark_book1_imported.tags), set(["sci-fi", "favorite", "space"]) + ) + + mark_book2_imported = Mark(self.user2.identity, self.book2) + self.assertEqual(mark_book2_imported.shelf_type, ShelfType.COMPLETE) + self.assertEqual(mark_book2_imported.comment_text, "Read.") + self.assertIsNone(mark_book2_imported.rating_grade) + self.assertEqual(set(mark_book2_imported.tags), set(["sci-fi", "desert"])) + self.assertEqual(mark_book2_imported.visibility, 0) + + mark_movie1_imported = Mark(self.user2.identity, self.movie1) + self.assertEqual(mark_movie1_imported.shelf_type, ShelfType.COMPLETE) + self.assertEqual(mark_movie1_imported.comment_text, "Mind-bending") + self.assertEqual(mark_movie1_imported.rating_grade, 8) + self.assertEqual(set(mark_movie1_imported.tags), set(["mindbender", "scifi"])) + + mark_episode1_imported = Mark(self.user2.identity, self.tvepisode1) + self.assertEqual(mark_episode1_imported.shelf_type, ShelfType.COMPLETE) + self.assertEqual(mark_episode1_imported.comment_text, "Great start") + self.assertEqual(mark_episode1_imported.rating_grade, 9) + self.assertEqual(set(mark_episode1_imported.tags), set(["pilot", "drama"])) + + # Check podcast episode mark + mark_podcast_imported = Mark(self.user2.identity, self.podcastepisode) + self.assertEqual(mark_podcast_imported.shelf_type, ShelfType.COMPLETE) + self.assertEqual(mark_podcast_imported.comment_text, "Insightful episode") + self.assertEqual(mark_podcast_imported.rating_grade, 8) + self.assertEqual(set(mark_podcast_imported.tags), set(["tech", "interview"])) + + # Check reviews + book1_reviews = Review.objects.filter( + owner=self.user2.identity, item=self.book1 + ) + self.assertEqual(book1_reviews.count(), 1) + self.assertEqual(book1_reviews[0].title, "My thoughts on Hyperion") + self.assertIn("masterpiece of science fiction", book1_reviews[0].body) + + movie1_reviews = Review.objects.filter( + owner=self.user2.identity, item=self.movie1 + ) + 
self.assertEqual(movie1_reviews.count(), 1) + self.assertEqual(movie1_reviews[0].title, "Inception Review") + self.assertIn("Christopher Nolan", movie1_reviews[0].body) + + # Check notes + book2_notes = Note.objects.filter(owner=self.user2.identity, item=self.book2) + self.assertEqual(book2_notes.count(), 1) + self.assertEqual(book2_notes[0].title, "Reading progress") + self.assertIn("world-building is incredible", book2_notes[0].content) + self.assertEqual(book2_notes[0].progress_type, Note.ProgressType.PAGE) + self.assertEqual(book2_notes[0].progress_value, "125") + + tvshow_notes = Note.objects.filter(owner=self.user2.identity, item=self.tvshow) + self.assertEqual(tvshow_notes.count(), 1) + self.assertEqual(tvshow_notes[0].title, "Before watching") + self.assertIn("Character development", tvshow_notes[0].content) + + # Check TV episode notes + tvepisode_notes = Note.objects.filter( + owner=self.user2.identity, item=self.tvepisode1 + ) + self.assertEqual(tvepisode_notes.count(), 1) + self.assertEqual(tvepisode_notes[0].title, "Episode thoughts") + self.assertIn("Sets up the character arcs", tvepisode_notes[0].content) + + # Check podcast episode notes + podcast_notes = Note.objects.filter( + owner=self.user2.identity, item=self.podcastepisode + ) + self.assertEqual(podcast_notes.count(), 1) + self.assertEqual(podcast_notes[0].title, "Podcast episode notes") + self.assertIn( + "Interesting discussion about tech trends", podcast_notes[0].content + ) + self.assertEqual(podcast_notes[0].progress_type, Note.ProgressType.TIMESTAMP) + self.assertEqual(podcast_notes[0].progress_value, "23:45") + + # Check first collection + collections = Collection.objects.filter( + owner=self.user2.identity, title="Favorites" + ) + self.assertEqual(collections.count(), 1) + self.assertEqual(collections[0].brief, "My all-time favorites") + self.assertEqual(collections[0].visibility, 1) + collection_items = list(collections[0].ordered_items) + self.assertEqual([self.book1, self.movie1], 
collection_items) + + # Check second collection + collections2 = Collection.objects.filter( + owner=self.user2.identity, title="To Review" + ) + self.assertEqual(collections2.count(), 1) + self.assertEqual(collections2[0].brief, "Items I need to review soon") + self.assertEqual(collections2[0].visibility, 1) + + # Check second collection items + collection2_items = [m.item for m in collections2[0].members.all()] + self.assertEqual(len(collection2_items), 3) + self.assertIn(self.book2, collection2_items) + self.assertIn(self.movie2, collection2_items) + self.assertIn(self.tvshow, collection2_items) + + tag1 = Tag.objects.filter(owner=self.user2.identity, title="favorite").first() + self.assertIsNotNone(tag1) + if tag1: + self.assertTrue(tag1.pinned) + self.assertEqual(tag1.visibility, 2) + + # Check shelf log entries + logs2 = ShelfLogEntry.objects.filter(owner=self.user2.identity).order_by( + "timestamp", "item_id" + ) + l1 = [(log.item, log.shelf_type, log.timestamp) for log in logs] + l2 = [(log.item, log.shelf_type, log.timestamp) for log in logs2] + self.assertEqual(l1, l2) diff --git a/journal/tests/piece.py b/journal/tests/piece.py index 96d3696d..55572a3b 100644 --- a/journal/tests/piece.py +++ b/journal/tests/piece.py @@ -65,23 +65,34 @@ class ShelfTest(TestCase): self.assertEqual(q1.members.all().count(), 0) self.assertEqual(q2.members.all().count(), 0) Mark(user.identity, book1).update(ShelfType.WISHLIST) - time.sleep(0.001) # add a little delay to make sure the timestamp is different Mark(user.identity, book2).update(ShelfType.WISHLIST) + log = [ll.shelf_type for ll in shelf_manager.get_log_for_item(book1)] + self.assertEqual(log, ["wishlist"]) + log = [ll.shelf_type for ll in shelf_manager.get_log_for_item(book2)] + self.assertEqual(log, ["wishlist"]) + time.sleep(0.001) # add a little delay to make sure the timestamp is different + + Mark(user.identity, book1).update(ShelfType.WISHLIST) + log = [ll.shelf_type for ll in 
shelf_manager.get_log_for_item(book1)] + self.assertEqual(log, ["wishlist"]) time.sleep(0.001) + self.assertEqual(q1.members.all().count(), 2) Mark(user.identity, book1).update(ShelfType.PROGRESS) - time.sleep(0.001) self.assertEqual(q1.members.all().count(), 1) self.assertEqual(q2.members.all().count(), 1) + time.sleep(0.001) + self.assertEqual(len(Mark(user.identity, book1).all_post_ids), 2) - log = shelf_manager.get_log_for_item(book1) - self.assertEqual(log.count(), 2) + log = [ll.shelf_type for ll in shelf_manager.get_log_for_item(book1)] + + self.assertEqual(log, ["wishlist", "progress"]) Mark(user.identity, book1).update(ShelfType.PROGRESS, metadata={"progress": 1}) time.sleep(0.001) self.assertEqual(q1.members.all().count(), 1) self.assertEqual(q2.members.all().count(), 1) - log = shelf_manager.get_log_for_item(book1) - self.assertEqual(log.count(), 2) + log = [ll.shelf_type for ll in shelf_manager.get_log_for_item(book1)] + self.assertEqual(log, ["wishlist", "progress"]) self.assertEqual(len(Mark(user.identity, book1).all_post_ids), 2) # theses tests are not relevant anymore, bc we don't use log to track metadata changes @@ -127,7 +138,8 @@ class ShelfTest(TestCase): # test delete mark -> one more log Mark(user.identity, book1).delete() - self.assertEqual(log.count(), 4) + log = [ll.shelf_type for ll in shelf_manager.get_log_for_item(book1)] + self.assertEqual(log, ["wishlist", "progress", "complete", None]) deleted_mark = Mark(user.identity, book1) self.assertEqual(deleted_mark.shelf_type, None) self.assertEqual(deleted_mark.tags, []) diff --git a/journal/tests/rating.py b/journal/tests/rating.py new file mode 100644 index 00000000..b6d4f8fc --- /dev/null +++ b/journal/tests/rating.py @@ -0,0 +1,201 @@ +from django.test import TestCase + +from catalog.common.models import Item +from catalog.models import Edition, IdType, Movie, TVEpisode, TVSeason, TVShow +from journal.models.rating import Rating +from users.models import User + + +class 
RatingTest(TestCase): + databases = "__all__" + + def setUp(self): + # Create 10 users + self.users = [] + for i in range(10): + user = User.register(email=f"user{i}@example.com", username=f"user{i}") + self.users.append(user) + + # Create a book + self.book = Edition.objects.create( + localized_title=[{"lang": "en", "text": "Test Book"}], + primary_lookup_id_type=IdType.ISBN, + primary_lookup_id_value="9780553283686", + author=["Test Author"], + ) + + # Create a movie + self.movie = Movie.objects.create( + localized_title=[{"lang": "en", "text": "Test Movie"}], + primary_lookup_id_type=IdType.IMDB, + primary_lookup_id_value="tt1234567", + director=["Test Director"], + year=2020, + ) + + # Create a TV show with a season and episode + self.tvshow = TVShow.objects.create( + localized_title=[{"lang": "en", "text": "Test Show"}], + primary_lookup_id_type=IdType.IMDB, + primary_lookup_id_value="tt9876543", + ) + self.tvseason = TVSeason.objects.create( + localized_title=[{"lang": "en", "text": "Season 1"}], + show=self.tvshow, + season_number=1, + ) + self.tvepisode = TVEpisode.objects.create( + localized_title=[{"lang": "en", "text": "Episode 1"}], + season=self.tvseason, + episode_number=1, + ) + + def test_rating_basic(self): + """Test basic rating functionality for a single item.""" + # Add ratings for the book from all users + ratings = [7, 8, 9, 10, 8, 7, 6, 9, 10, 8] + + for i, user in enumerate(self.users): + Rating.update_item_rating( + self.book, user.identity, ratings[i], visibility=1 + ) + + # Get rating info for the book + rating_info = Rating.get_info_for_item(self.book) + + # Check rating count + self.assertEqual(rating_info["count"], 10) + + # Check average rating - should be 8.2 + expected_avg = sum(ratings) / len(ratings) + self.assertEqual(rating_info["average"], round(expected_avg, 1)) + + # Check distribution + # [1-2, 3-4, 5-6, 7-8, 9-10] buckets represented as percentages + expected_distribution = [0, 0, 10, 50, 40] # Based on our ratings + 
self.assertEqual(rating_info["distribution"], expected_distribution) + + # Test individual user rating + user_rating = Rating.get_item_rating(self.book, self.users[0].identity) + self.assertEqual(user_rating, 7) + + book = Item.objects.get(pk=self.book.pk) + self.assertEqual(book.rating, round(expected_avg, 1)) + self.assertEqual(book.rating_count, 10) + self.assertEqual(book.rating_distribution, expected_distribution) + + def test_rating_multiple_items(self): + """Test ratings across multiple items.""" + # Rate the movie with varying scores + movie_ratings = [3, 4, 5, 6, 7, 8, 9, 10, 2, 1] + + for i, user in enumerate(self.users): + Rating.update_item_rating( + self.movie, user.identity, movie_ratings[i], visibility=1 + ) + + # Rate the TV show + tvshow_ratings = [10, 9, 8, 9, 10, 9, 8, 10, 9, 8] + + for i, user in enumerate(self.users): + Rating.update_item_rating( + self.tvshow, user.identity, tvshow_ratings[i], visibility=1 + ) + + # Get rating info for both items + movie_info = Rating.get_info_for_item(self.movie) + tvshow_info = Rating.get_info_for_item(self.tvshow) + + # Check counts + self.assertEqual(movie_info["count"], 10) + self.assertEqual(tvshow_info["count"], 10) + + # Check averages + expected_movie_avg = sum(movie_ratings) / len(movie_ratings) + expected_tvshow_avg = sum(tvshow_ratings) / len(tvshow_ratings) + + self.assertEqual(movie_info["average"], round(expected_movie_avg, 1)) + self.assertEqual(tvshow_info["average"], round(expected_tvshow_avg, 1)) + + # Check distribution for movie + # [1-2, 3-4, 5-6, 7-8, 9-10] buckets + expected_movie_distribution = [ + 20, + 20, + 20, + 20, + 20, + ] # Evenly distributed across buckets + self.assertEqual(movie_info["distribution"], expected_movie_distribution) + + # Check distribution for TV show + # [1-2, 3-4, 5-6, 7-8, 9-10] buckets + expected_tvshow_distribution = [0, 0, 0, 30, 70] # High ratings only + self.assertEqual(tvshow_info["distribution"], expected_tvshow_distribution) + + def 
test_rating_update_and_delete(self): + """Test updating and deleting ratings.""" + # Add initial ratings + for user in self.users[:5]: + Rating.update_item_rating(self.tvepisode, user.identity, 8, visibility=1) + + # Check initial count + self.assertEqual(Rating.get_rating_count_for_item(self.tvepisode), 5) + + # Update a rating + Rating.update_item_rating( + self.tvepisode, self.users[0].identity, 10, visibility=1 + ) + + # Check that rating was updated + updated_rating = Rating.get_item_rating(self.tvepisode, self.users[0].identity) + self.assertEqual(updated_rating, 10) + + # Delete a rating by setting it to None + Rating.update_item_rating( + self.tvepisode, self.users[1].identity, None, visibility=1 + ) + + # Check that rating count decreased + self.assertEqual(Rating.get_rating_count_for_item(self.tvepisode), 4) + + # Check that the rating was deleted + deleted_rating = Rating.get_item_rating(self.tvepisode, self.users[1].identity) + self.assertIsNone(deleted_rating) + + def test_rating_minimum_count(self): + """Test the minimum rating count threshold.""" + # Add only 4 ratings to the book (below MIN_RATING_COUNT of 5) + for user in self.users[:4]: + Rating.update_item_rating(self.book, user.identity, 10, visibility=1) + + # Check that get_rating_for_item returns None due to insufficient ratings + rating = Rating.get_rating_for_item(self.book) + self.assertIsNone(rating) + + # Add one more rating to reach the threshold + Rating.update_item_rating(self.book, self.users[4].identity, 10, visibility=1) + + # Now we should get a valid rating + rating = Rating.get_rating_for_item(self.book) + self.assertEqual(rating, 10.0) + + def test_tvshow_rating_includes_children(self): + """Test that TV show ratings include ratings from child items.""" + # Rate the TV show directly + Rating.update_item_rating(self.tvshow, self.users[0].identity, 6, visibility=1) + + # Rate the episode (which is a child of the TV show) + for i in range(1, 6): # Users 1-5 + 
Rating.update_item_rating( + self.tvseason, self.users[i].identity, 10, visibility=1 + ) + + # Get info for TV show - should include ratings from episode + tvshow_info = Rating.get_info_for_item(self.tvshow) + + # Check count (1 for show + 5 for episode = 6) + self.assertEqual(tvshow_info["count"], 6) + + # The average should consider all ratings (6 + 5*10 = 56, divided by 6 = 9.3) + self.assertEqual(tvshow_info["average"], 9.3) diff --git a/mkdocs.yml b/mkdocs.yml deleted file mode 100644 index 3083d6bf..00000000 --- a/mkdocs.yml +++ /dev/null @@ -1,55 +0,0 @@ -site_name: 🧩 NeoDB -site_url: https://neodb.net -repo_url: http://github.com/neodb-social/neodb -edit_uri: blob/main/docs/ -site_description: Mark the things you love. -nav: - - Overview: 'index.md' - - features.md - - servers.md - - apps.md - - install.md - - upgrade.md - - configuration.md - - troubleshooting.md - - development.md - - api.md - - origin.md -theme: - logo: assets/logo.svg - name: material - font: - text: Lato - palette: - primary: custom - accent: custom - features: - # - navigation.tabs - # - navigation.footer - custom_dir: docs/templates -extra: - social: - - icon: fontawesome/brands/mastodon - link: https://mastodon.online/@neodb/ - - icon: fontawesome/brands/bluesky - link: https://bsky.app/profile/neodb.net - - icon: fontawesome/brands/x-twitter - link: https://twitter.com/NeoDBsocial - - icon: fontawesome/brands/threads - link: https://www.threads.net/@neodb.social - - icon: fontawesome/brands/discord - link: https://discord.gg/QBHkrV8bxK - - icon: fontawesome/brands/github - link: http://github.com/neodb-social/neodb - - icon: fontawesome/brands/docker - link: https://hub.docker.com/u/neodb -extra_css: - - stylesheets/extra.css -markdown_extensions: - - pymdownx.superfences: - custom_fences: - - name: mermaid - class: mermaid - format: !!python/name:pymdownx.superfences.fence_code_format -hooks: - - mkdocs_hook.py diff --git a/mkdocs_hook.py b/mkdocs_hook.py deleted file mode 100644 
index 234d08b6..00000000 --- a/mkdocs_hook.py +++ /dev/null @@ -1,26 +0,0 @@ -import json - - -def on_page_markdown(markdown, page, config, **kwargs): - if page.url == "servers/": - with open(config.docs_dir + "/servers.json") as f: - servers = json.load(f) - m = "" - for s in servers["servers"]: - host = s["host"] - name = s.get("name", host) - admin = s.get("admin", []) - label = s.get("label", []) - language = s.get("language", []) - description = s.get("description", "") - m += f" - **[{name}](https://{host})**" - if label: - m += f" {' '.join([f'`{a}`' for a in label])}" - if language: - m += f" {' '.join([f'`{a}`' for a in language])}" - if description: - m += f" \n {description}" - if admin: - m += f" \n admin: {', '.join([f'`{a}`' for a in admin])}" - m += "\n" - return markdown.replace("{servers}", m) diff --git a/requirements-dev.lock b/requirements-dev.lock index aa871438..96d586fc 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -9,7 +9,7 @@ # generate-hashes: false # universal: false -aiohappyeyeballs==2.4.6 +aiohappyeyeballs==2.5.0 # via aiohttp aiohttp==3.11.13 # via discord-py @@ -62,10 +62,10 @@ cssbeautifier==1.15.4 # via djlint dateparser==1.2.1 deepmerge==2.0 -discord-py==2.5.0 +discord-py==2.5.2 distlib==0.3.9 # via virtualenv -django==4.2.19 +django==4.2.20 # via django-anymail # via django-appconf # via django-auditlog @@ -130,7 +130,7 @@ httpcore==1.0.7 # via httpx httpx==0.28.1 # via atproto -identify==2.6.8 +identify==2.6.9 # via pre-commit idna==3.10 # via anyio @@ -138,7 +138,7 @@ idna==3.10 # via requests # via yarl igdb-api-v4==0.3.3 -jinja2==3.1.5 +jinja2==3.1.6 # via mkdocs # via mkdocs-material jsbeautifier==1.15.4 @@ -159,7 +159,7 @@ markdown==3.7 # via mkdocs # via mkdocs-material # via pymdown-extensions -markdownify==1.0.0 +markdownify==1.1.0 markupsafe==3.0.2 # via jinja2 # via mkdocs @@ -201,7 +201,7 @@ pre-commit==4.1.0 propcache==0.3.0 # via aiohttp # via yarl -protobuf==5.29.3 +protobuf==6.30.0 # via 
igdb-api-v4 psycopg2-binary==2.9.10 pycparser==2.22 @@ -252,7 +252,7 @@ rjsmin==1.2.2 # via django-compressor rq==2.1.0 # via django-rq -ruff==0.9.9 +ruff==0.9.10 sentry-sdk==2.22.0 setproctitle==1.3.5 six==1.17.0 @@ -285,7 +285,7 @@ typing-extensions==4.12.2 # via pydantic # via pydantic-core # via pyright -tzlocal==5.3 +tzlocal==5.3.1 # via dateparser urllib3==2.3.0 # via django-anymail @@ -293,7 +293,7 @@ urllib3==2.3.0 # via sentry-sdk urlman==2.0.2 validators==0.34.0 -virtualenv==20.29.2 +virtualenv==20.29.3 # via pre-commit watchdog==6.0.0 # via mkdocs diff --git a/requirements.lock b/requirements.lock index 7bccec2a..d638c763 100644 --- a/requirements.lock +++ b/requirements.lock @@ -9,7 +9,7 @@ # generate-hashes: false # universal: false -aiohappyeyeballs==2.4.6 +aiohappyeyeballs==2.5.0 # via aiohttp aiohttp==3.11.13 # via discord-py @@ -48,8 +48,8 @@ cryptography==44.0.2 # via atproto dateparser==1.2.1 deepmerge==2.0 -discord-py==2.5.0 -django==4.2.19 +discord-py==2.5.2 +django==4.2.20 # via django-anymail # via django-appconf # via django-auditlog @@ -116,7 +116,7 @@ loguru==0.7.3 lxml==5.3.1 markdown==3.7 # via django-markdownx -markdownify==1.0.0 +markdownify==1.1.0 mistune==3.1.2 multidict==6.1.0 # via aiohttp @@ -132,7 +132,7 @@ podcastparser==0.6.10 propcache==0.3.0 # via aiohttp # via yarl -protobuf==5.29.3 +protobuf==6.30.0 # via igdb-api-v4 psycopg2-binary==2.9.10 pycparser==2.22 @@ -190,7 +190,7 @@ typing-extensions==4.12.2 # via beautifulsoup4 # via pydantic # via pydantic-core -tzlocal==5.3 +tzlocal==5.3.1 # via dateparser urllib3==2.3.0 # via django-anymail diff --git a/test_data/https___music_apple_com_cn_album_1284391545 b/test_data/https___music_apple_com_cn_album_1284391545 new file mode 100644 index 00000000..daf7d8f4 --- /dev/null +++ b/test_data/https___music_apple_com_cn_album_1284391545 @@ -0,0 +1,209 @@ + + + + + + + + + + + + + + + + + + + + + + + + + ‎《Kids Only》- 窦靖童的专辑 - Apple Music + + + + + + + + + + + + + + + + + + + +
+

Kids Only

Kids Only

首张原创专辑《Stone Café》惊艳亮相后,窦靖童便开始了在音乐上的深入探索,与世界顶级音乐人合作,到世界各地演出,不断历练自己。时隔一年推出了全新的概念专辑《Kids Only》。她包办了这张专辑的词曲创作,大部分歌曲都采用了同期录制,每首歌都邀请不同的音乐人合作,多样的音乐风格加上大家现场的自由发挥,每首歌都散发着鲜活可爱而又自由肆意的气息。整张专辑最明显的一点就是对人声的处理,将人声与器乐声融合,低吟恰到好处。似她母亲王菲的唱腔,似她父亲窦唯的创作风格,但她又是如此与众不同,顽皮的孩子气、少女心事的烦忧、童年往昔的回顾、对自由的向往追求,小小年龄有大大的思虑,皆呈现于此。

选择国家或地区

非洲、中东和印度

亚太地区

欧洲

拉丁美洲和加勒比海地区

美国和加拿大

+
+ + + + + diff --git a/test_data/https___music_apple_com_fr_album_1284391545 b/test_data/https___music_apple_com_fr_album_1284391545 new file mode 100644 index 00000000..d4b43a89 --- /dev/null +++ b/test_data/https___music_apple_com_fr_album_1284391545 @@ -0,0 +1,201 @@ + + + + + + + + + + + + + + + + + + + + + + + + + ‎Kids Only – Album par 竇靖童 – Apple Music + + + + + + + + + + + + + + + + + + + +
+

Kids Only

Kids Only
Choisissez un pays ou une région

Afrique, Moyen‑Orient et Inde

Asie‑Pacifique

Europe

Amérique latine et Caraïbes

États‑Unis et Canada

+
+ + + + + diff --git a/test_data/https___music_apple_com_jp_album_1284391545 b/test_data/https___music_apple_com_jp_album_1284391545 new file mode 100644 index 00000000..d6fa4749 --- /dev/null +++ b/test_data/https___music_apple_com_jp_album_1284391545 @@ -0,0 +1,197 @@ + + + + + + + + + + + + + + + + + + + + + + + + + ‎Kids Only - リア・ドウのアルバム - Apple Music + + + + + + + + + + + + + + + + + + + +
+

Kids Only

Kids Only
国または地域を選択

アフリカ、中東、インド

アジア太平洋

ヨーロッパ

ラテンアメリカ、カリブ海地域

米国およびカナダ

+
+ + + + + diff --git a/test_data/https___music_apple_com_kr_album_1284391545 b/test_data/https___music_apple_com_kr_album_1284391545 new file mode 100644 index 00000000..c8241613 --- /dev/null +++ b/test_data/https___music_apple_com_kr_album_1284391545 @@ -0,0 +1,197 @@ + + + + + + + + + + + + + + + + + + + + + + + + + ‎Kids Only - 竇靖童의 앨범 - Apple Music + + + + + + + + + + + + + + + + + + + +
+
국가 또는 지역 선택

아프리카, 중동 및 인도

아시아 태평양

유럽

라틴 아메리카 및 카리브해

미국 및 캐나다

+
+ + + + + diff --git a/test_data/https___music_apple_com_us_album_1284391545 b/test_data/https___music_apple_com_us_album_1284391545 index b07debbd..c8fde71e 100644 --- a/test_data/https___music_apple_com_us_album_1284391545 +++ b/test_data/https___music_apple_com_us_album_1284391545 @@ -1,244 +1,55 @@ - - - - - - + + + + + + + + + + + + > + + > + + > + - + > + + - ‎Kids Only by Leah Dou on Apple Music - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + ‎Kids Only - Album by Leah Dou - Apple Music - - - - + + + @@ -272,1636 +83,123 @@ /> - - - -
- - -
- -
- - -
- -
- - -
-
- - - - - - - - -
- -
- - -
- -

Kids Only

- -
- Leah Dou -
- -
-
- -
- -
-
-
- -
-
- -
-
+

Kids Only

- - - - - - -
-
- - - - - - Kids Only -
- -
- - -

Psychedelic electro-soul colors Dou’s second album.

-
- -
- -
- -
- - -
-
- -
- - - -
- -
- - - - - - - - - -
- - - - -
- - - - -
-
- - - -
- -
1
- -
-
-
- -
- - -
- -
- - - - - - - -
- -
- - - -
-
-
- - -
- - - - -
-
- - - -
- -
2
- -
-
-
- -
- - -
- -
- - - - - - - -
- -
- - - -
-
-
- - -
- - - - -
-
- - - -
- -
3
- -
-
-
- -
- - -
- -
- - - - - - - -
- -
- - - -
-
-
- - -
- - - - -
-
- - - -
- -
4
- -
-
-
- -
- - -
- -
- - - - - - - -
- -
- - - -
-
-
- - -
- - - - -
-
- - - -
- -
5
- -
-
-
- -
- - -
- -
- - - - - - - -
- -
- - - -
-
-
- - -
- - - - -
-
- - - -
- -
6
- -
-
-
- -
- - -
- -
- - - - - - - -
- -
- - - -
-
-
- - -
- - - - -
-
- - - -
- -
7
- -
-
-
- -
- - -
- -
- - - - - - - -
- -
- - - -
-
-
- - -
- - - - -
-
- - - -
- -
8
- -
-
-
- -
- - -
- -
- - - - - - - -
- -
- - - -
-
-
- - -
- - - - -
-
- - - -
- -
9
- -
-
-
- -
- - -
- -
- - - - - - - -
- -
- - - -
-
-
- - -
- - - - -
-
- - - -
- -
10
- -
-
-
- -
- - -
- -
- - - - - - - -
- -
- - - -
-
-
- - -
- - -
- - - -
- -
- -

Psychedelic electro-soul colors Dou’s second album.

- - - -
- -
- -
- - - -
- -
- - -
- -

More By Leah Dou

- -
- - - -
- - - -

More By Leah Dou

-
- - -
-
-
- - - -
- -
- - -
- -

Featured On

- -
- - - -
- - - -

Featured On

-
- - -
-
-
- - - -
- - - - - -
-
+ ">
- - - - -
- - - - - - - - - - - -
-

Select a country or region

-

Africa, Middle East, and India

-
-

Asia Pacific

-
-

Europe

-
-

Latin America and the Caribbean

-
-

The United States and Canada

-
-
-
-
- -
+ ">
Select a country or region

Africa, Middle East, and India

Asia Pacific

Europe

Latin America and the Caribbean

The United States and Canada

+ - - + + diff --git a/users/migrations/0008_alter_task_type.py b/users/migrations/0008_alter_task_type.py index aba30336..5b120d5d 100644 --- a/users/migrations/0008_alter_task_type.py +++ b/users/migrations/0008_alter_task_type.py @@ -14,6 +14,7 @@ class Migration(migrations.Migration): name="type", field=models.CharField( choices=[ + ("journal.baseimporter", "base importer"), ("journal.csvexporter", "csv exporter"), ("journal.csvimporter", "csv importer"), ("journal.doubanimporter", "douban importer"), @@ -21,6 +22,8 @@ class Migration(migrations.Migration): ("journal.goodreadsimporter", "goodreads importer"), ("journal.letterboxdimporter", "letterboxd importer"), ("journal.ndjsonexporter", "ndjson exporter"), + ("journal.ndjsonimporter", "ndjson importer"), + ("journal.opmlimporter", "opml importer"), ], db_index=True, max_length=255, diff --git a/users/models/task.py b/users/models/task.py index 85fab411..71f9cf26 100644 --- a/users/models/task.py +++ b/users/models/task.py @@ -82,7 +82,6 @@ class Task(TypedModel): task.refresh_from_db() task.state = cls.States.complete if ok else cls.States.failed task.save() - task.notify() def enqueue(self): return django_rq.get_queue(self.TaskQueue).enqueue( diff --git a/users/templates/users/data.html b/users/templates/users/data.html index 20279ea0..dc530535 100644 --- a/users/templates/users/data.html +++ b/users/templates/users/data.html @@ -10,137 +10,18 @@ {{ site_name }} - {% trans 'Data Management' %} {% include "common_libs.html" %} + {% include "_header.html" %}
-
-
- {% trans 'Export Data' %} -
- {% csrf_token %} - - {% if csv_export_task %} -
- {% trans 'Last export' %}: {{ csv_export_task.created_time }} - {% trans 'Status' %}: {{ csv_export_task.get_state_display }} -
- {{ csv_export_task.message }} - {% if csv_export_task.metadata.file %} - {% trans 'Download' %} - {% endif %} - {% endif %} -
-
-
- {% csrf_token %} - - {% if ndjson_export_task %} -
- {% trans 'Last export' %}: {{ ndjson_export_task.created_time }} - {% trans 'Status' %}: {{ ndjson_export_task.get_state_display }} -
- {{ ndjson_export_task.message }} - {% if ndjson_export_task.metadata.file %} - {% trans 'Download' %} - {% endif %} - {% endif %} -
-
-
- {% csrf_token %} - - exporting to this format will be deprecated soon. - {% if export_task %} -
- {% trans 'Last export' %}: {{ export_task.created_time }} - {% trans 'Status' %}: {{ export_task.get_state_display }} -
- {{ export_task.message }} - {% if export_task.metadata.file %} - {% trans 'Download' %} - {% endif %} - {% endif %} -
-
-
-
-
- {% trans 'Import Data' %} -
- {% csrf_token %} -
    -
  • - {% trans 'Upload a .zip file containing .csv or .ndjson files exported from NeoDB.' %} -
  • -
  • {% trans 'Existing marks and reviews with newer dates will be preserved.' %}
  • -
-
- -

- {% trans 'Visibility' %}: .csv only -
- - - -

- - - {% if csv_import_task %} - {% trans 'Last import started' %}: {{ csv_import_task.created_time }} - {% if csv_import_task.state == 0 or csv_import_task.state == 1 %} -
- {% else %} - {% trans 'Status' %}: {{ csv_import_task.get_state_display }}。 - {{ csv_import_task.message }} - {% endif %} - {% if csv_import_task.metadata.failed_items %} - {% trans 'Failed items' %}: -
- - {% endif %} - {% endif %} -
-
-
-
{% trans 'Import Marks and Reviews from Douban' %} @@ -193,59 +74,50 @@ -
{% trans 'Import Shelf or List from Goodreads' %} -
+ {% csrf_token %}
{% trans 'Link to Goodreads Profile / Shelf / List' %} +
    +
  • + Profile https://www.goodreads.com/user/show/12345-janedoe +
    + {% trans 'want-to-read / currently-reading / read books and their reviews will be imported.' %} +
  • +
  • + Shelf https://www.goodreads.com/review/list/12345-janedoe?shelf=name +
    + {% trans 'Shelf will be imported as a new collection.' %} +
  • +
  • + List https://www.goodreads.com/list/show/155086.Popular_Highlights +
    + {% trans 'List will be imported as a new collection.' %} +
  • +
  • + Who Can View My Profile must be set as anyone prior to import. +
  • +
- - {% if goodreads_task %} -
- {% trans 'Last import started' %}: {{ goodreads_task.created_time }} - {% trans 'Status' %}: {{ goodreads_task.get_state_display }}。 -
- {{ goodreads_task.message }} - {% endif %} -
-
    -
  • - Profile https://www.goodreads.com/user/show/12345-janedoe - {% trans 'want-to-read / currently-reading / read books and their reviews will be imported.' %} -
  • -
  • - Shelf https://www.goodreads.com/review/list/12345-janedoe?shelf=name - {% trans 'Shelf will be imported as a new collection.' %} -
  • -
  • - List https://www.goodreads.com/list/show/155086.Popular_Highlights - {% trans 'List will be imported as a new collection.' %} -
  • -
  • - Who Can View My Profile must be set as anyone prior to import. -
  • -
+ {% include "users/user_task_status.html" with task=goodreads_task %}
{% trans 'Import from Letterboxd' %} -
{% csrf_token %}
    @@ -292,30 +164,15 @@

    - - {% trans 'Only forward changes(none->to-watch->watched) will be imported.' %} - {% if letterboxd_task %} -
    - {% trans 'Last import started' %}: {{ letterboxd_task.created_time }} - {% trans 'Status' %}: {{ letterboxd_task.get_state_display }}。 -
    - {{ letterboxd_task.message }} - {% if letterboxd_task.metadata.failed_urls %} - {% trans 'Failed links, likely due to Letterboxd error, you may have to mark them manually' %}: -
    - - {% endif %} - {% endif %} -
    + {% trans 'Only forward changes(none->to-watch->watched) will be imported.' %} + {% include "users/user_task_status.html" with task=letterboxd_task %}
{% trans 'Import Podcast Subscriptions' %} -
+ {% csrf_token %}
{% trans 'Import Method' %}: @@ -331,11 +188,230 @@ {% trans 'Import as a new collection' %} + {% trans 'Visibility' %}: + + +
{% trans 'Select OPML file' %} - +
+ {% include "users/user_task_status.html" with task=opml_import_task %} +
+
+
+
+
+ {% trans 'Import NeoDB Archive' %} +
+ {% csrf_token %} +
    +
  • + {% trans 'Upload a .zip file containing .csv or .ndjson files exported from NeoDB.' %} +
  • +
  • {% trans 'Existing data may be overwritten.' %}
  • +
+ + + + + + + + {% include "users/user_task_status.html" with task=neodb_import_task %} +
+
+
+
+
+ {% trans 'Export NeoDB Archive' %} +
+ {% csrf_token %} + + {% include "users/user_task_status.html" with task=csv_export_task %} +
+
+
+ {% csrf_token %} + + {% include "users/user_task_status.html" with task=ndjson_export_task %} +
+
+
+ {% csrf_token %} + exporting to this format will be deprecated soon, please use csv or ndjson format. + + {% if export_task %} +
+ {% trans 'Last export' %}: {{ export_task.created_time }} + {% trans 'Status' %}: {{ export_task.get_state_display }} +
+ {{ export_task.message }} + {% if export_task.metadata.file %} + {% trans 'Download' %} + {% endif %} + {% endif %}
@@ -351,25 +427,6 @@
- {% comment %} -
-
- {% trans 'Reset visibility for all marks' %} -
- {% csrf_token %} - -
- - - - - - -
-
-
-
- {% endcomment %} {% include "_sidebar.html" with show_profile=1 identity=request.user.identity %}
diff --git a/users/templates/users/user_task_status.html b/users/templates/users/user_task_status.html index b4f58447..077c5e76 100644 --- a/users/templates/users/user_task_status.html +++ b/users/templates/users/user_task_status.html @@ -1,19 +1,33 @@ {% load i18n %} -
- {% trans 'Status' %}: {{ task.get_state_display }}。 - {{ task.message }} -
- {% if task.metadata.total and task.metadata.processed %} -
- -
- {{ task.metadata.processed }} / {{ task.metadata.total }} - ({{ task.metadata.imported }} imported, - {{ task.metadata.skipped }} skipped, - {{ task.metadata.failed }} failed) -
+{% if task %} +
+
+ {% if task.state == 0 %} + + {% elif task.state == 1 %} + + {% elif task.state == 3 %} + + {% elif 'exporter' in task.type %} + + {% else %} + + {% endif %} + {{ task.created_time }} + {{ task.message }}
- {% endif %} -
+ {% if task.state == 0 or task.state == 1 %} + {% if task.metadata.total and task.metadata.processed %} +
+ +
+ {% endif %} + {% endif %} + {% if task.metadata.failed_items %} + {% trans 'Failed items' %}: + + {% endif %} +
+{% endif %} diff --git a/users/urls.py b/users/urls.py index 8f306c6b..4ef5fa80 100644 --- a/users/urls.py +++ b/users/urls.py @@ -10,7 +10,10 @@ urlpatterns = [ path("data", data, name="data"), path("info", account_info, name="info"), path("profile", account_profile, name="profile"), - path("task//status", user_task_status, name="user_task_status"), + path("task//status", user_task_status, name="user_task_status"), + path( + "task//download", user_task_download, name="user_task_download" + ), path("data/import/status", data_import_status, name="import_status"), path("data/import/goodreads", import_goodreads, name="import_goodreads"), path("data/import/douban", import_douban, name="import_douban"), diff --git a/users/views/data.py b/users/views/data.py index 732ff635..e615e066 100644 --- a/users/views/data.py +++ b/users/views/data.py @@ -4,6 +4,7 @@ import os from django.conf import settings from django.contrib import messages from django.contrib.auth.decorators import login_required +from django.core.exceptions import BadRequest from django.db.models import Min from django.http import HttpResponse from django.shortcuts import redirect, render @@ -18,8 +19,8 @@ from journal.importers import ( DoubanImporter, GoodreadsImporter, LetterboxdImporter, + NdjsonImporter, OPMLImporter, - get_neodb_importer, ) from journal.models import ShelfType from takahe.utils import Takahe @@ -92,6 +93,19 @@ def data(request): start_date = queryset.aggregate(Min("created_time"))["created_time__min"] start_year = start_date.year if start_date else current_year years = reversed(range(start_year, current_year + 1)) + + # Import tasks - check for both CSV and NDJSON importers + csv_import_task = CsvImporter.latest_task(request.user) + ndjson_import_task = NdjsonImporter.latest_task(request.user) + # Use the most recent import task for display + if ndjson_import_task and ( + not csv_import_task + or ndjson_import_task.created_time > csv_import_task.created_time + ): + neodb_import_task = 
ndjson_import_task + else: + neodb_import_task = csv_import_task + return render( request, "users/data.html", @@ -100,10 +114,11 @@ def data(request): "import_task": DoubanImporter.latest_task(request.user), "export_task": DoufenExporter.latest_task(request.user), "csv_export_task": CsvExporter.latest_task(request.user), - "csv_import_task": CsvImporter.latest_task(request.user), + "neodb_import_task": neodb_import_task, # Use the most recent import task "ndjson_export_task": NdjsonExporter.latest_task(request.user), "letterboxd_task": LetterboxdImporter.latest_task(request.user), "goodreads_task": GoodreadsImporter.latest_task(request.user), + # "opml_task": OPMLImporter.latest_task(request.user), "years": years, }, ) @@ -121,19 +136,23 @@ def data_import_status(request): @login_required -def user_task_status(request, task_name: str): - match task_name: - case "csv_import": +def user_task_status(request, task_type: str): + match task_type: + case "journal.csvimporter": task_cls = CsvImporter - case "csv_export": + case "journal.ndjsonimporter": + task_cls = NdjsonImporter + case "journal.csvexporter": task_cls = CsvExporter - case "ndjson_export": + case "journal.ndjsonexporter": task_cls = NdjsonExporter - case "letterboxd": + case "journal.letterboxdimporter": task_cls = LetterboxdImporter - case "goodreads": + case "journal.goodreadsimporter": task_cls = GoodreadsImporter - case "douban": + case "journal.opmlimporter": + task_cls = OPMLImporter + case "journal.doubanimporter": task_cls = DoubanImporter case _: return redirect(reverse("users:data")) @@ -141,6 +160,28 @@ def user_task_status(request, task_name: str): return render(request, "users/user_task_status.html", {"task": task}) +@login_required +def user_task_download(request, task_type: str): + match task_type: + case "journal.csvexporter": + task_cls = CsvExporter + case "journal.ndjsonexporter": + task_cls = NdjsonExporter + case _: + return redirect(reverse("users:data")) + task = 
task_cls.latest_task(request.user) + if not task or task.state != Task.States.complete or not task.metadata.get("file"): + messages.add_message(request, messages.ERROR, _("Export file not available.")) + return redirect(reverse("users:data")) + response = HttpResponse() + response["X-Accel-Redirect"] = ( + settings.MEDIA_URL + task.metadata["file"][len(settings.MEDIA_ROOT) :] + ) + response["Content-Type"] = "application/zip" + response["Content-Disposition"] = f'attachment; filename="{task.filename}.zip"' + return response + + @login_required def export_reviews(request): if request.method != "POST": @@ -150,6 +191,7 @@ def export_reviews(request): @login_required def export_marks(request): + # TODO: deprecated if request.method == "POST": DoufenExporter.create(request.user).enqueue() messages.add_message(request, messages.INFO, _("Generating exports.")) @@ -189,22 +231,10 @@ def export_csv(request): ) return redirect(reverse("users:data")) CsvExporter.create(request.user).enqueue() - messages.add_message(request, messages.INFO, _("Generating exports.")) - return redirect(reverse("users:data")) - else: - task = CsvExporter.latest_task(request.user) - if not task or task.state != Task.States.complete: - messages.add_message( - request, messages.ERROR, _("Export file not available.") - ) - return redirect(reverse("users:data")) - response = HttpResponse() - response["X-Accel-Redirect"] = ( - settings.MEDIA_URL + task.metadata["file"][len(settings.MEDIA_ROOT) :] + return redirect( + reverse("users:user_task_status", args=("journal.csvexporter",)) ) - response["Content-Type"] = "application/zip" - response["Content-Disposition"] = f'attachment; filename="{task.filename}.zip"' - return response + return redirect(reverse("users:data")) @login_required @@ -221,22 +251,10 @@ def export_ndjson(request): ) return redirect(reverse("users:data")) NdjsonExporter.create(request.user).enqueue() - messages.add_message(request, messages.INFO, _("Generating exports.")) - return 
redirect(reverse("users:data")) - else: - task = NdjsonExporter.latest_task(request.user) - if not task or task.state != Task.States.complete: - messages.add_message( - request, messages.ERROR, _("Export file not available.") - ) - return redirect(reverse("users:data")) - response = HttpResponse() - response["X-Accel-Redirect"] = ( - settings.MEDIA_URL + task.metadata["file"][len(settings.MEDIA_ROOT) :] + return redirect( + reverse("users:user_task_status", args=("journal.ndjsonexporter",)) ) - response["Content-Type"] = "application/zip" - response["Content-Disposition"] = f'attachment; filename="{task.filename}.zip"' - return response + return redirect(reverse("users:data")) @login_required @@ -263,24 +281,26 @@ def sync_mastodon_preference(request): @login_required def import_goodreads(request): - if request.method == "POST": - raw_url = request.POST.get("url") - if GoodreadsImporter.validate_url(raw_url): - GoodreadsImporter.create( - request.user, - visibility=int(request.POST.get("visibility", 0)), - url=raw_url, - ).enqueue() - messages.add_message(request, messages.INFO, _("Import in progress.")) - else: - messages.add_message(request, messages.ERROR, _("Invalid URL.")) - return redirect(reverse("users:data")) + if request.method != "POST": + return redirect(reverse("users:data")) + raw_url = request.POST.get("url") + if not GoodreadsImporter.validate_url(raw_url): + raise BadRequest(_("Invalid URL.")) + task = GoodreadsImporter.create( + request.user, + visibility=int(request.POST.get("visibility", 0)), + url=raw_url, + ) + task.enqueue() + return redirect(reverse("users:user_task_status", args=(task.type,))) @login_required def import_douban(request): if request.method != "POST": return redirect(reverse("users:data")) + if not DoubanImporter.validate_file(request.FILES["file"]): + raise BadRequest(_("Invalid file.")) f = ( settings.MEDIA_ROOT + "/" @@ -290,64 +310,75 @@ def import_douban(request): with open(f, "wb+") as destination: for chunk in 
request.FILES["file"].chunks(): destination.write(chunk) - if not DoubanImporter.validate_file(request.FILES["file"]): - messages.add_message(request, messages.ERROR, _("Invalid file.")) - return redirect(reverse("users:data")) - DoubanImporter.create( + task = DoubanImporter.create( request.user, visibility=int(request.POST.get("visibility", 0)), mode=int(request.POST.get("import_mode", 0)), file=f, - ).enqueue() - messages.add_message( - request, messages.INFO, _("File is uploaded and will be imported soon.") ) - return redirect(reverse("users:data")) + task.enqueue() + return redirect(reverse("users:user_task_status", args=(task.type,))) @login_required def import_letterboxd(request): - if request.method == "POST": - f = ( - settings.MEDIA_ROOT - + "/" - + GenerateDateUUIDMediaFilePath("x.zip", settings.SYNC_FILE_PATH_ROOT) - ) - os.makedirs(os.path.dirname(f), exist_ok=True) - with open(f, "wb+") as destination: - for chunk in request.FILES["file"].chunks(): - destination.write(chunk) - LetterboxdImporter.create( - request.user, - visibility=int(request.POST.get("visibility", 0)), - file=f, - ).enqueue() - messages.add_message( - request, messages.INFO, _("File is uploaded and will be imported soon.") - ) - return redirect(reverse("users:data")) + if request.method != "POST": + return redirect(reverse("users:data")) + if not LetterboxdImporter.validate_file(request.FILES["file"]): + raise BadRequest(_("Invalid file.")) + f = ( + settings.MEDIA_ROOT + + "/" + + GenerateDateUUIDMediaFilePath("x.zip", settings.SYNC_FILE_PATH_ROOT) + ) + os.makedirs(os.path.dirname(f), exist_ok=True) + with open(f, "wb+") as destination: + for chunk in request.FILES["file"].chunks(): + destination.write(chunk) + task = LetterboxdImporter.create( + request.user, + visibility=int(request.POST.get("visibility", 0)), + file=f, + ) + task.enqueue() + return redirect(reverse("users:user_task_status", args=(task.type,))) @login_required def import_opml(request): - if request.method == 
"POST": - importer = OPMLImporter( - request.user, - int(request.POST.get("visibility", 0)), - int(request.POST.get("import_mode", 0)), - ) - if importer.import_from_file(request.FILES["file"]): - messages.add_message( - request, messages.INFO, _("File is uploaded and will be imported soon.") - ) - else: - messages.add_message(request, messages.ERROR, _("Invalid file.")) - return redirect(reverse("users:data")) + if request.method != "POST": + return redirect(reverse("users:data")) + if not OPMLImporter.validate_file(request.FILES["file"]): + raise BadRequest(_("Invalid file.")) + f = ( + settings.MEDIA_ROOT + + "/" + + GenerateDateUUIDMediaFilePath("x.zip", settings.SYNC_FILE_PATH_ROOT) + ) + os.makedirs(os.path.dirname(f), exist_ok=True) + with open(f, "wb+") as destination: + for chunk in request.FILES["file"].chunks(): + destination.write(chunk) + task = OPMLImporter.create( + request.user, + visibility=int(request.POST.get("visibility", 0)), + mode=int(request.POST.get("import_mode", 0)), + file=f, + ) + task.enqueue() + return redirect(reverse("users:user_task_status", args=(task.type,))) @login_required def import_neodb(request): if request.method == "POST": + format_type_hint = request.POST.get("format_type", "").lower() + if format_type_hint == "csv": + importer = CsvImporter + elif format_type_hint == "ndjson": + importer = NdjsonImporter + else: + raise BadRequest("Invalid file.") f = ( settings.MEDIA_ROOT + "/" @@ -357,16 +388,11 @@ def import_neodb(request): with open(f, "wb+") as destination: for chunk in request.FILES["file"].chunks(): destination.write(chunk) - importer = get_neodb_importer(f) - if not importer: - messages.add_message(request, messages.ERROR, _("Invalid file.")) - return redirect(reverse("users:data")) - importer.create( + task = importer.create( request.user, visibility=int(request.POST.get("visibility", 0)), file=f, - ).enqueue() - messages.add_message( - request, messages.INFO, _("File is uploaded and will be imported soon.") ) + 
task.enqueue() + return redirect(reverse("users:user_task_status", args=(task.type,))) return redirect(reverse("users:data"))