From cb390f1d1700f24d2a9f288b0dcef3419b423494 Mon Sep 17 00:00:00 2001 From: Your Name Date: Sat, 7 Jan 2023 12:00:09 -0500 Subject: [PATCH] 503 pages --- catalog/common/models.py | 3 +- catalog/common/sites.py | 40 +++- catalog/sites/tmdb.py | 37 +-- catalog/tv/models.py | 10 - common/templates/503.html | 211 ++++++++++++++++++ journal/models.py | 16 +- legacy/management/commands/migrate_catalog.py | 12 + requirements.txt | 2 +- social/models.py | 5 + 9 files changed, 294 insertions(+), 42 deletions(-) create mode 100644 common/templates/503.html diff --git a/catalog/common/models.py b/catalog/common/models.py index c4e344bc..f0d45683 100644 --- a/catalog/common/models.py +++ b/catalog/common/models.py @@ -208,9 +208,8 @@ class Item(SoftDeleteMixin, PolymorphicModel): ) class Meta: - unique_together = [ + index_together = [ [ - "polymorphic_ctype_id", "primary_lookup_id_type", "primary_lookup_id_value", ] diff --git a/catalog/common/sites.py b/catalog/common/sites.py index 4e7574e4..6306e699 100644 --- a/catalog/common/sites.py +++ b/catalog/common/sites.py @@ -8,7 +8,7 @@ ResourceContent persists as an ExternalResource which may link to an Item """ from typing import Callable import re -from .models import ExternalResource +from .models import ExternalResource, IdType, Item from dataclasses import dataclass, field import logging import json @@ -75,7 +75,7 @@ class AbstractSite: self.url = self.id_to_url(self.id_value) if url else None self.resource = None - def get_resource(self): + def get_resource(self) -> ExternalResource: if not self.resource: self.resource = ExternalResource.objects.filter(url=self.url).first() if self.resource is None: @@ -89,6 +89,25 @@ class AbstractSite: data = ResourceContent() return data + @staticmethod + def match_existing_item(resource, model=Item) -> Item | None: + t, v = model.get_best_lookup_id(resource.get_all_lookup_ids()) + matched = None + if t is not None: + matched = model.objects.filter( + primary_lookup_id_type=t, + primary_lookup_id_value=v, + title=resource.metadata["title"], + ).first() + if matched is None and resource.id_type not in [ + IdType.DoubanMusic, # DoubanMusic has many dirty data with same UPC + IdType.Goodreads, # previous scraper generated some dirty data + ]: + matched = model.objects.filter( + primary_lookup_id_type=t, primary_lookup_id_value=v + ).first() + return matched + def get_item(self): p = self.get_resource() if not p: @@ -100,12 +119,9 @@ class AbstractSite: model = p.get_preferred_model() if not model: model = self.DEFAULT_MODEL - t, v = model.get_best_lookup_id(p.get_all_lookup_ids()) - if t is not None: - p.item = model.objects.filter( - primary_lookup_id_type=t, primary_lookup_id_value=v - ).first() + p.item = self.match_existing_item(p, model) if p.item is None: + t, v = model.get_best_lookup_id(p.get_all_lookup_ids()) obj = model.copy_metadata(p.metadata) obj["primary_lookup_id_type"] = t obj["primary_lookup_id_value"] = v @@ -159,7 +175,7 @@ class AbstractSite: if not p.ready: _logger.error(f"unable to get resource {self.url} ready") return None - if auto_create: # and p.item is None: + if auto_create and p.item is None: self.get_item() if auto_save: p.save() @@ -176,7 +192,8 @@ class AbstractSite: ) else: _logger.error(f'unable to get site for {linked_resource["url"]}') - django_rq.get_queue("crawl").enqueue(crawl_related_resources_task, p.pk) + if p.related_resources: + django_rq.get_queue("crawl").enqueue(crawl_related_resources_task, p.pk) p.item.update_linked_items_from_external_resource(p) p.item.save() return p @@ -234,7 +251,10 @@ ExternalResource.get_site = lambda resource: SiteManager.get_site_by_id_type( def crawl_related_resources_task(resource_pk): - resource = ExternalResource.objects.get(pk=resource_pk) + resource = ExternalResource.objects.filter(pk=resource_pk).first() + if not resource: + _logger.warn(f"crawl resource not found {resource_pk}") + return links = resource.related_resources for w in links: try: diff --git a/catalog/sites/tmdb.py b/catalog/sites/tmdb.py index 1b721bfe..3651205d 100644 --- a/catalog/sites/tmdb.py +++ b/catalog/sites/tmdb.py @@ -337,7 +337,11 @@ class TMDB_TVSeason(AbstractSite): def scrape(self): v = self.id_value.split("-") - api_url = f"https://api.themoviedb.org/3/tv/{v[0]}/season/{v[1]}?api_key={settings.TMDB_API3_KEY}&language=zh-CN&append_to_response=external_ids,credits" + show_id = v[0] + season_id = v[1] + site = TMDB_TV(TMDB_TV.id_to_url(show_id)) + show_resource = site.get_resource_ready(auto_create=False, auto_link=False) + api_url = f"https://api.themoviedb.org/3/tv/{show_id}/season/{season_id}?api_key={settings.TMDB_API3_KEY}&language=zh-CN&append_to_response=external_ids,credits" d = BasicDownloader(api_url).download().json() if not d.get("id"): raise ParseError("id") @@ -353,6 +357,9 @@ class TMDB_TVSeason(AbstractSite): }, ) ) + pd.metadata["title"] = ( + show_resource.metadata["title"] + " " + pd.metadata["title"] + ) pd.metadata["required_resources"] = [ { "model": "TVShow", @@ -388,17 +395,17 @@ class TMDB_TVSeason(AbstractSite): ) # get external id from 1st episode - if pd.lookup_ids[IdType.IMDB]: - _logger.warning("Unexpected IMDB id for TMDB tv season") - elif len(pd.metadata["episode_number_list"]) == 0: - _logger.warning( - "Unable to lookup IMDB id for TMDB tv season with zero episodes" - ) - else: - ep = pd.metadata["episode_number_list"][0] - api_url2 = f"https://api.themoviedb.org/3/tv/{v[0]}/season/{v[1]}/episode/{ep}?api_key={settings.TMDB_API3_KEY}&language=zh-CN&append_to_response=external_ids,credits" - d2 = BasicDownloader(api_url2).download().json() - if not d2.get("id"): - raise ParseError("episode id for season") - pd.lookup_ids[IdType.IMDB] = d2["external_ids"].get("imdb_id") - return pd + # if pd.lookup_ids[IdType.IMDB]: + # _logger.warning("Unexpected IMDB id for TMDB tv season") + # elif len(pd.metadata["episode_number_list"]) == 0: + # _logger.warning( + # "Unable to lookup IMDB id for TMDB tv season with zero episodes" + # ) + # else: + # ep = pd.metadata["episode_number_list"][0] + # api_url2 = f"https://api.themoviedb.org/3/tv/{v[0]}/season/{v[1]}/episode/{ep}?api_key={settings.TMDB_API3_KEY}&language=zh-CN&append_to_response=external_ids,credits" + # d2 = BasicDownloader(api_url2).download().json() + # if not d2.get("id"): + # raise ParseError("episode id for season") + # pd.lookup_ids[IdType.IMDB] = d2["external_ids"].get("imdb_id") + # return pd diff --git a/catalog/tv/models.py b/catalog/tv/models.py index 2b7fb381..dda96acf 100644 --- a/catalog/tv/models.py +++ b/catalog/tv/models.py @@ -275,15 +275,6 @@ class TVSeason(Item): ] return [(i.value, i.label) for i in id_types] - def is_partial_title(self): - return re.match("^(第.+季|特别篇)$", self.title) is not None - - def get_full_title(self): - if self.is_partial_title() and self.show: - return f"{self.show.title} {self.title}" - else: - return self.title - def update_linked_items_from_external_resource(self, resource): """add Work from resource.metadata['work'] if not yet""" links = resource.required_resources + resource.related_resources @@ -294,7 +285,6 @@ class TVSeason(Item): ).first() if p and p.item and w in resource.required_resources: self.show = p.item - self.title = self.get_full_title() def all_seasons(self): return self.show.all_seasons if self.show else [] diff --git a/common/templates/503.html b/common/templates/503.html new file mode 100644 index 00000000..069b11f4 --- /dev/null +++ b/common/templates/503.html @@ -0,0 +1,211 @@ + + + + +☃ NeoDB 升级中... + + +
+
+
NeoDB 升级中...
+
+
+
+
+
+
+
+
+ + + + \ No newline at end of file diff --git a/journal/models.py b/journal/models.py index 275a60da..b0645ab1 100644 --- a/journal/models.py +++ b/journal/models.py @@ -20,7 +20,6 @@ from catalog.common.utils import DEFAULT_ITEM_COVER, item_cover_path from django.utils.baseconv import base62 from django.db.models import Q from catalog.models import * -import mistune from django.contrib.contenttypes.models import ContentType from markdown import markdown from catalog.common import jsondata @@ -213,7 +212,7 @@ class Review(Content): @property def html_content(self): - return mistune.html(self.body) + return markdown(self.body) @cached_property def rating_grade(self): @@ -248,8 +247,9 @@ class Review(Content): class Rating(Content): - class Meta: - unique_together = [["owner", "item"]] + # class Meta: + # unique_together = [["owner", "item"]] + # FIXME enable after migration grade = models.PositiveSmallIntegerField( default=0, validators=[MaxValueValidator(10), MinValueValidator(1)], null=True @@ -487,6 +487,10 @@ class ShelfMember(ListMember): "Shelf", related_name="members", on_delete=models.CASCADE ) + # class Meta: + # unique_together = [["parent", "item"]] + # FIXME enable after migration + @cached_property def mark(self): m = Mark(self.owner, self.item) @@ -694,6 +698,10 @@ Tag class TagMember(ListMember): parent = models.ForeignKey("Tag", related_name="members", on_delete=models.CASCADE) + # class Meta: + # unique_together = [["parent", "item"]] + # FIXME enable after migration + TagValidators = [RegexValidator(regex=r"\s+", inverse_match=True)] diff --git a/legacy/management/commands/migrate_catalog.py b/legacy/management/commands/migrate_catalog.py index ee75b375..3ae9cc61 100644 --- a/legacy/management/commands/migrate_catalog.py +++ b/legacy/management/commands/migrate_catalog.py @@ -190,6 +190,11 @@ class Command(BaseCommand): parser.add_argument( "--clearlink", help="clear legacy link table", action="store_true" ) + parser.add_argument( + "--doubantv", + help="go thru douban tv and generate TMDB_Season link for TVSeason", + action="store_true", + ) parser.add_argument( "--reload", help="reload and ignore existing ExternalResource", @@ -209,6 +214,10 @@ class Command(BaseCommand): if SongLink.objects.filter(old_id=entity.id).count() == 0: SongLink.objects.create(old_id=entity.id, new_uid=new_uid) + def douban_tv(self): + """go thru douban tv and generate TMDB link""" + pass + def handle(self, *args, **options): if options["song"]: for sm in SongMark.objects.all(): @@ -217,6 +226,9 @@ class Command(BaseCommand): self.process_song(ci.song) return + if options["doubantv"]: + return self.douban_tv() + types = options["types"] or [ Legacy_Game, Legacy_Album, diff --git a/requirements.txt b/requirements.txt index b9a6338a..ac7621f9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -26,4 +26,4 @@ dnspython typesense markdownify igdb-api-v4 -mistune +django-maintenance-mode diff --git a/social/models.py b/social/models.py index 9cf563be..caf93f35 100644 --- a/social/models.py +++ b/social/models.py @@ -40,6 +40,11 @@ class LocalActivity(models.Model, UserOwnedObjectMixin): action_object = models.ForeignKey(Piece, on_delete=models.CASCADE) created_time = models.DateTimeField(default=timezone.now, db_index=True) + class Meta: + index_together = [ + ["owner", "created_time"], + ] + def __str__(self): return f"Activity [{self.owner}:{self.template}:{self.action_object}]"