503 pages

This commit is contained in:
Your Name 2023-01-07 12:00:09 -05:00
parent 4447de4943
commit cb390f1d17
9 changed files with 294 additions and 42 deletions

View file

@@ -208,9 +208,8 @@ class Item(SoftDeleteMixin, PolymorphicModel):
)
class Meta:
unique_together = [
index_together = [
[
"polymorphic_ctype_id",
"primary_lookup_id_type",
"primary_lookup_id_value",
]
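This first hunk replaces the composite uniqueness constraint on Item with a plain composite index. A minimal sketch of the difference on a simplified stand-in model (field names copied from the diff, field types assumed; not the project's actual Item):

from django.db import models

class ItemSketch(models.Model):
    # placeholder fields; the real Item is polymorphic and has many more columns
    polymorphic_ctype_id = models.IntegerField()
    primary_lookup_id_type = models.CharField(max_length=50)
    primary_lookup_id_value = models.CharField(max_length=500)

    class Meta:
        # unique_together would add a UNIQUE constraint and reject duplicate lookup ids;
        # index_together only creates a (non-unique) composite index to speed lookups
        index_together = [
            ["polymorphic_ctype_id", "primary_lookup_id_type", "primary_lookup_id_value"],
        ]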

View file

@@ -8,7 +8,7 @@ ResourceContent persists as an ExternalResource which may link to an Item
"""
from typing import Callable
import re
from .models import ExternalResource
from .models import ExternalResource, IdType, Item
from dataclasses import dataclass, field
import logging
import json
@@ -75,7 +75,7 @@ class AbstractSite:
self.url = self.id_to_url(self.id_value) if url else None
self.resource = None
def get_resource(self):
def get_resource(self) -> ExternalResource:
if not self.resource:
self.resource = ExternalResource.objects.filter(url=self.url).first()
if self.resource is None:
@@ -89,6 +89,25 @@ class AbstractSite:
data = ResourceContent()
return data
@staticmethod
def match_existing_item(resource, model=Item) -> Item | None:
t, v = model.get_best_lookup_id(resource.get_all_lookup_ids())
matched = None
if t is not None:
matched = model.objects.filter(
primary_lookup_id_type=t,
primary_lookup_id_value=v,
title=resource.metadata["title"],
).first()
if matched is None and resource.id_type not in [
IdType.DoubanMusic, # DoubanMusic has many dirty data with same UPC
IdType.Goodreads, # previous scraper generated some dirty data
]:
matched = model.objects.filter(
primary_lookup_id_type=t, primary_lookup_id_value=v
).first()
return matched
def get_item(self):
p = self.get_resource()
if not p:
@@ -100,12 +119,9 @@ class AbstractSite:
model = p.get_preferred_model()
if not model:
model = self.DEFAULT_MODEL
t, v = model.get_best_lookup_id(p.get_all_lookup_ids())
if t is not None:
p.item = model.objects.filter(
primary_lookup_id_type=t, primary_lookup_id_value=v
).first()
p.item = self.match_existing_item(p, model)
if p.item is None:
t, v = model.get_best_lookup_id(p.get_all_lookup_ids())
obj = model.copy_metadata(p.metadata)
obj["primary_lookup_id_type"] = t
obj["primary_lookup_id_value"] = v
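Taken together, these two hunks move item matching into match_existing_item(): get_item() now delegates to it, and a match first requires the title to agree with the scraped metadata, falling back to a bare lookup-id match only for sources not known to produce dirty ids. A standalone restatement of that rule, assuming Django models with the same field names as in the diff (the untrusted id types are passed in instead of hard-coded):

def match_existing_item(resource, model, untrusted_id_types=()):
    t, v = model.get_best_lookup_id(resource.get_all_lookup_ids())
    if t is None:
        return None
    candidates = model.objects.filter(primary_lookup_id_type=t, primary_lookup_id_value=v)
    # prefer a candidate whose title also agrees with the scraped metadata
    matched = candidates.filter(title=resource.metadata["title"]).first()
    if matched is None and resource.id_type not in untrusted_id_types:
        # trust a bare id match only for sources without known dirty ids
        matched = candidates.first()
    return matched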
@@ -159,7 +175,7 @@ class AbstractSite:
if not p.ready:
_logger.error(f"unable to get resource {self.url} ready")
return None
if auto_create: # and p.item is None:
if auto_create and p.item is None:
self.get_item()
if auto_save:
p.save()
@@ -176,7 +192,8 @@ class AbstractSite:
)
else:
_logger.error(f'unable to get site for {linked_resource["url"]}')
django_rq.get_queue("crawl").enqueue(crawl_related_resources_task, p.pk)
if p.related_resources:
django_rq.get_queue("crawl").enqueue(crawl_related_resources_task, p.pk)
p.item.update_linked_items_from_external_resource(p)
p.item.save()
return p
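Two smaller fixes in the same method: get_item() is only called when no item is attached yet, and the crawl job is only enqueued when the resource actually lists related resources. A minimal django_rq sketch of that guard, assuming a queue named "crawl" is configured in RQ_QUEUES:

import django_rq

def enqueue_crawl_if_needed(resource):
    # skip scheduling a job that would have nothing to do
    if resource.related_resources:
        django_rq.get_queue("crawl").enqueue(crawl_related_resources_task, resource.pk)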
@@ -234,7 +251,10 @@ ExternalResource.get_site = lambda resource: SiteManager.get_site_by_id_type(
def crawl_related_resources_task(resource_pk):
resource = ExternalResource.objects.get(pk=resource_pk)
resource = ExternalResource.objects.filter(pk=resource_pk).first()
if not resource:
_logger.warn(f"crawl resource not found {resource_pk}")
return
links = resource.related_resources
for w in links:
try:

View file

@@ -337,7 +337,11 @@ class TMDB_TVSeason(AbstractSite):
def scrape(self):
v = self.id_value.split("-")
api_url = f"https://api.themoviedb.org/3/tv/{v[0]}/season/{v[1]}?api_key={settings.TMDB_API3_KEY}&language=zh-CN&append_to_response=external_ids,credits"
show_id = v[0]
season_id = v[1]
site = TMDB_TV(TMDB_TV.id_to_url(show_id))
show_resource = site.get_resource_ready(auto_create=False, auto_link=False)
api_url = f"https://api.themoviedb.org/3/tv/{show_id}/season/{season_id}?api_key={settings.TMDB_API3_KEY}&language=zh-CN&append_to_response=external_ids,credits"
d = BasicDownloader(api_url).download().json()
if not d.get("id"):
raise ParseError("id")
@@ -353,6 +357,9 @@ class TMDB_TVSeason(AbstractSite):
},
)
)
pd.metadata["title"] = (
show_resource.metadata["title"] + " " + pd.metadata["title"]
)
pd.metadata["required_resources"] = [
{
"model": "TVShow",
@@ -388,17 +395,17 @@ class TMDB_TVSeason(AbstractSite):
)
# get external id from 1st episode
if pd.lookup_ids[IdType.IMDB]:
_logger.warning("Unexpected IMDB id for TMDB tv season")
elif len(pd.metadata["episode_number_list"]) == 0:
_logger.warning(
"Unable to lookup IMDB id for TMDB tv season with zero episodes"
)
else:
ep = pd.metadata["episode_number_list"][0]
api_url2 = f"https://api.themoviedb.org/3/tv/{v[0]}/season/{v[1]}/episode/{ep}?api_key={settings.TMDB_API3_KEY}&language=zh-CN&append_to_response=external_ids,credits"
d2 = BasicDownloader(api_url2).download().json()
if not d2.get("id"):
raise ParseError("episode id for season")
pd.lookup_ids[IdType.IMDB] = d2["external_ids"].get("imdb_id")
return pd
# if pd.lookup_ids[IdType.IMDB]:
# _logger.warning("Unexpected IMDB id for TMDB tv season")
# elif len(pd.metadata["episode_number_list"]) == 0:
# _logger.warning(
# "Unable to lookup IMDB id for TMDB tv season with zero episodes"
# )
# else:
# ep = pd.metadata["episode_number_list"][0]
# api_url2 = f"https://api.themoviedb.org/3/tv/{v[0]}/season/{v[1]}/episode/{ep}?api_key={settings.TMDB_API3_KEY}&language=zh-CN&append_to_response=external_ids,credits"
# d2 = BasicDownloader(api_url2).download().json()
# if not d2.get("id"):
# raise ParseError("episode id for season")
# pd.lookup_ids[IdType.IMDB] = d2["external_ids"].get("imdb_id")
# return pd
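The season scraper now fetches the parent show first (without creating or linking an item) and prefixes the season title with the show title, while the per-episode IMDB lookup is commented out rather than removed. A tiny worked example of the title composition, with illustrative metadata values only:

# illustrative values, not taken from TMDB
show_metadata = {"title": "Stranger Things"}
season_metadata = {"title": "Season 4"}
season_metadata["title"] = show_metadata["title"] + " " + season_metadata["title"]
assert season_metadata["title"] == "Stranger Things Season 4"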

View file

@@ -275,15 +275,6 @@ class TVSeason(Item):
]
return [(i.value, i.label) for i in id_types]
def is_partial_title(self):
return re.match("^(第.+季|特别篇)$", self.title) is not None
def get_full_title(self):
if self.is_partial_title() and self.show:
return f"{self.show.title} {self.title}"
else:
return self.title
def update_linked_items_from_external_resource(self, resource):
"""add Work from resource.metadata['work'] if not yet"""
links = resource.required_resources + resource.related_resources
@@ -294,7 +285,6 @@ class TVSeason(Item):
).first()
if p and p.item and w in resource.required_resources:
self.show = p.item
self.title = self.get_full_title()
def all_seasons(self):
return self.show.all_seasons if self.show else []
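The deleted helpers detected Chinese partial season titles ("第N季" for "Season N", "特别篇" for "Special") and prepended the show title when the season was linked; that naming now happens in the TMDB scraper above instead. A short sketch of what the dropped regex matched (pattern copied from the removed code; sample strings are illustrative):

import re

partial_title = re.compile(r"^(第.+季|特别篇)$")

assert partial_title.match("第四季")                   # "Season 4" alone: partial
assert partial_title.match("特别篇")                   # "Special": partial
assert partial_title.match("怪奇物语 第四季") is None  # already prefixed with a show title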

common/templates/503.html (new file, 211 lines added)

File diff suppressed because one or more lines are too long

View file

@@ -20,7 +20,6 @@ from catalog.common.utils import DEFAULT_ITEM_COVER, item_cover_path
from django.utils.baseconv import base62
from django.db.models import Q
from catalog.models import *
import mistune
from django.contrib.contenttypes.models import ContentType
from markdown import markdown
from catalog.common import jsondata
@@ -213,7 +212,7 @@ class Review(Content):
@property
def html_content(self):
return mistune.html(self.body)
return markdown(self.body)
@cached_property
def rating_grade(self):
@@ -248,8 +247,9 @@ class Review(Content):
class Rating(Content):
class Meta:
unique_together = [["owner", "item"]]
# class Meta:
# unique_together = [["owner", "item"]]
# FIXME enable after migration
grade = models.PositiveSmallIntegerField(
default=0, validators=[MaxValueValidator(10), MinValueValidator(1)], null=True
@@ -487,6 +487,10 @@ class ShelfMember(ListMember):
"Shelf", related_name="members", on_delete=models.CASCADE
)
# class Meta:
# unique_together = [["parent", "item"]]
# FIXME enable after migration
@cached_property
def mark(self):
m = Mark(self.owner, self.item)
@@ -694,6 +698,10 @@ Tag
class TagMember(ListMember):
parent = models.ForeignKey("Tag", related_name="members", on_delete=models.CASCADE)
# class Meta:
# unique_together = [["parent", "item"]]
# FIXME enable after migration
TagValidators = [RegexValidator(regex=r"\s+", inverse_match=True)]
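In the journal models, Review.html_content switches from mistune to the markdown package (mistune is dropped from requirements.txt further down), and the unique_together constraints on Rating, ShelfMember and TagMember are parked behind FIXME comments until a migration lands. A minimal sketch of the swapped rendering call, assuming plain Markdown input and no extensions:

from markdown import markdown

body = "**bold** and a [link](https://example.org)"
html = markdown(body)
# -> '<p><strong>bold</strong> and a <a href="https://example.org">link</a></p>'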

View file

@@ -190,6 +190,11 @@ class Command(BaseCommand):
parser.add_argument(
"--clearlink", help="clear legacy link table", action="store_true"
)
parser.add_argument(
"--doubantv",
help="go thru douban tv and generate TMDB_Season link for TVSeason",
action="store_true",
)
parser.add_argument(
"--reload",
help="reload and ignore existing ExternalResource",
@@ -209,6 +214,10 @@ class Command(BaseCommand):
if SongLink.objects.filter(old_id=entity.id).count() == 0:
SongLink.objects.create(old_id=entity.id, new_uid=new_uid)
def douban_tv(self):
"""go thru douban tv and generate TMDB link"""
pass
def handle(self, *args, **options):
if options["song"]:
for sm in SongMark.objects.all():
@@ -217,6 +226,9 @@ class Command(BaseCommand):
self.process_song(ci.song)
return
if options["doubantv"]:
return self.douban_tv()
types = options["types"] or [
Legacy_Game,
Legacy_Album,
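The new --doubantv switch is registered through Django's normal BaseCommand plumbing and makes handle() return early into the (currently stubbed) douban_tv(). A condensed sketch of that wiring with everything unrelated omitted (class and method names as in the diff; the command module's own name is not visible in this hunk):

from django.core.management.base import BaseCommand

class Command(BaseCommand):
    def add_arguments(self, parser):
        parser.add_argument(
            "--doubantv",
            help="go thru douban tv and generate TMDB_Season link for TVSeason",
            action="store_true",
        )

    def douban_tv(self):
        """go thru douban tv and generate TMDB link"""
        pass  # stub in this commit

    def handle(self, *args, **options):
        if options["doubantv"]:
            return self.douban_tv()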

View file

@@ -26,4 +26,4 @@ dnspython
typesense
markdownify
igdb-api-v4
mistune
django-maintenance-mode
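The dependency swap matches the commit title: mistune goes away with the Review change above, and django-maintenance-mode comes in to answer requests with HTTP 503 from a 503.html template, which the new common/templates/503.html supplies. A hedged settings sketch of the stock integration as documented by that package (app label and middleware path from its docs, not from this repo's settings):

# settings.py (illustrative)
INSTALLED_APPS += ["maintenance_mode"]
MIDDLEWARE += ["maintenance_mode.middleware.MaintenanceModeMiddleware"]
# by default the package renders a template named "503.html" with status 503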

View file

@@ -40,6 +40,11 @@ class LocalActivity(models.Model, UserOwnedObjectMixin):
action_object = models.ForeignKey(Piece, on_delete=models.CASCADE)
created_time = models.DateTimeField(default=timezone.now, db_index=True)
class Meta:
index_together = [
["owner", "created_time"],
]
def __str__(self):
return f"Activity [{self.owner}:{self.template}:{self.action_object}]"
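Finally, LocalActivity gains a composite index on (owner, created_time), which backs the typical activity-feed query: the newest activities for one owner. A minimal sketch of the query this index serves (standard Django ORM; user is a placeholder):

recent = (
    LocalActivity.objects.filter(owner=user)
    .order_by("-created_time")[:20]
)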