supports localized title
parent 9b13e8028e
commit 65098a137d
151 changed files with 16260 additions and 12811 deletions
.github/workflows/tests.yml (vendored, 1 line changed)
@@ -52,7 +52,6 @@ jobs:
          NEODB_SITE_NAME: test
          NEODB_SITE_DOMAIN: test.domain
          NEODB_SECRET_KEY: test
          NEODB_LANGUAGE: zh-hans
      run: |
        python manage.py compilemessages -i .venv -l zh_Hans
        python manage.py test
@@ -46,8 +46,8 @@ env = environ.FileAwareEnv(
    NEODB_SITE_LINKS=(dict, {}),
    # Alternative domains
    NEODB_ALTERNATIVE_DOMAINS=(list, []),
    # Default language
    NEODB_LANGUAGE=(str, "zh-hans"),
    # Preferred languages in catalog
    NEODB_PREFERRED_LANGUAGES=(list, ["en", "zh"]),  # , "ja", "ko", "de", "fr", "es"
    # Invite only mode
    # when True: user will not be able to register unless with invite token
    # (generated by `neodb-manage invite --create`)
@@ -408,14 +408,38 @@ if SLACK_TOKEN:

MARKDOWNX_MARKDOWNIFY_FUNCTION = "journal.models.render_md"

LANGUAGE_CODE = env("NEODB_LANGUAGE", default="zh-hans")  # type: ignore
LOCALE_PATHS = [os.path.join(BASE_DIR, "locale")]
LANGUAGES = (
    ("en", _("English")),
    ("zh-hans", _("Simplified Chinese")),
    ("zh-hant", _("Traditional Chinese")),
SUPPORTED_UI_LANGUAGES = {
    "en": _("English"),
    "zh-hans": _("Simplified Chinese"),
    "zh-hant": _("Traditional Chinese"),
}

LANGUAGES = SUPPORTED_UI_LANGUAGES.items()


def _init_language_settings(preferred_languages_env):
    default_language = None
    preferred_languages = []
    for pl in preferred_languages_env:
        lang = pl.strip().lower()
        if not default_language:
            if lang in SUPPORTED_UI_LANGUAGES:
                default_language = lang
            elif lang == "zh":
                default_language = "zh-hans"
        if lang.startswith("zh-"):
            lang = "zh"
        if lang not in preferred_languages:
            preferred_languages.append(lang)
    return default_language or "en", preferred_languages or ["en"]


LANGUAGE_CODE, PREFERRED_LANGUAGES = _init_language_settings(
    env("NEODB_PREFERRED_LANGUAGES")
)

LOCALE_PATHS = [os.path.join(BASE_DIR, "locale")]

TIME_ZONE = env("NEODB_TIMEZONE", default="Asia/Shanghai")  # type: ignore

USE_I18N = True
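A quick sketch of what the new helper returns for a few inputs (values are illustrative; the first supported entry wins, and regional zh variants collapse to "zh" for catalog preferences):

    _init_language_settings(["en", "zh"])       # -> ("en", ["en", "zh"])
    _init_language_settings(["zh-hant", "ja"])  # -> ("zh-hant", ["zh", "ja"])
    _init_language_settings([])                 # -> ("en", ["en"])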
@@ -17,7 +17,6 @@ work data seems asymmetric (a book links to a work, but may not listed in that w
"""

from os.path import exists
from typing import TYPE_CHECKING

from django.core.validators import MaxValueValidator, MinValueValidator
@@ -37,6 +36,7 @@ from catalog.common import (
    PrimaryLookupIdDescriptor,
    jsondata,
)
from catalog.common.models import SCRIPT_CHOICES

from .utils import *
@@ -90,6 +90,7 @@ class Edition(Item):
        "pages",
        "price",
        "brief",
        "localized_description",
        "contents",
    ]
    subtitle = jsondata.CharField(
@@ -113,7 +114,12 @@ class Edition(Item):
        default=list,
    )
    language = jsondata.CharField(
        _("language"), null=True, blank=True, default=None, max_length=500
        _("language"),
        null=False,
        blank=True,
        default=None,
        max_length=500,
        choices=SCRIPT_CHOICES,
    )
    pub_house = jsondata.CharField(
        _("publishing house"), null=True, blank=False, default=None, max_length=500
@@ -13,12 +13,16 @@ from django.db import connection, models
from django.db.models import QuerySet, Value
from django.template.defaultfilters import default
from django.utils import timezone
from django.utils.translation import get_language
from django.utils.translation import gettext_lazy as _
from loguru import logger
from ninja import Field, Schema
from polymorphic.models import PolymorphicModel

from catalog.common import jsondata
from common.models import LANGUAGE_CHOICES, LOCALE_CHOICES, SCRIPT_CHOICES
from common.models.lang import get_current_locales
from common.models.misc import uniq

from .utils import DEFAULT_ITEM_COVER, item_cover_path, resource_cover_path
@@ -258,9 +262,16 @@ class BaseSchema(Schema):
    external_resources: list[ExternalResourceSchema] | None


class LocalizedTitleSchema(Schema):
    lang: str
    text: str


class ItemInSchema(Schema):
    title: str
    brief: str
    localized_title: list[LocalizedTitleSchema] = []
    localized_description: list[LocalizedTitleSchema] = []
    cover_image_url: str | None
    rating: float | None
    rating_count: int | None
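With this schema, an API response for an item carries titles and descriptions as lang/text pairs, e.g. (illustrative values):

    {
        "title": "Portal 2",
        "localized_title": [
            {"lang": "en", "text": "Portal 2"},
            {"lang": "zh-cn", "text": "传送门2"},
        ],
        "localized_description": [{"lang": "en", "text": "Sequel to Portal."}],
    }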
@@ -270,6 +281,63 @@ class ItemSchema(BaseSchema, ItemInSchema):
    pass


def get_locale_choices_for_jsonform(choices):
    """return list for jsonform schema"""
    return [{"title": v, "value": k} for k, v in choices]


LOCALE_CHOICES_JSONFORM = get_locale_choices_for_jsonform(LOCALE_CHOICES)
LANGUAGE_CHOICES_JSONFORM = get_locale_choices_for_jsonform(LANGUAGE_CHOICES)

LOCALIZED_TITLE_SCHEMA = {
    "type": "list",
    "items": {
        "type": "dict",
        "keys": {
            "lang": {
                "type": "string",
                "title": _("language"),
                "choices": LOCALE_CHOICES_JSONFORM,
            },
            "text": {"type": "string", "title": _("content")},
        },
        "required": ["lang", "text"],
    },
    "uniqueItems": True,
}

LOCALIZED_DESCRIPTION_SCHEMA = {
    "type": "list",
    "items": {
        "type": "dict",
        "keys": {
            "lang": {
                "type": "string",
                "title": _("language"),
                "choices": LOCALE_CHOICES_JSONFORM,
            },
            "text": {"type": "string", "title": _("content"), "widget": "textarea"},
        },
        "required": ["lang", "text"],
    },
    "uniqueItems": True,
}


def LanguageListField():
    return jsondata.ArrayField(
        verbose_name=_("language"),
        base_field=models.CharField(blank=True, default="", max_length=100),
        null=True,
        blank=True,
        default=list,
        # schema={
        #     "type": "list",
        #     "items": {"type": "string", "choices": LANGUAGE_CHOICES_JSONFORM},
        # },
    )


class Item(PolymorphicModel):
    if TYPE_CHECKING:
        external_resources: QuerySet["ExternalResource"]
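get_locale_choices_for_jsonform simply reshapes Django-style choice tuples into the dict form django-jsonform expects, e.g. (choices made up for illustration):

    get_locale_choices_for_jsonform([("en", "English"), ("zh-cn", "Simplified Chinese")])
    # -> [{"title": "English", "value": "en"},
    #     {"title": "Simplified Chinese", "value": "zh-cn"}]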
@@ -308,6 +376,22 @@ class Item(PolymorphicModel):
        related_name="merged_from_items",
    )

    localized_title = jsondata.JSONField(
        verbose_name=_("title"),
        null=False,
        blank=True,
        default=list,
        schema=LOCALIZED_TITLE_SCHEMA,
    )

    localized_description = jsondata.JSONField(
        verbose_name=_("description"),
        null=False,
        blank=True,
        default=list,
        schema=LOCALIZED_DESCRIPTION_SCHEMA,
    )

    class Meta:
        index_together = [
            [
@@ -494,12 +578,52 @@ class Item(PolymorphicModel):
    def class_name(self) -> str:
        return self.__class__.__name__.lower()

    @property
    def display_title(self) -> str:
        return self.title
    def get_localized_title(self) -> str | None:
        if self.localized_title:
            locales = get_current_locales()
            for loc in locales:
                v = next(
                    filter(lambda t: t["lang"] == loc, self.localized_title), {}
                ).get("text")
                if v:
                    return v

    def get_localized_description(self) -> str | None:
        if self.localized_description:
            locales = get_current_locales()
            for loc in locales:
                v = next(
                    filter(lambda t: t["lang"] == loc, self.localized_description), {}
                ).get("text")
                if v:
                    return v

    @property
    def display_description(self):
    def display_title(self) -> str:
        return (
            self.get_localized_title()
            or self.title
            or (
                self.orig_title  # type:ignore
                if hasattr(self, "orig_title")
                else ""
            )
        ) or (self.localized_title[0]["text"] if self.localized_title else "")

    @property
    def display_description(self) -> str:
        return (
            self.get_localized_description()
            or self.brief
            or (
                self.localized_description[0]["text"]
                if self.localized_description
                else ""
            )
        )

    @property
    def brief_description(self):
        return self.brief[:155]

    @classmethod
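The resolution order of the reworked display_title, in a minimal sketch (item values are hypothetical):

    item.localized_title = [{"lang": "ja", "text": "シャレード"}]
    # with active locales ["en"], get_localized_title() finds no match and returns None;
    # display_title then falls back to item.title, then orig_title if present,
    # and finally to the first localized_title entry, so an empty string is
    # returned only when every source is empty.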
@@ -547,7 +671,13 @@ class Item(PolymorphicModel):
    METADATA_COPY_LIST = [
        "title",
        "brief",
        "localized_title",
        "localized_description",
    ]  # list of metadata keys to copy from resource to item
    METADATA_MERGE_LIST = [
        "localized_title",
        "localized_description",
    ]

    @classmethod
    def copy_metadata(cls, metadata: dict[str, Any]) -> dict[str, Any]:
@@ -568,19 +698,26 @@ class Item(PolymorphicModel):
            else None
        )

    def merge_data_from_external_resource(
        self, p: "ExternalResource", ignore_existing_content: bool = False
    ):
        for k in self.METADATA_COPY_LIST:
            v = p.metadata.get(k)
            if v:
                if not getattr(self, k) or ignore_existing_content:
                    setattr(self, k, v)
                elif k in self.METADATA_MERGE_LIST:
                    setattr(self, k, uniq((v or []) + getattr(self, k, [])))
        if p.cover and (not self.has_cover() or ignore_existing_content):
            self.cover = p.cover

    def merge_data_from_external_resources(self, ignore_existing_content: bool = False):
        """Subclass may override this"""
        lookup_ids = []
        for p in self.external_resources.all():
            lookup_ids.append((p.id_type, p.id_value))
            lookup_ids += p.other_lookup_ids.items()
            for k in self.METADATA_COPY_LIST:
                if p.metadata.get(k) and (
                    not getattr(self, k) or ignore_existing_content
                ):
                    setattr(self, k, p.metadata.get(k))
            if p.cover and (not self.has_cover() or ignore_existing_content):
                self.cover = p.cover
            self.merge_data_from_external_resource(p, ignore_existing_content)
        self.update_lookup_ids(list(set(lookup_ids)))

    def update_linked_items_from_external_resource(self, resource: "ExternalResource"):
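A sketch of the merge semantics for keys in METADATA_MERGE_LIST (uniq keeps the first occurrence; values illustrative):

    # resource metadata:    [{"lang": "en", "text": "Portal 2"}]
    # existing item value:  [{"lang": "zh-cn", "text": "传送门2"}]
    # after merge_data_from_external_resource, localized_title becomes:
    #   [{"lang": "en", "text": "Portal 2"}, {"lang": "zh-cn", "text": "传送门2"}]
    # plain METADATA_COPY_LIST keys keep the existing value unless
    # ignore_existing_content is True.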
@@ -349,8 +349,12 @@ def crawl_related_resources_task(resource_pk):
        if not site and w.get("url"):
            site = SiteManager.get_site_by_url(w["url"])
        if site:
            site.get_resource_ready(ignore_existing_content=False, auto_link=True)
            res = site.get_resource_ready(
                ignore_existing_content=False, auto_link=True
            )
            item = site.get_item()
            if item and res and w in resource.prematched_resources:
                item.merge_data_from_external_resource(res)
            if item:
                logger.info(f"crawled {w} {item}")
            else:
catalog/common/tests.py (new file, 5 lines)
@@ -0,0 +1,5 @@
from django.test import TestCase as DjangoTestCase


class TestCase(DjangoTestCase):
    databases = "__all__"
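Test suites can opt into all configured databases through this shared base class, e.g. (sketch; the hunks below still set the attribute per class):

    from catalog.common.tests import TestCase

    class MySiteTestCase(TestCase):  # databases = "__all__" is inherited
        def test_parse(self):
            pass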
@@ -3,6 +3,7 @@ from django.utils.translation import gettext_lazy as _

from catalog.models import *
from common.forms import PreviewImageInput
from common.models import DEFAULT_CATALOG_LANGUAGE, detect_language, uniq

CatalogForms = {}
@@ -39,6 +40,73 @@ def _EditForm(item_model):
            "cover": PreviewImageInput(),
        }

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            self.migrate_initial()

        # {'id': 547, 'primary_lookup_id_type': 'imdb', 'primary_lookup_id_value': 'tt0056923', 'cover': <ImageFieldFile: item/tmdb_movie/2024/01/12/10973d2b-1d20-4e37-8c3c-ecc89e671a80.jpg>, 'orig_title': 'Charade', 'other_title': [], 'director': ['Stanley Donen'], 'playwright': ['Peter Stone'], 'actor': ['Cary Grant', 'Audrey Hepburn', 'Walter Matthau', 'James Coburn', 'George Kennedy', 'Dominique Minot', 'Ned Glass', 'Jacques Marin', 'Paul Bonifas', 'Thomas Chelimsky', 'Marc Arian', 'Claudine Berg', 'Marcel Bernier', 'Albert Daumergue', 'Raoul Delfosse', 'Stanley Donen', 'Jean Gold', 'Chantal Goya', 'Clément Harari', 'Monte Landis', 'Bernard Musson', 'Antonio Passalia', 'Jacques Préboist', 'Peter Stone', 'Michel Thomass', 'Roger Trapp', 'Louis Viret'], 'genre': ['喜剧', '悬疑', '爱情'], 'showtime': [{'time': '1963-12-05', 'region': ''}], 'site': '', 'area': [], 'language': ['English', 'Français', 'Deutsch', 'Italiano'], 'year': 1963, 'duration': '', 'localized_title': [], 'localized_description': []}

        def migrate_initial(self):
            if self.initial and self.instance:
                if (
                    "localized_title" in self.Meta.fields
                    and not self.initial["localized_title"]
                ):
                    titles = []
                    if self.instance.title:
                        titles.append(
                            {
                                "lang": detect_language(self.instance.title),
                                "text": self.instance.title,
                            }
                        )
                    if (
                        hasattr(self.instance, "orig_title")
                        and self.instance.orig_title
                    ):
                        titles.append(
                            {
                                "lang": detect_language(self.instance.orig_title),
                                "text": self.instance.orig_title,
                            }
                        )
                    if (
                        hasattr(self.instance, "other_title")
                        and self.instance.other_title
                    ):
                        for t in self.instance.other_title:
                            titles.append(
                                {
                                    "lang": detect_language(t),
                                    "text": t,
                                }
                            )
                    if not titles:
                        titles = [
                            {"lang": DEFAULT_CATALOG_LANGUAGE, "text": "<no title>"}
                        ]
                    self.initial["localized_title"] = uniq(titles)  # type:ignore
                if (
                    "localized_description" in self.Meta.fields
                    and not self.initial["localized_description"]
                ):
                    if self.instance.brief:
                        d = {
                            "lang": detect_language(self.instance.brief),
                            "text": self.instance.brief,
                        }
                    else:
                        d = {
                            "lang": self.initial["localized_title"][0]["lang"],
                            "text": "",
                        }
                    self.initial["localized_description"] = [d]  # type:ignore
            # if (
            #     "language" in self.Meta.fields
            #     and self.initial["language"]
            # ):
            #     if isinstance(self.initial["language"], str):

        def clean(self):
            data = super().clean() or {}
            t, v = self.Meta.model.lookup_id_cleanup(
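A sketch of what migrate_initial seeds for a legacy item that only has flat title fields (field values are hypothetical):

    # instance.title = "Charade", instance.orig_title = "シャレード", no localized_title yet
    # initial["localized_title"] becomes, assuming detect_language maps them to "en"/"ja":
    #   [{"lang": "en", "text": "Charade"}, {"lang": "ja", "text": "シャレード"}]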
@@ -39,9 +39,11 @@ class Game(Item):
    douban_game = PrimaryLookupIdDescriptor(IdType.DoubanGame)

    METADATA_COPY_LIST = [
        "title",
        "brief",
        "other_title",
        # "title",
        # "brief",
        # "other_title",
        "localized_title",
        "localized_description",
        "designer",
        "artist",
        "developer",
@@ -5,6 +5,8 @@ from catalog.models import *


class IGDBTestCase(TestCase):
    databases = "__all__"

    def test_parse(self):
        t_id_type = IdType.IGDB
        t_id_value = "portal-2"
@@ -42,7 +44,9 @@ class IGDBTestCase(TestCase):
        )
        self.assertIsInstance(site.resource.item, Game)
        self.assertEqual(site.resource.item.primary_lookup_id_type, IdType.IGDB)
        self.assertEqual(site.resource.item.genre, ["Role-playing (RPG)", "Adventure"])
        self.assertEqual(
            site.resource.item.genre, ["Puzzle", "Role-playing (RPG)", "Adventure"]
        )
        self.assertEqual(
            site.resource.item.primary_lookup_id_value,
            "the-legend-of-zelda-breath-of-the-wild",
@@ -50,6 +54,8 @@ class IGDBTestCase(TestCase):


class SteamTestCase(TestCase):
    databases = "__all__"

    def test_parse(self):
        t_id_type = IdType.Steam
        t_id_value = "620"
@@ -70,10 +76,7 @@ class SteamTestCase(TestCase):
        site.get_resource_ready()
        self.assertEqual(site.ready, True)
        self.assertEqual(site.resource.metadata["title"], "Portal 2")
        self.assertEqual(
            site.resource.metadata["brief"],
            "“终身测试计划”现已升级,您可以为您自己或您的好友设计合作谜题!",
        )
        self.assertEqual(site.resource.metadata["brief"][:6], "Sequel")
        self.assertIsInstance(site.resource.item, Game)
        self.assertEqual(site.resource.item.steam, "620")
        self.assertEqual(
@@ -82,6 +85,8 @@ class SteamTestCase(TestCase):


class DoubanGameTestCase(TestCase):
    databases = "__all__"

    def test_parse(self):
        t_id_type = IdType.DoubanGame
        t_id_value = "10734307"
@@ -100,16 +105,16 @@ class DoubanGameTestCase(TestCase):
        self.assertEqual(site.ready, False)
        site.get_resource_ready()
        self.assertEqual(site.ready, True)
        self.assertEqual(site.resource.metadata["title"], "传送门2 Portal 2")
        self.assertIsInstance(site.resource.item, Game)
        self.assertEqual(site.resource.item.display_title, "Portal 2")
        self.assertEqual(site.resource.item.douban_game, "10734307")
        self.assertEqual(
            site.resource.item.genre, ["第一人称射击", "益智", "射击", "动作"]
        )
        self.assertEqual(site.resource.item.genre, ["第一人称射击", "益智"])
        self.assertEqual(site.resource.item.other_title, [])


class BangumiGameTestCase(TestCase):
    databases = "__all__"

    @use_local_response
    def test_parse(self):
        t_id_type = IdType.Bangumi
@@ -124,6 +129,8 @@ class BangumiGameTestCase(TestCase):


class BoardGameGeekTestCase(TestCase):
    databases = "__all__"

    @use_local_response
    def test_scrape(self):
        t_url = "https://boardgamegeek.com/boardgame/167791"
@@ -134,15 +141,17 @@ class BoardGameGeekTestCase(TestCase):
        self.assertEqual(site.ready, False)
        site.get_resource_ready()
        self.assertEqual(site.ready, True)
        self.assertEqual(site.resource.metadata["title"], "Terraforming Mars")
        self.assertIsInstance(site.resource.item, Game)
        self.assertEqual(site.resource.item.display_title, "Terraforming Mars")
        self.assertEqual(site.resource.item.platform, ["Boardgame"])
        self.assertEqual(site.resource.item.genre[0], "Economic")
        self.assertEqual(site.resource.item.other_title[0], "殖民火星")
        # self.assertEqual(site.resource.item.other_title[0], "殖民火星")
        self.assertEqual(site.resource.item.designer, ["Jacob Fryxelius"])


class MultiGameSitesTestCase(TestCase):
    databases = "__all__"

    @use_local_response
    def test_games(self):
        url1 = "https://www.igdb.com/games/portal-2"
@@ -13,6 +13,7 @@ from catalog.common import (
    PrimaryLookupIdDescriptor,
    jsondata,
)
from catalog.common.models import LanguageListField


class MovieInSchema(ItemInSchema):
@@ -43,9 +44,10 @@ class Movie(Item):
    douban_movie = PrimaryLookupIdDescriptor(IdType.DoubanMovie)

    METADATA_COPY_LIST = [
        "title",
        # "title",
        "localized_title",
        "orig_title",
        "other_title",
        # "other_title",
        "director",
        "playwright",
        "actor",
@@ -59,7 +61,8 @@ class Movie(Item):
        # "season_number",
        # "episodes",
        # "single_episode_length",
        "brief",
        "localized_description",
        # "brief",
    ]
    orig_title = jsondata.CharField(
        verbose_name=_("original title"), blank=True, default="", max_length=500
@@ -141,17 +144,7 @@ class Movie(Item):
        blank=True,
        default=list,
    )
    language = jsondata.ArrayField(
        verbose_name=_("language"),
        base_field=models.CharField(
            blank=True,
            default="",
            max_length=100,
        ),
        null=True,
        blank=True,
        default=list,
    )
    language = LanguageListField()
    year = jsondata.IntegerField(verbose_name=_("year"), null=True, blank=True)
    duration = jsondata.CharField(
        verbose_name=_("length"), blank=True, default="", max_length=200
@@ -4,6 +4,8 @@ from catalog.common import *


class DoubanMovieTestCase(TestCase):
    databases = "__all__"

    def test_parse(self):
        t_id = "3541415"
        t_url = "https://movie.douban.com/subject/3541415/"
@@ -28,6 +30,8 @@ class DoubanMovieTestCase(TestCase):


class TMDBMovieTestCase(TestCase):
    databases = "__all__"

    def test_parse(self):
        t_id = "293767"
        t_url = (
@@ -49,13 +53,17 @@ class TMDBMovieTestCase(TestCase):
        self.assertEqual(site.ready, False)
        self.assertEqual(site.id_value, "293767")
        site.get_resource_ready()
        self.assertEqual(site.resource.metadata["title"], "比利·林恩的中场战事")
        self.assertEqual(
            site.resource.metadata["title"], "Billy Lynn's Long Halftime Walk"
        )
        self.assertEqual(site.resource.item.primary_lookup_id_type, IdType.IMDB)
        self.assertEqual(site.resource.item.__class__.__name__, "Movie")
        self.assertEqual(site.resource.item.imdb, "tt2513074")


class IMDBMovieTestCase(TestCase):
    databases = "__all__"

    def test_parse(self):
        t_id = "tt1375666"
        t_url = "https://www.imdb.com/title/tt1375666/"
@@ -75,24 +83,28 @@ class IMDBMovieTestCase(TestCase):
        self.assertEqual(site.ready, False)
        self.assertEqual(site.id_value, "tt1375666")
        site.get_resource_ready()
        self.assertEqual(site.resource.metadata["title"], "盗梦空间")
        self.assertEqual(site.resource.metadata["title"], "Inception")
        self.assertEqual(site.resource.item.primary_lookup_id_type, IdType.IMDB)
        self.assertEqual(site.resource.item.imdb, "tt1375666")


class BangumiMovieTestCase(TestCase):
    databases = "__all__"

    @use_local_response
    def test_scrape(self):
        url = "https://bgm.tv/subject/237"
        site = SiteManager.get_site_by_url(url)
        self.assertEqual(site.id_value, "237")
        site.get_resource_ready()
        self.assertEqual(site.resource.metadata["title"], "攻壳机动队")
        self.assertEqual(site.resource.item.display_title, "GHOST IN THE SHELL")
        self.assertEqual(site.resource.item.primary_lookup_id_type, IdType.IMDB)
        self.assertEqual(site.resource.item.imdb, "tt0113568")


class MultiMovieSitesTestCase(TestCase):
    databases = "__all__"

    @use_local_response
    def test_movies(self):
        url1 = "https://www.themoviedb.org/movie/27205-inception"
@@ -41,12 +41,14 @@ class Album(Item):
    douban_music = PrimaryLookupIdDescriptor(IdType.DoubanMusic)
    spotify_album = PrimaryLookupIdDescriptor(IdType.Spotify_Album)
    METADATA_COPY_LIST = [
        "title",
        "other_title",
        # "title",
        # "other_title",
        "localized_title",
        "artist",
        "company",
        "track_list",
        "brief",
        # "brief",
        "localized_description",
        "album_type",
        "media",
        "disc_count",
@@ -69,7 +69,7 @@ class DoubanMusicTestCase(TestCase):
        self.assertIsInstance(site.resource.item, Album)
        self.assertEqual(site.resource.item.barcode, "0077774644020")
        self.assertEqual(site.resource.item.genre, ["摇滚"])
        self.assertEqual(site.resource.item.other_title, ["橡胶灵魂"])
        self.assertEqual(len(site.resource.item.localized_title), 2)


class MultiMusicSitesTestCase(TestCase):
@@ -14,6 +14,7 @@ from catalog.common import (
    ItemType,
    jsondata,
)
from catalog.common.models import LanguageListField
from catalog.common.utils import DEFAULT_ITEM_COVER


@@ -124,13 +125,7 @@ class Performance(Item):
        blank=False,
        default=list,
    )
    language = jsondata.ArrayField(
        verbose_name=_("language"),
        base_field=models.CharField(blank=False, default="", max_length=200),
        null=False,
        blank=True,
        default=list,
    )
    language = LanguageListField()
    director = jsondata.ArrayField(
        verbose_name=_("director"),
        base_field=models.CharField(blank=False, default="", max_length=500),
@@ -211,10 +206,12 @@ class Performance(Item):
        verbose_name=_("website"), max_length=1000, null=True, blank=True
    )
    METADATA_COPY_LIST = [
        "title",
        "brief",
        # "title",
        # "brief",
        "localized_title",
        "localized_description",
        "orig_title",
        "other_title",
        # "other_title",
        "genre",
        "language",
        "opening_date",
@@ -266,13 +263,7 @@ class PerformanceProduction(Item):
        blank=True,
        default=list,
    )
    language = jsondata.ArrayField(
        verbose_name=_("language"),
        base_field=models.CharField(blank=False, default="", max_length=200),
        null=False,
        blank=True,
        default=list,
    )
    language = LanguageListField()
    director = jsondata.ArrayField(
        verbose_name=_("director"),
        base_field=models.CharField(blank=False, default="", max_length=500),
@@ -353,10 +344,12 @@ class PerformanceProduction(Item):
        verbose_name=_("website"), max_length=1000, null=True, blank=True
    )
    METADATA_COPY_LIST = [
        "title",
        "brief",
        "localized_title",
        "localized_description",
        # "title",
        # "brief",
        "orig_title",
        "other_title",
        # "other_title",
        "language",
        "opening_date",
        "closing_date",
@@ -389,7 +382,9 @@ class PerformanceProduction(Item):

    @property
    def display_title(self):
        return f"{self.show.title if self.show else '♢'} {self.title}"
        return (
            f"{self.show.display_title if self.show else '♢'} {super().display_title}"
        )

    @property
    def cover_image_url(self) -> str | None:
@@ -5,6 +5,8 @@ from catalog.common.sites import crawl_related_resources_task


class DoubanDramaTestCase(TestCase):
    databases = "__all__"

    def setUp(self):
        pass

@@ -29,10 +31,10 @@ class DoubanDramaTestCase(TestCase):
        site = SiteManager.get_site_by_url(t_url)
        resource = site.get_resource_ready()
        item = site.get_item()
        self.assertEqual(item.title, "不眠之人·拿破仑")
        self.assertEqual(
            item.other_title, ["眠らない男・ナポレオン ―愛と栄光の涯(はて)に―"]
            item.display_title, "眠らない男・ナポレオン ―愛と栄光の涯(はて)に―"
        )
        self.assertEqual(len(item.localized_title), 2)
        self.assertEqual(item.genre, ["音乐剧"])
        self.assertEqual(item.troupe, ["宝塚歌剧团"])
        self.assertEqual(item.composer, ["ジェラール・プレスギュルヴィック"])
@@ -41,7 +43,7 @@ class DoubanDramaTestCase(TestCase):
        site = SiteManager.get_site_by_url(t_url)
        resource = site.get_resource_ready()
        item = site.get_item()
        self.assertEqual(item.title, "相声说垮鬼子们")
        self.assertEqual(item.display_title, "相聲說垮鬼子們")
        self.assertEqual(item.opening_date, "1997-05")
        self.assertEqual(item.location, ["臺北新舞臺"])

@@ -54,7 +56,8 @@ class DoubanDramaTestCase(TestCase):
        if item is None:
            raise ValueError()
        self.assertEqual(item.orig_title, "Iphigenie auf Tauris")
        self.assertEqual(sorted(item.other_title), ["死而复生的伊菲格尼"])
        print(item.localized_title)
        self.assertEqual(len(item.localized_title), 3)
        self.assertEqual(item.opening_date, "1974-04-21")
        self.assertEqual(item.choreographer, ["Pina Bausch"])

@@ -68,9 +71,9 @@ class DoubanDramaTestCase(TestCase):
        item = site.get_item()
        if item is None:
            raise ValueError()
        self.assertEqual(item.title, "红花侠")
        self.assertEqual(sorted(item.other_title), ["THE SCARLET PIMPERNEL"])
        self.assertEqual(len(item.brief), 545)
        self.assertEqual(item.display_title, "THE SCARLET PIMPERNEL")
        self.assertEqual(len(item.localized_title), 3)
        self.assertEqual(len(item.display_description), 545)
        self.assertEqual(item.genre, ["音乐剧"])
        # self.assertEqual(
        #     item.version, ["08星组公演版", "10年月組公演版", "17年星組公演版", "ュージカル(2017年)版"]
@@ -80,7 +83,7 @@ class DoubanDramaTestCase(TestCase):
            item.playwright, ["小池修一郎", "Baroness Orczy(原作)", "小池 修一郎"]
        )
        self.assertEqual(
            item.actor,
            sorted(item.actor, key=lambda a: a["name"]),
            [
                {"name": "安蘭けい", "role": ""},
                {"name": "柚希礼音", "role": ""},
@@ -110,7 +113,10 @@ class DoubanDramaTestCase(TestCase):
        self.assertEqual(productions[2].closing_date, "2017-03-17")
        self.assertEqual(productions[3].opening_date, "2017-11-13")
        self.assertEqual(productions[3].closing_date, None)
        self.assertEqual(productions[3].title, "ミュージカル(2017年)版")
        self.assertEqual(
            productions[3].display_title,
            "THE SCARLET PIMPERNEL ミュージカル(2017年)版",
        )
        self.assertEqual(len(productions[3].actor), 6)
        self.assertEqual(productions[3].language, ["日语"])
        self.assertEqual(productions[3].opening_date, "2017-11-13")
@@ -15,6 +15,7 @@ from catalog.common import (
    PrimaryLookupIdDescriptor,
    jsondata,
)
from catalog.common.models import LanguageListField


class PodcastInSchema(ItemInSchema):
@@ -44,6 +45,8 @@ class Podcast(Item):
        default=list,
    )

    language = LanguageListField()

    hosts = jsondata.ArrayField(
        verbose_name=_("host"),
        base_field=models.CharField(blank=True, default="", max_length=200),
@@ -55,8 +58,11 @@ class Podcast(Item):
    )

    METADATA_COPY_LIST = [
        "title",
        "brief",
        # "title",
        # "brief",
        "localized_title",
        "localized_description",
        "language",
        "hosts",
        "genre",
        "official_site",
@@ -91,7 +91,7 @@ class PodcastRSSFeedTestCase(TestCase):
            metadata["official_site"], "http://www.bbc.co.uk/programmes/b006qykl"
        )
        self.assertEqual(metadata["genre"], ["History"])
        self.assertEqual(metadata["hosts"], ["BBC Radio 4"])
        self.assertEqual(metadata["host"], ["BBC Radio 4"])
        self.assertIsNotNone(site.get_item().recent_episodes[0].title)
        self.assertIsNotNone(site.get_item().recent_episodes[0].link)
        self.assertIsNotNone(site.get_item().recent_episodes[0].media_url)
@@ -108,7 +108,7 @@ class PodcastRSSFeedTestCase(TestCase):
            metadata["official_site"], "https://www.ximalaya.com/qita/51101122/"
        )
        self.assertEqual(metadata["genre"], ["人文国学"])
        self.assertEqual(metadata["hosts"], ["看理想vistopia"])
        self.assertEqual(metadata["host"], ["看理想vistopia"])
        self.assertIsNotNone(site.get_item().recent_episodes[0].title)
        self.assertIsNotNone(site.get_item().recent_episodes[0].link)
        self.assertIsNotNone(site.get_item().recent_episodes[0].media_url)
@@ -123,7 +123,7 @@ class PodcastRSSFeedTestCase(TestCase):
        self.assertEqual(metadata["title"], "跳岛FM")
        self.assertEqual(metadata["official_site"], "https://tiaodao.typlog.io/")
        self.assertEqual(metadata["genre"], ["Arts", "Books"])
        self.assertEqual(metadata["hosts"], ["中信出版·大方"])
        self.assertEqual(metadata["host"], ["中信出版·大方"])
        self.assertIsNotNone(site.get_item().recent_episodes[0].title)
        self.assertIsNotNone(site.get_item().recent_episodes[0].link)
        self.assertIsNotNone(site.get_item().recent_episodes[0].media_url)
@@ -11,7 +11,7 @@ from lxml import html
from catalog.common import *
from catalog.models import *
from catalog.sites.spotify import get_spotify_token
from catalog.sites.tmdb import get_language_code
from catalog.sites.tmdb import TMDB_DEFAULT_LANG

SEARCH_PAGE_SIZE = 5  # not all apis support page size
logger = logging.getLogger(__name__)
@@ -173,7 +173,7 @@ class TheMovieDatabase:
    @classmethod
    def search(cls, q, page=1):
        results = []
        api_url = f"https://api.themoviedb.org/3/search/multi?query={quote_plus(q)}&page={page}&api_key={settings.TMDB_API3_KEY}&language={get_language_code()}&include_adult=true"
        api_url = f"https://api.themoviedb.org/3/search/multi?query={quote_plus(q)}&page={page}&api_key={settings.TMDB_API3_KEY}&language={TMDB_DEFAULT_LANG}&include_adult=true"
        try:
            j = requests.get(api_url, timeout=2).json()
            if j.get("results"):
@@ -10,11 +10,18 @@ Scraping the website directly.

import json
import logging
from threading import local

import dateparser

from catalog.common import *
from catalog.models import *
from common.models.lang import (
    DEFAULT_CATALOG_LANGUAGE,
    PREFERRED_LANGUAGES,
    detect_language,
)
from common.models.misc import uniq

from .douban import *

@@ -47,27 +54,58 @@ class AppleMusic(AbstractSite):
    def id_to_url(cls, id_value):
        return f"https://music.apple.com/album/{id_value}"

    def get_localized_urls(self):
        return [
            f"https://music.apple.com/{locale}/album/{self.id_value}"
            for locale in ["hk", "tw", "us", "sg", "jp", "cn", "gb", "ca", "fr"]
        ]
    def get_locales(self):
        locales = {}
        for l in PREFERRED_LANGUAGES:
            match l:
                case "zh":
                    locales.update({"zh": ["cn", "tw", "hk", "sg"]})
                case "en":
                    locales.update({"en": ["us", "gb", "ca"]})
                case "ja":
                    locales.update({"ja": ["jp"]})
                case "ko":
                    locales.update({"ko": ["kr"]})
                case "fr":
                    locales.update({"fr": ["fr", "ca"]})
        if not locales:
            locales = {"en": ["us"]}
        return locales

    def scrape(self):
        content = None
        # it's less than ideal to waterfall thru locales, a better solution
        # would be change ExternalResource to store preferred locale,
        # or to find an AppleMusic API to get available locales for an album
        for url in self.get_localized_urls():
            try:
                content = BasicDownloader(url, headers=self.headers).download().html()
                _logger.info(f"got localized content from {url}")
                break
            except Exception:
                pass
        if content is None:
        matched_content = None
        localized_title = []
        localized_desc = []
        for lang, locales in self.get_locales().items():
            for loc in locales:  # waterfall thru all locales
                url = f"https://music.apple.com/{loc}/album/{self.id_value}"
                try:
                    content = (
                        BasicDownloader(url, headers=self.headers).download().html()
                    )
                    _logger.info(f"got localized content from {url}")
                    elem = content.xpath(
                        "//script[@id='serialized-server-data']/text()"
                    )
                    txt: str = elem[0]  # type:ignore
                    page_data = json.loads(txt)[0]
                    album_data = page_data["data"]["sections"][0]["items"][0]
                    title = album_data["title"]
                    brief = album_data.get("modalPresentationDescriptor", {}).get(
                        "paragraphText", ""
                    )
                    l = detect_language(title + " " + brief)
                    localized_title.append({"lang": l, "text": title})
                    if brief:
                        localized_desc.append({"lang": l, "text": brief})
                    if lang == DEFAULT_CATALOG_LANGUAGE or not matched_content:
                        matched_content = content
                    break
                except Exception:
                    pass
        if matched_content is None:
            raise ParseError(self, f"localized content for {self.url}")
        elem = content.xpath("//script[@id='serialized-server-data']/text()")
        elem = matched_content.xpath("//script[@id='serialized-server-data']/text()")
        txt: str = elem[0]  # type:ignore
        page_data = json.loads(txt)[0]
        album_data = page_data["data"]["sections"][0]["items"][0]
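For example, with NEODB_PREFERRED_LANGUAGES=["en", "zh"], get_locales() above yields (sketch):

    {"en": ["us", "gb", "ca"], "zh": ["cn", "tw", "hk", "sg"]}

so the scraper waterfalls through /us, /gb, /ca for an English title, then /cn, /tw, /hk, /sg for a Chinese one, keeping the content that matches DEFAULT_CATALOG_LANGUAGE for the rest of the parse.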
@@ -99,12 +137,14 @@ class AppleMusic(AbstractSite):
            genre[0]
        ]  # apple treat "Music" as a genre. Thus, only the first genre is obtained.

        images = content.xpath("//source[@type='image/jpeg']/@srcset")
        images = matched_content.xpath("//source[@type='image/jpeg']/@srcset")
        image_elem: str = images[0] if images else ""  # type:ignore
        image_url = image_elem.split(" ")[0] if image_elem else None

        pd = ResourceContent(
            metadata={
                "localized_title": uniq(localized_title),
                "localized_description": uniq(localized_desc),
                "title": title,
                "brief": brief,
                "artist": artist,
@@ -26,11 +26,12 @@ class ApplePodcast(AbstractSite):
        resp = dl.download()
        r = resp.json()["results"][0]
        feed_url = r["feedUrl"]
        title = r["trackName"]
        pd = ResourceContent(
            metadata={
                "title": r["trackName"],
                "title": title,
                "feed_url": feed_url,
                "hosts": [r["artistName"]],
                "host": [r["artistName"]],
                "genres": r["genres"],
                "cover_image_url": r["artworkUrl600"],
            }
@@ -8,6 +8,7 @@ import dns.resolver

from catalog.common import *
from catalog.models import *
from common.models.lang import detect_language

_logger = logging.getLogger(__name__)

@@ -76,14 +77,19 @@ class Bandcamp(AbstractSite):
        duration = None
        company = None
        brief_nodes = content.xpath("//div[@class='tralbumData tralbum-about']/text()")
        brief = "".join(brief_nodes) if brief_nodes else None  # type:ignore
        brief = "".join(brief_nodes) if brief_nodes else ""  # type:ignore
        cover_url = self.query_str(content, "//div[@id='tralbumArt']/a/@href")
        bandcamp_page_data = json.loads(
            self.query_str(content, "//meta[@name='bc-page-properties']/@content")
        )
        bandcamp_album_id = bandcamp_page_data["item_id"]

        localized_title = [{"lang": detect_language(title), "text": title}]
        localized_desc = (
            [{"lang": detect_language(brief), "text": brief}] if brief else []
        )
        data = {
            "localized_title": localized_title,
            "localized_description": localized_desc,
            "title": title,
            "artist": artist,
            "genre": genre,
@@ -3,6 +3,7 @@ import logging
from catalog.book.utils import detect_isbn_asin
from catalog.common import *
from catalog.models import *
from common.models.lang import detect_language

_logger = logging.getLogger(__name__)

@@ -101,7 +102,14 @@ class Bangumi(AbstractSite):
            raw_img, ext = BasicImageDownloader.download_image(
                img_url, None, headers={}
            )
        titles = set(
            [title] + (other_title or []) + ([orig_title] if orig_title else [])
        )
        localized_title = [{"lang": detect_language(t), "text": t} for t in titles]
        localized_desc = [{"lang": detect_language(brief), "text": brief}]
        data = {
            "localized_title": localized_title,
            "localized_description": localized_desc,
            "preferred_model": model,
            "title": title,
            "orig_title": orig_title,
@@ -11,13 +11,7 @@ from loguru import logger

from catalog.common import *
from catalog.models import *


def _lang(s: str) -> str:
    try:
        return detect(s)
    except Exception:
        return "en"
from common.models.lang import detect_language


@SiteManager.register
@@ -43,14 +37,13 @@ class BoardGameGeek(AbstractSite):
        item = items[0]
        title = self.query_str(item, "name[@type='primary']/@value")
        other_title = self.query_list(item, "name[@type='alternate']/@value")
        zh_title = [
            t for t in other_title if _lang(t) in ["zh", "jp", "ko", "zh-cn", "zh-tw"]
        localized_title = [
            {"lang": detect_language(t), "text": t} for t in [title] + other_title
        ]
        if zh_title:
            for z in zh_title:
                other_title.remove(z)
            other_title = zh_title + other_title

        zh_title = [
            t["text"] for t in localized_title if t["lang"] in ["zh", "zh-cn", "zh-tw"]
        ]
        title = zh_title[0] if zh_title else other_title[0]
        cover_image_url = self.query_str(item, "image/text()")
        brief = html.unescape(self.query_str(item, "description/text()"))
        year = self.query_str(item, "yearpublished/@value")
@@ -62,6 +55,8 @@ class BoardGameGeek(AbstractSite):

        pd = ResourceContent(
            metadata={
                "localized_title": localized_title,
                "localized_description": [{"lang": "en", "text": brief}],
                "title": title,
                "other_title": other_title,
                "genre": category,
@@ -3,6 +3,7 @@ import logging
from catalog.book.models import *
from catalog.book.utils import *
from catalog.common import *
from common.models.lang import detect_language

from .douban import *

@@ -40,7 +41,7 @@ class BooksTW(AbstractSite):
        if not title:
            raise ParseError(self, "title")
        subtitle = None
        orig_title = content.xpath("string(//h1/following-sibling::h2)")
        orig_title = str(content.xpath("string(//h1/following-sibling::h2)"))

        authors = content.xpath("string(//div/ul/li[contains(text(),'作者:')])")
        authors = authors.strip().split(":", 1)[1].split(",") if authors else []  # type: ignore
@@ -116,9 +117,14 @@ class BooksTW(AbstractSite):
            "string(//div[contains(@class,'cover_img')]//img[contains(@class,'cover')]/@src)"
        )
        img_url = re.sub(r"&[wh]=\d+", "", img_url) if img_url else None  # type: ignore

        localized_title = [{"lang": "zh-tw", "text": title}]
        if orig_title:
            localized_title.append(
                {"lang": detect_language(orig_title), "text": orig_title}
            )
        data = {
            "title": title,
            "localized_title": localized_title,
            "subtitle": subtitle,
            "orig_title": orig_title,
            "author": authors,
@@ -63,10 +63,11 @@ class DiscogsRelease(AbstractSite):
        pd = ResourceContent(
            metadata={
                "title": title,
                "localized_title": [{"lang": "en", "text": title}],
                "artist": artist,
                "genre": genre,
                "track_list": "\n".join(track_list),
                "release_date": None,  # only year provided by API
                # "release_date": None,  # only year provided by API
                "company": company,
                "media": media,
                "disc_count": disc_count,
@@ -6,6 +6,7 @@ from lxml import html

from catalog.common import *
from catalog.models import *
from common.models.lang import detect_language

from .douban import DoubanDownloader

@@ -64,6 +65,7 @@ class DoubanDramaVersion(AbstractSite):
            raise ParseError(self, "title")
        data = {
            "title": title,
            "localized_title": [{"lang": "zh-cn", "text": title}],
            "director": [x.strip() for x in h.xpath(q.format("导演"))],
            "playwright": [x.strip() for x in h.xpath(q.format("编剧"))],
            # "actor": [x.strip() for x in h.xpath(q.format("主演"))],
@@ -238,6 +240,21 @@ class DoubanDrama(AbstractSite):
        )
        img_url_elem = h.xpath("//img[@itemprop='image']/@src")
        data["cover_image_url"] = img_url_elem[0].strip() if img_url_elem else None
        data["localized_title"] = (
            [{"lang": "zh-cn", "text": data["title"]}]
            + (
                [
                    {
                        "lang": detect_language(data["orig_title"]),
                        "text": data["orig_title"],
                    }
                ]
                if data["orig_title"]
                else []
            )
            + [{"lang": detect_language(t), "text": t} for t in data["other_title"]]
        )
        data["localized_description"] = [{"lang": "zh-cn", "text": data["brief"]}]

        pd = ResourceContent(metadata=data)
        if pd.metadata["cover_image_url"]:
@@ -4,6 +4,7 @@ import dateparser

from catalog.common import *
from catalog.models import *
from common.models.lang import detect_language

from .douban import DoubanDownloader

@@ -34,51 +35,68 @@ class DoubanGame(AbstractSite):
        if not title:
            raise ParseError(self, "title")

        elem = content.xpath("//div[@id='comments']//h2/text()")
        title2 = elem[0].strip() if len(elem) else ""
        if title2:
            sp = title2.strip().rsplit("的短评", 1)
            title2 = sp[0] if len(sp) > 1 else ""
        if title2 and title.startswith(title2):
            orig_title = title[len(title2) :].strip()
            title = title2
        else:
            orig_title = ""

        other_title_elem = content.xpath(
            "//dl[@class='game-attr']//dt[text()='别名:']/following-sibling::dd[1]/text()"
            "//dl[@class='thing-attr']//dt[text()='别名:']/following-sibling::dd[1]/text()"
        )
        other_title = (
            other_title_elem[0].strip().split(" / ") if other_title_elem else None
            other_title_elem[0].strip().split(" / ") if other_title_elem else []
        )

        developer_elem = content.xpath(
            "//dl[@class='game-attr']//dt[text()='开发商:']/following-sibling::dd[1]/text()"
            "//dl[@class='thing-attr']//dt[text()='开发商:']/following-sibling::dd[1]/text()"
        )
        developer = developer_elem[0].strip().split(" / ") if developer_elem else None

        publisher_elem = content.xpath(
            "//dl[@class='game-attr']//dt[text()='发行商:']/following-sibling::dd[1]/text()"
            "//dl[@class='thing-attr']//dt[text()='发行商:']/following-sibling::dd[1]/text()"
        )
        publisher = publisher_elem[0].strip().split(" / ") if publisher_elem else None

        platform_elem = content.xpath(
            "//dl[@class='game-attr']//dt[text()='平台:']/following-sibling::dd[1]/a/text()"
            "//dl[@class='thing-attr']//dt[text()='平台:']/following-sibling::dd[1]/a/text()"
        )
        platform = platform_elem if platform_elem else None

        genre_elem = content.xpath(
            "//dl[@class='game-attr']//dt[text()='类型:']/following-sibling::dd[1]/a/text()"
            "//dl[@class='thing-attr']//dt[text()='类型:']/following-sibling::dd[1]/a/text()"
        )
        genre = None
        if genre_elem:
            genre = [g for g in genre_elem if g != "游戏"]

        date_elem = content.xpath(
            "//dl[@class='game-attr']//dt[text()='发行日期:']/following-sibling::dd[1]/text()"
            "//dl[@class='thing-attr']//dt[text()='发行日期:']/following-sibling::dd[1]/text()"
        )
        release_date = dateparser.parse(date_elem[0].strip()) if date_elem else None
        release_date = release_date.strftime("%Y-%m-%d") if release_date else None

        brief_elem = content.xpath("//div[@class='mod item-desc']/p/text()")
        brief = "\n".join(brief_elem) if brief_elem else None
        brief = "\n".join(brief_elem) if brief_elem else ""

        img_url_elem = content.xpath(
            "//div[@class='item-subject-info']/div[@class='pic']//img/@src"
        )
        img_url = img_url_elem[0].strip() if img_url_elem else None

        titles = set([title] + other_title + ([orig_title] if orig_title else []))
        localized_title = [{"lang": detect_language(t), "text": t} for t in titles]
        localized_desc = [{"lang": detect_language(brief), "text": brief}]

        pd = ResourceContent(
            metadata={
                "localized_title": localized_title,
                "localized_description": localized_desc,
                "title": title,
                "other_title": other_title,
                "developer": developer,
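A sketch of the resulting metadata for a bilingual Douban entry (assuming detect_language maps the strings to "zh-cn" and "en"):

    # title = "传送门2", orig_title = "Portal 2"
    # localized_title ->
    #   [{"lang": "zh-cn", "text": "传送门2"}, {"lang": "en", "text": "Portal 2"}]
    # (order may vary, since titles is built from a set)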
@@ -4,6 +4,7 @@ import logging
from catalog.common import *
from catalog.movie.models import *
from catalog.tv.models import *
from common.models.lang import detect_language

from .douban import *
from .tmdb import TMDB_TV, TMDB_TVSeason, query_tmdb_tv_episode, search_tmdb_by_imdb_id
@@ -205,9 +206,18 @@ class DoubanMovie(AbstractSite):
        img_url_elem = content.xpath("//img[@rel='v:image']/@src")
        img_url = img_url_elem[0].strip() if img_url_elem else None

        titles = set(
            [title]
            + ([orig_title] if orig_title else [])
            + (other_title if other_title else [])
        )
        localized_title = [{"lang": detect_language(t), "text": t} for t in titles]
        localized_desc = [{"lang": detect_language(brief), "text": brief}]
        pd = ResourceContent(
            metadata={
                "title": title,
                "localized_title": localized_title,
                "localized_description": localized_desc,
                "orig_title": orig_title,
                "other_title": other_title,
                "imdb_code": imdb_code,
@@ -5,6 +5,7 @@ import dateparser
from catalog.common import *
from catalog.models import *
from catalog.music.utils import upc_to_gtin_13
from common.models.lang import detect_language

from .douban import DoubanDownloader

@@ -77,9 +78,19 @@ class DoubanMusic(AbstractSite):

        img_url_elem = content.xpath("//div[@id='mainpic']//img/@src")
        img_url = img_url_elem[0].strip() if img_url_elem else None

        other_elem = content.xpath(
            "//div[@id='info']//span[text()='又名:']/following-sibling::text()[1]"
        )
        other_title = other_elem[0].strip().split(" / ") if other_elem else []
        lang = detect_language(f"{title} {brief}")
        localized_title = [{"lang": lang, "text": title}]
        localized_title += [
            {"lang": detect_language(t), "text": t} for t in other_title
        ]
        data = {
            "title": title,
            "localized_title": localized_title,
            "localized_description": [{"lang": lang, "text": brief}],
            "artist": artist,
            "genre": genre,
            "release_date": release_date,
@@ -91,11 +102,6 @@ class DoubanMusic(AbstractSite):
        }
        gtin = None
        isrc = None
        other_elem = content.xpath(
            "//div[@id='info']//span[text()='又名:']/following-sibling::text()[1]"
        )
        if other_elem:
            data["other_title"] = other_elem[0].strip().split(" / ")
        other_elem = content.xpath(
            "//div[@id='info']//span[text()='专辑类型:']/following-sibling::text()[1]"
        )
@@ -129,6 +129,8 @@ class IGDB(AbstractSite):
            steam_url = website["url"]
        pd = ResourceContent(
            metadata={
                "localized_title": [{"lang": "en", "text": r["name"]}],
                "localized_description": [{"lang": "en", "text": brief}],
                "title": r["name"],
                "other_title": [],
                "developer": [developer] if developer else [],
@@ -91,6 +91,8 @@ class IMDB(AbstractSite):
                d["primaryImage"].get("url") if d.get("primaryImage") else None
            ),
        }
        data["localized_title"] = [{"lang": "en", "text": data["title"]}]
        data["localized_description"] = [{"lang": "en", "text": data["brief"]}]
        if d.get("series"):
            episode_info = d["series"].get("episodeNumber")
            if episode_info:
@@ -133,14 +135,11 @@ class IMDB(AbstractSite):
        url = f"https://m.imdb.com{show_url}episodes/?season={season_id}"
        h = BasicDownloader(url).download().html()
        episodes = []
        for e in h.xpath('//div[@id="eplist"]/div/a'):  # type: ignore
            episode_number = e.xpath(
                './span[contains(@class,"episode-list__title")]/text()'
            )[0].strip()
            episode_number = int(episode_number.split(".")[0])
            episode_title = " ".join(
                e.xpath('.//strong[@class="episode-list__title-text"]/text()')
            ).strip()
        for e in h.xpath('//article//a[@class="ipc-title-link-wrapper"]'):  # type: ignore
            title = e.xpath('div[@class="ipc-title__text"]/text()')[0].split("∙", 1)
            episode_id = title[0].strip()
            episode_number = int(episode_id.split(".")[1][1:])
            episode_title = title[1].strip()
            episode_url = e.xpath("./@href")[0]
            episode_url = "https://www.imdb.com" + episode_url
            episodes.append(
@@ -19,6 +19,8 @@ from catalog.common.downloaders import (
)
from catalog.models import *
from catalog.podcast.models import PodcastEpisode
from common.models.lang import detect_language
from journal.models.renderers import html_to_text

_logger = logging.getLogger(__name__)

@@ -86,11 +88,16 @@ class RSS(AbstractSite):
        feed = self.parse_feed_from_url(self.url)
        if not feed:
            raise ValueError(f"no feed available in {self.url}")
        title = feed["title"]
        desc = html_to_text(feed["description"])
        lang = detect_language(title + " " + desc)
        pd = ResourceContent(
            metadata={
                "title": feed["title"],
                "brief": bleach.clean(feed["description"], strip=True),
                "hosts": (
                "title": title,
                "brief": desc,
                "localized_title": [{"lang": lang, "text": title}],
                "localized_description": [{"lang": lang, "text": desc}],
                "host": (
                    [feed.get("itunes_author")] if feed.get("itunes_author") else []
                ),
                "official_site": feed.get("link"),
@@ -13,6 +13,7 @@ from django.conf import settings
from catalog.common import *
from catalog.models import *
from catalog.music.utils import upc_to_gtin_13
from common.models.lang import detect_language

from .douban import *

@@ -83,10 +84,11 @@ class Spotify(AbstractSite):
        isrc = None
        if res_data["external_ids"].get("isrc"):
            isrc = res_data["external_ids"].get("isrc")

        lang = detect_language(title)
        pd = ResourceContent(
            metadata={
                "title": title,
                "localized_title": [{"lang": lang, "text": title}],
                "artist": artist,
                "genre": genre,
                "track_list": track_list,
@@ -1,15 +1,33 @@
import logging
import re

import dateparser
from django.conf import settings

from catalog.common import *
from catalog.models import *
from common.models.lang import PREFERRED_LANGUAGES
from journal.models.renderers import html_to_text

from .igdb import search_igdb_by_3p_url

_logger = logging.getLogger(__name__)


def _get_preferred_languages():
    langs = {}
    for l in PREFERRED_LANGUAGES:
        if l == "zh":
            langs.update({"zh-cn": "zh-CN", "zh-tw": "zh-TW"})
            # zh-HK data is not good
        else:
            langs[l] = l
    return langs


STEAM_PREFERRED_LANGS = _get_preferred_languages()


@SiteManager.register
class Steam(AbstractSite):
    SITE_NAME = SiteName.Steam
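With PREFERRED_LANGUAGES=["en", "zh"], STEAM_PREFERRED_LANGS above becomes (sketch):

    {"en": "en", "zh-cn": "zh-CN", "zh-tw": "zh-TW"}

and each key is then fetched separately through the Accept-Language header in download() below.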
@@ -22,69 +40,72 @@ class Steam(AbstractSite):
    def id_to_url(cls, id_value):
        return "https://store.steampowered.com/app/" + str(id_value)

    def download(self, lang):
        api_url = (
            f"https://store.steampowered.com/api/appdetails?appids={self.id_value}"
        )
        headers = {
            "User-Agent": settings.NEODB_USER_AGENT,
            "Accept": "application/json",
            "Accept-Language": STEAM_PREFERRED_LANGS[lang],
        }
        return BasicDownloader(api_url, headers=headers).download().json()

    def scrape(self):
        i = search_igdb_by_3p_url(self.url)
        pd = i.scrape() if i else ResourceContent()

        headers = BasicDownloader.headers.copy()
        headers["Host"] = "store.steampowered.com"
        headers["Cookie"] = "wants_mature_content=1; birthtime=754700401;"
        content = BasicDownloader(self.url, headers=headers).download().html()

        title = self.query_str(content, "//div[@class='apphub_AppName']/text()")
        developer = content.xpath("//div[@id='developers_list']/a/text()")
        publisher = content.xpath(
            "//div[@class='glance_ctn']//div[@class='dev_row'][2]//a/text()"
        )
        dts = self.query_str(
            content, "//div[@class='release_date']/div[@class='date']/text()"
        )
        dt = dateparser.parse(dts.replace(" ", "")) if dts else None
        release_date = dt.strftime("%Y-%m-%d") if dt else None

        genre = content.xpath(
            "//div[@class='details_block']/b[2]/following-sibling::a/text()"
        )
        platform = ["PC"]
        try:
            brief = self.query_str(
                content, "//div[@class='game_description_snippet']/text()"
            )
        except Exception:
            brief = ""
        # try Steam images if no image from IGDB
        if pd.cover_image is None:
            pd.metadata["cover_image_url"] = self.query_str(
                content, "//img[@class='game_header_image_full']/@src"
            ).replace("header.jpg", "library_600x900.jpg")
            (
                pd.cover_image,
                pd.cover_image_extention,
            ) = BasicImageDownloader.download_image(
                pd.metadata["cover_image_url"], self.url
            )
        if pd.cover_image is None:
            pd.metadata["cover_image_url"] = self.query_str(
                content, "//img[@class='game_header_image_full']/@src"
            )
            (
                pd.cover_image,
                pd.cover_image_extention,
            ) = BasicImageDownloader.download_image(
                pd.metadata["cover_image_url"], self.url
            )
        en_data = {}
        localized_title = []
        localized_desc = []
        for lang in STEAM_PREFERRED_LANGS.keys():
            data = self.download(lang).get(self.id_value, {}).get("data", {})
            if lang == "en":
                en_data = data
            localized_title.append({"lang": lang, "text": data["name"]})
            desc = html_to_text(data["detailed_description"])
            localized_desc.append({"lang": lang, "text": desc})
        if not en_data:
            en_data = self.download("en")
        if not en_data:
            raise ParseError(self, "id")
        # merge data from IGDB, use localized Steam data if available
        d = {
            "developer": developer,
            "publisher": publisher,
            "release_date": release_date,
            "genre": genre,
            "platform": platform,
            "developer": en_data["developers"],
            "publisher": en_data["publishers"],
            "release_date": en_data["release_date"].get("date"),
            "genre": [g["description"] for g in en_data["genres"]],
            "platform": ["PC"],
        }
        if en_data["release_date"].get("date"):
            d["release_date"] = en_data["release_date"].get("date")
        d.update(pd.metadata)
        d.update(
            {
                "localized_title": localized_title,
                "localized_description": localized_desc,
            }
        )
        pd.metadata = d
        if title:
            pd.metadata["title"] = title
        if brief:
            pd.metadata["brief"] = brief

        # try Steam images if no image from IGDB
        header = en_data.get("header_image")
        if header:
            if pd.cover_image is None:
                cover = header.replace("header.jpg", "library_600x900_2x.jpg")
                pd.metadata["cover_image_url"] = cover
                (
                    pd.cover_image,
                    pd.cover_image_extention,
                ) = BasicImageDownloader.download_image(
                    pd.metadata["cover_image_url"], self.url
                )
            if pd.cover_image is None:
                pd.metadata["cover_image_url"] = header
                (
                    pd.cover_image,
                    pd.cover_image_extention,
                ) = BasicImageDownloader.download_image(
                    pd.metadata["cover_image_url"], self.url
                )
        return pd
|
@@ -1,5 +1,13 @@
"""
The Movie Database


these language codes from TMDB are currently not in ISO 639-1:
{'iso_639_1': 'xx', 'english_name': 'No Language', 'name': 'No Language'}
{'iso_639_1': 'sh', 'english_name': 'Serbo-Croatian', 'name': ''} - deprecated; split into several codes
{'iso_639_1': 'mo', 'english_name': 'Moldavian', 'name': ''} - deprecated in favor of ro-MD
{'iso_639_1': 'cn', 'english_name': 'Cantonese', 'name': '粤语'} - non-standard stand-in for yue

"""

import logging

@@ -10,13 +18,14 @@ from django.conf import settings
from catalog.common import *
from catalog.movie.models import *
from catalog.tv.models import *
from common.models.lang import PREFERRED_LANGUAGES

from .douban import *

_logger = logging.getLogger(__name__)


def get_language_code():
def _get_language_code():
    match settings.LANGUAGE_CODE:
        case "zh-hans":
            return "zh-CN"

@@ -26,14 +35,28 @@ def get_language_code():
            return "en-US"


def _get_preferred_languages():
    langs = {}
    for l in PREFERRED_LANGUAGES:
        if l == "zh":
            langs.update({"zh-cn": "zh-CN", "zh-tw": "zh-TW", "zh-hk": "zh-HK"})
        else:
            langs[l] = l
    return langs


TMDB_DEFAULT_LANG = _get_language_code()
TMDB_PREFERRED_LANGS = _get_preferred_languages()
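# Illustration only (not part of the commit): with the default preferred
# languages ["en", "zh"], TMDB_PREFERRED_LANGS evaluates to
#     {"en": "en", "zh-cn": "zh-CN", "zh-tw": "zh-TW", "zh-hk": "zh-HK"}
# while TMDB_DEFAULT_LANG is "zh-CN" when LANGUAGE_CODE is "zh-hans" and
# otherwise falls through to "en-US" (the zh-hant branch is elided in this hunk).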


def search_tmdb_by_imdb_id(imdb_id):
    tmdb_api_url = f"https://api.themoviedb.org/3/find/{imdb_id}?api_key={settings.TMDB_API3_KEY}&language={get_language_code()}&external_source=imdb_id"
    tmdb_api_url = f"https://api.themoviedb.org/3/find/{imdb_id}?api_key={settings.TMDB_API3_KEY}&language={TMDB_DEFAULT_LANG}&external_source=imdb_id"
    res_data = BasicDownloader(tmdb_api_url).download().json()
    return res_data


def query_tmdb_tv_episode(tv, season, episode):
    tmdb_api_url = f"https://api.themoviedb.org/3/tv/{tv}/season/{season}/episode/{episode}?api_key={settings.TMDB_API3_KEY}&language=zh-CN&append_to_response=external_ids"
    tmdb_api_url = f"https://api.themoviedb.org/3/tv/{tv}/season/{season}/episode/{episode}?api_key={settings.TMDB_API3_KEY}&language={TMDB_DEFAULT_LANG}&append_to_response=external_ids"
    res_data = BasicDownloader(tmdb_api_url).download().json()
    return res_data

@@ -58,61 +81,42 @@ class TMDB_Movie(AbstractSite):
        return f"https://www.themoviedb.org/movie/{id_value}"

    def scrape(self):
        is_series = False
        if is_series:
            api_url = f"https://api.themoviedb.org/3/tv/{self.id_value}?api_key={settings.TMDB_API3_KEY}&language={get_language_code()}&append_to_response=external_ids,credits"
        else:
            api_url = f"https://api.themoviedb.org/3/movie/{self.id_value}?api_key={settings.TMDB_API3_KEY}&language={get_language_code()}&append_to_response=external_ids,credits"

        res_data = BasicDownloader(api_url).download().json()

        if is_series:
            title = res_data["name"]
            orig_title = res_data["original_name"]
            year = (
                int(res_data["first_air_date"].split("-")[0])
                if res_data["first_air_date"]
                else None
            )
            imdb_code = res_data["external_ids"]["imdb_id"]
            showtime = (
                [{"time": res_data["first_air_date"], "region": "首播日期"}]
                if res_data["first_air_date"]
                else None
            )
            duration = None
        else:
            title = res_data["title"]
            orig_title = res_data["original_title"]
            year = (
                int(res_data["release_date"].split("-")[0])
                if res_data["release_date"]
                else None
            )
            showtime = (
                [{"time": res_data["release_date"], "region": "发布日期"}]
                if res_data["release_date"]
                else None
            )
            imdb_code = res_data["imdb_id"]
            # in minutes
            duration = res_data["runtime"] if res_data["runtime"] else None
        res_data = {}
        localized_title = []
        localized_desc = []
        # GET the api url in every preferred locale
        # note: there seems to be no way to tell whether TMDB lacks a certain translation
        for lang, lang_param in reversed(TMDB_PREFERRED_LANGS.items()):
            api_url = f"https://api.themoviedb.org/3/movie/{self.id_value}?api_key={settings.TMDB_API3_KEY}&language={lang_param}&append_to_response=external_ids,credits"
            res_data = BasicDownloader(api_url).download().json()
            localized_title.append({"lang": lang, "text": res_data["title"]})
            localized_desc.append({"lang": lang, "text": res_data["overview"]})
        title = res_data["title"]
        orig_title = res_data["original_title"]
        year = (
            int(res_data["release_date"].split("-")[0])
            if res_data["release_date"]
            else None
        )
        showtime = (
            [{"time": res_data["release_date"], "region": "发布日期"}]
            if res_data["release_date"]
            else None
        )
        imdb_code = res_data["imdb_id"]
        # in minutes
        duration = res_data["runtime"] if res_data["runtime"] else None

        genre = [x["name"] for x in res_data["genres"]]
        language = list(map(lambda x: x["name"], res_data["spoken_languages"]))
        brief = res_data["overview"]

        if is_series:
            director = list(map(lambda x: x["name"], res_data["created_by"]))
        else:
            director = list(
                map(
                    lambda x: x["name"],
                    filter(
                        lambda c: c["job"] == "Director", res_data["credits"]["crew"]
                    ),
                )
        director = list(
            map(
                lambda x: x["name"],
                filter(lambda c: c["job"] == "Director", res_data["credits"]["crew"]),
            )
        )
        playwright = list(
            map(
                lambda x: x["name"],

@@ -128,19 +132,21 @@ class TMDB_Movie(AbstractSite):
        # other_info['Metacritic评分'] = res_data['metacriticRating']
        # other_info['奖项'] = res_data['awards']
        # other_info['TMDB_ID'] = id
        if is_series:
            other_info["Seasons"] = res_data["number_of_seasons"]
            other_info["Episodes"] = res_data["number_of_episodes"]
        # if is_series:
        #     other_info["Seasons"] = res_data["number_of_seasons"]
        #     other_info["Episodes"] = res_data["number_of_episodes"]

        # TODO: use GET /configuration to get base url
        img_url = (
            ("https://image.tmdb.org/t/p/original/" + res_data["poster_path"])
            if res_data["poster_path"] is not None
            if res_data.get("poster_path") is not None
            else None
        )

        pd = ResourceContent(
            metadata={
                "localized_title": localized_title,
                "localized_description": localized_desc,
                "title": title,
                "orig_title": orig_title,
                "other_title": [],

@@ -192,62 +198,33 @@ class TMDB_TV(AbstractSite):
        return f"https://www.themoviedb.org/tv/{id_value}"

    def scrape(self):
        is_series = True
        if is_series:
            api_url = f"https://api.themoviedb.org/3/tv/{self.id_value}?api_key={settings.TMDB_API3_KEY}&language={get_language_code()}&append_to_response=external_ids,credits"
        else:
            api_url = f"https://api.themoviedb.org/3/movie/{self.id_value}?api_key={settings.TMDB_API3_KEY}&language={get_language_code()}&append_to_response=external_ids,credits"

        res_data = BasicDownloader(api_url).download().json()

        if is_series:
            title = res_data["name"]
            orig_title = res_data["original_name"]
            year = (
                int(res_data["first_air_date"].split("-")[0])
                if res_data["first_air_date"]
                else None
            )
            imdb_code = res_data["external_ids"]["imdb_id"]
            showtime = (
                [{"time": res_data["first_air_date"], "region": "首播日期"}]
                if res_data["first_air_date"]
                else None
            )
            duration = None
        else:
            title = res_data["title"]
            orig_title = res_data["original_title"]
            year = (
                int(res_data["release_date"].split("-")[0])
                if res_data["release_date"]
                else None
            )
            showtime = (
                [{"time": res_data["release_date"], "region": "发布日期"}]
                if res_data["release_date"]
                else None
            )
            imdb_code = res_data["imdb_id"]
            # in minutes
            duration = res_data["runtime"] if res_data["runtime"] else None
        res_data = {}
        localized_title = []
        localized_desc = []
        for lang, lang_param in reversed(TMDB_PREFERRED_LANGS.items()):
            api_url = f"https://api.themoviedb.org/3/tv/{self.id_value}?api_key={settings.TMDB_API3_KEY}&language={lang_param}&append_to_response=external_ids,credits"
            res_data = BasicDownloader(api_url).download().json()
            localized_title.append({"lang": lang, "text": res_data["name"]})
            localized_desc.append({"lang": lang, "text": res_data["overview"]})

        title = res_data["name"]
        orig_title = res_data["original_name"]
        year = (
            int(res_data["first_air_date"].split("-")[0])
            if res_data["first_air_date"]
            else None
        )
        imdb_code = res_data["external_ids"]["imdb_id"]
        showtime = (
            [{"time": res_data["first_air_date"], "region": "首播日期"}]
            if res_data["first_air_date"]
            else None
        )
        duration = None
        genre = [x["name"] for x in res_data["genres"]]

        language = list(map(lambda x: x["name"], res_data["spoken_languages"]))
        brief = res_data["overview"]

        if is_series:
            director = list(map(lambda x: x["name"], res_data["created_by"]))
        else:
            director = list(
                map(
                    lambda x: x["name"],
                    filter(
                        lambda c: c["job"] == "Director", res_data["credits"]["crew"]
                    ),
                )
            )
        director = list(map(lambda x: x["name"], res_data["created_by"]))
        playwright = list(
            map(
                lambda x: x["name"],

@@ -256,24 +233,15 @@ class TMDB_TV(AbstractSite):
        )
        actor = list(map(lambda x: x["name"], res_data["credits"]["cast"]))
        area = []

        other_info = {}
        # other_info['TMDB评分'] = res_data['vote_average']
        # other_info['分级'] = res_data['contentRating']
        # other_info['Metacritic评分'] = res_data['metacriticRating']
        # other_info['奖项'] = res_data['awards']
        # other_info['TMDB_ID'] = id
        if is_series:
            other_info["Seasons"] = res_data["number_of_seasons"]
            other_info["Episodes"] = res_data["number_of_episodes"]

        other_info["Seasons"] = res_data["number_of_seasons"]
        other_info["Episodes"] = res_data["number_of_episodes"]
        # TODO: use GET /configuration to get base url
        img_url = (
            ("https://image.tmdb.org/t/p/original/" + res_data["poster_path"])
            if res_data["poster_path"] is not None
            if res_data.get("poster_path") is not None
            else None
        )

        season_links = list(
            map(
                lambda s: {

@@ -288,6 +256,8 @@ class TMDB_TV(AbstractSite):
        )
        pd = ResourceContent(
            metadata={
                "localized_title": localized_title,
                "localized_description": localized_desc,
                "title": title,
                "orig_title": orig_title,
                "other_title": [],

@@ -313,7 +283,6 @@ class TMDB_TV(AbstractSite):
        )
        if imdb_code:
            pd.lookup_ids[IdType.IMDB] = imdb_code

        if pd.metadata["cover_image_url"]:
            imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
            try:

@@ -357,22 +326,31 @@ class TMDB_TVSeason(AbstractSite):
        show_resource = site.get_resource_ready(auto_create=False, auto_link=False)
        if not show_resource:
            raise ValueError(f"TMDB: failed to get show for season {self.url}")
        api_url = f"https://api.themoviedb.org/3/tv/{show_id}/season/{season_id}?api_key={settings.TMDB_API3_KEY}&language={get_language_code()}&append_to_response=external_ids,credits"
        d = BasicDownloader(api_url).download().json()
        if not d.get("id"):

        res_data = {}
        localized_title = []
        localized_desc = []
        for lang, lang_param in reversed(TMDB_PREFERRED_LANGS.items()):
            api_url = f"https://api.themoviedb.org/3/tv/{show_id}/season/{season_id}?api_key={settings.TMDB_API3_KEY}&language={lang_param}&append_to_response=external_ids,credits"
            res_data = BasicDownloader(api_url).download().json()
            localized_title.append({"lang": lang, "text": res_data["name"]})
            localized_desc.append({"lang": lang, "text": res_data["overview"]})
        if not res_data.get("id"):
            raise ParseError(self, "id")
        pd = ResourceContent(
            metadata=_copy_dict(
                d,
                {
                    "name": "title",
                    "overview": "brief",
                    "air_date": "air_date",
                    "season_number": 0,
                    "external_ids": [],
                },
            )
        d = res_data
        r = _copy_dict(
            res_data,
            {
                "name": "title",
                "overview": "brief",
                "air_date": "air_date",
                "season_number": 0,
                "external_ids": [],
            },
        )
        r["localized_title"] = localized_title
        r["localized_description"] = localized_desc
        pd = ResourceContent(metadata=r)
        pd.metadata["title"] = (
            show_resource.metadata["title"] + " " + pd.metadata["title"]
        )

@@ -388,12 +366,12 @@ class TMDB_TVSeason(AbstractSite):
        pd.lookup_ids[IdType.IMDB] = d["external_ids"].get("imdb_id")
        pd.metadata["cover_image_url"] = (
            ("https://image.tmdb.org/t/p/original/" + d["poster_path"])
            if d["poster_path"]
            if d.get("poster_path")
            else None
        )
        pd.metadata["title"] = (
            pd.metadata["title"]
            if pd.metadata["title"]
            if pd.metadata.get("title")
            else f'Season {d["season_number"]}'
        )
        pd.metadata["episode_number_list"] = list(

@@ -429,7 +407,7 @@ class TMDB_TVSeason(AbstractSite):
        )
        else:
            ep = pd.metadata["episode_number_list"][0]
            api_url2 = f"https://api.themoviedb.org/3/tv/{v[0]}/season/{v[1]}/episode/{ep}?api_key={settings.TMDB_API3_KEY}&language={get_language_code()}&append_to_response=external_ids,credits"
            api_url2 = f"https://api.themoviedb.org/3/tv/{v[0]}/season/{v[1]}/episode/{ep}?api_key={settings.TMDB_API3_KEY}&language={TMDB_DEFAULT_LANG}&append_to_response=external_ids,credits"
            d2 = BasicDownloader(api_url2).download().json()
            if not d2.get("id"):
                raise ParseError(self, "first episode id for season")

@@ -469,7 +447,7 @@ class TMDB_TVEpisode(AbstractSite):
        episode_id = v[2]
        site = TMDB_TV(TMDB_TV.id_to_url(show_id))
        show_resource = site.get_resource_ready(auto_create=False, auto_link=False)
        api_url = f"https://api.themoviedb.org/3/tv/{show_id}/season/{season_id}/episode/{episode_id}?api_key={settings.TMDB_API3_KEY}&language={get_language_code()}&append_to_response=external_ids,credits"
        api_url = f"https://api.themoviedb.org/3/tv/{show_id}/season/{season_id}/episode/{episode_id}?api_key={settings.TMDB_API3_KEY}&language={TMDB_DEFAULT_LANG}&append_to_response=external_ids,credits"
        d = BasicDownloader(api_url).download().json()
        if not d.get("id"):
            raise ParseError(self, "id")

@@ -44,7 +44,7 @@
    </div>
    <div>
        {% if item.language %}
            {% trans 'language' %}: {{ item.language }}
            {% trans 'language' %}: {{ item.get_language_display }}
        {% endif %}
    </div>
    <div>

@@ -30,7 +30,7 @@
    <div id="item-title" class="middle">
        {% if item.is_deleted %}[DELETED]{% endif %}
        {% if item.merged_to_item %}
            [MERGED TO <a href="{{ item.merged_to_item.url }}">{{ item.merged_to_item.title }}</a>]
            [MERGED TO <a href="{{ item.merged_to_item.url }}">{{ item.merged_to_item.display_title }}</a>]
        {% endif %}
        <h1>
            {{ item.display_title }}

@@ -47,6 +47,7 @@ from catalog.common import (
    PrimaryLookupIdDescriptor,
    jsondata,
)
from catalog.common.models import LANGUAGE_CHOICES_JSONFORM, LanguageListField


class TVShowInSchema(ItemInSchema):

@@ -112,14 +113,16 @@ class TVShow(Item):
    )

    METADATA_COPY_LIST = [
        "title",
        # "title",
        "localized_title",
        "season_count",
        "orig_title",
        "other_title",
        # "other_title",
        "director",
        "playwright",
        "actor",
        "brief",
        # "brief",
        "localized_description",
        "genre",
        "showtime",
        "site",
@@ -210,17 +213,8 @@ class TVShow(Item):
        blank=True,
        default=list,
    )
    language = jsondata.ArrayField(
        verbose_name=_("language"),
        base_field=models.CharField(
            blank=True,
            default="",
            max_length=100,
        ),
        null=True,
        blank=True,
        default=list,
    )
    language = LanguageListField()

    year = jsondata.IntegerField(verbose_name=_("year"), null=True, blank=True)
    single_episode_length = jsondata.IntegerField(
        verbose_name=_("episode length"), null=True, blank=True
@@ -374,16 +368,16 @@ class TVSeason(Item):
        blank=True,
        default=list,
    )
    language = jsondata.ArrayField(
    language = jsondata.JSONField(
        verbose_name=_("language"),
        base_field=models.CharField(
            blank=True,
            default="",
            max_length=100,
        ),
        # base_field=models.CharField(blank=True, default="", max_length=100, choices=LANGUAGE_CHOICES ),
        null=True,
        blank=True,
        default=list,
        schema={
            "type": "list",
            "items": {"type": "string", "choices": LANGUAGE_CHOICES_JSONFORM},
        },
    )
    year = jsondata.IntegerField(verbose_name=_("year"), null=True, blank=True)
    single_episode_length = jsondata.IntegerField(

@@ -6,6 +6,8 @@ from catalog.tv.models import *


class JSONFieldTestCase(TestCase):
    databases = "__all__"

    def test_legacy_data(self):
        o = TVShow()
        self.assertEqual(o.other_title, [])

@@ -18,6 +20,8 @@ class JSONFieldTestCase(TestCase):


class TMDBTVTestCase(TestCase):
    databases = "__all__"

    def test_parse(self):
        t_id = "57243"
        t_url = "https://www.themoviedb.org/tv/57243-doctor-who"

@@ -43,13 +47,15 @@ class TMDBTVTestCase(TestCase):
        self.assertEqual(site.id_value, "57243")
        site.get_resource_ready()
        self.assertEqual(site.ready, True)
        self.assertEqual(site.resource.metadata["title"], "神秘博士")
        self.assertEqual(site.resource.metadata["title"], "Doctor Who")
        self.assertEqual(site.resource.item.primary_lookup_id_type, IdType.IMDB)
        self.assertEqual(site.resource.item.__class__.__name__, "TVShow")
        self.assertEqual(site.resource.item.imdb, "tt0436992")


class TMDBTVSeasonTestCase(TestCase):
    databases = "__all__"

    def test_parse(self):
        t_id = "57243-11"
        t_url = "https://www.themoviedb.org/tv/57243-doctor-who/season/11"

@@ -70,7 +76,7 @@ class TMDBTVSeasonTestCase(TestCase):
        self.assertEqual(site.id_value, "57243-4")
        site.get_resource_ready()
        self.assertEqual(site.ready, True)
        self.assertEqual(site.resource.metadata["title"], "神秘博士 第 4 季")
        self.assertEqual(site.resource.metadata["title"], "Doctor Who Series 4")
        self.assertEqual(site.resource.item.primary_lookup_id_type, IdType.IMDB)
        self.assertEqual(site.resource.item.__class__.__name__, "TVSeason")
        self.assertEqual(site.resource.item.imdb, "tt1159991")

@@ -79,6 +85,8 @@ class TMDBTVSeasonTestCase(TestCase):


class TMDBEpisodeTestCase(TestCase):
    databases = "__all__"

    @use_local_response
    def test_scrape_tmdb(self):
        t_url = "https://www.themoviedb.org/tv/57243-doctor-who/season/4/episode/1"

@@ -87,7 +95,7 @@ class TMDBEpisodeTestCase(TestCase):
        self.assertEqual(site.id_value, "57243-4-1")
        site.get_resource_ready()
        self.assertEqual(site.ready, True)
        self.assertEqual(site.resource.metadata["title"], "活宝搭档")
        self.assertEqual(site.resource.metadata["title"], "Partners in Crime")
        self.assertEqual(site.resource.item.primary_lookup_id_type, IdType.IMDB)
        self.assertEqual(site.resource.item.__class__.__name__, "TVEpisode")
        self.assertEqual(site.resource.item.imdb, "tt1159991")

@@ -98,6 +106,8 @@ class TMDBEpisodeTestCase(TestCase):


class DoubanMovieTVTestCase(TestCase):
    databases = "__all__"

    @use_local_response
    def test_scrape(self):
        url3 = "https://movie.douban.com/subject/3627919/"

@@ -122,6 +132,8 @@ class DoubanMovieTVTestCase(TestCase):


class MultiTVSitesTestCase(TestCase):
    databases = "__all__"

    @use_local_response
    def test_tvshows(self):
        url1 = "https://www.themoviedb.org/tv/57243-doctor-who"

@@ -170,6 +182,8 @@ class MultiTVSitesTestCase(TestCase):


class MovieTVModelRecastTestCase(TestCase):
    databases = "__all__"

    @use_local_response
    def test_recast(self):
        from catalog.models import Movie, TVShow

@@ -178,13 +192,15 @@ class MovieTVModelRecastTestCase(TestCase):
        p2 = SiteManager.get_site_by_url(url2).get_resource_ready()
        tv = p2.item
        self.assertEqual(tv.class_name, "tvshow")
        self.assertEqual(tv.title, "神秘博士")
        self.assertEqual(tv.display_title, "Doctor Who")
        movie = tv.recast_to(Movie)
        self.assertEqual(movie.class_name, "movie")
        self.assertEqual(movie.title, "神秘博士")
        self.assertEqual(movie.display_title, "Doctor Who")


class IMDBTestCase(TestCase):
    databases = "__all__"

    @use_local_response
    def test_fetch_episodes(self):
        t_url = "https://movie.douban.com/subject/1920763/"

@@ -243,7 +259,7 @@ class IMDBTestCase(TestCase):
        self.assertEqual(site.id_value, "tt1159991")
        site.get_resource_ready()
        self.assertEqual(site.ready, True)
        self.assertEqual(site.resource.metadata["title"], "活宝搭档")
        self.assertEqual(site.resource.metadata["title"], "Partners in Crime")
        self.assertEqual(site.resource.item.primary_lookup_id_type, IdType.IMDB)
        self.assertEqual(site.resource.item.__class__.__name__, "TVEpisode")
        self.assertEqual(site.resource.item.imdb, "tt1159991")

9 common/models/__init__.py Normal file
@@ -0,0 +1,9 @@
from .cron import BaseJob, JobManager
from .lang import (
    DEFAULT_CATALOG_LANGUAGE,
    LANGUAGE_CHOICES,
    LOCALE_CHOICES,
    SCRIPT_CHOICES,
    detect_language,
)
from .misc import uniq
319 common/models/lang.py Normal file
@@ -0,0 +1,319 @@
"""
language support utilities

https://en.wikipedia.org/wiki/IETF_language_tag
"""

import re
from typing import Any

from django.conf import settings
from django.utils.translation import get_language
from django.utils.translation import gettext_lazy as _
from langdetect import detect
from loguru import logger

PREFERRED_LANGUAGES: list[str] = settings.PREFERRED_LANGUAGES

DEFAULT_CATALOG_LANGUAGE = PREFERRED_LANGUAGES[0] if PREFERRED_LANGUAGES else "en"

ISO_639_1 = {
    "aa": _("Afar"),
    "af": _("Afrikaans"),
    "ak": _("Akan"),
    "an": _("Aragonese"),
    "as": _("Assamese"),
    "av": _("Avaric"),
    "ae": _("Avestan"),
    "ay": _("Aymara"),
    "az": _("Azerbaijani"),
    "ba": _("Bashkir"),
    "bm": _("Bambara"),
    "bi": _("Bislama"),
    "bo": _("Tibetan"),
    "br": _("Breton"),
    "ca": _("Catalan"),
    "cs": _("Czech"),
    "ce": _("Chechen"),
    "cu": _("Slavic"),
    "cv": _("Chuvash"),
    "kw": _("Cornish"),
    "co": _("Corsican"),
    "cr": _("Cree"),
    "cy": _("Welsh"),
    "da": _("Danish"),
    "de": _("German"),
    "dv": _("Divehi"),
    "dz": _("Dzongkha"),
    "eo": _("Esperanto"),
    "et": _("Estonian"),
    "eu": _("Basque"),
    "fo": _("Faroese"),
    "fj": _("Fijian"),
    "fi": _("Finnish"),
    "fr": _("French"),
    "fy": _("Frisian"),
    "ff": _("Fulah"),
    "gd": _("Gaelic"),
    "ga": _("Irish"),
    "gl": _("Galician"),
    "gv": _("Manx"),
    "gn": _("Guarani"),
    "gu": _("Gujarati"),
    "ht": _("Haitian; Haitian Creole"),
    "ha": _("Hausa"),
    "sh": _("Serbo-Croatian"),
    "hz": _("Herero"),
    "ho": _("Hiri Motu"),
    "hr": _("Croatian"),
    "hu": _("Hungarian"),
    "ig": _("Igbo"),
    "io": _("Ido"),
    "ii": _("Yi"),
    "iu": _("Inuktitut"),
    "ie": _("Interlingue"),
    "ia": _("Interlingua"),
    "id": _("Indonesian"),
    "ik": _("Inupiaq"),
    "is": _("Icelandic"),
    "it": _("Italian"),
    "jv": _("Javanese"),
    "ja": _("Japanese"),
    "kl": _("Kalaallisut"),
    "kn": _("Kannada"),
    "ks": _("Kashmiri"),
    "kr": _("Kanuri"),
    "kk": _("Kazakh"),
    "km": _("Khmer"),
    "ki": _("Kikuyu"),
    "rw": _("Kinyarwanda"),
    "ky": _("Kirghiz"),
    "kv": _("Komi"),
    "kg": _("Kongo"),
    "ko": _("Korean"),
    "kj": _("Kuanyama"),
    "ku": _("Kurdish"),
    "lo": _("Lao"),
    "la": _("Latin"),
    "lv": _("Latvian"),
    "li": _("Limburgish"),
    "ln": _("Lingala"),
    "lt": _("Lithuanian"),
    "lb": _("Letzeburgesch"),
    "lu": _("Luba-Katanga"),
    "lg": _("Ganda"),
    "mh": _("Marshall"),
    "ml": _("Malayalam"),
    "mr": _("Marathi"),
    "mg": _("Malagasy"),
    "mt": _("Maltese"),
    "mo": _("Moldavian"),
    "mn": _("Mongolian"),
    "mi": _("Maori"),
    "ms": _("Malay"),
    "my": _("Burmese"),
    "na": _("Nauru"),
    "nv": _("Navajo"),
    "nr": _("Ndebele"),
    "nd": _("Ndebele"),
    "ng": _("Ndonga"),
    "ne": _("Nepali"),
    "nl": _("Dutch"),
    "nn": _("Norwegian Nynorsk"),
    "nb": _("Norwegian Bokmål"),
    "no": _("Norwegian"),
    "ny": _("Chichewa; Nyanja"),
    "oc": _("Occitan"),
    "oj": _("Ojibwa"),
    "or": _("Oriya"),
    "om": _("Oromo"),
    "os": _("Ossetian; Ossetic"),
    "pi": _("Pali"),
    "pl": _("Polish"),
    "pt": _("Portuguese"),
    "qu": _("Quechua"),
    "rm": _("Raeto-Romance"),
    "ro": _("Romanian"),
    "rn": _("Rundi"),
    "ru": _("Russian"),
    "sg": _("Sango"),
    "sa": _("Sanskrit"),
    "si": _("Sinhalese"),
    "sk": _("Slovak"),
    "sl": _("Slovenian"),
    "se": _("Northern Sami"),
    "sm": _("Samoan"),
    "sn": _("Shona"),
    "sd": _("Sindhi"),
    "so": _("Somali"),
    "st": _("Sotho"),
    "es": _("Spanish"),
    "sq": _("Albanian"),
    "sc": _("Sardinian"),
    "sr": _("Serbian"),
    "ss": _("Swati"),
    "su": _("Sundanese"),
    "sw": _("Swahili"),
    "sv": _("Swedish"),
    "ty": _("Tahitian"),
    "ta": _("Tamil"),
    "tt": _("Tatar"),
    "te": _("Telugu"),
    "tg": _("Tajik"),
    "tl": _("Tagalog"),
    "th": _("Thai"),
    "ti": _("Tigrinya"),
    "to": _("Tonga"),
    "tn": _("Tswana"),
    "ts": _("Tsonga"),
    "tk": _("Turkmen"),
    "tr": _("Turkish"),
    "tw": _("Twi"),
    "ug": _("Uighur"),
    "uk": _("Ukrainian"),
    "ur": _("Urdu"),
    "uz": _("Uzbek"),
    "ve": _("Venda"),
    "vi": _("Vietnamese"),
    "vo": _("Volapük"),
    "wa": _("Walloon"),
    "wo": _("Wolof"),
    "xh": _("Xhosa"),
    "yi": _("Yiddish"),
    "za": _("Zhuang"),
    "zu": _("Zulu"),
    "ab": _("Abkhazian"),
    "zh": _("Chinese"),
    "ps": _("Pushto"),
    "am": _("Amharic"),
    "ar": _("Arabic"),
    "bg": _("Bulgarian"),
    "mk": _("Macedonian"),
    "el": _("Greek"),
    "fa": _("Persian"),
    "he": _("Hebrew"),
    "hi": _("Hindi"),
    "hy": _("Armenian"),
    "en": _("English"),
    "ee": _("Ewe"),
    "ka": _("Georgian"),
    "pa": _("Punjabi"),
    "bn": _("Bengali"),
    "bs": _("Bosnian"),
    "ch": _("Chamorro"),
    "be": _("Belarusian"),
    "yo": _("Yoruba"),
    "x": _("Unknown or Other"),
}
TOP_USED_LANG = [
    "en",
    "de",
    "es",
    "zh",
    "fr",
    "ja",
    "it",
    "ru",
    "pt",
    "nl",
    "ko",
    "hi",
    "ar",
    "bn",
]
ZH_LOCALE_SUBTAGS_PRIO = {
    "zh-cn": _("Simplified Chinese (Mainland)"),
    "zh-tw": _("Traditional Chinese (Taiwan)"),
    "zh-hk": _("Traditional Chinese (Hongkong)"),
}
ZH_LOCALE_SUBTAGS = {
    "zh-sg": _("Simplified Chinese (Singapore)"),
    "zh-my": _("Simplified Chinese (Malaysia)"),
    "zh-mo": _("Traditional Chinese (Macau)"),
}
ZH_LANGUAGE_SUBTAGS_PRIO = {
    "cmn": _("Mandarin Chinese"),
    "yue": _("Yue Chinese"),
}
ZH_LANGUAGE_SUBTAGS = {
    "nan": _("Min Nan Chinese"),
    "wuu": _("Wu Chinese"),
    "hak": _("Hakka Chinese"),
}

ZH_LOCALE_SUBTAGS_PRIO.keys()


def get_base_lang_list():
    langs = {}
    for k in PREFERRED_LANGUAGES + TOP_USED_LANG:
        if k not in langs:
            if k in ISO_639_1:
                langs[k] = ISO_639_1[k]
            else:
                logger.error(f"{k} is not a supported ISO-639-1 language tag")
    for k, v in ISO_639_1.items():
        if k not in langs:
            langs[k] = v
    return langs


BASE_LANG_LIST: dict[str, Any] = get_base_lang_list()


def get_locale_choices():
    choices = []
    for k, v in BASE_LANG_LIST.items():
        if k == "zh":
            choices += ZH_LOCALE_SUBTAGS_PRIO.items()
        else:
            choices.append((k, v))
    choices += ZH_LOCALE_SUBTAGS.items()
    return choices


def get_script_choices():
    return list(BASE_LANG_LIST.items())


def get_language_choices():
    choices = []
    for k, v in BASE_LANG_LIST.items():
        if k == "zh":
            choices += ZH_LANGUAGE_SUBTAGS_PRIO.items()
        else:
            choices.append((k, v))
    choices += ZH_LANGUAGE_SUBTAGS.items()
    return choices


LOCALE_CHOICES: list[tuple[str, Any]] = get_locale_choices()
SCRIPT_CHOICES: list[tuple[str, Any]] = get_script_choices()
LANGUAGE_CHOICES: list[tuple[str, Any]] = get_language_choices()


def get_current_locales() -> list[str]:
    lang = get_language().lower()
    if lang == "zh-hans":
        return ["zh-cn", "zh-sg", "zh-my", "zh-hk", "zh-tw", "zh-mo", "en"]
    elif lang == "zh-hant":
        return ["zh-tw", "zh-hk", "zh-mo", "zh-cn", "zh-sg", "zh-my", "en"]
    else:
        lng = lang.split("-")
        return ["en"] if lng[0] == "en" else [lng[0], "en"]
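# Illustration only (not part of the commit), assuming Django's active
# language is set:
#     "zh-hans" -> ["zh-cn", "zh-sg", "zh-my", "zh-hk", "zh-tw", "zh-mo", "en"]
#     "fr-fr"   -> ["fr", "en"]
#     "en-us"   -> ["en"]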


_eng = re.compile(r"^[A-Za-z0-9\s]{1,13}$")


def detect_language(s: str) -> str:
    try:
        if _eng.match(s):
            return "en"
        return detect(s).lower()
    except Exception:
        return "x"


def migrate_languages(languages: list[str]) -> list[str]:
    return []
6 common/models/misc.py Normal file
@@ -0,0 +1,6 @@
def uniq(ls: list) -> list:
    r = []
    for i in ls:
        if i not in r:
            r.append(i)
    return r
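# Illustration only (not part of the commit): uniq() is an order-preserving
# de-duplication, e.g. uniq(["en", "zh", "en"]) -> ["en", "zh"].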

@@ -8,7 +8,11 @@
    }
}
// override django_jsonform/react-json-form styles
.rjf-form-group-wrapper {
    max-width: unset !important;
}
.rjf-form-wrapper {
    max-width: unset !important;
    input[type="text"] {
        max-width: unset !important;
        margin-top: 0 !important;

@@ -33,15 +37,15 @@
}

.rjf-form-row-inner>div {
    display: grid !important;
    grid-template-columns: repeat(auto-fit, minmax(0%, 1fr));
    // display: grid !important;
    // grid-template-columns: repeat(auto-fit, minmax(0%, 1fr));

    >label {
        margin-top: var(--pico-form-element-spacing-vertical);
    }

    >* {
        width: max-content !important;
        // width: max-content !important;


    button {

@@ -21,7 +21,7 @@
<script src="{{ cdn_url }}/npm/hyperscript.org@0.9.12"></script>
<link rel="stylesheet"
      href="{{ cdn_url }}/npm/@picocss/pico@2/css/pico.min.css" />
<link href="{% sass_src 'scss/neodb.scss' %}"
<link href="{% sass_src 'scss/neodb.scss' %}?xxddddddddxdd"
      rel="stylesheet"
      type="text/css" />
<link href="{{ cdn_url }}/npm/@fortawesome/fontawesome-free@6.5.2/css/all.min.css"

@@ -4,6 +4,7 @@ from typing import TYPE_CHECKING

from discord import SyncWebhook
from django.conf import settings
from django.conf.locale import LANG_INFO
from django.core.exceptions import ObjectDoesNotExist, PermissionDenied
from django.core.signing import b62_decode, b62_encode
from django.http import Http404, HttpRequest, HttpResponseRedirect, QueryDict

@@ -99,15 +99,15 @@ Add a new site
* `DEFAULT_MODEL` (unless specified in `scrape()` return val)
* a classmethod `id_to_url()`
* a method `scrape()` returns a `ResourceContent` object (see the sketch after this list)
    * `BasicDownloader` or `ProxiedDownloader` can be used to download website content or API data, e.g. `content = BasicDownloader(url).download().html()`
    * check out existing files in `catalog/sites/` for more examples
- add an import in `catalog/sites/__init__.py`
- add some tests to `catalog/<folder>/tests.py` according to site type
    + adding `DOWNLOADER_SAVEDIR = '/tmp'` to `settings.py` will save all responses to /tmp
    + run `neodb-manage cat <url>` for debugging or saving a response file to `/tmp`. Detailed code of `cat` is in `catalog/management/commands/cat.py`
    + move captured response files to `test_data/`, except large/image files; if you have to include one, replace it with a minimal version (e.g. 1x1 pixel / 1s audio)
    + add the `@use_local_response` decorator to test methods that should pick up these responses (if `BasicDownloader` or `ProxiedDownloader` is used)
- run all the tests and make sure they pass
    - Command: `neodb-manage python3 manage.py test [--keepdb]`.
    - See [this issue](https://github.com/neodb-social/neodb/issues/5) if `lxml.etree.ParserError` occurs on macOS.
- add a site UI label style to `common/static/scss/_sitelabel.scss`
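For orientation, a minimal site module following the steps above might look like this sketch (everything named "Example" is hypothetical, including the URL pattern and XPath; real `SiteName`/`IdType` members would have to be added to the enums first):

    from catalog.common import *
    from catalog.models import Edition
    from common.models.lang import detect_language

    @SiteManager.register
    class ExampleSite(AbstractSite):
        SITE_NAME = SiteName.Example  # hypothetical enum member
        ID_TYPE = IdType.Example  # hypothetical id type
        URL_PATTERNS = [r"https://example\.com/item/(\d+)"]
        DEFAULT_MODEL = Edition

        @classmethod
        def id_to_url(cls, id_value):
            return f"https://example.com/item/{id_value}"

        def scrape(self):
            # BasicDownloader picks up @use_local_response test fixtures automatically
            content = BasicDownloader(self.url).download().html()
            title = self.query_str(content, "//h1/text()")
            lang = detect_language(title)
            return ResourceContent(
                metadata={
                    "title": title,
                    "localized_title": [{"lang": lang, "text": title}],
                }
            )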

@@ -509,7 +509,7 @@ class Content(Piece):
        raise NotImplementedError("subclass should override this")

    @property
    def display_description(self) -> str:
    def brief_description(self) -> str:
        raise NotImplementedError("subclass should override this")

    class Meta:

@@ -1,4 +1,5 @@
import re
from html import unescape
from typing import cast

import mistune

@@ -38,6 +39,13 @@ def render_md(s: str) -> str:
    return cast(str, _markdown(s))


_RE_HTML_TAG = re.compile(r"<[^>]*>")


def html_to_text(h: str) -> str:
    return unescape(_RE_HTML_TAG.sub(" ", h.replace("\r", "")))
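# Illustration only (not part of the commit): tags are replaced with spaces and
# entities are unescaped, so extra whitespace may remain, e.g.
#     html_to_text("<p>Hello &amp; <b>bye</b></p>")  ->  " Hello &  bye  "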


def _spolier(s: str) -> str:
    sl = s.split(">!", 1)
    if len(sl) == 1:

@@ -32,7 +32,7 @@ class Review(Content):
        return self.title

    @property
    def display_description(self):
    def brief_description(self):
        return self.plain_content[:155]

    @property

@@ -146,7 +146,7 @@ class TagTest(TestCase):

    def test_cleanup(self):
        self.assertEqual(Tag.cleanup_title("# "), "_")
        self.assertEqual(Tag.deep_cleanup_title("# C "), "c")
        self.assertEqual(Tag.deep_cleanup_title("# C "), "text")

    def test_user_tag(self):
        t1 = "tag 1"

@@ -183,7 +183,7 @@ class MarkTest(TestCase):

        mark = Mark(self.user1.identity, self.book1)
        self.assertEqual(mark.shelf_type, ShelfType.WISHLIST)
        self.assertEqual(mark.shelf_label, "想读的书")
        self.assertEqual(mark.shelf_label, "books to read")
        self.assertEqual(mark.comment_text, "a gentle comment")
        self.assertEqual(mark.rating_grade, 9)
        self.assertEqual(mark.visibility, 1)

File diff suppressed because it is too large
@@ -27,10 +27,11 @@ class MastodonSiteCheck(BaseJob):
            try:
                api_domain = site.api_domain or site.domain_name
                domain, api_domain, v = detect_server_info(api_domain)
                site.server_version = v
                site.last_reachable_date = timezone.now()
                site.detect_configurations()
            except Exception as e:
                logger.error(
                logger.warning(
                    f"Failed to detect server info for {site.domain_name}/{site.api_domain}",
                    extra={"exception": e},
                )

@@ -40,12 +41,16 @@ class MastodonSiteCheck(BaseJob):
                if timezone.now() > site.last_reachable_date + timedelta(
                    days=self.max_unreachable_days
                ):
                    logger.error(
                        f"Failed to detect server info for {site.domain_name}/{site.api_domain} disabling it."
                    )
                    site.disabled = True
                    count_disabled += 1
            finally:
                site.save(
                    update_fields=[
                        "star_mode",
                        "server_version",
                        "max_status_len",
                        "last_reachable_date",
                        "disabled",

@@ -257,7 +257,7 @@ class BlueskyAccount(SocialAccount):
        embed = models.AppBskyEmbedExternal.Main(
            external=models.AppBskyEmbedExternal.External(
                title=obj.display_title,
                description=obj.display_description,
                description=obj.brief_description,
                uri=obj.absolute_url,
            )
        )

@@ -57,7 +57,6 @@ dependencies = [
    "deepmerge>=1.1.1",
    "django-typed-models @ git+https://github.com/alphatownsman/django-typed-models.git",
    "atproto>=0.0.49",
    "pyright>=1.1.370",
]

[tool.rye]

@@ -70,7 +69,7 @@ dev-dependencies = [
    "djlint~=1.34.1",
    "isort~=5.13.2",
    "lxml-stubs",
    "pyright>=1.1.369",
    "pyright>=1.1.371",
    "ruff",
    "mkdocs-material>=9.5.25",
]

@@ -229,7 +229,7 @@ pygments==2.18.0
    # via mkdocs-material
pymdown-extensions==10.8.1
    # via mkdocs-material
pyright==1.1.370
pyright==1.1.371
python-dateutil==2.9.0.post0
    # via dateparser
    # via django-auditlog

@@ -0,0 +1 @@
{"movie_results":[],"person_results":[],"tv_results":[{"backdrop_path":"/zJkLznQNzeLfKuD238Czo6jk65X.jpg","id":71365,"name":"Battlestar Galactica","original_name":"Battlestar Galactica","overview":"A re-imagining of the original series in which a \"rag-tag fugitive fleet\" of the last remnants of mankind flees pursuing robots while simultaneously searching for their true home, Earth.","poster_path":"/imTQ4nBdA68TVpLaWhhQJnb7NQh.jpg","media_type":"tv","adult":false,"original_language":"en","genre_ids":[10759,18,10765],"popularity":51.919,"first_air_date":"2003-12-08","vote_average":8.184,"vote_count":801,"origin_country":["CA"]}],"tv_episode_results":[],"tv_season_results":[]}

@@ -0,0 +1 @@
{"movie_results":[],"person_results":[],"tv_results":[{"backdrop_path":"/nfH8SZJVOxcBlFaqqtoqS5hHizG.jpg","id":57243,"name":"Doctor Who","original_name":"Doctor Who","overview":"The Doctor is a Time Lord: a 900 year old alien with 2 hearts, part of a gifted civilization who mastered time travel. The Doctor saves planets for a living—more of a hobby actually, and the Doctor's very, very good at it.","poster_path":"/4edFyasCrkH4MKs6H4mHqlrxA6b.jpg","media_type":"tv","adult":false,"original_language":"en","genre_ids":[10759,18,10765],"popularity":1090.391,"first_air_date":"2005-03-26","vote_average":7.519,"vote_count":2930,"origin_country":["GB"]}],"tv_episode_results":[],"tv_season_results":[]}

@@ -0,0 +1 @@
{"movie_results":[{"backdrop_path":"/eCTWG4HzsOQomghw8sRd1qpeOlA.jpg","id":282758,"title":"Doctor Who: The Runaway Bride","original_title":"Doctor Who: The Runaway Bride","overview":"A young bride in the midst of her wedding finds herself mysteriously transported to the TARDIS. The Doctor must discover what her connection is with the Empress of the Racnoss's plan to destroy the world.","poster_path":"/dy7JzhXnDFhQsHRiPXxpu62j3yQ.jpg","media_type":"movie","adult":false,"original_language":"en","genre_ids":[878],"popularity":17.25,"release_date":"2006-12-25","video":false,"vote_average":7.728,"vote_count":224}],"person_results":[],"tv_results":[],"tv_episode_results":[{"id":1008547,"name":"The Runaway Bride","overview":"Bride-to-be Donna vanishes as she walks down the aisle to marry boyfriend Lance. To her complete astonishment - and the Doctor's - she reappears in the Tardis. As the Time Lord, still reeling from Rose's departure, investigates how Donna came to be there, the duo uncover a terrifying enemy. How far will the Doctor go to save Earth from the latest alien threat?","media_type":"tv_episode","vote_average":6.925,"vote_count":20,"air_date":"2006-12-25","episode_number":4,"episode_type":"standard","production_code":"NCFT094N","runtime":64,"season_number":0,"show_id":57243,"still_path":"/pncNamTuydXWinybPuMTsBUVjSD.jpg"}],"tv_season_results":[]}

@@ -0,0 +1 @@
{"movie_results":[],"person_results":[],"tv_results":[],"tv_episode_results":[],"tv_season_results":[]}

@@ -0,0 +1 @@
{"movie_results":[],"person_results":[],"tv_results":[],"tv_episode_results":[],"tv_season_results":[]}

@@ -0,0 +1 @@
{"movie_results":[],"person_results":[],"tv_results":[],"tv_episode_results":[{"id":941505,"name":"Partners in Crime","overview":"During an alien emergency in London, a woman called Donna Noble must search for an old friend who can save the day - a man named the Doctor. But can even the Doctor halt the plans of the mysterious Miss Foster?","media_type":"tv_episode","vote_average":7.26,"vote_count":52,"air_date":"2008-04-05","episode_number":1,"episode_type":"standard","production_code":"","runtime":51,"season_number":4,"show_id":57243,"still_path":"/vg5oP1tOzivl4EV7iHiEaKwiZkK.jpg"}],"tv_season_results":[]}

@@ -0,0 +1 @@
{"movie_results":[{"backdrop_path":"/8ZTVqvKDQ8emSGUEMjsS4yHAwrp.jpg","id":27205,"title":"Inception","original_title":"Inception","overview":"Cobb, a skilled thief who commits corporate espionage by infiltrating the subconscious of his targets is offered a chance to regain his old life as payment for a task considered to be impossible: \"inception\", the implantation of another person's idea into a target's subconscious.","poster_path":"/oYuLEt3zVCKq57qu2F8dT7NIa6f.jpg","media_type":"movie","adult":false,"original_language":"en","genre_ids":[28,878,12],"popularity":92.871,"release_date":"2010-07-15","video":false,"vote_average":8.369,"vote_count":35987}],"person_results":[],"tv_results":[],"tv_episode_results":[],"tv_season_results":[]}

@@ -0,0 +1 @@
{"movie_results":[],"person_results":[],"tv_results":[],"tv_episode_results":[{"id":1305550,"name":"Part 1","overview":"In a distant part of the galaxy lie The Twelve Colonies of Man, a civilization that has been at peace for some forty years with an empire of machines, the Cylons, who were created generations before as worker drones for mankind, but became independent, rose in rebellion, and launched war on their masters. Now, the Cylons have evolved into more human form, into machine-created biological beings, who seek to exterminate true biological humans. To this end they use a human scientist, Gaius, to help one of their infiltrators, known as #6, penetrate the Colonies' master ...","media_type":"tv_episode","vote_average":8.1,"vote_count":20,"air_date":"2003-12-08","episode_number":1,"episode_type":"standard","production_code":"","runtime":95,"season_number":1,"show_id":71365,"still_path":"/mBkKJW9ppIEjkD4CXGaAntekQNm.jpg"}],"tv_season_results":[]}

@@ -0,0 +1 @@
{"movie_results":[],"person_results":[],"tv_results":[],"tv_episode_results":[{"id":1305551,"name":"Part 2","overview":"After forty years of armistice, the Cylons attacks the Twelve Colonies of Kobol. Their strategy: a virus implanted into the mankind defense system. The former Battlestar Galactica, which is being adapted into a museum, is not connected with the defense system and becomes the only warship capable of fighting against the Cylons in the hopes of leading the survivors to planet 'Earth'.","media_type":"tv_episode","vote_average":8.118,"vote_count":17,"air_date":"2003-12-09","episode_number":2,"episode_type":"finale","production_code":"","runtime":90,"season_number":1,"show_id":71365,"still_path":"/77kEx9Zw6yI69oCrffQc1hIGOjC.jpg"}],"tv_season_results":[]}

@@ -0,0 +1 @@
{"movie_results":[],"person_results":[],"tv_results":[],"tv_episode_results":[{"id":3891529,"name":"Solaricks","overview":"The Smiths deal with last season's fallout, and Rick and Morty are stranded in space.","media_type":"tv_episode","vote_average":8.091,"vote_count":55,"air_date":"2022-09-04","episode_number":1,"episode_type":"standard","production_code":"","runtime":23,"season_number":6,"show_id":60625,"still_path":"/5tiOEjp03nvaGiKT73knretU8e8.jpg"}],"tv_season_results":[]}

@@ -0,0 +1 @@
{"movie_results":[],"person_results":[],"tv_results":[{"backdrop_path":"/8IC1q0lHFwi5m8VtChLzIfmpaZH.jpg","id":86941,"name":"The North Water","original_name":"The North Water","overview":"Henry Drax is a harpooner and brutish killer whose amorality has been shaped to fit the harshness of his world, who will set sail on a whaling expedition to the Arctic with Patrick Sumner, a disgraced ex-army surgeon who signs up as the ship’s doctor. Hoping to escape the horrors of his past, Sumner finds himself on an ill-fated journey with a murderous psychopath. In search of redemption, his story becomes a harsh struggle for survival in the Arctic wasteland.","poster_path":"/9CM0ca8pX1os3SJ24hsIc0nN8ph.jpg","media_type":"tv","adult":false,"original_language":"en","genre_ids":[18,9648],"popularity":40.783,"first_air_date":"2021-07-14","vote_average":7.392,"vote_count":120,"origin_country":["US"]}],"tv_episode_results":[],"tv_season_results":[]}
File diff suppressed because one or more lines are too long (32 files)
Some files were not shown because too many files have changed in this diff