diff --git a/catalog/common/models.py b/catalog/common/models.py
index 370830c7..eb2173ec 100644
--- a/catalog/common/models.py
+++ b/catalog/common/models.py
@@ -28,8 +28,8 @@ class SiteName(models.TextChoices):
     Goodreads = "goodreads", _("Goodreads")
     GoogleBooks = "googlebooks", _("谷歌图书")
     BooksTW = "bookstw", _("博客来")
-    IMDB = "imdb", _("IMDB")
-    TMDB = "tmdb", _("The Movie Database")
+    IMDB = "imdb", _("IMDb")
+    TMDB = "tmdb", _("TMDB")
     Bandcamp = "bandcamp", _("Bandcamp")
     Spotify = "spotify", _("Spotify")
     IGDB = "igdb", _("IGDB")
diff --git a/catalog/common/sites.py b/catalog/common/sites.py
index 414e9d17..986e603a 100644
--- a/catalog/common/sites.py
+++ b/catalog/common/sites.py
@@ -212,14 +212,16 @@ class AbstractSite:
             self.scrape_additional_data()
         if auto_link:
             for linked_resource in p.required_resources:
-                linked_site = SiteManager.get_site_by_url(linked_resource["url"])
-                if linked_site:
-                    linked_site.get_resource_ready(
-                        auto_link=False,
-                        preloaded_content=linked_resource.get("content"),
-                    )
-                else:
-                    _logger.error(f'unable to get site for {linked_resource["url"]}')
+                linked_url = linked_resource.get("url")
+                if linked_url:
+                    linked_site = SiteManager.get_site_by_url(linked_url)
+                    if linked_site:
+                        linked_site.get_resource_ready(
+                            auto_link=False,
+                            preloaded_content=linked_resource.get("content"),
+                        )
+                    else:
+                        _logger.error(f"unable to get site for {linked_url}")
         if p.related_resources:
             django_rq.get_queue("crawl").enqueue(crawl_related_resources_task, p.pk)
         if p.item:
diff --git a/catalog/podcast/models.py b/catalog/podcast/models.py
index 184b90b4..df1d5fb8 100644
--- a/catalog/podcast/models.py
+++ b/catalog/podcast/models.py
@@ -66,6 +66,10 @@ class Podcast(Item):
             return None
         return f"http://{self.primary_lookup_id_value}"
 
+    @property
+    def child_items(self):
+        return self.episodes.all()
+
 
 class PodcastEpisode(Item):
     category = ItemCategory.Podcast
diff --git a/catalog/sites/imdb.py b/catalog/sites/imdb.py
index b798f616..151e1a11 100644
--- a/catalog/sites/imdb.py
+++ b/catalog/sites/imdb.py
@@ -11,6 +11,13 @@ _logger = logging.getLogger(__name__)
 
 @SiteManager.register
 class IMDB(AbstractSite):
+    """
+    IMDb site manager
+
+    IMDb ids map to Movie, TVShow or TVEpisode; lookups go through TMDB
+    first and fall back to scraping IMDb itself when TMDB has no match.
+    """
+
     SITE_NAME = SiteName.IMDB
     ID_TYPE = IdType.IMDB
     URL_PATTERNS = [
@@ -25,6 +32,8 @@ class IMDB(AbstractSite):
 
     def scrape(self):
         res_data = search_tmdb_by_imdb_id(self.id_value)
+        url = None
+        pd = None
         if (
             "movie_results" in res_data
             and len(res_data["movie_results"]) > 0
@@ -46,21 +55,15 @@ class IMDB(AbstractSite):
             tv_id = res_data["tv_episode_results"][0]["show_id"]
             season_number = res_data["tv_episode_results"][0]["season_number"]
             episode_number = res_data["tv_episode_results"][0]["episode_number"]
-            if season_number == 0:
-                url = f"https://www.themoviedb.org/tv/{tv_id}/season/{season_number}/episode/{episode_number}"
-            elif episode_number == 1:
-                url = f"https://www.themoviedb.org/tv/{tv_id}/season/{season_number}"
-            else:
-                raise ParseError(
-                    self,
-                    "IMDB id matching TMDB but not first episode, this is not supported",
-                )
-        else:
-            # IMDB id not found in TMDB use real IMDB scraper
-            return self.scrape_imdb()
-        tmdb = SiteManager.get_site_by_url(url)
-        pd = tmdb.scrape()
-        pd.metadata["preferred_model"] = tmdb.DEFAULT_MODEL.__name__
+            url = f"https://www.themoviedb.org/tv/{tv_id}/season/{season_number}/episode/{episode_number}"
+        if url:
+            tmdb = SiteManager.get_site_by_url(url)
+            pd = tmdb.scrape()
+            pd.metadata["preferred_model"] = tmdb.DEFAULT_MODEL.__name__
+            pd.metadata["required_resources"] = []  # do not auto fetch parent season
+        if not pd:
+            # if IMDB id not found in TMDB, use real IMDB scraper
+            pd = self.scrape_imdb()
         return pd
 
     def scrape_imdb(self):
@@ -81,9 +84,17 @@ class IMDB(AbstractSite):
             if d.get("primaryImage")
             else None,
         }
+        if d.get("series"):
+            episode_info = d["series"].get("episodeNumber")
+            if episode_info:
+                data["season_number"] = episode_info["seasonNumber"]
+                data["episode_number"] = episode_info["episodeNumber"]
+            series = d["series"].get("series")
+            if series:
+                data["show_imdb_id"] = series["id"]
         # TODO more data fields and localized title (in releaseinfo/)
         data["preferred_model"] = (
-            ""  # "TVSeason" not supported yet
+            "TVEpisode"
             if data["is_episode"]
             else ("TVShow" if data["is_series"] else "Movie")
         )
@@ -100,3 +111,77 @@ class IMDB(AbstractSite):
                     f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
                 )
         return pd
+
+    @staticmethod
+    def get_episode_list(show_id, season_id):
+        """
+        Scrape the IMDb mobile site for the episode list of one season.
+
+        If the page for show_id links to a parent series, that series is used;
+        otherwise show_id itself is treated as the show. Returns a list of
+        dicts (model, id_type, id_value, url, title, episode_number).
+        """
+        url = f"https://m.imdb.com/title/{show_id}/"
+        h = BasicDownloader(url).download().html()
+        show_url = "".join(
+            h.xpath('//a[@data-testid="hero-title-block__series-link"]/@href')
+        ).split("?")[0]
+        if not show_url:
+            show_url = f"/title/{show_id}/"
+        url = f"https://m.imdb.com{show_url}episodes/?season={season_id}"
+        h = BasicDownloader(url).download().html()
+        episodes = []
+        for e in h.xpath('//div[@id="eplist"]/div/a'):
+            episode_number = e.xpath(
+                './span[contains(@class,"episode-list__title")]/text()'
+            )[0].strip()
+            episode_number = int(episode_number.split(".")[0])
+            episode_title = " ".join(
+                e.xpath('.//strong[@class="episode-list__title-text"]/text()')
+            ).strip()
+            episode_url = e.xpath("./@href")[0]
+            episode_url = "https://www.imdb.com" + episode_url
+            episodes.append(
+                {
+                    "model": "TVEpisode",
+                    "id_type": IdType.IMDB,
+                    "id_value": IMDB.url_to_id(episode_url),
+                    "url": episode_url,
+                    "title": episode_title,
+                    "episode_number": episode_number,
+                }
+            )
+        return episodes
+
+    @staticmethod
+    def fetch_episodes_for_season(season_uuid):
+        """
+        Create missing TVEpisode items for a season from IMDb's episode list.
+
+        Also raises season.episode_count when IMDb lists more episodes than
+        recorded. Episodes already linked to the season are left untouched.
+        """
+        season = TVSeason.get_by_url(season_uuid)
+        # season 0 is a valid season number, so only None counts as missing
+        if season.season_number is None or not season.imdb:
+            _logger.warning(f"season {season} is missing season number or imdb id")
+            return
+        episodes = IMDB.get_episode_list(season.imdb, season.season_number)
+        if not episodes:
+            _logger.warning(f"season {season} has no episodes fetched")
+            return
+        if not season.episode_count or season.episode_count < len(episodes):
+            season.episode_count = len(episodes)
+            season.save()
+        for e in episodes:
+            episode = TVEpisode.objects.filter(
+                season=season, episode_number=e["episode_number"]
+            ).first()
+            if not episode:
+                site = SiteManager.get_site_by_url(e["url"])
+                if not site:
+                    _logger.error(f'unable to get site for {e["url"]}')
+                    continue
+                episode = site.get_resource_ready().item
+                episode.set_parent_item(season)
+                episode.save()
diff --git a/catalog/sites/tmdb.py b/catalog/sites/tmdb.py
index 92cae430..4960d641 100644
--- a/catalog/sites/tmdb.py
+++ b/catalog/sites/tmdb.py
@@ -417,3 +417,91 @@ class TMDB_TVSeason(AbstractSite):
                 raise ParseError("first episode id for season")
             pd.lookup_ids[IdType.IMDB] = d2["external_ids"].get("imdb_id")
         return pd
+
+
+@SiteManager.register
+class TMDB_TVEpisode(AbstractSite):
+    """
+    TMDB TV episode site
+
+    Ids have the form "<show>-<season>-<episode>", built from the three
+    numeric parts of the episode URL.
+    """
+
+    SITE_NAME = SiteName.TMDB
+    ID_TYPE = IdType.TMDB_TVEpisode
+    URL_PATTERNS = [
+        r"\w+://www.themoviedb.org/tv/(\d+)[^/]*/season/(\d+)/episode/(\d+)[^/]*$"
+    ]
+    WIKI_PROPERTY_ID = "?"
+    DEFAULT_MODEL = TVEpisode
+    ID_PATTERN = r"^(\d+)-(\d+)-(\d+)$"
+
+    @classmethod
+    def url_to_id(cls, url: str):
+        for p in cls.URL_PATTERNS:
+            u = re.match(p, url)
+            if u:
+                return u[1] + "-" + u[2] + "-" + u[3]
+        return None
+
+    @classmethod
+    def id_to_url(cls, id_value):
+        v = id_value.split("-")
+        return f"https://www.themoviedb.org/tv/{v[0]}/season/{v[1]}/episode/{v[2]}"
+
+    def scrape(self):
+        v = self.id_value.split("-")
+        show_id = v[0]
+        season_id = v[1]
+        episode_id = v[2]
+        site = TMDB_TV(TMDB_TV.id_to_url(show_id))
+        # NOTE(review): result is unused; presumably this only makes sure the show resource is fetched - confirm
+        site.get_resource_ready(auto_create=False, auto_link=False)
+        api_url = f"https://api.themoviedb.org/3/tv/{show_id}/season/{season_id}/episode/{episode_id}?api_key={settings.TMDB_API3_KEY}&language=zh-CN&append_to_response=external_ids,credits"
+        d = BasicDownloader(api_url).download().json()
+        if not d.get("id"):
+            raise ParseError("id")
+        pd = ResourceContent(
+            metadata=_copy_dict(
+                d,
+                {
+                    "name": "title",
+                    "overview": "brief",
+                    # "air_date": "air_date",
+                    "season_number": 0,
+                    "episode_number": 0,
+                    "external_ids": [],
+                },
+            )
+        )
+        pd.metadata["required_resources"] = [
+            {
+                "model": "TVSeason",
+                "id_type": IdType.TMDB_TVSeason,
+                "id_value": f"{show_id}-{season_id}",
+                "title": f"TMDB TV Season {show_id}-{season_id}",
+                "url": f"https://www.themoviedb.org/tv/{show_id}/season/{season_id}",
+            }
+        ]
+        pd.lookup_ids[IdType.IMDB] = d["external_ids"].get("imdb_id")
+        pd.metadata["cover_image_url"] = (
+            ("https://image.tmdb.org/t/p/original/" + d["poster_path"])
+            if d.get("poster_path")
+            else None
+        )
+        pd.metadata["title"] = (
+            pd.metadata["title"]
+            if pd.metadata["title"]
+            else f'S{d["season_number"]} E{d["episode_number"]}'
+        )
+        if pd.metadata["cover_image_url"]:
+            imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
+            try:
+                pd.cover_image = imgdl.download().content
+                pd.cover_image_extention = imgdl.extention
+            except Exception:
+                _logger.debug(
+                    f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
+                )
+        return pd
diff --git a/catalog/templates/_item_comments.html b/catalog/templates/_item_comments.html
index c4118b0b..763667d5 100644
--- a/catalog/templates/_item_comments.html
+++ b/catalog/templates/_item_comments.html
@@ -7,6 +7,24 @@
 {% load truncate %}
 {% load duration %}
 {% load user_actions %}
+{% if item.class_name == "tvseason" and not request.GET.last %}
+

+ + {% if item.episodes.all %} + 全部 + {% for ep in item.episodes.all %} + + 第{{ ep.episode_number }}集 + + {% endfor %} + {% else %} + 编辑本条目添加分集信息后可开启分集短评功能。 + {% endif %} + +

+{% endif %} {% for comment in comments %} {% if forloop.counter <= 10 %}
diff --git a/catalog/templates/_item_comments_by_episode.html b/catalog/templates/_item_comments_by_episode.html new file mode 100644 index 00000000..00f0e6b2 --- /dev/null +++ b/catalog/templates/_item_comments_by_episode.html @@ -0,0 +1,85 @@ +{% load static %} +{% load i18n %} +{% load l10n %} +{% load admin_url %} +{% load mastodon %} +{% load oauth_token %} +{% load truncate %} +{% load duration %} +{% load user_actions %} +
+ {% if not request.GET.last %} +

+ + + 全部 + + {% for ep in item.episodes.all %} + + 第{{ ep.episode_number }}集 + + {% endfor %} + +

+

+ + + {% if mark.comment_text %} + + {% else %} + + {% endif %} + 写该集短评 + + +

+ {% endif %} + {% for comment in comments %} + {% if forloop.counter <= 10 %} +
+ + + {% liked_piece comment as liked %} + {% include 'like_stats.html' with liked=liked piece=comment %} + + + + + + + {% if comment.rating_grade %}{{ comment.rating_grade|rating_star }}{% endif %} + {{ comment.owner.display_name }} + + + + {{ comment.created_time|date }} + {{ comment.mark.action_label }} + + + {% if comment.focus_item %}{{ comment.focus_item.title }}{% endif %} + {% if comment.item != item %}{{ comment.item.title }}{% endif %} +
{{ comment.html|safe }}
+
+ {% else %} + + + + {% endif %} + {% empty %} +
{% trans '暂无' %}
+ {% endfor %} +
diff --git a/catalog/templates/_sidebar_edit.html b/catalog/templates/_sidebar_edit.html index edb8cfc6..7e710892 100644 --- a/catalog/templates/_sidebar_edit.html +++ b/catalog/templates/_sidebar_edit.html @@ -77,13 +77,28 @@ {% endfor %} {% if item.child_class %}
- {% trans '新建子条目' %} + {% trans '创建子条目' %}
- +
{% endif %} + {% if item.class_name == "tvseason" %} +
+ {% trans '更新单集条目' %} + {% if item.imdb and item.season_number is not None %} +
+ {% csrf_token %} +

因豆瓣/IMDB/TMDB之间对分季处理的差异,少量剧集和动画可能无法返回正确结果,更新后请手工确认和清理。

+ +
+ {% else %} + ⛔️ 获取单集条目需要本季序号和IMDB,不便填写也可以手工创建子条目。 + {% endif %} +
+ {% endif %} {% if item.class_name == "movie" %}
{% trans '切换分类' %} diff --git a/catalog/templates/item_base.html b/catalog/templates/item_base.html index 58429e55..17dbabea 100644 --- a/catalog/templates/item_base.html +++ b/catalog/templates/item_base.html @@ -365,24 +365,45 @@ {% block content %}{% endblock %}
-
- 短评 - {% if request.user.is_authenticated %} - - | {% trans '全部标记' %} - | 关注的人的标记 - - {% endif %} -
{% if request.user.is_authenticated %} -
-
- + + {% comment %} {% if item.class_name == "tvseason" %} + + {% endif %} {% endcomment %} {% else %} +
短评

登录后可见

{% endif %}
diff --git a/catalog/templates/item_mark_list.html b/catalog/templates/item_mark_list.html index 6d9e783c..c7722bfc 100644 --- a/catalog/templates/item_mark_list.html +++ b/catalog/templates/item_mark_list.html @@ -62,27 +62,7 @@ {% empty %}
{% trans '暂无标记' %}
{% endfor %} - + {% include "_pagination.html" %}