Fix Douban TV type parsing (detect series via the JSON-LD "@type" field)

2023-06-29 08:58:08 -04:00 · 2023-06-29 08:58:08 -04:00 · 9dc6de2069
commit 9dc6de2069
parent 1fa6a09cbb
1 changed file with 6 additions and 3 deletions
--- a/catalog/sites/douban_movie.py
+++ b/catalog/sites/douban_movie.py
@@ -3,7 +3,7 @@ from .douban import *
 from catalog.movie.models import *
 from catalog.tv.models import *
 import logging
-from django.db import models
+import json
 from django.utils.translation import gettext_lazy as _
 from .tmdb import TMDB_TV, TMDB_TVSeason, search_tmdb_by_imdb_id, query_tmdb_tv_episode

@@ -28,6 +28,10 @@ class DoubanMovie(AbstractSite):

    def scrape(self):
        content = DoubanDownloader(self.url).download().html()
+        schema_data = "".join(
+            content.xpath('//script[@type="application/ld+json"]/text()')
+        )
+        d = json.loads(schema_data) if schema_data else {}

        try:
            raw_title = content.xpath("//span[@property='v:itemreviewed']/text()")[
@@ -181,8 +185,7 @@ class DoubanMovie(AbstractSite):
            else None
        )

-        # if has field `episodes` not none then must be series
-        is_series = True if episodes else False
+        is_series = d.get("@type") == "TVSeries" or episodes is not None

        brief_elem = content.xpath("//span[@class='all hidden']")
        if not brief_elem: