lib.itmens/catalog/sites/douban_drama.py

import re

from django.core.cache import cache
from lxml import html

from catalog.common import *
from catalog.models import *
from common.models.lang import detect_language

from .douban import DoubanDownloader


def _cache_key(url):
    return f"$:{url}"


@SiteManager.register
class DoubanDramaVersion(AbstractSite):
    """
    Parse Douban Drama Version section in Douban Drama page

    It's the same page as the drama page, each version resides in a <div id="1234" />
    since they all get parsed about the same time, page content will be cached to avoid duplicate fetch
    """

    SITE_NAME = SiteName.Douban
    ID_TYPE = IdType.DoubanDramaVersion
    URL_PATTERNS = [
        r"\w+://www.douban.com/location/drama/(\d+)/#(\d+)$",
    ]

    WIKI_PROPERTY_ID = "?"
    DEFAULT_MODEL = PerformanceProduction

    @classmethod
    def url_to_id(cls, url: str):
        m = re.match(cls.URL_PATTERNS[0], url)
        if not m:
            return None
        return m.group(1) + "-" + m.group(2)

    @classmethod
    def id_to_url(cls, id_value):
        ids = id_value.split("-")
        return f"https://www.douban.com/location/drama/{ids[0]}/#{ids[1]}"

    def scrape(self):
        if not self.id_value or not self.url:
            raise ParseError(self, "id_value or url")
        show_url = self.url.split("#")[0]
        show_id = self.id_value.split("-")[0]
        version_id = self.id_value.split("-")[1]

        key = _cache_key(show_url)
        r = cache.get(key, None)
        if r is None:
            r = DoubanDownloader(show_url).download().content.decode("utf-8")
            cache.set(key, r, 3600)
        h = html.fromstring(r)

        p = "//div[@id='" + version_id + "']"
        q = p + "//dt[text()='{}：']/following-sibling::dd[1]/a/span/text()"
        q2 = p + "//dt[text()='{}：']/following-sibling::dd[1]/text()"
        title = " ".join(self.query_list(h, p + "//h3/text()")).strip()
        if not title:
            raise ParseError(self, "title")
        data = {
            "title": title,
            "localized_title": [{"lang": "zh-cn", "text": title}],
            "director": [x.strip() for x in self.query_list(h, q.format("导演"))],
            "playwright": [x.strip() for x in self.query_list(h, q.format("编剧"))],
            # "actor": [x.strip() for x in self.query_list(h, q.format("主演"))],
            "composer": [x.strip() for x in self.query_list(h, q.format("作曲"))],
            "language": [x.strip() for x in self.query_list(h, q2.format("语言"))],
            "opening_date": " ".join(self.query_list(h, q2.format("演出日期"))).strip(),
            "troupe": [x.strip() for x in self.query_list(h, q.format("演出团体"))],
            "location": [x.strip() for x in self.query_list(h, q.format("演出剧院"))],
        }
        if data["opening_date"]:
            d = data["opening_date"].split("-")
            dl = len(d) if len(d) < 6 else 6
            if dl > 3:
                data["opening_date"] = "-".join(d[:3])
                data["closing_date"] = "-".join(d[0 : 6 - dl] + d[3:dl])
        actor_elem = self.query_list(
            h, p + "//dt[text()='主演：']/following-sibling::dd[1]/a"
        )
        data["actor"] = []
        for e in actor_elem:
            n = "".join(e.xpath("span/text()")).strip()
            t = "".join(e.xpath("following-sibling::text()[1]")).strip()
            t = re.sub(r"^[\s\(饰]*(.+)\)[\s\/]*$", r"\1", t).strip()
            t = t if t != "/" else ""
            data["actor"].append({"name": n, "role": t})
        img_url_elem = self.query_list(h, "//img[@itemprop='image']/@src")
        data["cover_image_url"] = img_url_elem[0].strip() if img_url_elem else None
        pd = ResourceContent(metadata=data)
        pd.metadata["required_resources"] = [
            {
                "model": "Performance",
                "id_type": IdType.DoubanDrama,
                "id_value": show_id,
                "title": f"Douban Drama {show_id}",
                "url": show_url,
            }
        ]
        return pd


@SiteManager.register
class DoubanDrama(AbstractSite):
    SITE_NAME = SiteName.Douban
    ID_TYPE = IdType.DoubanDrama
    URL_PATTERNS = [
        r"\w+://www.douban.com/location/drama/(\d+)/[^#]*$",
        r"\w+://www.douban.com/doubanapp/dispatch\?uri=/drama/(\d+)/",
        r"\w+://www.douban.com/doubanapp/dispatch/drama/(\d+)",
    ]
    WIKI_PROPERTY_ID = "P6443"
    DEFAULT_MODEL = Performance

    @classmethod
    def id_to_url(cls, id_value):
        return "https://www.douban.com/location/drama/" + id_value + "/"

    def scrape(self):
        key = _cache_key(self.url)
        r = cache.get(key, None)
        if r is None:
            r = DoubanDownloader(self.url).download().content.decode("utf-8")
            cache.set(key, r, 3600)
        h = html.fromstring(r)
        data = {}

        title_elem = self.query_list(h, "/html/body//h1/span/text()")
        if title_elem:
            data["title"] = title_elem[0].strip()
            data["orig_title"] = title_elem[1] if len(title_elem) > 1 else None
        else:
            raise ParseError(self, "title")

        other_title_elem = self.query_list(
            h, "//dl//dt[text()='又名：']/following::dd[@itemprop='name']/text()"
        )
        data["other_title"] = other_title_elem

        plot_elem = self.query_list(
            h, "//div[@class='pure-text']/div[@class='full']/text()"
        )
        if len(plot_elem) == 0:
            plot_elem = self.query_list(
                h, "//div[@class='pure-text']/div[@class='abstract']/text()"
            )
        if len(plot_elem) == 0:
            plot_elem = self.query_list(h, "//div[@class='pure-text']/text()")
        data["brief"] = "\n".join(plot_elem)

        data["genre"] = [
            s.strip()
            for s in self.query_list(
                h,
                "//div[@class='meta']//dl//dt[text()='类型：']/following-sibling::dd[@itemprop='genre']/text()",
            )
        ]
        # data["version"] = [
        #     s.strip()
        #     for s in self.query_list(h,
        #         "//dl//dt[text()='版本：']/following-sibling::dd[@class='titles']/a//text()"
        #     )
        # ]
        data["director"] = [
            s.strip()
            for s in self.query_list(
                h,
                "//div[@class='meta']/dl//dt[text()='导演：']/following-sibling::dd/a[@itemprop='director']//text()",
            )
        ]
        data["composer"] = [
            s.strip()
            for s in self.query_list(
                h,
                "//div[@class='meta']/dl//dt[text()='作曲：']/following-sibling::dd/a[@itemprop='musicBy']//text()",
            )
        ]
        data["choreographer"] = [
            s.strip()
            for s in self.query_list(
                h,
                "//div[@class='meta']/dl//dt[text()='编舞：']/following-sibling::dd/a[@itemprop='choreographer']//text()",
            )
        ]
        data["troupe"] = [
            s.strip()
            for s in self.query_list(
                h,
                "//div[@class='meta']/dl//dt[text()='演出团体：']/following-sibling::dd/a[@itemprop='performer']//text()",
            )
        ]
        data["playwright"] = [
            s.strip()
            for s in self.query_list(
                h,
                "//div[@class='meta']/dl//dt[text()='编剧：']/following-sibling::dd/a[@itemprop='author']//text()",
            )
        ]
        data["actor"] = [
            {"name": s.strip(), "role": ""}
            for s in self.query_list(
                h,
                "//div[@class='meta']/dl//dt[text()='主演：']/following-sibling::dd/a[@itemprop='actor']//text()",
            )
        ]

        date_elem = self.query_list(
            h, "//div[@class='meta']//dl//dt[text()='演出日期：']/following::dd/text()"
        )
        data["opening_date"] = date_elem[0] if date_elem else None
        if data["opening_date"]:
            d = data["opening_date"].split("-")
            dl = len(d) if len(d) < 6 else 6
            if dl > 3:
                data["opening_date"] = "-".join(d[:3])
                data["closing_date"] = "-".join(d[0 : 6 - dl] + d[3:dl])

        data["location"] = [
            s.strip()
            for s in self.query_list(
                h,
                "//div[@class='meta']/dl//dt[text()='演出剧院：']/following-sibling::dd/a[@itemprop='location']//text()",
            )
        ]

        versions = self.query_list(
            h, "//div[@id='versions']/div[@class='fluid-mods']/div/@id"
        )
        data["related_resources"] = list(
            map(
                lambda v: {
                    "model": "PerformanceProduction",
                    "id_type": IdType.DoubanDramaVersion,
                    "id_value": f"{self.id_value}-{v}",
                    "title": f"{data['title']} - {v}",
                    "url": f"{self.url}#{v}",
                },
                versions,
            )
        )
        img_url_elem = self.query_list(h, "//img[@itemprop='image']/@src")
        data["cover_image_url"] = img_url_elem[0].strip() if img_url_elem else None
        data["localized_title"] = (
            [{"lang": "zh-cn", "text": data["title"]}]
            + (
                [
                    {
                        "lang": detect_language(data["orig_title"]),
                        "text": data["orig_title"],
                    }
                ]
                if data["orig_title"]
                else []
            )
            + [{"lang": detect_language(t), "text": t} for t in data["other_title"]]
        )
        data["localized_description"] = [{"lang": "zh-cn", "text": data["brief"]}]

        pd = ResourceContent(metadata=data)
        return pd
-												support additional douban url format

											
										
										
											2023-07-19 11:12:58 -04:00
+								import re
 								from django.core.cache import cache
 								from lxml import html
-												new catalog data model, wip, not enabled

											
										
										
											2022-12-07 19:09:05 -05:00
+								from catalog.common import *
-												rename a few methods

											
										
										
											2022-12-08 16:59:03 +00:00
+								from catalog.models import *
-												supports localized title

											
										
										
											2024-07-13 00:16:47 -04:00
+								from common.models.lang import detect_language
-												support additional douban url format

											
										
										
											2023-07-19 11:12:58 -04:00
-												update lint: replace black and isort with ruff

											
										
										
											2025-01-11 17:20:02 -05:00
+								from .douban import DoubanDownloader
-												new catalog data model, wip, not enabled

											
										
										
											2022-12-07 19:09:05 -05:00
-												add performance/production

											
										
										
											2023-06-05 02:04:52 -04:00
+								def _cache_key(url):
 								    return f"$:{url}"
 								@SiteManager.register
 								class DoubanDramaVersion(AbstractSite):
 								    """
 								    Parse Douban Drama Version section in Douban Drama page
 								    It's the same page as the drama page, each version resides in a <div id="1234" />
 								    since they all get parsed about the same time, page content will be cached to avoid duplicate fetch
 								    """
 								    SITE_NAME = SiteName.Douban
 								    ID_TYPE = IdType.DoubanDramaVersion
-												update new link format from douban app

											
										
										
											2024-10-13 17:35:36 -04:00
+								    URL_PATTERNS = [
 								        r"\w+://www.douban.com/location/drama/(\d+)/#(\d+)$",
 								    ]
-												import performance in doufen

											
										
										
											2023-06-05 03:39:37 -04:00
-												add performance/production

											
										
										
											2023-06-05 02:04:52 -04:00
+								    WIKI_PROPERTY_ID = "?"
 								    DEFAULT_MODEL = PerformanceProduction
-												import performance in doufen

											
										
										
											2023-06-05 03:39:37 -04:00
+								    @classmethod
 								    def url_to_id(cls, url: str):
 								        m = re.match(cls.URL_PATTERNS[0], url)
 								        if not m:
 								            return None
 								        return m.group(1) + "-" + m.group(2)
-												add performance/production

											
										
										
											2023-06-05 02:04:52 -04:00
+								    @classmethod
 								    def id_to_url(cls, id_value):
 								        ids = id_value.split("-")
 								        return f"https://www.douban.com/location/drama/{ids[0]}/#{ids[1]}"
 								    def scrape(self):
-												fix some lint issues

											
										
										
											2025-01-04 11:23:07 -05:00
+								        if not self.id_value or not self.url:
 								            raise ParseError(self, "id_value or url")
-												add performance/production

											
										
										
											2023-06-05 02:04:52 -04:00
+								        show_url = self.url.split("#")[0]
 								        show_id = self.id_value.split("-")[0]
 								        version_id = self.id_value.split("-")[1]
 								        key = _cache_key(show_url)
 								        r = cache.get(key, None)
 								        if r is None:
 								            r = DoubanDownloader(show_url).download().content.decode("utf-8")
 								            cache.set(key, r, 3600)
 								        h = html.fromstring(r)
 								        p = "//div[@id='" + version_id + "']"
 								        q = p + "//dt[text()='{}：']/following-sibling::dd[1]/a/span/text()"
 								        q2 = p + "//dt[text()='{}：']/following-sibling::dd[1]/text()"
-												fix some lint issues

											
										
										
											2025-01-04 11:23:07 -05:00
+								        title = " ".join(self.query_list(h, p + "//h3/text()")).strip()
-												import performance in doufen

											
										
										
											2023-06-05 03:39:37 -04:00
+								        if not title:
 								            raise ParseError(self, "title")
-												add performance/production

											
										
										
											2023-06-05 02:04:52 -04:00
+								        data = {
-												import performance in doufen

											
										
										
											2023-06-05 03:39:37 -04:00
+								            "title": title,
-												supports localized title

											
										
										
											2024-07-13 00:16:47 -04:00
+								            "localized_title": [{"lang": "zh-cn", "text": title}],
-												fix some lint issues

											
										
										
											2025-01-04 11:23:07 -05:00
+								            "director": [x.strip() for x in self.query_list(h, q.format("导演"))],
 								            "playwright": [x.strip() for x in self.query_list(h, q.format("编剧"))],
 								            # "actor": [x.strip() for x in self.query_list(h, q.format("主演"))],
 								            "composer": [x.strip() for x in self.query_list(h, q.format("作曲"))],
 								            "language": [x.strip() for x in self.query_list(h, q2.format("语言"))],
 								            "opening_date": " ".join(self.query_list(h, q2.format("演出日期"))).strip(),
 								            "troupe": [x.strip() for x in self.query_list(h, q.format("演出团体"))],
 								            "location": [x.strip() for x in self.query_list(h, q.format("演出剧院"))],
-												add performance/production

											
										
										
											2023-06-05 02:04:52 -04:00
+								        }
-												adjust fields for performance

											
										
										
											2023-06-05 11:45:57 -04:00
+								        if data["opening_date"]:
 								            d = data["opening_date"].split("-")
-												move cover download out of scrape()

											
										
										
											2024-07-27 03:22:27 -04:00
+								            dl = len(d) if len(d) < 6 else 6
 								            if dl > 3:
-												adjust fields for performance

											
										
										
											2023-06-05 11:45:57 -04:00
+								                data["opening_date"] = "-".join(d[:3])
-												move cover download out of scrape()

											
										
										
											2024-07-27 03:22:27 -04:00
+								                data["closing_date"] = "-".join(d[0 : 6 - dl] + d[3:dl])
-												fix some lint issues

											
										
										
											2025-01-04 11:23:07 -05:00
+								        actor_elem = self.query_list(
 								            h, p + "//dt[text()='主演：']/following-sibling::dd[1]/a"
 								        )
-												use json format for crew & actors

											
										
										
											2023-06-05 17:22:34 -04:00
+								        data["actor"] = []
 								        for e in actor_elem:
 								            n = "".join(e.xpath("span/text()")).strip()
 								            t = "".join(e.xpath("following-sibling::text()[1]")).strip()
 								            t = re.sub(r"^[\s\(饰]*(.+)\)[\s\/]*$", r"\1", t).strip()
 								            t = t if t != "/" else ""
 								            data["actor"].append({"name": n, "role": t})
-												fix some lint issues

											
										
										
											2025-01-04 11:23:07 -05:00
+								        img_url_elem = self.query_list(h, "//img[@itemprop='image']/@src")
-												import performance in doufen

											
										
										
											2023-06-05 03:39:37 -04:00
+								        data["cover_image_url"] = img_url_elem[0].strip() if img_url_elem else None
-												add performance/production

											
										
										
											2023-06-05 02:04:52 -04:00
+								        pd = ResourceContent(metadata=data)
 								        pd.metadata["required_resources"] = [
 								            {
 								                "model": "Performance",
 								                "id_type": IdType.DoubanDrama,
 								                "id_value": show_id,
 								                "title": f"Douban Drama {show_id}",
 								                "url": show_url,
 								            }
 								        ]
 								        return pd
-												new data model: /book/<uid>

											
										
										
											2022-12-15 17:29:35 -05:00
+								@SiteManager.register
-												new catalog data model, wip, not enabled

											
										
										
											2022-12-07 19:09:05 -05:00
+								class DoubanDrama(AbstractSite):
-												new data model: view detail page

											
										
										
											2022-12-16 01:08:10 -05:00
+								    SITE_NAME = SiteName.Douban
-												new catalog data model, wip, not enabled

											
										
										
											2022-12-07 19:09:05 -05:00
+								    ID_TYPE = IdType.DoubanDrama
-												support additional douban url format

											
										
										
											2023-07-19 11:12:58 -04:00
+								    URL_PATTERNS = [
 								        r"\w+://www.douban.com/location/drama/(\d+)/[^#]*$",
 								        r"\w+://www.douban.com/doubanapp/dispatch\?uri=/drama/(\d+)/",
-												fix douban drama url parsing

											
										
										
											2024-11-22 17:38:21 -05:00
+								        r"\w+://www.douban.com/doubanapp/dispatch/drama/(\d+)",
-												support additional douban url format

											
										
										
											2023-07-19 11:12:58 -04:00
+								    ]
-												reformat new code with black

											
										
										
											2022-12-29 23:57:02 -05:00
+								    WIKI_PROPERTY_ID = "P6443"
-												new catalog data model, wip, not enabled

											
										
										
											2022-12-07 19:09:05 -05:00
+								    DEFAULT_MODEL = Performance
 								    @classmethod
-												basic support for douban drama

											
										
										
											2023-02-15 23:45:12 -05:00
+								    def id_to_url(cls, id_value):
-												new catalog data model, wip, not enabled

											
										
										
											2022-12-07 19:09:05 -05:00
+								        return "https://www.douban.com/location/drama/" + id_value + "/"
 								    def scrape(self):
-												add performance/production

											
										
										
											2023-06-05 02:04:52 -04:00
+								        key = _cache_key(self.url)
 								        r = cache.get(key, None)
 								        if r is None:
 								            r = DoubanDownloader(self.url).download().content.decode("utf-8")
 								            cache.set(key, r, 3600)
 								        h = html.fromstring(r)
-												new catalog data model, wip, not enabled

											
										
										
											2022-12-07 19:09:05 -05:00
+								        data = {}
-												fix some lint issues

											
										
										
											2025-01-04 11:23:07 -05:00
+								        title_elem = self.query_list(h, "/html/body//h1/span/text()")
-												new catalog data model, wip, not enabled

											
										
										
											2022-12-07 19:09:05 -05:00
+								        if title_elem:
 								            data["title"] = title_elem[0].strip()
-												add performance/production

											
										
										
											2023-06-05 02:04:52 -04:00
+								            data["orig_title"] = title_elem[1] if len(title_elem) > 1 else None
-												new catalog data model, wip, not enabled

											
										
										
											2022-12-07 19:09:05 -05:00
+								        else:
 								            raise ParseError(self, "title")
-												fix some lint issues

											
										
										
											2025-01-04 11:23:07 -05:00
+								        other_title_elem = self.query_list(
 								            h, "//dl//dt[text()='又名：']/following::dd[@itemprop='name']/text()"
-												reformat new code with black

											
										
										
											2022-12-29 23:57:02 -05:00
+								        )
-												add performance/production

											
										
										
											2023-06-05 02:04:52 -04:00
+								        data["other_title"] = other_title_elem
-												new catalog data model, wip, not enabled

											
										
										
											2022-12-07 19:09:05 -05:00
-												fix some lint issues

											
										
										
											2025-01-04 11:23:07 -05:00
+								        plot_elem = self.query_list(
 								            h, "//div[@class='pure-text']/div[@class='full']/text()"
 								        )
-												new catalog data model, wip, not enabled

											
										
										
											2022-12-07 19:09:05 -05:00
+								        if len(plot_elem) == 0:
-												fix some lint issues

											
										
										
											2025-01-04 11:23:07 -05:00
+								            plot_elem = self.query_list(
 								                h, "//div[@class='pure-text']/div[@class='abstract']/text()"
-												fix brief parsing

											
										
										
											2023-06-05 13:30:40 -04:00
+								            )
 								        if len(plot_elem) == 0:
-												fix some lint issues

											
										
										
											2025-01-04 11:23:07 -05:00
+								            plot_elem = self.query_list(h, "//div[@class='pure-text']/text()")
-												fix brief parsing

											
										
										
											2023-06-05 13:30:40 -04:00
+								        data["brief"] = "\n".join(plot_elem)
-												new catalog data model, wip, not enabled

											
										
										
											2022-12-07 19:09:05 -05:00
-												basic support for douban drama

											
										
										
											2023-02-15 23:45:12 -05:00
+								        data["genre"] = [
-												reformat new code with black

											
										
										
											2022-12-29 23:57:02 -05:00
+								            s.strip()
-												fix some lint issues

											
										
										
											2025-01-04 11:23:07 -05:00
+								            for s in self.query_list(
 								                h,
 								                "//div[@class='meta']//dl//dt[text()='类型：']/following-sibling::dd[@itemprop='genre']/text()",
-												reformat new code with black

											
										
										
											2022-12-29 23:57:02 -05:00
+								            )
 								        ]
-												add performance/production

											
										
										
											2023-06-05 02:04:52 -04:00
+								        # data["version"] = [
 								        #     s.strip()
-												fix some lint issues

											
										
										
											2025-01-04 11:23:07 -05:00
+								        #     for s in self.query_list(h,
-												add performance/production

											
										
										
											2023-06-05 02:04:52 -04:00
+								        #         "//dl//dt[text()='版本：']/following-sibling::dd[@class='titles']/a//text()"
 								        #     )
 								        # ]
-												basic support for douban drama

											
										
										
											2023-02-15 23:45:12 -05:00
+								        data["director"] = [
-												reformat new code with black

											
										
										
											2022-12-29 23:57:02 -05:00
+								            s.strip()
-												fix some lint issues

											
										
										
											2025-01-04 11:23:07 -05:00
+								            for s in self.query_list(
 								                h,
 								                "//div[@class='meta']/dl//dt[text()='导演：']/following-sibling::dd/a[@itemprop='director']//text()",
-												reformat new code with black

											
										
										
											2022-12-29 23:57:02 -05:00
+								            )
 								        ]
-												basic support for douban drama

											
										
										
											2023-02-15 23:45:12 -05:00
+								        data["composer"] = [
 								            s.strip()
-												fix some lint issues

											
										
										
											2025-01-04 11:23:07 -05:00
+								            for s in self.query_list(
 								                h,
 								                "//div[@class='meta']/dl//dt[text()='作曲：']/following-sibling::dd/a[@itemprop='musicBy']//text()",
-												basic support for douban drama

											
										
										
											2023-02-15 23:45:12 -05:00
+								            )
 								        ]
 								        data["choreographer"] = [
 								            s.strip()
-												fix some lint issues

											
										
										
											2025-01-04 11:23:07 -05:00
+								            for s in self.query_list(
 								                h,
 								                "//div[@class='meta']/dl//dt[text()='编舞：']/following-sibling::dd/a[@itemprop='choreographer']//text()",
-												basic support for douban drama

											
										
										
											2023-02-15 23:45:12 -05:00
+								            )
 								        ]
 								        data["troupe"] = [
 								            s.strip()
-												fix some lint issues

											
										
										
											2025-01-04 11:23:07 -05:00
+								            for s in self.query_list(
 								                h,
 								                "//div[@class='meta']/dl//dt[text()='演出团体：']/following-sibling::dd/a[@itemprop='performer']//text()",
-												basic support for douban drama

											
										
										
											2023-02-15 23:45:12 -05:00
+								            )
 								        ]
 								        data["playwright"] = [
-												reformat new code with black

											
										
										
											2022-12-29 23:57:02 -05:00
+								            s.strip()
-												fix some lint issues

											
										
										
											2025-01-04 11:23:07 -05:00
+								            for s in self.query_list(
 								                h,
 								                "//div[@class='meta']/dl//dt[text()='编剧：']/following-sibling::dd/a[@itemprop='author']//text()",
-												reformat new code with black

											
										
										
											2022-12-29 23:57:02 -05:00
+								            )
 								        ]
-												use json format for crew & actors

											
										
										
											2023-06-05 17:22:34 -04:00
+								        data["actor"] = [
 								            {"name": s.strip(), "role": ""}
-												fix some lint issues

											
										
										
											2025-01-04 11:23:07 -05:00
+								            for s in self.query_list(
 								                h,
 								                "//div[@class='meta']/dl//dt[text()='主演：']/following-sibling::dd/a[@itemprop='actor']//text()",
-												reformat new code with black

											
										
										
											2022-12-29 23:57:02 -05:00
+								            )
 								        ]
-												new catalog data model, wip, not enabled

											
										
										
											2022-12-07 19:09:05 -05:00
-												fix some lint issues

											
										
										
											2025-01-04 11:23:07 -05:00
+								        date_elem = self.query_list(
 								            h, "//div[@class='meta']//dl//dt[text()='演出日期：']/following::dd/text()"
-												adjust fields for performance

											
										
										
											2023-06-05 11:45:57 -04:00
+								        )
-												basic support for douban drama

											
										
										
											2023-02-15 23:45:12 -05:00
+								        data["opening_date"] = date_elem[0] if date_elem else None
-												adjust fields for performance

											
										
										
											2023-06-05 11:45:57 -04:00
+								        if data["opening_date"]:
 								            d = data["opening_date"].split("-")
-												move cover download out of scrape()

											
										
										
											2024-07-27 03:22:27 -04:00
+								            dl = len(d) if len(d) < 6 else 6
 								            if dl > 3:
-												adjust fields for performance

											
										
										
											2023-06-05 11:45:57 -04:00
+								                data["opening_date"] = "-".join(d[:3])
-												move cover download out of scrape()

											
										
										
											2024-07-27 03:22:27 -04:00
+								                data["closing_date"] = "-".join(d[0 : 6 - dl] + d[3:dl])
-												basic support for douban drama

											
										
										
											2023-02-15 23:45:12 -05:00
-												add performance/production

											
										
										
											2023-06-05 02:04:52 -04:00
+								        data["location"] = [
-												basic support for douban drama

											
										
										
											2023-02-15 23:45:12 -05:00
+								            s.strip()
-												fix some lint issues

											
										
										
											2025-01-04 11:23:07 -05:00
+								            for s in self.query_list(
 								                h,
 								                "//div[@class='meta']/dl//dt[text()='演出剧院：']/following-sibling::dd/a[@itemprop='location']//text()",
-												basic support for douban drama

											
										
										
											2023-02-15 23:45:12 -05:00
+								            )
 								        ]
-												fix some lint issues

											
										
										
											2025-01-04 11:23:07 -05:00
+								        versions = self.query_list(
 								            h, "//div[@id='versions']/div[@class='fluid-mods']/div/@id"
 								        )
-												add performance/production

											
										
										
											2023-06-05 02:04:52 -04:00
+								        data["related_resources"] = list(
 								            map(
 								                lambda v: {
 								                    "model": "PerformanceProduction",
 								                    "id_type": IdType.DoubanDramaVersion,
 								                    "id_value": f"{self.id_value}-{v}",
 								                    "title": f"{data['title']} - {v}",
 								                    "url": f"{self.url}#{v}",
 								                },
 								                versions,
 								            )
 								        )
-												fix some lint issues

											
										
										
											2025-01-04 11:23:07 -05:00
+								        img_url_elem = self.query_list(h, "//img[@itemprop='image']/@src")
-												reformat new code with black

											
										
										
											2022-12-29 23:57:02 -05:00
+								        data["cover_image_url"] = img_url_elem[0].strip() if img_url_elem else None
-												supports localized title

											
										
										
											2024-07-13 00:16:47 -04:00
+								        data["localized_title"] = (
 								            [{"lang": "zh-cn", "text": data["title"]}]
 								            + (
 								                [
 								                    {
 								                        "lang": detect_language(data["orig_title"]),
 								                        "text": data["orig_title"],
 								                    }
 								                ]
 								                if data["orig_title"]
 								                else []
 								            )
 								            + [{"lang": detect_language(t), "text": t} for t in data["other_title"]]
 								        )
 								        data["localized_description"] = [{"lang": "zh-cn", "text": data["brief"]}]
-												new catalog data model, wip, not enabled

											
										
										
											2022-12-07 19:09:05 -05:00
-												new data model: rename some classes

											
										
										
											2022-12-08 16:08:59 +00:00
+								        pd = ResourceContent(metadata=data)
-												new catalog data model, wip, not enabled

											
										
										
											2022-12-07 19:09:05 -05:00
+								        return pd