fix some lint issues

Route raw content.xpath() calls through the AbstractSite.query_list/query_str helpers, relax query_list's return annotation to plain list, extract a shared DoubanSearcher and add search() classmethods to the Douban site classes, replace deprecated _logger.warn calls with loguru's logger.warning, and drop **/sites/douban_* from the pyright exclude list so these modules are type-checked again.

Your Name 2025-01-04 11:23:07 -05:00 committed by Henri Dickson
parent ea4f52dfa6
commit 86b1ee19e0
9 changed files with 288 additions and 193 deletions

View file

@@ -104,7 +104,7 @@ class AbstractSite:
return content.xpath(query)[0].strip()
@staticmethod
def query_list(content, query: str) -> list[str]:
def query_list(content, query: str) -> list:
return list(content.xpath(query))
@classmethod
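The annotation change above is the first lint fix: lxml's xpath() can return text nodes, attribute strings, or element objects depending on the query, so list[str] was too narrow and the return type is relaxed to plain list. A minimal sketch of the two helpers in isolation (the class name, sample document, and query here are illustrative, not the real AbstractSite):

# Sketch of the query helpers, assuming lxml documents.
from lxml import html

class SiteSketch:
    @staticmethod
    def query_str(content, query: str) -> str:
        # first match, stripped; raises IndexError if nothing matches
        return content.xpath(query)[0].strip()

    @staticmethod
    def query_list(content, query: str) -> list:
        # may hold strings (text()/@attr) or elements, hence plain list
        return list(content.xpath(query))

doc = html.fromstring("<div id='info'><span>ISBN:</span> 9787536692930</div>")
isbn_elem = SiteSketch.query_list(doc, "//span[text()='ISBN:']/following::text()")
print(isbn_elem[0].strip() if isbn_elem else None)  # 9787536692930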

View file

@@ -79,6 +79,9 @@ class ExternalSearchResultItem:
self.display_description = brief
self.cover_image_url = cover_url
def __repr__(self):
return f"[{self.category}] {self.display_title} {self.url}"
@property
def verbose_category_name(self):
return self.category.label if self.category else ""

View file

@@ -1,6 +1,8 @@
import json
import re
from catalog.common import *
from catalog.search.models import ExternalSearchResultItem
RE_NUMBERS = re.compile(r"\d+\d*")
RE_WHITESPACES = re.compile(r"\s+")
@@ -30,3 +32,35 @@ class DoubanDownloader(ProxiedDownloader):
return RESPONSE_OK
else:
return RESPONSE_INVALID_CONTENT
class DoubanSearcher:
@classmethod
def search(cls, cat: ItemCategory, c: str, q: str, p: int = 1):
url = f"https://search.douban.com/{c}/subject_search?search_text={q}&start={15*(p-1)}"
content = DoubanDownloader(url).download().html()
j = json.loads(
content.xpath(
"//script[text()[contains(.,'window.__DATA__')]]/text()"
)[ # type:ignore
0
]
.split("window.__DATA__ = ")[1] # type:ignore
.split("};")[0] # type:ignore
+ "}"
)
results = [
ExternalSearchResultItem(
cat,
SiteName.Douban,
item["url"],
item["title"],
item["abstract"],
item["abstract_2"],
item["cover_url"],
)
for item in j["items"]
for item in j["items"]
if item.get("tpl_name") == "search_subject"
]
return results
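The Douban site classes below delegate their new search classmethods to this shared searcher. A hypothetical call (it performs a live request to search.douban.com; ItemCategory comes from catalog.common, and the query string is illustrative):

results = DoubanSearcher.search(ItemCategory.Book, "book", "三体", p=1)
for r in results:
    print(r)  # uses the __repr__ added to ExternalSearchResultItem above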

View file

@@ -3,7 +3,7 @@ from catalog.book.utils import *
from catalog.common import *
from common.models.lang import detect_language
from .douban import *
from .douban import RE_NUMBERS, RE_WHITESPACES, DoubanDownloader, DoubanSearcher
@SiteManager.register
@@ -23,46 +23,51 @@ class DoubanBook(AbstractSite):
def id_to_url(cls, id_value):
return "https://book.douban.com/subject/" + id_value + "/"
@classmethod
def search(cls, q: str, p: int = 1):
return DoubanSearcher.search(ItemCategory.Book, "book", q, p)
def scrape(self):
content = DoubanDownloader(self.url).download().html()
isbn_elem = content.xpath(
"//div[@id='info']//span[text()='ISBN:']/following::text()"
isbn_elem = self.query_list(
content, "//div[@id='info']//span[text()='ISBN:']/following::text()"
)
isbn = isbn_elem[0].strip() if isbn_elem else None
title_elem = content.xpath("/html/body//h1/span/text()")
title_elem = self.query_list(content, "/html/body//h1/span/text()")
title = (
title_elem[0].strip() if title_elem else f"Unknown Title {self.id_value}"
)
subtitle_elem = content.xpath(
"//div[@id='info']//span[text()='副标题:']/following::text()"
subtitle_elem = self.query_list(
content, "//div[@id='info']//span[text()='副标题:']/following::text()"
)
subtitle = subtitle_elem[0].strip()[:500] if subtitle_elem else None
orig_title_elem = content.xpath(
"//div[@id='info']//span[text()='原作名:']/following::text()"
orig_title_elem = self.query_list(
content, "//div[@id='info']//span[text()='原作名:']/following::text()"
)
orig_title = orig_title_elem[0].strip()[:500] if orig_title_elem else None
language_elem = content.xpath(
"//div[@id='info']//span[text()='语言:']/following::text()"
language_elem = self.query_list(
content, "//div[@id='info']//span[text()='语言:']/following::text()"
)
language = [language_elem[0].strip()] if language_elem else []
pub_house_elem = content.xpath(
"//div[@id='info']//span[text()='出版社:']/following::text()"
pub_house_elem = self.query_list(
content, "//div[@id='info']//span[text()='出版社:']/following::text()"
)
pub_house = pub_house_elem[0].strip() if pub_house_elem else None
if not pub_house:
pub_house_elem = content.xpath(
"//div[@id='info']//span[text()='出版社:']/following-sibling::a/text()"
pub_house_elem = self.query_list(
content,
"//div[@id='info']//span[text()='出版社:']/following-sibling::a/text()",
)
pub_house = pub_house_elem[0].strip() if pub_house_elem else None
pub_date_elem = content.xpath(
"//div[@id='info']//span[text()='出版年:']/following::text()"
pub_date_elem = self.query_list(
content, "//div[@id='info']//span[text()='出版年:']/following::text()"
)
pub_date = pub_date_elem[0].strip() if pub_date_elem else ""
year_month_day = RE_NUMBERS.findall(pub_date)
@@ -88,18 +93,18 @@ class DoubanBook(AbstractSite):
else pub_month
)
binding_elem = content.xpath(
"//div[@id='info']//span[text()='装帧:']/following::text()"
binding_elem = self.query_list(
content, "//div[@id='info']//span[text()='装帧:']/following::text()"
)
binding = binding_elem[0].strip() if binding_elem else None
price_elem = content.xpath(
"//div[@id='info']//span[text()='定价:']/following::text()"
price_elem = self.query_list(
content, "//div[@id='info']//span[text()='定价:']/following::text()"
)
price = price_elem[0].strip() if price_elem else None
pages_elem = content.xpath(
"//div[@id='info']//span[text()='页数:']/following::text()"
pages_elem = self.query_list(
content, "//div[@id='info']//span[text()='页数:']/following::text()"
)
pages = pages_elem[0].strip() if pages_elem else None
if pages is not None:
@@ -109,15 +114,16 @@ class DoubanBook(AbstractSite):
if pages and (pages > 999999 or pages < 1):
pages = None
brief_elem = content.xpath(
"//h2/span[text()='内容简介']/../following-sibling::div[1]//div[@class='intro'][not(ancestor::span[@class='short'])]/p/text()"
brief_elem = self.query_list(
content,
"//h2/span[text()='内容简介']/../following-sibling::div[1]//div[@class='intro'][not(ancestor::span[@class='short'])]/p/text()",
)
brief = "\n".join(p.strip() for p in brief_elem) if brief_elem else None
contents = None
try:
contents_elem = content.xpath(
"//h2/span[text()='目录']/../following-sibling::div[1]"
contents_elem = self.query_list(
content, "//h2/span[text()='目录']/../following-sibling::div[1]"
)[0]
# if the id of the next sibling contains `dir`, that would be the full contents
if "dir" in contents_elem.getnext().xpath("@id")[0]:
@@ -129,24 +135,28 @@ class DoubanBook(AbstractSite):
)
else:
contents = (
"\n".join(p.strip() for p in contents_elem.xpath("text()"))
"\n".join(
p.strip() for p in self.query_list(contents_elem, "text()")
)
if contents_elem is not None
else None
)
except Exception:
pass
img_url_elem = content.xpath("//*[@id='mainpic']/a/img/@src")
img_url_elem = self.query_list(content, "//*[@id='mainpic']/a/img/@src")
img_url = img_url_elem[0].strip() if img_url_elem else None
# there are two html formats for authors and translators
authors_elem = content.xpath(
authors_elem = self.query_list(
content,
"""//div[@id='info']//span[text()='作者:']/following-sibling::br[1]/
preceding-sibling::a[preceding-sibling::span[text()='作者:']]/text()"""
preceding-sibling::a[preceding-sibling::span[text()='作者:']]/text()""",
)
if not authors_elem:
authors_elem = content.xpath(
"""//div[@id='info']//span[text()=' 作者']/following-sibling::a/text()"""
authors_elem = self.query_list(
content,
"""//div[@id='info']//span[text()=' 作者']/following-sibling::a/text()""",
)
if authors_elem:
authors = []
@@ -155,13 +165,15 @@ class DoubanBook(AbstractSite):
else:
authors = None
translators_elem = content.xpath(
translators_elem = self.query_list(
content,
"""//div[@id='info']//span[text()='译者:']/following-sibling::br[1]/
preceding-sibling::a[preceding-sibling::span[text()='译者:']]/text()"""
preceding-sibling::a[preceding-sibling::span[text()='译者:']]/text()""",
)
if not translators_elem:
translators_elem = content.xpath(
"""//div[@id='info']//span[text()=' 译者']/following-sibling::a/text()"""
translators_elem = self.query_list(
content,
"""//div[@id='info']//span[text()=' 译者']/following-sibling::a/text()""",
)
if translators_elem:
translators = []
@@ -170,18 +182,20 @@ class DoubanBook(AbstractSite):
else:
translators = None
cncode_elem = content.xpath(
"//div[@id='info']//span[text()='统一书号:']/following::text()"
cncode_elem = self.query_list(
content, "//div[@id='info']//span[text()='统一书号:']/following::text()"
)
cubn = cncode_elem[0].strip() if cncode_elem else None
series_elem = content.xpath(
"//div[@id='info']//span[text()='丛书:']/following-sibling::a[1]/text()"
series_elem = self.query_list(
content,
"//div[@id='info']//span[text()='丛书:']/following-sibling::a[1]/text()",
)
series = series_elem[0].strip() if series_elem else None
imprint_elem = content.xpath(
"//div[@id='info']//span[text()='出品方:']/following-sibling::a[1]/text()"
imprint_elem = self.query_list(
content,
"//div[@id='info']//span[text()='出品方:']/following-sibling::a[1]/text()",
)
imprint = imprint_elem[0].strip() if imprint_elem else None
@@ -212,8 +226,9 @@ class DoubanBook(AbstractSite):
"cover_image_url": img_url,
}
works_element = content.xpath(
'//h2/span[text()="这本书的其他版本"]/following-sibling::span[@class="pl"]/a/@href'
works_element = self.query_list(
content,
'//h2/span[text()="这本书的其他版本"]/following-sibling::span[@class="pl"]/a/@href',
)
if works_element:
r = re.match(r"\w+://book.douban.com/works/(\d+)", works_element[0])
@@ -234,7 +249,7 @@ class DoubanBook(AbstractSite):
]
pd = ResourceContent(metadata=data)
t, n = detect_isbn_asin(isbn)
t, n = detect_isbn_asin(isbn or "")
if t:
pd.lookup_ids[t] = n
pd.lookup_ids[IdType.CUBN] = cubn
@@ -255,11 +270,11 @@ class DoubanBook_Work(AbstractSite):
def scrape(self):
content = DoubanDownloader(self.url).download().html()
title_elem = content.xpath("//h1/text()")
title_elem = self.query_list(content, "//h1/text()")
title = title_elem[0].split("全部版本(")[0].strip() if title_elem else None
if not title:
raise ParseError(self, "title")
book_urls = content.xpath('//a[@class="pl2"]/@href')
book_urls = self.query_list(content, '//a[@class="pl2"]/@href')
related_resources = []
for url in book_urls:
site = SiteManager.get_site_by_url(url)
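One more typing fix earlier in this file: isbn is str | None at the detect_isbn_asin call, so the commit passes isbn or "" to keep the argument a plain str for the type checker. The same narrowing idiom in isolation (the classify function below is an illustrative stand-in, not the real detect_isbn_asin):

from typing import Optional

def classify(code: str) -> Optional[str]:
    # illustrative stand-in: pretend 13-digit strings are ISBNs
    return "isbn13" if len(code) == 13 and code.isdigit() else None

isbn: Optional[str] = None
kind = classify(isbn or "")  # "" when isbn is None; argument stays a str
print(kind)  # None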

View file

@@ -7,7 +7,7 @@ from catalog.common import *
from catalog.models import *
from common.models.lang import detect_language
from .douban import DoubanDownloader
from .douban import DoubanDownloader, DoubanSearcher
def _cache_key(url):
@@ -45,6 +45,8 @@ class DoubanDramaVersion(AbstractSite):
return f"https://www.douban.com/location/drama/{ids[0]}/#{ids[1]}"
def scrape(self):
if not self.id_value or not self.url:
raise ParseError(self, "id_value or url")
show_url = self.url.split("#")[0]
show_id = self.id_value.split("-")[0]
version_id = self.id_value.split("-")[1]
@@ -59,20 +61,20 @@ class DoubanDramaVersion(AbstractSite):
p = "//div[@id='" + version_id + "']"
q = p + "//dt[text()='{}']/following-sibling::dd[1]/a/span/text()"
q2 = p + "//dt[text()='{}']/following-sibling::dd[1]/text()"
title = " ".join(h.xpath(p + "//h3/text()")).strip()
title = " ".join(self.query_list(h, p + "//h3/text()")).strip()
if not title:
raise ParseError(self, "title")
data = {
"title": title,
"localized_title": [{"lang": "zh-cn", "text": title}],
"director": [x.strip() for x in h.xpath(q.format("导演"))],
"playwright": [x.strip() for x in h.xpath(q.format("编剧"))],
# "actor": [x.strip() for x in h.xpath(q.format("主演"))],
"composer": [x.strip() for x in h.xpath(q.format("作曲"))],
"language": [x.strip() for x in h.xpath(q2.format("语言"))],
"opening_date": " ".join(h.xpath(q2.format("演出日期"))).strip(),
"troupe": [x.strip() for x in h.xpath(q.format("演出团体"))],
"location": [x.strip() for x in h.xpath(q.format("演出剧院"))],
"director": [x.strip() for x in self.query_list(h, q.format("导演"))],
"playwright": [x.strip() for x in self.query_list(h, q.format("编剧"))],
# "actor": [x.strip() for x in self.query_list(h, q.format("主演"))],
"composer": [x.strip() for x in self.query_list(h, q.format("作曲"))],
"language": [x.strip() for x in self.query_list(h, q2.format("语言"))],
"opening_date": " ".join(self.query_list(h, q2.format("演出日期"))).strip(),
"troupe": [x.strip() for x in self.query_list(h, q.format("演出团体"))],
"location": [x.strip() for x in self.query_list(h, q.format("演出剧院"))],
}
if data["opening_date"]:
d = data["opening_date"].split("-")
@@ -80,7 +82,9 @@ class DoubanDramaVersion(AbstractSite):
if dl > 3:
data["opening_date"] = "-".join(d[:3])
data["closing_date"] = "-".join(d[0 : 6 - dl] + d[3:dl])
actor_elem = h.xpath(p + "//dt[text()='主演:']/following-sibling::dd[1]/a")
actor_elem = self.query_list(
h, p + "//dt[text()='主演:']/following-sibling::dd[1]/a"
)
data["actor"] = []
for e in actor_elem:
n = "".join(e.xpath("span/text()")).strip()
@@ -88,7 +92,7 @@ class DoubanDramaVersion(AbstractSite):
t = re.sub(r"^[\s\(饰]*(.+)\)[\s\/]*$", r"\1", t).strip()
t = t if t != "/" else ""
data["actor"].append({"name": n, "role": t})
img_url_elem = h.xpath("//img[@itemprop='image']/@src")
img_url_elem = self.query_list(h, "//img[@itemprop='image']/@src")
data["cover_image_url"] = img_url_elem[0].strip() if img_url_elem else None
pd = ResourceContent(metadata=data)
pd.metadata["required_resources"] = [
@@ -128,78 +132,87 @@ class DoubanDrama(AbstractSite):
h = html.fromstring(r)
data = {}
title_elem = h.xpath("/html/body//h1/span/text()")
title_elem = self.query_list(h, "/html/body//h1/span/text()")
if title_elem:
data["title"] = title_elem[0].strip()
data["orig_title"] = title_elem[1] if len(title_elem) > 1 else None
else:
raise ParseError(self, "title")
other_title_elem = h.xpath(
"//dl//dt[text()='又名:']/following::dd[@itemprop='name']/text()"
other_title_elem = self.query_list(
h, "//dl//dt[text()='又名:']/following::dd[@itemprop='name']/text()"
)
data["other_title"] = other_title_elem
plot_elem = h.xpath("//div[@class='pure-text']/div[@class='full']/text()")
if len(plot_elem) == 0:
plot_elem = h.xpath(
"//div[@class='pure-text']/div[@class='abstract']/text()"
plot_elem = self.query_list(
h, "//div[@class='pure-text']/div[@class='full']/text()"
)
if len(plot_elem) == 0:
plot_elem = h.xpath("//div[@class='pure-text']/text()")
plot_elem = self.query_list(
h, "//div[@class='pure-text']/div[@class='abstract']/text()"
)
if len(plot_elem) == 0:
plot_elem = self.query_list(h, "//div[@class='pure-text']/text()")
data["brief"] = "\n".join(plot_elem)
data["genre"] = [
s.strip()
for s in h.xpath(
"//div[@class='meta']//dl//dt[text()='类型:']/following-sibling::dd[@itemprop='genre']/text()"
for s in self.query_list(
h,
"//div[@class='meta']//dl//dt[text()='类型:']/following-sibling::dd[@itemprop='genre']/text()",
)
]
# data["version"] = [
# s.strip()
# for s in h.xpath(
# for s in self.query_list(h,
# "//dl//dt[text()='版本:']/following-sibling::dd[@class='titles']/a//text()"
# )
# ]
data["director"] = [
s.strip()
for s in h.xpath(
"//div[@class='meta']/dl//dt[text()='导演:']/following-sibling::dd/a[@itemprop='director']//text()"
for s in self.query_list(
h,
"//div[@class='meta']/dl//dt[text()='导演:']/following-sibling::dd/a[@itemprop='director']//text()",
)
]
data["composer"] = [
s.strip()
for s in h.xpath(
"//div[@class='meta']/dl//dt[text()='作曲:']/following-sibling::dd/a[@itemprop='musicBy']//text()"
for s in self.query_list(
h,
"//div[@class='meta']/dl//dt[text()='作曲:']/following-sibling::dd/a[@itemprop='musicBy']//text()",
)
]
data["choreographer"] = [
s.strip()
for s in h.xpath(
"//div[@class='meta']/dl//dt[text()='编舞:']/following-sibling::dd/a[@itemprop='choreographer']//text()"
for s in self.query_list(
h,
"//div[@class='meta']/dl//dt[text()='编舞:']/following-sibling::dd/a[@itemprop='choreographer']//text()",
)
]
data["troupe"] = [
s.strip()
for s in h.xpath(
"//div[@class='meta']/dl//dt[text()='演出团体:']/following-sibling::dd/a[@itemprop='performer']//text()"
for s in self.query_list(
h,
"//div[@class='meta']/dl//dt[text()='演出团体:']/following-sibling::dd/a[@itemprop='performer']//text()",
)
]
data["playwright"] = [
s.strip()
for s in h.xpath(
"//div[@class='meta']/dl//dt[text()='编剧:']/following-sibling::dd/a[@itemprop='author']//text()"
for s in self.query_list(
h,
"//div[@class='meta']/dl//dt[text()='编剧:']/following-sibling::dd/a[@itemprop='author']//text()",
)
]
data["actor"] = [
{"name": s.strip(), "role": ""}
for s in h.xpath(
"//div[@class='meta']/dl//dt[text()='主演:']/following-sibling::dd/a[@itemprop='actor']//text()"
for s in self.query_list(
h,
"//div[@class='meta']/dl//dt[text()='主演:']/following-sibling::dd/a[@itemprop='actor']//text()",
)
]
date_elem = h.xpath(
"//div[@class='meta']//dl//dt[text()='演出日期:']/following::dd/text()"
date_elem = self.query_list(
h, "//div[@class='meta']//dl//dt[text()='演出日期:']/following::dd/text()"
)
data["opening_date"] = date_elem[0] if date_elem else None
if data["opening_date"]:
@@ -211,12 +224,15 @@ class DoubanDrama(AbstractSite):
data["location"] = [
s.strip()
for s in h.xpath(
"//div[@class='meta']/dl//dt[text()='演出剧院:']/following-sibling::dd/a[@itemprop='location']//text()"
for s in self.query_list(
h,
"//div[@class='meta']/dl//dt[text()='演出剧院:']/following-sibling::dd/a[@itemprop='location']//text()",
)
]
versions = h.xpath("//div[@id='versions']/div[@class='fluid-mods']/div/@id")
versions = self.query_list(
h, "//div[@id='versions']/div[@class='fluid-mods']/div/@id"
)
data["related_resources"] = list(
map(
lambda v: {
@@ -229,7 +245,7 @@ class DoubanDrama(AbstractSite):
versions,
)
)
img_url_elem = h.xpath("//img[@itemprop='image']/@src")
img_url_elem = self.query_list(h, "//img[@itemprop='image']/@src")
data["cover_image_url"] = img_url_elem[0].strip() if img_url_elem else None
data["localized_title"] = (
[{"lang": "zh-cn", "text": data["title"]}]

View file

@@ -7,9 +7,7 @@ from catalog.models import *
from common.models.lang import detect_language
from common.models.misc import uniq
from .douban import DoubanDownloader
_logger = logging.getLogger(__name__)
from .douban import DoubanDownloader, DoubanSearcher
@SiteManager.register
@@ -26,18 +24,18 @@ class DoubanGame(AbstractSite):
DEFAULT_MODEL = Game
@classmethod
def id_to_url(self, id_value):
def id_to_url(cls, id_value):
return "https://www.douban.com/game/" + id_value + "/"
def scrape(self):
content = DoubanDownloader(self.url).download().html()
elem = content.xpath("//div[@id='content']/h1/text()")
elem = self.query_list(content, "//div[@id='content']/h1/text()")
title = elem[0].strip() if len(elem) else None
if not title:
raise ParseError(self, "title")
elem = content.xpath("//div[@id='comments']//h2/text()")
elem = self.query_list(content, "//div[@id='comments']//h2/text()")
title2 = elem[0].strip() if len(elem) else ""
if title2:
sp = title2.strip().rsplit("的短评", 1)
@@ -48,46 +46,52 @@ class DoubanGame(AbstractSite):
else:
orig_title = ""
other_title_elem = content.xpath(
"//dl[@class='thing-attr']//dt[text()='别名:']/following-sibling::dd[1]/text()"
other_title_elem = self.query_list(
content,
"//dl[@class='thing-attr']//dt[text()='别名:']/following-sibling::dd[1]/text()",
)
other_title = (
other_title_elem[0].strip().split(" / ") if other_title_elem else []
)
developer_elem = content.xpath(
"//dl[@class='thing-attr']//dt[text()='开发商:']/following-sibling::dd[1]/text()"
developer_elem = self.query_list(
content,
"//dl[@class='thing-attr']//dt[text()='开发商:']/following-sibling::dd[1]/text()",
)
developer = developer_elem[0].strip().split(" / ") if developer_elem else None
publisher_elem = content.xpath(
"//dl[@class='thing-attr']//dt[text()='发行商:']/following-sibling::dd[1]/text()"
publisher_elem = self.query_list(
content,
"//dl[@class='thing-attr']//dt[text()='发行商:']/following-sibling::dd[1]/text()",
)
publisher = publisher_elem[0].strip().split(" / ") if publisher_elem else None
platform_elem = content.xpath(
"//dl[@class='thing-attr']//dt[text()='平台:']/following-sibling::dd[1]/a/text()"
platform_elem = self.query_list(
content,
"//dl[@class='thing-attr']//dt[text()='平台:']/following-sibling::dd[1]/a/text()",
)
platform = platform_elem if platform_elem else None
genre_elem = content.xpath(
"//dl[@class='thing-attr']//dt[text()='类型:']/following-sibling::dd[1]/a/text()"
genre_elem = self.query_list(
content,
"//dl[@class='thing-attr']//dt[text()='类型:']/following-sibling::dd[1]/a/text()",
)
genre = None
if genre_elem:
genre = [g for g in genre_elem if g != "游戏"]
date_elem = content.xpath(
"//dl[@class='thing-attr']//dt[text()='发行日期:']/following-sibling::dd[1]/text()"
date_elem = self.query_list(
content,
"//dl[@class='thing-attr']//dt[text()='发行日期:']/following-sibling::dd[1]/text()",
)
release_date = dateparser.parse(date_elem[0].strip()) if date_elem else None
release_date = release_date.strftime("%Y-%m-%d") if release_date else None
brief_elem = content.xpath("//div[@class='mod item-desc']/p/text()")
brief_elem = self.query_list(content, "//div[@class='mod item-desc']/p/text()")
brief = "\n".join(brief_elem) if brief_elem else ""
img_url_elem = content.xpath(
"//div[@class='item-subject-info']/div[@class='pic']//img/@src"
img_url_elem = self.query_list(
content, "//div[@class='item-subject-info']/div[@class='pic']//img/@src"
)
img_url = img_url_elem[0].strip() if img_url_elem else None

View file

@@ -1,16 +1,17 @@
import json
import logging
from loguru import logger
from catalog.common import *
from catalog.movie.models import *
from catalog.tv.models import *
from common.models.lang import detect_language
from common.models.misc import int_
from .douban import *
from .douban import DoubanDownloader, DoubanSearcher
from .tmdb import TMDB_TV, TMDB_TVSeason, query_tmdb_tv_episode, search_tmdb_by_imdb_id
_logger = logging.getLogger(__name__)
@SiteManager.register
class DoubanMovie(AbstractSite):
@@ -29,11 +30,15 @@ class DoubanMovie(AbstractSite):
def id_to_url(cls, id_value):
return "https://movie.douban.com/subject/" + id_value + "/"
@classmethod
def search(cls, q: str, p: int = 1):
return DoubanSearcher.search(ItemCategory.Movie, "movie", q, p)
def scrape(self):
content = DoubanDownloader(self.url).download().html()
try:
schema_data = "".join(
content.xpath('//script[@type="application/ld+json"]/text()')
self.query_list(content, '//script[@type="application/ld+json"]/text()')
).replace(
"\n", ""
) # strip \n because multi-line strings are not properly encoded in JSON by douban
@@ -42,13 +47,13 @@ class DoubanMovie(AbstractSite):
d = {}
try:
raw_title = content.xpath("//span[@property='v:itemreviewed']/text()")[
0
].strip()
raw_title = self.query_list(
content, "//span[@property='v:itemreviewed']/text()"
)[0].strip()
except IndexError:
raise ParseError(self, "title")
orig_title = content.xpath("//img[@rel='v:image']/@alt")[0].strip()
orig_title = self.query_list(content, "//img[@rel='v:image']/@alt")[0].strip()
title = raw_title.split(orig_title)[0].strip()
# if there is no Chinese title
if title == "":
@@ -58,40 +63,46 @@ class DoubanMovie(AbstractSite):
orig_title = None
# there are two html formats for authors and translators
other_title_elem = content.xpath(
"//div[@id='info']//span[text()='又名:']/following-sibling::text()[1]"
other_title_elem = self.query_list(
content,
"//div[@id='info']//span[text()='又名:']/following-sibling::text()[1]",
)
other_title = (
other_title_elem[0].strip().split(" / ") if other_title_elem else None
)
imdb_elem = content.xpath(
"//div[@id='info']//span[text()='IMDb链接:']/following-sibling::a[1]/text()"
imdb_elem = self.query_list(
content,
"//div[@id='info']//span[text()='IMDb链接:']/following-sibling::a[1]/text()",
)
if not imdb_elem:
imdb_elem = content.xpath(
"//div[@id='info']//span[text()='IMDb:']/following-sibling::text()[1]"
imdb_elem = self.query_list(
content,
"//div[@id='info']//span[text()='IMDb:']/following-sibling::text()[1]",
)
imdb_code = imdb_elem[0].strip() if imdb_elem else None
director_elem = content.xpath(
"//div[@id='info']//span[text()='导演']/following-sibling::span[1]/a/text()"
director_elem = self.query_list(
content,
"//div[@id='info']//span[text()='导演']/following-sibling::span[1]/a/text()",
)
director = director_elem if director_elem else None
playwright_elem = content.xpath(
"//div[@id='info']//span[text()='编剧']/following-sibling::span[1]/a/text()"
playwright_elem = self.query_list(
content,
"//div[@id='info']//span[text()='编剧']/following-sibling::span[1]/a/text()",
)
playwright = (
list(map(lambda a: a[:200], playwright_elem)) if playwright_elem else None
)
actor_elem = content.xpath(
"//div[@id='info']//span[text()='主演']/following-sibling::span[1]/a/text()"
actor_elem = self.query_list(
content,
"//div[@id='info']//span[text()='主演']/following-sibling::span[1]/a/text()",
)
actor = list(map(lambda a: a[:200], actor_elem)) if actor_elem else None
genre_elem = content.xpath("//span[@property='v:genre']/text()")
genre_elem = self.query_list(content, "//span[@property='v:genre']/text()")
genre = []
if genre_elem:
for g in genre_elem:
@@ -102,7 +113,9 @@ class DoubanMovie(AbstractSite):
g = "惊悚"
genre.append(g)
showtime_elem = content.xpath("//span[@property='v:initialReleaseDate']/text()")
showtime_elem = self.query_list(
content, "//span[@property='v:initialReleaseDate']/text()"
)
if showtime_elem:
showtime = []
for st in showtime_elem:
@@ -122,39 +135,39 @@ class DoubanMovie(AbstractSite):
else:
showtime = None
site_elem = content.xpath(
"//div[@id='info']//span[text()='官方网站:']/following-sibling::a[1]/@href"
site_elem = self.query_list(
content,
"//div[@id='info']//span[text()='官方网站:']/following-sibling::a[1]/@href",
)
site = site_elem[0].strip()[:200] if site_elem else None
if site and not re.match(r"http.+", site):
site = None
area_elem = content.xpath(
"//div[@id='info']//span[text()='制片国家/地区:']/following-sibling::text()[1]"
area_elem = self.query_list(
content,
"//div[@id='info']//span[text()='制片国家/地区:']/following-sibling::text()[1]",
)
if area_elem:
area = [a.strip()[:100] for a in area_elem[0].split("/")]
else:
area = None
language_elem = content.xpath(
"//div[@id='info']//span[text()='语言:']/following-sibling::text()[1]"
language_elem = self.query_list(
content,
"//div[@id='info']//span[text()='语言:']/following-sibling::text()[1]",
)
if language_elem:
language = [a.strip() for a in language_elem[0].split(" / ")]
else:
language = None
year_elem = content.xpath("//span[@class='year']/text()")
year = (
int(re.search(r"\d+", year_elem[0])[0])
if year_elem and re.search(r"\d+", year_elem[0])
else None
)
year_s = self.query_str(content, "//span[@class='year']/text()")
year_r = re.search(r"\d+", year_s) if year_s else None
year = int_(year_r[0]) if year_r else None
duration_elem = content.xpath("//span[@property='v:runtime']/text()")
other_duration_elem = content.xpath(
"//span[@property='v:runtime']/following-sibling::text()[1]"
duration_elem = self.query_list(content, "//span[@property='v:runtime']/text()")
other_duration_elem = self.query_list(
content, "//span[@property='v:runtime']/following-sibling::text()[1]"
)
if duration_elem:
duration = duration_elem[0].strip()
@@ -164,19 +177,21 @@ class DoubanMovie(AbstractSite):
else:
duration = None
season_elem = content.xpath(
"//*[@id='season']/option[@selected='selected']/text()"
season_elem = self.query_list(
content, "//*[@id='season']/option[@selected='selected']/text()"
)
if not season_elem:
season_elem = content.xpath(
"//div[@id='info']//span[text()='季数:']/following-sibling::text()[1]"
season_elem = self.query_list(
content,
"//div[@id='info']//span[text()='季数:']/following-sibling::text()[1]",
)
season = int(season_elem[0].strip()) if season_elem else None
else:
season = int(season_elem[0].strip())
episodes_elem = content.xpath(
"//div[@id='info']//span[text()='集数:']/following-sibling::text()[1]"
episodes_elem = self.query_list(
content,
"//div[@id='info']//span[text()='集数:']/following-sibling::text()[1]",
)
episodes = (
int(episodes_elem[0].strip())
@@ -184,8 +199,9 @@ class DoubanMovie(AbstractSite):
else None
)
single_episode_length_elem = content.xpath(
"//div[@id='info']//span[text()='单集片长:']/following-sibling::text()[1]"
single_episode_length_elem = self.query_list(
content,
"//div[@id='info']//span[text()='单集片长:']/following-sibling::text()[1]",
)
single_episode_length = (
single_episode_length_elem[0].strip()[:100]
@@ -195,16 +211,16 @@ class DoubanMovie(AbstractSite):
is_series = d.get("@type") == "TVSeries" or episodes is not None
brief_elem = content.xpath("//span[@class='all hidden']")
brief_elem = self.query_list(content, "//span[@class='all hidden']")
if not brief_elem:
brief_elem = content.xpath("//span[@property='v:summary']")
brief_elem = self.query_list(content, "//span[@property='v:summary']")
brief = (
"\n".join([e.strip() for e in brief_elem[0].xpath("./text()")])
if brief_elem
else None
)
img_url_elem = content.xpath("//img[@rel='v:image']/@src")
img_url_elem = self.query_list(content, "//img[@rel='v:image']/@src")
img_url = img_url_elem[0].strip() if img_url_elem else None
titles = set(
@@ -261,26 +277,26 @@ class DoubanMovie(AbstractSite):
pd.metadata.get("season_number")
and pd.metadata.get("season_number") != 1
):
_logger.warn(f"{imdb_code} matched imdb tv show, force season 1")
logger.warning(f"{imdb_code} matched imdb tv show, force season 1")
pd.metadata["season_number"] = 1
elif pd.metadata["preferred_model"] == "TVSeason" and has_episode:
if res_data["tv_episode_results"][0]["episode_number"] != 1:
_logger.warning(
logger.warning(
f"Douban Movie {self.url} IMDB {imdb_code} mapping to non-first episode in a season"
)
elif res_data["tv_episode_results"][0]["season_number"] == 1:
_logger.warning(
logger.warning(
f"Douban Movie {self.url} IMDB {imdb_code} mapping to first season episode in a season"
)
elif has_movie:
if pd.metadata["preferred_model"] != "Movie":
_logger.warn(f"{imdb_code} matched imdb movie, force Movie")
logger.warning(f"{imdb_code} matched imdb movie, force Movie")
pd.metadata["preferred_model"] = "Movie"
elif has_tv or has_episode:
_logger.warn(f"{imdb_code} matched imdb tv/episode, force TVSeason")
logger.warning(f"{imdb_code} matched imdb tv/episode, force TVSeason")
pd.metadata["preferred_model"] = "TVSeason"
else:
_logger.warn(f"{imdb_code} unknown to TMDB")
logger.warning(f"{imdb_code} unknown to TMDB")
pd.lookup_ids[IdType.IMDB] = imdb_code
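The logging hunks above swap the module-level stdlib logger for loguru: Logger.warn is a deprecated alias of warning, and loguru ships a ready-made logger object, so the logging.getLogger(__name__) boilerplate is dropped. The two patterns side by side (a sketch; the imdb code is an illustrative value):

imdb_code = "tt0000001"  # illustrative value

# before: stdlib logging, with the deprecated .warn alias
import logging
_logger = logging.getLogger(__name__)
_logger.warn(f"{imdb_code} matched imdb movie, force Movie")

# after: loguru's ready-made logger object
from loguru import logger
logger.warning(f"{imdb_code} matched imdb movie, force Movie")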

View file

@@ -7,9 +7,7 @@ from catalog.models import *
from catalog.music.utils import upc_to_gtin_13
from common.models.lang import detect_language
from .douban import DoubanDownloader
_logger = logging.getLogger(__name__)
from .douban import DoubanDownloader, DoubanSearcher
@SiteManager.register
@@ -29,58 +27,63 @@ class DoubanMusic(AbstractSite):
def id_to_url(cls, id_value):
return "https://music.douban.com/subject/" + id_value + "/"
@classmethod
def search(cls, q: str, p: int = 1):
return DoubanSearcher.search(ItemCategory.Music, "music", q, p)
def scrape(self):
content = DoubanDownloader(self.url).download().html()
elem = content.xpath("//h1/span/text()")
elem = self.query_list(content, "//h1/span/text()")
title = elem[0].strip() if len(elem) else None
if not title:
raise ParseError(self, "title")
artists_elem = content.xpath(
"//div[@id='info']/span/span[@class='pl']/a/text()"
artists_elem = self.query_list(
content, "//div[@id='info']/span/span[@class='pl']/a/text()"
)
artist = (
None if not artists_elem else list(map(lambda a: a[:200], artists_elem))
)
genre_elem = content.xpath(
"//div[@id='info']//span[text()='流派:']/following::text()[1]"
genre_elem = self.query_list(
content, "//div[@id='info']//span[text()='流派:']/following::text()[1]"
)
genre = genre_elem[0].strip().split(" / ") if genre_elem else []
date_elem = content.xpath(
"//div[@id='info']//span[text()='发行时间:']/following::text()[1]"
date_elem = self.query_list(
content, "//div[@id='info']//span[text()='发行时间:']/following::text()[1]"
)
release_date = dateparser.parse(date_elem[0].strip()) if date_elem else None
release_date = release_date.strftime("%Y-%m-%d") if release_date else None
company_elem = content.xpath(
"//div[@id='info']//span[text()='出版者:']/following::text()[1]"
company_elem = self.query_list(
content, "//div[@id='info']//span[text()='出版者:']/following::text()[1]"
)
company = company_elem[0].strip() if company_elem else None
track_list_elem = content.xpath(
"//div[@class='track-list']/div[@class='indent']/div/text()"
track_list_elem = self.query_list(
content, "//div[@class='track-list']/div[@class='indent']/div/text()"
)
if track_list_elem:
track_list = "\n".join([track.strip() for track in track_list_elem])
else:
track_list = None
brief_elem = content.xpath("//span[@class='all hidden']")
brief_elem = self.query_list(content, "//span[@class='all hidden']")
if not brief_elem:
brief_elem = content.xpath("//span[@property='v:summary']")
brief_elem = self.query_list(content, "//span[@property='v:summary']")
brief = (
"\n".join([e.strip() for e in brief_elem[0].xpath("./text()")])
if brief_elem
else None
)
img_url_elem = content.xpath("//div[@id='mainpic']//img/@src")
img_url_elem = self.query_list(content, "//div[@id='mainpic']//img/@src")
img_url = img_url_elem[0].strip() if img_url_elem else None
other_elem = content.xpath(
"//div[@id='info']//span[text()='又名:']/following-sibling::text()[1]"
other_elem = self.query_list(
content,
"//div[@id='info']//span[text()='又名:']/following-sibling::text()[1]",
)
other_title = other_elem[0].strip().split(" / ") if other_elem else []
lang = detect_language(f"{title} {brief}")
@@ -103,28 +106,33 @@ class DoubanMusic(AbstractSite):
}
gtin = None
isrc = None
other_elem = content.xpath(
"//div[@id='info']//span[text()='专辑类型:']/following-sibling::text()[1]"
other_elem = self.query_list(
content,
"//div[@id='info']//span[text()='专辑类型:']/following-sibling::text()[1]",
)
if other_elem:
data["album_type"] = other_elem[0].strip()
other_elem = content.xpath(
"//div[@id='info']//span[text()='介质:']/following-sibling::text()[1]"
other_elem = self.query_list(
content,
"//div[@id='info']//span[text()='介质:']/following-sibling::text()[1]",
)
if other_elem:
data["media"] = other_elem[0].strip()
other_elem = content.xpath(
"//div[@id='info']//span[text()='ISRC:']/following-sibling::text()[1]"
other_elem = self.query_list(
content,
"//div[@id='info']//span[text()='ISRC:']/following-sibling::text()[1]",
)
if other_elem:
isrc = other_elem[0].strip()
other_elem = content.xpath(
"//div[@id='info']//span[text()='条形码:']/following-sibling::text()[1]"
other_elem = self.query_list(
content,
"//div[@id='info']//span[text()='条形码:']/following-sibling::text()[1]",
)
if other_elem:
gtin = upc_to_gtin_13(other_elem[0].strip())
other_elem = content.xpath(
"//div[@id='info']//span[text()='碟片数:']/following-sibling::text()[1]"
other_elem = self.query_list(
content,
"//div[@id='info']//span[text()='碟片数:']/following-sibling::text()[1]",
)
if other_elem:
data["disc_count"] = other_elem[0].strip()

View file

@@ -80,7 +80,6 @@ exclude = [
"journal/tests.py",
"neodb",
"**/migrations",
"**/sites/douban_*",
"neodb-takahe",
]
reportIncompatibleVariableOverride = false