From 7cf8c085d194f8dbae9b64d57fa17027730659e6 Mon Sep 17 00:00:00 2001
From: Jigsaw
Date: Wed, 18 Sep 2024 10:34:31 +0000
Subject: [PATCH] add qidian,ypshuo parser

---
Review notes. This text sits between the three-dash separator and the
diffstat, so it is neither part of the commit message nor part of the diff.

* Purpose: registers two new catalog sites, Qidian and Ypshuo. Each gets a
  SiteName entry and an IdType entry in catalog/common/models.py and an
  import in catalog/sites/__init__.py; both site classes set
  DEFAULT_MODEL = Edition and an empty WIKI_PROPERTY_ID.
* catalog/sites/qidian.py: Qidian.scrape() downloads self.url with
  ProxiedDownloader and parses it as HTML. The title is read from the
  element whose id is "bookName", falling back to
  "Unknown Title {id_value}". The description and the first author are
  read through absolute /html/body/... XPath expressions; when either
  expression matches nothing, the returned metadata carries None as the
  description text or as the author value. The cover URL is built from
  the book id rather than read from the page.
* catalog/sites/ypshuo.py: Ypshuo.scrape() requests
  /api/novel/getInfo?novelId={id_value} with BasicDownloader and indexes
  o["data"][...] directly, so a response without those keys raises
  instead of falling back to a default.
* Both new modules import logging but do not reference it.
* common/static/scss/_sitelabel.scss gains a .qidian label style only;
  this patch adds no .ypshuo style.
* NOTE(review): the line structure of this patch was restored from a
  whitespace-collapsed copy. Diffstat padding, spacing before inline
  comments and the indentation of context lines (models.py,
  _sitelabel.scss) are presumed, not verified -- check them against the
  target tree, or apply with whitespace-tolerant options. The author
  e-mail address was already absent from the From: header.

 catalog/common/models.py           |  4 +++
 catalog/sites/__init__.py          |  2 ++
 catalog/sites/qidian.py            | 54 ++++++++++++++++++++++++++++++
 catalog/sites/ypshuo.py            | 33 ++++++++++++++++++
 common/static/scss/_sitelabel.scss |  6 ++++
 5 files changed, 99 insertions(+)
 create mode 100644 catalog/sites/qidian.py
 create mode 100644 catalog/sites/ypshuo.py

diff --git a/catalog/common/models.py b/catalog/common/models.py
index b623b6b0..b2d894d5 100644
--- a/catalog/common/models.py
+++ b/catalog/common/models.py
@@ -57,6 +57,8 @@ class SiteName(models.TextChoices):
     Discogs = "discogs", _("Discogs")  # type:ignore[reportCallIssue]
     AppleMusic = "apple_music", _("Apple Music")  # type:ignore[reportCallIssue]
     Fediverse = "fedi", _("Fediverse")  # type:ignore[reportCallIssue]
+    Qidian = "qidian", _("Qidian")  # type:ignore[reportCallIssue]
+    Ypshuo = "ypshuo", _("Ypshuo")  # type:ignore[reportCallIssue]
 
 
 class IdType(models.TextChoices):
@@ -118,6 +120,8 @@ class IdType(models.TextChoices):
     ApplePodcast = "apple_podcast", _("Apple Podcast")  # type:ignore[reportCallIssue]
     AppleMusic = "apple_music", _("Apple Music")  # type:ignore[reportCallIssue]
     Fediverse = "fedi", _("Fediverse")  # type:ignore[reportCallIssue]
+    Qidian = "qidian", _("Qidian")  # type:ignore[reportCallIssue]
+    Ypshuo = "ypshuo", _("Ypshuo")  # type:ignore[reportCallIssue]
 
 
 IdealIdTypes = [
diff --git a/catalog/sites/__init__.py b/catalog/sites/__init__.py
index 71c75948..1e84cf65 100644
--- a/catalog/sites/__init__.py
+++ b/catalog/sites/__init__.py
@@ -15,9 +15,11 @@ from .goodreads import Goodreads
 from .google_books import GoogleBooks
 from .igdb import IGDB
 from .imdb import IMDB
+from .qidian import Qidian
 from .rss import RSS
 from .spotify import Spotify
 from .steam import Steam
 from .tmdb import TMDB_Movie
+from .ypshuo import Ypshuo
 
 # from .apple_podcast import ApplePodcast
diff --git a/catalog/sites/qidian.py b/catalog/sites/qidian.py
new file mode 100644
index 00000000..723694f1
--- /dev/null
+++ b/catalog/sites/qidian.py
@@ -0,0 +1,54 @@
+import logging
+
+from catalog.common import *
+from catalog.models import *
+
+
+@SiteManager.register
+class Qidian(AbstractSite):
+    SITE_NAME = SiteName.Qidian
+    ID_TYPE = IdType.Qidian
+    URL_PATTERNS = [
+        r"https://www\.qidian\.com/book/(\d+)",
+        r"https://book\.qidian\.com/info/(\d+)",
+    ]
+    WIKI_PROPERTY_ID = ""
+    DEFAULT_MODEL = Edition
+
+    @classmethod
+    def id_to_url(cls, id_value):
+        return f"https://book.qidian.com/info/{id_value}"
+
+    def scrape(self):
+        content = ProxiedDownloader(self.url).download().html()
+        title_elem = content.xpath('//*[@id="bookName"]/text()')
+        title = (
+            title_elem[0].strip()  # type:ignore
+            if title_elem
+            else f"Unknown Title {self.id_value}"
+        )
+
+        brief_elem = content.xpath(
+            "/html/body/div[1]/div[5]/div[3]/div[1]/div/div[1]/div[1]/p/text()"
+        )
+        brief = (
+            "\n".join(p.strip() for p in brief_elem)  # type:ignore
+            if brief_elem
+            else None
+        )
+
+        img_url = f"https://bookcover.yuewen.com/qdbimg/349573/{self.id_value}"
+
+        author_elem = content.xpath(
+            "/html/body/div[1]/div[5]/div[1]/div[2]/h1/span[1]/a/text()"
+        )
+        authors = [author_elem[0].strip()] if author_elem else None  # type:ignore
+
+        return ResourceContent(
+            metadata={
+                "localized_title": [{"lang": "zh-cn", "text": title}],
+                "author": authors,
+                "localized_description": [{"lang": "zh-cn", "text": brief}],
+                "cover_image_url": img_url,
+            }
+        )
diff --git a/catalog/sites/ypshuo.py b/catalog/sites/ypshuo.py
new file mode 100644
index 00000000..f5c80f46
--- /dev/null
+++ b/catalog/sites/ypshuo.py
@@ -0,0 +1,33 @@
+import logging
+
+from catalog.common import *
+from catalog.models import *
+
+
+@SiteManager.register
+class Ypshuo(AbstractSite):
+    SITE_NAME = SiteName.Ypshuo
+    ID_TYPE = IdType.Ypshuo
+    URL_PATTERNS = [
+        r"https://www\.ypshuo\.com/novel/(\d+)\.html",
+    ]
+    WIKI_PROPERTY_ID = ""
+    DEFAULT_MODEL = Edition
+
+    @classmethod
+    def id_to_url(cls, id_value):
+        return f"https://www.ypshuo.com/novel/{id_value}.html"
+
+    def scrape(self):
+        api_url = f"https://www.ypshuo.com/api/novel/getInfo?novelId={self.id_value}"
+        o = BasicDownloader(api_url).download().json()
+        return ResourceContent(
+            metadata={
+                "localized_title": [{"lang": "zh-cn", "text": o["data"]["novel_name"]}],
+                "author": [o["data"]["author_name"]],
+                "localized_description": [
+                    {"lang": "zh-cn", "text": o["data"]["synopsis"]}
+                ],
+                "cover_image_url": o["data"]["novel_img"],
+            },
+        )
diff --git a/common/static/scss/_sitelabel.scss b/common/static/scss/_sitelabel.scss
index 989293af..4f310d17 100644
--- a/common/static/scss/_sitelabel.scss
+++ b/common/static/scss/_sitelabel.scss
@@ -18,6 +18,12 @@
     white-space: nowrap;
   }
 
+  .qidian {
+    border: none;
+    color: white;
+    background-color: #9e252b;
+  }
+
   .douban {
     border: none;
     color: white;