Add Qidian and Ypshuo parsers

This commit is contained in:
Jigsaw 2024-09-18 10:34:31 +00:00 committed by Henri Dickson
parent af7f733cf9
commit 7cf8c085d1
5 changed files with 99 additions and 0 deletions

View file

@ -57,6 +57,8 @@ class SiteName(models.TextChoices):
Discogs = "discogs", _("Discogs") # type:ignore[reportCallIssue]
AppleMusic = "apple_music", _("Apple Music") # type:ignore[reportCallIssue]
Fediverse = "fedi", _("Fediverse") # type:ignore[reportCallIssue]
Qidian = "qidian", _("Qidian") # type:ignore[reportCallIssue]
Ypshuo = "ypshuo", _("Ypshuo") # type:ignore[reportCallIssue]
class IdType(models.TextChoices):
@ -118,6 +120,8 @@ class IdType(models.TextChoices):
ApplePodcast = "apple_podcast", _("Apple Podcast") # type:ignore[reportCallIssue]
AppleMusic = "apple_music", _("Apple Music") # type:ignore[reportCallIssue]
Fediverse = "fedi", _("Fediverse") # type:ignore[reportCallIssue]
Qidian = "qidian", _("Qidian") # type:ignore[reportCallIssue]
Ypshuo = "ypshuo", _("Ypshuo") # type:ignore[reportCallIssue]
IdealIdTypes = [

View file

@ -15,9 +15,11 @@ from .goodreads import Goodreads
from .google_books import GoogleBooks
from .igdb import IGDB
from .imdb import IMDB
from .qidian import Qidian
from .rss import RSS
from .spotify import Spotify
from .steam import Steam
from .tmdb import TMDB_Movie
from .ypshuo import Ypshuo
# from .apple_podcast import ApplePodcast

54
catalog/sites/qidian.py Normal file
View file

@ -0,0 +1,54 @@
import logging
from catalog.common import *
from catalog.models import *
@SiteManager.register
class Qidian(AbstractSite):
    """Site adapter for Qidian book pages.

    Recognizes ``https://www.qidian.com/book/<id>`` and
    ``https://book.qidian.com/info/<id>`` URLs and scrapes the title, author
    and synopsis from the page HTML.
    """

    SITE_NAME = SiteName.Qidian
    ID_TYPE = IdType.Qidian
    URL_PATTERNS = [
        r"https://www\.qidian\.com/book/(\d+)",
        r"https://book\.qidian\.com/info/(\d+)",
    ]
    WIKI_PROPERTY_ID = ""
    DEFAULT_MODEL = Edition

    @classmethod
    def id_to_url(cls, id_value):
        """Return the ``book.qidian.com/info/`` URL for ``id_value``."""
        return f"https://book.qidian.com/info/{id_value}"

    def scrape(self):
        """Download the book page and build a ``ResourceContent`` from it.

        Returns:
            A ``ResourceContent`` whose metadata holds the localized title,
            the author list (``None`` when no author node is found), the
            localized description (empty list when no synopsis is found) and
            the cover image URL.
        """
        content = ProxiedDownloader(self.url).download().html()

        title_elem = content.xpath('//*[@id="bookName"]/text()')
        title = (
            title_elem[0].strip()  # type:ignore
            if title_elem
            else f"Unknown Title {self.id_value}"
        )

        # NOTE(review): absolute positional XPath; it will silently match
        # nothing if the page layout changes, hence the fallbacks below.
        brief_elem = content.xpath(
            "/html/body/div[1]/div[5]/div[3]/div[1]/div/div[1]/div[1]/p/text()"
        )
        brief = (
            "\n".join(p.strip() for p in brief_elem)  # type:ignore
            if brief_elem
            else None
        )

        # The cover URL is derived from the book id rather than scraped.
        img_url = f"https://bookcover.yuewen.com/qdbimg/349573/{self.id_value}"

        author_elem = content.xpath(
            "/html/body/div[1]/div[5]/div[1]/div[2]/h1/span[1]/a/text()"
        )
        authors = [author_elem[0].strip()] if author_elem else None  # type:ignore

        # Only emit a description entry when there is actual text; an entry
        # with ``text: None`` (or an empty string) carries no information.
        description = [{"lang": "zh-cn", "text": brief}] if brief else []

        return ResourceContent(
            metadata={
                "localized_title": [{"lang": "zh-cn", "text": title}],
                "author": authors,
                "localized_description": description,
                "cover_image_url": img_url,
            }
        )

33
catalog/sites/ypshuo.py Normal file
View file

@ -0,0 +1,33 @@
import logging
from catalog.common import *
from catalog.models import *
@SiteManager.register
class Ypshuo(AbstractSite):
    """Site adapter for ypshuo.com novel pages.

    Recognizes ``https://www.ypshuo.com/novel/<id>.html`` URLs and reads the
    novel's metadata from the site's ``getInfo`` JSON endpoint.
    """

    SITE_NAME = SiteName.Ypshuo
    ID_TYPE = IdType.Ypshuo
    URL_PATTERNS = [
        r"https://www\.ypshuo\.com/novel/(\d+)\.html",
    ]
    WIKI_PROPERTY_ID = ""
    DEFAULT_MODEL = Edition

    @classmethod
    def id_to_url(cls, id_value):
        """Return the novel page URL for ``id_value``."""
        return f"https://www.ypshuo.com/novel/{id_value}.html"

    def scrape(self):
        """Fetch the novel's JSON record and wrap it in a ``ResourceContent``.

        The title, author, synopsis and cover URL are read from the ``data``
        object of the API response.
        """
        endpoint = f"https://www.ypshuo.com/api/novel/getInfo?novelId={self.id_value}"
        payload = BasicDownloader(endpoint).download().json()
        novel = payload["data"]

        # Pull the fields out in the order they appear in the metadata.
        title = novel["novel_name"]
        author = novel["author_name"]
        synopsis = novel["synopsis"]
        cover = novel["novel_img"]

        metadata = {
            "localized_title": [{"lang": "zh-cn", "text": title}],
            "author": [author],
            "localized_description": [{"lang": "zh-cn", "text": synopsis}],
            "cover_image_url": cover,
        }
        return ResourceContent(metadata=metadata)

View file

@ -18,6 +18,12 @@
white-space: nowrap;
}
/* Elements with the `qidian` class: borderless, white text on dark red. */
.qidian {
background-color: #9e252b;
color: white;
border: none;
}
.douban {
border: none;
color: white;