lib.itmens/catalog/sites/qidian.py
2024-10-19 00:50:46 -04:00

54 lines
1.6 KiB
Python

import logging
from catalog.common import *
from catalog.models import *
@SiteManager.register
class Qidian(AbstractSite):
    """Catalog site adapter for book pages on qidian.com.

    Recognizes ``https://www.qidian.com/book/<id>`` and
    ``https://book.qidian.com/info/<id>`` URLs and scrapes the title,
    author, description and cover image URL of a book page into a
    ``ResourceContent``.
    """

    SITE_NAME = SiteName.Qidian
    ID_TYPE = IdType.Qidian
    # The single capture group in each pattern is the numeric book id.
    URL_PATTERNS = [
        r"https://www\.qidian\.com/book/(\d+)",
        r"https://book\.qidian\.com/info/(\d+)",
    ]
    WIKI_PROPERTY_ID = ""
    DEFAULT_MODEL = Edition

    @classmethod
    def id_to_url(cls, id_value):
        """Return the ``book.qidian.com/info`` URL for the given book id."""
        return f"https://book.qidian.com/info/{id_value}"

    def scrape(self):
        """Download the book page at ``self.url`` and extract its metadata.

        Returns:
            A ``ResourceContent`` whose ``metadata`` dict contains:

            * ``localized_title`` - one ``zh-cn`` entry; falls back to
              ``"Unknown Title <id>"`` when the title node is missing.
            * ``author`` - a one-element list with the first matched
              author name, or ``None`` when no author node is found.
            * ``localized_description`` - one ``zh-cn`` entry, or an empty
              list when the page has no description text.
            * ``cover_image_url`` - URL built from ``self.id_value``.
        """
        # NOTE(review): html() presumably returns an lxml tree (it is
        # queried with .xpath() below) - confirm against ProxiedDownloader.
        content = ProxiedDownloader(self.url).download().html()

        title_elem = content.xpath('//*[@id="bookName"]/text()')
        title = (
            title_elem[0].strip()  # type:ignore
            if title_elem
            else f"Unknown Title {self.id_value}"
        )

        # NOTE(review): absolute, position-based XPaths; they stop matching
        # if the page layout shifts, in which case the fallbacks below apply.
        brief_elem = content.xpath(
            "/html/body/div[1]/div[5]/div[3]/div[1]/div/div[1]/div[1]/p/text()"
        )
        brief = (
            "\n".join(p.strip() for p in brief_elem)  # type:ignore
            if brief_elem
            else None
        )

        # The cover URL is derived from the book id alone rather than scraped.
        img_url = f"https://bookcover.yuewen.com/qdbimg/349573/{self.id_value}"

        author_elem = content.xpath(
            "/html/body/div[1]/div[5]/div[1]/div[2]/h1/span[1]/a/text()"
        )
        authors = [author_elem[0].strip()] if author_elem else None  # type:ignore

        # Only emit a description entry when there is text for it; an entry
        # with ``"text": None`` is not a valid localized string.
        localized_description = (
            [{"lang": "zh-cn", "text": brief}] if brief else []
        )

        return ResourceContent(
            metadata={
                "localized_title": [{"lang": "zh-cn", "text": title}],
                "author": authors,
                "localized_description": localized_description,
                "cover_image_url": img_url,
            }
        )