fix some lint issues
This commit is contained in:
parent
ea4f52dfa6
commit
86b1ee19e0
9 changed files with 288 additions and 193 deletions
|
@ -104,7 +104,7 @@ class AbstractSite:
|
|||
return content.xpath(query)[0].strip()
|
||||
|
||||
@staticmethod
def query_list(content, query: str) -> list:
    """Evaluate the XPath ``query`` on ``content`` and return the matches as a list.

    Args:
        content: Any object exposing an ``xpath(query)`` method.
        query: XPath expression to evaluate.

    Returns:
        A new list of matches. The element type is left open because callers
        use the results both as text and as element nodes. A scalar result
        (string, number or boolean) is wrapped in a one-element list instead
        of being split into characters or raising ``TypeError``.
    """
    result = content.xpath(query)
    # Scalars must be checked first: a str is iterable, so list() would
    # silently explode it into single characters.
    if isinstance(result, (str, bytes, int, float)):
        return [result]
    return list(result)
|
||||
|
||||
@classmethod
|
||||
|
|
|
@ -79,6 +79,9 @@ class ExternalSearchResultItem:
|
|||
self.display_description = brief
|
||||
self.cover_image_url = cover_url
|
||||
|
||||
def __repr__(self):
    """Return a one-line summary: the category in brackets, then the display title and the URL."""
    summary = f"[{self.category}] {self.display_title} {self.url}"
    return summary
|
||||
|
||||
@property
def verbose_category_name(self):
    """Return the ``label`` of this item's category, or ``""`` when the category is falsy."""
    if not self.category:
        return ""
    return self.category.label
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
import json
|
||||
import re
|
||||
|
||||
from catalog.common import *
|
||||
from catalog.search.models import ExternalSearchResultItem
|
||||
|
||||
RE_NUMBERS = re.compile(r"\d+\d*")
|
||||
RE_WHITESPACES = re.compile(r"\s+")
|
||||
|
@ -30,3 +32,35 @@ class DoubanDownloader(ProxiedDownloader):
|
|||
return RESPONSE_OK
|
||||
else:
|
||||
return RESPONSE_INVALID_CONTENT
|
||||
|
||||
|
||||
class DoubanSearcher:
    """Search helper shared by the Douban site classes.

    Fetches a ``search.douban.com`` result page and converts the JSON payload
    embedded in it (assigned to ``window.__DATA__`` inside a ``<script>``)
    into ``ExternalSearchResultItem`` objects.
    """

    @classmethod
    def search(cls, cat: ItemCategory, c: str, q: str, p: int = 1):
        """Return one page of Douban search results.

        Args:
            cat: Category passed to every ``ExternalSearchResultItem`` built here.
            c: Path segment of the search URL (visible callers pass "book" / "movie").
            q: Search text. It is percent-encoded here.
               NOTE(review): assumes callers pass it un-encoded - confirm.
            p: 1-based page number; the ``start`` offset advances by 15 per page.

        Returns:
            A list of ``ExternalSearchResultItem``, one per payload entry whose
            ``tpl_name`` is ``"search_subject"``. Empty when the page carries
            no ``window.__DATA__`` payload.
        """
        # Function-scope import so this block does not depend on the file header.
        from urllib.parse import quote

        # Encode the query so characters such as "&", "#" or "+" cannot
        # truncate or alter the query string.
        search_text = quote(q, safe="")
        url = f"https://search.douban.com/{c}/subject_search?search_text={search_text}&start={15*(p-1)}"
        content = DoubanDownloader(url).download().html()

        scripts = content.xpath(
            "//script[text()[contains(.,'window.__DATA__')]]/text()"
        )
        if not scripts:
            # No embedded payload: nothing to report.
            return []
        parts = scripts[0].split("window.__DATA__ = ")  # type:ignore
        if len(parts) < 2:
            return []
        # The assignment is terminated by "};" - cut there and put the closing
        # brace back so the remainder is a complete JSON object.
        j = json.loads(parts[1].split("};")[0] + "}")

        # Single pass over the items: the former duplicated ``for`` clause
        # formed a nested loop and emitted each match len(items) times.
        return [
            ExternalSearchResultItem(
                cat,
                SiteName.Douban,
                item["url"],
                item["title"],
                item["abstract"],
                item["abstract_2"],
                item["cover_url"],
            )
            for item in j.get("items") or []
            if item.get("tpl_name") == "search_subject"
        ]
|
||||
|
|
|
@ -3,7 +3,7 @@ from catalog.book.utils import *
|
|||
from catalog.common import *
|
||||
from common.models.lang import detect_language
|
||||
|
||||
from .douban import *
|
||||
from .douban import RE_NUMBERS, RE_WHITESPACES, DoubanDownloader, DoubanSearcher
|
||||
|
||||
|
||||
@SiteManager.register
|
||||
|
@ -23,46 +23,51 @@ class DoubanBook(AbstractSite):
|
|||
def id_to_url(cls, id_value):
|
||||
return "https://book.douban.com/subject/" + id_value + "/"
|
||||
|
||||
@classmethod
def search(cls, q: str, p: int = 1):
    """Delegate to ``DoubanSearcher.search`` with the Book category and the "book" URL segment.

    Args:
        q: Search text, forwarded unchanged.
        p: Page number, forwarded unchanged (defaults to 1).
    """
    return DoubanSearcher.search(cat=ItemCategory.Book, c="book", q=q, p=p)
|
||||
|
||||
def scrape(self):
|
||||
content = DoubanDownloader(self.url).download().html()
|
||||
|
||||
isbn_elem = content.xpath(
|
||||
"//div[@id='info']//span[text()='ISBN:']/following::text()"
|
||||
isbn_elem = self.query_list(
|
||||
content, "//div[@id='info']//span[text()='ISBN:']/following::text()"
|
||||
)
|
||||
isbn = isbn_elem[0].strip() if isbn_elem else None
|
||||
|
||||
title_elem = content.xpath("/html/body//h1/span/text()")
|
||||
title_elem = self.query_list(content, "/html/body//h1/span/text()")
|
||||
title = (
|
||||
title_elem[0].strip() if title_elem else f"Unknown Title {self.id_value}"
|
||||
)
|
||||
|
||||
subtitle_elem = content.xpath(
|
||||
"//div[@id='info']//span[text()='副标题:']/following::text()"
|
||||
subtitle_elem = self.query_list(
|
||||
content, "//div[@id='info']//span[text()='副标题:']/following::text()"
|
||||
)
|
||||
subtitle = subtitle_elem[0].strip()[:500] if subtitle_elem else None
|
||||
|
||||
orig_title_elem = content.xpath(
|
||||
"//div[@id='info']//span[text()='原作名:']/following::text()"
|
||||
orig_title_elem = self.query_list(
|
||||
content, "//div[@id='info']//span[text()='原作名:']/following::text()"
|
||||
)
|
||||
orig_title = orig_title_elem[0].strip()[:500] if orig_title_elem else None
|
||||
|
||||
language_elem = content.xpath(
|
||||
"//div[@id='info']//span[text()='语言:']/following::text()"
|
||||
language_elem = self.query_list(
|
||||
content, "//div[@id='info']//span[text()='语言:']/following::text()"
|
||||
)
|
||||
language = [language_elem[0].strip()] if language_elem else []
|
||||
|
||||
pub_house_elem = content.xpath(
|
||||
"//div[@id='info']//span[text()='出版社:']/following::text()"
|
||||
pub_house_elem = self.query_list(
|
||||
content, "//div[@id='info']//span[text()='出版社:']/following::text()"
|
||||
)
|
||||
pub_house = pub_house_elem[0].strip() if pub_house_elem else None
|
||||
if not pub_house:
|
||||
pub_house_elem = content.xpath(
|
||||
"//div[@id='info']//span[text()='出版社:']/following-sibling::a/text()"
|
||||
pub_house_elem = self.query_list(
|
||||
content,
|
||||
"//div[@id='info']//span[text()='出版社:']/following-sibling::a/text()",
|
||||
)
|
||||
pub_house = pub_house_elem[0].strip() if pub_house_elem else None
|
||||
|
||||
pub_date_elem = content.xpath(
|
||||
"//div[@id='info']//span[text()='出版年:']/following::text()"
|
||||
pub_date_elem = self.query_list(
|
||||
content, "//div[@id='info']//span[text()='出版年:']/following::text()"
|
||||
)
|
||||
pub_date = pub_date_elem[0].strip() if pub_date_elem else ""
|
||||
year_month_day = RE_NUMBERS.findall(pub_date)
|
||||
|
@ -88,18 +93,18 @@ class DoubanBook(AbstractSite):
|
|||
else pub_month
|
||||
)
|
||||
|
||||
binding_elem = content.xpath(
|
||||
"//div[@id='info']//span[text()='装帧:']/following::text()"
|
||||
binding_elem = self.query_list(
|
||||
content, "//div[@id='info']//span[text()='装帧:']/following::text()"
|
||||
)
|
||||
binding = binding_elem[0].strip() if binding_elem else None
|
||||
|
||||
price_elem = content.xpath(
|
||||
"//div[@id='info']//span[text()='定价:']/following::text()"
|
||||
price_elem = self.query_list(
|
||||
content, "//div[@id='info']//span[text()='定价:']/following::text()"
|
||||
)
|
||||
price = price_elem[0].strip() if price_elem else None
|
||||
|
||||
pages_elem = content.xpath(
|
||||
"//div[@id='info']//span[text()='页数:']/following::text()"
|
||||
pages_elem = self.query_list(
|
||||
content, "//div[@id='info']//span[text()='页数:']/following::text()"
|
||||
)
|
||||
pages = pages_elem[0].strip() if pages_elem else None
|
||||
if pages is not None:
|
||||
|
@ -109,15 +114,16 @@ class DoubanBook(AbstractSite):
|
|||
if pages and (pages > 999999 or pages < 1):
|
||||
pages = None
|
||||
|
||||
brief_elem = content.xpath(
|
||||
"//h2/span[text()='内容简介']/../following-sibling::div[1]//div[@class='intro'][not(ancestor::span[@class='short'])]/p/text()"
|
||||
brief_elem = self.query_list(
|
||||
content,
|
||||
"//h2/span[text()='内容简介']/../following-sibling::div[1]//div[@class='intro'][not(ancestor::span[@class='short'])]/p/text()",
|
||||
)
|
||||
brief = "\n".join(p.strip() for p in brief_elem) if brief_elem else None
|
||||
|
||||
contents = None
|
||||
try:
|
||||
contents_elem = content.xpath(
|
||||
"//h2/span[text()='目录']/../following-sibling::div[1]"
|
||||
contents_elem = self.query_list(
|
||||
content, "//h2/span[text()='目录']/../following-sibling::div[1]"
|
||||
)[0]
|
||||
# if the id of the next sibling contains `dir`, that sibling holds the full contents
|
||||
if "dir" in contents_elem.getnext().xpath("@id")[0]:
|
||||
|
@ -129,24 +135,28 @@ class DoubanBook(AbstractSite):
|
|||
)
|
||||
else:
|
||||
contents = (
|
||||
"\n".join(p.strip() for p in contents_elem.xpath("text()"))
|
||||
"\n".join(
|
||||
p.strip() for p in self.query_list(contents_elem, "text()")
|
||||
)
|
||||
if contents_elem is not None
|
||||
else None
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
img_url_elem = content.xpath("//*[@id='mainpic']/a/img/@src")
|
||||
img_url_elem = self.query_list(content, "//*[@id='mainpic']/a/img/@src")
|
||||
img_url = img_url_elem[0].strip() if img_url_elem else None
|
||||
|
||||
# there are two html formats for authors and translators
|
||||
authors_elem = content.xpath(
|
||||
authors_elem = self.query_list(
|
||||
content,
|
||||
"""//div[@id='info']//span[text()='作者:']/following-sibling::br[1]/
|
||||
preceding-sibling::a[preceding-sibling::span[text()='作者:']]/text()"""
|
||||
preceding-sibling::a[preceding-sibling::span[text()='作者:']]/text()""",
|
||||
)
|
||||
if not authors_elem:
|
||||
authors_elem = content.xpath(
|
||||
"""//div[@id='info']//span[text()=' 作者']/following-sibling::a/text()"""
|
||||
authors_elem = self.query_list(
|
||||
content,
|
||||
"""//div[@id='info']//span[text()=' 作者']/following-sibling::a/text()""",
|
||||
)
|
||||
if authors_elem:
|
||||
authors = []
|
||||
|
@ -155,13 +165,15 @@ class DoubanBook(AbstractSite):
|
|||
else:
|
||||
authors = None
|
||||
|
||||
translators_elem = content.xpath(
|
||||
translators_elem = self.query_list(
|
||||
content,
|
||||
"""//div[@id='info']//span[text()='译者:']/following-sibling::br[1]/
|
||||
preceding-sibling::a[preceding-sibling::span[text()='译者:']]/text()"""
|
||||
preceding-sibling::a[preceding-sibling::span[text()='译者:']]/text()""",
|
||||
)
|
||||
if not translators_elem:
|
||||
translators_elem = content.xpath(
|
||||
"""//div[@id='info']//span[text()=' 译者']/following-sibling::a/text()"""
|
||||
translators_elem = self.query_list(
|
||||
content,
|
||||
"""//div[@id='info']//span[text()=' 译者']/following-sibling::a/text()""",
|
||||
)
|
||||
if translators_elem:
|
||||
translators = []
|
||||
|
@ -170,18 +182,20 @@ class DoubanBook(AbstractSite):
|
|||
else:
|
||||
translators = None
|
||||
|
||||
cncode_elem = content.xpath(
|
||||
"//div[@id='info']//span[text()='统一书号:']/following::text()"
|
||||
cncode_elem = self.query_list(
|
||||
content, "//div[@id='info']//span[text()='统一书号:']/following::text()"
|
||||
)
|
||||
cubn = cncode_elem[0].strip() if cncode_elem else None
|
||||
|
||||
series_elem = content.xpath(
|
||||
"//div[@id='info']//span[text()='丛书:']/following-sibling::a[1]/text()"
|
||||
series_elem = self.query_list(
|
||||
content,
|
||||
"//div[@id='info']//span[text()='丛书:']/following-sibling::a[1]/text()",
|
||||
)
|
||||
series = series_elem[0].strip() if series_elem else None
|
||||
|
||||
imprint_elem = content.xpath(
|
||||
"//div[@id='info']//span[text()='出品方:']/following-sibling::a[1]/text()"
|
||||
imprint_elem = self.query_list(
|
||||
content,
|
||||
"//div[@id='info']//span[text()='出品方:']/following-sibling::a[1]/text()",
|
||||
)
|
||||
imprint = imprint_elem[0].strip() if imprint_elem else None
|
||||
|
||||
|
@ -212,8 +226,9 @@ class DoubanBook(AbstractSite):
|
|||
"cover_image_url": img_url,
|
||||
}
|
||||
|
||||
works_element = content.xpath(
|
||||
'//h2/span[text()="这本书的其他版本"]/following-sibling::span[@class="pl"]/a/@href'
|
||||
works_element = self.query_list(
|
||||
content,
|
||||
'//h2/span[text()="这本书的其他版本"]/following-sibling::span[@class="pl"]/a/@href',
|
||||
)
|
||||
if works_element:
|
||||
r = re.match(r"\w+://book.douban.com/works/(\d+)", works_element[0])
|
||||
|
@ -234,7 +249,7 @@ class DoubanBook(AbstractSite):
|
|||
]
|
||||
|
||||
pd = ResourceContent(metadata=data)
|
||||
t, n = detect_isbn_asin(isbn)
|
||||
t, n = detect_isbn_asin(isbn or "")
|
||||
if t:
|
||||
pd.lookup_ids[t] = n
|
||||
pd.lookup_ids[IdType.CUBN] = cubn
|
||||
|
@ -255,11 +270,11 @@ class DoubanBook_Work(AbstractSite):
|
|||
|
||||
def scrape(self):
|
||||
content = DoubanDownloader(self.url).download().html()
|
||||
title_elem = content.xpath("//h1/text()")
|
||||
title_elem = self.query_list(content, "//h1/text()")
|
||||
title = title_elem[0].split("全部版本(")[0].strip() if title_elem else None
|
||||
if not title:
|
||||
raise ParseError(self, "title")
|
||||
book_urls = content.xpath('//a[@class="pl2"]/@href')
|
||||
book_urls = self.query_list(content, '//a[@class="pl2"]/@href')
|
||||
related_resources = []
|
||||
for url in book_urls:
|
||||
site = SiteManager.get_site_by_url(url)
|
||||
|
|
|
@ -7,7 +7,7 @@ from catalog.common import *
|
|||
from catalog.models import *
|
||||
from common.models.lang import detect_language
|
||||
|
||||
from .douban import DoubanDownloader
|
||||
from .douban import DoubanDownloader, DoubanSearcher
|
||||
|
||||
|
||||
def _cache_key(url):
|
||||
|
@ -45,6 +45,8 @@ class DoubanDramaVersion(AbstractSite):
|
|||
return f"https://www.douban.com/location/drama/{ids[0]}/#{ids[1]}"
|
||||
|
||||
def scrape(self):
|
||||
if not self.id_value or not self.url:
|
||||
raise ParseError(self, "id_value or url")
|
||||
show_url = self.url.split("#")[0]
|
||||
show_id = self.id_value.split("-")[0]
|
||||
version_id = self.id_value.split("-")[1]
|
||||
|
@ -59,20 +61,20 @@ class DoubanDramaVersion(AbstractSite):
|
|||
p = "//div[@id='" + version_id + "']"
|
||||
q = p + "//dt[text()='{}:']/following-sibling::dd[1]/a/span/text()"
|
||||
q2 = p + "//dt[text()='{}:']/following-sibling::dd[1]/text()"
|
||||
title = " ".join(h.xpath(p + "//h3/text()")).strip()
|
||||
title = " ".join(self.query_list(h, p + "//h3/text()")).strip()
|
||||
if not title:
|
||||
raise ParseError(self, "title")
|
||||
data = {
|
||||
"title": title,
|
||||
"localized_title": [{"lang": "zh-cn", "text": title}],
|
||||
"director": [x.strip() for x in h.xpath(q.format("导演"))],
|
||||
"playwright": [x.strip() for x in h.xpath(q.format("编剧"))],
|
||||
# "actor": [x.strip() for x in h.xpath(q.format("主演"))],
|
||||
"composer": [x.strip() for x in h.xpath(q.format("作曲"))],
|
||||
"language": [x.strip() for x in h.xpath(q2.format("语言"))],
|
||||
"opening_date": " ".join(h.xpath(q2.format("演出日期"))).strip(),
|
||||
"troupe": [x.strip() for x in h.xpath(q.format("演出团体"))],
|
||||
"location": [x.strip() for x in h.xpath(q.format("演出剧院"))],
|
||||
"director": [x.strip() for x in self.query_list(h, q.format("导演"))],
|
||||
"playwright": [x.strip() for x in self.query_list(h, q.format("编剧"))],
|
||||
# "actor": [x.strip() for x in self.query_list(h, q.format("主演"))],
|
||||
"composer": [x.strip() for x in self.query_list(h, q.format("作曲"))],
|
||||
"language": [x.strip() for x in self.query_list(h, q2.format("语言"))],
|
||||
"opening_date": " ".join(self.query_list(h, q2.format("演出日期"))).strip(),
|
||||
"troupe": [x.strip() for x in self.query_list(h, q.format("演出团体"))],
|
||||
"location": [x.strip() for x in self.query_list(h, q.format("演出剧院"))],
|
||||
}
|
||||
if data["opening_date"]:
|
||||
d = data["opening_date"].split("-")
|
||||
|
@ -80,7 +82,9 @@ class DoubanDramaVersion(AbstractSite):
|
|||
if dl > 3:
|
||||
data["opening_date"] = "-".join(d[:3])
|
||||
data["closing_date"] = "-".join(d[0 : 6 - dl] + d[3:dl])
|
||||
actor_elem = h.xpath(p + "//dt[text()='主演:']/following-sibling::dd[1]/a")
|
||||
actor_elem = self.query_list(
|
||||
h, p + "//dt[text()='主演:']/following-sibling::dd[1]/a"
|
||||
)
|
||||
data["actor"] = []
|
||||
for e in actor_elem:
|
||||
n = "".join(e.xpath("span/text()")).strip()
|
||||
|
@ -88,7 +92,7 @@ class DoubanDramaVersion(AbstractSite):
|
|||
t = re.sub(r"^[\s\(饰]*(.+)\)[\s\/]*$", r"\1", t).strip()
|
||||
t = t if t != "/" else ""
|
||||
data["actor"].append({"name": n, "role": t})
|
||||
img_url_elem = h.xpath("//img[@itemprop='image']/@src")
|
||||
img_url_elem = self.query_list(h, "//img[@itemprop='image']/@src")
|
||||
data["cover_image_url"] = img_url_elem[0].strip() if img_url_elem else None
|
||||
pd = ResourceContent(metadata=data)
|
||||
pd.metadata["required_resources"] = [
|
||||
|
@ -128,78 +132,87 @@ class DoubanDrama(AbstractSite):
|
|||
h = html.fromstring(r)
|
||||
data = {}
|
||||
|
||||
title_elem = h.xpath("/html/body//h1/span/text()")
|
||||
title_elem = self.query_list(h, "/html/body//h1/span/text()")
|
||||
if title_elem:
|
||||
data["title"] = title_elem[0].strip()
|
||||
data["orig_title"] = title_elem[1] if len(title_elem) > 1 else None
|
||||
else:
|
||||
raise ParseError(self, "title")
|
||||
|
||||
other_title_elem = h.xpath(
|
||||
"//dl//dt[text()='又名:']/following::dd[@itemprop='name']/text()"
|
||||
other_title_elem = self.query_list(
|
||||
h, "//dl//dt[text()='又名:']/following::dd[@itemprop='name']/text()"
|
||||
)
|
||||
data["other_title"] = other_title_elem
|
||||
|
||||
plot_elem = h.xpath("//div[@class='pure-text']/div[@class='full']/text()")
|
||||
if len(plot_elem) == 0:
|
||||
plot_elem = h.xpath(
|
||||
"//div[@class='pure-text']/div[@class='abstract']/text()"
|
||||
plot_elem = self.query_list(
|
||||
h, "//div[@class='pure-text']/div[@class='full']/text()"
|
||||
)
|
||||
if len(plot_elem) == 0:
|
||||
plot_elem = h.xpath("//div[@class='pure-text']/text()")
|
||||
plot_elem = self.query_list(
|
||||
h, "//div[@class='pure-text']/div[@class='abstract']/text()"
|
||||
)
|
||||
if len(plot_elem) == 0:
|
||||
plot_elem = self.query_list(h, "//div[@class='pure-text']/text()")
|
||||
data["brief"] = "\n".join(plot_elem)
|
||||
|
||||
data["genre"] = [
|
||||
s.strip()
|
||||
for s in h.xpath(
|
||||
"//div[@class='meta']//dl//dt[text()='类型:']/following-sibling::dd[@itemprop='genre']/text()"
|
||||
for s in self.query_list(
|
||||
h,
|
||||
"//div[@class='meta']//dl//dt[text()='类型:']/following-sibling::dd[@itemprop='genre']/text()",
|
||||
)
|
||||
]
|
||||
# data["version"] = [
|
||||
# s.strip()
|
||||
# for s in h.xpath(
|
||||
# for s in self.query_list(h,
|
||||
# "//dl//dt[text()='版本:']/following-sibling::dd[@class='titles']/a//text()"
|
||||
# )
|
||||
# ]
|
||||
data["director"] = [
|
||||
s.strip()
|
||||
for s in h.xpath(
|
||||
"//div[@class='meta']/dl//dt[text()='导演:']/following-sibling::dd/a[@itemprop='director']//text()"
|
||||
for s in self.query_list(
|
||||
h,
|
||||
"//div[@class='meta']/dl//dt[text()='导演:']/following-sibling::dd/a[@itemprop='director']//text()",
|
||||
)
|
||||
]
|
||||
data["composer"] = [
|
||||
s.strip()
|
||||
for s in h.xpath(
|
||||
"//div[@class='meta']/dl//dt[text()='作曲:']/following-sibling::dd/a[@itemprop='musicBy']//text()"
|
||||
for s in self.query_list(
|
||||
h,
|
||||
"//div[@class='meta']/dl//dt[text()='作曲:']/following-sibling::dd/a[@itemprop='musicBy']//text()",
|
||||
)
|
||||
]
|
||||
data["choreographer"] = [
|
||||
s.strip()
|
||||
for s in h.xpath(
|
||||
"//div[@class='meta']/dl//dt[text()='编舞:']/following-sibling::dd/a[@itemprop='choreographer']//text()"
|
||||
for s in self.query_list(
|
||||
h,
|
||||
"//div[@class='meta']/dl//dt[text()='编舞:']/following-sibling::dd/a[@itemprop='choreographer']//text()",
|
||||
)
|
||||
]
|
||||
data["troupe"] = [
|
||||
s.strip()
|
||||
for s in h.xpath(
|
||||
"//div[@class='meta']/dl//dt[text()='演出团体:']/following-sibling::dd/a[@itemprop='performer']//text()"
|
||||
for s in self.query_list(
|
||||
h,
|
||||
"//div[@class='meta']/dl//dt[text()='演出团体:']/following-sibling::dd/a[@itemprop='performer']//text()",
|
||||
)
|
||||
]
|
||||
data["playwright"] = [
|
||||
s.strip()
|
||||
for s in h.xpath(
|
||||
"//div[@class='meta']/dl//dt[text()='编剧:']/following-sibling::dd/a[@itemprop='author']//text()"
|
||||
for s in self.query_list(
|
||||
h,
|
||||
"//div[@class='meta']/dl//dt[text()='编剧:']/following-sibling::dd/a[@itemprop='author']//text()",
|
||||
)
|
||||
]
|
||||
data["actor"] = [
|
||||
{"name": s.strip(), "role": ""}
|
||||
for s in h.xpath(
|
||||
"//div[@class='meta']/dl//dt[text()='主演:']/following-sibling::dd/a[@itemprop='actor']//text()"
|
||||
for s in self.query_list(
|
||||
h,
|
||||
"//div[@class='meta']/dl//dt[text()='主演:']/following-sibling::dd/a[@itemprop='actor']//text()",
|
||||
)
|
||||
]
|
||||
|
||||
date_elem = h.xpath(
|
||||
"//div[@class='meta']//dl//dt[text()='演出日期:']/following::dd/text()"
|
||||
date_elem = self.query_list(
|
||||
h, "//div[@class='meta']//dl//dt[text()='演出日期:']/following::dd/text()"
|
||||
)
|
||||
data["opening_date"] = date_elem[0] if date_elem else None
|
||||
if data["opening_date"]:
|
||||
|
@ -211,12 +224,15 @@ class DoubanDrama(AbstractSite):
|
|||
|
||||
data["location"] = [
|
||||
s.strip()
|
||||
for s in h.xpath(
|
||||
"//div[@class='meta']/dl//dt[text()='演出剧院:']/following-sibling::dd/a[@itemprop='location']//text()"
|
||||
for s in self.query_list(
|
||||
h,
|
||||
"//div[@class='meta']/dl//dt[text()='演出剧院:']/following-sibling::dd/a[@itemprop='location']//text()",
|
||||
)
|
||||
]
|
||||
|
||||
versions = h.xpath("//div[@id='versions']/div[@class='fluid-mods']/div/@id")
|
||||
versions = self.query_list(
|
||||
h, "//div[@id='versions']/div[@class='fluid-mods']/div/@id"
|
||||
)
|
||||
data["related_resources"] = list(
|
||||
map(
|
||||
lambda v: {
|
||||
|
@ -229,7 +245,7 @@ class DoubanDrama(AbstractSite):
|
|||
versions,
|
||||
)
|
||||
)
|
||||
img_url_elem = h.xpath("//img[@itemprop='image']/@src")
|
||||
img_url_elem = self.query_list(h, "//img[@itemprop='image']/@src")
|
||||
data["cover_image_url"] = img_url_elem[0].strip() if img_url_elem else None
|
||||
data["localized_title"] = (
|
||||
[{"lang": "zh-cn", "text": data["title"]}]
|
||||
|
|
|
@ -7,9 +7,7 @@ from catalog.models import *
|
|||
from common.models.lang import detect_language
|
||||
from common.models.misc import uniq
|
||||
|
||||
from .douban import DoubanDownloader
|
||||
|
||||
_logger = logging.getLogger(__name__)
|
||||
from .douban import DoubanDownloader, DoubanSearcher
|
||||
|
||||
|
||||
@SiteManager.register
|
||||
|
@ -26,18 +24,18 @@ class DoubanGame(AbstractSite):
|
|||
DEFAULT_MODEL = Game
|
||||
|
||||
@classmethod
|
||||
def id_to_url(self, id_value):
|
||||
def id_to_url(cls, id_value):
|
||||
return "https://www.douban.com/game/" + id_value + "/"
|
||||
|
||||
def scrape(self):
|
||||
content = DoubanDownloader(self.url).download().html()
|
||||
|
||||
elem = content.xpath("//div[@id='content']/h1/text()")
|
||||
elem = self.query_list(content, "//div[@id='content']/h1/text()")
|
||||
title = elem[0].strip() if len(elem) else None
|
||||
if not title:
|
||||
raise ParseError(self, "title")
|
||||
|
||||
elem = content.xpath("//div[@id='comments']//h2/text()")
|
||||
elem = self.query_list(content, "//div[@id='comments']//h2/text()")
|
||||
title2 = elem[0].strip() if len(elem) else ""
|
||||
if title2:
|
||||
sp = title2.strip().rsplit("的短评", 1)
|
||||
|
@ -48,46 +46,52 @@ class DoubanGame(AbstractSite):
|
|||
else:
|
||||
orig_title = ""
|
||||
|
||||
other_title_elem = content.xpath(
|
||||
"//dl[@class='thing-attr']//dt[text()='别名:']/following-sibling::dd[1]/text()"
|
||||
other_title_elem = self.query_list(
|
||||
content,
|
||||
"//dl[@class='thing-attr']//dt[text()='别名:']/following-sibling::dd[1]/text()",
|
||||
)
|
||||
other_title = (
|
||||
other_title_elem[0].strip().split(" / ") if other_title_elem else []
|
||||
)
|
||||
|
||||
developer_elem = content.xpath(
|
||||
"//dl[@class='thing-attr']//dt[text()='开发商:']/following-sibling::dd[1]/text()"
|
||||
developer_elem = self.query_list(
|
||||
content,
|
||||
"//dl[@class='thing-attr']//dt[text()='开发商:']/following-sibling::dd[1]/text()",
|
||||
)
|
||||
developer = developer_elem[0].strip().split(" / ") if developer_elem else None
|
||||
|
||||
publisher_elem = content.xpath(
|
||||
"//dl[@class='thing-attr']//dt[text()='发行商:']/following-sibling::dd[1]/text()"
|
||||
publisher_elem = self.query_list(
|
||||
content,
|
||||
"//dl[@class='thing-attr']//dt[text()='发行商:']/following-sibling::dd[1]/text()",
|
||||
)
|
||||
publisher = publisher_elem[0].strip().split(" / ") if publisher_elem else None
|
||||
|
||||
platform_elem = content.xpath(
|
||||
"//dl[@class='thing-attr']//dt[text()='平台:']/following-sibling::dd[1]/a/text()"
|
||||
platform_elem = self.query_list(
|
||||
content,
|
||||
"//dl[@class='thing-attr']//dt[text()='平台:']/following-sibling::dd[1]/a/text()",
|
||||
)
|
||||
platform = platform_elem if platform_elem else None
|
||||
|
||||
genre_elem = content.xpath(
|
||||
"//dl[@class='thing-attr']//dt[text()='类型:']/following-sibling::dd[1]/a/text()"
|
||||
genre_elem = self.query_list(
|
||||
content,
|
||||
"//dl[@class='thing-attr']//dt[text()='类型:']/following-sibling::dd[1]/a/text()",
|
||||
)
|
||||
genre = None
|
||||
if genre_elem:
|
||||
genre = [g for g in genre_elem if g != "游戏"]
|
||||
|
||||
date_elem = content.xpath(
|
||||
"//dl[@class='thing-attr']//dt[text()='发行日期:']/following-sibling::dd[1]/text()"
|
||||
date_elem = self.query_list(
|
||||
content,
|
||||
"//dl[@class='thing-attr']//dt[text()='发行日期:']/following-sibling::dd[1]/text()",
|
||||
)
|
||||
release_date = dateparser.parse(date_elem[0].strip()) if date_elem else None
|
||||
release_date = release_date.strftime("%Y-%m-%d") if release_date else None
|
||||
|
||||
brief_elem = content.xpath("//div[@class='mod item-desc']/p/text()")
|
||||
brief_elem = self.query_list(content, "//div[@class='mod item-desc']/p/text()")
|
||||
brief = "\n".join(brief_elem) if brief_elem else ""
|
||||
|
||||
img_url_elem = content.xpath(
|
||||
"//div[@class='item-subject-info']/div[@class='pic']//img/@src"
|
||||
img_url_elem = self.query_list(
|
||||
content, "//div[@class='item-subject-info']/div[@class='pic']//img/@src"
|
||||
)
|
||||
img_url = img_url_elem[0].strip() if img_url_elem else None
|
||||
|
||||
|
|
|
@ -1,16 +1,17 @@
|
|||
import json
|
||||
import logging
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from catalog.common import *
|
||||
from catalog.movie.models import *
|
||||
from catalog.tv.models import *
|
||||
from common.models.lang import detect_language
|
||||
from common.models.misc import int_
|
||||
|
||||
from .douban import *
|
||||
from .douban import DoubanDownloader, DoubanSearcher
|
||||
from .tmdb import TMDB_TV, TMDB_TVSeason, query_tmdb_tv_episode, search_tmdb_by_imdb_id
|
||||
|
||||
_logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@SiteManager.register
|
||||
class DoubanMovie(AbstractSite):
|
||||
|
@ -29,11 +30,15 @@ class DoubanMovie(AbstractSite):
|
|||
def id_to_url(cls, id_value):
|
||||
return "https://movie.douban.com/subject/" + id_value + "/"
|
||||
|
||||
@classmethod
def search(cls, q: str, p: int = 1):
    """Delegate to ``DoubanSearcher.search`` with the Movie category and the "movie" URL segment.

    Args:
        q: Search text, forwarded unchanged.
        p: Page number, forwarded unchanged (defaults to 1).
    """
    return DoubanSearcher.search(cat=ItemCategory.Movie, c="movie", q=q, p=p)
|
||||
|
||||
def scrape(self):
|
||||
content = DoubanDownloader(self.url).download().html()
|
||||
try:
|
||||
schema_data = "".join(
|
||||
content.xpath('//script[@type="application/ld+json"]/text()')
|
||||
self.query_list(content, '//script[@type="application/ld+json"]/text()')
|
||||
).replace(
|
||||
"\n", ""
|
||||
) # strip \n bc multi-line string is not properly coded in json by douban
|
||||
|
@ -42,13 +47,13 @@ class DoubanMovie(AbstractSite):
|
|||
d = {}
|
||||
|
||||
try:
|
||||
raw_title = content.xpath("//span[@property='v:itemreviewed']/text()")[
|
||||
0
|
||||
].strip()
|
||||
raw_title = self.query_list(
|
||||
content, "//span[@property='v:itemreviewed']/text()"
|
||||
)[0].strip()
|
||||
except IndexError:
|
||||
raise ParseError(self, "title")
|
||||
|
||||
orig_title = content.xpath("//img[@rel='v:image']/@alt")[0].strip()
|
||||
orig_title = self.query_list(content, "//img[@rel='v:image']/@alt")[0].strip()
|
||||
title = raw_title.split(orig_title)[0].strip()
|
||||
# if has no chinese title
|
||||
if title == "":
|
||||
|
@ -58,40 +63,46 @@ class DoubanMovie(AbstractSite):
|
|||
orig_title = None
|
||||
|
||||
# there are two html formats for authors and translators
|
||||
other_title_elem = content.xpath(
|
||||
"//div[@id='info']//span[text()='又名:']/following-sibling::text()[1]"
|
||||
other_title_elem = self.query_list(
|
||||
content,
|
||||
"//div[@id='info']//span[text()='又名:']/following-sibling::text()[1]",
|
||||
)
|
||||
other_title = (
|
||||
other_title_elem[0].strip().split(" / ") if other_title_elem else None
|
||||
)
|
||||
|
||||
imdb_elem = content.xpath(
|
||||
"//div[@id='info']//span[text()='IMDb链接:']/following-sibling::a[1]/text()"
|
||||
imdb_elem = self.query_list(
|
||||
content,
|
||||
"//div[@id='info']//span[text()='IMDb链接:']/following-sibling::a[1]/text()",
|
||||
)
|
||||
if not imdb_elem:
|
||||
imdb_elem = content.xpath(
|
||||
"//div[@id='info']//span[text()='IMDb:']/following-sibling::text()[1]"
|
||||
imdb_elem = self.query_list(
|
||||
content,
|
||||
"//div[@id='info']//span[text()='IMDb:']/following-sibling::text()[1]",
|
||||
)
|
||||
imdb_code = imdb_elem[0].strip() if imdb_elem else None
|
||||
|
||||
director_elem = content.xpath(
|
||||
"//div[@id='info']//span[text()='导演']/following-sibling::span[1]/a/text()"
|
||||
director_elem = self.query_list(
|
||||
content,
|
||||
"//div[@id='info']//span[text()='导演']/following-sibling::span[1]/a/text()",
|
||||
)
|
||||
director = director_elem if director_elem else None
|
||||
|
||||
playwright_elem = content.xpath(
|
||||
"//div[@id='info']//span[text()='编剧']/following-sibling::span[1]/a/text()"
|
||||
playwright_elem = self.query_list(
|
||||
content,
|
||||
"//div[@id='info']//span[text()='编剧']/following-sibling::span[1]/a/text()",
|
||||
)
|
||||
playwright = (
|
||||
list(map(lambda a: a[:200], playwright_elem)) if playwright_elem else None
|
||||
)
|
||||
|
||||
actor_elem = content.xpath(
|
||||
"//div[@id='info']//span[text()='主演']/following-sibling::span[1]/a/text()"
|
||||
actor_elem = self.query_list(
|
||||
content,
|
||||
"//div[@id='info']//span[text()='主演']/following-sibling::span[1]/a/text()",
|
||||
)
|
||||
actor = list(map(lambda a: a[:200], actor_elem)) if actor_elem else None
|
||||
|
||||
genre_elem = content.xpath("//span[@property='v:genre']/text()")
|
||||
genre_elem = self.query_list(content, "//span[@property='v:genre']/text()")
|
||||
genre = []
|
||||
if genre_elem:
|
||||
for g in genre_elem:
|
||||
|
@ -102,7 +113,9 @@ class DoubanMovie(AbstractSite):
|
|||
g = "惊悚"
|
||||
genre.append(g)
|
||||
|
||||
showtime_elem = content.xpath("//span[@property='v:initialReleaseDate']/text()")
|
||||
showtime_elem = self.query_list(
|
||||
content, "//span[@property='v:initialReleaseDate']/text()"
|
||||
)
|
||||
if showtime_elem:
|
||||
showtime = []
|
||||
for st in showtime_elem:
|
||||
|
@ -122,39 +135,39 @@ class DoubanMovie(AbstractSite):
|
|||
else:
|
||||
showtime = None
|
||||
|
||||
site_elem = content.xpath(
|
||||
"//div[@id='info']//span[text()='官方网站:']/following-sibling::a[1]/@href"
|
||||
site_elem = self.query_list(
|
||||
content,
|
||||
"//div[@id='info']//span[text()='官方网站:']/following-sibling::a[1]/@href",
|
||||
)
|
||||
site = site_elem[0].strip()[:200] if site_elem else None
|
||||
if site and not re.match(r"http.+", site):
|
||||
site = None
|
||||
|
||||
area_elem = content.xpath(
|
||||
"//div[@id='info']//span[text()='制片国家/地区:']/following-sibling::text()[1]"
|
||||
area_elem = self.query_list(
|
||||
content,
|
||||
"//div[@id='info']//span[text()='制片国家/地区:']/following-sibling::text()[1]",
|
||||
)
|
||||
if area_elem:
|
||||
area = [a.strip()[:100] for a in area_elem[0].split("/")]
|
||||
else:
|
||||
area = None
|
||||
|
||||
language_elem = content.xpath(
|
||||
"//div[@id='info']//span[text()='语言:']/following-sibling::text()[1]"
|
||||
language_elem = self.query_list(
|
||||
content,
|
||||
"//div[@id='info']//span[text()='语言:']/following-sibling::text()[1]",
|
||||
)
|
||||
if language_elem:
|
||||
language = [a.strip() for a in language_elem[0].split(" / ")]
|
||||
else:
|
||||
language = None
|
||||
|
||||
year_elem = content.xpath("//span[@class='year']/text()")
|
||||
year = (
|
||||
int(re.search(r"\d+", year_elem[0])[0])
|
||||
if year_elem and re.search(r"\d+", year_elem[0])
|
||||
else None
|
||||
)
|
||||
year_s = self.query_str(content, "//span[@class='year']/text()")
|
||||
year_r = re.search(r"\d+", year_s) if year_s else None
|
||||
year = int_(year_r[0]) if year_r else None
|
||||
|
||||
duration_elem = content.xpath("//span[@property='v:runtime']/text()")
|
||||
other_duration_elem = content.xpath(
|
||||
"//span[@property='v:runtime']/following-sibling::text()[1]"
|
||||
duration_elem = self.query_list(content, "//span[@property='v:runtime']/text()")
|
||||
other_duration_elem = self.query_list(
|
||||
content, "//span[@property='v:runtime']/following-sibling::text()[1]"
|
||||
)
|
||||
if duration_elem:
|
||||
duration = duration_elem[0].strip()
|
||||
|
@ -164,19 +177,21 @@ class DoubanMovie(AbstractSite):
|
|||
else:
|
||||
duration = None
|
||||
|
||||
season_elem = content.xpath(
|
||||
"//*[@id='season']/option[@selected='selected']/text()"
|
||||
season_elem = self.query_list(
|
||||
content, "//*[@id='season']/option[@selected='selected']/text()"
|
||||
)
|
||||
if not season_elem:
|
||||
season_elem = content.xpath(
|
||||
"//div[@id='info']//span[text()='季数:']/following-sibling::text()[1]"
|
||||
season_elem = self.query_list(
|
||||
content,
|
||||
"//div[@id='info']//span[text()='季数:']/following-sibling::text()[1]",
|
||||
)
|
||||
season = int(season_elem[0].strip()) if season_elem else None
|
||||
else:
|
||||
season = int(season_elem[0].strip())
|
||||
|
||||
episodes_elem = content.xpath(
|
||||
"//div[@id='info']//span[text()='集数:']/following-sibling::text()[1]"
|
||||
episodes_elem = self.query_list(
|
||||
content,
|
||||
"//div[@id='info']//span[text()='集数:']/following-sibling::text()[1]",
|
||||
)
|
||||
episodes = (
|
||||
int(episodes_elem[0].strip())
|
||||
|
@ -184,8 +199,9 @@ class DoubanMovie(AbstractSite):
|
|||
else None
|
||||
)
|
||||
|
||||
single_episode_length_elem = content.xpath(
|
||||
"//div[@id='info']//span[text()='单集片长:']/following-sibling::text()[1]"
|
||||
single_episode_length_elem = self.query_list(
|
||||
content,
|
||||
"//div[@id='info']//span[text()='单集片长:']/following-sibling::text()[1]",
|
||||
)
|
||||
single_episode_length = (
|
||||
single_episode_length_elem[0].strip()[:100]
|
||||
|
@ -195,16 +211,16 @@ class DoubanMovie(AbstractSite):
|
|||
|
||||
is_series = d.get("@type") == "TVSeries" or episodes is not None
|
||||
|
||||
brief_elem = content.xpath("//span[@class='all hidden']")
|
||||
brief_elem = self.query_list(content, "//span[@class='all hidden']")
|
||||
if not brief_elem:
|
||||
brief_elem = content.xpath("//span[@property='v:summary']")
|
||||
brief_elem = self.query_list(content, "//span[@property='v:summary']")
|
||||
brief = (
|
||||
"\n".join([e.strip() for e in brief_elem[0].xpath("./text()")])
|
||||
if brief_elem
|
||||
else None
|
||||
)
|
||||
|
||||
img_url_elem = content.xpath("//img[@rel='v:image']/@src")
|
||||
img_url_elem = self.query_list(content, "//img[@rel='v:image']/@src")
|
||||
img_url = img_url_elem[0].strip() if img_url_elem else None
|
||||
|
||||
titles = set(
|
||||
|
@ -261,26 +277,26 @@ class DoubanMovie(AbstractSite):
|
|||
pd.metadata.get("season_number")
|
||||
and pd.metadata.get("season_number") != 1
|
||||
):
|
||||
_logger.warn(f"{imdb_code} matched imdb tv show, force season 1")
|
||||
logger.warning(f"{imdb_code} matched imdb tv show, force season 1")
|
||||
pd.metadata["season_number"] = 1
|
||||
elif pd.metadata["preferred_model"] == "TVSeason" and has_episode:
|
||||
if res_data["tv_episode_results"][0]["episode_number"] != 1:
|
||||
_logger.warning(
|
||||
logger.warning(
|
||||
f"Douban Movie {self.url} IMDB {imdb_code} mapping to non-first episode in a season"
|
||||
)
|
||||
elif res_data["tv_episode_results"][0]["season_number"] == 1:
|
||||
_logger.warning(
|
||||
logger.warning(
|
||||
f"Douban Movie {self.url} IMDB {imdb_code} mapping to first season episode in a season"
|
||||
)
|
||||
elif has_movie:
|
||||
if pd.metadata["preferred_model"] != "Movie":
|
||||
_logger.warn(f"{imdb_code} matched imdb movie, force Movie")
|
||||
logger.warning(f"{imdb_code} matched imdb movie, force Movie")
|
||||
pd.metadata["preferred_model"] = "Movie"
|
||||
elif has_tv or has_episode:
|
||||
_logger.warn(f"{imdb_code} matched imdb tv/episode, force TVSeason")
|
||||
logger.warning(f"{imdb_code} matched imdb tv/episode, force TVSeason")
|
||||
pd.metadata["preferred_model"] = "TVSeason"
|
||||
else:
|
||||
_logger.warn(f"{imdb_code} unknown to TMDB")
|
||||
logger.warning(f"{imdb_code} unknown to TMDB")
|
||||
|
||||
pd.lookup_ids[IdType.IMDB] = imdb_code
|
||||
|
||||
|
|
|
@ -7,9 +7,7 @@ from catalog.models import *
|
|||
from catalog.music.utils import upc_to_gtin_13
|
||||
from common.models.lang import detect_language
|
||||
|
||||
from .douban import DoubanDownloader
|
||||
|
||||
_logger = logging.getLogger(__name__)
|
||||
from .douban import DoubanDownloader, DoubanSearcher
|
||||
|
||||
|
||||
@SiteManager.register
|
||||
|
@ -29,58 +27,63 @@ class DoubanMusic(AbstractSite):
|
|||
def id_to_url(cls, id_value):
|
||||
return "https://music.douban.com/subject/" + id_value + "/"
|
||||
|
||||
@classmethod
|
||||
def search(cls, q: str, p: int = 1):
|
||||
return DoubanSearcher.search(ItemCategory.Music, "music", q, p)
|
||||
|
||||
def scrape(self):
|
||||
content = DoubanDownloader(self.url).download().html()
|
||||
|
||||
elem = content.xpath("//h1/span/text()")
|
||||
elem = self.query_list(content, "//h1/span/text()")
|
||||
title = elem[0].strip() if len(elem) else None
|
||||
if not title:
|
||||
raise ParseError(self, "title")
|
||||
|
||||
artists_elem = content.xpath(
|
||||
"//div[@id='info']/span/span[@class='pl']/a/text()"
|
||||
artists_elem = self.query_list(
|
||||
content, "//div[@id='info']/span/span[@class='pl']/a/text()"
|
||||
)
|
||||
artist = (
|
||||
None if not artists_elem else list(map(lambda a: a[:200], artists_elem))
|
||||
)
|
||||
|
||||
genre_elem = content.xpath(
|
||||
"//div[@id='info']//span[text()='流派:']/following::text()[1]"
|
||||
genre_elem = self.query_list(
|
||||
content, "//div[@id='info']//span[text()='流派:']/following::text()[1]"
|
||||
)
|
||||
genre = genre_elem[0].strip().split(" / ") if genre_elem else []
|
||||
|
||||
date_elem = content.xpath(
|
||||
"//div[@id='info']//span[text()='发行时间:']/following::text()[1]"
|
||||
date_elem = self.query_list(
|
||||
content, "//div[@id='info']//span[text()='发行时间:']/following::text()[1]"
|
||||
)
|
||||
release_date = dateparser.parse(date_elem[0].strip()) if date_elem else None
|
||||
release_date = release_date.strftime("%Y-%m-%d") if release_date else None
|
||||
|
||||
company_elem = content.xpath(
|
||||
"//div[@id='info']//span[text()='出版者:']/following::text()[1]"
|
||||
company_elem = self.query_list(
|
||||
content, "//div[@id='info']//span[text()='出版者:']/following::text()[1]"
|
||||
)
|
||||
company = company_elem[0].strip() if company_elem else None
|
||||
|
||||
track_list_elem = content.xpath(
|
||||
"//div[@class='track-list']/div[@class='indent']/div/text()"
|
||||
track_list_elem = self.query_list(
|
||||
content, "//div[@class='track-list']/div[@class='indent']/div/text()"
|
||||
)
|
||||
if track_list_elem:
|
||||
track_list = "\n".join([track.strip() for track in track_list_elem])
|
||||
else:
|
||||
track_list = None
|
||||
|
||||
brief_elem = content.xpath("//span[@class='all hidden']")
|
||||
brief_elem = self.query_list(content, "//span[@class='all hidden']")
|
||||
if not brief_elem:
|
||||
brief_elem = content.xpath("//span[@property='v:summary']")
|
||||
brief_elem = self.query_list(content, "//span[@property='v:summary']")
|
||||
brief = (
|
||||
"\n".join([e.strip() for e in brief_elem[0].xpath("./text()")])
|
||||
if brief_elem
|
||||
else None
|
||||
)
|
||||
|
||||
img_url_elem = content.xpath("//div[@id='mainpic']//img/@src")
|
||||
img_url_elem = self.query_list(content, "//div[@id='mainpic']//img/@src")
|
||||
img_url = img_url_elem[0].strip() if img_url_elem else None
|
||||
other_elem = content.xpath(
|
||||
"//div[@id='info']//span[text()='又名:']/following-sibling::text()[1]"
|
||||
other_elem = self.query_list(
|
||||
content,
|
||||
"//div[@id='info']//span[text()='又名:']/following-sibling::text()[1]",
|
||||
)
|
||||
other_title = other_elem[0].strip().split(" / ") if other_elem else []
|
||||
lang = detect_language(f"{title} {brief}")
|
||||
|
@ -103,28 +106,33 @@ class DoubanMusic(AbstractSite):
|
|||
}
|
||||
gtin = None
|
||||
isrc = None
|
||||
other_elem = content.xpath(
|
||||
"//div[@id='info']//span[text()='专辑类型:']/following-sibling::text()[1]"
|
||||
other_elem = self.query_list(
|
||||
content,
|
||||
"//div[@id='info']//span[text()='专辑类型:']/following-sibling::text()[1]",
|
||||
)
|
||||
if other_elem:
|
||||
data["album_type"] = other_elem[0].strip()
|
||||
other_elem = content.xpath(
|
||||
"//div[@id='info']//span[text()='介质:']/following-sibling::text()[1]"
|
||||
other_elem = self.query_list(
|
||||
content,
|
||||
"//div[@id='info']//span[text()='介质:']/following-sibling::text()[1]",
|
||||
)
|
||||
if other_elem:
|
||||
data["media"] = other_elem[0].strip()
|
||||
other_elem = content.xpath(
|
||||
"//div[@id='info']//span[text()='ISRC:']/following-sibling::text()[1]"
|
||||
other_elem = self.query_list(
|
||||
content,
|
||||
"//div[@id='info']//span[text()='ISRC:']/following-sibling::text()[1]",
|
||||
)
|
||||
if other_elem:
|
||||
isrc = other_elem[0].strip()
|
||||
other_elem = content.xpath(
|
||||
"//div[@id='info']//span[text()='条形码:']/following-sibling::text()[1]"
|
||||
other_elem = self.query_list(
|
||||
content,
|
||||
"//div[@id='info']//span[text()='条形码:']/following-sibling::text()[1]",
|
||||
)
|
||||
if other_elem:
|
||||
gtin = upc_to_gtin_13(other_elem[0].strip())
|
||||
other_elem = content.xpath(
|
||||
"//div[@id='info']//span[text()='碟片数:']/following-sibling::text()[1]"
|
||||
other_elem = self.query_list(
|
||||
content,
|
||||
"//div[@id='info']//span[text()='碟片数:']/following-sibling::text()[1]",
|
||||
)
|
||||
if other_elem:
|
||||
data["disc_count"] = other_elem[0].strip()
|
||||
|
|
|
@ -80,7 +80,6 @@ exclude = [
|
|||
"journal/tests.py",
|
||||
"neodb",
|
||||
"**/migrations",
|
||||
"**/sites/douban_*",
|
||||
"neodb-takahe",
|
||||
]
|
||||
reportIncompatibleVariableOverride = false
|
||||
|
|
Loading…
Add table
Reference in a new issue