more data checks in scrapers
This commit is contained in:
parent
d671b7bf0e
commit
4707343e30
8 changed files with 21 additions and 9 deletions
|
@@ -106,7 +106,9 @@ class Bangumi(AbstractSite):
|
|||
[title] + (other_title or []) + ([orig_title] if orig_title else [])
|
||||
)
|
||||
localized_title = [{"lang": detect_language(t), "text": t} for t in titles]
|
||||
localized_desc = [{"lang": detect_language(brief), "text": brief}]
|
||||
localized_desc = (
|
||||
[{"lang": detect_language(brief), "text": brief}] if brief else []
|
||||
)
|
||||
data = {
|
||||
"localized_title": localized_title,
|
||||
"localized_description": localized_desc,
|
||||
|
|
|
@@ -56,7 +56,9 @@ class BoardGameGeek(AbstractSite):
|
|||
pd = ResourceContent(
|
||||
metadata={
|
||||
"localized_title": localized_title,
|
||||
"localized_description": [{"lang": "en", "text": brief}],
|
||||
"localized_description": (
|
||||
[{"lang": "en", "text": brief}] if brief else []
|
||||
),
|
||||
"title": title,
|
||||
"other_title": other_title,
|
||||
"genre": category,
|
||||
|
|
|
@@ -190,7 +190,7 @@ class DoubanBook(AbstractSite):
|
|||
"subtitle": subtitle,
|
||||
"localized_title": [{"lang": lang, "text": title}],
|
||||
"localized_subtitle": [{"lang": lang, "text": subtitle}],
|
||||
"localized_description": [{"lang": lang, "text": brief}],
|
||||
"localized_description": [{"lang": lang, "text": brief}] if brief else [],
|
||||
"orig_title": orig_title,
|
||||
"author": authors,
|
||||
"translator": translators,
|
||||
|
|
|
@@ -92,7 +92,9 @@ class DoubanGame(AbstractSite):
|
|||
|
||||
titles = uniq([title] + other_title + ([orig_title] if orig_title else []))
|
||||
localized_title = [{"lang": detect_language(t), "text": t} for t in titles]
|
||||
localized_desc = [{"lang": detect_language(brief), "text": brief}]
|
||||
localized_desc = (
|
||||
[{"lang": detect_language(brief), "text": brief}] if brief else []
|
||||
)
|
||||
|
||||
pd = ResourceContent(
|
||||
metadata={
|
||||
|
|
|
@@ -212,7 +212,9 @@ class DoubanMovie(AbstractSite):
|
|||
+ (other_title if other_title else [])
|
||||
)
|
||||
localized_title = [{"lang": detect_language(t), "text": t} for t in titles]
|
||||
localized_desc = [{"lang": detect_language(brief), "text": brief}]
|
||||
localized_desc = (
|
||||
[{"lang": detect_language(brief), "text": brief}] if brief else []
|
||||
)
|
||||
pd = ResourceContent(
|
||||
metadata={
|
||||
"title": title,
|
||||
|
|
|
@@ -90,7 +90,7 @@ class DoubanMusic(AbstractSite):
|
|||
data = {
|
||||
"title": title,
|
||||
"localized_title": localized_title,
|
||||
"localized_description": [{"lang": lang, "text": brief}],
|
||||
"localized_description": [{"lang": lang, "text": brief}] if brief else [],
|
||||
"artist": artist,
|
||||
"genre": genre,
|
||||
"release_date": release_date,
|
||||
|
|
|
@@ -73,7 +73,9 @@ class Goodreads(AbstractSite):
|
|||
lang = detect_language(b["title"] + " " + (b["description"] or ""))
|
||||
data["localized_title"] = [{"lang": lang, "text": b["title"]}]
|
||||
data["localized_subtitle"] = [] # Goodreads does not support subtitle
|
||||
data["localized_description"] = [{"lang": lang, "text": b["description"]}]
|
||||
data["localized_description"] = (
|
||||
[{"lang": lang, "text": b["description"]}] if b["description"] else []
|
||||
)
|
||||
|
||||
if data["brief"]:
|
||||
data["brief"] = re.sub(
|
||||
|
|
|
@@ -88,7 +88,9 @@ class RSS(AbstractSite):
|
|||
feed = self.parse_feed_from_url(self.url)
|
||||
if not feed:
|
||||
raise ValueError(f"no feed avaialble in {self.url}")
|
||||
title = feed["title"]
|
||||
title = feed["title"].strip()
|
||||
if not title:
|
||||
raise ParseError(self, "title")
|
||||
desc = html_to_text(feed["description"])
|
||||
lang = detect_language(title + " " + desc)
|
||||
pd = ResourceContent(
|
||||
|
@@ -96,7 +98,7 @@ class RSS(AbstractSite):
|
|||
"title": title,
|
||||
"brief": desc,
|
||||
"localized_title": [{"lang": lang, "text": title}],
|
||||
"localized_description": [{"lang": lang, "text": desc}],
|
||||
"localized_description": [{"lang": lang, "text": desc}] if desc else [],
|
||||
"host": (
|
||||
[feed.get("itunes_author")] if feed.get("itunes_author") else []
|
||||
),
|
||||
|
|
Loading…
Add table
Reference in a new issue