more data checks in scrapers

This commit is contained in:
Your Name 2024-07-16 00:51:05 -04:00 committed by Henri Dickson
parent d671b7bf0e
commit 4707343e30
8 changed files with 21 additions and 9 deletions

View file

@ -106,7 +106,9 @@ class Bangumi(AbstractSite):
[title] + (other_title or []) + ([orig_title] if orig_title else [])
)
localized_title = [{"lang": detect_language(t), "text": t} for t in titles]
localized_desc = [{"lang": detect_language(brief), "text": brief}]
localized_desc = (
[{"lang": detect_language(brief), "text": brief}] if brief else []
)
data = {
"localized_title": localized_title,
"localized_description": localized_desc,

View file

@ -56,7 +56,9 @@ class BoardGameGeek(AbstractSite):
pd = ResourceContent(
metadata={
"localized_title": localized_title,
"localized_description": [{"lang": "en", "text": brief}],
"localized_description": (
[{"lang": "en", "text": brief}] if brief else []
),
"title": title,
"other_title": other_title,
"genre": category,

View file

@ -190,7 +190,7 @@ class DoubanBook(AbstractSite):
"subtitle": subtitle,
"localized_title": [{"lang": lang, "text": title}],
"localized_subtitle": [{"lang": lang, "text": subtitle}],
"localized_description": [{"lang": lang, "text": brief}],
"localized_description": [{"lang": lang, "text": brief}] if brief else [],
"orig_title": orig_title,
"author": authors,
"translator": translators,

View file

@ -92,7 +92,9 @@ class DoubanGame(AbstractSite):
titles = uniq([title] + other_title + ([orig_title] if orig_title else []))
localized_title = [{"lang": detect_language(t), "text": t} for t in titles]
localized_desc = [{"lang": detect_language(brief), "text": brief}]
localized_desc = (
[{"lang": detect_language(brief), "text": brief}] if brief else []
)
pd = ResourceContent(
metadata={

View file

@ -212,7 +212,9 @@ class DoubanMovie(AbstractSite):
+ (other_title if other_title else [])
)
localized_title = [{"lang": detect_language(t), "text": t} for t in titles]
localized_desc = [{"lang": detect_language(brief), "text": brief}]
localized_desc = (
[{"lang": detect_language(brief), "text": brief}] if brief else []
)
pd = ResourceContent(
metadata={
"title": title,

View file

@ -90,7 +90,7 @@ class DoubanMusic(AbstractSite):
data = {
"title": title,
"localized_title": localized_title,
"localized_description": [{"lang": lang, "text": brief}],
"localized_description": [{"lang": lang, "text": brief}] if brief else [],
"artist": artist,
"genre": genre,
"release_date": release_date,

View file

@ -73,7 +73,9 @@ class Goodreads(AbstractSite):
lang = detect_language(b["title"] + " " + (b["description"] or ""))
data["localized_title"] = [{"lang": lang, "text": b["title"]}]
data["localized_subtitle"] = [] # Goodreads does not support subtitle
data["localized_description"] = [{"lang": lang, "text": b["description"]}]
data["localized_description"] = (
[{"lang": lang, "text": b["description"]}] if b["description"] else []
)
if data["brief"]:
data["brief"] = re.sub(

View file

@ -88,7 +88,9 @@ class RSS(AbstractSite):
feed = self.parse_feed_from_url(self.url)
if not feed:
raise ValueError(f"no feed avaialble in {self.url}")
title = feed["title"]
title = feed["title"].strip()
if not title:
raise ParseError(self, "title")
desc = html_to_text(feed["description"])
lang = detect_language(title + " " + desc)
pd = ResourceContent(
@ -96,7 +98,7 @@ class RSS(AbstractSite):
"title": title,
"brief": desc,
"localized_title": [{"lang": lang, "text": title}],
"localized_description": [{"lang": lang, "text": desc}],
"localized_description": [{"lang": lang, "text": desc}] if desc else [],
"host": (
[feed.get("itunes_author")] if feed.get("itunes_author") else []
),