diff --git a/catalog/game/tests.py b/catalog/game/tests.py index e625c927..98ce278f 100644 --- a/catalog/game/tests.py +++ b/catalog/game/tests.py @@ -116,7 +116,7 @@ class DoubanGameTestCase(TestCase): class BangumiGameTestCase(TestCase): databases = "__all__" - # @use_local_response + @use_local_response def test_parse(self): t_id_type = IdType.Bangumi t_id_value = "15912" diff --git a/catalog/sites/goodreads.py b/catalog/sites/goodreads.py index 17ee841d..6370ad1a 100644 --- a/catalog/sites/goodreads.py +++ b/catalog/sites/goodreads.py @@ -10,6 +10,7 @@ from catalog.book.models import Edition, Work from catalog.book.utils import detect_isbn_asin from catalog.common import * from common.models.lang import detect_language +from journal.models.renderers import html_to_text _logger = logging.getLogger(__name__) @@ -69,12 +70,12 @@ class Goodreads(AbstractSite): # Goodreads may return empty page template when internal service timeouts raise ParseError(self, "Book in __NEXT_DATA__ json") data["title"] = b["title"] - data["brief"] = b["description"] - lang = detect_language(b["title"] + " " + (b["description"] or "")) + data["brief"] = html_to_text(b["description"] or "").strip() + lang = detect_language(b["title"] + " " + data["brief"]) data["localized_title"] = [{"lang": lang, "text": b["title"]}] data["localized_subtitle"] = [] # Goodreads does not support subtitle data["localized_description"] = ( - [{"lang": lang, "text": b["description"]}] if b["description"] else [] + [{"lang": lang, "text": data["brief"]}] if data["brief"] else [] ) if data["brief"]: @@ -103,7 +104,7 @@ class Goodreads(AbstractSite): ) data["pub_year"] = dt.year data["pub_month"] = dt.month - if b["details"].get("language"): + if b["details"].get("language", {}).get("name"): data["language"] = [b["details"].get("language").get("name")] data["cover_image_url"] = b["imageUrl"] w = next(filter(lambda x: x.get("details"), o["Work"]), None) diff --git a/journal/models/renderers.py b/journal/models/renderers.py index ada284c4..c685696e 100644 --- a/journal/models/renderers.py +++ b/journal/models/renderers.py @@ -43,7 +43,11 @@ _RE_HTML_TAG = re.compile(r"<[^>]*>") def html_to_text(h: str) -> str: - return unescape(_RE_HTML_TAG.sub(" ", h.replace("\r", ""))) + return unescape( + _RE_HTML_TAG.sub( + " ", h.replace("\r", "").replace(" str: