diff --git a/catalog/book/models.py b/catalog/book/models.py index 8598402b..a8164423 100644 --- a/catalog/book/models.py +++ b/catalog/book/models.py @@ -302,10 +302,10 @@ class Edition(Item): ) work.save() else: - work = Work.objects.create(title=self.title) + work = Work.objects.create(localized_title=self.localized_title) work.editions.add(self, target) - work.localized_title = self.localized_title - work.save() + # work.localized_title = self.localized_title + # work.save() return True def unlink_from_all_works(self): @@ -362,7 +362,7 @@ class Work(Item): for edition in self.editions.all(): to_item.editions.add(edition) self.editions.clear() - to_item.other_title = uniq(to_item.other_title + [self.title]) # type: ignore + to_item.language = uniq(to_item.language + self.language) # type: ignore to_item.localized_title = uniq(to_item.localized_title + self.localized_title) to_item.save() diff --git a/catalog/book/tests.py b/catalog/book/tests.py index 19b222b6..72c76f4b 100644 --- a/catalog/book/tests.py +++ b/catalog/book/tests.py @@ -62,20 +62,26 @@ class WorkTestCase(TestCase): databases = "__all__" def setUp(self): - self.hyperion_hardcover = Edition.objects.create(title="Hyperion") + self.hyperion_hardcover = Edition.objects.create( + localized_title=[{"lang": "en", "text": "Hyperion"}] + ) self.hyperion_hardcover.pages = 481 self.hyperion_hardcover.isbn = "9780385249492" self.hyperion_hardcover.save() - self.hyperion_print = Edition.objects.create(title="Hyperion") + self.hyperion_print = Edition.objects.create( + localized_title=[{"lang": "en", "text": "Hyperion"}] + ) self.hyperion_print.pages = 500 self.hyperion_print.isbn = "9780553283686" self.hyperion_print.save() self.hyperion_ebook = Edition(title="Hyperion") self.hyperion_ebook.asin = "B0043M6780" self.hyperion_ebook.save() - self.andymion_print = Edition.objects.create(title="Andymion", pages=42) + self.andymion_print = Edition.objects.create( + localized_title=[{"lang": "en", "text": "Andymion"}], pages=42 + ) # serie = Serie(title="Hyperion Cantos") - self.hyperion = Work(title="Hyperion") + self.hyperion = Work(localized_title=[{"lang": "en", "text": "Hyperion"}]) self.hyperion.save() def test_work(self): @@ -86,11 +92,9 @@ class WorkTestCase(TestCase): def test_merge(self): title1 = [{"lang": "zh", "text": "z"}] title2 = [{"lang": "en", "text": "e"}] - w1 = Work.objects.create(title="title1", localized_title=title1) - w2 = Work.objects.create(title="title2", localized_title=title2) + w1 = Work.objects.create(localized_title=title1) + w2 = Work.objects.create(localized_title=title2) w2.merge_to(w1) - self.assertEqual(w1.title, "title1") - self.assertEqual(w1.other_title, ["title2"]) self.assertEqual(len(w1.localized_title), 2) def test_link(self): @@ -99,7 +103,8 @@ class WorkTestCase(TestCase): self.assertTrue(self.hyperion_ebook.has_related_books()) self.assertTrue(self.hyperion_print.has_works()) self.assertEqual( - self.hyperion_print.works.first().title, self.hyperion_print.title + self.hyperion_print.works.first().display_title, + self.hyperion_print.display_title, ) self.hyperion_print.unlink_from_all_works() self.assertFalse(self.hyperion_print.has_related_books()) @@ -162,7 +167,7 @@ class GoodreadsTestCase(TestCase): self.assertEqual(resource.id_value, "77566") self.assertNotEqual(resource.cover, "/media/item/default.svg") self.assertEqual(edition.isbn, "9780553283686") - self.assertEqual(edition.title, "Hyperion") + self.assertEqual(edition.display_title, "Hyperion") edition.delete() site = SiteManager.get_site_by_url(t_url) @@ -186,14 +191,14 @@ class GoodreadsTestCase(TestCase): t_url = "https://www.goodreads.com/book/show/45064996-hyperion" site = SiteManager.get_site_by_url(t_url) site.get_resource_ready() - self.assertEqual(site.resource.item.title, "Hyperion") + self.assertEqual(site.resource.item.display_title, "Hyperion") self.assertEqual(site.resource.item.asin, "B004G60EHS") @use_local_response def test_work(self): url = "https://www.goodreads.com/work/editions/153313" p = SiteManager.get_site_by_url(url).get_resource_ready() - self.assertEqual(p.item.title, "1984") + self.assertEqual(p.item.display_title, "1984") url1 = "https://www.goodreads.com/book/show/3597767-rok-1984" url2 = "https://www.goodreads.com/book/show/40961427-1984" p1 = SiteManager.get_site_by_url(url1).get_resource_ready() @@ -233,7 +238,7 @@ class GoogleBooksTestCase(TestCase): self.assertEqual(site.resource.id_type, IdType.GoogleBooks) self.assertEqual(site.resource.id_value, "hV--zQEACAAJ") self.assertEqual(site.resource.item.isbn, "9781847498571") - self.assertEqual(site.resource.item.title, "1984 Nineteen Eighty-Four") + self.assertEqual(site.resource.item.display_title, "1984 Nineteen Eighty-Four") class BooksTWTestCase(TestCase): @@ -271,7 +276,7 @@ class BooksTWTestCase(TestCase): self.assertEqual(site.resource.metadata.get("isbn"), "9786263152236") self.assertEqual(site.resource.metadata.get("author"), ["Tim Mackintosh-Smith"]) self.assertEqual(site.resource.metadata.get("translator"), ["吳莉君"]) - self.assertEqual(site.resource.metadata.get("language"), "繁體中文") + self.assertEqual(site.resource.metadata.get("language"), ["繁體中文"]) self.assertEqual(site.resource.metadata.get("pub_house"), "臉譜") self.assertEqual(site.resource.metadata.get("pub_year"), 2023) self.assertEqual(site.resource.metadata.get("pub_month"), 2) @@ -282,7 +287,7 @@ class BooksTWTestCase(TestCase): self.assertEqual(site.resource.id_value, "0010947886") self.assertEqual(site.resource.item.isbn, "9786263152236") self.assertEqual( - site.resource.item.title, + site.resource.item.display_title, "阿拉伯人三千年:從民族、部落、語言、文化、宗教到帝國,綜覽阿拉伯世界的崛起、衰落與再興", ) @@ -320,7 +325,7 @@ class DoubanBookTestCase(TestCase): self.assertEqual(site.resource.id_type, IdType.DoubanBook) self.assertEqual(site.resource.id_value, "35902899") self.assertEqual(site.resource.item.isbn, "9781847498571") - self.assertEqual(site.resource.item.title, "1984 Nineteen Eighty-Four") + self.assertEqual(site.resource.item.display_title, "1984 Nineteen Eighty-Four") @use_local_response def test_publisher(self): @@ -342,13 +347,13 @@ class DoubanBookTestCase(TestCase): p2 = SiteManager.get_site_by_url(url2).get_resource_ready() w1 = p1.item.works.all().first() w2 = p2.item.works.all().first() - self.assertEqual(w1.title, "黄金时代") - self.assertEqual(w2.title, "黄金时代") + self.assertEqual(w1.display_title, "黄金时代") + self.assertEqual(w2.display_title, "黄金时代") self.assertEqual(w1, w2) - editions = w1.editions.all().order_by("title") - self.assertEqual(editions.count(), 2) - self.assertEqual(editions[0].title, "Wang in Love and Bondage") - self.assertEqual(editions[1].title, "黄金时代") + editions = sorted(list(w1.editions.all()), key=lambda e: e.display_title) + self.assertEqual(len(editions), 2) + self.assertEqual(editions[0].display_title, "Wang in Love and Bondage") + self.assertEqual(editions[1].display_title, "黄金时代") class MultiBookSitesTestCase(TestCase): @@ -388,16 +393,16 @@ class MultiBookSitesTestCase(TestCase): self.assertEqual(p4.item.id, p1.item.id) self.assertEqual(p4.item.works.all().count(), 2) self.assertEqual(p1.item.works.all().count(), 2) - w2e = w2.editions.all().order_by("title") - self.assertEqual(w2e.count(), 2) - self.assertEqual(w2e[0].title, "Wang in Love and Bondage") - self.assertEqual(w2e[1].title, "黄金时代") - w3e = w3.editions.all().order_by("title") - self.assertEqual(w3e.count(), 2) - self.assertEqual(w3e[0].title, "Golden Age: A Novel") - self.assertEqual(w3e[1].title, "黄金时代") + w2e = sorted(list(w2.editions.all()), key=lambda e: e.display_title) + self.assertEqual(len(w2e), 2) + self.assertEqual(w2e[0].display_title, "Wang in Love and Bondage") + self.assertEqual(w2e[1].display_title, "黄金时代") + w3e = sorted(list(w3.editions.all()), key=lambda e: e.display_title) + self.assertEqual(len(w3e), 2) + self.assertEqual(w3e[0].display_title, "Golden Age: A Novel") + self.assertEqual(w3e[1].display_title, "黄金时代") e = Edition.objects.get(primary_lookup_id_value=9781662601217) - self.assertEqual(e.title, "Golden Age: A Novel") + self.assertEqual(e.display_title, "Golden Age: A Novel") @use_local_response def test_works_merge(self): @@ -428,16 +433,16 @@ class MultiBookSitesTestCase(TestCase): self.assertEqual(p4.item.id, p1.item.id) self.assertEqual(p4.item.works.all().count(), 1) self.assertEqual(p1.item.works.all().count(), 1) - w2e = w2.editions.all().order_by("title") - self.assertEqual(w2e.count(), 3) - self.assertEqual(w2e[0].title, "Golden Age: A Novel") - self.assertEqual(w2e[1].title, "Wang in Love and Bondage") - self.assertEqual(w2e[2].title, "黄金时代") + w2e = sorted(list(w2.editions.all()), key=lambda e: e.display_title) + self.assertEqual(len(w2e), 3) + self.assertEqual(w2e[0].display_title, "Golden Age: A Novel") + self.assertEqual(w2e[1].display_title, "Wang in Love and Bondage") + self.assertEqual(w2e[2].display_title, "黄金时代") w3e = w3.editions.all().order_by("title") self.assertEqual(w3e.count(), 0) e = Edition.objects.get(primary_lookup_id_value=9781662601217) - self.assertEqual(e.title, "Golden Age: A Novel") + self.assertEqual(e.display_title, "Golden Age: A Novel") w2e[1].delete() self.assertEqual(w2.editions.all().count(), 2) - w2e.delete() + w2.editions.all().delete() self.assertEqual(p1.item.works.all().count(), 0) diff --git a/catalog/common/models.py b/catalog/common/models.py index 55c53480..f4051f85 100644 --- a/catalog/common/models.py +++ b/catalog/common/models.py @@ -607,15 +607,10 @@ class Item(PolymorphicModel): @property def display_title(self) -> str: - return ( - self.get_localized_title() - or self.title - or ( - self.orig_title # type:ignore - if hasattr(self, "orig_title") - else "" - ) - ) or (self.localized_title[0]["text"] if self.localized_title else "") + # return title in current locale if possible, otherwise any title + return (self.get_localized_title() or self.title) or ( + self.localized_title[0]["text"] if self.localized_title else "" + ) @property def display_description(self) -> str: diff --git a/catalog/game/tests.py b/catalog/game/tests.py index 23dd7f66..56b9d26a 100644 --- a/catalog/game/tests.py +++ b/catalog/game/tests.py @@ -106,7 +106,8 @@ class DoubanGameTestCase(TestCase): site.get_resource_ready() self.assertEqual(site.ready, True) self.assertIsInstance(site.resource.item, Game) - self.assertEqual(site.resource.item.display_title, "Portal 2") + titles = sorted([t["text"] for t in site.resource.item.localized_title]) + self.assertEqual(titles, ["Portal 2", "传送门2"]) self.assertEqual(site.resource.item.douban_game, "10734307") self.assertEqual(site.resource.item.genre, ["第一人称射击", "益智"]) self.assertEqual(site.resource.item.other_title, []) diff --git a/catalog/performance/tests.py b/catalog/performance/tests.py index 2fa86229..9b93c245 100644 --- a/catalog/performance/tests.py +++ b/catalog/performance/tests.py @@ -31,9 +31,7 @@ class DoubanDramaTestCase(TestCase): site = SiteManager.get_site_by_url(t_url) resource = site.get_resource_ready() item = site.get_item() - self.assertEqual( - item.display_title, "眠らない男・ナポレオン ―愛と栄光の涯(はて)に―" - ) + self.assertEqual(item.display_title, "不眠之人·拿破仑") self.assertEqual(len(item.localized_title), 2) self.assertEqual(item.genre, ["音乐剧"]) self.assertEqual(item.troupe, ["宝塚歌剧团"]) @@ -43,7 +41,7 @@ class DoubanDramaTestCase(TestCase): site = SiteManager.get_site_by_url(t_url) resource = site.get_resource_ready() item = site.get_item() - self.assertEqual(item.display_title, "相聲說垮鬼子們") + self.assertEqual(item.display_title, "相声说垮鬼子们") self.assertEqual(item.opening_date, "1997-05") self.assertEqual(item.location, ["臺北新舞臺"]) diff --git a/catalog/sites/bookstw.py b/catalog/sites/bookstw.py index 3594c872..c85284fc 100644 --- a/catalog/sites/bookstw.py +++ b/catalog/sites/bookstw.py @@ -60,7 +60,7 @@ class BooksTW(AbstractSite): "//div/ul/li[starts-with(text(),'語言:')]/text()" ) language = ( - language_elem[0].strip().split(":")[1].strip() if language_elem else None # type: ignore + [language_elem[0].strip().split(":")[1].strip()] if language_elem else [] # type: ignore ) pub_house = content.xpath("string(//div/ul/li[contains(text(),'出版社:')])") @@ -117,15 +117,12 @@ class BooksTW(AbstractSite): "string(//div[contains(@class,'cover_img')]//img[contains(@class,'cover')]/@src)" ) img_url = re.sub(r"&[wh]=\d+", "", img_url) if img_url else None # type: ignore - localized_title = [{"lang": "zh-tw", "text": title}] - if orig_title: - localized_title.append( - {"lang": detect_language(orig_title), "text": orig_title} - ) data = { "title": title, - "localized_title": localized_title, "subtitle": subtitle, + "localized_title": [{"lang": "zh-tw", "text": title}], + "localized_subtitle": [{"lang": "zh-tw", "text": subtitle}], + "localized_description": [{"lang": "zh-tw", "text": brief}], "orig_title": orig_title, "author": authors, "translator": translators, diff --git a/catalog/sites/douban_book.py b/catalog/sites/douban_book.py index ee2b4272..64fa8e90 100644 --- a/catalog/sites/douban_book.py +++ b/catalog/sites/douban_book.py @@ -3,6 +3,7 @@ import logging from catalog.book.models import * from catalog.book.utils import * from catalog.common import * +from common.models.lang import detect_language from .douban import * @@ -51,7 +52,7 @@ class DoubanBook(AbstractSite): language_elem = content.xpath( "//div[@id='info']//span[text()='语言:']/following::text()" ) - language = language_elem[0].strip() if language_elem else None + language = [language_elem[0].strip()] if language_elem else [] pub_house_elem = content.xpath( "//div[@id='info']//span[text()='出版社:']/following::text()" @@ -187,9 +188,13 @@ class DoubanBook(AbstractSite): ) imprint = imprint_elem[0].strip() if imprint_elem else None + lang = detect_language(title + " " + (brief or "")) data = { "title": title, "subtitle": subtitle, + "localized_title": [{"lang": lang, "text": title}], + "localized_subtitle": [{"lang": lang, "text": subtitle}], + "localized_description": [{"lang": lang, "text": brief}], "orig_title": orig_title, "author": authors, "translator": translators, @@ -221,7 +226,12 @@ class DoubanBook(AbstractSite): "id_value": r[1] if r else None, "title": data["title"], "url": works_element[0], - "content": {"metadata": {"title": data["title"]}}, + "content": { + "metadata": { + "title": data["title"], + "localized_title": data["localized_title"], + } + }, } ] @@ -255,5 +265,10 @@ class DoubanBook_Work(AbstractSite): if not title: raise ParseError(self, "title") book_urls = content.xpath('//a[@class="pl2"]/@href') - pd = ResourceContent(metadata={"title": title, "edition_urls": book_urls}) + d = { + "title": title, + "localized_title": [{"lang": "zh-cn", "text": title}], + "edition_urls": book_urls, + } + pd = ResourceContent(metadata=d) return pd diff --git a/catalog/sites/goodreads.py b/catalog/sites/goodreads.py index 2013a741..bba40784 100644 --- a/catalog/sites/goodreads.py +++ b/catalog/sites/goodreads.py @@ -9,6 +9,7 @@ from lxml import html from catalog.book.models import Edition, Work from catalog.book.utils import detect_isbn_asin from catalog.common import * +from common.models.lang import detect_language _logger = logging.getLogger(__name__) @@ -69,6 +70,11 @@ class Goodreads(AbstractSite): raise ParseError(self, "Book in __NEXT_DATA__ json") data["title"] = b["title"] data["brief"] = b["description"] + lang = detect_language(b["title"] + " " + (b["description"] or "")) + data["localized_title"] = [{"lang": lang, "text": b["title"]}] + data["localized_subtitle"] = [] # Goodreads does not support subtitle + data["localized_description"] = [{"lang": lang, "text": b["description"]}] + if data["brief"]: data["brief"] = re.sub( r"<[^>]*>", "", data["brief"].replace("
", "\n") @@ -96,7 +102,7 @@ class Goodreads(AbstractSite): data["pub_year"] = dt.year data["pub_month"] = dt.month if b["details"].get("language"): - data["language"] = b["details"].get("language").get("name") + data["language"] = [b["details"].get("language").get("name")] data["cover_image_url"] = b["imageUrl"] w = next(filter(lambda x: x.get("details"), o["Work"]), None) if w: @@ -149,6 +155,7 @@ class Goodreads_Work(AbstractSite): pd = ResourceContent( metadata={ "title": title, + "localized_title": [{"lang": "en", "text": title}], "author": author, "first_published": first_published, } diff --git a/catalog/sites/google_books.py b/catalog/sites/google_books.py index bbea4cd7..8baff688 100644 --- a/catalog/sites/google_books.py +++ b/catalog/sites/google_books.py @@ -41,8 +41,9 @@ class GoogleBooks(AbstractSite): b["volumeInfo"]["publisher"] if "publisher" in b["volumeInfo"] else None ) language = ( - b["volumeInfo"]["language"] if "language" in b["volumeInfo"] else None + [b["volumeInfo"]["language"]] if "language" in b["volumeInfo"] else [] ) + pages = b["volumeInfo"]["pageCount"] if "pageCount" in b["volumeInfo"] else None if "mainCategory" in b["volumeInfo"]: other["分类"] = b["volumeInfo"]["mainCategory"] @@ -81,7 +82,11 @@ class GoogleBooks(AbstractSite): raw_img, ext = BasicImageDownloader.download_image(img_url, None, headers={}) data = { "title": title, + "localized_title": [{"lang": language, "text": title}], "subtitle": subtitle, + "localized_subtitle": ( + [{"lang": language, "text": subtitle}] if subtitle else [] + ), "orig_title": None, "author": authors, "translator": None, @@ -92,7 +97,10 @@ class GoogleBooks(AbstractSite): "binding": None, "pages": pages, "isbn": isbn, - "brief": brief, + # "brief": brief, + "localized_description": ( + [{"lang": language, "text": brief}] if brief else [] + ), "contents": None, "other_info": other, "cover_image_url": img_url,