From 86834ede207dc670658ba024bf9f470d2727e117 Mon Sep 17 00:00:00 2001
From: Your Name
Date: Mon, 8 Apr 2024 21:27:36 -0400
Subject: [PATCH] update model about merging works

Keep the Edition/Work many-to-many links consistent when items are
merged or soft-deleted, and resolve linked works through their
external resource first.

* Edition.merge_to / Work.merge_to: after the base merge, add every
  linked work (or edition) of the source item to the target item,
  then clear the links on the source item.
* Edition.delete / Work.delete: when soft is true, clear the links
  before delegating to the base delete.
* Edition.update_linked_items_from_external_resource: look the work up
  via ExternalResource (id_type/id_value) and use its item; only when
  no such resource exists fall back to the previous Work lookup by
  primary lookup id. Both miss cases are logged as warnings through
  loguru, replacing the commented-out info log.
* tests: add MultiBookSitesTestCase.test_works_merge covering the
  merge of two works, the shared edition and deletion afterwards.
---
 catalog/book/models.py | 42 ++++++++++++++++++++++++++++++++++++-----
 catalog/book/tests.py  | 43 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 80 insertions(+), 5 deletions(-)

diff --git a/catalog/book/models.py b/catalog/book/models.py
index 0e7dfc29..5f3e32b9 100644
--- a/catalog/book/models.py
+++ b/catalog/book/models.py
@@ -22,6 +22,7 @@ from os.path import exists
 from django.core.validators import MaxValueValidator, MinValueValidator
 from django.db import models
 from django.utils.translation import gettext_lazy as _
+from loguru import logger as _logger
 
 from catalog.common import (
     BaseSchema,
@@ -163,19 +164,39 @@ class Edition(Item):
             return detect_isbn_asin(lookup_id_value)
         return super().lookup_id_cleanup(lookup_id_type, lookup_id_value)
 
+    def merge_to(self, to_item):
+        super().merge_to(to_item)
+        for work in self.works.all():
+            to_item.works.add(work)
+        self.works.clear()
+
+    def delete(self, using=None, soft=True, *args, **kwargs):
+        if soft:
+            self.works.clear()
+        return super().delete(using, soft, *args, **kwargs)
+
     def update_linked_items_from_external_resource(self, resource):
         """add Work from resource.metadata['work'] if not yet"""
         links = resource.required_resources + resource.related_resources
         for w in links:
             if w.get("model") == "Work":
-                work = Work.objects.filter(
-                    primary_lookup_id_type=w["id_type"],
-                    primary_lookup_id_value=w["id_value"],
+                work_res = ExternalResource.objects.filter(
+                    id_type=w["id_type"], id_value=w["id_value"]
                 ).first()
+                if work_res:
+                    work = work_res.item
+                    if not work:
+                        _logger.warning(f"Unable to find work for {work_res}")
+                else:
+                    _logger.warning(
+                        f'Unable to find resource for {w["id_type"]}:{w["id_value"]}'
+                    )
+                    work = Work.objects.filter(
+                        primary_lookup_id_type=w["id_type"],
+                        primary_lookup_id_value=w["id_value"],
+                    ).first()
                 if work and work not in self.works.all():
                     self.works.add(work)
-                # if not work:
-                #     _logger.info(f'Unable to find link for {w["url"]}')
 
     def get_related_books(self):
         works = list(self.works.all())
@@ -224,6 +245,17 @@ class Work(Item):
     # a lazy fix is to remove smaller DoubanBook_Work ids
     # but ideally deal with 302 in scrape().
 
+    def merge_to(self, to_item):
+        super().merge_to(to_item)
+        for edition in self.editions.all():
+            to_item.editions.add(edition)
+        self.editions.clear()
+
+    def delete(self, using=None, soft=True, *args, **kwargs):
+        if soft:
+            self.editions.clear()
+        return super().delete(using, soft, *args, **kwargs)
+
 
 class Series(Item):
     category = ItemCategory.Book
diff --git a/catalog/book/tests.py b/catalog/book/tests.py
index b87f59c3..70d63a4b 100644
--- a/catalog/book/tests.py
+++ b/catalog/book/tests.py
@@ -387,3 +387,46 @@ class MultiBookSitesTestCase(TestCase):
         self.assertEqual(w3e[1].title, "黄金时代")
         e = Edition.objects.get(primary_lookup_id_value=9781662601217)
         self.assertEqual(e.title, "Golden Age: A Novel")
+
+    @use_local_response
+    def test_works_merge(self):
+        # url1 and url4 has same ISBN, hence they share same Edition instance, which belongs to 2 Work instances
+        url1 = "https://book.douban.com/subject/1089243/"
+        url2 = "https://book.douban.com/subject/2037260/"
+        url3 = "https://www.goodreads.com/book/show/59952545-golden-age"
+        url4 = "https://www.goodreads.com/book/show/11798823"
+        p1 = SiteManager.get_site_by_url(
+            url1
+        ).get_resource_ready()  # lxml bug may break this
+        w1 = p1.item.works.all().first()
+        p2 = SiteManager.get_site_by_url(url2).get_resource_ready()
+        w2 = p2.item.works.all().first()
+        self.assertEqual(w1, w2)
+        self.assertEqual(p1.item.works.all().count(), 1)
+        p3 = SiteManager.get_site_by_url(url3).get_resource_ready()
+        w3 = p3.item.works.all().first()
+        self.assertNotEqual(w3, w2)
+        self.assertEqual(w2.external_resources.all().count(), 1)
+        self.assertEqual(w3.external_resources.all().count(), 1)
+        w3.merge_to(w2)
+        self.assertEqual(w2.external_resources.all().count(), 2)
+        self.assertEqual(w3.external_resources.all().count(), 0)
+        self.assertEqual(w2.editions.all().count(), 3)
+        self.assertEqual(w3.editions.all().count(), 0)
+        p4 = SiteManager.get_site_by_url(url4).get_resource_ready()
+        self.assertEqual(p4.item.id, p1.item.id)
+        self.assertEqual(p4.item.works.all().count(), 1)
+        self.assertEqual(p1.item.works.all().count(), 1)
+        w2e = w2.editions.all().order_by("title")
+        self.assertEqual(w2e.count(), 3)
+        self.assertEqual(w2e[0].title, "Golden Age: A Novel")
+        self.assertEqual(w2e[1].title, "Wang in Love and Bondage")
+        self.assertEqual(w2e[2].title, "黄金时代")
+        w3e = w3.editions.all().order_by("title")
+        self.assertEqual(w3e.count(), 0)
+        e = Edition.objects.get(primary_lookup_id_value=9781662601217)
+        self.assertEqual(e.title, "Golden Age: A Novel")
+        w2e[1].delete()
+        self.assertEqual(w2.editions.all().count(), 2)
+        w2e.delete()
+        self.assertEqual(p1.item.works.all().count(), 0)