lib.itmens/catalog/book/models.py

"""
Models for Book

Series -> Work -> Edition

Series is not fully implemented at the moment

Goodreads
Famous works have many editions

Google Books:
only has Edition level ("volume") data

Douban:
old editions has only CUBN(Chinese Unified Book Number)
work data seems asymmetric (a book links to a work, but may not listed in that work as one of its editions)

"""

from os.path import exists

from django.core.validators import MaxValueValidator, MinValueValidator
from django.db import models
from django.utils.translation import gettext_lazy as _
from loguru import logger as _logger

from catalog.common import (
    BaseSchema,
    ExternalResource,
    IdType,
    Item,
    ItemCategory,
    ItemInSchema,
    ItemSchema,
    ItemType,
    PrimaryLookupIdDescriptor,
    jsondata,
)

from .utils import *


class EditionInSchema(ItemInSchema):
    subtitle: str | None = None
    orig_title: str | None = None
    author: list[str]
    translator: list[str]
    language: str | None = None
    pub_house: str | None = None
    pub_year: int | None = None
    pub_month: int | None = None
    binding: str | None = None
    price: str | None = None
    pages: int | str | None = None
    series: str | None = None
    imprint: str | None = None


class EditionSchema(EditionInSchema, BaseSchema):
    isbn: str | None = None
    pass


class Edition(Item):
    category = ItemCategory.Book
    url_path = "book"

    isbn = PrimaryLookupIdDescriptor(IdType.ISBN)
    asin = PrimaryLookupIdDescriptor(IdType.ASIN)
    cubn = PrimaryLookupIdDescriptor(IdType.CUBN)
    # douban_book = LookupIdDescriptor(IdType.DoubanBook)
    # goodreads = LookupIdDescriptor(IdType.Goodreads)

    METADATA_COPY_LIST = [
        "title",
        "subtitle",
        "author",
        "pub_house",
        "pub_year",
        "pub_month",
        "language",
        "orig_title",
        "translator",
        "series",
        "imprint",
        "binding",
        "pages",
        "price",
        "brief",
        "contents",
    ]
    subtitle = jsondata.CharField(
        _("subtitle"), null=True, blank=True, default=None, max_length=500
    )
    orig_title = jsondata.CharField(
        _("original title"), null=True, blank=True, default=None, max_length=500
    )
    author = jsondata.ArrayField(
        verbose_name=_("author"),
        base_field=models.CharField(max_length=500),
        null=False,
        blank=False,
        default=list,
    )
    translator = jsondata.ArrayField(
        verbose_name=_("translator"),
        base_field=models.CharField(max_length=500),
        null=True,
        blank=True,
        default=list,
    )
    language = jsondata.CharField(
        _("language"), null=True, blank=True, default=None, max_length=500
    )
    pub_house = jsondata.CharField(
        _("publisher"), null=True, blank=False, default=None, max_length=500
    )
    pub_year = jsondata.IntegerField(
        _("publication year"),
        null=True,
        blank=False,
        validators=[MinValueValidator(1), MaxValueValidator(2999)],
    )
    pub_month = jsondata.IntegerField(
        _("publication month"),
        null=True,
        blank=True,
        validators=[MinValueValidator(1), MaxValueValidator(12)],
    )
    binding = jsondata.CharField(
        _("binding"), null=True, blank=True, default=None, max_length=500
    )
    pages = jsondata.IntegerField(_("pages"), blank=True, default=None)
    series = jsondata.CharField(
        _("series"), null=True, blank=True, default=None, max_length=500
    )
    contents = jsondata.TextField(_("contents"), null=True, blank=True, default=None)
    price = jsondata.CharField(_("price"), null=True, blank=True, max_length=500)
    imprint = jsondata.CharField(_("imprint"), null=True, blank=True, max_length=500)

    @property
    def isbn10(self):
        return isbn_13_to_10(self.isbn)

    @isbn10.setter
    def isbn10(self, value):
        self.isbn = isbn_10_to_13(value)

    @classmethod
    def lookup_id_type_choices(cls):
        id_types = [
            IdType.ISBN,
            IdType.ASIN,
            IdType.CUBN,
            IdType.DoubanBook,
            IdType.Goodreads,
            IdType.GoogleBooks,
        ]
        return [(i.value, i.label) for i in id_types]

    @classmethod
    def lookup_id_cleanup(cls, lookup_id_type, lookup_id_value):
        if lookup_id_type in [IdType.ASIN.value, IdType.ISBN.value]:
            return detect_isbn_asin(lookup_id_value)
        return super().lookup_id_cleanup(lookup_id_type, lookup_id_value)

    def merge_to(self, to_item):
        super().merge_to(to_item)
        for work in self.works.all():
            to_item.works.add(work)
        self.works.clear()

    def delete(self, using=None, soft=True, *args, **kwargs):
        if soft:
            self.works.clear()
        return super().delete(using, soft, *args, **kwargs)

    def update_linked_items_from_external_resource(self, resource):
        """add Work from resource.metadata['work'] if not yet"""
        links = resource.required_resources + resource.related_resources
        for w in links:
            if w.get("model") == "Work":
                work_res = ExternalResource.objects.filter(
                    id_type=w["id_type"], id_value=w["id_value"]
                ).first()
                if work_res:
                    work = work_res.item
                    if not work:
                        _logger.warning(f"Unable to find work for {work_res}")
                else:
                    _logger.warning(
                        f'Unable to find resource for {w["id_type"]}:{w["id_value"]}'
                    )
                    work = Work.objects.filter(
                        primary_lookup_id_type=w["id_type"],
                        primary_lookup_id_value=w["id_value"],
                    ).first()
                if work and work not in self.works.all():
                    self.works.add(work)

    def get_related_books(self):
        works = list(self.works.all())
        return (
            Edition.objects.filter(works__in=works)
            .distinct()
            .exclude(pk=self.pk)
            .exclude(is_deleted=True)
            .exclude(merged_to_item__isnull=False)
            .order_by("title")
        )

    def has_related_books(self):
        works = list(self.works.all())
        if not works:
            return False
        return Edition.objects.filter(works__in=works).exclude(pk=self.pk).exists()

    def link_to_related_book(self, target: "Edition") -> bool:
        if target == self or target.is_deleted or target.merged_to_item:
            return False
        if target.works.all().exists():
            for work in target.works.all():
                self.works.add(work)
        elif self.works.all().exists():
            for work in self.works.all():
                target.works.add(work)
        else:
            Work.objects.create(title=self.title).editions.add(self, target)
        return True

    def unlink_from_all_works(self):
        self.works.clear()

    def has_works(self):
        return self.works.all().exists()


class Work(Item):
    category = ItemCategory.Book
    url_path = "book/work"
    douban_work = PrimaryLookupIdDescriptor(IdType.DoubanBook_Work)
    goodreads_work = PrimaryLookupIdDescriptor(IdType.Goodreads_Work)
    editions = models.ManyToManyField(Edition, related_name="works")
    author = jsondata.ArrayField(
        verbose_name=_("author"),
        base_field=models.CharField(max_length=500),
        null=True,
        blank=True,
        default=list,
    )
    other_title = jsondata.ArrayField(
        verbose_name=_("other title"),
        base_field=models.CharField(blank=True, default="", max_length=200),
        null=True,
        blank=True,
        default=list,
    )
    METADATA_COPY_LIST = [
        "title",
        "other_title",
        "author",
        "brief",
    ]
    # TODO: we have many duplicates due to 302
    # a lazy fix is to remove smaller DoubanBook_Work ids
    # but ideally deal with 302 in scrape().

    @classmethod
    def lookup_id_type_choices(cls):
        id_types = [
            IdType.WikiData,
            IdType.DoubanBook_Work,
            IdType.Goodreads_Work,
        ]
        return [(i.value, i.label) for i in id_types]

    def merge_to(self, to_item):
        if (
            to_item
            and self.title != to_item.title
            and self.title not in to_item.other_title
        ):
            to_item.other_title += [self.title]
        super().merge_to(to_item)
        for edition in self.editions.all():
            to_item.editions.add(edition)
        self.editions.clear()

    def delete(self, using=None, soft=True, *args, **kwargs):
        if soft:
            self.editions.clear()
        return super().delete(using, soft, *args, **kwargs)


class Series(Item):
    category = ItemCategory.Book
    url_path = "book/series"
    # douban_serie = LookupIdDescriptor(IdType.DoubanBook_Serie)
    # goodreads_serie = LookupIdDescriptor(IdType.Goodreads_Serie)

    class Meta:
        proxy = True
new catalog data model, wip, not enabled 2022-12-07 19:09:05 -05:00			`"""`
			`Models for Book`

			`Series -> Work -> Edition`

			`Series is not fully implemented at the moment`

			`Goodreads`
			`Famous works have many editions`

			`Google Books:`
			`only has Edition level ("volume") data`

			`Douban:`
			`old editions has only CUBN(Chinese Unified Book Number)`
new data model: rename some classes 2022-12-08 16:08:59 +00:00			`work data seems asymmetric (a book links to a work, but may not listed in that work as one of its editions)`
new catalog data model, wip, not enabled 2022-12-07 19:09:05 -05:00
			`"""`

update model to link editions 2024-04-08 20:25:14 -04:00			`from os.path import exists`

add/edit item page 2023-01-05 03:06:13 -05:00			`from django.core.validators import MaxValueValidator, MinValueValidator`
new catalog data model, wip, not enabled 2022-12-07 19:09:05 -05:00			`from django.db import models`
			`from django.utils.translation import gettext_lazy as _`
update model about merging works 2024-04-08 21:27:36 -04:00			`from loguru import logger as _logger`
fix unicode site name; add isort; split journal and users models 2023-08-10 11:27:31 -04:00
fix lint with pyright 2023-08-11 01:43:19 -04:00			`from catalog.common import (`
			`BaseSchema,`
			`ExternalResource,`
			`IdType,`
			`Item,`
			`ItemCategory,`
			`ItemInSchema,`
			`ItemSchema,`
			`ItemType,`
			`PrimaryLookupIdDescriptor,`
			`jsondata,`
			`)`
fix unicode site name; add isort; split journal and users models 2023-08-10 11:27:31 -04:00
new catalog data model, wip, not enabled 2022-12-07 19:09:05 -05:00			`from .utils import *`


add api for catalog 2023-02-15 15:45:57 -05:00			`class EditionInSchema(ItemInSchema):`
			`subtitle: str \| None = None`
			`orig_title: str \| None = None`
			`author: list[str]`
			`translator: list[str]`
			`language: str \| None = None`
			`pub_house: str \| None = None`
			`pub_year: int \| None = None`
			`pub_month: int \| None = None`
			`binding: str \| None = None`
			`price: str \| None = None`
fix federated status update 2023-11-18 20:30:48 -05:00			`pages: int \| str \| None = None`
add api for catalog 2023-02-15 15:45:57 -05:00			`series: str \| None = None`
			`imprint: str \| None = None`


			`class EditionSchema(EditionInSchema, BaseSchema):`
queue api fetch 2023-02-15 16:22:32 -05:00			`isbn: str \| None = None`
add api for catalog 2023-02-15 15:45:57 -05:00			`pass`


new catalog data model, wip, not enabled 2022-12-07 19:09:05 -05:00			`class Edition(Item):`
new data model: journal models 2022-12-11 23:20:28 +00:00			`category = ItemCategory.Book`
reformat new code with black 2022-12-29 23:57:02 -05:00			`url_path = "book"`
new data model: /book/<uid> 2022-12-15 17:29:35 -05:00
new catalog data model, wip, not enabled 2022-12-07 19:09:05 -05:00			`isbn = PrimaryLookupIdDescriptor(IdType.ISBN)`
			`asin = PrimaryLookupIdDescriptor(IdType.ASIN)`
			`cubn = PrimaryLookupIdDescriptor(IdType.CUBN)`
			`# douban_book = LookupIdDescriptor(IdType.DoubanBook)`
			`# goodreads = LookupIdDescriptor(IdType.Goodreads)`
new data model: add legacy fields for Book to Edition as virtual fields 2022-12-14 21:12:37 -05:00
			`METADATA_COPY_LIST = [`
reformat new code with black 2022-12-29 23:57:02 -05:00			`"title",`
			`"subtitle",`
			`"author",`
			`"pub_house",`
			`"pub_year",`
			`"pub_month",`
publishing month of edition is now optional 2023-06-08 14:20:01 -04:00			`"language",`
			`"orig_title",`
			`"translator",`
			`"series",`
add/edit item page 2023-01-05 03:06:13 -05:00			`"imprint",`
reformat new code with black 2022-12-29 23:57:02 -05:00			`"binding",`
			`"pages",`
add/edit item page 2023-01-05 03:06:13 -05:00			`"price",`
			`"brief",`
			`"contents",`
new data model: add legacy fields for Book to Edition as virtual fields 2022-12-14 21:12:37 -05:00			`]`
add/edit item page 2023-01-05 03:06:13 -05:00			`subtitle = jsondata.CharField(`
i18n wip 2024-03-10 20:55:50 -04:00			`_("subtitle"), null=True, blank=True, default=None, max_length=500`
add/edit item page 2023-01-05 03:06:13 -05:00			`)`
			`orig_title = jsondata.CharField(`
i18n wip 2024-03-10 20:55:50 -04:00			`_("original title"), null=True, blank=True, default=None, max_length=500`
add/edit item page 2023-01-05 03:06:13 -05:00			`)`
			`author = jsondata.ArrayField(`
i18n wip 2024-03-10 20:55:50 -04:00			`verbose_name=_("author"),`
add/edit item page 2023-01-05 03:06:13 -05:00			`base_field=models.CharField(max_length=500),`
			`null=False,`
			`blank=False,`
			`default=list,`
			`)`
			`translator = jsondata.ArrayField(`
i18n wip 2024-03-10 20:55:50 -04:00			`verbose_name=_("translator"),`
add/edit item page 2023-01-05 03:06:13 -05:00			`base_field=models.CharField(max_length=500),`
			`null=True,`
			`blank=True,`
			`default=list,`
			`)`
fix missing maxlen check 2023-01-09 09:21:53 -05:00			`language = jsondata.CharField(`
i18n wip 2024-03-10 20:55:50 -04:00			`_("language"), null=True, blank=True, default=None, max_length=500`
fix missing maxlen check 2023-01-09 09:21:53 -05:00			`)`
add/edit item page 2023-01-05 03:06:13 -05:00			`pub_house = jsondata.CharField(`
i18n wip 2024-03-10 20:55:50 -04:00			`_("publisher"), null=True, blank=False, default=None, max_length=500`
add/edit item page 2023-01-05 03:06:13 -05:00			`)`
			`pub_year = jsondata.IntegerField(`
i18n wip 2024-03-10 20:55:50 -04:00			`_("publication year"),`
add/edit item page 2023-01-05 03:06:13 -05:00			`null=True,`
			`blank=False,`
			`validators=[MinValueValidator(1), MaxValueValidator(2999)],`
			`)`
			`pub_month = jsondata.IntegerField(`
i18n wip 2024-03-10 20:55:50 -04:00			`_("publication month"),`
add/edit item page 2023-01-05 03:06:13 -05:00			`null=True,`
publishing month of edition is now optional 2023-06-08 14:20:01 -04:00			`blank=True,`
add/edit item page 2023-01-05 03:06:13 -05:00			`validators=[MinValueValidator(1), MaxValueValidator(12)],`
			`)`
			`binding = jsondata.CharField(`
i18n wip 2024-03-10 20:55:50 -04:00			`_("binding"), null=True, blank=True, default=None, max_length=500`
add/edit item page 2023-01-05 03:06:13 -05:00			`)`
i18n wip 2024-03-10 20:55:50 -04:00			`pages = jsondata.IntegerField(_("pages"), blank=True, default=None)`
add/edit item page 2023-01-05 03:06:13 -05:00			`series = jsondata.CharField(`
i18n wip 2024-03-10 20:55:50 -04:00			`_("series"), null=True, blank=True, default=None, max_length=500`
add/edit item page 2023-01-05 03:06:13 -05:00			`)`
i18n wip 2024-03-10 20:55:50 -04:00			`contents = jsondata.TextField(_("contents"), null=True, blank=True, default=None)`
			`price = jsondata.CharField(_("price"), null=True, blank=True, max_length=500)`
			`imprint = jsondata.CharField(_("imprint"), null=True, blank=True, max_length=500)`
new catalog data model, wip, not enabled 2022-12-07 19:09:05 -05:00
			`@property`
			`def isbn10(self):`
			`return isbn_13_to_10(self.isbn)`

			`@isbn10.setter`
			`def isbn10(self, value):`
			`self.isbn = isbn_10_to_13(value)`

add/edit item page 2023-01-05 03:06:13 -05:00			`@classmethod`
			`def lookup_id_type_choices(cls):`
			`id_types = [`
			`IdType.ISBN,`
			`IdType.ASIN,`
			`IdType.CUBN,`
			`IdType.DoubanBook,`
			`IdType.Goodreads,`
			`IdType.GoogleBooks,`
			`]`
			`return [(i.value, i.label) for i in id_types]`

			`@classmethod`
			`def lookup_id_cleanup(cls, lookup_id_type, lookup_id_value):`
			`if lookup_id_type in [IdType.ASIN.value, IdType.ISBN.value]:`
			`return detect_isbn_asin(lookup_id_value)`
			`return super().lookup_id_cleanup(lookup_id_type, lookup_id_value)`

update model about merging works 2024-04-08 21:27:36 -04:00			`def merge_to(self, to_item):`
			`super().merge_to(to_item)`
			`for work in self.works.all():`
			`to_item.works.add(work)`
			`self.works.clear()`

			`def delete(self, using=None, soft=True, args, *kwargs):`
			`if soft:`
			`self.works.clear()`
			`return super().delete(using, soft, args, *kwargs)`

new data model: rename some classes 2022-12-08 16:08:59 +00:00			`def update_linked_items_from_external_resource(self, resource):`
			`"""add Work from resource.metadata['work'] if not yet"""`
			`links = resource.required_resources + resource.related_resources`
add douban book works; add goodread works; auto link season to show 2022-12-08 05:53:00 +00:00			`for w in links:`
takahe integration 2023-07-20 21:59:49 -04:00			`if w.get("model") == "Work":`
update model about merging works 2024-04-08 21:27:36 -04:00			`work_res = ExternalResource.objects.filter(`
			`id_type=w["id_type"], id_value=w["id_value"]`
reformat new code with black 2022-12-29 23:57:02 -05:00			`).first()`
update model about merging works 2024-04-08 21:27:36 -04:00			`if work_res:`
			`work = work_res.item`
			`if not work:`
			`_logger.warning(f"Unable to find work for {work_res}")`
			`else:`
			`_logger.warning(`
			`f'Unable to find resource for {w["id_type"]}:{w["id_value"]}'`
			`)`
			`work = Work.objects.filter(`
			`primary_lookup_id_type=w["id_type"],`
			`primary_lookup_id_value=w["id_value"],`
			`).first()`
add douban book works; add goodread works; auto link season to show 2022-12-08 05:53:00 +00:00			`if work and work not in self.works.all():`
			`self.works.add(work)`
new catalog data model, wip, not enabled 2022-12-07 19:09:05 -05:00
review url mounts for deprecated modules. 2023-01-01 23:50:57 -05:00			`def get_related_books(self):`
show other editions of same works 2023-02-13 00:52:24 -05:00			`works = list(self.works.all())`
			`return (`
			`Edition.objects.filter(works__in=works)`
fix duplicates in related books 2023-06-08 18:05:19 -04:00			`.distinct()`
show other editions of same works 2023-02-13 00:52:24 -05:00			`.exclude(pk=self.pk)`
			`.exclude(is_deleted=True)`
			`.exclude(merged_to_item__isnull=False)`
animate the unhide and sort related books 2023-06-08 19:57:10 -04:00			`.order_by("title")`
show other editions of same works 2023-02-13 00:52:24 -05:00			`)`
review url mounts for deprecated modules. 2023-01-01 23:50:57 -05:00
update model to link editions 2024-04-08 20:25:14 -04:00			`def has_related_books(self):`
			`works = list(self.works.all())`
			`if not works:`
			`return False`
			`return Edition.objects.filter(works__in=works).exclude(pk=self.pk).exists()`

			`def link_to_related_book(self, target: "Edition") -> bool:`
			`if target == self or target.is_deleted or target.merged_to_item:`
			`return False`
			`if target.works.all().exists():`
			`for work in target.works.all():`
			`self.works.add(work)`
			`elif self.works.all().exists():`
			`for work in self.works.all():`
			`target.works.add(work)`
			`else:`
			`Work.objects.create(title=self.title).editions.add(self, target)`
			`return True`

			`def unlink_from_all_works(self):`
			`self.works.clear()`

			`def has_works(self):`
			`return self.works.all().exists()`

new catalog data model, wip, not enabled 2022-12-07 19:09:05 -05:00
			`class Work(Item):`
new data model: journal models 2022-12-11 23:20:28 +00:00			`category = ItemCategory.Book`
reformat new code with black 2022-12-29 23:57:02 -05:00			`url_path = "book/work"`
new data model: journal models 2022-12-11 23:20:28 +00:00			`douban_work = PrimaryLookupIdDescriptor(IdType.DoubanBook_Work)`
			`goodreads_work = PrimaryLookupIdDescriptor(IdType.Goodreads_Work)`
reformat new code with black 2022-12-29 23:57:02 -05:00			`editions = models.ManyToManyField(Edition, related_name="works")`
add other_title to works 2024-04-09 15:47:27 -04:00			`author = jsondata.ArrayField(`
			`verbose_name=_("author"),`
			`base_field=models.CharField(max_length=500),`
			`null=True,`
			`blank=True,`
			`default=list,`
			`)`
			`other_title = jsondata.ArrayField(`
			`verbose_name=_("other title"),`
			`base_field=models.CharField(blank=True, default="", max_length=200),`
			`null=True,`
			`blank=True,`
			`default=list,`
			`)`
			`METADATA_COPY_LIST = [`
			`"title",`
			`"other_title",`
			`"author",`
			`"brief",`
			`]`
fix duplicates in related books 2023-06-08 18:05:19 -04:00			`# TODO: we have many duplicates due to 302`
			`# a lazy fix is to remove smaller DoubanBook_Work ids`
			`# but ideally deal with 302 in scrape().`
new catalog data model, wip, not enabled 2022-12-07 19:09:05 -05:00
update works: ui 2024-04-09 00:45:40 -04:00			`@classmethod`
			`def lookup_id_type_choices(cls):`
			`id_types = [`
add other_title to works 2024-04-09 15:47:27 -04:00			`IdType.WikiData,`
update works: ui 2024-04-09 00:45:40 -04:00			`IdType.DoubanBook_Work,`
			`IdType.Goodreads_Work,`
			`]`
			`return [(i.value, i.label) for i in id_types]`

update model about merging works 2024-04-08 21:27:36 -04:00			`def merge_to(self, to_item):`
add other_title to works 2024-04-09 15:47:27 -04:00			`if (`
			`to_item`
			`and self.title != to_item.title`
			`and self.title not in to_item.other_title`
			`):`
			`to_item.other_title += [self.title]`
update model about merging works 2024-04-08 21:27:36 -04:00			`super().merge_to(to_item)`
			`for edition in self.editions.all():`
			`to_item.editions.add(edition)`
			`self.editions.clear()`

			`def delete(self, using=None, soft=True, args, *kwargs):`
			`if soft:`
			`self.editions.clear()`
			`return super().delete(using, soft, args, *kwargs)`

new catalog data model, wip, not enabled 2022-12-07 19:09:05 -05:00
			`class Series(Item):`
new data model: journal models 2022-12-11 23:20:28 +00:00			`category = ItemCategory.Book`
reformat new code with black 2022-12-29 23:57:02 -05:00			`url_path = "book/series"`
new catalog data model, wip, not enabled 2022-12-07 19:09:05 -05:00			`# douban_serie = LookupIdDescriptor(IdType.DoubanBook_Serie)`
			`# goodreads_serie = LookupIdDescriptor(IdType.Goodreads_Serie)`

			`class Meta:`
			`proxy = True`