lib.itmens/catalog/book/models.py

429 lines
13 KiB
Python
Raw Normal View History

"""
Models for Book

Series -> Work -> Edition

Series is not fully implemented at the moment

Goodreads:
Famous works have many editions

Google Books:
only has Edition level ("volume") data

Douban:
old editions have only CUBN (Chinese Unified Book Number)
work data seems asymmetric (a book links to a work, but may not be listed in that work as one of its editions)
"""
2024-05-27 15:44:12 -04:00
from typing import TYPE_CHECKING
2024-04-08 20:25:14 -04:00
2023-01-05 03:06:13 -05:00
from django.core.validators import MaxValueValidator, MinValueValidator
from django.db import models
from django.utils.translation import gettext_lazy as _
2024-05-25 23:38:11 -04:00
from loguru import logger
from ninja import Field
2023-08-11 01:43:19 -04:00
from catalog.common import (
BaseSchema,
ExternalResource,
IdType,
Item,
ItemCategory,
ItemInSchema,
ItemSchema,
ItemType,
PrimaryLookupIdDescriptor,
jsondata,
)
from catalog.common.models import (
2024-07-16 02:28:53 -04:00
LIST_OF_ONE_PLUS_STR_SCHEMA,
LOCALE_CHOICES_JSONFORM,
SCRIPT_CHOICES,
LanguageListField,
)
from common.models.lang import get_current_locales
2024-07-13 01:36:18 -04:00
from common.models.misc import uniq
from .utils import *
2023-02-15 15:45:57 -05:00
class EditionInSchema(ItemInSchema):
    """Edition-specific schema fields layered on top of ``ItemInSchema``."""

    # Filled through the ``display_subtitle`` alias rather than a field
    # literally named ``subtitle``.
    subtitle: str | None = Field(default=None, alias="display_subtitle")
    orig_title: str | None = None

    # People and languages: these three list fields have no default.
    author: list[str]
    translator: list[str]
    language: list[str]

    # Publication details, all optional.
    pub_house: str | None = None
    pub_year: int | None = None
    pub_month: int | None = None
    binding: str | None = None
    price: str | None = None
    # Accepts either an integer or a string.
    pages: int | str | None = None
    series: str | None = None
    imprint: str | None = None
class EditionSchema(EditionInSchema, BaseSchema):
    """Schema combining ``EditionInSchema`` and ``BaseSchema``, plus an optional ISBN."""

    isbn: str | None = None
# Schema for an Edition's localized title: a list holding exactly one
# {"lang", "text"} entry (minItems == maxItems == 1).
EDITION_LOCALIZED_TITLE_SCHEMA = {
    "type": "list",
    "items": {
        "type": "dict",
        "keys": {
            "lang": {
                "type": "string",
                "title": _("locale"),
                "choices": LOCALE_CHOICES_JSONFORM,
            },
            "text": {"type": "string", "title": _("text content")},
        },
        # Both keys must be present in every entry.
        "required": ["lang", "text"],
    },
    "minItems": 1,
    "maxItems": 1,
    # "uniqueItems": True,
}
# Schema for an Edition's localized subtitle: a list holding at most one
# {"lang", "text"} entry; an empty list is allowed (minItems == 0).
EDITION_LOCALIZED_SUBTITLE_SCHEMA = {
    "type": "list",
    "items": {
        "type": "dict",
        "keys": {
            "lang": {
                "type": "string",
                "title": _("locale"),
                "choices": LOCALE_CHOICES_JSONFORM,
            },
            "text": {"type": "string", "title": _("text content")},
        },
        # Both keys must be present in every entry.
        "required": ["lang", "text"],
    },
    "minItems": 0,
    "maxItems": 1,
    # "uniqueItems": True,
}
class Edition(Item):
    """A book edition; editions are grouped under ``Work`` items via ``works``."""

    if TYPE_CHECKING:
        # Reverse accessor of ``Work.editions`` (related_name="works"),
        # declared here only so type checkers know about it.
        works: "models.ManyToManyField[Work, Edition]"

    class BookFormat(models.TextChoices):
        """Choices offered for the ``format`` field."""

        PAPERBACK = "paperback", _("Paperback")
        HARDCOVER = "hardcover", _("Hardcover")
        EBOOK = "ebook", _("eBook")
        AUDIOBOOK = "audiobook", _("Audiobook")
        # GRAPHICNOVEL = "graphicnovel", _("GraphicNovel")
        WEB = "web", _("Web Fiction")
        OTHER = "other", _("Other")

    category = ItemCategory.Book
    url_path = "book"

    isbn = PrimaryLookupIdDescriptor(IdType.ISBN)
    asin = PrimaryLookupIdDescriptor(IdType.ASIN)
    cubn = PrimaryLookupIdDescriptor(IdType.CUBN)
    # douban_book = LookupIdDescriptor(IdType.DoubanBook)
    # goodreads = LookupIdDescriptor(IdType.Goodreads)

    METADATA_COPY_LIST = [
        "localized_title",
        "localized_subtitle",
        # "title",
        # "subtitle",
        "author",
        "format",
        "pub_house",
        "pub_year",
        "pub_month",
        "language",
        "orig_title",
        "translator",
        "series",
        "imprint",
        "binding",
        "pages",
        "price",
        # "brief",
        "localized_description",
        "contents",
    ]

    # force Edition to have only one title
    localized_title_schema = EDITION_LOCALIZED_TITLE_SCHEMA
    localized_subtitle = jsondata.JSONField(
        verbose_name=_("subtitle"),
        null=False,
        blank=True,
        default=list,
        schema=EDITION_LOCALIZED_SUBTITLE_SCHEMA,
    )
    # subtitle = jsondata.CharField(
    #     _("subtitle"), null=True, blank=True, default=None, max_length=500
    # )
    orig_title = jsondata.CharField(
        _("original title"), null=True, blank=True, max_length=500
    )
    author = jsondata.JSONField(
        verbose_name=_("author"),
        null=False,
        blank=False,
        default=list,
        schema=LIST_OF_ONE_PLUS_STR_SCHEMA,
    )
    translator = jsondata.ArrayField(
        verbose_name=_("translator"),
        base_field=models.CharField(max_length=500),
        null=True,
        blank=True,
        default=list,
    )
    format = jsondata.CharField(
        _("book format"),
        blank=True,
        max_length=100,
        choices=BookFormat.choices,
    )
    language = LanguageListField()
    pub_house = jsondata.CharField(
        _("publishing house"), null=True, blank=True, max_length=500
    )
    pub_year = jsondata.IntegerField(
        _("publication year"),
        null=True,
        blank=False,
        validators=[MinValueValidator(1), MaxValueValidator(2999)],
    )
    pub_month = jsondata.IntegerField(
        _("publication month"),
        null=True,
        blank=True,
        validators=[MinValueValidator(1), MaxValueValidator(12)],
    )
    binding = jsondata.CharField(_("binding"), null=True, blank=True, max_length=500)
    pages = jsondata.IntegerField(_("pages"), blank=True, default=None)
    series = jsondata.CharField(_("series"), null=True, blank=True, max_length=500)
    contents = jsondata.TextField(_("contents"), null=True, blank=True)
    price = jsondata.CharField(_("price"), null=True, blank=True, max_length=500)
    imprint = jsondata.CharField(_("imprint"), null=True, blank=True, max_length=500)

    def get_localized_subtitle(self) -> str | None:
        """Return the text of the first subtitle entry, or None if there is none."""
        return self.localized_subtitle[0]["text"] if self.localized_subtitle else None

    @property
    def display_subtitle(self) -> str | None:
        """Subtitle for display; same value as ``get_localized_subtitle()``."""
        return self.get_localized_subtitle()

    @property
    def isbn10(self):
        """The ``isbn`` value passed through ``isbn_13_to_10``."""
        # isbn_13_to_10 / isbn_10_to_13 are not imported by name here;
        # presumably they come from the ``.utils`` star import -- confirm.
        return isbn_13_to_10(self.isbn)

    @isbn10.setter
    def isbn10(self, value):
        """Store ``value`` in ``isbn`` after passing it through ``isbn_10_to_13``."""
        self.isbn = isbn_10_to_13(value)

    @classmethod
    def lookup_id_type_choices(cls):
        """Return ``(value, label)`` pairs for the id types listed for an Edition."""
        id_types = [
            IdType.ISBN,
            IdType.ASIN,
            IdType.CUBN,
            IdType.DoubanBook,
            IdType.Goodreads,
            IdType.GoogleBooks,
            IdType.Qidian,
        ]
        return [(i.value, i.label) for i in id_types]

    @classmethod
    def lookup_id_cleanup(cls, lookup_id_type: str | IdType, lookup_id_value: str):
        """Clean up a lookup id.

        ISBN and ASIN values are handed to ``detect_isbn_asin``; every other
        id type is delegated to the parent implementation.
        """
        if lookup_id_type in [IdType.ASIN.value, IdType.ISBN.value]:
            return detect_isbn_asin(lookup_id_value)
        return super().lookup_id_cleanup(lookup_id_type, lookup_id_value)

    def merge_to(self, to_item: "Edition | None"):  # type: ignore[reportIncompatibleMethodOverride]
        """Merge into ``to_item`` and move this edition's work links over to it.

        This edition's own work links are cleared even when ``to_item`` is
        None.
        """
        super().merge_to(to_item)
        if to_item:
            for work in self.works.all():
                to_item.works.add(work)
        self.works.clear()

    def delete(self, using=None, keep_parents=False, soft=True, *args, **kwargs):
        """Delete this edition, clearing its work links first on a soft delete.

        Arguments are forwarded to the parent ``delete`` in the declared
        ``(using, keep_parents, soft)`` order, the same order ``Work.delete``
        uses. The previous call passed ``soft`` and ``keep_parents`` swapped.
        NOTE(review): assumes ``Item.delete`` declares its parameters in this
        order -- confirm against ``catalog.common``.
        """
        if soft:
            self.works.clear()
        return super().delete(using, keep_parents, soft, *args, **kwargs)

    def update_linked_items_from_external_resource(self, resource):
        """add Work from resource.metadata['work'] if not yet"""
        links = resource.required_resources + resource.related_resources
        for w in links:
            if w.get("model") == "Work":
                work_res = ExternalResource.objects.filter(
                    id_type=w["id_type"], id_value=w["id_value"]
                ).first()
                if work_res:
                    work = work_res.item
                    if not work:
                        logger.warning(f"Unable to find work for {work_res}")
                else:
                    logger.warning(
                        f'Unable to find resource for {w["id_type"]}:{w["id_value"]}'
                    )
                    # No ExternalResource row: fall back to a Work whose
                    # primary lookup id matches the link.
                    work = Work.objects.filter(
                        primary_lookup_id_type=w["id_type"],
                        primary_lookup_id_value=w["id_value"],
                    ).first()
                if work and work not in self.works.all():
                    self.works.add(work)

    @property
    def sibling_items(self):
        """Other editions sharing a work with this one, excluding deleted and merged ones."""
        works = list(self.works.all())
        return (
            Edition.objects.filter(works__in=works)
            .exclude(pk=self.pk)
            .exclude(is_deleted=True)
            .exclude(merged_to_item__isnull=False)
        )

    @property
    def title_deco(self):
        """Parenthesised "<pub_house> <pub_year>" suffix, or "" when both are empty."""
        a = [str(i) for i in [self.pub_house, self.pub_year] if i]
        return f"({' '.join(a)})" if a else ""

    def has_related_books(self):
        """Return True if any other edition shares at least one work with this one."""
        works = list(self.works.all())
        if not works:
            return False
        return Edition.objects.filter(works__in=works).exclude(pk=self.pk).exists()

    def link_to_related_book(self, target: "Edition") -> bool:
        """Put this edition and ``target`` under common work(s).

        Returns False without changes when ``target`` is this edition, is
        deleted, or has been merged into another item; True otherwise.
        """
        if target == self or target.is_deleted or target.merged_to_item:
            return False
        if target.works.all().exists():
            # Join the target's works and fold our title into each of them.
            for work in target.works.all():
                self.works.add(work)
                work.localized_title = uniq(work.localized_title + self.localized_title)
                work.save()
        elif self.works.all().exists():
            # Only we have works: pull the target in and fold its title in.
            for work in self.works.all():
                target.works.add(work)
                work.localized_title = uniq(
                    work.localized_title + target.localized_title
                )
                work.save()
        else:
            # Neither side has a work yet: create one titled after this edition.
            work = Work.objects.create(localized_title=self.localized_title)
            work.editions.add(self, target)
            # work.localized_title = self.localized_title
            # work.save()
        return True

    def unlink_from_all_works(self):
        """Remove every work link from this edition."""
        self.works.clear()

    def has_works(self):
        """Return True if this edition is linked to at least one work."""
        return self.works.all().exists()
class Work(Item):
    """A book work, linked to its ``Edition`` items through ``editions``."""

    category = ItemCategory.Book
    url_path = "book/work"

    douban_work = PrimaryLookupIdDescriptor(IdType.DoubanBook_Work)
    goodreads_work = PrimaryLookupIdDescriptor(IdType.Goodreads_Work)

    # The reverse accessor on Edition is ``works``.
    editions = models.ManyToManyField(Edition, related_name="works")
    language = LanguageListField()
    author = jsondata.ArrayField(
        verbose_name=_("author"),
        base_field=models.CharField(max_length=500),
        null=True,
        blank=True,
        default=list,
    )
    # other_title = jsondata.ArrayField(
    #     verbose_name=_("other title"),
    #     base_field=models.CharField(blank=True, default="", max_length=200),
    #     null=True,
    #     blank=True,
    #     default=list,
    # )

    METADATA_COPY_LIST = [
        "localized_title",
        "author",
        "language",
        "localized_description",
    ]

    # TODO: we have many duplicates due to 302
    # a lazy fix is to remove smaller DoubanBook_Work ids
    # but ideally deal with 302 in scrape().

    @classmethod
    def lookup_id_type_choices(cls):
        """Return ``(value, label)`` pairs for the id types listed for a Work."""
        supported = (
            IdType.WikiData,
            IdType.DoubanBook_Work,
            IdType.Goodreads_Work,
        )
        return [(id_type.value, id_type.label) for id_type in supported]

    def merge_to(self, to_item: "Work | None"):  # type: ignore[reportIncompatibleMethodOverride]
        """Merge into ``to_item``, handing over editions, languages and titles.

        When ``to_item`` is None only the parent ``merge_to`` runs.
        """
        super().merge_to(to_item)
        if not to_item:
            return
        for linked_edition in self.editions.all():
            to_item.editions.add(linked_edition)
        self.editions.clear()
        merged_languages = uniq(to_item.language + self.language)  # type: ignore
        to_item.language = merged_languages
        merged_titles = uniq(to_item.localized_title + self.localized_title)
        to_item.localized_title = merged_titles
        to_item.save()

    def delete(self, using=None, keep_parents=False, soft=True, *args, **kwargs):
        """Delete this work, clearing its edition links first on a soft delete."""
        if soft:
            self.editions.clear()
        return super().delete(using, keep_parents, soft, *args, **kwargs)

    @property
    def cover_image_url(self):
        """The work's own cover URL, else the first truthy one among its editions, else None."""
        own_url = super().cover_image_url
        if own_url:
            return own_url
        for linked_edition in self.editions.all():
            if linked_edition.cover_image_url:
                return linked_edition.cover_image_url
        return None

    def update_linked_items_from_external_resource(self, resource):
        """Link every Edition referenced by ``resource`` that is not linked yet.

        Both ``required_resources`` and ``related_resources`` are scanned;
        entries whose "model" is not "Edition" are ignored.
        """
        for link in resource.required_resources + resource.related_resources:
            if link.get("model") != "Edition":
                continue
            id_type = link["id_type"]
            id_value = link["id_value"]
            linked_res = ExternalResource.objects.filter(
                id_type=id_type, id_value=id_value
            ).first()
            if linked_res:
                found = linked_res.item
                if not found:
                    logger.warning(f"Unable to find edition for {linked_res}")
            else:
                logger.warning(f"Unable to find resource for {id_type}:{id_value}")
                # No ExternalResource row: fall back to an Edition whose
                # primary lookup id matches the link.
                found = Edition.objects.filter(
                    primary_lookup_id_type=id_type,
                    primary_lookup_id_value=id_value,
                ).first()
            if found and found not in self.editions.all():
                self.editions.add(found)
class Series(Item):
    """Book series, declared as a proxy model of ``Item``."""

    category = ItemCategory.Book
    url_path = "book/series"

    # douban_serie = LookupIdDescriptor(IdType.DoubanBook_Serie)
    # goodreads_serie = LookupIdDescriptor(IdType.Goodreads_Serie)

    class Meta:
        proxy = True