fix goodreads asin mix up
This commit is contained in:
parent
47cd239e21
commit
c05aa65e3f
4 changed files with 44 additions and 11 deletions
|
@ -1,5 +1,6 @@
|
|||
from django.test import TestCase
|
||||
from catalog.book.models import *
|
||||
from catalog.book.utils import *
|
||||
from catalog.common import *
|
||||
|
||||
|
||||
|
@ -34,6 +35,14 @@ class BookTestCase(TestCase):
|
|||
self.assertEqual(hyperion.isbn10, None)
|
||||
|
||||
def test_isbn(self):
|
||||
t, n = detect_isbn_asin('0553283685')
|
||||
self.assertEqual(t, IdType.ISBN)
|
||||
self.assertEqual(n, '9780553283686')
|
||||
t, n = detect_isbn_asin('9780553283686')
|
||||
self.assertEqual(t, IdType.ISBN)
|
||||
t, n = detect_isbn_asin(' b0043M6780')
|
||||
self.assertEqual(t, IdType.ASIN)
|
||||
|
||||
hyperion = Edition.objects.get(title="Hyperion")
|
||||
self.assertEqual(hyperion.isbn, '9780553283686')
|
||||
self.assertEqual(hyperion.isbn10, '0553283685')
|
||||
|
@ -82,7 +91,7 @@ class GoodreadsTestCase(TestCase):
|
|||
site.get_resource_ready()
|
||||
self.assertEqual(site.ready, True)
|
||||
self.assertEqual(site.resource.metadata.get('title'), 'Hyperion')
|
||||
self.assertEqual(site.resource.metadata.get('isbn'), isbn)
|
||||
self.assertEqual(site.resource.get_all_lookup_ids().get(IdType.ISBN), isbn)
|
||||
self.assertEqual(site.resource.required_resources[0]['id_value'], '1383900')
|
||||
edition = Edition.objects.get(primary_lookup_id_type=IdType.ISBN, primary_lookup_id_value=isbn)
|
||||
resource = edition.external_resources.all().first()
|
||||
|
@ -229,6 +238,7 @@ class MultiBookSitesTestCase(TestCase):
|
|||
w3 = p3.item.works.all().first()
|
||||
self.assertNotEqual(w3, w2)
|
||||
p4 = SiteManager.get_site_by_url(url4).get_resource_ready()
|
||||
self.assertEqual(p4.item.id, p1.item.id)
|
||||
self.assertEqual(p4.item.works.all().count(), 2)
|
||||
self.assertEqual(p1.item.works.all().count(), 2)
|
||||
w2e = w2.editions.all().order_by('title')
|
||||
|
|
|
@ -1,3 +1,7 @@
|
|||
import re
|
||||
from .models import IdType
|
||||
|
||||
|
||||
def check_digit_10(isbn):
|
||||
assert len(isbn) == 9
|
||||
sum = 0
|
||||
|
@ -34,12 +38,23 @@ def isbn_13_to_10(isbn):
|
|||
|
||||
|
||||
def is_isbn_13(isbn):
    """Return True if *isbn* has the shape of an ISBN-13.

    The whole string must be exactly 13 ASCII digits. Only the shape is
    checked here; the check digit is not verified.

    ``re.fullmatch`` is used instead of ``re.match`` because ``re.match``
    only anchors at the start of the string, which would let longer
    strings that merely begin with 13 digits slip through.
    """
    return re.fullmatch(r'[0-9]{13}', isbn) is not None
|
||||
|
||||
|
||||
def is_isbn_10(isbn):
    """Return True if *isbn* has the shape of an ISBN-10.

    The whole string must be nine ASCII digits followed by a final
    character that is either a digit or an upper-case ``X``. Only the
    shape is checked here; the check digit is not verified.

    ``re.fullmatch`` is used instead of ``re.match`` because ``re.match``
    only anchors at the start of the string, so a 13-digit ISBN (or any
    longer digit string) would otherwise be accepted as an ISBN-10.
    """
    return re.fullmatch(r'[0-9]{9}[X0-9]', isbn) is not None
|
||||
|
||||
|
||||
def is_asin(asin):
    """Return True if *asin* has the shape of a ``B``-prefixed ASIN.

    The whole string must be an upper-case ``B`` followed by exactly nine
    upper-case letters or ASCII digits. The match is case-sensitive, so
    callers are expected to upper-case the value first.

    ``re.fullmatch`` is used instead of ``re.match`` because ``re.match``
    only anchors at the start of the string and would accept over-long
    input that merely begins with a valid ASIN.
    """
    return re.fullmatch(r'B[A-Z0-9]{9}', asin) is not None
|
||||
|
||||
|
||||
def detect_isbn_asin(s):
    """Classify *s* as an ISBN or an ASIN and return it in normalised form.

    The input is stripped of surrounding whitespace and upper-cased; a
    falsy input is treated as the empty string. The checks run in a fixed
    order: ISBN-13 first, then ISBN-10, then ASIN.

    Returns a ``(id_type, value)`` tuple:
      * ``(IdType.ISBN, value)`` for an ISBN-13, returned as is;
      * ``(IdType.ISBN, isbn_10_to_13(value))`` for an ISBN-10;
      * ``(IdType.ASIN, value)`` for an ASIN;
      * ``(None, None)`` when nothing matches.
    """
    if s:
        candidate = s.strip().upper()
    else:
        candidate = ''

    # Order matters: the ISBN-13 test must run before the ISBN-10 test.
    if is_isbn_13(candidate):
        result = (IdType.ISBN, candidate)
    elif is_isbn_10(candidate):
        result = (IdType.ISBN, isbn_10_to_13(candidate))
    elif is_asin(candidate):
        result = (IdType.ASIN, candidate)
    else:
        result = (None, None)
    return result
|
||||
|
|
|
@ -176,7 +176,9 @@ class DoubanBook(AbstractSite):
|
|||
}]
|
||||
|
||||
pd = ResourceContent(metadata=data)
|
||||
pd.lookup_ids[IdType.ISBN] = isbn
|
||||
t, n = detect_isbn_asin(isbn)
|
||||
if t:
|
||||
pd.lookup_ids[t] = n
|
||||
pd.lookup_ids[IdType.CUBN] = cubn
|
||||
pd.cover_image, pd.cover_image_extention = BasicImageDownloader.download_image(img_url, self.url)
|
||||
return pd
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
from catalog.book.models import Edition, Work
|
||||
from catalog.common import *
|
||||
from catalog.book.utils import detect_isbn_asin
|
||||
from lxml import html
|
||||
import json
|
||||
import logging
|
||||
|
@ -60,10 +61,15 @@ class Goodreads(AbstractSite):
|
|||
raise ParseError(self, 'Book in __NEXT_DATA__ json')
|
||||
data['title'] = b['title']
|
||||
data['brief'] = b['description']
|
||||
data['isbn'] = b['details'].get('isbn13')
|
||||
asin = b['details'].get('asin')
|
||||
if asin and asin != data['isbn']:
|
||||
data['asin'] = asin
|
||||
ids = {}
|
||||
t, n = detect_isbn_asin(b['details'].get('asin'))
|
||||
if t:
|
||||
ids[t] = n
|
||||
t, n = detect_isbn_asin(b['details'].get('isbn13'))
|
||||
if t:
|
||||
ids[t] = n
|
||||
# Amazon is known to sometimes use another book's ISBN as the ASIN,
|
||||
# so we always overwrite the ASIN-converted ISBN with the real ISBN
|
||||
data['pages'] = b['details'].get('numPages')
|
||||
data['cover_image_url'] = b['imageUrl']
|
||||
w = next(filter(lambda x: x.get('details'), o['Work']), None)
|
||||
|
@ -76,8 +82,8 @@ class Goodreads(AbstractSite):
|
|||
'url': w['editions']['webUrl'],
|
||||
}]
|
||||
pd = ResourceContent(metadata=data)
|
||||
pd.lookup_ids[IdType.ISBN] = data.get('isbn')
|
||||
pd.lookup_ids[IdType.ASIN] = data.get('asin')
|
||||
pd.lookup_ids[IdType.ISBN] = ids.get(IdType.ISBN)
|
||||
pd.lookup_ids[IdType.ASIN] = ids.get(IdType.ASIN)
|
||||
if data["cover_image_url"]:
|
||||
imgdl = BasicImageDownloader(data["cover_image_url"], self.url)
|
||||
try:
|
||||
|
|
Loading…
Add table
Reference in a new issue