new data model: googlebooks

This commit is contained in:
Your Name 2022-12-09 02:35:21 +00:00
parent 6206918ccc
commit bfd68e6a3f
6 changed files with 189 additions and 2 deletions

View file

@ -116,6 +116,35 @@ class GoodreadsTestCase(TestCase):
self.assertEqual(w1, w2)
class GoogleBooksTestCase(TestCase):
    """Checks URL recognition and scraping for the Google Books site."""

    def test_parse(self):
        # A regional URL carrying extra query parameters and the plain .com
        # URL are both expected to normalize to the same canonical URL.
        expected_type = IdType.GoogleBooks
        expected_id = 'hV--zQEACAAJ'
        regional_url = 'https://books.google.com.bn/books?id=hV--zQEACAAJ&hl=ms'
        canonical_url = 'https://books.google.com/books?id=hV--zQEACAAJ'

        regional_site = SiteList.get_site_by_url(regional_url)
        canonical_site = SiteList.get_site_by_url(canonical_url)

        self.assertIsNotNone(regional_site)
        self.assertEqual(regional_site.url, canonical_url)
        self.assertEqual(regional_site.ID_TYPE, expected_type)
        self.assertEqual(regional_site.id_value, expected_id)
        self.assertEqual(canonical_site.url, canonical_url)

    @use_local_response
    def test_scrape(self):
        # The site starts out not ready; get_resource_ready() is expected to
        # populate both the resource metadata and the linked item.
        expected_title = '1984 Nineteen Eighty-Four'
        expected_isbn = '9781847498571'
        expected_id = 'hV--zQEACAAJ'

        site = SiteList.get_site_by_url('https://books.google.com.bn/books?id=hV--zQEACAAJ')
        self.assertEqual(site.ready, False)
        site.get_resource_ready()
        self.assertEqual(site.ready, True)

        resource = site.resource
        metadata = resource.metadata
        self.assertEqual(metadata.get('title'), expected_title)
        self.assertEqual(metadata.get('isbn'), expected_isbn)
        self.assertEqual(resource.id_type, IdType.GoogleBooks)
        self.assertEqual(resource.id_value, expected_id)

        item = resource.item
        self.assertEqual(item.isbn, expected_isbn)
        self.assertEqual(item.title, expected_title)
class DoubanBookTestCase(TestCase):
def setUp(self):
pass
@ -170,9 +199,12 @@ class MultiBookSitesTestCase(TestCase):
# isbn = '9781847498571'
url1 = 'https://www.goodreads.com/book/show/56821625-1984'
url2 = 'https://book.douban.com/subject/35902899/'
url3 = 'https://books.google.com/books?id=hV--zQEACAAJ'
p1 = SiteList.get_site_by_url(url1).get_resource_ready()
p2 = SiteList.get_site_by_url(url2).get_resource_ready()
p3 = SiteList.get_site_by_url(url3).get_resource_ready()
self.assertEqual(p1.item.id, p2.item.id)
self.assertEqual(p2.item.id, p3.item.id)
@use_local_response
def test_works(self):

View file

@ -27,7 +27,7 @@ class IdType(models.TextChoices):
TMDB_Movie = 'tmdb_movie', _('TMDB电影')
Goodreads = 'goodreads', _('Goodreads')
Goodreads_Work = 'goodreads_work', _('Goodreads著作')
GoogleBook = 'googlebook', _('谷歌图书')
GoogleBooks = 'googlebooks', _('谷歌图书')
DoubanBook = 'doubanbook', _('豆瓣读书')
DoubanBook_Work = 'doubanbook_work', _('豆瓣读书著作')
DoubanMovie = 'doubanmovie', _('豆瓣电影')

View file

@ -20,7 +20,7 @@ _logger = logging.getLogger(__name__)
class ResourceContent:
lookup_ids: dict = field(default_factory=dict)
metadata: dict = field(default_factory=dict)
cover_image = None
cover_image: bytes = None
cover_image_extention: str = None

View file

@ -6,6 +6,7 @@ from .douban_music import DoubanMusic
from .douban_game import DoubanGame
from .douban_drama import DoubanDrama
from .goodreads import Goodreads
from .google_books import GoogleBooks
from .tmdb import TMDB_Movie
from .imdb import IMDB
from .spotify import Spotify

View file

@ -0,0 +1,79 @@
from catalog.common import *
from catalog.models import *
import re
import logging
_logger = logging.getLogger(__name__)
@SiteList.register
class GoogleBooks(AbstractSite):
    """Site adapter for Google Books volumes.

    Recognizes several Google Books URL shapes and scrapes volume data from
    the Google Books ``volumes`` JSON API instead of the HTML page.
    """

    ID_TYPE = IdType.GoogleBooks
    # Every pattern has a single capture group around the volume id.
    URL_PATTERNS = [
        r"https://books\.google\.co[^/]+/books\?id=([^&#]+)",
        r"https://www\.google\.co[^/]+/books/edition/[^/]+/([^&#?]+)",
        # The "?" introducing the query string must be escaped: left bare it
        # made the preceding "+" lazy, so ".../books/about/<title>?id=<id>"
        # could not match.
        r"https://books\.google\.co[^/]+/books/about/[^?]+\?id=([^&#?]+)",
    ]
    WIKI_PROPERTY_ID = ''
    DEFAULT_MODEL = Edition

    @classmethod
    def id_to_url(cls, id_value):
        """Return the canonical books.google.com URL for a volume id."""
        return "https://books.google.com/books?id=" + id_value

    def scrape(self):
        """Fetch this volume from the Google Books API and package its data.

        Returns:
            A ResourceContent whose ``metadata`` dict holds the book fields,
            whose cover image and extension come from the volume thumbnail
            (both None when no thumbnail is listed), and whose ``lookup_ids``
            maps IdType.ISBN to the ISBN-13 (None when the volume has none).

        Raises:
            KeyError: if the response has no ``volumeInfo`` or no title.
        """
        api_url = f'https://www.googleapis.com/books/v1/volumes/{self.id_value}'
        volume = BasicDownloader(api_url).download().json()
        info = volume['volumeInfo']

        other = {}
        title = info['title']
        subtitle = info.get('subtitle')

        # publishedDate is split on "-": first part is the year, the second
        # (when present) the month. Both are kept as strings.
        pub_year = None
        pub_month = None
        if 'publishedDate' in info:
            pub_date = info['publishedDate'].split('-')
            pub_year = pub_date[0]
            pub_month = pub_date[1] if len(pub_date) > 1 else None

        pub_house = info.get('publisher')
        language = info.get('language')
        pages = info.get('pageCount')
        if 'mainCategory' in info:
            other['分类'] = info['mainCategory']
        authors = info.get('authors')

        # Prefer the full description; otherwise fall back to the search
        # snippet, which the API exposes as top-level searchInfo.textSnippet
        # (it is not nested under volumeInfo).
        search_info = volume.get('searchInfo') or {}
        if 'description' in info:
            brief = info['description']
        elif 'textSnippet' in search_info:
            brief = search_info['textSnippet']
        else:
            brief = ''
        # Turn <br> tags into newlines, then strip all remaining markup.
        brief = re.sub(r'<.*?>', '', brief.replace('<br', '\n<br'))

        # imageLinks may be present without a "thumbnail" entry.
        img_url = (info.get('imageLinks') or {}).get('thumbnail')

        # Only ISBN-13 is used; ISBN-10 is deliberately not a fallback.
        isbn13 = None
        for iid in info.get('industryIdentifiers') or []:
            if iid['type'] == 'ISBN_13':
                isbn13 = iid['identifier']
        isbn = isbn13

        # Skip the download entirely when the volume lists no cover image.
        if img_url:
            raw_img, ext = BasicImageDownloader.download_image(img_url, self.url)
        else:
            raw_img, ext = None, None

        data = {
            'title': title,
            'subtitle': subtitle,
            'orig_title': None,
            'author': authors,
            'translator': None,
            'language': language,
            'pub_house': pub_house,
            'pub_year': pub_year,
            'pub_month': pub_month,
            'binding': None,
            'pages': pages,
            'isbn': isbn,
            'brief': brief,
            'contents': None,
            'other_info': other,
            'cover_image_url': img_url,
        }
        return ResourceContent(
            metadata=data,
            cover_image=raw_img,
            cover_image_extention=ext,
            lookup_ids={IdType.ISBN: isbn13},
        )

View file

@ -0,0 +1,75 @@
{
"kind": "books#volume",
"id": "hV--zQEACAAJ",
"etag": "lwbqGlV/h5s",
"selfLink": "https://www.googleapis.com/books/v1/volumes/hV--zQEACAAJ",
"volumeInfo": {
"title": "1984 Nineteen Eighty-Four",
"authors": [
"George Orwell"
],
"publisher": "Alma Classics",
"publishedDate": "2021-01-07",
"description": "In 1984, London is a grim city in the totalitarian state of Oceania where Big Brother is always watching you and the Thought Police can practically read your mind. Winston Smith is a man in grave danger for the simple reason that his memory still functions. Drawn into a forbidden love affair, Winston finds the courage to join a secret revolutionary organization called The Brotherhood, dedicated to the destruction of the Party. Together with his beloved Julia, he hazards his life in a deadly match against the powers that be.Lionel Trilling said of Orwell's masterpiece \" 1984 is a profound, terrifying, and wholly fascinating book. It is a fantasy of the political future, and like any such fantasy, serves its author as a magnifying device for an examination of the present.\" Though the year 1984 now exists in the past, Orwell's novel remains an urgent call for the individual willing to speak truth to power.\"",
"industryIdentifiers": [
{
"type": "ISBN_10",
"identifier": "1847498574"
},
{
"type": "ISBN_13",
"identifier": "9781847498571"
}
],
"readingModes": {
"text": false,
"image": false
},
"pageCount": 400,
"printedPageCount": 400,
"dimensions": {
"height": "19.90 cm",
"width": "13.10 cm",
"thickness": "2.20 cm"
},
"printType": "BOOK",
"averageRating": 4,
"ratingsCount": 564,
"maturityRating": "NOT_MATURE",
"allowAnonLogging": false,
"contentVersion": "preview-1.0.0",
"panelizationSummary": {
"containsEpubBubbles": false,
"containsImageBubbles": false
},
"imageLinks": {
"smallThumbnail": "http://books.google.com/books/content?id=hV--zQEACAAJ&printsec=frontcover&img=1&zoom=5&imgtk=AFLRE72QQ6bzD4LfhArQGJHoUdX5wex-wfg5FVAKOo2MbmCbFSF_HbDUwhZ-gAvmSKiEBTyoRkC3Kvbo9k1jB0uiOyOXcvgAc2643MV091Ny8TySRaV2HSVXtch-MYK2qfzNvUKwGEhx&source=gbs_api",
"thumbnail": "http://books.google.com/books/content?id=hV--zQEACAAJ&printsec=frontcover&img=1&zoom=1&imgtk=AFLRE70UTuB9rf2_mqyGrJGsI2XbzpjV2vGQP9Oyjc441rCvvRiGMhMGYXsgTMbAUZ3rHtxarPvPIqaT-RGH9JzzFEbrXs3cp7f2jaHVh3M-fyPcEkg0eao_AuYUePhckBN-PtHZNyy-&source=gbs_api"
},
"language": "en",
"previewLink": "http://books.google.com/books?id=hV--zQEACAAJ&hl=&source=gbs_api",
"infoLink": "https://play.google.com/store/books/details?id=hV--zQEACAAJ&source=gbs_api",
"canonicalVolumeLink": "https://play.google.com/store/books/details?id=hV--zQEACAAJ"
},
"saleInfo": {
"country": "US",
"saleability": "NOT_FOR_SALE",
"isEbook": false
},
"accessInfo": {
"country": "US",
"viewability": "NO_PAGES",
"embeddable": false,
"publicDomain": false,
"textToSpeechPermission": "ALLOWED",
"epub": {
"isAvailable": false
},
"pdf": {
"isAvailable": false
},
"webReaderLink": "http://play.google.com/books/reader?id=hV--zQEACAAJ&hl=&source=gbs_api",
"accessViewStatus": "NONE",
"quoteSharingAllowed": false
}
}