new data model: googlebooks
This commit is contained in:
parent
6206918ccc
commit
bfd68e6a3f
6 changed files with 189 additions and 2 deletions
|
@ -116,6 +116,35 @@ class GoodreadsTestCase(TestCase):
|
|||
self.assertEqual(w1, w2)
|
||||
|
||||
|
||||
class GoogleBooksTestCase(TestCase):
    """Checks URL recognition and scraping for the Google Books site."""

    def test_parse(self):
        # A regional-domain URL carrying an extra query parameter, and the
        # plain books.google.com form it is expected to be reported as.
        regional_url = 'https://books.google.com.bn/books?id=hV--zQEACAAJ&hl=ms'
        canonical_url = 'https://books.google.com/books?id=hV--zQEACAAJ'
        volume_id = 'hV--zQEACAAJ'
        expected_type = IdType.GoogleBooks

        regional_site = SiteList.get_site_by_url(regional_url)
        canonical_site = SiteList.get_site_by_url(canonical_url)

        self.assertIsNotNone(regional_site)
        self.assertEqual(regional_site.url, canonical_url)
        self.assertEqual(regional_site.ID_TYPE, expected_type)
        self.assertEqual(regional_site.id_value, volume_id)
        self.assertEqual(canonical_site.url, canonical_url)

    @use_local_response
    def test_scrape(self):
        source_url = 'https://books.google.com.bn/books?id=hV--zQEACAAJ'
        expected_title = '1984 Nineteen Eighty-Four'
        expected_isbn = '9781847498571'

        site = SiteList.get_site_by_url(source_url)

        # The site reports not-ready until its resource has been fetched.
        self.assertEqual(site.ready, False)
        site.get_resource_ready()
        self.assertEqual(site.ready, True)

        # Scraped metadata first, then the resource identity, then the item.
        self.assertEqual(site.resource.metadata.get('title'), expected_title)
        self.assertEqual(site.resource.metadata.get('isbn'), expected_isbn)
        self.assertEqual(site.resource.id_type, IdType.GoogleBooks)
        self.assertEqual(site.resource.id_value, 'hV--zQEACAAJ')
        self.assertEqual(site.resource.item.isbn, expected_isbn)
        self.assertEqual(site.resource.item.title, expected_title)
|
||||
|
||||
|
||||
class DoubanBookTestCase(TestCase):
|
||||
def setUp(self):
|
||||
pass
|
||||
|
@ -170,9 +199,12 @@ class MultiBookSitesTestCase(TestCase):
|
|||
# isbn = '9781847498571'
|
||||
url1 = 'https://www.goodreads.com/book/show/56821625-1984'
|
||||
url2 = 'https://book.douban.com/subject/35902899/'
|
||||
url3 = 'https://books.google.com/books?id=hV--zQEACAAJ'
|
||||
p1 = SiteList.get_site_by_url(url1).get_resource_ready()
|
||||
p2 = SiteList.get_site_by_url(url2).get_resource_ready()
|
||||
p3 = SiteList.get_site_by_url(url3).get_resource_ready()
|
||||
self.assertEqual(p1.item.id, p2.item.id)
|
||||
self.assertEqual(p2.item.id, p3.item.id)
|
||||
|
||||
@use_local_response
|
||||
def test_works(self):
|
||||
|
|
|
@ -27,7 +27,7 @@ class IdType(models.TextChoices):
|
|||
TMDB_Movie = 'tmdb_movie', _('TMDB电影')
|
||||
Goodreads = 'goodreads', _('Goodreads')
|
||||
Goodreads_Work = 'goodreads_work', _('Goodreads著作')
|
||||
GoogleBook = 'googlebook', _('谷歌图书')
|
||||
GoogleBooks = 'googlebooks', _('谷歌图书')
|
||||
DoubanBook = 'doubanbook', _('豆瓣读书')
|
||||
DoubanBook_Work = 'doubanbook_work', _('豆瓣读书著作')
|
||||
DoubanMovie = 'doubanmovie', _('豆瓣电影')
|
||||
|
|
|
@ -20,7 +20,7 @@ _logger = logging.getLogger(__name__)
|
|||
class ResourceContent:
|
||||
lookup_ids: dict = field(default_factory=dict)
|
||||
metadata: dict = field(default_factory=dict)
|
||||
cover_image = None
|
||||
cover_image: bytes = None
|
||||
cover_image_extention: str = None
|
||||
|
||||
|
||||
|
|
|
@ -6,6 +6,7 @@ from .douban_music import DoubanMusic
|
|||
from .douban_game import DoubanGame
|
||||
from .douban_drama import DoubanDrama
|
||||
from .goodreads import Goodreads
|
||||
from .google_books import GoogleBooks
|
||||
from .tmdb import TMDB_Movie
|
||||
from .imdb import IMDB
|
||||
from .spotify import Spotify
|
||||
|
|
79
catalog/sites/google_books.py
Normal file
79
catalog/sites/google_books.py
Normal file
|
@ -0,0 +1,79 @@
|
|||
from catalog.common import *
|
||||
from catalog.models import *
|
||||
import re
|
||||
import logging
|
||||
|
||||
|
||||
_logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@SiteList.register
class GoogleBooks(AbstractSite):
    """Site adapter that resolves Google Books URLs and scrapes volume data.

    Each entry in ``URL_PATTERNS`` captures the volume id as group 1. The
    ``google\\.co[^/]+`` part accepts regional domains such as ``.com`` or
    ``.com.bn``.
    """

    ID_TYPE = IdType.GoogleBooks
    URL_PATTERNS = [
        r"https://books\.google\.co[^/]+/books\?id=([^&#]+)",
        r"https://www\.google\.co[^/]+/books/edition/[^/]+/([^&#?]+)",
        # The "?" before "id=" must be escaped: unescaped it turns "[^?]+"
        # into a lazy quantifier and the pattern can never reach "?id=".
        r"https://books\.google\.co[^/]+/books/about/[^?]+\?id=([^&#?]+)",
    ]
    WIKI_PROPERTY_ID = ''
    DEFAULT_MODEL = Edition

    @classmethod
    def id_to_url(cls, id_value):
        """Return the books.google.com URL for the volume id ``id_value``."""
        return "https://books.google.com/books?id=" + id_value

    def scrape(self):
        """Fetch this volume from the Google Books volumes API.

        Returns:
            ResourceContent: ``metadata`` holds the book fields built below,
            ``cover_image`` / ``cover_image_extention`` hold the downloaded
            thumbnail (both ``None`` when the volume has no thumbnail URL),
            and ``lookup_ids`` maps ``IdType.ISBN`` to the ISBN-13, which is
            ``None`` when the volume lists no ISBN-13.

        Raises:
            KeyError: if the response has no ``volumeInfo`` or no title.
        """
        api_url = f'https://www.googleapis.com/books/v1/volumes/{self.id_value}'
        b = BasicDownloader(api_url).download().json()
        info = b['volumeInfo']

        other = {}
        title = info['title']
        subtitle = info.get('subtitle')

        # publishedDate is split on "-": the first part is the year and the
        # second, when present, the month. Both stay strings.
        pub_year = None
        pub_month = None
        if 'publishedDate' in info:
            pub_date = info['publishedDate'].split('-')
            pub_year = pub_date[0]
            pub_month = pub_date[1] if len(pub_date) > 1 else None

        pub_house = info.get('publisher')
        language = info.get('language')
        pages = info.get('pageCount')
        if 'mainCategory' in info:
            other['分类'] = info['mainCategory']
        authors = info.get('authors')

        # Prefer the full description. The fallback snippet is read from the
        # top-level "searchInfo" object, which is where the Books API volume
        # resource places "textSnippet" (it is not a child of volumeInfo).
        search_info = b.get('searchInfo') or {}
        if 'description' in info:
            brief = info['description']
        elif 'textSnippet' in search_info:
            brief = search_info['textSnippet']
        else:
            brief = ''
        # Turn <br> tags into newlines, then strip any remaining markup.
        brief = re.sub(r'<.*?>', '', brief.replace('<br', '\n<br'))

        # imageLinks may be absent, or present without a "thumbnail" entry.
        img_url = (info.get('imageLinks') or {}).get('thumbnail')

        # Only the ISBN-13 is used; an ISBN-10 is deliberately not a fallback.
        isbn13 = None
        for iid in info.get('industryIdentifiers') or []:
            if iid.get('type') == 'ISBN_13':
                isbn13 = iid.get('identifier')
        isbn = isbn13

        # Skip the image request entirely when there is no URL to fetch.
        if img_url:
            raw_img, ext = BasicImageDownloader.download_image(img_url, self.url)
        else:
            raw_img, ext = None, None

        data = {
            'title': title,
            'subtitle': subtitle,
            'orig_title': None,
            'author': authors,
            'translator': None,
            'language': language,
            'pub_house': pub_house,
            'pub_year': pub_year,
            'pub_month': pub_month,
            'binding': None,
            'pages': pages,
            'isbn': isbn,
            'brief': brief,
            'contents': None,
            'other_info': other,
            'cover_image_url': img_url,
        }
        return ResourceContent(
            metadata=data,
            cover_image=raw_img,
            cover_image_extention=ext,
            lookup_ids={IdType.ISBN: isbn13},
        )
|
|
@ -0,0 +1,75 @@
|
|||
{
|
||||
"kind": "books#volume",
|
||||
"id": "hV--zQEACAAJ",
|
||||
"etag": "lwbqGlV/h5s",
|
||||
"selfLink": "https://www.googleapis.com/books/v1/volumes/hV--zQEACAAJ",
|
||||
"volumeInfo": {
|
||||
"title": "1984 Nineteen Eighty-Four",
|
||||
"authors": [
|
||||
"George Orwell"
|
||||
],
|
||||
"publisher": "Alma Classics",
|
||||
"publishedDate": "2021-01-07",
|
||||
"description": "In 1984, London is a grim city in the totalitarian state of Oceania where Big Brother is always watching you and the Thought Police can practically read your mind. Winston Smith is a man in grave danger for the simple reason that his memory still functions. Drawn into a forbidden love affair, Winston finds the courage to join a secret revolutionary organization called The Brotherhood, dedicated to the destruction of the Party. Together with his beloved Julia, he hazards his life in a deadly match against the powers that be.Lionel Trilling said of Orwell's masterpiece \" 1984 is a profound, terrifying, and wholly fascinating book. It is a fantasy of the political future, and like any such fantasy, serves its author as a magnifying device for an examination of the present.\" Though the year 1984 now exists in the past, Orwell's novel remains an urgent call for the individual willing to speak truth to power.\"",
|
||||
"industryIdentifiers": [
|
||||
{
|
||||
"type": "ISBN_10",
|
||||
"identifier": "1847498574"
|
||||
},
|
||||
{
|
||||
"type": "ISBN_13",
|
||||
"identifier": "9781847498571"
|
||||
}
|
||||
],
|
||||
"readingModes": {
|
||||
"text": false,
|
||||
"image": false
|
||||
},
|
||||
"pageCount": 400,
|
||||
"printedPageCount": 400,
|
||||
"dimensions": {
|
||||
"height": "19.90 cm",
|
||||
"width": "13.10 cm",
|
||||
"thickness": "2.20 cm"
|
||||
},
|
||||
"printType": "BOOK",
|
||||
"averageRating": 4,
|
||||
"ratingsCount": 564,
|
||||
"maturityRating": "NOT_MATURE",
|
||||
"allowAnonLogging": false,
|
||||
"contentVersion": "preview-1.0.0",
|
||||
"panelizationSummary": {
|
||||
"containsEpubBubbles": false,
|
||||
"containsImageBubbles": false
|
||||
},
|
||||
"imageLinks": {
|
||||
"smallThumbnail": "http://books.google.com/books/content?id=hV--zQEACAAJ&printsec=frontcover&img=1&zoom=5&imgtk=AFLRE72QQ6bzD4LfhArQGJHoUdX5wex-wfg5FVAKOo2MbmCbFSF_HbDUwhZ-gAvmSKiEBTyoRkC3Kvbo9k1jB0uiOyOXcvgAc2643MV091Ny8TySRaV2HSVXtch-MYK2qfzNvUKwGEhx&source=gbs_api",
|
||||
"thumbnail": "http://books.google.com/books/content?id=hV--zQEACAAJ&printsec=frontcover&img=1&zoom=1&imgtk=AFLRE70UTuB9rf2_mqyGrJGsI2XbzpjV2vGQP9Oyjc441rCvvRiGMhMGYXsgTMbAUZ3rHtxarPvPIqaT-RGH9JzzFEbrXs3cp7f2jaHVh3M-fyPcEkg0eao_AuYUePhckBN-PtHZNyy-&source=gbs_api"
|
||||
},
|
||||
"language": "en",
|
||||
"previewLink": "http://books.google.com/books?id=hV--zQEACAAJ&hl=&source=gbs_api",
|
||||
"infoLink": "https://play.google.com/store/books/details?id=hV--zQEACAAJ&source=gbs_api",
|
||||
"canonicalVolumeLink": "https://play.google.com/store/books/details?id=hV--zQEACAAJ"
|
||||
},
|
||||
"saleInfo": {
|
||||
"country": "US",
|
||||
"saleability": "NOT_FOR_SALE",
|
||||
"isEbook": false
|
||||
},
|
||||
"accessInfo": {
|
||||
"country": "US",
|
||||
"viewability": "NO_PAGES",
|
||||
"embeddable": false,
|
||||
"publicDomain": false,
|
||||
"textToSpeechPermission": "ALLOWED",
|
||||
"epub": {
|
||||
"isAvailable": false
|
||||
},
|
||||
"pdf": {
|
||||
"isAvailable": false
|
||||
},
|
||||
"webReaderLink": "http://play.google.com/books/reader?id=hV--zQEACAAJ&hl=&source=gbs_api",
|
||||
"accessViewStatus": "NONE",
|
||||
"quoteSharingAllowed": false
|
||||
}
|
||||
}
|
Loading…
Add table
Reference in a new issue