80 lines
3.1 KiB
Python
80 lines
3.1 KiB
Python
![]() |
from catalog.common import *
|
||
|
from catalog.models import *
|
||
|
import re
|
||
|
import logging
|
||
|
|
||
|
|
||
|
# Module-level logger, named after this module via __name__.
_logger = logging.getLogger(__name__)
|
||
|
|
||
|
|
||
|
@SiteList.register
class GoogleBooks(AbstractSite):
    """Google Books site, registered with ``SiteList``.

    Volume metadata is read from the googleapis.com ``books/v1/volumes``
    endpoint rather than from the HTML pages matched by ``URL_PATTERNS``.
    """

    ID_TYPE = IdType.GoogleBooks
    # Group 1 of every pattern is the Google Books volume id.
    # NOTE(review): how AbstractSite applies these patterns is not visible
    # here -- confirm against the base class.
    URL_PATTERNS = [
        r"https://books\.google\.co[^/]+/books\?id=([^&#]+)",
        r"https://www\.google\.co[^/]+/books/edition/[^/]+/([^&#?]+)",
        # The "?" opening the query string has to be escaped. Left bare it
        # only made the preceding "+" lazy, so ".../about/Title.html?id=..."
        # could never match.
        r"https://books\.google\.co[^/]+/books/about/[^?]+\?id=([^&#?]+)",
    ]
    WIKI_PROPERTY_ID = ''
    DEFAULT_MODEL = Edition

    @classmethod
    def id_to_url(cls, id_value):
        """Return the books.google.com URL for the volume id ``id_value``."""
        return "https://books.google.com/books?id=" + id_value

    def scrape(self):
        """Fetch this volume from the Google Books API and package the result.

        Reads ``self.id_value`` to build the API URL and passes ``self.url``
        as the second argument of the cover image download.

        Returns:
            A ``ResourceContent`` holding the metadata dict, the downloaded
            cover image and its extension, and the ISBN-13 (or ``None``)
            under ``IdType.ISBN`` in ``lookup_ids``.

        Raises:
            KeyError: if the response has no ``volumeInfo`` or the volume has
                no ``title``.
            Anything raised by ``BasicDownloader`` propagates unchanged.
        """
        api_url = f'https://www.googleapis.com/books/v1/volumes/{self.id_value}'
        b = BasicDownloader(api_url).download().json()
        info = b['volumeInfo']

        other = {}
        if 'mainCategory' in info:
            other['分类'] = info['mainCategory']

        # publishedDate is split on "-": the first part is the year, the
        # second (when present) the month. Both are kept as strings.
        pub_year = None
        pub_month = None
        if 'publishedDate' in info:
            pub_date = info['publishedDate'].split('-')
            pub_year = pub_date[0]
            pub_month = pub_date[1] if len(pub_date) > 1 else None

        # Prefer the full description. The short snippet is published by the
        # API at the top level as searchInfo.textSnippet, not inside
        # volumeInfo, so that is where the fallback has to look.
        brief = (
            info.get('description')
            or (b.get('searchInfo') or {}).get('textSnippet')
            or ''
        )
        # Turn <br> tags into newlines, then strip every remaining HTML tag.
        brief = re.sub(r'<.*?>', '', brief.replace('<br', '\n<br'))

        # imageLinks may be absent or may lack a "thumbnail" entry; either
        # way the cover URL is simply None.
        img_url = (info.get('imageLinks') or {}).get('thumbnail')

        # Only ISBN-13 is recorded; ISBN-10 identifiers are intentionally not
        # used as a fallback. When several ISBN_13 entries exist the last wins.
        isbn13 = None
        for iid in info.get('industryIdentifiers', []):
            if iid['type'] == 'ISBN_13':
                isbn13 = iid['identifier']
        isbn = isbn13

        # NOTE(review): img_url can be None here; download_image is still
        # called, exactly as before -- confirm it tolerates a missing URL.
        raw_img, ext = BasicImageDownloader.download_image(img_url, self.url)

        data = {
            'title': info['title'],
            'subtitle': info.get('subtitle'),
            'orig_title': None,
            'author': info.get('authors'),
            'translator': None,
            'language': info.get('language'),
            'pub_house': info.get('publisher'),
            'pub_year': pub_year,
            'pub_month': pub_month,
            'binding': None,
            'pages': info.get('pageCount'),
            'isbn': isbn,
            'brief': brief,
            'contents': None,
            'other_info': other,
            'cover_image_url': img_url,
        }
        return ResourceContent(
            metadata=data,
            cover_image=raw_img,
            cover_image_extention=ext,
            lookup_ids={IdType.ISBN: isbn13},
        )
|