import logging import re from urllib.parse import quote_plus import httpx from django.conf import settings from loguru import logger from catalog.book.utils import isbn_10_to_13 from catalog.common import * from catalog.models import * _logger = logging.getLogger(__name__) @SiteManager.register class GoogleBooks(AbstractSite): SITE_NAME = SiteName.GoogleBooks ID_TYPE = IdType.GoogleBooks URL_PATTERNS = [ r"https://books\.google\.co[^/]+/books\?id=([^&#]+)", r"https://www\.google\.co[^/]+/books/edition/[^/]+/([^&#?]+)", r"https://books\.google\.co[^/]+/books/about/[^?]+\?id=([^&#?]+)", ] WIKI_PROPERTY_ID = "" DEFAULT_MODEL = Edition @classmethod def id_to_url(cls, id_value): return "https://books.google.com/books?id=" + id_value def scrape(self): api_url = f"https://www.googleapis.com/books/v1/volumes/{self.id_value}" if settings.GOOGLE_API_KEY: api_url += f"?key={settings.GOOGLE_API_KEY}" b = BasicDownloader(api_url).download().json() other = {} title = b["volumeInfo"]["title"] subtitle = ( b["volumeInfo"]["subtitle"] if "subtitle" in b["volumeInfo"] else None ) pub_year = None pub_month = None if "publishedDate" in b["volumeInfo"]: pub_date = b["volumeInfo"]["publishedDate"].split("-") pub_year = pub_date[0] pub_month = pub_date[1] if len(pub_date) > 1 else None pub_house = ( b["volumeInfo"]["publisher"] if "publisher" in b["volumeInfo"] else None ) language = ( b["volumeInfo"]["language"].lower() if "language" in b["volumeInfo"] else [] ) pages = b["volumeInfo"]["pageCount"] if "pageCount" in b["volumeInfo"] else None if "mainCategory" in b["volumeInfo"]: other["分类"] = b["volumeInfo"]["mainCategory"] authors = b["volumeInfo"]["authors"] if "authors" in b["volumeInfo"] else None if "description" in b["volumeInfo"]: brief = b["volumeInfo"]["description"] elif "textSnippet" in b["volumeInfo"]: brief = b["volumeInfo"]["textSnippet"]["searchInfo"] else: brief = "" brief = re.sub(r"<.*?>", "", brief.replace(" list[ExternalSearchResultItem]: if category not in ["all", "book"]: return [] SEARCH_PAGE_SIZE = 5 results = [] api_url = f"https://www.googleapis.com/books/v1/volumes?country=us&q={quote_plus(q)}&startIndex={SEARCH_PAGE_SIZE * (page - 1)}&maxResults={SEARCH_PAGE_SIZE}&maxAllowedMaturityRating=MATURE" async with httpx.AsyncClient() as client: try: response = await client.get(api_url, timeout=2) j = response.json() if "items" in j: for b in j["items"]: if "title" not in b["volumeInfo"]: continue title = b["volumeInfo"]["title"] subtitle = "" if "publishedDate" in b["volumeInfo"]: subtitle += b["volumeInfo"]["publishedDate"] + " " if "authors" in b["volumeInfo"]: subtitle += ", ".join(b["volumeInfo"]["authors"]) if "description" in b["volumeInfo"]: brief = b["volumeInfo"]["description"] elif "textSnippet" in b["volumeInfo"]: brief = b["volumeInfo"]["textSnippet"]["searchInfo"] else: brief = "" category = ItemCategory.Book # b['volumeInfo']['infoLink'].replace('http:', 'https:') url = "https://books.google.com/books?id=" + b["id"] cover = ( b["volumeInfo"]["imageLinks"]["thumbnail"] if "imageLinks" in b["volumeInfo"] else "" ) results.append( ExternalSearchResultItem( category, SiteName.GoogleBooks, url, title, subtitle, brief, cover, ) ) except Exception as e: logger.error( "GoogleBooks search error", extra={"query": q, "exception": e} ) return results