From 5224f57ad01197844626d74a4dd1f1e7de8f84ec Mon Sep 17 00:00:00 2001 From: Your Name Date: Sun, 12 Feb 2023 21:28:22 -0500 Subject: [PATCH] improve google book cover image quality --- catalog/common/downloaders.py | 23 +++++++++++++++-------- catalog/management/commands/cat.py | 1 + catalog/sites/google_books.py | 9 ++++----- 3 files changed, 20 insertions(+), 13 deletions(-) diff --git a/catalog/common/downloaders.py b/catalog/common/downloaders.py index 389ddbac..2baf4e3f 100644 --- a/catalog/common/downloaders.py +++ b/catalog/common/downloaders.py @@ -47,6 +47,8 @@ def get_mock_file(url): fn = url.replace("***REMOVED***", "1234") # Thank you, Github Action -_-! fn = re.sub(r"[^\w]", "_", fn) fn = re.sub(r"_key_[*A-Za-z0-9]+", "_key_8964", fn) + if len(fn) > 255: + fn = fn[:255] return fn @@ -107,12 +109,15 @@ class BasicDownloader: url, headers=self.headers, timeout=self.get_timeout() ) if settings.DOWNLOADER_SAVEDIR: - with open( - settings.DOWNLOADER_SAVEDIR + "/" + get_mock_file(url), - "w", - encoding="utf-8", - ) as fp: - fp.write(resp.text) + try: + with open( + settings.DOWNLOADER_SAVEDIR + "/" + get_mock_file(url), + "w", + encoding="utf-8", + ) as fp: + fp.write(resp.text) + except: + _logger.warn("Save downloaded data failed.") else: resp = MockResponse(self.url) response_type = self.validate_response(resp) @@ -225,8 +230,6 @@ class ImageDownloaderMixin: else: return RESPONSE_NETWORK_ERROR - -class BasicImageDownloader(ImageDownloaderMixin, BasicDownloader): @classmethod def download_image(cls, image_url, page_url): imgdl = cls(image_url, page_url) @@ -238,6 +241,10 @@ class BasicImageDownloader(ImageDownloaderMixin, BasicDownloader): return None, None +class BasicImageDownloader(ImageDownloaderMixin, BasicDownloader): + pass + + class ProxiedImageDownloader(ImageDownloaderMixin, ProxiedDownloader): pass diff --git a/catalog/management/commands/cat.py b/catalog/management/commands/cat.py index ae23129a..e9dbc533 100644 --- a/catalog/management/commands/cat.py +++ b/catalog/management/commands/cat.py @@ -31,6 +31,7 @@ class Command(BaseCommand): resource = site.get_resource_ready(ignore_existing_content=options["force"]) pprint.pp(resource.metadata) pprint.pp(site.get_item()) + pprint.pp(site.get_item().cover) pprint.pp(site.get_item().metadata) else: resource = site.scrape() diff --git a/catalog/sites/google_books.py b/catalog/sites/google_books.py index a036df37..b4e34f6a 100644 --- a/catalog/sites/google_books.py +++ b/catalog/sites/google_books.py @@ -54,11 +54,10 @@ class GoogleBooks(AbstractSite): else: brief = "" brief = re.sub(r"<.*?>", "", brief.replace("