diff --git a/books/management/commands/fix-book-cover.py b/books/management/commands/fix-book-cover.py index 9d7bb12f..5ab60a1c 100644 --- a/books/management/commands/fix-book-cover.py +++ b/books/management/commands/fix-book-cover.py @@ -28,6 +28,8 @@ class DoubanPatcherMixin: if r.status_code == 200: content = r.content.decode('utf-8') if content.find('关于豆瓣') == -1: + # with open('/tmp/temp.html', 'w', encoding='utf-8') as fp: + # fp.write(content) content = None error = error + 'Content not authentic' # response is garbage elif re.search('不存在[^<]+', content, re.MULTILINE): @@ -86,12 +88,12 @@ class DoubanPatcherMixin: def latest(): nonlocal r, error, content - if settings.SCRAPERAPI_KEY is None: + if settings.SCRAPESTACK_KEY is None: error = error + '\nDirect: ' get(url, 60) else: - error = error + '\nScraperAPI: ' - get(f'http://api.scraperapi.com?api_key={settings.SCRAPERAPI_KEY}&url={url}', 60) + error = error + '\nScrapeStack: ' + get(f'http://api.scrapestack.com/scrape?access_key={settings.SCRAPESTACK_KEY}&url={url}', 60) check_content() wayback_cdx() @@ -105,12 +107,15 @@ class DoubanPatcherMixin: @classmethod def download_image(cls, url, item_url=None): + if url is None: + logger.error(f"Douban: no image url for {item_url}") + return None, None raw_img = None ext = None dl_url = url - if settings.SCRAPERAPI_KEY is not None: - dl_url = f'http://api.scraperapi.com?api_key={settings.SCRAPERAPI_KEY}&url={url}' + if settings.SCRAPESTACK_KEY is not None: + dl_url = f'http://api.scrapestack.com/scrape?access_key={settings.SCRAPESTACK_KEY}&url={url}' try: img_response = requests.get(dl_url, timeout=90) @@ -127,7 +132,7 @@ class DoubanPatcherMixin: raw_img = None ext = None logger.error(f"Douban: download image failed {e} {dl_url} {item_url}") - if raw_img is None and settings.SCRAPERAPI_KEY is not None: + if raw_img is None and settings.SCRAPESTACK_KEY is not None: try: img_response = requests.get(dl_url, timeout=90) if img_response.status_code == 200: diff --git a/movies/management/commands/fix-movie-poster.py b/movies/management/commands/fix-movie-poster.py index 99699767..80790302 100644 --- a/movies/management/commands/fix-movie-poster.py +++ b/movies/management/commands/fix-movie-poster.py @@ -28,8 +28,11 @@ class DoubanPatcherMixin: if r.status_code == 200: content = r.content.decode('utf-8') if content.find('关于豆瓣') == -1: + if content.find('你的 IP 发出') == -1: + error = error + 'Content not authentic' # response is garbage + else: + error = error + 'IP banned' content = None - error = error + 'Content not authentic' # response is garbage elif re.search('不存在[^<]+', content, re.MULTILINE): content = None error = error + 'Not found or hidden by Douban' @@ -86,31 +89,35 @@ class DoubanPatcherMixin: def latest(): nonlocal r, error, content - if settings.SCRAPERAPI_KEY is None: + if settings.SCRAPESTACK_KEY is None: error = error + '\nDirect: ' get(url, 60) else: error = error + '\nScraperAPI: ' - get(f'http://api.scraperapi.com?api_key={settings.SCRAPERAPI_KEY}&url={url}', 60) + get(f'http://api.scrapestack.com/scrape?access_key={settings.SCRAPESTACK_KEY}&url={url}', 60) check_content() - wayback_cdx() - if content is None: - latest() + # wayback_cdx() + # if content is None: + latest() if content is None: logger.error(error) content = '' + # with open('/tmp/temp.html', 'w', encoding='utf-8') as fp: + # fp.write(content) return html.fromstring(content) @classmethod def download_image(cls, url, item_url=None): + if url is None: + return None, None raw_img = None ext = None dl_url = url - if settings.SCRAPERAPI_KEY is not None: - dl_url = f'http://api.scraperapi.com?api_key={settings.SCRAPERAPI_KEY}&url={url}' + if settings.SCRAPESTACK_KEY is not None: + dl_url = f'http://api.scrapestack.com/scrape?access_key={settings.SCRAPESTACK_KEY}&url={url}' try: img_response = requests.get(dl_url, timeout=90) @@ -127,7 +134,7 @@ class DoubanPatcherMixin: raw_img = None ext = None logger.error(f"Douban: download image failed {e} {dl_url} {item_url}") - if raw_img is None and settings.SCRAPERAPI_KEY is not None: + if raw_img is None and settings.SCRAPESTACK_KEY is not None: try: img_response = requests.get(dl_url, timeout=90) if img_response.status_code == 200: @@ -184,3 +191,4 @@ class Command(BaseCommand): print(f'Skipped {m.source_url}') except Exception as e: print(e) + # return diff --git a/music/management/commands/fix-album-cover.py b/music/management/commands/fix-album-cover.py index 9e413e51..995ac95e 100644 --- a/music/management/commands/fix-album-cover.py +++ b/music/management/commands/fix-album-cover.py @@ -86,12 +86,13 @@ class DoubanPatcherMixin: def latest(): nonlocal r, error, content - if settings.SCRAPERAPI_KEY is None: + if settings.SCRAPESTACK_KEY is None: error = error + '\nDirect: ' get(url, 60) else: error = error + '\nScraperAPI: ' - get(f'http://api.scraperapi.com?api_key={settings.SCRAPERAPI_KEY}&url={url}', 60) + # get(f'http://api.scraperapi.com?api_key={settings.SCRAPERAPI_KEY}&url={url}', 60) + get(f'http://api.scrapestack.com/scrape?access_key={settings.SCRAPESTACK_KEY}&url={url}', 60) check_content() wayback_cdx() @@ -105,12 +106,15 @@ class DoubanPatcherMixin: @classmethod def download_image(cls, url, item_url=None): + if url is None: + return None, None raw_img = None ext = None dl_url = url - if settings.SCRAPERAPI_KEY is not None: - dl_url = f'http://api.scraperapi.com?api_key={settings.SCRAPERAPI_KEY}&url={url}' + if settings.SCRAPESTACK_KEY is not None: + dl_url = f'http://api.scrapestack.com/scrape?access_key={settings.SCRAPESTACK_KEY}&url={url}' + # f'http://api.scraperapi.com?api_key={settings.SCRAPERAPI_KEY}&url={url}' try: img_response = requests.get(dl_url, timeout=90) @@ -127,7 +131,7 @@ class DoubanPatcherMixin: raw_img = None ext = None logger.error(f"Douban: download image failed {e} {dl_url} {item_url}") - if raw_img is None and settings.SCRAPERAPI_KEY is not None: + if raw_img is None and settings.SCRAPESTACK_KEY is not None: try: img_response = requests.get(dl_url, timeout=90) if img_response.status_code == 200: