From f7248b2e0c7f27f48ef9617f90edc9eeb6df0b8b Mon Sep 17 00:00:00 2001
From: Your Name
Date: Fri, 10 Dec 2021 07:19:16 -0500
Subject: [PATCH] adjust timeout

Replace the hard-coded request timeouts (the TIMEOUT constant and the
10- and 30-second literals) with the configurable
settings.SCRAPING_TIMEOUT, and drop the now-redundant timeout parameter
from the Douban get() helper.
---
 common/scraper.py | 31 ++++++++++++++++---------------
 1 file changed, 16 insertions(+), 15 deletions(-)

diff --git a/common/scraper.py b/common/scraper.py
index 7e8ac673..ab2ded46 100644
--- a/common/scraper.py
+++ b/common/scraper.py
@@ -75,6 +75,7 @@ def log_url(func):
     return wrapper
 
+
 def parse_date(raw_str):
     return dateparser.parse(
         raw_str,
@@ -163,7 +164,7 @@ class AbstractScraper:
         if settings.LUMINATI_USERNAME is None:
             proxies = None
         r = requests.get(url, proxies=proxies,
-                         headers=headers, timeout=TIMEOUT)
+                         headers=headers, timeout=settings.SCRAPING_TIMEOUT)
         if r.status_code != 200:
             raise RuntimeError(f"download page failed, status code {r.status_code}")
@@ -197,7 +198,7 @@ class AbstractScraper:
                 'dnt': '1',
             },
             proxies=proxies,
-            timeout=TIMEOUT,
+            timeout=settings.SCRAPING_TIMEOUT,
         )
         if img_response.status_code == 200:
             raw_img = img_response.content
@@ -232,11 +233,11 @@ class DoubanScrapperMixin:
         content = None
         last_error = None
 
-        def get(url, timeout):
+        def get(url):
             nonlocal r
             # print('Douban GET ' + url)
             try:
-                r = requests.get(url, timeout=timeout)
+                r = requests.get(url, timeout=settings.SCRAPING_TIMEOUT)
             except Exception as e:
                 r = requests.Response()
                 r.status_code = f"Exception when GET {url} {e}" + url
@@ -280,11 +281,11 @@ class DoubanScrapperMixin:
         def wayback():
             nonlocal r, error, content
             error = error + '\nWayback: '
-            get('http://archive.org/wayback/available?url=' + url, 10)
+            get('http://archive.org/wayback/available?url=' + url)
             if r.status_code == 200:
                 w = r.json()
                 if w['archived_snapshots'] and w['archived_snapshots']['closest']:
-                    get(w['archived_snapshots']['closest']['url'], 10)
+                    get(w['archived_snapshots']['closest']['url'])
                     check_content()
                     if content is not None:
                         fix_wayback_links()
@@ -297,13 +298,13 @@ class DoubanScrapperMixin:
         def wayback_cdx():
             nonlocal r, error, content
             error = error + '\nWayback: '
-            get('http://web.archive.org/cdx/search/cdx?url=' + url, 10)
+            get('http://web.archive.org/cdx/search/cdx?url=' + url)
             if r.status_code == 200:
                 dates = re.findall(r'[^\s]+\s+(\d+)\s+[^\s]+\s+[^\s]+\s+\d+\s+[^\s]+\s+\d{5,}',
                                    r.content.decode('utf-8'))
                 # assume snapshots whose size >9999 contain real content, use the latest one of them
                 if len(dates) > 0:
-                    get('http://web.archive.org/web/' + dates[-1] + '/' + url, 10)
+                    get('http://web.archive.org/web/' + dates[-1] + '/' + url)
                     check_content()
                     if content is not None:
                         fix_wayback_links()
@@ -316,17 +317,17 @@
             nonlocal r, error, content
             if settings.SCRAPESTACK_KEY is not None:
                 error = error + '\nScrapeStack: '
-                get(f'http://api.scrapestack.com/scrape?access_key={settings.SCRAPESTACK_KEY}&url={url}', 30)
+                get(f'http://api.scrapestack.com/scrape?access_key={settings.SCRAPESTACK_KEY}&url={url}')
             elif settings.SCRAPERAPI_KEY is not None:
                 error = error + '\nScraperAPI: '
-                get(f'http://api.scraperapi.com?api_key={settings.SCRAPERAPI_KEY}&url={url}', 30)
+                get(f'http://api.scraperapi.com?api_key={settings.SCRAPERAPI_KEY}&url={url}')
             else:
                 error = error + '\nDirect: '
-                get(url, 30)
+                get(url)
             check_content()
             if last_error == 'network' and settings.PROXYCRAWL_KEY is not None:
                 error = error + '\nProxyCrawl: '
-                get(f'https://api.proxycrawl.com/?token={settings.PROXYCRAWL_KEY}&url={url}', 30)
+                get(f'https://api.proxycrawl.com/?token={settings.PROXYCRAWL_KEY}&url={url}')
                 check_content()
 
         latest()
@@ -352,7 +353,7 @@ class DoubanScrapperMixin:
         dl_url = url
         try:
-            img_response = requests.get(dl_url, timeout=30)
+            img_response = requests.get(dl_url, timeout=settings.SCRAPING_TIMEOUT)
             if img_response.status_code == 200:
                 raw_img = img_response.content
                 img = Image.open(BytesIO(raw_img))
@@ -370,7 +371,7 @@ class DoubanScrapperMixin:
         if raw_img is None and settings.PROXYCRAWL_KEY is not None:
             try:
                 dl_url = f'https://api.proxycrawl.com/?token={settings.PROXYCRAWL_KEY}&url={url}'
-                img_response = requests.get(dl_url, timeout=30)
+                img_response = requests.get(dl_url, timeout=settings.SCRAPING_TIMEOUT)
                 if img_response.status_code == 200:
                     raw_img = img_response.content
                     img = Image.open(BytesIO(raw_img))
@@ -823,6 +824,7 @@ class DoubanAlbumScraper(DoubanScrapperMixin, AbstractScraper):
 spotify_token = None
 spotify_token_expire_time = time.time()
 
+
 class SpotifyTrackScraper(AbstractScraper):
     site_name = SourceSiteEnum.SPOTIFY.value
     host = 'https://open.spotify.com/track/'
@@ -1378,7 +1380,6 @@ class BangumiScraper(AbstractScraper):
         self.raw_data, self.raw_img, self.img_ext = data, raw_img, ext
         return data, raw_img
 
-
     def scrape_game(self, content):
         self.data_class = Game
         self.form_class = GameForm
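
Note: the patch assumes SCRAPING_TIMEOUT is already defined in the Django
settings module; nothing in this diff adds it. A minimal sketch of the
assumed setting follows (the module path, default value, and environment
variable name are hypothetical illustrations, not taken from this patch):

    # settings.py, hypothetical sketch, not part of this patch
    import os

    # Seconds before a scraper HTTP request is abandoned; the default of 10
    # is an assumed placeholder, not the project's actual value.
    SCRAPING_TIMEOUT = int(os.environ.get('SCRAPING_TIMEOUT', '10'))

One consequence of the unification: requests treats timeout as a
connect/read timeout (time to connect, then time between received bytes),
not a cap on total transfer time, so a single SCRAPING_TIMEOUT can
reasonably serve both the fast archive.org availability checks (previously
10s) and the slower proxy-service fetches (previously 30s).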