From dbdae6d58c7d913dee39269e118fc4aa1febe861 Mon Sep 17 00:00:00 2001
From: Your Name <you@example.com>
Date: Thu, 9 Dec 2021 22:00:09 -0500
Subject: [PATCH] add missing MovieGenre THEATRE and detect Douban IP ban

---
 common/scraper.py | 34 +++++++++++++++++++++-------------
 movies/models.py  |  1 +
 2 files changed, 22 insertions(+), 13 deletions(-)
diff --git a/common/scraper.py b/common/scraper.py
index f812edec..7e8ac673 100644
--- a/common/scraper.py
+++ b/common/scraper.py
@@ -79,10 +79,11 @@ def parse_date(raw_str):
     return dateparser.parse(
         raw_str,
         settings={
-        "RELATIVE_BASE": datetime.datetime(1900, 1, 1)
+            "RELATIVE_BASE": datetime.datetime(1900, 1, 1)
         }
     )
 
+
 class AbstractScraper:
     """
     Scrape entities. The entities means those defined in the models.py file,
@@ -249,9 +250,12 @@ class DoubanScrapperMixin:
             if r.status_code == 200:
                 content = r.content.decode('utf-8')
                 if content.find('关于豆瓣') == -1:
+                    if content.find('你的 IP 发出') == -1:
+                        error = error + 'Content not authentic'  # response is garbage
+                    else:
+                        error = error + 'IP banned'
                     content = None
                     last_error = 'network'
-                    error = error + 'Content not authentic'  # response is garbage
                 elif re.search('不存在[^<]+</title>', content, re.MULTILINE):
                     content = None
                     last_error = 'censorship'
@@ -313,12 +317,12 @@ class DoubanScrapperMixin:
             if settings.SCRAPESTACK_KEY is not None:
                 error = error + '\nScrapeStack: '
                 get(f'http://api.scrapestack.com/scrape?access_key={settings.SCRAPESTACK_KEY}&url={url}', 30)
-            elif settings.SCRAPERAPI_KEY is None:
-                error = error + '\nDirect: '
-                get(url, 30)
-            else:
+            elif settings.SCRAPERAPI_KEY is not None:
                 error = error + '\nScraperAPI: '
                 get(f'http://api.scraperapi.com?api_key={settings.SCRAPERAPI_KEY}&url={url}', 30)
+            else:
+                error = error + '\nDirect: '
+                get(url, 30)
             check_content()
             if last_error == 'network' and settings.PROXYCRAWL_KEY is not None:
                 error = error + '\nProxyCrawl: '
@@ -340,11 +344,12 @@ class DoubanScrapperMixin:
         raw_img = None
         ext = None
 
-        dl_url = url
         if settings.SCRAPESTACK_KEY is not None:
             dl_url = f'http://api.scrapestack.com/scrape?access_key={settings.SCRAPESTACK_KEY}&url={url}'
         elif settings.SCRAPERAPI_KEY is not None:
             dl_url = f'http://api.scraperapi.com?api_key={settings.SCRAPERAPI_KEY}&url={url}'
+        else:
+            dl_url = url
 
         try:
             img_response = requests.get(dl_url, timeout=30)
@@ -361,6 +366,7 @@ class DoubanScrapperMixin:
             raw_img = None
             ext = None
             logger.error(f"Douban: download image failed {e} {dl_url} {item_url}")
+
         if raw_img is None and settings.PROXYCRAWL_KEY is not None:
             try:
                 dl_url = f'https://api.proxycrawl.com/?token={settings.PROXYCRAWL_KEY}&url={url}'
@@ -430,9 +436,9 @@ class DoubanBookScraper(DoubanScrapperMixin, AbstractScraper):
             pub_month = None
         if pub_year and pub_month and pub_year < pub_month:
             pub_year, pub_month = pub_month, pub_year
-        pub_year = None if pub_year is not None and not pub_year in range(
+        pub_year = None if pub_year is not None and pub_year not in range(
             0, 3000) else pub_year
-        pub_month = None if pub_month is not None and not pub_month in range(
+        pub_month = None if pub_month is not None and pub_month not in range(
             1, 12) else pub_month
 
         binding_elem = content.xpath(
@@ -598,7 +604,7 @@ class DoubanMovieScraper(DoubanScrapperMixin, AbstractScraper):
 
         # construct genre translator
         genre_translator = {}
-        attrs = [attr for attr in dir(MovieGenreEnum) if not '__' in attr]
+        attrs = [attr for attr in dir(MovieGenreEnum) if '__' not in attr]
         for attr in attrs:
             genre_translator[getattr(MovieGenreEnum, attr).label] = getattr(
                 MovieGenreEnum, attr).value
@@ -738,8 +744,7 @@ class DoubanAlbumScraper(DoubanScrapperMixin, AbstractScraper):
         if not title:
             raise ValueError("given url contains no album info")
 
-
-        artists_elem = content.xpath("""//div[@id='info']/span/span[@class='pl']/a/text()""")
+        artists_elem = content.xpath("//div[@id='info']/span/span[@class='pl']/a/text()")
         artist = None if not artists_elem else artists_elem
 
         genre_elem = content.xpath(
@@ -1648,6 +1653,7 @@ class GoodreadsScraper(AbstractScraper):
         self.raw_data, self.raw_img, self.img_ext = data, raw_img, ext
         return data, raw_img
 
+
 class TmdbMovieScraper(AbstractScraper):
     site_name = SourceSiteEnum.TMDB.value
     host = 'https://www.themoviedb.org/'
@@ -1777,6 +1783,7 @@ class TmdbMovieScraper(AbstractScraper):
         else:
             return None
 
+
 # https://developers.google.com/youtube/v3/docs/?apix=true
 # https://developers.google.com/books/docs/v1/using
 class GoogleBooksScraper(AbstractScraper):
@@ -1855,4 +1862,5 @@ class GoogleBooksScraper(AbstractScraper):
         self.raw_data, self.raw_img, self.img_ext = data, raw_img, ext
         return data, raw_img
 
-from common.scrapers.bandcamp import BandcampAlbumScraper
\ No newline at end of file
+
+from common.scrapers.bandcamp import BandcampAlbumScraper
diff --git a/movies/models.py b/movies/models.py
index bf592b9d..84fc9270 100644
--- a/movies/models.py
+++ b/movies/models.py
@@ -52,6 +52,7 @@ class MovieGenreEnum(models.TextChoices):
     NEWS = 'News', _('新闻')
     SOAP = 'Soap', _('肥皂剧')
     TV_MOVIE = 'TV Movie', _('电视电影')
+    THEATRE = 'Theatre', _('舞台艺术')
     OTHER = 'Other', _('其他')