From b4bdc58cadca695c82eef9bd76499dbb346d083b Mon Sep 17 00:00:00 2001
From: Your Name
Date: Sat, 27 Jul 2024 03:22:27 -0400
Subject: [PATCH] move cover download out of scrape()

---
 catalog/common/models.py       | 14 +++++++++++++-
 catalog/sites/apple_music.py   | 19 +++++--------------
 catalog/sites/apple_podcast.py |  9 ---------
 catalog/sites/bandcamp.py      |  9 ---------
 catalog/sites/bgg.py           |  9 ---------
 catalog/sites/discogs.py       | 18 ------------------
 catalog/sites/douban_book.py   |  3 ---
 catalog/sites/douban_drama.py  | 33 ++++++---------------------------
 catalog/sites/douban_movie.py  | 12 +-----------
 catalog/sites/douban_music.py  |  9 ---------
 catalog/sites/goodreads.py     |  9 ---------
 catalog/sites/igdb.py          |  9 ---------
 catalog/sites/imdb.py          |  9 ---------
 catalog/sites/rss.py           | 13 +------------
 catalog/sites/spotify.py       |  9 ---------
 catalog/sites/tmdb.py          | 36 ------------------------------------
 16 files changed, 26 insertions(+), 194 deletions(-)

diff --git a/catalog/common/models.py b/catalog/common/models.py
index 1215b542..b623b6b0 100644
--- a/catalog/common/models.py
+++ b/catalog/common/models.py
@@ -891,12 +891,24 @@ class ExternalResource(models.Model):
     def update_content(self, resource_content: "ResourceContent"):
         self.other_lookup_ids = resource_content.lookup_ids
         self.metadata = resource_content.metadata
+        if (
+            resource_content.metadata.get("cover_image_url")
+            and not resource_content.cover_image
+        ):
+            from .downloaders import BasicImageDownloader
+
+            (
+                resource_content.cover_image,
+                resource_content.cover_image_extention,
+            ) = BasicImageDownloader.download_image(
+                resource_content.metadata.get("cover_image_url"), self.url
+            )
         if resource_content.cover_image and resource_content.cover_image_extention:
             self.cover = SimpleUploadedFile(
                 "temp." + resource_content.cover_image_extention,
                 resource_content.cover_image,
             )
-        else:
+        elif resource_content.metadata.get("cover_image_path"):
             self.cover = resource_content.metadata.get("cover_image_path")
         self.scraped_time = timezone.now()
         self.save()
diff --git a/catalog/sites/apple_music.py b/catalog/sites/apple_music.py
index 0fb8a916..4d11a3ca 100644
--- a/catalog/sites/apple_music.py
+++ b/catalog/sites/apple_music.py
@@ -56,8 +56,8 @@ class AppleMusic(AbstractSite):

     def get_locales(self):
         locales = {}
-        for l in PREFERRED_LANGUAGES:
-            match l:
+        for lang in PREFERRED_LANGUAGES:
+            match lang:
                 case "zh":
                     locales.update({"zh": ["cn", "tw", "hk", "sg"]})
                 case "en":
@@ -94,10 +94,10 @@ class AppleMusic(AbstractSite):
                 brief = album_data.get("modalPresentationDescriptor", {}).get(
                     "paragraphText", ""
                 )
-                l = detect_language(title + " " + brief)
-                localized_title.append({"lang": l, "text": title})
+                tl = detect_language(title + " " + brief)
+                localized_title.append({"lang": tl, "text": title})
                 if brief:
-                    localized_desc.append({"lang": l, "text": brief})
+                    localized_desc.append({"lang": tl, "text": brief})
                 if lang == DEFAULT_CATALOG_LANGUAGE or not matched_content:
                     matched_content = content
                 break
@@ -155,13 +155,4 @@ class AppleMusic(AbstractSite):
                 "cover_image_url": image_url,
             }
         )
-        if pd.metadata["cover_image_url"]:
-            imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
-            try:
-                pd.cover_image = imgdl.download().content
-                pd.cover_image_extention = imgdl.extention
-            except Exception:
-                _logger.debug(
-                    f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
-                )
         return pd
diff --git a/catalog/sites/apple_podcast.py b/catalog/sites/apple_podcast.py
index 4c3ec9fd..76c7a0c8 100644
--- a/catalog/sites/apple_podcast.py
+++ b/catalog/sites/apple_podcast.py
@@ -37,13 +37,4 @@ class ApplePodcast(AbstractSite):
             }
         )
         pd.lookup_ids[IdType.RSS] = RSS.url_to_id(feed_url)
-        if pd.metadata["cover_image_url"]:
-            imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
-            try:
-                pd.cover_image = imgdl.download().content
-                pd.cover_image_extention = imgdl.extention
-            except Exception:
-                _logger.debug(
-                    f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
-                )
         return pd
diff --git a/catalog/sites/bandcamp.py b/catalog/sites/bandcamp.py
index 98c6b15b..80164189 100644
--- a/catalog/sites/bandcamp.py
+++ b/catalog/sites/bandcamp.py
@@ -102,13 +102,4 @@ class Bandcamp(AbstractSite):
             "cover_image_url": cover_url,
         }
         pd = ResourceContent(metadata=data)
-        if data["cover_image_url"]:
-            imgdl = BasicImageDownloader(data["cover_image_url"], self.url)
-            try:
-                pd.cover_image = imgdl.download().content
-                pd.cover_image_extention = imgdl.extention
-            except Exception:
-                _logger.debug(
-                    f'failed to download cover for {self.url} from {data["cover_image_url"]}'
-                )
         return pd
diff --git a/catalog/sites/bgg.py b/catalog/sites/bgg.py
index 9fe5768e..080d7a01 100644
--- a/catalog/sites/bgg.py
+++ b/catalog/sites/bgg.py
@@ -69,13 +69,4 @@ class BoardGameGeek(AbstractSite):
                 "cover_image_url": cover_image_url,
             }
         )
-        if pd.metadata["cover_image_url"]:
-            imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
-            try:
-                pd.cover_image = imgdl.download().content
-                pd.cover_image_extention = imgdl.extention
-            except Exception:
-                logger.debug(
-                    f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
-                )
         return pd
diff --git a/catalog/sites/discogs.py b/catalog/sites/discogs.py
index b7674799..6e324714 100644
--- a/catalog/sites/discogs.py
+++ b/catalog/sites/discogs.py
@@ -76,15 +76,6 @@ class DiscogsRelease(AbstractSite):
         )
         if barcode:
             pd.lookup_ids[IdType.GTIN] = barcode
-        if pd.metadata["cover_image_url"]:
-            imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
-            try:
-                pd.cover_image = imgdl.download().content
-                pd.cover_image_extention = imgdl.extention
-            except Exception:
-                _logger.debug(
-                    f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
-                )
         return pd


@@ -122,15 +113,6 @@ class DiscogsMaster(AbstractSite):
                 "cover_image_url": image_url,
             }
         )
-        if pd.metadata["cover_image_url"]:
-            imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
-            try:
-                pd.cover_image = imgdl.download().content
-                pd.cover_image_extention = imgdl.extention
-            except Exception:
-                _logger.debug(
-                    f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
-                )
         return pd


diff --git a/catalog/sites/douban_book.py b/catalog/sites/douban_book.py
index 71fc6ffa..fe7aa7a5 100644
--- a/catalog/sites/douban_book.py
+++ b/catalog/sites/douban_book.py
@@ -236,9 +236,6 @@ class DoubanBook(AbstractSite):
         if t:
             pd.lookup_ids[t] = n
         pd.lookup_ids[IdType.CUBN] = cubn
-        pd.cover_image, pd.cover_image_extention = BasicImageDownloader.download_image(
-            img_url, self.url
-        )
         return pd


diff --git a/catalog/sites/douban_drama.py b/catalog/sites/douban_drama.py
index 5566d81f..f7882db2 100644
--- a/catalog/sites/douban_drama.py
+++ b/catalog/sites/douban_drama.py
@@ -1,4 +1,3 @@
-import logging
 import re

 from django.core.cache import cache
@@ -10,8 +9,6 @@ from common.models.lang import detect_language

 from .douban import DoubanDownloader

-_logger = logging.getLogger(__name__)
-

 def _cache_key(url):
     return f"$:{url}"
@@ -77,10 +74,10 @@
         }
         if data["opening_date"]:
             d = data["opening_date"].split("-")
-            l = len(d) if len(d) < 6 else 6
-            if l > 3:
+            dl = len(d) if len(d) < 6 else 6
+            if dl > 3:
                 data["opening_date"] = "-".join(d[:3])
-                data["closing_date"] = "-".join(d[0 : 6 - l] + d[3:l])
+                data["closing_date"] = "-".join(d[0 : 6 - dl] + d[3:dl])
         actor_elem = h.xpath(p + "//dt[text()='主演:']/following-sibling::dd[1]/a")
         data["actor"] = []
         for e in actor_elem:
@@ -101,15 +98,6 @@
                 "url": show_url,
             }
         ]
-        if pd.metadata["cover_image_url"]:
-            imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
-            try:
-                pd.cover_image = imgdl.download().content
-                pd.cover_image_extention = imgdl.extention
-            except Exception:
-                _logger.debug(
-                    f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
-                )
         return pd


@@ -213,10 +201,10 @@
         data["opening_date"] = date_elem[0] if date_elem else None
         if data["opening_date"]:
             d = data["opening_date"].split("-")
-            l = len(d) if len(d) < 6 else 6
-            if l > 3:
+            dl = len(d) if len(d) < 6 else 6
+            if dl > 3:
                 data["opening_date"] = "-".join(d[:3])
-                data["closing_date"] = "-".join(d[0 : 6 - l] + d[3:l])
+                data["closing_date"] = "-".join(d[0 : 6 - dl] + d[3:dl])

         data["location"] = [
             s.strip()
@@ -257,13 +245,4 @@
             data["localized_description"] = [{"lang": "zh-cn", "text": data["brief"]}]

         pd = ResourceContent(metadata=data)
-        if pd.metadata["cover_image_url"]:
-            imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
-            try:
-                pd.cover_image = imgdl.download().content
-                pd.cover_image_extention = imgdl.extention
-            except Exception:
-                _logger.debug(
-                    f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
-                )
         return pd
diff --git a/catalog/sites/douban_movie.py b/catalog/sites/douban_movie.py
index 153e76fd..ceab67a0 100644
--- a/catalog/sites/douban_movie.py
+++ b/catalog/sites/douban_movie.py
@@ -37,7 +37,7 @@ class DoubanMovie(AbstractSite):
                 "\n", ""
             )  # strip \n bc multi-line string is not properly coded in json by douban
             d = json.loads(schema_data) if schema_data else {}
-        except Exception as e:
+        except Exception:
             d = {}

         try:
@@ -245,7 +245,6 @@ class DoubanMovie(AbstractSite):
             "TVSeason" if is_series or episodes or season else "Movie"
         )

-        tmdb_season_id = None
         if imdb_code:
             res_data = search_tmdb_by_imdb_id(imdb_code)
             has_movie = (
@@ -302,13 +301,4 @@ class DoubanMovie(AbstractSite):
         ]
         # TODO parse sister seasons
         # pd.metadata['related_resources'] = []
-        if pd.metadata["cover_image_url"]:
-            imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
-            try:
-                pd.cover_image = imgdl.download().content
-                pd.cover_image_extention = imgdl.extention
-            except Exception:
-                _logger.debug(
-                    f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
-                )
         return pd
diff --git a/catalog/sites/douban_music.py b/catalog/sites/douban_music.py
index d458ab90..d45b7582 100644
--- a/catalog/sites/douban_music.py
+++ b/catalog/sites/douban_music.py
@@ -133,13 +133,4 @@ class DoubanMusic(AbstractSite):
             pd.lookup_ids[IdType.GTIN] = gtin
         if isrc:
             pd.lookup_ids[IdType.ISRC] = isrc
-        if pd.metadata["cover_image_url"]:
-            imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
-            try:
-                pd.cover_image = imgdl.download().content
-                pd.cover_image_extention = imgdl.extention
-            except Exception:
-                _logger.debug(
-                    f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
-                )
         return pd
diff --git a/catalog/sites/goodreads.py b/catalog/sites/goodreads.py
index 85403f75..8baeb17a 100644
--- a/catalog/sites/goodreads.py
+++ b/catalog/sites/goodreads.py
@@ -119,15 +119,6 @@ class Goodreads(AbstractSite):
         pd = ResourceContent(metadata=data)
         pd.lookup_ids[IdType.ISBN] = ids.get(IdType.ISBN)
         pd.lookup_ids[IdType.ASIN] = ids.get(IdType.ASIN)
-        if data["cover_image_url"]:
-            imgdl = BasicImageDownloader(data["cover_image_url"], self.url)
-            try:
-                pd.cover_image = imgdl.download().content
-                pd.cover_image_extention = imgdl.extention
-            except Exception:
-                _logger.debug(
-                    f'failed to download cover for {self.url} from {data["cover_image_url"]}'
-                )
         return pd


diff --git a/catalog/sites/igdb.py b/catalog/sites/igdb.py
index 52dc7dfc..595a7913 100644
--- a/catalog/sites/igdb.py
+++ b/catalog/sites/igdb.py
@@ -152,13 +152,4 @@ class IGDB(AbstractSite):
             pd.lookup_ids[IdType.Steam] = SiteManager.get_site_cls_by_id_type(
                 IdType.Steam
             ).url_to_id(steam_url)
-        if pd.metadata["cover_image_url"]:
-            imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
-            try:
-                pd.cover_image = imgdl.download().content
-                pd.cover_image_extention = imgdl.extention
-            except Exception:
-                logger.debug(
-                    f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
-                )
         return pd
diff --git a/catalog/sites/imdb.py b/catalog/sites/imdb.py
index 4ebc9f8a..d563fdd0 100644
--- a/catalog/sites/imdb.py
+++ b/catalog/sites/imdb.py
@@ -116,15 +116,6 @@ class IMDB(AbstractSite):
             data["title"] = re.sub(r"#(\d+).(\d+)", r"S\1E\2", data["title"][8:])
         pd = ResourceContent(metadata=data)
         pd.lookup_ids[IdType.IMDB] = self.id_value
-        if pd.metadata["cover_image_url"]:
-            imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
-            try:
-                pd.cover_image = imgdl.download().content
-                pd.cover_image_extention = imgdl.extention
-            except Exception:
-                _logger.debug(
-                    f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
-                )
         return pd

     @staticmethod
diff --git a/catalog/sites/rss.py b/catalog/sites/rss.py
index 95d5fcbd..9456cc50 100644
--- a/catalog/sites/rss.py
+++ b/catalog/sites/rss.py
@@ -84,7 +84,7 @@ class RSS(AbstractSite):

     def scrape(self):
         if not self.url:
-            raise ValueError(f"no url avaialble in RSS site")
+            raise ValueError("no url avaialble in RSS site")
         feed = self.parse_feed_from_url(self.url)
         if not feed:
             raise ValueError(f"no feed avaialble in {self.url}")
@@ -108,17 +108,6 @@ class RSS(AbstractSite):
             }
         )
         pd.lookup_ids[IdType.RSS] = RSS.url_to_id(self.url)
-        if pd.metadata["cover_image_url"]:
-            imgdl = BasicImageDownloader(
-                pd.metadata["cover_image_url"], feed.get("link") or self.url
-            )
-            try:
-                pd.cover_image = imgdl.download().content
-                pd.cover_image_extention = imgdl.extention
-            except Exception:
-                _logger.warn(
-                    f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
-                )
         return pd

     def scrape_additional_data(self):
diff --git a/catalog/sites/spotify.py b/catalog/sites/spotify.py
index b02ee108..da17ff35 100644
--- a/catalog/sites/spotify.py
+++ b/catalog/sites/spotify.py
@@ -103,15 +103,6 @@ class Spotify(AbstractSite):
             pd.lookup_ids[IdType.GTIN] = gtin
         if isrc:
             pd.lookup_ids[IdType.ISRC] = isrc
-        if pd.metadata["cover_image_url"]:
-            imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
-            try:
-                pd.cover_image = imgdl.download().content
-                pd.cover_image_extention = imgdl.extention
-            except Exception:
-                _logger.debug(
-                    f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
-                )
         return pd


diff --git a/catalog/sites/tmdb.py b/catalog/sites/tmdb.py
index 592069bf..3aac1577 100644
--- a/catalog/sites/tmdb.py
+++ b/catalog/sites/tmdb.py
@@ -172,15 +172,6 @@ class TMDB_Movie(AbstractSite):
         )
         if imdb_code:
             pd.lookup_ids[IdType.IMDB] = imdb_code
-        if pd.metadata["cover_image_url"]:
-            imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
-            try:
-                pd.cover_image = imgdl.download().content
-                pd.cover_image_extention = imgdl.extention
-            except Exception:
-                _logger.debug(
-                    f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
-                )
         return pd


@@ -285,15 +276,6 @@ class TMDB_TV(AbstractSite):
         )
         if imdb_code:
             pd.lookup_ids[IdType.IMDB] = imdb_code
-        if pd.metadata["cover_image_url"]:
-            imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
-            try:
-                pd.cover_image = imgdl.download().content
-                pd.cover_image_extention = imgdl.extention
-            except Exception:
-                _logger.debug(
-                    f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
-                )
         return pd


@@ -380,15 +362,6 @@ class TMDB_TVSeason(AbstractSite):
             map(lambda ep: ep["episode_number"], d["episodes"])
         )
         pd.metadata["episode_count"] = len(pd.metadata["episode_number_list"])
-        if pd.metadata["cover_image_url"]:
-            imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
-            try:
-                pd.cover_image = imgdl.download().content
-                pd.cover_image_extention = imgdl.extention
-            except Exception:
-                _logger.debug(
-                    f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
-                )

         # use show's IMDB (for Season 1) or 1st episode's IMDB (if not Season 1) as this season's IMDB so that it can be compatible with TVSeason data from Douban
         if pd.lookup_ids.get(IdType.IMDB):
@@ -486,15 +459,6 @@ class TMDB_TVEpisode(AbstractSite):
             if pd.metadata["title"]
             else f'S{d["season_number"]} E{d["episode_number"]}'
         )
-        if pd.metadata["cover_image_url"]:
-            imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
-            try:
-                pd.cover_image = imgdl.download().content
-                pd.cover_image_extention = imgdl.extention
-            except Exception:
-                _logger.debug(
-                    f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
-                )

         if pd.lookup_ids.get(IdType.IMDB):
             pd.lookup_ids[IdType.IMDB] = pd.lookup_ids[IdType.IMDB]
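--
Not part of the diff above, just an illustration of the resulting flow: with the new
hunk in catalog/common/models.py, a site's scrape() no longer calls
BasicImageDownloader itself; it only puts cover_image_url into the metadata it
returns, and ExternalResource.update_content() downloads the image when no
cover_image bytes were provided. A minimal sketch under those assumptions
(ExampleSite and the example URL are hypothetical stand-ins for the real site
classes touched by this patch):

    class ExampleSite(AbstractSite):
        def scrape(self):
            # scrape() now just records the cover URL; no download here anymore
            return ResourceContent(
                metadata={
                    "title": "Example Album",
                    "cover_image_url": "https://example.org/cover.jpg",
                }
            )

    # later, when the external resource is saved:
    #   resource.update_content(site.scrape())
    # update_content() sees cover_image_url without cover_image bytes and calls
    # BasicImageDownloader.download_image(url, resource.url) itself.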