move cover download out of scrape()
This commit is contained in:
parent
976319247c
commit
b4bdc58cad
16 changed files with 26 additions and 194 deletions
|
@ -891,12 +891,24 @@ class ExternalResource(models.Model):
|
|||
def update_content(self, resource_content: "ResourceContent"):
|
||||
self.other_lookup_ids = resource_content.lookup_ids
|
||||
self.metadata = resource_content.metadata
|
||||
if (
|
||||
resource_content.metadata.get("cover_image_url")
|
||||
and not resource_content.cover_image
|
||||
):
|
||||
from .downloaders import BasicImageDownloader
|
||||
|
||||
(
|
||||
resource_content.cover_image,
|
||||
resource_content.cover_image_extention,
|
||||
) = BasicImageDownloader.download_image(
|
||||
resource_content.metadata.get("cover_image_url"), self.url
|
||||
)
|
||||
if resource_content.cover_image and resource_content.cover_image_extention:
|
||||
self.cover = SimpleUploadedFile(
|
||||
"temp." + resource_content.cover_image_extention,
|
||||
resource_content.cover_image,
|
||||
)
|
||||
else:
|
||||
elif resource_content.metadata.get("cover_image_path"):
|
||||
self.cover = resource_content.metadata.get("cover_image_path")
|
||||
self.scraped_time = timezone.now()
|
||||
self.save()
|
||||
|
|
|
@ -56,8 +56,8 @@ class AppleMusic(AbstractSite):
|
|||
|
||||
def get_locales(self):
|
||||
locales = {}
|
||||
for l in PREFERRED_LANGUAGES:
|
||||
match l:
|
||||
for lang in PREFERRED_LANGUAGES:
|
||||
match lang:
|
||||
case "zh":
|
||||
locales.update({"zh": ["cn", "tw", "hk", "sg"]})
|
||||
case "en":
|
||||
|
@ -94,10 +94,10 @@ class AppleMusic(AbstractSite):
|
|||
brief = album_data.get("modalPresentationDescriptor", {}).get(
|
||||
"paragraphText", ""
|
||||
)
|
||||
l = detect_language(title + " " + brief)
|
||||
localized_title.append({"lang": l, "text": title})
|
||||
tl = detect_language(title + " " + brief)
|
||||
localized_title.append({"lang": tl, "text": title})
|
||||
if brief:
|
||||
localized_desc.append({"lang": l, "text": brief})
|
||||
localized_desc.append({"lang": tl, "text": brief})
|
||||
if lang == DEFAULT_CATALOG_LANGUAGE or not matched_content:
|
||||
matched_content = content
|
||||
break
|
||||
|
@ -155,13 +155,4 @@ class AppleMusic(AbstractSite):
|
|||
"cover_image_url": image_url,
|
||||
}
|
||||
)
|
||||
if pd.metadata["cover_image_url"]:
|
||||
imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
|
||||
try:
|
||||
pd.cover_image = imgdl.download().content
|
||||
pd.cover_image_extention = imgdl.extention
|
||||
except Exception:
|
||||
_logger.debug(
|
||||
f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
|
||||
)
|
||||
return pd
|
||||
|
|
|
@ -37,13 +37,4 @@ class ApplePodcast(AbstractSite):
|
|||
}
|
||||
)
|
||||
pd.lookup_ids[IdType.RSS] = RSS.url_to_id(feed_url)
|
||||
if pd.metadata["cover_image_url"]:
|
||||
imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
|
||||
try:
|
||||
pd.cover_image = imgdl.download().content
|
||||
pd.cover_image_extention = imgdl.extention
|
||||
except Exception:
|
||||
_logger.debug(
|
||||
f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
|
||||
)
|
||||
return pd
|
||||
|
|
|
@ -102,13 +102,4 @@ class Bandcamp(AbstractSite):
|
|||
"cover_image_url": cover_url,
|
||||
}
|
||||
pd = ResourceContent(metadata=data)
|
||||
if data["cover_image_url"]:
|
||||
imgdl = BasicImageDownloader(data["cover_image_url"], self.url)
|
||||
try:
|
||||
pd.cover_image = imgdl.download().content
|
||||
pd.cover_image_extention = imgdl.extention
|
||||
except Exception:
|
||||
_logger.debug(
|
||||
f'failed to download cover for {self.url} from {data["cover_image_url"]}'
|
||||
)
|
||||
return pd
|
||||
|
|
|
@ -69,13 +69,4 @@ class BoardGameGeek(AbstractSite):
|
|||
"cover_image_url": cover_image_url,
|
||||
}
|
||||
)
|
||||
if pd.metadata["cover_image_url"]:
|
||||
imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
|
||||
try:
|
||||
pd.cover_image = imgdl.download().content
|
||||
pd.cover_image_extention = imgdl.extention
|
||||
except Exception:
|
||||
logger.debug(
|
||||
f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
|
||||
)
|
||||
return pd
|
||||
|
|
|
@ -76,15 +76,6 @@ class DiscogsRelease(AbstractSite):
|
|||
)
|
||||
if barcode:
|
||||
pd.lookup_ids[IdType.GTIN] = barcode
|
||||
if pd.metadata["cover_image_url"]:
|
||||
imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
|
||||
try:
|
||||
pd.cover_image = imgdl.download().content
|
||||
pd.cover_image_extention = imgdl.extention
|
||||
except Exception:
|
||||
_logger.debug(
|
||||
f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
|
||||
)
|
||||
return pd
|
||||
|
||||
|
||||
|
@ -122,15 +113,6 @@ class DiscogsMaster(AbstractSite):
|
|||
"cover_image_url": image_url,
|
||||
}
|
||||
)
|
||||
if pd.metadata["cover_image_url"]:
|
||||
imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
|
||||
try:
|
||||
pd.cover_image = imgdl.download().content
|
||||
pd.cover_image_extention = imgdl.extention
|
||||
except Exception:
|
||||
_logger.debug(
|
||||
f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
|
||||
)
|
||||
return pd
|
||||
|
||||
|
||||
|
|
|
@ -236,9 +236,6 @@ class DoubanBook(AbstractSite):
|
|||
if t:
|
||||
pd.lookup_ids[t] = n
|
||||
pd.lookup_ids[IdType.CUBN] = cubn
|
||||
pd.cover_image, pd.cover_image_extention = BasicImageDownloader.download_image(
|
||||
img_url, self.url
|
||||
)
|
||||
return pd
|
||||
|
||||
|
||||
|
|
|
@ -1,4 +1,3 @@
|
|||
import logging
|
||||
import re
|
||||
|
||||
from django.core.cache import cache
|
||||
|
@ -10,8 +9,6 @@ from common.models.lang import detect_language
|
|||
|
||||
from .douban import DoubanDownloader
|
||||
|
||||
_logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _cache_key(url):
|
||||
return f"$:{url}"
|
||||
|
@ -77,10 +74,10 @@ class DoubanDramaVersion(AbstractSite):
|
|||
}
|
||||
if data["opening_date"]:
|
||||
d = data["opening_date"].split("-")
|
||||
l = len(d) if len(d) < 6 else 6
|
||||
if l > 3:
|
||||
dl = len(d) if len(d) < 6 else 6
|
||||
if dl > 3:
|
||||
data["opening_date"] = "-".join(d[:3])
|
||||
data["closing_date"] = "-".join(d[0 : 6 - l] + d[3:l])
|
||||
data["closing_date"] = "-".join(d[0 : 6 - dl] + d[3:dl])
|
||||
actor_elem = h.xpath(p + "//dt[text()='主演:']/following-sibling::dd[1]/a")
|
||||
data["actor"] = []
|
||||
for e in actor_elem:
|
||||
|
@ -101,15 +98,6 @@ class DoubanDramaVersion(AbstractSite):
|
|||
"url": show_url,
|
||||
}
|
||||
]
|
||||
if pd.metadata["cover_image_url"]:
|
||||
imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
|
||||
try:
|
||||
pd.cover_image = imgdl.download().content
|
||||
pd.cover_image_extention = imgdl.extention
|
||||
except Exception:
|
||||
_logger.debug(
|
||||
f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
|
||||
)
|
||||
return pd
|
||||
|
||||
|
||||
|
@ -213,10 +201,10 @@ class DoubanDrama(AbstractSite):
|
|||
data["opening_date"] = date_elem[0] if date_elem else None
|
||||
if data["opening_date"]:
|
||||
d = data["opening_date"].split("-")
|
||||
l = len(d) if len(d) < 6 else 6
|
||||
if l > 3:
|
||||
dl = len(d) if len(d) < 6 else 6
|
||||
if dl > 3:
|
||||
data["opening_date"] = "-".join(d[:3])
|
||||
data["closing_date"] = "-".join(d[0 : 6 - l] + d[3:l])
|
||||
data["closing_date"] = "-".join(d[0 : 6 - dl] + d[3:dl])
|
||||
|
||||
data["location"] = [
|
||||
s.strip()
|
||||
|
@ -257,13 +245,4 @@ class DoubanDrama(AbstractSite):
|
|||
data["localized_description"] = [{"lang": "zh-cn", "text": data["brief"]}]
|
||||
|
||||
pd = ResourceContent(metadata=data)
|
||||
if pd.metadata["cover_image_url"]:
|
||||
imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
|
||||
try:
|
||||
pd.cover_image = imgdl.download().content
|
||||
pd.cover_image_extention = imgdl.extention
|
||||
except Exception:
|
||||
_logger.debug(
|
||||
f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
|
||||
)
|
||||
return pd
|
||||
|
|
|
@ -37,7 +37,7 @@ class DoubanMovie(AbstractSite):
|
|||
"\n", ""
|
||||
) # strip \n bc multi-line string is not properly coded in json by douban
|
||||
d = json.loads(schema_data) if schema_data else {}
|
||||
except Exception as e:
|
||||
except Exception:
|
||||
d = {}
|
||||
|
||||
try:
|
||||
|
@ -245,7 +245,6 @@ class DoubanMovie(AbstractSite):
|
|||
"TVSeason" if is_series or episodes or season else "Movie"
|
||||
)
|
||||
|
||||
tmdb_season_id = None
|
||||
if imdb_code:
|
||||
res_data = search_tmdb_by_imdb_id(imdb_code)
|
||||
has_movie = (
|
||||
|
@ -302,13 +301,4 @@ class DoubanMovie(AbstractSite):
|
|||
]
|
||||
# TODO parse sister seasons
|
||||
# pd.metadata['related_resources'] = []
|
||||
if pd.metadata["cover_image_url"]:
|
||||
imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
|
||||
try:
|
||||
pd.cover_image = imgdl.download().content
|
||||
pd.cover_image_extention = imgdl.extention
|
||||
except Exception:
|
||||
_logger.debug(
|
||||
f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
|
||||
)
|
||||
return pd
|
||||
|
|
|
@ -133,13 +133,4 @@ class DoubanMusic(AbstractSite):
|
|||
pd.lookup_ids[IdType.GTIN] = gtin
|
||||
if isrc:
|
||||
pd.lookup_ids[IdType.ISRC] = isrc
|
||||
if pd.metadata["cover_image_url"]:
|
||||
imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
|
||||
try:
|
||||
pd.cover_image = imgdl.download().content
|
||||
pd.cover_image_extention = imgdl.extention
|
||||
except Exception:
|
||||
_logger.debug(
|
||||
f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
|
||||
)
|
||||
return pd
|
||||
|
|
|
@ -119,15 +119,6 @@ class Goodreads(AbstractSite):
|
|||
pd = ResourceContent(metadata=data)
|
||||
pd.lookup_ids[IdType.ISBN] = ids.get(IdType.ISBN)
|
||||
pd.lookup_ids[IdType.ASIN] = ids.get(IdType.ASIN)
|
||||
if data["cover_image_url"]:
|
||||
imgdl = BasicImageDownloader(data["cover_image_url"], self.url)
|
||||
try:
|
||||
pd.cover_image = imgdl.download().content
|
||||
pd.cover_image_extention = imgdl.extention
|
||||
except Exception:
|
||||
_logger.debug(
|
||||
f'failed to download cover for {self.url} from {data["cover_image_url"]}'
|
||||
)
|
||||
return pd
|
||||
|
||||
|
||||
|
|
|
@ -152,13 +152,4 @@ class IGDB(AbstractSite):
|
|||
pd.lookup_ids[IdType.Steam] = SiteManager.get_site_cls_by_id_type(
|
||||
IdType.Steam
|
||||
).url_to_id(steam_url)
|
||||
if pd.metadata["cover_image_url"]:
|
||||
imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
|
||||
try:
|
||||
pd.cover_image = imgdl.download().content
|
||||
pd.cover_image_extention = imgdl.extention
|
||||
except Exception:
|
||||
logger.debug(
|
||||
f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
|
||||
)
|
||||
return pd
|
||||
|
|
|
@ -116,15 +116,6 @@ class IMDB(AbstractSite):
|
|||
data["title"] = re.sub(r"#(\d+).(\d+)", r"S\1E\2", data["title"][8:])
|
||||
pd = ResourceContent(metadata=data)
|
||||
pd.lookup_ids[IdType.IMDB] = self.id_value
|
||||
if pd.metadata["cover_image_url"]:
|
||||
imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
|
||||
try:
|
||||
pd.cover_image = imgdl.download().content
|
||||
pd.cover_image_extention = imgdl.extention
|
||||
except Exception:
|
||||
_logger.debug(
|
||||
f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
|
||||
)
|
||||
return pd
|
||||
|
||||
@staticmethod
|
||||
|
|
|
@ -84,7 +84,7 @@ class RSS(AbstractSite):
|
|||
|
||||
def scrape(self):
|
||||
if not self.url:
|
||||
raise ValueError(f"no url avaialble in RSS site")
|
||||
raise ValueError("no url avaialble in RSS site")
|
||||
feed = self.parse_feed_from_url(self.url)
|
||||
if not feed:
|
||||
raise ValueError(f"no feed avaialble in {self.url}")
|
||||
|
@ -108,17 +108,6 @@ class RSS(AbstractSite):
|
|||
}
|
||||
)
|
||||
pd.lookup_ids[IdType.RSS] = RSS.url_to_id(self.url)
|
||||
if pd.metadata["cover_image_url"]:
|
||||
imgdl = BasicImageDownloader(
|
||||
pd.metadata["cover_image_url"], feed.get("link") or self.url
|
||||
)
|
||||
try:
|
||||
pd.cover_image = imgdl.download().content
|
||||
pd.cover_image_extention = imgdl.extention
|
||||
except Exception:
|
||||
_logger.warn(
|
||||
f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
|
||||
)
|
||||
return pd
|
||||
|
||||
def scrape_additional_data(self):
|
||||
|
|
|
@ -103,15 +103,6 @@ class Spotify(AbstractSite):
|
|||
pd.lookup_ids[IdType.GTIN] = gtin
|
||||
if isrc:
|
||||
pd.lookup_ids[IdType.ISRC] = isrc
|
||||
if pd.metadata["cover_image_url"]:
|
||||
imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
|
||||
try:
|
||||
pd.cover_image = imgdl.download().content
|
||||
pd.cover_image_extention = imgdl.extention
|
||||
except Exception:
|
||||
_logger.debug(
|
||||
f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
|
||||
)
|
||||
return pd
|
||||
|
||||
|
||||
|
|
|
@ -172,15 +172,6 @@ class TMDB_Movie(AbstractSite):
|
|||
)
|
||||
if imdb_code:
|
||||
pd.lookup_ids[IdType.IMDB] = imdb_code
|
||||
if pd.metadata["cover_image_url"]:
|
||||
imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
|
||||
try:
|
||||
pd.cover_image = imgdl.download().content
|
||||
pd.cover_image_extention = imgdl.extention
|
||||
except Exception:
|
||||
_logger.debug(
|
||||
f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
|
||||
)
|
||||
return pd
|
||||
|
||||
|
||||
|
@ -285,15 +276,6 @@ class TMDB_TV(AbstractSite):
|
|||
)
|
||||
if imdb_code:
|
||||
pd.lookup_ids[IdType.IMDB] = imdb_code
|
||||
if pd.metadata["cover_image_url"]:
|
||||
imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
|
||||
try:
|
||||
pd.cover_image = imgdl.download().content
|
||||
pd.cover_image_extention = imgdl.extention
|
||||
except Exception:
|
||||
_logger.debug(
|
||||
f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
|
||||
)
|
||||
return pd
|
||||
|
||||
|
||||
|
@ -380,15 +362,6 @@ class TMDB_TVSeason(AbstractSite):
|
|||
map(lambda ep: ep["episode_number"], d["episodes"])
|
||||
)
|
||||
pd.metadata["episode_count"] = len(pd.metadata["episode_number_list"])
|
||||
if pd.metadata["cover_image_url"]:
|
||||
imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
|
||||
try:
|
||||
pd.cover_image = imgdl.download().content
|
||||
pd.cover_image_extention = imgdl.extention
|
||||
except Exception:
|
||||
_logger.debug(
|
||||
f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
|
||||
)
|
||||
|
||||
# use show's IMDB (for Season 1) or 1st episode's IMDB (if not Season 1) as this season's IMDB so that it can be compatible with TVSeason data from Douban
|
||||
if pd.lookup_ids.get(IdType.IMDB):
|
||||
|
@ -486,15 +459,6 @@ class TMDB_TVEpisode(AbstractSite):
|
|||
if pd.metadata["title"]
|
||||
else f'S{d["season_number"]} E{d["episode_number"]}'
|
||||
)
|
||||
if pd.metadata["cover_image_url"]:
|
||||
imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
|
||||
try:
|
||||
pd.cover_image = imgdl.download().content
|
||||
pd.cover_image_extention = imgdl.extention
|
||||
except Exception:
|
||||
_logger.debug(
|
||||
f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
|
||||
)
|
||||
|
||||
if pd.lookup_ids.get(IdType.IMDB):
|
||||
pd.lookup_ids[IdType.IMDB] = pd.lookup_ids[IdType.IMDB]
|
||||
|
|
Loading…
Add table
Reference in a new issue