diff --git a/common/scrapers/imdb.py b/common/scrapers/imdb.py index 5029c46e..97c040bb 100644 --- a/common/scrapers/imdb.py +++ b/common/scrapers/imdb.py @@ -16,10 +16,18 @@ class ImdbMovieScraper(AbstractScraper): regex = re.compile(r"(?<=https://www\.imdb\.com/title/)[a-zA-Z0-9]+") def scrape(self, url): - effective_url = self.get_effective_url(url) if effective_url is None: raise ValueError("not valid url") + code = self.regex.findall(effective_url)[0] + s = TmdbMovieScraper() + s.scrape_imdb(code) + self.raw_data = s.raw_data + self.raw_img = s.raw_img + self.img_ext = s.img_ext + self.raw_data['source_site'] = self.site_name + self.raw_data['source_url'] = effective_url + return self.raw_data, self.raw_img api_url = self.get_api_url(effective_url) r = requests.get(api_url) diff --git a/common/scrapers/tmdb.py b/common/scrapers/tmdb.py index 228e6559..15072add 100644 --- a/common/scrapers/tmdb.py +++ b/common/scrapers/tmdb.py @@ -45,6 +45,18 @@ class TmdbMovieScraper(AbstractScraper): '音乐': 'Music', } + def scrape_imdb(self, imdb_code): + api_url = f"https://api.themoviedb.org/3/find/{imdb_code}?api_key={settings.TMDB_API3_KEY}&language=zh-CN&external_source=imdb_id" + r = requests.get(api_url) + res_data = r.json() + if 'movie_results' in res_data and len(res_data['movie_results']) > 0: + url = f"https://www.themoviedb.org/movie/{res_data['movie_results'][0]['id']}" + elif 'tv_results' in res_data and len(res_data['tv_results']) > 0: + url = f"https://www.themoviedb.org/tv/{res_data['tv_results'][0]['id']}" + else: + raise ValueError("Cannot find IMDb ID in TMDB") + return self.scrape(url) + def scrape(self, url): m = self.regex.match(url) if m: