use TMDB for IMDb

This commit is contained in:
Your Name 2022-09-07 10:05:56 -04:00
parent 2be843a187
commit 574f88f552
2 changed files with 21 additions and 1 deletions

View file

@ -16,10 +16,18 @@ class ImdbMovieScraper(AbstractScraper):
regex = re.compile(r"(?<=https://www\.imdb\.com/title/)[a-zA-Z0-9]+")
def scrape(self, url):
effective_url = self.get_effective_url(url)
if effective_url is None:
raise ValueError("not valid url")
code = self.regex.findall(effective_url)[0]
s = TmdbMovieScraper()
s.scrape_imdb(code)
self.raw_data = s.raw_data
self.raw_img = s.raw_img
self.img_ext = s.img_ext
self.raw_data['source_site'] = self.site_name
self.raw_data['source_url'] = effective_url
return self.raw_data, self.raw_img
api_url = self.get_api_url(effective_url)
r = requests.get(api_url)

View file

@ -45,6 +45,18 @@ class TmdbMovieScraper(AbstractScraper):
'音乐': 'Music',
}
def scrape_imdb(self, imdb_code, timeout=10):
    """Look up an IMDb code via TMDB's "find" endpoint and scrape the match.

    The TMDB response is checked for movie matches first and TV matches
    second; the first entry of the first non-empty list is turned into a
    themoviedb.org URL and handed to ``self.scrape``.

    Args:
        imdb_code: IMDb title identifier to resolve (inserted into the
            request path as-is).
        timeout: Seconds to wait for TMDB before ``requests`` gives up.

    Returns:
        Whatever ``self.scrape(url)`` returns for the resolved TMDB URL.

    Raises:
        ValueError: If TMDB reports neither a movie nor a TV match.
    """
    api_url = f"https://api.themoviedb.org/3/find/{imdb_code}"
    query = {
        'api_key': settings.TMDB_API3_KEY,
        'language': 'zh-CN',
        'external_source': 'imdb_id',
    }
    # Without an explicit timeout, requests waits indefinitely on a stalled
    # connection, which would hang the caller.
    r = requests.get(api_url, params=query, timeout=timeout)
    res_data = r.json()

    # Order matters: a movie match takes precedence over a TV match.
    for result_key, media_path in (('movie_results', 'movie'), ('tv_results', 'tv')):
        matches = res_data.get(result_key)
        if matches:
            url = f"https://www.themoviedb.org/{media_path}/{matches[0]['id']}"
            return self.scrape(url)

    raise ValueError("Cannot find IMDb ID in TMDB")
def scrape(self, url):
m = self.regex.match(url)
if m: