use TMDB for IMDb
This commit is contained in:
parent
2be843a187
commit
574f88f552
2 changed files with 21 additions and 1 deletions
|
@ -16,10 +16,18 @@ class ImdbMovieScraper(AbstractScraper):
|
|||
regex = re.compile(r"(?<=https://www\.imdb\.com/title/)[a-zA-Z0-9]+")
|
||||
|
||||
def scrape(self, url):
|
||||
|
||||
effective_url = self.get_effective_url(url)
|
||||
if effective_url is None:
|
||||
raise ValueError("not valid url")
|
||||
code = self.regex.findall(effective_url)[0]
|
||||
s = TmdbMovieScraper()
|
||||
s.scrape_imdb(code)
|
||||
self.raw_data = s.raw_data
|
||||
self.raw_img = s.raw_img
|
||||
self.img_ext = s.img_ext
|
||||
self.raw_data['source_site'] = self.site_name
|
||||
self.raw_data['source_url'] = effective_url
|
||||
return self.raw_data, self.raw_img
|
||||
|
||||
api_url = self.get_api_url(effective_url)
|
||||
r = requests.get(api_url)
|
||||
|
|
|
@ -45,6 +45,18 @@ class TmdbMovieScraper(AbstractScraper):
|
|||
'音乐': 'Music',
|
||||
}
|
||||
|
||||
def scrape_imdb(self, imdb_code):
    """Resolve an IMDb ID through TMDB's ``find`` endpoint and scrape the match.

    The TMDB API is queried with ``external_source=imdb_id``. The first
    entry of ``movie_results`` is preferred; if there is none, the first
    entry of ``tv_results`` is used. The corresponding themoviedb.org page
    URL is then handed to ``self.scrape``.

    Args:
        imdb_code: The IMDb title identifier to look up.

    Returns:
        Whatever ``self.scrape(url)`` returns for the resolved TMDB URL.

    Raises:
        ValueError: If TMDB reports neither a movie nor a TV match.
    """
    api_url = f"https://api.themoviedb.org/3/find/{imdb_code}?api_key={settings.TMDB_API3_KEY}&language=zh-CN&external_source=imdb_id"
    # Without a timeout, requests waits forever on a stalled connection.
    r = requests.get(api_url, timeout=10)
    res_data = r.json()
    movie_results = res_data.get('movie_results')
    tv_results = res_data.get('tv_results')
    if movie_results:
        # Movie matches take precedence over TV matches.
        url = f"https://www.themoviedb.org/movie/{movie_results[0]['id']}"
    elif tv_results:
        url = f"https://www.themoviedb.org/tv/{tv_results[0]['id']}"
    else:
        raise ValueError("Cannot find IMDb ID in TMDB")
    return self.scrape(url)
|
||||
|
||||
def scrape(self, url):
|
||||
m = self.regex.match(url)
|
||||
if m:
|
||||
|
|
Loading…
Add table
Reference in a new issue