lib.itmens/catalog/sites/imdb.py

50 lines
2.3 KiB
Python
Raw Normal View History

from catalog.common import *
from .tmdb import search_tmdb_by_imdb_id
from catalog.movie.models import *
from catalog.tv.models import *
import logging
2022-12-08 16:59:03 +00:00
_logger = logging.getLogger(__name__)
2022-12-15 17:29:35 -05:00
@SiteManager.register
class IMDB(AbstractSite):
2022-12-16 01:08:10 -05:00
SITE_NAME = SiteName.IMDB
ID_TYPE = IdType.IMDB
URL_PATTERNS = [r'\w+://www.imdb.com/title/(tt\d+)']
WIKI_PROPERTY_ID = '?'
@classmethod
def id_to_url(self, id_value):
return "https://www.imdb.com/title/" + id_value + "/"
def scrape(self):
self.scraped = False
res_data = search_tmdb_by_imdb_id(self.id_value)
if 'movie_results' in res_data and len(res_data['movie_results']) > 0:
url = f"https://www.themoviedb.org/movie/{res_data['movie_results'][0]['id']}"
elif 'tv_results' in res_data and len(res_data['tv_results']) > 0:
url = f"https://www.themoviedb.org/tv/{res_data['tv_results'][0]['id']}"
elif 'tv_season_results' in res_data and len(res_data['tv_season_results']) > 0:
# this should not happen given IMDB only has ids for either show or episode
tv_id = res_data['tv_season_results'][0]['show_id']
season_number = res_data['tv_season_results'][0]['season_number']
url = f"https://www.themoviedb.org/tv/{tv_id}/season/{season_number}/episode/{episode_number}"
elif 'tv_episode_results' in res_data and len(res_data['tv_episode_results']) > 0:
tv_id = res_data['tv_episode_results'][0]['show_id']
season_number = res_data['tv_episode_results'][0]['season_number']
episode_number = res_data['tv_episode_results'][0]['episode_number']
if season_number == 0:
url = f"https://www.themoviedb.org/tv/{tv_id}/season/{season_number}/episode/{episode_number}"
elif episode_number == 1:
url = f"https://www.themoviedb.org/tv/{tv_id}/season/{season_number}"
else:
raise ParseError(self, "IMDB id matching TMDB but not first episode, this is not supported")
else:
raise ParseError(self, "IMDB id not found in TMDB")
2022-12-15 17:29:35 -05:00
tmdb = SiteManager.get_site_by_url(url)
pd = tmdb.scrape()
pd.metadata['preferred_model'] = tmdb.DEFAULT_MODEL.__name__
return pd