# catalog/sites/douban_movie.py
import json
import logging

from catalog.common import *
from catalog.movie.models import *
from catalog.tv.models import *
from common.models.lang import detect_language

from .douban import *
from .tmdb import TMDB_TV, TMDB_TVSeason, query_tmdb_tv_episode, search_tmdb_by_imdb_id
_logger = logging.getLogger(__name__)
@SiteManager.register
class DoubanMovie(AbstractSite):
    """Scraper for Douban subject pages (https://movie.douban.com/subject/<id>/).

    The same Douban URL namespace covers both films and TV seasons, so there
    is no DEFAULT_MODEL: ``scrape`` picks "Movie" or "TVSeason" heuristically
    and may be corrected by a TMDB lookup keyed on the page's IMDB id.
    """

    SITE_NAME = SiteName.Douban
    ID_TYPE = IdType.DoubanMovie
    URL_PATTERNS = [
        r"\w+://movie\.douban\.com/subject/(\d+)/{0,1}",
        r"\w+://m.douban.com/movie/subject/(\d+)/{0,1}",
        r"\w+://www.douban.com/doubanapp/dispatch\?uri=/movie/(\d+)/",
    ]
    WIKI_PROPERTY_ID = "?"
    # no DEFAULT_MODEL as it may be either TV Season and Movie

    @classmethod
    def id_to_url(cls, id_value):
        """Build the canonical desktop URL for a Douban subject id."""
        return "https://movie.douban.com/subject/" + id_value + "/"

    def scrape(self):
        """Download and parse a Douban subject page into a ResourceContent.

        Returns:
            ResourceContent whose metadata carries a "preferred_model" of
            "TVSeason" or "Movie", an IMDB lookup id when present, and — for
            TV seasons matched on TMDB — a required TVShow resource.

        Raises:
            ParseError: if the page has no title element.
        """
        content = DoubanDownloader(self.url).download().html()

        # Douban embeds schema.org JSON-LD; its multi-line strings contain raw
        # newlines, which is invalid JSON, so strip them before parsing.
        try:
            schema_data = "".join(
                content.xpath('//script[@type="application/ld+json"]/text()')
            ).replace("\n", "")
            d = json.loads(schema_data) if schema_data else {}
        except Exception:
            d = {}

        try:
            raw_title = content.xpath("//span[@property='v:itemreviewed']/text()")[
                0
            ].strip()
        except IndexError:
            raise ParseError(self, "title")

        # raw_title is usually "<chinese title> <original title>"; the cover
        # image's alt text holds just the original title, so split on it.
        orig_title = content.xpath("//img[@rel='v:image']/@alt")[0].strip()
        title = raw_title.split(orig_title)[0].strip()
        # if has no chinese title
        if title == "":
            title = orig_title
        if title == orig_title:
            orig_title = None

        other_title_elem = content.xpath(
            "//div[@id='info']//span[text()='又名:']/following-sibling::text()[1]"
        )
        other_title = (
            other_title_elem[0].strip().split(" / ") if other_title_elem else None
        )

        # IMDB id appears either as a link or as plain text depending on page version
        imdb_elem = content.xpath(
            "//div[@id='info']//span[text()='IMDb链接:']/following-sibling::a[1]/text()"
        )
        if not imdb_elem:
            imdb_elem = content.xpath(
                "//div[@id='info']//span[text()='IMDb:']/following-sibling::text()[1]"
            )
        imdb_code = imdb_elem[0].strip() if imdb_elem else None

        director_elem = content.xpath(
            "//div[@id='info']//span[text()='导演']/following-sibling::span[1]/a/text()"
        )
        director = director_elem if director_elem else None

        playwright_elem = content.xpath(
            "//div[@id='info']//span[text()='编剧']/following-sibling::span[1]/a/text()"
        )
        playwright = (
            list(map(lambda a: a[:200], playwright_elem)) if playwright_elem else None
        )

        actor_elem = content.xpath(
            "//div[@id='info']//span[text()='主演']/following-sibling::span[1]/a/text()"
        )
        actor = list(map(lambda a: a[:200], actor_elem)) if actor_elem else None

        genre_elem = content.xpath("//span[@property='v:genre']/text()")
        genre = []
        if genre_elem:
            for g in genre_elem:
                g = g.split(" ")[0]
                if g == "紀錄片":  # likely some original data on douban was corrupted
                    g = "纪录片"
                elif g == "鬼怪":
                    g = "惊悚"
                genre.append(g)

        # release dates look like "1999-10-01(中国大陆)"; split off the region
        showtime_elem = content.xpath("//span[@property='v:initialReleaseDate']/text()")
        if showtime_elem:
            showtime = []
            for st in showtime_elem:
                parts = st.split("(")
                if len(parts) == 1:
                    time = parts[0]
                    region = ""
                else:
                    time = parts[0]
                    region = parts[1][0:-1]  # drop the trailing ")"
                showtime.append(
                    {
                        "region": region,
                        "time": time,
                    }
                )
        else:
            showtime = None

        site_elem = content.xpath(
            "//div[@id='info']//span[text()='官方网站:']/following-sibling::a[1]/@href"
        )
        site = site_elem[0].strip()[:200] if site_elem else None
        # discard non-http(s) "official site" values (e.g. javascript: or weibo handles)
        if site and not re.match(r"http.+", site):
            site = None

        area_elem = content.xpath(
            "//div[@id='info']//span[text()='制片国家/地区:']/following-sibling::text()[1]"
        )
        if area_elem:
            area = [a.strip()[:100] for a in area_elem[0].split("/")]
        else:
            area = None

        language_elem = content.xpath(
            "//div[@id='info']//span[text()='语言:']/following-sibling::text()[1]"
        )
        if language_elem:
            language = [a.strip() for a in language_elem[0].split(" / ")]
        else:
            language = None

        year_elem = content.xpath("//span[@class='year']/text()")
        year = (
            int(re.search(r"\d+", year_elem[0])[0])
            if year_elem and re.search(r"\d+", year_elem[0])
            else None
        )

        # runtime may be split across a v:runtime span and a trailing text node
        # like " / 123分钟(导演剪辑版)"; keep only the part before the first "/"
        duration_elem = content.xpath("//span[@property='v:runtime']/text()")
        other_duration_elem = content.xpath(
            "//span[@property='v:runtime']/following-sibling::text()[1]"
        )
        if duration_elem:
            duration = duration_elem[0].strip()
            if other_duration_elem:
                duration += other_duration_elem[0].rstrip()
            duration = duration.split("/")[0].strip()
        else:
            duration = None

        # season number: prefer the season <select> dropdown, fall back to the
        # "季数:" info row
        season_elem = content.xpath(
            "//*[@id='season']/option[@selected='selected']/text()"
        )
        if not season_elem:
            season_elem = content.xpath(
                "//div[@id='info']//span[text()='季数:']/following-sibling::text()[1]"
            )
            season = int(season_elem[0].strip()) if season_elem else None
        else:
            season = int(season_elem[0].strip())

        episodes_elem = content.xpath(
            "//div[@id='info']//span[text()='集数:']/following-sibling::text()[1]"
        )
        episodes = (
            int(episodes_elem[0].strip())
            if episodes_elem and episodes_elem[0].strip().isdigit()
            else None
        )

        single_episode_length_elem = content.xpath(
            "//div[@id='info']//span[text()='单集片长:']/following-sibling::text()[1]"
        )
        single_episode_length = (
            single_episode_length_elem[0].strip()[:100]
            if single_episode_length_elem
            else None
        )

        # treat as a series when JSON-LD says so, or when an episode count exists
        is_series = d.get("@type") == "TVSeries" or episodes is not None

        brief_elem = content.xpath("//span[@class='all hidden']")
        if not brief_elem:
            brief_elem = content.xpath("//span[@property='v:summary']")
        brief = (
            "\n".join([e.strip() for e in brief_elem[0].xpath("./text()")])
            if brief_elem
            else None
        )

        img_url_elem = content.xpath("//img[@rel='v:image']/@src")
        img_url = img_url_elem[0].strip() if img_url_elem else None

        titles = set(
            [title]
            + ([orig_title] if orig_title else [])
            + (other_title if other_title else [])
        )
        localized_title = [{"lang": detect_language(t), "text": t} for t in titles]
        # only build a localized description when a summary exists, so we never
        # pass None to detect_language
        localized_desc = (
            [{"lang": detect_language(brief), "text": brief}] if brief else []
        )

        pd = ResourceContent(
            metadata={
                "title": title,
                "localized_title": localized_title,
                "localized_description": localized_desc,
                "orig_title": orig_title,
                "other_title": other_title,
                "imdb_code": imdb_code,
                "director": director,
                "playwright": playwright,
                "actor": actor,
                "genre": genre,
                "showtime": showtime,
                "site": site,
                "area": area,
                "language": language,
                "year": year,
                "duration": duration,
                "season_number": season,
                "episode_count": episodes,
                "single_episode_length": single_episode_length,
                "brief": brief,
                "is_series": is_series,
                "cover_image_url": img_url,
            }
        )
        pd.metadata["preferred_model"] = (
            "TVSeason" if is_series or episodes or season else "Movie"
        )

        # cross-check the model choice against TMDB via the IMDB id
        tmdb_season_id = None
        if imdb_code:
            res_data = search_tmdb_by_imdb_id(imdb_code)
            has_movie = (
                "movie_results" in res_data and len(res_data["movie_results"]) > 0
            )
            has_tv = "tv_results" in res_data and len(res_data["tv_results"]) > 0
            has_episode = (
                "tv_episode_results" in res_data
                and len(res_data["tv_episode_results"]) > 0
            )
            if pd.metadata["preferred_model"] == "TVSeason" and has_tv:
                # the IMDB id belongs to the whole show, so this can only be season 1
                if (
                    pd.metadata.get("season_number")
                    and pd.metadata.get("season_number") != 1
                ):
                    _logger.warning(f"{imdb_code} matched imdb tv show, force season 1")
                pd.metadata["season_number"] = 1
            elif pd.metadata["preferred_model"] == "TVSeason" and has_episode:
                if res_data["tv_episode_results"][0]["episode_number"] != 1:
                    _logger.warning(
                        f"Douban Movie {self.url} IMDB {imdb_code} mapping to non-first episode in a season"
                    )
                # NOTE(review): warning on season_number == 1 looks inverted
                # (!= 1 seems more plausible) — confirm intent before changing
                elif res_data["tv_episode_results"][0]["season_number"] == 1:
                    _logger.warning(
                        f"Douban Movie {self.url} IMDB {imdb_code} mapping to first season episode in a season"
                    )
            elif has_movie:
                if pd.metadata["preferred_model"] != "Movie":
                    _logger.warning(f"{imdb_code} matched imdb movie, force Movie")
                    pd.metadata["preferred_model"] = "Movie"
            elif has_tv or has_episode:
                _logger.warning(f"{imdb_code} matched imdb tv/episode, force TVSeason")
                pd.metadata["preferred_model"] = "TVSeason"
            else:
                _logger.warning(f"{imdb_code} unknown to TMDB")
            pd.lookup_ids[IdType.IMDB] = imdb_code

            # for a TV season, record the parent TMDB show as a required resource
            if pd.metadata["preferred_model"] == "TVSeason":
                tmdb_show_id = None
                if has_tv:
                    tmdb_show_id = res_data["tv_results"][0]["id"]
                elif has_episode:
                    tmdb_show_id = res_data["tv_episode_results"][0]["show_id"]
                if tmdb_show_id:
                    pd.metadata["required_resources"] = [
                        {
                            "model": "TVShow",
                            "id_type": IdType.TMDB_TV,
                            "id_value": tmdb_show_id,
                            "title": title,
                            "url": TMDB_TV.id_to_url(tmdb_show_id),
                        }
                    ]
        # TODO parse sister seasons
        # pd.metadata['related_resources'] = []

        # best-effort cover download; failure only logs, never aborts the scrape
        if pd.metadata["cover_image_url"]:
            imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
            try:
                pd.cover_image = imgdl.download().content
                pd.cover_image_extention = imgdl.extention
            except Exception:
                _logger.debug(
                    f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
                )
        return pd