add douban book works; add goodreads works; auto-link season to show

Your Name 2022-12-08 05:53:00 +00:00
parent 9e219bfac9
commit 3a95e5fc5c
16 changed files with 15041 additions and 67 deletions

View file

@@ -47,15 +47,14 @@ class Edition(Item):
     def update_linked_items_from_extenal_page(self, page):
         """add Work from page.metadata['work'] if not yet"""
-        w = page.metadata.get('work', None)
-        if w:
-            work = Work.objects.filter(primary_lookup_id_type=w['lookup_id_type'], primary_lookup_id_value=w['lookup_id_value']).first()
-            if work:
-                if any(edition == self for edition in work.editions.all()):
-                    return
-            else:
-                work = Work.objects.create(primary_lookup_id_type=w['lookup_id_type'], primary_lookup_id_value=w['lookup_id_value'], title=w['title'])
-            work.editions.add(self)
+        links = page.required_pages + page.related_pages
+        for w in links:
+            if w['model'] == 'Work':
+                work = Work.objects.filter(primary_lookup_id_type=w['id_type'], primary_lookup_id_value=w['id_value']).first()
+                if work and work not in self.works.all():
+                    self.works.add(work)
+                # if not work:
+                #     logger.info(f'Unable to find link for {w["url"]}')


 class Work(Item):
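
The hunk above replaces the single metadata['work'] dict with a list of link descriptors drawn from required_pages and related_pages. A minimal sketch of the matching step, with plain dicts standing in for the Django models (find_work and the in-memory works list are illustrative, not part of the codebase):

    # Sketch: match a 'Work' link descriptor against known works, analogous to
    # Work.objects.filter(primary_lookup_id_type=..., primary_lookup_id_value=...).first()
    works = [
        {'id_type': 'Goodreads_Work', 'id_value': '1383900', 'title': 'Hyperion'},
    ]

    def find_work(link):
        return next((w for w in works
                     if w['id_type'] == link['id_type']
                     and w['id_value'] == link['id_value']), None)

    link = {'model': 'Work', 'id_type': 'Goodreads_Work', 'id_value': '1383900',
            'title': 'Hyperion', 'url': 'https://www.goodreads.com/work/editions/1383900'}
    assert find_work(link)['title'] == 'Hyperion'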

View file

@@ -78,8 +78,7 @@ class GoodreadsTestCase(TestCase):
         self.assertEqual(site.ready, True)
         self.assertEqual(site.page.metadata.get('title'), 'Hyperion')
         self.assertEqual(site.page.metadata.get('isbn'), isbn)
-        self.assertEqual(site.page.metadata['work']['lookup_id_value'], '1383900')
-        self.assertEqual(site.page.metadata['work']['title'], 'Hyperion')
+        self.assertEqual(site.page.required_pages[0]['id_value'], '1383900')
         edition = Edition.objects.get(primary_lookup_id_type=IdType.ISBN, primary_lookup_id_value=isbn)
         page = edition.external_pages.all().first()
         self.assertEqual(page.id_type, IdType.Goodreads)
@@ -105,19 +104,19 @@ class GoodreadsTestCase(TestCase):
     @use_local_response
     def test_work(self):
-        # url = 'https://www.goodreads.com/work/editions/153313'
+        url = 'https://www.goodreads.com/work/editions/153313'
+        p = SiteList.get_site_by_url(url).get_page_ready()
+        self.assertEqual(p.item.title, '1984')
         url1 = 'https://www.goodreads.com/book/show/3597767-rok-1984'
         url2 = 'https://www.goodreads.com/book/show/40961427-1984'
         p1 = SiteList.get_site_by_url(url1).get_page_ready()
         p2 = SiteList.get_site_by_url(url2).get_page_ready()
         w1 = p1.item.works.all().first()
         w2 = p2.item.works.all().first()
+        self.assertEqual(w1.title, 'Nineteen Eighty-Four')
+        self.assertEqual(w2.title, 'Nineteen Eighty-Four')
         self.assertEqual(w1, w2)


-class DoubanTestCase(TestCase):
+class DoubanBookTestCase(TestCase):
     def setUp(self):
         pass

View file

@@ -1,5 +1,6 @@
 from polymorphic.models import PolymorphicModel
 from django.db import models
+from catalog.common import jsondata
 from django.utils.translation import gettext_lazy as _
 from django.utils import timezone
 from django.core.files.uploadedfile import SimpleUploadedFile
@@ -220,6 +221,8 @@ class ExternalPage(models.Model):
     scraped_time = models.DateTimeField(null=True)
     created_time = models.DateTimeField(auto_now_add=True)
     edited_time = models.DateTimeField(auto_now=True)
+    required_pages = jsondata.ArrayField(null=False, blank=False, default=list)
+    related_pages = jsondata.ArrayField(null=False, blank=False, default=list)

     class Meta:
         unique_together = [['id_type', 'id_value']]
@@ -237,7 +240,7 @@ class ExternalPage(models.Model):
     @property
     def ready(self):
-        return bool(self.metadata)
+        return bool(self.metadata and self.scraped_time)

     def get_all_lookup_ids(self):
         d = self.other_lookup_ids.copy()
@@ -254,11 +257,3 @@ class ExternalPage(models.Model):
         else:
             raise ValueError(f'preferred model {model} does not exist')
         return None
-
-    def get_dependent_urls(self):
-        ll = self.metadata.get('dependent_urls')
-        return ll if ll else []
-
-    def get_related_urls(self):
-        ll = self.metadata.get('related_urls')
-        return ll if ll else []
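
required_pages and related_pages are stored as JSON arrays on ExternalPage, and ready now also requires a scraped_time. Each array entry is a link descriptor; a small sketch of the shape the scrapers in this commit emit (validate_link is illustrative only, and id_type is shown as a plain string where the code uses IdType members):

    # Sketch: the keys every link descriptor carries in this commit.
    REQUIRED_KEYS = {'model', 'id_type', 'id_value', 'title', 'url'}

    def validate_link(link):
        return REQUIRED_KEYS <= link.keys() and link['id_value'] is not None

    print(validate_link({
        'model': 'TVShow',
        'id_type': 'TMDB_TV',
        'id_value': '86941',
        'title': 'The North Water',
        'url': 'https://www.themoviedb.org/tv/86941',
    }))  # True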

View file

@@ -2,6 +2,10 @@ from typing import *
 import re
 from .models import ExternalPage
 from dataclasses import dataclass, field
+import logging
+
+
+logger = logging.getLogger(__name__)


 @dataclass
@@ -50,6 +54,10 @@ class AbstractSite:
             self.page = ExternalPage(id_type=self.ID_TYPE, id_value=self.id_value, url=self.url)
         return self.page

+    def bypass_scrape(self, data_from_link) -> PageData | None:
+        """subclass may implement this to use data from linked page and bypass actual scrape"""
+        return None
+
     def scrape(self) -> PageData:
         """subclass should implement this, return PageData object"""
         data = PageData()
@@ -76,7 +84,7 @@ class AbstractSite:
     def ready(self):
         return bool(self.page and self.page.ready)

-    def get_page_ready(self, auto_save=True, auto_create=True, auto_link=True):
+    def get_page_ready(self, auto_save=True, auto_create=True, auto_link=True, data_from_link=None):
         """return a page scraped, or scrape if not yet"""
         if auto_link:
             auto_create = True
@@ -87,7 +95,9 @@ class AbstractSite:
         if not self.page:
             return None
         if not p.ready:
-            pagedata = self.scrape()
+            pagedata = self.bypass_scrape(data_from_link)
+            if not pagedata:
+                pagedata = self.scrape()
             p.update_content(pagedata)
         if not p.ready:
             logger.error(f'unable to get page {self.url} ready')
@@ -100,14 +110,16 @@ class AbstractSite:
                 p.item.merge_data_from_extenal_pages()
                 p.item.save()
         if auto_link:
-            # todo rewrite this
+            for linked_pages in p.required_pages:
+                linked_site = SiteList.get_site_by_url(linked_pages['url'])
+                if linked_site:
+                    linked_site.get_page_ready(auto_link=False)
+                else:
+                    logger.error(f'unable to get site for {linked_pages["url"]}')
             p.item.update_linked_items_from_extenal_page(p)
+            p.item.save()
         return p

-    def get_dependent_pages_ready(self, urls):
-        # set depth = 2 so in a case of douban season can find an IMDB episode then a TMDB Serie
-        pass


 class SiteList:
     registry = {}
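
get_page_ready now tries bypass_scrape before hitting the network, then walks required_pages once, fetching each linked site with auto_link=False so the traversal stays one level deep. A self-contained sketch of that control flow (here the link descriptor is also forwarded as data_from_link to show the bypass path; StubSite and the registry dict stand in for AbstractSite and SiteList):

    # Sketch: bypass_scrape first, then scrape; linked pages fetched one level deep.
    class StubSite:
        def __init__(self, url, links=()):
            self.url, self.links = url, list(links)

        def bypass_scrape(self, data_from_link):
            # a subclass may build page data from the linking page's descriptor
            return {'title': data_from_link['title']} if data_from_link else None

        def scrape(self):
            return {'title': f'scraped {self.url}', 'required_pages': self.links}

    def get_page_ready(site, registry, data_from_link=None, auto_link=True):
        pagedata = site.bypass_scrape(data_from_link) or site.scrape()
        if auto_link:
            for link in pagedata.get('required_pages', []):
                linked = registry.get(link['url'])
                if linked:  # auto_link=False keeps the traversal one level deep
                    get_page_ready(linked, registry, data_from_link=link, auto_link=False)
        return pagedata

    show = StubSite('https://www.themoviedb.org/tv/86941')
    season = StubSite('https://movie.douban.com/subject/3627919/',
                      [{'model': 'TVShow', 'title': 'The North Water', 'url': show.url}])
    print(get_page_ready(season, {show.url: show})['title'])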

View file

@@ -108,16 +108,16 @@ class DoubanBook(AbstractSite, ScraperMixin):
             translators = None
         self.data['translators'] = translators

-        self.data['work'] = {}
         work_link = self.parse_str('//h2/span[text()="这本书的其他版本"]/following-sibling::span[@class="pl"]/a/@href')
         if work_link:
+            # TODO move logic to a different class
             r = re.match(r'\w+://book.douban.com/works/(\d+)', work_link)
-            self.data['work']['lookup_id_type'] = IdType.DoubanBook_Work
-            self.data['work']['lookup_id_value'] = r[1] if r else None
-            self.data['work']['title'] = self.data['title']
-            self.data['work']['url'] = work_link
+            self.data['required_pages'] = [{
+                'model': 'Work',
+                'id_type': IdType.DoubanBook_Work,
+                'id_value': r[1] if r else None,
+                'title': self.data['title'],
+                'url': work_link,
+            }]

         pd = PageData(metadata=self.data)
         pd.lookup_ids[IdType.ISBN] = self.data.get('isbn')
         pd.lookup_ids[IdType.CUBN] = self.data.get('cubn')
@@ -129,3 +129,34 @@ class DoubanBook(AbstractSite, ScraperMixin):
         except Exception:
             logger.debug(f'failed to download cover for {self.url} from {self.data["cover_image_url"]}')
         return pd
+
+
+@SiteList.register
+class DoubanBook_Work(AbstractSite):
+    ID_TYPE = IdType.DoubanBook_Work
+    URL_PATTERNS = [r"\w+://book\.douban\.com/works/(\d+)"]
+    WIKI_PROPERTY_ID = '?'
+    DEFAULT_MODEL = Work
+
+    @classmethod
+    def id_to_url(self, id_value):
+        return "https://book.douban.com/works/" + id_value + "/"
+
+    def bypass_scrape(self, data_from_link):
+        if not data_from_link:
+            return None
+        pd = PageData(metadata={
+            'title': data_from_link['title'],
+        })
+        return pd
+
+    def scrape(self):
+        content = html.fromstring(DoubanDownloader(self.url).download().text.strip())
+        title_elem = content.xpath("//h1/text()")
+        title = title_elem[0].split('全部版本(')[0].strip() if title_elem else None
+        if not title:
+            raise ParseError(self, 'title')
+        pd = PageData(metadata={
+            'title': title,
+        })
+        return pd
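
DoubanBook_Work registers a works URL pattern and a canonical id_to_url, and can be populated without a fetch via bypass_scrape. A round-trip sketch, assuming the convention that the first capture group in URL_PATTERNS becomes id_value (the id used here is an arbitrary example):

    import re

    # Sketch: works URL <-> id_value round trip for DoubanBook_Work.
    URL_PATTERN = r"\w+://book\.douban\.com/works/(\d+)"

    def url_to_id(url):
        m = re.match(URL_PATTERN, url)
        return m[1] if m else None

    def id_to_url(id_value):
        return "https://book.douban.com/works/" + id_value + "/"

    assert url_to_id("https://book.douban.com/works/1008677/") == "1008677"
    assert url_to_id(id_to_url("1008677")) == "1008677"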

View file

@@ -6,6 +6,7 @@ from catalog.tv.models import *
 import logging
 from django.db import models
 from django.utils.translation import gettext_lazy as _
+from .tmdb import TMDB_TV, search_tmdb_by_imdb_id


 logger = logging.getLogger(__name__)
@@ -238,19 +239,33 @@ class DoubanMovie(AbstractSite):
         })
         pd.metadata['preferred_model'] = ('TVSeason' if season else 'TVShow') if is_series else 'Movie'

-        # tmdb_api_url = f"https://api.themoviedb.org/3/find/{self.imdb_code}?api_key={settings.TMDB_API3_KEY}&language=zh-CN&external_source=imdb_id"
-        # res_data = BasicDownloader(tmdb_api_url).download().json()
-        # if 'movie_results' in res_data and len(res_data['movie_results']) > 0:
-        #     pd.metadata['preferred_model'] = 'Movie'
-        # elif 'tv_results' in res_data and len(res_data['tv_results']) > 0:
-        #     pd.metadata['preferred_model'] = 'TVShow'
-        # elif 'tv_season_results' in res_data and len(res_data['tv_season_results']) > 0:
-        #     pd.metadata['preferred_model'] = 'TVSeason'
-        # elif 'tv_episode_results' in res_data and len(res_data['tv_episode_results']) > 0:
-        #     pd.metadata['preferred_model'] = 'TVSeason'
         if imdb_code:
+            res_data = search_tmdb_by_imdb_id(imdb_code)
+            tmdb_show_id = None
+            if 'movie_results' in res_data and len(res_data['movie_results']) > 0:
+                pd.metadata['preferred_model'] = 'Movie'
+            elif 'tv_results' in res_data and len(res_data['tv_results']) > 0:
+                pd.metadata['preferred_model'] = 'TVShow'
+            elif 'tv_season_results' in res_data and len(res_data['tv_season_results']) > 0:
+                pd.metadata['preferred_model'] = 'TVSeason'
+                tmdb_show_id = res_data['tv_season_results'][0]['show_id']
+            elif 'tv_episode_results' in res_data and len(res_data['tv_episode_results']) > 0:
+                pd.metadata['preferred_model'] = 'TVSeason'
+                tmdb_show_id = res_data['tv_episode_results'][0]['show_id']
+                if res_data['tv_episode_results'][0]['episode_number'] != 1:
+                    logger.error(f'Douban Movie {self.url} mapping to unexpected imdb episode {imdb_code}')
+                    # TODO correct the IMDB id
             pd.lookup_ids[IdType.IMDB] = imdb_code
+            if tmdb_show_id:
+                pd.metadata['required_pages'] = [{
+                    'model': 'TVShow',
+                    'id_type': IdType.TMDB_TV,
+                    'id_value': tmdb_show_id,
+                    'title': title,
+                    'url': TMDB_TV.id_to_url(tmdb_show_id),
+                }]
+        # TODO parse sister seasons
+        # pd.metadata['related_pages'] = []
         if pd.metadata["cover_image_url"]:
             imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
             try:
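
The TMDB find response decides both the preferred model and, for season and episode results, the show to link back to. The same decision table as a pure function (classify is illustrative; the sample response mirrors the tv_results case from the fixture added in this commit):

    # Sketch: map a TMDB /find response to (preferred_model, tmdb_show_id).
    def classify(res_data):
        if res_data.get('movie_results'):
            return 'Movie', None
        if res_data.get('tv_results'):
            return 'TVShow', None
        if res_data.get('tv_season_results'):
            return 'TVSeason', res_data['tv_season_results'][0]['show_id']
        if res_data.get('tv_episode_results'):
            return 'TVSeason', res_data['tv_episode_results'][0]['show_id']
        return None, None

    print(classify({'movie_results': [], 'tv_results': [{'id': 86941}],
                    'tv_season_results': [], 'tv_episode_results': []}))
    # ('TVShow', None)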

View file

@@ -1,12 +1,12 @@
 import re
-from catalog.book.models import Edition
+from catalog.book.models import Edition, Work
 from catalog.common import *
 from lxml import html
 import json
 import logging


-logger = logging.getLogger(__name__)
+_logger = logging.getLogger(__name__)


 class GoodreadsDownloader(RetryDownloader):
@@ -64,14 +64,15 @@ class Goodreads(AbstractSite):
             data['asin'] = asin
             data['pages'] = b['details'].get('numPages')
             data['cover_image_url'] = b['imageUrl']
-            data['work'] = {}
             w = next(filter(lambda x: x.get('details'), o['Work']), None)
             if w:
-                data['work']['lookup_id_type'] = IdType.Goodreads_Work
-                data['work']['lookup_id_value'] = str(w['legacyId'])
-                data['work']['title'] = w['details']['originalTitle']
-                data['work']['url'] = w['details']['webUrl']
+                data['required_pages'] = [{
+                    'model': 'Work',
+                    'id_type': IdType.Goodreads_Work,
+                    'id_value': str(w['legacyId']),
+                    'title': w['details']['originalTitle'],
+                    'url': w['editions']['webUrl'],
+                }]
         pd = PageData(metadata=data)
         pd.lookup_ids[IdType.ISBN] = data.get('isbn')
         pd.lookup_ids[IdType.ASIN] = data.get('asin')
@@ -81,5 +82,34 @@ class Goodreads(AbstractSite):
                 pd.cover_image = imgdl.download().content
                 pd.cover_image_extention = imgdl.extention
             except Exception:
-                logger.debug(f'failed to download cover for {self.url} from {data["cover_image_url"]}')
+                _logger.debug(f'failed to download cover for {self.url} from {data["cover_image_url"]}')
         return pd
+
+
+@SiteList.register
+class Goodreads_Work(AbstractSite):
+    ID_TYPE = IdType.Goodreads_Work
+    WIKI_PROPERTY_ID = ''
+    DEFAULT_MODEL = Work
+    URL_PATTERNS = [r".+goodreads.com/work/editions/(\d+)"]
+
+    @classmethod
+    def id_to_url(self, id_value):
+        return "https://www.goodreads.com/work/editions/" + id_value
+
+    def scrape(self, response=None):
+        content = html.fromstring(BasicDownloader(self.url).download().text.strip())
+        title_elem = content.xpath("//h1/a/text()")
+        title = title_elem[0].strip() if title_elem else None
+        if not title:
+            raise ParseError(self, 'title')
+        author_elem = content.xpath("//h2/a/text()")
+        author = author_elem[0].strip() if author_elem else None
+        first_published_elem = content.xpath("//h2/span/text()")
+        first_published = first_published_elem[0].strip() if first_published_elem else None
+        pd = PageData(metadata={
+            'title': title,
+            'author': author,
+            'first_published': first_published
+        })
+        return pd
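
Goodreads work links come from the embedded JSON blob: the scraper picks the one Work entry that carries details, then builds the descriptor from legacyId. A sketch with an abbreviated sample blob (the field values are illustrative):

    # Sketch: select the detailed Work entry and derive the link descriptor.
    o = {'Work': [
        {'legacyId': 153313},
        {'legacyId': 153313,
         'details': {'originalTitle': 'Nineteen Eighty-Four'},
         'editions': {'webUrl': 'https://www.goodreads.com/work/editions/153313'}},
    ]}

    w = next(filter(lambda x: x.get('details'), o['Work']), None)
    if w:
        link = {'model': 'Work', 'id_type': 'Goodreads_Work',
                'id_value': str(w['legacyId']),
                'title': w['details']['originalTitle'],
                'url': w['editions']['webUrl']}
        print(link['id_value'], link['title'])  # 153313 Nineteen Eighty-Four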

View file

@@ -1,6 +1,5 @@
-from django.conf import settings
 from catalog.common import *
-from .douban import *
+from .tmdb import search_tmdb_by_imdb_id
 from catalog.movie.models import *
 from catalog.tv.models import *
 import logging
@@ -21,8 +20,7 @@ class IMDB(AbstractSite):
     def scrape(self):
         self.scraped = False
-        api_url = f"https://api.themoviedb.org/3/find/{self.id_value}?api_key={settings.TMDB_API3_KEY}&language=zh-CN&external_source=imdb_id"
-        res_data = BasicDownloader(api_url).download().json()
+        res_data = search_tmdb_by_imdb_id(self.id_value)
         if 'movie_results' in res_data and len(res_data['movie_results']) > 0:
             url = f"https://www.themoviedb.org/movie/{res_data['movie_results'][0]['id']}"
         elif 'tv_results' in res_data and len(res_data['tv_results']) > 0:

View file

@@ -14,6 +14,12 @@ import logging
 logger = logging.getLogger(__name__)


+def search_tmdb_by_imdb_id(imdb_id):
+    tmdb_api_url = f"https://api.themoviedb.org/3/find/{imdb_id}?api_key={settings.TMDB_API3_KEY}&language=zh-CN&external_source=imdb_id"
+    res_data = BasicDownloader(tmdb_api_url).download().json()
+    return res_data
+
+
 def _copy_dict(s, key_map):
     d = {}
     for src, dst in key_map.items():
@@ -61,7 +67,7 @@ class TMDB_Movie(AbstractSite):
     @classmethod
     def id_to_url(self, id_value):
-        return "https://www.themoviedb.org/movie/" + id_value
+        return f"https://www.themoviedb.org/movie/{id_value}"

     def scrape(self):
         is_series = False
@@ -162,7 +168,7 @@ class TMDB_TV(AbstractSite):
     @classmethod
     def id_to_url(self, id_value):
-        return "https://www.themoviedb.org/tv/" + id_value
+        return f"https://www.themoviedb.org/tv/{id_value}"

     def scrape(self):
         is_series = True
@@ -221,6 +227,12 @@ class TMDB_TV(AbstractSite):
         # TODO: use GET /configuration to get base url
         img_url = ('https://image.tmdb.org/t/p/original/' + res_data['poster_path']) if res_data['poster_path'] is not None else None

+        season_links = list(map(lambda s: {
+            'model': 'TVSeason',
+            'id_type': IdType.TMDB_TVSeason,
+            'id_value': f'{self.id_value}-{s["season_number"]}',
+            'title': s['name'],
+            'url': f'{self.url}/season/{s["season_number"]}'}, res_data['seasons']))
         pd = PageData(metadata={
             'title': title,
             'orig_title': orig_title,
@@ -241,9 +253,11 @@ class TMDB_TV(AbstractSite):
             'single_episode_length': None,
             'brief': brief,
             'cover_image_url': img_url,
+            'related_pages': season_links,
         })
         if imdb_code:
             pd.lookup_ids[IdType.IMDB] = imdb_code

         if pd.metadata["cover_image_url"]:
             imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
             try:
@@ -279,6 +293,13 @@ class TMDB_TVSeason(AbstractSite):
         if not d.get('id'):
             raise ParseError('id')
         pd = PageData(metadata=_copy_dict(d, {'name': 'title', 'overview': 'brief', 'air_date': 'air_date', 'season_number': 0, 'external_ids': 0}))
+        pd.metadata['required_pages'] = [{
+            'model': 'TVShow',
+            'id_type': IdType.TMDB_TV,
+            'id_value': v[0],
+            'title': f'TMDB TV Show {v[0]}',
+            'url': f"https://www.themoviedb.org/tv/{v[0]}",
+        }]
         pd.lookup_ids[IdType.IMDB] = d['external_ids'].get('imdb_id')
         pd.metadata['cover_image_url'] = ('https://image.tmdb.org/t/p/original/' + d['poster_path']) if d['poster_path'] else None
         pd.metadata['title'] = pd.metadata['title'] if pd.metadata['title'] else f'Season {d["season_number"]}'
@@ -295,7 +316,7 @@ class TMDB_TVSeason(AbstractSite):
         # get external id from 1st episode
         if pd.lookup_ids[IdType.IMDB]:
             logger.warning("Unexpected IMDB id for TMDB tv season")
         elif len(pd.metadata['episode_number_list']) == 0:
             logger.warning("Unable to lookup IMDB id for TMDB tv season with zero episodes")
         else:
             ep = pd.metadata['episode_number_list'][0]
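
TMDB_TV now emits one related_pages entry per season, keyed by the composite "showid-seasonnumber" id that TMDB_TVSeason uses. A sketch with a sample seasons array (id_type is shown as a plain string; the code uses IdType.TMDB_TVSeason):

    # Sketch: build related_pages season links from a TMDB seasons array.
    show_id, url = '86941', 'https://www.themoviedb.org/tv/86941'
    seasons = [{'season_number': 1, 'name': 'Season 1'}]

    season_links = [{
        'model': 'TVSeason',
        'id_type': 'TMDB_TVSeason',
        'id_value': f'{show_id}-{s["season_number"]}',
        'title': s['name'],
        'url': f'{url}/season/{s["season_number"]}',
    } for s in seasons]

    print(season_links[0]['id_value'])  # 86941-1
    print(season_links[0]['url'])       # https://www.themoviedb.org/tv/86941/season/1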

View file

@@ -39,14 +39,23 @@ class TVSeason(Item):
     douban_movie = PrimaryLookupIdDescriptor(IdType.DoubanMovie)
     imdb = PrimaryLookupIdDescriptor(IdType.IMDB)
     tmdb_tvseason = PrimaryLookupIdDescriptor(IdType.TMDB_TVSeason)
-    series = models.ForeignKey(TVShow, null=True, on_delete=models.SET_NULL, related_name='seasons')
+    show = models.ForeignKey(TVShow, null=True, on_delete=models.SET_NULL, related_name='seasons')
     season_number = models.PositiveIntegerField()
     episode_count = jsondata.IntegerField(blank=True, default=None)
     METADATA_COPY_LIST = ['title', 'brief', 'season_number', 'episode_count']

+    def update_linked_items_from_extenal_page(self, page):
+        """add Work from page.metadata['work'] if not yet"""
+        links = page.required_pages + page.related_pages
+        for w in links:
+            if w['model'] == 'TVShow':
+                p = ExternalPage.objects.filter(id_type=w['id_type'], id_value=w['id_value']).first()
+                if p and p.item and self.show != p.item:
+                    self.show = p.item
+

 class TVEpisode(Item):
-    series = models.ForeignKey(TVShow, null=True, on_delete=models.SET_NULL, related_name='episodes')
+    show = models.ForeignKey(TVShow, null=True, on_delete=models.SET_NULL, related_name='episodes')
     season = models.ForeignKey(TVSeason, null=True, on_delete=models.SET_NULL, related_name='episodes')
     episode_number = models.PositiveIntegerField()
     imdb = PrimaryLookupIdDescriptor(IdType.IMDB)
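
With the FK renamed from series to show, a season resolves its show by finding the ExternalPage that matches a TVShow link descriptor and taking that page's item. A sketch with dicts standing in for ExternalPage rows and Item instances (pages and link_season_to_show are illustrative):

    # Sketch: resolve season.show from a linked page's item.
    pages = {('TMDB_TV', '86941'): {'item': 'TVShow<The North Water>'}}

    def link_season_to_show(season, links):
        for w in links:
            if w['model'] == 'TVShow':
                p = pages.get((w['id_type'], w['id_value']))
                if p and p['item'] and season.get('show') != p['item']:
                    season['show'] = p['item']
        return season

    season = {'title': '北海鲸梦 Season 1', 'show': None}
    links = [{'model': 'TVShow', 'id_type': 'TMDB_TV', 'id_value': '86941'}]
    print(link_season_to_show(season, links)['show'])  # TVShow<The North Water>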

View file

@@ -60,6 +60,8 @@ class TMDBTVSeasonTestCase(TestCase):
         self.assertEqual(site.page.item.primary_lookup_id_type, IdType.IMDB)
         self.assertEqual(site.page.item.__class__.__name__, 'TVSeason')
         self.assertEqual(site.page.item.imdb, 'tt1159991')
+        self.assertIsNotNone(site.page.item.show)
+        self.assertEqual(site.page.item.show.imdb, 'tt0436992')


 class DoubanMovieTVTestCase(TestCase):
@@ -68,6 +70,8 @@ class DoubanMovieTVTestCase(TestCase):
         url3 = 'https://movie.douban.com/subject/3627919/'
         p3 = SiteList.get_site_by_url(url3).get_page_ready()
         self.assertEqual(p3.item.__class__.__name__, 'TVSeason')
+        self.assertIsNotNone(p3.item.show)
+        self.assertEqual(p3.item.show.imdb, 'tt0436992')

     @use_local_response
     def test_scrape_singleseason(self):

View file

@@ -0,0 +1 @@
+{"movie_results":[],"person_results":[],"tv_results":[{"adult":false,"backdrop_path":"/8IC1q0lHFwi5m8VtChLzIfmpaZH.jpg","id":86941,"name":"北海鲸梦","original_language":"en","original_name":"The North Water","overview":"改编自伊恩·麦奎尔的同名获奖小说聚焦19世纪一次灾难性的捕鲸活动。故事围绕帕特里克·萨姆纳展开他是一名声名狼藉的前战地医生后成为捕鲸船上的医生在船上遇到了鱼叉手亨利·德拉克斯一个残忍、不道德的杀手。萨姆纳没有逃离过去的恐惧而是被迫在北极荒原上为生存而进行残酷的斗争...","poster_path":"/9CM0ca8pX1os3SJ24hsIc0nN8ph.jpg","media_type":"tv","genre_ids":[18,9648],"popularity":11.318,"first_air_date":"2021-07-14","vote_average":7.5,"vote_count":75,"origin_country":["US"]}],"tv_episode_results":[],"tv_season_results":[]}

File diff suppressed because it is too large

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long