add douban book works; add goodreads works; auto-link season to show
parent 9e219bfac9
commit 3a95e5fc5c
16 changed files with 15041 additions and 67 deletions
@@ -47,15 +47,14 @@ class Edition(Item):

    def update_linked_items_from_extenal_page(self, page):
        """add Work from page.metadata['work'] if not yet"""
        w = page.metadata.get('work', None)
        if w:
            work = Work.objects.filter(primary_lookup_id_type=w['lookup_id_type'], primary_lookup_id_value=w['lookup_id_value']).first()
            if work:
                if any(edition == self for edition in work.editions.all()):
                    return
            else:
                work = Work.objects.create(primary_lookup_id_type=w['lookup_id_type'], primary_lookup_id_value=w['lookup_id_value'], title=w['title'])
            work.editions.add(self)
        links = page.required_pages + page.related_pages
        for w in links:
            if w['model'] == 'Work':
                work = Work.objects.filter(primary_lookup_id_type=w['id_type'], primary_lookup_id_value=w['id_value']).first()
                if work and work not in self.works.all():
                    self.works.add(work)
                # if not work:
                #     logger.info(f'Unable to find link for {w["url"]}')


class Work(Item):
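To make the new linking path concrete, here is a minimal usage sketch (not part of the commit); the ids mirror the Hyperion test below, and `edition` is assumed to already exist with a scraped Goodreads page:

```python
# Hypothetical walkthrough of Edition.update_linked_items_from_extenal_page:
# a Work matching the id carried in page.required_pages gets attached to the edition.
work = Work.objects.create(
    primary_lookup_id_type=IdType.Goodreads_Work,   # id_type listed in required_pages
    primary_lookup_id_value='1383900',               # value asserted in the Hyperion test
    title='Hyperion',
)
page = edition.external_pages.all().first()           # 'edition' assumed to exist already
edition.update_linked_items_from_extenal_page(page)   # finds the Work above and adds it to edition.works
assert work in edition.works.all()
```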
@@ -78,8 +78,7 @@ class GoodreadsTestCase(TestCase):
        self.assertEqual(site.ready, True)
        self.assertEqual(site.page.metadata.get('title'), 'Hyperion')
        self.assertEqual(site.page.metadata.get('isbn'), isbn)
        self.assertEqual(site.page.metadata['work']['lookup_id_value'], '1383900')
        self.assertEqual(site.page.metadata['work']['title'], 'Hyperion')
        self.assertEqual(site.page.required_pages[0]['id_value'], '1383900')
        edition = Edition.objects.get(primary_lookup_id_type=IdType.ISBN, primary_lookup_id_value=isbn)
        page = edition.external_pages.all().first()
        self.assertEqual(page.id_type, IdType.Goodreads)

@@ -105,19 +104,19 @@ class GoodreadsTestCase(TestCase):

    @use_local_response
    def test_work(self):
        # url = 'https://www.goodreads.com/work/editions/153313'
        url = 'https://www.goodreads.com/work/editions/153313'
        p = SiteList.get_site_by_url(url).get_page_ready()
        self.assertEqual(p.item.title, '1984')
        url1 = 'https://www.goodreads.com/book/show/3597767-rok-1984'
        url2 = 'https://www.goodreads.com/book/show/40961427-1984'
        p1 = SiteList.get_site_by_url(url1).get_page_ready()
        p2 = SiteList.get_site_by_url(url2).get_page_ready()
        w1 = p1.item.works.all().first()
        w2 = p2.item.works.all().first()
        self.assertEqual(w1.title, 'Nineteen Eighty-Four')
        self.assertEqual(w2.title, 'Nineteen Eighty-Four')
        self.assertEqual(w1, w2)


class DoubanTestCase(TestCase):
class DoubanBookTestCase(TestCase):
    def setUp(self):
        pass
@@ -1,5 +1,6 @@
from polymorphic.models import PolymorphicModel
from django.db import models
from catalog.common import jsondata
from django.utils.translation import gettext_lazy as _
from django.utils import timezone
from django.core.files.uploadedfile import SimpleUploadedFile

@@ -220,6 +221,8 @@ class ExternalPage(models.Model):
    scraped_time = models.DateTimeField(null=True)
    created_time = models.DateTimeField(auto_now_add=True)
    edited_time = models.DateTimeField(auto_now=True)
    required_pages = jsondata.ArrayField(null=False, blank=False, default=list)
    related_pages = jsondata.ArrayField(null=False, blank=False, default=list)

    class Meta:
        unique_together = [['id_type', 'id_value']]

@@ -237,7 +240,7 @@ class ExternalPage(models.Model):

    @property
    def ready(self):
        return bool(self.metadata)
        return bool(self.metadata and self.scraped_time)

    def get_all_lookup_ids(self):
        d = self.other_lookup_ids.copy()

@@ -254,11 +257,3 @@ class ExternalPage(models.Model):
        else:
            raise ValueError(f'preferred model {model} does not exist')
        return None

    def get_dependent_urls(self):
        ll = self.metadata.get('dependent_urls')
        return ll if ll else []

    def get_related_urls(self):
        ll = self.metadata.get('related_urls')
        return ll if ll else []
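For reference, the entries the site scrapers below put into `required_pages` / `related_pages` all share one shape (model name, lookup id, display title, url). An illustrative entry, with values borrowed from the Hyperion test above and a url assumed to follow `Goodreads_Work.id_to_url`:

```python
# Illustrative only: one entry of ExternalPage.required_pages as the sites below build it.
link = {
    'model': 'Work',                                          # target Item subclass
    'id_type': IdType.Goodreads_Work,                         # lookup id type of the linked page
    'id_value': '1383900',                                    # value asserted in the Hyperion test
    'title': 'Hyperion',
    'url': 'https://www.goodreads.com/work/editions/1383900', # assumed editions-page url
}
```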
@@ -2,6 +2,10 @@ from typing import *
import re
from .models import ExternalPage
from dataclasses import dataclass, field
import logging


logger = logging.getLogger(__name__)


@dataclass

@@ -50,6 +54,10 @@ class AbstractSite:
        self.page = ExternalPage(id_type=self.ID_TYPE, id_value=self.id_value, url=self.url)
        return self.page

    def bypass_scrape(self, data_from_link) -> PageData | None:
        """subclass may implement this to use data from linked page and bypass actual scrape"""
        return None

    def scrape(self) -> PageData:
        """subclass should implement this, return PageData object"""
        data = PageData()

@@ -76,7 +84,7 @@ class AbstractSite:
    def ready(self):
        return bool(self.page and self.page.ready)

    def get_page_ready(self, auto_save=True, auto_create=True, auto_link=True):
    def get_page_ready(self, auto_save=True, auto_create=True, auto_link=True, data_from_link=None):
        """return a page scraped, or scrape if not yet"""
        if auto_link:
            auto_create = True

@@ -87,7 +95,9 @@ class AbstractSite:
        if not self.page:
            return None
        if not p.ready:
            pagedata = self.scrape()
            pagedata = self.bypass_scrape(data_from_link)
            if not pagedata:
                pagedata = self.scrape()
            p.update_content(pagedata)
        if not p.ready:
            logger.error(f'unable to get page {self.url} ready')

@@ -100,14 +110,16 @@ class AbstractSite:
            p.item.merge_data_from_extenal_pages()
            p.item.save()
            if auto_link:
                # todo rewrite this
                for linked_pages in p.required_pages:
                    linked_site = SiteList.get_site_by_url(linked_pages['url'])
                    if linked_site:
                        linked_site.get_page_ready(auto_link=False)
                    else:
                        logger.error(f'unable to get site for {linked_pages["url"]}')
                p.item.update_linked_items_from_extenal_page(p)
                p.item.save()
        return p

    def get_dependent_pages_ready(self, urls):
        # set depth = 2 so in a case of douban season can find an IMDB episode then a TMDB Serie
        pass


class SiteList:
    registry = {}
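A minimal end-to-end sketch of the new `get_page_ready` flow, mirroring the Douban TV test later in this commit: with `auto_link=True` (the default) the scraper also fetches every entry in `required_pages` and then lets the item link itself:

```python
# Sketch of the auto-link flow; the URL and expected outcome come from the tests below.
site = SiteList.get_site_by_url('https://movie.douban.com/subject/3627919/')
page = site.get_page_ready()   # scrape + save, then get_page_ready(auto_link=False) on each required page
season = page.item             # resolved as a TVSeason
show = season.show             # set by page.item.update_linked_items_from_extenal_page(page)
```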
@@ -108,16 +108,16 @@ class DoubanBook(AbstractSite, ScraperMixin):
            translators = None
        self.data['translators'] = translators

        self.data['work'] = {}
        work_link = self.parse_str('//h2/span[text()="这本书的其他版本"]/following-sibling::span[@class="pl"]/a/@href')
        if work_link:
            # TODO move logic to a differnet class
            r = re.match(r'\w+://book.douban.com/works/(\d+)', work_link)
            self.data['work']['lookup_id_type'] = IdType.DoubanBook_Work
            self.data['work']['lookup_id_value'] = r[1] if r else None
            self.data['work']['title'] = self.data['title']
            self.data['work']['url'] = work_link

            self.data['required_pages'] = [{
                'model': 'Work',
                'id_type': IdType.DoubanBook_Work,
                'id_value': r[1] if r else None,
                'title': self.data['title'],
                'url': work_link,
            }]
        pd = PageData(metadata=self.data)
        pd.lookup_ids[IdType.ISBN] = self.data.get('isbn')
        pd.lookup_ids[IdType.CUBN] = self.data.get('cubn')

@@ -129,3 +129,34 @@ class DoubanBook(AbstractSite, ScraperMixin):
        except Exception:
            logger.debug(f'failed to download cover for {self.url} from {self.data["cover_image_url"]}')
        return pd


@SiteList.register
class DoubanBook_Work(AbstractSite):
    ID_TYPE = IdType.DoubanBook_Work
    URL_PATTERNS = [r"\w+://book\.douban\.com/works/(\d+)"]
    WIKI_PROPERTY_ID = '?'
    DEFAULT_MODEL = Work

    @classmethod
    def id_to_url(self, id_value):
        return "https://book.douban.com/works/" + id_value + "/"

    def bypass_scrape(self, data_from_link):
        if not data_from_link:
            return None
        pd = PageData(metadata={
            'title': data_from_link['title'],
        })
        return pd

    def scrape(self):
        content = html.fromstring(DoubanDownloader(self.url).download().text.strip())
        title_elem = content.xpath("//h1/text()")
        title = title_elem[0].split('全部版本(')[0].strip() if title_elem else None
        if not title:
            raise ParseError(self, 'title')
        pd = PageData(metadata={
            'title': title,
        })
        return pd
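`bypass_scrape` on the new `DoubanBook_Work` site lets a caller that already holds the work title (parsed from an edition page) skip downloading the works page. A hedged sketch with a made-up title; the works id comes from the test data added by this commit:

```python
# Illustrative only; the dict stands in for the 'work' metadata parsed from an edition page.
site = SiteList.get_site_by_url('https://book.douban.com/works/1008677/')
pd = site.bypass_scrape({'title': 'some title from the edition page'})  # PageData without a download
no_pd = site.bypass_scrape(None)   # returns None, so get_page_ready() falls back to scrape()
```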
@@ -6,6 +6,7 @@ from catalog.tv.models import *
import logging
from django.db import models
from django.utils.translation import gettext_lazy as _
from .tmdb import TMDB_TV, search_tmdb_by_imdb_id


logger = logging.getLogger(__name__)

@@ -238,19 +239,33 @@ class DoubanMovie(AbstractSite):
        })
        pd.metadata['preferred_model'] = ('TVSeason' if season else 'TVShow') if is_series else 'Movie'

        # tmdb_api_url = f"https://api.themoviedb.org/3/find/{self.imdb_code}?api_key={settings.TMDB_API3_KEY}&language=zh-CN&external_source=imdb_id"
        # res_data = BasicDownloader(tmdb_api_url).download().json()
        # if 'movie_results' in res_data and len(res_data['movie_results']) > 0:
        #     pd.metadata['preferred_model'] = 'Movie'
        # elif 'tv_results' in res_data and len(res_data['tv_results']) > 0:
        #     pd.metadata['preferred_model'] = 'TVShow'
        # elif 'tv_season_results' in res_data and len(res_data['tv_season_results']) > 0:
        #     pd.metadata['preferred_model'] = 'TVSeason'
        # elif 'tv_episode_results' in res_data and len(res_data['tv_episode_results']) > 0:
        #     pd.metadata['preferred_model'] = 'TVSeason'

        if imdb_code:
            res_data = search_tmdb_by_imdb_id(imdb_code)
            tmdb_show_id = None
            if 'movie_results' in res_data and len(res_data['movie_results']) > 0:
                pd.metadata['preferred_model'] = 'Movie'
            elif 'tv_results' in res_data and len(res_data['tv_results']) > 0:
                pd.metadata['preferred_model'] = 'TVShow'
            elif 'tv_season_results' in res_data and len(res_data['tv_season_results']) > 0:
                pd.metadata['preferred_model'] = 'TVSeason'
                tmdb_show_id = res_data['tv_season_results'][0]['show_id']
            elif 'tv_episode_results' in res_data and len(res_data['tv_episode_results']) > 0:
                pd.metadata['preferred_model'] = 'TVSeason'
                tmdb_show_id = res_data['tv_episode_results'][0]['show_id']
                if res_data['tv_episode_results'][0]['episode_number'] != 1:
                    logger.error(f'Douban Movie {self.url} mapping to unexpected imdb episode {imdb_code}')
                    # TODO correct the IMDB id
            pd.lookup_ids[IdType.IMDB] = imdb_code
            if tmdb_show_id:
                pd.metadata['required_pages'] = [{
                    'model': 'TVShow',
                    'id_type': IdType.TMDB_TV,
                    'id_value': tmdb_show_id,
                    'title': title,
                    'url': TMDB_TV.id_to_url(tmdb_show_id),
                }]
        # TODO parse sister seasons
        # pd.metadata['related_pages'] = []
        if pd.metadata["cover_image_url"]:
            imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
            try:
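A worked example of the new IMDB-to-TMDB mapping, using the find-by-IMDB fixture added in this commit ("The North Water", TMDB id 86941): only `tv_results` is populated, so the Douban entry would be treated as a TVShow and no extra show link is needed. This is a condensed restatement of the branch above, assuming the surrounding module's imports; `imdb_code` stands in for whatever the Douban page exposed:

```python
# Sketch of the decision logic; truthiness checks condense the 'key in dict and len(...) > 0' tests.
res_data = search_tmdb_by_imdb_id(imdb_code)
tmdb_show_id = None
if res_data['movie_results']:
    preferred_model = 'Movie'
elif res_data['tv_results']:
    preferred_model = 'TVShow'            # the North Water fixture lands here (res_data['tv_results'][0]['id'] == 86941)
elif res_data['tv_season_results']:
    preferred_model = 'TVSeason'
    tmdb_show_id = res_data['tv_season_results'][0]['show_id']
elif res_data['tv_episode_results']:
    preferred_model = 'TVSeason'
    tmdb_show_id = res_data['tv_episode_results'][0]['show_id']
```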
@@ -1,12 +1,12 @@
import re
from catalog.book.models import Edition
from catalog.book.models import Edition, Work
from catalog.common import *
from lxml import html
import json
import logging


logger = logging.getLogger(__name__)
_logger = logging.getLogger(__name__)


class GoodreadsDownloader(RetryDownloader):

@@ -64,14 +64,15 @@ class Goodreads(AbstractSite):
        data['asin'] = asin
        data['pages'] = b['details'].get('numPages')
        data['cover_image_url'] = b['imageUrl']
        data['work'] = {}
        w = next(filter(lambda x: x.get('details'), o['Work']), None)
        if w:
            data['work']['lookup_id_type'] = IdType.Goodreads_Work
            data['work']['lookup_id_value'] = str(w['legacyId'])
            data['work']['title'] = w['details']['originalTitle']
            data['work']['url'] = w['details']['webUrl']

            data['required_pages'] = [{
                'model': 'Work',
                'id_type': IdType.Goodreads_Work,
                'id_value': str(w['legacyId']),
                'title': w['details']['originalTitle'],
                'url': w['editions']['webUrl'],
            }]
        pd = PageData(metadata=data)
        pd.lookup_ids[IdType.ISBN] = data.get('isbn')
        pd.lookup_ids[IdType.ASIN] = data.get('asin')

@@ -81,5 +82,34 @@ class Goodreads(AbstractSite):
            pd.cover_image = imgdl.download().content
            pd.cover_image_extention = imgdl.extention
        except Exception:
            logger.debug(f'failed to download cover for {self.url} from {data["cover_image_url"]}')
            _logger.debug(f'failed to download cover for {self.url} from {data["cover_image_url"]}')
        return pd


@SiteList.register
class Goodreads_Work(AbstractSite):
    ID_TYPE = IdType.Goodreads_Work
    WIKI_PROPERTY_ID = ''
    DEFAULT_MODEL = Work
    URL_PATTERNS = [r".+goodreads.com/work/editions/(\d+)"]

    @classmethod
    def id_to_url(self, id_value):
        return "https://www.goodreads.com/work/editions/" + id_value

    def scrape(self, response=None):
        content = html.fromstring(BasicDownloader(self.url).download().text.strip())
        title_elem = content.xpath("//h1/a/text()")
        title = title_elem[0].strip() if title_elem else None
        if not title:
            raise ParseError(self, 'title')
        author_elem = content.xpath("//h2/a/text()")
        author = author_elem[0].strip() if author_elem else None
        first_published_elem = content.xpath("//h2/span/text()")
        first_published = first_published_elem[0].strip() if first_published_elem else None
        pd = PageData(metadata={
            'title': title,
            'author': author,
            'first_published': first_published
        })
        return pd
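A short round-trip sketch for the new `Goodreads_Work` site, using the ids from the Goodreads tests above:

```python
# URL_PATTERNS captures '153313' as id_value, and id_to_url() rebuilds the same URL.
site = SiteList.get_site_by_url('https://www.goodreads.com/work/editions/153313')
page = site.get_page_ready()   # under @use_local_response this is served from the bundled test data
# test_work asserts page.item.title == '1984' for this URL
```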
@@ -1,6 +1,5 @@
from django.conf import settings
from catalog.common import *
from .douban import *
from .tmdb import search_tmdb_by_imdb_id
from catalog.movie.models import *
from catalog.tv.models import *
import logging

@@ -21,8 +20,7 @@ class IMDB(AbstractSite):

    def scrape(self):
        self.scraped = False
        api_url = f"https://api.themoviedb.org/3/find/{self.id_value}?api_key={settings.TMDB_API3_KEY}&language=zh-CN&external_source=imdb_id"
        res_data = BasicDownloader(api_url).download().json()
        res_data = search_tmdb_by_imdb_id(self.id_value)
        if 'movie_results' in res_data and len(res_data['movie_results']) > 0:
            url = f"https://www.themoviedb.org/movie/{res_data['movie_results'][0]['id']}"
        elif 'tv_results' in res_data and len(res_data['tv_results']) > 0:
@@ -14,6 +14,12 @@ import logging
logger = logging.getLogger(__name__)


def search_tmdb_by_imdb_id(imdb_id):
    tmdb_api_url = f"https://api.themoviedb.org/3/find/{imdb_id}?api_key={settings.TMDB_API3_KEY}&language=zh-CN&external_source=imdb_id"
    res_data = BasicDownloader(tmdb_api_url).download().json()
    return res_data


def _copy_dict(s, key_map):
    d = {}
    for src, dst in key_map.items():
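The new helper is shared by the IMDB and Douban sites above. A hedged usage example (requires `TMDB_API3_KEY` in settings; the response shape matches the JSON fixture added below, with one list per media type):

```python
# Count matches per media type for an IMDB id; tt0436992 is the show id asserted in the tv tests.
res = search_tmdb_by_imdb_id('tt0436992')
for key in ('movie_results', 'tv_results', 'tv_season_results', 'tv_episode_results'):
    print(key, len(res.get(key, [])))
```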
@@ -61,7 +67,7 @@ class TMDB_Movie(AbstractSite):

    @classmethod
    def id_to_url(self, id_value):
        return "https://www.themoviedb.org/movie/" + id_value
        return f"https://www.themoviedb.org/movie/{id_value}"

    def scrape(self):
        is_series = False

@@ -162,7 +168,7 @@ class TMDB_TV(AbstractSite):

    @classmethod
    def id_to_url(self, id_value):
        return "https://www.themoviedb.org/tv/" + id_value
        return f"https://www.themoviedb.org/tv/{id_value}"

    def scrape(self):
        is_series = True

@@ -221,6 +227,12 @@ class TMDB_TV(AbstractSite):
        # TODO: use GET /configuration to get base url
        img_url = ('https://image.tmdb.org/t/p/original/' + res_data['poster_path']) if res_data['poster_path'] is not None else None

        season_links = list(map(lambda s: {
            'model': 'TVSeason',
            'id_type': IdType.TMDB_TVSeason,
            'id_value': f'{self.id_value}-{s["season_number"]}',
            'title': s['name'],
            'url': f'{self.url}/season/{s["season_number"]}'}, res_data['seasons']))
        pd = PageData(metadata={
            'title': title,
            'orig_title': orig_title,

@@ -241,9 +253,11 @@ class TMDB_TV(AbstractSite):
            'single_episode_length': None,
            'brief': brief,
            'cover_image_url': img_url,
            'related_pages': season_links,
        })
        if imdb_code:
            pd.lookup_ids[IdType.IMDB] = imdb_code

        if pd.metadata["cover_image_url"]:
            imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
            try:

@@ -279,6 +293,13 @@ class TMDB_TVSeason(AbstractSite):
        if not d.get('id'):
            raise ParseError('id')
        pd = PageData(metadata=_copy_dict(d, {'name': 'title', 'overview': 'brief', 'air_date': 'air_date', 'season_number': 0, 'external_ids': 0}))
        pd.metadata['required_pages'] = [{
            'model': 'TVShow',
            'id_type': IdType.TMDB_TV,
            'id_value': v[0],
            'title': f'TMDB TV Show {v[0]}',
            'url': f"https://www.themoviedb.org/tv/{v[0]}",
        }]
        pd.lookup_ids[IdType.IMDB] = d['external_ids'].get('imdb_id')
        pd.metadata['cover_image_url'] = ('https://image.tmdb.org/t/p/original/' + d['poster_path']) if d['poster_path'] else None
        pd.metadata['title'] = pd.metadata['title'] if pd.metadata['title'] else f'Season {d["season_number"]}'

@@ -295,7 +316,7 @@ class TMDB_TVSeason(AbstractSite):
        # get external id from 1st episode
        if pd.lookup_ids[IdType.IMDB]:
            logger.warning("Unexpected IMDB id for TMDB tv season")
        elif len(pd.metadata['episode_number_list']) == 0:
        elif len(pd.metadata['episode_number_list']) == 0:
            logger.warning("Unable to lookup IMDB id for TMDB tv season with zero episodes")
        else:
            ep = pd.metadata['episode_number_list'][0]
@@ -39,14 +39,23 @@ class TVSeason(Item):
    douban_movie = PrimaryLookupIdDescriptor(IdType.DoubanMovie)
    imdb = PrimaryLookupIdDescriptor(IdType.IMDB)
    tmdb_tvseason = PrimaryLookupIdDescriptor(IdType.TMDB_TVSeason)
    series = models.ForeignKey(TVShow, null=True, on_delete=models.SET_NULL, related_name='seasons')
    show = models.ForeignKey(TVShow, null=True, on_delete=models.SET_NULL, related_name='seasons')
    season_number = models.PositiveIntegerField()
    episode_count = jsondata.IntegerField(blank=True, default=None)
    METADATA_COPY_LIST = ['title', 'brief', 'season_number', 'episode_count']

    def update_linked_items_from_extenal_page(self, page):
        """add Work from page.metadata['work'] if not yet"""
        links = page.required_pages + page.related_pages
        for w in links:
            if w['model'] == 'TVShow':
                p = ExternalPage.objects.filter(id_type=w['id_type'], id_value=w['id_value']).first()
                if p and p.item and self.show != p.item:
                    self.show = p.item


class TVEpisode(Item):
    series = models.ForeignKey(TVShow, null=True, on_delete=models.SET_NULL, related_name='episodes')
    show = models.ForeignKey(TVShow, null=True, on_delete=models.SET_NULL, related_name='episodes')
    season = models.ForeignKey(TVSeason, null=True, on_delete=models.SET_NULL, related_name='episodes')
    episode_number = models.PositiveIntegerField()
    imdb = PrimaryLookupIdDescriptor(IdType.IMDB)
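A short sketch of the season-to-show auto-link added here (names assumed from this diff): once the TMDB show page has been fetched and carries an item, the season picks its show out of the `'TVShow'` entry in `required_pages`:

```python
# Illustrative only; 'season' is an existing TVSeason whose ExternalPage lists a TVShow in required_pages.
season_page = season.external_pages.all().first()
season.update_linked_items_from_extenal_page(season_page)  # sets season.show if the show's page has an item
season.save()
# the tests below then assert that the linked show has imdb == 'tt0436992'
```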
@@ -60,6 +60,8 @@ class TMDBTVSeasonTestCase(TestCase):
        self.assertEqual(site.page.item.primary_lookup_id_type, IdType.IMDB)
        self.assertEqual(site.page.item.__class__.__name__, 'TVSeason')
        self.assertEqual(site.page.item.imdb, 'tt1159991')
        self.assertIsNotNone(site.page.item.show)
        self.assertEqual(site.page.item.show.imdb, 'tt0436992')


class DoubanMovieTVTestCase(TestCase):

@@ -68,6 +70,8 @@ class DoubanMovieTVTestCase(TestCase):
        url3 = 'https://movie.douban.com/subject/3627919/'
        p3 = SiteList.get_site_by_url(url3).get_page_ready()
        self.assertEqual(p3.item.__class__.__name__, 'TVSeason')
        self.assertIsNotNone(p3.item.show)
        self.assertEqual(p3.item.show.imdb, 'tt0436992')

    @use_local_response
    def test_scrape_singleseason(self):
@@ -0,0 +1 @@
{"movie_results":[],"person_results":[],"tv_results":[{"adult":false,"backdrop_path":"/8IC1q0lHFwi5m8VtChLzIfmpaZH.jpg","id":86941,"name":"北海鲸梦","original_language":"en","original_name":"The North Water","overview":"改编自伊恩·麦奎尔的同名获奖小说,聚焦19世纪一次灾难性的捕鲸活动。故事围绕帕特里克·萨姆纳展开,他是一名声名狼藉的前战地医生,后成为捕鲸船上的医生,在船上遇到了鱼叉手亨利·德拉克斯,一个残忍、不道德的杀手。萨姆纳没有逃离过去的恐惧,而是被迫在北极荒原上为生存而进行残酷的斗争...","poster_path":"/9CM0ca8pX1os3SJ24hsIc0nN8ph.jpg","media_type":"tv","genre_ids":[18,9648],"popularity":11.318,"first_air_date":"2021-07-14","vote_average":7.5,"vote_count":75,"origin_country":["US"]}],"tv_episode_results":[],"tv_season_results":[]}
2516  test_data/https___book_douban_com_works_1008677_ (new file; diff suppressed because it is too large)
4165  test_data/https___www_goodreads_com_work_editions_1383900 (new file; diff suppressed because one or more lines are too long)
4019  test_data/https___www_goodreads_com_work_editions_153313 (new file; diff suppressed because one or more lines are too long)
4160  test_data/https___www_goodreads_com_work_editions_24173962 (new file; diff suppressed because one or more lines are too long)