merge from main
commit b5c849d6b0
102 changed files with 48009 additions and 14 deletions
2 .gitignore vendored
@@ -1,3 +1,5 @@
.DS_Store

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
0 catalog/__init__.py Normal file
3 catalog/admin.py Normal file
@@ -0,0 +1,3 @@
from django.contrib import admin

# Register your models here.
11 catalog/api.py Normal file
@@ -0,0 +1,11 @@
from ninja import NinjaAPI
from .models import Podcast
from django.conf import settings


api = NinjaAPI(title=settings.SITE_INFO['site_name'], version="1.0.0", description=settings.SITE_INFO['site_name'])


@api.get("/podcasts/{item_id}")
def get_item(request, item_id: int):
    return Podcast.objects.filter(pk=item_id).first()
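A note on wiring: the NinjaAPI instance above only serves requests once it is mounted in the project's URLconf. A minimal sketch, assuming the conventional mount point (the urls.py placement and the "api/" prefix are assumptions, not part of this commit):

# urls.py -- hypothetical mounting; the prefix is an assumption
from django.urls import path
from catalog.api import api

urlpatterns = [
    path("api/", api.urls),  # would expose GET /api/podcasts/{item_id}
]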
6 catalog/apps.py Normal file
@@ -0,0 +1,6 @@
from django.apps import AppConfig


class CatalogConfig(AppConfig):
    default_auto_field = 'django.db.models.BigAutoField'
    name = 'catalog'
77 catalog/book/models.py Normal file
@@ -0,0 +1,77 @@
"""
Models for Book

Series -> Work -> Edition

Series is not fully implemented at the moment

Goodreads
Famous works have many editions

Google Books:
only has Edition level ("volume") data

Douban:
old editions have only CUBN (Chinese Unified Book Number)
work data seems asymmetric (a book links to a work, but may not be listed in that work as one of its editions)

"""

from django.db import models
from django.utils.translation import gettext_lazy as _
from catalog.common import *
from .utils import *


class Edition(Item):
    isbn = PrimaryLookupIdDescriptor(IdType.ISBN)
    asin = PrimaryLookupIdDescriptor(IdType.ASIN)
    cubn = PrimaryLookupIdDescriptor(IdType.CUBN)
    # douban_book = LookupIdDescriptor(IdType.DoubanBook)
    # goodreads = LookupIdDescriptor(IdType.Goodreads)
    languages = jsondata.ArrayField(_("语言"), null=True, blank=True, default=list)
    publish_year = jsondata.IntegerField(_("发表年份"), null=True, blank=True)
    publish_month = jsondata.IntegerField(_("发表月份"), null=True, blank=True)
    pages = jsondata.IntegerField(blank=True, default=None)
    authors = jsondata.ArrayField(_('作者'), null=False, blank=False, default=list)
    translaters = jsondata.ArrayField(_('译者'), null=True, blank=True, default=list)
    publishers = jsondata.ArrayField(_('出版方'), null=True, blank=True, default=list)

    @property
    def isbn10(self):
        return isbn_13_to_10(self.isbn)

    @isbn10.setter
    def isbn10(self, value):
        self.isbn = isbn_10_to_13(value)

    def update_linked_items_from_external_resource(self, resource):
        """add Work from resource.metadata['work'] if not yet"""
        links = resource.required_resources + resource.related_resources
        for w in links:
            if w['model'] == 'Work':
                work = Work.objects.filter(primary_lookup_id_type=w['id_type'], primary_lookup_id_value=w['id_value']).first()
                if work and work not in self.works.all():
                    self.works.add(work)
                # if not work:
                #     _logger.info(f'Unable to find link for {w["url"]}')


class Work(Item):
    # douban_work = PrimaryLookupIdDescriptor(IdType.DoubanBook_Work)
    # goodreads_work = PrimaryLookupIdDescriptor(IdType.Goodreads_Work)
    editions = models.ManyToManyField(Edition, related_name='works')  # , through='WorkEdition'

    # def __str__(self):
    #     return self.title

    # class Meta:
    #     proxy = True


class Series(Item):
    # douban_serie = LookupIdDescriptor(IdType.DoubanBook_Serie)
    # goodreads_serie = LookupIdDescriptor(IdType.Goodreads_Serie)

    class Meta:
        proxy = True
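Worth noting: isbn10 above is a derived view over the stored ISBN-13, so the getter and setter convert on the fly rather than keeping a second value. A minimal sketch of the round trip (values match the Hyperion fixture in the tests below):

edition = Edition(title="Hyperion")
edition.isbn10 = '0553283685'            # setter stores the ISBN-13 form via isbn_10_to_13()
assert edition.isbn == '9780553283686'
assert edition.isbn10 == '0553283685'    # getter converts back via isbn_13_to_10()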
237 catalog/book/tests.py Normal file
@@ -0,0 +1,237 @@
from django.test import TestCase
from catalog.book.models import *
from catalog.common import *


class BookTestCase(TestCase):
    def setUp(self):
        hyperion = Edition.objects.create(title="Hyperion")
        hyperion.pages = 500
        hyperion.isbn = '9780553283686'
        hyperion.save()
        # hyperion.isbn10 = '0553283685'

    def test_properties(self):
        hyperion = Edition.objects.get(title="Hyperion")
        self.assertEqual(hyperion.title, "Hyperion")
        self.assertEqual(hyperion.pages, 500)
        self.assertEqual(hyperion.primary_lookup_id_type, IdType.ISBN)
        self.assertEqual(hyperion.primary_lookup_id_value, '9780553283686')
        andymion = Edition(title="Andymion", pages=42)
        self.assertEqual(andymion.pages, 42)

    def test_lookupids(self):
        hyperion = Edition.objects.get(title="Hyperion")
        hyperion.asin = 'B004G60EHS'
        self.assertEqual(hyperion.primary_lookup_id_type, IdType.ASIN)
        self.assertEqual(hyperion.primary_lookup_id_value, 'B004G60EHS')
        self.assertEqual(hyperion.isbn, None)
        self.assertEqual(hyperion.isbn10, None)

    def test_isbn(self):
        hyperion = Edition.objects.get(title="Hyperion")
        self.assertEqual(hyperion.isbn, '9780553283686')
        self.assertEqual(hyperion.isbn10, '0553283685')
        hyperion.isbn10 = '0575099437'
        self.assertEqual(hyperion.isbn, '9780575099432')
        self.assertEqual(hyperion.isbn10, '0575099437')

    def test_work(self):
        hyperion_print = Edition.objects.get(title="Hyperion")
        hyperion_ebook = Edition(title="Hyperion")
        hyperion_ebook.save()
        hyperion_ebook.asin = 'B0043M6780'
        hyperion = Work(title="Hyperion")
        hyperion.save()
        hyperion.editions.add(hyperion_print)
        hyperion.editions.add(hyperion_ebook)
        # andymion = Edition(title="Andymion", pages=42)
        # serie = Serie(title="Hyperion Cantos")


class GoodreadsTestCase(TestCase):
    def setUp(self):
        pass

    def test_parse(self):
        t_type = IdType.Goodreads
        t_id = '77566'
        t_url = 'https://www.goodreads.com/zh/book/show/77566.Hyperion'
        t_url2 = 'https://www.goodreads.com/book/show/77566'
        p1 = SiteList.get_site_by_id_type(t_type)
        p2 = SiteList.get_site_by_url(t_url)
        self.assertEqual(p1.id_to_url(t_id), t_url2)
        self.assertEqual(p2.url_to_id(t_url), t_id)

    @use_local_response
    def test_scrape(self):
        t_url = 'https://www.goodreads.com/book/show/77566.Hyperion'
        t_url2 = 'https://www.goodreads.com/book/show/77566'
        isbn = '9780553283686'
        site = SiteList.get_site_by_url(t_url)
        self.assertEqual(site.ready, False)
        self.assertEqual(site.url, t_url2)
        site.get_resource()
        self.assertEqual(site.ready, False)
        self.assertIsNotNone(site.resource)
        site.get_resource_ready()
        self.assertEqual(site.ready, True)
        self.assertEqual(site.resource.metadata.get('title'), 'Hyperion')
        self.assertEqual(site.resource.metadata.get('isbn'), isbn)
        self.assertEqual(site.resource.required_resources[0]['id_value'], '1383900')
        edition = Edition.objects.get(primary_lookup_id_type=IdType.ISBN, primary_lookup_id_value=isbn)
        resource = edition.external_resources.all().first()
        self.assertEqual(resource.id_type, IdType.Goodreads)
        self.assertEqual(resource.id_value, '77566')
        self.assertNotEqual(resource.cover, '/media/item/default.svg')
        self.assertEqual(edition.isbn, '9780553283686')
        self.assertEqual(edition.title, 'Hyperion')

        edition.delete()
        site = SiteList.get_site_by_url(t_url)
        self.assertEqual(site.ready, False)
        self.assertEqual(site.url, t_url2)
        site.get_resource()
        self.assertEqual(site.ready, True, 'previous resource should still exist with data')

    @use_local_response
    def test_asin(self):
        t_url = 'https://www.goodreads.com/book/show/45064996-hyperion'
        site = SiteList.get_site_by_url(t_url)
        site.get_resource_ready()
        self.assertEqual(site.resource.item.title, 'Hyperion')
        self.assertEqual(site.resource.item.asin, 'B004G60EHS')

    @use_local_response
    def test_work(self):
        url = 'https://www.goodreads.com/work/editions/153313'
        p = SiteList.get_site_by_url(url).get_resource_ready()
        self.assertEqual(p.item.title, '1984')
        url1 = 'https://www.goodreads.com/book/show/3597767-rok-1984'
        url2 = 'https://www.goodreads.com/book/show/40961427-1984'
        p1 = SiteList.get_site_by_url(url1).get_resource_ready()
        p2 = SiteList.get_site_by_url(url2).get_resource_ready()
        w1 = p1.item.works.all().first()
        w2 = p2.item.works.all().first()
        self.assertEqual(w1, w2)


class GoogleBooksTestCase(TestCase):
    def test_parse(self):
        t_type = IdType.GoogleBooks
        t_id = 'hV--zQEACAAJ'
        t_url = 'https://books.google.com.bn/books?id=hV--zQEACAAJ&hl=ms'
        t_url2 = 'https://books.google.com/books?id=hV--zQEACAAJ'
        p1 = SiteList.get_site_by_url(t_url)
        p2 = SiteList.get_site_by_url(t_url2)
        self.assertIsNotNone(p1)
        self.assertEqual(p1.url, t_url2)
        self.assertEqual(p1.ID_TYPE, t_type)
        self.assertEqual(p1.id_value, t_id)
        self.assertEqual(p2.url, t_url2)

    @use_local_response
    def test_scrape(self):
        t_url = 'https://books.google.com.bn/books?id=hV--zQEACAAJ'
        site = SiteList.get_site_by_url(t_url)
        self.assertEqual(site.ready, False)
        site.get_resource_ready()
        self.assertEqual(site.ready, True)
        self.assertEqual(site.resource.metadata.get('title'), '1984 Nineteen Eighty-Four')
        self.assertEqual(site.resource.metadata.get('isbn'), '9781847498571')
        self.assertEqual(site.resource.id_type, IdType.GoogleBooks)
        self.assertEqual(site.resource.id_value, 'hV--zQEACAAJ')
        self.assertEqual(site.resource.item.isbn, '9781847498571')
        self.assertEqual(site.resource.item.title, '1984 Nineteen Eighty-Four')


class DoubanBookTestCase(TestCase):
    def setUp(self):
        pass

    def test_parse(self):
        t_type = IdType.DoubanBook
        t_id = '35902899'
        t_url = 'https://m.douban.com/book/subject/35902899/'
        t_url2 = 'https://book.douban.com/subject/35902899/'
        p1 = SiteList.get_site_by_url(t_url)
        p2 = SiteList.get_site_by_url(t_url2)
        self.assertEqual(p1.url, t_url2)
        self.assertEqual(p1.ID_TYPE, t_type)
        self.assertEqual(p1.id_value, t_id)
        self.assertEqual(p2.url, t_url2)

    @use_local_response
    def test_scrape(self):
        t_url = 'https://book.douban.com/subject/35902899/'
        site = SiteList.get_site_by_url(t_url)
        self.assertEqual(site.ready, False)
        site.get_resource_ready()
        self.assertEqual(site.ready, True)
        self.assertEqual(site.resource.metadata.get('title'), '1984 Nineteen Eighty-Four')
        self.assertEqual(site.resource.metadata.get('isbn'), '9781847498571')
        self.assertEqual(site.resource.id_type, IdType.DoubanBook)
        self.assertEqual(site.resource.id_value, '35902899')
        self.assertEqual(site.resource.item.isbn, '9781847498571')
        self.assertEqual(site.resource.item.title, '1984 Nineteen Eighty-Four')

    @use_local_response
    def test_work(self):
        # url = 'https://www.goodreads.com/work/editions/153313'
        url1 = 'https://book.douban.com/subject/1089243/'
        url2 = 'https://book.douban.com/subject/2037260/'
        p1 = SiteList.get_site_by_url(url1).get_resource_ready()
        p2 = SiteList.get_site_by_url(url2).get_resource_ready()
        w1 = p1.item.works.all().first()
        w2 = p2.item.works.all().first()
        self.assertEqual(w1.title, '黄金时代')
        self.assertEqual(w2.title, '黄金时代')
        self.assertEqual(w1, w2)
        editions = w1.editions.all().order_by('title')
        self.assertEqual(editions.count(), 2)
        self.assertEqual(editions[0].title, 'Wang in Love and Bondage')
        self.assertEqual(editions[1].title, '黄金时代')


class MultiBookSitesTestCase(TestCase):
    @use_local_response
    def test_editions(self):
        # isbn = '9781847498571'
        url1 = 'https://www.goodreads.com/book/show/56821625-1984'
        url2 = 'https://book.douban.com/subject/35902899/'
        url3 = 'https://books.google.com/books?id=hV--zQEACAAJ'
        p1 = SiteList.get_site_by_url(url1).get_resource_ready()
        p2 = SiteList.get_site_by_url(url2).get_resource_ready()
        p3 = SiteList.get_site_by_url(url3).get_resource_ready()
        self.assertEqual(p1.item.id, p2.item.id)
        self.assertEqual(p2.item.id, p3.item.id)

    @use_local_response
    def test_works(self):
        # url1 and url4 have the same ISBN, hence they share the same Edition instance, which belongs to 2 Work instances
        url1 = 'https://book.douban.com/subject/1089243/'
        url2 = 'https://book.douban.com/subject/2037260/'
        url3 = 'https://www.goodreads.com/book/show/59952545-golden-age'
        url4 = 'https://www.goodreads.com/book/show/11798823'
        p1 = SiteList.get_site_by_url(url1).get_resource_ready()  # lxml bug may break this
        w1 = p1.item.works.all().first()
        p2 = SiteList.get_site_by_url(url2).get_resource_ready()
        w2 = p2.item.works.all().first()
        self.assertEqual(w1, w2)
        self.assertEqual(p1.item.works.all().count(), 1)
        p3 = SiteList.get_site_by_url(url3).get_resource_ready()
        w3 = p3.item.works.all().first()
        self.assertNotEqual(w3, w2)
        p4 = SiteList.get_site_by_url(url4).get_resource_ready()
        self.assertEqual(p4.item.works.all().count(), 2)
        self.assertEqual(p1.item.works.all().count(), 2)
        w2e = w2.editions.all().order_by('title')
        self.assertEqual(w2e.count(), 2)
        self.assertEqual(w2e[0].title, 'Wang in Love and Bondage')
        self.assertEqual(w2e[1].title, '黄金时代')
        w3e = w3.editions.all().order_by('title')
        self.assertEqual(w3e.count(), 2)
        self.assertEqual(w3e[0].title, 'Golden Age: A Novel')
        self.assertEqual(w3e[1].title, '黄金时代')
        e = Edition.objects.get(primary_lookup_id_value=9781662601217)
        self.assertEqual(e.title, 'Golden Age: A Novel')
45 catalog/book/utils.py Normal file
@@ -0,0 +1,45 @@
def check_digit_10(isbn):
    assert len(isbn) == 9
    sum = 0
    for i in range(len(isbn)):
        c = int(isbn[i])
        w = i + 1
        sum += w * c
    r = sum % 11
    return 'X' if r == 10 else str(r)


def check_digit_13(isbn):
    assert len(isbn) == 12
    sum = 0
    for i in range(len(isbn)):
        c = int(isbn[i])
        w = 3 if i % 2 else 1
        sum += w * c
    r = 10 - (sum % 10)
    return '0' if r == 10 else str(r)


def isbn_10_to_13(isbn):
    if not isbn or len(isbn) != 10:
        return None
    return '978' + isbn[:-1] + check_digit_13('978' + isbn[:-1])


def isbn_13_to_10(isbn):
    if not isbn or len(isbn) != 13 or isbn[:3] != '978':
        return None
    else:
        return isbn[3:12] + check_digit_10(isbn[3:12])


def is_isbn_13(isbn):
    return len(isbn) == 13


def is_isbn_10(isbn):
    return len(isbn) == 10 and isbn[0] >= '0' and isbn[0] <= '9'


def is_asin(asin):
    return len(asin) == 10 and asin[0].lower() == 'b'
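As a worked example of the checksum math above: converting '0553283685' drops the ISBN-10 check digit, prefixes '978', and weights the digits of '978055328368' alternately by 1 and 3, giving a sum of 114; the EAN-13 check digit is then 10 - (114 mod 10) = 6, hence '9780553283686'. A quick sanity check (values from the test fixtures):

from catalog.book.utils import isbn_10_to_13, isbn_13_to_10

assert isbn_10_to_13('0553283685') == '9780553283686'
assert isbn_13_to_10('9780553283686') == '0553283685'
assert isbn_13_to_10('12345') is None   # malformed input yields None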
8 catalog/common/__init__.py Normal file
@@ -0,0 +1,8 @@
from .models import *
from .sites import *
from .downloaders import *
from .scrapers import *
from . import jsondata


__all__ = ('IdType', 'Item', 'ExternalResource', 'ResourceContent', 'ParseError', 'AbstractSite', 'SiteList', 'jsondata', 'PrimaryLookupIdDescriptor', 'LookupIdDescriptor', 'get_mock_mode', 'get_mock_file', 'use_local_response', 'RetryDownloader', 'BasicDownloader', 'ProxiedDownloader', 'BasicImageDownloader', 'RESPONSE_OK', 'RESPONSE_NETWORK_ERROR', 'RESPONSE_INVALID_CONTENT', 'RESPONSE_CENSORSHIP')
245 catalog/common/downloaders.py Normal file
@@ -0,0 +1,245 @@
import requests
import filetype
from PIL import Image
from io import BytesIO
from requests.exceptions import RequestException
from django.conf import settings
from pathlib import Path
import json
from io import StringIO
import re
import time
import logging
from lxml import html


_logger = logging.getLogger(__name__)


RESPONSE_OK = 0  # response is ready for parsing
RESPONSE_INVALID_CONTENT = -1  # content not valid but no need to retry
RESPONSE_NETWORK_ERROR = -2  # network error, retry next proxied url
RESPONSE_CENSORSHIP = -3  # censored, try sth special if possible

_mock_mode = False


def use_local_response(func):
    def _func(args):
        set_mock_mode(True)
        func(args)
        set_mock_mode(False)
    return _func


def set_mock_mode(enabled):
    global _mock_mode
    _mock_mode = enabled


def get_mock_mode():
    global _mock_mode
    return _mock_mode


def get_mock_file(url):
    fn = re.sub(r'[^\w]', '_', url)
    return re.sub(r'_key_[A-Za-z0-9]+', '_key_19890604', fn)


class DownloadError(Exception):
    def __init__(self, downloader, msg=None):
        self.url = downloader.url
        self.logs = downloader.logs
        if downloader.response_type == RESPONSE_INVALID_CONTENT:
            error = "Invalid Response"
        elif downloader.response_type == RESPONSE_NETWORK_ERROR:
            error = "Network Error"
        elif downloader.response_type == RESPONSE_CENSORSHIP:
            error = "Censored Content"
        else:
            error = "Unknown Error"
        self.message = f"Download Failed: {error}{', ' + msg if msg else ''}, url: {self.url}"
        super().__init__(self.message)


class BasicDownloader:
    headers = {
        # 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:107.0) Gecko/20100101 Firefox/107.0',
        'User-Agent': 'Mozilla/5.0 (iPad; CPU OS 14_7_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Mobile/15E148 Safari/604.1',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
        'Accept-Encoding': 'gzip, deflate',
        'Connection': 'keep-alive',
        'DNT': '1',
        'Upgrade-Insecure-Requests': '1',
        'Cache-Control': 'no-cache',
    }

    def __init__(self, url, headers=None):
        self.url = url
        self.response_type = RESPONSE_OK
        self.logs = []
        if headers:
            self.headers = headers

    def get_timeout(self):
        return settings.SCRAPING_TIMEOUT

    def validate_response(self, response):
        if response is None:
            return RESPONSE_NETWORK_ERROR
        elif response.status_code == 200:
            return RESPONSE_OK
        else:
            return RESPONSE_INVALID_CONTENT

    def _download(self, url):
        try:
            if not _mock_mode:
                # TODO cache = get/set from redis
                resp = requests.get(url, headers=self.headers, timeout=self.get_timeout())
                if settings.DOWNLOADER_SAVEDIR:
                    with open(settings.DOWNLOADER_SAVEDIR + '/' + get_mock_file(url), 'w', encoding='utf-8') as fp:
                        fp.write(resp.text)
            else:
                resp = MockResponse(self.url)
            response_type = self.validate_response(resp)
            self.logs.append({'response_type': response_type, 'url': url, 'exception': None})

            return resp, response_type
        except RequestException as e:
            self.logs.append({'response_type': RESPONSE_NETWORK_ERROR, 'url': url, 'exception': e})
            return None, RESPONSE_NETWORK_ERROR

    def download(self):
        resp, self.response_type = self._download(self.url)
        if self.response_type == RESPONSE_OK:
            return resp
        else:
            raise DownloadError(self)


class ProxiedDownloader(BasicDownloader):
    def get_proxied_urls(self):
        urls = []
        if settings.PROXYCRAWL_KEY is not None:
            urls.append(f'https://api.proxycrawl.com/?token={settings.PROXYCRAWL_KEY}&url={self.url}')
        if settings.SCRAPESTACK_KEY is not None:
            # urls.append(f'http://api.scrapestack.com/scrape?access_key={settings.SCRAPESTACK_KEY}&url={self.url}')
            urls.append(f'http://api.scrapestack.com/scrape?keep_headers=1&access_key={settings.SCRAPESTACK_KEY}&url={self.url}')
        if settings.SCRAPERAPI_KEY is not None:
            urls.append(f'http://api.scraperapi.com/?api_key={settings.SCRAPERAPI_KEY}&url={self.url}')
        return urls

    def get_special_proxied_url(self):
        return f'{settings.LOCAL_PROXY}?url={self.url}' if settings.LOCAL_PROXY is not None else None

    def download(self):
        urls = self.get_proxied_urls()
        last_try = False
        url = urls.pop(0) if len(urls) else None
        resp = None
        while url:
            resp, resp_type = self._download(url)
            if resp_type == RESPONSE_OK or resp_type == RESPONSE_INVALID_CONTENT or last_try:
                url = None
            elif resp_type == RESPONSE_CENSORSHIP:
                url = self.get_special_proxied_url()
                last_try = True
            else:  # resp_type == RESPONSE_NETWORK_ERROR
                url = urls.pop(0) if len(urls) else None
        self.response_type = resp_type
        if self.response_type == RESPONSE_OK:
            return resp
        else:
            raise DownloadError(self)


class RetryDownloader(BasicDownloader):
    def download(self):
        retries = settings.DOWNLOADER_RETRIES
        while retries:
            retries -= 1
            resp, self.response_type = self._download(self.url)
            if self.response_type == RESPONSE_OK:
                return resp
            elif self.response_type != RESPONSE_NETWORK_ERROR and retries == 0:
                raise DownloadError(self)
            elif retries > 0:
                _logger.debug('Retry ' + self.url)
                time.sleep((settings.DOWNLOADER_RETRIES - retries) * 0.5)
        raise DownloadError(self, 'ran out of retries')


class ImageDownloaderMixin:
    def __init__(self, url, referer=None):
        if referer is not None:
            self.headers['Referer'] = referer
        super().__init__(url)

    def validate_response(self, response):
        if response and response.status_code == 200:
            try:
                raw_img = response.content
                img = Image.open(BytesIO(raw_img))
                img.load()  # corrupted image will trigger exception
                content_type = response.headers.get('Content-Type')
                self.extention = filetype.get_type(mime=content_type.partition(';')[0].strip()).extension
                return RESPONSE_OK
            except Exception:
                return RESPONSE_NETWORK_ERROR
        if response and response.status_code >= 400 and response.status_code < 500:
            return RESPONSE_INVALID_CONTENT
        else:
            return RESPONSE_NETWORK_ERROR


class BasicImageDownloader(ImageDownloaderMixin, BasicDownloader):
    @classmethod
    def download_image(cls, image_url, page_url):
        imgdl = cls(image_url, page_url)
        try:
            image = imgdl.download().content
            image_extention = imgdl.extention
            return image, image_extention
        except Exception:
            return None, None


class ProxiedImageDownloader(ImageDownloaderMixin, ProxiedDownloader):
    pass


_local_response_path = str(Path(__file__).parent.parent.parent.absolute()) + '/test_data/'


class MockResponse:
    def __init__(self, url):
        self.url = url
        fn = _local_response_path + get_mock_file(url)
        try:
            self.content = Path(fn).read_bytes()
            self.status_code = 200
            _logger.debug(f"use local response for {url} from {fn}")
        except Exception:
            self.content = b'Error: response file not found'
            self.status_code = 404
            _logger.debug(f"local response not found for {url} at {fn}")

    @property
    def text(self):
        return self.content.decode('utf-8')

    def json(self):
        return json.load(StringIO(self.text))

    def html(self):
        return html.fromstring(self.text)  # may throw exception unexpectedly due to OS bug

    @property
    def headers(self):
        return {'Content-Type': 'image/jpeg' if self.url.endswith('jpg') else 'text/html'}


requests.Response.html = MockResponse.html
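Taken together, the classes above separate policy (retry, proxy rotation, mock replay) from the single _download step. A minimal usage sketch (the URL is illustrative; DownloadError exposes the per-attempt records collected in self.logs):

from catalog.common.downloaders import RetryDownloader, DownloadError

try:
    resp = RetryDownloader('https://example.org/page').download()
    tree = resp.html()   # lxml tree, via the html() helper patched onto requests.Response above
except DownloadError as e:
    print(e.message, e.logs)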
201 catalog/common/jsondata.py Normal file
@@ -0,0 +1,201 @@
import copy
from datetime import date, datetime
from importlib import import_module

import django
from django.conf import settings
from django.core.exceptions import FieldError
from django.db.models import fields
from django.utils import dateparse, timezone

from functools import partialmethod
from django.db.models import JSONField


__all__ = ('BooleanField', 'CharField', 'DateField', 'DateTimeField', 'DecimalField', 'EmailField', 'FloatField', 'IntegerField', 'IPAddressField', 'GenericIPAddressField', 'NullBooleanField', 'TextField', 'TimeField', 'URLField', 'ArrayField')


class JSONFieldDescriptor(object):
    def __init__(self, field):
        self.field = field

    def __get__(self, instance, cls=None):
        if instance is None:
            return self
        json_value = getattr(instance, self.field.json_field_name)
        if isinstance(json_value, dict):
            if self.field.attname in json_value or not self.field.has_default():
                value = json_value.get(self.field.attname, None)
                if hasattr(self.field, 'from_json'):
                    value = self.field.from_json(value)
                return value
            else:
                default = self.field.get_default()
                if hasattr(self.field, 'to_json'):
                    json_value[self.field.attname] = self.field.to_json(default)
                else:
                    json_value[self.field.attname] = default
                return default
        return None

    def __set__(self, instance, value):
        json_value = getattr(instance, self.field.json_field_name)
        if json_value:
            assert isinstance(json_value, dict)
        else:
            json_value = {}

        if hasattr(self.field, 'to_json'):
            value = self.field.to_json(value)

        if not value and self.field.blank and not self.field.null:
            try:
                del json_value[self.field.attname]
            except KeyError:
                pass
        else:
            json_value[self.field.attname] = value

        setattr(instance, self.field.json_field_name, json_value)


class JSONFieldMixin(object):
    """
    Override django.db.model.fields.Field.contribute_to_class
    to make a field always private, and register custom access descriptor
    """

    def __init__(self, *args, **kwargs):
        self.json_field_name = kwargs.pop('json_field_name', 'metadata')
        super(JSONFieldMixin, self).__init__(*args, **kwargs)

    def contribute_to_class(self, cls, name, private_only=False):
        self.set_attributes_from_name(name)
        self.model = cls
        self.concrete = False
        self.column = self.json_field_name
        cls._meta.add_field(self, private=True)

        if not getattr(cls, self.attname, None):
            descriptor = JSONFieldDescriptor(self)
            setattr(cls, self.attname, descriptor)

        if self.choices is not None:
            setattr(cls, 'get_%s_display' % self.name,
                    partialmethod(cls._get_FIELD_display, field=self))

    def get_lookup(self, lookup_name):
        # Always return None, so that get_transform gets called
        return None

    def get_transform(self, name):
        class TransformFactoryWrapper:
            def __init__(self, json_field, transform, original_lookup):
                self.json_field = json_field
                self.transform = transform
                self.original_lookup = original_lookup

            def __call__(self, lhs, **kwargs):
                lhs = copy.copy(lhs)
                lhs.target = self.json_field
                lhs.output_field = self.json_field
                transform = self.transform(lhs, **kwargs)
                transform._original_get_lookup = transform.get_lookup
                transform.get_lookup = lambda name: transform._original_get_lookup(self.original_lookup)
                return transform

        json_field = self.model._meta.get_field(self.json_field_name)
        transform = json_field.get_transform(self.name)
        if transform is None:
            raise FieldError(
                "JSONField '%s' has no support for key '%s' %s lookup" %
                (self.json_field_name, self.name, name)
            )

        return TransformFactoryWrapper(json_field, transform, name)


class BooleanField(JSONFieldMixin, fields.BooleanField):
    def __init__(self, *args, **kwargs):
        super(BooleanField, self).__init__(*args, **kwargs)
        if django.VERSION < (2, ):
            self.blank = False


class CharField(JSONFieldMixin, fields.CharField):
    pass


class DateField(JSONFieldMixin, fields.DateField):
    def to_json(self, value):
        if value:
            assert isinstance(value, (datetime, date))
            return value.strftime('%Y-%m-%d')

    def from_json(self, value):
        if value is not None:
            return dateparse.parse_date(value)


class DateTimeField(JSONFieldMixin, fields.DateTimeField):
    def to_json(self, value):
        if value:
            if not timezone.is_aware(value):
                value = timezone.make_aware(value)
            return value.isoformat()

    def from_json(self, value):
        if value:
            return dateparse.parse_datetime(value)


class DecimalField(JSONFieldMixin, fields.DecimalField):
    pass


class EmailField(JSONFieldMixin, fields.EmailField):
    pass


class FloatField(JSONFieldMixin, fields.FloatField):
    pass


class IntegerField(JSONFieldMixin, fields.IntegerField):
    pass


class IPAddressField(JSONFieldMixin, fields.IPAddressField):
    pass


class GenericIPAddressField(JSONFieldMixin, fields.GenericIPAddressField):
    pass


class NullBooleanField(JSONFieldMixin, fields.NullBooleanField):
    pass


class TextField(JSONFieldMixin, fields.TextField):
    pass


class TimeField(JSONFieldMixin, fields.TimeField):
    def to_json(self, value):
        if value:
            if not timezone.is_aware(value):
                value = timezone.make_aware(value)
            return value.isoformat()

    def from_json(self, value):
        if value:
            return dateparse.parse_time(value)


class URLField(JSONFieldMixin, fields.URLField):
    pass


class ArrayField(JSONFieldMixin, JSONField):
    pass
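The net effect of the mixin above: a jsondata field contributes no database column of its own; reads and writes go through JSONFieldDescriptor into the backing JSON column ('metadata' by default, per json_field_name). A minimal sketch using the Edition model defined earlier:

e = Edition(title="Hyperion")
e.pages = 500        # descriptor writes into the backing JSON column
print(e.metadata)    # {'pages': 500}
print(e.pages)       # 500, read back through the descriptor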
268 catalog/common/models.py Normal file
@@ -0,0 +1,268 @@
from polymorphic.models import PolymorphicModel
from django.db import models
from catalog.common import jsondata
from django.utils.translation import gettext_lazy as _
from django.utils import timezone
from django.core.files.uploadedfile import SimpleUploadedFile
from django.contrib.contenttypes.models import ContentType
import uuid
from .utils import DEFAULT_ITEM_COVER, item_cover_path
# from django.conf import settings


class IdType(models.TextChoices):
    WikiData = 'wikidata', _('维基数据')
    ISBN10 = 'isbn10', _('ISBN10')
    ISBN = 'isbn', _('ISBN')  # ISBN 13
    ASIN = 'asin', _('ASIN')
    ISSN = 'issn', _('ISSN')
    CUBN = 'cubn', _('统一书号')
    ISRC = 'isrc', _('ISRC')  # only for songs
    GTIN = 'gtin', _('GTIN UPC EAN码')  # ISBN is separate
    Feed = 'feed', _('Feed URL')
    IMDB = 'imdb', _('IMDb')
    TMDB_TV = 'tmdb_tv', _('TMDB剧集')
    TMDB_TVSeason = 'tmdb_tvseason', _('TMDB剧集')
    TMDB_TVEpisode = 'tmdb_tvepisode', _('TMDB剧集')
    TMDB_Movie = 'tmdb_movie', _('TMDB电影')
    Goodreads = 'goodreads', _('Goodreads')
    Goodreads_Work = 'goodreads_work', _('Goodreads著作')
    GoogleBooks = 'googlebooks', _('谷歌图书')
    DoubanBook = 'doubanbook', _('豆瓣读书')
    DoubanBook_Work = 'doubanbook_work', _('豆瓣读书著作')
    DoubanMovie = 'doubanmovie', _('豆瓣电影')
    DoubanMusic = 'doubanmusic', _('豆瓣音乐')
    DoubanGame = 'doubangame', _('豆瓣游戏')
    DoubanDrama = 'doubandrama', _('豆瓣舞台剧')
    Bandcamp = 'bandcamp', _('Bandcamp')
    Spotify_Album = 'spotify_album', _('Spotify专辑')
    Spotify_Show = 'spotify_show', _('Spotify播客')
    Discogs_Release = 'discogs_release', _('Discogs Release')
    Discogs_Master = 'discogs_master', _('Discogs Master')
    MusicBrainz = 'musicbrainz', _('MusicBrainz ID')
    DoubanBook_Author = 'doubanbook_author', _('豆瓣读书作者')
    DoubanCelebrity = 'doubanmovie_celebrity', _('豆瓣电影影人')
    Goodreads_Author = 'goodreads_author', _('Goodreads作者')
    Spotify_Artist = 'spotify_artist', _('Spotify艺术家')
    TMDB_Person = 'tmdb_person', _('TMDB影人')
    IGDB = 'igdb', _('IGDB游戏')
    Steam = 'steam', _('Steam游戏')
    Bangumi = 'bangumi', _('Bangumi')
    ApplePodcast = 'apple_podcast', _('苹果播客')


class ItemType(models.TextChoices):
    Book = 'book', _('书')
    TV = 'tv', _('剧集')
    TVSeason = 'tvseason', _('剧集分季')
    TVEpisode = 'tvepisode', _('剧集分集')
    Movie = 'movie', _('电影')
    Music = 'music', _('音乐')
    Game = 'game', _('游戏')
    Boardgame = 'boardgame', _('桌游')
    Podcast = 'podcast', _('播客')
    FanFic = 'fanfic', _('网文')
    Performance = 'performance', _('演出')
    Exhibition = 'exhibition', _('展览')


class SubItemType(models.TextChoices):
    Season = 'season', _('剧集分季')
    Episode = 'episode', _('剧集分集')
    Version = 'version', _('版本')


# class CreditType(models.TextChoices):
#     Author = 'author', _('作者')
#     Translater = 'translater', _('译者')
#     Producer = 'producer', _('出品人')
#     Director = 'director', _('电影')
#     Actor = 'actor', _('演员')
#     Playwright = 'playwright', _('播客')
#     VoiceActor = 'voiceactor', _('配音')
#     Host = 'host', _('主持人')
#     Developer = 'developer', _('开发者')
#     Publisher = 'publisher', _('出版方')


class PrimaryLookupIdDescriptor(object):  # TODO make it mixin of Field
    def __init__(self, id_type):
        self.id_type = id_type

    def __get__(self, instance, cls=None):
        if instance is None:
            return self
        if self.id_type != instance.primary_lookup_id_type:
            return None
        return instance.primary_lookup_id_value

    def __set__(self, instance, id_value):
        if id_value:
            instance.primary_lookup_id_type = self.id_type
            instance.primary_lookup_id_value = id_value
        else:
            instance.primary_lookup_id_type = None
            instance.primary_lookup_id_value = None


class LookupIdDescriptor(object):  # TODO make it mixin of Field
    def __init__(self, id_type):
        self.id_type = id_type

    def __get__(self, instance, cls=None):
        if instance is None:
            return self
        return instance.get_lookup_id(self.id_type)

    def __set__(self, instance, value):
        instance.set_lookup_id(self.id_type, value)


# class ItemId(models.Model):
#     item = models.ForeignKey('Item', models.CASCADE)
#     id_type = models.CharField(_("源网站"), blank=False, choices=IdType.choices, max_length=50)
#     id_value = models.CharField(_("源网站ID"), blank=False, max_length=1000)


# class ItemCredit(models.Model):
#     item = models.ForeignKey('Item', models.CASCADE)
#     credit_type = models.CharField(_("类型"), choices=CreditType.choices, blank=False, max_length=50)
#     name = models.CharField(_("名字"), blank=False, max_length=1000)


# def check_source_id(sid):
#     if not sid:
#         return True
#     s = sid.split(':')
#     if len(s) < 2:
#         return False
#     return sid[0] in IdType.values()


class Item(PolymorphicModel):
    uid = models.UUIDField(default=uuid.uuid4, editable=False)
    # item_type = models.CharField(_("类型"), choices=ItemType.choices, blank=False, max_length=50)
    title = models.CharField(_("title in primary language"), max_length=1000, default="")
    # title_ml = models.JSONField(_("title in different languages {['lang':'zh-cn', 'text':'', primary:True], ...}"), null=True, blank=True, default=list)
    brief = models.TextField(_("简介"), blank=True, default="")
    # brief_ml = models.JSONField(_("brief in different languages {['lang':'zh-cn', 'text':'', primary:True], ...}"), null=True, blank=True, default=list)
    genres = models.JSONField(_("分类"), null=True, blank=True, default=list)
    primary_lookup_id_type = models.CharField(_("isbn/cubn/imdb"), blank=False, null=True, max_length=50)
    primary_lookup_id_value = models.CharField(_("1234/tt789"), blank=False, null=True, max_length=1000)
    metadata = models.JSONField(_("其他信息"), blank=True, null=True, default=dict)
    cover = models.ImageField(upload_to=item_cover_path, default=DEFAULT_ITEM_COVER, blank=True)
    created_time = models.DateTimeField(auto_now_add=True)
    edited_time = models.DateTimeField(auto_now=True)
    # parent_item = models.ForeignKey('Item', null=True, on_delete=models.SET_NULL, related_name='child_items')
    # identical_item = models.ForeignKey('Item', null=True, on_delete=models.SET_NULL, related_name='identical_items')
    # def get_lookup_id(self, id_type: str) -> str:
    #     prefix = id_type.strip().lower() + ':'
    #     return next((x[len(prefix):] for x in self.lookup_ids if x.startswith(prefix)), None)

    class Meta:
        unique_together = [['polymorphic_ctype_id', 'primary_lookup_id_type', 'primary_lookup_id_value']]

    def __str__(self):
        return f"{self.id}{' ' + self.primary_lookup_id_type + ':' + self.primary_lookup_id_value if self.primary_lookup_id_value else ''} ({self.title})"

    @classmethod
    def get_best_lookup_id(cls, lookup_ids):
        """ get best available lookup id, ideally commonly used """
        best_id_types = [
            IdType.ISBN, IdType.CUBN, IdType.ASIN,
            IdType.GTIN, IdType.ISRC, IdType.MusicBrainz,
            IdType.Feed,
            IdType.IMDB, IdType.TMDB_TVSeason
        ]
        for t in best_id_types:
            if lookup_ids.get(t):
                return t, lookup_ids[t]
        return list(lookup_ids.items())[0]

    def update_lookup_ids(self, lookup_ids):
        # TODO
        # ll = set(lookup_ids)
        # ll = list(filter(lambda a, b: b, ll))
        # print(ll)
        pass

    METADATA_COPY_LIST = ['title', 'brief']  # list of metadata keys to copy from resource to item

    @classmethod
    def copy_metadata(cls, metadata):
        return dict((k, v) for k, v in metadata.items() if k in cls.METADATA_COPY_LIST and v is not None)

    def merge_data_from_external_resources(self):
        """Subclass may override this"""
        lookup_ids = []
        for p in self.external_resources.all():
            lookup_ids.append((p.id_type, p.id_value))
            lookup_ids += p.other_lookup_ids.items()
            for k in self.METADATA_COPY_LIST:
                if not getattr(self, k) and p.metadata.get(k):
                    setattr(self, k, p.metadata.get(k))
            if not self.cover and p.cover:
                self.cover = p.cover
        self.update_lookup_ids(lookup_ids)

    def update_linked_items_from_external_resource(self, resource):
        """Subclass should override this"""
        pass


class ItemLookupId(models.Model):
    item = models.ForeignKey(Item, null=True, on_delete=models.SET_NULL, related_name='lookup_ids')
    id_type = models.CharField(_("源网站"), blank=True, choices=IdType.choices, max_length=50)
    id_value = models.CharField(_("源网站ID"), blank=True, max_length=1000)
    raw_url = models.CharField(_("源网站ID"), blank=True, max_length=1000, unique=True)

    class Meta:
        unique_together = [['id_type', 'id_value']]


class ExternalResource(models.Model):
    item = models.ForeignKey(Item, null=True, on_delete=models.SET_NULL, related_name='external_resources')
    id_type = models.CharField(_("IdType of the source site"), blank=False, choices=IdType.choices, max_length=50)
    id_value = models.CharField(_("Primary Id on the source site"), blank=False, max_length=1000)
    url = models.CharField(_("url to the resource"), blank=False, max_length=1000, unique=True)
    cover = models.ImageField(upload_to=item_cover_path, default=DEFAULT_ITEM_COVER, blank=True)
    other_lookup_ids = models.JSONField(default=dict)
    metadata = models.JSONField(default=dict)
    scraped_time = models.DateTimeField(null=True)
    created_time = models.DateTimeField(auto_now_add=True)
    edited_time = models.DateTimeField(auto_now=True)
    required_resources = jsondata.ArrayField(null=False, blank=False, default=list)
    related_resources = jsondata.ArrayField(null=False, blank=False, default=list)

    class Meta:
        unique_together = [['id_type', 'id_value']]

    def __str__(self):
        return f"{self.id}{':' + self.id_type + ':' + self.id_value if self.id_value else ''} ({self.url})"

    def update_content(self, resource_content):
        self.other_lookup_ids = resource_content.lookup_ids
        self.metadata = resource_content.metadata
        if resource_content.cover_image and resource_content.cover_image_extention:
            self.cover = SimpleUploadedFile('temp.' + resource_content.cover_image_extention, resource_content.cover_image)
        self.scraped_time = timezone.now()
        self.save()

    @property
    def ready(self):
        return bool(self.metadata and self.scraped_time)

    def get_all_lookup_ids(self):
        d = self.other_lookup_ids.copy()
        d[self.id_type] = self.id_value
        d = {k: v for k, v in d.items() if bool(v)}
        return d

    def get_preferred_model(self):
        model = self.metadata.get('preferred_model')
        if model:
            m = ContentType.objects.filter(app_label='catalog', model=model.lower()).first()
            if m:
                return m.model_class()
            else:
                raise ValueError(f'preferred model {model} does not exist')
        return None
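One behavior worth calling out: PrimaryLookupIdDescriptor keeps a single (type, value) pair per item, so assigning one id type displaces another; this is exactly what BookTestCase.test_lookupids pins down:

e = Edition(title="Hyperion")
e.isbn = '9780553283686'   # primary_lookup_id_type becomes IdType.ISBN
e.asin = 'B004G60EHS'      # now IdType.ASIN; e.isbn reads back as None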
4 catalog/common/scrapers.py Normal file
@@ -0,0 +1,4 @@
class ParseError(Exception):
    def __init__(self, scraper, field):
        msg = f'{type(scraper).__name__}: Error parsing field "{field}" for url {scraper.url}'
        super().__init__(msg)
155 catalog/common/sites.py Normal file
@@ -0,0 +1,155 @@
"""
Site and SiteList

Site should inherit from AbstractSite
a Site should map to a unique set of url patterns.
a Site may scrape a url and store result in ResourceContent
ResourceContent persists as an ExternalResource which may link to an Item
"""
from typing import *
import re
from .models import ExternalResource
from dataclasses import dataclass, field
import logging


_logger = logging.getLogger(__name__)


@dataclass
class ResourceContent:
    lookup_ids: dict = field(default_factory=dict)
    metadata: dict = field(default_factory=dict)
    cover_image: bytes = None
    cover_image_extention: str = None


class AbstractSite:
    """
    Abstract class to represent a site
    """
    ID_TYPE = None
    WIKI_PROPERTY_ID = 'P0undefined0'
    DEFAULT_MODEL = None
    URL_PATTERNS = [r"\w+://undefined/(\d+)"]

    @classmethod
    def validate_url(self, url: str):
        u = next(iter([re.match(p, url) for p in self.URL_PATTERNS if re.match(p, url)]), None)
        return u is not None

    @classmethod
    def id_to_url(self, id_value):
        return 'https://undefined/' + id_value

    @classmethod
    def url_to_id(self, url: str):
        u = next(iter([re.match(p, url) for p in self.URL_PATTERNS if re.match(p, url)]), None)
        return u[1] if u else None

    def __str__(self):
        return f'<{self.__class__.__name__}: {self.url}>'

    def __init__(self, url=None):
        self.id_value = self.url_to_id(url) if url else None
        self.url = self.id_to_url(self.id_value) if url else None
        self.resource = None

    def get_resource(self):
        if not self.resource:
            self.resource = ExternalResource.objects.filter(url=self.url).first()
            if self.resource is None:
                self.resource = ExternalResource(id_type=self.ID_TYPE, id_value=self.id_value, url=self.url)
        return self.resource

    def bypass_scrape(self, data_from_link) -> ResourceContent | None:
        """subclass may implement this to use data from linked resource and bypass actual scrape"""
        return None

    def scrape(self) -> ResourceContent:
        """subclass should implement this, return ResourceContent object"""
        data = ResourceContent()
        return data

    def get_item(self):
        p = self.get_resource()
        if not p:
            raise ValueError(f'resource not available for {self.url}')
        model = p.get_preferred_model()
        if not model:
            model = self.DEFAULT_MODEL
        t, v = model.get_best_lookup_id(p.get_all_lookup_ids())
        if t is not None:
            p.item = model.objects.filter(primary_lookup_id_type=t, primary_lookup_id_value=v).first()
        if p.item is None:
            obj = model.copy_metadata(p.metadata)
            obj['primary_lookup_id_type'] = t
            obj['primary_lookup_id_value'] = v
            p.item = model.objects.create(**obj)
        return p.item

    @property
    def ready(self):
        return bool(self.resource and self.resource.ready)

    def get_resource_ready(self, auto_save=True, auto_create=True, auto_link=True, data_from_link=None):
        """return a resource scraped, or scrape if not yet"""
        if auto_link:
            auto_create = True
        if auto_create:
            auto_save = True
        p = self.get_resource()
        resource_content = {}
        if not self.resource:
            return None
        if not p.ready:
            resource_content = self.bypass_scrape(data_from_link)
            if not resource_content:
                resource_content = self.scrape()
            p.update_content(resource_content)
        if not p.ready:
            _logger.error(f'unable to get resource {self.url} ready')
            return None
        if auto_create and p.item is None:
            self.get_item()
        if auto_save:
            p.save()
            if p.item:
                p.item.merge_data_from_external_resources()
                p.item.save()
                if auto_link:
                    for linked_resources in p.required_resources:
                        linked_site = SiteList.get_site_by_url(linked_resources['url'])
                        if linked_site:
                            linked_site.get_resource_ready(auto_link=False)
                        else:
                            _logger.error(f'unable to get site for {linked_resources["url"]}')
                    p.item.update_linked_items_from_external_resource(p)
                    p.item.save()
        return p


class SiteList:
    registry = {}

    @classmethod
    def register(cls, target) -> Callable:
        id_type = target.ID_TYPE
        if id_type in cls.registry:
            raise ValueError(f'Site for {id_type} already exists')
        cls.registry[id_type] = target
        return target

    @classmethod
    def get_site_by_id_type(cls, typ: str):
        return cls.registry[typ]() if typ in cls.registry else None

    @classmethod
    def get_site_by_url(cls, url: str):
        cls = next(filter(lambda p: p.validate_url(url), cls.registry.values()), None)
        return cls(url) if cls else None

    @classmethod
    def get_id_by_url(cls, url: str):
        site = cls.get_site_by_url(url)
        return site.url_to_id(url) if site else None
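For orientation, a minimal sketch of how a concrete site would plug into this registry (the class body is illustrative, not from this commit; real implementations live under catalog/sites, and each ID_TYPE may register only once, so this would clash with the real Goodreads site):

@SiteList.register
class MyBookSite(AbstractSite):          # hypothetical site, for illustration only
    ID_TYPE = IdType.Goodreads
    DEFAULT_MODEL = Edition
    URL_PATTERNS = [r'\w+://www\.goodreads\.com/book/show/(\d+)']

    @classmethod
    def id_to_url(cls, id_value):
        return 'https://www.goodreads.com/book/show/' + id_value

    def scrape(self):
        return ResourceContent(metadata={'title': '...'})


site = SiteList.get_site_by_url('https://www.goodreads.com/book/show/77566')
item = site.get_resource_ready().item    # scrape, persist, and link in one call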
14 catalog/common/utils.py Normal file
@@ -0,0 +1,14 @@
import logging
from django.utils import timezone
import uuid


_logger = logging.getLogger(__name__)


DEFAULT_ITEM_COVER = 'item/default.svg'


def item_cover_path(resource, filename):
    fn = timezone.now().strftime('%Y/%m/%d/') + str(uuid.uuid4()) + '.' + filename.split('.')[-1]
    return 'items/' + resource.id_type + '/' + fn
8 catalog/game/models.py Normal file
@@ -0,0 +1,8 @@
from catalog.common import *


class Game(Item):
    igdb = PrimaryLookupIdDescriptor(IdType.IGDB)
    steam = PrimaryLookupIdDescriptor(IdType.Steam)
    douban_game = PrimaryLookupIdDescriptor(IdType.DoubanGame)
    platforms = jsondata.ArrayField(default=list)
117 catalog/game/tests.py Normal file
@@ -0,0 +1,117 @@
from django.test import TestCase
from catalog.common import *
from catalog.models import *


class IGDBTestCase(TestCase):
    def test_parse(self):
        t_id_type = IdType.IGDB
        t_id_value = 'portal-2'
        t_url = 'https://www.igdb.com/games/portal-2'
        site = SiteList.get_site_by_id_type(t_id_type)
        self.assertIsNotNone(site)
        self.assertEqual(site.validate_url(t_url), True)
        site = SiteList.get_site_by_url(t_url)
        self.assertEqual(site.url, t_url)
        self.assertEqual(site.id_value, t_id_value)

    @use_local_response
    def test_scrape(self):
        t_url = 'https://www.igdb.com/games/portal-2'
        site = SiteList.get_site_by_url(t_url)
        self.assertEqual(site.ready, False)
        site.get_resource_ready()
        self.assertEqual(site.ready, True)
        self.assertEqual(site.resource.metadata['title'], 'Portal 2')
        self.assertIsInstance(site.resource.item, Game)
        self.assertEqual(site.resource.item.steam, '620')

    @use_local_response
    def test_scrape_non_steam(self):
        t_url = 'https://www.igdb.com/games/the-legend-of-zelda-breath-of-the-wild'
        site = SiteList.get_site_by_url(t_url)
        self.assertEqual(site.ready, False)
        site.get_resource_ready()
        self.assertEqual(site.ready, True)
        self.assertEqual(site.resource.metadata['title'], 'The Legend of Zelda: Breath of the Wild')
        self.assertIsInstance(site.resource.item, Game)
        self.assertEqual(site.resource.item.primary_lookup_id_type, IdType.IGDB)
        self.assertEqual(site.resource.item.primary_lookup_id_value, 'the-legend-of-zelda-breath-of-the-wild')


class SteamTestCase(TestCase):
    def test_parse(self):
        t_id_type = IdType.Steam
        t_id_value = '620'
        t_url = 'https://store.steampowered.com/app/620/Portal_2/'
        t_url2 = 'https://store.steampowered.com/app/620'
        site = SiteList.get_site_by_id_type(t_id_type)
        self.assertIsNotNone(site)
        self.assertEqual(site.validate_url(t_url), True)
        site = SiteList.get_site_by_url(t_url)
        self.assertEqual(site.url, t_url2)
        self.assertEqual(site.id_value, t_id_value)

    @use_local_response
    def test_scrape(self):
        t_url = 'https://store.steampowered.com/app/620/Portal_2/'
        site = SiteList.get_site_by_url(t_url)
        self.assertEqual(site.ready, False)
        site.get_resource_ready()
        self.assertEqual(site.ready, True)
        self.assertEqual(site.resource.metadata['title'], 'Portal 2')
        self.assertEqual(site.resource.metadata['brief'], '“终身测试计划”现已升级,您可以为您自己或您的好友设计合作谜题!')
        self.assertIsInstance(site.resource.item, Game)
        self.assertEqual(site.resource.item.steam, '620')


class DoubanGameTestCase(TestCase):
    def test_parse(self):
        t_id_type = IdType.DoubanGame
        t_id_value = '10734307'
        t_url = 'https://www.douban.com/game/10734307/'
        site = SiteList.get_site_by_id_type(t_id_type)
        self.assertIsNotNone(site)
        self.assertEqual(site.validate_url(t_url), True)
        site = SiteList.get_site_by_url(t_url)
        self.assertEqual(site.url, t_url)
        self.assertEqual(site.id_value, t_id_value)

    @use_local_response
    def test_scrape(self):
        t_url = 'https://www.douban.com/game/10734307/'
        site = SiteList.get_site_by_url(t_url)
        self.assertEqual(site.ready, False)
        site.get_resource_ready()
        self.assertEqual(site.ready, True)
        self.assertEqual(site.resource.metadata['title'], '传送门2 Portal 2')
        self.assertIsInstance(site.resource.item, Game)
        self.assertEqual(site.resource.item.douban_game, '10734307')


class BangumiGameTestCase(TestCase):
    def test_parse(self):
        t_id_type = IdType.Bangumi
        t_id_value = '15912'
        t_url = 'https://bgm.tv/subject/15912'
        site = SiteList.get_site_by_id_type(t_id_type)
        self.assertIsNotNone(site)
        self.assertEqual(site.validate_url(t_url), True)
        site = SiteList.get_site_by_url(t_url)
        self.assertEqual(site.url, t_url)
        self.assertEqual(site.id_value, t_id_value)

    @use_local_response
    def test_scrape(self):
        # TODO
        pass


class MultiGameSitesTestCase(TestCase):
    @use_local_response
    def test_games(self):
        url1 = 'https://www.igdb.com/games/portal-2'
        url2 = 'https://store.steampowered.com/app/620/Portal_2/'
        p1 = SiteList.get_site_by_url(url1).get_resource_ready()
        p2 = SiteList.get_site_by_url(url2).get_resource_ready()
        self.assertEqual(p1.item.id, p2.item.id)
22 catalog/management/commands/cat.py Normal file
@@ -0,0 +1,22 @@
from django.core.management.base import BaseCommand
import pprint
from catalog.common import SiteList
from catalog.sites import *


class Command(BaseCommand):
    help = 'Scrape a catalog item from external resource (but not save it)'

    def add_arguments(self, parser):
        parser.add_argument('url', type=str, help='URL to scrape')

    def handle(self, *args, **options):
        url = str(options['url'])
        site = SiteList.get_site_by_url(url)
        if site is None:
            self.stdout.write(self.style.ERROR(f'Unknown site for {url}'))
            return
        self.stdout.write(f'Fetching from {site}')
        resource = site.get_resource_ready(auto_link=False, auto_save=False)
        self.stdout.write(self.style.SUCCESS('Done.'))
        pprint.pp(resource.metadata)
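Usage is a single positional URL (python manage.py cat <url>); the same thing can be driven from code via Django's call_command, a sketch using a URL from the movie tests below:

from django.core.management import call_command

call_command('cat', 'https://movie.douban.com/subject/3541415/')
# prints 'Fetching from ...', 'Done.' and pretty-prints the scraped metadata dict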
25 catalog/models.py Normal file
@@ -0,0 +1,25 @@
from .book.models import Edition, Work, Series
from .movie.models import Movie
from .tv.models import TVShow, TVSeason, TVEpisode
from .music.models import Album
from .game.models import Game
from .podcast.models import Podcast
from .performance.models import Performance


# class Exhibition(Item):

#     class Meta:
#         proxy = True


# class Fanfic(Item):

#     class Meta:
#         proxy = True


# class Boardgame(Item):

#     class Meta:
#         proxy = True
8
catalog/movie/models.py
Normal file
@ -0,0 +1,8 @@
from catalog.common import *


class Movie(Item):
    imdb = PrimaryLookupIdDescriptor(IdType.IMDB)
    tmdb_movie = PrimaryLookupIdDescriptor(IdType.TMDB_Movie)
    douban_movie = PrimaryLookupIdDescriptor(IdType.DoubanMovie)
    duration = jsondata.IntegerField(blank=True, default=None)
90
catalog/movie/tests.py
Normal file
@ -0,0 +1,90 @@
from django.test import TestCase
from catalog.common import *


class DoubanMovieTestCase(TestCase):
    def test_parse(self):
        t_id = '3541415'
        t_url = 'https://movie.douban.com/subject/3541415/'
        p1 = SiteList.get_site_by_id_type(IdType.DoubanMovie)
        self.assertIsNotNone(p1)
        self.assertEqual(p1.validate_url(t_url), True)
        p2 = SiteList.get_site_by_url(t_url)
        self.assertEqual(p1.id_to_url(t_id), t_url)
        self.assertEqual(p2.url_to_id(t_url), t_id)

    @use_local_response
    def test_scrape(self):
        t_url = 'https://movie.douban.com/subject/3541415/'
        site = SiteList.get_site_by_url(t_url)
        self.assertEqual(site.ready, False)
        self.assertEqual(site.id_value, '3541415')
        site.get_resource_ready()
        self.assertEqual(site.resource.metadata['title'], '盗梦空间')
        self.assertEqual(site.resource.item.primary_lookup_id_type, IdType.IMDB)
        self.assertEqual(site.resource.item.__class__.__name__, 'Movie')
        self.assertEqual(site.resource.item.imdb, 'tt1375666')


class TMDBMovieTestCase(TestCase):
    def test_parse(self):
        t_id = '293767'
        t_url = 'https://www.themoviedb.org/movie/293767-billy-lynn-s-long-halftime-walk'
        t_url2 = 'https://www.themoviedb.org/movie/293767'
        p1 = SiteList.get_site_by_id_type(IdType.TMDB_Movie)
        self.assertIsNotNone(p1)
        self.assertEqual(p1.validate_url(t_url), True)
        self.assertEqual(p1.validate_url(t_url2), True)
        p2 = SiteList.get_site_by_url(t_url)
        self.assertEqual(p1.id_to_url(t_id), t_url2)
        self.assertEqual(p2.url_to_id(t_url), t_id)

    @use_local_response
    def test_scrape(self):
        t_url = 'https://www.themoviedb.org/movie/293767'
        site = SiteList.get_site_by_url(t_url)
        self.assertEqual(site.ready, False)
        self.assertEqual(site.id_value, '293767')
        site.get_resource_ready()
        self.assertEqual(site.resource.metadata['title'], '比利·林恩的中场战事')
        self.assertEqual(site.resource.item.primary_lookup_id_type, IdType.IMDB)
        self.assertEqual(site.resource.item.__class__.__name__, 'Movie')
        self.assertEqual(site.resource.item.imdb, 'tt2513074')


class IMDBMovieTestCase(TestCase):
    def test_parse(self):
        t_id = 'tt1375666'
        t_url = 'https://www.imdb.com/title/tt1375666/'
        t_url2 = 'https://www.imdb.com/title/tt1375666/'
        p1 = SiteList.get_site_by_id_type(IdType.IMDB)
        self.assertIsNotNone(p1)
        self.assertEqual(p1.validate_url(t_url), True)
        self.assertEqual(p1.validate_url(t_url2), True)
        p2 = SiteList.get_site_by_url(t_url)
        self.assertEqual(p1.id_to_url(t_id), t_url2)
        self.assertEqual(p2.url_to_id(t_url), t_id)

    @use_local_response
    def test_scrape(self):
        t_url = 'https://www.imdb.com/title/tt1375666/'
        site = SiteList.get_site_by_url(t_url)
        self.assertEqual(site.ready, False)
        self.assertEqual(site.id_value, 'tt1375666')
        site.get_resource_ready()
        self.assertEqual(site.resource.metadata['title'], '盗梦空间')
        self.assertEqual(site.resource.item.primary_lookup_id_type, IdType.IMDB)
        self.assertEqual(site.resource.item.imdb, 'tt1375666')


class MultiMovieSitesTestCase(TestCase):
    @use_local_response
    def test_movies(self):
        url1 = 'https://www.themoviedb.org/movie/27205-inception'
        url2 = 'https://movie.douban.com/subject/3541415/'
        url3 = 'https://www.imdb.com/title/tt1375666/'
        p1 = SiteList.get_site_by_url(url1).get_resource_ready()
        p2 = SiteList.get_site_by_url(url2).get_resource_ready()
        p3 = SiteList.get_site_by_url(url3).get_resource_ready()
        self.assertEqual(p1.item.id, p2.item.id)
        self.assertEqual(p2.item.id, p3.item.id)
10
catalog/music/models.py
Normal file
@ -0,0 +1,10 @@
from catalog.common import *


class Album(Item):
    barcode = PrimaryLookupIdDescriptor(IdType.GTIN)
    douban_music = PrimaryLookupIdDescriptor(IdType.DoubanMusic)
    spotify_album = PrimaryLookupIdDescriptor(IdType.Spotify_Album)

    class Meta:
        proxy = True
61
catalog/music/tests.py
Normal file
@ -0,0 +1,61 @@
from django.test import TestCase
from catalog.common import *
from catalog.models import *


class SpotifyTestCase(TestCase):
    def test_parse(self):
        t_id_type = IdType.Spotify_Album
        t_id_value = '65KwtzkJXw7oT819NFWmEP'
        t_url = 'https://open.spotify.com/album/65KwtzkJXw7oT819NFWmEP'
        site = SiteList.get_site_by_id_type(t_id_type)
        self.assertIsNotNone(site)
        self.assertEqual(site.validate_url(t_url), True)
        site = SiteList.get_site_by_url(t_url)
        self.assertEqual(site.url, t_url)
        self.assertEqual(site.id_value, t_id_value)

    @use_local_response
    def test_scrape(self):
        t_url = 'https://open.spotify.com/album/65KwtzkJXw7oT819NFWmEP'
        site = SiteList.get_site_by_url(t_url)
        self.assertEqual(site.ready, False)
        site.get_resource_ready()
        self.assertEqual(site.ready, True)
        self.assertEqual(site.resource.metadata['title'], 'The Race For Space')
        self.assertIsInstance(site.resource.item, Album)
        self.assertEqual(site.resource.item.barcode, '3610159662676')


class DoubanMusicTestCase(TestCase):
    def test_parse(self):
        t_id_type = IdType.DoubanMusic
        t_id_value = '33551231'
        t_url = 'https://music.douban.com/subject/33551231/'
        site = SiteList.get_site_by_id_type(t_id_type)
        self.assertIsNotNone(site)
        self.assertEqual(site.validate_url(t_url), True)
        site = SiteList.get_site_by_url(t_url)
        self.assertEqual(site.url, t_url)
        self.assertEqual(site.id_value, t_id_value)

    @use_local_response
    def test_scrape(self):
        t_url = 'https://music.douban.com/subject/33551231/'
        site = SiteList.get_site_by_url(t_url)
        self.assertEqual(site.ready, False)
        site.get_resource_ready()
        self.assertEqual(site.ready, True)
        self.assertEqual(site.resource.metadata['title'], 'The Race For Space')
        self.assertIsInstance(site.resource.item, Album)
        self.assertEqual(site.resource.item.barcode, '3610159662676')


class MultiMusicSitesTestCase(TestCase):
    @use_local_response
    def test_albums(self):
        url1 = 'https://music.douban.com/subject/33551231/'
        url2 = 'https://open.spotify.com/album/65KwtzkJXw7oT819NFWmEP'
        p1 = SiteList.get_site_by_url(url1).get_resource_ready()
        p2 = SiteList.get_site_by_url(url2).get_resource_ready()
        self.assertEqual(p1.item.id, p2.item.id)
13
catalog/performance/models.py
Normal file
@ -0,0 +1,13 @@
from catalog.common import *
from django.utils.translation import gettext_lazy as _


class Performance(Item):
    douban_drama = LookupIdDescriptor(IdType.DoubanDrama)
    versions = jsondata.ArrayField(_('版本'), null=False, blank=False, default=list)
    directors = jsondata.ArrayField(_('导演'), null=False, blank=False, default=list)
    playwrights = jsondata.ArrayField(_('编剧'), null=False, blank=False, default=list)
    actors = jsondata.ArrayField(_('主演'), null=False, blank=False, default=list)

    class Meta:
        proxy = True
37
catalog/performance/tests.py
Normal file
@ -0,0 +1,37 @@
from django.test import TestCase
from catalog.common import *


class DoubanDramaTestCase(TestCase):
    def setUp(self):
        pass

    def test_parse(self):
        t_id = '24849279'
        t_url = 'https://www.douban.com/location/drama/24849279/'
        p1 = SiteList.get_site_by_id_type(IdType.DoubanDrama)
        self.assertIsNotNone(p1)
        p1 = SiteList.get_site_by_url(t_url)
        self.assertIsNotNone(p1)
        self.assertEqual(p1.validate_url(t_url), True)
        self.assertEqual(p1.id_to_url(t_id), t_url)
        self.assertEqual(p1.url_to_id(t_url), t_id)

    @use_local_response
    def test_scrape(self):
        t_url = 'https://www.douban.com/location/drama/24849279/'
        site = SiteList.get_site_by_url(t_url)
        self.assertEqual(site.ready, False)
        resource = site.get_resource_ready()
        self.assertEqual(site.ready, True)
        self.assertEqual(resource.metadata['title'], '红花侠')
        item = site.get_item()
        self.assertEqual(item.title, '红花侠')

        # self.assertEqual(i.other_titles, ['スカーレットピンパーネル', 'THE SCARLET PIMPERNEL'])
        # self.assertEqual(len(i.brief), 545)
        # self.assertEqual(i.genres, ['音乐剧'])
        # self.assertEqual(i.versions, ['08星组公演版', '10年月組公演版', '17年星組公演版', 'ュージカル(2017年)版'])
        # self.assertEqual(i.directors, ['小池修一郎', '小池 修一郎', '石丸さち子'])
        # self.assertEqual(i.playwrights, ['小池修一郎', 'Baroness Orczy(原作)', '小池 修一郎'])
        # self.assertEqual(i.actors, ['安蘭けい', '柚希礼音', '遠野あすか', '霧矢大夢', '龍真咲'])
13
catalog/podcast/models.py
Normal file
@ -0,0 +1,13 @@
from catalog.common import *


class Podcast(Item):
    feed_url = PrimaryLookupIdDescriptor(IdType.Feed)
    apple_podcast = PrimaryLookupIdDescriptor(IdType.ApplePodcast)
    # ximalaya = LookupIdDescriptor(IdType.Ximalaya)
    # xiaoyuzhou = LookupIdDescriptor(IdType.Xiaoyuzhou)
    hosts = jsondata.ArrayField(default=list)


# class PodcastEpisode(Item):
#     pass
30
catalog/podcast/tests.py
Normal file
@ -0,0 +1,30 @@
from django.test import TestCase
from catalog.podcast.models import *
from catalog.common import *


class ApplePodcastTestCase(TestCase):
    def setUp(self):
        pass

    def test_parse(self):
        t_id = '657765158'
        t_url = 'https://podcasts.apple.com/us/podcast/%E5%A4%A7%E5%86%85%E5%AF%86%E8%B0%88/id657765158'
        t_url2 = 'https://podcasts.apple.com/us/podcast/id657765158'
        p1 = SiteList.get_site_by_id_type(IdType.ApplePodcast)
        self.assertIsNotNone(p1)
        self.assertEqual(p1.validate_url(t_url), True)
        p2 = SiteList.get_site_by_url(t_url)
        self.assertEqual(p1.id_to_url(t_id), t_url2)
        self.assertEqual(p2.url_to_id(t_url), t_id)

    @use_local_response
    def test_scrape(self):
        t_url = 'https://podcasts.apple.com/gb/podcast/the-new-yorker-radio-hour/id1050430296'
        site = SiteList.get_site_by_url(t_url)
        self.assertEqual(site.ready, False)
        self.assertEqual(site.id_value, '1050430296')
        site.get_resource_ready()
        self.assertEqual(site.resource.metadata['title'], 'The New Yorker Radio Hour')
        # self.assertEqual(site.resource.metadata['feed_url'], 'http://feeds.wnyc.org/newyorkerradiohour')
        self.assertEqual(site.resource.metadata['feed_url'], 'http://feeds.feedburner.com/newyorkerradiohour')
15
catalog/sites/__init__.py
Normal file
@ -0,0 +1,15 @@
from ..common.sites import SiteList
from .apple_podcast import ApplePodcast
from .douban_book import DoubanBook
from .douban_movie import DoubanMovie
from .douban_music import DoubanMusic
from .douban_game import DoubanGame
from .douban_drama import DoubanDrama
from .goodreads import Goodreads
from .google_books import GoogleBooks
from .tmdb import TMDB_Movie
from .imdb import IMDB
from .spotify import Spotify
from .igdb import IGDB
from .steam import Steam
from .bangumi import Bangumi
40
catalog/sites/apple_podcast.py
Normal file
@ -0,0 +1,40 @@
from catalog.common import *
from catalog.models import *
import logging


_logger = logging.getLogger(__name__)


@SiteList.register
class ApplePodcast(AbstractSite):
    ID_TYPE = IdType.ApplePodcast
    URL_PATTERNS = [r"https://[^.]+.apple.com/\w+/podcast/*[^/?]*/id(\d+)"]
    WIKI_PROPERTY_ID = 'P5842'
    DEFAULT_MODEL = Podcast

    @classmethod
    def id_to_url(cls, id_value):
        return "https://podcasts.apple.com/us/podcast/id" + id_value

    def scrape(self):
        api_url = f'https://itunes.apple.com/lookup?id={self.id_value}'
        dl = BasicDownloader(api_url)
        resp = dl.download()
        r = resp.json()['results'][0]
        pd = ResourceContent(metadata={
            'title': r['trackName'],
            'feed_url': r['feedUrl'],
            'hosts': [r['artistName']],
            'genres': r['genres'],
            'cover_image_url': r['artworkUrl600'],
        })
        pd.lookup_ids[IdType.Feed] = pd.metadata.get('feed_url')
        if pd.metadata["cover_image_url"]:
            imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
            try:
                pd.cover_image = imgdl.download().content
                pd.cover_image_extention = imgdl.extention
            except Exception:
                _logger.debug(f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}')
        return pd
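A minimal sketch of driving this site class end to end, mirroring ApplePodcastTestCase above (without @use_local_response this would hit the live iTunes lookup API):

    site = SiteList.get_site_by_url('https://podcasts.apple.com/us/podcast/id657765158')
    resource = site.get_resource_ready()
    print(resource.metadata['title'], resource.metadata['feed_url'])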
24
catalog/sites/bangumi.py
Normal file
@ -0,0 +1,24 @@
from catalog.common import *
from catalog.models import *
import logging


_logger = logging.getLogger(__name__)


@SiteList.register
class Bangumi(AbstractSite):
    ID_TYPE = IdType.Bangumi
    URL_PATTERNS = [
        r"https://bgm\.tv/subject/(\d+)",
    ]
    WIKI_PROPERTY_ID = ''
    DEFAULT_MODEL = None

    @classmethod
    def id_to_url(cls, id_value):
        return f"https://bgm.tv/subject/{id_value}"

    def scrape(self):
        # TODO rewrite with bangumi api https://bangumi.github.io/api/
        pass
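A hedged sketch of what the TODO above might become; the v0 endpoint and field names here are assumptions based on the API docs linked in the comment, not code from this commit:

    def scrape(self):
        # assumed endpoint: GET https://api.bgm.tv/v0/subjects/{subject_id}
        api_url = f'https://api.bgm.tv/v0/subjects/{self.id_value}'
        r = BasicDownloader(api_url).download().json()
        return ResourceContent(metadata={
            'title': r.get('name_cn') or r.get('name'),
            'brief': r.get('summary'),
        })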
28
catalog/sites/douban.py
Normal file
@ -0,0 +1,28 @@
import re
from catalog.common import *


RE_NUMBERS = re.compile(r"\d+\d*")
RE_WHITESPACES = re.compile(r"\s+")


class DoubanDownloader(ProxiedDownloader):
    def validate_response(self, response):
        if response is None:
            return RESPONSE_NETWORK_ERROR
        elif response.status_code == 204:
            return RESPONSE_CENSORSHIP
        elif response.status_code == 200:
            content = response.content.decode('utf-8')
            if content.find('关于豆瓣') == -1:
                # if content.find('你的 IP 发出') == -1:
                #     error = error + 'Content not authentic'  # response is garbage
                # else:
                #     error = error + 'IP banned'
                return RESPONSE_NETWORK_ERROR
            elif content.find('<title>页面不存在</title>') != -1 or content.find('呃... 你想访问的条目豆瓣不收录。') != -1:  # re.search('不存在[^<]+</title>', content, re.MULTILINE):
                return RESPONSE_CENSORSHIP
            else:
                return RESPONSE_OK
        else:
            return RESPONSE_INVALID_CONTENT
180
catalog/sites/douban_book.py
Normal file
@ -0,0 +1,180 @@
from catalog.common import *
from .douban import *
from catalog.book.models import *
from catalog.book.utils import *
import logging


_logger = logging.getLogger(__name__)


class ScraperMixin:
    def set_field(self, field, value=None):
        self.data[field] = value

    def parse_str(self, query):
        elem = self.html.xpath(query)
        return elem[0].strip() if elem else None

    def parse_field(self, field, query, error_when_missing=False):
        elem = self.html.xpath(query)
        if elem:
            self.data[field] = elem[0].strip()
        elif error_when_missing:
            raise ParseError(self, field)
        else:
            self.data[field] = None
        return elem


@SiteList.register
class DoubanBook(AbstractSite, ScraperMixin):
    ID_TYPE = IdType.DoubanBook
    URL_PATTERNS = [r"\w+://book\.douban\.com/subject/(\d+)/{0,1}", r"\w+://m.douban.com/book/subject/(\d+)/{0,1}"]
    WIKI_PROPERTY_ID = '?'
    DEFAULT_MODEL = Edition

    @classmethod
    def id_to_url(cls, id_value):
        return "https://book.douban.com/subject/" + id_value + "/"

    def scrape(self):
        self.data = {}
        self.html = DoubanDownloader(self.url).download().html()
        self.parse_field('title', "/html/body//h1/span/text()")
        self.parse_field('isbn', "//div[@id='info']//span[text()='ISBN:']/following::text()")
        # TODO does douban store ASIN as ISBN, need more cleanup if so
        if not self.data['title']:
            if self.data['isbn']:
                self.data['title'] = 'isbn: ' + self.data['isbn']
            else:
                raise ParseError(self, 'title')

        self.parse_field('cover_image_url', "//*[@id='mainpic']/a/img/@src")
        self.parse_field('brief', "//h2/span[text()='内容简介']/../following-sibling::div[1]//div[@class='intro'][not(ancestor::span[@class='short'])]/p/text()")
        self.parse_field('series', "//div[@id='info']//span[text()='丛书:']/following-sibling::a[1]/text()")
        self.parse_field('producer', "//div[@id='info']//span[text()='出品方:']/following-sibling::a[1]/text()")
        self.parse_field('cubn', "//div[@id='info']//span[text()='统一书号:']/following::text()")
        self.parse_field('subtitle', "//div[@id='info']//span[text()='副标题:']/following::text()")
        self.parse_field('orig_title', "//div[@id='info']//span[text()='原作名:']/following::text()")
        self.parse_field('language', "//div[@id='info']//span[text()='语言:']/following::text()")
        self.parse_field('pub_house', "//div[@id='info']//span[text()='出版社:']/following::text()")
        self.parse_field('pub_date', "//div[@id='info']//span[text()='出版年:']/following::text()")
        year_month_day = RE_NUMBERS.findall(self.data['pub_date']) if self.data['pub_date'] else []
        if len(year_month_day) in (2, 3):
            pub_year = int(year_month_day[0])
            pub_month = int(year_month_day[1])
        elif len(year_month_day) == 1:
            pub_year = int(year_month_day[0])
            pub_month = None
        else:
            pub_year = None
            pub_month = None
        if pub_year and pub_month and pub_year < pub_month:
            pub_year, pub_month = pub_month, pub_year
        pub_year = None if pub_year is not None and pub_year not in range(0, 3000) else pub_year
        pub_month = None if pub_month is not None and pub_month not in range(1, 13) else pub_month

        self.parse_field('binding', "//div[@id='info']//span[text()='装帧:']/following::text()")
        self.parse_field('price', "//div[@id='info']//span[text()='定价:']/following::text()")
        self.parse_field('pages', "//div[@id='info']//span[text()='页数:']/following::text()")
        if self.data['pages'] is not None:
            self.data['pages'] = int(RE_NUMBERS.findall(self.data['pages'])[0]) if RE_NUMBERS.findall(self.data['pages']) else None
            if self.data['pages'] and (self.data['pages'] > 999999 or self.data['pages'] < 1):
                self.data['pages'] = None

        contents = None
        try:
            contents_elem = self.html.xpath("//h2/span[text()='目录']/../following-sibling::div[1]")[0]
            # if the id of the next sibling contains `dir`, that would be the full contents
            if "dir" in contents_elem.getnext().xpath("@id")[0]:
                contents_elem = contents_elem.getnext()
                contents = '\n'.join(p.strip() for p in contents_elem.xpath("text()")[:-2]) if len(contents_elem) else None
            else:
                contents = '\n'.join(p.strip() for p in contents_elem.xpath("text()")) if len(contents_elem) else None
        except Exception:
            pass
        self.data['contents'] = contents

        # there are two html formats for authors and translators
        authors_elem = self.html.xpath("""//div[@id='info']//span[text()='作者:']/following-sibling::br[1]/
            preceding-sibling::a[preceding-sibling::span[text()='作者:']]/text()""")
        if not authors_elem:
            authors_elem = self.html.xpath("""//div[@id='info']//span[text()=' 作者']/following-sibling::a/text()""")
        if authors_elem:
            authors = []
            for author in authors_elem:
                authors.append(RE_WHITESPACES.sub(' ', author.strip())[:200])
        else:
            authors = None
        self.data['authors'] = authors

        translators_elem = self.html.xpath("""//div[@id='info']//span[text()='译者:']/following-sibling::br[1]/
            preceding-sibling::a[preceding-sibling::span[text()='译者:']]/text()""")
        if not translators_elem:
            translators_elem = self.html.xpath("""//div[@id='info']//span[text()=' 译者']/following-sibling::a/text()""")
        if translators_elem:
            translators = []
            for translator in translators_elem:
                translators.append(RE_WHITESPACES.sub(' ', translator.strip()))
        else:
            translators = None
        self.data['translators'] = translators

        work_link = self.parse_str('//h2/span[text()="这本书的其他版本"]/following-sibling::span[@class="pl"]/a/@href')
        if work_link:
            r = re.match(r'\w+://book.douban.com/works/(\d+)', work_link)
            self.data['required_resources'] = [{
                'model': 'Work',
                'id_type': IdType.DoubanBook_Work,
                'id_value': r[1] if r else None,
                'title': self.data['title'],
                'url': work_link,
            }]
        pd = ResourceContent(metadata=self.data)
        pd.lookup_ids[IdType.ISBN] = self.data.get('isbn')
        pd.lookup_ids[IdType.CUBN] = self.data.get('cubn')
        if self.data["cover_image_url"]:
            imgdl = BasicImageDownloader(self.data["cover_image_url"], self.url)
            try:
                pd.cover_image = imgdl.download().content
                pd.cover_image_extention = imgdl.extention
            except Exception:
                _logger.debug(f'failed to download cover for {self.url} from {self.data["cover_image_url"]}')
        return pd


@SiteList.register
class DoubanBook_Work(AbstractSite):
    ID_TYPE = IdType.DoubanBook_Work
    URL_PATTERNS = [r"\w+://book\.douban\.com/works/(\d+)"]
    WIKI_PROPERTY_ID = '?'
    DEFAULT_MODEL = Work

    @classmethod
    def id_to_url(cls, id_value):
        return "https://book.douban.com/works/" + id_value + "/"

    def bypass_scrape(self, data_from_link):
        if not data_from_link:
            return None
        pd = ResourceContent(metadata={
            'title': data_from_link['title'],
        })
        return pd

    def scrape(self):
        content = DoubanDownloader(self.url).download().html()
        title_elem = content.xpath("//h1/text()")
        title = title_elem[0].split('全部版本(')[0].strip() if title_elem else None
        if not title:
            raise ParseError(self, 'title')
        pd = ResourceContent(metadata={
            'title': title,
        })
        return pd
58
catalog/sites/douban_drama.py
Normal file
@ -0,0 +1,58 @@
from catalog.common import *
from catalog.models import *
from .douban import DoubanDownloader
import logging


_logger = logging.getLogger(__name__)


@SiteList.register
class DoubanDrama(AbstractSite):
    ID_TYPE = IdType.DoubanDrama
    URL_PATTERNS = [r"\w+://www.douban.com/location/drama/(\d+)/"]
    WIKI_PROPERTY_ID = 'P6443'
    DEFAULT_MODEL = Performance

    @classmethod
    def id_to_url(cls, id_value):
        return "https://www.douban.com/location/drama/" + id_value + "/"

    def scrape(self):
        h = DoubanDownloader(self.url).download().html()
        data = {}

        title_elem = h.xpath("/html/body//h1/span/text()")
        if title_elem:
            data["title"] = title_elem[0].strip()
        else:
            raise ParseError(self, "title")

        data['other_titles'] = [s.strip() for s in title_elem[1:]]
        other_title_elem = h.xpath("//dl//dt[text()='又名:']/following::dd[@itemprop='name']/text()")
        if len(other_title_elem) > 0:
            data['other_titles'].append(other_title_elem[0].strip())

        plot_elem = h.xpath("//div[@id='link-report']/text()")
        if len(plot_elem) == 0:
            plot_elem = h.xpath("//div[@class='abstract']/text()")
        data['brief'] = '\n'.join(plot_elem) if len(plot_elem) > 0 else ''

        data['genres'] = [s.strip() for s in h.xpath("//dl//dt[text()='类型:']/following-sibling::dd[@itemprop='genre']/text()")]
        data['versions'] = [s.strip() for s in h.xpath("//dl//dt[text()='版本:']/following-sibling::dd[@class='titles']/a//text()")]
        data['directors'] = [s.strip() for s in h.xpath("//div[@class='meta']/dl//dt[text()='导演:']/following-sibling::dd/a[@itemprop='director']//text()")]
        data['playwrights'] = [s.strip() for s in h.xpath("//div[@class='meta']/dl//dt[text()='编剧:']/following-sibling::dd/a[@itemprop='author']//text()")]
        data['actors'] = [s.strip() for s in h.xpath("//div[@class='meta']/dl//dt[text()='主演:']/following-sibling::dd/a[@itemprop='actor']//text()")]

        img_url_elem = h.xpath("//img[@itemprop='image']/@src")
        data['cover_image_url'] = img_url_elem[0].strip() if img_url_elem else None

        pd = ResourceContent(metadata=data)
        if pd.metadata["cover_image_url"]:
            imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
            try:
                pd.cover_image = imgdl.download().content
                pd.cover_image_extention = imgdl.extention
            except Exception:
                _logger.debug(f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}')
        return pd
76
catalog/sites/douban_game.py
Normal file
@ -0,0 +1,76 @@
from catalog.common import *
from catalog.models import *
from .douban import DoubanDownloader
import dateparser
import logging


_logger = logging.getLogger(__name__)


@SiteList.register
class DoubanGame(AbstractSite):
    ID_TYPE = IdType.DoubanGame
    URL_PATTERNS = [r"\w+://www\.douban\.com/game/(\d+)/{0,1}", r"\w+://m.douban.com/game/subject/(\d+)/{0,1}"]
    WIKI_PROPERTY_ID = ''
    DEFAULT_MODEL = Game

    @classmethod
    def id_to_url(cls, id_value):
        return "https://www.douban.com/game/" + id_value + "/"

    def scrape(self):
        content = DoubanDownloader(self.url).download().html()

        elem = content.xpath("//div[@id='content']/h1/text()")
        title = elem[0].strip() if len(elem) else None
        if not title:
            raise ParseError(self, "title")

        other_title_elem = content.xpath("//dl[@class='game-attr']//dt[text()='别名:']/following-sibling::dd[1]/text()")
        other_title = other_title_elem[0].strip().split(' / ') if other_title_elem else None

        developer_elem = content.xpath("//dl[@class='game-attr']//dt[text()='开发商:']/following-sibling::dd[1]/text()")
        developer = developer_elem[0].strip().split(' / ') if developer_elem else None

        publisher_elem = content.xpath("//dl[@class='game-attr']//dt[text()='发行商:']/following-sibling::dd[1]/text()")
        publisher = publisher_elem[0].strip().split(' / ') if publisher_elem else None

        platform_elem = content.xpath("//dl[@class='game-attr']//dt[text()='平台:']/following-sibling::dd[1]/a/text()")
        platform = platform_elem if platform_elem else None

        genre_elem = content.xpath("//dl[@class='game-attr']//dt[text()='类型:']/following-sibling::dd[1]/a/text()")
        genre = None
        if genre_elem:
            genre = [g for g in genre_elem if g != '游戏']

        date_elem = content.xpath("//dl[@class='game-attr']//dt[text()='发行日期:']/following-sibling::dd[1]/text()")
        release_date = dateparser.parse(date_elem[0].strip()).strftime('%Y-%m-%d') if date_elem else None

        brief_elem = content.xpath("//div[@class='mod item-desc']/p/text()")
        brief = '\n'.join(brief_elem) if brief_elem else None

        img_url_elem = content.xpath("//div[@class='item-subject-info']/div[@class='pic']//img/@src")
        img_url = img_url_elem[0].strip() if img_url_elem else None

        pd = ResourceContent(metadata={
            'title': title,
            'other_title': other_title,
            'developer': developer,
            'publisher': publisher,
            'release_date': release_date,
            'genre': genre,
            'platform': platform,
            'brief': brief,
            'cover_image_url': img_url
        })
        if pd.metadata["cover_image_url"]:
            pd.cover_image, pd.cover_image_extention = BasicImageDownloader.download_image(pd.metadata['cover_image_url'], self.url)
        return pd
275
catalog/sites/douban_movie.py
Normal file
@ -0,0 +1,275 @@
from catalog.common import *
from .douban import *
from catalog.movie.models import *
from catalog.tv.models import *
import logging
from django.db import models
from django.utils.translation import gettext_lazy as _
from .tmdb import TMDB_TV, search_tmdb_by_imdb_id


_logger = logging.getLogger(__name__)


class MovieGenreEnum(models.TextChoices):
    DRAMA = 'Drama', _('剧情')
    KIDS = 'Kids', _('儿童')
    COMEDY = 'Comedy', _('喜剧')
    BIOGRAPHY = 'Biography', _('传记')
    ACTION = 'Action', _('动作')
    HISTORY = 'History', _('历史')
    ROMANCE = 'Romance', _('爱情')
    WAR = 'War', _('战争')
    SCI_FI = 'Sci-Fi', _('科幻')
    CRIME = 'Crime', _('犯罪')
    ANIMATION = 'Animation', _('动画')
    WESTERN = 'Western', _('西部')
    MYSTERY = 'Mystery', _('悬疑')
    FANTASY = 'Fantasy', _('奇幻')
    THRILLER = 'Thriller', _('惊悚')
    ADVENTURE = 'Adventure', _('冒险')
    HORROR = 'Horror', _('恐怖')
    DISASTER = 'Disaster', _('灾难')
    DOCUMENTARY = 'Documentary', _('纪录片')
    MARTIAL_ARTS = 'Martial-Arts', _('武侠')
    SHORT = 'Short', _('短片')
    ANCIENT_COSTUM = 'Ancient-Costum', _('古装')
    EROTICA = 'Erotica', _('情色')
    SPORT = 'Sport', _('运动')
    GAY_LESBIAN = 'Gay/Lesbian', _('同性')
    OPERA = 'Opera', _('戏曲')
    MUSIC = 'Music', _('音乐')
    FILM_NOIR = 'Film-Noir', _('黑色电影')
    MUSICAL = 'Musical', _('歌舞')
    REALITY_TV = 'Reality-TV', _('真人秀')
    FAMILY = 'Family', _('家庭')
    TALK_SHOW = 'Talk-Show', _('脱口秀')
    NEWS = 'News', _('新闻')
    SOAP = 'Soap', _('肥皂剧')
    TV_MOVIE = 'TV Movie', _('电视电影')
    THEATRE = 'Theatre', _('舞台艺术')
    OTHER = 'Other', _('其他')


# MovieGenreTranslator = ChoicesDictGenerator(MovieGenreEnum)


@SiteList.register
class DoubanMovie(AbstractSite):
    ID_TYPE = IdType.DoubanMovie
    URL_PATTERNS = [r"\w+://movie\.douban\.com/subject/(\d+)/{0,1}", r"\w+://m.douban.com/movie/subject/(\d+)/{0,1}"]
    WIKI_PROPERTY_ID = '?'
    # no DEFAULT_MODEL as it may be either a TV Season or a Movie

    @classmethod
    def id_to_url(cls, id_value):
        return "https://movie.douban.com/subject/" + id_value + "/"

    def scrape(self):
        content = DoubanDownloader(self.url).download().html()

        try:
            raw_title = content.xpath("//span[@property='v:itemreviewed']/text()")[0].strip()
        except IndexError:
            raise ParseError(self, 'title')

        orig_title = content.xpath("//img[@rel='v:image']/@alt")[0].strip()
        title = raw_title.split(orig_title)[0].strip()
        # if there is no Chinese title
        if title == '':
            title = orig_title

        if title == orig_title:
            orig_title = None

        # there are two html formats for authors and translators
        other_title_elem = content.xpath("//div[@id='info']//span[text()='又名:']/following-sibling::text()[1]")
        other_title = other_title_elem[0].strip().split(' / ') if other_title_elem else None

        imdb_elem = content.xpath("//div[@id='info']//span[text()='IMDb链接:']/following-sibling::a[1]/text()")
        if not imdb_elem:
            imdb_elem = content.xpath("//div[@id='info']//span[text()='IMDb:']/following-sibling::text()[1]")
        imdb_code = imdb_elem[0].strip() if imdb_elem else None

        director_elem = content.xpath("//div[@id='info']//span[text()='导演']/following-sibling::span[1]/a/text()")
        director = director_elem if director_elem else None

        playwright_elem = content.xpath("//div[@id='info']//span[text()='编剧']/following-sibling::span[1]/a/text()")
        playwright = list(map(lambda a: a[:200], playwright_elem)) if playwright_elem else None

        actor_elem = content.xpath("//div[@id='info']//span[text()='主演']/following-sibling::span[1]/a/text()")
        actor = list(map(lambda a: a[:200], actor_elem)) if actor_elem else None

        # construct genre translator
        genre_translator = {}
        attrs = [attr for attr in dir(MovieGenreEnum) if '__' not in attr]
        for attr in attrs:
            genre_translator[getattr(MovieGenreEnum, attr).label] = getattr(MovieGenreEnum, attr).value

        genre_elem = content.xpath("//span[@property='v:genre']/text()")
        if genre_elem:
            genre = []
            for g in genre_elem:
                g = g.split(' ')[0]
                if g == '紀錄片':  # likely some original data on douban was corrupted
                    g = '纪录片'
                elif g == '鬼怪':
                    g = '惊悚'
                if g in genre_translator:
                    genre.append(genre_translator[g])
                elif g in genre_translator.values():
                    genre.append(g)
                else:
                    _logger.error(f'unable to map genre {g}')
        else:
            genre = None

        showtime_elem = content.xpath("//span[@property='v:initialReleaseDate']/text()")
        if showtime_elem:
            showtime = []
            for st in showtime_elem:
                parts = st.split('(')
                if len(parts) == 1:
                    time = st.split('(')[0]
                    region = ''
                else:
                    time = st.split('(')[0]
                    region = st.split('(')[1][0:-1]
                showtime.append({time: region})
        else:
            showtime = None

        site_elem = content.xpath("//div[@id='info']//span[text()='官方网站:']/following-sibling::a[1]/@href")
        site = site_elem[0].strip()[:200] if site_elem else None
        if site and not re.match(r'http.+', site):
            site = None

        area_elem = content.xpath("//div[@id='info']//span[text()='制片国家/地区:']/following-sibling::text()[1]")
        if area_elem:
            area = [a.strip()[:100] for a in area_elem[0].split('/')]
        else:
            area = None

        language_elem = content.xpath("//div[@id='info']//span[text()='语言:']/following-sibling::text()[1]")
        if language_elem:
            language = [a.strip() for a in language_elem[0].split(' / ')]
        else:
            language = None

        year_elem = content.xpath("//span[@class='year']/text()")
        year = int(re.search(r'\d+', year_elem[0])[0]) if year_elem and re.search(r'\d+', year_elem[0]) else None

        duration_elem = content.xpath("//span[@property='v:runtime']/text()")
        other_duration_elem = content.xpath("//span[@property='v:runtime']/following-sibling::text()[1]")
        if duration_elem:
            duration = duration_elem[0].strip()
            if other_duration_elem:
                duration += other_duration_elem[0].rstrip()
            duration = duration.split('/')[0].strip()
        else:
            duration = None

        season_elem = content.xpath("//*[@id='season']/option[@selected='selected']/text()")
        if not season_elem:
            season_elem = content.xpath("//div[@id='info']//span[text()='季数:']/following-sibling::text()[1]")
            season = int(season_elem[0].strip()) if season_elem else None
        else:
            season = int(season_elem[0].strip())

        episodes_elem = content.xpath("//div[@id='info']//span[text()='集数:']/following-sibling::text()[1]")
        episodes = int(episodes_elem[0].strip()) if episodes_elem and episodes_elem[0].strip().isdigit() else None

        single_episode_length_elem = content.xpath("//div[@id='info']//span[text()='单集片长:']/following-sibling::text()[1]")
        single_episode_length = single_episode_length_elem[0].strip()[:100] if single_episode_length_elem else None

        # if the field `episodes` is not none, then it must be a series
        is_series = True if episodes else False

        brief_elem = content.xpath("//span[@class='all hidden']")
        if not brief_elem:
            brief_elem = content.xpath("//span[@property='v:summary']")
        brief = '\n'.join([e.strip() for e in brief_elem[0].xpath('./text()')]) if brief_elem else None

        img_url_elem = content.xpath("//img[@rel='v:image']/@src")
        img_url = img_url_elem[0].strip() if img_url_elem else None

        pd = ResourceContent(metadata={
            'title': title,
            'orig_title': orig_title,
            'other_title': other_title,
            'imdb_code': imdb_code,
            'director': director,
            'playwright': playwright,
            'actor': actor,
            'genre': genre,
            'showtime': showtime,
            'site': site,
            'area': area,
            'language': language,
            'year': year,
            'duration': duration,
            'season_number': season,
            'episodes': episodes,
            'single_episode_length': single_episode_length,
            'brief': brief,
            'is_series': is_series,
            'cover_image_url': img_url,
        })
        pd.metadata['preferred_model'] = ('TVSeason' if season else 'TVShow') if is_series else 'Movie'

        if imdb_code:
            res_data = search_tmdb_by_imdb_id(imdb_code)
            tmdb_show_id = None
            if 'movie_results' in res_data and len(res_data['movie_results']) > 0:
                pd.metadata['preferred_model'] = 'Movie'
            elif 'tv_results' in res_data and len(res_data['tv_results']) > 0:
                pd.metadata['preferred_model'] = 'TVShow'
            elif 'tv_season_results' in res_data and len(res_data['tv_season_results']) > 0:
                pd.metadata['preferred_model'] = 'TVSeason'
                tmdb_show_id = res_data['tv_season_results'][0]['show_id']
            elif 'tv_episode_results' in res_data and len(res_data['tv_episode_results']) > 0:
                pd.metadata['preferred_model'] = 'TVSeason'
                tmdb_show_id = res_data['tv_episode_results'][0]['show_id']
                if res_data['tv_episode_results'][0]['episode_number'] != 1:
                    _logger.error(f'Douban Movie {self.url} mapping to unexpected imdb episode {imdb_code}')
                    # TODO correct the IMDB id
            pd.lookup_ids[IdType.IMDB] = imdb_code
            if tmdb_show_id:
                pd.metadata['required_resources'] = [{
                    'model': 'TVShow',
                    'id_type': IdType.TMDB_TV,
                    'id_value': tmdb_show_id,
                    'title': title,
                    'url': TMDB_TV.id_to_url(tmdb_show_id),
                }]
        # TODO parse sister seasons
        # pd.metadata['related_resources'] = []
        if pd.metadata["cover_image_url"]:
            imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
            try:
                pd.cover_image = imgdl.download().content
                pd.cover_image_extention = imgdl.extention
            except Exception:
                _logger.debug(f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}')
        return pd
115
catalog/sites/douban_music.py
Normal file
@ -0,0 +1,115 @@
from catalog.common import *
from catalog.models import *
from .douban import DoubanDownloader
import dateparser
import logging


_logger = logging.getLogger(__name__)


@SiteList.register
class DoubanMusic(AbstractSite):
    ID_TYPE = IdType.DoubanMusic
    URL_PATTERNS = [r"\w+://music\.douban\.com/subject/(\d+)/{0,1}", r"\w+://m.douban.com/music/subject/(\d+)/{0,1}"]
    WIKI_PROPERTY_ID = ''
    DEFAULT_MODEL = Album

    @classmethod
    def id_to_url(cls, id_value):
        return "https://music.douban.com/subject/" + id_value + "/"

    def scrape(self):
        content = DoubanDownloader(self.url).download().html()

        elem = content.xpath("//h1/span/text()")
        title = elem[0].strip() if len(elem) else None
        if not title:
            raise ParseError(self, "title")

        artists_elem = content.xpath("//div[@id='info']/span/span[@class='pl']/a/text()")
        artist = None if not artists_elem else list(map(lambda a: a[:200], artists_elem))

        genre_elem = content.xpath("//div[@id='info']//span[text()='流派:']/following::text()[1]")
        genre = genre_elem[0].strip() if genre_elem else None

        date_elem = content.xpath("//div[@id='info']//span[text()='发行时间:']/following::text()[1]")
        release_date = dateparser.parse(date_elem[0].strip()).strftime('%Y-%m-%d') if date_elem else None

        company_elem = content.xpath("//div[@id='info']//span[text()='出版者:']/following::text()[1]")
        company = company_elem[0].strip() if company_elem else None

        track_list_elem = content.xpath("//div[@class='track-list']/div[@class='indent']/div/text()")
        if track_list_elem:
            track_list = '\n'.join([track.strip() for track in track_list_elem])
        else:
            track_list = None

        brief_elem = content.xpath("//span[@class='all hidden']")
        if not brief_elem:
            brief_elem = content.xpath("//span[@property='v:summary']")
        brief = '\n'.join([e.strip() for e in brief_elem[0].xpath('./text()')]) if brief_elem else None

        gtin = None
        isrc = None
        other_info = {}
        other_elem = content.xpath("//div[@id='info']//span[text()='又名:']/following-sibling::text()[1]")
        if other_elem:
            other_info['又名'] = other_elem[0].strip()
        other_elem = content.xpath("//div[@id='info']//span[text()='专辑类型:']/following-sibling::text()[1]")
        if other_elem:
            other_info['专辑类型'] = other_elem[0].strip()
        other_elem = content.xpath("//div[@id='info']//span[text()='介质:']/following-sibling::text()[1]")
        if other_elem:
            other_info['介质'] = other_elem[0].strip()
        other_elem = content.xpath("//div[@id='info']//span[text()='ISRC:']/following-sibling::text()[1]")
        if other_elem:
            other_info['ISRC'] = other_elem[0].strip()
            isrc = other_elem[0].strip()
        other_elem = content.xpath("//div[@id='info']//span[text()='条形码:']/following-sibling::text()[1]")
        if other_elem:
            other_info['条形码'] = other_elem[0].strip()
            gtin = other_elem[0].strip()
        other_elem = content.xpath("//div[@id='info']//span[text()='碟片数:']/following-sibling::text()[1]")
        if other_elem:
            other_info['碟片数'] = other_elem[0].strip()

        img_url_elem = content.xpath("//div[@id='mainpic']//img/@src")
        img_url = img_url_elem[0].strip() if img_url_elem else None

        pd = ResourceContent(metadata={
            'title': title,
            'artist': artist,
            'genre': genre,
            'release_date': release_date,
            'duration': None,
            'company': company,
            'track_list': track_list,
            'brief': brief,
            'other_info': other_info,
            'cover_image_url': img_url
        })
        if gtin:
            pd.lookup_ids[IdType.GTIN] = gtin
        if isrc:
            pd.lookup_ids[IdType.ISRC] = isrc
        if pd.metadata["cover_image_url"]:
            imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
            try:
                pd.cover_image = imgdl.download().content
                pd.cover_image_extention = imgdl.extention
            except Exception:
                _logger.debug(f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}')
        return pd
116
catalog/sites/goodreads.py
Normal file
@ -0,0 +1,116 @@
from catalog.book.models import Edition, Work
from catalog.common import *
from lxml import html
import json
import logging


_logger = logging.getLogger(__name__)


class GoodreadsDownloader(RetryDownloader):
    def validate_response(self, response):
        if response is None:
            return RESPONSE_NETWORK_ERROR
        elif response.status_code == 200:
            if response.text.find('__NEXT_DATA__') != -1:
                return RESPONSE_OK
            else:
                # Goodreads may return legacy version for a/b testing
                # retry if so
                return RESPONSE_NETWORK_ERROR
        else:
            return RESPONSE_INVALID_CONTENT


@SiteList.register
class Goodreads(AbstractSite):
    ID_TYPE = IdType.Goodreads
    WIKI_PROPERTY_ID = 'P2968'
    DEFAULT_MODEL = Edition
    URL_PATTERNS = [r".+goodreads.com/.*book/show/(\d+)", r".+goodreads.com/.*book/(\d+)"]

    @classmethod
    def id_to_url(cls, id_value):
        return "https://www.goodreads.com/book/show/" + id_value

    def scrape(self, response=None):
        data = {}
        if response is not None:
            h = html.fromstring(response.text.strip())
        else:
            dl = GoodreadsDownloader(self.url)
            h = dl.download().html()
        # Next.JS version of Goodreads
        # JSON.parse(document.getElementById('__NEXT_DATA__').innerHTML)['props']['pageProps']['apolloState']
        elem = h.xpath('//script[@id="__NEXT_DATA__"]/text()')
        src = elem[0].strip() if elem else None
        if not src:
            raise ParseError(self, '__NEXT_DATA__ element')
        d = json.loads(src)['props']['pageProps']['apolloState']
        o = {'Book': [], 'Work': [], 'Series': [], 'Contributor': []}
        for v in d.values():
            t = v.get('__typename')
            if t and t in o:
                o[t].append(v)
        b = next(filter(lambda x: x.get('title'), o['Book']), None)
        if not b:
            # Goodreads may return an empty page template when an internal service times out
            raise ParseError(self, 'Book in __NEXT_DATA__ json')
        data['title'] = b['title']
        data['brief'] = b['description']
        data['isbn'] = b['details'].get('isbn13')
        asin = b['details'].get('asin')
        if asin and asin != data['isbn']:
            data['asin'] = asin
        data['pages'] = b['details'].get('numPages')
        data['cover_image_url'] = b['imageUrl']
        w = next(filter(lambda x: x.get('details'), o['Work']), None)
        if w:
            data['required_resources'] = [{
                'model': 'Work',
                'id_type': IdType.Goodreads_Work,
                'id_value': str(w['legacyId']),
                'title': w['details']['originalTitle'],
                'url': w['editions']['webUrl'],
            }]
        pd = ResourceContent(metadata=data)
        pd.lookup_ids[IdType.ISBN] = data.get('isbn')
        pd.lookup_ids[IdType.ASIN] = data.get('asin')
        if data["cover_image_url"]:
            imgdl = BasicImageDownloader(data["cover_image_url"], self.url)
            try:
                pd.cover_image = imgdl.download().content
                pd.cover_image_extention = imgdl.extention
            except Exception:
                _logger.debug(f'failed to download cover for {self.url} from {data["cover_image_url"]}')
        return pd


@SiteList.register
class Goodreads_Work(AbstractSite):
    ID_TYPE = IdType.Goodreads_Work
    WIKI_PROPERTY_ID = ''
    DEFAULT_MODEL = Work
    URL_PATTERNS = [r".+goodreads.com/work/editions/(\d+)"]

    @classmethod
    def id_to_url(cls, id_value):
        return "https://www.goodreads.com/work/editions/" + id_value

    def scrape(self, response=None):
        content = BasicDownloader(self.url).download().html()
        title_elem = content.xpath("//h1/a/text()")
        title = title_elem[0].strip() if title_elem else None
        if not title:
            raise ParseError(self, 'title')
        author_elem = content.xpath("//h2/a/text()")
        author = author_elem[0].strip() if author_elem else None
        first_published_elem = content.xpath("//h2/span/text()")
        first_published = first_published_elem[0].strip() if first_published_elem else None
        pd = ResourceContent(metadata={
            'title': title,
            'author': author,
            'first_published': first_published
        })
        return pd
79
catalog/sites/google_books.py
Normal file
@ -0,0 +1,79 @@
from catalog.common import *
from catalog.models import *
import re
import logging


_logger = logging.getLogger(__name__)


@SiteList.register
class GoogleBooks(AbstractSite):
    ID_TYPE = IdType.GoogleBooks
    URL_PATTERNS = [
        r"https://books\.google\.co[^/]+/books\?id=([^&#]+)",
        r"https://www\.google\.co[^/]+/books/edition/[^/]+/([^&#?]+)",
        r"https://books\.google\.co[^/]+/books/about/[^?]+?id=([^&#?]+)",
    ]
    WIKI_PROPERTY_ID = ''
    DEFAULT_MODEL = Edition

    @classmethod
    def id_to_url(cls, id_value):
        return "https://books.google.com/books?id=" + id_value

    def scrape(self):
        api_url = f'https://www.googleapis.com/books/v1/volumes/{self.id_value}'
        b = BasicDownloader(api_url).download().json()
        other = {}
        title = b['volumeInfo']['title']
        subtitle = b['volumeInfo']['subtitle'] if 'subtitle' in b['volumeInfo'] else None
        pub_year = None
        pub_month = None
        if 'publishedDate' in b['volumeInfo']:
            pub_date = b['volumeInfo']['publishedDate'].split('-')
            pub_year = pub_date[0]
            pub_month = pub_date[1] if len(pub_date) > 1 else None
        pub_house = b['volumeInfo']['publisher'] if 'publisher' in b['volumeInfo'] else None
        language = b['volumeInfo']['language'] if 'language' in b['volumeInfo'] else None
        pages = b['volumeInfo']['pageCount'] if 'pageCount' in b['volumeInfo'] else None
        if 'mainCategory' in b['volumeInfo']:
            other['分类'] = b['volumeInfo']['mainCategory']
        authors = b['volumeInfo']['authors'] if 'authors' in b['volumeInfo'] else None
        if 'description' in b['volumeInfo']:
            brief = b['volumeInfo']['description']
        elif 'textSnippet' in b['volumeInfo']:
            brief = b["volumeInfo"]["textSnippet"]["searchInfo"]
        else:
            brief = ''
        brief = re.sub(r'<.*?>', '', brief.replace('<br', '\n<br'))
        img_url = b['volumeInfo']['imageLinks']['thumbnail'] if 'imageLinks' in b['volumeInfo'] else None
        isbn10 = None
        isbn13 = None
        for iid in b['volumeInfo']['industryIdentifiers'] if 'industryIdentifiers' in b['volumeInfo'] else []:
            if iid['type'] == 'ISBN_10':
                isbn10 = iid['identifier']
            if iid['type'] == 'ISBN_13':
                isbn13 = iid['identifier']
        isbn = isbn13  # if isbn13 is not None else isbn10

        raw_img, ext = BasicImageDownloader.download_image(img_url, self.url)
        data = {
            'title': title,
            'subtitle': subtitle,
            'orig_title': None,
            'author': authors,
            'translator': None,
            'language': language,
            'pub_house': pub_house,
            'pub_year': pub_year,
            'pub_month': pub_month,
            'binding': None,
            'pages': pages,
            'isbn': isbn,
            'brief': brief,
            'contents': None,
            'other_info': other,
            'cover_image_url': img_url,
        }
        return ResourceContent(metadata=data, cover_image=raw_img, cover_image_extention=ext, lookup_ids={IdType.ISBN: isbn13})
113
catalog/sites/igdb.py
Normal file
113
catalog/sites/igdb.py
Normal file
|
@ -0,0 +1,113 @@
|
|||
"""
|
||||
IGDB
|
||||
|
||||
uses the game slug (e.g. "portal-2") as id, which is different from the numeric id in the IGDB API
"""
from catalog.common import *
from catalog.models import *
from django.conf import settings
from igdb.wrapper import IGDBWrapper
import requests
import datetime
import json
import logging


_logger = logging.getLogger(__name__)


def _igdb_access_token():
    try:
        token = requests.post(f'https://id.twitch.tv/oauth2/token?client_id={settings.IGDB_CLIENT_ID}&client_secret={settings.IGDB_CLIENT_SECRET}&grant_type=client_credentials').json()['access_token']
    except Exception:
        _logger.error('unable to obtain IGDB token')
        token = '<invalid>'
    return token


_wrapper = IGDBWrapper(settings.IGDB_CLIENT_ID, _igdb_access_token())


def search_igdb_by_3p_url(steam_url):
    r = IGDB.api_query('websites', f'fields *, game.*; where url = "{steam_url}";')
    if not r:
        return None
    r = sorted(r, key=lambda w: w['game']['id'])
    return IGDB(url=r[0]['game']['url'])


@SiteList.register
class IGDB(AbstractSite):
    ID_TYPE = IdType.IGDB
    URL_PATTERNS = [r"\w+://www\.igdb\.com/games/([a-zA-Z0-9\-_]+)"]
    WIKI_PROPERTY_ID = '?'
    DEFAULT_MODEL = Game

    @classmethod
    def id_to_url(cls, id_value):
        return "https://www.igdb.com/games/" + id_value

    @classmethod
    def api_query(cls, p, q):
        key = 'igdb:' + p + '/' + q
        if get_mock_mode():
            r = BasicDownloader(key).download().json()
        else:
            r = json.loads(_wrapper.api_request(p, q))
            if settings.DOWNLOADER_SAVEDIR:
                with open(settings.DOWNLOADER_SAVEDIR + '/' + get_mock_file(key), 'w', encoding='utf-8') as fp:
                    fp.write(json.dumps(r))
        return r

    def scrape(self):
        fields = '*, cover.url, genres.name, platforms.name, involved_companies.*, involved_companies.company.name'
        r = self.api_query('games', f'fields {fields}; where url = "{self.url}";')[0]
        brief = r['summary'] if 'summary' in r else ''
        brief += "\n\n" + r['storyline'] if 'storyline' in r else ''
        developer = None
        publisher = None
        release_date = None
        genre = None
        platform = None
        if 'involved_companies' in r:
            developer = next(iter([c['company']['name'] for c in r['involved_companies'] if c['developer']]), None)
            publisher = next(iter([c['company']['name'] for c in r['involved_companies'] if c['publisher']]), None)
        if 'platforms' in r:
            ps = sorted(r['platforms'], key=lambda p: p['id'])
            platform = [(p['name'] if p['id'] != 6 else 'Windows') for p in ps]
        if 'first_release_date' in r:
            release_date = datetime.datetime.fromtimestamp(r['first_release_date'], datetime.timezone.utc).strftime('%Y-%m-%d')
        if 'genres' in r:
            genre = [g['name'] for g in r['genres']]
        websites = self.api_query('websites', f'fields *; where game.url = "{self.url}";')
        steam_url = None
        official_site = None
        for website in websites:
            if website['category'] == 1:
                official_site = website['url']
            elif website['category'] == 13:
                steam_url = website['url']
        pd = ResourceContent(metadata={
            'title': r['name'],
            'other_title': None,
            'developer': developer,
            'publisher': publisher,
            'release_date': release_date,
            'genre': genre,
            'platform': platform,
            'brief': brief,
            'official_site': official_site,
            'igdb_id': r['id'],
            'cover_image_url': ('https:' + r['cover']['url'].replace('t_thumb', 't_cover_big')) if r.get('cover') else None,
        })
        if steam_url:
            pd.lookup_ids[IdType.Steam] = SiteList.get_site_by_id_type(IdType.Steam).url_to_id(steam_url)
        if pd.metadata["cover_image_url"]:
            imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
            try:
                pd.cover_image = imgdl.download().content
                pd.cover_image_extention = imgdl.extention
            except Exception:
                _logger.debug(f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}')
        return pd
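For reference, a minimal sketch (not part of the files in this commit) of how the Apicalypse-style 'websites' query in search_igdb_by_3p_url resolves a Steam store URL to an IGDB game; the module path follows the file location above, and app id 620 (Portal 2) is only an illustrative input:

from catalog.sites.igdb import IGDB

# query the 'websites' endpoint exactly as search_igdb_by_3p_url does
results = IGDB.api_query('websites', 'fields *, game.*; where url = "https://store.steampowered.com/app/620";')
if results:
    # pick the lowest game id for a stable choice when several entries share the URL
    game = sorted(results, key=lambda w: w['game']['id'])[0]['game']
    print(game['url'])  # e.g. https://www.igdb.com/games/portal-2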
48
catalog/sites/imdb.py
Normal file
@ -0,0 +1,48 @@
from catalog.common import *
from .tmdb import search_tmdb_by_imdb_id
from catalog.movie.models import *
from catalog.tv.models import *
import logging


_logger = logging.getLogger(__name__)


@SiteList.register
class IMDB(AbstractSite):
    ID_TYPE = IdType.IMDB
    URL_PATTERNS = [r'\w+://www.imdb.com/title/(tt\d+)']
    WIKI_PROPERTY_ID = '?'

    @classmethod
    def id_to_url(cls, id_value):
        return "https://www.imdb.com/title/" + id_value + "/"

    def scrape(self):
        self.scraped = False
        res_data = search_tmdb_by_imdb_id(self.id_value)
        if 'movie_results' in res_data and len(res_data['movie_results']) > 0:
            url = f"https://www.themoviedb.org/movie/{res_data['movie_results'][0]['id']}"
        elif 'tv_results' in res_data and len(res_data['tv_results']) > 0:
            url = f"https://www.themoviedb.org/tv/{res_data['tv_results'][0]['id']}"
        elif 'tv_season_results' in res_data and len(res_data['tv_season_results']) > 0:
            # this should not happen, as IMDB only has ids for shows and episodes
            tv_id = res_data['tv_season_results'][0]['show_id']
            season_number = res_data['tv_season_results'][0]['season_number']
            url = f"https://www.themoviedb.org/tv/{tv_id}/season/{season_number}"
        elif 'tv_episode_results' in res_data and len(res_data['tv_episode_results']) > 0:
            tv_id = res_data['tv_episode_results'][0]['show_id']
            season_number = res_data['tv_episode_results'][0]['season_number']
            episode_number = res_data['tv_episode_results'][0]['episode_number']
            if season_number == 0:
                url = f"https://www.themoviedb.org/tv/{tv_id}/season/{season_number}/episode/{episode_number}"
            elif episode_number == 1:
                url = f"https://www.themoviedb.org/tv/{tv_id}/season/{season_number}"
            else:
                raise ParseError(self, "IMDB id matches a TMDB episode that is not the first of its season; this is not supported")
        else:
            raise ParseError(self, "IMDB id not found in TMDB")
        tmdb = SiteList.get_site_by_url(url)
        pd = tmdb.scrape()
        pd.metadata['preferred_model'] = tmdb.DEFAULT_MODEL.__name__
        return pd
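As a reading aid, a sketch of the TMDB find-API shape that scrape() above dispatches on; the values are illustrative, modeled on the mock responses bundled later in this commit:

res_data = {
    'movie_results': [],
    'tv_results': [],
    'tv_season_results': [],
    'tv_episode_results': [
        {'show_id': 57243, 'season_number': 4, 'episode_number': 1},
    ],
}
# episode 1 of season 4 stands for the whole season under the Douban convention,
# so this input would resolve to https://www.themoviedb.org/tv/57243/season/4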
145
catalog/sites/spotify.py
Normal file
@ -0,0 +1,145 @@
"""
Spotify
"""
from django.conf import settings
from catalog.common import *
from catalog.models import *
from .douban import *
import time
import datetime
import requests
import dateparser
import logging


_logger = logging.getLogger(__name__)


spotify_token = None
spotify_token_expire_time = time.time()


@SiteList.register
class Spotify(AbstractSite):
    ID_TYPE = IdType.Spotify_Album
    URL_PATTERNS = [r'\w+://open\.spotify\.com/album/([a-zA-Z0-9]+)']
    WIKI_PROPERTY_ID = '?'
    DEFAULT_MODEL = Album

    @classmethod
    def id_to_url(cls, id_value):
        return f"https://open.spotify.com/album/{id_value}"

    def scrape(self):
        api_url = "https://api.spotify.com/v1/albums/" + self.id_value
        headers = {
            'Authorization': f"Bearer {get_spotify_token()}"
        }
        res_data = BasicDownloader(api_url, headers=headers).download().json()

        artist = []
        for artist_dict in res_data['artists']:
            artist.append(artist_dict['name'])

        title = res_data['name']

        genre = ', '.join(res_data['genres'])

        company = []
        for com in res_data['copyrights']:
            company.append(com['text'])

        duration = 0
        track_list = []
        track_urls = []
        for track in res_data['tracks']['items']:
            track_urls.append(track['external_urls']['spotify'])
            duration += track['duration_ms']
            if res_data['tracks']['items'][-1]['disc_number'] > 1:
                # more than one disc
                track_list.append(str(track['disc_number']) + '-' + str(track['track_number']) + '. ' + track['name'])
            else:
                track_list.append(str(track['track_number']) + '. ' + track['name'])
        track_list = '\n'.join(track_list)

        release_date = dateparser.parse(res_data['release_date']).strftime('%Y-%m-%d')

        gtin = None
        if res_data['external_ids'].get('upc'):
            gtin = res_data['external_ids'].get('upc')
        if res_data['external_ids'].get('ean'):
            gtin = res_data['external_ids'].get('ean')
        isrc = None
        if res_data['external_ids'].get('isrc'):
            isrc = res_data['external_ids'].get('isrc')

        pd = ResourceContent(metadata={
            'title': title,
            'artist': artist,
            'genre': genre,
            'track_list': track_list,
            'release_date': release_date,
            'duration': duration,
            'company': company,
            'brief': None,
            'cover_image_url': res_data['images'][0]['url']
        })
        if gtin:
            pd.lookup_ids[IdType.GTIN] = gtin
        if isrc:
            pd.lookup_ids[IdType.ISRC] = isrc
        if pd.metadata["cover_image_url"]:
            imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
            try:
                pd.cover_image = imgdl.download().content
                pd.cover_image_extention = imgdl.extention
            except Exception:
                _logger.debug(f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}')
        return pd


def get_spotify_token():
    global spotify_token, spotify_token_expire_time
    if get_mock_mode():
        return 'mocked'
    if spotify_token is None or is_spotify_token_expired():
        invoke_spotify_token()
    return spotify_token


def is_spotify_token_expired():
    global spotify_token_expire_time
    return spotify_token_expire_time <= time.time()


def invoke_spotify_token():
    global spotify_token, spotify_token_expire_time
    r = requests.post(
        "https://accounts.spotify.com/api/token",
        data={
            "grant_type": "client_credentials"
        },
        headers={
            "Authorization": f"Basic {settings.SPOTIFY_CREDENTIAL}"
        }
    )
    data = r.json()
    if r.status_code == 401:
        # token expired, try one more time;
        # this may be caused by external operations,
        # for example debugging with an HTTP client
        r = requests.post(
            "https://accounts.spotify.com/api/token",
            data={
                "grant_type": "client_credentials"
            },
            headers={
                "Authorization": f"Basic {settings.SPOTIFY_CREDENTIAL}"
            }
        )
        data = r.json()
    elif r.status_code != 200:
        raise Exception(f"Request to Spotify API failed. Reason: {r.reason}")
    # minus 2 seconds to allow for execution time
    spotify_token_expire_time = int(data['expires_in']) + time.time() - 2
    spotify_token = data['access_token']
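A standalone sketch of the token-caching pattern implemented above, for readers skimming the diff; the names here are illustrative, not the module's API:

import time
import requests

_token, _expires_at = None, 0.0

def cached_spotify_token(basic_credential):
    # fetch a client-credentials token only when the cached one is missing or stale
    global _token, _expires_at
    if _token is None or time.time() >= _expires_at:
        r = requests.post(
            "https://accounts.spotify.com/api/token",
            data={"grant_type": "client_credentials"},
            headers={"Authorization": f"Basic {basic_credential}"},
        )
        r.raise_for_status()
        data = r.json()
        # renew 2 seconds before Spotify's declared expiry to cover request latency
        _expires_at = time.time() + int(data["expires_in"]) - 2
        _token = data["access_token"]
    return _token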
64
catalog/sites/steam.py
Normal file
@ -0,0 +1,64 @@
from catalog.common import *
from catalog.models import *
from .igdb import search_igdb_by_3p_url
import dateparser
import logging


_logger = logging.getLogger(__name__)


@SiteList.register
class Steam(AbstractSite):
    ID_TYPE = IdType.Steam
    URL_PATTERNS = [r"\w+://store\.steampowered\.com/app/(\d+)"]
    WIKI_PROPERTY_ID = '?'
    DEFAULT_MODEL = Game

    @classmethod
    def id_to_url(cls, id_value):
        return "https://store.steampowered.com/app/" + str(id_value)

    def scrape(self):
        i = search_igdb_by_3p_url(self.url)
        pd = i.scrape() if i else ResourceContent()

        headers = BasicDownloader.headers.copy()
        headers['Host'] = 'store.steampowered.com'
        headers['Cookie'] = "wants_mature_content=1; birthtime=754700401;"
        content = BasicDownloader(self.url, headers=headers).download().html()

        title = content.xpath("//div[@class='apphub_AppName']/text()")[0]
        developer = content.xpath("//div[@id='developers_list']/a/text()")
        publisher = content.xpath("//div[@class='glance_ctn']//div[@class='dev_row'][2]//a/text()")
        release_date = dateparser.parse(
            content.xpath(
                "//div[@class='release_date']/div[@class='date']/text()")[0]
        ).strftime('%Y-%m-%d')
        genre = content.xpath(
            "//div[@class='details_block']/b[2]/following-sibling::a/text()")
        platform = ['PC']
        brief = content.xpath(
            "//div[@class='game_description_snippet']/text()")[0].strip()
        # fall back to Steam images if IGDB provided no cover
        if pd.cover_image is None:
            pd.metadata['cover_image_url'] = content.xpath("//img[@class='game_header_image_full']/@src")[0].replace("header.jpg", "library_600x900.jpg")
            pd.cover_image, pd.cover_image_extention = BasicImageDownloader.download_image(pd.metadata['cover_image_url'], self.url)
        if pd.cover_image is None:
            pd.metadata['cover_image_url'] = content.xpath("//img[@class='game_header_image_full']/@src")[0]
            pd.cover_image, pd.cover_image_extention = BasicImageDownloader.download_image(pd.metadata['cover_image_url'], self.url)
        # merge in data from IGDB; localized Steam data wins for title and brief
        d = {
            'developer': developer,
            'publisher': publisher,
            'release_date': release_date,
            'genre': genre,
            'platform': platform,
        }
        d.update(pd.metadata)
        pd.metadata = d
        if title:
            pd.metadata['title'] = title
        if brief:
            pd.metadata['brief'] = brief
        return pd
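A toy illustration (not from the commit) of the merge order above: dict.update lets IGDB metadata overwrite the locally scraped Steam fields, after which the localized Steam title and brief are restored:

steam = {'title': '传送门2', 'genre': ['解谜'], 'publisher': ['Valve']}
igdb = {'title': 'Portal 2', 'genre': ['Puzzle']}
d = dict(steam)
d.update(igdb)               # IGDB wins wherever both sides have a value
d['title'] = steam['title']  # ...except title (and brief), kept from Steam
assert d == {'title': '传送门2', 'genre': ['Puzzle'], 'publisher': ['Valve']}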
328
catalog/sites/tmdb.py
Normal file
@ -0,0 +1,328 @@
"""
The Movie Database
"""

import re
from django.conf import settings
from catalog.common import *
from .douban import *
from catalog.movie.models import *
from catalog.tv.models import *
import logging


_logger = logging.getLogger(__name__)


def search_tmdb_by_imdb_id(imdb_id):
    tmdb_api_url = f"https://api.themoviedb.org/3/find/{imdb_id}?api_key={settings.TMDB_API3_KEY}&language=zh-CN&external_source=imdb_id"
    res_data = BasicDownloader(tmdb_api_url).download().json()
    return res_data


def _copy_dict(s, key_map):
    d = {}
    for src, dst in key_map.items():
        d[dst if dst else src] = s.get(src)
    return d


genre_map = {
    'Sci-Fi & Fantasy': 'Sci-Fi',
    'War & Politics': 'War',
    '儿童': 'Kids',
    '冒险': 'Adventure',
    '剧情': 'Drama',
    '动作': 'Action',
    '动作冒险': 'Action',
    '动画': 'Animation',
    '历史': 'History',
    '喜剧': 'Comedy',
    '奇幻': 'Fantasy',
    '家庭': 'Family',
    '恐怖': 'Horror',
    '悬疑': 'Mystery',
    '惊悚': 'Thriller',
    '战争': 'War',
    '新闻': 'News',
    '爱情': 'Romance',
    '犯罪': 'Crime',
    '电视电影': 'TV Movie',
    '真人秀': 'Reality-TV',
    '科幻': 'Sci-Fi',
    '纪录': 'Documentary',
    '肥皂剧': 'Soap',
    '脱口秀': 'Talk-Show',
    '西部': 'Western',
    '音乐': 'Music',
}


@SiteList.register
class TMDB_Movie(AbstractSite):
    ID_TYPE = IdType.TMDB_Movie
    URL_PATTERNS = [r'\w+://www.themoviedb.org/movie/(\d+)']
    WIKI_PROPERTY_ID = '?'
    DEFAULT_MODEL = Movie

    @classmethod
    def id_to_url(cls, id_value):
        return f"https://www.themoviedb.org/movie/{id_value}"

    def scrape(self):
        is_series = False
        if is_series:
            api_url = f"https://api.themoviedb.org/3/tv/{self.id_value}?api_key={settings.TMDB_API3_KEY}&language=zh-CN&append_to_response=external_ids,credits"
        else:
            api_url = f"https://api.themoviedb.org/3/movie/{self.id_value}?api_key={settings.TMDB_API3_KEY}&language=zh-CN&append_to_response=external_ids,credits"

        res_data = BasicDownloader(api_url).download().json()

        if is_series:
            title = res_data['name']
            orig_title = res_data['original_name']
            year = int(res_data['first_air_date'].split('-')[0]) if res_data['first_air_date'] else None
            imdb_code = res_data['external_ids']['imdb_id']
            showtime = [{res_data['first_air_date']: "首播日期"}] if res_data['first_air_date'] else None
            duration = None
        else:
            title = res_data['title']
            orig_title = res_data['original_title']
            year = int(res_data['release_date'].split('-')[0]) if res_data['release_date'] else None
            showtime = [{res_data['release_date']: "发布日期"}] if res_data['release_date'] else None
            imdb_code = res_data['imdb_id']
            # in minutes
            duration = res_data['runtime'] if res_data['runtime'] else None

        genre = list(map(lambda x: genre_map[x['name']] if x['name'] in genre_map else 'Other', res_data['genres']))
        language = list(map(lambda x: x['name'], res_data['spoken_languages']))
        brief = res_data['overview']

        if is_series:
            director = list(map(lambda x: x['name'], res_data['created_by']))
        else:
            director = list(map(lambda x: x['name'], filter(lambda c: c['job'] == 'Director', res_data['credits']['crew'])))
        playwright = list(map(lambda x: x['name'], filter(lambda c: c['job'] == 'Screenplay', res_data['credits']['crew'])))
        actor = list(map(lambda x: x['name'], res_data['credits']['cast']))
        area = []

        other_info = {}
        # other_info['TMDB评分'] = res_data['vote_average']
        # other_info['分级'] = res_data['contentRating']
        # other_info['Metacritic评分'] = res_data['metacriticRating']
        # other_info['奖项'] = res_data['awards']
        # other_info['TMDB_ID'] = id
        if is_series:
            other_info['Seasons'] = res_data['number_of_seasons']
            other_info['Episodes'] = res_data['number_of_episodes']

        # TODO: use GET /configuration to get base url
        img_url = ('https://image.tmdb.org/t/p/original/' + res_data['poster_path']) if res_data['poster_path'] is not None else None

        pd = ResourceContent(metadata={
            'title': title,
            'orig_title': orig_title,
            'other_title': None,
            'imdb_code': imdb_code,
            'director': director,
            'playwright': playwright,
            'actor': actor,
            'genre': genre,
            'showtime': showtime,
            'site': None,
            'area': area,
            'language': language,
            'year': year,
            'duration': duration,
            'season': None,
            'episodes': None,
            'single_episode_length': None,
            'brief': brief,
            'cover_image_url': img_url,
        })
        if imdb_code:
            pd.lookup_ids[IdType.IMDB] = imdb_code
        if pd.metadata["cover_image_url"]:
            imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
            try:
                pd.cover_image = imgdl.download().content
                pd.cover_image_extention = imgdl.extention
            except Exception:
                _logger.debug(f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}')
        return pd


@SiteList.register
class TMDB_TV(AbstractSite):
    ID_TYPE = IdType.TMDB_TV
    URL_PATTERNS = [r'\w+://www.themoviedb.org/tv/(\d+)[^/]*$', r'\w+://www.themoviedb.org/tv/(\d+)[^/]*/seasons']
    WIKI_PROPERTY_ID = '?'
    DEFAULT_MODEL = TVShow

    @classmethod
    def id_to_url(cls, id_value):
        return f"https://www.themoviedb.org/tv/{id_value}"

    def scrape(self):
        is_series = True
        if is_series:
            api_url = f"https://api.themoviedb.org/3/tv/{self.id_value}?api_key={settings.TMDB_API3_KEY}&language=zh-CN&append_to_response=external_ids,credits"
        else:
            api_url = f"https://api.themoviedb.org/3/movie/{self.id_value}?api_key={settings.TMDB_API3_KEY}&language=zh-CN&append_to_response=external_ids,credits"

        res_data = BasicDownloader(api_url).download().json()

        if is_series:
            title = res_data['name']
            orig_title = res_data['original_name']
            year = int(res_data['first_air_date'].split('-')[0]) if res_data['first_air_date'] else None
            imdb_code = res_data['external_ids']['imdb_id']
            showtime = [{res_data['first_air_date']: "首播日期"}] if res_data['first_air_date'] else None
            duration = None
        else:
            title = res_data['title']
            orig_title = res_data['original_title']
            year = int(res_data['release_date'].split('-')[0]) if res_data['release_date'] else None
            showtime = [{res_data['release_date']: "发布日期"}] if res_data['release_date'] else None
            imdb_code = res_data['imdb_id']
            # in minutes
            duration = res_data['runtime'] if res_data['runtime'] else None

        genre = list(map(lambda x: genre_map[x['name']] if x['name'] in genre_map else 'Other', res_data['genres']))
        language = list(map(lambda x: x['name'], res_data['spoken_languages']))
        brief = res_data['overview']

        if is_series:
            director = list(map(lambda x: x['name'], res_data['created_by']))
        else:
            director = list(map(lambda x: x['name'], filter(lambda c: c['job'] == 'Director', res_data['credits']['crew'])))
        playwright = list(map(lambda x: x['name'], filter(lambda c: c['job'] == 'Screenplay', res_data['credits']['crew'])))
        actor = list(map(lambda x: x['name'], res_data['credits']['cast']))
        area = []

        other_info = {}
        # other_info['TMDB评分'] = res_data['vote_average']
        # other_info['分级'] = res_data['contentRating']
        # other_info['Metacritic评分'] = res_data['metacriticRating']
        # other_info['奖项'] = res_data['awards']
        # other_info['TMDB_ID'] = id
        if is_series:
            other_info['Seasons'] = res_data['number_of_seasons']
            other_info['Episodes'] = res_data['number_of_episodes']

        # TODO: use GET /configuration to get base url
        img_url = ('https://image.tmdb.org/t/p/original/' + res_data['poster_path']) if res_data['poster_path'] is not None else None

        season_links = list(map(lambda s: {
            'model': 'TVSeason',
            'id_type': IdType.TMDB_TVSeason,
            'id_value': f'{self.id_value}-{s["season_number"]}',
            'title': s['name'],
            'url': f'{self.url}/season/{s["season_number"]}'}, res_data['seasons']))
        pd = ResourceContent(metadata={
            'title': title,
            'orig_title': orig_title,
            'other_title': None,
            'imdb_code': imdb_code,
            'director': director,
            'playwright': playwright,
            'actor': actor,
            'genre': genre,
            'showtime': showtime,
            'site': None,
            'area': area,
            'language': language,
            'year': year,
            'duration': duration,
            'season': None,
            'episodes': None,
            'single_episode_length': None,
            'brief': brief,
            'cover_image_url': img_url,
            'related_resources': season_links,
        })
        if imdb_code:
            pd.lookup_ids[IdType.IMDB] = imdb_code

        if pd.metadata["cover_image_url"]:
            imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
            try:
                pd.cover_image = imgdl.download().content
                pd.cover_image_extention = imgdl.extention
            except Exception:
                _logger.debug(f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}')
        return pd


@SiteList.register
class TMDB_TVSeason(AbstractSite):
    ID_TYPE = IdType.TMDB_TVSeason
    URL_PATTERNS = [r'\w+://www.themoviedb.org/tv/(\d+)[^/]*/season/(\d+)[^/]*$']
    WIKI_PROPERTY_ID = '?'
    DEFAULT_MODEL = TVSeason
    ID_PATTERN = r'^(\d+)-(\d+)$'

    @classmethod
    def url_to_id(cls, url: str):
        u = next(iter([re.match(p, url) for p in cls.URL_PATTERNS if re.match(p, url)]), None)
        return u[1] + '-' + u[2] if u else None

    @classmethod
    def id_to_url(cls, id_value):
        v = id_value.split('-')
        return f"https://www.themoviedb.org/tv/{v[0]}/season/{v[1]}"

    def scrape(self):
        v = self.id_value.split('-')
        api_url = f"https://api.themoviedb.org/3/tv/{v[0]}/season/{v[1]}?api_key={settings.TMDB_API3_KEY}&language=zh-CN&append_to_response=external_ids,credits"
        d = BasicDownloader(api_url).download().json()
        if not d.get('id'):
            raise ParseError(self, 'id')
        pd = ResourceContent(metadata=_copy_dict(d, {'name': 'title', 'overview': 'brief', 'air_date': 'air_date', 'season_number': 0, 'external_ids': 0}))
        pd.metadata['required_resources'] = [{
            'model': 'TVShow',
            'id_type': IdType.TMDB_TV,
            'id_value': v[0],
            'title': f'TMDB TV Show {v[0]}',
            'url': f"https://www.themoviedb.org/tv/{v[0]}",
        }]
        pd.lookup_ids[IdType.IMDB] = d['external_ids'].get('imdb_id')
        pd.metadata['cover_image_url'] = ('https://image.tmdb.org/t/p/original/' + d['poster_path']) if d['poster_path'] else None
        pd.metadata['title'] = pd.metadata['title'] if pd.metadata['title'] else f'Season {d["season_number"]}'
        pd.metadata['episode_number_list'] = list(map(lambda ep: ep['episode_number'], d['episodes']))
        pd.metadata['episode_count'] = len(pd.metadata['episode_number_list'])
        if pd.metadata["cover_image_url"]:
            imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
            try:
                pd.cover_image = imgdl.download().content
                pd.cover_image_extention = imgdl.extention
            except Exception:
                _logger.debug(f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}')

        # get external id from the 1st episode
        if pd.lookup_ids[IdType.IMDB]:
            _logger.warning("Unexpected IMDB id for TMDB tv season")
        elif len(pd.metadata['episode_number_list']) == 0:
            _logger.warning("Unable to lookup IMDB id for TMDB tv season with zero episodes")
        else:
            ep = pd.metadata['episode_number_list'][0]
            api_url2 = f"https://api.themoviedb.org/3/tv/{v[0]}/season/{v[1]}/episode/{ep}?api_key={settings.TMDB_API3_KEY}&language=zh-CN&append_to_response=external_ids,credits"
            d2 = BasicDownloader(api_url2).download().json()
            if not d2.get('id'):
                raise ParseError(self, 'episode id for season')
            pd.lookup_ids[IdType.IMDB] = d2['external_ids'].get('imdb_id')
        return pd
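One detail of _copy_dict above that is easy to miss: a falsy mapping value keeps the source key instead of renaming it, which is how 'season_number' and 'external_ids' pass through unchanged. An illustrative call:

src = {'name': 'Doctor Who', 'overview': 'a Time Lord...', 'season_number': 4}
out = _copy_dict(src, {'name': 'title', 'overview': 'brief', 'season_number': 0})
assert out == {'title': 'Doctor Who', 'brief': 'a Time Lord...', 'season_number': 4}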
10
catalog/tests.py
Normal file
@ -0,0 +1,10 @@
from django.test import TestCase
from catalog.book.tests import *
from catalog.movie.tests import *
from catalog.tv.tests import *
from catalog.music.tests import *
from catalog.game.tests import *
from catalog.podcast.tests import *
from catalog.performance.tests import *

# imported tests with same name might be ignored silently
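What "ignored silently" means here: with wildcard imports, a later module's TestCase replaces any earlier one bound to the same name, and Python raises no error. A minimal demonstration (class names are hypothetical):

class EditionTestCase:  # stand-in for a TestCase from catalog.book.tests
    source = 'catalog.book.tests'

class EditionTestCase:  # same name arriving via a later wildcard import wins
    source = 'catalog.tv.tests'

print(EditionTestCase.source)  # catalog.tv.tests -- the first class was silently replaced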
62
catalog/tv/models.py
Normal file
@ -0,0 +1,62 @@
"""
Models for TV

TVShow -> TVSeason -> TVEpisode

TVEpisode is not fully implemented at the moment

Three-way linking between Douban / IMDB / TMDB is quite messy

IMDB:
    most widely used.
    no ID for Season, only for Show and Episode

TMDB:
    most friendly API.
    some TV specials appear both as an Episode of Season 0 and as a Movie, with the same IMDB id

Douban:
    most wanted by our users.
    for a single-season show, the IMDB id of the show is used
    for a multi-season show, the IMDB id of Ep 1 is used to represent that season
    TV specials are shown as movies

For now, we follow the Douban convention, but keep an eye on it in case it breaks its own rules...

"""
from catalog.common import *
from django.db import models


class TVShow(Item):
    imdb = PrimaryLookupIdDescriptor(IdType.IMDB)
    tmdb_tv = PrimaryLookupIdDescriptor(IdType.TMDB_TV)
    season_count = jsondata.IntegerField(blank=True, default=None)


class TVSeason(Item):
    douban_movie = PrimaryLookupIdDescriptor(IdType.DoubanMovie)
    imdb = PrimaryLookupIdDescriptor(IdType.IMDB)
    tmdb_tvseason = PrimaryLookupIdDescriptor(IdType.TMDB_TVSeason)
    show = models.ForeignKey(TVShow, null=True, on_delete=models.SET_NULL, related_name='seasons')
    season_number = models.PositiveIntegerField()
    episode_count = jsondata.IntegerField(blank=True, default=None)
    METADATA_COPY_LIST = ['title', 'brief', 'season_number', 'episode_count']

    def update_linked_items_from_external_resource(self, resource):
        """link the parent TVShow from the resource's linked resources if not yet"""
        links = resource.required_resources + resource.related_resources
        for w in links:
            if w['model'] == 'TVShow':
                p = ExternalResource.objects.filter(id_type=w['id_type'], id_value=w['id_value']).first()
                if p and p.item and self.show != p.item:
                    self.show = p.item


class TVEpisode(Item):
    show = models.ForeignKey(TVShow, null=True, on_delete=models.SET_NULL, related_name='episodes')
    season = models.ForeignKey(TVSeason, null=True, on_delete=models.SET_NULL, related_name='episodes')
    episode_number = models.PositiveIntegerField()
    imdb = PrimaryLookupIdDescriptor(IdType.IMDB)
    METADATA_COPY_LIST = ['title', 'brief', 'episode_number']
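A self-contained toy (assumed names, not the model API) of the season-to-show linking rule implemented in update_linked_items_from_external_resource above: scan the resource's links and attach the first resolvable TVShow:

links = [
    {'model': 'TVSeason', 'id_type': 'tmdb_tvseason', 'id_value': '57243-4'},
    {'model': 'TVShow', 'id_type': 'tmdb_tv', 'id_value': '57243'},
]
resolved = {('tmdb_tv', '57243'): 'TVShow<Doctor Who>'}  # stand-in for the ExternalResource lookup
show = next((resolved.get((w['id_type'], w['id_value'])) for w in links if w['model'] == 'TVShow'), None)
print(show)  # TVShow<Doctor Who>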
128
catalog/tv/tests.py
Normal file
@ -0,0 +1,128 @@
from django.test import TestCase
from catalog.common import *
from catalog.tv.models import *


class TMDBTVTestCase(TestCase):
    def test_parse(self):
        t_id = '57243'
        t_url = 'https://www.themoviedb.org/tv/57243-doctor-who'
        t_url1 = 'https://www.themoviedb.org/tv/57243-doctor-who/seasons'
        t_url2 = 'https://www.themoviedb.org/tv/57243'
        p1 = SiteList.get_site_by_id_type(IdType.TMDB_TV)
        self.assertIsNotNone(p1)
        self.assertEqual(p1.validate_url(t_url), True)
        self.assertEqual(p1.validate_url(t_url1), True)
        self.assertEqual(p1.validate_url(t_url2), True)
        p2 = SiteList.get_site_by_url(t_url)
        self.assertEqual(p1.id_to_url(t_id), t_url2)
        self.assertEqual(p2.url_to_id(t_url), t_id)
        wrong_url = 'https://www.themoviedb.org/tv/57243-doctor-who/season/13'
        s1 = SiteList.get_site_by_url(wrong_url)
        self.assertNotIsInstance(s1, TVShow)

    @use_local_response
    def test_scrape(self):
        t_url = 'https://www.themoviedb.org/tv/57243-doctor-who'
        site = SiteList.get_site_by_url(t_url)
        self.assertEqual(site.ready, False)
        self.assertEqual(site.id_value, '57243')
        site.get_resource_ready()
        self.assertEqual(site.ready, True)
        self.assertEqual(site.resource.metadata['title'], '神秘博士')
        self.assertEqual(site.resource.item.primary_lookup_id_type, IdType.IMDB)
        self.assertEqual(site.resource.item.__class__.__name__, 'TVShow')
        self.assertEqual(site.resource.item.imdb, 'tt0436992')


class TMDBTVSeasonTestCase(TestCase):
    def test_parse(self):
        t_id = '57243-11'
        t_url = 'https://www.themoviedb.org/tv/57243-doctor-who/season/11'
        t_url_unique = 'https://www.themoviedb.org/tv/57243/season/11'
        p1 = SiteList.get_site_by_id_type(IdType.TMDB_TVSeason)
        self.assertIsNotNone(p1)
        self.assertEqual(p1.validate_url(t_url), True)
        self.assertEqual(p1.validate_url(t_url_unique), True)
        p2 = SiteList.get_site_by_url(t_url)
        self.assertEqual(p1.id_to_url(t_id), t_url_unique)
        self.assertEqual(p2.url_to_id(t_url), t_id)

    @use_local_response
    def test_scrape(self):
        t_url = 'https://www.themoviedb.org/tv/57243-doctor-who/season/4'
        site = SiteList.get_site_by_url(t_url)
        self.assertEqual(site.ready, False)
        self.assertEqual(site.id_value, '57243-4')
        site.get_resource_ready()
        self.assertEqual(site.ready, True)
        self.assertEqual(site.resource.metadata['title'], '第 4 季')
        self.assertEqual(site.resource.item.primary_lookup_id_type, IdType.IMDB)
        self.assertEqual(site.resource.item.__class__.__name__, 'TVSeason')
        self.assertEqual(site.resource.item.imdb, 'tt1159991')
        self.assertIsNotNone(site.resource.item.show)
        self.assertEqual(site.resource.item.show.imdb, 'tt0436992')


class DoubanMovieTVTestCase(TestCase):
    @use_local_response
    def test_scrape(self):
        url3 = 'https://movie.douban.com/subject/3627919/'
        p3 = SiteList.get_site_by_url(url3).get_resource_ready()
        self.assertEqual(p3.item.__class__.__name__, 'TVSeason')
        self.assertIsNotNone(p3.item.show)
        self.assertEqual(p3.item.show.imdb, 'tt0436992')

    @use_local_response
    def test_scrape_singleseason(self):
        url3 = 'https://movie.douban.com/subject/26895436/'
        p3 = SiteList.get_site_by_url(url3).get_resource_ready()
        self.assertEqual(p3.item.__class__.__name__, 'TVShow')


class MultiTVSitesTestCase(TestCase):
    @use_local_response
    def test_tvshows(self):
        url1 = 'https://www.themoviedb.org/tv/57243-doctor-who'
        url2 = 'https://www.imdb.com/title/tt0436992/'
        # url3 = 'https://movie.douban.com/subject/3541415/'
        p1 = SiteList.get_site_by_url(url1).get_resource_ready()
        p2 = SiteList.get_site_by_url(url2).get_resource_ready()
        # p3 = SiteList.get_site_by_url(url3).get_resource_ready()
        self.assertEqual(p1.item.id, p2.item.id)
        # self.assertEqual(p2.item.id, p3.item.id)

    @use_local_response
    def test_tvseasons(self):
        url1 = 'https://www.themoviedb.org/tv/57243-doctor-who/season/4'
        url2 = 'https://www.imdb.com/title/tt1159991/'
        url3 = 'https://movie.douban.com/subject/3627919/'
        p1 = SiteList.get_site_by_url(url1).get_resource_ready()
        p2 = SiteList.get_site_by_url(url2).get_resource_ready()
        p3 = SiteList.get_site_by_url(url3).get_resource_ready()
        self.assertEqual(p1.item.imdb, p2.item.imdb)
        self.assertEqual(p2.item.imdb, p3.item.imdb)
        self.assertEqual(p1.item.id, p2.item.id)
        self.assertEqual(p2.item.id, p3.item.id)

    @use_local_response
    def test_miniseries(self):
        url1 = 'https://www.themoviedb.org/tv/86941-the-north-water'
        url3 = 'https://movie.douban.com/subject/26895436/'
        p1 = SiteList.get_site_by_url(url1).get_resource_ready()
        p3 = SiteList.get_site_by_url(url3).get_resource_ready()
        self.assertEqual(p3.item.__class__.__name__, 'TVShow')
        self.assertEqual(p1.item.id, p3.item.id)

    @use_local_response
    def test_tvspecial(self):
        url1 = 'https://www.themoviedb.org/movie/282758-doctor-who-the-runaway-bride'
        url2 = 'https://www.imdb.com/title/tt0827573/'
        url3 = 'https://movie.douban.com/subject/4296866/'
        p1 = SiteList.get_site_by_url(url1).get_resource_ready()
        p2 = SiteList.get_site_by_url(url2).get_resource_ready()
        p3 = SiteList.get_site_by_url(url3).get_resource_ready()
        self.assertEqual(p1.item.imdb, p2.item.imdb)
        self.assertEqual(p2.item.imdb, p3.item.imdb)
        self.assertEqual(p1.item.id, p2.item.id)
        self.assertEqual(p2.item.id, p3.item.id)
6
catalog/urls.py
Normal file
@ -0,0 +1,6 @@
from django.urls import path
from .api import api

urlpatterns = [
    path("", api.urls),
]
3
catalog/views.py
Normal file
@ -0,0 +1,3 @@
from django.shortcuts import render

# Create your views here.
@ -497,7 +497,7 @@ class DoubanMovieScraper(DoubanScrapperMixin, AbstractScraper):

        episodes_elem = content.xpath(
            "//div[@id='info']//span[text()='集数:']/following-sibling::text()[1]")
-       episodes = int(episodes_elem[0].strip()) if episodes_elem and episodes_elem[0].isdigit() else None
+       episodes = int(episodes_elem[0].strip()) if episodes_elem and episodes_elem[0].strip().isdigit() else None

        single_episode_length_elem = content.xpath(
            "//div[@id='info']//span[text()='单集片长:']/following-sibling::text()[1]")
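Why the one-line change above matters: the xpath text node keeps surrounding whitespace, and str.isdigit() is False for any non-digit character, so the old guard rejected values the int() call could in fact parse. A quick check:

s = ' 26\n'
print(s.isdigit())          # False -- the old check skipped padded values
print(s.strip().isdigit())  # True  -- the new check accepts them
print(int(s.strip()))       # 26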
@ -8,12 +8,17 @@ from common.scraper import *
from igdb.wrapper import IGDBWrapper
import json
import datetime
import logging


-wrapper = IGDBWrapper(settings.IGDB_CLIENT_ID, settings.IGDB_ACCESS_TOKEN)
_logger = logging.getLogger(__name__)


+def _igdb_access_token():
+    try:
+        token = requests.post(f'https://id.twitch.tv/oauth2/token?client_id={settings.IGDB_CLIENT_ID}&client_secret={settings.IGDB_CLIENT_SECRET}&grant_type=client_credentials').json()['access_token']
+    except Exception:
+        _logger.error('unable to obtain IGDB token')
+        token = '<invalid>'
+    return token
+
+
+wrapper = IGDBWrapper(settings.IGDB_CLIENT_ID, _igdb_access_token())


class IgdbGameScraper(AbstractScraper):
@ -461,6 +461,11 @@ select::placeholder {
  color: #606c76;
}

+.navbar .current {
+  color: #00a1cc;
+  font-weight: bold;
+}
+
.navbar .navbar__search-box {
  margin: 0 12% 0 15px;
  display: inline-flex;
2
common/static/css/boofilsic.min.css
vendored
File diff suppressed because one or more lines are too long
@ -35,6 +35,10 @@
    &:visited
      color: $color-secondary

+  .current
+    color: $color-primary
+    font-weight: bold
+
  & &__search-box
    margin: 0 12% 0 15px
    display: inline-flex
@ -26,10 +26,10 @@

{% if request.user.is_authenticated %}

-<a class="navbar__link" href="{% url 'users:home' request.user.mastodon_username %}">{% trans '主页' %}</a>
-<a class="navbar__link" href="{% url 'timeline:timeline' %}">{% trans '动态' %}</a>
-<a class="navbar__link" id="logoutLink" href="{% url 'users:data' %}">{% trans '数据' %}</a>
-<a class="navbar__link" id="logoutLink" href="{% url 'users:preferences' %}">{% trans '设置' %}</a>
+<a class="navbar__link {% if current == 'home' %}current{% endif %}" href="{% url 'users:home' request.user.mastodon_username %}">{% trans '主页' %}</a>
+<a class="navbar__link {% if current == 'timeline' %}current{% endif %}" href="{% url 'timeline:timeline' %}">{% trans '动态' %}</a>
+<a class="navbar__link {% if current == 'data' %}current{% endif %}" href="{% url 'users:data' %}">{% trans '数据' %}</a>
+<a class="navbar__link {% if current == 'preferences' %}current{% endif %}" href="{% url 'users:preferences' %}">{% trans '设置' %}</a>
<a class="navbar__link" id="logoutLink" href="{% url 'users:logout' %}">{% trans '登出' %}</a>
{% if request.user.is_staff %}
<a class="navbar__link" href="{% admin_url %}">{% trans '后台' %}</a>
@ -8,12 +8,17 @@ django-rq
django-simple-history
django-hijack
django-user-messages
django-slack
#django-ninja
#django-polymorphic
meilisearch
easy-thumbnails
lxml
openpyxl
psycopg2
psycopg2-binary
requests
filetype
setproctitle
tqdm
opencc
dnspython
@ -0,0 +1,303 @@
|
|||
{
|
||||
"album_type" : "album",
|
||||
"artists" : [ {
|
||||
"external_urls" : {
|
||||
"spotify" : "https://open.spotify.com/artist/6VsiDFMZJlJ053P1uO4A6h"
|
||||
},
|
||||
"href" : "https://api.spotify.com/v1/artists/6VsiDFMZJlJ053P1uO4A6h",
|
||||
"id" : "6VsiDFMZJlJ053P1uO4A6h",
|
||||
"name" : "Public Service Broadcasting",
|
||||
"type" : "artist",
|
||||
"uri" : "spotify:artist:6VsiDFMZJlJ053P1uO4A6h"
|
||||
} ],
|
||||
"available_markets" : [ "AD", "AE", "AG", "AL", "AM", "AO", "AR", "AT", "AZ", "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", "BJ", "BN", "BO", "BR", "BS", "BT", "BW", "BY", "BZ", "CA", "CD", "CG", "CH", "CI", "CL", "CM", "CO", "CR", "CV", "CW", "CY", "CZ", "DE", "DJ", "DK", "DM", "DO", "DZ", "EC", "EE", "EG", "ES", "FI", "FR", "GA", "GB", "GD", "GE", "GH", "GM", "GN", "GQ", "GR", "GT", "GW", "GY", "HN", "HR", "HT", "HU", "ID", "IE", "IL", "IN", "IQ", "IS", "IT", "JM", "JO", "KE", "KG", "KH", "KM", "KN", "KR", "KW", "KZ", "LA", "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", "LV", "LY", "MA", "MC", "MD", "MG", "MK", "ML", "MN", "MO", "MR", "MT", "MU", "MV", "MW", "MX", "MZ", "NA", "NE", "NG", "NI", "NL", "NO", "NP", "NZ", "OM", "PA", "PE", "PG", "PH", "PK", "PL", "PS", "PT", "PW", "PY", "QA", "RO", "RW", "SA", "SC", "SE", "SI", "SK", "SL", "SM", "SN", "SR", "ST", "SV", "SZ", "TD", "TG", "TH", "TJ", "TL", "TN", "TR", "TT", "TZ", "UA", "UG", "US", "UY", "UZ", "VC", "VE", "VN", "ZA", "ZM", "ZW" ],
|
||||
"copyrights" : [ {
|
||||
"text" : "Test Card Recordings",
|
||||
"type" : "C"
|
||||
}, {
|
||||
"text" : "Test Card Recordings",
|
||||
"type" : "P"
|
||||
} ],
|
||||
"external_ids" : {
|
||||
"upc" : "3610159662676"
|
||||
},
|
||||
"external_urls" : {
|
||||
"spotify" : "https://open.spotify.com/album/65KwtzkJXw7oT819NFWmEP"
|
||||
},
|
||||
"genres" : [ ],
|
||||
"href" : "https://api.spotify.com/v1/albums/65KwtzkJXw7oT819NFWmEP",
|
||||
"id" : "65KwtzkJXw7oT819NFWmEP",
|
||||
"images" : [ {
|
||||
"height" : 640,
|
||||
"url" : "https://i.scdn.co/image/ab67616d0000b273123ebfc7ca99a9bb6342cd36",
|
||||
"width" : 640
|
||||
}, {
|
||||
"height" : 300,
|
||||
"url" : "https://i.scdn.co/image/ab67616d00001e02123ebfc7ca99a9bb6342cd36",
|
||||
"width" : 300
|
||||
}, {
|
||||
"height" : 64,
|
||||
"url" : "https://i.scdn.co/image/ab67616d00004851123ebfc7ca99a9bb6342cd36",
|
||||
"width" : 64
|
||||
} ],
|
||||
"label" : "Test Card Recordings",
|
||||
"name" : "The Race For Space",
|
||||
"popularity" : 44,
|
||||
"release_date" : "2014",
|
||||
"release_date_precision" : "year",
|
||||
"total_tracks" : 9,
|
||||
"tracks" : {
|
||||
"href" : "https://api.spotify.com/v1/albums/65KwtzkJXw7oT819NFWmEP/tracks?offset=0&limit=50",
|
||||
"items" : [ {
|
||||
"artists" : [ {
|
||||
"external_urls" : {
|
||||
"spotify" : "https://open.spotify.com/artist/6VsiDFMZJlJ053P1uO4A6h"
|
||||
},
|
||||
"href" : "https://api.spotify.com/v1/artists/6VsiDFMZJlJ053P1uO4A6h",
|
||||
"id" : "6VsiDFMZJlJ053P1uO4A6h",
|
||||
"name" : "Public Service Broadcasting",
|
||||
"type" : "artist",
|
||||
"uri" : "spotify:artist:6VsiDFMZJlJ053P1uO4A6h"
|
||||
} ],
|
||||
"available_markets" : [ "AD", "AE", "AG", "AL", "AM", "AO", "AR", "AT", "AZ", "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", "BJ", "BN", "BO", "BR", "BS", "BT", "BW", "BY", "BZ", "CA", "CD", "CG", "CH", "CI", "CL", "CM", "CO", "CR", "CV", "CW", "CY", "CZ", "DE", "DJ", "DK", "DM", "DO", "DZ", "EC", "EE", "EG", "ES", "ET", "FI", "FR", "GA", "GB", "GD", "GE", "GH", "GM", "GN", "GQ", "GR", "GT", "GW", "GY", "HN", "HR", "HT", "HU", "ID", "IE", "IL", "IN", "IQ", "IS", "IT", "JM", "JO", "KE", "KG", "KH", "KM", "KN", "KR", "KW", "KZ", "LA", "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", "LV", "LY", "MA", "MC", "MD", "MG", "MK", "ML", "MN", "MO", "MR", "MT", "MU", "MV", "MW", "MX", "MZ", "NA", "NE", "NG", "NI", "NL", "NO", "NP", "NZ", "OM", "PA", "PE", "PG", "PH", "PK", "PL", "PS", "PT", "PW", "PY", "QA", "RO", "RW", "SA", "SC", "SE", "SI", "SK", "SL", "SM", "SN", "SR", "ST", "SV", "SZ", "TD", "TG", "TH", "TJ", "TL", "TN", "TR", "TT", "TZ", "UA", "UG", "US", "UY", "UZ", "VC", "VE", "VN", "ZA", "ZM", "ZW" ],
|
||||
"disc_number" : 1,
|
||||
"duration_ms" : 159859,
|
||||
"explicit" : false,
|
||||
"external_urls" : {
|
||||
"spotify" : "https://open.spotify.com/track/3982V8R7oW3xyV8zASbCGG"
|
||||
},
|
||||
"href" : "https://api.spotify.com/v1/tracks/3982V8R7oW3xyV8zASbCGG",
|
||||
"id" : "3982V8R7oW3xyV8zASbCGG",
|
||||
"is_local" : false,
|
||||
"name" : "The Race For Space",
|
||||
"preview_url" : "https://p.scdn.co/mp3-preview/cc69663d5b6a7982e5f162e625f1b319b26956ec?cid=4b150d8d6d374d1e8dbb85f4f11a2ad9",
|
||||
"track_number" : 1,
|
||||
"type" : "track",
|
||||
"uri" : "spotify:track:3982V8R7oW3xyV8zASbCGG"
|
||||
}, {
|
||||
"artists" : [ {
|
||||
"external_urls" : {
|
||||
"spotify" : "https://open.spotify.com/artist/6VsiDFMZJlJ053P1uO4A6h"
|
||||
},
|
||||
"href" : "https://api.spotify.com/v1/artists/6VsiDFMZJlJ053P1uO4A6h",
|
||||
"id" : "6VsiDFMZJlJ053P1uO4A6h",
|
||||
"name" : "Public Service Broadcasting",
|
||||
"type" : "artist",
|
||||
"uri" : "spotify:artist:6VsiDFMZJlJ053P1uO4A6h"
|
||||
} ],
|
||||
"available_markets" : [ "AD", "AE", "AG", "AL", "AM", "AO", "AR", "AT", "AZ", "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", "BJ", "BN", "BO", "BR", "BS", "BT", "BW", "BY", "BZ", "CA", "CD", "CG", "CH", "CI", "CL", "CM", "CO", "CR", "CV", "CW", "CY", "CZ", "DE", "DJ", "DK", "DM", "DO", "DZ", "EC", "EE", "EG", "ES", "ET", "FI", "FR", "GA", "GB", "GD", "GE", "GH", "GM", "GN", "GQ", "GR", "GT", "GW", "GY", "HN", "HR", "HT", "HU", "ID", "IE", "IL", "IN", "IQ", "IS", "IT", "JM", "JO", "KE", "KG", "KH", "KM", "KN", "KR", "KW", "KZ", "LA", "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", "LV", "LY", "MA", "MC", "MD", "MG", "MK", "ML", "MN", "MO", "MR", "MT", "MU", "MV", "MW", "MX", "MZ", "NA", "NE", "NG", "NI", "NL", "NO", "NP", "NZ", "OM", "PA", "PE", "PG", "PH", "PK", "PL", "PS", "PT", "PW", "PY", "QA", "RO", "RW", "SA", "SC", "SE", "SI", "SK", "SL", "SM", "SN", "SR", "ST", "SV", "SZ", "TD", "TG", "TH", "TJ", "TL", "TN", "TR", "TT", "TZ", "UA", "UG", "US", "UY", "UZ", "VC", "VE", "VN", "ZA", "ZM", "ZW" ],
|
||||
"disc_number" : 1,
|
||||
"duration_ms" : 429374,
|
||||
"explicit" : false,
|
||||
"external_urls" : {
|
||||
"spotify" : "https://open.spotify.com/track/4EhQrGzqi8k24qWIJuG5CH"
|
||||
},
|
||||
"href" : "https://api.spotify.com/v1/tracks/4EhQrGzqi8k24qWIJuG5CH",
|
||||
"id" : "4EhQrGzqi8k24qWIJuG5CH",
|
||||
"is_local" : false,
|
||||
"name" : "Sputnik",
|
||||
"preview_url" : "https://p.scdn.co/mp3-preview/32ccf0b8f7ef1251c35e97acb405e4e7cc2660d2?cid=4b150d8d6d374d1e8dbb85f4f11a2ad9",
|
||||
"track_number" : 2,
|
||||
"type" : "track",
|
||||
"uri" : "spotify:track:4EhQrGzqi8k24qWIJuG5CH"
|
||||
}, {
|
||||
"artists" : [ {
|
||||
"external_urls" : {
|
||||
"spotify" : "https://open.spotify.com/artist/6VsiDFMZJlJ053P1uO4A6h"
|
||||
},
|
||||
"href" : "https://api.spotify.com/v1/artists/6VsiDFMZJlJ053P1uO4A6h",
|
||||
"id" : "6VsiDFMZJlJ053P1uO4A6h",
|
||||
"name" : "Public Service Broadcasting",
|
||||
"type" : "artist",
|
||||
"uri" : "spotify:artist:6VsiDFMZJlJ053P1uO4A6h"
|
||||
} ],
|
||||
"available_markets" : [ "AD", "AE", "AG", "AL", "AM", "AO", "AR", "AT", "AZ", "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", "BJ", "BN", "BO", "BR", "BS", "BT", "BW", "BY", "BZ", "CA", "CD", "CG", "CH", "CI", "CL", "CM", "CO", "CR", "CV", "CW", "CY", "CZ", "DE", "DJ", "DK", "DM", "DO", "DZ", "EC", "EE", "EG", "ES", "ET", "FI", "FR", "GA", "GB", "GD", "GE", "GH", "GM", "GN", "GQ", "GR", "GT", "GW", "GY", "HN", "HR", "HT", "HU", "ID", "IE", "IL", "IN", "IQ", "IS", "IT", "JM", "JO", "KE", "KG", "KH", "KM", "KN", "KR", "KW", "KZ", "LA", "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", "LV", "LY", "MA", "MC", "MD", "MG", "MK", "ML", "MN", "MO", "MR", "MT", "MU", "MV", "MW", "MX", "MZ", "NA", "NE", "NG", "NI", "NL", "NO", "NP", "NZ", "OM", "PA", "PE", "PG", "PH", "PK", "PL", "PS", "PT", "PW", "PY", "QA", "RO", "RW", "SA", "SC", "SE", "SI", "SK", "SL", "SM", "SN", "SR", "ST", "SV", "SZ", "TD", "TG", "TH", "TJ", "TL", "TN", "TR", "TT", "TZ", "UA", "UG", "US", "UY", "UZ", "VC", "VE", "VN", "ZA", "ZM", "ZW" ],
|
||||
"disc_number" : 1,
|
||||
"duration_ms" : 228623,
|
||||
"explicit" : false,
|
||||
"external_urls" : {
|
||||
"spotify" : "https://open.spotify.com/track/4IaRxPHdzLJ78tm7lxg9M8"
|
||||
},
|
||||
"href" : "https://api.spotify.com/v1/tracks/4IaRxPHdzLJ78tm7lxg9M8",
|
||||
"id" : "4IaRxPHdzLJ78tm7lxg9M8",
|
||||
"is_local" : false,
|
||||
"name" : "Gagarin",
|
||||
"preview_url" : "https://p.scdn.co/mp3-preview/1d91010dc50a73caa3831c4617f3d658ae279339?cid=4b150d8d6d374d1e8dbb85f4f11a2ad9",
|
||||
"track_number" : 3,
|
||||
"type" : "track",
|
||||
"uri" : "spotify:track:4IaRxPHdzLJ78tm7lxg9M8"
|
||||
}, {
|
||||
"artists" : [ {
|
||||
"external_urls" : {
|
||||
"spotify" : "https://open.spotify.com/artist/6VsiDFMZJlJ053P1uO4A6h"
|
||||
},
|
||||
"href" : "https://api.spotify.com/v1/artists/6VsiDFMZJlJ053P1uO4A6h",
|
||||
"id" : "6VsiDFMZJlJ053P1uO4A6h",
|
||||
"name" : "Public Service Broadcasting",
|
||||
"type" : "artist",
|
||||
"uri" : "spotify:artist:6VsiDFMZJlJ053P1uO4A6h"
|
||||
} ],
|
||||
"available_markets" : [ "AD", "AE", "AG", "AL", "AM", "AO", "AR", "AT", "AZ", "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", "BJ", "BN", "BO", "BR", "BS", "BT", "BW", "BY", "BZ", "CA", "CD", "CG", "CH", "CI", "CL", "CM", "CO", "CR", "CV", "CW", "CY", "CZ", "DE", "DJ", "DK", "DM", "DO", "DZ", "EC", "EE", "EG", "ES", "ET", "FI", "FR", "GA", "GB", "GD", "GE", "GH", "GM", "GN", "GQ", "GR", "GT", "GW", "GY", "HN", "HR", "HT", "HU", "ID", "IE", "IL", "IN", "IQ", "IS", "IT", "JM", "JO", "KE", "KG", "KH", "KM", "KN", "KR", "KW", "KZ", "LA", "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", "LV", "LY", "MA", "MC", "MD", "MG", "MK", "ML", "MN", "MO", "MR", "MT", "MU", "MV", "MW", "MX", "MZ", "NA", "NE", "NG", "NI", "NL", "NO", "NP", "NZ", "OM", "PA", "PE", "PG", "PH", "PK", "PL", "PS", "PT", "PW", "PY", "QA", "RO", "RW", "SA", "SC", "SE", "SI", "SK", "SL", "SM", "SN", "SR", "ST", "SV", "SZ", "TD", "TG", "TH", "TJ", "TL", "TN", "TR", "TT", "TZ", "UA", "UG", "US", "UY", "UZ", "VC", "VE", "VN", "ZA", "ZM", "ZW" ],
|
||||
"disc_number" : 1,
|
||||
"duration_ms" : 181621,
|
||||
"explicit" : false,
|
||||
"external_urls" : {
|
||||
"spotify" : "https://open.spotify.com/track/6SONXH9dJQgDY9vCjdkZfK"
|
||||
},
|
||||
"href" : "https://api.spotify.com/v1/tracks/6SONXH9dJQgDY9vCjdkZfK",
|
||||
"id" : "6SONXH9dJQgDY9vCjdkZfK",
|
||||
"is_local" : false,
|
||||
"name" : "Fire in the Cockpit",
|
||||
"preview_url" : "https://p.scdn.co/mp3-preview/a2180cec25187fa80ddc80dcbe36edda1cc169cc?cid=4b150d8d6d374d1e8dbb85f4f11a2ad9",
|
||||
"track_number" : 4,
|
||||
"type" : "track",
|
||||
"uri" : "spotify:track:6SONXH9dJQgDY9vCjdkZfK"
|
||||
}, {
|
||||
"artists" : [ {
|
||||
"external_urls" : {
|
||||
"spotify" : "https://open.spotify.com/artist/6VsiDFMZJlJ053P1uO4A6h"
|
||||
},
|
||||
"href" : "https://api.spotify.com/v1/artists/6VsiDFMZJlJ053P1uO4A6h",
|
||||
"id" : "6VsiDFMZJlJ053P1uO4A6h",
|
||||
"name" : "Public Service Broadcasting",
|
||||
"type" : "artist",
|
||||
"uri" : "spotify:artist:6VsiDFMZJlJ053P1uO4A6h"
|
||||
} ],
|
||||
"available_markets" : [ "AD", "AE", "AG", "AL", "AM", "AO", "AR", "AT", "AZ", "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", "BJ", "BN", "BO", "BR", "BS", "BT", "BW", "BY", "BZ", "CA", "CD", "CG", "CH", "CI", "CL", "CM", "CO", "CR", "CV", "CW", "CY", "CZ", "DE", "DJ", "DK", "DM", "DO", "DZ", "EC", "EE", "EG", "ES", "ET", "FI", "FR", "GA", "GB", "GD", "GE", "GH", "GM", "GN", "GQ", "GR", "GT", "GW", "GY", "HN", "HR", "HT", "HU", "ID", "IE", "IL", "IN", "IQ", "IS", "IT", "JM", "JO", "KE", "KG", "KH", "KM", "KN", "KR", "KW", "KZ", "LA", "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", "LV", "LY", "MA", "MC", "MD", "MG", "MK", "ML", "MN", "MO", "MR", "MT", "MU", "MV", "MW", "MX", "MZ", "NA", "NE", "NG", "NI", "NL", "NO", "NP", "NZ", "OM", "PA", "PE", "PG", "PH", "PK", "PL", "PS", "PT", "PW", "PY", "QA", "RO", "RW", "SA", "SC", "SE", "SI", "SK", "SL", "SM", "SN", "SR", "ST", "SV", "SZ", "TD", "TG", "TH", "TJ", "TL", "TN", "TR", "TT", "TZ", "UA", "UG", "US", "UY", "UZ", "VC", "VE", "VN", "ZA", "ZM", "ZW" ],
|
||||
"disc_number" : 1,
|
||||
"duration_ms" : 255606,
|
||||
"explicit" : false,
|
||||
"external_urls" : {
|
||||
"spotify" : "https://open.spotify.com/track/52KMWPHDL84oo2Ncj3O6RX"
|
||||
},
|
||||
"href" : "https://api.spotify.com/v1/tracks/52KMWPHDL84oo2Ncj3O6RX",
|
||||
"id" : "52KMWPHDL84oo2Ncj3O6RX",
|
||||
"is_local" : false,
|
||||
"name" : "E.V.A.",
|
||||
"preview_url" : "https://p.scdn.co/mp3-preview/732171a4a5e27540b6709602b4af9662fda98595?cid=4b150d8d6d374d1e8dbb85f4f11a2ad9",
|
||||
"track_number" : 5,
|
||||
"type" : "track",
|
||||
"uri" : "spotify:track:52KMWPHDL84oo2Ncj3O6RX"
|
||||
}, {
|
||||
"artists" : [ {
|
||||
"external_urls" : {
|
||||
"spotify" : "https://open.spotify.com/artist/6VsiDFMZJlJ053P1uO4A6h"
|
||||
},
|
||||
"href" : "https://api.spotify.com/v1/artists/6VsiDFMZJlJ053P1uO4A6h",
|
||||
"id" : "6VsiDFMZJlJ053P1uO4A6h",
|
||||
"name" : "Public Service Broadcasting",
|
||||
"type" : "artist",
|
||||
"uri" : "spotify:artist:6VsiDFMZJlJ053P1uO4A6h"
|
||||
} ],
|
||||
"available_markets" : [ "AD", "AE", "AG", "AL", "AM", "AO", "AR", "AT", "AZ", "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", "BJ", "BN", "BO", "BR", "BS", "BT", "BW", "BY", "BZ", "CA", "CD", "CG", "CH", "CI", "CL", "CM", "CO", "CR", "CV", "CW", "CY", "CZ", "DE", "DJ", "DK", "DM", "DO", "DZ", "EC", "EE", "EG", "ES", "ET", "FI", "FR", "GA", "GB", "GD", "GE", "GH", "GM", "GN", "GQ", "GR", "GT", "GW", "GY", "HN", "HR", "HT", "HU", "ID", "IE", "IL", "IN", "IQ", "IS", "IT", "JM", "JO", "KE", "KG", "KH", "KM", "KN", "KR", "KW", "KZ", "LA", "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", "LV", "LY", "MA", "MC", "MD", "MG", "MK", "ML", "MN", "MO", "MR", "MT", "MU", "MV", "MW", "MX", "MZ", "NA", "NE", "NG", "NI", "NL", "NO", "NP", "NZ", "OM", "PA", "PE", "PG", "PH", "PK", "PL", "PS", "PT", "PW", "PY", "QA", "RO", "RW", "SA", "SC", "SE", "SI", "SK", "SL", "SM", "SN", "SR", "ST", "SV", "SZ", "TD", "TG", "TH", "TJ", "TL", "TN", "TR", "TT", "TZ", "UA", "UG", "US", "UY", "UZ", "VC", "VE", "VN", "ZA", "ZM", "ZW" ],
|
||||
"disc_number" : 1,
|
||||
"duration_ms" : 379931,
|
||||
"explicit" : false,
|
||||
"external_urls" : {
|
||||
"spotify" : "https://open.spotify.com/track/3jjMyq44OIjNgmpXLhpw7W"
|
||||
},
|
||||
"href" : "https://api.spotify.com/v1/tracks/3jjMyq44OIjNgmpXLhpw7W",
|
||||
"id" : "3jjMyq44OIjNgmpXLhpw7W",
|
||||
"is_local" : false,
|
||||
"name" : "The Other Side",
|
||||
"preview_url" : "https://p.scdn.co/mp3-preview/5eda4958044595b36842f2362799d91f080a7357?cid=4b150d8d6d374d1e8dbb85f4f11a2ad9",
|
||||
"track_number" : 6,
|
||||
"type" : "track",
|
||||
"uri" : "spotify:track:3jjMyq44OIjNgmpXLhpw7W"
|
||||
}, {
|
||||
"artists" : [ {
|
||||
"external_urls" : {
|
||||
"spotify" : "https://open.spotify.com/artist/6VsiDFMZJlJ053P1uO4A6h"
|
||||
},
|
||||
"href" : "https://api.spotify.com/v1/artists/6VsiDFMZJlJ053P1uO4A6h",
|
||||
"id" : "6VsiDFMZJlJ053P1uO4A6h",
|
||||
"name" : "Public Service Broadcasting",
|
||||
"type" : "artist",
|
||||
"uri" : "spotify:artist:6VsiDFMZJlJ053P1uO4A6h"
|
||||
}, {
|
||||
"external_urls" : {
|
||||
"spotify" : "https://open.spotify.com/artist/7wbZFLV3wwTqyrKNCJ8Y8D"
|
||||
},
|
||||
"href" : "https://api.spotify.com/v1/artists/7wbZFLV3wwTqyrKNCJ8Y8D",
|
||||
"id" : "7wbZFLV3wwTqyrKNCJ8Y8D",
|
||||
"name" : "Smoke Fairies",
|
||||
"type" : "artist",
|
||||
"uri" : "spotify:artist:7wbZFLV3wwTqyrKNCJ8Y8D"
|
||||
} ],
|
||||
"available_markets" : [ "AD", "AE", "AG", "AL", "AM", "AO", "AR", "AT", "AZ", "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", "BJ", "BN", "BO", "BR", "BS", "BT", "BW", "BY", "BZ", "CA", "CD", "CG", "CH", "CI", "CL", "CM", "CO", "CR", "CV", "CW", "CY", "CZ", "DE", "DJ", "DK", "DM", "DO", "DZ", "EC", "EE", "EG", "ES", "ET", "FI", "FR", "GA", "GB", "GD", "GE", "GH", "GM", "GN", "GQ", "GR", "GT", "GW", "GY", "HN", "HR", "HT", "HU", "ID", "IE", "IL", "IN", "IQ", "IS", "IT", "JM", "JO", "KE", "KG", "KH", "KM", "KN", "KR", "KW", "KZ", "LA", "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", "LV", "LY", "MA", "MC", "MD", "MG", "MK", "ML", "MN", "MO", "MR", "MT", "MU", "MV", "MW", "MX", "MZ", "NA", "NE", "NG", "NI", "NL", "NO", "NP", "NZ", "OM", "PA", "PE", "PG", "PH", "PK", "PL", "PS", "PT", "PW", "PY", "QA", "RO", "RW", "SA", "SC", "SE", "SI", "SK", "SL", "SM", "SN", "SR", "ST", "SV", "SZ", "TD", "TG", "TH", "TJ", "TL", "TN", "TR", "TT", "TZ", "UA", "UG", "US", "UY", "UZ", "VC", "VE", "VN", "ZA", "ZM", "ZW" ],
|
||||
"disc_number" : 1,
|
||||
"duration_ms" : 269376,
|
||||
"explicit" : false,
|
||||
"external_urls" : {
|
||||
"spotify" : "https://open.spotify.com/track/5Um9ghqMlKALp9AcRMIk7B"
|
||||
},
|
||||
"href" : "https://api.spotify.com/v1/tracks/5Um9ghqMlKALp9AcRMIk7B",
|
||||
"id" : "5Um9ghqMlKALp9AcRMIk7B",
|
||||
"is_local" : false,
|
||||
"name" : "Valentina",
|
||||
"preview_url" : "https://p.scdn.co/mp3-preview/9e812bde9e2944d22f1eae78eab2adb89ce1f1cd?cid=4b150d8d6d374d1e8dbb85f4f11a2ad9",
|
||||
"track_number" : 7,
|
||||
"type" : "track",
|
||||
"uri" : "spotify:track:5Um9ghqMlKALp9AcRMIk7B"
|
||||
}, {
|
||||
"artists" : [ {
|
||||
"external_urls" : {
|
||||
"spotify" : "https://open.spotify.com/artist/6VsiDFMZJlJ053P1uO4A6h"
|
||||
},
|
||||
"href" : "https://api.spotify.com/v1/artists/6VsiDFMZJlJ053P1uO4A6h",
|
||||
"id" : "6VsiDFMZJlJ053P1uO4A6h",
|
||||
"name" : "Public Service Broadcasting",
|
||||
"type" : "artist",
|
||||
"uri" : "spotify:artist:6VsiDFMZJlJ053P1uO4A6h"
|
||||
} ],
|
||||
"available_markets" : [ "AD", "AE", "AG", "AL", "AM", "AO", "AR", "AT", "AZ", "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", "BJ", "BN", "BO", "BR", "BS", "BT", "BW", "BY", "BZ", "CA", "CD", "CG", "CH", "CI", "CL", "CM", "CO", "CR", "CV", "CW", "CY", "CZ", "DE", "DJ", "DK", "DM", "DO", "DZ", "EC", "EE", "EG", "ES", "ET", "FI", "FR", "GA", "GB", "GD", "GE", "GH", "GM", "GN", "GQ", "GR", "GT", "GW", "GY", "HN", "HR", "HT", "HU", "ID", "IE", "IL", "IN", "IQ", "IS", "IT", "JM", "JO", "KE", "KG", "KH", "KM", "KN", "KR", "KW", "KZ", "LA", "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", "LV", "LY", "MA", "MC", "MD", "MG", "MK", "ML", "MN", "MO", "MR", "MT", "MU", "MV", "MW", "MX", "MZ", "NA", "NE", "NG", "NI", "NL", "NO", "NP", "NZ", "OM", "PA", "PE", "PG", "PH", "PK", "PL", "PS", "PT", "PW", "PY", "QA", "RO", "RW", "SA", "SC", "SE", "SI", "SK", "SL", "SM", "SN", "SR", "ST", "SV", "SZ", "TD", "TG", "TH", "TJ", "TL", "TN", "TR", "TT", "TZ", "UA", "UG", "US", "UY", "UZ", "VC", "VE", "VN", "ZA", "ZM", "ZW" ],
|
||||
"disc_number" : 1,
|
||||
"duration_ms" : 252720,
|
||||
"explicit" : false,
|
||||
"external_urls" : {
|
||||
"spotify" : "https://open.spotify.com/track/5xYZXIgVAND5sWjN8G0hID"
|
||||
},
|
||||
"href" : "https://api.spotify.com/v1/tracks/5xYZXIgVAND5sWjN8G0hID",
|
||||
"id" : "5xYZXIgVAND5sWjN8G0hID",
|
||||
"is_local" : false,
|
||||
"name" : "Go!",
|
||||
"preview_url" : "https://p.scdn.co/mp3-preview/a7f4e9d98224dea630ee6604938848c3fd0c2842?cid=4b150d8d6d374d1e8dbb85f4f11a2ad9",
|
||||
"track_number" : 8,
|
||||
"type" : "track",
|
||||
"uri" : "spotify:track:5xYZXIgVAND5sWjN8G0hID"
|
||||
}, {
|
||||
"artists" : [ {
|
||||
"external_urls" : {
|
||||
"spotify" : "https://open.spotify.com/artist/6VsiDFMZJlJ053P1uO4A6h"
|
||||
},
|
||||
"href" : "https://api.spotify.com/v1/artists/6VsiDFMZJlJ053P1uO4A6h",
|
||||
"id" : "6VsiDFMZJlJ053P1uO4A6h",
|
||||
"name" : "Public Service Broadcasting",
|
||||
"type" : "artist",
|
||||
"uri" : "spotify:artist:6VsiDFMZJlJ053P1uO4A6h"
|
||||
} ],
|
||||
"available_markets" : [ "AD", "AE", "AG", "AL", "AM", "AO", "AR", "AT", "AZ", "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", "BJ", "BN", "BO", "BR", "BS", "BT", "BW", "BY", "BZ", "CA", "CD", "CG", "CH", "CI", "CL", "CM", "CO", "CR", "CV", "CW", "CY", "CZ", "DE", "DJ", "DK", "DM", "DO", "DZ", "EC", "EE", "EG", "ES", "ET", "FI", "FR", "GA", "GB", "GD", "GE", "GH", "GM", "GN", "GQ", "GR", "GT", "GW", "GY", "HN", "HR", "HT", "HU", "ID", "IE", "IL", "IN", "IQ", "IS", "IT", "JM", "JO", "KE", "KG", "KH", "KM", "KN", "KR", "KW", "KZ", "LA", "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", "LV", "LY", "MA", "MC", "MD", "MG", "MK", "ML", "MN", "MO", "MR", "MT", "MU", "MV", "MW", "MX", "MZ", "NA", "NE", "NG", "NI", "NL", "NO", "NP", "NZ", "OM", "PA", "PE", "PG", "PH", "PK", "PL", "PS", "PT", "PW", "PY", "QA", "RO", "RW", "SA", "SC", "SE", "SI", "SK", "SL", "SM", "SN", "SR", "ST", "SV", "SZ", "TD", "TG", "TH", "TJ", "TL", "TN", "TR", "TT", "TZ", "UA", "UG", "US", "UY", "UZ", "VC", "VE", "VN", "ZA", "ZM", "ZW" ],
|
||||
"disc_number" : 1,
|
||||
"duration_ms" : 442359,
|
||||
"explicit" : false,
|
||||
"external_urls" : {
|
||||
"spotify" : "https://open.spotify.com/track/5ERrJuNLnmHj525ooOKyqJ"
|
||||
},
|
||||
"href" : "https://api.spotify.com/v1/tracks/5ERrJuNLnmHj525ooOKyqJ",
|
||||
"id" : "5ERrJuNLnmHj525ooOKyqJ",
|
||||
"is_local" : false,
|
||||
"name" : "Tomorrow",
|
||||
"preview_url" : "https://p.scdn.co/mp3-preview/779a285aca862b886613815a0c1d1817446b550e?cid=4b150d8d6d374d1e8dbb85f4f11a2ad9",
|
||||
"track_number" : 9,
|
||||
"type" : "track",
|
||||
"uri" : "spotify:track:5ERrJuNLnmHj525ooOKyqJ"
|
||||
} ],
|
||||
"limit" : 50,
|
||||
"next" : null,
|
||||
"offset" : 0,
|
||||
"previous" : null,
|
||||
"total" : 9
|
||||
},
|
||||
"type" : "album",
|
||||
"uri" : "spotify:album:65KwtzkJXw7oT819NFWmEP"
|
||||
}
|
|
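The JSON above is the tail of a captured Spotify Web API album payload: three track objects inside the album's "tracks" paging object (items / limit / next / offset / previous / total), followed by the album-level type and uri. A minimal sketch of reading such a fixture back, assuming only the standard library; the function name and the path argument are illustrative, not part of this repository:

import json

def album_track_listing(path):
    """List (track_number, name, duration_ms) from a saved Spotify album payload."""
    with open(path) as f:
        album = json.load(f)
    paging = album["tracks"]  # paging object: items / limit / next / offset / previous / total
    # In this fixture "next" is null and "total" is 9, so a single page holds
    # every track and no follow-up request for paging["next"] is needed.
    return [(t["track_number"], t["name"], t["duration_ms"]) for t in paging["items"]]

For the album above, the listing would end with (8, 'Go!', 252720) and (9, 'Tomorrow', 442359).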
@@ -0,0 +1 @@
{"movie_results":[],"person_results":[],"tv_results":[{"adult":false,"backdrop_path":"/sRfl6vyzGWutgG0cmXmbChC4iN6.jpg","id":57243,"name":"神秘博士","original_language":"en","original_name":"Doctor Who","overview":"名为“博士”的宇宙最后一个时间领主,有着重生的能力、体力及优越的智力,利用时光机器TARDIS英国传统的蓝色警亭,展开他勇敢的时光冒险之旅,拯救外星生物、地球与时空。","poster_path":"/sz4zF5z9zyFh8Z6g5IQPNq91cI7.jpg","media_type":"tv","genre_ids":[10759,18,10765],"popularity":158.575,"first_air_date":"2005-03-26","vote_average":7.402,"vote_count":2475,"origin_country":["GB"]}],"tv_episode_results":[],"tv_season_results":[]}
@@ -0,0 +1 @@
{"movie_results":[{"adult":false,"backdrop_path":"/13qDzilftzRZMUEHcpi57VLqNPw.jpg","id":282758,"title":"神秘博士:逃跑新娘","original_language":"en","original_title":"Doctor Who: The Runaway Bride","overview":"失去了罗斯的博士正在心灰意冷,而正在举行婚礼的多娜却被突然传送到塔迪斯里。博士带坏脾气的多娜返回地球,却被一群外星机器人追杀,塔迪斯上演了一场公路飚车。后来博士发现多娜身上带有异常含量的Huon粒子,而该粒子来源于上一代宇宙霸主。而博士的母星加利弗雷在宇宙中崛起时,已经消灭了所有的Huon粒子。最终博士揭开了一个藏于地球40亿年的秘密。","poster_path":"/gkTCC4VLv8jATM3kouAUK3EaoGd.jpg","media_type":"movie","genre_ids":[878],"popularity":7.214,"release_date":"2006-12-25","video":false,"vote_average":7.739,"vote_count":201}],"person_results":[],"tv_results":[],"tv_episode_results":[{"id":1008547,"name":"2006年圣诞特辑:逃跑新娘","overview":"失去了罗斯的博士正在心灰意冷,而正在举行婚礼的多娜却被突然传送到塔迪斯里。博士带坏脾气的多娜返回地球,却被一群外星机器人追杀,塔迪斯上演了一场公路飚车。后来博士发现多娜身上带有异常含量的Huon粒子,而该粒子来源于上一代宇宙霸主。而博士的母星加利弗雷在宇宙中崛起时,已经消灭了所有的Huon粒子。最终博士揭开了一个藏于地球40亿年的秘密。","media_type":"tv_episode","vote_average":6.8,"vote_count":14,"air_date":"2006-12-25","episode_number":4,"production_code":"NCFT094N","runtime":64,"season_number":0,"show_id":57243,"still_path":"/mkJufoqvEBMVvnVUjYlR9lGarZB.jpg"}],"tv_season_results":[]}
@@ -0,0 +1 @@
{"movie_results":[],"person_results":[],"tv_results":[],"tv_episode_results":[{"id":941505,"name":"活宝搭档","overview":"博士在伦敦发现艾迪派斯公司新产品药物有问题,人类服用后会悄悄的产生土豆状生物,并在夜里1点10分逃走回到保姆身边,于是博士潜入公司决定探查究竟,在探查时遇到了多娜原来Adiposian人丢失了他们的繁育星球,于是跑到地球利用人类做代孕母繁殖宝宝。最后保姆在高空中被抛弃,脂肪球回到了父母身边,博士邀请多娜一同旅行。【Rose从平行宇宙回归】","media_type":"tv_episode","vote_average":7.2,"vote_count":43,"air_date":"2008-04-05","episode_number":1,"production_code":"","runtime":null,"season_number":4,"show_id":57243,"still_path":"/cq1zrCS267vGXa3rCYQkVKNJE9v.jpg"}],"tv_season_results":[]}
@@ -0,0 +1 @@
{"movie_results":[{"adult":false,"backdrop_path":"/s3TBrRGB1iav7gFOCNx3H31MoES.jpg","id":27205,"title":"盗梦空间","original_language":"en","original_title":"Inception","overview":"道姆·柯布与同事阿瑟和纳什在一次针对日本能源大亨齐藤的盗梦行动中失败,反被齐藤利用。齐藤威逼利诱因遭通缉而流亡海外的柯布帮他拆分他竞争对手的公司,采取极端措施在其唯一继承人罗伯特·费希尔的深层潜意识中种下放弃家族公司、自立门户的想法。为了重返美国,柯布偷偷求助于岳父迈尔斯,吸收了年轻的梦境设计师艾里阿德妮、梦境演员艾姆斯和药剂师约瑟夫加入行动。在一层层递进的梦境中,柯布不仅要对付费希尔潜意识的本能反抗,还必须直面已逝妻子梅的处处破坏,实际情况远比预想危险得多…","poster_path":"/lQEjWasu07JbQHdfFI5VnEUfId2.jpg","media_type":"movie","genre_ids":[28,878,12],"popularity":74.425,"release_date":"2010-07-15","video":false,"vote_average":8.359,"vote_count":32695}],"person_results":[],"tv_results":[],"tv_episode_results":[],"tv_season_results":[]}
@@ -0,0 +1 @@
{"movie_results":[],"person_results":[],"tv_results":[{"adult":false,"backdrop_path":"/8IC1q0lHFwi5m8VtChLzIfmpaZH.jpg","id":86941,"name":"北海鲸梦","original_language":"en","original_name":"The North Water","overview":"改编自伊恩·麦奎尔的同名获奖小说,聚焦19世纪一次灾难性的捕鲸活动。故事围绕帕特里克·萨姆纳展开,他是一名声名狼藉的前战地医生,后成为捕鲸船上的医生,在船上遇到了鱼叉手亨利·德拉克斯,一个残忍、不道德的杀手。萨姆纳没有逃离过去的恐惧,而是被迫在北极荒原上为生存而进行残酷的斗争...","poster_path":"/9CM0ca8pX1os3SJ24hsIc0nN8ph.jpg","media_type":"tv","genre_ids":[18,9648],"popularity":11.318,"first_air_date":"2021-07-14","vote_average":7.5,"vote_count":75,"origin_country":["US"]}],"tv_episode_results":[],"tv_season_results":[]}
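The five one-line fixtures above are TMDB find-by-external-ID responses: every payload carries the same five buckets (movie_results, person_results, tv_results, tv_episode_results, tv_season_results) and only the matching ones are non-empty. A minimal sketch of picking the first hit under a fixed precedence; the function name is illustrative:

def first_tmdb_find_result(data):
    # Check buckets in a fixed order; TMDB fills only those matching the ID.
    for bucket in ("movie_results", "tv_results", "tv_season_results",
                   "tv_episode_results", "person_results"):
        hits = data.get(bucket) or []
        if hits:
            return bucket, hits[0]
    return None, None

Note that the Runaway Bride fixture populates both movie_results and tv_episode_results, so the precedence order above decides which entry wins.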
File diff suppressed because one or more lines are too long
3319
test_data/https___book_douban_com_subject_1089243_
Normal file
File diff suppressed because it is too large
3042
test_data/https___book_douban_com_subject_2037260_
Normal file
File diff suppressed because it is too large
1446
test_data/https___book_douban_com_subject_35902899_
Normal file
File diff suppressed because it is too large
2516
test_data/https___book_douban_com_works_1008677_
Normal file
File diff suppressed because it is too large
Binary file not shown.
10
test_data/https___itunes_apple_com_lookup_id_1050430296
Normal file
@@ -0,0 +1,10 @@



{
 "resultCount":1,
 "results": [
{"wrapperType":"track", "kind":"podcast", "artistId":127981066, "collectionId":1050430296, "trackId":1050430296, "artistName":"WNYC Studios and The New Yorker", "collectionName":"The New Yorker Radio Hour", "trackName":"The New Yorker Radio Hour", "collectionCensoredName":"The New Yorker Radio Hour", "trackCensoredName":"The New Yorker Radio Hour", "artistViewUrl":"https://podcasts.apple.com/us/artist/wnyc/127981066?uo=4", "collectionViewUrl":"https://podcasts.apple.com/us/podcast/the-new-yorker-radio-hour/id1050430296?uo=4", "feedUrl":"http://feeds.feedburner.com/newyorkerradiohour", "trackViewUrl":"https://podcasts.apple.com/us/podcast/the-new-yorker-radio-hour/id1050430296?uo=4", "artworkUrl30":"https://is2-ssl.mzstatic.com/image/thumb/Podcasts115/v4/e3/83/42/e38342fa-712d-ec74-2f31-946601e04e27/mza_2714925949638887112.png/30x30bb.jpg", "artworkUrl60":"https://is2-ssl.mzstatic.com/image/thumb/Podcasts115/v4/e3/83/42/e38342fa-712d-ec74-2f31-946601e04e27/mza_2714925949638887112.png/60x60bb.jpg", "artworkUrl100":"https://is2-ssl.mzstatic.com/image/thumb/Podcasts115/v4/e3/83/42/e38342fa-712d-ec74-2f31-946601e04e27/mza_2714925949638887112.png/100x100bb.jpg", "collectionPrice":0.00, "trackPrice":0.00, "collectionHdPrice":0, "releaseDate":"2022-11-29T11:00:00Z", "collectionExplicitness":"notExplicit", "trackExplicitness":"cleaned", "trackCount":150, "trackTimeMillis":1097, "country":"USA", "currency":"USD", "primaryGenreName":"News Commentary", "contentAdvisoryRating":"Clean", "artworkUrl600":"https://is2-ssl.mzstatic.com/image/thumb/Podcasts115/v4/e3/83/42/e38342fa-712d-ec74-2f31-946601e04e27/mza_2714925949638887112.png/600x600bb.jpg", "genreIds":["1530", "26", "1489", "1527"], "genres":["News Commentary", "Podcasts", "News", "Politics"]}]
}


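The hunk above is an iTunes podcast lookup. The response wraps a single show in "results", and the RSS feed needed to fetch episode data is its feedUrl field. A minimal sketch of pulling the useful fields out, assuming the body has already been parsed with json.loads (which tolerates the leading blank lines seen in the fixture); the helper name is illustrative:

def podcast_from_lookup(data):
    # An empty lookup has resultCount 0 and no usable results entry.
    if not data.get("resultCount"):
        return None
    show = data["results"][0]
    return {
        "title": show["trackName"],
        "feed_url": show.get("feedUrl"),
        "cover_url": show.get("artworkUrl600"),
    }

For the fixture above this yields The New Yorker Radio Hour with its feed at http://feeds.feedburner.com/newyorkerradiohour.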
3228
test_data/https___movie_douban_com_subject_26895436_
Normal file
File diff suppressed because it is too large
3379
test_data/https___movie_douban_com_subject_3541415_
Normal file
File diff suppressed because it is too large
3301
test_data/https___movie_douban_com_subject_3627919_
Normal file
File diff suppressed because it is too large
2522
test_data/https___movie_douban_com_subject_4296866_
Normal file
File diff suppressed because it is too large
1109
test_data/https___music_douban_com_subject_33551231_
Normal file
File diff suppressed because it is too large
2717
test_data/https___store_steampowered_com_app_620
Normal file
File diff suppressed because one or more lines are too long
2044
test_data/https___www_douban_com_game_10734307_
Normal file
File diff suppressed because it is too large
1344
test_data/https___www_douban_com_location_drama_24849279_
Normal file
File diff suppressed because it is too large
20
test_data/https___www_goodreads_com_book_show_11798823
Normal file
File diff suppressed because one or more lines are too long
20
test_data/https___www_goodreads_com_book_show_3597767
Normal file
File diff suppressed because one or more lines are too long
20
test_data/https___www_goodreads_com_book_show_40961427
Normal file
File diff suppressed because one or more lines are too long
1509
test_data/https___www_goodreads_com_book_show_45064996
Normal file
File diff suppressed because one or more lines are too long
20
test_data/https___www_goodreads_com_book_show_56821625
Normal file
File diff suppressed because one or more lines are too long
20
test_data/https___www_goodreads_com_book_show_59952545
Normal file
File diff suppressed because one or more lines are too long
20
test_data/https___www_goodreads_com_book_show_77566
Normal file
File diff suppressed because one or more lines are too long
4165
test_data/https___www_goodreads_com_work_editions_1383900
Normal file
File diff suppressed because one or more lines are too long
4019
test_data/https___www_goodreads_com_work_editions_153313
Normal file
File diff suppressed because one or more lines are too long
4160
test_data/https___www_goodreads_com_work_editions_24173962
Normal file
File diff suppressed because one or more lines are too long
@@ -0,0 +1,75 @@
{
 "kind": "books#volume",
 "id": "hV--zQEACAAJ",
 "etag": "lwbqGlV/h5s",
 "selfLink": "https://www.googleapis.com/books/v1/volumes/hV--zQEACAAJ",
 "volumeInfo": {
  "title": "1984 Nineteen Eighty-Four",
  "authors": [
   "George Orwell"
  ],
  "publisher": "Alma Classics",
  "publishedDate": "2021-01-07",
  "description": "In 1984, London is a grim city in the totalitarian state of Oceania where Big Brother is always watching you and the Thought Police can practically read your mind. Winston Smith is a man in grave danger for the simple reason that his memory still functions. Drawn into a forbidden love affair, Winston finds the courage to join a secret revolutionary organization called The Brotherhood, dedicated to the destruction of the Party. Together with his beloved Julia, he hazards his life in a deadly match against the powers that be.Lionel Trilling said of Orwell's masterpiece \" 1984 is a profound, terrifying, and wholly fascinating book. It is a fantasy of the political future, and like any such fantasy, serves its author as a magnifying device for an examination of the present.\" Though the year 1984 now exists in the past, Orwell's novel remains an urgent call for the individual willing to speak truth to power.\"",
  "industryIdentifiers": [
   {
    "type": "ISBN_10",
    "identifier": "1847498574"
   },
   {
    "type": "ISBN_13",
    "identifier": "9781847498571"
   }
  ],
  "readingModes": {
   "text": false,
   "image": false
  },
  "pageCount": 400,
  "printedPageCount": 400,
  "dimensions": {
   "height": "19.90 cm",
   "width": "13.10 cm",
   "thickness": "2.20 cm"
  },
  "printType": "BOOK",
  "averageRating": 4,
  "ratingsCount": 564,
  "maturityRating": "NOT_MATURE",
  "allowAnonLogging": false,
  "contentVersion": "preview-1.0.0",
  "panelizationSummary": {
   "containsEpubBubbles": false,
   "containsImageBubbles": false
  },
  "imageLinks": {
   "smallThumbnail": "http://books.google.com/books/content?id=hV--zQEACAAJ&printsec=frontcover&img=1&zoom=5&imgtk=AFLRE72QQ6bzD4LfhArQGJHoUdX5wex-wfg5FVAKOo2MbmCbFSF_HbDUwhZ-gAvmSKiEBTyoRkC3Kvbo9k1jB0uiOyOXcvgAc2643MV091Ny8TySRaV2HSVXtch-MYK2qfzNvUKwGEhx&source=gbs_api",
   "thumbnail": "http://books.google.com/books/content?id=hV--zQEACAAJ&printsec=frontcover&img=1&zoom=1&imgtk=AFLRE70UTuB9rf2_mqyGrJGsI2XbzpjV2vGQP9Oyjc441rCvvRiGMhMGYXsgTMbAUZ3rHtxarPvPIqaT-RGH9JzzFEbrXs3cp7f2jaHVh3M-fyPcEkg0eao_AuYUePhckBN-PtHZNyy-&source=gbs_api"
  },
  "language": "en",
  "previewLink": "http://books.google.com/books?id=hV--zQEACAAJ&hl=&source=gbs_api",
  "infoLink": "https://play.google.com/store/books/details?id=hV--zQEACAAJ&source=gbs_api",
  "canonicalVolumeLink": "https://play.google.com/store/books/details?id=hV--zQEACAAJ"
 },
 "saleInfo": {
  "country": "US",
  "saleability": "NOT_FOR_SALE",
  "isEbook": false
 },
 "accessInfo": {
  "country": "US",
  "viewability": "NO_PAGES",
  "embeddable": false,
  "publicDomain": false,
  "textToSpeechPermission": "ALLOWED",
  "epub": {
   "isAvailable": false
  },
  "pdf": {
   "isAvailable": false
  },
  "webReaderLink": "http://play.google.com/books/reader?id=hV--zQEACAAJ&hl=&source=gbs_api",
  "accessViewStatus": "NONE",
  "quoteSharingAllowed": false
 }
}
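The Google Books volume above keeps both ISBN forms under volumeInfo.industryIdentifiers. A minimal sketch of preferring the 13-digit form with the 10-digit one as a fallback; the helper name is illustrative:

def isbn13_from_volume(volume):
    identifiers = volume.get("volumeInfo", {}).get("industryIdentifiers", [])
    # Each entry looks like {"type": "ISBN_10" or "ISBN_13", "identifier": "..."}.
    by_type = {i["type"]: i["identifier"] for i in identifiers}
    return by_type.get("ISBN_13") or by_type.get("ISBN_10")

For this fixture the result is "9781847498571".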
@@ -0,0 +1 @@
[{"id": 72, "age_ratings": [11721, 32022, 47683, 47684, 47685, 47686, 47687, 91785], "aggregated_rating": 92.4444444444444, "aggregated_rating_count": 13, "alternative_names": [50135], "artworks": [36972], "bundles": [55025, 191406], "category": 0, "collection": 87, "cover": {"id": 82660, "url": "//images.igdb.com/igdb/image/upload/t_thumb/co1rs4.jpg"}, "created_at": 1297956069, "dlcs": [99969, 114140], "external_games": [15150, 73156, 81867, 92870, 92979, 137388, 189642, 214010, 245334, 403070, 1303428, 1929756, 1931953, 2082680, 2161690, 2590310, 2600814], "first_release_date": 1303171200, "follows": 971, "franchises": [1724], "game_engines": [3], "game_modes": [1, 2, 3, 4], "genres": [{"id": 5, "name": "Shooter"}, {"id": 8, "name": "Platform"}, {"id": 9, "name": "Puzzle"}, {"id": 31, "name": "Adventure"}], "involved_companies": [{"id": 106733, "company": {"id": 56, "name": "Valve Corporation"}, "created_at": 1598486400, "developer": true, "game": 72, "porting": false, "publisher": true, "supporting": false, "updated_at": 1598486400, "checksum": "fa403088-a40a-1d83-16be-a68849472a6d"}, {"id": 106734, "company": {"id": 1, "name": "Electronic Arts"}, "created_at": 1598486400, "developer": false, "game": 72, "porting": false, "publisher": true, "supporting": false, "updated_at": 1598486400, "checksum": "53e59e19-f746-1195-c4e7-2b388e621317"}], "keywords": [350, 453, 575, 592, 1026, 1158, 1181, 1293, 1440, 1559, 1761, 2071, 2800, 3984, 4004, 4134, 4145, 4162, 4266, 4345, 4363, 4428, 4575, 4578, 4617, 4644, 4725, 4888, 4944, 4956, 4974, 5185, 5261, 5633, 5772, 5935, 5938, 5956, 6137, 6326, 6735, 6854, 7079, 7172, 7313, 7535, 7570, 7579, 8141, 8262, 8896, 9814, 10435, 11023, 11208, 12516, 14224, 18139, 18567, 27032], "multiplayer_modes": [11591, 11592, 11593, 11594, 11595], "name": "Portal 2", "platforms": [{"id": 3, "name": "Linux"}, {"id": 6, "name": "PC (Microsoft Windows)"}, {"id": 9, "name": "PlayStation 3"}, {"id": 12, "name": "Xbox 360"}, {"id": 14, "name": "Mac"}], "player_perspectives": [1], "rating": 91.6894220983232, "rating_count": 2765, "release_dates": [104964, 104965, 208203, 208204, 208205, 208206, 208207, 208208], "screenshots": [725, 726, 727, 728, 729], "similar_games": [71, 1877, 7350, 11646, 16992, 22387, 28070, 55038, 55190, 56033], "slug": "portal-2", "storyline": "You lost your memory, you are alone in a world full of danger, and your mission is survive using your mind. The only way to get out from this hell is.....Hi i'm GLAdOS, and welcome to the amazing world of portal 2, here i will expose you to a lot of tests, and try to k.. help Aperture Science envolve in a new era.\nYour job is advance in the levels i propose and get better and better, you will have an portal gun to help you, and remember nothing is impossible if you try, and try again and again and again....\nThe puzzles are waiting for you!", "summary": "Sequel to the acclaimed Portal (2007), Portal 2 pits the protagonist of the original game, Chell, and her new robot friend, Wheatley, against more puzzles conceived by GLaDOS, an A.I. with the sole purpose of testing the Portal Gun's mechanics and taking revenge on Chell for the events of Portal. \nAs a result of several interactions and revelations, Chell once again pushes to escape Aperture Science Labs.", "tags": [1, 18, 27, 268435461, 268435464, 268435465, 268435487, 536871262, 536871365, 536871487, 536871504, 536871938, 536872070, 536872093, 536872205, 536872352, 536872471, 536872673, 536872983, 536873712, 536874896, 536874916, 536875046, 536875057, 536875074, 536875178, 536875257, 536875275, 536875340, 536875487, 536875490, 536875529, 536875556, 536875637, 536875800, 536875856, 536875868, 536875886, 536876097, 536876173, 536876545, 536876684, 536876847, 536876850, 536876868, 536877049, 536877238, 536877647, 536877766, 536877991, 536878084, 536878225, 536878447, 536878482, 536878491, 536879053, 536879174, 536879808, 536880726, 536881347, 536881935, 536882120, 536883428, 536885136, 536889051, 536889479, 536897944], "themes": [1, 18, 27], "total_rating": 92.0669332713838, "total_rating_count": 2778, "updated_at": 1670514780, "url": "https://www.igdb.com/games/portal-2", "videos": [432, 16451, 17844, 17845], "websites": [17869, 17870, 41194, 41195, 150881, 150882, 150883, 296808], "checksum": "bcca1b61-2b30-13b8-a0ec-faf45d2ffdad", "game_localizations": [726]}]
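The Portal 2 record above was fetched with expanded genres and platforms (each an object carrying id and name), whereas other IGDB fixtures in this diff hold bare integer ids for the same fields. A small sketch that tolerates both shapes; the helper name is illustrative:

def igdb_names(values):
    # Expanded IGDB fields hold {"id": ..., "name": ...} dicts; unexpanded ones hold ints.
    return [v["name"] for v in values if isinstance(v, dict)]

For the fixture above, igdb_names(game["genres"]) gives ["Shooter", "Platform", "Puzzle", "Adventure"].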
File diff suppressed because one or more lines are too long
@@ -0,0 +1 @@
[{"id": 17870, "category": 13, "game": {"id": 72, "age_ratings": [11721, 32022, 47683, 47684, 47685, 47686, 47687, 91785], "aggregated_rating": 92.4444444444444, "aggregated_rating_count": 13, "alternative_names": [50135], "artworks": [36972], "bundles": [55025, 191406], "category": 0, "collection": 87, "cover": 82660, "created_at": 1297956069, "dlcs": [99969, 114140], "external_games": [15150, 73156, 81867, 92870, 92979, 137388, 189642, 214010, 245334, 403070, 1303428, 1929756, 1931953, 2082680, 2161690, 2590310, 2600814], "first_release_date": 1303171200, "follows": 971, "franchises": [1724], "game_engines": [3], "game_modes": [1, 2, 3, 4], "genres": [5, 8, 9, 31], "involved_companies": [106733, 106734], "keywords": [350, 453, 575, 592, 1026, 1158, 1181, 1293, 1440, 1559, 1761, 2071, 2800, 3984, 4004, 4134, 4145, 4162, 4266, 4345, 4363, 4428, 4575, 4578, 4617, 4644, 4725, 4888, 4944, 4956, 4974, 5185, 5261, 5633, 5772, 5935, 5938, 5956, 6137, 6326, 6735, 6854, 7079, 7172, 7313, 7535, 7570, 7579, 8141, 8262, 8896, 9814, 10435, 11023, 11208, 12516, 14224, 18139, 18567, 27032], "multiplayer_modes": [11591, 11592, 11593, 11594, 11595], "name": "Portal 2", "platforms": [3, 6, 9, 12, 14], "player_perspectives": [1], "rating": 91.6894220983232, "rating_count": 2765, "release_dates": [104964, 104965, 208203, 208204, 208205, 208206, 208207, 208208], "screenshots": [725, 726, 727, 728, 729], "similar_games": [71, 1877, 7350, 11646, 16992, 22387, 28070, 55038, 55190, 56033], "slug": "portal-2", "storyline": "You lost your memory, you are alone in a world full of danger, and your mission is survive using your mind. The only way to get out from this hell is.....Hi i'm GLAdOS, and welcome to the amazing world of portal 2, here i will expose you to a lot of tests, and try to k.. help Aperture Science envolve in a new era.\nYour job is advance in the levels i propose and get better and better, you will have an portal gun to help you, and remember nothing is impossible if you try, and try again and again and again....\nThe puzzles are waiting for you!", "summary": "Sequel to the acclaimed Portal (2007), Portal 2 pits the protagonist of the original game, Chell, and her new robot friend, Wheatley, against more puzzles conceived by GLaDOS, an A.I. with the sole purpose of testing the Portal Gun's mechanics and taking revenge on Chell for the events of Portal. \nAs a result of several interactions and revelations, Chell once again pushes to escape Aperture Science Labs.", "tags": [1, 18, 27, 268435461, 268435464, 268435465, 268435487, 536871262, 536871365, 536871487, 536871504, 536871938, 536872070, 536872093, 536872205, 536872352, 536872471, 536872673, 536872983, 536873712, 536874896, 536874916, 536875046, 536875057, 536875074, 536875178, 536875257, 536875275, 536875340, 536875487, 536875490, 536875529, 536875556, 536875637, 536875800, 536875856, 536875868, 536875886, 536876097, 536876173, 536876545, 536876684, 536876847, 536876850, 536876868, 536877049, 536877238, 536877647, 536877766, 536877991, 536878084, 536878225, 536878447, 536878482, 536878491, 536879053, 536879174, 536879808, 536880726, 536881347, 536881935, 536882120, 536883428, 536885136, 536889051, 536889479, 536897944], "themes": [1, 18, 27], "total_rating": 92.0669332713838, "total_rating_count": 2778, "updated_at": 1670514780, "url": "https://www.igdb.com/games/portal-2", "videos": [432, 16451, 17844, 17845], "websites": [17869, 17870, 41194, 41195, 150881, 150882, 150883, 296808], "checksum": "bcca1b61-2b30-13b8-a0ec-faf45d2ffdad", "game_localizations": [726]}, "trusted": true, "url": "https://store.steampowered.com/app/620", "checksum": "5281f967-6dfe-7658-96c6-af00ce010bbc"}]
@@ -0,0 +1 @@
[{"id": 17869, "category": 1, "game": 72, "trusted": false, "url": "http://www.thinkwithportals.com/", "checksum": "c40d590f-93bd-b86e-243c-73746c08be3b"}, {"id": 17870, "category": 13, "game": 72, "trusted": true, "url": "https://store.steampowered.com/app/620", "checksum": "5281f967-6dfe-7658-96c6-af00ce010bbc"}, {"id": 41194, "category": 3, "game": 72, "trusted": true, "url": "https://en.wikipedia.org/wiki/Portal_2", "checksum": "7354f471-16d6-5ed9-b4e4-049cceaab562"}, {"id": 41195, "category": 4, "game": 72, "trusted": true, "url": "https://www.facebook.com/Portal", "checksum": "035f6b48-3be1-77d5-1567-cf6fd8116ee7"}, {"id": 150881, "category": 9, "game": 72, "trusted": true, "url": "https://www.youtube.com/user/Valve", "checksum": "c1d4afb9-e96d-02f1-73bd-3384622e6aee"}, {"id": 150882, "category": 5, "game": 72, "trusted": true, "url": "https://twitter.com/valvesoftware", "checksum": "62bb9586-3293-bb01-f675-d65323ae371c"}, {"id": 150883, "category": 2, "game": 72, "trusted": false, "url": "https://theportalwiki.com/wiki/Portal_2", "checksum": "af689276-28c8-b145-7b19-f1d7df878c2a"}, {"id": 296808, "category": 6, "game": 72, "trusted": true, "url": "https://www.twitch.tv/directory/game/Portal%202", "checksum": "65629340-6190-833d-41b1-8eaf31918df3"}]
@@ -0,0 +1 @@
[{"id": 12644, "category": 1, "game": 7346, "trusted": false, "url": "http://www.zelda.com/breath-of-the-wild/", "checksum": "3d2ca280-a2d0-5664-c8a5-69eeeaf13558"}, {"id": 12645, "category": 2, "game": 7346, "trusted": false, "url": "http://zelda.wikia.com/wiki/The_Legend_of_Zelda:_Breath_of_the_Wild", "checksum": "d5cb4657-dc8e-9de1-9643-b1ef64812d9f"}, {"id": 12646, "category": 3, "game": 7346, "trusted": true, "url": "https://en.wikipedia.org/wiki/The_Legend_of_Zelda:_Breath_of_the_Wild", "checksum": "c4570c3c-3a04-8d24-399a-0c04a17e7c56"}, {"id": 65034, "category": 14, "game": 7346, "trusted": true, "url": "https://www.reddit.com/r/Breath_of_the_Wild", "checksum": "f60505b3-18a4-3d60-9db2-febe4c6cb492"}, {"id": 169666, "category": 6, "game": 7346, "trusted": true, "url": "https://www.twitch.tv/nintendo", "checksum": "e2b20791-a9c4-76ad-4d76-3e7abc9148bb"}, {"id": 169667, "category": 9, "game": 7346, "trusted": true, "url": "https://www.youtube.com/nintendo", "checksum": "1e1c08ba-8f89-b567-0029-1d8aac22d147"}, {"id": 169668, "category": 4, "game": 7346, "trusted": true, "url": "https://www.facebook.com/Nintendo", "checksum": "046d8c8e-8f1d-8813-1266-c2911f490ba7"}, {"id": 169669, "category": 5, "game": 7346, "trusted": true, "url": "https://twitter.com/NintendoAmerica", "checksum": "e06dd12f-b6c5-ef72-f287-a9cebba12fa1"}, {"id": 169670, "category": 8, "game": 7346, "trusted": true, "url": "https://www.instagram.com/nintendo", "checksum": "dbff9e02-e9c2-f395-7e48-7e70cf58225c"}]
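The three fixtures above are IGDB websites records. Judging by the Portal 2 entries, category 13 marks the Steam store page (the Breath of the Wild list has no such entry). A minimal sketch of extracting it; the constant and helper are assumptions inferred from these fixtures, not taken from IGDB documentation:

IGDB_WEBSITE_CATEGORY_STEAM = 13  # inferred: the category-13 rows point at store.steampowered.com

def steam_store_url(websites):
    for site in websites:
        if site.get("category") == IGDB_WEBSITE_CATEGORY_STEAM and site.get("trusted"):
            return site["url"]
    return None

Applied to the Portal 2 list this returns https://store.steampowered.com/app/620; for Breath of the Wild it returns None.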
@@ -43,7 +43,7 @@
 <body>
 <div id="page-wrapper">
 <div id="content-wrapper">
-{% include "partial/_navbar.html" %}
+{% include "partial/_navbar.html" with current="timeline" %}

 <section id="content" class="container">
 <div class="grid grid--reverse-order">
@@ -20,7 +20,7 @@
 <body>
 <div id="page-wrapper">
 <div id="content-wrapper">
-{% include "partial/_navbar.html" %}
+{% include "partial/_navbar.html" with current="data" %}

 <section id="content">
 <div class="grid grid--reverse-order">
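Both template hunks pass a current name into the shared navbar include, presumably so the partial can highlight the active menu item. A hypothetical sketch of how partial/_navbar.html might consume it; the URL paths and CSS class are assumptions, not the project's actual markup:

{# hypothetical excerpt of partial/_navbar.html; paths and class names are assumed #}
<nav>
  <a {% if current == "timeline" %}class="current"{% endif %} href="/timeline">Timeline</a>
  <a {% if current == "data" %}class="current"{% endif %} href="/data">Data</a>
</nav>

When no current is passed, neither branch matches and no item is highlighted, which keeps any unchanged call sites valid.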
Some files were not shown because too many files have changed in this diff