reformat new code with black

parent bde7ce47a3
commit 4ee560f6b4

49 changed files with 1915 additions and 1168 deletions
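Every hunk below is mechanical: black rewrites string literals to double quotes and splits any statement longer than its default 88-character line limit across multiple lines, one argument per line with a trailing comma. A minimal sketch of the transformation, on made-up code rather than anything in this repo:

    # before
    item = create_item(title='Hyperion', brief='a description long enough to push this call well past the 88-character limit')

    # after running black
    item = create_item(
        title="Hyperion",
        brief="a description long enough to push this call well past the 88-character limit",
    )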
@@ -10,7 +10,11 @@ from django.utils.baseconv import base62
 from django.shortcuts import render, get_object_or_404, redirect, reverse
 from django.http import Http404
 
-api = NinjaAPI(title=settings.SITE_INFO['site_name'], version="1.0.0", description=f"{settings.SITE_INFO['site_name']} API <hr/><a href='{settings.APP_WEBSITE}'>Learn more</a>")
+api = NinjaAPI(
+    title=settings.SITE_INFO["site_name"],
+    version="1.0.0",
+    description=f"{settings.SITE_INFO['site_name']} API <hr/><a href='{settings.APP_WEBSITE}'>Learn more</a>",
+)
 
 
 class ItemIn(Schema):
@@ -2,8 +2,8 @@ from django.apps import AppConfig
 
 
 class CatalogConfig(AppConfig):
-    default_auto_field = 'django.db.models.BigAutoField'
-    name = 'catalog'
+    default_auto_field = "django.db.models.BigAutoField"
+    name = "catalog"
 
     def ready(self):
         # load key modules in proper order, make sure class inject and signal works as expected
@@ -25,8 +25,8 @@ from .utils import *
 
 class Edition(Item):
     category = ItemCategory.Book
-    url_path = 'book'
-    demonstrative = _('这本书')
+    url_path = "book"
+    demonstrative = _("这本书")
 
     isbn = PrimaryLookupIdDescriptor(IdType.ISBN)
     asin = PrimaryLookupIdDescriptor(IdType.ASIN)

@@ -35,30 +35,30 @@ class Edition(Item):
     # goodreads = LookupIdDescriptor(IdType.Goodreads)
 
     METADATA_COPY_LIST = [
-        'title',
-        'brief',
+        "title",
+        "brief",
         # legacy fields
-        'subtitle',
-        'orig_title',
-        'author',
-        'translator',
-        'language',
-        'pub_house',
-        'pub_year',
-        'pub_month',
-        'binding',
-        'price',
-        'pages',
-        'contents',
-        'series',
-        'imprint',
+        "subtitle",
+        "orig_title",
+        "author",
+        "translator",
+        "language",
+        "pub_house",
+        "pub_year",
+        "pub_month",
+        "binding",
+        "price",
+        "pages",
+        "contents",
+        "series",
+        "imprint",
     ]
     subtitle = jsondata.CharField(null=True, blank=True, default=None)
     orig_title = jsondata.CharField(null=True, blank=True, default=None)
-    author = jsondata.ArrayField(_('作者'), null=False, blank=False, default=list)
-    translator = jsondata.ArrayField(_('译者'), null=True, blank=True, default=list)
+    author = jsondata.ArrayField(_("作者"), null=False, blank=False, default=list)
+    translator = jsondata.ArrayField(_("译者"), null=True, blank=True, default=list)
     language = jsondata.CharField(_("语言"), null=True, blank=True, default=None)
-    pub_house = jsondata.CharField(_('出版方'), null=True, blank=True, default=None)
+    pub_house = jsondata.CharField(_("出版方"), null=True, blank=True, default=None)
     pub_year = jsondata.IntegerField(_("发表年份"), null=True, blank=True)
     pub_month = jsondata.IntegerField(_("发表月份"), null=True, blank=True)
     binding = jsondata.CharField(null=True, blank=True, default=None)

@@ -80,8 +80,11 @@ class Edition(Item):
         """add Work from resource.metadata['work'] if not yet"""
         links = resource.required_resources + resource.related_resources
         for w in links:
-            if w['model'] == 'Work':
-                work = Work.objects.filter(primary_lookup_id_type=w['id_type'], primary_lookup_id_value=w['id_value']).first()
+            if w["model"] == "Work":
+                work = Work.objects.filter(
+                    primary_lookup_id_type=w["id_type"],
+                    primary_lookup_id_value=w["id_value"],
+                ).first()
                 if work and work not in self.works.all():
                     self.works.add(work)
         # if not work:

@@ -90,15 +93,15 @@ class Edition(Item):
 
 class Work(Item):
     category = ItemCategory.Book
-    url_path = 'book/work'
+    url_path = "book/work"
     douban_work = PrimaryLookupIdDescriptor(IdType.DoubanBook_Work)
     goodreads_work = PrimaryLookupIdDescriptor(IdType.Goodreads_Work)
-    editions = models.ManyToManyField(Edition, related_name='works')
+    editions = models.ManyToManyField(Edition, related_name="works")
 
 
 class Series(Item):
     category = ItemCategory.Book
-    url_path = 'book/series'
+    url_path = "book/series"
     # douban_serie = LookupIdDescriptor(IdType.DoubanBook_Serie)
     # goodreads_serie = LookupIdDescriptor(IdType.Goodreads_Serie)
 
@@ -8,7 +8,7 @@ class BookTestCase(TestCase):
     def setUp(self):
         hyperion = Edition.objects.create(title="Hyperion")
         hyperion.pages = 500
-        hyperion.isbn = '9780553283686'
+        hyperion.isbn = "9780553283686"
         hyperion.save()
         # hyperion.isbn10 = '0553283685'

@@ -22,39 +22,39 @@ class BookTestCase(TestCase):
         self.assertEqual(hyperion.title, "Hyperion")
         self.assertEqual(hyperion.pages, 500)
         self.assertEqual(hyperion.primary_lookup_id_type, IdType.ISBN)
-        self.assertEqual(hyperion.primary_lookup_id_value, '9780553283686')
+        self.assertEqual(hyperion.primary_lookup_id_value, "9780553283686")
         andymion = Edition(title="Andymion", pages=42)
         self.assertEqual(andymion.pages, 42)
 
     def test_lookupids(self):
         hyperion = Edition.objects.get(title="Hyperion")
-        hyperion.asin = 'B004G60EHS'
+        hyperion.asin = "B004G60EHS"
         self.assertEqual(hyperion.primary_lookup_id_type, IdType.ASIN)
-        self.assertEqual(hyperion.primary_lookup_id_value, 'B004G60EHS')
+        self.assertEqual(hyperion.primary_lookup_id_value, "B004G60EHS")
         self.assertEqual(hyperion.isbn, None)
         self.assertEqual(hyperion.isbn10, None)
 
     def test_isbn(self):
-        t, n = detect_isbn_asin('0553283685')
+        t, n = detect_isbn_asin("0553283685")
         self.assertEqual(t, IdType.ISBN)
-        self.assertEqual(n, '9780553283686')
-        t, n = detect_isbn_asin('9780553283686')
+        self.assertEqual(n, "9780553283686")
+        t, n = detect_isbn_asin("9780553283686")
         self.assertEqual(t, IdType.ISBN)
-        t, n = detect_isbn_asin(' b0043M6780')
+        t, n = detect_isbn_asin(" b0043M6780")
         self.assertEqual(t, IdType.ASIN)
 
         hyperion = Edition.objects.get(title="Hyperion")
-        self.assertEqual(hyperion.isbn, '9780553283686')
-        self.assertEqual(hyperion.isbn10, '0553283685')
-        hyperion.isbn10 = '0575099437'
-        self.assertEqual(hyperion.isbn, '9780575099432')
-        self.assertEqual(hyperion.isbn10, '0575099437')
+        self.assertEqual(hyperion.isbn, "9780553283686")
+        self.assertEqual(hyperion.isbn10, "0553283685")
+        hyperion.isbn10 = "0575099437"
+        self.assertEqual(hyperion.isbn, "9780575099432")
+        self.assertEqual(hyperion.isbn10, "0575099437")
 
     def test_work(self):
         hyperion_print = Edition.objects.get(title="Hyperion")
         hyperion_ebook = Edition(title="Hyperion")
         hyperion_ebook.save()
-        hyperion_ebook.asin = 'B0043M6780'
+        hyperion_ebook.asin = "B0043M6780"
         hyperion = Work(title="Hyperion")
         hyperion.save()
         hyperion.editions.add(hyperion_print)

@@ -69,9 +69,9 @@ class GoodreadsTestCase(TestCase):
 
     def test_parse(self):
         t_type = IdType.Goodreads
-        t_id = '77566'
-        t_url = 'https://www.goodreads.com/zh/book/show/77566.Hyperion'
-        t_url2 = 'https://www.goodreads.com/book/show/77566'
+        t_id = "77566"
+        t_url = "https://www.goodreads.com/zh/book/show/77566.Hyperion"
+        t_url2 = "https://www.goodreads.com/book/show/77566"
         p1 = SiteManager.get_site_by_id_type(t_type)
         p2 = SiteManager.get_site_by_url(t_url)
         self.assertEqual(p1.id_to_url(t_id), t_url2)

@@ -79,9 +79,9 @@ class GoodreadsTestCase(TestCase):
 
     @use_local_response
     def test_scrape(self):
-        t_url = 'https://www.goodreads.com/book/show/77566.Hyperion'
-        t_url2 = 'https://www.goodreads.com/book/show/77566'
-        isbn = '9780553283686'
+        t_url = "https://www.goodreads.com/book/show/77566.Hyperion"
+        t_url2 = "https://www.goodreads.com/book/show/77566"
+        isbn = "9780553283686"
         site = SiteManager.get_site_by_url(t_url)
         self.assertEqual(site.ready, False)
         self.assertEqual(site.url, t_url2)

@@ -90,39 +90,43 @@ class GoodreadsTestCase(TestCase):
         self.assertIsNotNone(site.resource)
         site.get_resource_ready()
         self.assertEqual(site.ready, True)
-        self.assertEqual(site.resource.metadata.get('title'), 'Hyperion')
+        self.assertEqual(site.resource.metadata.get("title"), "Hyperion")
         self.assertEqual(site.resource.get_all_lookup_ids().get(IdType.ISBN), isbn)
-        self.assertEqual(site.resource.required_resources[0]['id_value'], '1383900')
-        edition = Edition.objects.get(primary_lookup_id_type=IdType.ISBN, primary_lookup_id_value=isbn)
+        self.assertEqual(site.resource.required_resources[0]["id_value"], "1383900")
+        edition = Edition.objects.get(
+            primary_lookup_id_type=IdType.ISBN, primary_lookup_id_value=isbn
+        )
         resource = edition.external_resources.all().first()
         self.assertEqual(resource.id_type, IdType.Goodreads)
-        self.assertEqual(resource.id_value, '77566')
-        self.assertNotEqual(resource.cover, '/media/item/default.svg')
-        self.assertEqual(edition.isbn, '9780553283686')
-        self.assertEqual(edition.title, 'Hyperion')
+        self.assertEqual(resource.id_value, "77566")
+        self.assertNotEqual(resource.cover, "/media/item/default.svg")
+        self.assertEqual(edition.isbn, "9780553283686")
+        self.assertEqual(edition.title, "Hyperion")
 
         edition.delete()
         site = SiteManager.get_site_by_url(t_url)
         self.assertEqual(site.ready, False)
         self.assertEqual(site.url, t_url2)
         site.get_resource()
-        self.assertEqual(site.ready, True, 'previous resource should still exist with data')
+        self.assertEqual(
+            site.ready, True, "previous resource should still exist with data"
+        )
 
     @use_local_response
     def test_asin(self):
-        t_url = 'https://www.goodreads.com/book/show/45064996-hyperion'
+        t_url = "https://www.goodreads.com/book/show/45064996-hyperion"
         site = SiteManager.get_site_by_url(t_url)
         site.get_resource_ready()
-        self.assertEqual(site.resource.item.title, 'Hyperion')
-        self.assertEqual(site.resource.item.asin, 'B004G60EHS')
+        self.assertEqual(site.resource.item.title, "Hyperion")
+        self.assertEqual(site.resource.item.asin, "B004G60EHS")
 
     @use_local_response
     def test_work(self):
-        url = 'https://www.goodreads.com/work/editions/153313'
+        url = "https://www.goodreads.com/work/editions/153313"
         p = SiteManager.get_site_by_url(url).get_resource_ready()
-        self.assertEqual(p.item.title, '1984')
-        url1 = 'https://www.goodreads.com/book/show/3597767-rok-1984'
-        url2 = 'https://www.goodreads.com/book/show/40961427-1984'
+        self.assertEqual(p.item.title, "1984")
+        url1 = "https://www.goodreads.com/book/show/3597767-rok-1984"
+        url2 = "https://www.goodreads.com/book/show/40961427-1984"
         p1 = SiteManager.get_site_by_url(url1).get_resource_ready()
         p2 = SiteManager.get_site_by_url(url2).get_resource_ready()
         w1 = p1.item.works.all().first()

@@ -133,9 +137,9 @@ class GoodreadsTestCase(TestCase):
 class GoogleBooksTestCase(TestCase):
     def test_parse(self):
         t_type = IdType.GoogleBooks
-        t_id = 'hV--zQEACAAJ'
-        t_url = 'https://books.google.com.bn/books?id=hV--zQEACAAJ&hl=ms'
-        t_url2 = 'https://books.google.com/books?id=hV--zQEACAAJ'
+        t_id = "hV--zQEACAAJ"
+        t_url = "https://books.google.com.bn/books?id=hV--zQEACAAJ&hl=ms"
+        t_url2 = "https://books.google.com/books?id=hV--zQEACAAJ"
         p1 = SiteManager.get_site_by_url(t_url)
         p2 = SiteManager.get_site_by_url(t_url2)
         self.assertIsNotNone(p1)

@@ -146,17 +150,19 @@ class GoogleBooksTestCase(TestCase):
 
     @use_local_response
     def test_scrape(self):
-        t_url = 'https://books.google.com.bn/books?id=hV--zQEACAAJ'
+        t_url = "https://books.google.com.bn/books?id=hV--zQEACAAJ"
         site = SiteManager.get_site_by_url(t_url)
         self.assertEqual(site.ready, False)
         site.get_resource_ready()
         self.assertEqual(site.ready, True)
-        self.assertEqual(site.resource.metadata.get('title'), '1984 Nineteen Eighty-Four')
-        self.assertEqual(site.resource.metadata.get('isbn'), '9781847498571')
+        self.assertEqual(
+            site.resource.metadata.get("title"), "1984 Nineteen Eighty-Four"
+        )
+        self.assertEqual(site.resource.metadata.get("isbn"), "9781847498571")
         self.assertEqual(site.resource.id_type, IdType.GoogleBooks)
-        self.assertEqual(site.resource.id_value, 'hV--zQEACAAJ')
-        self.assertEqual(site.resource.item.isbn, '9781847498571')
-        self.assertEqual(site.resource.item.title, '1984 Nineteen Eighty-Four')
+        self.assertEqual(site.resource.id_value, "hV--zQEACAAJ")
+        self.assertEqual(site.resource.item.isbn, "9781847498571")
+        self.assertEqual(site.resource.item.title, "1984 Nineteen Eighty-Four")
 
 
 class DoubanBookTestCase(TestCase):

@@ -165,9 +171,9 @@ class DoubanBookTestCase(TestCase):
 
     def test_parse(self):
         t_type = IdType.DoubanBook
-        t_id = '35902899'
-        t_url = 'https://m.douban.com/book/subject/35902899/'
-        t_url2 = 'https://book.douban.com/subject/35902899/'
+        t_id = "35902899"
+        t_url = "https://m.douban.com/book/subject/35902899/"
+        t_url2 = "https://book.douban.com/subject/35902899/"
         p1 = SiteManager.get_site_by_url(t_url)
         p2 = SiteManager.get_site_by_url(t_url2)
         self.assertEqual(p1.url, t_url2)

@@ -177,44 +183,46 @@ class DoubanBookTestCase(TestCase):
 
     @use_local_response
     def test_scrape(self):
-        t_url = 'https://book.douban.com/subject/35902899/'
+        t_url = "https://book.douban.com/subject/35902899/"
         site = SiteManager.get_site_by_url(t_url)
         self.assertEqual(site.ready, False)
         site.get_resource_ready()
         self.assertEqual(site.ready, True)
         self.assertEqual(site.resource.site_name, SiteName.Douban)
-        self.assertEqual(site.resource.metadata.get('title'), '1984 Nineteen Eighty-Four')
-        self.assertEqual(site.resource.metadata.get('isbn'), '9781847498571')
+        self.assertEqual(
+            site.resource.metadata.get("title"), "1984 Nineteen Eighty-Four"
+        )
+        self.assertEqual(site.resource.metadata.get("isbn"), "9781847498571")
         self.assertEqual(site.resource.id_type, IdType.DoubanBook)
-        self.assertEqual(site.resource.id_value, '35902899')
-        self.assertEqual(site.resource.item.isbn, '9781847498571')
-        self.assertEqual(site.resource.item.title, '1984 Nineteen Eighty-Four')
+        self.assertEqual(site.resource.id_value, "35902899")
+        self.assertEqual(site.resource.item.isbn, "9781847498571")
+        self.assertEqual(site.resource.item.title, "1984 Nineteen Eighty-Four")
 
     @use_local_response
     def test_work(self):
         # url = 'https://www.goodreads.com/work/editions/153313'
-        url1 = 'https://book.douban.com/subject/1089243/'
-        url2 = 'https://book.douban.com/subject/2037260/'
+        url1 = "https://book.douban.com/subject/1089243/"
+        url2 = "https://book.douban.com/subject/2037260/"
         p1 = SiteManager.get_site_by_url(url1).get_resource_ready()
         p2 = SiteManager.get_site_by_url(url2).get_resource_ready()
         w1 = p1.item.works.all().first()
         w2 = p2.item.works.all().first()
-        self.assertEqual(w1.title, '黄金时代')
-        self.assertEqual(w2.title, '黄金时代')
+        self.assertEqual(w1.title, "黄金时代")
+        self.assertEqual(w2.title, "黄金时代")
         self.assertEqual(w1, w2)
-        editions = w1.editions.all().order_by('title')
+        editions = w1.editions.all().order_by("title")
         self.assertEqual(editions.count(), 2)
-        self.assertEqual(editions[0].title, 'Wang in Love and Bondage')
-        self.assertEqual(editions[1].title, '黄金时代')
+        self.assertEqual(editions[0].title, "Wang in Love and Bondage")
+        self.assertEqual(editions[1].title, "黄金时代")
 
 
 class MultiBookSitesTestCase(TestCase):
     @use_local_response
     def test_editions(self):
         # isbn = '9781847498571'
-        url1 = 'https://www.goodreads.com/book/show/56821625-1984'
-        url2 = 'https://book.douban.com/subject/35902899/'
-        url3 = 'https://books.google.com/books?id=hV--zQEACAAJ'
+        url1 = "https://www.goodreads.com/book/show/56821625-1984"
+        url2 = "https://book.douban.com/subject/35902899/"
+        url3 = "https://books.google.com/books?id=hV--zQEACAAJ"
         p1 = SiteManager.get_site_by_url(url1).get_resource_ready()
         p2 = SiteManager.get_site_by_url(url2).get_resource_ready()
         p3 = SiteManager.get_site_by_url(url3).get_resource_ready()

@@ -224,11 +232,13 @@ class MultiBookSitesTestCase(TestCase):
     @use_local_response
     def test_works(self):
         # url1 and url4 has same ISBN, hence they share same Edition instance, which belongs to 2 Work instances
-        url1 = 'https://book.douban.com/subject/1089243/'
-        url2 = 'https://book.douban.com/subject/2037260/'
-        url3 = 'https://www.goodreads.com/book/show/59952545-golden-age'
-        url4 = 'https://www.goodreads.com/book/show/11798823'
-        p1 = SiteManager.get_site_by_url(url1).get_resource_ready()  # lxml bug may break this
+        url1 = "https://book.douban.com/subject/1089243/"
+        url2 = "https://book.douban.com/subject/2037260/"
+        url3 = "https://www.goodreads.com/book/show/59952545-golden-age"
+        url4 = "https://www.goodreads.com/book/show/11798823"
+        p1 = SiteManager.get_site_by_url(
+            url1
+        ).get_resource_ready()  # lxml bug may break this
         w1 = p1.item.works.all().first()
         p2 = SiteManager.get_site_by_url(url2).get_resource_ready()
         w2 = p2.item.works.all().first()

@@ -241,13 +251,13 @@ class MultiBookSitesTestCase(TestCase):
         self.assertEqual(p4.item.id, p1.item.id)
         self.assertEqual(p4.item.works.all().count(), 2)
         self.assertEqual(p1.item.works.all().count(), 2)
-        w2e = w2.editions.all().order_by('title')
+        w2e = w2.editions.all().order_by("title")
         self.assertEqual(w2e.count(), 2)
-        self.assertEqual(w2e[0].title, 'Wang in Love and Bondage')
-        self.assertEqual(w2e[1].title, '黄金时代')
-        w3e = w3.editions.all().order_by('title')
+        self.assertEqual(w2e[0].title, "Wang in Love and Bondage")
+        self.assertEqual(w2e[1].title, "黄金时代")
+        w3e = w3.editions.all().order_by("title")
         self.assertEqual(w3e.count(), 2)
-        self.assertEqual(w3e[0].title, 'Golden Age: A Novel')
-        self.assertEqual(w3e[1].title, '黄金时代')
+        self.assertEqual(w3e[0].title, "Golden Age: A Novel")
+        self.assertEqual(w3e[1].title, "黄金时代")
         e = Edition.objects.get(primary_lookup_id_value=9781662601217)
-        self.assertEqual(e.title, 'Golden Age: A Novel')
+        self.assertEqual(e.title, "Golden Age: A Novel")
@@ -10,7 +10,7 @@ def check_digit_10(isbn):
         w = i + 1
         sum += w * c
     r = sum % 11
-    return 'X' if r == 10 else str(r)
+    return "X" if r == 10 else str(r)
 
 
 def check_digit_13(isbn):

@@ -21,38 +21,38 @@ def check_digit_13(isbn):
         w = 3 if i % 2 else 1
         sum += w * c
     r = 10 - (sum % 10)
-    return '0' if r == 10 else str(r)
+    return "0" if r == 10 else str(r)
 
 
 def isbn_10_to_13(isbn):
     if not isbn or len(isbn) != 10:
         return None
-    return '978' + isbn[:-1] + check_digit_13('978' + isbn[:-1])
+    return "978" + isbn[:-1] + check_digit_13("978" + isbn[:-1])
 
 
 def isbn_13_to_10(isbn):
-    if not isbn or len(isbn) != 13 or isbn[:3] != '978':
+    if not isbn or len(isbn) != 13 or isbn[:3] != "978":
         return None
     else:
         return isbn[3:12] + check_digit_10(isbn[3:12])
 
 
 def is_isbn_13(isbn):
-    return re.match(r'\d{13}', isbn) is not None
+    return re.match(r"\d{13}", isbn) is not None
 
 
 def is_isbn_10(isbn):
-    return re.match(r'\d{9}[X0-9]', isbn) is not None
+    return re.match(r"\d{9}[X0-9]", isbn) is not None
 
 
 def is_asin(asin):
-    return re.match(r'B[A-Z0-9]{9}', asin) is not None
+    return re.match(r"B[A-Z0-9]{9}", asin) is not None
 
 
 def detect_isbn_asin(s):
     if not s:
         return None, None
-    n = re.sub(r'[^0-9A-Z]', '', s.upper())
+    n = re.sub(r"[^0-9A-Z]", "", s.upper())
     if is_isbn_13(n):
         return IdType.ISBN, n
     if is_isbn_10(n):
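A quick worked example of the helpers above, using the same values the BookTestCase hunks assert on:

    isbn_10_to_13("0553283685")      # -> "9780553283686"
    isbn_13_to_10("9780553283686")   # -> "0553283685"
    detect_isbn_asin(" b0043M6780")  # -> (IdType.ASIN, "B0043M6780")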
@@ -5,4 +5,28 @@ from .scrapers import *
 from . import jsondata
 
 
-__all__ = ('IdType', 'SiteName', 'ItemCategory', 'Item', 'ExternalResource', 'ResourceContent', 'ParseError', 'AbstractSite', 'SiteManager', 'jsondata', 'PrimaryLookupIdDescriptor', 'LookupIdDescriptor', 'get_mock_mode', 'get_mock_file', 'use_local_response', 'RetryDownloader', 'BasicDownloader', 'ProxiedDownloader', 'BasicImageDownloader', 'RESPONSE_OK', 'RESPONSE_NETWORK_ERROR', 'RESPONSE_INVALID_CONTENT', 'RESPONSE_CENSORSHIP')
+__all__ = (
+    "IdType",
+    "SiteName",
+    "ItemCategory",
+    "Item",
+    "ExternalResource",
+    "ResourceContent",
+    "ParseError",
+    "AbstractSite",
+    "SiteManager",
+    "jsondata",
+    "PrimaryLookupIdDescriptor",
+    "LookupIdDescriptor",
+    "get_mock_mode",
+    "get_mock_file",
+    "use_local_response",
+    "RetryDownloader",
+    "BasicDownloader",
+    "ProxiedDownloader",
+    "BasicImageDownloader",
+    "RESPONSE_OK",
+    "RESPONSE_NETWORK_ERROR",
+    "RESPONSE_INVALID_CONTENT",
+    "RESPONSE_CENSORSHIP",
+)
@@ -29,6 +29,7 @@ def use_local_response(func):
         set_mock_mode(True)
         func(args)
         set_mock_mode(False)
+
     return _func
 
 

@@ -43,9 +44,9 @@ def get_mock_mode():
 
 
 def get_mock_file(url):
-    fn = url.replace('***REMOVED***', '1234')  # Thank you, Github Action -_-!
-    fn = re.sub(r'[^\w]', '_', fn)
-    fn = re.sub(r'_key_[*A-Za-z0-9]+', '_key_8964', fn)
+    fn = url.replace("***REMOVED***", "1234")  # Thank you, Github Action -_-!
+    fn = re.sub(r"[^\w]", "_", fn)
+    fn = re.sub(r"_key_[*A-Za-z0-9]+", "_key_8964", fn)
     return fn
 
 
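So a test URL maps to a flat mock filename, e.g. (derived directly from the regexes above, with a URL used by the tests in this commit):

    get_mock_file("https://book.douban.com/subject/35902899/")
    # -> "https___book_douban_com_subject_35902899_"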
@@ -61,21 +62,23 @@ class DownloadError(Exception):
             error = "Censored Content"
         else:
             error = "Unknown Error"
-        self.message = f"Download Failed: {error}{', ' + msg if msg else ''}, url: {self.url}"
+        self.message = (
+            f"Download Failed: {error}{', ' + msg if msg else ''}, url: {self.url}"
+        )
         super().__init__(self.message)
 
 
 class BasicDownloader:
     headers = {
         # 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:107.0) Gecko/20100101 Firefox/107.0',
-        'User-Agent': 'Mozilla/5.0 (iPad; CPU OS 14_7_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Mobile/15E148 Safari/604.1',
-        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
-        'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
-        'Accept-Encoding': 'gzip, deflate',
-        'Connection': 'keep-alive',
-        'DNT': '1',
-        'Upgrade-Insecure-Requests': '1',
-        'Cache-Control': 'no-cache',
+        "User-Agent": "Mozilla/5.0 (iPad; CPU OS 14_7_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Mobile/15E148 Safari/604.1",
+        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
+        "Accept-Language": "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2",
+        "Accept-Encoding": "gzip, deflate",
+        "Connection": "keep-alive",
+        "DNT": "1",
+        "Upgrade-Insecure-Requests": "1",
+        "Cache-Control": "no-cache",
     }
 
     def __init__(self, url, headers=None):

@@ -100,18 +103,28 @@ class BasicDownloader:
         try:
             if not _mock_mode:
                 # TODO cache = get/set from redis
-                resp = requests.get(url, headers=self.headers, timeout=self.get_timeout())
+                resp = requests.get(
+                    url, headers=self.headers, timeout=self.get_timeout()
+                )
                 if settings.DOWNLOADER_SAVEDIR:
-                    with open(settings.DOWNLOADER_SAVEDIR + '/' + get_mock_file(url), 'w', encoding='utf-8') as fp:
+                    with open(
+                        settings.DOWNLOADER_SAVEDIR + "/" + get_mock_file(url),
+                        "w",
+                        encoding="utf-8",
+                    ) as fp:
                         fp.write(resp.text)
             else:
                 resp = MockResponse(self.url)
             response_type = self.validate_response(resp)
-            self.logs.append({'response_type': response_type, 'url': url, 'exception': None})
+            self.logs.append(
+                {"response_type": response_type, "url": url, "exception": None}
+            )
 
             return resp, response_type
         except RequestException as e:
-            self.logs.append({'response_type': RESPONSE_NETWORK_ERROR, 'url': url, 'exception': e})
+            self.logs.append(
+                {"response_type": RESPONSE_NETWORK_ERROR, "url": url, "exception": e}
+            )
             return None, RESPONSE_NETWORK_ERROR
 
     def download(self):

@@ -126,16 +139,26 @@ class ProxiedDownloader(BasicDownloader):
     def get_proxied_urls(self):
         urls = []
         if settings.PROXYCRAWL_KEY is not None:
-            urls.append(f'https://api.proxycrawl.com/?token={settings.PROXYCRAWL_KEY}&url={self.url}')
+            urls.append(
+                f"https://api.proxycrawl.com/?token={settings.PROXYCRAWL_KEY}&url={self.url}"
+            )
         if settings.SCRAPESTACK_KEY is not None:
             # urls.append(f'http://api.scrapestack.com/scrape?access_key={settings.SCRAPESTACK_KEY}&url={self.url}')
-            urls.append(f'http://api.scrapestack.com/scrape?keep_headers=1&access_key={settings.SCRAPESTACK_KEY}&url={self.url}')
+            urls.append(
+                f"http://api.scrapestack.com/scrape?keep_headers=1&access_key={settings.SCRAPESTACK_KEY}&url={self.url}"
+            )
         if settings.SCRAPERAPI_KEY is not None:
-            urls.append(f'http://api.scraperapi.com/?api_key={settings.SCRAPERAPI_KEY}&url={self.url}')
+            urls.append(
+                f"http://api.scraperapi.com/?api_key={settings.SCRAPERAPI_KEY}&url={self.url}"
+            )
         return urls
 
     def get_special_proxied_url(self):
-        return f'{settings.LOCAL_PROXY}?url={self.url}' if settings.LOCAL_PROXY is not None else None
+        return (
+            f"{settings.LOCAL_PROXY}?url={self.url}"
+            if settings.LOCAL_PROXY is not None
+            else None
+        )
 
     def download(self):
         urls = self.get_proxied_urls()

@@ -144,7 +167,11 @@ class ProxiedDownloader(BasicDownloader):
         resp = None
         while url:
             resp, resp_type = self._download(url)
-            if resp_type == RESPONSE_OK or resp_type == RESPONSE_INVALID_CONTENT or last_try:
+            if (
+                resp_type == RESPONSE_OK
+                or resp_type == RESPONSE_INVALID_CONTENT
+                or last_try
+            ):
                 url = None
             elif resp_type == RESPONSE_CENSORSHIP:
                 url = self.get_special_proxied_url()

@@ -169,15 +196,15 @@ class RetryDownloader(BasicDownloader):
             elif self.response_type != RESPONSE_NETWORK_ERROR and retries == 0:
                 raise DownloadError(self)
             elif retries > 0:
-                _logger.debug('Retry ' + self.url)
+                _logger.debug("Retry " + self.url)
                 time.sleep((settings.DOWNLOADER_RETRIES - retries) * 0.5)
-        raise DownloadError(self, 'max out of retries')
+        raise DownloadError(self, "max out of retries")
 
 
 class ImageDownloaderMixin:
     def __init__(self, url, referer=None):
         if referer is not None:
-            self.headers['Referer'] = referer
+            self.headers["Referer"] = referer
         super().__init__(url)
 
     def validate_response(self, response):

@@ -186,8 +213,10 @@ class ImageDownloaderMixin:
             raw_img = response.content
             img = Image.open(BytesIO(raw_img))
             img.load()  # corrupted image will trigger exception
-            content_type = response.headers.get('Content-Type')
-            self.extention = filetype.get_type(mime=content_type.partition(';')[0].strip()).extension
+            content_type = response.headers.get("Content-Type")
+            self.extention = filetype.get_type(
+                mime=content_type.partition(";")[0].strip()
+            ).extension
             return RESPONSE_OK
         except Exception:
             return RESPONSE_NETWORK_ERROR

@@ -213,7 +242,9 @@ class ProxiedImageDownloader(ImageDownloaderMixin, ProxiedDownloader):
     pass
 
 
-_local_response_path = str(Path(__file__).parent.parent.parent.absolute()) + '/test_data/'
+_local_response_path = (
+    str(Path(__file__).parent.parent.parent.absolute()) + "/test_data/"
+)
 
 
 class MockResponse:

@@ -225,23 +256,27 @@ class MockResponse:
             self.status_code = 200
             _logger.debug(f"use local response for {url} from {fn}")
         except Exception:
-            self.content = b'Error: response file not found'
+            self.content = b"Error: response file not found"
             self.status_code = 404
             _logger.debug(f"local response not found for {url} at {fn}")
 
     @property
     def text(self):
-        return self.content.decode('utf-8')
+        return self.content.decode("utf-8")
 
     def json(self):
         return json.load(StringIO(self.text))
 
     def html(self):
-        return html.fromstring(self.text)  # may throw exception unexpectedly due to OS bug, see https://github.com/neodb-social/neodb/issues/5
+        return html.fromstring(
+            self.text
+        )  # may throw exception unexpectedly due to OS bug, see https://github.com/neodb-social/neodb/issues/5
 
     @property
     def headers(self):
-        return {'Content-Type': 'image/jpeg' if self.url.endswith('jpg') else 'text/html'}
+        return {
+            "Content-Type": "image/jpeg" if self.url.endswith("jpg") else "text/html"
+        }
 
 
 requests.Response.html = MockResponse.html
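For orientation, a sketch of how these downloader classes are called from scrapers (the html() helper is the one patched onto requests.Response above; under @use_local_response the same call is served from test_data via MockResponse):

    content = BasicDownloader(url).download().html()
    # RetryDownloader and ProxiedDownloader expose the same interface and raise
    # DownloadError once retries or proxy URLs are exhausted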
@@ -24,25 +24,29 @@ class ResourceContent:
     cover_image_extention: str = None
 
     def dict(self):
-        return {'metadata': self.metadata, 'lookup_ids': self.lookup_ids}
+        return {"metadata": self.metadata, "lookup_ids": self.lookup_ids}
 
     def to_json(self) -> str:
-        return json.dumps({'metadata': self.metadata, 'lookup_ids': self.lookup_ids})
+        return json.dumps({"metadata": self.metadata, "lookup_ids": self.lookup_ids})
 
 
 class AbstractSite:
     """
     Abstract class to represent a site
     """
 
     SITE_NAME = None
     ID_TYPE = None
-    WIKI_PROPERTY_ID = 'P0undefined0'
+    WIKI_PROPERTY_ID = "P0undefined0"
     DEFAULT_MODEL = None
     URL_PATTERNS = [r"\w+://undefined/(\d+)"]
 
     @classmethod
     def validate_url(self, url: str):
-        u = next(iter([re.match(p, url) for p in self.URL_PATTERNS if re.match(p, url)]), None)
+        u = next(
+            iter([re.match(p, url) for p in self.URL_PATTERNS if re.match(p, url)]),
+            None,
+        )
         return u is not None
 
     @classmethod

@@ -51,15 +55,18 @@ class AbstractSite:
 
     @classmethod
     def id_to_url(self, id_value):
-        return 'https://undefined/' + id_value
+        return "https://undefined/" + id_value
 
     @classmethod
     def url_to_id(self, url: str):
-        u = next(iter([re.match(p, url) for p in self.URL_PATTERNS if re.match(p, url)]), None)
+        u = next(
+            iter([re.match(p, url) for p in self.URL_PATTERNS if re.match(p, url)]),
+            None,
+        )
         return u[1] if u else None
 
     def __str__(self):
-        return f'<{self.__class__.__name__}: {self.url}>'
+        return f"<{self.__class__.__name__}: {self.url}>"
 
     def __init__(self, url=None):
         self.id_value = self.url_to_id(url) if url else None

@@ -70,7 +77,9 @@ class AbstractSite:
         if not self.resource:
             self.resource = ExternalResource.objects.filter(url=self.url).first()
             if self.resource is None:
-                self.resource = ExternalResource(id_type=self.ID_TYPE, id_value=self.id_value, url=self.url)
+                self.resource = ExternalResource(
+                    id_type=self.ID_TYPE, id_value=self.id_value, url=self.url
+                )
         return self.resource
 
     def scrape(self) -> ResourceContent:

@@ -91,11 +100,13 @@ class AbstractSite:
         model = self.DEFAULT_MODEL
         t, v = model.get_best_lookup_id(p.get_all_lookup_ids())
         if t is not None:
-            p.item = model.objects.filter(primary_lookup_id_type=t, primary_lookup_id_value=v).first()
+            p.item = model.objects.filter(
+                primary_lookup_id_type=t, primary_lookup_id_value=v
+            ).first()
         if p.item is None:
             obj = model.copy_metadata(p.metadata)
-            obj['primary_lookup_id_type'] = t
-            obj['primary_lookup_id_value'] = v
+            obj["primary_lookup_id_type"] = t
+            obj["primary_lookup_id_value"] = v
             p.item = model.objects.create(**obj)
         return p.item
 

@@ -103,10 +114,17 @@ class AbstractSite:
     def ready(self):
         return bool(self.resource and self.resource.ready)
 
-    def get_resource_ready(self, auto_save=True, auto_create=True, auto_link=True, preloaded_content=None, ignore_existing_content=False):
+    def get_resource_ready(
+        self,
+        auto_save=True,
+        auto_create=True,
+        auto_link=True,
+        preloaded_content=None,
+        ignore_existing_content=False,
+    ):
         """
         Returns an ExternalResource in scraped state if possible
 
         Parameters
         ----------
         auto_save : bool

@@ -137,7 +155,7 @@ class AbstractSite:
             resource_content = self.scrape()
             p.update_content(resource_content)
         if not p.ready:
-            _logger.error(f'unable to get resource {self.url} ready')
+            _logger.error(f"unable to get resource {self.url} ready")
             return None
         if auto_create and p.item is None:
             self.get_item()

@@ -148,9 +166,12 @@ class AbstractSite:
             p.item.save()
         if auto_link:
             for linked_resource in p.required_resources:
-                linked_site = SiteManager.get_site_by_url(linked_resource['url'])
+                linked_site = SiteManager.get_site_by_url(linked_resource["url"])
                 if linked_site:
-                    linked_site.get_resource_ready(auto_link=False, preloaded_content=linked_resource.get('content'))
+                    linked_site.get_resource_ready(
+                        auto_link=False,
+                        preloaded_content=linked_resource.get("content"),
+                    )
                 else:
                     _logger.error(f'unable to get site for {linked_resource["url"]}')
             p.item.update_linked_items_from_external_resource(p)

@@ -165,7 +186,7 @@ class SiteManager:
     def register(target) -> Callable:
         id_type = target.ID_TYPE
         if id_type in SiteManager.registry:
-            raise ValueError(f'Site for {id_type} already exists')
+            raise ValueError(f"Site for {id_type} already exists")
         SiteManager.registry[id_type] = target
         return target
 

@@ -175,9 +196,17 @@ class SiteManager:
 
     @staticmethod
    def get_site_by_url(url: str):
-        cls = next(filter(lambda p: p.validate_url(url), SiteManager.registry.values()), None)
+        cls = next(
+            filter(lambda p: p.validate_url(url), SiteManager.registry.values()), None
+        )
         if cls is None:
-            cls = next(filter(lambda p: p.validate_url_fallback(url), SiteManager.registry.values()), None)
+            cls = next(
+                filter(
+                    lambda p: p.validate_url_fallback(url),
+                    SiteManager.registry.values(),
+                ),
+                None,
+            )
         return cls(url) if cls else None
 
     @staticmethod

@@ -190,5 +219,7 @@ class SiteManager:
         return SiteManager.get_site_by_id_type(resource.id_type)
 
 
-ExternalResource.get_site = lambda resource: SiteManager.get_site_by_id_type(resource.id_type)
+ExternalResource.get_site = lambda resource: SiteManager.get_site_by_id_type(
+    resource.id_type
+)
 # ExternalResource.get_site = SiteManager.get_site_by_resource
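The test hunks throughout this commit all drive the flow above the same way; roughly:

    site = SiteManager.get_site_by_url("https://book.douban.com/subject/35902899/")
    resource = site.get_resource_ready()  # scrape, save ExternalResource, link Works
    item = site.get_item()                # the Edition/Movie/... behind the resource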
@@ -6,9 +6,14 @@ import uuid
 _logger = logging.getLogger(__name__)
 
 
-DEFAULT_ITEM_COVER = 'item/default.svg'
+DEFAULT_ITEM_COVER = "item/default.svg"
 
 
 def item_cover_path(resource, filename):
-    fn = timezone.now().strftime('%Y/%m/%d/') + str(uuid.uuid4()) + '.' + filename.split('.')[-1]
-    return 'item/' + resource.id_type + '/' + fn
+    fn = (
+        timezone.now().strftime("%Y/%m/%d/")
+        + str(uuid.uuid4())
+        + "."
+        + filename.split(".")[-1]
+    )
+    return "item/" + resource.id_type + "/" + fn
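So an uploaded cover lands at a path of this shape (date and uuid4 vary per upload):

    item_cover_path(resource, "cover.jpg")
    # -> "item/<id_type>/YYYY/MM/DD/<uuid4>.jpg"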
@@ -5,66 +5,63 @@ from django.db import models
 
 class Game(Item):
     category = ItemCategory.Game
-    url_path = 'game'
-    demonstrative = _('这个游戏')
+    url_path = "game"
+    demonstrative = _("这个游戏")
     igdb = PrimaryLookupIdDescriptor(IdType.IGDB)
     steam = PrimaryLookupIdDescriptor(IdType.Steam)
     douban_game = PrimaryLookupIdDescriptor(IdType.DoubanGame)
 
     METADATA_COPY_LIST = [
-        'title',
-        'brief',
-        'other_title',
-        'developer',
-        'publisher',
-        'release_date',
-        'genre',
-        'platform',
-        'official_site',
+        "title",
+        "brief",
+        "other_title",
+        "developer",
+        "publisher",
+        "release_date",
+        "genre",
+        "platform",
+        "official_site",
     ]
 
     other_title = jsondata.ArrayField(
-        models.CharField(blank=True, default='', max_length=500),
+        models.CharField(blank=True, default="", max_length=500),
         null=True,
         blank=True,
         default=list,
     )
 
     developer = jsondata.ArrayField(
-        models.CharField(blank=True, default='', max_length=500),
+        models.CharField(blank=True, default="", max_length=500),
         null=True,
         blank=True,
         default=list,
     )
 
     publisher = jsondata.ArrayField(
-        models.CharField(blank=True, default='', max_length=500),
+        models.CharField(blank=True, default="", max_length=500),
         null=True,
         blank=True,
         default=list,
     )
 
     release_date = jsondata.DateField(
-        auto_now=False,
-        auto_now_add=False,
-        null=True,
-        blank=True
+        auto_now=False, auto_now_add=False, null=True, blank=True
     )
 
     genre = jsondata.ArrayField(
-        models.CharField(blank=True, default='', max_length=200),
+        models.CharField(blank=True, default="", max_length=200),
         null=True,
         blank=True,
         default=list,
     )
 
     platform = jsondata.ArrayField(
-        models.CharField(blank=True, default='', max_length=200),
+        models.CharField(blank=True, default="", max_length=200),
         null=True,
         blank=True,
         default=list,
     )
 
     official_site = jsondata.CharField(
-        default='',
+        default="",
     )
@@ -6,8 +6,8 @@ from catalog.models import *
 class IGDBTestCase(TestCase):
     def test_parse(self):
         t_id_type = IdType.IGDB
-        t_id_value = 'portal-2'
-        t_url = 'https://www.igdb.com/games/portal-2'
+        t_id_value = "portal-2"
+        t_url = "https://www.igdb.com/games/portal-2"
         site = SiteManager.get_site_by_id_type(t_id_type)
         self.assertIsNotNone(site)
         self.assertEqual(site.validate_url(t_url), True)

@@ -17,34 +17,39 @@ class IGDBTestCase(TestCase):
 
     @use_local_response
     def test_scrape(self):
-        t_url = 'https://www.igdb.com/games/portal-2'
+        t_url = "https://www.igdb.com/games/portal-2"
         site = SiteManager.get_site_by_url(t_url)
         self.assertEqual(site.ready, False)
         site.get_resource_ready()
         self.assertEqual(site.ready, True)
-        self.assertEqual(site.resource.metadata['title'], 'Portal 2')
+        self.assertEqual(site.resource.metadata["title"], "Portal 2")
         self.assertIsInstance(site.resource.item, Game)
-        self.assertEqual(site.resource.item.steam, '620')
+        self.assertEqual(site.resource.item.steam, "620")
 
     @use_local_response
     def test_scrape_non_steam(self):
-        t_url = 'https://www.igdb.com/games/the-legend-of-zelda-breath-of-the-wild'
+        t_url = "https://www.igdb.com/games/the-legend-of-zelda-breath-of-the-wild"
         site = SiteManager.get_site_by_url(t_url)
         self.assertEqual(site.ready, False)
         site.get_resource_ready()
         self.assertEqual(site.ready, True)
-        self.assertEqual(site.resource.metadata['title'], 'The Legend of Zelda: Breath of the Wild')
+        self.assertEqual(
+            site.resource.metadata["title"], "The Legend of Zelda: Breath of the Wild"
+        )
         self.assertIsInstance(site.resource.item, Game)
         self.assertEqual(site.resource.item.primary_lookup_id_type, IdType.IGDB)
-        self.assertEqual(site.resource.item.primary_lookup_id_value, 'the-legend-of-zelda-breath-of-the-wild')
+        self.assertEqual(
+            site.resource.item.primary_lookup_id_value,
+            "the-legend-of-zelda-breath-of-the-wild",
+        )
 
 
 class SteamTestCase(TestCase):
     def test_parse(self):
         t_id_type = IdType.Steam
-        t_id_value = '620'
-        t_url = 'https://store.steampowered.com/app/620/Portal_2/'
-        t_url2 = 'https://store.steampowered.com/app/620'
+        t_id_value = "620"
+        t_url = "https://store.steampowered.com/app/620/Portal_2/"
+        t_url2 = "https://store.steampowered.com/app/620"
         site = SiteManager.get_site_by_id_type(t_id_type)
         self.assertIsNotNone(site)
         self.assertEqual(site.validate_url(t_url), True)

@@ -54,22 +59,24 @@ class SteamTestCase(TestCase):
 
     @use_local_response
     def test_scrape(self):
-        t_url = 'https://store.steampowered.com/app/620/Portal_2/'
+        t_url = "https://store.steampowered.com/app/620/Portal_2/"
         site = SiteManager.get_site_by_url(t_url)
         self.assertEqual(site.ready, False)
         site.get_resource_ready()
         self.assertEqual(site.ready, True)
-        self.assertEqual(site.resource.metadata['title'], 'Portal 2')
-        self.assertEqual(site.resource.metadata['brief'], '“终身测试计划”现已升级,您可以为您自己或您的好友设计合作谜题!')
+        self.assertEqual(site.resource.metadata["title"], "Portal 2")
+        self.assertEqual(
+            site.resource.metadata["brief"], "“终身测试计划”现已升级,您可以为您自己或您的好友设计合作谜题!"
+        )
         self.assertIsInstance(site.resource.item, Game)
-        self.assertEqual(site.resource.item.steam, '620')
+        self.assertEqual(site.resource.item.steam, "620")
 
 
 class DoubanGameTestCase(TestCase):
     def test_parse(self):
         t_id_type = IdType.DoubanGame
-        t_id_value = '10734307'
-        t_url = 'https://www.douban.com/game/10734307/'
+        t_id_value = "10734307"
+        t_url = "https://www.douban.com/game/10734307/"
         site = SiteManager.get_site_by_id_type(t_id_type)
         self.assertIsNotNone(site)
         self.assertEqual(site.validate_url(t_url), True)

@@ -79,21 +86,21 @@ class DoubanGameTestCase(TestCase):
 
     @use_local_response
     def test_scrape(self):
-        t_url = 'https://www.douban.com/game/10734307/'
+        t_url = "https://www.douban.com/game/10734307/"
         site = SiteManager.get_site_by_url(t_url)
         self.assertEqual(site.ready, False)
         site.get_resource_ready()
         self.assertEqual(site.ready, True)
-        self.assertEqual(site.resource.metadata['title'], '传送门2 Portal 2')
+        self.assertEqual(site.resource.metadata["title"], "传送门2 Portal 2")
         self.assertIsInstance(site.resource.item, Game)
-        self.assertEqual(site.resource.item.douban_game, '10734307')
+        self.assertEqual(site.resource.item.douban_game, "10734307")
 
 
 class BangumiGameTestCase(TestCase):
     def test_parse(self):
         t_id_type = IdType.Bangumi
-        t_id_value = '15912'
-        t_url = 'https://bgm.tv/subject/15912'
+        t_id_value = "15912"
+        t_url = "https://bgm.tv/subject/15912"
         site = SiteManager.get_site_by_id_type(t_id_type)
         self.assertIsNotNone(site)
         self.assertEqual(site.validate_url(t_url), True)

@@ -110,8 +117,8 @@ class BangumiGameTestCase(TestCase):
 class MultiGameSitesTestCase(TestCase):
     @use_local_response
     def test_games(self):
-        url1 = 'https://www.igdb.com/games/portal-2'
-        url2 = 'https://store.steampowered.com/app/620/Portal_2/'
+        url1 = "https://www.igdb.com/games/portal-2"
+        url2 = "https://store.steampowered.com/app/620/Portal_2/"
         p1 = SiteManager.get_site_by_url(url1).get_resource_ready()
         p2 = SiteManager.get_site_by_url(url2).get_resource_ready()
         self.assertEqual(p1.item.id, p2.item.id)
@@ -5,24 +5,24 @@ from catalog.sites import *
 
 
 class Command(BaseCommand):
-    help = 'Scrape a catalog item from external resource (and save it)'
+    help = "Scrape a catalog item from external resource (and save it)"
 
     def add_arguments(self, parser):
-        parser.add_argument('url', type=str, help='URL to scrape')
+        parser.add_argument("url", type=str, help="URL to scrape")
         parser.add_argument(
-            '--save',
-            action='store_true',
-            help='save to database',
+            "--save",
+            action="store_true",
+            help="save to database",
        )
 
     def handle(self, *args, **options):
-        url = str(options['url'])
+        url = str(options["url"])
         site = SiteManager.get_site_by_url(url)
         if site is None:
-            self.stdout.write(self.style.ERROR(f'Unknown site for {url}'))
+            self.stdout.write(self.style.ERROR(f"Unknown site for {url}"))
             return
-        self.stdout.write(f'Fetching from {site}')
-        if options['save']:
+        self.stdout.write(f"Fetching from {site}")
+        if options["save"]:
             resource = site.get_resource_ready()
             pprint.pp(resource.metadata)
             pprint.pp(site.get_item())

@@ -31,4 +31,4 @@ class Command(BaseCommand):
         resource = site.scrape()
         pprint.pp(resource.metadata)
         pprint.pp(resource.lookup_ids)
-        self.stdout.write(self.style.SUCCESS(f'Done.'))
+        self.stdout.write(self.style.SUCCESS(f"Done."))
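Invocation, assuming the command module keeps its existing name (file paths are not shown in this view, so the module name here is an assumption):

    # python3 manage.py cat "https://www.igdb.com/games/portal-2" --save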
@@ -37,7 +37,9 @@ def all_content_types():
     if _CONTENT_TYPE_LIST is None:
         _CONTENT_TYPE_LIST = {}
         for cls in Item.__subclasses__():
-            _CONTENT_TYPE_LIST[cls] = ContentType.objects.get(app_label='catalog', model=cls.__name__.lower()).id
+            _CONTENT_TYPE_LIST[cls] = ContentType.objects.get(
+                app_label="catalog", model=cls.__name__.lower()
+            ).id
     return _CONTENT_TYPE_LIST
 
 

@@ -46,7 +48,7 @@ def all_categories():
     if _CATEGORY_LIST is None:
         _CATEGORY_LIST = {}
         for cls in Item.__subclasses__():
-            c = getattr(cls, 'category', None)
+            c = getattr(cls, "category", None)
             if c not in _CATEGORY_LIST:
                 _CATEGORY_LIST[c] = [cls]
             else:
@@ -5,43 +5,93 @@ from django.db import models
 
 class Movie(Item):
     category = ItemCategory.Movie
-    url_path = 'movie'
+    url_path = "movie"
     imdb = PrimaryLookupIdDescriptor(IdType.IMDB)
     tmdb_movie = PrimaryLookupIdDescriptor(IdType.TMDB_Movie)
     douban_movie = PrimaryLookupIdDescriptor(IdType.DoubanMovie)
-    demonstrative = _('这部电影')
+    demonstrative = _("这部电影")
 
     METADATA_COPY_LIST = [
-        'title',
-        'orig_title',
-        'other_title',
-        'director',
-        'playwright',
-        'actor',
-        'genre',
-        'showtime',
-        'site',
-        'area',
-        'language',
-        'year',
-        'duration',
-        'season_number',
-        'episodes',
-        'single_episode_length',
-        'brief',
+        "title",
+        "orig_title",
+        "other_title",
+        "director",
+        "playwright",
+        "actor",
+        "genre",
+        "showtime",
+        "site",
+        "area",
+        "language",
+        "year",
+        "duration",
+        "season_number",
+        "episodes",
+        "single_episode_length",
+        "brief",
     ]
-    orig_title = jsondata.CharField(_("original title"), blank=True, default='', max_length=500)
-    other_title = jsondata.ArrayField(models.CharField(_("other title"), blank=True, default='', max_length=500), null=True, blank=True, default=list, )
-    director = jsondata.ArrayField(models.CharField(_("director"), blank=True, default='', max_length=200), null=True, blank=True, default=list, )
-    playwright = jsondata.ArrayField(models.CharField(_("playwright"), blank=True, default='', max_length=200), null=True, blank=True, default=list, )
-    actor = jsondata.ArrayField(models.CharField(_("actor"), blank=True, default='', max_length=200), null=True, blank=True, default=list, )
-    genre = jsondata.ArrayField(models.CharField(_("genre"), blank=True, default='', max_length=50), null=True, blank=True, default=list, )  # , choices=MovieGenreEnum.choices
-    showtime = jsondata.ArrayField(null=True, blank=True, default=list, )
-    site = jsondata.URLField(_('site url'), blank=True, default='', max_length=200)
-    area = jsondata.ArrayField(models.CharField(_("country or region"), blank=True, default='', max_length=100, ), null=True, blank=True, default=list, )
-    language = jsondata.ArrayField(models.CharField(blank=True, default='', max_length=100, ), null=True, blank=True, default=list, )
+    orig_title = jsondata.CharField(
+        _("original title"), blank=True, default="", max_length=500
+    )
+    other_title = jsondata.ArrayField(
+        models.CharField(_("other title"), blank=True, default="", max_length=500),
+        null=True,
+        blank=True,
+        default=list,
+    )
+    director = jsondata.ArrayField(
+        models.CharField(_("director"), blank=True, default="", max_length=200),
+        null=True,
+        blank=True,
+        default=list,
+    )
+    playwright = jsondata.ArrayField(
+        models.CharField(_("playwright"), blank=True, default="", max_length=200),
+        null=True,
+        blank=True,
+        default=list,
+    )
+    actor = jsondata.ArrayField(
+        models.CharField(_("actor"), blank=True, default="", max_length=200),
+        null=True,
+        blank=True,
+        default=list,
+    )
+    genre = jsondata.ArrayField(
+        models.CharField(_("genre"), blank=True, default="", max_length=50),
+        null=True,
+        blank=True,
+        default=list,
+    )  # , choices=MovieGenreEnum.choices
+    showtime = jsondata.ArrayField(
+        null=True,
+        blank=True,
+        default=list,
+    )
+    site = jsondata.URLField(_("site url"), blank=True, default="", max_length=200)
+    area = jsondata.ArrayField(
+        models.CharField(
+            _("country or region"),
+            blank=True,
+            default="",
+            max_length=100,
+        ),
+        null=True,
+        blank=True,
+        default=list,
+    )
+    language = jsondata.ArrayField(
+        models.CharField(
+            blank=True,
+            default="",
+            max_length=100,
+        ),
+        null=True,
+        blank=True,
+        default=list,
+    )
     year = jsondata.IntegerField(null=True, blank=True)
     season_number = jsondata.IntegerField(null=True, blank=True)
     episodes = jsondata.IntegerField(null=True, blank=True)
     single_episode_length = jsondata.IntegerField(null=True, blank=True)
-    duration = jsondata.CharField(blank=True, default='', max_length=200)
+    duration = jsondata.CharField(blank=True, default="", max_length=200)
@@ -4,8 +4,8 @@ from catalog.common import *
 
 class DoubanMovieTestCase(TestCase):
     def test_parse(self):
-        t_id = '3541415'
-        t_url = 'https://movie.douban.com/subject/3541415/'
+        t_id = "3541415"
+        t_url = "https://movie.douban.com/subject/3541415/"
         p1 = SiteManager.get_site_by_id_type(IdType.DoubanMovie)
         self.assertIsNotNone(p1)
         self.assertEqual(p1.validate_url(t_url), True)

@@ -15,22 +15,24 @@ class DoubanMovieTestCase(TestCase):
 
     @use_local_response
     def test_scrape(self):
-        t_url = 'https://movie.douban.com/subject/3541415/'
+        t_url = "https://movie.douban.com/subject/3541415/"
         site = SiteManager.get_site_by_url(t_url)
         self.assertEqual(site.ready, False)
-        self.assertEqual(site.id_value, '3541415')
+        self.assertEqual(site.id_value, "3541415")
         site.get_resource_ready()
-        self.assertEqual(site.resource.metadata['title'], '盗梦空间')
+        self.assertEqual(site.resource.metadata["title"], "盗梦空间")
         self.assertEqual(site.resource.item.primary_lookup_id_type, IdType.IMDB)
-        self.assertEqual(site.resource.item.__class__.__name__, 'Movie')
-        self.assertEqual(site.resource.item.imdb, 'tt1375666')
+        self.assertEqual(site.resource.item.__class__.__name__, "Movie")
+        self.assertEqual(site.resource.item.imdb, "tt1375666")
 
 
 class TMDBMovieTestCase(TestCase):
     def test_parse(self):
-        t_id = '293767'
-        t_url = 'https://www.themoviedb.org/movie/293767-billy-lynn-s-long-halftime-walk'
-        t_url2 = 'https://www.themoviedb.org/movie/293767'
+        t_id = "293767"
+        t_url = (
+            "https://www.themoviedb.org/movie/293767-billy-lynn-s-long-halftime-walk"
+        )
+        t_url2 = "https://www.themoviedb.org/movie/293767"
         p1 = SiteManager.get_site_by_id_type(IdType.TMDB_Movie)
         self.assertIsNotNone(p1)
         self.assertEqual(p1.validate_url(t_url), True)

@@ -41,22 +43,22 @@ class TMDBMovieTestCase(TestCase):
 
     @use_local_response
     def test_scrape(self):
-        t_url = 'https://www.themoviedb.org/movie/293767'
+        t_url = "https://www.themoviedb.org/movie/293767"
         site = SiteManager.get_site_by_url(t_url)
         self.assertEqual(site.ready, False)
-        self.assertEqual(site.id_value, '293767')
+        self.assertEqual(site.id_value, "293767")
         site.get_resource_ready()
-        self.assertEqual(site.resource.metadata['title'], '比利·林恩的中场战事')
+        self.assertEqual(site.resource.metadata["title"], "比利·林恩的中场战事")
         self.assertEqual(site.resource.item.primary_lookup_id_type, IdType.IMDB)
-        self.assertEqual(site.resource.item.__class__.__name__, 'Movie')
-        self.assertEqual(site.resource.item.imdb, 'tt2513074')
+        self.assertEqual(site.resource.item.__class__.__name__, "Movie")
+        self.assertEqual(site.resource.item.imdb, "tt2513074")
 
 
 class IMDBMovieTestCase(TestCase):
     def test_parse(self):
-        t_id = 'tt1375666'
-        t_url = 'https://www.imdb.com/title/tt1375666/'
-        t_url2 = 'https://www.imdb.com/title/tt1375666/'
+        t_id = "tt1375666"
+        t_url = "https://www.imdb.com/title/tt1375666/"
+        t_url2 = "https://www.imdb.com/title/tt1375666/"
         p1 = SiteManager.get_site_by_id_type(IdType.IMDB)
         self.assertIsNotNone(p1)
         self.assertEqual(p1.validate_url(t_url), True)

@@ -67,22 +69,22 @@ class IMDBMovieTestCase(TestCase):
 
     @use_local_response
     def test_scrape(self):
-        t_url = 'https://www.imdb.com/title/tt1375666/'
+        t_url = "https://www.imdb.com/title/tt1375666/"
         site = SiteManager.get_site_by_url(t_url)
         self.assertEqual(site.ready, False)
-        self.assertEqual(site.id_value, 'tt1375666')
+        self.assertEqual(site.id_value, "tt1375666")
         site.get_resource_ready()
-        self.assertEqual(site.resource.metadata['title'], '盗梦空间')
+        self.assertEqual(site.resource.metadata["title"], "盗梦空间")
         self.assertEqual(site.resource.item.primary_lookup_id_type, IdType.IMDB)
-        self.assertEqual(site.resource.item.imdb, 'tt1375666')
+        self.assertEqual(site.resource.item.imdb, "tt1375666")
 
 
 class MultiMovieSitesTestCase(TestCase):
     @use_local_response
     def test_movies(self):
-        url1 = 'https://www.themoviedb.org/movie/27205-inception'
-        url2 = 'https://movie.douban.com/subject/3541415/'
-        url3 = 'https://www.imdb.com/title/tt1375666/'
+        url1 = "https://www.themoviedb.org/movie/27205-inception"
+        url2 = "https://movie.douban.com/subject/3541415/"
+        url3 = "https://www.imdb.com/title/tt1375666/"
         p1 = SiteManager.get_site_by_url(url1).get_resource_ready()
         p2 = SiteManager.get_site_by_url(url2).get_resource_ready()
         p3 = SiteManager.get_site_by_url(url3).get_resource_ready()
@@ -4,35 +4,47 @@ from django.db import models

 class Album(Item):
-    url_path = 'album'
+    url_path = "album"
     category = ItemCategory.Music
-    demonstrative = _('这张专辑')
+    demonstrative = _("这张专辑")
     barcode = PrimaryLookupIdDescriptor(IdType.GTIN)
     douban_music = PrimaryLookupIdDescriptor(IdType.DoubanMusic)
     spotify_album = PrimaryLookupIdDescriptor(IdType.Spotify_Album)
     METADATA_COPY_LIST = [
-        'title',
-        'other_title',
-        'album_type',
-        'media',
-        'disc_count',
-        'artist',
-        'genre',
-        'release_date',
-        'duration',
-        'company',
-        'track_list',
-        'brief',
-        'bandcamp_album_id',
+        "title",
+        "other_title",
+        "album_type",
+        "media",
+        "disc_count",
+        "artist",
+        "genre",
+        "release_date",
+        "duration",
+        "company",
+        "track_list",
+        "brief",
+        "bandcamp_album_id",
     ]
-    release_date = jsondata.DateField(_('发行日期'), auto_now=False, auto_now_add=False, null=True, blank=True)
+    release_date = jsondata.DateField(
+        _("发行日期"), auto_now=False, auto_now_add=False, null=True, blank=True
+    )
     duration = jsondata.IntegerField(_("时长"), null=True, blank=True)
-    artist = jsondata.ArrayField(models.CharField(_("artist"), blank=True, default='', max_length=200), null=True, blank=True, default=list)
-    genre = jsondata.CharField(_("流派"), blank=True, default='', max_length=100)
-    company = jsondata.ArrayField(models.CharField(blank=True, default='', max_length=500), null=True, blank=True, default=list)
+    artist = jsondata.ArrayField(
+        models.CharField(_("artist"), blank=True, default="", max_length=200),
+        null=True,
+        blank=True,
+        default=list,
+    )
+    genre = jsondata.CharField(_("流派"), blank=True, default="", max_length=100)
+    company = jsondata.ArrayField(
+        models.CharField(blank=True, default="", max_length=500),
+        null=True,
+        blank=True,
+        default=list,
+    )
     track_list = jsondata.TextField(_("曲目"), blank=True, default="")
-    other_title = jsondata.CharField(blank=True, default='', max_length=500)
-    album_type = jsondata.CharField(blank=True, default='', max_length=500)
-    media = jsondata.CharField(blank=True, default='', max_length=500)
-    bandcamp_album_id = jsondata.CharField(blank=True, default='', max_length=500)
-    disc_count = jsondata.IntegerField(blank=True, default='', max_length=500)
+    other_title = jsondata.CharField(blank=True, default="", max_length=500)
+    album_type = jsondata.CharField(blank=True, default="", max_length=500)
+    media = jsondata.CharField(blank=True, default="", max_length=500)
+    bandcamp_album_id = jsondata.CharField(blank=True, default="", max_length=500)
+    disc_count = jsondata.IntegerField(blank=True, default="", max_length=500)

@@ -6,8 +6,8 @@ from catalog.models import *
 class SpotifyTestCase(TestCase):
     def test_parse(self):
         t_id_type = IdType.Spotify_Album
-        t_id_value = '65KwtzkJXw7oT819NFWmEP'
-        t_url = 'https://open.spotify.com/album/65KwtzkJXw7oT819NFWmEP'
+        t_id_value = "65KwtzkJXw7oT819NFWmEP"
+        t_url = "https://open.spotify.com/album/65KwtzkJXw7oT819NFWmEP"
         site = SiteManager.get_site_by_id_type(t_id_type)
         self.assertIsNotNone(site)
         self.assertEqual(site.validate_url(t_url), True)

@@ -17,21 +17,21 @@ class SpotifyTestCase(TestCase):
     @use_local_response
     def test_scrape(self):
-        t_url = 'https://open.spotify.com/album/65KwtzkJXw7oT819NFWmEP'
+        t_url = "https://open.spotify.com/album/65KwtzkJXw7oT819NFWmEP"
         site = SiteManager.get_site_by_url(t_url)
         self.assertEqual(site.ready, False)
         site.get_resource_ready()
         self.assertEqual(site.ready, True)
-        self.assertEqual(site.resource.metadata['title'], 'The Race For Space')
+        self.assertEqual(site.resource.metadata["title"], "The Race For Space")
         self.assertIsInstance(site.resource.item, Album)
-        self.assertEqual(site.resource.item.barcode, '3610159662676')
+        self.assertEqual(site.resource.item.barcode, "3610159662676")


 class DoubanMusicTestCase(TestCase):
     def test_parse(self):
         t_id_type = IdType.DoubanMusic
-        t_id_value = '33551231'
-        t_url = 'https://music.douban.com/subject/33551231/'
+        t_id_value = "33551231"
+        t_url = "https://music.douban.com/subject/33551231/"
         site = SiteManager.get_site_by_id_type(t_id_type)
         self.assertIsNotNone(site)
         self.assertEqual(site.validate_url(t_url), True)

@@ -41,21 +41,21 @@ class DoubanMusicTestCase(TestCase):
     @use_local_response
     def test_scrape(self):
-        t_url = 'https://music.douban.com/subject/33551231/'
+        t_url = "https://music.douban.com/subject/33551231/"
         site = SiteManager.get_site_by_url(t_url)
         self.assertEqual(site.ready, False)
         site.get_resource_ready()
         self.assertEqual(site.ready, True)
-        self.assertEqual(site.resource.metadata['title'], 'The Race For Space')
+        self.assertEqual(site.resource.metadata["title"], "The Race For Space")
         self.assertIsInstance(site.resource.item, Album)
-        self.assertEqual(site.resource.item.barcode, '3610159662676')
+        self.assertEqual(site.resource.item.barcode, "3610159662676")


 class MultiMusicSitesTestCase(TestCase):
     @use_local_response
     def test_albums(self):
-        url1 = 'https://music.douban.com/subject/33551231/'
-        url2 = 'https://open.spotify.com/album/65KwtzkJXw7oT819NFWmEP'
+        url1 = "https://music.douban.com/subject/33551231/"
+        url2 = "https://open.spotify.com/album/65KwtzkJXw7oT819NFWmEP"
         p1 = SiteManager.get_site_by_url(url1).get_resource_ready()
         p2 = SiteManager.get_site_by_url(url2).get_resource_ready()
         self.assertEqual(p1.item.id, p2.item.id)

@@ -64,9 +64,9 @@ class MultiMusicSitesTestCase(TestCase):
 class BandcampTestCase(TestCase):
     def test_parse(self):
         t_id_type = IdType.Bandcamp
-        t_id_value = 'intlanthem.bandcamp.com/album/in-these-times'
-        t_url = 'https://intlanthem.bandcamp.com/album/in-these-times?from=hpbcw'
-        t_url2 = 'https://intlanthem.bandcamp.com/album/in-these-times'
+        t_id_value = "intlanthem.bandcamp.com/album/in-these-times"
+        t_url = "https://intlanthem.bandcamp.com/album/in-these-times?from=hpbcw"
+        t_url2 = "https://intlanthem.bandcamp.com/album/in-these-times"
         site = SiteManager.get_site_by_id_type(t_id_type)
         self.assertIsNotNone(site)
         self.assertEqual(site.validate_url(t_url), True)

@@ -76,11 +76,11 @@ class BandcampTestCase(TestCase):
     @use_local_response
     def test_scrape(self):
-        t_url = 'https://intlanthem.bandcamp.com/album/in-these-times?from=hpbcw'
+        t_url = "https://intlanthem.bandcamp.com/album/in-these-times?from=hpbcw"
         site = SiteManager.get_site_by_url(t_url)
         self.assertEqual(site.ready, False)
         site.get_resource_ready()
         self.assertEqual(site.ready, True)
-        self.assertEqual(site.resource.metadata['title'], 'In These Times')
-        self.assertEqual(site.resource.metadata['artist'], ['Makaya McCraven'])
+        self.assertEqual(site.resource.metadata["title"], "In These Times")
+        self.assertEqual(site.resource.metadata["artist"], ["Makaya McCraven"])
         self.assertIsInstance(site.resource.item, Album)

@@ -4,12 +4,12 @@ from django.utils.translation import gettext_lazy as _

 class Performance(Item):
     category = ItemCategory.Performance
-    url_path = 'performance'
+    url_path = "performance"
     douban_drama = LookupIdDescriptor(IdType.DoubanDrama)
-    versions = jsondata.ArrayField(_('版本'), null=False, blank=False, default=list)
-    directors = jsondata.ArrayField(_('导演'), null=False, blank=False, default=list)
-    playwrights = jsondata.ArrayField(_('编剧'), null=False, blank=False, default=list)
-    actors = jsondata.ArrayField(_('主演'), null=False, blank=False, default=list)
+    versions = jsondata.ArrayField(_("版本"), null=False, blank=False, default=list)
+    directors = jsondata.ArrayField(_("导演"), null=False, blank=False, default=list)
+    playwrights = jsondata.ArrayField(_("编剧"), null=False, blank=False, default=list)
+    actors = jsondata.ArrayField(_("主演"), null=False, blank=False, default=list)

     class Meta:
         proxy = True

@@ -7,8 +7,8 @@ class DoubanDramaTestCase(TestCase):
         pass

     def test_parse(self):
-        t_id = '24849279'
-        t_url = 'https://www.douban.com/location/drama/24849279/'
+        t_id = "24849279"
+        t_url = "https://www.douban.com/location/drama/24849279/"
         p1 = SiteManager.get_site_by_id_type(IdType.DoubanDrama)
         self.assertIsNotNone(p1)
         p1 = SiteManager.get_site_by_url(t_url)

@@ -19,14 +19,14 @@ class DoubanDramaTestCase(TestCase):
     @use_local_response
     def test_scrape(self):
-        t_url = 'https://www.douban.com/location/drama/24849279/'
+        t_url = "https://www.douban.com/location/drama/24849279/"
         site = SiteManager.get_site_by_url(t_url)
         self.assertEqual(site.ready, False)
         resource = site.get_resource_ready()
         self.assertEqual(site.ready, True)
-        self.assertEqual(resource.metadata['title'], '红花侠')
+        self.assertEqual(resource.metadata["title"], "红花侠")
         item = site.get_item()
-        self.assertEqual(item.title, '红花侠')
+        self.assertEqual(item.title, "红花侠")

         # self.assertEqual(i.other_titles, ['スカーレットピンパーネル', 'THE SCARLET PIMPERNEL'])
         # self.assertEqual(len(i.brief), 545)

@@ -3,7 +3,7 @@ from catalog.common import *

 class Podcast(Item):
     category = ItemCategory.Podcast
-    url_path = 'podcast'
+    url_path = "podcast"
     feed_url = PrimaryLookupIdDescriptor(IdType.Feed)
     apple_podcast = PrimaryLookupIdDescriptor(IdType.ApplePodcast)
     # ximalaya = LookupIdDescriptor(IdType.Ximalaya)

@@ -8,9 +8,9 @@ class ApplePodcastTestCase(TestCase):
         pass

     def test_parse(self):
-        t_id = '657765158'
-        t_url = 'https://podcasts.apple.com/us/podcast/%E5%A4%A7%E5%86%85%E5%AF%86%E8%B0%88/id657765158'
-        t_url2 = 'https://podcasts.apple.com/us/podcast/id657765158'
+        t_id = "657765158"
+        t_url = "https://podcasts.apple.com/us/podcast/%E5%A4%A7%E5%86%85%E5%AF%86%E8%B0%88/id657765158"
+        t_url2 = "https://podcasts.apple.com/us/podcast/id657765158"
         p1 = SiteManager.get_site_by_id_type(IdType.ApplePodcast)
         self.assertIsNotNone(p1)
         self.assertEqual(p1.validate_url(t_url), True)

@@ -20,11 +20,14 @@ class ApplePodcastTestCase(TestCase):
     @use_local_response
     def test_scrape(self):
-        t_url = 'https://podcasts.apple.com/gb/podcast/the-new-yorker-radio-hour/id1050430296'
+        t_url = "https://podcasts.apple.com/gb/podcast/the-new-yorker-radio-hour/id1050430296"
         site = SiteManager.get_site_by_url(t_url)
         self.assertEqual(site.ready, False)
-        self.assertEqual(site.id_value, '1050430296')
+        self.assertEqual(site.id_value, "1050430296")
         site.get_resource_ready()
-        self.assertEqual(site.resource.metadata['title'], 'The New Yorker Radio Hour')
+        self.assertEqual(site.resource.metadata["title"], "The New Yorker Radio Hour")
         # self.assertEqual(site.resource.metadata['feed_url'], 'http://feeds.wnyc.org/newyorkerradiohour')
-        self.assertEqual(site.resource.metadata['feed_url'], 'http://feeds.feedburner.com/newyorkerradiohour')
+        self.assertEqual(
+            site.resource.metadata["feed_url"],
+            "http://feeds.feedburner.com/newyorkerradiohour",
+        )

@@ -11,7 +11,7 @@ class ApplePodcast(AbstractSite):
     SITE_NAME = SiteName.ApplePodcast
     ID_TYPE = IdType.ApplePodcast
     URL_PATTERNS = [r"https://[^.]+.apple.com/\w+/podcast/*[^/?]*/id(\d+)"]
-    WIKI_PROPERTY_ID = 'P5842'
+    WIKI_PROPERTY_ID = "P5842"
     DEFAULT_MODEL = Podcast

     @classmethod

@@ -19,23 +19,27 @@ class ApplePodcast(AbstractSite):
         return "https://podcasts.apple.com/us/podcast/id" + id_value

     def scrape(self):
-        api_url = f'https://itunes.apple.com/lookup?id={self.id_value}'
+        api_url = f"https://itunes.apple.com/lookup?id={self.id_value}"
         dl = BasicDownloader(api_url)
         resp = dl.download()
-        r = resp.json()['results'][0]
-        pd = ResourceContent(metadata={
-            'title': r['trackName'],
-            'feed_url': r['feedUrl'],
-            'hosts': [r['artistName']],
-            'genres': r['genres'],
-            'cover_image_url': r['artworkUrl600'],
-        })
-        pd.lookup_ids[IdType.Feed] = pd.metadata.get('feed_url')
+        r = resp.json()["results"][0]
+        pd = ResourceContent(
+            metadata={
+                "title": r["trackName"],
+                "feed_url": r["feedUrl"],
+                "hosts": [r["artistName"]],
+                "genres": r["genres"],
+                "cover_image_url": r["artworkUrl600"],
+            }
+        )
+        pd.lookup_ids[IdType.Feed] = pd.metadata.get("feed_url")
         if pd.metadata["cover_image_url"]:
             imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
             try:
                 pd.cover_image = imgdl.download().content
                 pd.cover_image_extention = imgdl.extention
             except Exception:
-                _logger.debug(f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}')
+                _logger.debug(
+                    f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
+                )
         return pd

@@ -14,11 +14,9 @@ _logger = logging.getLogger(__name__)
 class Bandcamp(AbstractSite):
     SITE_NAME = SiteName.Bandcamp
     ID_TYPE = IdType.Bandcamp
-    URL_PATTERNS = [
-        r"https://([a-z0-9\-]+.bandcamp.com/album/[^?#/]+)"
-    ]
+    URL_PATTERNS = [r"https://([a-z0-9\-]+.bandcamp.com/album/[^?#/]+)"]
     URL_PATTERN_FALLBACK = r"https://([a-z0-9\-\.]+/album/[^?#/]+)"
-    WIKI_PROPERTY_ID = ''
+    WIKI_PROPERTY_ID = ""
     DEFAULT_MODEL = Album

     @classmethod

@@ -32,16 +30,16 @@ class Bandcamp(AbstractSite):
         parsed_url = urllib.parse.urlparse(url)
         hostname = parsed_url.netloc
         try:
-            answers = dns.resolver.query(hostname, 'CNAME')
+            answers = dns.resolver.query(hostname, "CNAME")
             for rdata in answers:
-                if str(rdata.target) == 'dom.bandcamp.com.':
+                if str(rdata.target) == "dom.bandcamp.com.":
                     return True
         except Exception:
             pass
         try:
-            answers = dns.resolver.query(hostname, 'A')
+            answers = dns.resolver.query(hostname, "A")
             for rdata in answers:
-                if str(rdata.address) == '35.241.62.186':
+                if str(rdata.address) == "35.241.62.186":
                     return True
         except Exception:
             pass

@@ -50,34 +48,45 @@ class Bandcamp(AbstractSite):
         content = BasicDownloader(self.url).download().html()
         try:
             title = content.xpath("//h2[@class='trackTitle']/text()")[0].strip()
-            artist = [content.xpath("//div[@id='name-section']/h3/span/a/text()")[0].strip()]
+            artist = [
+                content.xpath("//div[@id='name-section']/h3/span/a/text()")[0].strip()
+            ]
         except IndexError:
             raise ValueError("given url contains no valid info")

         genre = [] # TODO: parse tags
         track_list = []
-        release_nodes = content.xpath("//div[@class='tralbumData tralbum-credits']/text()")
-        release_date = dateparser.parse(re.sub(r'releas\w+ ', '', release_nodes[0].strip())).strftime('%Y-%m-%d') if release_nodes else None
+        release_nodes = content.xpath(
+            "//div[@class='tralbumData tralbum-credits']/text()"
+        )
+        release_date = (
+            dateparser.parse(
+                re.sub(r"releas\w+ ", "", release_nodes[0].strip())
+            ).strftime("%Y-%m-%d")
+            if release_nodes
+            else None
+        )
         duration = None
         company = None
         brief_nodes = content.xpath("//div[@class='tralbumData tralbum-about']/text()")
         brief = "".join(brief_nodes) if brief_nodes else None
         cover_url = content.xpath("//div[@id='tralbumArt']/a/@href")[0].strip()
-        bandcamp_page_data = json.loads(content.xpath(
-            "//meta[@name='bc-page-properties']/@content")[0].strip())
-        bandcamp_album_id = bandcamp_page_data['item_id']
+        bandcamp_page_data = json.loads(
+            content.xpath("//meta[@name='bc-page-properties']/@content")[0].strip()
+        )
+        bandcamp_album_id = bandcamp_page_data["item_id"]

         data = {
-            'title': title,
-            'artist': artist,
-            'genre': genre,
-            'track_list': track_list,
-            'release_date': release_date,
-            'duration': duration,
-            'company': company,
-            'brief': brief,
-            'bandcamp_album_id': bandcamp_album_id,
-            'cover_image_url': cover_url,
+            "title": title,
+            "artist": artist,
+            "genre": genre,
+            "track_list": track_list,
+            "release_date": release_date,
+            "duration": duration,
+            "company": company,
+            "brief": brief,
+            "bandcamp_album_id": bandcamp_album_id,
+            "cover_image_url": cover_url,
         }
         pd = ResourceContent(metadata=data)
         if data["cover_image_url"]:

@@ -86,5 +95,7 @@ class Bandcamp(AbstractSite):
                 pd.cover_image = imgdl.download().content
                 pd.cover_image_extention = imgdl.extention
             except Exception:
-                _logger.debug(f'failed to download cover for {self.url} from {data["cover_image_url"]}')
+                _logger.debug(
+                    f'failed to download cover for {self.url} from {data["cover_image_url"]}'
+                )
         return pd

@@ -13,7 +13,7 @@ class Bangumi(AbstractSite):
     URL_PATTERNS = [
         r"https://bgm\.tv/subject/(\d+)",
     ]
-    WIKI_PROPERTY_ID = ''
+    WIKI_PROPERTY_ID = ""
     DEFAULT_MODEL = None

     @classmethod

@@ -13,14 +13,17 @@ class DoubanDownloader(ProxiedDownloader):
         elif response.status_code == 204:
             return RESPONSE_CENSORSHIP
         elif response.status_code == 200:
-            content = response.content.decode('utf-8')
-            if content.find('关于豆瓣') == -1:
+            content = response.content.decode("utf-8")
+            if content.find("关于豆瓣") == -1:
                 # if content.find('你的 IP 发出') == -1:
                 #     error = error + 'Content not authentic' # response is garbage
                 # else:
                 #     error = error + 'IP banned'
                 return RESPONSE_NETWORK_ERROR
-            elif content.find('<title>页面不存在</title>') != -1 or content.find('呃... 你想访问的条目豆瓣不收录。') != -1: # re.search('不存在[^<]+</title>', content, re.MULTILINE):
+            elif (
+                content.find("<title>页面不存在</title>") != -1
+                or content.find("呃... 你想访问的条目豆瓣不收录。") != -1
+            ): # re.search('不存在[^<]+</title>', content, re.MULTILINE):
                 return RESPONSE_CENSORSHIP
             else:
                 return RESPONSE_OK

@@ -12,8 +12,11 @@ _logger = logging.getLogger(__name__)
 class DoubanBook(AbstractSite):
     SITE_NAME = SiteName.Douban
     ID_TYPE = IdType.DoubanBook
-    URL_PATTERNS = [r"\w+://book\.douban\.com/subject/(\d+)/{0,1}", r"\w+://m.douban.com/book/subject/(\d+)/{0,1}"]
-    WIKI_PROPERTY_ID = '?'
+    URL_PATTERNS = [
+        r"\w+://book\.douban\.com/subject/(\d+)/{0,1}",
+        r"\w+://m.douban.com/book/subject/(\d+)/{0,1}",
+    ]
+    WIKI_PROPERTY_ID = "?"
     DEFAULT_MODEL = Edition

     @classmethod

@@ -23,31 +26,40 @@ class DoubanBook(AbstractSite):
     def scrape(self):
         content = DoubanDownloader(self.url).download().html()

-        isbn_elem = content.xpath("//div[@id='info']//span[text()='ISBN:']/following::text()")
+        isbn_elem = content.xpath(
+            "//div[@id='info']//span[text()='ISBN:']/following::text()"
+        )
         isbn = isbn_elem[0].strip() if isbn_elem else None

         title_elem = content.xpath("/html/body//h1/span/text()")
-        title = title_elem[0].strip() if title_elem else f"Unknown Title {self.id_value}"
+        title = (
+            title_elem[0].strip() if title_elem else f"Unknown Title {self.id_value}"
+        )

         subtitle_elem = content.xpath(
-            "//div[@id='info']//span[text()='副标题:']/following::text()")
+            "//div[@id='info']//span[text()='副标题:']/following::text()"
+        )
         subtitle = subtitle_elem[0].strip()[:500] if subtitle_elem else None

         orig_title_elem = content.xpath(
-            "//div[@id='info']//span[text()='原作名:']/following::text()")
+            "//div[@id='info']//span[text()='原作名:']/following::text()"
+        )
         orig_title = orig_title_elem[0].strip()[:500] if orig_title_elem else None

         language_elem = content.xpath(
-            "//div[@id='info']//span[text()='语言:']/following::text()")
+            "//div[@id='info']//span[text()='语言:']/following::text()"
+        )
         language = language_elem[0].strip() if language_elem else None

         pub_house_elem = content.xpath(
-            "//div[@id='info']//span[text()='出版社:']/following::text()")
+            "//div[@id='info']//span[text()='出版社:']/following::text()"
+        )
         pub_house = pub_house_elem[0].strip() if pub_house_elem else None

         pub_date_elem = content.xpath(
-            "//div[@id='info']//span[text()='出版年:']/following::text()")
-        pub_date = pub_date_elem[0].strip() if pub_date_elem else ''
+            "//div[@id='info']//span[text()='出版年:']/following::text()"
+        )
+        pub_date = pub_date_elem[0].strip() if pub_date_elem else ""
         year_month_day = RE_NUMBERS.findall(pub_date)
         if len(year_month_day) in (2, 3):
             pub_year = int(year_month_day[0])

@@ -60,45 +72,62 @@ class DoubanBook(AbstractSite):
             pub_month = None
         if pub_year and pub_month and pub_year < pub_month:
             pub_year, pub_month = pub_month, pub_year
-        pub_year = None if pub_year is not None and pub_year not in range(
-            0, 3000) else pub_year
-        pub_month = None if pub_month is not None and pub_month not in range(
-            1, 12) else pub_month
+        pub_year = (
+            None
+            if pub_year is not None and pub_year not in range(0, 3000)
+            else pub_year
+        )
+        pub_month = (
+            None
+            if pub_month is not None and pub_month not in range(1, 12)
+            else pub_month
+        )

         binding_elem = content.xpath(
-            "//div[@id='info']//span[text()='装帧:']/following::text()")
+            "//div[@id='info']//span[text()='装帧:']/following::text()"
+        )
         binding = binding_elem[0].strip() if binding_elem else None

         price_elem = content.xpath(
-            "//div[@id='info']//span[text()='定价:']/following::text()")
+            "//div[@id='info']//span[text()='定价:']/following::text()"
+        )
         price = price_elem[0].strip() if price_elem else None

         pages_elem = content.xpath(
-            "//div[@id='info']//span[text()='页数:']/following::text()")
+            "//div[@id='info']//span[text()='页数:']/following::text()"
+        )
         pages = pages_elem[0].strip() if pages_elem else None
         if pages is not None:
-            pages = int(RE_NUMBERS.findall(pages)[
-                0]) if RE_NUMBERS.findall(pages) else None
+            pages = (
+                int(RE_NUMBERS.findall(pages)[0]) if RE_NUMBERS.findall(pages) else None
+            )
             if pages and (pages > 999999 or pages < 1):
                 pages = None

         brief_elem = content.xpath(
-            "//h2/span[text()='内容简介']/../following-sibling::div[1]//div[@class='intro'][not(ancestor::span[@class='short'])]/p/text()")
-        brief = '\n'.join(p.strip()
-                          for p in brief_elem) if brief_elem else None
+            "//h2/span[text()='内容简介']/../following-sibling::div[1]//div[@class='intro'][not(ancestor::span[@class='short'])]/p/text()"
+        )
+        brief = "\n".join(p.strip() for p in brief_elem) if brief_elem else None

         contents = None
         try:
             contents_elem = content.xpath(
-                "//h2/span[text()='目录']/../following-sibling::div[1]")[0]
+                "//h2/span[text()='目录']/../following-sibling::div[1]"
+            )[0]
             # if next the id of next sibling contains `dir`, that would be the full contents
             if "dir" in contents_elem.getnext().xpath("@id")[0]:
                 contents_elem = contents_elem.getnext()
-                contents = '\n'.join(p.strip() for p in contents_elem.xpath(
-                    "text()")[:-2]) if contents_elem is not None else None
+                contents = (
+                    "\n".join(p.strip() for p in contents_elem.xpath("text()")[:-2])
+                    if contents_elem is not None
+                    else None
+                )
             else:
-                contents = '\n'.join(p.strip() for p in contents_elem.xpath(
-                    "text()")) if contents_elem is not None else None
+                contents = (
+                    "\n".join(p.strip() for p in contents_elem.xpath("text()"))
+                    if contents_elem is not None
+                    else None
+                )
         except Exception:
             pass

@@ -106,82 +135,97 @@ class DoubanBook(AbstractSite):
         img_url = img_url_elem[0].strip() if img_url_elem else None

         # there are two html formats for authors and translators
-        authors_elem = content.xpath("""//div[@id='info']//span[text()='作者:']/following-sibling::br[1]/
-            preceding-sibling::a[preceding-sibling::span[text()='作者:']]/text()""")
+        authors_elem = content.xpath(
+            """//div[@id='info']//span[text()='作者:']/following-sibling::br[1]/
+            preceding-sibling::a[preceding-sibling::span[text()='作者:']]/text()"""
+        )
         if not authors_elem:
             authors_elem = content.xpath(
-                """//div[@id='info']//span[text()=' 作者']/following-sibling::a/text()""")
+                """//div[@id='info']//span[text()=' 作者']/following-sibling::a/text()"""
+            )
         if authors_elem:
             authors = []
             for author in authors_elem:
-                authors.append(RE_WHITESPACES.sub(' ', author.strip())[:200])
+                authors.append(RE_WHITESPACES.sub(" ", author.strip())[:200])
         else:
             authors = None

-        translators_elem = content.xpath("""//div[@id='info']//span[text()='译者:']/following-sibling::br[1]/
-            preceding-sibling::a[preceding-sibling::span[text()='译者:']]/text()""")
+        translators_elem = content.xpath(
+            """//div[@id='info']//span[text()='译者:']/following-sibling::br[1]/
+            preceding-sibling::a[preceding-sibling::span[text()='译者:']]/text()"""
+        )
         if not translators_elem:
             translators_elem = content.xpath(
-                """//div[@id='info']//span[text()=' 译者']/following-sibling::a/text()""")
+                """//div[@id='info']//span[text()=' 译者']/following-sibling::a/text()"""
+            )
         if translators_elem:
             translators = []
             for translator in translators_elem:
-                translators.append(RE_WHITESPACES.sub(' ', translator.strip()))
+                translators.append(RE_WHITESPACES.sub(" ", translator.strip()))
         else:
             translators = None

         cncode_elem = content.xpath(
-            "//div[@id='info']//span[text()='统一书号:']/following::text()")
+            "//div[@id='info']//span[text()='统一书号:']/following::text()"
+        )
         cubn = cncode_elem[0].strip() if cncode_elem else None

         series_elem = content.xpath(
-            "//div[@id='info']//span[text()='丛书:']/following-sibling::a[1]/text()")
+            "//div[@id='info']//span[text()='丛书:']/following-sibling::a[1]/text()"
+        )
         series = series_elem[0].strip() if series_elem else None

         imprint_elem = content.xpath(
-            "//div[@id='info']//span[text()='出品方:']/following-sibling::a[1]/text()")
+            "//div[@id='info']//span[text()='出品方:']/following-sibling::a[1]/text()"
+        )
         imprint = imprint_elem[0].strip() if imprint_elem else None

         data = {
-            'title': title,
-            'subtitle': subtitle,
-            'orig_title': orig_title,
-            'author': authors,
-            'translator': translators,
-            'language': language,
-            'pub_house': pub_house,
-            'pub_year': pub_year,
-            'pub_month': pub_month,
-            'binding': binding,
-            'price': price,
-            'pages': pages,
-            'isbn': isbn,
-            'cubn': cubn,
-            'brief': brief,
-            'contents': contents,
-            'series': series,
-            'imprint': imprint,
-            'cover_image_url': img_url,
+            "title": title,
+            "subtitle": subtitle,
+            "orig_title": orig_title,
+            "author": authors,
+            "translator": translators,
+            "language": language,
+            "pub_house": pub_house,
+            "pub_year": pub_year,
+            "pub_month": pub_month,
+            "binding": binding,
+            "price": price,
+            "pages": pages,
+            "isbn": isbn,
+            "cubn": cubn,
+            "brief": brief,
+            "contents": contents,
+            "series": series,
+            "imprint": imprint,
+            "cover_image_url": img_url,
         }

-        works_element = content.xpath('//h2/span[text()="这本书的其他版本"]/following-sibling::span[@class="pl"]/a/@href')
+        works_element = content.xpath(
+            '//h2/span[text()="这本书的其他版本"]/following-sibling::span[@class="pl"]/a/@href'
+        )
         if works_element:
-            r = re.match(r'\w+://book.douban.com/works/(\d+)', works_element[0])
-            data['required_resources'] = [{
-                'model': 'Work',
-                'id_type': IdType.DoubanBook_Work,
-                'id_value': r[1] if r else None,
-                'title': data['title'],
-                'url': works_element[0],
-                'content': {'metadata': {'title': data['title']}}
-            }]
+            r = re.match(r"\w+://book.douban.com/works/(\d+)", works_element[0])
+            data["required_resources"] = [
+                {
+                    "model": "Work",
+                    "id_type": IdType.DoubanBook_Work,
+                    "id_value": r[1] if r else None,
+                    "title": data["title"],
+                    "url": works_element[0],
+                    "content": {"metadata": {"title": data["title"]}},
+                }
+            ]

         pd = ResourceContent(metadata=data)
         t, n = detect_isbn_asin(isbn)
         if t:
             pd.lookup_ids[t] = n
         pd.lookup_ids[IdType.CUBN] = cubn
-        pd.cover_image, pd.cover_image_extention = BasicImageDownloader.download_image(img_url, self.url)
+        pd.cover_image, pd.cover_image_extention = BasicImageDownloader.download_image(
+            img_url, self.url
+        )
         return pd


@@ -189,7 +233,7 @@ class DoubanBook(AbstractSite):
 class DoubanBook_Work(AbstractSite):
     ID_TYPE = IdType.DoubanBook_Work
     URL_PATTERNS = [r"\w+://book\.douban\.com/works/(\d+)"]
-    WIKI_PROPERTY_ID = '?'
+    WIKI_PROPERTY_ID = "?"
     DEFAULT_MODEL = Work

     @classmethod

@@ -199,10 +243,12 @@ class DoubanBook_Work(AbstractSite):
     def scrape(self):
         content = DoubanDownloader(self.url).download().html()
        title_elem = content.xpath("//h1/text()")
-        title = title_elem[0].split('全部版本(')[0].strip() if title_elem else None
+        title = title_elem[0].split("全部版本(")[0].strip() if title_elem else None
         if not title:
-            raise ParseError(self, 'title')
-        pd = ResourceContent(metadata={
-            'title': title,
-        })
+            raise ParseError(self, "title")
+        pd = ResourceContent(
+            metadata={
+                "title": title,
+            }
+        )
         return pd

@@ -12,7 +12,7 @@ class DoubanDrama(AbstractSite):
     SITE_NAME = SiteName.Douban
     ID_TYPE = IdType.DoubanDrama
     URL_PATTERNS = [r"\w+://www.douban.com/location/drama/(\d+)/"]
-    WIKI_PROPERTY_ID = 'P6443'
+    WIKI_PROPERTY_ID = "P6443"
     DEFAULT_MODEL = Performance

     @classmethod

@@ -29,24 +29,51 @@ class DoubanDrama(AbstractSite):
         else:
             raise ParseError(self, "title")

-        data['other_titles'] = [s.strip() for s in title_elem[1:]]
-        other_title_elem = h.xpath("//dl//dt[text()='又名:']/following::dd[@itemprop='name']/text()")
+        data["other_titles"] = [s.strip() for s in title_elem[1:]]
+        other_title_elem = h.xpath(
+            "//dl//dt[text()='又名:']/following::dd[@itemprop='name']/text()"
+        )
         if len(other_title_elem) > 0:
-            data['other_titles'].append(other_title_elem[0].strip())
+            data["other_titles"].append(other_title_elem[0].strip())

         plot_elem = h.xpath("//div[@id='link-report']/text()")
         if len(plot_elem) == 0:
             plot_elem = h.xpath("//div[@class='abstract']/text()")
-        data['brief'] = '\n'.join(plot_elem) if len(plot_elem) > 0 else ''
+        data["brief"] = "\n".join(plot_elem) if len(plot_elem) > 0 else ""

-        data['genres'] = [s.strip() for s in h.xpath("//dl//dt[text()='类型:']/following-sibling::dd[@itemprop='genre']/text()")]
-        data['versions'] = [s.strip() for s in h.xpath("//dl//dt[text()='版本:']/following-sibling::dd[@class='titles']/a//text()")]
-        data['directors'] = [s.strip() for s in h.xpath("//div[@class='meta']/dl//dt[text()='导演:']/following-sibling::dd/a[@itemprop='director']//text()")]
-        data['playwrights'] = [s.strip() for s in h.xpath("//div[@class='meta']/dl//dt[text()='编剧:']/following-sibling::dd/a[@itemprop='author']//text()")]
-        data['actors'] = [s.strip() for s in h.xpath("//div[@class='meta']/dl//dt[text()='主演:']/following-sibling::dd/a[@itemprop='actor']//text()")]
+        data["genres"] = [
+            s.strip()
+            for s in h.xpath(
+                "//dl//dt[text()='类型:']/following-sibling::dd[@itemprop='genre']/text()"
+            )
+        ]
+        data["versions"] = [
+            s.strip()
+            for s in h.xpath(
+                "//dl//dt[text()='版本:']/following-sibling::dd[@class='titles']/a//text()"
+            )
+        ]
+        data["directors"] = [
+            s.strip()
+            for s in h.xpath(
+                "//div[@class='meta']/dl//dt[text()='导演:']/following-sibling::dd/a[@itemprop='director']//text()"
+            )
+        ]
+        data["playwrights"] = [
+            s.strip()
+            for s in h.xpath(
+                "//div[@class='meta']/dl//dt[text()='编剧:']/following-sibling::dd/a[@itemprop='author']//text()"
+            )
+        ]
+        data["actors"] = [
+            s.strip()
+            for s in h.xpath(
+                "//div[@class='meta']/dl//dt[text()='主演:']/following-sibling::dd/a[@itemprop='actor']//text()"
+            )
+        ]

         img_url_elem = h.xpath("//img[@itemprop='image']/@src")
-        data['cover_image_url'] = img_url_elem[0].strip() if img_url_elem else None
+        data["cover_image_url"] = img_url_elem[0].strip() if img_url_elem else None

         pd = ResourceContent(metadata=data)
         if pd.metadata["cover_image_url"]:

@@ -55,5 +82,7 @@ class DoubanDrama(AbstractSite):
                 pd.cover_image = imgdl.download().content
                 pd.cover_image_extention = imgdl.extention
             except Exception:
-                _logger.debug(f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}')
+                _logger.debug(
+                    f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
+                )
         return pd

@@ -12,8 +12,11 @@ _logger = logging.getLogger(__name__)
 class DoubanGame(AbstractSite):
     SITE_NAME = SiteName.Douban
     ID_TYPE = IdType.DoubanGame
-    URL_PATTERNS = [r"\w+://www\.douban\.com/game/(\d+)/{0,1}", r"\w+://m.douban.com/game/subject/(\d+)/{0,1}"]
-    WIKI_PROPERTY_ID = ''
+    URL_PATTERNS = [
+        r"\w+://www\.douban\.com/game/(\d+)/{0,1}",
+        r"\w+://m.douban.com/game/subject/(\d+)/{0,1}",
+    ]
+    WIKI_PROPERTY_ID = ""
     DEFAULT_MODEL = Game

     @classmethod

@@ -29,49 +32,69 @@ class DoubanGame(AbstractSite):
             raise ParseError(self, "title")

         other_title_elem = content.xpath(
-            "//dl[@class='game-attr']//dt[text()='别名:']/following-sibling::dd[1]/text()")
-        other_title = other_title_elem[0].strip().split(' / ') if other_title_elem else None
+            "//dl[@class='game-attr']//dt[text()='别名:']/following-sibling::dd[1]/text()"
+        )
+        other_title = (
+            other_title_elem[0].strip().split(" / ") if other_title_elem else None
+        )

         developer_elem = content.xpath(
-            "//dl[@class='game-attr']//dt[text()='开发商:']/following-sibling::dd[1]/text()")
-        developer = developer_elem[0].strip().split(' / ') if developer_elem else None
+            "//dl[@class='game-attr']//dt[text()='开发商:']/following-sibling::dd[1]/text()"
+        )
+        developer = developer_elem[0].strip().split(" / ") if developer_elem else None

         publisher_elem = content.xpath(
-            "//dl[@class='game-attr']//dt[text()='发行商:']/following-sibling::dd[1]/text()")
-        publisher = publisher_elem[0].strip().split(' / ') if publisher_elem else None
+            "//dl[@class='game-attr']//dt[text()='发行商:']/following-sibling::dd[1]/text()"
+        )
+        publisher = publisher_elem[0].strip().split(" / ") if publisher_elem else None

         platform_elem = content.xpath(
-            "//dl[@class='game-attr']//dt[text()='平台:']/following-sibling::dd[1]/a/text()")
+            "//dl[@class='game-attr']//dt[text()='平台:']/following-sibling::dd[1]/a/text()"
+        )
         platform = platform_elem if platform_elem else None

         genre_elem = content.xpath(
-            "//dl[@class='game-attr']//dt[text()='类型:']/following-sibling::dd[1]/a/text()")
+            "//dl[@class='game-attr']//dt[text()='类型:']/following-sibling::dd[1]/a/text()"
+        )
         genre = None
         if genre_elem:
-            genre = [g for g in genre_elem if g != '游戏']
+            genre = [g for g in genre_elem if g != "游戏"]

         date_elem = content.xpath(
-            "//dl[@class='game-attr']//dt[text()='发行日期:']/following-sibling::dd[1]/text()")
-        release_date = dateparser.parse(date_elem[0].strip()).strftime('%Y-%m-%d') if date_elem else None
+            "//dl[@class='game-attr']//dt[text()='发行日期:']/following-sibling::dd[1]/text()"
+        )
+        release_date = (
+            dateparser.parse(date_elem[0].strip()).strftime("%Y-%m-%d")
+            if date_elem
+            else None
+        )

         brief_elem = content.xpath("//div[@class='mod item-desc']/p/text()")
-        brief = '\n'.join(brief_elem) if brief_elem else None
+        brief = "\n".join(brief_elem) if brief_elem else None

         img_url_elem = content.xpath(
-            "//div[@class='item-subject-info']/div[@class='pic']//img/@src")
+            "//div[@class='item-subject-info']/div[@class='pic']//img/@src"
+        )
         img_url = img_url_elem[0].strip() if img_url_elem else None

-        pd = ResourceContent(metadata={
-            'title': title,
-            'other_title': other_title,
-            'developer': developer,
-            'publisher': publisher,
-            'release_date': release_date,
-            'genre': genre,
-            'platform': platform,
-            'brief': brief,
-            'cover_image_url': img_url
-        })
+        pd = ResourceContent(
+            metadata={
+                "title": title,
+                "other_title": other_title,
+                "developer": developer,
+                "publisher": publisher,
+                "release_date": release_date,
+                "genre": genre,
+                "platform": platform,
+                "brief": brief,
+                "cover_image_url": img_url,
+            }
+        )
         if pd.metadata["cover_image_url"]:
-            pd.cover_image, pd.cover_image_extention = BasicImageDownloader.download_image(pd.metadata['cover_image_url'], self.url)
+            (
+                pd.cover_image,
+                pd.cover_image_extention,
+            ) = BasicImageDownloader.download_image(
+                pd.metadata["cover_image_url"], self.url
+            )
         return pd

@@ -15,8 +15,11 @@ _logger = logging.getLogger(__name__)
 class DoubanMovie(AbstractSite):
     SITE_NAME = SiteName.Douban
     ID_TYPE = IdType.DoubanMovie
-    URL_PATTERNS = [r"\w+://movie\.douban\.com/subject/(\d+)/{0,1}", r"\w+://m.douban.com/movie/subject/(\d+)/{0,1}"]
-    WIKI_PROPERTY_ID = '?'
+    URL_PATTERNS = [
+        r"\w+://movie\.douban\.com/subject/(\d+)/{0,1}",
+        r"\w+://m.douban.com/movie/subject/(\d+)/{0,1}",
+    ]
+    WIKI_PROPERTY_ID = "?"
     # no DEFAULT_MODEL as it may be either TV Season and Movie

     @classmethod

@@ -27,16 +30,16 @@ class DoubanMovie(AbstractSite):
         content = DoubanDownloader(self.url).download().html()

         try:
-            raw_title = content.xpath(
-                "//span[@property='v:itemreviewed']/text()")[0].strip()
+            raw_title = content.xpath("//span[@property='v:itemreviewed']/text()")[
+                0
+            ].strip()
         except IndexError:
-            raise ParseError(self, 'title')
+            raise ParseError(self, "title")

-        orig_title = content.xpath(
-            "//img[@rel='v:image']/@alt")[0].strip()
+        orig_title = content.xpath("//img[@rel='v:image']/@alt")[0].strip()
         title = raw_title.split(orig_title)[0].strip()
         # if has no chinese title
-        if title == '':
+        if title == "":
             title = orig_title

         if title == orig_title:

@@ -44,107 +47,134 @@ class DoubanMovie(AbstractSite):

         # there are two html formats for authors and translators
         other_title_elem = content.xpath(
-            "//div[@id='info']//span[text()='又名:']/following-sibling::text()[1]")
-        other_title = other_title_elem[0].strip().split(
-            ' / ') if other_title_elem else None
+            "//div[@id='info']//span[text()='又名:']/following-sibling::text()[1]"
+        )
+        other_title = (
+            other_title_elem[0].strip().split(" / ") if other_title_elem else None
+        )

         imdb_elem = content.xpath(
-            "//div[@id='info']//span[text()='IMDb链接:']/following-sibling::a[1]/text()")
+            "//div[@id='info']//span[text()='IMDb链接:']/following-sibling::a[1]/text()"
+        )
         if not imdb_elem:
             imdb_elem = content.xpath(
-                "//div[@id='info']//span[text()='IMDb:']/following-sibling::text()[1]")
+                "//div[@id='info']//span[text()='IMDb:']/following-sibling::text()[1]"
+            )
         imdb_code = imdb_elem[0].strip() if imdb_elem else None

         director_elem = content.xpath(
-            "//div[@id='info']//span[text()='导演']/following-sibling::span[1]/a/text()")
+            "//div[@id='info']//span[text()='导演']/following-sibling::span[1]/a/text()"
+        )
         director = director_elem if director_elem else None

         playwright_elem = content.xpath(
-            "//div[@id='info']//span[text()='编剧']/following-sibling::span[1]/a/text()")
-        playwright = list(map(lambda a: a[:200], playwright_elem)) if playwright_elem else None
+            "//div[@id='info']//span[text()='编剧']/following-sibling::span[1]/a/text()"
+        )
+        playwright = (
+            list(map(lambda a: a[:200], playwright_elem)) if playwright_elem else None
+        )

         actor_elem = content.xpath(
-            "//div[@id='info']//span[text()='主演']/following-sibling::span[1]/a/text()")
+            "//div[@id='info']//span[text()='主演']/following-sibling::span[1]/a/text()"
+        )
         actor = list(map(lambda a: a[:200], actor_elem)) if actor_elem else None

         genre_elem = content.xpath("//span[@property='v:genre']/text()")
         genre = []
         if genre_elem:
             for g in genre_elem:
-                g = g.split(' ')[0]
-                if g == '紀錄片': # likely some original data on douban was corrupted
-                    g = '纪录片'
-                elif g == '鬼怪':
-                    g = '惊悚'
+                g = g.split(" ")[0]
+                if g == "紀錄片": # likely some original data on douban was corrupted
+                    g = "纪录片"
+                elif g == "鬼怪":
+                    g = "惊悚"
                 genre.append(g)

-        showtime_elem = content.xpath(
-            "//span[@property='v:initialReleaseDate']/text()")
+        showtime_elem = content.xpath("//span[@property='v:initialReleaseDate']/text()")
         if showtime_elem:
             showtime = []
             for st in showtime_elem:
-                parts = st.split('(')
+                parts = st.split("(")
                 if len(parts) == 1:
-                    time = st.split('(')[0]
-                    region = ''
+                    time = st.split("(")[0]
+                    region = ""
                 else:
-                    time = st.split('(')[0]
-                    region = st.split('(')[1][0:-1]
+                    time = st.split("(")[0]
+                    region = st.split("(")[1][0:-1]
                 showtime.append({time: region})
         else:
             showtime = None

         site_elem = content.xpath(
-            "//div[@id='info']//span[text()='官方网站:']/following-sibling::a[1]/@href")
+            "//div[@id='info']//span[text()='官方网站:']/following-sibling::a[1]/@href"
+        )
         site = site_elem[0].strip()[:200] if site_elem else None
-        if site and not re.match(r'http.+', site):
+        if site and not re.match(r"http.+", site):
             site = None

         area_elem = content.xpath(
-            "//div[@id='info']//span[text()='制片国家/地区:']/following-sibling::text()[1]")
+            "//div[@id='info']//span[text()='制片国家/地区:']/following-sibling::text()[1]"
+        )
         if area_elem:
-            area = [a.strip()[:100] for a in area_elem[0].split('/')]
+            area = [a.strip()[:100] for a in area_elem[0].split("/")]
         else:
             area = None

         language_elem = content.xpath(
-            "//div[@id='info']//span[text()='语言:']/following-sibling::text()[1]")
+            "//div[@id='info']//span[text()='语言:']/following-sibling::text()[1]"
+        )
         if language_elem:
-            language = [a.strip() for a in language_elem[0].split(' / ')]
+            language = [a.strip() for a in language_elem[0].split(" / ")]
         else:
             language = None

         year_elem = content.xpath("//span[@class='year']/text()")
-        year = int(re.search(r'\d+', year_elem[0])[0]) if year_elem and re.search(r'\d+', year_elem[0]) else None
+        year = (
+            int(re.search(r"\d+", year_elem[0])[0])
+            if year_elem and re.search(r"\d+", year_elem[0])
+            else None
+        )

         duration_elem = content.xpath("//span[@property='v:runtime']/text()")
         other_duration_elem = content.xpath(
-            "//span[@property='v:runtime']/following-sibling::text()[1]")
+            "//span[@property='v:runtime']/following-sibling::text()[1]"
+        )
         if duration_elem:
             duration = duration_elem[0].strip()
             if other_duration_elem:
                 duration += other_duration_elem[0].rstrip()
-            duration = duration.split('/')[0].strip()
+            duration = duration.split("/")[0].strip()
         else:
             duration = None

         season_elem = content.xpath(
-            "//*[@id='season']/option[@selected='selected']/text()")
+            "//*[@id='season']/option[@selected='selected']/text()"
+        )
         if not season_elem:
             season_elem = content.xpath(
-                "//div[@id='info']//span[text()='季数:']/following-sibling::text()[1]")
+                "//div[@id='info']//span[text()='季数:']/following-sibling::text()[1]"
+            )
             season = int(season_elem[0].strip()) if season_elem else None
         else:
             season = int(season_elem[0].strip())

         episodes_elem = content.xpath(
-            "//div[@id='info']//span[text()='集数:']/following-sibling::text()[1]")
-        episodes = int(episodes_elem[0].strip()) if episodes_elem and episodes_elem[0].strip().isdigit() else None
+            "//div[@id='info']//span[text()='集数:']/following-sibling::text()[1]"
+        )
+        episodes = (
+            int(episodes_elem[0].strip())
+            if episodes_elem and episodes_elem[0].strip().isdigit()
+            else None
+        )

         single_episode_length_elem = content.xpath(
-            "//div[@id='info']//span[text()='单集片长:']/following-sibling::text()[1]")
-        single_episode_length = single_episode_length_elem[0].strip(
-        )[:100] if single_episode_length_elem else None
+            "//div[@id='info']//span[text()='单集片长:']/following-sibling::text()[1]"
+        )
+        single_episode_length = (
+            single_episode_length_elem[0].strip()[:100]
+            if single_episode_length_elem
+            else None
+        )

         # if has field `episodes` not none then must be series
         is_series = True if episodes else False

@@ -152,64 +182,87 @@ class DoubanMovie(AbstractSite):
         brief_elem = content.xpath("//span[@class='all hidden']")
         if not brief_elem:
             brief_elem = content.xpath("//span[@property='v:summary']")
-        brief = '\n'.join([e.strip() for e in brief_elem[0].xpath(
-            './text()')]) if brief_elem else None
+        brief = (
+            "\n".join([e.strip() for e in brief_elem[0].xpath("./text()")])
+            if brief_elem
+            else None
+        )

         img_url_elem = content.xpath("//img[@rel='v:image']/@src")
         img_url = img_url_elem[0].strip() if img_url_elem else None

-        pd = ResourceContent(metadata={
-            'title': title,
-            'orig_title': orig_title,
-            'other_title': other_title,
-            'imdb_code': imdb_code,
-            'director': director,
-            'playwright': playwright,
-            'actor': actor,
-            'genre': genre,
-            'showtime': showtime,
-            'site': site,
-            'area': area,
-            'language': language,
-            'year': year,
-            'duration': duration,
-            'season_number': season,
-            'episode_count': episodes,
-            'single_episode_length': single_episode_length,
-            'brief': brief,
-            'is_series': is_series,
-            'cover_image_url': img_url,
-        })
-        pd.metadata['preferred_model'] = ('TVSeason' if season else 'TVShow') if is_series else 'Movie'
+        pd = ResourceContent(
+            metadata={
+                "title": title,
+                "orig_title": orig_title,
+                "other_title": other_title,
+                "imdb_code": imdb_code,
+                "director": director,
+                "playwright": playwright,
+                "actor": actor,
+                "genre": genre,
+                "showtime": showtime,
+                "site": site,
+                "area": area,
+                "language": language,
+                "year": year,
+                "duration": duration,
+                "season_number": season,
+                "episode_count": episodes,
+                "single_episode_length": single_episode_length,
+                "brief": brief,
+                "is_series": is_series,
+                "cover_image_url": img_url,
+            }
+        )
+        pd.metadata["preferred_model"] = (
+            ("TVSeason" if season else "TVShow") if is_series else "Movie"
+        )

         if imdb_code:
             res_data = search_tmdb_by_imdb_id(imdb_code)
             tmdb_show_id = None
-            if 'movie_results' in res_data and len(res_data['movie_results']) > 0:
-                pd.metadata['preferred_model'] = 'Movie'
-            elif 'tv_results' in res_data and len(res_data['tv_results']) > 0:
-                pd.metadata['preferred_model'] = 'TVShow'
-            elif 'tv_season_results' in res_data and len(res_data['tv_season_results']) > 0:
-                pd.metadata['preferred_model'] = 'TVSeason'
-                tmdb_show_id = res_data['tv_season_results'][0]['show_id']
-            elif 'tv_episode_results' in res_data and len(res_data['tv_episode_results']) > 0:
-                pd.metadata['preferred_model'] = 'TVSeason'
-                tmdb_show_id = res_data['tv_episode_results'][0]['show_id']
-                if res_data['tv_episode_results'][0]['episode_number'] != 1:
-                    _logger.warning(f'Douban Movie {self.url} mapping to unexpected imdb episode {imdb_code}')
-                    resp = query_tmdb_tv_episode(tmdb_show_id, res_data['tv_episode_results'][0]['season_number'], 1)
-                    imdb_code = resp['external_ids']['imdb_id']
-                    _logger.warning(f'Douban Movie {self.url} re-mapped to imdb episode {imdb_code}')
+            if "movie_results" in res_data and len(res_data["movie_results"]) > 0:
+                pd.metadata["preferred_model"] = "Movie"
+            elif "tv_results" in res_data and len(res_data["tv_results"]) > 0:
+                pd.metadata["preferred_model"] = "TVShow"
+            elif (
+                "tv_season_results" in res_data
+                and len(res_data["tv_season_results"]) > 0
+            ):
+                pd.metadata["preferred_model"] = "TVSeason"
+                tmdb_show_id = res_data["tv_season_results"][0]["show_id"]
+            elif (
+                "tv_episode_results" in res_data
+                and len(res_data["tv_episode_results"]) > 0
+            ):
+                pd.metadata["preferred_model"] = "TVSeason"
+                tmdb_show_id = res_data["tv_episode_results"][0]["show_id"]
+                if res_data["tv_episode_results"][0]["episode_number"] != 1:
+                    _logger.warning(
+                        f"Douban Movie {self.url} mapping to unexpected imdb episode {imdb_code}"
+                    )
+                    resp = query_tmdb_tv_episode(
+                        tmdb_show_id,
+                        res_data["tv_episode_results"][0]["season_number"],
+                        1,
+                    )
+                    imdb_code = resp["external_ids"]["imdb_id"]
+                    _logger.warning(
+                        f"Douban Movie {self.url} re-mapped to imdb episode {imdb_code}"
+                    )

             pd.lookup_ids[IdType.IMDB] = imdb_code
             if tmdb_show_id:
-                pd.metadata['required_resources'] = [{
-                    'model': 'TVShow',
-                    'id_type': IdType.TMDB_TV,
-                    'id_value': tmdb_show_id,
-                    'title': title,
-                    'url': TMDB_TV.id_to_url(tmdb_show_id),
-                }]
+                pd.metadata["required_resources"] = [
+                    {
+                        "model": "TVShow",
+                        "id_type": IdType.TMDB_TV,
+                        "id_value": tmdb_show_id,
+                        "title": title,
+                        "url": TMDB_TV.id_to_url(tmdb_show_id),
+                    }
+                ]
         # TODO parse sister seasons
         # pd.metadata['related_resources'] = []
         if pd.metadata["cover_image_url"]:

@@ -218,5 +271,7 @@ class DoubanMovie(AbstractSite):
                 pd.cover_image = imgdl.download().content
                 pd.cover_image_extention = imgdl.extention
             except Exception:
-                _logger.debug(f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}')
+                _logger.debug(
+                    f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
+                )
         return pd

@ -12,8 +12,11 @@ _logger = logging.getLogger(__name__)
|
|||
class DoubanMusic(AbstractSite):
|
||||
SITE_NAME = SiteName.Douban
|
||||
ID_TYPE = IdType.DoubanMusic
|
||||
URL_PATTERNS = [r"\w+://music\.douban\.com/subject/(\d+)/{0,1}", r"\w+://m.douban.com/music/subject/(\d+)/{0,1}"]
|
||||
WIKI_PROPERTY_ID = ''
|
||||
URL_PATTERNS = [
|
||||
r"\w+://music\.douban\.com/subject/(\d+)/{0,1}",
|
||||
r"\w+://m.douban.com/music/subject/(\d+)/{0,1}",
|
||||
]
|
||||
WIKI_PROPERTY_ID = ""
|
||||
DEFAULT_MODEL = Album
|
||||
|
||||
@classmethod
|
||||
|
@@ -28,75 +31,95 @@ class DoubanMusic(AbstractSite):
        if not title:
            raise ParseError(self, "title")

        artists_elem = content.xpath("//div[@id='info']/span/span[@class='pl']/a/text()")
        artist = None if not artists_elem else list(map(lambda a: a[:200], artists_elem))
        artists_elem = content.xpath(
            "//div[@id='info']/span/span[@class='pl']/a/text()"
        )
        artist = (
            None if not artists_elem else list(map(lambda a: a[:200], artists_elem))
        )

        genre_elem = content.xpath(
            "//div[@id='info']//span[text()='流派:']/following::text()[1]")
            "//div[@id='info']//span[text()='流派:']/following::text()[1]"
        )
        genre = genre_elem[0].strip() if genre_elem else None

        date_elem = content.xpath(
            "//div[@id='info']//span[text()='发行时间:']/following::text()[1]")
        release_date = dateparser.parse(date_elem[0].strip()).strftime('%Y-%m-%d') if date_elem else None
            "//div[@id='info']//span[text()='发行时间:']/following::text()[1]"
        )
        release_date = (
            dateparser.parse(date_elem[0].strip()).strftime("%Y-%m-%d")
            if date_elem
            else None
        )

        company_elem = content.xpath(
            "//div[@id='info']//span[text()='出版者:']/following::text()[1]")
            "//div[@id='info']//span[text()='出版者:']/following::text()[1]"
        )
        company = company_elem[0].strip() if company_elem else None

        track_list_elem = content.xpath(
            "//div[@class='track-list']/div[@class='indent']/div/text()"
        )
        if track_list_elem:
            track_list = '\n'.join([track.strip() for track in track_list_elem])
            track_list = "\n".join([track.strip() for track in track_list_elem])
        else:
            track_list = None

        brief_elem = content.xpath("//span[@class='all hidden']")
        if not brief_elem:
            brief_elem = content.xpath("//span[@property='v:summary']")
        brief = '\n'.join([e.strip() for e in brief_elem[0].xpath(
            './text()')]) if brief_elem else None
        brief = (
            "\n".join([e.strip() for e in brief_elem[0].xpath("./text()")])
            if brief_elem
            else None
        )

        img_url_elem = content.xpath("//div[@id='mainpic']//img/@src")
        img_url = img_url_elem[0].strip() if img_url_elem else None

        data = {
            'title': title,
            'artist': artist,
            'genre': genre,
            'release_date': release_date,
            'duration': None,
            'company': [company],
            'track_list': track_list,
            'brief': brief,
            'cover_image_url': img_url
            "title": title,
            "artist": artist,
            "genre": genre,
            "release_date": release_date,
            "duration": None,
            "company": [company],
            "track_list": track_list,
            "brief": brief,
            "cover_image_url": img_url,
        }
        gtin = None
        isrc = None
        other_elem = content.xpath(
            "//div[@id='info']//span[text()='又名:']/following-sibling::text()[1]")
            "//div[@id='info']//span[text()='又名:']/following-sibling::text()[1]"
        )
        if other_elem:
            data['other_title'] = other_elem[0].strip()
            data["other_title"] = other_elem[0].strip()
        other_elem = content.xpath(
            "//div[@id='info']//span[text()='专辑类型:']/following-sibling::text()[1]")
            "//div[@id='info']//span[text()='专辑类型:']/following-sibling::text()[1]"
        )
        if other_elem:
            data['album_type'] = other_elem[0].strip()
            data["album_type"] = other_elem[0].strip()
        other_elem = content.xpath(
            "//div[@id='info']//span[text()='介质:']/following-sibling::text()[1]")
            "//div[@id='info']//span[text()='介质:']/following-sibling::text()[1]"
        )
        if other_elem:
            data['media'] = other_elem[0].strip()
            data["media"] = other_elem[0].strip()
        other_elem = content.xpath(
            "//div[@id='info']//span[text()='ISRC:']/following-sibling::text()[1]")
            "//div[@id='info']//span[text()='ISRC:']/following-sibling::text()[1]"
        )
        if other_elem:
            isrc = other_elem[0].strip()
        other_elem = content.xpath(
            "//div[@id='info']//span[text()='条形码:']/following-sibling::text()[1]")
            "//div[@id='info']//span[text()='条形码:']/following-sibling::text()[1]"
        )
        if other_elem:
            gtin = other_elem[0].strip()
        other_elem = content.xpath(
            "//div[@id='info']//span[text()='碟片数:']/following-sibling::text()[1]")
            "//div[@id='info']//span[text()='碟片数:']/following-sibling::text()[1]"
        )
        if other_elem:
            data['disc_count'] = other_elem[0].strip()
            data["disc_count"] = other_elem[0].strip()

        pd = ResourceContent(metadata=data)
        if gtin:
@@ -109,5 +132,7 @@ class DoubanMusic(AbstractSite):
                pd.cover_image = imgdl.download().content
                pd.cover_image_extention = imgdl.extention
            except Exception:
                _logger.debug(f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}')
                _logger.debug(
                    f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
                )
        return pd

@@ -14,7 +14,7 @@ class GoodreadsDownloader(RetryDownloader):
        if response is None:
            return RESPONSE_NETWORK_ERROR
        elif response.status_code == 200:
            if response.text.find('__NEXT_DATA__') != -1:
            if response.text.find("__NEXT_DATA__") != -1:
                return RESPONSE_OK
            else:
                # Goodreads may return legacy version for a/b testing
@@ -28,9 +28,12 @@ class GoodreadsDownloader(RetryDownloader):
class Goodreads(AbstractSite):
    SITE_NAME = SiteName.Goodreads
    ID_TYPE = IdType.Goodreads
    WIKI_PROPERTY_ID = 'P2968'
    WIKI_PROPERTY_ID = "P2968"
    DEFAULT_MODEL = Edition
    URL_PATTERNS = [r".+goodreads.com/.*book/show/(\d+)", r".+goodreads.com/.*book/(\d+)"]
    URL_PATTERNS = [
        r".+goodreads.com/.*book/show/(\d+)",
        r".+goodreads.com/.*book/(\d+)",
    ]

    @classmethod
    def id_to_url(self, id_value):
@@ -48,39 +51,41 @@ class Goodreads(AbstractSite):
        elem = h.xpath('//script[@id="__NEXT_DATA__"]/text()')
        src = elem[0].strip() if elem else None
        if not src:
            raise ParseError(self, '__NEXT_DATA__ element')
        d = json.loads(src)['props']['pageProps']['apolloState']
        o = {'Book': [], 'Work': [], 'Series': [], 'Contributor': []}
            raise ParseError(self, "__NEXT_DATA__ element")
        d = json.loads(src)["props"]["pageProps"]["apolloState"]
        o = {"Book": [], "Work": [], "Series": [], "Contributor": []}
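        # note: the loop below buckets the apolloState records embedded in
        # __NEXT_DATA__ by their __typename, so Book and Work entries can be
        # picked out afterwards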
        for v in d.values():
            t = v.get('__typename')
            t = v.get("__typename")
            if t and t in o:
                o[t].append(v)
        b = next(filter(lambda x: x.get('title'), o['Book']), None)
        b = next(filter(lambda x: x.get("title"), o["Book"]), None)
        if not b:
            # Goodreads may return empty page template when internal service timeouts
            raise ParseError(self, 'Book in __NEXT_DATA__ json')
        data['title'] = b['title']
        data['brief'] = b['description']
            raise ParseError(self, "Book in __NEXT_DATA__ json")
        data["title"] = b["title"]
        data["brief"] = b["description"]
        ids = {}
        t, n = detect_isbn_asin(b['details'].get('asin'))
        t, n = detect_isbn_asin(b["details"].get("asin"))
        if t:
            ids[t] = n
        t, n = detect_isbn_asin(b['details'].get('isbn13'))
        t, n = detect_isbn_asin(b["details"].get("isbn13"))
        if t:
            ids[t] = n
        # amazon has a known problem to use another book's isbn as asin
        # so we alway overwrite asin-converted isbn with real isbn
        data['pages'] = b['details'].get('numPages')
        data['cover_image_url'] = b['imageUrl']
        w = next(filter(lambda x: x.get('details'), o['Work']), None)
        data["pages"] = b["details"].get("numPages")
        data["cover_image_url"] = b["imageUrl"]
        w = next(filter(lambda x: x.get("details"), o["Work"]), None)
        if w:
            data['required_resources'] = [{
                'model': 'Work',
                'id_type': IdType.Goodreads_Work,
                'id_value': str(w['legacyId']),
                'title': w['details']['originalTitle'],
                'url': w['editions']['webUrl'],
            }]
            data["required_resources"] = [
                {
                    "model": "Work",
                    "id_type": IdType.Goodreads_Work,
                    "id_value": str(w["legacyId"]),
                    "title": w["details"]["originalTitle"],
                    "url": w["editions"]["webUrl"],
                }
            ]
        pd = ResourceContent(metadata=data)
        pd.lookup_ids[IdType.ISBN] = ids.get(IdType.ISBN)
        pd.lookup_ids[IdType.ASIN] = ids.get(IdType.ASIN)
@@ -90,7 +95,9 @@ class Goodreads(AbstractSite):
                pd.cover_image = imgdl.download().content
                pd.cover_image_extention = imgdl.extention
            except Exception:
                _logger.debug(f'failed to download cover for {self.url} from {data["cover_image_url"]}')
                _logger.debug(
                    f'failed to download cover for {self.url} from {data["cover_image_url"]}'
                )
        return pd

@@ -98,7 +105,7 @@ class Goodreads(AbstractSite):
class Goodreads_Work(AbstractSite):
    SITE_NAME = SiteName.Goodreads
    ID_TYPE = IdType.Goodreads_Work
    WIKI_PROPERTY_ID = ''
    WIKI_PROPERTY_ID = ""
    DEFAULT_MODEL = Work
    URL_PATTERNS = [r".+goodreads.com/work/editions/(\d+)"]

@@ -111,14 +118,18 @@ class Goodreads_Work(AbstractSite):
        title_elem = content.xpath("//h1/a/text()")
        title = title_elem[0].strip() if title_elem else None
        if not title:
            raise ParseError(self, 'title')
            raise ParseError(self, "title")
        author_elem = content.xpath("//h2/a/text()")
        author = author_elem[0].strip() if author_elem else None
        first_published_elem = content.xpath("//h2/span/text()")
        first_published = first_published_elem[0].strip() if first_published_elem else None
        pd = ResourceContent(metadata={
            'title': title,
            'author': author,
            'first_published': first_published
        })
        first_published = (
            first_published_elem[0].strip() if first_published_elem else None
        )
        pd = ResourceContent(
            metadata={
                "title": title,
                "author": author,
                "first_published": first_published,
            }
        )
        return pd

@@ -16,7 +16,7 @@ class GoogleBooks(AbstractSite):
        r"https://www\.google\.co[^/]+/books/edition/[^/]+/([^&#?]+)",
        r"https://books\.google\.co[^/]+/books/about/[^?]+?id=([^&#?]+)",
    ]
    WIKI_PROPERTY_ID = ''
    WIKI_PROPERTY_ID = ""
    DEFAULT_MODEL = Edition

    @classmethod
@@ -24,57 +24,76 @@ class GoogleBooks(AbstractSite):
        return "https://books.google.com/books?id=" + id_value

    def scrape(self):
        api_url = f'https://www.googleapis.com/books/v1/volumes/{self.id_value}'
        api_url = f"https://www.googleapis.com/books/v1/volumes/{self.id_value}"
        b = BasicDownloader(api_url).download().json()
        other = {}
        title = b['volumeInfo']['title']
        subtitle = b['volumeInfo']['subtitle'] if 'subtitle' in b['volumeInfo'] else None
        title = b["volumeInfo"]["title"]
        subtitle = (
            b["volumeInfo"]["subtitle"] if "subtitle" in b["volumeInfo"] else None
        )
        pub_year = None
        pub_month = None
        if 'publishedDate' in b['volumeInfo']:
            pub_date = b['volumeInfo']['publishedDate'].split('-')
        if "publishedDate" in b["volumeInfo"]:
            pub_date = b["volumeInfo"]["publishedDate"].split("-")
            pub_year = pub_date[0]
            pub_month = pub_date[1] if len(pub_date) > 1 else None
        pub_house = b['volumeInfo']['publisher'] if 'publisher' in b['volumeInfo'] else None
        language = b['volumeInfo']['language'] if 'language' in b['volumeInfo'] else None
        pages = b['volumeInfo']['pageCount'] if 'pageCount' in b['volumeInfo'] else None
        if 'mainCategory' in b['volumeInfo']:
            other['分类'] = b['volumeInfo']['mainCategory']
        authors = b['volumeInfo']['authors'] if 'authors' in b['volumeInfo'] else None
        if 'description' in b['volumeInfo']:
            brief = b['volumeInfo']['description']
        elif 'textSnippet' in b['volumeInfo']:
        pub_house = (
            b["volumeInfo"]["publisher"] if "publisher" in b["volumeInfo"] else None
        )
        language = (
            b["volumeInfo"]["language"] if "language" in b["volumeInfo"] else None
        )
        pages = b["volumeInfo"]["pageCount"] if "pageCount" in b["volumeInfo"] else None
        if "mainCategory" in b["volumeInfo"]:
            other["分类"] = b["volumeInfo"]["mainCategory"]
        authors = b["volumeInfo"]["authors"] if "authors" in b["volumeInfo"] else None
        if "description" in b["volumeInfo"]:
            brief = b["volumeInfo"]["description"]
        elif "textSnippet" in b["volumeInfo"]:
            brief = b["volumeInfo"]["textSnippet"]["searchInfo"]
        else:
            brief = ''
        brief = re.sub(r'<.*?>', '', brief.replace('<br', '\n<br'))
        img_url = b['volumeInfo']['imageLinks']['thumbnail'] if 'imageLinks' in b['volumeInfo'] else None
            brief = ""
        brief = re.sub(r"<.*?>", "", brief.replace("<br", "\n<br"))
        img_url = (
            b["volumeInfo"]["imageLinks"]["thumbnail"]
            if "imageLinks" in b["volumeInfo"]
            else None
        )
        isbn10 = None
        isbn13 = None
        for iid in b['volumeInfo']['industryIdentifiers'] if 'industryIdentifiers' in b['volumeInfo'] else []:
            if iid['type'] == 'ISBN_10':
                isbn10 = iid['identifier']
            if iid['type'] == 'ISBN_13':
                isbn13 = iid['identifier']
        for iid in (
            b["volumeInfo"]["industryIdentifiers"]
            if "industryIdentifiers" in b["volumeInfo"]
            else []
        ):
            if iid["type"] == "ISBN_10":
                isbn10 = iid["identifier"]
            if iid["type"] == "ISBN_13":
                isbn13 = iid["identifier"]
        isbn = isbn13  # if isbn13 is not None else isbn10

        raw_img, ext = BasicImageDownloader.download_image(img_url, self.url)
        data = {
            'title': title,
            'subtitle': subtitle,
            'orig_title': None,
            'author': authors,
            'translator': None,
            'language': language,
            'pub_house': pub_house,
            'pub_year': pub_year,
            'pub_month': pub_month,
            'binding': None,
            'pages': pages,
            'isbn': isbn,
            'brief': brief,
            'contents': None,
            'other_info': other,
            'cover_image_url': img_url,
            "title": title,
            "subtitle": subtitle,
            "orig_title": None,
            "author": authors,
            "translator": None,
            "language": language,
            "pub_house": pub_house,
            "pub_year": pub_year,
            "pub_month": pub_month,
            "binding": None,
            "pages": pages,
            "isbn": isbn,
            "brief": brief,
            "contents": None,
            "other_info": other,
            "cover_image_url": img_url,
        }
        return ResourceContent(metadata=data, cover_image=raw_img, cover_image_extention=ext, lookup_ids={IdType.ISBN: isbn13})
        return ResourceContent(
            metadata=data,
            cover_image=raw_img,
            cover_image_extention=ext,
            lookup_ids={IdType.ISBN: isbn13},
        )

@@ -19,10 +19,12 @@ _logger = logging.getLogger(__name__)

def _igdb_access_token():
    try:
        token = requests.post(f'https://id.twitch.tv/oauth2/token?client_id={settings.IGDB_CLIENT_ID}&client_secret={settings.IGDB_CLIENT_SECRET}&grant_type=client_credentials').json()['access_token']
        token = requests.post(
            f"https://id.twitch.tv/oauth2/token?client_id={settings.IGDB_CLIENT_ID}&client_secret={settings.IGDB_CLIENT_SECRET}&grant_type=client_credentials"
        ).json()["access_token"]
    except Exception:
        _logger.error('unable to obtain IGDB token')
        token = '<invalid>'
        _logger.error("unable to obtain IGDB token")
        token = "<invalid>"
    return token

@@ -30,11 +32,11 @@ _wrapper = IGDBWrapper(settings.IGDB_CLIENT_ID, _igdb_access_token())


def search_igdb_by_3p_url(steam_url):
    r = IGDB.api_query('websites', f'fields *, game.*; where url = "{steam_url}";')
    r = IGDB.api_query("websites", f'fields *, game.*; where url = "{steam_url}";')
    if not r:
        return None
    r = sorted(r, key=lambda w: w['game']['id'])
    return IGDB(url=r[0]['game']['url'])
    r = sorted(r, key=lambda w: w["game"]["id"])
    return IGDB(url=r[0]["game"]["url"])


@SiteManager.register
@@ -42,7 +44,7 @@ class IGDB(AbstractSite):
    SITE_NAME = SiteName.IGDB
    ID_TYPE = IdType.IGDB
    URL_PATTERNS = [r"\w+://www\.igdb\.com/games/([a-zA-Z0-9\-_]+)"]
    WIKI_PROPERTY_ID = '?'
    WIKI_PROPERTY_ID = "?"
    DEFAULT_MODEL = Game

    @classmethod
@@ -51,64 +53,97 @@ class IGDB(AbstractSite):

    @classmethod
    def api_query(cls, p, q):
        key = 'igdb:' + p + '/' + q
        key = "igdb:" + p + "/" + q
        if get_mock_mode():
            r = BasicDownloader(key).download().json()
        else:
            r = json.loads(_wrapper.api_request(p, q))
            if settings.DOWNLOADER_SAVEDIR:
                with open(settings.DOWNLOADER_SAVEDIR + '/' + get_mock_file(key), 'w', encoding='utf-8') as fp:
                with open(
                    settings.DOWNLOADER_SAVEDIR + "/" + get_mock_file(key),
                    "w",
                    encoding="utf-8",
                ) as fp:
                    fp.write(json.dumps(r))
        return r

    def scrape(self):
        fields = '*, cover.url, genres.name, platforms.name, involved_companies.*, involved_companies.company.name'
        r = self.api_query('games', f'fields {fields}; where url = "{self.url}";')[0]
        brief = r['summary'] if 'summary' in r else ''
        brief += "\n\n" + r['storyline'] if 'storyline' in r else ''
        fields = "*, cover.url, genres.name, platforms.name, involved_companies.*, involved_companies.company.name"
        r = self.api_query("games", f'fields {fields}; where url = "{self.url}";')[0]
        brief = r["summary"] if "summary" in r else ""
        brief += "\n\n" + r["storyline"] if "storyline" in r else ""
        developer = None
        publisher = None
        release_date = None
        genre = None
        platform = None
        if 'involved_companies' in r:
            developer = next(iter([c['company']['name'] for c in r['involved_companies'] if c['developer']]), None)
            publisher = next(iter([c['company']['name'] for c in r['involved_companies'] if c['publisher']]), None)
        if 'platforms' in r:
            ps = sorted(r['platforms'], key=lambda p: p['id'])
            platform = [(p['name'] if p['id'] != 6 else 'Windows') for p in ps]
        if 'first_release_date' in r:
            release_date = datetime.datetime.fromtimestamp(r['first_release_date'], datetime.timezone.utc).strftime('%Y-%m-%d')
        if 'genres' in r:
            genre = [g['name'] for g in r['genres']]
        websites = self.api_query('websites', f'fields *; where game.url = "{self.url}";')
        if "involved_companies" in r:
            developer = next(
                iter(
                    [
                        c["company"]["name"]
                        for c in r["involved_companies"]
                        if c["developer"]
                    ]
                ),
                None,
            )
            publisher = next(
                iter(
                    [
                        c["company"]["name"]
                        for c in r["involved_companies"]
                        if c["publisher"]
                    ]
                ),
                None,
            )
        if "platforms" in r:
            ps = sorted(r["platforms"], key=lambda p: p["id"])
            platform = [(p["name"] if p["id"] != 6 else "Windows") for p in ps]
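            # note: id 6 is IGDB's "PC (Microsoft Windows)" platform, shown
            # here under the shorter label "Windows"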
        if "first_release_date" in r:
            release_date = datetime.datetime.fromtimestamp(
                r["first_release_date"], datetime.timezone.utc
            ).strftime("%Y-%m-%d")
        if "genres" in r:
            genre = [g["name"] for g in r["genres"]]
        websites = self.api_query(
            "websites", f'fields *; where game.url = "{self.url}";'
        )
        steam_url = None
        official_site = None
        for website in websites:
            if website['category'] == 1:
                official_site = website['url']
            elif website['category'] == 13:
                steam_url = website['url']
        pd = ResourceContent(metadata={
            'title': r['name'],
            'other_title': [],
            'developer': [developer],
            'publisher': [publisher],
            'release_date': release_date,
            'genre': genre,
            'platform': platform,
            'brief': brief,
            'official_site': official_site,
            'igdb_id': r['id'],
            'cover_image_url': 'https:' + r['cover']['url'].replace('t_thumb', 't_cover_big'),
        })
            if website["category"] == 1:
                official_site = website["url"]
            elif website["category"] == 13:
                steam_url = website["url"]
        pd = ResourceContent(
            metadata={
                "title": r["name"],
                "other_title": [],
                "developer": [developer],
                "publisher": [publisher],
                "release_date": release_date,
                "genre": genre,
                "platform": platform,
                "brief": brief,
                "official_site": official_site,
                "igdb_id": r["id"],
                "cover_image_url": "https:"
                + r["cover"]["url"].replace("t_thumb", "t_cover_big"),
            }
        )
        if steam_url:
            pd.lookup_ids[IdType.Steam] = SiteManager.get_site_by_id_type(IdType.Steam).url_to_id(steam_url)
            pd.lookup_ids[IdType.Steam] = SiteManager.get_site_by_id_type(
                IdType.Steam
            ).url_to_id(steam_url)
        if pd.metadata["cover_image_url"]:
            imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
            try:
                pd.cover_image = imgdl.download().content
                pd.cover_image_extention = imgdl.extention
            except Exception:
                _logger.debug(f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}')
                _logger.debug(
                    f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
                )
        return pd

@@ -12,8 +12,8 @@ _logger = logging.getLogger(__name__)
class IMDB(AbstractSite):
    SITE_NAME = SiteName.IMDB
    ID_TYPE = IdType.IMDB
    URL_PATTERNS = [r'\w+://www.imdb.com/title/(tt\d+)']
    WIKI_PROPERTY_ID = '?'
    URL_PATTERNS = [r"\w+://www.imdb.com/title/(tt\d+)"]
    WIKI_PROPERTY_ID = "?"

    @classmethod
    def id_to_url(self, id_value):
@@ -22,28 +22,35 @@ class IMDB(AbstractSite):
    def scrape(self):
        self.scraped = False
        res_data = search_tmdb_by_imdb_id(self.id_value)
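        # the IMDB id is resolved through TMDB's find API; the matching TMDB
        # URL is then handed to the corresponding TMDB site class below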
        if 'movie_results' in res_data and len(res_data['movie_results']) > 0:
            url = f"https://www.themoviedb.org/movie/{res_data['movie_results'][0]['id']}"
        elif 'tv_results' in res_data and len(res_data['tv_results']) > 0:
        if "movie_results" in res_data and len(res_data["movie_results"]) > 0:
            url = (
                f"https://www.themoviedb.org/movie/{res_data['movie_results'][0]['id']}"
            )
        elif "tv_results" in res_data and len(res_data["tv_results"]) > 0:
            url = f"https://www.themoviedb.org/tv/{res_data['tv_results'][0]['id']}"
        elif 'tv_season_results' in res_data and len(res_data['tv_season_results']) > 0:
        elif "tv_season_results" in res_data and len(res_data["tv_season_results"]) > 0:
            # this should not happen given IMDB only has ids for either show or episode
            tv_id = res_data['tv_season_results'][0]['show_id']
            season_number = res_data['tv_season_results'][0]['season_number']
            tv_id = res_data["tv_season_results"][0]["show_id"]
            season_number = res_data["tv_season_results"][0]["season_number"]
            url = f"https://www.themoviedb.org/tv/{tv_id}/season/{season_number}"
        elif 'tv_episode_results' in res_data and len(res_data['tv_episode_results']) > 0:
            tv_id = res_data['tv_episode_results'][0]['show_id']
            season_number = res_data['tv_episode_results'][0]['season_number']
            episode_number = res_data['tv_episode_results'][0]['episode_number']
        elif (
            "tv_episode_results" in res_data and len(res_data["tv_episode_results"]) > 0
        ):
            tv_id = res_data["tv_episode_results"][0]["show_id"]
            season_number = res_data["tv_episode_results"][0]["season_number"]
            episode_number = res_data["tv_episode_results"][0]["episode_number"]
            if season_number == 0:
                url = f"https://www.themoviedb.org/tv/{tv_id}/season/{season_number}/episode/{episode_number}"
            elif episode_number == 1:
                url = f"https://www.themoviedb.org/tv/{tv_id}/season/{season_number}"
            else:
                raise ParseError(self, "IMDB id matching TMDB but not first episode, this is not supported")
                raise ParseError(
                    self,
                    "IMDB id matching TMDB but not first episode, this is not supported",
                )
        else:
            raise ParseError(self, "IMDB id not found in TMDB")
        tmdb = SiteManager.get_site_by_url(url)
        pd = tmdb.scrape()
        pd.metadata['preferred_model'] = tmdb.DEFAULT_MODEL.__name__
        pd.metadata["preferred_model"] = tmdb.DEFAULT_MODEL.__name__
        return pd

@@ -23,8 +23,8 @@ spotify_token_expire_time = time.time()
class Spotify(AbstractSite):
    SITE_NAME = SiteName.Spotify
    ID_TYPE = IdType.Spotify_Album
    URL_PATTERNS = [r'\w+://open\.spotify\.com/album/([a-zA-Z0-9]+)']
    WIKI_PROPERTY_ID = '?'
    URL_PATTERNS = [r"\w+://open\.spotify\.com/album/([a-zA-Z0-9]+)"]
    WIKI_PROPERTY_ID = "?"
    DEFAULT_MODEL = Album

    @classmethod
@@ -33,58 +33,63 @@ class Spotify(AbstractSite):

    def scrape(self):
        api_url = "https://api.spotify.com/v1/albums/" + self.id_value
        headers = {
            'Authorization': f"Bearer {get_spotify_token()}"
        }
        headers = {"Authorization": f"Bearer {get_spotify_token()}"}
        res_data = BasicDownloader(api_url, headers=headers).download().json()
        artist = []
        for artist_dict in res_data['artists']:
            artist.append(artist_dict['name'])
        for artist_dict in res_data["artists"]:
            artist.append(artist_dict["name"])

        title = res_data['name']
        title = res_data["name"]

        genre = ', '.join(res_data['genres'])
        genre = ", ".join(res_data["genres"])

        company = []
        for com in res_data['copyrights']:
            company.append(com['text'])
        for com in res_data["copyrights"]:
            company.append(com["text"])

        duration = 0
        track_list = []
        track_urls = []
        for track in res_data['tracks']['items']:
            track_urls.append(track['external_urls']['spotify'])
            duration += track['duration_ms']
            if res_data['tracks']['items'][-1]['disc_number'] > 1:
        for track in res_data["tracks"]["items"]:
            track_urls.append(track["external_urls"]["spotify"])
            duration += track["duration_ms"]
            if res_data["tracks"]["items"][-1]["disc_number"] > 1:
                # more than one disc
                track_list.append(str(
                    track['disc_number']) + '-' + str(track['track_number']) + '. ' + track['name'])
                track_list.append(
                    str(track["disc_number"])
                    + "-"
                    + str(track["track_number"])
                    + ". "
                    + track["name"]
                )
            else:
                track_list.append(str(track['track_number']) + '. ' + track['name'])
        track_list = '\n'.join(track_list)
                track_list.append(str(track["track_number"]) + ". " + track["name"])
        track_list = "\n".join(track_list)
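        # note: duration was accumulated from track["duration_ms"] above, so
        # the album duration stored below is in milliseconds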

        release_date = dateparser.parse(res_data['release_date']).strftime('%Y-%m-%d')
        release_date = dateparser.parse(res_data["release_date"]).strftime("%Y-%m-%d")

        gtin = None
        if res_data['external_ids'].get('upc'):
            gtin = res_data['external_ids'].get('upc')
        if res_data['external_ids'].get('ean'):
            gtin = res_data['external_ids'].get('ean')
        if res_data["external_ids"].get("upc"):
            gtin = res_data["external_ids"].get("upc")
        if res_data["external_ids"].get("ean"):
            gtin = res_data["external_ids"].get("ean")
        isrc = None
        if res_data['external_ids'].get('isrc'):
            isrc = res_data['external_ids'].get('isrc')
        if res_data["external_ids"].get("isrc"):
            isrc = res_data["external_ids"].get("isrc")

        pd = ResourceContent(metadata={
            'title': title,
            'artist': artist,
            'genre': genre,
            'track_list': track_list,
            'release_date': release_date,
            'duration': duration,
            'company': company,
            'brief': None,
            'cover_image_url': res_data['images'][0]['url']
        })
        pd = ResourceContent(
            metadata={
                "title": title,
                "artist": artist,
                "genre": genre,
                "track_list": track_list,
                "release_date": release_date,
                "duration": duration,
                "company": company,
                "brief": None,
                "cover_image_url": res_data["images"][0]["url"],
            }
        )
        if gtin:
            pd.lookup_ids[IdType.GTIN] = gtin
        if isrc:
@@ -95,14 +100,16 @@ class Spotify(AbstractSite):
                pd.cover_image = imgdl.download().content
                pd.cover_image_extention = imgdl.extention
            except Exception:
                _logger.debug(f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}')
                _logger.debug(
                    f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
                )
        return pd


def get_spotify_token():
    global spotify_token, spotify_token_expire_time
    if get_mock_mode():
        return 'mocked'
        return "mocked"
    if spotify_token is None or is_spotify_token_expired():
        invoke_spotify_token()
    return spotify_token
@@ -117,12 +124,8 @@ def invoke_spotify_token():
    global spotify_token, spotify_token_expire_time
    r = requests.post(
        "https://accounts.spotify.com/api/token",
        data={
            "grant_type": "client_credentials"
        },
        headers={
            "Authorization": f"Basic {settings.SPOTIFY_CREDENTIAL}"
        }
        data={"grant_type": "client_credentials"},
        headers={"Authorization": f"Basic {settings.SPOTIFY_CREDENTIAL}"},
    )
    data = r.json()
    if r.status_code == 401:
@@ -131,16 +134,12 @@ def invoke_spotify_token():
        # for example debugging using a http client
        r = requests.post(
            "https://accounts.spotify.com/api/token",
            data={
                "grant_type": "client_credentials"
            },
            headers={
                "Authorization": f"Basic {settings.SPOTIFY_CREDENTIAL}"
            }
            data={"grant_type": "client_credentials"},
            headers={"Authorization": f"Basic {settings.SPOTIFY_CREDENTIAL}"},
        )
        data = r.json()
    elif r.status_code != 200:
        raise Exception(f"Request to spotify API fails. Reason: {r.reason}")
    # minus 2 for execution time error
    spotify_token_expire_time = int(data['expires_in']) + time.time() - 2
    spotify_token = data['access_token']
    spotify_token_expire_time = int(data["expires_in"]) + time.time() - 2
    spotify_token = data["access_token"]

@@ -13,7 +13,7 @@ class Steam(AbstractSite):
    SITE_NAME = SiteName.Steam
    ID_TYPE = IdType.Steam
    URL_PATTERNS = [r"\w+://store\.steampowered\.com/app/(\d+)"]
    WIKI_PROPERTY_ID = '?'
    WIKI_PROPERTY_ID = "?"
    DEFAULT_MODEL = Game

    @classmethod
@@ -25,41 +25,58 @@ class Steam(AbstractSite):
        pd = i.scrape() if i else ResourceContent()

        headers = BasicDownloader.headers.copy()
        headers['Host'] = 'store.steampowered.com'
        headers['Cookie'] = "wants_mature_content=1; birthtime=754700401;"
        headers["Host"] = "store.steampowered.com"
        headers["Cookie"] = "wants_mature_content=1; birthtime=754700401;"
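        # note: these preset cookies appear to skip Steam's mature-content age
        # gate; birthtime is a fixed epoch timestamp standing in for a birthday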
        content = BasicDownloader(self.url, headers=headers).download().html()

        title = content.xpath("//div[@class='apphub_AppName']/text()")[0]
        developer = content.xpath("//div[@id='developers_list']/a/text()")
        publisher = content.xpath("//div[@class='glance_ctn']//div[@class='dev_row'][2]//a/text()")
        publisher = content.xpath(
            "//div[@class='glance_ctn']//div[@class='dev_row'][2]//a/text()"
        )
        release_date = dateparser.parse(
            content.xpath(
                "//div[@class='release_date']/div[@class='date']/text()")[0]
        ).strftime('%Y-%m-%d')
            content.xpath("//div[@class='release_date']/div[@class='date']/text()")[0]
        ).strftime("%Y-%m-%d")
        genre = content.xpath(
            "//div[@class='details_block']/b[2]/following-sibling::a/text()")
        platform = ['PC']
        brief = content.xpath(
            "//div[@class='game_description_snippet']/text()")[0].strip()
            "//div[@class='details_block']/b[2]/following-sibling::a/text()"
        )
        platform = ["PC"]
        brief = content.xpath("//div[@class='game_description_snippet']/text()")[
            0
        ].strip()
        # try Steam images if no image from IGDB
        if pd.cover_image is None:
            pd.metadata['cover_image_url'] = content.xpath("//img[@class='game_header_image_full']/@src")[0].replace("header.jpg", "library_600x900.jpg")
            pd.cover_image, pd.cover_image_extention = BasicImageDownloader.download_image(pd.metadata['cover_image_url'], self.url)
            pd.metadata["cover_image_url"] = content.xpath(
                "//img[@class='game_header_image_full']/@src"
            )[0].replace("header.jpg", "library_600x900.jpg")
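            # prefer the taller library_600x900 box art over the wide header
            # image when rewriting the cover URL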
            (
                pd.cover_image,
                pd.cover_image_extention,
            ) = BasicImageDownloader.download_image(
                pd.metadata["cover_image_url"], self.url
            )
        if pd.cover_image is None:
            pd.metadata['cover_image_url'] = content.xpath("//img[@class='game_header_image_full']/@src")[0]
            pd.cover_image, pd.cover_image_extention = BasicImageDownloader.download_image(pd.metadata['cover_image_url'], self.url)
            pd.metadata["cover_image_url"] = content.xpath(
                "//img[@class='game_header_image_full']/@src"
            )[0]
            (
                pd.cover_image,
                pd.cover_image_extention,
            ) = BasicImageDownloader.download_image(
                pd.metadata["cover_image_url"], self.url
            )
        # merge data from IGDB, use localized Steam data if available
        d = {
            'developer': developer,
            'publisher': publisher,
            'release_date': release_date,
            'genre': genre,
            'platform': platform,
            "developer": developer,
            "publisher": publisher,
            "release_date": release_date,
            "genre": genre,
            "platform": platform,
        }
        d.update(pd.metadata)
        pd.metadata = d
        if title:
            pd.metadata['title'] = title
            pd.metadata["title"] = title
        if brief:
            pd.metadata['brief'] = brief
            pd.metadata["brief"] = brief
        return pd

@@ -37,8 +37,8 @@ def _copy_dict(s, key_map):
class TMDB_Movie(AbstractSite):
    SITE_NAME = SiteName.TMDB
    ID_TYPE = IdType.TMDB_Movie
    URL_PATTERNS = [r'\w+://www.themoviedb.org/movie/(\d+)']
    WIKI_PROPERTY_ID = '?'
    URL_PATTERNS = [r"\w+://www.themoviedb.org/movie/(\d+)"]
    WIKI_PROPERTY_ID = "?"
    DEFAULT_MODEL = Movie

    @classmethod
@@ -55,37 +55,59 @@ class TMDB_Movie(AbstractSite):
        res_data = BasicDownloader(api_url).download().json()

        if is_series:
            title = res_data['name']
            orig_title = res_data['original_name']
            year = int(res_data['first_air_date'].split(
                '-')[0]) if res_data['first_air_date'] else None
            imdb_code = res_data['external_ids']['imdb_id']
            showtime = [{res_data['first_air_date']: "首播日期"}
                        ] if res_data['first_air_date'] else None
            title = res_data["name"]
            orig_title = res_data["original_name"]
            year = (
                int(res_data["first_air_date"].split("-")[0])
                if res_data["first_air_date"]
                else None
            )
            imdb_code = res_data["external_ids"]["imdb_id"]
            showtime = (
                [{res_data["first_air_date"]: "首播日期"}]
                if res_data["first_air_date"]
                else None
            )
            duration = None
        else:
            title = res_data['title']
            orig_title = res_data['original_title']
            year = int(res_data['release_date'].split('-')
                       [0]) if res_data['release_date'] else None
            showtime = [{res_data['release_date']: "发布日期"}
                        ] if res_data['release_date'] else None
            imdb_code = res_data['imdb_id']
            title = res_data["title"]
            orig_title = res_data["original_title"]
            year = (
                int(res_data["release_date"].split("-")[0])
                if res_data["release_date"]
                else None
            )
            showtime = (
                [{res_data["release_date"]: "发布日期"}]
                if res_data["release_date"]
                else None
            )
            imdb_code = res_data["imdb_id"]
            # in minutes
            duration = res_data['runtime'] if res_data['runtime'] else None
            duration = res_data["runtime"] if res_data["runtime"] else None

        genre = [x['name'] for x in res_data['genres']]
        language = list(map(lambda x: x['name'], res_data['spoken_languages']))
        brief = res_data['overview']
        genre = [x["name"] for x in res_data["genres"]]
        language = list(map(lambda x: x["name"], res_data["spoken_languages"]))
        brief = res_data["overview"]

        if is_series:
            director = list(map(lambda x: x['name'], res_data['created_by']))
            director = list(map(lambda x: x["name"], res_data["created_by"]))
        else:
            director = list(map(lambda x: x['name'], filter(
                lambda c: c['job'] == 'Director', res_data['credits']['crew'])))
        playwright = list(map(lambda x: x['name'], filter(
            lambda c: c['job'] == 'Screenplay', res_data['credits']['crew'])))
        actor = list(map(lambda x: x['name'], res_data['credits']['cast']))
            director = list(
                map(
                    lambda x: x["name"],
                    filter(
                        lambda c: c["job"] == "Director", res_data["credits"]["crew"]
                    ),
                )
            )
        playwright = list(
            map(
                lambda x: x["name"],
                filter(lambda c: c["job"] == "Screenplay", res_data["credits"]["crew"]),
            )
        )
        actor = list(map(lambda x: x["name"], res_data["credits"]["cast"]))
        area = []

        other_info = {}
@@ -95,33 +117,39 @@ class TMDB_Movie(AbstractSite):
        # other_info['奖项'] = res_data['awards']
        # other_info['TMDB_ID'] = id
        if is_series:
            other_info['Seasons'] = res_data['number_of_seasons']
            other_info['Episodes'] = res_data['number_of_episodes']
            other_info["Seasons"] = res_data["number_of_seasons"]
            other_info["Episodes"] = res_data["number_of_episodes"]

        # TODO: use GET /configuration to get base url
        img_url = ('https://image.tmdb.org/t/p/original/' + res_data['poster_path']) if res_data['poster_path'] is not None else None
        img_url = (
            ("https://image.tmdb.org/t/p/original/" + res_data["poster_path"])
            if res_data["poster_path"] is not None
            else None
        )

        pd = ResourceContent(metadata={
            'title': title,
            'orig_title': orig_title,
            'other_title': None,
            'imdb_code': imdb_code,
            'director': director,
            'playwright': playwright,
            'actor': actor,
            'genre': genre,
            'showtime': showtime,
            'site': None,
            'area': area,
            'language': language,
            'year': year,
            'duration': duration,
            'season': None,
            'episodes': None,
            'single_episode_length': None,
            'brief': brief,
            'cover_image_url': img_url,
        })
        pd = ResourceContent(
            metadata={
                "title": title,
                "orig_title": orig_title,
                "other_title": None,
                "imdb_code": imdb_code,
                "director": director,
                "playwright": playwright,
                "actor": actor,
                "genre": genre,
                "showtime": showtime,
                "site": None,
                "area": area,
                "language": language,
                "year": year,
                "duration": duration,
                "season": None,
                "episodes": None,
                "single_episode_length": None,
                "brief": brief,
                "cover_image_url": img_url,
            }
        )
        if imdb_code:
            pd.lookup_ids[IdType.IMDB] = imdb_code
        if pd.metadata["cover_image_url"]:
@@ -130,7 +158,9 @@ class TMDB_Movie(AbstractSite):
                pd.cover_image = imgdl.download().content
                pd.cover_image_extention = imgdl.extention
            except Exception:
                _logger.debug(f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}')
                _logger.debug(
                    f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
                )
        return pd

@@ -138,8 +168,11 @@ class TMDB_Movie(AbstractSite):
class TMDB_TV(AbstractSite):
    SITE_NAME = SiteName.TMDB
    ID_TYPE = IdType.TMDB_TV
    URL_PATTERNS = [r'\w+://www.themoviedb.org/tv/(\d+)[^/]*$', r'\w+://www.themoviedb.org/tv/(\d+)[^/]*/seasons']
    WIKI_PROPERTY_ID = '?'
    URL_PATTERNS = [
        r"\w+://www.themoviedb.org/tv/(\d+)[^/]*$",
        r"\w+://www.themoviedb.org/tv/(\d+)[^/]*/seasons",
    ]
    WIKI_PROPERTY_ID = "?"
    DEFAULT_MODEL = TVShow

    @classmethod
@@ -156,38 +189,60 @@ class TMDB_TV(AbstractSite):
        res_data = BasicDownloader(api_url).download().json()

        if is_series:
            title = res_data['name']
            orig_title = res_data['original_name']
            year = int(res_data['first_air_date'].split(
                '-')[0]) if res_data['first_air_date'] else None
            imdb_code = res_data['external_ids']['imdb_id']
            showtime = [{res_data['first_air_date']: "首播日期"}
                        ] if res_data['first_air_date'] else None
            title = res_data["name"]
            orig_title = res_data["original_name"]
            year = (
                int(res_data["first_air_date"].split("-")[0])
                if res_data["first_air_date"]
                else None
            )
            imdb_code = res_data["external_ids"]["imdb_id"]
            showtime = (
                [{res_data["first_air_date"]: "首播日期"}]
                if res_data["first_air_date"]
                else None
            )
            duration = None
        else:
            title = res_data['title']
            orig_title = res_data['original_title']
            year = int(res_data['release_date'].split('-')
                       [0]) if res_data['release_date'] else None
            showtime = [{res_data['release_date']: "发布日期"}
                        ] if res_data['release_date'] else None
            imdb_code = res_data['imdb_id']
            title = res_data["title"]
            orig_title = res_data["original_title"]
            year = (
                int(res_data["release_date"].split("-")[0])
                if res_data["release_date"]
                else None
            )
            showtime = (
                [{res_data["release_date"]: "发布日期"}]
                if res_data["release_date"]
                else None
            )
            imdb_code = res_data["imdb_id"]
            # in minutes
            duration = res_data['runtime'] if res_data['runtime'] else None
            duration = res_data["runtime"] if res_data["runtime"] else None

        genre = [x['name'] for x in res_data['genres']]
        genre = [x["name"] for x in res_data["genres"]]

        language = list(map(lambda x: x['name'], res_data['spoken_languages']))
        brief = res_data['overview']
        language = list(map(lambda x: x["name"], res_data["spoken_languages"]))
        brief = res_data["overview"]

        if is_series:
            director = list(map(lambda x: x['name'], res_data['created_by']))
            director = list(map(lambda x: x["name"], res_data["created_by"]))
        else:
            director = list(map(lambda x: x['name'], filter(
                lambda c: c['job'] == 'Director', res_data['credits']['crew'])))
        playwright = list(map(lambda x: x['name'], filter(
            lambda c: c['job'] == 'Screenplay', res_data['credits']['crew'])))
        actor = list(map(lambda x: x['name'], res_data['credits']['cast']))
            director = list(
                map(
                    lambda x: x["name"],
                    filter(
                        lambda c: c["job"] == "Director", res_data["credits"]["crew"]
                    ),
                )
            )
        playwright = list(
            map(
                lambda x: x["name"],
                filter(lambda c: c["job"] == "Screenplay", res_data["credits"]["crew"]),
            )
        )
        actor = list(map(lambda x: x["name"], res_data["credits"]["cast"]))
        area = []

        other_info = {}
@@ -197,41 +252,53 @@ class TMDB_TV(AbstractSite):
        # other_info['奖项'] = res_data['awards']
        # other_info['TMDB_ID'] = id
        if is_series:
            other_info['Seasons'] = res_data['number_of_seasons']
            other_info['Episodes'] = res_data['number_of_episodes']
            other_info["Seasons"] = res_data["number_of_seasons"]
            other_info["Episodes"] = res_data["number_of_episodes"]

        # TODO: use GET /configuration to get base url
        img_url = ('https://image.tmdb.org/t/p/original/' + res_data['poster_path']) if res_data['poster_path'] is not None else None
        img_url = (
            ("https://image.tmdb.org/t/p/original/" + res_data["poster_path"])
            if res_data["poster_path"] is not None
            else None
        )

        season_links = list(map(lambda s: {
            'model': 'TVSeason',
            'id_type': IdType.TMDB_TVSeason,
            'id_value': f'{self.id_value}-{s["season_number"]}',
            'title': s['name'],
            'url': f'{self.url}/season/{s["season_number"]}'}, res_data['seasons']))
        pd = ResourceContent(metadata={
            'title': title,
            'orig_title': orig_title,
            'other_title': None,
            'imdb_code': imdb_code,
            'director': director,
            'playwright': playwright,
            'actor': actor,
            'genre': genre,
            'showtime': showtime,
            'site': None,
            'area': area,
            'language': language,
            'year': year,
            'duration': duration,
            'season_count': res_data['number_of_seasons'],
            'season': None,
            'episodes': None,
            'single_episode_length': None,
            'brief': brief,
            'cover_image_url': img_url,
            'related_resources': season_links,
        })
        season_links = list(
            map(
                lambda s: {
                    "model": "TVSeason",
                    "id_type": IdType.TMDB_TVSeason,
                    "id_value": f'{self.id_value}-{s["season_number"]}',
                    "title": s["name"],
                    "url": f'{self.url}/season/{s["season_number"]}',
                },
                res_data["seasons"],
            )
        )
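        # season_links is attached as related_resources below, so each season
        # can later be scraped as its own TVSeason item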
        pd = ResourceContent(
            metadata={
                "title": title,
                "orig_title": orig_title,
                "other_title": None,
                "imdb_code": imdb_code,
                "director": director,
                "playwright": playwright,
                "actor": actor,
                "genre": genre,
                "showtime": showtime,
                "site": None,
                "area": area,
                "language": language,
                "year": year,
                "duration": duration,
                "season_count": res_data["number_of_seasons"],
                "season": None,
                "episodes": None,
                "single_episode_length": None,
                "brief": brief,
                "cover_image_url": img_url,
                "related_resources": season_links,
            }
        )
        if imdb_code:
            pd.lookup_ids[IdType.IMDB] = imdb_code

@@ -241,7 +308,9 @@ class TMDB_TV(AbstractSite):
                pd.cover_image = imgdl.download().content
                pd.cover_image_extention = imgdl.extention
            except Exception:
                _logger.debug(f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}')
                _logger.debug(
                    f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
                )
        return pd

@@ -249,58 +318,87 @@ class TMDB_TV(AbstractSite):
class TMDB_TVSeason(AbstractSite):
    SITE_NAME = SiteName.TMDB
    ID_TYPE = IdType.TMDB_TVSeason
    URL_PATTERNS = [r'\w+://www.themoviedb.org/tv/(\d+)[^/]*/season/(\d+)[^/]*$']
    WIKI_PROPERTY_ID = '?'
    URL_PATTERNS = [r"\w+://www.themoviedb.org/tv/(\d+)[^/]*/season/(\d+)[^/]*$"]
    WIKI_PROPERTY_ID = "?"
    DEFAULT_MODEL = TVSeason
    ID_PATTERN = r'^(\d+)-(\d+)$'
    ID_PATTERN = r"^(\d+)-(\d+)$"

    @classmethod
    def url_to_id(cls, url: str):
        u = next(iter([re.match(p, url) for p in cls.URL_PATTERNS if re.match(p, url)]), None)
        return u[1] + '-' + u[2] if u else None
        u = next(
            iter([re.match(p, url) for p in cls.URL_PATTERNS if re.match(p, url)]), None
        )
        return u[1] + "-" + u[2] if u else None

    @classmethod
    def id_to_url(cls, id_value):
        v = id_value.split('-')
        v = id_value.split("-")
        return f"https://www.themoviedb.org/tv/{v[0]}/season/{v[1]}"

    def scrape(self):
        v = self.id_value.split('-')
        v = self.id_value.split("-")
        api_url = f"https://api.themoviedb.org/3/tv/{v[0]}/season/{v[1]}?api_key={settings.TMDB_API3_KEY}&language=zh-CN&append_to_response=external_ids,credits"
        d = BasicDownloader(api_url).download().json()
        if not d.get('id'):
            raise ParseError('id')
        pd = ResourceContent(metadata=_copy_dict(d, {'name': 'title', 'overview': 'brief', 'air_date': 'air_date', 'season_number': 0, 'external_ids': []}))
        pd.metadata['required_resources'] = [{
            'model': 'TVShow',
            'id_type': IdType.TMDB_TV,
            'id_value': v[0],
            'title': f'TMDB TV Show {v[0]}',
            'url': f"https://www.themoviedb.org/tv/{v[0]}",
        }]
        pd.lookup_ids[IdType.IMDB] = d['external_ids'].get('imdb_id')
        pd.metadata['cover_image_url'] = ('https://image.tmdb.org/t/p/original/' + d['poster_path']) if d['poster_path'] else None
        pd.metadata['title'] = pd.metadata['title'] if pd.metadata['title'] else f'Season {d["season_number"]}'
        pd.metadata['episode_number_list'] = list(map(lambda ep: ep['episode_number'], d['episodes']))
        pd.metadata['episode_count'] = len(pd.metadata['episode_number_list'])
        if not d.get("id"):
            raise ParseError("id")
        pd = ResourceContent(
            metadata=_copy_dict(
                d,
                {
                    "name": "title",
                    "overview": "brief",
                    "air_date": "air_date",
                    "season_number": 0,
                    "external_ids": [],
                },
            )
        )
        pd.metadata["required_resources"] = [
            {
                "model": "TVShow",
                "id_type": IdType.TMDB_TV,
                "id_value": v[0],
                "title": f"TMDB TV Show {v[0]}",
                "url": f"https://www.themoviedb.org/tv/{v[0]}",
            }
        ]
        pd.lookup_ids[IdType.IMDB] = d["external_ids"].get("imdb_id")
        pd.metadata["cover_image_url"] = (
            ("https://image.tmdb.org/t/p/original/" + d["poster_path"])
            if d["poster_path"]
            else None
        )
        pd.metadata["title"] = (
            pd.metadata["title"]
            if pd.metadata["title"]
            else f'Season {d["season_number"]}'
        )
        pd.metadata["episode_number_list"] = list(
            map(lambda ep: ep["episode_number"], d["episodes"])
        )
        pd.metadata["episode_count"] = len(pd.metadata["episode_number_list"])
        if pd.metadata["cover_image_url"]:
            imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
            try:
                pd.cover_image = imgdl.download().content
                pd.cover_image_extention = imgdl.extention
            except Exception:
                _logger.debug(f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}')
                _logger.debug(
                    f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
                )

        # get external id from 1st episode
        if pd.lookup_ids[IdType.IMDB]:
            _logger.warning("Unexpected IMDB id for TMDB tv season")
        elif len(pd.metadata['episode_number_list']) == 0:
            _logger.warning("Unable to lookup IMDB id for TMDB tv season with zero episodes")
        elif len(pd.metadata["episode_number_list"]) == 0:
            _logger.warning(
                "Unable to lookup IMDB id for TMDB tv season with zero episodes"
            )
        else:
            ep = pd.metadata['episode_number_list'][0]
            ep = pd.metadata["episode_number_list"][0]
            api_url2 = f"https://api.themoviedb.org/3/tv/{v[0]}/season/{v[1]}/episode/{ep}?api_key={settings.TMDB_API3_KEY}&language=zh-CN&append_to_response=external_ids,credits"
            d2 = BasicDownloader(api_url2).download().json()
            if not d2.get('id'):
                raise ParseError('episode id for season')
            pd.lookup_ids[IdType.IMDB] = d2['external_ids'].get('imdb_id')
            if not d2.get("id"):
                raise ParseError("episode id for season")
            pd.lookup_ids[IdType.IMDB] = d2["external_ids"].get("imdb_id")
        return pd

@@ -31,8 +31,8 @@ from django.utils.translation import gettext_lazy as _

class TVShow(Item):
    category = ItemCategory.TV
    url_path = 'tv'
    demonstrative = _('这部剧集')
    url_path = "tv"
    demonstrative = _("这部剧集")
    imdb = PrimaryLookupIdDescriptor(IdType.IMDB)
    tmdb_tv = PrimaryLookupIdDescriptor(IdType.TMDB_TV)
    imdb = PrimaryLookupIdDescriptor(IdType.IMDB)
@@ -40,100 +40,208 @@ class TVShow(Item):
    episode_count = models.PositiveIntegerField(null=True)

    METADATA_COPY_LIST = [
        'title',
        'season_count',
        'orig_title',
        'other_title',
        'director',
        'playwright',
        'actor',
        'genre',
        'showtime',
        'site',
        'area',
        'language',
        'year',
        'duration',
        'season_count',
        'episode_count',
        'single_episode_length',
        'brief',
        "title",
        "season_count",
        "orig_title",
        "other_title",
        "director",
        "playwright",
        "actor",
        "genre",
        "showtime",
        "site",
        "area",
        "language",
        "year",
        "duration",
        "season_count",
        "episode_count",
        "single_episode_length",
        "brief",
    ]
    orig_title = jsondata.CharField(_("original title"), blank=True, default='', max_length=500)
    other_title = jsondata.ArrayField(models.CharField(_("other title"), blank=True, default='', max_length=500), null=True, blank=True, default=list, )
    director = jsondata.ArrayField(models.CharField(_("director"), blank=True, default='', max_length=200), null=True, blank=True, default=list, )
    playwright = jsondata.ArrayField(models.CharField(_("playwright"), blank=True, default='', max_length=200), null=True, blank=True, default=list, )
    actor = jsondata.ArrayField(models.CharField(_("actor"), blank=True, default='', max_length=200), null=True, blank=True, default=list, )
    genre = jsondata.ArrayField(models.CharField(_("genre"), blank=True, default='', max_length=50), null=True, blank=True, default=list, )  # , choices=MovieGenreEnum.choices
    showtime = jsondata.ArrayField(null=True, blank=True, default=list, )
    site = jsondata.URLField(_('site url'), blank=True, default='', max_length=200)
    area = jsondata.ArrayField(models.CharField(_("country or region"), blank=True, default='', max_length=100, ), null=True, blank=True, default=list, )
    language = jsondata.ArrayField(models.CharField(blank=True, default='', max_length=100, ), null=True, blank=True, default=list, )
    orig_title = jsondata.CharField(
        _("original title"), blank=True, default="", max_length=500
    )
    other_title = jsondata.ArrayField(
        models.CharField(_("other title"), blank=True, default="", max_length=500),
        null=True,
        blank=True,
        default=list,
    )
    director = jsondata.ArrayField(
        models.CharField(_("director"), blank=True, default="", max_length=200),
        null=True,
        blank=True,
        default=list,
    )
    playwright = jsondata.ArrayField(
        models.CharField(_("playwright"), blank=True, default="", max_length=200),
        null=True,
        blank=True,
        default=list,
    )
    actor = jsondata.ArrayField(
        models.CharField(_("actor"), blank=True, default="", max_length=200),
        null=True,
        blank=True,
        default=list,
    )
    genre = jsondata.ArrayField(
        models.CharField(_("genre"), blank=True, default="", max_length=50),
        null=True,
        blank=True,
        default=list,
    )  # , choices=MovieGenreEnum.choices
    showtime = jsondata.ArrayField(
        null=True,
        blank=True,
        default=list,
    )
    site = jsondata.URLField(_("site url"), blank=True, default="", max_length=200)
    area = jsondata.ArrayField(
        models.CharField(
            _("country or region"),
            blank=True,
            default="",
            max_length=100,
        ),
        null=True,
        blank=True,
        default=list,
    )
    language = jsondata.ArrayField(
        models.CharField(
            blank=True,
            default="",
            max_length=100,
        ),
        null=True,
        blank=True,
        default=list,
    )
    year = jsondata.IntegerField(null=True, blank=True)
    season_number = jsondata.IntegerField(null=True, blank=True)
    single_episode_length = jsondata.IntegerField(null=True, blank=True)
    duration = jsondata.CharField(blank=True, default='', max_length=200)
    duration = jsondata.CharField(blank=True, default="", max_length=200)


class TVSeason(Item):
    category = ItemCategory.TV
    url_path = 'tv/season'
    demonstrative = _('这部剧集')
    url_path = "tv/season"
    demonstrative = _("这部剧集")
    douban_movie = PrimaryLookupIdDescriptor(IdType.DoubanMovie)
    imdb = PrimaryLookupIdDescriptor(IdType.IMDB)
    tmdb_tvseason = PrimaryLookupIdDescriptor(IdType.TMDB_TVSeason)
    show = models.ForeignKey(TVShow, null=True, on_delete=models.SET_NULL, related_name='seasons')
    show = models.ForeignKey(
        TVShow, null=True, on_delete=models.SET_NULL, related_name="seasons"
    )
    season_number = models.PositiveIntegerField(null=True)
    episode_count = models.PositiveIntegerField(null=True)

    METADATA_COPY_LIST = [
        'title',
        'orig_title',
        'other_title',
        'director',
        'playwright',
        'actor',
        'genre',
        'showtime',
        'site',
        'area',
        'language',
        'year',
        'duration',
        'season_number',
        'episode_count',
        'single_episode_length',
        'brief',
        "title",
        "orig_title",
        "other_title",
        "director",
        "playwright",
        "actor",
        "genre",
        "showtime",
        "site",
        "area",
        "language",
        "year",
        "duration",
        "season_number",
        "episode_count",
        "single_episode_length",
        "brief",
    ]
    orig_title = jsondata.CharField(_("original title"), blank=True, default='', max_length=500)
    other_title = jsondata.ArrayField(models.CharField(_("other title"), blank=True, default='', max_length=500), null=True, blank=True, default=list, )
    director = jsondata.ArrayField(models.CharField(_("director"), blank=True, default='', max_length=200), null=True, blank=True, default=list, )
    playwright = jsondata.ArrayField(models.CharField(_("playwright"), blank=True, default='', max_length=200), null=True, blank=True, default=list, )
    actor = jsondata.ArrayField(models.CharField(_("actor"), blank=True, default='', max_length=200), null=True, blank=True, default=list, )
    genre = jsondata.ArrayField(models.CharField(_("genre"), blank=True, default='', max_length=50), null=True, blank=True, default=list, )  # , choices=MovieGenreEnum.choices
    showtime = jsondata.ArrayField(null=True, blank=True, default=list, )
    site = jsondata.URLField(_('site url'), blank=True, default='', max_length=200)
    area = jsondata.ArrayField(models.CharField(_("country or region"), blank=True, default='', max_length=100, ), null=True, blank=True, default=list, )
    language = jsondata.ArrayField(models.CharField(blank=True, default='', max_length=100, ), null=True, blank=True, default=list, )
    orig_title = jsondata.CharField(
        _("original title"), blank=True, default="", max_length=500
    )
    other_title = jsondata.ArrayField(
        models.CharField(_("other title"), blank=True, default="", max_length=500),
        null=True,
        blank=True,
        default=list,
    )
    director = jsondata.ArrayField(
        models.CharField(_("director"), blank=True, default="", max_length=200),
        null=True,
        blank=True,
        default=list,
    )
    playwright = jsondata.ArrayField(
        models.CharField(_("playwright"), blank=True, default="", max_length=200),
        null=True,
        blank=True,
        default=list,
    )
    actor = jsondata.ArrayField(
        models.CharField(_("actor"), blank=True, default="", max_length=200),
        null=True,
        blank=True,
        default=list,
    )
    genre = jsondata.ArrayField(
        models.CharField(_("genre"), blank=True, default="", max_length=50),
        null=True,
        blank=True,
        default=list,
    )  # , choices=MovieGenreEnum.choices
    showtime = jsondata.ArrayField(
        null=True,
        blank=True,
        default=list,
    )
    site = jsondata.URLField(_("site url"), blank=True, default="", max_length=200)
    area = jsondata.ArrayField(
        models.CharField(
            _("country or region"),
            blank=True,
            default="",
            max_length=100,
        ),
        null=True,
        blank=True,
        default=list,
    )
    language = jsondata.ArrayField(
        models.CharField(
            blank=True,
            default="",
            max_length=100,
        ),
        null=True,
        blank=True,
        default=list,
    )
    year = jsondata.IntegerField(null=True, blank=True)
    single_episode_length = jsondata.IntegerField(null=True, blank=True)
    duration = jsondata.CharField(blank=True, default='', max_length=200)
    duration = jsondata.CharField(blank=True, default="", max_length=200)

    def update_linked_items_from_external_resource(self, resource):
        """add Work from resource.metadata['work'] if not yet"""
|
||||
links = resource.required_resources + resource.related_resources
|
||||
for w in links:
|
||||
if w['model'] == 'TVShow':
|
||||
p = ExternalResource.objects.filter(id_type=w['id_type'], id_value=w['id_value']).first()
|
||||
if w["model"] == "TVShow":
|
||||
p = ExternalResource.objects.filter(
|
||||
id_type=w["id_type"], id_value=w["id_value"]
|
||||
).first()
|
||||
if p and p.item and self.show != p.item:
|
||||
self.show = p.item
|
||||
|
||||
|
||||
class TVEpisode(Item):
|
||||
category = ItemCategory.TV
|
||||
url_path = 'tv/episode'
|
||||
show = models.ForeignKey(TVShow, null=True, on_delete=models.SET_NULL, related_name='episodes')
|
||||
season = models.ForeignKey(TVSeason, null=True, on_delete=models.SET_NULL, related_name='episodes')
|
||||
url_path = "tv/episode"
|
||||
show = models.ForeignKey(
|
||||
TVShow, null=True, on_delete=models.SET_NULL, related_name="episodes"
|
||||
)
|
||||
season = models.ForeignKey(
|
||||
TVSeason, null=True, on_delete=models.SET_NULL, related_name="episodes"
|
||||
)
|
||||
episode_number = models.PositiveIntegerField(null=True)
|
||||
imdb = PrimaryLookupIdDescriptor(IdType.IMDB)
|
||||
METADATA_COPY_LIST = ['title', 'brief', 'episode_number']
|
||||
METADATA_COPY_LIST = ["title", "brief", "episode_number"]
|
||||
|
|
|

@ -5,10 +5,10 @@ from catalog.tv.models import *

class TMDBTVTestCase(TestCase):
    def test_parse(self):
        t_id = '57243'
        t_url = 'https://www.themoviedb.org/tv/57243-doctor-who'
        t_url1 = 'https://www.themoviedb.org/tv/57243-doctor-who/seasons'
        t_url2 = 'https://www.themoviedb.org/tv/57243'
        t_id = "57243"
        t_url = "https://www.themoviedb.org/tv/57243-doctor-who"
        t_url1 = "https://www.themoviedb.org/tv/57243-doctor-who/seasons"
        t_url2 = "https://www.themoviedb.org/tv/57243"
        p1 = SiteManager.get_site_by_id_type(IdType.TMDB_TV)
        self.assertIsNotNone(p1)
        self.assertEqual(p1.validate_url(t_url), True)

@ -17,29 +17,29 @@ class TMDBTVTestCase(TestCase):
        p2 = SiteManager.get_site_by_url(t_url)
        self.assertEqual(p1.id_to_url(t_id), t_url2)
        self.assertEqual(p2.url_to_id(t_url), t_id)
        wrong_url = 'https://www.themoviedb.org/tv/57243-doctor-who/season/13'
        wrong_url = "https://www.themoviedb.org/tv/57243-doctor-who/season/13"
        s1 = SiteManager.get_site_by_url(wrong_url)
        self.assertNotIsInstance(s1, TVShow)

    @use_local_response
    def test_scrape(self):
        t_url = 'https://www.themoviedb.org/tv/57243-doctor-who'
        t_url = "https://www.themoviedb.org/tv/57243-doctor-who"
        site = SiteManager.get_site_by_url(t_url)
        self.assertEqual(site.ready, False)
        self.assertEqual(site.id_value, '57243')
        self.assertEqual(site.id_value, "57243")
        site.get_resource_ready()
        self.assertEqual(site.ready, True)
        self.assertEqual(site.resource.metadata['title'], '神秘博士')
        self.assertEqual(site.resource.metadata["title"], "神秘博士")
        self.assertEqual(site.resource.item.primary_lookup_id_type, IdType.IMDB)
        self.assertEqual(site.resource.item.__class__.__name__, 'TVShow')
        self.assertEqual(site.resource.item.imdb, 'tt0436992')
        self.assertEqual(site.resource.item.__class__.__name__, "TVShow")
        self.assertEqual(site.resource.item.imdb, "tt0436992")


class TMDBTVSeasonTestCase(TestCase):
    def test_parse(self):
        t_id = '57243-11'
        t_url = 'https://www.themoviedb.org/tv/57243-doctor-who/season/11'
        t_url_unique = 'https://www.themoviedb.org/tv/57243/season/11'
        t_id = "57243-11"
        t_url = "https://www.themoviedb.org/tv/57243-doctor-who/season/11"
        t_url_unique = "https://www.themoviedb.org/tv/57243/season/11"
        p1 = SiteManager.get_site_by_id_type(IdType.TMDB_TVSeason)
        self.assertIsNotNone(p1)
        self.assertEqual(p1.validate_url(t_url), True)

@ -50,48 +50,48 @@ class TMDBTVSeasonTestCase(TestCase):

    @use_local_response
    def test_scrape(self):
        t_url = 'https://www.themoviedb.org/tv/57243-doctor-who/season/4'
        t_url = "https://www.themoviedb.org/tv/57243-doctor-who/season/4"
        site = SiteManager.get_site_by_url(t_url)
        self.assertEqual(site.ready, False)
        self.assertEqual(site.id_value, '57243-4')
        self.assertEqual(site.id_value, "57243-4")
        site.get_resource_ready()
        self.assertEqual(site.ready, True)
        self.assertEqual(site.resource.metadata['title'], '第 4 季')
        self.assertEqual(site.resource.metadata["title"], "第 4 季")
        self.assertEqual(site.resource.item.primary_lookup_id_type, IdType.IMDB)
        self.assertEqual(site.resource.item.__class__.__name__, 'TVSeason')
        self.assertEqual(site.resource.item.imdb, 'tt1159991')
        self.assertEqual(site.resource.item.__class__.__name__, "TVSeason")
        self.assertEqual(site.resource.item.imdb, "tt1159991")
        self.assertIsNotNone(site.resource.item.show)
        self.assertEqual(site.resource.item.show.imdb, 'tt0436992')
        self.assertEqual(site.resource.item.show.imdb, "tt0436992")


class DoubanMovieTVTestCase(TestCase):
    @use_local_response
    def test_scrape(self):
        url3 = 'https://movie.douban.com/subject/3627919/'
        url3 = "https://movie.douban.com/subject/3627919/"
        p3 = SiteManager.get_site_by_url(url3).get_resource_ready()
        self.assertEqual(p3.item.__class__.__name__, 'TVSeason')
        self.assertEqual(p3.item.__class__.__name__, "TVSeason")
        self.assertIsNotNone(p3.item.show)
        self.assertEqual(p3.item.show.imdb, 'tt0436992')
        self.assertEqual(p3.item.show.imdb, "tt0436992")

    @use_local_response
    def test_scrape_singleseason(self):
        url3 = 'https://movie.douban.com/subject/26895436/'
        url3 = "https://movie.douban.com/subject/26895436/"
        p3 = SiteManager.get_site_by_url(url3).get_resource_ready()
        self.assertEqual(p3.item.__class__.__name__, 'TVShow')
        self.assertEqual(p3.item.__class__.__name__, "TVShow")

    @use_local_response
    def test_scrape_fix_imdb(self):
        url = 'https://movie.douban.com/subject/35597581/'
        url = "https://movie.douban.com/subject/35597581/"
        item = SiteManager.get_site_by_url(url).get_resource_ready().item
        # this douban links to S6E3, we'll reset it to S6E1 to keep consistant
        self.assertEqual(item.imdb, 'tt21599650')
        self.assertEqual(item.imdb, "tt21599650")


class MultiTVSitesTestCase(TestCase):
    @use_local_response
    def test_tvshows(self):
        url1 = 'https://www.themoviedb.org/tv/57243-doctor-who'
        url2 = 'https://www.imdb.com/title/tt0436992/'
        url1 = "https://www.themoviedb.org/tv/57243-doctor-who"
        url2 = "https://www.imdb.com/title/tt0436992/"
        # url3 = 'https://movie.douban.com/subject/3541415/'
        p1 = SiteManager.get_site_by_url(url1).get_resource_ready()
        p2 = SiteManager.get_site_by_url(url2).get_resource_ready()

@ -101,9 +101,9 @@ class MultiTVSitesTestCase(TestCase):

    @use_local_response
    def test_tvseasons(self):
        url1 = 'https://www.themoviedb.org/tv/57243-doctor-who/season/4'
        url2 = 'https://www.imdb.com/title/tt1159991/'
        url3 = 'https://movie.douban.com/subject/3627919/'
        url1 = "https://www.themoviedb.org/tv/57243-doctor-who/season/4"
        url2 = "https://www.imdb.com/title/tt1159991/"
        url3 = "https://movie.douban.com/subject/3627919/"
        p1 = SiteManager.get_site_by_url(url1).get_resource_ready()
        p2 = SiteManager.get_site_by_url(url2).get_resource_ready()
        p3 = SiteManager.get_site_by_url(url3).get_resource_ready()

@ -114,18 +114,18 @@ class MultiTVSitesTestCase(TestCase):

    @use_local_response
    def test_miniseries(self):
        url1 = 'https://www.themoviedb.org/tv/86941-the-north-water'
        url3 = 'https://movie.douban.com/subject/26895436/'
        url1 = "https://www.themoviedb.org/tv/86941-the-north-water"
        url3 = "https://movie.douban.com/subject/26895436/"
        p1 = SiteManager.get_site_by_url(url1).get_resource_ready()
        p3 = SiteManager.get_site_by_url(url3).get_resource_ready()
        self.assertEqual(p3.item.__class__.__name__, 'TVShow')
        self.assertEqual(p3.item.__class__.__name__, "TVShow")
        self.assertEqual(p1.item.id, p3.item.id)

    @use_local_response
    def test_tvspecial(self):
        url1 = 'https://www.themoviedb.org/movie/282758-doctor-who-the-runaway-bride'
        url2 = 'hhttps://www.imdb.com/title/tt0827573/'
        url3 = 'https://movie.douban.com/subject/4296866/'
        url1 = "https://www.themoviedb.org/movie/282758-doctor-who-the-runaway-bride"
        url2 = "hhttps://www.imdb.com/title/tt0827573/"
        url3 = "https://movie.douban.com/subject/4296866/"
        p1 = SiteManager.get_site_by_url(url1).get_resource_ready()
        p2 = SiteManager.get_site_by_url(url2).get_resource_ready()
        p3 = SiteManager.get_site_by_url(url3).get_resource_ready()

@ -3,13 +3,13 @@ from .api import api
from .views import *
from .models import *

app_name = 'catalog'
app_name = "catalog"


def _get_all_url_paths():
    paths = ['item']
    paths = ["item"]
    for cls in Item.__subclasses__():
        p = getattr(cls, 'url_path', None)
        p = getattr(cls, "url_path", None)
        if p:
            paths.append(p)
    res = "|".join(paths)

@ -17,9 +17,31 @@ def _get_all_url_paths():


urlpatterns = [
    re_path(r'^item/(?P<item_uid>[0-9a-fA-F]{8}\b-[0-9a-fA-F]{4}\b-[0-9a-fA-F]{4}\b-[0-9a-fA-F]{4}\b-[0-9a-fA-F]{12})?$', retrieve_by_uuid, name='retrieve_by_uuid'),
    re_path(r'^(?P<item_path>' + _get_all_url_paths() + ')/(?P<item_uuid>[A-Za-z0-9]{21,22})$', retrieve, name='retrieve'),
    re_path(r'^(?P<item_path>' + _get_all_url_paths() + ')/(?P<item_uuid>[A-Za-z0-9]{21,22})/reviews', review_list, name='review_list'),
    re_path(r'^(?P<item_path>' + _get_all_url_paths() + ')/(?P<item_uuid>[A-Za-z0-9]{21,22})/marks(?:/(?P<following_only>\\w+))?', mark_list, name='mark_list'),
    re_path(
        r"^item/(?P<item_uid>[0-9a-fA-F]{8}\b-[0-9a-fA-F]{4}\b-[0-9a-fA-F]{4}\b-[0-9a-fA-F]{4}\b-[0-9a-fA-F]{12})?$",
        retrieve_by_uuid,
        name="retrieve_by_uuid",
    ),
    re_path(
        r"^(?P<item_path>"
        + _get_all_url_paths()
        + ")/(?P<item_uuid>[A-Za-z0-9]{21,22})$",
        retrieve,
        name="retrieve",
    ),
    re_path(
        r"^(?P<item_path>"
        + _get_all_url_paths()
        + ")/(?P<item_uuid>[A-Za-z0-9]{21,22})/reviews",
        review_list,
        name="review_list",
    ),
    re_path(
        r"^(?P<item_path>"
        + _get_all_url_paths()
        + ")/(?P<item_uuid>[A-Za-z0-9]{21,22})/marks(?:/(?P<following_only>\\w+))?",
        mark_list,
        name="mark_list",
    ),
    path("api/", api.urls),
]

@ -34,9 +34,9 @@ def retrieve_by_uuid(request, item_uid):


def retrieve(request, item_path, item_uuid):
    if request.method == 'GET':
    if request.method == "GET":
        item = get_object_or_404(Item, uid=base62.decode(item_uuid))
        item_url = f'/{item_path}/{item_uuid}'
        item_url = f"/{item_path}/{item_uuid}"
        if item.url != item_url:
            return redirect(item.url)
        mark = None

@ -44,26 +44,46 @@ def retrieve(request, item_path, item_uuid):
        mark_list = None
        review_list = None
        collection_list = []
        shelf_types = [(n[1], n[2]) for n in iter(ShelfTypeNames) if n[0] == item.category]
        shelf_types = [
            (n[1], n[2]) for n in iter(ShelfTypeNames) if n[0] == item.category
        ]
        if request.user.is_authenticated:
            visible = query_visible(request.user)
            mark = Mark(request.user, item)
            _logger.info(mark.rating)
            review = mark.review
            collection_list = item.collections.all().filter(visible).annotate(like_counts=Count('likes')).order_by('-like_counts')
            mark_query = ShelfMember.objects.filter(item=item).filter(visible).order_by('-created_time')
            mark_list = [member.mark for member in mark_query[:NUM_REVIEWS_ON_ITEM_PAGE]]
            review_list = Review.objects.filter(item=item).filter(visible).order_by('-created_time')[:NUM_REVIEWS_ON_ITEM_PAGE]
            collection_list = (
                item.collections.all()
                .filter(visible)
                .annotate(like_counts=Count("likes"))
                .order_by("-like_counts")
            )
            mark_query = (
                ShelfMember.objects.filter(item=item)
                .filter(visible)
                .order_by("-created_time")
            )
            mark_list = [
                member.mark for member in mark_query[:NUM_REVIEWS_ON_ITEM_PAGE]
            ]
            review_list = (
                Review.objects.filter(item=item)
                .filter(visible)
                .order_by("-created_time")[:NUM_REVIEWS_ON_ITEM_PAGE]
            )

        return render(request, item.class_name + '.html', {
            'item': item,
            'mark': mark,
            'review': review,
            'mark_list': mark_list,
            'review_list': review_list,
            'collection_list': collection_list,
            'shelf_types': shelf_types,
        }
        return render(
            request,
            item.class_name + ".html",
            {
                "item": item,
                "mark": mark,
                "review": review,
                "mark_list": mark_list,
                "review_list": review_list,
                "collection_list": collection_list,
                "shelf_types": shelf_types,
            },
        )
    else:
        return HttpResponseBadRequest()

@ -73,23 +93,24 @@ def mark_list(request, item_path, item_uuid, following_only=False):
    item = get_object_or_404(Item, uid=base62.decode(item_uuid))
    if not item:
        return HttpResponseNotFound("item not found")
    queryset = ShelfMember.objects.filter(item=item).order_by('-created_time')
    queryset = ShelfMember.objects.filter(item=item).order_by("-created_time")
    if following_only:
        queryset = queryset.filter(query_following(request.user))
    else:
        queryset = queryset.filter(query_visible(request.user))
    paginator = Paginator(queryset, NUM_REVIEWS_ON_LIST_PAGE)
    page_number = request.GET.get('page', default=1)
    page_number = request.GET.get("page", default=1)
    marks = paginator.get_page(page_number)
    marks.pagination = PageLinksGenerator(
        PAGE_LINK_NUMBER, page_number, paginator.num_pages)
        PAGE_LINK_NUMBER, page_number, paginator.num_pages
    )
    return render(
        request,
        'item_mark_list.html',
        "item_mark_list.html",
        {
            'marks': marks,
            'item': item,
        }
            "marks": marks,
            "item": item,
        },
    )


@ -97,18 +118,19 @@ def review_list(request, item_path, item_uuid):
    item = get_object_or_404(Item, uid=base62.decode(item_uuid))
    if not item:
        return HttpResponseNotFound("item not found")
    queryset = Review.objects.filter(item=item).order_by('-created_time')
    queryset = Review.objects.filter(item=item).order_by("-created_time")
    queryset = queryset.filter(query_visible(request.user))
    paginator = Paginator(queryset, NUM_REVIEWS_ON_LIST_PAGE)
    page_number = request.GET.get('page', default=1)
    page_number = request.GET.get("page", default=1)
    reviews = paginator.get_page(page_number)
    reviews.pagination = PageLinksGenerator(
        PAGE_LINK_NUMBER, page_number, paginator.num_pages)
        PAGE_LINK_NUMBER, page_number, paginator.num_pages
    )
    return render(
        request,
        'item_review_list.html',
        "item_review_list.html",
        {
            'reviews': reviews,
            'item': item,
        }
            "reviews": reviews,
            "item": item,
        },
    )

@ -2,5 +2,5 @@ from django.apps import AppConfig


class JournalConfig(AppConfig):
    default_auto_field = 'django.db.models.BigAutoField'
    name = 'journal'
    default_auto_field = "django.db.models.BigAutoField"
    name = "journal"

@ -12,27 +12,23 @@ from common.forms import PreviewImageInput
class ReviewForm(forms.ModelForm):
    class Meta:
        model = Review
        fields = [
            'id',
            'item',
            'title',
            'body',
            'visibility'
        ]
        fields = ["id", "item", "title", "body", "visibility"]
        widgets = {
            'item': forms.TextInput(attrs={"hidden": ""}),
            "item": forms.TextInput(attrs={"hidden": ""}),
        }

    title = forms.CharField(label=_("评论标题"))
    body = MarkdownxFormField(label=_("评论正文 (Markdown)"))
    share_to_mastodon = forms.BooleanField(
        label=_("分享到联邦网络"), initial=True, required=False)
        label=_("分享到联邦网络"), initial=True, required=False
    )
    id = forms.IntegerField(required=False, widget=forms.HiddenInput())
    visibility = forms.TypedChoiceField(
        label=_("可见性"),
        initial=0,
        coerce=int,
        choices=VisibilityType.choices,
        widget=forms.RadioSelect
        widget=forms.RadioSelect,
    )

@ -52,26 +48,26 @@ class CollectionForm(forms.ModelForm):
        initial=0,
        coerce=int,
        choices=VisibilityType.choices,
        widget=forms.RadioSelect
        widget=forms.RadioSelect,
    )
    collaborative = forms.TypedChoiceField(
        label=_("协作整理权限"),
        initial=0,
        coerce=int,
        choices=COLLABORATIVE_CHOICES,
        widget=forms.RadioSelect
        widget=forms.RadioSelect,
    )

    class Meta:
        model = Collection
        fields = [
            'title',
            'cover',
            'visibility',
            'collaborative',
            'brief',
            "title",
            "cover",
            "visibility",
            "collaborative",
            "brief",
        ]

        widgets = {
            'cover': PreviewImageInput(),
            "cover": PreviewImageInput(),
        }

@ -17,7 +17,11 @@ class UserOwnedObjectMixin:
            return False
        if self.visibility == 2:
            return False
        if viewer.is_blocking(owner) or owner.is_blocking(viewer) or viewer.is_muting(owner):
        if (
            viewer.is_blocking(owner)
            or owner.is_blocking(viewer)
            or viewer.is_muting(owner)
        ):
            return False
        if self.visibility == 1:
            return viewer.is_following(owner)

@ -25,12 +29,26 @@ class UserOwnedObjectMixin:
        return True

    def is_editable_by(self, viewer):
        return viewer.is_authenticated and (viewer.is_staff or viewer.is_superuser or viewer == self.owner)
        return viewer.is_authenticated and (
            viewer.is_staff or viewer.is_superuser or viewer == self.owner
        )

    @classmethod
    def get_available(cls, entity, request_user, following_only=False):
        # e.g. SongMark.get_available(song, request.user)
        query_kwargs = {entity.__class__.__name__.lower(): entity}
        all_entities = cls.objects.filter(**query_kwargs).order_by("-created_time")  # get all marks for song
        visible_entities = list(filter(lambda _entity: _entity.is_visible_to(request_user) and (_entity.owner.mastodon_username in request_user.mastodon_following if following_only else True), all_entities))
        all_entities = cls.objects.filter(**query_kwargs).order_by(
            "-created_time"
        )  # get all marks for song
        visible_entities = list(
            filter(
                lambda _entity: _entity.is_visible_to(request_user)
                and (
                    _entity.owner.mastodon_username in request_user.mastodon_following
                    if following_only
                    else True
                ),
                all_entities,
            )
        )
        return visible_entities

@ -7,21 +7,21 @@ register = template.Library()

@register.simple_tag(takes_context=True)
def wish_item_action(context, item):
    user = context['request'].user
    user = context["request"].user
    if user and user.is_authenticated:
        action = {
            'taken': user.shelf_manager.locate_item(item) is not None,
            'url': reverse("journal:wish", args=[item.uuid]),
            "taken": user.shelf_manager.locate_item(item) is not None,
            "url": reverse("journal:wish", args=[item.uuid]),
        }
    return action


@register.simple_tag(takes_context=True)
def like_piece_action(context, piece):
    user = context['request'].user
    user = context["request"].user
    if user and user.is_authenticated:
        action = {
            'taken': Like.objects.filter(target=piece, owner=user).first() is not None,
            'url': reverse("journal:like", args=[piece.uuid]),
            "taken": Like.objects.filter(target=piece, owner=user).first() is not None,
            "url": reverse("journal:like", args=[piece.uuid]),
        }
    return action

@ -2,8 +2,8 @@ from django.apps import AppConfig


class SocialConfig(AppConfig):
    default_auto_field = 'django.db.models.BigAutoField'
    name = 'social'
    default_auto_field = "django.db.models.BigAutoField"
    name = "social"

    def ready(self):
        # load key modules in proper order, make sure class inject and signal works as expected

@ -21,23 +21,27 @@ _logger = logging.getLogger(__name__)


class ActivityTemplate(models.TextChoices):
    """
    """
    MarkItem = 'mark_item'
    ReviewItem = 'review_item'
    CreateCollection = 'create_collection'
    LikeCollection = 'like_collection'
    """ """

    MarkItem = "mark_item"
    ReviewItem = "review_item"
    CreateCollection = "create_collection"
    LikeCollection = "like_collection"


class LocalActivity(models.Model, UserOwnedObjectMixin):
    owner = models.ForeignKey(User, on_delete=models.CASCADE)
    visibility = models.PositiveSmallIntegerField(default=0)  # 0: Public / 1: Follower only / 2: Self only
    template = models.CharField(blank=False, choices=ActivityTemplate.choices, max_length=50)
    visibility = models.PositiveSmallIntegerField(
        default=0
    )  # 0: Public / 1: Follower only / 2: Self only
    template = models.CharField(
        blank=False, choices=ActivityTemplate.choices, max_length=50
    )
    action_object = models.ForeignKey(Piece, on_delete=models.CASCADE)
    created_time = models.DateTimeField(default=timezone.now, db_index=True)

    def __str__(self):
        return f'Activity [{self.owner}:{self.template}:{self.action_object}]'
        return f"Activity [{self.owner}:{self.template}:{self.action_object}]"


class ActivityManager:

@ -48,7 +52,11 @@ class ActivityManager:
        q = Q(owner_id__in=self.owner.following, visibility__lt=2) | Q(owner=self.owner)
        if before_time:
            q = q & Q(created_time__lt=before_time)
        return LocalActivity.objects.filter(q).order_by('-created_time').prefetch_related('action_object')  # .select_related() https://github.com/django-polymorphic/django-polymorphic/pull/531
        return (
            LocalActivity.objects.filter(q)
            .order_by("-created_time")
            .prefetch_related("action_object")
        )  # .select_related() https://github.com/django-polymorphic/django-polymorphic/pull/531

    @staticmethod
    def get_manager_for_user(user):

@ -56,7 +64,7 @@ class ActivityManager:


User.activity_manager = cached_property(ActivityManager.get_manager_for_user)
User.activity_manager.__set_name__(User, 'activity_manager')
User.activity_manager.__set_name__(User, "activity_manager")


class DataSignalManager:

@ -68,9 +76,9 @@ class DataSignalManager:
        if processor_class:
            processor = processor_class(instance)
            if created:
                if hasattr(processor, 'created'):
                if hasattr(processor, "created"):
                    processor.created()
            elif hasattr(processor, 'updated'):
            elif hasattr(processor, "updated"):
                processor.updated()

    @staticmethod

@ -78,7 +86,7 @@ class DataSignalManager:
        processor_class = DataSignalManager.processors.get(instance.__class__)
        if processor_class:
            processor = processor_class(instance)
            if hasattr(processor, 'deleted'):
            if hasattr(processor, "deleted"):
                processor.deleted()

    @staticmethod

@ -103,15 +111,17 @@ class DefaultActivityProcessor:

    def created(self):
        params = {
            'owner': self.action_object.owner,
            'visibility': self.action_object.visibility,
            'template': self.template,
            'action_object': self.action_object,
            "owner": self.action_object.owner,
            "visibility": self.action_object.visibility,
            "template": self.template,
            "action_object": self.action_object,
        }
        LocalActivity.objects.create(**params)

    def updated(self):
        activity = LocalActivity.objects.filter(action_object=self.action_object).first()
        activity = LocalActivity.objects.filter(
            action_object=self.action_object
        ).first()
        if not activity:
            self.created()
        elif activity.visibility != self.action_object.visibility:

@ -2,8 +2,8 @@ from django.urls import path, re_path
from .views import *


app_name = 'social'
app_name = "social"
urlpatterns = [
    path('', feed, name='feed'),
    path('data', data, name='data'),
    path("", feed, name="feed"),
    path("data", data, name="data"),
]

@ -23,31 +23,35 @@ PAGE_SIZE = 10

@login_required
def feed(request):
    if request.method != 'GET':
    if request.method != "GET":
        return
    user = request.user
    unread = Announcement.objects.filter(pk__gt=user.read_announcement_index).order_by('-pk')
    unread = Announcement.objects.filter(pk__gt=user.read_announcement_index).order_by(
        "-pk"
    )
    if unread:
        user.read_announcement_index = Announcement.objects.latest('pk').pk
        user.save(update_fields=['read_announcement_index'])
        user.read_announcement_index = Announcement.objects.latest("pk").pk
        user.save(update_fields=["read_announcement_index"])
    return render(
        request,
        'feed.html',
        "feed.html",
        {
            'top_tags': user.tag_manager.all_tags[:10],
            'unread_announcements': unread,
        }
            "top_tags": user.tag_manager.all_tags[:10],
            "unread_announcements": unread,
        },
    )


@login_required
def data(request):
    if request.method != 'GET':
    if request.method != "GET":
        return
    return render(
        request,
        'feed_data.html',
        "feed_data.html",
        {
            'activities': ActivityManager(request.user).get_timeline(before_time=request.GET.get('last'))[:PAGE_SIZE],
        }
            "activities": ActivityManager(request.user).get_timeline(
                before_time=request.GET.get("last")
            )[:PAGE_SIZE],
        },
    )