diff --git a/catalog/api.py b/catalog/api.py
index 26eb4f37..25da154d 100644
--- a/catalog/api.py
+++ b/catalog/api.py
@@ -10,7 +10,11 @@ from django.utils.baseconv import base62
from django.shortcuts import render, get_object_or_404, redirect, reverse
from django.http import Http404
-api = NinjaAPI(title=settings.SITE_INFO['site_name'], version="1.0.0", description=f"{settings.SITE_INFO['site_name']} API Learn more")
+api = NinjaAPI(
+ title=settings.SITE_INFO["site_name"],
+ version="1.0.0",
+ description=f"{settings.SITE_INFO['site_name']} API Learn more",
+)
class ItemIn(Schema):
diff --git a/catalog/apps.py b/catalog/apps.py
index 62a2dd40..aff10ed7 100644
--- a/catalog/apps.py
+++ b/catalog/apps.py
@@ -2,8 +2,8 @@ from django.apps import AppConfig
class CatalogConfig(AppConfig):
- default_auto_field = 'django.db.models.BigAutoField'
- name = 'catalog'
+ default_auto_field = "django.db.models.BigAutoField"
+ name = "catalog"
def ready(self):
# load key modules in proper order, make sure class inject and signal works as expected
diff --git a/catalog/book/models.py b/catalog/book/models.py
index 1e441fe9..18051b21 100644
--- a/catalog/book/models.py
+++ b/catalog/book/models.py
@@ -25,8 +25,8 @@ from .utils import *
class Edition(Item):
category = ItemCategory.Book
- url_path = 'book'
- demonstrative = _('这本书')
+ url_path = "book"
+ demonstrative = _("这本书")
isbn = PrimaryLookupIdDescriptor(IdType.ISBN)
asin = PrimaryLookupIdDescriptor(IdType.ASIN)
@@ -35,30 +35,30 @@ class Edition(Item):
# goodreads = LookupIdDescriptor(IdType.Goodreads)
METADATA_COPY_LIST = [
- 'title',
- 'brief',
+ "title",
+ "brief",
# legacy fields
- 'subtitle',
- 'orig_title',
- 'author',
- 'translator',
- 'language',
- 'pub_house',
- 'pub_year',
- 'pub_month',
- 'binding',
- 'price',
- 'pages',
- 'contents',
- 'series',
- 'imprint',
+ "subtitle",
+ "orig_title",
+ "author",
+ "translator",
+ "language",
+ "pub_house",
+ "pub_year",
+ "pub_month",
+ "binding",
+ "price",
+ "pages",
+ "contents",
+ "series",
+ "imprint",
]
subtitle = jsondata.CharField(null=True, blank=True, default=None)
orig_title = jsondata.CharField(null=True, blank=True, default=None)
- author = jsondata.ArrayField(_('作者'), null=False, blank=False, default=list)
- translator = jsondata.ArrayField(_('译者'), null=True, blank=True, default=list)
+ author = jsondata.ArrayField(_("作者"), null=False, blank=False, default=list)
+ translator = jsondata.ArrayField(_("译者"), null=True, blank=True, default=list)
language = jsondata.CharField(_("语言"), null=True, blank=True, default=None)
- pub_house = jsondata.CharField(_('出版方'), null=True, blank=True, default=None)
+ pub_house = jsondata.CharField(_("出版方"), null=True, blank=True, default=None)
pub_year = jsondata.IntegerField(_("发表年份"), null=True, blank=True)
pub_month = jsondata.IntegerField(_("发表月份"), null=True, blank=True)
binding = jsondata.CharField(null=True, blank=True, default=None)
@@ -80,8 +80,11 @@ class Edition(Item):
"""add Work from resource.metadata['work'] if not yet"""
links = resource.required_resources + resource.related_resources
for w in links:
- if w['model'] == 'Work':
- work = Work.objects.filter(primary_lookup_id_type=w['id_type'], primary_lookup_id_value=w['id_value']).first()
+ if w["model"] == "Work":
+ work = Work.objects.filter(
+ primary_lookup_id_type=w["id_type"],
+ primary_lookup_id_value=w["id_value"],
+ ).first()
if work and work not in self.works.all():
self.works.add(work)
# if not work:
@@ -90,15 +93,15 @@ class Edition(Item):
class Work(Item):
category = ItemCategory.Book
- url_path = 'book/work'
+ url_path = "book/work"
douban_work = PrimaryLookupIdDescriptor(IdType.DoubanBook_Work)
goodreads_work = PrimaryLookupIdDescriptor(IdType.Goodreads_Work)
- editions = models.ManyToManyField(Edition, related_name='works')
+ editions = models.ManyToManyField(Edition, related_name="works")
class Series(Item):
category = ItemCategory.Book
- url_path = 'book/series'
+ url_path = "book/series"
# douban_serie = LookupIdDescriptor(IdType.DoubanBook_Serie)
# goodreads_serie = LookupIdDescriptor(IdType.Goodreads_Serie)
diff --git a/catalog/book/tests.py b/catalog/book/tests.py
index d6dce95d..720761fc 100644
--- a/catalog/book/tests.py
+++ b/catalog/book/tests.py
@@ -8,7 +8,7 @@ class BookTestCase(TestCase):
def setUp(self):
hyperion = Edition.objects.create(title="Hyperion")
hyperion.pages = 500
- hyperion.isbn = '9780553283686'
+ hyperion.isbn = "9780553283686"
hyperion.save()
# hyperion.isbn10 = '0553283685'
@@ -22,39 +22,39 @@ class BookTestCase(TestCase):
self.assertEqual(hyperion.title, "Hyperion")
self.assertEqual(hyperion.pages, 500)
self.assertEqual(hyperion.primary_lookup_id_type, IdType.ISBN)
- self.assertEqual(hyperion.primary_lookup_id_value, '9780553283686')
+ self.assertEqual(hyperion.primary_lookup_id_value, "9780553283686")
andymion = Edition(title="Andymion", pages=42)
self.assertEqual(andymion.pages, 42)
def test_lookupids(self):
hyperion = Edition.objects.get(title="Hyperion")
- hyperion.asin = 'B004G60EHS'
+ hyperion.asin = "B004G60EHS"
self.assertEqual(hyperion.primary_lookup_id_type, IdType.ASIN)
- self.assertEqual(hyperion.primary_lookup_id_value, 'B004G60EHS')
+ self.assertEqual(hyperion.primary_lookup_id_value, "B004G60EHS")
self.assertEqual(hyperion.isbn, None)
self.assertEqual(hyperion.isbn10, None)
def test_isbn(self):
- t, n = detect_isbn_asin('0553283685')
+ t, n = detect_isbn_asin("0553283685")
self.assertEqual(t, IdType.ISBN)
- self.assertEqual(n, '9780553283686')
- t, n = detect_isbn_asin('9780553283686')
+ self.assertEqual(n, "9780553283686")
+ t, n = detect_isbn_asin("9780553283686")
self.assertEqual(t, IdType.ISBN)
- t, n = detect_isbn_asin(' b0043M6780')
+ t, n = detect_isbn_asin(" b0043M6780")
self.assertEqual(t, IdType.ASIN)
hyperion = Edition.objects.get(title="Hyperion")
- self.assertEqual(hyperion.isbn, '9780553283686')
- self.assertEqual(hyperion.isbn10, '0553283685')
- hyperion.isbn10 = '0575099437'
- self.assertEqual(hyperion.isbn, '9780575099432')
- self.assertEqual(hyperion.isbn10, '0575099437')
+ self.assertEqual(hyperion.isbn, "9780553283686")
+ self.assertEqual(hyperion.isbn10, "0553283685")
+ hyperion.isbn10 = "0575099437"
+ self.assertEqual(hyperion.isbn, "9780575099432")
+ self.assertEqual(hyperion.isbn10, "0575099437")
def test_work(self):
hyperion_print = Edition.objects.get(title="Hyperion")
hyperion_ebook = Edition(title="Hyperion")
hyperion_ebook.save()
- hyperion_ebook.asin = 'B0043M6780'
+ hyperion_ebook.asin = "B0043M6780"
hyperion = Work(title="Hyperion")
hyperion.save()
hyperion.editions.add(hyperion_print)
@@ -69,9 +69,9 @@ class GoodreadsTestCase(TestCase):
def test_parse(self):
t_type = IdType.Goodreads
- t_id = '77566'
- t_url = 'https://www.goodreads.com/zh/book/show/77566.Hyperion'
- t_url2 = 'https://www.goodreads.com/book/show/77566'
+ t_id = "77566"
+ t_url = "https://www.goodreads.com/zh/book/show/77566.Hyperion"
+ t_url2 = "https://www.goodreads.com/book/show/77566"
p1 = SiteManager.get_site_by_id_type(t_type)
p2 = SiteManager.get_site_by_url(t_url)
self.assertEqual(p1.id_to_url(t_id), t_url2)
@@ -79,9 +79,9 @@ class GoodreadsTestCase(TestCase):
@use_local_response
def test_scrape(self):
- t_url = 'https://www.goodreads.com/book/show/77566.Hyperion'
- t_url2 = 'https://www.goodreads.com/book/show/77566'
- isbn = '9780553283686'
+ t_url = "https://www.goodreads.com/book/show/77566.Hyperion"
+ t_url2 = "https://www.goodreads.com/book/show/77566"
+ isbn = "9780553283686"
site = SiteManager.get_site_by_url(t_url)
self.assertEqual(site.ready, False)
self.assertEqual(site.url, t_url2)
@@ -90,39 +90,43 @@ class GoodreadsTestCase(TestCase):
self.assertIsNotNone(site.resource)
site.get_resource_ready()
self.assertEqual(site.ready, True)
- self.assertEqual(site.resource.metadata.get('title'), 'Hyperion')
+ self.assertEqual(site.resource.metadata.get("title"), "Hyperion")
self.assertEqual(site.resource.get_all_lookup_ids().get(IdType.ISBN), isbn)
- self.assertEqual(site.resource.required_resources[0]['id_value'], '1383900')
- edition = Edition.objects.get(primary_lookup_id_type=IdType.ISBN, primary_lookup_id_value=isbn)
+ self.assertEqual(site.resource.required_resources[0]["id_value"], "1383900")
+ edition = Edition.objects.get(
+ primary_lookup_id_type=IdType.ISBN, primary_lookup_id_value=isbn
+ )
resource = edition.external_resources.all().first()
self.assertEqual(resource.id_type, IdType.Goodreads)
- self.assertEqual(resource.id_value, '77566')
- self.assertNotEqual(resource.cover, '/media/item/default.svg')
- self.assertEqual(edition.isbn, '9780553283686')
- self.assertEqual(edition.title, 'Hyperion')
+ self.assertEqual(resource.id_value, "77566")
+ self.assertNotEqual(resource.cover, "/media/item/default.svg")
+ self.assertEqual(edition.isbn, "9780553283686")
+ self.assertEqual(edition.title, "Hyperion")
edition.delete()
site = SiteManager.get_site_by_url(t_url)
self.assertEqual(site.ready, False)
self.assertEqual(site.url, t_url2)
site.get_resource()
- self.assertEqual(site.ready, True, 'previous resource should still exist with data')
+ self.assertEqual(
+ site.ready, True, "previous resource should still exist with data"
+ )
@use_local_response
def test_asin(self):
- t_url = 'https://www.goodreads.com/book/show/45064996-hyperion'
+ t_url = "https://www.goodreads.com/book/show/45064996-hyperion"
site = SiteManager.get_site_by_url(t_url)
site.get_resource_ready()
- self.assertEqual(site.resource.item.title, 'Hyperion')
- self.assertEqual(site.resource.item.asin, 'B004G60EHS')
+ self.assertEqual(site.resource.item.title, "Hyperion")
+ self.assertEqual(site.resource.item.asin, "B004G60EHS")
@use_local_response
def test_work(self):
- url = 'https://www.goodreads.com/work/editions/153313'
+ url = "https://www.goodreads.com/work/editions/153313"
p = SiteManager.get_site_by_url(url).get_resource_ready()
- self.assertEqual(p.item.title, '1984')
- url1 = 'https://www.goodreads.com/book/show/3597767-rok-1984'
- url2 = 'https://www.goodreads.com/book/show/40961427-1984'
+ self.assertEqual(p.item.title, "1984")
+ url1 = "https://www.goodreads.com/book/show/3597767-rok-1984"
+ url2 = "https://www.goodreads.com/book/show/40961427-1984"
p1 = SiteManager.get_site_by_url(url1).get_resource_ready()
p2 = SiteManager.get_site_by_url(url2).get_resource_ready()
w1 = p1.item.works.all().first()
@@ -133,9 +137,9 @@ class GoodreadsTestCase(TestCase):
class GoogleBooksTestCase(TestCase):
def test_parse(self):
t_type = IdType.GoogleBooks
- t_id = 'hV--zQEACAAJ'
- t_url = 'https://books.google.com.bn/books?id=hV--zQEACAAJ&hl=ms'
- t_url2 = 'https://books.google.com/books?id=hV--zQEACAAJ'
+ t_id = "hV--zQEACAAJ"
+ t_url = "https://books.google.com.bn/books?id=hV--zQEACAAJ&hl=ms"
+ t_url2 = "https://books.google.com/books?id=hV--zQEACAAJ"
p1 = SiteManager.get_site_by_url(t_url)
p2 = SiteManager.get_site_by_url(t_url2)
self.assertIsNotNone(p1)
@@ -146,17 +150,19 @@ class GoogleBooksTestCase(TestCase):
@use_local_response
def test_scrape(self):
- t_url = 'https://books.google.com.bn/books?id=hV--zQEACAAJ'
+ t_url = "https://books.google.com.bn/books?id=hV--zQEACAAJ"
site = SiteManager.get_site_by_url(t_url)
self.assertEqual(site.ready, False)
site.get_resource_ready()
self.assertEqual(site.ready, True)
- self.assertEqual(site.resource.metadata.get('title'), '1984 Nineteen Eighty-Four')
- self.assertEqual(site.resource.metadata.get('isbn'), '9781847498571')
+ self.assertEqual(
+ site.resource.metadata.get("title"), "1984 Nineteen Eighty-Four"
+ )
+ self.assertEqual(site.resource.metadata.get("isbn"), "9781847498571")
self.assertEqual(site.resource.id_type, IdType.GoogleBooks)
- self.assertEqual(site.resource.id_value, 'hV--zQEACAAJ')
- self.assertEqual(site.resource.item.isbn, '9781847498571')
- self.assertEqual(site.resource.item.title, '1984 Nineteen Eighty-Four')
+ self.assertEqual(site.resource.id_value, "hV--zQEACAAJ")
+ self.assertEqual(site.resource.item.isbn, "9781847498571")
+ self.assertEqual(site.resource.item.title, "1984 Nineteen Eighty-Four")
class DoubanBookTestCase(TestCase):
@@ -165,9 +171,9 @@ class DoubanBookTestCase(TestCase):
def test_parse(self):
t_type = IdType.DoubanBook
- t_id = '35902899'
- t_url = 'https://m.douban.com/book/subject/35902899/'
- t_url2 = 'https://book.douban.com/subject/35902899/'
+ t_id = "35902899"
+ t_url = "https://m.douban.com/book/subject/35902899/"
+ t_url2 = "https://book.douban.com/subject/35902899/"
p1 = SiteManager.get_site_by_url(t_url)
p2 = SiteManager.get_site_by_url(t_url2)
self.assertEqual(p1.url, t_url2)
@@ -177,44 +183,46 @@ class DoubanBookTestCase(TestCase):
@use_local_response
def test_scrape(self):
- t_url = 'https://book.douban.com/subject/35902899/'
+ t_url = "https://book.douban.com/subject/35902899/"
site = SiteManager.get_site_by_url(t_url)
self.assertEqual(site.ready, False)
site.get_resource_ready()
self.assertEqual(site.ready, True)
self.assertEqual(site.resource.site_name, SiteName.Douban)
- self.assertEqual(site.resource.metadata.get('title'), '1984 Nineteen Eighty-Four')
- self.assertEqual(site.resource.metadata.get('isbn'), '9781847498571')
+ self.assertEqual(
+ site.resource.metadata.get("title"), "1984 Nineteen Eighty-Four"
+ )
+ self.assertEqual(site.resource.metadata.get("isbn"), "9781847498571")
self.assertEqual(site.resource.id_type, IdType.DoubanBook)
- self.assertEqual(site.resource.id_value, '35902899')
- self.assertEqual(site.resource.item.isbn, '9781847498571')
- self.assertEqual(site.resource.item.title, '1984 Nineteen Eighty-Four')
+ self.assertEqual(site.resource.id_value, "35902899")
+ self.assertEqual(site.resource.item.isbn, "9781847498571")
+ self.assertEqual(site.resource.item.title, "1984 Nineteen Eighty-Four")
@use_local_response
def test_work(self):
# url = 'https://www.goodreads.com/work/editions/153313'
- url1 = 'https://book.douban.com/subject/1089243/'
- url2 = 'https://book.douban.com/subject/2037260/'
+ url1 = "https://book.douban.com/subject/1089243/"
+ url2 = "https://book.douban.com/subject/2037260/"
p1 = SiteManager.get_site_by_url(url1).get_resource_ready()
p2 = SiteManager.get_site_by_url(url2).get_resource_ready()
w1 = p1.item.works.all().first()
w2 = p2.item.works.all().first()
- self.assertEqual(w1.title, '黄金时代')
- self.assertEqual(w2.title, '黄金时代')
+ self.assertEqual(w1.title, "黄金时代")
+ self.assertEqual(w2.title, "黄金时代")
self.assertEqual(w1, w2)
- editions = w1.editions.all().order_by('title')
+ editions = w1.editions.all().order_by("title")
self.assertEqual(editions.count(), 2)
- self.assertEqual(editions[0].title, 'Wang in Love and Bondage')
- self.assertEqual(editions[1].title, '黄金时代')
+ self.assertEqual(editions[0].title, "Wang in Love and Bondage")
+ self.assertEqual(editions[1].title, "黄金时代")
class MultiBookSitesTestCase(TestCase):
@use_local_response
def test_editions(self):
# isbn = '9781847498571'
- url1 = 'https://www.goodreads.com/book/show/56821625-1984'
- url2 = 'https://book.douban.com/subject/35902899/'
- url3 = 'https://books.google.com/books?id=hV--zQEACAAJ'
+ url1 = "https://www.goodreads.com/book/show/56821625-1984"
+ url2 = "https://book.douban.com/subject/35902899/"
+ url3 = "https://books.google.com/books?id=hV--zQEACAAJ"
p1 = SiteManager.get_site_by_url(url1).get_resource_ready()
p2 = SiteManager.get_site_by_url(url2).get_resource_ready()
p3 = SiteManager.get_site_by_url(url3).get_resource_ready()
@@ -224,11 +232,13 @@ class MultiBookSitesTestCase(TestCase):
@use_local_response
def test_works(self):
# url1 and url4 has same ISBN, hence they share same Edition instance, which belongs to 2 Work instances
- url1 = 'https://book.douban.com/subject/1089243/'
- url2 = 'https://book.douban.com/subject/2037260/'
- url3 = 'https://www.goodreads.com/book/show/59952545-golden-age'
- url4 = 'https://www.goodreads.com/book/show/11798823'
- p1 = SiteManager.get_site_by_url(url1).get_resource_ready() # lxml bug may break this
+ url1 = "https://book.douban.com/subject/1089243/"
+ url2 = "https://book.douban.com/subject/2037260/"
+ url3 = "https://www.goodreads.com/book/show/59952545-golden-age"
+ url4 = "https://www.goodreads.com/book/show/11798823"
+ p1 = SiteManager.get_site_by_url(
+ url1
+ ).get_resource_ready() # lxml bug may break this
w1 = p1.item.works.all().first()
p2 = SiteManager.get_site_by_url(url2).get_resource_ready()
w2 = p2.item.works.all().first()
@@ -241,13 +251,13 @@ class MultiBookSitesTestCase(TestCase):
self.assertEqual(p4.item.id, p1.item.id)
self.assertEqual(p4.item.works.all().count(), 2)
self.assertEqual(p1.item.works.all().count(), 2)
- w2e = w2.editions.all().order_by('title')
+ w2e = w2.editions.all().order_by("title")
self.assertEqual(w2e.count(), 2)
- self.assertEqual(w2e[0].title, 'Wang in Love and Bondage')
- self.assertEqual(w2e[1].title, '黄金时代')
- w3e = w3.editions.all().order_by('title')
+ self.assertEqual(w2e[0].title, "Wang in Love and Bondage")
+ self.assertEqual(w2e[1].title, "黄金时代")
+ w3e = w3.editions.all().order_by("title")
self.assertEqual(w3e.count(), 2)
- self.assertEqual(w3e[0].title, 'Golden Age: A Novel')
- self.assertEqual(w3e[1].title, '黄金时代')
+ self.assertEqual(w3e[0].title, "Golden Age: A Novel")
+ self.assertEqual(w3e[1].title, "黄金时代")
e = Edition.objects.get(primary_lookup_id_value=9781662601217)
- self.assertEqual(e.title, 'Golden Age: A Novel')
+ self.assertEqual(e.title, "Golden Age: A Novel")
diff --git a/catalog/book/utils.py b/catalog/book/utils.py
index 6598e65b..62e08e00 100644
--- a/catalog/book/utils.py
+++ b/catalog/book/utils.py
@@ -10,7 +10,7 @@ def check_digit_10(isbn):
w = i + 1
sum += w * c
r = sum % 11
- return 'X' if r == 10 else str(r)
+ return "X" if r == 10 else str(r)
def check_digit_13(isbn):
@@ -21,38 +21,38 @@ def check_digit_13(isbn):
w = 3 if i % 2 else 1
sum += w * c
r = 10 - (sum % 10)
- return '0' if r == 10 else str(r)
+ return "0" if r == 10 else str(r)
def isbn_10_to_13(isbn):
if not isbn or len(isbn) != 10:
return None
- return '978' + isbn[:-1] + check_digit_13('978' + isbn[:-1])
+ return "978" + isbn[:-1] + check_digit_13("978" + isbn[:-1])
def isbn_13_to_10(isbn):
- if not isbn or len(isbn) != 13 or isbn[:3] != '978':
+ if not isbn or len(isbn) != 13 or isbn[:3] != "978":
return None
else:
return isbn[3:12] + check_digit_10(isbn[3:12])
def is_isbn_13(isbn):
- return re.match(r'\d{13}', isbn) is not None
+ return re.match(r"\d{13}", isbn) is not None
def is_isbn_10(isbn):
- return re.match(r'\d{9}[X0-9]', isbn) is not None
+ return re.match(r"\d{9}[X0-9]", isbn) is not None
def is_asin(asin):
- return re.match(r'B[A-Z0-9]{9}', asin) is not None
+ return re.match(r"B[A-Z0-9]{9}", asin) is not None
def detect_isbn_asin(s):
if not s:
return None, None
- n = re.sub(r'[^0-9A-Z]', '', s.upper())
+ n = re.sub(r"[^0-9A-Z]", "", s.upper())
if is_isbn_13(n):
return IdType.ISBN, n
if is_isbn_10(n):
diff --git a/catalog/common/__init__.py b/catalog/common/__init__.py
index 105be222..33fc7184 100644
--- a/catalog/common/__init__.py
+++ b/catalog/common/__init__.py
@@ -5,4 +5,28 @@ from .scrapers import *
from . import jsondata
-__all__ = ('IdType', 'SiteName', 'ItemCategory', 'Item', 'ExternalResource', 'ResourceContent', 'ParseError', 'AbstractSite', 'SiteManager', 'jsondata', 'PrimaryLookupIdDescriptor', 'LookupIdDescriptor', 'get_mock_mode', 'get_mock_file', 'use_local_response', 'RetryDownloader', 'BasicDownloader', 'ProxiedDownloader', 'BasicImageDownloader', 'RESPONSE_OK', 'RESPONSE_NETWORK_ERROR', 'RESPONSE_INVALID_CONTENT', 'RESPONSE_CENSORSHIP')
+__all__ = (
+ "IdType",
+ "SiteName",
+ "ItemCategory",
+ "Item",
+ "ExternalResource",
+ "ResourceContent",
+ "ParseError",
+ "AbstractSite",
+ "SiteManager",
+ "jsondata",
+ "PrimaryLookupIdDescriptor",
+ "LookupIdDescriptor",
+ "get_mock_mode",
+ "get_mock_file",
+ "use_local_response",
+ "RetryDownloader",
+ "BasicDownloader",
+ "ProxiedDownloader",
+ "BasicImageDownloader",
+ "RESPONSE_OK",
+ "RESPONSE_NETWORK_ERROR",
+ "RESPONSE_INVALID_CONTENT",
+ "RESPONSE_CENSORSHIP",
+)
diff --git a/catalog/common/downloaders.py b/catalog/common/downloaders.py
index a9d95e21..b3d7cf47 100644
--- a/catalog/common/downloaders.py
+++ b/catalog/common/downloaders.py
@@ -29,6 +29,7 @@ def use_local_response(func):
set_mock_mode(True)
func(args)
set_mock_mode(False)
+
return _func
@@ -43,9 +44,9 @@ def get_mock_mode():
def get_mock_file(url):
- fn = url.replace('***REMOVED***', '1234') # Thank you, Github Action -_-!
- fn = re.sub(r'[^\w]', '_', fn)
- fn = re.sub(r'_key_[*A-Za-z0-9]+', '_key_8964', fn)
+ fn = url.replace("***REMOVED***", "1234") # Thank you, Github Action -_-!
+ fn = re.sub(r"[^\w]", "_", fn)
+ fn = re.sub(r"_key_[*A-Za-z0-9]+", "_key_8964", fn)
return fn
@@ -61,21 +62,23 @@ class DownloadError(Exception):
error = "Censored Content"
else:
error = "Unknown Error"
- self.message = f"Download Failed: {error}{', ' + msg if msg else ''}, url: {self.url}"
+ self.message = (
+ f"Download Failed: {error}{', ' + msg if msg else ''}, url: {self.url}"
+ )
super().__init__(self.message)
class BasicDownloader:
headers = {
# 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:107.0) Gecko/20100101 Firefox/107.0',
- 'User-Agent': 'Mozilla/5.0 (iPad; CPU OS 14_7_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Mobile/15E148 Safari/604.1',
- 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
- 'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
- 'Accept-Encoding': 'gzip, deflate',
- 'Connection': 'keep-alive',
- 'DNT': '1',
- 'Upgrade-Insecure-Requests': '1',
- 'Cache-Control': 'no-cache',
+ "User-Agent": "Mozilla/5.0 (iPad; CPU OS 14_7_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Mobile/15E148 Safari/604.1",
+ "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
+ "Accept-Language": "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2",
+ "Accept-Encoding": "gzip, deflate",
+ "Connection": "keep-alive",
+ "DNT": "1",
+ "Upgrade-Insecure-Requests": "1",
+ "Cache-Control": "no-cache",
}
def __init__(self, url, headers=None):
@@ -100,18 +103,28 @@ class BasicDownloader:
try:
if not _mock_mode:
# TODO cache = get/set from redis
- resp = requests.get(url, headers=self.headers, timeout=self.get_timeout())
+ resp = requests.get(
+ url, headers=self.headers, timeout=self.get_timeout()
+ )
if settings.DOWNLOADER_SAVEDIR:
- with open(settings.DOWNLOADER_SAVEDIR + '/' + get_mock_file(url), 'w', encoding='utf-8') as fp:
+ with open(
+ settings.DOWNLOADER_SAVEDIR + "/" + get_mock_file(url),
+ "w",
+ encoding="utf-8",
+ ) as fp:
fp.write(resp.text)
else:
resp = MockResponse(self.url)
response_type = self.validate_response(resp)
- self.logs.append({'response_type': response_type, 'url': url, 'exception': None})
+ self.logs.append(
+ {"response_type": response_type, "url": url, "exception": None}
+ )
return resp, response_type
except RequestException as e:
- self.logs.append({'response_type': RESPONSE_NETWORK_ERROR, 'url': url, 'exception': e})
+ self.logs.append(
+ {"response_type": RESPONSE_NETWORK_ERROR, "url": url, "exception": e}
+ )
return None, RESPONSE_NETWORK_ERROR
def download(self):
@@ -126,16 +139,26 @@ class ProxiedDownloader(BasicDownloader):
def get_proxied_urls(self):
urls = []
if settings.PROXYCRAWL_KEY is not None:
- urls.append(f'https://api.proxycrawl.com/?token={settings.PROXYCRAWL_KEY}&url={self.url}')
+ urls.append(
+ f"https://api.proxycrawl.com/?token={settings.PROXYCRAWL_KEY}&url={self.url}"
+ )
if settings.SCRAPESTACK_KEY is not None:
# urls.append(f'http://api.scrapestack.com/scrape?access_key={settings.SCRAPESTACK_KEY}&url={self.url}')
- urls.append(f'http://api.scrapestack.com/scrape?keep_headers=1&access_key={settings.SCRAPESTACK_KEY}&url={self.url}')
+ urls.append(
+ f"http://api.scrapestack.com/scrape?keep_headers=1&access_key={settings.SCRAPESTACK_KEY}&url={self.url}"
+ )
if settings.SCRAPERAPI_KEY is not None:
- urls.append(f'http://api.scraperapi.com/?api_key={settings.SCRAPERAPI_KEY}&url={self.url}')
+ urls.append(
+ f"http://api.scraperapi.com/?api_key={settings.SCRAPERAPI_KEY}&url={self.url}"
+ )
return urls
def get_special_proxied_url(self):
- return f'{settings.LOCAL_PROXY}?url={self.url}' if settings.LOCAL_PROXY is not None else None
+ return (
+ f"{settings.LOCAL_PROXY}?url={self.url}"
+ if settings.LOCAL_PROXY is not None
+ else None
+ )
def download(self):
urls = self.get_proxied_urls()
@@ -144,7 +167,11 @@ class ProxiedDownloader(BasicDownloader):
resp = None
while url:
resp, resp_type = self._download(url)
- if resp_type == RESPONSE_OK or resp_type == RESPONSE_INVALID_CONTENT or last_try:
+ if (
+ resp_type == RESPONSE_OK
+ or resp_type == RESPONSE_INVALID_CONTENT
+ or last_try
+ ):
url = None
elif resp_type == RESPONSE_CENSORSHIP:
url = self.get_special_proxied_url()
@@ -169,15 +196,15 @@ class RetryDownloader(BasicDownloader):
elif self.response_type != RESPONSE_NETWORK_ERROR and retries == 0:
raise DownloadError(self)
elif retries > 0:
- _logger.debug('Retry ' + self.url)
+ _logger.debug("Retry " + self.url)
time.sleep((settings.DOWNLOADER_RETRIES - retries) * 0.5)
- raise DownloadError(self, 'max out of retries')
+ raise DownloadError(self, "max out of retries")
class ImageDownloaderMixin:
def __init__(self, url, referer=None):
if referer is not None:
- self.headers['Referer'] = referer
+ self.headers["Referer"] = referer
super().__init__(url)
def validate_response(self, response):
@@ -186,8 +213,10 @@ class ImageDownloaderMixin:
raw_img = response.content
img = Image.open(BytesIO(raw_img))
img.load() # corrupted image will trigger exception
- content_type = response.headers.get('Content-Type')
- self.extention = filetype.get_type(mime=content_type.partition(';')[0].strip()).extension
+ content_type = response.headers.get("Content-Type")
+ self.extention = filetype.get_type(
+ mime=content_type.partition(";")[0].strip()
+ ).extension
return RESPONSE_OK
except Exception:
return RESPONSE_NETWORK_ERROR
@@ -213,7 +242,9 @@ class ProxiedImageDownloader(ImageDownloaderMixin, ProxiedDownloader):
pass
-_local_response_path = str(Path(__file__).parent.parent.parent.absolute()) + '/test_data/'
+_local_response_path = (
+ str(Path(__file__).parent.parent.parent.absolute()) + "/test_data/"
+)
class MockResponse:
@@ -225,23 +256,27 @@ class MockResponse:
self.status_code = 200
_logger.debug(f"use local response for {url} from {fn}")
except Exception:
- self.content = b'Error: response file not found'
+ self.content = b"Error: response file not found"
self.status_code = 404
_logger.debug(f"local response not found for {url} at {fn}")
@property
def text(self):
- return self.content.decode('utf-8')
+ return self.content.decode("utf-8")
def json(self):
return json.load(StringIO(self.text))
def html(self):
- return html.fromstring(self.text) # may throw exception unexpectedly due to OS bug, see https://github.com/neodb-social/neodb/issues/5
+ return html.fromstring(
+ self.text
+ ) # may throw exception unexpectedly due to OS bug, see https://github.com/neodb-social/neodb/issues/5
@property
def headers(self):
- return {'Content-Type': 'image/jpeg' if self.url.endswith('jpg') else 'text/html'}
+ return {
+ "Content-Type": "image/jpeg" if self.url.endswith("jpg") else "text/html"
+ }
requests.Response.html = MockResponse.html
diff --git a/catalog/common/sites.py b/catalog/common/sites.py
index 8f0dcdb3..676bffdc 100644
--- a/catalog/common/sites.py
+++ b/catalog/common/sites.py
@@ -24,25 +24,29 @@ class ResourceContent:
cover_image_extention: str = None
def dict(self):
- return {'metadata': self.metadata, 'lookup_ids': self.lookup_ids}
+ return {"metadata": self.metadata, "lookup_ids": self.lookup_ids}
def to_json(self) -> str:
- return json.dumps({'metadata': self.metadata, 'lookup_ids': self.lookup_ids})
+ return json.dumps({"metadata": self.metadata, "lookup_ids": self.lookup_ids})
class AbstractSite:
"""
Abstract class to represent a site
"""
+
SITE_NAME = None
ID_TYPE = None
- WIKI_PROPERTY_ID = 'P0undefined0'
+ WIKI_PROPERTY_ID = "P0undefined0"
DEFAULT_MODEL = None
URL_PATTERNS = [r"\w+://undefined/(\d+)"]
@classmethod
def validate_url(self, url: str):
- u = next(iter([re.match(p, url) for p in self.URL_PATTERNS if re.match(p, url)]), None)
+ u = next(
+ iter([re.match(p, url) for p in self.URL_PATTERNS if re.match(p, url)]),
+ None,
+ )
return u is not None
@classmethod
@@ -51,15 +55,18 @@ class AbstractSite:
@classmethod
def id_to_url(self, id_value):
- return 'https://undefined/' + id_value
+ return "https://undefined/" + id_value
@classmethod
def url_to_id(self, url: str):
- u = next(iter([re.match(p, url) for p in self.URL_PATTERNS if re.match(p, url)]), None)
+ u = next(
+ iter([re.match(p, url) for p in self.URL_PATTERNS if re.match(p, url)]),
+ None,
+ )
return u[1] if u else None
def __str__(self):
- return f'<{self.__class__.__name__}: {self.url}>'
+ return f"<{self.__class__.__name__}: {self.url}>"
def __init__(self, url=None):
self.id_value = self.url_to_id(url) if url else None
@@ -70,7 +77,9 @@ class AbstractSite:
if not self.resource:
self.resource = ExternalResource.objects.filter(url=self.url).first()
if self.resource is None:
- self.resource = ExternalResource(id_type=self.ID_TYPE, id_value=self.id_value, url=self.url)
+ self.resource = ExternalResource(
+ id_type=self.ID_TYPE, id_value=self.id_value, url=self.url
+ )
return self.resource
def scrape(self) -> ResourceContent:
@@ -91,11 +100,13 @@ class AbstractSite:
model = self.DEFAULT_MODEL
t, v = model.get_best_lookup_id(p.get_all_lookup_ids())
if t is not None:
- p.item = model.objects.filter(primary_lookup_id_type=t, primary_lookup_id_value=v).first()
+ p.item = model.objects.filter(
+ primary_lookup_id_type=t, primary_lookup_id_value=v
+ ).first()
if p.item is None:
obj = model.copy_metadata(p.metadata)
- obj['primary_lookup_id_type'] = t
- obj['primary_lookup_id_value'] = v
+ obj["primary_lookup_id_type"] = t
+ obj["primary_lookup_id_value"] = v
p.item = model.objects.create(**obj)
return p.item
@@ -103,10 +114,17 @@ class AbstractSite:
def ready(self):
return bool(self.resource and self.resource.ready)
- def get_resource_ready(self, auto_save=True, auto_create=True, auto_link=True, preloaded_content=None, ignore_existing_content=False):
+ def get_resource_ready(
+ self,
+ auto_save=True,
+ auto_create=True,
+ auto_link=True,
+ preloaded_content=None,
+ ignore_existing_content=False,
+ ):
"""
Returns an ExternalResource in scraped state if possible
-
+
Parameters
----------
auto_save : bool
@@ -137,7 +155,7 @@ class AbstractSite:
resource_content = self.scrape()
p.update_content(resource_content)
if not p.ready:
- _logger.error(f'unable to get resource {self.url} ready')
+ _logger.error(f"unable to get resource {self.url} ready")
return None
if auto_create and p.item is None:
self.get_item()
@@ -148,9 +166,12 @@ class AbstractSite:
p.item.save()
if auto_link:
for linked_resource in p.required_resources:
- linked_site = SiteManager.get_site_by_url(linked_resource['url'])
+ linked_site = SiteManager.get_site_by_url(linked_resource["url"])
if linked_site:
- linked_site.get_resource_ready(auto_link=False, preloaded_content=linked_resource.get('content'))
+ linked_site.get_resource_ready(
+ auto_link=False,
+ preloaded_content=linked_resource.get("content"),
+ )
else:
_logger.error(f'unable to get site for {linked_resource["url"]}')
p.item.update_linked_items_from_external_resource(p)
@@ -165,7 +186,7 @@ class SiteManager:
def register(target) -> Callable:
id_type = target.ID_TYPE
if id_type in SiteManager.registry:
- raise ValueError(f'Site for {id_type} already exists')
+ raise ValueError(f"Site for {id_type} already exists")
SiteManager.registry[id_type] = target
return target
@@ -175,9 +196,17 @@ class SiteManager:
@staticmethod
def get_site_by_url(url: str):
- cls = next(filter(lambda p: p.validate_url(url), SiteManager.registry.values()), None)
+ cls = next(
+ filter(lambda p: p.validate_url(url), SiteManager.registry.values()), None
+ )
if cls is None:
- cls = next(filter(lambda p: p.validate_url_fallback(url), SiteManager.registry.values()), None)
+ cls = next(
+ filter(
+ lambda p: p.validate_url_fallback(url),
+ SiteManager.registry.values(),
+ ),
+ None,
+ )
return cls(url) if cls else None
@staticmethod
@@ -190,5 +219,7 @@ class SiteManager:
return SiteManager.get_site_by_id_type(resource.id_type)
-ExternalResource.get_site = lambda resource: SiteManager.get_site_by_id_type(resource.id_type)
+ExternalResource.get_site = lambda resource: SiteManager.get_site_by_id_type(
+ resource.id_type
+)
# ExternalResource.get_site = SiteManager.get_site_by_resource
diff --git a/catalog/common/utils.py b/catalog/common/utils.py
index 5bfc82c4..29f32a93 100644
--- a/catalog/common/utils.py
+++ b/catalog/common/utils.py
@@ -6,9 +6,14 @@ import uuid
_logger = logging.getLogger(__name__)
-DEFAULT_ITEM_COVER = 'item/default.svg'
+DEFAULT_ITEM_COVER = "item/default.svg"
def item_cover_path(resource, filename):
- fn = timezone.now().strftime('%Y/%m/%d/') + str(uuid.uuid4()) + '.' + filename.split('.')[-1]
- return 'item/' + resource.id_type + '/' + fn
+ fn = (
+ timezone.now().strftime("%Y/%m/%d/")
+ + str(uuid.uuid4())
+ + "."
+ + filename.split(".")[-1]
+ )
+ return "item/" + resource.id_type + "/" + fn
diff --git a/catalog/game/models.py b/catalog/game/models.py
index ea6b0b51..295b882f 100644
--- a/catalog/game/models.py
+++ b/catalog/game/models.py
@@ -5,66 +5,63 @@ from django.db import models
class Game(Item):
category = ItemCategory.Game
- url_path = 'game'
- demonstrative = _('这个游戏')
+ url_path = "game"
+ demonstrative = _("这个游戏")
igdb = PrimaryLookupIdDescriptor(IdType.IGDB)
steam = PrimaryLookupIdDescriptor(IdType.Steam)
douban_game = PrimaryLookupIdDescriptor(IdType.DoubanGame)
METADATA_COPY_LIST = [
- 'title',
- 'brief',
- 'other_title',
- 'developer',
- 'publisher',
- 'release_date',
- 'genre',
- 'platform',
- 'official_site',
+ "title",
+ "brief",
+ "other_title",
+ "developer",
+ "publisher",
+ "release_date",
+ "genre",
+ "platform",
+ "official_site",
]
other_title = jsondata.ArrayField(
- models.CharField(blank=True, default='', max_length=500),
+ models.CharField(blank=True, default="", max_length=500),
null=True,
blank=True,
default=list,
)
developer = jsondata.ArrayField(
- models.CharField(blank=True, default='', max_length=500),
+ models.CharField(blank=True, default="", max_length=500),
null=True,
blank=True,
default=list,
)
publisher = jsondata.ArrayField(
- models.CharField(blank=True, default='', max_length=500),
+ models.CharField(blank=True, default="", max_length=500),
null=True,
blank=True,
default=list,
)
release_date = jsondata.DateField(
- auto_now=False,
- auto_now_add=False,
- null=True,
- blank=True
+ auto_now=False, auto_now_add=False, null=True, blank=True
)
genre = jsondata.ArrayField(
- models.CharField(blank=True, default='', max_length=200),
+ models.CharField(blank=True, default="", max_length=200),
null=True,
blank=True,
default=list,
)
platform = jsondata.ArrayField(
- models.CharField(blank=True, default='', max_length=200),
+ models.CharField(blank=True, default="", max_length=200),
null=True,
blank=True,
default=list,
)
official_site = jsondata.CharField(
- default='',
+ default="",
)
diff --git a/catalog/game/tests.py b/catalog/game/tests.py
index bef6d4cf..4a455810 100644
--- a/catalog/game/tests.py
+++ b/catalog/game/tests.py
@@ -6,8 +6,8 @@ from catalog.models import *
class IGDBTestCase(TestCase):
def test_parse(self):
t_id_type = IdType.IGDB
- t_id_value = 'portal-2'
- t_url = 'https://www.igdb.com/games/portal-2'
+ t_id_value = "portal-2"
+ t_url = "https://www.igdb.com/games/portal-2"
site = SiteManager.get_site_by_id_type(t_id_type)
self.assertIsNotNone(site)
self.assertEqual(site.validate_url(t_url), True)
@@ -17,34 +17,39 @@ class IGDBTestCase(TestCase):
@use_local_response
def test_scrape(self):
- t_url = 'https://www.igdb.com/games/portal-2'
+ t_url = "https://www.igdb.com/games/portal-2"
site = SiteManager.get_site_by_url(t_url)
self.assertEqual(site.ready, False)
site.get_resource_ready()
self.assertEqual(site.ready, True)
- self.assertEqual(site.resource.metadata['title'], 'Portal 2')
+ self.assertEqual(site.resource.metadata["title"], "Portal 2")
self.assertIsInstance(site.resource.item, Game)
- self.assertEqual(site.resource.item.steam, '620')
+ self.assertEqual(site.resource.item.steam, "620")
@use_local_response
def test_scrape_non_steam(self):
- t_url = 'https://www.igdb.com/games/the-legend-of-zelda-breath-of-the-wild'
+ t_url = "https://www.igdb.com/games/the-legend-of-zelda-breath-of-the-wild"
site = SiteManager.get_site_by_url(t_url)
self.assertEqual(site.ready, False)
site.get_resource_ready()
self.assertEqual(site.ready, True)
- self.assertEqual(site.resource.metadata['title'], 'The Legend of Zelda: Breath of the Wild')
+ self.assertEqual(
+ site.resource.metadata["title"], "The Legend of Zelda: Breath of the Wild"
+ )
self.assertIsInstance(site.resource.item, Game)
self.assertEqual(site.resource.item.primary_lookup_id_type, IdType.IGDB)
- self.assertEqual(site.resource.item.primary_lookup_id_value, 'the-legend-of-zelda-breath-of-the-wild')
+ self.assertEqual(
+ site.resource.item.primary_lookup_id_value,
+ "the-legend-of-zelda-breath-of-the-wild",
+ )
class SteamTestCase(TestCase):
def test_parse(self):
t_id_type = IdType.Steam
- t_id_value = '620'
- t_url = 'https://store.steampowered.com/app/620/Portal_2/'
- t_url2 = 'https://store.steampowered.com/app/620'
+ t_id_value = "620"
+ t_url = "https://store.steampowered.com/app/620/Portal_2/"
+ t_url2 = "https://store.steampowered.com/app/620"
site = SiteManager.get_site_by_id_type(t_id_type)
self.assertIsNotNone(site)
self.assertEqual(site.validate_url(t_url), True)
@@ -54,22 +59,24 @@ class SteamTestCase(TestCase):
@use_local_response
def test_scrape(self):
- t_url = 'https://store.steampowered.com/app/620/Portal_2/'
+ t_url = "https://store.steampowered.com/app/620/Portal_2/"
site = SiteManager.get_site_by_url(t_url)
self.assertEqual(site.ready, False)
site.get_resource_ready()
self.assertEqual(site.ready, True)
- self.assertEqual(site.resource.metadata['title'], 'Portal 2')
- self.assertEqual(site.resource.metadata['brief'], '“终身测试计划”现已升级,您可以为您自己或您的好友设计合作谜题!')
+ self.assertEqual(site.resource.metadata["title"], "Portal 2")
+ self.assertEqual(
+ site.resource.metadata["brief"], "“终身测试计划”现已升级,您可以为您自己或您的好友设计合作谜题!"
+ )
self.assertIsInstance(site.resource.item, Game)
- self.assertEqual(site.resource.item.steam, '620')
+ self.assertEqual(site.resource.item.steam, "620")
class DoubanGameTestCase(TestCase):
def test_parse(self):
t_id_type = IdType.DoubanGame
- t_id_value = '10734307'
- t_url = 'https://www.douban.com/game/10734307/'
+ t_id_value = "10734307"
+ t_url = "https://www.douban.com/game/10734307/"
site = SiteManager.get_site_by_id_type(t_id_type)
self.assertIsNotNone(site)
self.assertEqual(site.validate_url(t_url), True)
@@ -79,21 +86,21 @@ class DoubanGameTestCase(TestCase):
@use_local_response
def test_scrape(self):
- t_url = 'https://www.douban.com/game/10734307/'
+ t_url = "https://www.douban.com/game/10734307/"
site = SiteManager.get_site_by_url(t_url)
self.assertEqual(site.ready, False)
site.get_resource_ready()
self.assertEqual(site.ready, True)
- self.assertEqual(site.resource.metadata['title'], '传送门2 Portal 2')
+ self.assertEqual(site.resource.metadata["title"], "传送门2 Portal 2")
self.assertIsInstance(site.resource.item, Game)
- self.assertEqual(site.resource.item.douban_game, '10734307')
+ self.assertEqual(site.resource.item.douban_game, "10734307")
class BangumiGameTestCase(TestCase):
def test_parse(self):
t_id_type = IdType.Bangumi
- t_id_value = '15912'
- t_url = 'https://bgm.tv/subject/15912'
+ t_id_value = "15912"
+ t_url = "https://bgm.tv/subject/15912"
site = SiteManager.get_site_by_id_type(t_id_type)
self.assertIsNotNone(site)
self.assertEqual(site.validate_url(t_url), True)
@@ -110,8 +117,8 @@ class BangumiGameTestCase(TestCase):
class MultiGameSitesTestCase(TestCase):
@use_local_response
def test_games(self):
- url1 = 'https://www.igdb.com/games/portal-2'
- url2 = 'https://store.steampowered.com/app/620/Portal_2/'
+ url1 = "https://www.igdb.com/games/portal-2"
+ url2 = "https://store.steampowered.com/app/620/Portal_2/"
p1 = SiteManager.get_site_by_url(url1).get_resource_ready()
p2 = SiteManager.get_site_by_url(url2).get_resource_ready()
self.assertEqual(p1.item.id, p2.item.id)
diff --git a/catalog/management/commands/cat.py b/catalog/management/commands/cat.py
index f7e162f0..b853e0b3 100644
--- a/catalog/management/commands/cat.py
+++ b/catalog/management/commands/cat.py
@@ -5,24 +5,24 @@ from catalog.sites import *
class Command(BaseCommand):
- help = 'Scrape a catalog item from external resource (and save it)'
+ help = "Scrape a catalog item from external resource (and save it)"
def add_arguments(self, parser):
- parser.add_argument('url', type=str, help='URL to scrape')
+ parser.add_argument("url", type=str, help="URL to scrape")
parser.add_argument(
- '--save',
- action='store_true',
- help='save to database',
+ "--save",
+ action="store_true",
+ help="save to database",
)
def handle(self, *args, **options):
- url = str(options['url'])
+ url = str(options["url"])
site = SiteManager.get_site_by_url(url)
if site is None:
- self.stdout.write(self.style.ERROR(f'Unknown site for {url}'))
+ self.stdout.write(self.style.ERROR(f"Unknown site for {url}"))
return
- self.stdout.write(f'Fetching from {site}')
- if options['save']:
+ self.stdout.write(f"Fetching from {site}")
+ if options["save"]:
resource = site.get_resource_ready()
pprint.pp(resource.metadata)
pprint.pp(site.get_item())
@@ -31,4 +31,4 @@ class Command(BaseCommand):
resource = site.scrape()
pprint.pp(resource.metadata)
pprint.pp(resource.lookup_ids)
- self.stdout.write(self.style.SUCCESS(f'Done.'))
+ self.stdout.write(self.style.SUCCESS(f"Done."))
diff --git a/catalog/models.py b/catalog/models.py
index 306f57c1..75b13ec5 100644
--- a/catalog/models.py
+++ b/catalog/models.py
@@ -37,7 +37,9 @@ def all_content_types():
if _CONTENT_TYPE_LIST is None:
_CONTENT_TYPE_LIST = {}
for cls in Item.__subclasses__():
- _CONTENT_TYPE_LIST[cls] = ContentType.objects.get(app_label='catalog', model=cls.__name__.lower()).id
+ _CONTENT_TYPE_LIST[cls] = ContentType.objects.get(
+ app_label="catalog", model=cls.__name__.lower()
+ ).id
return _CONTENT_TYPE_LIST
@@ -46,7 +48,7 @@ def all_categories():
if _CATEGORY_LIST is None:
_CATEGORY_LIST = {}
for cls in Item.__subclasses__():
- c = getattr(cls, 'category', None)
+ c = getattr(cls, "category", None)
if c not in _CATEGORY_LIST:
_CATEGORY_LIST[c] = [cls]
else:
diff --git a/catalog/movie/models.py b/catalog/movie/models.py
index 1d001437..ae4a0e6b 100644
--- a/catalog/movie/models.py
+++ b/catalog/movie/models.py
@@ -5,43 +5,93 @@ from django.db import models
class Movie(Item):
category = ItemCategory.Movie
- url_path = 'movie'
+ url_path = "movie"
imdb = PrimaryLookupIdDescriptor(IdType.IMDB)
tmdb_movie = PrimaryLookupIdDescriptor(IdType.TMDB_Movie)
douban_movie = PrimaryLookupIdDescriptor(IdType.DoubanMovie)
- demonstrative = _('这部电影')
+ demonstrative = _("这部电影")
METADATA_COPY_LIST = [
- 'title',
- 'orig_title',
- 'other_title',
- 'director',
- 'playwright',
- 'actor',
- 'genre',
- 'showtime',
- 'site',
- 'area',
- 'language',
- 'year',
- 'duration',
- 'season_number',
- 'episodes',
- 'single_episode_length',
- 'brief',
+ "title",
+ "orig_title",
+ "other_title",
+ "director",
+ "playwright",
+ "actor",
+ "genre",
+ "showtime",
+ "site",
+ "area",
+ "language",
+ "year",
+ "duration",
+ "season_number",
+ "episodes",
+ "single_episode_length",
+ "brief",
]
- orig_title = jsondata.CharField(_("original title"), blank=True, default='', max_length=500)
- other_title = jsondata.ArrayField(models.CharField(_("other title"), blank=True, default='', max_length=500), null=True, blank=True, default=list, )
- director = jsondata.ArrayField(models.CharField(_("director"), blank=True, default='', max_length=200), null=True, blank=True, default=list, )
- playwright = jsondata.ArrayField(models.CharField(_("playwright"), blank=True, default='', max_length=200), null=True, blank=True, default=list, )
- actor = jsondata.ArrayField(models.CharField(_("actor"), blank=True, default='', max_length=200), null=True, blank=True, default=list, )
- genre = jsondata.ArrayField(models.CharField(_("genre"), blank=True, default='', max_length=50), null=True, blank=True, default=list, ) # , choices=MovieGenreEnum.choices
- showtime = jsondata.ArrayField(null=True, blank=True, default=list, )
- site = jsondata.URLField(_('site url'), blank=True, default='', max_length=200)
- area = jsondata.ArrayField(models.CharField(_("country or region"), blank=True, default='', max_length=100, ), null=True, blank=True, default=list, )
- language = jsondata.ArrayField(models.CharField(blank=True, default='', max_length=100, ), null=True, blank=True, default=list, )
+ orig_title = jsondata.CharField(
+ _("original title"), blank=True, default="", max_length=500
+ )
+ other_title = jsondata.ArrayField(
+ models.CharField(_("other title"), blank=True, default="", max_length=500),
+ null=True,
+ blank=True,
+ default=list,
+ )
+ director = jsondata.ArrayField(
+ models.CharField(_("director"), blank=True, default="", max_length=200),
+ null=True,
+ blank=True,
+ default=list,
+ )
+ playwright = jsondata.ArrayField(
+ models.CharField(_("playwright"), blank=True, default="", max_length=200),
+ null=True,
+ blank=True,
+ default=list,
+ )
+ actor = jsondata.ArrayField(
+ models.CharField(_("actor"), blank=True, default="", max_length=200),
+ null=True,
+ blank=True,
+ default=list,
+ )
+ genre = jsondata.ArrayField(
+ models.CharField(_("genre"), blank=True, default="", max_length=50),
+ null=True,
+ blank=True,
+ default=list,
+ ) # , choices=MovieGenreEnum.choices
+ showtime = jsondata.ArrayField(
+ null=True,
+ blank=True,
+ default=list,
+ )
+ site = jsondata.URLField(_("site url"), blank=True, default="", max_length=200)
+ area = jsondata.ArrayField(
+ models.CharField(
+ _("country or region"),
+ blank=True,
+ default="",
+ max_length=100,
+ ),
+ null=True,
+ blank=True,
+ default=list,
+ )
+ language = jsondata.ArrayField(
+ models.CharField(
+ blank=True,
+ default="",
+ max_length=100,
+ ),
+ null=True,
+ blank=True,
+ default=list,
+ )
year = jsondata.IntegerField(null=True, blank=True)
season_number = jsondata.IntegerField(null=True, blank=True)
episodes = jsondata.IntegerField(null=True, blank=True)
single_episode_length = jsondata.IntegerField(null=True, blank=True)
- duration = jsondata.CharField(blank=True, default='', max_length=200)
+ duration = jsondata.CharField(blank=True, default="", max_length=200)
diff --git a/catalog/movie/tests.py b/catalog/movie/tests.py
index 44ab58c1..8f41b4fe 100644
--- a/catalog/movie/tests.py
+++ b/catalog/movie/tests.py
@@ -4,8 +4,8 @@ from catalog.common import *
class DoubanMovieTestCase(TestCase):
def test_parse(self):
- t_id = '3541415'
- t_url = 'https://movie.douban.com/subject/3541415/'
+ t_id = "3541415"
+ t_url = "https://movie.douban.com/subject/3541415/"
p1 = SiteManager.get_site_by_id_type(IdType.DoubanMovie)
self.assertIsNotNone(p1)
self.assertEqual(p1.validate_url(t_url), True)
@@ -15,22 +15,24 @@ class DoubanMovieTestCase(TestCase):
@use_local_response
def test_scrape(self):
- t_url = 'https://movie.douban.com/subject/3541415/'
+ t_url = "https://movie.douban.com/subject/3541415/"
site = SiteManager.get_site_by_url(t_url)
self.assertEqual(site.ready, False)
- self.assertEqual(site.id_value, '3541415')
+ self.assertEqual(site.id_value, "3541415")
site.get_resource_ready()
- self.assertEqual(site.resource.metadata['title'], '盗梦空间')
+ self.assertEqual(site.resource.metadata["title"], "盗梦空间")
self.assertEqual(site.resource.item.primary_lookup_id_type, IdType.IMDB)
- self.assertEqual(site.resource.item.__class__.__name__, 'Movie')
- self.assertEqual(site.resource.item.imdb, 'tt1375666')
+ self.assertEqual(site.resource.item.__class__.__name__, "Movie")
+ self.assertEqual(site.resource.item.imdb, "tt1375666")
class TMDBMovieTestCase(TestCase):
def test_parse(self):
- t_id = '293767'
- t_url = 'https://www.themoviedb.org/movie/293767-billy-lynn-s-long-halftime-walk'
- t_url2 = 'https://www.themoviedb.org/movie/293767'
+ t_id = "293767"
+ t_url = (
+ "https://www.themoviedb.org/movie/293767-billy-lynn-s-long-halftime-walk"
+ )
+ t_url2 = "https://www.themoviedb.org/movie/293767"
p1 = SiteManager.get_site_by_id_type(IdType.TMDB_Movie)
self.assertIsNotNone(p1)
self.assertEqual(p1.validate_url(t_url), True)
@@ -41,22 +43,22 @@ class TMDBMovieTestCase(TestCase):
@use_local_response
def test_scrape(self):
- t_url = 'https://www.themoviedb.org/movie/293767'
+ t_url = "https://www.themoviedb.org/movie/293767"
site = SiteManager.get_site_by_url(t_url)
self.assertEqual(site.ready, False)
- self.assertEqual(site.id_value, '293767')
+ self.assertEqual(site.id_value, "293767")
site.get_resource_ready()
- self.assertEqual(site.resource.metadata['title'], '比利·林恩的中场战事')
+ self.assertEqual(site.resource.metadata["title"], "比利·林恩的中场战事")
self.assertEqual(site.resource.item.primary_lookup_id_type, IdType.IMDB)
- self.assertEqual(site.resource.item.__class__.__name__, 'Movie')
- self.assertEqual(site.resource.item.imdb, 'tt2513074')
+ self.assertEqual(site.resource.item.__class__.__name__, "Movie")
+ self.assertEqual(site.resource.item.imdb, "tt2513074")
class IMDBMovieTestCase(TestCase):
def test_parse(self):
- t_id = 'tt1375666'
- t_url = 'https://www.imdb.com/title/tt1375666/'
- t_url2 = 'https://www.imdb.com/title/tt1375666/'
+ t_id = "tt1375666"
+ t_url = "https://www.imdb.com/title/tt1375666/"
+ t_url2 = "https://www.imdb.com/title/tt1375666/"
p1 = SiteManager.get_site_by_id_type(IdType.IMDB)
self.assertIsNotNone(p1)
self.assertEqual(p1.validate_url(t_url), True)
@@ -67,22 +69,22 @@ class IMDBMovieTestCase(TestCase):
@use_local_response
def test_scrape(self):
- t_url = 'https://www.imdb.com/title/tt1375666/'
+ t_url = "https://www.imdb.com/title/tt1375666/"
site = SiteManager.get_site_by_url(t_url)
self.assertEqual(site.ready, False)
- self.assertEqual(site.id_value, 'tt1375666')
+ self.assertEqual(site.id_value, "tt1375666")
site.get_resource_ready()
- self.assertEqual(site.resource.metadata['title'], '盗梦空间')
+ self.assertEqual(site.resource.metadata["title"], "盗梦空间")
self.assertEqual(site.resource.item.primary_lookup_id_type, IdType.IMDB)
- self.assertEqual(site.resource.item.imdb, 'tt1375666')
+ self.assertEqual(site.resource.item.imdb, "tt1375666")
class MultiMovieSitesTestCase(TestCase):
@use_local_response
def test_movies(self):
- url1 = 'https://www.themoviedb.org/movie/27205-inception'
- url2 = 'https://movie.douban.com/subject/3541415/'
- url3 = 'https://www.imdb.com/title/tt1375666/'
+ url1 = "https://www.themoviedb.org/movie/27205-inception"
+ url2 = "https://movie.douban.com/subject/3541415/"
+ url3 = "https://www.imdb.com/title/tt1375666/"
p1 = SiteManager.get_site_by_url(url1).get_resource_ready()
p2 = SiteManager.get_site_by_url(url2).get_resource_ready()
p3 = SiteManager.get_site_by_url(url3).get_resource_ready()
diff --git a/catalog/music/models.py b/catalog/music/models.py
index b4993eae..12f777fe 100644
--- a/catalog/music/models.py
+++ b/catalog/music/models.py
@@ -4,35 +4,47 @@ from django.db import models
class Album(Item):
- url_path = 'album'
+ url_path = "album"
category = ItemCategory.Music
- demonstrative = _('这张专辑')
+ demonstrative = _("这张专辑")
barcode = PrimaryLookupIdDescriptor(IdType.GTIN)
douban_music = PrimaryLookupIdDescriptor(IdType.DoubanMusic)
spotify_album = PrimaryLookupIdDescriptor(IdType.Spotify_Album)
METADATA_COPY_LIST = [
- 'title',
- 'other_title',
- 'album_type',
- 'media',
- 'disc_count',
- 'artist',
- 'genre',
- 'release_date',
- 'duration',
- 'company',
- 'track_list',
- 'brief',
- 'bandcamp_album_id',
+ "title",
+ "other_title",
+ "album_type",
+ "media",
+ "disc_count",
+ "artist",
+ "genre",
+ "release_date",
+ "duration",
+ "company",
+ "track_list",
+ "brief",
+ "bandcamp_album_id",
]
- release_date = jsondata.DateField(_('发行日期'), auto_now=False, auto_now_add=False, null=True, blank=True)
+ release_date = jsondata.DateField(
+ _("发行日期"), auto_now=False, auto_now_add=False, null=True, blank=True
+ )
duration = jsondata.IntegerField(_("时长"), null=True, blank=True)
- artist = jsondata.ArrayField(models.CharField(_("artist"), blank=True, default='', max_length=200), null=True, blank=True, default=list)
- genre = jsondata.CharField(_("流派"), blank=True, default='', max_length=100)
- company = jsondata.ArrayField(models.CharField(blank=True, default='', max_length=500), null=True, blank=True, default=list)
+ artist = jsondata.ArrayField(
+ models.CharField(_("artist"), blank=True, default="", max_length=200),
+ null=True,
+ blank=True,
+ default=list,
+ )
+ genre = jsondata.CharField(_("流派"), blank=True, default="", max_length=100)
+ company = jsondata.ArrayField(
+ models.CharField(blank=True, default="", max_length=500),
+ null=True,
+ blank=True,
+ default=list,
+ )
track_list = jsondata.TextField(_("曲目"), blank=True, default="")
- other_title = jsondata.CharField(blank=True, default='', max_length=500)
- album_type = jsondata.CharField(blank=True, default='', max_length=500)
- media = jsondata.CharField(blank=True, default='', max_length=500)
- bandcamp_album_id = jsondata.CharField(blank=True, default='', max_length=500)
- disc_count = jsondata.IntegerField(blank=True, default='', max_length=500)
+ other_title = jsondata.CharField(blank=True, default="", max_length=500)
+ album_type = jsondata.CharField(blank=True, default="", max_length=500)
+ media = jsondata.CharField(blank=True, default="", max_length=500)
+ bandcamp_album_id = jsondata.CharField(blank=True, default="", max_length=500)
+ disc_count = jsondata.IntegerField(blank=True, default="", max_length=500)
diff --git a/catalog/music/tests.py b/catalog/music/tests.py
index a2182692..aed4e715 100644
--- a/catalog/music/tests.py
+++ b/catalog/music/tests.py
@@ -6,8 +6,8 @@ from catalog.models import *
class SpotifyTestCase(TestCase):
def test_parse(self):
t_id_type = IdType.Spotify_Album
- t_id_value = '65KwtzkJXw7oT819NFWmEP'
- t_url = 'https://open.spotify.com/album/65KwtzkJXw7oT819NFWmEP'
+ t_id_value = "65KwtzkJXw7oT819NFWmEP"
+ t_url = "https://open.spotify.com/album/65KwtzkJXw7oT819NFWmEP"
site = SiteManager.get_site_by_id_type(t_id_type)
self.assertIsNotNone(site)
self.assertEqual(site.validate_url(t_url), True)
@@ -17,21 +17,21 @@ class SpotifyTestCase(TestCase):
@use_local_response
def test_scrape(self):
- t_url = 'https://open.spotify.com/album/65KwtzkJXw7oT819NFWmEP'
+ t_url = "https://open.spotify.com/album/65KwtzkJXw7oT819NFWmEP"
site = SiteManager.get_site_by_url(t_url)
self.assertEqual(site.ready, False)
site.get_resource_ready()
self.assertEqual(site.ready, True)
- self.assertEqual(site.resource.metadata['title'], 'The Race For Space')
+ self.assertEqual(site.resource.metadata["title"], "The Race For Space")
self.assertIsInstance(site.resource.item, Album)
- self.assertEqual(site.resource.item.barcode, '3610159662676')
+ self.assertEqual(site.resource.item.barcode, "3610159662676")
class DoubanMusicTestCase(TestCase):
def test_parse(self):
t_id_type = IdType.DoubanMusic
- t_id_value = '33551231'
- t_url = 'https://music.douban.com/subject/33551231/'
+ t_id_value = "33551231"
+ t_url = "https://music.douban.com/subject/33551231/"
site = SiteManager.get_site_by_id_type(t_id_type)
self.assertIsNotNone(site)
self.assertEqual(site.validate_url(t_url), True)
@@ -41,21 +41,21 @@ class DoubanMusicTestCase(TestCase):
@use_local_response
def test_scrape(self):
- t_url = 'https://music.douban.com/subject/33551231/'
+ t_url = "https://music.douban.com/subject/33551231/"
site = SiteManager.get_site_by_url(t_url)
self.assertEqual(site.ready, False)
site.get_resource_ready()
self.assertEqual(site.ready, True)
- self.assertEqual(site.resource.metadata['title'], 'The Race For Space')
+ self.assertEqual(site.resource.metadata["title"], "The Race For Space")
self.assertIsInstance(site.resource.item, Album)
- self.assertEqual(site.resource.item.barcode, '3610159662676')
+ self.assertEqual(site.resource.item.barcode, "3610159662676")
class MultiMusicSitesTestCase(TestCase):
@use_local_response
def test_albums(self):
- url1 = 'https://music.douban.com/subject/33551231/'
- url2 = 'https://open.spotify.com/album/65KwtzkJXw7oT819NFWmEP'
+ url1 = "https://music.douban.com/subject/33551231/"
+ url2 = "https://open.spotify.com/album/65KwtzkJXw7oT819NFWmEP"
p1 = SiteManager.get_site_by_url(url1).get_resource_ready()
p2 = SiteManager.get_site_by_url(url2).get_resource_ready()
self.assertEqual(p1.item.id, p2.item.id)
@@ -64,9 +64,9 @@ class MultiMusicSitesTestCase(TestCase):
class BandcampTestCase(TestCase):
def test_parse(self):
t_id_type = IdType.Bandcamp
- t_id_value = 'intlanthem.bandcamp.com/album/in-these-times'
- t_url = 'https://intlanthem.bandcamp.com/album/in-these-times?from=hpbcw'
- t_url2 = 'https://intlanthem.bandcamp.com/album/in-these-times'
+ t_id_value = "intlanthem.bandcamp.com/album/in-these-times"
+ t_url = "https://intlanthem.bandcamp.com/album/in-these-times?from=hpbcw"
+ t_url2 = "https://intlanthem.bandcamp.com/album/in-these-times"
site = SiteManager.get_site_by_id_type(t_id_type)
self.assertIsNotNone(site)
self.assertEqual(site.validate_url(t_url), True)
@@ -76,11 +76,11 @@ class BandcampTestCase(TestCase):
@use_local_response
def test_scrape(self):
- t_url = 'https://intlanthem.bandcamp.com/album/in-these-times?from=hpbcw'
+ t_url = "https://intlanthem.bandcamp.com/album/in-these-times?from=hpbcw"
site = SiteManager.get_site_by_url(t_url)
self.assertEqual(site.ready, False)
site.get_resource_ready()
self.assertEqual(site.ready, True)
- self.assertEqual(site.resource.metadata['title'], 'In These Times')
- self.assertEqual(site.resource.metadata['artist'], ['Makaya McCraven'])
+ self.assertEqual(site.resource.metadata["title"], "In These Times")
+ self.assertEqual(site.resource.metadata["artist"], ["Makaya McCraven"])
self.assertIsInstance(site.resource.item, Album)
diff --git a/catalog/performance/models.py b/catalog/performance/models.py
index e82b905d..a0b531ab 100644
--- a/catalog/performance/models.py
+++ b/catalog/performance/models.py
@@ -4,12 +4,12 @@ from django.utils.translation import gettext_lazy as _
class Performance(Item):
category = ItemCategory.Performance
- url_path = 'performance'
+ url_path = "performance"
douban_drama = LookupIdDescriptor(IdType.DoubanDrama)
- versions = jsondata.ArrayField(_('版本'), null=False, blank=False, default=list)
- directors = jsondata.ArrayField(_('导演'), null=False, blank=False, default=list)
- playwrights = jsondata.ArrayField(_('编剧'), null=False, blank=False, default=list)
- actors = jsondata.ArrayField(_('主演'), null=False, blank=False, default=list)
+ versions = jsondata.ArrayField(_("版本"), null=False, blank=False, default=list)
+ directors = jsondata.ArrayField(_("导演"), null=False, blank=False, default=list)
+ playwrights = jsondata.ArrayField(_("编剧"), null=False, blank=False, default=list)
+ actors = jsondata.ArrayField(_("主演"), null=False, blank=False, default=list)
class Meta:
proxy = True
diff --git a/catalog/performance/tests.py b/catalog/performance/tests.py
index 8e765743..9154706a 100644
--- a/catalog/performance/tests.py
+++ b/catalog/performance/tests.py
@@ -7,8 +7,8 @@ class DoubanDramaTestCase(TestCase):
pass
def test_parse(self):
- t_id = '24849279'
- t_url = 'https://www.douban.com/location/drama/24849279/'
+ t_id = "24849279"
+ t_url = "https://www.douban.com/location/drama/24849279/"
p1 = SiteManager.get_site_by_id_type(IdType.DoubanDrama)
self.assertIsNotNone(p1)
p1 = SiteManager.get_site_by_url(t_url)
@@ -19,14 +19,14 @@ class DoubanDramaTestCase(TestCase):
@use_local_response
def test_scrape(self):
- t_url = 'https://www.douban.com/location/drama/24849279/'
+ t_url = "https://www.douban.com/location/drama/24849279/"
site = SiteManager.get_site_by_url(t_url)
self.assertEqual(site.ready, False)
resource = site.get_resource_ready()
self.assertEqual(site.ready, True)
- self.assertEqual(resource.metadata['title'], '红花侠')
+ self.assertEqual(resource.metadata["title"], "红花侠")
item = site.get_item()
- self.assertEqual(item.title, '红花侠')
+ self.assertEqual(item.title, "红花侠")
# self.assertEqual(i.other_titles, ['スカーレットピンパーネル', 'THE SCARLET PIMPERNEL'])
# self.assertEqual(len(i.brief), 545)
diff --git a/catalog/podcast/models.py b/catalog/podcast/models.py
index 367daea2..6c808c8a 100644
--- a/catalog/podcast/models.py
+++ b/catalog/podcast/models.py
@@ -3,7 +3,7 @@ from catalog.common import *
class Podcast(Item):
category = ItemCategory.Podcast
- url_path = 'podcast'
+ url_path = "podcast"
feed_url = PrimaryLookupIdDescriptor(IdType.Feed)
apple_podcast = PrimaryLookupIdDescriptor(IdType.ApplePodcast)
# ximalaya = LookupIdDescriptor(IdType.Ximalaya)
diff --git a/catalog/podcast/tests.py b/catalog/podcast/tests.py
index 615b8925..93140791 100644
--- a/catalog/podcast/tests.py
+++ b/catalog/podcast/tests.py
@@ -8,9 +8,9 @@ class ApplePodcastTestCase(TestCase):
pass
def test_parse(self):
- t_id = '657765158'
- t_url = 'https://podcasts.apple.com/us/podcast/%E5%A4%A7%E5%86%85%E5%AF%86%E8%B0%88/id657765158'
- t_url2 = 'https://podcasts.apple.com/us/podcast/id657765158'
+ t_id = "657765158"
+ t_url = "https://podcasts.apple.com/us/podcast/%E5%A4%A7%E5%86%85%E5%AF%86%E8%B0%88/id657765158"
+ t_url2 = "https://podcasts.apple.com/us/podcast/id657765158"
p1 = SiteManager.get_site_by_id_type(IdType.ApplePodcast)
self.assertIsNotNone(p1)
self.assertEqual(p1.validate_url(t_url), True)
@@ -20,11 +20,14 @@ class ApplePodcastTestCase(TestCase):
@use_local_response
def test_scrape(self):
- t_url = 'https://podcasts.apple.com/gb/podcast/the-new-yorker-radio-hour/id1050430296'
+ t_url = "https://podcasts.apple.com/gb/podcast/the-new-yorker-radio-hour/id1050430296"
site = SiteManager.get_site_by_url(t_url)
self.assertEqual(site.ready, False)
- self.assertEqual(site.id_value, '1050430296')
+ self.assertEqual(site.id_value, "1050430296")
site.get_resource_ready()
- self.assertEqual(site.resource.metadata['title'], 'The New Yorker Radio Hour')
+ self.assertEqual(site.resource.metadata["title"], "The New Yorker Radio Hour")
# self.assertEqual(site.resource.metadata['feed_url'], 'http://feeds.wnyc.org/newyorkerradiohour')
- self.assertEqual(site.resource.metadata['feed_url'], 'http://feeds.feedburner.com/newyorkerradiohour')
+ self.assertEqual(
+ site.resource.metadata["feed_url"],
+ "http://feeds.feedburner.com/newyorkerradiohour",
+ )
diff --git a/catalog/sites/apple_podcast.py b/catalog/sites/apple_podcast.py
index 2fd78bd8..d1bc0534 100644
--- a/catalog/sites/apple_podcast.py
+++ b/catalog/sites/apple_podcast.py
@@ -11,7 +11,7 @@ class ApplePodcast(AbstractSite):
SITE_NAME = SiteName.ApplePodcast
ID_TYPE = IdType.ApplePodcast
URL_PATTERNS = [r"https://[^.]+.apple.com/\w+/podcast/*[^/?]*/id(\d+)"]
- WIKI_PROPERTY_ID = 'P5842'
+ WIKI_PROPERTY_ID = "P5842"
DEFAULT_MODEL = Podcast
@classmethod
@@ -19,23 +19,27 @@ class ApplePodcast(AbstractSite):
return "https://podcasts.apple.com/us/podcast/id" + id_value
def scrape(self):
- api_url = f'https://itunes.apple.com/lookup?id={self.id_value}'
+ api_url = f"https://itunes.apple.com/lookup?id={self.id_value}"
dl = BasicDownloader(api_url)
resp = dl.download()
- r = resp.json()['results'][0]
- pd = ResourceContent(metadata={
- 'title': r['trackName'],
- 'feed_url': r['feedUrl'],
- 'hosts': [r['artistName']],
- 'genres': r['genres'],
- 'cover_image_url': r['artworkUrl600'],
- })
- pd.lookup_ids[IdType.Feed] = pd.metadata.get('feed_url')
+ r = resp.json()["results"][0]
+ pd = ResourceContent(
+ metadata={
+ "title": r["trackName"],
+ "feed_url": r["feedUrl"],
+ "hosts": [r["artistName"]],
+ "genres": r["genres"],
+ "cover_image_url": r["artworkUrl600"],
+ }
+ )
+ pd.lookup_ids[IdType.Feed] = pd.metadata.get("feed_url")
if pd.metadata["cover_image_url"]:
imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
try:
pd.cover_image = imgdl.download().content
pd.cover_image_extention = imgdl.extention
except Exception:
- _logger.debug(f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}')
+ _logger.debug(
+ f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
+ )
return pd
diff --git a/catalog/sites/bandcamp.py b/catalog/sites/bandcamp.py
index 394dafa8..b96752f8 100644
--- a/catalog/sites/bandcamp.py
+++ b/catalog/sites/bandcamp.py
@@ -14,11 +14,9 @@ _logger = logging.getLogger(__name__)
class Bandcamp(AbstractSite):
SITE_NAME = SiteName.Bandcamp
ID_TYPE = IdType.Bandcamp
- URL_PATTERNS = [
- r"https://([a-z0-9\-]+.bandcamp.com/album/[^?#/]+)"
- ]
+ URL_PATTERNS = [r"https://([a-z0-9\-]+.bandcamp.com/album/[^?#/]+)"]
URL_PATTERN_FALLBACK = r"https://([a-z0-9\-\.]+/album/[^?#/]+)"
- WIKI_PROPERTY_ID = ''
+ WIKI_PROPERTY_ID = ""
DEFAULT_MODEL = Album
@classmethod
@@ -32,16 +30,16 @@ class Bandcamp(AbstractSite):
parsed_url = urllib.parse.urlparse(url)
hostname = parsed_url.netloc
try:
- answers = dns.resolver.query(hostname, 'CNAME')
+ answers = dns.resolver.query(hostname, "CNAME")
for rdata in answers:
- if str(rdata.target) == 'dom.bandcamp.com.':
+ if str(rdata.target) == "dom.bandcamp.com.":
return True
except Exception:
pass
try:
- answers = dns.resolver.query(hostname, 'A')
+ answers = dns.resolver.query(hostname, "A")
for rdata in answers:
- if str(rdata.address) == '35.241.62.186':
+ if str(rdata.address) == "35.241.62.186":
return True
except Exception:
pass
@@ -50,34 +48,45 @@ class Bandcamp(AbstractSite):
content = BasicDownloader(self.url).download().html()
try:
title = content.xpath("//h2[@class='trackTitle']/text()")[0].strip()
- artist = [content.xpath("//div[@id='name-section']/h3/span/a/text()")[0].strip()]
+ artist = [
+ content.xpath("//div[@id='name-section']/h3/span/a/text()")[0].strip()
+ ]
except IndexError:
raise ValueError("given url contains no valid info")
genre = [] # TODO: parse tags
track_list = []
- release_nodes = content.xpath("//div[@class='tralbumData tralbum-credits']/text()")
- release_date = dateparser.parse(re.sub(r'releas\w+ ', '', release_nodes[0].strip())).strftime('%Y-%m-%d') if release_nodes else None
+ release_nodes = content.xpath(
+ "//div[@class='tralbumData tralbum-credits']/text()"
+ )
+ release_date = (
+ dateparser.parse(
+ re.sub(r"releas\w+ ", "", release_nodes[0].strip())
+ ).strftime("%Y-%m-%d")
+ if release_nodes
+ else None
+ )
duration = None
company = None
brief_nodes = content.xpath("//div[@class='tralbumData tralbum-about']/text()")
brief = "".join(brief_nodes) if brief_nodes else None
cover_url = content.xpath("//div[@id='tralbumArt']/a/@href")[0].strip()
- bandcamp_page_data = json.loads(content.xpath(
- "//meta[@name='bc-page-properties']/@content")[0].strip())
- bandcamp_album_id = bandcamp_page_data['item_id']
+ bandcamp_page_data = json.loads(
+ content.xpath("//meta[@name='bc-page-properties']/@content")[0].strip()
+ )
+ bandcamp_album_id = bandcamp_page_data["item_id"]
data = {
- 'title': title,
- 'artist': artist,
- 'genre': genre,
- 'track_list': track_list,
- 'release_date': release_date,
- 'duration': duration,
- 'company': company,
- 'brief': brief,
- 'bandcamp_album_id': bandcamp_album_id,
- 'cover_image_url': cover_url,
+ "title": title,
+ "artist": artist,
+ "genre": genre,
+ "track_list": track_list,
+ "release_date": release_date,
+ "duration": duration,
+ "company": company,
+ "brief": brief,
+ "bandcamp_album_id": bandcamp_album_id,
+ "cover_image_url": cover_url,
}
pd = ResourceContent(metadata=data)
if data["cover_image_url"]:
@@ -86,5 +95,7 @@ class Bandcamp(AbstractSite):
pd.cover_image = imgdl.download().content
pd.cover_image_extention = imgdl.extention
except Exception:
- _logger.debug(f'failed to download cover for {self.url} from {data["cover_image_url"]}')
+ _logger.debug(
+ f'failed to download cover for {self.url} from {data["cover_image_url"]}'
+ )
return pd
diff --git a/catalog/sites/bangumi.py b/catalog/sites/bangumi.py
index 6be9bd6b..5c95b957 100644
--- a/catalog/sites/bangumi.py
+++ b/catalog/sites/bangumi.py
@@ -13,7 +13,7 @@ class Bangumi(AbstractSite):
URL_PATTERNS = [
r"https://bgm\.tv/subject/(\d+)",
]
- WIKI_PROPERTY_ID = ''
+ WIKI_PROPERTY_ID = ""
DEFAULT_MODEL = None
@classmethod
diff --git a/catalog/sites/douban.py b/catalog/sites/douban.py
index b26d42fc..6f47af2e 100644
--- a/catalog/sites/douban.py
+++ b/catalog/sites/douban.py
@@ -13,14 +13,17 @@ class DoubanDownloader(ProxiedDownloader):
elif response.status_code == 204:
return RESPONSE_CENSORSHIP
elif response.status_code == 200:
- content = response.content.decode('utf-8')
- if content.find('关于豆瓣') == -1:
+ content = response.content.decode("utf-8")
+ if content.find("关于豆瓣") == -1:
# if content.find('你的 IP 发出') == -1:
# error = error + 'Content not authentic' # response is garbage
# else:
# error = error + 'IP banned'
return RESPONSE_NETWORK_ERROR
- elif content.find('页面不存在') != -1 or content.find('呃... 你想访问的条目豆瓣不收录。') != -1: # re.search('不存在[^<]+', content, re.MULTILINE):
+ elif (
+ content.find("页面不存在") != -1
+ or content.find("呃... 你想访问的条目豆瓣不收录。") != -1
+ ): # re.search('不存在[^<]+', content, re.MULTILINE):
return RESPONSE_CENSORSHIP
else:
return RESPONSE_OK
diff --git a/catalog/sites/douban_book.py b/catalog/sites/douban_book.py
index 22ae9119..2bf11908 100644
--- a/catalog/sites/douban_book.py
+++ b/catalog/sites/douban_book.py
@@ -12,8 +12,11 @@ _logger = logging.getLogger(__name__)
class DoubanBook(AbstractSite):
SITE_NAME = SiteName.Douban
ID_TYPE = IdType.DoubanBook
- URL_PATTERNS = [r"\w+://book\.douban\.com/subject/(\d+)/{0,1}", r"\w+://m.douban.com/book/subject/(\d+)/{0,1}"]
- WIKI_PROPERTY_ID = '?'
+ URL_PATTERNS = [
+ r"\w+://book\.douban\.com/subject/(\d+)/{0,1}",
+ r"\w+://m.douban.com/book/subject/(\d+)/{0,1}",
+ ]
+ WIKI_PROPERTY_ID = "?"
DEFAULT_MODEL = Edition
@classmethod
@@ -23,31 +26,40 @@ class DoubanBook(AbstractSite):
def scrape(self):
content = DoubanDownloader(self.url).download().html()
- isbn_elem = content.xpath("//div[@id='info']//span[text()='ISBN:']/following::text()")
+ isbn_elem = content.xpath(
+ "//div[@id='info']//span[text()='ISBN:']/following::text()"
+ )
isbn = isbn_elem[0].strip() if isbn_elem else None
title_elem = content.xpath("/html/body//h1/span/text()")
- title = title_elem[0].strip() if title_elem else f"Unknown Title {self.id_value}"
+ title = (
+ title_elem[0].strip() if title_elem else f"Unknown Title {self.id_value}"
+ )
subtitle_elem = content.xpath(
- "//div[@id='info']//span[text()='副标题:']/following::text()")
+ "//div[@id='info']//span[text()='副标题:']/following::text()"
+ )
subtitle = subtitle_elem[0].strip()[:500] if subtitle_elem else None
orig_title_elem = content.xpath(
- "//div[@id='info']//span[text()='原作名:']/following::text()")
+ "//div[@id='info']//span[text()='原作名:']/following::text()"
+ )
orig_title = orig_title_elem[0].strip()[:500] if orig_title_elem else None
language_elem = content.xpath(
- "//div[@id='info']//span[text()='语言:']/following::text()")
+ "//div[@id='info']//span[text()='语言:']/following::text()"
+ )
language = language_elem[0].strip() if language_elem else None
pub_house_elem = content.xpath(
- "//div[@id='info']//span[text()='出版社:']/following::text()")
+ "//div[@id='info']//span[text()='出版社:']/following::text()"
+ )
pub_house = pub_house_elem[0].strip() if pub_house_elem else None
pub_date_elem = content.xpath(
- "//div[@id='info']//span[text()='出版年:']/following::text()")
- pub_date = pub_date_elem[0].strip() if pub_date_elem else ''
+ "//div[@id='info']//span[text()='出版年:']/following::text()"
+ )
+ pub_date = pub_date_elem[0].strip() if pub_date_elem else ""
year_month_day = RE_NUMBERS.findall(pub_date)
if len(year_month_day) in (2, 3):
pub_year = int(year_month_day[0])
@@ -60,45 +72,62 @@ class DoubanBook(AbstractSite):
pub_month = None
if pub_year and pub_month and pub_year < pub_month:
pub_year, pub_month = pub_month, pub_year
- pub_year = None if pub_year is not None and pub_year not in range(
- 0, 3000) else pub_year
- pub_month = None if pub_month is not None and pub_month not in range(
- 1, 12) else pub_month
+ pub_year = (
+ None
+ if pub_year is not None and pub_year not in range(0, 3000)
+ else pub_year
+ )
+ pub_month = (
+ None
+ if pub_month is not None and pub_month not in range(1, 12)
+ else pub_month
+ )
binding_elem = content.xpath(
- "//div[@id='info']//span[text()='装帧:']/following::text()")
+ "//div[@id='info']//span[text()='装帧:']/following::text()"
+ )
binding = binding_elem[0].strip() if binding_elem else None
price_elem = content.xpath(
- "//div[@id='info']//span[text()='定价:']/following::text()")
+ "//div[@id='info']//span[text()='定价:']/following::text()"
+ )
price = price_elem[0].strip() if price_elem else None
pages_elem = content.xpath(
- "//div[@id='info']//span[text()='页数:']/following::text()")
+ "//div[@id='info']//span[text()='页数:']/following::text()"
+ )
pages = pages_elem[0].strip() if pages_elem else None
if pages is not None:
- pages = int(RE_NUMBERS.findall(pages)[
- 0]) if RE_NUMBERS.findall(pages) else None
+ pages = (
+ int(RE_NUMBERS.findall(pages)[0]) if RE_NUMBERS.findall(pages) else None
+ )
if pages and (pages > 999999 or pages < 1):
pages = None
brief_elem = content.xpath(
- "//h2/span[text()='内容简介']/../following-sibling::div[1]//div[@class='intro'][not(ancestor::span[@class='short'])]/p/text()")
- brief = '\n'.join(p.strip()
- for p in brief_elem) if brief_elem else None
+ "//h2/span[text()='内容简介']/../following-sibling::div[1]//div[@class='intro'][not(ancestor::span[@class='short'])]/p/text()"
+ )
+ brief = "\n".join(p.strip() for p in brief_elem) if brief_elem else None
contents = None
try:
contents_elem = content.xpath(
- "//h2/span[text()='目录']/../following-sibling::div[1]")[0]
+ "//h2/span[text()='目录']/../following-sibling::div[1]"
+ )[0]
# if next the id of next sibling contains `dir`, that would be the full contents
if "dir" in contents_elem.getnext().xpath("@id")[0]:
contents_elem = contents_elem.getnext()
- contents = '\n'.join(p.strip() for p in contents_elem.xpath(
- "text()")[:-2]) if contents_elem is not None else None
+ contents = (
+ "\n".join(p.strip() for p in contents_elem.xpath("text()")[:-2])
+ if contents_elem is not None
+ else None
+ )
else:
- contents = '\n'.join(p.strip() for p in contents_elem.xpath(
- "text()")) if contents_elem is not None else None
+ contents = (
+ "\n".join(p.strip() for p in contents_elem.xpath("text()"))
+ if contents_elem is not None
+ else None
+ )
except Exception:
pass
@@ -106,82 +135,97 @@ class DoubanBook(AbstractSite):
img_url = img_url_elem[0].strip() if img_url_elem else None
# there are two html formats for authors and translators
- authors_elem = content.xpath("""//div[@id='info']//span[text()='作者:']/following-sibling::br[1]/
- preceding-sibling::a[preceding-sibling::span[text()='作者:']]/text()""")
+ authors_elem = content.xpath(
+ """//div[@id='info']//span[text()='作者:']/following-sibling::br[1]/
+ preceding-sibling::a[preceding-sibling::span[text()='作者:']]/text()"""
+ )
if not authors_elem:
authors_elem = content.xpath(
- """//div[@id='info']//span[text()=' 作者']/following-sibling::a/text()""")
+ """//div[@id='info']//span[text()=' 作者']/following-sibling::a/text()"""
+ )
if authors_elem:
authors = []
for author in authors_elem:
- authors.append(RE_WHITESPACES.sub(' ', author.strip())[:200])
+ authors.append(RE_WHITESPACES.sub(" ", author.strip())[:200])
else:
authors = None
- translators_elem = content.xpath("""//div[@id='info']//span[text()='译者:']/following-sibling::br[1]/
- preceding-sibling::a[preceding-sibling::span[text()='译者:']]/text()""")
+ translators_elem = content.xpath(
+ """//div[@id='info']//span[text()='译者:']/following-sibling::br[1]/
+ preceding-sibling::a[preceding-sibling::span[text()='译者:']]/text()"""
+ )
if not translators_elem:
translators_elem = content.xpath(
- """//div[@id='info']//span[text()=' 译者']/following-sibling::a/text()""")
+ """//div[@id='info']//span[text()=' 译者']/following-sibling::a/text()"""
+ )
if translators_elem:
translators = []
for translator in translators_elem:
- translators.append(RE_WHITESPACES.sub(' ', translator.strip()))
+ translators.append(RE_WHITESPACES.sub(" ", translator.strip()))
else:
translators = None
cncode_elem = content.xpath(
- "//div[@id='info']//span[text()='统一书号:']/following::text()")
+ "//div[@id='info']//span[text()='统一书号:']/following::text()"
+ )
cubn = cncode_elem[0].strip() if cncode_elem else None
series_elem = content.xpath(
- "//div[@id='info']//span[text()='丛书:']/following-sibling::a[1]/text()")
+ "//div[@id='info']//span[text()='丛书:']/following-sibling::a[1]/text()"
+ )
series = series_elem[0].strip() if series_elem else None
imprint_elem = content.xpath(
- "//div[@id='info']//span[text()='出品方:']/following-sibling::a[1]/text()")
+ "//div[@id='info']//span[text()='出品方:']/following-sibling::a[1]/text()"
+ )
imprint = imprint_elem[0].strip() if imprint_elem else None
data = {
- 'title': title,
- 'subtitle': subtitle,
- 'orig_title': orig_title,
- 'author': authors,
- 'translator': translators,
- 'language': language,
- 'pub_house': pub_house,
- 'pub_year': pub_year,
- 'pub_month': pub_month,
- 'binding': binding,
- 'price': price,
- 'pages': pages,
- 'isbn': isbn,
- 'cubn': cubn,
- 'brief': brief,
- 'contents': contents,
- 'series': series,
- 'imprint': imprint,
- 'cover_image_url': img_url,
+ "title": title,
+ "subtitle": subtitle,
+ "orig_title": orig_title,
+ "author": authors,
+ "translator": translators,
+ "language": language,
+ "pub_house": pub_house,
+ "pub_year": pub_year,
+ "pub_month": pub_month,
+ "binding": binding,
+ "price": price,
+ "pages": pages,
+ "isbn": isbn,
+ "cubn": cubn,
+ "brief": brief,
+ "contents": contents,
+ "series": series,
+ "imprint": imprint,
+ "cover_image_url": img_url,
}
- works_element = content.xpath('//h2/span[text()="这本书的其他版本"]/following-sibling::span[@class="pl"]/a/@href')
+ works_element = content.xpath(
+ '//h2/span[text()="这本书的其他版本"]/following-sibling::span[@class="pl"]/a/@href'
+ )
if works_element:
- r = re.match(r'\w+://book.douban.com/works/(\d+)', works_element[0])
- data['required_resources'] = [{
- 'model': 'Work',
- 'id_type': IdType.DoubanBook_Work,
- 'id_value': r[1] if r else None,
- 'title': data['title'],
- 'url': works_element[0],
- 'content': {'metadata': {'title': data['title']}}
- }]
+ r = re.match(r"\w+://book.douban.com/works/(\d+)", works_element[0])
+ data["required_resources"] = [
+ {
+ "model": "Work",
+ "id_type": IdType.DoubanBook_Work,
+ "id_value": r[1] if r else None,
+ "title": data["title"],
+ "url": works_element[0],
+ "content": {"metadata": {"title": data["title"]}},
+ }
+ ]
pd = ResourceContent(metadata=data)
t, n = detect_isbn_asin(isbn)
if t:
pd.lookup_ids[t] = n
pd.lookup_ids[IdType.CUBN] = cubn
- pd.cover_image, pd.cover_image_extention = BasicImageDownloader.download_image(img_url, self.url)
+ pd.cover_image, pd.cover_image_extention = BasicImageDownloader.download_image(
+ img_url, self.url
+ )
return pd
@@ -189,7 +233,7 @@ class DoubanBook(AbstractSite):
class DoubanBook_Work(AbstractSite):
ID_TYPE = IdType.DoubanBook_Work
URL_PATTERNS = [r"\w+://book\.douban\.com/works/(\d+)"]
- WIKI_PROPERTY_ID = '?'
+ WIKI_PROPERTY_ID = "?"
DEFAULT_MODEL = Work
@classmethod
@@ -199,10 +243,12 @@ class DoubanBook_Work(AbstractSite):
def scrape(self):
content = DoubanDownloader(self.url).download().html()
title_elem = content.xpath("//h1/text()")
- title = title_elem[0].split('全部版本(')[0].strip() if title_elem else None
+ title = title_elem[0].split("全部版本(")[0].strip() if title_elem else None
if not title:
- raise ParseError(self, 'title')
- pd = ResourceContent(metadata={
- 'title': title,
- })
+ raise ParseError(self, "title")
+ pd = ResourceContent(
+ metadata={
+ "title": title,
+ }
+ )
return pd
diff --git a/catalog/sites/douban_drama.py b/catalog/sites/douban_drama.py
index 8740a61c..a4185cbb 100644
--- a/catalog/sites/douban_drama.py
+++ b/catalog/sites/douban_drama.py
@@ -12,7 +12,7 @@ class DoubanDrama(AbstractSite):
SITE_NAME = SiteName.Douban
ID_TYPE = IdType.DoubanDrama
URL_PATTERNS = [r"\w+://www.douban.com/location/drama/(\d+)/"]
- WIKI_PROPERTY_ID = 'P6443'
+ WIKI_PROPERTY_ID = "P6443"
DEFAULT_MODEL = Performance
@classmethod
@@ -29,24 +29,51 @@ class DoubanDrama(AbstractSite):
else:
raise ParseError(self, "title")
- data['other_titles'] = [s.strip() for s in title_elem[1:]]
- other_title_elem = h.xpath("//dl//dt[text()='又名:']/following::dd[@itemprop='name']/text()")
+ data["other_titles"] = [s.strip() for s in title_elem[1:]]
+ other_title_elem = h.xpath(
+ "//dl//dt[text()='又名:']/following::dd[@itemprop='name']/text()"
+ )
if len(other_title_elem) > 0:
- data['other_titles'].append(other_title_elem[0].strip())
+ data["other_titles"].append(other_title_elem[0].strip())
plot_elem = h.xpath("//div[@id='link-report']/text()")
if len(plot_elem) == 0:
plot_elem = h.xpath("//div[@class='abstract']/text()")
- data['brief'] = '\n'.join(plot_elem) if len(plot_elem) > 0 else ''
+ data["brief"] = "\n".join(plot_elem) if len(plot_elem) > 0 else ""
- data['genres'] = [s.strip() for s in h.xpath("//dl//dt[text()='类型:']/following-sibling::dd[@itemprop='genre']/text()")]
- data['versions'] = [s.strip() for s in h.xpath("//dl//dt[text()='版本:']/following-sibling::dd[@class='titles']/a//text()")]
- data['directors'] = [s.strip() for s in h.xpath("//div[@class='meta']/dl//dt[text()='导演:']/following-sibling::dd/a[@itemprop='director']//text()")]
- data['playwrights'] = [s.strip() for s in h.xpath("//div[@class='meta']/dl//dt[text()='编剧:']/following-sibling::dd/a[@itemprop='author']//text()")]
- data['actors'] = [s.strip() for s in h.xpath("//div[@class='meta']/dl//dt[text()='主演:']/following-sibling::dd/a[@itemprop='actor']//text()")]
+ data["genres"] = [
+ s.strip()
+ for s in h.xpath(
+ "//dl//dt[text()='类型:']/following-sibling::dd[@itemprop='genre']/text()"
+ )
+ ]
+ data["versions"] = [
+ s.strip()
+ for s in h.xpath(
+ "//dl//dt[text()='版本:']/following-sibling::dd[@class='titles']/a//text()"
+ )
+ ]
+ data["directors"] = [
+ s.strip()
+ for s in h.xpath(
+ "//div[@class='meta']/dl//dt[text()='导演:']/following-sibling::dd/a[@itemprop='director']//text()"
+ )
+ ]
+ data["playwrights"] = [
+ s.strip()
+ for s in h.xpath(
+ "//div[@class='meta']/dl//dt[text()='编剧:']/following-sibling::dd/a[@itemprop='author']//text()"
+ )
+ ]
+ data["actors"] = [
+ s.strip()
+ for s in h.xpath(
+ "//div[@class='meta']/dl//dt[text()='主演:']/following-sibling::dd/a[@itemprop='actor']//text()"
+ )
+ ]
img_url_elem = h.xpath("//img[@itemprop='image']/@src")
- data['cover_image_url'] = img_url_elem[0].strip() if img_url_elem else None
+ data["cover_image_url"] = img_url_elem[0].strip() if img_url_elem else None
pd = ResourceContent(metadata=data)
if pd.metadata["cover_image_url"]:
@@ -55,5 +82,7 @@ class DoubanDrama(AbstractSite):
pd.cover_image = imgdl.download().content
pd.cover_image_extention = imgdl.extention
except Exception:
- _logger.debug(f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}')
+ _logger.debug(
+ f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
+ )
return pd
diff --git a/catalog/sites/douban_game.py b/catalog/sites/douban_game.py
index 210002f2..d639cdd4 100644
--- a/catalog/sites/douban_game.py
+++ b/catalog/sites/douban_game.py
@@ -12,8 +12,11 @@ _logger = logging.getLogger(__name__)
class DoubanGame(AbstractSite):
SITE_NAME = SiteName.Douban
ID_TYPE = IdType.DoubanGame
- URL_PATTERNS = [r"\w+://www\.douban\.com/game/(\d+)/{0,1}", r"\w+://m.douban.com/game/subject/(\d+)/{0,1}"]
- WIKI_PROPERTY_ID = ''
+ URL_PATTERNS = [
+ r"\w+://www\.douban\.com/game/(\d+)/{0,1}",
+ r"\w+://m.douban.com/game/subject/(\d+)/{0,1}",
+ ]
+ WIKI_PROPERTY_ID = ""
DEFAULT_MODEL = Game
@classmethod
@@ -29,49 +32,69 @@ class DoubanGame(AbstractSite):
raise ParseError(self, "title")
other_title_elem = content.xpath(
- "//dl[@class='game-attr']//dt[text()='别名:']/following-sibling::dd[1]/text()")
- other_title = other_title_elem[0].strip().split(' / ') if other_title_elem else None
+ "//dl[@class='game-attr']//dt[text()='别名:']/following-sibling::dd[1]/text()"
+ )
+ other_title = (
+ other_title_elem[0].strip().split(" / ") if other_title_elem else None
+ )
developer_elem = content.xpath(
- "//dl[@class='game-attr']//dt[text()='开发商:']/following-sibling::dd[1]/text()")
- developer = developer_elem[0].strip().split(' / ') if developer_elem else None
+ "//dl[@class='game-attr']//dt[text()='开发商:']/following-sibling::dd[1]/text()"
+ )
+ developer = developer_elem[0].strip().split(" / ") if developer_elem else None
publisher_elem = content.xpath(
- "//dl[@class='game-attr']//dt[text()='发行商:']/following-sibling::dd[1]/text()")
- publisher = publisher_elem[0].strip().split(' / ') if publisher_elem else None
+ "//dl[@class='game-attr']//dt[text()='发行商:']/following-sibling::dd[1]/text()"
+ )
+ publisher = publisher_elem[0].strip().split(" / ") if publisher_elem else None
platform_elem = content.xpath(
- "//dl[@class='game-attr']//dt[text()='平台:']/following-sibling::dd[1]/a/text()")
+ "//dl[@class='game-attr']//dt[text()='平台:']/following-sibling::dd[1]/a/text()"
+ )
platform = platform_elem if platform_elem else None
genre_elem = content.xpath(
- "//dl[@class='game-attr']//dt[text()='类型:']/following-sibling::dd[1]/a/text()")
+ "//dl[@class='game-attr']//dt[text()='类型:']/following-sibling::dd[1]/a/text()"
+ )
genre = None
if genre_elem:
- genre = [g for g in genre_elem if g != '游戏']
+ genre = [g for g in genre_elem if g != "游戏"]
date_elem = content.xpath(
- "//dl[@class='game-attr']//dt[text()='发行日期:']/following-sibling::dd[1]/text()")
- release_date = dateparser.parse(date_elem[0].strip()).strftime('%Y-%m-%d') if date_elem else None
+ "//dl[@class='game-attr']//dt[text()='发行日期:']/following-sibling::dd[1]/text()"
+ )
+ release_date = (
+ dateparser.parse(date_elem[0].strip()).strftime("%Y-%m-%d")
+ if date_elem
+ else None
+ )
brief_elem = content.xpath("//div[@class='mod item-desc']/p/text()")
- brief = '\n'.join(brief_elem) if brief_elem else None
+ brief = "\n".join(brief_elem) if brief_elem else None
img_url_elem = content.xpath(
- "//div[@class='item-subject-info']/div[@class='pic']//img/@src")
+ "//div[@class='item-subject-info']/div[@class='pic']//img/@src"
+ )
img_url = img_url_elem[0].strip() if img_url_elem else None
- pd = ResourceContent(metadata={
- 'title': title,
- 'other_title': other_title,
- 'developer': developer,
- 'publisher': publisher,
- 'release_date': release_date,
- 'genre': genre,
- 'platform': platform,
- 'brief': brief,
- 'cover_image_url': img_url
- })
+ pd = ResourceContent(
+ metadata={
+ "title": title,
+ "other_title": other_title,
+ "developer": developer,
+ "publisher": publisher,
+ "release_date": release_date,
+ "genre": genre,
+ "platform": platform,
+ "brief": brief,
+ "cover_image_url": img_url,
+ }
+ )
if pd.metadata["cover_image_url"]:
- pd.cover_image, pd.cover_image_extention = BasicImageDownloader.download_image(pd.metadata['cover_image_url'], self.url)
+ (
+ pd.cover_image,
+ pd.cover_image_extention,
+ ) = BasicImageDownloader.download_image(
+ pd.metadata["cover_image_url"], self.url
+ )
return pd
diff --git a/catalog/sites/douban_movie.py b/catalog/sites/douban_movie.py
index d00ddc16..1a246887 100644
--- a/catalog/sites/douban_movie.py
+++ b/catalog/sites/douban_movie.py
@@ -15,8 +15,11 @@ _logger = logging.getLogger(__name__)
class DoubanMovie(AbstractSite):
SITE_NAME = SiteName.Douban
ID_TYPE = IdType.DoubanMovie
- URL_PATTERNS = [r"\w+://movie\.douban\.com/subject/(\d+)/{0,1}", r"\w+://m.douban.com/movie/subject/(\d+)/{0,1}"]
- WIKI_PROPERTY_ID = '?'
+ URL_PATTERNS = [
+ r"\w+://movie\.douban\.com/subject/(\d+)/{0,1}",
+ r"\w+://m.douban.com/movie/subject/(\d+)/{0,1}",
+ ]
+ WIKI_PROPERTY_ID = "?"
# no DEFAULT_MODEL as it may be either TV Season and Movie
@classmethod
@@ -27,16 +30,16 @@ class DoubanMovie(AbstractSite):
content = DoubanDownloader(self.url).download().html()
try:
- raw_title = content.xpath(
- "//span[@property='v:itemreviewed']/text()")[0].strip()
+ raw_title = content.xpath("//span[@property='v:itemreviewed']/text()")[
+ 0
+ ].strip()
except IndexError:
- raise ParseError(self, 'title')
+ raise ParseError(self, "title")
- orig_title = content.xpath(
- "//img[@rel='v:image']/@alt")[0].strip()
+ orig_title = content.xpath("//img[@rel='v:image']/@alt")[0].strip()
title = raw_title.split(orig_title)[0].strip()
# if has no chinese title
- if title == '':
+ if title == "":
title = orig_title
if title == orig_title:
@@ -44,107 +47,134 @@ class DoubanMovie(AbstractSite):
# there are two html formats for authors and translators
other_title_elem = content.xpath(
- "//div[@id='info']//span[text()='又名:']/following-sibling::text()[1]")
- other_title = other_title_elem[0].strip().split(
- ' / ') if other_title_elem else None
+ "//div[@id='info']//span[text()='又名:']/following-sibling::text()[1]"
+ )
+ other_title = (
+ other_title_elem[0].strip().split(" / ") if other_title_elem else None
+ )
imdb_elem = content.xpath(
- "//div[@id='info']//span[text()='IMDb链接:']/following-sibling::a[1]/text()")
+ "//div[@id='info']//span[text()='IMDb链接:']/following-sibling::a[1]/text()"
+ )
if not imdb_elem:
imdb_elem = content.xpath(
- "//div[@id='info']//span[text()='IMDb:']/following-sibling::text()[1]")
+ "//div[@id='info']//span[text()='IMDb:']/following-sibling::text()[1]"
+ )
imdb_code = imdb_elem[0].strip() if imdb_elem else None
director_elem = content.xpath(
- "//div[@id='info']//span[text()='导演']/following-sibling::span[1]/a/text()")
+ "//div[@id='info']//span[text()='导演']/following-sibling::span[1]/a/text()"
+ )
director = director_elem if director_elem else None
playwright_elem = content.xpath(
- "//div[@id='info']//span[text()='编剧']/following-sibling::span[1]/a/text()")
- playwright = list(map(lambda a: a[:200], playwright_elem)) if playwright_elem else None
+ "//div[@id='info']//span[text()='编剧']/following-sibling::span[1]/a/text()"
+ )
+ playwright = (
+ list(map(lambda a: a[:200], playwright_elem)) if playwright_elem else None
+ )
actor_elem = content.xpath(
- "//div[@id='info']//span[text()='主演']/following-sibling::span[1]/a/text()")
+ "//div[@id='info']//span[text()='主演']/following-sibling::span[1]/a/text()"
+ )
actor = list(map(lambda a: a[:200], actor_elem)) if actor_elem else None
genre_elem = content.xpath("//span[@property='v:genre']/text()")
genre = []
if genre_elem:
for g in genre_elem:
- g = g.split(' ')[0]
- if g == '紀錄片': # likely some original data on douban was corrupted
- g = '纪录片'
- elif g == '鬼怪':
- g = '惊悚'
+ g = g.split(" ")[0]
+ if g == "紀錄片": # likely some original data on douban was corrupted
+ g = "纪录片"
+ elif g == "鬼怪":
+ g = "惊悚"
genre.append(g)
- showtime_elem = content.xpath(
- "//span[@property='v:initialReleaseDate']/text()")
+ showtime_elem = content.xpath("//span[@property='v:initialReleaseDate']/text()")
if showtime_elem:
showtime = []
for st in showtime_elem:
- parts = st.split('(')
+ parts = st.split("(")
if len(parts) == 1:
- time = st.split('(')[0]
- region = ''
+ time = st.split("(")[0]
+ region = ""
else:
- time = st.split('(')[0]
- region = st.split('(')[1][0:-1]
+ time = st.split("(")[0]
+ region = st.split("(")[1][0:-1]
showtime.append({time: region})
else:
showtime = None
site_elem = content.xpath(
- "//div[@id='info']//span[text()='官方网站:']/following-sibling::a[1]/@href")
+ "//div[@id='info']//span[text()='官方网站:']/following-sibling::a[1]/@href"
+ )
site = site_elem[0].strip()[:200] if site_elem else None
- if site and not re.match(r'http.+', site):
+ if site and not re.match(r"http.+", site):
site = None
area_elem = content.xpath(
- "//div[@id='info']//span[text()='制片国家/地区:']/following-sibling::text()[1]")
+ "//div[@id='info']//span[text()='制片国家/地区:']/following-sibling::text()[1]"
+ )
if area_elem:
- area = [a.strip()[:100] for a in area_elem[0].split('/')]
+ area = [a.strip()[:100] for a in area_elem[0].split("/")]
else:
area = None
language_elem = content.xpath(
- "//div[@id='info']//span[text()='语言:']/following-sibling::text()[1]")
+ "//div[@id='info']//span[text()='语言:']/following-sibling::text()[1]"
+ )
if language_elem:
- language = [a.strip() for a in language_elem[0].split(' / ')]
+ language = [a.strip() for a in language_elem[0].split(" / ")]
else:
language = None
year_elem = content.xpath("//span[@class='year']/text()")
- year = int(re.search(r'\d+', year_elem[0])[0]) if year_elem and re.search(r'\d+', year_elem[0]) else None
+ year = (
+ int(re.search(r"\d+", year_elem[0])[0])
+ if year_elem and re.search(r"\d+", year_elem[0])
+ else None
+ )
duration_elem = content.xpath("//span[@property='v:runtime']/text()")
other_duration_elem = content.xpath(
- "//span[@property='v:runtime']/following-sibling::text()[1]")
+ "//span[@property='v:runtime']/following-sibling::text()[1]"
+ )
if duration_elem:
duration = duration_elem[0].strip()
if other_duration_elem:
duration += other_duration_elem[0].rstrip()
- duration = duration.split('/')[0].strip()
+ duration = duration.split("/")[0].strip()
else:
duration = None
season_elem = content.xpath(
- "//*[@id='season']/option[@selected='selected']/text()")
+ "//*[@id='season']/option[@selected='selected']/text()"
+ )
if not season_elem:
season_elem = content.xpath(
- "//div[@id='info']//span[text()='季数:']/following-sibling::text()[1]")
+ "//div[@id='info']//span[text()='季数:']/following-sibling::text()[1]"
+ )
season = int(season_elem[0].strip()) if season_elem else None
else:
season = int(season_elem[0].strip())
episodes_elem = content.xpath(
- "//div[@id='info']//span[text()='集数:']/following-sibling::text()[1]")
- episodes = int(episodes_elem[0].strip()) if episodes_elem and episodes_elem[0].strip().isdigit() else None
+ "//div[@id='info']//span[text()='集数:']/following-sibling::text()[1]"
+ )
+ episodes = (
+ int(episodes_elem[0].strip())
+ if episodes_elem and episodes_elem[0].strip().isdigit()
+ else None
+ )
single_episode_length_elem = content.xpath(
- "//div[@id='info']//span[text()='单集片长:']/following-sibling::text()[1]")
- single_episode_length = single_episode_length_elem[0].strip(
- )[:100] if single_episode_length_elem else None
+ "//div[@id='info']//span[text()='单集片长:']/following-sibling::text()[1]"
+ )
+ single_episode_length = (
+ single_episode_length_elem[0].strip()[:100]
+ if single_episode_length_elem
+ else None
+ )
# if has field `episodes` not none then must be series
is_series = True if episodes else False
@@ -152,64 +182,87 @@ class DoubanMovie(AbstractSite):
brief_elem = content.xpath("//span[@class='all hidden']")
if not brief_elem:
brief_elem = content.xpath("//span[@property='v:summary']")
- brief = '\n'.join([e.strip() for e in brief_elem[0].xpath(
- './text()')]) if brief_elem else None
+ brief = (
+ "\n".join([e.strip() for e in brief_elem[0].xpath("./text()")])
+ if brief_elem
+ else None
+ )
img_url_elem = content.xpath("//img[@rel='v:image']/@src")
img_url = img_url_elem[0].strip() if img_url_elem else None
- pd = ResourceContent(metadata={
- 'title': title,
- 'orig_title': orig_title,
- 'other_title': other_title,
- 'imdb_code': imdb_code,
- 'director': director,
- 'playwright': playwright,
- 'actor': actor,
- 'genre': genre,
- 'showtime': showtime,
- 'site': site,
- 'area': area,
- 'language': language,
- 'year': year,
- 'duration': duration,
- 'season_number': season,
- 'episode_count': episodes,
- 'single_episode_length': single_episode_length,
- 'brief': brief,
- 'is_series': is_series,
- 'cover_image_url': img_url,
- })
- pd.metadata['preferred_model'] = ('TVSeason' if season else 'TVShow') if is_series else 'Movie'
+ pd = ResourceContent(
+ metadata={
+ "title": title,
+ "orig_title": orig_title,
+ "other_title": other_title,
+ "imdb_code": imdb_code,
+ "director": director,
+ "playwright": playwright,
+ "actor": actor,
+ "genre": genre,
+ "showtime": showtime,
+ "site": site,
+ "area": area,
+ "language": language,
+ "year": year,
+ "duration": duration,
+ "season_number": season,
+ "episode_count": episodes,
+ "single_episode_length": single_episode_length,
+ "brief": brief,
+ "is_series": is_series,
+ "cover_image_url": img_url,
+ }
+ )
+ pd.metadata["preferred_model"] = (
+ ("TVSeason" if season else "TVShow") if is_series else "Movie"
+ )
if imdb_code:
res_data = search_tmdb_by_imdb_id(imdb_code)
tmdb_show_id = None
- if 'movie_results' in res_data and len(res_data['movie_results']) > 0:
- pd.metadata['preferred_model'] = 'Movie'
- elif 'tv_results' in res_data and len(res_data['tv_results']) > 0:
- pd.metadata['preferred_model'] = 'TVShow'
- elif 'tv_season_results' in res_data and len(res_data['tv_season_results']) > 0:
- pd.metadata['preferred_model'] = 'TVSeason'
- tmdb_show_id = res_data['tv_season_results'][0]['show_id']
- elif 'tv_episode_results' in res_data and len(res_data['tv_episode_results']) > 0:
- pd.metadata['preferred_model'] = 'TVSeason'
- tmdb_show_id = res_data['tv_episode_results'][0]['show_id']
- if res_data['tv_episode_results'][0]['episode_number'] != 1:
- _logger.warning(f'Douban Movie {self.url} mapping to unexpected imdb episode {imdb_code}')
- resp = query_tmdb_tv_episode(tmdb_show_id, res_data['tv_episode_results'][0]['season_number'], 1)
- imdb_code = resp['external_ids']['imdb_id']
- _logger.warning(f'Douban Movie {self.url} re-mapped to imdb episode {imdb_code}')
+ if "movie_results" in res_data and len(res_data["movie_results"]) > 0:
+ pd.metadata["preferred_model"] = "Movie"
+ elif "tv_results" in res_data and len(res_data["tv_results"]) > 0:
+ pd.metadata["preferred_model"] = "TVShow"
+ elif (
+ "tv_season_results" in res_data
+ and len(res_data["tv_season_results"]) > 0
+ ):
+ pd.metadata["preferred_model"] = "TVSeason"
+ tmdb_show_id = res_data["tv_season_results"][0]["show_id"]
+ elif (
+ "tv_episode_results" in res_data
+ and len(res_data["tv_episode_results"]) > 0
+ ):
+ pd.metadata["preferred_model"] = "TVSeason"
+ tmdb_show_id = res_data["tv_episode_results"][0]["show_id"]
+ if res_data["tv_episode_results"][0]["episode_number"] != 1:
+ _logger.warning(
+ f"Douban Movie {self.url} mapping to unexpected imdb episode {imdb_code}"
+ )
+ resp = query_tmdb_tv_episode(
+ tmdb_show_id,
+ res_data["tv_episode_results"][0]["season_number"],
+ 1,
+ )
+ imdb_code = resp["external_ids"]["imdb_id"]
+ _logger.warning(
+ f"Douban Movie {self.url} re-mapped to imdb episode {imdb_code}"
+ )
pd.lookup_ids[IdType.IMDB] = imdb_code
if tmdb_show_id:
- pd.metadata['required_resources'] = [{
- 'model': 'TVShow',
- 'id_type': IdType.TMDB_TV,
- 'id_value': tmdb_show_id,
- 'title': title,
- 'url': TMDB_TV.id_to_url(tmdb_show_id),
- }]
+ pd.metadata["required_resources"] = [
+ {
+ "model": "TVShow",
+ "id_type": IdType.TMDB_TV,
+ "id_value": tmdb_show_id,
+ "title": title,
+ "url": TMDB_TV.id_to_url(tmdb_show_id),
+ }
+ ]
# TODO parse sister seasons
# pd.metadata['related_resources'] = []
if pd.metadata["cover_image_url"]:
@@ -218,5 +271,7 @@ class DoubanMovie(AbstractSite):
pd.cover_image = imgdl.download().content
pd.cover_image_extention = imgdl.extention
except Exception:
- _logger.debug(f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}')
+ _logger.debug(
+ f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
+ )
return pd
diff --git a/catalog/sites/douban_music.py b/catalog/sites/douban_music.py
index 1d8038f0..db31a7b0 100644
--- a/catalog/sites/douban_music.py
+++ b/catalog/sites/douban_music.py
@@ -12,8 +12,11 @@ _logger = logging.getLogger(__name__)
class DoubanMusic(AbstractSite):
SITE_NAME = SiteName.Douban
ID_TYPE = IdType.DoubanMusic
- URL_PATTERNS = [r"\w+://music\.douban\.com/subject/(\d+)/{0,1}", r"\w+://m.douban.com/music/subject/(\d+)/{0,1}"]
- WIKI_PROPERTY_ID = ''
+ URL_PATTERNS = [
+ r"\w+://music\.douban\.com/subject/(\d+)/{0,1}",
+ r"\w+://m.douban.com/music/subject/(\d+)/{0,1}",
+ ]
+ WIKI_PROPERTY_ID = ""
DEFAULT_MODEL = Album
@classmethod
@@ -28,75 +31,95 @@ class DoubanMusic(AbstractSite):
if not title:
raise ParseError(self, "title")
- artists_elem = content.xpath("//div[@id='info']/span/span[@class='pl']/a/text()")
- artist = None if not artists_elem else list(map(lambda a: a[:200], artists_elem))
+ artists_elem = content.xpath(
+ "//div[@id='info']/span/span[@class='pl']/a/text()"
+ )
+ artist = (
+ None if not artists_elem else list(map(lambda a: a[:200], artists_elem))
+ )
genre_elem = content.xpath(
- "//div[@id='info']//span[text()='流派:']/following::text()[1]")
+ "//div[@id='info']//span[text()='流派:']/following::text()[1]"
+ )
genre = genre_elem[0].strip() if genre_elem else None
date_elem = content.xpath(
- "//div[@id='info']//span[text()='发行时间:']/following::text()[1]")
- release_date = dateparser.parse(date_elem[0].strip()).strftime('%Y-%m-%d') if date_elem else None
+ "//div[@id='info']//span[text()='发行时间:']/following::text()[1]"
+ )
+ release_date = (
+ dateparser.parse(date_elem[0].strip()).strftime("%Y-%m-%d")
+ if date_elem
+ else None
+ )
company_elem = content.xpath(
- "//div[@id='info']//span[text()='出版者:']/following::text()[1]")
+ "//div[@id='info']//span[text()='出版者:']/following::text()[1]"
+ )
company = company_elem[0].strip() if company_elem else None
track_list_elem = content.xpath(
"//div[@class='track-list']/div[@class='indent']/div/text()"
)
if track_list_elem:
- track_list = '\n'.join([track.strip() for track in track_list_elem])
+ track_list = "\n".join([track.strip() for track in track_list_elem])
else:
track_list = None
brief_elem = content.xpath("//span[@class='all hidden']")
if not brief_elem:
brief_elem = content.xpath("//span[@property='v:summary']")
- brief = '\n'.join([e.strip() for e in brief_elem[0].xpath(
- './text()')]) if brief_elem else None
+ brief = (
+ "\n".join([e.strip() for e in brief_elem[0].xpath("./text()")])
+ if brief_elem
+ else None
+ )
img_url_elem = content.xpath("//div[@id='mainpic']//img/@src")
img_url = img_url_elem[0].strip() if img_url_elem else None
data = {
- 'title': title,
- 'artist': artist,
- 'genre': genre,
- 'release_date': release_date,
- 'duration': None,
- 'company': [company],
- 'track_list': track_list,
- 'brief': brief,
- 'cover_image_url': img_url
+ "title": title,
+ "artist": artist,
+ "genre": genre,
+ "release_date": release_date,
+ "duration": None,
+ "company": [company],
+ "track_list": track_list,
+ "brief": brief,
+ "cover_image_url": img_url,
}
gtin = None
isrc = None
other_elem = content.xpath(
- "//div[@id='info']//span[text()='又名:']/following-sibling::text()[1]")
+ "//div[@id='info']//span[text()='又名:']/following-sibling::text()[1]"
+ )
if other_elem:
- data['other_title'] = other_elem[0].strip()
+ data["other_title"] = other_elem[0].strip()
other_elem = content.xpath(
- "//div[@id='info']//span[text()='专辑类型:']/following-sibling::text()[1]")
+ "//div[@id='info']//span[text()='专辑类型:']/following-sibling::text()[1]"
+ )
if other_elem:
- data['album_type'] = other_elem[0].strip()
+ data["album_type"] = other_elem[0].strip()
other_elem = content.xpath(
- "//div[@id='info']//span[text()='介质:']/following-sibling::text()[1]")
+ "//div[@id='info']//span[text()='介质:']/following-sibling::text()[1]"
+ )
if other_elem:
- data['media'] = other_elem[0].strip()
+ data["media"] = other_elem[0].strip()
other_elem = content.xpath(
- "//div[@id='info']//span[text()='ISRC:']/following-sibling::text()[1]")
+ "//div[@id='info']//span[text()='ISRC:']/following-sibling::text()[1]"
+ )
if other_elem:
isrc = other_elem[0].strip()
other_elem = content.xpath(
- "//div[@id='info']//span[text()='条形码:']/following-sibling::text()[1]")
+ "//div[@id='info']//span[text()='条形码:']/following-sibling::text()[1]"
+ )
if other_elem:
gtin = other_elem[0].strip()
other_elem = content.xpath(
- "//div[@id='info']//span[text()='碟片数:']/following-sibling::text()[1]")
+ "//div[@id='info']//span[text()='碟片数:']/following-sibling::text()[1]"
+ )
if other_elem:
- data['disc_count'] = other_elem[0].strip()
+ data["disc_count"] = other_elem[0].strip()
pd = ResourceContent(metadata=data)
if gtin:
@@ -109,5 +132,7 @@ class DoubanMusic(AbstractSite):
pd.cover_image = imgdl.download().content
pd.cover_image_extention = imgdl.extention
except Exception:
- _logger.debug(f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}')
+ _logger.debug(
+ f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
+ )
return pd
diff --git a/catalog/sites/goodreads.py b/catalog/sites/goodreads.py
index 45d3d637..b6d2768f 100644
--- a/catalog/sites/goodreads.py
+++ b/catalog/sites/goodreads.py
@@ -14,7 +14,7 @@ class GoodreadsDownloader(RetryDownloader):
if response is None:
return RESPONSE_NETWORK_ERROR
elif response.status_code == 200:
- if response.text.find('__NEXT_DATA__') != -1:
+ if response.text.find("__NEXT_DATA__") != -1:
return RESPONSE_OK
else:
# Goodreads may return legacy version for a/b testing
@@ -28,9 +28,12 @@ class GoodreadsDownloader(RetryDownloader):
class Goodreads(AbstractSite):
SITE_NAME = SiteName.Goodreads
ID_TYPE = IdType.Goodreads
- WIKI_PROPERTY_ID = 'P2968'
+ WIKI_PROPERTY_ID = "P2968"
DEFAULT_MODEL = Edition
- URL_PATTERNS = [r".+goodreads.com/.*book/show/(\d+)", r".+goodreads.com/.*book/(\d+)"]
+ URL_PATTERNS = [
+ r".+goodreads.com/.*book/show/(\d+)",
+ r".+goodreads.com/.*book/(\d+)",
+ ]
@classmethod
def id_to_url(self, id_value):
@@ -48,39 +51,41 @@ class Goodreads(AbstractSite):
elem = h.xpath('//script[@id="__NEXT_DATA__"]/text()')
src = elem[0].strip() if elem else None
if not src:
- raise ParseError(self, '__NEXT_DATA__ element')
- d = json.loads(src)['props']['pageProps']['apolloState']
- o = {'Book': [], 'Work': [], 'Series': [], 'Contributor': []}
+ raise ParseError(self, "__NEXT_DATA__ element")
+ d = json.loads(src)["props"]["pageProps"]["apolloState"]
+ o = {"Book": [], "Work": [], "Series": [], "Contributor": []}
for v in d.values():
- t = v.get('__typename')
+ t = v.get("__typename")
if t and t in o:
o[t].append(v)
- b = next(filter(lambda x: x.get('title'), o['Book']), None)
+ b = next(filter(lambda x: x.get("title"), o["Book"]), None)
if not b:
# Goodreads may return empty page template when internal service timeouts
- raise ParseError(self, 'Book in __NEXT_DATA__ json')
- data['title'] = b['title']
- data['brief'] = b['description']
+ raise ParseError(self, "Book in __NEXT_DATA__ json")
+ data["title"] = b["title"]
+ data["brief"] = b["description"]
ids = {}
- t, n = detect_isbn_asin(b['details'].get('asin'))
+ t, n = detect_isbn_asin(b["details"].get("asin"))
if t:
ids[t] = n
- t, n = detect_isbn_asin(b['details'].get('isbn13'))
+ t, n = detect_isbn_asin(b["details"].get("isbn13"))
if t:
ids[t] = n
# amazon has a known problem to use another book's isbn as asin
# so we alway overwrite asin-converted isbn with real isbn
- data['pages'] = b['details'].get('numPages')
- data['cover_image_url'] = b['imageUrl']
- w = next(filter(lambda x: x.get('details'), o['Work']), None)
+ data["pages"] = b["details"].get("numPages")
+ data["cover_image_url"] = b["imageUrl"]
+ w = next(filter(lambda x: x.get("details"), o["Work"]), None)
if w:
- data['required_resources'] = [{
- 'model': 'Work',
- 'id_type': IdType.Goodreads_Work,
- 'id_value': str(w['legacyId']),
- 'title': w['details']['originalTitle'],
- 'url': w['editions']['webUrl'],
- }]
+ data["required_resources"] = [
+ {
+ "model": "Work",
+ "id_type": IdType.Goodreads_Work,
+ "id_value": str(w["legacyId"]),
+ "title": w["details"]["originalTitle"],
+ "url": w["editions"]["webUrl"],
+ }
+ ]
pd = ResourceContent(metadata=data)
pd.lookup_ids[IdType.ISBN] = ids.get(IdType.ISBN)
pd.lookup_ids[IdType.ASIN] = ids.get(IdType.ASIN)
@@ -90,7 +95,9 @@ class Goodreads(AbstractSite):
pd.cover_image = imgdl.download().content
pd.cover_image_extention = imgdl.extention
except Exception:
- _logger.debug(f'failed to download cover for {self.url} from {data["cover_image_url"]}')
+ _logger.debug(
+ f'failed to download cover for {self.url} from {data["cover_image_url"]}'
+ )
return pd
@@ -98,7 +105,7 @@ class Goodreads(AbstractSite):
class Goodreads_Work(AbstractSite):
SITE_NAME = SiteName.Goodreads
ID_TYPE = IdType.Goodreads_Work
- WIKI_PROPERTY_ID = ''
+ WIKI_PROPERTY_ID = ""
DEFAULT_MODEL = Work
URL_PATTERNS = [r".+goodreads.com/work/editions/(\d+)"]
@@ -111,14 +118,18 @@ class Goodreads_Work(AbstractSite):
title_elem = content.xpath("//h1/a/text()")
title = title_elem[0].strip() if title_elem else None
if not title:
- raise ParseError(self, 'title')
+ raise ParseError(self, "title")
author_elem = content.xpath("//h2/a/text()")
author = author_elem[0].strip() if author_elem else None
first_published_elem = content.xpath("//h2/span/text()")
- first_published = first_published_elem[0].strip() if first_published_elem else None
- pd = ResourceContent(metadata={
- 'title': title,
- 'author': author,
- 'first_published': first_published
- })
+ first_published = (
+ first_published_elem[0].strip() if first_published_elem else None
+ )
+ pd = ResourceContent(
+ metadata={
+ "title": title,
+ "author": author,
+ "first_published": first_published,
+ }
+ )
return pd
diff --git a/catalog/sites/google_books.py b/catalog/sites/google_books.py
index 806056a6..a036df37 100644
--- a/catalog/sites/google_books.py
+++ b/catalog/sites/google_books.py
@@ -16,7 +16,7 @@ class GoogleBooks(AbstractSite):
r"https://www\.google\.co[^/]+/books/edition/[^/]+/([^?]+)",
r"https://books\.google\.co[^/]+/books/about/[^?]+?id=([^?]+)",
]
- WIKI_PROPERTY_ID = ''
+ WIKI_PROPERTY_ID = ""
DEFAULT_MODEL = Edition
@classmethod
@@ -24,57 +24,76 @@ class GoogleBooks(AbstractSite):
return "https://books.google.com/books?id=" + id_value
def scrape(self):
- api_url = f'https://www.googleapis.com/books/v1/volumes/{self.id_value}'
+ api_url = f"https://www.googleapis.com/books/v1/volumes/{self.id_value}"
b = BasicDownloader(api_url).download().json()
other = {}
- title = b['volumeInfo']['title']
- subtitle = b['volumeInfo']['subtitle'] if 'subtitle' in b['volumeInfo'] else None
+ title = b["volumeInfo"]["title"]
+ subtitle = (
+ b["volumeInfo"]["subtitle"] if "subtitle" in b["volumeInfo"] else None
+ )
pub_year = None
pub_month = None
- if 'publishedDate' in b['volumeInfo']:
- pub_date = b['volumeInfo']['publishedDate'].split('-')
+ if "publishedDate" in b["volumeInfo"]:
+ pub_date = b["volumeInfo"]["publishedDate"].split("-")
pub_year = pub_date[0]
pub_month = pub_date[1] if len(pub_date) > 1 else None
- pub_house = b['volumeInfo']['publisher'] if 'publisher' in b['volumeInfo'] else None
- language = b['volumeInfo']['language'] if 'language' in b['volumeInfo'] else None
- pages = b['volumeInfo']['pageCount'] if 'pageCount' in b['volumeInfo'] else None
- if 'mainCategory' in b['volumeInfo']:
- other['分类'] = b['volumeInfo']['mainCategory']
- authors = b['volumeInfo']['authors'] if 'authors' in b['volumeInfo'] else None
- if 'description' in b['volumeInfo']:
- brief = b['volumeInfo']['description']
- elif 'textSnippet' in b['volumeInfo']:
+ pub_house = (
+ b["volumeInfo"]["publisher"] if "publisher" in b["volumeInfo"] else None
+ )
+ language = (
+ b["volumeInfo"]["language"] if "language" in b["volumeInfo"] else None
+ )
+ pages = b["volumeInfo"]["pageCount"] if "pageCount" in b["volumeInfo"] else None
+ if "mainCategory" in b["volumeInfo"]:
+ other["分类"] = b["volumeInfo"]["mainCategory"]
+ authors = b["volumeInfo"]["authors"] if "authors" in b["volumeInfo"] else None
+ if "description" in b["volumeInfo"]:
+ brief = b["volumeInfo"]["description"]
+ elif "textSnippet" in b["volumeInfo"]:
brief = b["volumeInfo"]["textSnippet"]["searchInfo"]
else:
- brief = ''
- brief = re.sub(r'<.*?>', '', brief.replace('<br/>', '\n'))
+ brief = ""
+ brief = re.sub(r"<.*?>", "", brief.replace("<br/>", "\n"))
diff --git a/catalog/sites/imdb.py b/catalog/sites/imdb.py
--- a/catalog/sites/imdb.py
+++ b/catalog/sites/imdb.py
- if 'movie_results' in res_data and len(res_data['movie_results']) > 0:
- url = f"https://www.themoviedb.org/movie/{res_data['movie_results'][0]['id']}"
- elif 'tv_results' in res_data and len(res_data['tv_results']) > 0:
+ if "movie_results" in res_data and len(res_data["movie_results"]) > 0:
+ url = (
+ f"https://www.themoviedb.org/movie/{res_data['movie_results'][0]['id']}"
+ )
+ elif "tv_results" in res_data and len(res_data["tv_results"]) > 0:
url = f"https://www.themoviedb.org/tv/{res_data['tv_results'][0]['id']}"
- elif 'tv_season_results' in res_data and len(res_data['tv_season_results']) > 0:
+ elif "tv_season_results" in res_data and len(res_data["tv_season_results"]) > 0:
# this should not happen given IMDB only has ids for either show or episode
- tv_id = res_data['tv_season_results'][0]['show_id']
- season_number = res_data['tv_season_results'][0]['season_number']
+ tv_id = res_data["tv_season_results"][0]["show_id"]
+ season_number = res_data["tv_season_results"][0]["season_number"]
url = f"https://www.themoviedb.org/tv/{tv_id}/season/{season_number}/episode/{episode_number}"
- elif 'tv_episode_results' in res_data and len(res_data['tv_episode_results']) > 0:
- tv_id = res_data['tv_episode_results'][0]['show_id']
- season_number = res_data['tv_episode_results'][0]['season_number']
- episode_number = res_data['tv_episode_results'][0]['episode_number']
+ elif (
+ "tv_episode_results" in res_data and len(res_data["tv_episode_results"]) > 0
+ ):
+ tv_id = res_data["tv_episode_results"][0]["show_id"]
+ season_number = res_data["tv_episode_results"][0]["season_number"]
+ episode_number = res_data["tv_episode_results"][0]["episode_number"]
if season_number == 0:
url = f"https://www.themoviedb.org/tv/{tv_id}/season/{season_number}/episode/{episode_number}"
elif episode_number == 1:
url = f"https://www.themoviedb.org/tv/{tv_id}/season/{season_number}"
else:
- raise ParseError(self, "IMDB id matching TMDB but not first episode, this is not supported")
+ raise ParseError(
+ self,
+ "IMDB id matching TMDB but not first episode, this is not supported",
+ )
else:
raise ParseError(self, "IMDB id not found in TMDB")
tmdb = SiteManager.get_site_by_url(url)
pd = tmdb.scrape()
- pd.metadata['preferred_model'] = tmdb.DEFAULT_MODEL.__name__
+ pd.metadata["preferred_model"] = tmdb.DEFAULT_MODEL.__name__
return pd
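
The imdb.py hunk above resolves an IMDB id through TMDB's /find endpoint and then re-scrapes the matching TMDB page. A minimal sketch of that lookup outside the project, assuming a hypothetical TMDB_API3_KEY value and using only the movie_results / tv_results / tv_episode_results buckets that the documented /find response returns:

import requests

TMDB_API3_KEY = "..."  # assumption: your own TMDB v3 API key

def imdb_to_tmdb_url(imdb_id):
    # /find groups matches into movie_results, tv_results, tv_episode_results, ...
    res = requests.get(
        f"https://api.themoviedb.org/3/find/{imdb_id}",
        params={"api_key": TMDB_API3_KEY, "external_source": "imdb_id"},
    ).json()
    if res.get("movie_results"):
        return f"https://www.themoviedb.org/movie/{res['movie_results'][0]['id']}"
    if res.get("tv_results"):
        return f"https://www.themoviedb.org/tv/{res['tv_results'][0]['id']}"
    if res.get("tv_episode_results"):
        ep = res["tv_episode_results"][0]
        return f"https://www.themoviedb.org/tv/{ep['show_id']}/season/{ep['season_number']}"
    return None
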
diff --git a/catalog/sites/spotify.py b/catalog/sites/spotify.py
index 23f68120..5656cc1a 100644
--- a/catalog/sites/spotify.py
+++ b/catalog/sites/spotify.py
@@ -23,8 +23,8 @@ spotify_token_expire_time = time.time()
class Spotify(AbstractSite):
SITE_NAME = SiteName.Spotify
ID_TYPE = IdType.Spotify_Album
- URL_PATTERNS = [r'\w+://open\.spotify\.com/album/([a-zA-Z0-9]+)']
- WIKI_PROPERTY_ID = '?'
+ URL_PATTERNS = [r"\w+://open\.spotify\.com/album/([a-zA-Z0-9]+)"]
+ WIKI_PROPERTY_ID = "?"
DEFAULT_MODEL = Album
@classmethod
@@ -33,58 +33,63 @@ class Spotify(AbstractSite):
def scrape(self):
api_url = "https://api.spotify.com/v1/albums/" + self.id_value
- headers = {
- 'Authorization': f"Bearer {get_spotify_token()}"
- }
+ headers = {"Authorization": f"Bearer {get_spotify_token()}"}
res_data = BasicDownloader(api_url, headers=headers).download().json()
artist = []
- for artist_dict in res_data['artists']:
- artist.append(artist_dict['name'])
+ for artist_dict in res_data["artists"]:
+ artist.append(artist_dict["name"])
- title = res_data['name']
+ title = res_data["name"]
- genre = ', '.join(res_data['genres'])
+ genre = ", ".join(res_data["genres"])
company = []
- for com in res_data['copyrights']:
- company.append(com['text'])
+ for com in res_data["copyrights"]:
+ company.append(com["text"])
duration = 0
track_list = []
track_urls = []
- for track in res_data['tracks']['items']:
- track_urls.append(track['external_urls']['spotify'])
- duration += track['duration_ms']
- if res_data['tracks']['items'][-1]['disc_number'] > 1:
+ for track in res_data["tracks"]["items"]:
+ track_urls.append(track["external_urls"]["spotify"])
+ duration += track["duration_ms"]
+ if res_data["tracks"]["items"][-1]["disc_number"] > 1:
# more than one disc
- track_list.append(str(
- track['disc_number']) + '-' + str(track['track_number']) + '. ' + track['name'])
+ track_list.append(
+ str(track["disc_number"])
+ + "-"
+ + str(track["track_number"])
+ + ". "
+ + track["name"]
+ )
else:
- track_list.append(str(track['track_number']) + '. ' + track['name'])
- track_list = '\n'.join(track_list)
+ track_list.append(str(track["track_number"]) + ". " + track["name"])
+ track_list = "\n".join(track_list)
- release_date = dateparser.parse(res_data['release_date']).strftime('%Y-%m-%d')
+ release_date = dateparser.parse(res_data["release_date"]).strftime("%Y-%m-%d")
gtin = None
- if res_data['external_ids'].get('upc'):
- gtin = res_data['external_ids'].get('upc')
- if res_data['external_ids'].get('ean'):
- gtin = res_data['external_ids'].get('ean')
+ if res_data["external_ids"].get("upc"):
+ gtin = res_data["external_ids"].get("upc")
+ if res_data["external_ids"].get("ean"):
+ gtin = res_data["external_ids"].get("ean")
isrc = None
- if res_data['external_ids'].get('isrc'):
- isrc = res_data['external_ids'].get('isrc')
+ if res_data["external_ids"].get("isrc"):
+ isrc = res_data["external_ids"].get("isrc")
- pd = ResourceContent(metadata={
- 'title': title,
- 'artist': artist,
- 'genre': genre,
- 'track_list': track_list,
- 'release_date': release_date,
- 'duration': duration,
- 'company': company,
- 'brief': None,
- 'cover_image_url': res_data['images'][0]['url']
- })
+ pd = ResourceContent(
+ metadata={
+ "title": title,
+ "artist": artist,
+ "genre": genre,
+ "track_list": track_list,
+ "release_date": release_date,
+ "duration": duration,
+ "company": company,
+ "brief": None,
+ "cover_image_url": res_data["images"][0]["url"],
+ }
+ )
if gtin:
pd.lookup_ids[IdType.GTIN] = gtin
if isrc:
@@ -95,14 +100,16 @@ class Spotify(AbstractSite):
pd.cover_image = imgdl.download().content
pd.cover_image_extention = imgdl.extention
except Exception:
- _logger.debug(f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}')
+ _logger.debug(
+ f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
+ )
return pd
def get_spotify_token():
global spotify_token, spotify_token_expire_time
if get_mock_mode():
- return 'mocked'
+ return "mocked"
if spotify_token is None or is_spotify_token_expired():
invoke_spotify_token()
return spotify_token
@@ -117,12 +124,8 @@ def invoke_spotify_token():
global spotify_token, spotify_token_expire_time
r = requests.post(
"https://accounts.spotify.com/api/token",
- data={
- "grant_type": "client_credentials"
- },
- headers={
- "Authorization": f"Basic {settings.SPOTIFY_CREDENTIAL}"
- }
+ data={"grant_type": "client_credentials"},
+ headers={"Authorization": f"Basic {settings.SPOTIFY_CREDENTIAL}"},
)
data = r.json()
if r.status_code == 401:
@@ -131,16 +134,12 @@ def invoke_spotify_token():
# for example debugging using a http client
r = requests.post(
"https://accounts.spotify.com/api/token",
- data={
- "grant_type": "client_credentials"
- },
- headers={
- "Authorization": f"Basic {settings.SPOTIFY_CREDENTIAL}"
- }
+ data={"grant_type": "client_credentials"},
+ headers={"Authorization": f"Basic {settings.SPOTIFY_CREDENTIAL}"},
)
data = r.json()
elif r.status_code != 200:
raise Exception(f"Request to spotify API fails. Reason: {r.reason}")
# minus 2 for execution time error
- spotify_token_expire_time = int(data['expires_in']) + time.time() - 2
- spotify_token = data['access_token']
+ spotify_token_expire_time = int(data["expires_in"]) + time.time() - 2
+ spotify_token = data["access_token"]
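
For context, invoke_spotify_token above follows Spotify's standard client-credentials flow: SPOTIFY_CREDENTIAL is presumably the base64-encoded "client_id:client_secret" pair (that is what Basic auth expects), and the token is cached until shortly before expires_in elapses. A self-contained sketch of the same flow, assuming hypothetical CLIENT_ID and CLIENT_SECRET values:

import base64
import time
import requests

CLIENT_ID = "..."      # assumption: your Spotify app credentials
CLIENT_SECRET = "..."

_token = None
_expires_at = 0.0

def get_token():
    global _token, _expires_at
    if _token is None or time.time() >= _expires_at:
        credential = base64.b64encode(f"{CLIENT_ID}:{CLIENT_SECRET}".encode()).decode()
        r = requests.post(
            "https://accounts.spotify.com/api/token",
            data={"grant_type": "client_credentials"},
            headers={"Authorization": f"Basic {credential}"},
        )
        r.raise_for_status()
        data = r.json()
        # refresh a little early, mirroring the "minus 2 seconds" margin above
        _expires_at = time.time() + int(data["expires_in"]) - 2
        _token = data["access_token"]
    return _token
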
diff --git a/catalog/sites/steam.py b/catalog/sites/steam.py
index 029e885f..77815361 100644
--- a/catalog/sites/steam.py
+++ b/catalog/sites/steam.py
@@ -13,7 +13,7 @@ class Steam(AbstractSite):
SITE_NAME = SiteName.Steam
ID_TYPE = IdType.Steam
URL_PATTERNS = [r"\w+://store\.steampowered\.com/app/(\d+)"]
- WIKI_PROPERTY_ID = '?'
+ WIKI_PROPERTY_ID = "?"
DEFAULT_MODEL = Game
@classmethod
@@ -25,41 +25,58 @@ class Steam(AbstractSite):
pd = i.scrape() if i else ResourceContent()
headers = BasicDownloader.headers.copy()
- headers['Host'] = 'store.steampowered.com'
- headers['Cookie'] = "wants_mature_content=1; birthtime=754700401;"
+ headers["Host"] = "store.steampowered.com"
+ headers["Cookie"] = "wants_mature_content=1; birthtime=754700401;"
content = BasicDownloader(self.url, headers=headers).download().html()
title = content.xpath("//div[@class='apphub_AppName']/text()")[0]
developer = content.xpath("//div[@id='developers_list']/a/text()")
- publisher = content.xpath("//div[@class='glance_ctn']//div[@class='dev_row'][2]//a/text()")
+ publisher = content.xpath(
+ "//div[@class='glance_ctn']//div[@class='dev_row'][2]//a/text()"
+ )
release_date = dateparser.parse(
- content.xpath(
- "//div[@class='release_date']/div[@class='date']/text()")[0]
- ).strftime('%Y-%m-%d')
+ content.xpath("//div[@class='release_date']/div[@class='date']/text()")[0]
+ ).strftime("%Y-%m-%d")
genre = content.xpath(
- "//div[@class='details_block']/b[2]/following-sibling::a/text()")
- platform = ['PC']
- brief = content.xpath(
- "//div[@class='game_description_snippet']/text()")[0].strip()
+ "//div[@class='details_block']/b[2]/following-sibling::a/text()"
+ )
+ platform = ["PC"]
+ brief = content.xpath("//div[@class='game_description_snippet']/text()")[
+ 0
+ ].strip()
# try Steam images if no image from IGDB
if pd.cover_image is None:
- pd.metadata['cover_image_url'] = content.xpath("//img[@class='game_header_image_full']/@src")[0].replace("header.jpg", "library_600x900.jpg")
- pd.cover_image, pd.cover_image_extention = BasicImageDownloader.download_image(pd.metadata['cover_image_url'], self.url)
+ pd.metadata["cover_image_url"] = content.xpath(
+ "//img[@class='game_header_image_full']/@src"
+ )[0].replace("header.jpg", "library_600x900.jpg")
+ (
+ pd.cover_image,
+ pd.cover_image_extention,
+ ) = BasicImageDownloader.download_image(
+ pd.metadata["cover_image_url"], self.url
+ )
if pd.cover_image is None:
- pd.metadata['cover_image_url'] = content.xpath("//img[@class='game_header_image_full']/@src")[0]
- pd.cover_image, pd.cover_image_extention = BasicImageDownloader.download_image(pd.metadata['cover_image_url'], self.url)
+ pd.metadata["cover_image_url"] = content.xpath(
+ "//img[@class='game_header_image_full']/@src"
+ )[0]
+ (
+ pd.cover_image,
+ pd.cover_image_extention,
+ ) = BasicImageDownloader.download_image(
+ pd.metadata["cover_image_url"], self.url
+ )
# merge data from IGDB, use localized Steam data if available
d = {
- 'developer': developer,
- 'publisher': publisher,
- 'release_date': release_date,
- 'genre': genre,
- 'platform': platform,
+ "developer": developer,
+ "publisher": publisher,
+ "release_date": release_date,
+ "genre": genre,
+ "platform": platform,
}
d.update(pd.metadata)
pd.metadata = d
if title:
- pd.metadata['title'] = title
+ pd.metadata["title"] = title
if brief:
- pd.metadata['brief'] = brief
+ pd.metadata["brief"] = brief
return pd
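
The Steam scraper above dodges the store's age gate by pre-setting the wants_mature_content and birthtime cookies before parsing the page with XPath. A stripped-down sketch of the same fetch using requests and lxml directly instead of the project's BasicDownloader:

import requests
from lxml import html

def fetch_steam_title(app_id):
    headers = {
        # pre-answer the age/content check so the full store page is served
        "Cookie": "wants_mature_content=1; birthtime=754700401;",
        "User-Agent": "Mozilla/5.0",
    }
    r = requests.get(f"https://store.steampowered.com/app/{app_id}", headers=headers)
    doc = html.fromstring(r.text)
    return doc.xpath("//div[@class='apphub_AppName']/text()")[0]
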
diff --git a/catalog/sites/tmdb.py b/catalog/sites/tmdb.py
index ba7f7538..1b721bfe 100644
--- a/catalog/sites/tmdb.py
+++ b/catalog/sites/tmdb.py
@@ -37,8 +37,8 @@ def _copy_dict(s, key_map):
class TMDB_Movie(AbstractSite):
SITE_NAME = SiteName.TMDB
ID_TYPE = IdType.TMDB_Movie
- URL_PATTERNS = [r'\w+://www.themoviedb.org/movie/(\d+)']
- WIKI_PROPERTY_ID = '?'
+ URL_PATTERNS = [r"\w+://www.themoviedb.org/movie/(\d+)"]
+ WIKI_PROPERTY_ID = "?"
DEFAULT_MODEL = Movie
@classmethod
@@ -55,37 +55,59 @@ class TMDB_Movie(AbstractSite):
res_data = BasicDownloader(api_url).download().json()
if is_series:
- title = res_data['name']
- orig_title = res_data['original_name']
- year = int(res_data['first_air_date'].split(
- '-')[0]) if res_data['first_air_date'] else None
- imdb_code = res_data['external_ids']['imdb_id']
- showtime = [{res_data['first_air_date']: "首播日期"}
- ] if res_data['first_air_date'] else None
+ title = res_data["name"]
+ orig_title = res_data["original_name"]
+ year = (
+ int(res_data["first_air_date"].split("-")[0])
+ if res_data["first_air_date"]
+ else None
+ )
+ imdb_code = res_data["external_ids"]["imdb_id"]
+ showtime = (
+ [{res_data["first_air_date"]: "首播日期"}]
+ if res_data["first_air_date"]
+ else None
+ )
duration = None
else:
- title = res_data['title']
- orig_title = res_data['original_title']
- year = int(res_data['release_date'].split('-')
- [0]) if res_data['release_date'] else None
- showtime = [{res_data['release_date']: "发布日期"}
- ] if res_data['release_date'] else None
- imdb_code = res_data['imdb_id']
+ title = res_data["title"]
+ orig_title = res_data["original_title"]
+ year = (
+ int(res_data["release_date"].split("-")[0])
+ if res_data["release_date"]
+ else None
+ )
+ showtime = (
+ [{res_data["release_date"]: "发布日期"}]
+ if res_data["release_date"]
+ else None
+ )
+ imdb_code = res_data["imdb_id"]
# in minutes
- duration = res_data['runtime'] if res_data['runtime'] else None
+ duration = res_data["runtime"] if res_data["runtime"] else None
- genre = [x['name'] for x in res_data['genres']]
- language = list(map(lambda x: x['name'], res_data['spoken_languages']))
- brief = res_data['overview']
+ genre = [x["name"] for x in res_data["genres"]]
+ language = list(map(lambda x: x["name"], res_data["spoken_languages"]))
+ brief = res_data["overview"]
if is_series:
- director = list(map(lambda x: x['name'], res_data['created_by']))
+ director = list(map(lambda x: x["name"], res_data["created_by"]))
else:
- director = list(map(lambda x: x['name'], filter(
- lambda c: c['job'] == 'Director', res_data['credits']['crew'])))
- playwright = list(map(lambda x: x['name'], filter(
- lambda c: c['job'] == 'Screenplay', res_data['credits']['crew'])))
- actor = list(map(lambda x: x['name'], res_data['credits']['cast']))
+ director = list(
+ map(
+ lambda x: x["name"],
+ filter(
+ lambda c: c["job"] == "Director", res_data["credits"]["crew"]
+ ),
+ )
+ )
+ playwright = list(
+ map(
+ lambda x: x["name"],
+ filter(lambda c: c["job"] == "Screenplay", res_data["credits"]["crew"]),
+ )
+ )
+ actor = list(map(lambda x: x["name"], res_data["credits"]["cast"]))
area = []
other_info = {}
@@ -95,33 +117,39 @@ class TMDB_Movie(AbstractSite):
# other_info['奖项'] = res_data['awards']
# other_info['TMDB_ID'] = id
if is_series:
- other_info['Seasons'] = res_data['number_of_seasons']
- other_info['Episodes'] = res_data['number_of_episodes']
+ other_info["Seasons"] = res_data["number_of_seasons"]
+ other_info["Episodes"] = res_data["number_of_episodes"]
# TODO: use GET /configuration to get base url
- img_url = ('https://image.tmdb.org/t/p/original/' + res_data['poster_path']) if res_data['poster_path'] is not None else None
+ img_url = (
+ ("https://image.tmdb.org/t/p/original/" + res_data["poster_path"])
+ if res_data["poster_path"] is not None
+ else None
+ )
- pd = ResourceContent(metadata={
- 'title': title,
- 'orig_title': orig_title,
- 'other_title': None,
- 'imdb_code': imdb_code,
- 'director': director,
- 'playwright': playwright,
- 'actor': actor,
- 'genre': genre,
- 'showtime': showtime,
- 'site': None,
- 'area': area,
- 'language': language,
- 'year': year,
- 'duration': duration,
- 'season': None,
- 'episodes': None,
- 'single_episode_length': None,
- 'brief': brief,
- 'cover_image_url': img_url,
- })
+ pd = ResourceContent(
+ metadata={
+ "title": title,
+ "orig_title": orig_title,
+ "other_title": None,
+ "imdb_code": imdb_code,
+ "director": director,
+ "playwright": playwright,
+ "actor": actor,
+ "genre": genre,
+ "showtime": showtime,
+ "site": None,
+ "area": area,
+ "language": language,
+ "year": year,
+ "duration": duration,
+ "season": None,
+ "episodes": None,
+ "single_episode_length": None,
+ "brief": brief,
+ "cover_image_url": img_url,
+ }
+ )
if imdb_code:
pd.lookup_ids[IdType.IMDB] = imdb_code
if pd.metadata["cover_image_url"]:
@@ -130,7 +158,9 @@ class TMDB_Movie(AbstractSite):
pd.cover_image = imgdl.download().content
pd.cover_image_extention = imgdl.extention
except Exception:
- _logger.debug(f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}')
+ _logger.debug(
+ f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
+ )
return pd
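
The TODO above notes that the hard-coded https://image.tmdb.org/t/p/original/ prefix should really come from TMDB's GET /configuration endpoint, which lists the image host and available poster sizes. A small sketch of that call, again assuming a hypothetical TMDB_API3_KEY:

import requests

TMDB_API3_KEY = "..."  # assumption: your own TMDB v3 API key

def poster_base_url():
    conf = requests.get(
        "https://api.themoviedb.org/3/configuration",
        params={"api_key": TMDB_API3_KEY},
    ).json()
    images = conf["images"]
    # the last entry of poster_sizes is normally "original";
    # concatenate the result with the item's poster_path (which starts with a slash)
    return images["secure_base_url"] + images["poster_sizes"][-1]
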
@@ -138,8 +168,11 @@ class TMDB_Movie(AbstractSite):
class TMDB_TV(AbstractSite):
SITE_NAME = SiteName.TMDB
ID_TYPE = IdType.TMDB_TV
- URL_PATTERNS = [r'\w+://www.themoviedb.org/tv/(\d+)[^/]*$', r'\w+://www.themoviedb.org/tv/(\d+)[^/]*/seasons']
- WIKI_PROPERTY_ID = '?'
+ URL_PATTERNS = [
+ r"\w+://www.themoviedb.org/tv/(\d+)[^/]*$",
+ r"\w+://www.themoviedb.org/tv/(\d+)[^/]*/seasons",
+ ]
+ WIKI_PROPERTY_ID = "?"
DEFAULT_MODEL = TVShow
@classmethod
@@ -156,38 +189,60 @@ class TMDB_TV(AbstractSite):
res_data = BasicDownloader(api_url).download().json()
if is_series:
- title = res_data['name']
- orig_title = res_data['original_name']
- year = int(res_data['first_air_date'].split(
- '-')[0]) if res_data['first_air_date'] else None
- imdb_code = res_data['external_ids']['imdb_id']
- showtime = [{res_data['first_air_date']: "首播日期"}
- ] if res_data['first_air_date'] else None
+ title = res_data["name"]
+ orig_title = res_data["original_name"]
+ year = (
+ int(res_data["first_air_date"].split("-")[0])
+ if res_data["first_air_date"]
+ else None
+ )
+ imdb_code = res_data["external_ids"]["imdb_id"]
+ showtime = (
+ [{res_data["first_air_date"]: "首播日期"}]
+ if res_data["first_air_date"]
+ else None
+ )
duration = None
else:
- title = res_data['title']
- orig_title = res_data['original_title']
- year = int(res_data['release_date'].split('-')
- [0]) if res_data['release_date'] else None
- showtime = [{res_data['release_date']: "发布日期"}
- ] if res_data['release_date'] else None
- imdb_code = res_data['imdb_id']
+ title = res_data["title"]
+ orig_title = res_data["original_title"]
+ year = (
+ int(res_data["release_date"].split("-")[0])
+ if res_data["release_date"]
+ else None
+ )
+ showtime = (
+ [{res_data["release_date"]: "发布日期"}]
+ if res_data["release_date"]
+ else None
+ )
+ imdb_code = res_data["imdb_id"]
# in minutes
- duration = res_data['runtime'] if res_data['runtime'] else None
+ duration = res_data["runtime"] if res_data["runtime"] else None
- genre = [x['name'] for x in res_data['genres']]
+ genre = [x["name"] for x in res_data["genres"]]
- language = list(map(lambda x: x['name'], res_data['spoken_languages']))
- brief = res_data['overview']
+ language = list(map(lambda x: x["name"], res_data["spoken_languages"]))
+ brief = res_data["overview"]
if is_series:
- director = list(map(lambda x: x['name'], res_data['created_by']))
+ director = list(map(lambda x: x["name"], res_data["created_by"]))
else:
- director = list(map(lambda x: x['name'], filter(
- lambda c: c['job'] == 'Director', res_data['credits']['crew'])))
- playwright = list(map(lambda x: x['name'], filter(
- lambda c: c['job'] == 'Screenplay', res_data['credits']['crew'])))
- actor = list(map(lambda x: x['name'], res_data['credits']['cast']))
+ director = list(
+ map(
+ lambda x: x["name"],
+ filter(
+ lambda c: c["job"] == "Director", res_data["credits"]["crew"]
+ ),
+ )
+ )
+ playwright = list(
+ map(
+ lambda x: x["name"],
+ filter(lambda c: c["job"] == "Screenplay", res_data["credits"]["crew"]),
+ )
+ )
+ actor = list(map(lambda x: x["name"], res_data["credits"]["cast"]))
area = []
other_info = {}
@@ -197,41 +252,53 @@ class TMDB_TV(AbstractSite):
# other_info['奖项'] = res_data['awards']
# other_info['TMDB_ID'] = id
if is_series:
- other_info['Seasons'] = res_data['number_of_seasons']
- other_info['Episodes'] = res_data['number_of_episodes']
+ other_info["Seasons"] = res_data["number_of_seasons"]
+ other_info["Episodes"] = res_data["number_of_episodes"]
# TODO: use GET /configuration to get base url
- img_url = ('https://image.tmdb.org/t/p/original/' + res_data['poster_path']) if res_data['poster_path'] is not None else None
+ img_url = (
+ ("https://image.tmdb.org/t/p/original/" + res_data["poster_path"])
+ if res_data["poster_path"] is not None
+ else None
+ )
- season_links = list(map(lambda s: {
- 'model': 'TVSeason',
- 'id_type': IdType.TMDB_TVSeason,
- 'id_value': f'{self.id_value}-{s["season_number"]}',
- 'title': s['name'],
- 'url': f'{self.url}/season/{s["season_number"]}'}, res_data['seasons']))
- pd = ResourceContent(metadata={
- 'title': title,
- 'orig_title': orig_title,
- 'other_title': None,
- 'imdb_code': imdb_code,
- 'director': director,
- 'playwright': playwright,
- 'actor': actor,
- 'genre': genre,
- 'showtime': showtime,
- 'site': None,
- 'area': area,
- 'language': language,
- 'year': year,
- 'duration': duration,
- 'season_count': res_data['number_of_seasons'],
- 'season': None,
- 'episodes': None,
- 'single_episode_length': None,
- 'brief': brief,
- 'cover_image_url': img_url,
- 'related_resources': season_links,
- })
+ season_links = list(
+ map(
+ lambda s: {
+ "model": "TVSeason",
+ "id_type": IdType.TMDB_TVSeason,
+ "id_value": f'{self.id_value}-{s["season_number"]}',
+ "title": s["name"],
+ "url": f'{self.url}/season/{s["season_number"]}',
+ },
+ res_data["seasons"],
+ )
+ )
+ pd = ResourceContent(
+ metadata={
+ "title": title,
+ "orig_title": orig_title,
+ "other_title": None,
+ "imdb_code": imdb_code,
+ "director": director,
+ "playwright": playwright,
+ "actor": actor,
+ "genre": genre,
+ "showtime": showtime,
+ "site": None,
+ "area": area,
+ "language": language,
+ "year": year,
+ "duration": duration,
+ "season_count": res_data["number_of_seasons"],
+ "season": None,
+ "episodes": None,
+ "single_episode_length": None,
+ "brief": brief,
+ "cover_image_url": img_url,
+ "related_resources": season_links,
+ }
+ )
if imdb_code:
pd.lookup_ids[IdType.IMDB] = imdb_code
@@ -241,7 +308,9 @@ class TMDB_TV(AbstractSite):
pd.cover_image = imgdl.download().content
pd.cover_image_extention = imgdl.extention
except Exception:
- _logger.debug(f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}')
+ _logger.debug(
+ f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
+ )
return pd
@@ -249,58 +318,87 @@ class TMDB_TV(AbstractSite):
class TMDB_TVSeason(AbstractSite):
SITE_NAME = SiteName.TMDB
ID_TYPE = IdType.TMDB_TVSeason
- URL_PATTERNS = [r'\w+://www.themoviedb.org/tv/(\d+)[^/]*/season/(\d+)[^/]*$']
- WIKI_PROPERTY_ID = '?'
+ URL_PATTERNS = [r"\w+://www.themoviedb.org/tv/(\d+)[^/]*/season/(\d+)[^/]*$"]
+ WIKI_PROPERTY_ID = "?"
DEFAULT_MODEL = TVSeason
- ID_PATTERN = r'^(\d+)-(\d+)$'
+ ID_PATTERN = r"^(\d+)-(\d+)$"
@classmethod
def url_to_id(cls, url: str):
- u = next(iter([re.match(p, url) for p in cls.URL_PATTERNS if re.match(p, url)]), None)
- return u[1] + '-' + u[2] if u else None
+ u = next(
+ iter([re.match(p, url) for p in cls.URL_PATTERNS if re.match(p, url)]), None
+ )
+ return u[1] + "-" + u[2] if u else None
@classmethod
def id_to_url(cls, id_value):
- v = id_value.split('-')
+ v = id_value.split("-")
return f"https://www.themoviedb.org/tv/{v[0]}/season/{v[1]}"
def scrape(self):
- v = self.id_value.split('-')
+ v = self.id_value.split("-")
api_url = f"https://api.themoviedb.org/3/tv/{v[0]}/season/{v[1]}?api_key={settings.TMDB_API3_KEY}&language=zh-CN&append_to_response=external_ids,credits"
d = BasicDownloader(api_url).download().json()
- if not d.get('id'):
- raise ParseError('id')
- pd = ResourceContent(metadata=_copy_dict(d, {'name': 'title', 'overview': 'brief', 'air_date': 'air_date', 'season_number': 0, 'external_ids': []}))
- pd.metadata['required_resources'] = [{
- 'model': 'TVShow',
- 'id_type': IdType.TMDB_TV,
- 'id_value': v[0],
- 'title': f'TMDB TV Show {v[0]}',
- 'url': f"https://www.themoviedb.org/tv/{v[0]}",
- }]
- pd.lookup_ids[IdType.IMDB] = d['external_ids'].get('imdb_id')
- pd.metadata['cover_image_url'] = ('https://image.tmdb.org/t/p/original/' + d['poster_path']) if d['poster_path'] else None
- pd.metadata['title'] = pd.metadata['title'] if pd.metadata['title'] else f'Season {d["season_number"]}'
- pd.metadata['episode_number_list'] = list(map(lambda ep: ep['episode_number'], d['episodes']))
- pd.metadata['episode_count'] = len(pd.metadata['episode_number_list'])
+ if not d.get("id"):
+ raise ParseError("id")
+ pd = ResourceContent(
+ metadata=_copy_dict(
+ d,
+ {
+ "name": "title",
+ "overview": "brief",
+ "air_date": "air_date",
+ "season_number": 0,
+ "external_ids": [],
+ },
+ )
+ )
+ pd.metadata["required_resources"] = [
+ {
+ "model": "TVShow",
+ "id_type": IdType.TMDB_TV,
+ "id_value": v[0],
+ "title": f"TMDB TV Show {v[0]}",
+ "url": f"https://www.themoviedb.org/tv/{v[0]}",
+ }
+ ]
+ pd.lookup_ids[IdType.IMDB] = d["external_ids"].get("imdb_id")
+ pd.metadata["cover_image_url"] = (
+ ("https://image.tmdb.org/t/p/original/" + d["poster_path"])
+ if d["poster_path"]
+ else None
+ )
+ pd.metadata["title"] = (
+ pd.metadata["title"]
+ if pd.metadata["title"]
+ else f'Season {d["season_number"]}'
+ )
+ pd.metadata["episode_number_list"] = list(
+ map(lambda ep: ep["episode_number"], d["episodes"])
+ )
+ pd.metadata["episode_count"] = len(pd.metadata["episode_number_list"])
if pd.metadata["cover_image_url"]:
imgdl = BasicImageDownloader(pd.metadata["cover_image_url"], self.url)
try:
pd.cover_image = imgdl.download().content
pd.cover_image_extention = imgdl.extention
except Exception:
- _logger.debug(f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}')
+ _logger.debug(
+ f'failed to download cover for {self.url} from {pd.metadata["cover_image_url"]}'
+ )
# get external id from 1st episode
if pd.lookup_ids[IdType.IMDB]:
_logger.warning("Unexpected IMDB id for TMDB tv season")
- elif len(pd.metadata['episode_number_list']) == 0:
- _logger.warning("Unable to lookup IMDB id for TMDB tv season with zero episodes")
+ elif len(pd.metadata["episode_number_list"]) == 0:
+ _logger.warning(
+ "Unable to lookup IMDB id for TMDB tv season with zero episodes"
+ )
else:
- ep = pd.metadata['episode_number_list'][0]
+ ep = pd.metadata["episode_number_list"][0]
api_url2 = f"https://api.themoviedb.org/3/tv/{v[0]}/season/{v[1]}/episode/{ep}?api_key={settings.TMDB_API3_KEY}&language=zh-CN&append_to_response=external_ids,credits"
d2 = BasicDownloader(api_url2).download().json()
- if not d2.get('id'):
- raise ParseError('episode id for season')
- pd.lookup_ids[IdType.IMDB] = d2['external_ids'].get('imdb_id')
+ if not d2.get("id"):
+ raise ParseError("episode id for season")
+ pd.lookup_ids[IdType.IMDB] = d2["external_ids"].get("imdb_id")
return pd
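
TMDB_TVSeason above identifies a season as "{show_id}-{season_number}" and maps that composite id to and from the season URL; the test cases later in this diff exercise exactly this round trip. A standalone sketch of the mapping:

import re

SEASON_URL_PATTERN = r"\w+://www.themoviedb.org/tv/(\d+)[^/]*/season/(\d+)[^/]*$"

def url_to_id(url):
    m = re.match(SEASON_URL_PATTERN, url)
    return f"{m[1]}-{m[2]}" if m else None

def id_to_url(id_value):
    show_id, season_number = id_value.split("-")
    return f"https://www.themoviedb.org/tv/{show_id}/season/{season_number}"

assert url_to_id("https://www.themoviedb.org/tv/57243-doctor-who/season/11") == "57243-11"
assert id_to_url("57243-11") == "https://www.themoviedb.org/tv/57243/season/11"
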
diff --git a/catalog/tv/models.py b/catalog/tv/models.py
index c8692010..9b74e16c 100644
--- a/catalog/tv/models.py
+++ b/catalog/tv/models.py
@@ -31,8 +31,8 @@ from django.utils.translation import gettext_lazy as _
class TVShow(Item):
category = ItemCategory.TV
- url_path = 'tv'
- demonstrative = _('这部剧集')
+ url_path = "tv"
+ demonstrative = _("这部剧集")
imdb = PrimaryLookupIdDescriptor(IdType.IMDB)
tmdb_tv = PrimaryLookupIdDescriptor(IdType.TMDB_TV)
imdb = PrimaryLookupIdDescriptor(IdType.IMDB)
@@ -40,100 +40,208 @@ class TVShow(Item):
episode_count = models.PositiveIntegerField(null=True)
METADATA_COPY_LIST = [
- 'title',
- 'season_count',
- 'orig_title',
- 'other_title',
- 'director',
- 'playwright',
- 'actor',
- 'genre',
- 'showtime',
- 'site',
- 'area',
- 'language',
- 'year',
- 'duration',
- 'season_count',
- 'episode_count',
- 'single_episode_length',
- 'brief',
+ "title",
+ "season_count",
+ "orig_title",
+ "other_title",
+ "director",
+ "playwright",
+ "actor",
+ "genre",
+ "showtime",
+ "site",
+ "area",
+ "language",
+ "year",
+ "duration",
+ "season_count",
+ "episode_count",
+ "single_episode_length",
+ "brief",
]
- orig_title = jsondata.CharField(_("original title"), blank=True, default='', max_length=500)
- other_title = jsondata.ArrayField(models.CharField(_("other title"), blank=True, default='', max_length=500), null=True, blank=True, default=list, )
- director = jsondata.ArrayField(models.CharField(_("director"), blank=True, default='', max_length=200), null=True, blank=True, default=list, )
- playwright = jsondata.ArrayField(models.CharField(_("playwright"), blank=True, default='', max_length=200), null=True, blank=True, default=list, )
- actor = jsondata.ArrayField(models.CharField(_("actor"), blank=True, default='', max_length=200), null=True, blank=True, default=list, )
- genre = jsondata.ArrayField(models.CharField(_("genre"), blank=True, default='', max_length=50), null=True, blank=True, default=list, ) # , choices=MovieGenreEnum.choices
- showtime = jsondata.ArrayField(null=True, blank=True, default=list, )
- site = jsondata.URLField(_('site url'), blank=True, default='', max_length=200)
- area = jsondata.ArrayField(models.CharField(_("country or region"), blank=True, default='', max_length=100, ), null=True, blank=True, default=list, )
- language = jsondata.ArrayField(models.CharField(blank=True, default='', max_length=100, ), null=True, blank=True, default=list, )
+ orig_title = jsondata.CharField(
+ _("original title"), blank=True, default="", max_length=500
+ )
+ other_title = jsondata.ArrayField(
+ models.CharField(_("other title"), blank=True, default="", max_length=500),
+ null=True,
+ blank=True,
+ default=list,
+ )
+ director = jsondata.ArrayField(
+ models.CharField(_("director"), blank=True, default="", max_length=200),
+ null=True,
+ blank=True,
+ default=list,
+ )
+ playwright = jsondata.ArrayField(
+ models.CharField(_("playwright"), blank=True, default="", max_length=200),
+ null=True,
+ blank=True,
+ default=list,
+ )
+ actor = jsondata.ArrayField(
+ models.CharField(_("actor"), blank=True, default="", max_length=200),
+ null=True,
+ blank=True,
+ default=list,
+ )
+ genre = jsondata.ArrayField(
+ models.CharField(_("genre"), blank=True, default="", max_length=50),
+ null=True,
+ blank=True,
+ default=list,
+ ) # , choices=MovieGenreEnum.choices
+ showtime = jsondata.ArrayField(
+ null=True,
+ blank=True,
+ default=list,
+ )
+ site = jsondata.URLField(_("site url"), blank=True, default="", max_length=200)
+ area = jsondata.ArrayField(
+ models.CharField(
+ _("country or region"),
+ blank=True,
+ default="",
+ max_length=100,
+ ),
+ null=True,
+ blank=True,
+ default=list,
+ )
+ language = jsondata.ArrayField(
+ models.CharField(
+ blank=True,
+ default="",
+ max_length=100,
+ ),
+ null=True,
+ blank=True,
+ default=list,
+ )
year = jsondata.IntegerField(null=True, blank=True)
season_number = jsondata.IntegerField(null=True, blank=True)
single_episode_length = jsondata.IntegerField(null=True, blank=True)
- duration = jsondata.CharField(blank=True, default='', max_length=200)
+ duration = jsondata.CharField(blank=True, default="", max_length=200)
class TVSeason(Item):
category = ItemCategory.TV
- url_path = 'tv/season'
- demonstrative = _('这部剧集')
+ url_path = "tv/season"
+ demonstrative = _("这部剧集")
douban_movie = PrimaryLookupIdDescriptor(IdType.DoubanMovie)
imdb = PrimaryLookupIdDescriptor(IdType.IMDB)
tmdb_tvseason = PrimaryLookupIdDescriptor(IdType.TMDB_TVSeason)
- show = models.ForeignKey(TVShow, null=True, on_delete=models.SET_NULL, related_name='seasons')
+ show = models.ForeignKey(
+ TVShow, null=True, on_delete=models.SET_NULL, related_name="seasons"
+ )
season_number = models.PositiveIntegerField(null=True)
episode_count = models.PositiveIntegerField(null=True)
METADATA_COPY_LIST = [
- 'title',
- 'orig_title',
- 'other_title',
- 'director',
- 'playwright',
- 'actor',
- 'genre',
- 'showtime',
- 'site',
- 'area',
- 'language',
- 'year',
- 'duration',
- 'season_number',
- 'episode_count',
- 'single_episode_length',
- 'brief',
+ "title",
+ "orig_title",
+ "other_title",
+ "director",
+ "playwright",
+ "actor",
+ "genre",
+ "showtime",
+ "site",
+ "area",
+ "language",
+ "year",
+ "duration",
+ "season_number",
+ "episode_count",
+ "single_episode_length",
+ "brief",
]
- orig_title = jsondata.CharField(_("original title"), blank=True, default='', max_length=500)
- other_title = jsondata.ArrayField(models.CharField(_("other title"), blank=True, default='', max_length=500), null=True, blank=True, default=list, )
- director = jsondata.ArrayField(models.CharField(_("director"), blank=True, default='', max_length=200), null=True, blank=True, default=list, )
- playwright = jsondata.ArrayField(models.CharField(_("playwright"), blank=True, default='', max_length=200), null=True, blank=True, default=list, )
- actor = jsondata.ArrayField(models.CharField(_("actor"), blank=True, default='', max_length=200), null=True, blank=True, default=list, )
- genre = jsondata.ArrayField(models.CharField(_("genre"), blank=True, default='', max_length=50), null=True, blank=True, default=list, ) # , choices=MovieGenreEnum.choices
- showtime = jsondata.ArrayField(null=True, blank=True, default=list, )
- site = jsondata.URLField(_('site url'), blank=True, default='', max_length=200)
- area = jsondata.ArrayField(models.CharField(_("country or region"), blank=True, default='', max_length=100, ), null=True, blank=True, default=list, )
- language = jsondata.ArrayField(models.CharField(blank=True, default='', max_length=100, ), null=True, blank=True, default=list, )
+ orig_title = jsondata.CharField(
+ _("original title"), blank=True, default="", max_length=500
+ )
+ other_title = jsondata.ArrayField(
+ models.CharField(_("other title"), blank=True, default="", max_length=500),
+ null=True,
+ blank=True,
+ default=list,
+ )
+ director = jsondata.ArrayField(
+ models.CharField(_("director"), blank=True, default="", max_length=200),
+ null=True,
+ blank=True,
+ default=list,
+ )
+ playwright = jsondata.ArrayField(
+ models.CharField(_("playwright"), blank=True, default="", max_length=200),
+ null=True,
+ blank=True,
+ default=list,
+ )
+ actor = jsondata.ArrayField(
+ models.CharField(_("actor"), blank=True, default="", max_length=200),
+ null=True,
+ blank=True,
+ default=list,
+ )
+ genre = jsondata.ArrayField(
+ models.CharField(_("genre"), blank=True, default="", max_length=50),
+ null=True,
+ blank=True,
+ default=list,
+ ) # , choices=MovieGenreEnum.choices
+ showtime = jsondata.ArrayField(
+ null=True,
+ blank=True,
+ default=list,
+ )
+ site = jsondata.URLField(_("site url"), blank=True, default="", max_length=200)
+ area = jsondata.ArrayField(
+ models.CharField(
+ _("country or region"),
+ blank=True,
+ default="",
+ max_length=100,
+ ),
+ null=True,
+ blank=True,
+ default=list,
+ )
+ language = jsondata.ArrayField(
+ models.CharField(
+ blank=True,
+ default="",
+ max_length=100,
+ ),
+ null=True,
+ blank=True,
+ default=list,
+ )
year = jsondata.IntegerField(null=True, blank=True)
single_episode_length = jsondata.IntegerField(null=True, blank=True)
- duration = jsondata.CharField(blank=True, default='', max_length=200)
+ duration = jsondata.CharField(blank=True, default="", max_length=200)
def update_linked_items_from_external_resource(self, resource):
"""add Work from resource.metadata['work'] if not yet"""
links = resource.required_resources + resource.related_resources
for w in links:
- if w['model'] == 'TVShow':
- p = ExternalResource.objects.filter(id_type=w['id_type'], id_value=w['id_value']).first()
+ if w["model"] == "TVShow":
+ p = ExternalResource.objects.filter(
+ id_type=w["id_type"], id_value=w["id_value"]
+ ).first()
if p and p.item and self.show != p.item:
self.show = p.item
class TVEpisode(Item):
category = ItemCategory.TV
- url_path = 'tv/episode'
- show = models.ForeignKey(TVShow, null=True, on_delete=models.SET_NULL, related_name='episodes')
- season = models.ForeignKey(TVSeason, null=True, on_delete=models.SET_NULL, related_name='episodes')
+ url_path = "tv/episode"
+ show = models.ForeignKey(
+ TVShow, null=True, on_delete=models.SET_NULL, related_name="episodes"
+ )
+ season = models.ForeignKey(
+ TVSeason, null=True, on_delete=models.SET_NULL, related_name="episodes"
+ )
episode_number = models.PositiveIntegerField(null=True)
imdb = PrimaryLookupIdDescriptor(IdType.IMDB)
- METADATA_COPY_LIST = ['title', 'brief', 'episode_number']
+ METADATA_COPY_LIST = ["title", "brief", "episode_number"]
diff --git a/catalog/tv/tests.py b/catalog/tv/tests.py
index ffc7ab05..210d514d 100644
--- a/catalog/tv/tests.py
+++ b/catalog/tv/tests.py
@@ -5,10 +5,10 @@ from catalog.tv.models import *
class TMDBTVTestCase(TestCase):
def test_parse(self):
- t_id = '57243'
- t_url = 'https://www.themoviedb.org/tv/57243-doctor-who'
- t_url1 = 'https://www.themoviedb.org/tv/57243-doctor-who/seasons'
- t_url2 = 'https://www.themoviedb.org/tv/57243'
+ t_id = "57243"
+ t_url = "https://www.themoviedb.org/tv/57243-doctor-who"
+ t_url1 = "https://www.themoviedb.org/tv/57243-doctor-who/seasons"
+ t_url2 = "https://www.themoviedb.org/tv/57243"
p1 = SiteManager.get_site_by_id_type(IdType.TMDB_TV)
self.assertIsNotNone(p1)
self.assertEqual(p1.validate_url(t_url), True)
@@ -17,29 +17,29 @@ class TMDBTVTestCase(TestCase):
p2 = SiteManager.get_site_by_url(t_url)
self.assertEqual(p1.id_to_url(t_id), t_url2)
self.assertEqual(p2.url_to_id(t_url), t_id)
- wrong_url = 'https://www.themoviedb.org/tv/57243-doctor-who/season/13'
+ wrong_url = "https://www.themoviedb.org/tv/57243-doctor-who/season/13"
s1 = SiteManager.get_site_by_url(wrong_url)
self.assertNotIsInstance(s1, TVShow)
@use_local_response
def test_scrape(self):
- t_url = 'https://www.themoviedb.org/tv/57243-doctor-who'
+ t_url = "https://www.themoviedb.org/tv/57243-doctor-who"
site = SiteManager.get_site_by_url(t_url)
self.assertEqual(site.ready, False)
- self.assertEqual(site.id_value, '57243')
+ self.assertEqual(site.id_value, "57243")
site.get_resource_ready()
self.assertEqual(site.ready, True)
- self.assertEqual(site.resource.metadata['title'], '神秘博士')
+ self.assertEqual(site.resource.metadata["title"], "神秘博士")
self.assertEqual(site.resource.item.primary_lookup_id_type, IdType.IMDB)
- self.assertEqual(site.resource.item.__class__.__name__, 'TVShow')
- self.assertEqual(site.resource.item.imdb, 'tt0436992')
+ self.assertEqual(site.resource.item.__class__.__name__, "TVShow")
+ self.assertEqual(site.resource.item.imdb, "tt0436992")
class TMDBTVSeasonTestCase(TestCase):
def test_parse(self):
- t_id = '57243-11'
- t_url = 'https://www.themoviedb.org/tv/57243-doctor-who/season/11'
- t_url_unique = 'https://www.themoviedb.org/tv/57243/season/11'
+ t_id = "57243-11"
+ t_url = "https://www.themoviedb.org/tv/57243-doctor-who/season/11"
+ t_url_unique = "https://www.themoviedb.org/tv/57243/season/11"
p1 = SiteManager.get_site_by_id_type(IdType.TMDB_TVSeason)
self.assertIsNotNone(p1)
self.assertEqual(p1.validate_url(t_url), True)
@@ -50,48 +50,48 @@ class TMDBTVSeasonTestCase(TestCase):
@use_local_response
def test_scrape(self):
- t_url = 'https://www.themoviedb.org/tv/57243-doctor-who/season/4'
+ t_url = "https://www.themoviedb.org/tv/57243-doctor-who/season/4"
site = SiteManager.get_site_by_url(t_url)
self.assertEqual(site.ready, False)
- self.assertEqual(site.id_value, '57243-4')
+ self.assertEqual(site.id_value, "57243-4")
site.get_resource_ready()
self.assertEqual(site.ready, True)
- self.assertEqual(site.resource.metadata['title'], '第 4 季')
+ self.assertEqual(site.resource.metadata["title"], "第 4 季")
self.assertEqual(site.resource.item.primary_lookup_id_type, IdType.IMDB)
- self.assertEqual(site.resource.item.__class__.__name__, 'TVSeason')
- self.assertEqual(site.resource.item.imdb, 'tt1159991')
+ self.assertEqual(site.resource.item.__class__.__name__, "TVSeason")
+ self.assertEqual(site.resource.item.imdb, "tt1159991")
self.assertIsNotNone(site.resource.item.show)
- self.assertEqual(site.resource.item.show.imdb, 'tt0436992')
+ self.assertEqual(site.resource.item.show.imdb, "tt0436992")
class DoubanMovieTVTestCase(TestCase):
@use_local_response
def test_scrape(self):
- url3 = 'https://movie.douban.com/subject/3627919/'
+ url3 = "https://movie.douban.com/subject/3627919/"
p3 = SiteManager.get_site_by_url(url3).get_resource_ready()
- self.assertEqual(p3.item.__class__.__name__, 'TVSeason')
+ self.assertEqual(p3.item.__class__.__name__, "TVSeason")
self.assertIsNotNone(p3.item.show)
- self.assertEqual(p3.item.show.imdb, 'tt0436992')
+ self.assertEqual(p3.item.show.imdb, "tt0436992")
@use_local_response
def test_scrape_singleseason(self):
- url3 = 'https://movie.douban.com/subject/26895436/'
+ url3 = "https://movie.douban.com/subject/26895436/"
p3 = SiteManager.get_site_by_url(url3).get_resource_ready()
- self.assertEqual(p3.item.__class__.__name__, 'TVShow')
+ self.assertEqual(p3.item.__class__.__name__, "TVShow")
@use_local_response
def test_scrape_fix_imdb(self):
- url = 'https://movie.douban.com/subject/35597581/'
+ url = "https://movie.douban.com/subject/35597581/"
item = SiteManager.get_site_by_url(url).get_resource_ready().item
# this douban links to S6E3, we'll reset it to S6E1 to keep consistant
- self.assertEqual(item.imdb, 'tt21599650')
+ self.assertEqual(item.imdb, "tt21599650")
class MultiTVSitesTestCase(TestCase):
@use_local_response
def test_tvshows(self):
- url1 = 'https://www.themoviedb.org/tv/57243-doctor-who'
- url2 = 'https://www.imdb.com/title/tt0436992/'
+ url1 = "https://www.themoviedb.org/tv/57243-doctor-who"
+ url2 = "https://www.imdb.com/title/tt0436992/"
# url3 = 'https://movie.douban.com/subject/3541415/'
p1 = SiteManager.get_site_by_url(url1).get_resource_ready()
p2 = SiteManager.get_site_by_url(url2).get_resource_ready()
@@ -101,9 +101,9 @@ class MultiTVSitesTestCase(TestCase):
@use_local_response
def test_tvseasons(self):
- url1 = 'https://www.themoviedb.org/tv/57243-doctor-who/season/4'
- url2 = 'https://www.imdb.com/title/tt1159991/'
- url3 = 'https://movie.douban.com/subject/3627919/'
+ url1 = "https://www.themoviedb.org/tv/57243-doctor-who/season/4"
+ url2 = "https://www.imdb.com/title/tt1159991/"
+ url3 = "https://movie.douban.com/subject/3627919/"
p1 = SiteManager.get_site_by_url(url1).get_resource_ready()
p2 = SiteManager.get_site_by_url(url2).get_resource_ready()
p3 = SiteManager.get_site_by_url(url3).get_resource_ready()
@@ -114,18 +114,18 @@ class MultiTVSitesTestCase(TestCase):
@use_local_response
def test_miniseries(self):
- url1 = 'https://www.themoviedb.org/tv/86941-the-north-water'
- url3 = 'https://movie.douban.com/subject/26895436/'
+ url1 = "https://www.themoviedb.org/tv/86941-the-north-water"
+ url3 = "https://movie.douban.com/subject/26895436/"
p1 = SiteManager.get_site_by_url(url1).get_resource_ready()
p3 = SiteManager.get_site_by_url(url3).get_resource_ready()
- self.assertEqual(p3.item.__class__.__name__, 'TVShow')
+ self.assertEqual(p3.item.__class__.__name__, "TVShow")
self.assertEqual(p1.item.id, p3.item.id)
@use_local_response
def test_tvspecial(self):
- url1 = 'https://www.themoviedb.org/movie/282758-doctor-who-the-runaway-bride'
- url2 = 'hhttps://www.imdb.com/title/tt0827573/'
- url3 = 'https://movie.douban.com/subject/4296866/'
+ url1 = "https://www.themoviedb.org/movie/282758-doctor-who-the-runaway-bride"
+ url2 = "hhttps://www.imdb.com/title/tt0827573/"
+ url3 = "https://movie.douban.com/subject/4296866/"
p1 = SiteManager.get_site_by_url(url1).get_resource_ready()
p2 = SiteManager.get_site_by_url(url2).get_resource_ready()
p3 = SiteManager.get_site_by_url(url3).get_resource_ready()
diff --git a/catalog/urls.py b/catalog/urls.py
index 723fedf8..66dde9c2 100644
--- a/catalog/urls.py
+++ b/catalog/urls.py
@@ -3,13 +3,13 @@ from .api import api
from .views import *
from .models import *
-app_name = 'catalog'
+app_name = "catalog"
def _get_all_url_paths():
- paths = ['item']
+ paths = ["item"]
for cls in Item.__subclasses__():
- p = getattr(cls, 'url_path', None)
+ p = getattr(cls, "url_path", None)
if p:
paths.append(p)
res = "|".join(paths)
@@ -17,9 +17,31 @@ def _get_all_url_paths():
urlpatterns = [
- re_path(r'^item/(?P<item_uid>[0-9a-fA-F]{8}\b-[0-9a-fA-F]{4}\b-[0-9a-fA-F]{4}\b-[0-9a-fA-F]{4}\b-[0-9a-fA-F]{12})?$', retrieve_by_uuid, name='retrieve_by_uuid'),
- re_path(r'^(?P<item_path>' + _get_all_url_paths() + ')/(?P<item_uuid>[A-Za-z0-9]{21,22})$', retrieve, name='retrieve'),
- re_path(r'^(?P<item_path>' + _get_all_url_paths() + ')/(?P<item_uuid>[A-Za-z0-9]{21,22})/reviews', review_list, name='review_list'),
- re_path(r'^(?P<item_path>' + _get_all_url_paths() + ')/(?P<item_uuid>[A-Za-z0-9]{21,22})/marks(?:/(?P<following_only>\\w+))?', mark_list, name='mark_list'),
+ re_path(
+ r"^item/(?P[0-9a-fA-F]{8}\b-[0-9a-fA-F]{4}\b-[0-9a-fA-F]{4}\b-[0-9a-fA-F]{4}\b-[0-9a-fA-F]{12})?$",
+ retrieve_by_uuid,
+ name="retrieve_by_uuid",
+ ),
+ re_path(
+ r"^(?P"
+ + _get_all_url_paths()
+ + ")/(?P[A-Za-z0-9]{21,22})$",
+ retrieve,
+ name="retrieve",
+ ),
+ re_path(
+ r"^(?P"
+ + _get_all_url_paths()
+ + ")/(?P[A-Za-z0-9]{21,22})/reviews",
+ review_list,
+ name="review_list",
+ ),
+ re_path(
+ r"^(?P"
+ + _get_all_url_paths()
+ + ")/(?P[A-Za-z0-9]{21,22})/marks(?:/(?P\\w+))?",
+ mark_list,
+ name="mark_list",
+ ),
path("api/", api.urls),
]
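
For reference, Django passes each named group in these re_path patterns to the view as a keyword argument, which is how retrieve() below receives item_path and item_uuid. A toy sketch with a hypothetical view and a fixed alternation standing in for _get_all_url_paths():

from django.urls import re_path
from django.http import HttpResponse

def retrieve_demo(request, item_path, item_uuid):
    # named groups in the pattern arrive here as keyword arguments
    return HttpResponse(f"{item_path}/{item_uuid}")

urlpatterns = [
    re_path(
        r"^(?P<item_path>book|tv|movie)/(?P<item_uuid>[A-Za-z0-9]{21,22})$",
        retrieve_demo,
        name="retrieve_demo",
    ),
]
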
diff --git a/catalog/views.py b/catalog/views.py
index 956e1523..37fb0882 100644
--- a/catalog/views.py
+++ b/catalog/views.py
@@ -34,9 +34,9 @@ def retrieve_by_uuid(request, item_uid):
def retrieve(request, item_path, item_uuid):
- if request.method == 'GET':
+ if request.method == "GET":
item = get_object_or_404(Item, uid=base62.decode(item_uuid))
- item_url = f'/{item_path}/{item_uuid}'
+ item_url = f"/{item_path}/{item_uuid}"
if item.url != item_url:
return redirect(item.url)
mark = None
@@ -44,26 +44,46 @@ def retrieve(request, item_path, item_uuid):
mark_list = None
review_list = None
collection_list = []
- shelf_types = [(n[1], n[2]) for n in iter(ShelfTypeNames) if n[0] == item.category]
+ shelf_types = [
+ (n[1], n[2]) for n in iter(ShelfTypeNames) if n[0] == item.category
+ ]
if request.user.is_authenticated:
visible = query_visible(request.user)
mark = Mark(request.user, item)
_logger.info(mark.rating)
review = mark.review
- collection_list = item.collections.all().filter(visible).annotate(like_counts=Count('likes')).order_by('-like_counts')
- mark_query = ShelfMember.objects.filter(item=item).filter(visible).order_by('-created_time')
- mark_list = [member.mark for member in mark_query[:NUM_REVIEWS_ON_ITEM_PAGE]]
- review_list = Review.objects.filter(item=item).filter(visible).order_by('-created_time')[:NUM_REVIEWS_ON_ITEM_PAGE]
+ collection_list = (
+ item.collections.all()
+ .filter(visible)
+ .annotate(like_counts=Count("likes"))
+ .order_by("-like_counts")
+ )
+ mark_query = (
+ ShelfMember.objects.filter(item=item)
+ .filter(visible)
+ .order_by("-created_time")
+ )
+ mark_list = [
+ member.mark for member in mark_query[:NUM_REVIEWS_ON_ITEM_PAGE]
+ ]
+ review_list = (
+ Review.objects.filter(item=item)
+ .filter(visible)
+ .order_by("-created_time")[:NUM_REVIEWS_ON_ITEM_PAGE]
+ )
- return render(request, item.class_name + '.html', {
- 'item': item,
- 'mark': mark,
- 'review': review,
- 'mark_list': mark_list,
- 'review_list': review_list,
- 'collection_list': collection_list,
- 'shelf_types': shelf_types,
- }
+ return render(
+ request,
+ item.class_name + ".html",
+ {
+ "item": item,
+ "mark": mark,
+ "review": review,
+ "mark_list": mark_list,
+ "review_list": review_list,
+ "collection_list": collection_list,
+ "shelf_types": shelf_types,
+ },
)
else:
return HttpResponseBadRequest()
@@ -73,23 +93,24 @@ def mark_list(request, item_path, item_uuid, following_only=False):
item = get_object_or_404(Item, uid=base62.decode(item_uuid))
if not item:
return HttpResponseNotFound("item not found")
- queryset = ShelfMember.objects.filter(item=item).order_by('-created_time')
+ queryset = ShelfMember.objects.filter(item=item).order_by("-created_time")
if following_only:
queryset = queryset.filter(query_following(request.user))
else:
queryset = queryset.filter(query_visible(request.user))
paginator = Paginator(queryset, NUM_REVIEWS_ON_LIST_PAGE)
- page_number = request.GET.get('page', default=1)
+ page_number = request.GET.get("page", default=1)
marks = paginator.get_page(page_number)
marks.pagination = PageLinksGenerator(
- PAGE_LINK_NUMBER, page_number, paginator.num_pages)
+ PAGE_LINK_NUMBER, page_number, paginator.num_pages
+ )
return render(
request,
- 'item_mark_list.html',
+ "item_mark_list.html",
{
- 'marks': marks,
- 'item': item,
- }
+ "marks": marks,
+ "item": item,
+ },
)
@@ -97,18 +118,19 @@ def review_list(request, item_path, item_uuid):
item = get_object_or_404(Item, uid=base62.decode(item_uuid))
if not item:
return HttpResponseNotFound("item not found")
- queryset = Review.objects.filter(item=item).order_by('-created_time')
+ queryset = Review.objects.filter(item=item).order_by("-created_time")
queryset = queryset.filter(query_visible(request.user))
paginator = Paginator(queryset, NUM_REVIEWS_ON_LIST_PAGE)
- page_number = request.GET.get('page', default=1)
+ page_number = request.GET.get("page", default=1)
reviews = paginator.get_page(page_number)
reviews.pagination = PageLinksGenerator(
- PAGE_LINK_NUMBER, page_number, paginator.num_pages)
+ PAGE_LINK_NUMBER, page_number, paginator.num_pages
+ )
return render(
request,
- 'item_review_list.html',
+ "item_review_list.html",
{
- 'reviews': reviews,
- 'item': item,
- }
+ "reviews": reviews,
+ "item": item,
+ },
)
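
Both list views above follow the same pagination recipe: filter and order the queryset, hand it to Django's Paginator, and fetch the requested page with get_page(), which tolerates missing or out-of-range page numbers. A minimal sketch of that core (PageLinksGenerator is project-specific and omitted, and the page-size constant here is a stand-in):

from django.core.paginator import Paginator

NUM_REVIEWS_ON_LIST_PAGE = 20  # assumption: the real constant is defined elsewhere in the project

def paginate(request, queryset):
    paginator = Paginator(queryset, NUM_REVIEWS_ON_LIST_PAGE)
    page_number = request.GET.get("page", default=1)
    # get_page() clamps invalid or out-of-range values to a valid page
    return paginator.get_page(page_number)
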
diff --git a/journal/apps.py b/journal/apps.py
index afe76cb9..e10a1714 100644
--- a/journal/apps.py
+++ b/journal/apps.py
@@ -2,5 +2,5 @@ from django.apps import AppConfig
class JournalConfig(AppConfig):
- default_auto_field = 'django.db.models.BigAutoField'
- name = 'journal'
+ default_auto_field = "django.db.models.BigAutoField"
+ name = "journal"
diff --git a/journal/forms.py b/journal/forms.py
index d64ee29b..56dcd292 100644
--- a/journal/forms.py
+++ b/journal/forms.py
@@ -12,27 +12,23 @@ from common.forms import PreviewImageInput
class ReviewForm(forms.ModelForm):
class Meta:
model = Review
- fields = [
- 'id',
- 'item',
- 'title',
- 'body',
- 'visibility'
- ]
+ fields = ["id", "item", "title", "body", "visibility"]
widgets = {
- 'item': forms.TextInput(attrs={"hidden": ""}),
+ "item": forms.TextInput(attrs={"hidden": ""}),
}
+
title = forms.CharField(label=_("评论标题"))
body = MarkdownxFormField(label=_("评论正文 (Markdown)"))
share_to_mastodon = forms.BooleanField(
- label=_("分享到联邦网络"), initial=True, required=False)
+ label=_("分享到联邦网络"), initial=True, required=False
+ )
id = forms.IntegerField(required=False, widget=forms.HiddenInput())
visibility = forms.TypedChoiceField(
label=_("可见性"),
initial=0,
coerce=int,
choices=VisibilityType.choices,
- widget=forms.RadioSelect
+ widget=forms.RadioSelect,
)
@@ -52,26 +48,26 @@ class CollectionForm(forms.ModelForm):
initial=0,
coerce=int,
choices=VisibilityType.choices,
- widget=forms.RadioSelect
+ widget=forms.RadioSelect,
)
collaborative = forms.TypedChoiceField(
label=_("协作整理权限"),
initial=0,
coerce=int,
choices=COLLABORATIVE_CHOICES,
- widget=forms.RadioSelect
+ widget=forms.RadioSelect,
)
class Meta:
model = Collection
fields = [
- 'title',
- 'cover',
- 'visibility',
- 'collaborative',
- 'brief',
+ "title",
+ "cover",
+ "visibility",
+ "collaborative",
+ "brief",
]
widgets = {
- 'cover': PreviewImageInput(),
+ "cover": PreviewImageInput(),
}
diff --git a/journal/mixins.py b/journal/mixins.py
index f4d6d529..2aa70efa 100644
--- a/journal/mixins.py
+++ b/journal/mixins.py
@@ -17,7 +17,11 @@ class UserOwnedObjectMixin:
return False
if self.visibility == 2:
return False
- if viewer.is_blocking(owner) or owner.is_blocking(viewer) or viewer.is_muting(owner):
+ if (
+ viewer.is_blocking(owner)
+ or owner.is_blocking(viewer)
+ or viewer.is_muting(owner)
+ ):
return False
if self.visibility == 1:
return viewer.is_following(owner)
@@ -25,12 +29,26 @@ class UserOwnedObjectMixin:
return True
def is_editable_by(self, viewer):
- return viewer.is_authenticated and (viewer.is_staff or viewer.is_superuser or viewer == self.owner)
+ return viewer.is_authenticated and (
+ viewer.is_staff or viewer.is_superuser or viewer == self.owner
+ )
@classmethod
def get_available(cls, entity, request_user, following_only=False):
# e.g. SongMark.get_available(song, request.user)
query_kwargs = {entity.__class__.__name__.lower(): entity}
- all_entities = cls.objects.filter(**query_kwargs).order_by("-created_time") # get all marks for song
- visible_entities = list(filter(lambda _entity: _entity.is_visible_to(request_user) and (_entity.owner.mastodon_username in request_user.mastodon_following if following_only else True), all_entities))
+ all_entities = cls.objects.filter(**query_kwargs).order_by(
+ "-created_time"
+ ) # get all marks for song
+ visible_entities = list(
+ filter(
+ lambda _entity: _entity.is_visible_to(request_user)
+ and (
+ _entity.owner.mastodon_username in request_user.mastodon_following
+ if following_only
+ else True
+ ),
+ all_entities,
+ )
+ )
return visible_entities
diff --git a/journal/templatetags/user_actions.py b/journal/templatetags/user_actions.py
index 3c08ae1c..b16eec7f 100644
--- a/journal/templatetags/user_actions.py
+++ b/journal/templatetags/user_actions.py
@@ -7,21 +7,21 @@ register = template.Library()
@register.simple_tag(takes_context=True)
def wish_item_action(context, item):
- user = context['request'].user
+ user = context["request"].user
if user and user.is_authenticated:
action = {
- 'taken': user.shelf_manager.locate_item(item) is not None,
- 'url': reverse("journal:wish", args=[item.uuid]),
+ "taken": user.shelf_manager.locate_item(item) is not None,
+ "url": reverse("journal:wish", args=[item.uuid]),
}
return action
@register.simple_tag(takes_context=True)
def like_piece_action(context, piece):
- user = context['request'].user
+ user = context["request"].user
if user and user.is_authenticated:
action = {
- 'taken': Like.objects.filter(target=piece, owner=user).first() is not None,
- 'url': reverse("journal:like", args=[piece.uuid]),
+ "taken": Like.objects.filter(target=piece, owner=user).first() is not None,
+ "url": reverse("journal:like", args=[piece.uuid]),
}
return action
diff --git a/social/apps.py b/social/apps.py
index 8af48774..b11df7b3 100644
--- a/social/apps.py
+++ b/social/apps.py
@@ -2,8 +2,8 @@ from django.apps import AppConfig
class SocialConfig(AppConfig):
- default_auto_field = 'django.db.models.BigAutoField'
- name = 'social'
+ default_auto_field = "django.db.models.BigAutoField"
+ name = "social"
def ready(self):
# load key modules in proper order, make sure class inject and signal works as expected
diff --git a/social/models.py b/social/models.py
index f47be08c..f1951c54 100644
--- a/social/models.py
+++ b/social/models.py
@@ -21,23 +21,27 @@ _logger = logging.getLogger(__name__)
class ActivityTemplate(models.TextChoices):
- """
- """
- MarkItem = 'mark_item'
- ReviewItem = 'review_item'
- CreateCollection = 'create_collection'
- LikeCollection = 'like_collection'
+ """ """
+
+ MarkItem = "mark_item"
+ ReviewItem = "review_item"
+ CreateCollection = "create_collection"
+ LikeCollection = "like_collection"
class LocalActivity(models.Model, UserOwnedObjectMixin):
owner = models.ForeignKey(User, on_delete=models.CASCADE)
- visibility = models.PositiveSmallIntegerField(default=0) # 0: Public / 1: Follower only / 2: Self only
- template = models.CharField(blank=False, choices=ActivityTemplate.choices, max_length=50)
+ visibility = models.PositiveSmallIntegerField(
+ default=0
+ ) # 0: Public / 1: Follower only / 2: Self only
+ template = models.CharField(
+ blank=False, choices=ActivityTemplate.choices, max_length=50
+ )
action_object = models.ForeignKey(Piece, on_delete=models.CASCADE)
created_time = models.DateTimeField(default=timezone.now, db_index=True)
def __str__(self):
- return f'Activity [{self.owner}:{self.template}:{self.action_object}]'
+ return f"Activity [{self.owner}:{self.template}:{self.action_object}]"
class ActivityManager:
@@ -48,7 +52,11 @@ class ActivityManager:
q = Q(owner_id__in=self.owner.following, visibility__lt=2) | Q(owner=self.owner)
if before_time:
q = q & Q(created_time__lt=before_time)
- return LocalActivity.objects.filter(q).order_by('-created_time').prefetch_related('action_object') # .select_related() https://github.com/django-polymorphic/django-polymorphic/pull/531
+ return (
+ LocalActivity.objects.filter(q)
+ .order_by("-created_time")
+ .prefetch_related("action_object")
+ ) # .select_related() https://github.com/django-polymorphic/django-polymorphic/pull/531
@staticmethod
def get_manager_for_user(user):
@@ -56,7 +64,7 @@ class ActivityManager:
User.activity_manager = cached_property(ActivityManager.get_manager_for_user)
-User.activity_manager.__set_name__(User, 'activity_manager')
+User.activity_manager.__set_name__(User, "activity_manager")
class DataSignalManager:
@@ -68,9 +76,9 @@ class DataSignalManager:
if processor_class:
processor = processor_class(instance)
if created:
- if hasattr(processor, 'created'):
+ if hasattr(processor, "created"):
processor.created()
- elif hasattr(processor, 'updated'):
+ elif hasattr(processor, "updated"):
processor.updated()
@staticmethod
@@ -78,7 +86,7 @@ class DataSignalManager:
processor_class = DataSignalManager.processors.get(instance.__class__)
if processor_class:
processor = processor_class(instance)
- if hasattr(processor, 'deleted'):
+ if hasattr(processor, "deleted"):
processor.deleted()
@staticmethod
@@ -103,15 +111,17 @@ class DefaultActivityProcessor:
def created(self):
params = {
- 'owner': self.action_object.owner,
- 'visibility': self.action_object.visibility,
- 'template': self.template,
- 'action_object': self.action_object,
+ "owner": self.action_object.owner,
+ "visibility": self.action_object.visibility,
+ "template": self.template,
+ "action_object": self.action_object,
}
LocalActivity.objects.create(**params)
def updated(self):
- activity = LocalActivity.objects.filter(action_object=self.action_object).first()
+ activity = LocalActivity.objects.filter(
+ action_object=self.action_object
+ ).first()
if not activity:
self.created()
elif activity.visibility != self.action_object.visibility:
diff --git a/social/urls.py b/social/urls.py
index 75a2664f..8df11801 100644
--- a/social/urls.py
+++ b/social/urls.py
@@ -2,8 +2,8 @@ from django.urls import path, re_path
from .views import *
-app_name = 'social'
+app_name = "social"
urlpatterns = [
- path('', feed, name='feed'),
- path('data', data, name='data'),
+ path("", feed, name="feed"),
+ path("data", data, name="data"),
]
diff --git a/social/views.py b/social/views.py
index cbeb5bc8..27b715a3 100644
--- a/social/views.py
+++ b/social/views.py
@@ -23,31 +23,35 @@ PAGE_SIZE = 10
@login_required
def feed(request):
- if request.method != 'GET':
+ if request.method != "GET":
return
user = request.user
- unread = Announcement.objects.filter(pk__gt=user.read_announcement_index).order_by('-pk')
+ unread = Announcement.objects.filter(pk__gt=user.read_announcement_index).order_by(
+ "-pk"
+ )
if unread:
- user.read_announcement_index = Announcement.objects.latest('pk').pk
- user.save(update_fields=['read_announcement_index'])
+ user.read_announcement_index = Announcement.objects.latest("pk").pk
+ user.save(update_fields=["read_announcement_index"])
return render(
request,
- 'feed.html',
+ "feed.html",
{
- 'top_tags': user.tag_manager.all_tags[:10],
- 'unread_announcements': unread,
- }
+ "top_tags": user.tag_manager.all_tags[:10],
+ "unread_announcements": unread,
+ },
)
@login_required
def data(request):
- if request.method != 'GET':
+ if request.method != "GET":
return
return render(
request,
- 'feed_data.html',
+ "feed_data.html",
{
- 'activities': ActivityManager(request.user).get_timeline(before_time=request.GET.get('last'))[:PAGE_SIZE],
- }
+ "activities": ActivityManager(request.user).get_timeline(
+ before_time=request.GET.get("last")
+ )[:PAGE_SIZE],
+ },
)