diff --git a/.github/workflows/django.yml b/.github/workflows/django.yml
index 4454d434..dfe51d98 100644
--- a/.github/workflows/django.yml
+++ b/.github/workflows/django.yml
@@ -43,5 +43,5 @@ jobs:
- name: Run Tests
run: |
PGPASSWORD=admin123 psql template1 -U postgres -h localhost -c 'create extension hstore;'
- new_data_model=1 python manage.py makemigrations auth mastodon users books movies games music sync management collection common sync management timeline catalog journal social
+ new_data_model=1 python manage.py makemigrations auth mastodon users books movies games music sync management collection common sync management timeline catalog journal social legacy
new_data_model=1 python manage.py test
diff --git a/boofilsic/settings.py b/boofilsic/settings.py
index f75610ec..7362d7c7 100644
--- a/boofilsic/settings.py
+++ b/boofilsic/settings.py
@@ -368,3 +368,5 @@ if ENABLE_NEW_MODEL:
INSTALLED_APPS.append('catalog.apps.CatalogConfig')
INSTALLED_APPS.append('journal.apps.JournalConfig')
INSTALLED_APPS.append('social.apps.SocialConfig')
+ INSTALLED_APPS.append('legacy.apps.LegacyConfig')
+
diff --git a/catalog/book/utils.py b/catalog/book/utils.py
index a8775ec5..6598e65b 100644
--- a/catalog/book/utils.py
+++ b/catalog/book/utils.py
@@ -50,7 +50,9 @@ def is_asin(asin):
def detect_isbn_asin(s):
- n = s.strip().upper() if s else ''
+ if not s:
+ return None, None
+ n = re.sub(r'[^0-9A-Z]', '', s.upper())
if is_isbn_13(n):
return IdType.ISBN, n
if is_isbn_10(n):
diff --git a/catalog/common/models.py b/catalog/common/models.py
index da5e6ba2..7cbe0825 100644
--- a/catalog/common/models.py
+++ b/catalog/common/models.py
@@ -318,6 +318,8 @@ class ExternalResource(models.Model):
self.metadata = resource_content.metadata
if resource_content.cover_image and resource_content.cover_image_extention:
self.cover = SimpleUploadedFile('temp.' + resource_content.cover_image_extention, resource_content.cover_image)
+ else:
+ self.cover = resource_content.metadata.get('cover_image_path')
self.scraped_time = timezone.now()
self.save()
diff --git a/catalog/common/sites.py b/catalog/common/sites.py
index 579e9061..1e9b8d25 100644
--- a/catalog/common/sites.py
+++ b/catalog/common/sites.py
@@ -23,6 +23,12 @@ class ResourceContent:
cover_image: bytes = None
cover_image_extention: str = None
+ def dict(self):
+ return {'metadata': self.metadata, 'lookup_ids': self.lookup_ids}
+
+ def to_json(self) -> str:
+ return json.dumps({'metadata': self.metadata, 'lookup_ids': self.lookup_ids})
+
class AbstractSite:
"""
@@ -67,10 +73,6 @@ class AbstractSite:
self.resource = ExternalResource(id_type=self.ID_TYPE, id_value=self.id_value, url=self.url)
return self.resource
- def bypass_scrape(self, data_from_link) -> ResourceContent:
- """subclass may implement this to use data from linked resource and bypass actual scrape"""
- return None
-
def scrape(self) -> ResourceContent:
"""subclass should implement this, return ResourceContent object"""
data = ResourceContent()
@@ -101,7 +103,7 @@ class AbstractSite:
def ready(self):
return bool(self.resource and self.resource.ready)
- def get_resource_ready(self, auto_save=True, auto_create=True, auto_link=True, data_from_link=None):
+ def get_resource_ready(self, auto_save=True, auto_create=True, auto_link=True, preloaded_content=None, reload=False):
"""return a resource scraped, or scrape if not yet"""
if auto_link:
auto_create = True
@@ -111,9 +113,12 @@ class AbstractSite:
resource_content = {}
if not self.resource:
return None
- if not p.ready:
- resource_content = self.bypass_scrape(data_from_link)
- if not resource_content:
+ if not p.ready or reload:
+ if isinstance(preloaded_content, ResourceContent):
+ resource_content = preloaded_content
+ elif isinstance(preloaded_content, dict):
+ resource_content = ResourceContent(**preloaded_content)
+ else:
resource_content = self.scrape()
p.update_content(resource_content)
if not p.ready:
@@ -127,12 +132,12 @@ class AbstractSite:
p.item.merge_data_from_external_resources()
p.item.save()
if auto_link:
- for linked_resources in p.required_resources:
- linked_site = SiteManager.get_site_by_url(linked_resources['url'])
+ for linked_resource in p.required_resources:
+ linked_site = SiteManager.get_site_by_url(linked_resource['url'])
if linked_site:
- linked_site.get_resource_ready(auto_link=False)
+ linked_site.get_resource_ready(auto_link=False, preloaded_content=linked_resource.get('content'))
else:
- _logger.error(f'unable to get site for {linked_resources["url"]}')
+ _logger.error(f'unable to get site for {linked_resource["url"]}')
p.item.update_linked_items_from_external_resource(p)
p.item.save()
return p
@@ -141,28 +146,28 @@ class AbstractSite:
class SiteManager:
registry = {}
- @classmethod
- def register(cls, target) -> Callable:
+ @staticmethod
+ def register(target) -> Callable:
id_type = target.ID_TYPE
- if id_type in cls.registry:
+ if id_type in SiteManager.registry:
raise ValueError(f'Site for {id_type} already exists')
- cls.registry[id_type] = target
+ SiteManager.registry[id_type] = target
return target
- @classmethod
- def get_site_by_id_type(cls, typ: str):
- return cls.registry[typ]() if typ in cls.registry else None
+ @staticmethod
+ def get_site_by_id_type(typ: str):
+ return SiteManager.registry[typ]() if typ in SiteManager.registry else None
- @classmethod
- def get_site_by_url(cls, url: str):
- cls = next(filter(lambda p: p.validate_url(url), cls.registry.values()), None)
+ @staticmethod
+ def get_site_by_url(url: str):
+ cls = next(filter(lambda p: p.validate_url(url), SiteManager.registry.values()), None)
if cls is None:
- cls = next(filter(lambda p: p.validate_url_fallback(url), cls.registry.values()), None)
+ cls = next(filter(lambda p: p.validate_url_fallback(url), SiteManager.registry.values()), None)
return cls(url) if cls else None
- @classmethod
- def get_id_by_url(cls, url: str):
- site = cls.get_site_by_url(url)
+ @staticmethod
+ def get_id_by_url(url: str):
+ site = SiteManager.get_site_by_url(url)
return site.url_to_id(url) if site else None
@staticmethod
diff --git a/catalog/game/models.py b/catalog/game/models.py
index bd4281e0..ea6b0b51 100644
--- a/catalog/game/models.py
+++ b/catalog/game/models.py
@@ -13,13 +13,14 @@ class Game(Item):
METADATA_COPY_LIST = [
'title',
+ 'brief',
'other_title',
'developer',
'publisher',
'release_date',
'genre',
'platform',
- 'brief',
+ 'official_site',
]
other_title = jsondata.ArrayField(
@@ -63,3 +64,7 @@ class Game(Item):
blank=True,
default=list,
)
+
+ official_site = jsondata.CharField(
+ default='',
+ )
diff --git a/catalog/movie/models.py b/catalog/movie/models.py
index 052a34b6..1d001437 100644
--- a/catalog/movie/models.py
+++ b/catalog/movie/models.py
@@ -9,7 +9,6 @@ class Movie(Item):
imdb = PrimaryLookupIdDescriptor(IdType.IMDB)
tmdb_movie = PrimaryLookupIdDescriptor(IdType.TMDB_Movie)
douban_movie = PrimaryLookupIdDescriptor(IdType.DoubanMovie)
- duration = jsondata.IntegerField(blank=True, default=None)
demonstrative = _('这部电影')
METADATA_COPY_LIST = [
diff --git a/catalog/sites/douban_book.py b/catalog/sites/douban_book.py
index 8532e07e..22ae9119 100644
--- a/catalog/sites/douban_book.py
+++ b/catalog/sites/douban_book.py
@@ -173,6 +173,7 @@ class DoubanBook(AbstractSite):
'id_value': r[1] if r else None,
'title': data['title'],
'url': works_element[0],
+ 'content': {'metadata': {'title': data['title']}}
}]
pd = ResourceContent(metadata=data)
@@ -195,14 +196,6 @@ class DoubanBook_Work(AbstractSite):
def id_to_url(self, id_value):
return "https://book.douban.com/works/" + id_value + "/"
- def bypass_scrape(self, data_from_link):
- if not data_from_link:
- return None
- pd = ResourceContent(metadata={
- 'title': data_from_link['title'],
- })
- return pd
-
def scrape(self):
content = DoubanDownloader(self.url).download().html()
title_elem = content.xpath("//h1/text()")
diff --git a/catalog/templates/game.html b/catalog/templates/game.html
index 41690755..0bfad623 100644
--- a/catalog/templates/game.html
+++ b/catalog/templates/game.html
@@ -73,13 +73,10 @@
{% endif %}
- {% if item.other_info %}
- {% for k, v in item.other_info.items %}
-
- {{ k }}:{{ v | urlize }}
+
{% if item.official_site %}
+ {% trans '官方网站:' %}{{ item.official_site|urlizetrunc:42 }}
+ {% endif %}
- {% endfor %}
- {% endif %}
diff --git a/catalog/urls.py b/catalog/urls.py
index 9be0f947..4f43cd6a 100644
--- a/catalog/urls.py
+++ b/catalog/urls.py
@@ -15,6 +15,7 @@ def _get_all_url_paths():
urlpatterns = [
+ re_path(r'item/(?P[0-9a-fA-F]{8}\b-[0-9a-fA-F]{4}\b-[0-9a-fA-F]{4}\b-[0-9a-fA-F]{4}\b-[0-9a-fA-F]{12})/', retrieve_by_uuid, name='retrieve_by_uuid'),
re_path(r'(?P' + _get_all_url_paths() + ')/(?P[A-Za-z0-9]{21,22})/', retrieve, name='retrieve'),
path("api/", api.urls),
]
diff --git a/catalog/views.py b/catalog/views.py
index ed2fab53..e16d23bd 100644
--- a/catalog/views.py
+++ b/catalog/views.py
@@ -27,6 +27,11 @@ from journal.models import Mark
_logger = logging.getLogger(__name__)
+def retrieve_by_uuid(request, item_uuid):
+ item = get_object_or_404(Item, uid=item_uuid)
+ return redirect(item.url)
+
+
def retrieve(request, item_path, item_uid):
if request.method == 'GET':
item = get_object_or_404(Item, uid=base62.decode(item_uid))
diff --git a/legacy/__init__.py b/legacy/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/legacy/admin.py b/legacy/admin.py
new file mode 100644
index 00000000..8c38f3f3
--- /dev/null
+++ b/legacy/admin.py
@@ -0,0 +1,3 @@
+from django.contrib import admin
+
+# Register your models here.
diff --git a/legacy/apps.py b/legacy/apps.py
new file mode 100644
index 00000000..4e1c1ece
--- /dev/null
+++ b/legacy/apps.py
@@ -0,0 +1,6 @@
+from django.apps import AppConfig
+
+
+class LegacyConfig(AppConfig):
+ default_auto_field = 'django.db.models.BigAutoField'
+ name = 'legacy'
diff --git a/legacy/management/commands/migrate_catalog.py b/legacy/management/commands/migrate_catalog.py
new file mode 100644
index 00000000..44c8241a
--- /dev/null
+++ b/legacy/management/commands/migrate_catalog.py
@@ -0,0 +1,216 @@
+from books.models import Book as Legacy_Book
+from movies.models import Movie as Legacy_Movie
+from music.models import Album as Legacy_Album
+from games.models import Game as Legacy_Game
+from catalog.common import *
+from catalog.models import *
+from catalog.sites import *
+from catalog.book.utils import detect_isbn_asin
+from journal import models as journal_models
+from social import models as social_models
+from django.core.management.base import BaseCommand
+from django.core.paginator import Paginator
+import pprint
+from tqdm import tqdm
+from django.db.models import Q, Count, Sum
+from django.utils import dateparse, timezone
+import re
+from legacy.models import *
+
+
+BATCH_SIZE = 1000
+
+
+def _book_convert(entity):
+ content = ResourceContent(metadata={
+ 'title': entity.title,
+ 'brief': entity.brief,
+ 'cover_image_path': str(entity.cover),
+
+ 'subtitle': entity.subtitle,
+ 'orig_title': entity.orig_title,
+ 'author': entity.author,
+ 'translator': entity.translator,
+ 'language': entity.language,
+ 'pub_house': entity.pub_house,
+ 'pub_year': entity.pub_year,
+ 'pub_month': entity.pub_month,
+ 'binding': entity.binding,
+ 'price': entity.price,
+ 'pages': entity.pages,
+ 'contents': entity.contents,
+ 'series': entity.other_info.get('丛书') if entity.other_info else None,
+ 'imprint': entity.other_info.get('出品方') if entity.other_info else None,
+ })
+ if entity.isbn:
+ t, v = detect_isbn_asin(entity.isbn)
+ if t:
+ content.lookup_ids[t] = v
+ if entity.other_info and entity.other_info.get('统一书号'):
+ content.lookup_ids[IdType.CUBN] = entity.other_info.get('统一书号')
+ return content
+
+
+def _album_convert(entity):
+ content = ResourceContent(metadata={
+ 'title': entity.title,
+ 'brief': entity.brief,
+ 'cover_image_path': str(entity.cover),
+
+ 'other_title': entity.other_info.get('又名') if entity.other_info else None,
+ 'album_type': entity.other_info.get('专辑类型') if entity.other_info else None,
+ 'media': entity.other_info.get('介质') if entity.other_info else None,
+ 'disc_count': entity.other_info.get('碟片数') if entity.other_info else None,
+ 'artist': entity.artist,
+ 'genre': entity.genre,
+ 'release_date': entity.release_date.strftime('%Y-%m-%d') if entity.release_date else None,
+ 'duration': entity.duration,
+ 'company': entity.company,
+ 'track_list': entity.track_list,
+ 'bandcamp_album_id': entity.other_info.get('bandcamp_album_id') if entity.other_info else None,
+ })
+ if entity.other_info and entity.other_info.get('ISRC'):
+ content.lookup_ids[IdType.ISRC] = entity.other_info.get('ISRC')
+ if entity.other_info and entity.other_info.get('条形码'):
+ content.lookup_ids[IdType.GTIN] = entity.other_info.get('条形码')
+ if entity.other_info and entity.other_info.get('UPC'):
+ content.lookup_ids[IdType.GTIN] = entity.other_info.get('UPC')
+ return content
+
+
+def _game_convert(entity):
+ content = ResourceContent(metadata={
+ 'title': entity.title,
+ 'brief': entity.brief,
+ 'cover_image_path': str(entity.cover),
+
+ 'other_title': entity.other_title,
+ 'developer': entity.developer,
+ 'publisher': entity.publisher,
+ 'release_date': entity.release_date.strftime('%Y-%m-%d') if entity.release_date else None,
+ 'genre': entity.genre,
+ 'platform': entity.platform,
+ 'official_site': entity.other_info.get('official_site') if entity.other_info else None,
+ })
+ if entity.other_info and entity.other_info.get('steam_url'):
+ content.lookup_ids[IdType.Steam] = re.search(r'store\.steampowered\.com/app/(\d+)', entity.other_info.get('steam_url'))[1]
+ return content
+
+
+def _movie_tv_convert(entity):
+ content = ResourceContent(metadata={
+ 'title': entity.title,
+ 'brief': entity.brief,
+ 'cover_image_path': str(entity.cover),
+
+ 'orig_title': entity.orig_title,
+ 'other_title': entity.other_title,
+ 'director': entity.director,
+ 'playwright': entity.playwright,
+ 'actor': entity.actor,
+ 'genre': entity.genre,
+ 'showtime': entity.showtime,
+ 'site': entity.site,
+ 'area': entity.area,
+ 'language': entity.language,
+ 'year': entity.year,
+ 'duration': entity.duration,
+ 'season_count': entity.other_info.get('Seasons') if entity.other_info else None,
+ 'season_number': entity.season,
+ 'episodes': entity.episodes,
+ 'single_episode_length': entity.single_episode_length,
+ 'is_series': entity.is_series,
+ })
+ if entity.imdb_code:
+ content.lookup_ids[IdType.IMDB] = entity.imdb_code
+ if entity.other_info and entity.other_info.get('TMDB_ID'):
+ content.lookup_ids[IdType.TMDB_TV] = entity.other_info.get('TMDB_ID')
+ return content
+
+
+Legacy_Book.convert = _book_convert
+Legacy_Movie.convert = _movie_tv_convert
+Legacy_Game.convert = _game_convert
+Legacy_Album.convert = _album_convert
+model_map = {
+ Legacy_Book: Edition,
+ Legacy_Movie: Movie,
+ Legacy_Game: Game,
+ Legacy_Album: Album,
+}
+model_link = {
+ Legacy_Book: BookLink,
+ Legacy_Movie: MovieLink,
+ Legacy_Game: GameLink,
+ Legacy_Album: AlbumLink,
+}
+
+
+class Command(BaseCommand):
+ help = 'Migrate legacy books'
+
+ def add_arguments(self, parser):
+ parser.add_argument('--book', dest='types', action='append_const', const=Legacy_Book)
+ parser.add_argument('--movie', dest='types', action='append_const', const=Legacy_Movie)
+ parser.add_argument('--album', dest='types', action='append_const', const=Legacy_Album)
+ parser.add_argument('--game', dest='types', action='append_const', const=Legacy_Game)
+ parser.add_argument('--id', help='id to convert; or, if using with --max-id, the min id')
+ parser.add_argument('--maxid', help='max id to convert')
+ parser.add_argument('--failstop', help='stop on fail', action='store_true')
+ parser.add_argument('--clearlink', help='clear legacy link table', action='store_true')
+ parser.add_argument('--reload', help='reload and ignore existing ExternalResource', action='store_true')
+
+ def handle(self, *args, **options):
+ types = options['types'] or [Legacy_Game, Legacy_Album, Legacy_Movie, Legacy_Book]
+ reload = options['reload']
+ for typ in types:
+ print(typ)
+ LinkModel = model_link[typ]
+ if options['clearlink']:
+ LinkModel.objects.all().delete()
+ qs = typ.objects.all().order_by('id') # if h == 0 else c.objects.filter(edited_time__gt=timezone.now() - timedelta(hours=h))
+ if options['id']:
+ if options['maxid']:
+ qs = qs.filter(id__gte=int(options['id']), id__lte=int(options['maxid']))
+ else:
+ qs = qs.filter(id=int(options['id']))
+
+ pg = Paginator(qs, BATCH_SIZE)
+ for p in tqdm(pg.page_range):
+ links = []
+ for entity in pg.get_page(p).object_list:
+ try:
+ content = entity.convert()
+ site = SiteManager.get_site_by_url(entity.source_url)
+ item = None
+ if site:
+ if not site.DEFAULT_MODEL and not content.metadata.get('preferred_model'):
+ if model_map[typ] != Movie or not content.metadata.get('is_series'):
+ content.metadata['preferred_model'] = model_map[typ].__name__
+ else: # TV
+ content.metadata['preferred_model'] = 'TVSeason' if content.metadata.get('season') else 'TVShow'
+ item = site.get_resource_ready(preloaded_content=content, reload=reload).item
+ else:
+ # not known site, try save item without external resource
+ item = None
+ model = Edition
+ t, v = None, None
+ if content.lookup_ids:
+ t, v = Item.get_best_lookup_id(content.lookup_ids)
+ item = model.objects.filter(primary_lookup_id_type=t, primary_lookup_id_value=v).first()
+ if not item:
+ obj = model.copy_metadata(content.metadata)
+ obj['primary_lookup_id_type'] = t
+ obj['primary_lookup_id_value'] = v
+ item = model.objects.create(**obj)
+ item.cover = content.metadata['cover_image_path']
+ item.save()
+ links.append(LinkModel(old_id=entity.id, new_uid=item.uid))
+ # pprint.pp(site.get_item())
+ except Exception as e:
+ print(f'Convert failed for {entity}: {e}')
+ if options['failstop']:
+ raise(e)
+ # return
+ LinkModel.objects.bulk_create(links)
+ self.stdout.write(self.style.SUCCESS(f'Done.'))
diff --git a/legacy/models.py b/legacy/models.py
new file mode 100644
index 00000000..f9e4fe51
--- /dev/null
+++ b/legacy/models.py
@@ -0,0 +1,21 @@
+from django.db import models
+
+
+class BookLink(models.Model):
+ old_id = models.IntegerField(db_index=True)
+ new_uid = models.UUIDField()
+
+
+class MovieLink(models.Model):
+ old_id = models.IntegerField(db_index=True)
+ new_uid = models.UUIDField()
+
+
+class AlbumLink(models.Model):
+ old_id = models.IntegerField(db_index=True)
+ new_uid = models.UUIDField()
+
+
+class GameLink(models.Model):
+ old_id = models.IntegerField(db_index=True)
+ new_uid = models.UUIDField()
diff --git a/legacy/tests.py b/legacy/tests.py
new file mode 100644
index 00000000..7ce503c2
--- /dev/null
+++ b/legacy/tests.py
@@ -0,0 +1,3 @@
+from django.test import TestCase
+
+# Create your tests here.
diff --git a/legacy/views.py b/legacy/views.py
new file mode 100644
index 00000000..91ea44a2
--- /dev/null
+++ b/legacy/views.py
@@ -0,0 +1,3 @@
+from django.shortcuts import render
+
+# Create your views here.