new data model: speed up migration

This commit is contained in:
Your Name 2022-12-18 20:28:39 -05:00
parent 260968ff61
commit 70019522b5
4 changed files with 86 additions and 54 deletions

View file

@ -60,7 +60,7 @@ class EditionOut(ItemOut):
@api.post("/catalog/fetch", response=ItemOut) @api.post("/catalog/fetch", response=ItemOut)
def fetch_edition(request, url: str): def fetch_item(request, url: str):
site = SiteManager.get_site_by_url(url) site = SiteManager.get_site_by_url(url)
if not site: if not site:
return Http404() return Http404()

View file

@ -290,7 +290,7 @@ class Shelf(List):
class ShelfLogEntry(models.Model): class ShelfLogEntry(models.Model):
owner = models.ForeignKey(User, on_delete=models.PROTECT) owner = models.ForeignKey(User, on_delete=models.PROTECT)
shelf = models.ForeignKey(Shelf, on_delete=models.PROTECT, related_name='entries', null=True) # None means removed from any shelf shelf = models.ForeignKey(Shelf, on_delete=models.CASCADE, related_name='entries', null=True) # None means removed from any shelf
item = models.ForeignKey(Item, on_delete=models.PROTECT) item = models.ForeignKey(Item, on_delete=models.PROTECT)
timestamp = models.DateTimeField(default=timezone.now) # this may later be changed by user timestamp = models.DateTimeField(default=timezone.now) # this may later be changed by user
metadata = models.JSONField(default=dict) metadata = models.JSONField(default=dict)
@ -511,7 +511,7 @@ class Mark:
@property @property
def id(self): def id(self):
return self.item.id if self.shelfmember else None return self.shelfmember.id if self.shelfmember else None
@property @property
def shelf_type(self): def shelf_type(self):
@ -525,6 +525,10 @@ class Mark:
def created_time(self): def created_time(self):
return self.shelfmember.created_time if self.shelfmember else None return self.shelfmember.created_time if self.shelfmember else None
@property
def metadata(self):
return self.shelfmember.metadata if self.shelfmember else None
@property @property
def visibility(self): def visibility(self):
return self.shelfmember.visibility if self.shelfmember else None return self.shelfmember.visibility if self.shelfmember else None
@ -551,21 +555,12 @@ class Mark:
def update(self, shelf_type, comment_text, rating_grade, visibility, metadata=None, created_time=None): def update(self, shelf_type, comment_text, rating_grade, visibility, metadata=None, created_time=None):
if shelf_type != self.shelf_type or visibility != self.visibility: if shelf_type != self.shelf_type or visibility != self.visibility:
self.shelfmember = self.owner.shelf_manager.move_item(self.item, shelf_type, visibility=visibility) self.shelfmember = self.owner.shelf_manager.move_item(self.item, shelf_type, visibility=visibility, metadata=metadata)
if self.shelfmember and (created_time or metadata is not None): if self.shelfmember and created_time:
if created_time: self.shelfmember.created_time = created_time
self.shelfmember.created_time = created_time
if metadata is not None:
self.shelfmember.metadata = metadata
self.shelfmember.save() self.shelfmember.save()
if comment_text != self.text or visibility != self.visibility: if comment_text != self.text or visibility != self.visibility:
self.comment = Comment.comment_item_by_user(self.item, self.owner, comment_text, visibility) self.comment = Comment.comment_item_by_user(self.item, self.owner, comment_text, visibility)
if self.comment and created_time:
self.comment.created_time = created_time
self.comment.save(update_fields=['created_time'])
if rating_grade != self.rating or visibility != self.visibility: if rating_grade != self.rating or visibility != self.visibility:
rating_content = Rating.rate_item_by_user(self.item, self.owner, rating_grade, visibility) Rating.rate_item_by_user(self.item, self.owner, rating_grade, visibility)
self.rating = rating_grade self.rating = rating_grade
if rating_content and created_time:
rating_content.created_time = created_time
rating_content.save(update_fields=['created_time'])

View file

@ -178,39 +178,40 @@ class Command(BaseCommand):
pg = Paginator(qs, BATCH_SIZE) pg = Paginator(qs, BATCH_SIZE)
for p in tqdm(pg.page_range): for p in tqdm(pg.page_range):
links = [] links = []
for entity in pg.get_page(p).object_list: with transaction.atomic():
try: for entity in pg.get_page(p).object_list:
content = entity.convert() try:
site = SiteManager.get_site_by_url(entity.source_url) content = entity.convert()
item = None site = SiteManager.get_site_by_url(entity.source_url)
if site:
if not site.DEFAULT_MODEL and not content.metadata.get('preferred_model'):
if model_map[typ] == Movie and entity.is_series:
content.metadata['preferred_model'] = 'TVSeason' if entity.season else 'TVShow'
else:
content.metadata['preferred_model'] = model_map[typ].__name__
item = site.get_resource_ready(preloaded_content=content, ignore_existing_content=reload).item
else:
# not known site, try save item without external resource
item = None item = None
model = Edition if site:
t, v = None, None if not site.DEFAULT_MODEL and not content.metadata.get('preferred_model'):
if content.lookup_ids: if model_map[typ] == Movie and entity.is_series:
t, v = Item.get_best_lookup_id(content.lookup_ids) content.metadata['preferred_model'] = 'TVSeason' if entity.season else 'TVShow'
item = model.objects.filter(primary_lookup_id_type=t, primary_lookup_id_value=v).first() else:
if not item: content.metadata['preferred_model'] = model_map[typ].__name__
obj = model.copy_metadata(content.metadata) item = site.get_resource_ready(preloaded_content=content, ignore_existing_content=reload).item
obj['primary_lookup_id_type'] = t else:
obj['primary_lookup_id_value'] = v # not known site, try save item without external resource
item = model.objects.create(**obj) item = None
item.cover = content.metadata['cover_image_path'] model = Edition
item.save() t, v = None, None
links.append(LinkModel(old_id=entity.id, new_uid=item.uid)) if content.lookup_ids:
# pprint.pp(site.get_item()) t, v = Item.get_best_lookup_id(content.lookup_ids)
except Exception as e: item = model.objects.filter(primary_lookup_id_type=t, primary_lookup_id_value=v).first()
print(f'Convert failed for {typ} {entity.id}: {e}') if not item:
if options['failstop']: obj = model.copy_metadata(content.metadata)
raise(e) obj['primary_lookup_id_type'] = t
# return obj['primary_lookup_id_value'] = v
item = model.objects.create(**obj)
item.cover = content.metadata['cover_image_path']
item.save()
links.append(LinkModel(old_id=entity.id, new_uid=item.uid))
# pprint.pp(site.get_item())
except Exception as e:
print(f'Convert failed for {typ} {entity.id}: {e}')
if options['failstop']:
raise(e)
# return
LinkModel.objects.bulk_create(links) LinkModel.objects.bulk_create(links)
self.stdout.write(self.style.SUCCESS(f'Done.')) self.stdout.write(self.style.SUCCESS(f'Done.'))

View file

@ -34,9 +34,9 @@ model_link = {
} }
shelf_map = { shelf_map = {
MarkStatusEnum.WISH: ShelfType.WISHLIST, ShelfType.WISHLIST: MarkStatusEnum.WISH,
MarkStatusEnum.DO: ShelfType.PROGRESS, ShelfType.PROGRESS: MarkStatusEnum.DO,
MarkStatusEnum.COLLECT: ShelfType.COMPLETE, ShelfType.COMPLETE: MarkStatusEnum.COLLECT,
} }
tag_map = { tag_map = {
@ -72,12 +72,16 @@ class Command(BaseCommand):
if options['clear']: if options['clear']:
print("Deleting all migrated user pieces") print("Deleting all migrated user pieces")
# Piece.objects.all().delete() # Piece.objects.all().delete()
for cls in [Review, Comment, Rating, Tag, ShelfLogEntry, ShelfMember, Shelf]: # Collection for cls in [Review, Comment, Rating, TagMember, Tag, ShelfLogEntry, ShelfMember]: # Collection
print(cls) print(cls)
cls.objects.all().delete() cls.objects.all().delete()
return return
types = options['types'] or [GameMark, AlbumMark, MovieMark, BookMark] types = options['types'] or [GameMark, AlbumMark, MovieMark, BookMark]
print('Preparing cache')
tag_cache = {f'{t.owner_id}_{t.title}': t.id for t in Tag.objects.all()}
shelf_cache = {f'{s.owner_id}_{s.item_category}_{shelf_map[s.shelf_type]}': s.id for s in Shelf.objects.all()}
for typ in types: for typ in types:
print(typ) print(typ)
LinkModel = model_link[typ] LinkModel = model_link[typ]
@ -96,6 +100,8 @@ class Command(BaseCommand):
try: try:
item_link = LinkModel.objects.get(old_id=entity.item.id) item_link = LinkModel.objects.get(old_id=entity.item.id)
item = Item.objects.get(uid=item_link.new_uid) item = Item.objects.get(uid=item_link.new_uid)
tags = [t.content for t in getattr(entity, tag_field).all()]
"""
mark = Mark(entity.owner, item) mark = Mark(entity.owner, item)
mark.update( mark.update(
shelf_type=shelf_map[entity.status], shelf_type=shelf_map[entity.status],
@ -105,8 +111,38 @@ class Command(BaseCommand):
metadata={'shared_link': entity.shared_link}, metadata={'shared_link': entity.shared_link},
created_time=entity.created_time created_time=entity.created_time
) )
tags = [t.content for t in getattr(entity, tag_field).all()]
TagManager.tag_item_by_user(item, entity.owner, tags) TagManager.tag_item_by_user(item, entity.owner, tags)
""" # rewrote above with direct create to speed up
user_id = entity.owner_id
item_id = item.id
visibility = entity.visibility
created_time = entity.created_time
if entity.rating:
Rating.objects.create(owner_id=user_id, item_id=item_id, grade=entity.rating, visibility=visibility)
if entity.text:
Comment.objects.create(owner_id=user_id, item_id=item_id, text=entity.text, visibility=visibility)
shelf = shelf_cache[f'{user_id}_{item.category}_{entity.status}']
ShelfMember.objects.create(
_shelf_id=shelf,
owner_id=user_id,
position=0,
item_id=item_id,
metadata={'shared_link': entity.shared_link},
created_time=created_time)
ShelfLogEntry.objects.create(owner_id=user_id, shelf_id=shelf, item_id=item_id, timestamp=created_time)
for title in tags:
tag_key = f'{user_id}_{title}'
if tag_key not in tag_cache:
tag = Tag.objects.create(owner_id=user_id, title=title, visibility=0).id
tag_cache[tag_key] = tag
else:
tag = tag_cache[tag_key]
TagMember.objects.create(
_tag_id=tag,
owner_id=user_id,
position=0,
item_id=item_id,
created_time=created_time)
except Exception as e: except Exception as e:
print(f'Convert failed for {typ} {entity.id}: {e}') print(f'Convert failed for {typ} {entity.id}: {e}')
if options['failstop']: if options['failstop']: