From 70019522b5a9612c6446bb0af8ced90e0a0b9cbc Mon Sep 17 00:00:00 2001 From: Your Name Date: Sun, 18 Dec 2022 20:28:39 -0500 Subject: [PATCH] new data model: speed up migration --- catalog/api.py | 2 +- journal/models.py | 25 +++---- legacy/management/commands/migrate_catalog.py | 67 ++++++++++--------- legacy/management/commands/migrate_journal.py | 46 +++++++++++-- 4 files changed, 86 insertions(+), 54 deletions(-) diff --git a/catalog/api.py b/catalog/api.py index a2f44cb2..26eb4f37 100644 --- a/catalog/api.py +++ b/catalog/api.py @@ -60,7 +60,7 @@ class EditionOut(ItemOut): @api.post("/catalog/fetch", response=ItemOut) -def fetch_edition(request, url: str): +def fetch_item(request, url: str): site = SiteManager.get_site_by_url(url) if not site: return Http404() diff --git a/journal/models.py b/journal/models.py index cb51e78b..e7d5120c 100644 --- a/journal/models.py +++ b/journal/models.py @@ -290,7 +290,7 @@ class Shelf(List): class ShelfLogEntry(models.Model): owner = models.ForeignKey(User, on_delete=models.PROTECT) - shelf = models.ForeignKey(Shelf, on_delete=models.PROTECT, related_name='entries', null=True) # None means removed from any shelf + shelf = models.ForeignKey(Shelf, on_delete=models.CASCADE, related_name='entries', null=True) # None means removed from any shelf item = models.ForeignKey(Item, on_delete=models.PROTECT) timestamp = models.DateTimeField(default=timezone.now) # this may later be changed by user metadata = models.JSONField(default=dict) @@ -511,7 +511,7 @@ class Mark: @property def id(self): - return self.item.id if self.shelfmember else None + return self.shelfmember.id if self.shelfmember else None @property def shelf_type(self): @@ -525,6 +525,10 @@ class Mark: def created_time(self): return self.shelfmember.created_time if self.shelfmember else None + @property + def metadata(self): + return self.shelfmember.metadata if self.shelfmember else None + @property def visibility(self): return self.shelfmember.visibility if self.shelfmember else None @@ -551,21 +555,12 @@ class Mark: def update(self, shelf_type, comment_text, rating_grade, visibility, metadata=None, created_time=None): if shelf_type != self.shelf_type or visibility != self.visibility: - self.shelfmember = self.owner.shelf_manager.move_item(self.item, shelf_type, visibility=visibility) - if self.shelfmember and (created_time or metadata is not None): - if created_time: - self.shelfmember.created_time = created_time - if metadata is not None: - self.shelfmember.metadata = metadata + self.shelfmember = self.owner.shelf_manager.move_item(self.item, shelf_type, visibility=visibility, metadata=metadata) + if self.shelfmember and created_time: + self.shelfmember.created_time = created_time self.shelfmember.save() if comment_text != self.text or visibility != self.visibility: self.comment = Comment.comment_item_by_user(self.item, self.owner, comment_text, visibility) - if self.comment and created_time: - self.comment.created_time = created_time - self.comment.save(update_fields=['created_time']) if rating_grade != self.rating or visibility != self.visibility: - rating_content = Rating.rate_item_by_user(self.item, self.owner, rating_grade, visibility) + Rating.rate_item_by_user(self.item, self.owner, rating_grade, visibility) self.rating = rating_grade - if rating_content and created_time: - rating_content.created_time = created_time - rating_content.save(update_fields=['created_time']) diff --git a/legacy/management/commands/migrate_catalog.py b/legacy/management/commands/migrate_catalog.py index 653bd8d1..782d3e30 100644 --- a/legacy/management/commands/migrate_catalog.py +++ b/legacy/management/commands/migrate_catalog.py @@ -178,39 +178,40 @@ class Command(BaseCommand): pg = Paginator(qs, BATCH_SIZE) for p in tqdm(pg.page_range): links = [] - for entity in pg.get_page(p).object_list: - try: - content = entity.convert() - site = SiteManager.get_site_by_url(entity.source_url) - item = None - if site: - if not site.DEFAULT_MODEL and not content.metadata.get('preferred_model'): - if model_map[typ] == Movie and entity.is_series: - content.metadata['preferred_model'] = 'TVSeason' if entity.season else 'TVShow' - else: - content.metadata['preferred_model'] = model_map[typ].__name__ - item = site.get_resource_ready(preloaded_content=content, ignore_existing_content=reload).item - else: - # not known site, try save item without external resource + with transaction.atomic(): + for entity in pg.get_page(p).object_list: + try: + content = entity.convert() + site = SiteManager.get_site_by_url(entity.source_url) item = None - model = Edition - t, v = None, None - if content.lookup_ids: - t, v = Item.get_best_lookup_id(content.lookup_ids) - item = model.objects.filter(primary_lookup_id_type=t, primary_lookup_id_value=v).first() - if not item: - obj = model.copy_metadata(content.metadata) - obj['primary_lookup_id_type'] = t - obj['primary_lookup_id_value'] = v - item = model.objects.create(**obj) - item.cover = content.metadata['cover_image_path'] - item.save() - links.append(LinkModel(old_id=entity.id, new_uid=item.uid)) - # pprint.pp(site.get_item()) - except Exception as e: - print(f'Convert failed for {typ} {entity.id}: {e}') - if options['failstop']: - raise(e) - # return + if site: + if not site.DEFAULT_MODEL and not content.metadata.get('preferred_model'): + if model_map[typ] == Movie and entity.is_series: + content.metadata['preferred_model'] = 'TVSeason' if entity.season else 'TVShow' + else: + content.metadata['preferred_model'] = model_map[typ].__name__ + item = site.get_resource_ready(preloaded_content=content, ignore_existing_content=reload).item + else: + # not known site, try save item without external resource + item = None + model = Edition + t, v = None, None + if content.lookup_ids: + t, v = Item.get_best_lookup_id(content.lookup_ids) + item = model.objects.filter(primary_lookup_id_type=t, primary_lookup_id_value=v).first() + if not item: + obj = model.copy_metadata(content.metadata) + obj['primary_lookup_id_type'] = t + obj['primary_lookup_id_value'] = v + item = model.objects.create(**obj) + item.cover = content.metadata['cover_image_path'] + item.save() + links.append(LinkModel(old_id=entity.id, new_uid=item.uid)) + # pprint.pp(site.get_item()) + except Exception as e: + print(f'Convert failed for {typ} {entity.id}: {e}') + if options['failstop']: + raise(e) + # return LinkModel.objects.bulk_create(links) self.stdout.write(self.style.SUCCESS(f'Done.')) diff --git a/legacy/management/commands/migrate_journal.py b/legacy/management/commands/migrate_journal.py index a12b2d94..d102a5e4 100644 --- a/legacy/management/commands/migrate_journal.py +++ b/legacy/management/commands/migrate_journal.py @@ -34,9 +34,9 @@ model_link = { } shelf_map = { - MarkStatusEnum.WISH: ShelfType.WISHLIST, - MarkStatusEnum.DO: ShelfType.PROGRESS, - MarkStatusEnum.COLLECT: ShelfType.COMPLETE, + ShelfType.WISHLIST: MarkStatusEnum.WISH, + ShelfType.PROGRESS: MarkStatusEnum.DO, + ShelfType.COMPLETE: MarkStatusEnum.COLLECT, } tag_map = { @@ -72,12 +72,16 @@ class Command(BaseCommand): if options['clear']: print("Deleting all migrated user pieces") # Piece.objects.all().delete() - for cls in [Review, Comment, Rating, Tag, ShelfLogEntry, ShelfMember, Shelf]: # Collection + for cls in [Review, Comment, Rating, TagMember, Tag, ShelfLogEntry, ShelfMember]: # Collection print(cls) cls.objects.all().delete() return types = options['types'] or [GameMark, AlbumMark, MovieMark, BookMark] + print('Preparing cache') + tag_cache = {f'{t.owner_id}_{t.title}': t.id for t in Tag.objects.all()} + shelf_cache = {f'{s.owner_id}_{s.item_category}_{shelf_map[s.shelf_type]}': s.id for s in Shelf.objects.all()} + for typ in types: print(typ) LinkModel = model_link[typ] @@ -96,6 +100,8 @@ class Command(BaseCommand): try: item_link = LinkModel.objects.get(old_id=entity.item.id) item = Item.objects.get(uid=item_link.new_uid) + tags = [t.content for t in getattr(entity, tag_field).all()] + """ mark = Mark(entity.owner, item) mark.update( shelf_type=shelf_map[entity.status], @@ -105,8 +111,38 @@ class Command(BaseCommand): metadata={'shared_link': entity.shared_link}, created_time=entity.created_time ) - tags = [t.content for t in getattr(entity, tag_field).all()] TagManager.tag_item_by_user(item, entity.owner, tags) + """ # rewrote above with direct create to speed up + user_id = entity.owner_id + item_id = item.id + visibility = entity.visibility + created_time = entity.created_time + if entity.rating: + Rating.objects.create(owner_id=user_id, item_id=item_id, grade=entity.rating, visibility=visibility) + if entity.text: + Comment.objects.create(owner_id=user_id, item_id=item_id, text=entity.text, visibility=visibility) + shelf = shelf_cache[f'{user_id}_{item.category}_{entity.status}'] + ShelfMember.objects.create( + _shelf_id=shelf, + owner_id=user_id, + position=0, + item_id=item_id, + metadata={'shared_link': entity.shared_link}, + created_time=created_time) + ShelfLogEntry.objects.create(owner_id=user_id, shelf_id=shelf, item_id=item_id, timestamp=created_time) + for title in tags: + tag_key = f'{user_id}_{title}' + if tag_key not in tag_cache: + tag = Tag.objects.create(owner_id=user_id, title=title, visibility=0).id + tag_cache[tag_key] = tag + else: + tag = tag_cache[tag_key] + TagMember.objects.create( + _tag_id=tag, + owner_id=user_id, + position=0, + item_id=item_id, + created_time=created_time) except Exception as e: print(f'Convert failed for {typ} {entity.id}: {e}') if options['failstop']: