new data model: speed up migration

This commit is contained in:
Your Name 2022-12-18 20:28:39 -05:00
parent 260968ff61
commit 70019522b5
4 changed files with 86 additions and 54 deletions

View file

@ -60,7 +60,7 @@ class EditionOut(ItemOut):
@api.post("/catalog/fetch", response=ItemOut)
def fetch_edition(request, url: str):
def fetch_item(request, url: str):
site = SiteManager.get_site_by_url(url)
if not site:
return Http404()

View file

@ -290,7 +290,7 @@ class Shelf(List):
class ShelfLogEntry(models.Model):
owner = models.ForeignKey(User, on_delete=models.PROTECT)
shelf = models.ForeignKey(Shelf, on_delete=models.PROTECT, related_name='entries', null=True) # None means removed from any shelf
shelf = models.ForeignKey(Shelf, on_delete=models.CASCADE, related_name='entries', null=True) # None means removed from any shelf
item = models.ForeignKey(Item, on_delete=models.PROTECT)
timestamp = models.DateTimeField(default=timezone.now) # this may later be changed by user
metadata = models.JSONField(default=dict)
@ -511,7 +511,7 @@ class Mark:
@property
def id(self):
return self.item.id if self.shelfmember else None
return self.shelfmember.id if self.shelfmember else None
@property
def shelf_type(self):
@ -525,6 +525,10 @@ class Mark:
def created_time(self):
return self.shelfmember.created_time if self.shelfmember else None
@property
def metadata(self):
return self.shelfmember.metadata if self.shelfmember else None
@property
def visibility(self):
return self.shelfmember.visibility if self.shelfmember else None
@ -551,21 +555,12 @@ class Mark:
def update(self, shelf_type, comment_text, rating_grade, visibility, metadata=None, created_time=None):
if shelf_type != self.shelf_type or visibility != self.visibility:
self.shelfmember = self.owner.shelf_manager.move_item(self.item, shelf_type, visibility=visibility)
if self.shelfmember and (created_time or metadata is not None):
if created_time:
self.shelfmember = self.owner.shelf_manager.move_item(self.item, shelf_type, visibility=visibility, metadata=metadata)
if self.shelfmember and created_time:
self.shelfmember.created_time = created_time
if metadata is not None:
self.shelfmember.metadata = metadata
self.shelfmember.save()
if comment_text != self.text or visibility != self.visibility:
self.comment = Comment.comment_item_by_user(self.item, self.owner, comment_text, visibility)
if self.comment and created_time:
self.comment.created_time = created_time
self.comment.save(update_fields=['created_time'])
if rating_grade != self.rating or visibility != self.visibility:
rating_content = Rating.rate_item_by_user(self.item, self.owner, rating_grade, visibility)
Rating.rate_item_by_user(self.item, self.owner, rating_grade, visibility)
self.rating = rating_grade
if rating_content and created_time:
rating_content.created_time = created_time
rating_content.save(update_fields=['created_time'])

View file

@ -178,6 +178,7 @@ class Command(BaseCommand):
pg = Paginator(qs, BATCH_SIZE)
for p in tqdm(pg.page_range):
links = []
with transaction.atomic():
for entity in pg.get_page(p).object_list:
try:
content = entity.convert()

View file

@ -34,9 +34,9 @@ model_link = {
}
shelf_map = {
MarkStatusEnum.WISH: ShelfType.WISHLIST,
MarkStatusEnum.DO: ShelfType.PROGRESS,
MarkStatusEnum.COLLECT: ShelfType.COMPLETE,
ShelfType.WISHLIST: MarkStatusEnum.WISH,
ShelfType.PROGRESS: MarkStatusEnum.DO,
ShelfType.COMPLETE: MarkStatusEnum.COLLECT,
}
tag_map = {
@ -72,12 +72,16 @@ class Command(BaseCommand):
if options['clear']:
print("Deleting all migrated user pieces")
# Piece.objects.all().delete()
for cls in [Review, Comment, Rating, Tag, ShelfLogEntry, ShelfMember, Shelf]: # Collection
for cls in [Review, Comment, Rating, TagMember, Tag, ShelfLogEntry, ShelfMember]: # Collection
print(cls)
cls.objects.all().delete()
return
types = options['types'] or [GameMark, AlbumMark, MovieMark, BookMark]
print('Preparing cache')
tag_cache = {f'{t.owner_id}_{t.title}': t.id for t in Tag.objects.all()}
shelf_cache = {f'{s.owner_id}_{s.item_category}_{shelf_map[s.shelf_type]}': s.id for s in Shelf.objects.all()}
for typ in types:
print(typ)
LinkModel = model_link[typ]
@ -96,6 +100,8 @@ class Command(BaseCommand):
try:
item_link = LinkModel.objects.get(old_id=entity.item.id)
item = Item.objects.get(uid=item_link.new_uid)
tags = [t.content for t in getattr(entity, tag_field).all()]
"""
mark = Mark(entity.owner, item)
mark.update(
shelf_type=shelf_map[entity.status],
@ -105,8 +111,38 @@ class Command(BaseCommand):
metadata={'shared_link': entity.shared_link},
created_time=entity.created_time
)
tags = [t.content for t in getattr(entity, tag_field).all()]
TagManager.tag_item_by_user(item, entity.owner, tags)
""" # rewrote above with direct create to speed up
user_id = entity.owner_id
item_id = item.id
visibility = entity.visibility
created_time = entity.created_time
if entity.rating:
Rating.objects.create(owner_id=user_id, item_id=item_id, grade=entity.rating, visibility=visibility)
if entity.text:
Comment.objects.create(owner_id=user_id, item_id=item_id, text=entity.text, visibility=visibility)
shelf = shelf_cache[f'{user_id}_{item.category}_{entity.status}']
ShelfMember.objects.create(
_shelf_id=shelf,
owner_id=user_id,
position=0,
item_id=item_id,
metadata={'shared_link': entity.shared_link},
created_time=created_time)
ShelfLogEntry.objects.create(owner_id=user_id, shelf_id=shelf, item_id=item_id, timestamp=created_time)
for title in tags:
tag_key = f'{user_id}_{title}'
if tag_key not in tag_cache:
tag = Tag.objects.create(owner_id=user_id, title=title, visibility=0).id
tag_cache[tag_key] = tag
else:
tag = tag_cache[tag_key]
TagMember.objects.create(
_tag_id=tag,
owner_id=user_id,
position=0,
item_id=item_id,
created_time=created_time)
except Exception as e:
print(f'Convert failed for {typ} {entity.id}: {e}')
if options['failstop']: