reindex in batch

This commit is contained in:
Their Name 2021-12-30 12:28:27 -05:00 committed by Your Name
parent 6273bf29dd
commit d2955bf3d0
2 changed files with 32 additions and 6 deletions

View file

@ -48,6 +48,10 @@ class Indexer:
self.instance().update_filterable_attributes(['_class', 'tags', 'genre', 'source_site'])
self.instance().update_settings({'displayedAttributes': ['_id', '_class', 'id', 'title', 'tags']})
@classmethod
def get_stats(self):
    """Return the statistics reported by the search index.

    Delegates to ``get_stats()`` on the object returned by ``instance()``
    and passes the result back unchanged.
    """
    index = self.instance()
    return index.get_stats()
@classmethod
def update_model_indexable(self, model):
model.indexable_fields = ['tags']
@ -68,7 +72,7 @@ class Indexer:
post_delete.connect(item_post_delete_handler, sender=model)
@classmethod
def replace_item(self, obj):
def obj_to_dict(self, obj):
pk = f'{obj.__class__.__name__}-{obj.id}'
item = {
'_id': pk,
@ -86,8 +90,11 @@ class Indexer:
if d.__class__ is dict:
item.update(d)
item = {k: v for k, v in item.items() if v}
# print(item)
self.instance().add_documents([item])
return item
@classmethod
def replace_item(self, obj):
    """Send the search document for a single object to the index.

    The object is converted with ``obj_to_dict`` and submitted through
    ``add_documents`` as a one-element list.
    """
    index = self.instance()
    document = self.obj_to_dict(obj)
    index.add_documents([document])
@classmethod
def delete_item(self, obj):

View file

@ -5,15 +5,34 @@ from movies.models import Movie
from books.models import Book
from games.models import Game
from music.models import Album, Song
from django.core.paginator import Paginator
from tqdm import tqdm
from time import sleep
from datetime import timedelta
from django.utils import timezone
# Page size handed to the Paginator below; each page of objects is sent to the
# search index in a single update_documents() call.
BATCH_SIZE = 1000
class Command(BaseCommand):
    """Management command that regenerates the search index in batches.

    Objects of each indexed model are read page by page (``BATCH_SIZE`` rows
    per page), converted with ``Indexer.obj_to_dict`` and submitted with one
    ``update_documents`` call per page.
    """

    help = 'Regenerate the search index'

    def add_arguments(self, parser):
        """Register the positional ``hours`` argument."""
        parser.add_argument('hours', type=int, help='Re-index items modified in last N hours, 0 to reindex all')

    def handle(self, *args, **options):
        """Update the index settings, then re-index every model in batches.

        ``options['hours']`` selects the window: ``0`` re-indexes everything,
        a positive ``N`` only objects whose ``edited_time`` falls within the
        last ``N`` hours.

        Raises:
            CommandError: if ``hours`` is negative. A negative window would
                put the cutoff in the future and silently index nothing.
        """
        # Local import so this block does not depend on how the file's
        # top-level imports are laid out.
        from django.core.management.base import CommandError

        hours = int(options['hours'])
        if hours < 0:
            raise CommandError('hours must be 0 (reindex all) or a positive number of hours')

        # Use the command's own stream (not print) so output honours any
        # stdout redirection, consistent with the success message below.
        self.stdout.write(f'Connecting to search server {settings.MEILISEARCH_SERVER} for index: {INDEX_NAME}')
        Indexer.update_settings()
        self.stdout.write(self.style.SUCCESS('Index settings updated.'))

        # Compute the cutoff once so every model is filtered against the same
        # window; None means "no time filter".
        cutoff = timezone.now() - timedelta(hours=hours) if hours else None

        for model in [Game, Movie, Book, Album, Song]:
            self.stdout.write(f'Re-indexing {model}')
            queryset = model.objects.all()
            if cutoff is not None:
                queryset = queryset.filter(edited_time__gt=cutoff)
            # A stable ordering is required for pagination to visit each row
            # exactly once.
            paginator = Paginator(queryset.order_by('id'), BATCH_SIZE)
            for page_number in tqdm(paginator.page_range):
                page = paginator.get_page(page_number)
                documents = [Indexer.obj_to_dict(obj) for obj in page.object_list]
                if not documents:
                    # An empty queryset still yields one (empty) page.
                    continue
                Indexer.instance().update_documents(documents=documents)
                # Wait until the server reports it is no longer indexing
                # before sending the next batch.
                while Indexer.get_stats()['isIndexing']:
                    sleep(0.1)