Deduplicate search results (2nd try)

This commit is contained in:
Te Llamas 2022-11-14 18:34:31 +00:00
parent 281689f6e9
commit 9bf3f9a23f
2 changed files with 24 additions and 25 deletions

View file

@@ -16,7 +16,7 @@ INDEXABLE_FLOAT_TYPES = ['DecimalField']
SORTING_ATTRIBUTE = None
# NONINDEXABLE_TYPES = ['ForeignKey', 'FileField',]
SEARCH_PAGE_SIZE = 20
DEDUP_KEYS = ['isbn', 'imdb_code']
logger = logging.getLogger(__name__)
@@ -173,14 +173,6 @@ class Indexer:
except Exception as e:
logger.error(f"delete item error: \n{e}")
@classmethod
def item_key(self, item):
for key_name in DEDUP_KEYS:
if key_name in item:
return item[key_name]
@classmethod
def search(self, q, page=1, category=None, tag=None, sort=None):
f = []
@@ -207,18 +199,8 @@ class Indexer:
# print(r)
import types
results = types.SimpleNamespace()
keys = []
hits = []
for i in r['hits']:
key = self.item_key(i['document'])
if key is None:
hits.append(i)
elif key not in keys:
keys.append(key)
hits.append(i)
results.items = list([x for x in map(lambda i: self.item_to_obj(i['document']), hits) if x is not None])
results.items = list([x for x in map(lambda i: self.item_to_obj(i['document']), r['hits']) if x is not None])
results.num_pages = (r['found'] + SEARCH_PAGE_SIZE - 1) // SEARCH_PAGE_SIZE
results.keys = keys
# print(results)
return results

View file

@@ -48,11 +48,15 @@ def external_search(request):
category = None
keywords = request.GET.get("q", default='').strip()
page_number = int(request.GET.get('page', default=1))
items = ExternalSources.search(category, keywords, page_number) if keywords else []
dedupe_urls = request.session.get('search_dedupe_urls', [])
items = [i for i in items if i.source_url not in dedupe_urls]
return render(
request,
"common/external_search_result.html",
{
"external_items": ExternalSources.search(category, keywords, page_number) if keywords else [],
"external_items": items,
}
)
@@ -85,18 +89,31 @@ def search(request):
pass
result = Indexer.search(keywords, page=page_number, category=category, tag=tag)
for item in result.items:
item.tag_list = item.all_tag_list[:TAG_NUMBER_ON_LIST]
keys = []
items = []
urls = []
for i in result.items:
key = i.isbn if hasattr(i, 'isbn') else (i.imdb_code if hasattr(i, 'imdb_code') else None)
if key is None:
items.append(i)
elif key not in keys:
keys.append(key)
items.append(i)
urls.append(i.source_url)
i.tag_list = i.all_tag_list[:TAG_NUMBER_ON_LIST]
if request.path.endswith('.json/'):
return JsonResponse({
'num_pages': result.num_pages,
'items':list(map(lambda i:i.get_json(), result.items))
'items':list(map(lambda i:i.get_json(), items))
})
request.session['search_dedupe_urls'] = urls
return render(
request,
"common/search_result.html",
{
"items": result.items,
"items": items,
"pagination": PageLinksGenerator(PAGE_LINK_NUMBER, page_number, result.num_pages),
"categories": ['book', 'movie', 'music', 'game'],
}