From 9bf3f9a23f4072fd7ceadd51e37bbfa797fccb57 Mon Sep 17 00:00:00 2001 From: Te Llamas Date: Mon, 14 Nov 2022 18:34:31 +0000 Subject: [PATCH] dedupe 2nd try --- common/search/typesense.py | 22 ++-------------------- common/views.py | 27 ++++++++++++++++++++++----- 2 files changed, 24 insertions(+), 25 deletions(-) diff --git a/common/search/typesense.py b/common/search/typesense.py index 7a4c7e43..29123de4 100644 --- a/common/search/typesense.py +++ b/common/search/typesense.py @@ -16,7 +16,7 @@ INDEXABLE_FLOAT_TYPES = ['DecimalField'] SORTING_ATTRIBUTE = None # NONINDEXABLE_TYPES = ['ForeignKey', 'FileField',] SEARCH_PAGE_SIZE = 20 -DEDUP_KEYS = ['isbn', 'imdb_code'] + logger = logging.getLogger(__name__) @@ -173,14 +173,6 @@ class Indexer: except Exception as e: logger.error(f"delete item error: \n{e}") - - @classmethod - def item_key(self, item): - for key_name in DEDUP_KEYS: - if key_name in item: - return item[key_name] - - @classmethod def search(self, q, page=1, category=None, tag=None, sort=None): f = [] @@ -207,18 +199,8 @@ class Indexer: # print(r) import types results = types.SimpleNamespace() - keys = [] - hits = [] - for i in r['hits']: - key = self.item_key(i['document']) - if key is None: - hits.append(i) - elif key not in keys: - keys.append(key) - hits.append(i) - results.items = list([x for x in map(lambda i: self.item_to_obj(i['document']), hits) if x is not None]) + results.items = list([x for x in map(lambda i: self.item_to_obj(i['document']), r['hits']) if x is not None]) results.num_pages = (r['found'] + SEARCH_PAGE_SIZE - 1) // SEARCH_PAGE_SIZE - results.keys = keys # print(results) return results diff --git a/common/views.py b/common/views.py index e527e09a..cafce536 100644 --- a/common/views.py +++ b/common/views.py @@ -48,11 +48,15 @@ def external_search(request): category = None keywords = request.GET.get("q", default='').strip() page_number = int(request.GET.get('page', default=1)) + items = ExternalSources.search(category, keywords, page_number) if keywords else [] + dedupe_urls = request.session.get('search_dedupe_urls', []) + items = [i for i in items if i.source_url not in dedupe_urls] + return render( request, "common/external_search_result.html", { - "external_items": ExternalSources.search(category, keywords, page_number) if keywords else [], + "external_items": items, } ) @@ -85,18 +89,31 @@ def search(request): pass result = Indexer.search(keywords, page=page_number, category=category, tag=tag) - for item in result.items: - item.tag_list = item.all_tag_list[:TAG_NUMBER_ON_LIST] + keys = [] + items = [] + urls = [] + for i in result.items: + key = i.isbn if hasattr(i, 'isbn') else (i.imdb_code if hasattr(i, 'imdb_code') else None) + if key is None: + items.append(i) + elif key not in keys: + keys.append(key) + items.append(i) + urls.append(i.source_url) + i.tag_list = i.all_tag_list[:TAG_NUMBER_ON_LIST] + if request.path.endswith('.json/'): return JsonResponse({ 'num_pages': result.num_pages, - 'items':list(map(lambda i:i.get_json(), result.items)) + 'items':list(map(lambda i:i.get_json(), items)) }) + + request.session['search_dedupe_urls'] = urls return render( request, "common/search_result.html", { - "items": result.items, + "items": items, "pagination": PageLinksGenerator(PAGE_LINK_NUMBER, page_number, result.num_pages), "categories": ['book', 'movie', 'music', 'game'], }