From c9a4513dd8deb2fbf1c665487e34b8cfa57cfce7 Mon Sep 17 00:00:00 2001 From: Sam Date: Thu, 18 Feb 2021 10:52:50 +0000 Subject: [PATCH 01/15] encapsulated search functionality --- products/utils.py | 23 +++++++++++++++++++++++ products/views.py | 17 ++--------------- 2 files changed, 25 insertions(+), 15 deletions(-) diff --git a/products/utils.py b/products/utils.py index 7a08a70..a3bef8a 100644 --- a/products/utils.py +++ b/products/utils.py @@ -1,5 +1,10 @@ import logging +from django.db.models import Q + +from products.models import Product + + def extract_search_filters(result_set): """ @@ -49,3 +54,21 @@ def extract_search_filters(result_set): except Exception as e: logging.error(f'Extacting filters for {item}') return filter_dict + + +def find_related_products(keyword): + # search in tags + tags = Product.tags.tag_model.objects.filter(name__icontains=keyword) + # search in category + categories = Product.category.tag_model.objects.filter(name__icontains=keyword) + # search in attributes + attributes = Product.attributes.tag_model.objects.filter(name__icontains=keyword) + # unified tag search + products_qs = Product.objects.filter( + Q(name__icontains=keyword)| + Q(description__icontains=keyword)| + Q(tags__in=tags)| + Q(category__in=categories)| + Q(attributes__in=attributes) + ) + return products_qs diff --git a/products/views.py b/products/views.py index 61a332e..685e748 100644 --- a/products/views.py +++ b/products/views.py @@ -23,7 +23,7 @@ from companies.models import Company from history.models import HistorySync from back_latienda.permissions import IsCreator -from .utils import extract_search_filters +from .utils import extract_search_filters, find_related_products from utils.tag_serializers import TaggitSerializer from utils.tag_filters import ProductTagFilter @@ -155,20 +155,7 @@ def product_search(request): chunks = query_string.split(' ') for chunk in chunks: - # search in tags - tags = Product.tags.tag_model.objects.filter(name__icontains=chunk) - # search in category - categories = Product.category.tag_model.objects.filter(name__icontains=chunk) - # search in attributes - attributes = Product.attributes.tag_model.objects.filter(name__icontains=chunk) - # unified tag search - products_qs = Product.objects.filter( - Q(name__icontains=chunk)| - Q(description__icontains=chunk)| - Q(tags__in=tags)| - Q(category__in=categories)| - Q(attributes__in=attributes) - ) + products_qs = find_related_products(chunk) for instance in products_qs: result_set.add(instance) From c3a7321c9eed56c884287778bdfce596bd46d9f1 Mon Sep 17 00:00:00 2001 From: Sam Date: Thu, 18 Feb 2021 11:54:01 +0000 Subject: [PATCH 02/15] fixes addtestdata, working on vectorized database search --- README.md | 6 ++++-- back_latienda/settings/base.py | 1 + core/management/commands/addtestdata.py | 5 ++++- products/tests.py | 3 +++ products/utils.py | 13 +++++++++++++ products/views.py | 5 +++-- 6 files changed, 28 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index bcceed1..7c2ba1d 100644 --- a/README.md +++ b/README.md @@ -66,7 +66,7 @@ By default a `LimitOffsetPagination` pagination is enabled Examples: `http://127.0.0.1:8000/api/v1/products/?limit=10&offset=0` -The response data has the following keyspayload: +The response data has the following keys: ``` dict_keys(['count', 'next', 'previous', 'results']) ``` @@ -294,4 +294,6 @@ To create a dataset of fake companies and products: `python manage.py addtestdata` -Creates 10 Companies, with 100 products each. +Creates 10 Companies, with 10 products each. + +WARNING: the script deletes existing instances of both Company and Product diff --git a/back_latienda/settings/base.py b/back_latienda/settings/base.py index afc4c78..ccb5e4b 100644 --- a/back_latienda/settings/base.py +++ b/back_latienda/settings/base.py @@ -42,6 +42,7 @@ INSTALLED_APPS = [ 'django.contrib.messages', 'django.contrib.staticfiles', 'django.contrib.gis', + 'django.contrib.postgres', # 3rd party 'rest_framework', diff --git a/core/management/commands/addtestdata.py b/core/management/commands/addtestdata.py index 7d98462..35d7bf9 100644 --- a/core/management/commands/addtestdata.py +++ b/core/management/commands/addtestdata.py @@ -70,7 +70,10 @@ class Command(BaseCommand): # TODO: apply automatic tags from tag list # TODO: write image to S3 storage # create instance - product = ProductFactory(name=name, description=description) + product = ProductFactory( + company=company, + name=name, + description=description) # get image response = requests.get(self.logo_url, stream=True) diff --git a/products/tests.py b/products/tests.py index a71db66..a8a5e5e 100644 --- a/products/tests.py +++ b/products/tests.py @@ -477,6 +477,9 @@ class ProductSearchTest(TestCase): url = f"{self.endpoint}?query_string={query_string}" # send in request response = self.client.get(url) + + import ipdb; ipdb.set_trace() + payload = response.json() # check response self.assertEqual(response.status_code, 200) diff --git a/products/utils.py b/products/utils.py index a3bef8a..b5505d5 100644 --- a/products/utils.py +++ b/products/utils.py @@ -1,6 +1,7 @@ import logging from django.db.models import Q +from django.contrib.postgres.search import SearchQuery, SearchRank, SearchVector from products.models import Product @@ -72,3 +73,15 @@ def find_related_products(keyword): Q(attributes__in=attributes) ) return products_qs + + +def search_by_phrase(phrase): + SearchQuery(phrase, search_type='phrase') + pass + + +def alt_find_related_products(keyword, fields=('tags', 'attributes', 'category')): + vector = SearchVector(*fields) + products_qs = Product.objects.annotate(search=vector).filter(search=keyword) + return products_qs + diff --git a/products/views.py b/products/views.py index 685e748..47d8e4c 100644 --- a/products/views.py +++ b/products/views.py @@ -23,7 +23,7 @@ from companies.models import Company from history.models import HistorySync from back_latienda.permissions import IsCreator -from .utils import extract_search_filters, find_related_products +from .utils import extract_search_filters, find_related_products, alt_find_related_products from utils.tag_serializers import TaggitSerializer from utils.tag_filters import ProductTagFilter @@ -155,7 +155,8 @@ def product_search(request): chunks = query_string.split(' ') for chunk in chunks: - products_qs = find_related_products(chunk) + # products_qs = find_related_products(chunk) + products_qs = alt_find_related_products(chunk) for instance in products_qs: result_set.add(instance) From 20b9c395c273194817a5c67b53eab62f3f6056e0 Mon Sep 17 00:00:00 2001 From: Sam Date: Thu, 18 Feb 2021 12:09:45 +0000 Subject: [PATCH 03/15] searchvector doesnt like nested tags --- products/tests.py | 10 ++++++---- products/utils.py | 13 +++++++------ products/views.py | 2 +- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/products/tests.py b/products/tests.py index a8a5e5e..2e4f24e 100644 --- a/products/tests.py +++ b/products/tests.py @@ -462,7 +462,9 @@ class ProductSearchTest(TestCase): def test_anon_user_can_search(self): expected_instances = [ self.factory(tags="lunares/blancos",description="zapatos verdes"), - self.factory(tags="colores/rojos, tono/brillante"), + # TODO: workaround vectorized search not liking nested tags + # self.factory(tags="colores/rojos, tono/brillante"), + self.factory(tags="colores, rojos"), self.factory(tags="lunares/azules", description="zapatos rojos"), self.factory(tags="lunares/rojos", description="zapatos"), self.factory(attributes='"zapatos de campo", tono/oscuro'), @@ -478,12 +480,12 @@ class ProductSearchTest(TestCase): # send in request response = self.client.get(url) - import ipdb; ipdb.set_trace() - - payload = response.json() # check response self.assertEqual(response.status_code, 200) + # load response data + payload = response.json() # check for object creation + self.assertEquals(len(payload['products']), len(expected_instances)) # check for filters self.assertNotEquals([], payload['filters']['singles']) diff --git a/products/utils.py b/products/utils.py index b5505d5..44565d6 100644 --- a/products/utils.py +++ b/products/utils.py @@ -75,13 +75,14 @@ def find_related_products(keyword): return products_qs -def search_by_phrase(phrase): - SearchQuery(phrase, search_type='phrase') - pass - - -def alt_find_related_products(keyword, fields=('tags', 'attributes', 'category')): +def alt_find_related_products(keyword): + fields=('name', 'description', 'tags__name', 'attributes__name', 'category__name') vector = SearchVector(*fields) products_qs = Product.objects.annotate(search=vector).filter(search=keyword) return products_qs + +def search_by_phrase(phrase): + SearchQuery(phrase, search_type='phrase') + pass + diff --git a/products/views.py b/products/views.py index 47d8e4c..ea8bc4b 100644 --- a/products/views.py +++ b/products/views.py @@ -166,4 +166,4 @@ def product_search(request): product_serializer = ProductSearchSerializer(result_set, many=True, context={'request': request}) return Response(data={"filters": filters, "products": product_serializer.data}) except Exception as e: - return Response({"errors": {"details": str(type(e))}}, status=status.HTTP_500_INTERNAL_SERVER_ERROR) + return Response({"errors": {"details": str(e)}}, status=status.HTTP_500_INTERNAL_SERVER_ERROR) From 47633c889d2d6dbe5897bebff40aff9d15a66dd3 Mon Sep 17 00:00:00 2001 From: Sam Date: Thu, 18 Feb 2021 13:08:23 +0000 Subject: [PATCH 04/15] working on ranked search results --- products/tests.py | 11 ++++++++--- products/utils.py | 19 ++++++++++++++----- products/views.py | 5 +++-- 3 files changed, 25 insertions(+), 10 deletions(-) diff --git a/products/tests.py b/products/tests.py index 2e4f24e..412bae1 100644 --- a/products/tests.py +++ b/products/tests.py @@ -463,11 +463,13 @@ class ProductSearchTest(TestCase): expected_instances = [ self.factory(tags="lunares/blancos",description="zapatos verdes"), # TODO: workaround vectorized search not liking nested tags - # self.factory(tags="colores/rojos, tono/brillante"), - self.factory(tags="colores, rojos"), + self.factory(tags="colores/rojos, tono/brillante"), + # self.factory(tags="colores, rojos"), self.factory(tags="lunares/azules", description="zapatos rojos"), self.factory(tags="lunares/rojos", description="zapatos"), self.factory(attributes='"zapatos de campo", tono/oscuro'), + # TODO: workaround multi-word tags + # self.factory(attributes='zapatos, "zapatos de campo", tono/oscuro'), ] unexpected_instances = [ self.factory(description="chanclas"), @@ -485,8 +487,11 @@ class ProductSearchTest(TestCase): # load response data payload = response.json() # check for object creation - + import ipdb; ipdb.set_trace() self.assertEquals(len(payload['products']), len(expected_instances)) + # check ids + for i in range(len(payload['products'])): + self.assertTrue(payload['products'][i]['id'] == expected_instances[i].id) # check for filters self.assertNotEquals([], payload['filters']['singles']) self.assertTrue(len(payload['filters']) >= 2 ) diff --git a/products/utils.py b/products/utils.py index 44565d6..a4c0426 100644 --- a/products/utils.py +++ b/products/utils.py @@ -75,14 +75,23 @@ def find_related_products(keyword): return products_qs -def alt_find_related_products(keyword): +def alt_rank_find_related_products(keyword): + # TODO: figure out why it includes unrelated instances fields=('name', 'description', 'tags__name', 'attributes__name', 'category__name') vector = SearchVector(*fields) - products_qs = Product.objects.annotate(search=vector).filter(search=keyword) + query = SearchQuery(keyword) + products_qs = Product.objects.annotate( + rank=SearchRank(vector, query) + ).order_by('-rank') + import ipdb; ipdb.set_trace() return products_qs -def search_by_phrase(phrase): - SearchQuery(phrase, search_type='phrase') - pass +def alt_find_related_products(keyword): + fields=('name', 'description', 'tags__name', 'attributes__name', 'category__name') + vector = SearchVector(*fields) + products_qs = Product.objects.annotate( + search=vector + ).filter(search=keyword) + return products_qs diff --git a/products/views.py b/products/views.py index ea8bc4b..b1a6194 100644 --- a/products/views.py +++ b/products/views.py @@ -23,7 +23,7 @@ from companies.models import Company from history.models import HistorySync from back_latienda.permissions import IsCreator -from .utils import extract_search_filters, find_related_products, alt_find_related_products +from .utils import extract_search_filters, find_related_products, alt_find_related_products, alt_rank_find_related_products from utils.tag_serializers import TaggitSerializer from utils.tag_filters import ProductTagFilter @@ -156,7 +156,8 @@ def product_search(request): for chunk in chunks: # products_qs = find_related_products(chunk) - products_qs = alt_find_related_products(chunk) + # products_qs = alt_find_related_products(chunk) + products_qs = alt_rank_find_related_products(chunk) for instance in products_qs: result_set.add(instance) From 978a7c9520d84720f831a5ed8427250874462777 Mon Sep 17 00:00:00 2001 From: Sam Date: Thu, 18 Feb 2021 13:22:08 +0000 Subject: [PATCH 05/15] fixed vector search not finding nested tags --- products/tests.py | 1 - products/utils.py | 29 ++++++++++++++--------------- products/views.py | 8 ++++---- 3 files changed, 18 insertions(+), 20 deletions(-) diff --git a/products/tests.py b/products/tests.py index 412bae1..6ece0cb 100644 --- a/products/tests.py +++ b/products/tests.py @@ -487,7 +487,6 @@ class ProductSearchTest(TestCase): # load response data payload = response.json() # check for object creation - import ipdb; ipdb.set_trace() self.assertEquals(len(payload['products']), len(expected_instances)) # check ids for i in range(len(payload['products'])): diff --git a/products/utils.py b/products/utils.py index a4c0426..5bbbe75 100644 --- a/products/utils.py +++ b/products/utils.py @@ -57,7 +57,7 @@ def extract_search_filters(result_set): return filter_dict -def find_related_products(keyword): +def find_related_products_v1(keyword): # search in tags tags = Product.tags.tag_model.objects.filter(name__icontains=keyword) # search in category @@ -75,23 +75,22 @@ def find_related_products(keyword): return products_qs -def alt_rank_find_related_products(keyword): - # TODO: figure out why it includes unrelated instances - fields=('name', 'description', 'tags__name', 'attributes__name', 'category__name') - vector = SearchVector(*fields) - query = SearchQuery(keyword) - products_qs = Product.objects.annotate( - rank=SearchRank(vector, query) - ).order_by('-rank') - import ipdb; ipdb.set_trace() - return products_qs - - -def alt_find_related_products(keyword): - fields=('name', 'description', 'tags__name', 'attributes__name', 'category__name') +def find_related_products_v2(keyword): + fields=('name', 'description', 'tags__label', 'attributes__label', 'category__name') vector = SearchVector(*fields) products_qs = Product.objects.annotate( search=vector ).filter(search=keyword) return products_qs + +def alt_rank_find_related_products(keyword): + # TODO: figure out why it includes unrelated instances + fields=('name', 'description', 'tags__label', 'attributes__label', 'category__name') + vector = SearchVector(*fields) + query = SearchQuery(keyword) + products_qs = Product.objects.annotate( + rank=SearchRank(vector, query) + ).order_by('-rank') + return products_qs + diff --git a/products/views.py b/products/views.py index b1a6194..2e6d44f 100644 --- a/products/views.py +++ b/products/views.py @@ -23,7 +23,7 @@ from companies.models import Company from history.models import HistorySync from back_latienda.permissions import IsCreator -from .utils import extract_search_filters, find_related_products, alt_find_related_products, alt_rank_find_related_products +from .utils import extract_search_filters, find_related_products_v1, find_related_products_v2, alt_rank_find_related_products from utils.tag_serializers import TaggitSerializer from utils.tag_filters import ProductTagFilter @@ -155,9 +155,9 @@ def product_search(request): chunks = query_string.split(' ') for chunk in chunks: - # products_qs = find_related_products(chunk) - # products_qs = alt_find_related_products(chunk) - products_qs = alt_rank_find_related_products(chunk) + products_qs = find_related_products_v1(chunk) + # products_qs = find_related_products_v2(chunk) + # products_qs = alt_rank_find_related_products(chunk) for instance in products_qs: result_set.add(instance) From a06e7eac4da8715203815165da1047bfee1d178d Mon Sep 17 00:00:00 2001 From: Sam Date: Thu, 18 Feb 2021 13:25:35 +0000 Subject: [PATCH 06/15] added comments --- products/utils.py | 22 +++++++++++++++++++++- products/views.py | 4 ++-- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/products/utils.py b/products/utils.py index 5bbbe75..22c1a92 100644 --- a/products/utils.py +++ b/products/utils.py @@ -58,6 +58,12 @@ def extract_search_filters(result_set): def find_related_products_v1(keyword): + """ + Classical approach to the search + + Using Q objects + + """ # search in tags tags = Product.tags.tag_model.objects.filter(name__icontains=keyword) # search in category @@ -76,6 +82,11 @@ def find_related_products_v1(keyword): def find_related_products_v2(keyword): + """ + More advanced search + + Using search vectors + """ fields=('name', 'description', 'tags__label', 'attributes__label', 'category__name') vector = SearchVector(*fields) products_qs = Product.objects.annotate( @@ -84,7 +95,16 @@ def find_related_products_v2(keyword): return products_qs -def alt_rank_find_related_products(keyword): +def find_related_products_v3(keyword): + """ + Fully loaded product search + + SearchVectors for the fields + SearchQuery for the value + SearchRank for relevancy scoring and ranking + + PROBLEM: returns unrelated instances + """ # TODO: figure out why it includes unrelated instances fields=('name', 'description', 'tags__label', 'attributes__label', 'category__name') vector = SearchVector(*fields) diff --git a/products/views.py b/products/views.py index 2e6d44f..2c697d6 100644 --- a/products/views.py +++ b/products/views.py @@ -23,7 +23,7 @@ from companies.models import Company from history.models import HistorySync from back_latienda.permissions import IsCreator -from .utils import extract_search_filters, find_related_products_v1, find_related_products_v2, alt_rank_find_related_products +from .utils import extract_search_filters, find_related_products_v1, find_related_products_v2, find_related_products_v3 from utils.tag_serializers import TaggitSerializer from utils.tag_filters import ProductTagFilter @@ -157,7 +157,7 @@ def product_search(request): for chunk in chunks: products_qs = find_related_products_v1(chunk) # products_qs = find_related_products_v2(chunk) - # products_qs = alt_rank_find_related_products(chunk) + # products_qs = find_related_products_v3(chunk) for instance in products_qs: result_set.add(instance) From fee9bdbd3f7156098f8506e4decc46d661c0fd0f Mon Sep 17 00:00:00 2001 From: Sam Date: Thu, 18 Feb 2021 13:39:59 +0000 Subject: [PATCH 07/15] cleanup --- products/tests.py | 4 +--- products/utils.py | 2 +- products/views.py | 1 + 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/products/tests.py b/products/tests.py index 6ece0cb..841c2f2 100644 --- a/products/tests.py +++ b/products/tests.py @@ -462,13 +462,11 @@ class ProductSearchTest(TestCase): def test_anon_user_can_search(self): expected_instances = [ self.factory(tags="lunares/blancos",description="zapatos verdes"), - # TODO: workaround vectorized search not liking nested tags self.factory(tags="colores/rojos, tono/brillante"), - # self.factory(tags="colores, rojos"), self.factory(tags="lunares/azules", description="zapatos rojos"), self.factory(tags="lunares/rojos", description="zapatos"), self.factory(attributes='"zapatos de campo", tono/oscuro'), - # TODO: workaround multi-word tags + # TODO: workaround for v3 with multi-word tags # self.factory(attributes='zapatos, "zapatos de campo", tono/oscuro'), ] unexpected_instances = [ diff --git a/products/utils.py b/products/utils.py index 22c1a92..35d98b3 100644 --- a/products/utils.py +++ b/products/utils.py @@ -97,7 +97,7 @@ def find_related_products_v2(keyword): def find_related_products_v3(keyword): """ - Fully loaded product search + Ranked product search SearchVectors for the fields SearchQuery for the value diff --git a/products/views.py b/products/views.py index 2c697d6..6c0b14f 100644 --- a/products/views.py +++ b/products/views.py @@ -160,6 +160,7 @@ def product_search(request): # products_qs = find_related_products_v3(chunk) for instance in products_qs: result_set.add(instance) + # TODO: add search for entire phrase # extract filters from result_set filters = extract_search_filters(result_set) From 974f6f248de16ceca33b25723692e051c6a5f00e Mon Sep 17 00:00:00 2001 From: Sam Date: Thu, 18 Feb 2021 13:53:58 +0000 Subject: [PATCH 08/15] adde find_related_products_v4 to tes trigram indexing --- products/utils.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/products/utils.py b/products/utils.py index 35d98b3..488d305 100644 --- a/products/utils.py +++ b/products/utils.py @@ -1,7 +1,7 @@ import logging from django.db.models import Q -from django.contrib.postgres.search import SearchQuery, SearchRank, SearchVector +from django.contrib.postgres.search import SearchQuery, SearchRank, SearchVector, TrigramSimilarity from products.models import Product @@ -114,3 +114,14 @@ def find_related_products_v3(keyword): ).order_by('-rank') return products_qs + +def find_related_products_v4(keyword): + """ + Using trigrams + """ + fields=('name', 'description', 'tags__label', 'attributes__label', 'category__name') + products_qs = Product.objects.annotate( + similarity=TrigramSimilarity(fields, keyword) + ).order_by('-similarity') + + return products_qs From aec9a0d7a1808aacb019e57da1f6f2928868e93b Mon Sep 17 00:00:00 2001 From: Sam Date: Fri, 19 Feb 2021 10:06:26 +0000 Subject: [PATCH 09/15] new and improved Q-based search implementation --- products/utils.py | 18 +++++++++++++++--- products/views.py | 5 +++-- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/products/utils.py b/products/utils.py index 488d305..f3ff923 100644 --- a/products/utils.py +++ b/products/utils.py @@ -81,11 +81,23 @@ def find_related_products_v1(keyword): return products_qs +def find_related_products_v5(keyword): + """ + Single query solution, using Q objects + """ + products_qs = Product.objects.filter( + Q(name__icontains=keyword)| + Q(description__icontains=keyword)| + Q(tags__label__icontains=keyword)| + Q(category__name__icontains=keyword)| + Q(attributes__label__icontains=keyword) + ) + return products_qs + + def find_related_products_v2(keyword): """ - More advanced search - - Using search vectors + More advanced: using search vectors """ fields=('name', 'description', 'tags__label', 'attributes__label', 'category__name') vector = SearchVector(*fields) diff --git a/products/views.py b/products/views.py index 6c0b14f..70c75af 100644 --- a/products/views.py +++ b/products/views.py @@ -23,7 +23,7 @@ from companies.models import Company from history.models import HistorySync from back_latienda.permissions import IsCreator -from .utils import extract_search_filters, find_related_products_v1, find_related_products_v2, find_related_products_v3 +from .utils import extract_search_filters, find_related_products_v5, find_related_products_v2, find_related_products_v3 from utils.tag_serializers import TaggitSerializer from utils.tag_filters import ProductTagFilter @@ -155,7 +155,8 @@ def product_search(request): chunks = query_string.split(' ') for chunk in chunks: - products_qs = find_related_products_v1(chunk) + # import ipdb; ipdb.set_trace() + products_qs = find_related_products_v5(chunk) # products_qs = find_related_products_v2(chunk) # products_qs = find_related_products_v3(chunk) for instance in products_qs: From d4738a83cc866fa4dd21ed13a4999a298f38b55d Mon Sep 17 00:00:00 2001 From: Sam Date: Fri, 19 Feb 2021 10:31:34 +0000 Subject: [PATCH 10/15] tried trigram search, not working at all --- products/utils.py | 5 +++-- products/views.py | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/products/utils.py b/products/utils.py index f3ff923..51818a6 100644 --- a/products/utils.py +++ b/products/utils.py @@ -131,9 +131,10 @@ def find_related_products_v4(keyword): """ Using trigrams """ - fields=('name', 'description', 'tags__label', 'attributes__label', 'category__name') + # fields=('name', 'description', 'tags__label', 'attributes__label', 'category__name') + products_qs = Product.objects.annotate( - similarity=TrigramSimilarity(fields, keyword) + similarity=TrigramSimilarity('name', keyword), ).order_by('-similarity') return products_qs diff --git a/products/views.py b/products/views.py index 70c75af..33d65ea 100644 --- a/products/views.py +++ b/products/views.py @@ -23,7 +23,7 @@ from companies.models import Company from history.models import HistorySync from back_latienda.permissions import IsCreator -from .utils import extract_search_filters, find_related_products_v5, find_related_products_v2, find_related_products_v3 +from .utils import extract_search_filters, find_related_products_v5, find_related_products_v4, find_related_products_v3 from utils.tag_serializers import TaggitSerializer from utils.tag_filters import ProductTagFilter @@ -157,7 +157,7 @@ def product_search(request): for chunk in chunks: # import ipdb; ipdb.set_trace() products_qs = find_related_products_v5(chunk) - # products_qs = find_related_products_v2(chunk) + # products_qs = find_related_products_v4(chunk) # products_qs = find_related_products_v3(chunk) for instance in products_qs: result_set.add(instance) From 8abcda74f878096c783527ae2ee51339d0dceda9 Mon Sep 17 00:00:00 2001 From: Sam Date: Fri, 19 Feb 2021 11:00:26 +0000 Subject: [PATCH 11/15] testing product search utils directly, now they return sets --- products/tests.py | 32 ++++++++++++++++++++++++++++++++ products/utils.py | 17 ++++++++++------- products/views.py | 11 +++++------ 3 files changed, 47 insertions(+), 13 deletions(-) diff --git a/products/tests.py b/products/tests.py index 841c2f2..72c8aee 100644 --- a/products/tests.py +++ b/products/tests.py @@ -13,6 +13,7 @@ from rest_framework import status from companies.factories import CompanyFactory from products.factories import ProductFactory from products.models import Product +from products.utils import find_related_products_v3 from core.factories import CustomUserFactory from core.utils import get_tokens_for_user @@ -536,3 +537,34 @@ class MyProductsViewTest(APITestCase): # check response self.assertEqual(response.status_code, status.HTTP_401_UNAUTHORIZED) + + +class FindRelatedProductsTest(APITestCase): + + def setUp(self): + """Tests setup + """ + self.factory = ProductFactory + self.model = Product + # clear table + self.model.objects.all().delete() + + def test_v3_find_by_single_tag(self): + # create tagged product + tag = 'cool' + expected_instances = [ + self.factory(tags=tag) + ] + # instance = self.factory() + # instance.tags.set(tag) + # instance.save() + # searh for it + results = find_related_products_v3(tag) + import ipdb; ipdb.set_trace() + + # assert result + self.assertTrue(len(results) == len(expected_instances)) + + + + diff --git a/products/utils.py b/products/utils.py index 51818a6..8353ef6 100644 --- a/products/utils.py +++ b/products/utils.py @@ -92,7 +92,7 @@ def find_related_products_v5(keyword): Q(category__name__icontains=keyword)| Q(attributes__label__icontains=keyword) ) - return products_qs + return set(products_qs) def find_related_products_v2(keyword): @@ -104,7 +104,7 @@ def find_related_products_v2(keyword): products_qs = Product.objects.annotate( search=vector ).filter(search=keyword) - return products_qs + return set(products_qs) def find_related_products_v3(keyword): @@ -118,13 +118,16 @@ def find_related_products_v3(keyword): PROBLEM: returns unrelated instances """ # TODO: figure out why it includes unrelated instances - fields=('name', 'description', 'tags__label', 'attributes__label', 'category__name') - vector = SearchVector(*fields) + # fields=('name', 'description', 'tags__label', 'attributes__label', 'category__name') + + vector = SearchVector('name') + SearchVector('description') + SearchVector('tags__label') + SearchVector('attributes__label') + SearchVector('category__name') query = SearchQuery(keyword) + products_qs = Product.objects.annotate( rank=SearchRank(vector, query) - ).order_by('-rank') - return products_qs + ).filter(rank__gt=0.05).order_by('-rank') + + return set(products_qs) def find_related_products_v4(keyword): @@ -137,4 +140,4 @@ def find_related_products_v4(keyword): similarity=TrigramSimilarity('name', keyword), ).order_by('-similarity') - return products_qs + return set(products_qs) diff --git a/products/views.py b/products/views.py index 33d65ea..44f8e6e 100644 --- a/products/views.py +++ b/products/views.py @@ -155,12 +155,11 @@ def product_search(request): chunks = query_string.split(' ') for chunk in chunks: - # import ipdb; ipdb.set_trace() - products_qs = find_related_products_v5(chunk) - # products_qs = find_related_products_v4(chunk) - # products_qs = find_related_products_v3(chunk) - for instance in products_qs: - result_set.add(instance) + product_set = find_related_products_v5(chunk) + # product_set = find_related_products_v4(chunk) + # product_set = find_related_products_v3(chunk) + # add to result set + result_set.update(product_set) # TODO: add search for entire phrase # extract filters from result_set From e31c64eea82cb1cfd6ea70242668e4b519f881d0 Mon Sep 17 00:00:00 2001 From: Sam Date: Fri, 19 Feb 2021 11:20:03 +0000 Subject: [PATCH 12/15] product search with ranking is working --- products/tests.py | 26 ++++++++++++++++++-------- products/utils.py | 10 +++------- products/views.py | 7 +++---- 3 files changed, 24 insertions(+), 19 deletions(-) diff --git a/products/tests.py b/products/tests.py index 72c8aee..4f5ad49 100644 --- a/products/tests.py +++ b/products/tests.py @@ -467,8 +467,6 @@ class ProductSearchTest(TestCase): self.factory(tags="lunares/azules", description="zapatos rojos"), self.factory(tags="lunares/rojos", description="zapatos"), self.factory(attributes='"zapatos de campo", tono/oscuro'), - # TODO: workaround for v3 with multi-word tags - # self.factory(attributes='zapatos, "zapatos de campo", tono/oscuro'), ] unexpected_instances = [ self.factory(description="chanclas"), @@ -549,18 +547,29 @@ class FindRelatedProductsTest(APITestCase): # clear table self.model.objects.all().delete() - def test_v3_find_by_single_tag(self): + def test_v3_find_by_tags(self): # create tagged product tag = 'cool' expected_instances = [ - self.factory(tags=tag) + self.factory(tags=tag), + self.factory(tags=f'{tag} hat'), + self.factory(tags=f'temperatures/{tag}'), + self.factory(tags=f'temperatures/{tag}, body/hot'), + self.factory(tags=f'temperatures/{tag}, hats/{tag}'), + # multiple hits + self.factory(tags=tag, attributes=tag), + self.factory(tags=tag, attributes=tag, category=tag), + self.factory(tags=tag, attributes=tag, category=tag, name=tag), + self.factory(tags=tag, attributes=tag, category=tag, name=tag, description=tag), ] - # instance = self.factory() - # instance.tags.set(tag) - # instance.save() + + unexpected_instances = [ + self.factory(tags="notcool"), # shouldn't catch it + self.factory(tags="azules"), + ] + # searh for it results = find_related_products_v3(tag) - import ipdb; ipdb.set_trace() # assert result self.assertTrue(len(results) == len(expected_instances)) @@ -568,3 +577,4 @@ class FindRelatedProductsTest(APITestCase): + diff --git a/products/utils.py b/products/utils.py index 8353ef6..2b6f8e0 100644 --- a/products/utils.py +++ b/products/utils.py @@ -114,25 +114,21 @@ def find_related_products_v3(keyword): SearchVectors for the fields SearchQuery for the value SearchRank for relevancy scoring and ranking - - PROBLEM: returns unrelated instances """ - # TODO: figure out why it includes unrelated instances - # fields=('name', 'description', 'tags__label', 'attributes__label', 'category__name') - vector = SearchVector('name') + SearchVector('description') + SearchVector('tags__label') + SearchVector('attributes__label') + SearchVector('category__name') query = SearchQuery(keyword) products_qs = Product.objects.annotate( rank=SearchRank(vector, query) - ).filter(rank__gt=0.05).order_by('-rank') + ).filter(rank__gt=0.05) # removed order_by because its lost in casting return set(products_qs) def find_related_products_v4(keyword): """ - Using trigrams + Similarity-ranked search using trigrams + Not working """ # fields=('name', 'description', 'tags__label', 'attributes__label', 'category__name') diff --git a/products/views.py b/products/views.py index 44f8e6e..494c5f4 100644 --- a/products/views.py +++ b/products/views.py @@ -23,7 +23,7 @@ from companies.models import Company from history.models import HistorySync from back_latienda.permissions import IsCreator -from .utils import extract_search_filters, find_related_products_v5, find_related_products_v4, find_related_products_v3 +from .utils import extract_search_filters, find_related_products_v3 from utils.tag_serializers import TaggitSerializer from utils.tag_filters import ProductTagFilter @@ -155,9 +155,7 @@ def product_search(request): chunks = query_string.split(' ') for chunk in chunks: - product_set = find_related_products_v5(chunk) - # product_set = find_related_products_v4(chunk) - # product_set = find_related_products_v3(chunk) + product_set = find_related_products_v3(chunk) # add to result set result_set.update(product_set) # TODO: add search for entire phrase @@ -166,6 +164,7 @@ def product_search(request): filters = extract_search_filters(result_set) # serialize and respond product_serializer = ProductSearchSerializer(result_set, many=True, context={'request': request}) + # TODO: send product data in order by rank value return Response(data={"filters": filters, "products": product_serializer.data}) except Exception as e: return Response({"errors": {"details": str(e)}}, status=status.HTTP_500_INTERNAL_SERVER_ERROR) From b1e0cccd7faac9cce68eb313bf7ceba82bf45a94 Mon Sep 17 00:00:00 2001 From: Sam Date: Fri, 19 Feb 2021 11:51:38 +0000 Subject: [PATCH 13/15] work on returning search results in order --- products/serializers.py | 11 ----------- products/tests.py | 2 +- products/views.py | 14 +++++++++----- 3 files changed, 10 insertions(+), 17 deletions(-) diff --git a/products/serializers.py b/products/serializers.py index 0cd0dee..a6cc29b 100644 --- a/products/serializers.py +++ b/products/serializers.py @@ -17,17 +17,6 @@ class ProductSerializer(TaggitSerializer, serializers.ModelSerializer): exclude = ['created', 'updated', 'creator'] -class ProductSearchSerializer(TaggitSerializer, serializers.ModelSerializer): - - tags = TagListSerializerField(required=False) - category = SingleTagSerializerField(required=False) # main tag category - attributes = TagListSerializerField(required=False) - - class Meta: - model = Product - exclude = ['created', 'updated', 'creator'] - - class TagFilterSerializer(TaggitSerializer, serializers.ModelSerializer): tags = TagListSerializerField(required=False) diff --git a/products/tests.py b/products/tests.py index 4f5ad49..2fdb597 100644 --- a/products/tests.py +++ b/products/tests.py @@ -462,7 +462,7 @@ class ProductSearchTest(TestCase): def test_anon_user_can_search(self): expected_instances = [ - self.factory(tags="lunares/blancos",description="zapatos verdes"), + self.factory(tags="lunares/rojos", category='zapatos', description="zapatos verdes"), self.factory(tags="colores/rojos, tono/brillante"), self.factory(tags="lunares/azules", description="zapatos rojos"), self.factory(tags="lunares/rojos", description="zapatos"), diff --git a/products/views.py b/products/views.py index 494c5f4..e2f3df4 100644 --- a/products/views.py +++ b/products/views.py @@ -7,6 +7,7 @@ from functools import reduce from django.shortcuts import render from django.conf import settings from django.db.models import Q +from django.core import serializers # Create your views here. from rest_framework import status @@ -18,7 +19,7 @@ from rest_framework.decorators import api_view, permission_classes, action import requests from products.models import Product -from products.serializers import ProductSerializer, TagFilterSerializer, ProductSearchSerializer +from products.serializers import ProductSerializer, TagFilterSerializer from companies.models import Company from history.models import HistorySync @@ -162,9 +163,12 @@ def product_search(request): # extract filters from result_set filters = extract_search_filters(result_set) - # serialize and respond - product_serializer = ProductSearchSerializer(result_set, many=True, context={'request': request}) - # TODO: send product data in order by rank value - return Response(data={"filters": filters, "products": product_serializer.data}) + # order results and respond + result_list = list(result_set) + ranked_products = sorted(result_list, key= lambda rank:rank.rank, reverse=True) + # TODO: slice ranked_products as per pagination + import ipdb; ipdb.set_trace() + data = serializers.serialize('json', ranked_products) + return Response(data={"filters": filters, "products": data}) except Exception as e: return Response({"errors": {"details": str(e)}}, status=status.HTTP_500_INTERNAL_SERVER_ERROR) From 64fa4eb34bc54112e70eeaef95b29d8a8c2b1466 Mon Sep 17 00:00:00 2001 From: Sam Date: Fri, 19 Feb 2021 12:12:33 +0000 Subject: [PATCH 14/15] added search result serializer, search results ordered by rank --- products/serializers.py | 12 ++++++++++++ products/views.py | 7 +++---- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/products/serializers.py b/products/serializers.py index a6cc29b..769c7c9 100644 --- a/products/serializers.py +++ b/products/serializers.py @@ -17,6 +17,18 @@ class ProductSerializer(TaggitSerializer, serializers.ModelSerializer): exclude = ['created', 'updated', 'creator'] +class SearchResultSerializer(TaggitSerializer, serializers.ModelSerializer): + + tags = TagListSerializerField(required=False) + category = SingleTagSerializerField(required=False) # main tag category + attributes = TagListSerializerField(required=False) + rank = serializers.FloatField() + + class Meta: + model = Product + exclude = ['created', 'updated', 'creator'] + + class TagFilterSerializer(TaggitSerializer, serializers.ModelSerializer): tags = TagListSerializerField(required=False) diff --git a/products/views.py b/products/views.py index e2f3df4..39466da 100644 --- a/products/views.py +++ b/products/views.py @@ -19,7 +19,7 @@ from rest_framework.decorators import api_view, permission_classes, action import requests from products.models import Product -from products.serializers import ProductSerializer, TagFilterSerializer +from products.serializers import ProductSerializer, TagFilterSerializer, SearchResultSerializer from companies.models import Company from history.models import HistorySync @@ -167,8 +167,7 @@ def product_search(request): result_list = list(result_set) ranked_products = sorted(result_list, key= lambda rank:rank.rank, reverse=True) # TODO: slice ranked_products as per pagination - import ipdb; ipdb.set_trace() - data = serializers.serialize('json', ranked_products) - return Response(data={"filters": filters, "products": data}) + serializer = SearchResultSerializer(ranked_products, many=True) + return Response(data={"filters": filters, "products": [dict(i) for i in serializer.data]}) except Exception as e: return Response({"errors": {"details": str(e)}}, status=status.HTTP_500_INTERNAL_SERVER_ERROR) From 47d31a21cc30bc88fd318af0562645f5066bb9c5 Mon Sep 17 00:00:00 2001 From: Sam Date: Fri, 19 Feb 2021 12:31:31 +0000 Subject: [PATCH 15/15] search result pagination working --- products/tests.py | 31 +++++++++++++++++++++++++++++++ products/views.py | 21 +++++++++++++++++++-- 2 files changed, 50 insertions(+), 2 deletions(-) diff --git a/products/tests.py b/products/tests.py index 2fdb597..46f57c7 100644 --- a/products/tests.py +++ b/products/tests.py @@ -488,10 +488,41 @@ class ProductSearchTest(TestCase): # check ids for i in range(len(payload['products'])): self.assertTrue(payload['products'][i]['id'] == expected_instances[i].id) + # check results ordered by rank + current = 1 + for i in range(len(payload['products'])): + self.assertTrue(payload['products'][i]['rank'] <= current ) + current = payload['products'][i]['rank'] # check for filters self.assertNotEquals([], payload['filters']['singles']) self.assertTrue(len(payload['filters']) >= 2 ) + def test_anon_user_can_paginate_search(self): + expected_instances = [ + self.factory(tags="lunares/rojos", category='zapatos', description="zapatos verdes"), + self.factory(tags="colores/rojos, tono/brillante"), + self.factory(tags="lunares/azules", description="zapatos rojos"), + self.factory(tags="lunares/rojos", description="zapatos"), + self.factory(attributes='"zapatos de campo", tono/oscuro'), + ] + unexpected_instances = [ + self.factory(description="chanclas"), + self.factory(tags="azules"), + ] + + query_string = quote("zapatos rojos") + limit = 2 + + url = f"{self.endpoint}?query_string={query_string}&limit=2" + # send in request + response = self.client.get(url) + + # check response + self.assertEqual(response.status_code, 200) + # load response data + payload = response.json() + # check for object creation + self.assertEquals(len(payload['products']), limit) class MyProductsViewTest(APITestCase): """my_products tests diff --git a/products/views.py b/products/views.py index 39466da..a54ea68 100644 --- a/products/views.py +++ b/products/views.py @@ -145,8 +145,14 @@ def load_coop_products(request): def product_search(request): """ Takes a string of data, return relevant products + + Params: + - query_string: used for search [MANDATORY] + - limit: max number of returned instances [OPTIONAL] + - offset: where to start counting results [OPTIONAL] """ query_string = request.GET.get('query_string', None) + if query_string is None: return Response({"errors": {"details": "No query string to parse"}}) try: @@ -166,8 +172,19 @@ def product_search(request): # order results and respond result_list = list(result_set) ranked_products = sorted(result_list, key= lambda rank:rank.rank, reverse=True) - # TODO: slice ranked_products as per pagination serializer = SearchResultSerializer(ranked_products, many=True) - return Response(data={"filters": filters, "products": [dict(i) for i in serializer.data]}) + product_results = [dict(i) for i in serializer.data] + # check for pagination + limit = request.GET.get('limit', None) + offset = request.GET.get('offset', None) + if limit is not None and offset is not None: + limit = int(limit) + offset = int(offset) + product_results = product_results[offset:(limit+offset)] + elif limit is not None: + limit = int(limit) + product_results = product_results[:limit] + + return Response(data={"filters": filters, "products": product_results}) except Exception as e: return Response({"errors": {"details": str(e)}}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)