144 lines
4.3 KiB
Python
144 lines
4.3 KiB
Python
import logging
|
|
|
|
from django.db.models import Q
|
|
from django.contrib.postgres.search import SearchQuery, SearchRank, SearchVector, TrigramSimilarity
|
|
|
|
from products.models import Product
|
|
|
|
|
|
|
|
def _collect_filter_names(filter_dict, named_objects):
    """
    Sort the ``name`` of every object in ``named_objects`` into ``filter_dict``.

    ``filter_dict`` is mutated in place. A name without ``/`` goes into the
    ``'singles'`` bucket; otherwise the penultimate path chunk becomes the
    bucket key and the last chunk becomes the stored value.
    """
    for obj in named_objects:
        chunks = obj.name.split('/')
        if len(chunks) == 1:
            # non tree tag
            filter_dict['singles'].add(obj.name)
        else:
            # penultimate chunk is the header, last chunk is the entry
            filter_dict.setdefault(chunks[-2], set()).add(chunks[-1])


def extract_search_filters(result_set):
    """
    Build the filter buckets for a collection of search results.

    For every item, the names of ``item.tags.all()`` and
    ``item.attributes.all()`` are split on ``/`` and grouped.

    Returned object looks something like (values are sets, not lists):

    {
        "singles": {...},              # non tree tags
        "entry_1": {'tag1', 'tag2'},
        "entry_2": {'tag1', 'tag2'},
    }

    An item that raises while being processed is logged and skipped; names
    collected from it before the failure are kept, and the remaining items
    are still processed.
    """
    filter_dict = {
        'singles': set(),
    }
    for item in result_set:
        try:
            _collect_filter_names(filter_dict, item.tags.all())
            _collect_filter_names(filter_dict, item.attributes.all())
        except Exception:
            # Best effort: one broken item must not abort the whole extraction.
            # logging.exception records the traceback; the lazy %s formatting
            # keeps a failing __str__ on the item from escaping the handler.
            logging.exception('Extacting filters for %s', item)
    return filter_dict
|
|
|
|
|
|
def find_related_products_v1(keyword):
    """
    Classical approach to the search, using Q objects.

    The tag, category and attribute tag models are each filtered by a
    case-insensitive substring match on ``name``. Products are then selected
    when their own ``name`` or ``description`` contains ``keyword``, or when
    they reference one of the matching tags, categories or attributes.

    Returns a ``Product`` queryset. ``.distinct()`` is applied because the OR
    filter joins across ``tags`` and ``attributes``; without it a product
    related to several rows of those relations is returned once per joined
    row.
    """
    # search in tags
    matching_tags = Product.tags.tag_model.objects.filter(name__icontains=keyword)
    # search in category
    matching_categories = Product.category.tag_model.objects.filter(name__icontains=keyword)
    # search in attributes
    matching_attributes = Product.attributes.tag_model.objects.filter(name__icontains=keyword)

    # unified search over the product's own fields and its related tags
    return Product.objects.filter(
        Q(name__icontains=keyword)
        | Q(description__icontains=keyword)
        | Q(tags__in=matching_tags)
        | Q(category__in=matching_categories)
        | Q(attributes__in=matching_attributes)
    ).distinct()
|
|
|
|
|
|
def find_related_products_v5(keyword):
    """
    Single query solution, using Q objects.

    A product matches when ``keyword`` appears (case-insensitively) in its
    name, its description, a tag label, its category name or an attribute
    label. The matching products are returned as a ``set``.
    """
    lookups = (
        'name__icontains',
        'description__icontains',
        'tags__label__icontains',
        'category__name__icontains',
        'attributes__label__icontains',
    )
    clauses = [Q(**{lookup: keyword}) for lookup in lookups]

    # OR the clauses together left to right
    condition = clauses[0]
    for clause in clauses[1:]:
        condition = condition | clause

    matches = Product.objects.filter(condition)
    return set(matches)
|
|
|
|
|
|
def find_related_products_v2(keyword):
    """
    More advanced: using search vectors.

    Annotates every product with one ``SearchVector`` built from its name,
    description, tag labels, attribute labels and category name, filters on
    that annotation with ``keyword`` and returns the matches as a ``set``.
    """
    searchable_fields = (
        'name',
        'description',
        'tags__label',
        'attributes__label',
        'category__name',
    )
    annotated = Product.objects.annotate(search=SearchVector(*searchable_fields))
    matches = annotated.filter(search=keyword)
    return set(matches)
|
|
|
|
|
|
def find_related_products_v3(keyword):
    """
    Ranked product search.

    SearchVectors for the fields
    SearchQuery for the value
    SearchRank for relevancy scoring and ranking

    Products whose rank is above 0.05 are kept and returned as a ``set``
    (so the ``-rank`` ordering applied to the queryset is not carried by the
    returned value).

    PROBLEM: returns unrelated instances
    """
    # TODO: figure out why it includes unrelated instances
    combined_vector = SearchVector('name')
    for field in ('description', 'tags__label', 'attributes__label', 'category__name'):
        combined_vector = combined_vector + SearchVector(field)

    relevance = SearchRank(combined_vector, SearchQuery(keyword))

    ranked = Product.objects.annotate(rank=relevance)
    ranked = ranked.filter(rank__gt=0.05)
    ranked = ranked.order_by('-rank')

    return set(ranked)
|
|
|
|
|
|
def find_related_products_v4(keyword):
    """
    Using trigrams.

    Every product is annotated with the trigram similarity between its
    ``name`` and ``keyword``; the queryset is ordered by that value,
    descending, and then returned as a ``set``. No filter is applied.
    """
    name_similarity = TrigramSimilarity('name', keyword)
    scored = Product.objects.annotate(similarity=name_similarity)
    scored = scored.order_by('-similarity')
    return set(scored)
|