291 lines
9.6 KiB
Python
291 lines
9.6 KiB
Python
import logging
|
|
import datetime
|
|
|
|
from django.db.models import Q
|
|
from django.contrib.postgres.search import SearchQuery, SearchRank, SearchVector, TrigramSimilarity
|
|
from django.db.models import Max, Min
|
|
from django.conf import settings
|
|
|
|
import requests
|
|
|
|
from products.models import Product
|
|
from companies.models import Company
|
|
from history.models import HistorySync
|
|
|
|
|
|
def _group_names_by_header(entries, bucket):
    """
    Sort the ``name`` of every entry into ``bucket``.

    A name without '/' goes into ``bucket['singles']``. For a name with one
    or more '/', the penultimate chunk is used as the header key and the
    last chunk is added to the set stored under that header.
    """
    for entry in entries:
        chunks = entry.name.split('/')
        if len(chunks) == 1:
            bucket['singles'].add(entry.name)
        else:
            # penultimate chunk is the header, last chunk is the value
            bucket.setdefault(chunks[-2], set()).add(chunks[-1])


def extract_search_filters(result_set):
    """
    Collect the tag and attribute names found in ``result_set``.

    Each item must expose ``tags.all()`` and ``attributes.all()`` returning
    objects with a ``name`` attribute. Items that raise while being read are
    logged (with traceback) and skipped; anything collected from them before
    the error is kept.

    Returned object looks like:

    {
        "tags": {
            'singles': set(),
            'header1': set(),
        },
        "attributes": {
            'singles': set(),
            'header1': set(),
        },
    }
    """
    filter_dict = {
        "tags": {
            'singles': set(),
        },
        "attributes": {
            'singles': set(),
        },
    }
    for item in result_set:
        try:
            _group_names_by_header(item.tags.all(), filter_dict['tags'])
            _group_names_by_header(item.attributes.all(), filter_dict['attributes'])
        except Exception:
            # best-effort: one broken item must not abort the whole extraction
            logging.exception(f'Error extracting filters for {item}')
    return filter_dict
|
|
|
|
|
|
def find_related_products_v1(keyword):
    """
    Classical approach to the search, using Q objects.

    Looks the keyword up (case-insensitive substring) in the tag, category
    and attribute tag models first, then returns the products whose name or
    description contains the keyword or that reference any of the matched
    tags / categories / attributes.

    Returns a QuerySet of Product. ``distinct()`` is applied because the
    OR across the multi-valued ``tags`` / ``attributes`` relations would
    otherwise yield the same product once per matching joined row.
    """
    # search in tags
    tags = Product.tags.tag_model.objects.filter(name__icontains=keyword)
    # search in category
    categories = Product.category.tag_model.objects.filter(name__icontains=keyword)
    # search in attributes
    attributes = Product.attributes.tag_model.objects.filter(name__icontains=keyword)

    # unified search over own fields and related tag models
    products_qs = Product.objects.filter(
        Q(name__icontains=keyword)
        | Q(description__icontains=keyword)
        | Q(tags__in=tags)
        | Q(category__in=categories)
        | Q(attributes__in=attributes)
    ).distinct()
    return products_qs
|
|
|
|
|
|
def find_related_products_v5(keyword):
    """
    Single query solution, using Q objects.

    ORs a case-insensitive "contains" lookup over the product's own fields
    and its related tag / category / attribute fields, and returns the
    matching products as a set.
    """
    lookup_fields = (
        'name',
        'description',
        'tags__label',
        'category__name',
        'attributes__label',
    )
    condition = Q()
    for field in lookup_fields:
        # an empty Q() is neutral, so this builds field1 | field2 | ...
        condition |= Q(**{f'{field}__icontains': keyword})

    matches = Product.objects.filter(condition)
    return set(matches)
|
|
|
|
|
|
def find_related_products_v2(keyword):
    """
    More advanced: using search vectors.

    Annotates every product with a single SearchVector built from its name,
    description, tag labels, attribute labels and category name, filters on
    that annotation with the keyword, and returns the matches as a set.
    """
    searchable = SearchVector(
        'name',
        'description',
        'tags__label',
        'attributes__label',
        'category__name',
    )
    annotated = Product.objects.annotate(search=searchable)
    matches = annotated.filter(search=keyword)
    return set(matches)
|
|
|
|
|
|
def find_related_products_v3(keyword):
    """
    Ranked product search.

    - one SearchVector per field, added together
    - SearchQuery for the keyword
    - SearchRank to score each product

    Products whose rank is above 0.05 are returned as a set, so no
    ordering by rank is preserved.
    """
    columns = ('name', 'description', 'tags__label', 'attributes__label', 'category__name')

    # combine the per-field vectors left to right
    vector = SearchVector(columns[0])
    for column in columns[1:]:
        vector = vector + SearchVector(column)

    relevance = SearchRank(vector, SearchQuery(keyword))
    ranked = Product.objects.annotate(rank=relevance)

    # no order_by here: ordering would be lost when casting to a set
    return set(ranked.filter(rank__gt=0.05))
|
|
|
|
|
|
def find_related_products_v6(keyword, shipping_cost=None, discount=None, category=None, tags=None, price_min=None, price_max=None):
    """
    Ranked product search with optional filters.

    - one SearchVector per field, added together
    - SearchQuery for the keyword
    - SearchRank to score each product; only rank > 0.05 is kept

    Optional filters (each is skipped when left as None):
    - category: products with exactly this category
    - tags: products matching this tags value
    - shipping_cost: True -> shipping_cost >= 1; False -> shipping_cost is
      NULL or 0
    - discount: True -> discount >= 1; False -> discount is NULL or 0
    - price_min / price_max: exclusive lower / upper price bounds

    Returns a tuple ``(products, min_price, max_price)`` where ``products``
    is a set of Product, ``min_price`` is ``{'price__min': value}`` and
    ``max_price`` is ``{'price__max': value}``, both computed over the
    filtered products.
    """
    vector = (
        SearchVector('name')
        + SearchVector('description')
        + SearchVector('tags__label')
        + SearchVector('attributes__label')
        + SearchVector('category__name')
    )
    query = SearchQuery(keyword)

    # no order_by: ordering would be lost when casting to a set
    products_qs = Product.objects.annotate(
        rank=SearchRank(vector, query)
    ).filter(rank__gt=0.05)

    # filter by category
    if category is not None:
        products_qs = products_qs.filter(category=category)

    # filter by tags
    if tags is not None:
        products_qs = products_qs.filter(tags=tags)

    # filter by shipping cost
    # NOTE(review): values strictly between 0 and 1 match neither branch - confirm intended
    if shipping_cost is True:
        # only instances with shipping costs
        products_qs = products_qs.filter(
            Q(shipping_cost__isnull=False)
            & Q(shipping_cost__gte=1)
        )
    elif shipping_cost is False:
        # only instances without shipping costs
        products_qs = products_qs.filter(Q(shipping_cost=None) | Q(shipping_cost=0.00))

    # filter by discount
    # NOTE(review): values strictly between 0 and 1 match neither branch - confirm intended
    if discount is True:
        # only instances with a discount
        products_qs = products_qs.filter(
            Q(discount__isnull=False)
            & Q(discount__gte=1)
        )
    elif discount is False:
        # only instances without a discount
        products_qs = products_qs.filter(Q(discount=None) | Q(discount=0.00))

    # filter by price
    # NOTE(review): bounds are exclusive (gt / lt) - confirm that products priced
    # exactly at price_min / price_max are meant to be dropped
    if price_min is not None:
        products_qs = products_qs.filter(price__gt=price_min)
    if price_max is not None:
        products_qs = products_qs.filter(price__lt=price_max)

    # get both price bounds in a single aggregate query, then expose them in
    # the same one-key dict shape callers already receive
    price_bounds = products_qs.aggregate(Min('price'), Max('price'))
    min_price = {'price__min': price_bounds['price__min']}
    max_price = {'price__max': price_bounds['price__max']}

    return set(products_qs), min_price, max_price
|
|
|
|
|
|
def find_related_products_v4(keyword):
    """
    Similarity-ranked search using trigrams.

    Marked as "Not working" by the original author.

    Annotates every product with the trigram similarity between its name
    and the keyword and orders by it, descending. No filter is applied, and
    only the ``name`` field is compared.
    """
    name_similarity = TrigramSimilarity('name', keyword)
    ordered = Product.objects.annotate(
        similarity=name_similarity,
    ).order_by('-similarity')

    # NOTE(review): casting to a set discards the ordering computed above
    return set(ordered)
|
|
|
|
|
|
def _attach_product_image(product, image_url):
    """
    Download ``image_url`` and store it in ``product.image``.

    Best-effort: any failure (unreachable source, non-200 response,
    undecodable image, ...) is logged and swallowed so the product itself
    is kept.
    """
    # Local imports: neither name is imported at module level in this file.
    from io import BytesIO
    from django.core.files import File

    # NOTE(review): `Image` is not imported anywhere in the visible file
    # (presumably `from PIL import Image` is intended). Until that import is
    # added, the lookup below raises NameError, which is logged by the
    # generic handler and the image is skipped.
    headers = {"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36"}
    try:
        # timeout so an unresponsive host cannot stall the whole import
        response = requests.get(image_url, stream=True, headers=headers, timeout=10)
        # explicit check instead of `assert`, which is stripped under `python -O`
        if response.status_code != 200:
            logging.error(f"Source image [{image_url}] not reachable: {response.status_code}")
            return
        response.raw.decode_content = True
        image = Image.open(response.raw)
        # save using File object
        img_io = BytesIO()
        image.save(img_io, format=image.format)
        product.image.save(f"{product.company.company_name}{product.name}.{image.format.lower()}", File(img_io), save=False)
        product.save()
    except Exception as e:
        logging.error(f"Could not add image to product from [{image_url}]: {str(e)}")


def product_loader(csv_reader, user):
    """
    Parse csv data and create one Product per valid row.

    ``csv_reader`` must yield dict-like rows keyed by column name. Every
    non-empty value is stripped and lower-cased, except the 'imagen' URL,
    which is only stripped because URLs can be case-sensitive. Rows missing
    any of 'nombre-producto', 'descripcion', 'precio' or 'categoria' are
    logged and skipped. When 'imagen' is set, the image is downloaded and
    attached on a best-effort basis.

    A HistorySync record is created for ``user.company`` and updated with
    the number of products created.

    Returns the number of products created.
    """
    from django.utils import timezone

    required_columns = ('nombre-producto', 'descripcion', 'precio', 'categoria')
    # columns whose letter case must be preserved
    case_sensitive_columns = ('imagen',)

    counter = 0
    # create historysync instance
    history = HistorySync.objects.create(company=user.company, sync_date=timezone.now())

    for row in csv_reader:
        # trim strings
        for key in row:
            value = row[key]
            if not value:
                continue
            value = value.strip()
            row[key] = value if key in case_sensitive_columns else value.lower()

        # check required data (missing column, None and '' all count as missing)
        if not all(row.get(column) for column in required_columns):
            logging.error(f"Required data missing: {row}")
            continue

        try:
            # TODO: if tags is empty, auto-generate tags
            # TODO: the remaining CSV columns (url, precio, gastos-envio,
            # cond-envio, descuento, stock, tags, categoria, identificadores)
            # and the `history` link are not persisted yet.

            # assemble instance data (values were already stripped above)
            product_data = {
                'company': user.company,
                'name': row['nombre-producto'],
                'description': row['descripcion'],
                'creator': user,
            }
            product = Product.objects.create(**product_data)
        except Exception:
            # log with traceback and carry on with the next row
            logging.exception(f"Could not parse {row}")
            continue

        # image logo data
        image_url = row.get('imagen')
        if image_url:
            _attach_product_image(product, image_url)

        logging.info(f"Created Product {product.id}")
        counter += 1

    history.quantity = counter
    history.save()
    return counter
|