Files
consumocuidado-server/products/utils.py
2021-03-11 12:17:40 +00:00

303 lines
10 KiB
Python

import logging
import datetime
from decimal import Decimal
from django.db.models import Q
from django.contrib.postgres.search import SearchQuery, SearchRank, SearchVector, TrigramSimilarity
from django.db.models import Max, Min
from django.conf import settings
from django.utils import timezone
from django.contrib.gis.geos import Point
from django.contrib.gis.measure import D
import requests
from products.models import Product
from companies.models import Company
from history.models import HistorySync
from PIL import Image
from io import BytesIO
from django.core.files import File
# Configure the root logger as soon as this module is imported:
# - records of level INFO and above are written to 'logs/csv-load.log'
#   (a relative path, so it is resolved against the process working directory);
# - filemode='w' opens the log file for writing, truncating previous content;
# - each record is rendered as "LEVEL:message".
logging.basicConfig(
filename='logs/csv-load.log',
filemode='w',
format='%(levelname)s:%(message)s',
level=logging.INFO,
)
def _group_names_by_header(named_items, bucket):
    """Sort the slash-delimited ``name`` of each item into *bucket*.

    A name without '/' is added to ``bucket['singles']``. Otherwise the last
    chunk is added to a set stored under the penultimate chunk (the header),
    creating that set on first use.
    """
    for entry in named_items:
        chunks = entry.name.split('/')
        if len(chunks) == 1:
            bucket['singles'].add(entry.name)
        else:
            # penultimate chunk is the header, last chunk is the value
            bucket.setdefault(chunks[-2], set()).add(chunks[-1])


def extract_search_filters(result_set):
    """Collect the tag and attribute names used by the items in *result_set*.

    Every item is expected to expose ``tags.all()`` and ``attributes.all()``,
    each yielding objects with a ``name`` string.

    Returned object looks like:
        {
            "tags": {
                'singles': {'name', ...},
                'header1': {'value', ...},
            },
            "attributes": {
                'singles': {'name', ...},
                'header1': {'value', ...},
            },
        }

    All values are sets. Items that raise while being processed are logged
    and skipped (names gathered from that item before the failure are kept),
    so one bad item does not abort the whole extraction.
    """
    filter_dict = {
        "tags": {'singles': set()},
        "attributes": {'singles': set()},
    }
    for item in result_set:
        try:
            _group_names_by_header(item.tags.all(), filter_dict['tags'])
            _group_names_by_header(item.attributes.all(), filter_dict['attributes'])
        except Exception as e:
            # best-effort: record why this item failed and keep going
            logging.error(f'Extacting filters for {item}: {e}')
    return filter_dict
def get_related_products(product):
    """Return a list of up to 10 products related to *product*.

    Searches are tried in order and stop as soon as 10 results have been
    collected:
      1. products in the same category
      2. products sharing at least one tag
      3. products of the same company
      4. the most recently created products

    Each stage contributes at most 10 rows. A product already collected by
    an earlier stage (or repeated by the tags join) is added only once.
    """
    limit = 10
    # Querysets are lazy: a stage only hits the database when it is iterated.
    stages = (
        Product.objects.filter(category=product.category),
        Product.objects.filter(tags__in=product.tags.all()),
        Product.objects.filter(company=product.company),
        Product.objects.order_by('-created'),
    )
    total_results = []
    seen_pks = set()
    for stage_qs in stages:
        # check size before running the next, broader search
        if len(total_results) >= limit:
            break
        for item in stage_qs[:limit]:
            if item.pk not in seen_pks:
                seen_pks.add(item.pk)
                total_results.append(item)
    return total_results[:limit]
def ranked_product_search(keywords, shipping_cost=None, discount=None, category=None, tags=None, price_min=None, price_max=None, coordinates=None):
    """Ranked full-text product search with optional filters.

    SearchVectors for the fields
    SearchQuery for the value
    SearchRank for relevancy scoring and ranking

    Only active products with a rank above 0.05 are considered.

    Optional filters:
        - shipping_cost: True -> only products with a shipping cost,
          False -> only products without one, None -> no filtering
        - discount: same three-state logic as shipping_cost
        - category, tags: passed straight to the queryset filter
        - price_min, price_max: inclusive price bounds
        - coordinates: passed to ``Point``; results are narrowed to the
          smallest radius (10, 50, 200 km) around it that yields more than
          10 rows. If no radius does, no geographic filter is applied.
          NOTE(review): presumably an (x, y) / (lon, lat) pair - confirm
          against the caller.

    Returns a 4-tuple:
        - result_set: ``set`` of matching products (unordered, so the rank
          ordering is not preserved)
        - min_price: ``{'price__min': value}``
        - max_price: ``{'price__max': value}``
        - georesult: '10k', '50k', '200k', or None when no geographic
          filter was applied
    """
    vector = (
        SearchVector('name')
        + SearchVector('description')
        + SearchVector('tags__label')
        + SearchVector('attributes__label')
        + SearchVector('category__label')
        + SearchVector('company__company_name')
    )
    query = SearchQuery(keywords, search_type='plain')

    products_qs = Product.objects.annotate(
        rank=SearchRank(vector, query)
    ).filter(rank__gt=0.05, active=True)

    # geolocation filtering: widen the radius until there are enough results
    georesult = None
    if coordinates is not None:
        point = Point(coordinates)
        for radius_km, label in ((10, '10k'), (50, '50k'), (200, '200k')):
            nearby_qs = products_qs.filter(
                company__geo__distance_lte=(point, D(km=radius_km))
            )
            if nearby_qs.count() > 10:
                products_qs = nearby_qs
                georesult = label
                break

    # filter by category
    if category is not None:
        products_qs = products_qs.filter(category=category)

    # filter by tags
    if tags is not None:
        products_qs = products_qs.filter(tags=tags)

    # filter by shipping cost
    if shipping_cost is True:
        # only instances with shipping costs; "> 0" is the exact complement
        # of the "no shipping cost" branch below (NULL comparisons never match)
        products_qs = products_qs.filter(shipping_cost__gt=0)
    elif shipping_cost is False:
        # only instances without shipping costs
        products_qs = products_qs.filter(Q(shipping_cost=None) | Q(shipping_cost=0.00))

    # filter by discount
    if discount is True:
        # only instances with a discount
        products_qs = products_qs.filter(discount__gt=0)
    elif discount is False:
        # only instances without a discount
        products_qs = products_qs.filter(Q(discount=None) | Q(discount=0.00))

    # filter by price
    if price_min is not None:
        products_qs = products_qs.filter(price__gte=price_min)
    if price_max is not None:
        products_qs = products_qs.filter(price__lte=price_max)

    # get min_price and max_price in a single query, keeping the two
    # separate dicts that callers receive
    price_bounds = products_qs.aggregate(Min('price'), Max('price'))
    min_price = {'price__min': price_bounds['price__min']}
    max_price = {'price__max': price_bounds['price__max']}

    return set(products_qs), min_price, max_price, georesult
# CSV columns holding monetary amounts that are converted to Decimal
_AMOUNT_KEYS = ('precio', 'gastos-envio')
# CSV columns that must carry a value for a row to be imported
_REQUIRED_KEYS = ('nombre-producto', 'descripcion', 'precio')
# seconds to wait for a remote image before giving up
_IMAGE_FETCH_TIMEOUT = 10
_IMAGE_REQUEST_HEADERS = {"User-Agent" : "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36"}


def _parse_amount(raw_value):
    """Convert a CSV amount string into a Decimal.

    Trailing non-digit characters are dropped (presumably a currency symbol,
    possibly preceded by a space) and a decimal comma is turned into a dot.
    Raises ``decimal.InvalidOperation`` when nothing numeric remains.
    """
    text = raw_value.strip()
    # only strip what is not a digit, so a bare number keeps its last digit
    while text and not text[-1].isdigit():
        text = text[:-1]
    return Decimal(text.replace(',', '.'))


def _clean_row(row):
    """Normalise the values of a CSV *row* in place.

    Empty values become None, amount columns become Decimal, and every other
    value is stripped of surrounding whitespace. A value that cannot be
    normalised is logged and left untouched.
    """
    for key in row:
        try:
            value = row[key]
            if not value:
                row[key] = None
            elif key in _AMOUNT_KEYS:
                row[key] = _parse_amount(value)
            else:
                row[key] = value.strip()
        except Exception as e:
            logging.error(f"Could not access key {key}: {str(e)}")


def _attach_remote_image(product, image_url):
    """Download *image_url* and store it as the image of *product*.

    Best effort: any failure is logged and the product is left without image.
    """
    try:
        response = requests.get(
            image_url,
            stream=True,
            headers=_IMAGE_REQUEST_HEADERS,
            timeout=_IMAGE_FETCH_TIMEOUT,
        )
        try:
            if response.status_code != 200:
                logging.error(f"Source image [{image_url}] not reachable: {response.status_code}")
                return
            # let urllib3 undo any content-encoding before PIL reads the bytes
            response.raw.decode_content = True
            image = Image.open(response.raw)
            # save using File object
            img_io = BytesIO()
            image.save(img_io, format=image.format)
            product.image.save(f"{product.company.company_name}{product.name}.{image.format.lower()}", File(img_io), save=False)
            product.save()
        finally:
            # a streamed response keeps its connection until closed
            response.close()
    except Exception as e:
        logging.error(f"Could not add image to product from [{image_url}]: {str(e)}")


def product_loader(csv_reader, user, company=None):
    """Create a Product for every valid row yielded by *csv_reader*.

    Parameters:
        csv_reader: iterable of dict-like rows keyed by the CSV column names
            ('sku', 'nombre-producto', 'descripcion', 'url', 'precio',
            'gastos-envio', 'cond-envio', 'descuento', 'stock', 'tags',
            'categoria', 'identificadores', 'imagen').
        user: stored as creator of each product; its ``company`` is used
            when *company* is not given.
        company: company that owns the products (optional).

    Rows missing any of 'nombre-producto', 'descripcion' or 'precio' are
    logged and skipped. A row that fails to be saved is logged and does not
    stop the import. When 'imagen' is set, the image is downloaded and
    attached on a best-effort basis.

    A HistorySync record is created for the run and updated with the number
    of products created.

    Returns the number of products created, or None when no company could
    be determined (in which case nothing is created).
    """
    counter = 0
    # get company
    if company is None:
        if user is None or user.company is None:
            # cannot add products without a company
            return None
        company = user.company

    # create historysync instance
    history = HistorySync.objects.create(company=company, sync_date=timezone.now())

    for row in csv_reader:
        # trim strings, convert amounts, turn empty values into None
        _clean_row(row)

        # check required data (empty cells are None after cleaning,
        # whitespace-only cells are '')
        if any(row.get(field) in (None, '') for field in _REQUIRED_KEYS):
            logging.error(f"Required data missing: {row}")
            continue

        try:
            # TODO: if tags is empty, auto-generate tags
            # assemble instance data
            product_data = {
                'company': company,
                'sku': row['sku'],
                'name': row['nombre-producto'],
                'description': row['descripcion'],
                # already stripped by _clean_row; may legitimately be None
                'url': row['url'],
                'price': row['precio'],
                'shipping_cost': row['gastos-envio'],
                'shipping_terms': row['cond-envio'],
                'discount': row['descuento'],
                'stock': row['stock'],
                'tags': row['tags'],
                'category': row['categoria'],
                'identifiers': row['identificadores'],
                'history': history,
                'creator': user,
                # 'valid': True
            }
            product = Product.objects.create(**product_data)
            # image logo data
            if row['imagen'] is not None:
                _attach_remote_image(product, row['imagen'])
            logging.info(f"Created Product {product.id}")
            counter += 1
        except Exception as e:
            logging.error(f"Could not parse {counter}: {str(e)}")

    history.quantity = counter
    history.save()
    return counter