Files
consumocuidado-server/products/utils.py
2021-03-19 14:11:56 +01:00

293 lines
9.8 KiB
Python

import logging
import datetime
from decimal import Decimal
from django.db.models import Q
from django.contrib.postgres.search import SearchQuery, SearchRank, SearchVector, TrigramSimilarity
from django.db.models import Max, Min
from django.conf import settings
from django.utils import timezone
import requests
from products.models import Product, CategoryTag
from companies.models import Company
from history.models import HistorySync
from PIL import Image
from io import BytesIO
from django.core.files import File
# Root-logger setup performed when this module is imported: records at
# INFO level and above are written to logs/csv-load.log, which is opened
# in write mode ('w').
logging.basicConfig(
    level=logging.INFO,
    format='%(levelname)s:%(message)s',
    filename='logs/csv-load.log',
    filemode='w',
)
def _collect_filter_names(named_items, bucket):
    """Sort the ``name`` of every entry in *named_items* into *bucket*.

    A name without ``/`` goes into ``bucket['singles']``. A hierarchical
    name (``a/b/c``) is stored under its penultimate chunk (``b``) with the
    last chunk (``c``) as the value.
    """
    for entry in named_items:
        chunks = entry.name.split('/')
        if len(chunks) == 1:
            bucket['singles'].add(entry.name)
        else:
            # penultimate chunk is the header, last chunk is the value
            bucket.setdefault(chunks[-2], set()).add(chunks[-1])


def extract_search_filters(result_set):
    """
    Build the filter options (tags and attributes) present in a result set.

    Every item in *result_set* must expose ``tags.all()`` and
    ``attributes.all()``, each yielding objects with a ``name`` string.

    Returned object looks something like:
    {
        "tags": {
            'singles': set(),
            'header1': set(),
        },
        "attributes": {
            'singles': set(),
            'header1': set(),
        },
    }

    Items that fail while being processed are logged and skipped, so one
    broken item does not prevent filters being built for the rest.
    """
    filter_dict = {
        "tags": {
            'singles': set(),
        },
        "attributes": {
            'singles': set(),
        },
    }
    for item in result_set:
        try:
            _collect_filter_names(item.tags.all(), filter_dict['tags'])
            _collect_filter_names(item.attributes.all(), filter_dict['attributes'])
        except Exception as e:
            # best-effort: keep going, but record why this item failed
            logging.error(f'Extracting filters for {item}: {e}')
    return filter_dict
def get_related_products(product):
    """Return up to 10 products related to *product*.

    Searches are tried in order of decreasing relevance and stop as soon as
    10 distinct products have been collected:

    1. same category
    2. sharing at least one tag
    3. same company
    4. most recently created products

    Each product appears at most once in the result, even if several
    searches (or several shared tags) match it.

    NOTE(review): *product* itself is not excluded from the searches, so it
    may appear in the result; confirm whether callers expect that.
    """
    limit = 10
    # QuerySets are lazy: building them here does not hit the database;
    # only the ones actually iterated below are executed.
    candidate_querysets = (
        Product.objects.filter(category=product.category),
        Product.objects.filter(tags__in=product.tags.all()),
        Product.objects.filter(company=product.company),
        Product.objects.order_by('-created'),
    )
    total_results = []
    seen_ids = set()
    for queryset in candidate_querysets:
        if len(total_results) >= limit:
            break
        for item in queryset[:limit]:
            # the searches overlap, and the tag join can repeat rows
            if item.pk in seen_ids:
                continue
            seen_ids.add(item.pk)
            total_results.append(item)
    return total_results[:limit]
def ranked_product_search(keyword, shipping_cost=None, discount=None, categories=None, tags=None, price_min=None, price_max=None):
    """
    Ranked product search

    SearchVectors for the fields
    SearchQuery for the value
    SearchRank for relevancy scoring and ranking

    Only active products with a rank above 0.05 are considered.

    allow filtering by:
    - categories: iterable of category labels (case-insensitive); products
      in a matching category or one of its direct children are kept
    - tags: passed straight to ``filter(tags=...)``
    - shipping_cost: True -> only products with a shipping cost above 0,
      False -> only products with no shipping cost (NULL or 0)
    - discount: True -> only products with a discount above 0,
      False -> only products with no discount (NULL or 0)
    - price_min / price_max: inclusive price bounds

    Returns a tuple ``(products, min_price, max_price)`` where *products* is
    a set of Product instances and the other two are the dicts returned by
    ``aggregate(Min('price'))`` and ``aggregate(Max('price'))``.
    """
    vector = (
        SearchVector('name')
        + SearchVector('description')
        + SearchVector('tags__label')
        + SearchVector('attributes__label')
        + SearchVector('category__name')
        + SearchVector('company__company_name')
    )
    query = SearchQuery(keyword)
    products_qs = Product.objects.annotate(
        rank=SearchRank(vector, query)
    ).filter(rank__gt=0.05, active=True)

    # filter by category
    if categories is not None:
        # merge conflict resolved in favour of the CategoryTag lookup
        category_tags = []
        for entry in categories:
            cat = CategoryTag.objects.filter(label__iexact=entry).first()
            if cat is None:
                # unknown category label: it simply matches nothing
                continue
            # keep the category tag itself and its children
            category_tags.append(cat)
            category_tags.extend(cat.children.all())
        products_qs = products_qs.filter(category__in=category_tags)

    # filter by tags
    if tags is not None:
        products_qs = products_qs.filter(tags=tags)

    # filter by shipping cost
    if shipping_cost is True:
        # only instances with shipping costs (NULL never compares > 0)
        products_qs = products_qs.filter(shipping_cost__gt=0)
    elif shipping_cost is False:
        # only instances without shipping costs
        products_qs = products_qs.filter(Q(shipping_cost=None) | Q(shipping_cost=0.00))

    # filter by discount
    if discount is True:
        # only instances with a discount (NULL never compares > 0)
        products_qs = products_qs.filter(discount__gt=0)
    elif discount is False:
        # only instances without a discount
        products_qs = products_qs.filter(Q(discount=None) | Q(discount=0.00))

    # filter by price
    if price_min is not None:
        products_qs = products_qs.filter(price__gte=price_min)
    if price_max is not None:
        products_qs = products_qs.filter(price__lte=price_max)

    # get min_price and max_price
    min_price = products_qs.aggregate(Min('price'))
    max_price = products_qs.aggregate(Max('price'))

    # set(): the joins on tags/attributes can yield the same product twice
    return set(products_qs), min_price, max_price
# Seconds to wait for a remote product image before giving up.
_IMAGE_REQUEST_TIMEOUT = 30

# Browser-like User-Agent sent when downloading product images.
_IMAGE_REQUEST_HEADERS = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36",
}

# CSV columns that must be present and non-empty for a row to be loaded.
_REQUIRED_COLUMNS = ('nombre-producto', 'descripcion', 'precio')

# CSV columns holding monetary amounts.
_AMOUNT_COLUMNS = ('precio', 'gastos-envio')


def _parse_amount(raw):
    """Convert an amount such as ``'12,50 €'`` or ``'12,50'`` to Decimal."""
    cleaned = raw.strip()
    # drop a trailing currency symbol or unit only if there is one
    while cleaned and not cleaned[-1].isdecimal():
        cleaned = cleaned[:-1]
    return Decimal(cleaned.replace(',', '.'))


def _clean_row(row):
    """Normalise one CSV row in place: strip text, parse amounts, '' -> None."""
    for key in row:
        try:
            if not row[key]:
                row[key] = None
            elif key in _AMOUNT_COLUMNS:
                row[key] = _parse_amount(row[key])
            else:
                row[key] = row[key].strip()
        except Exception as e:
            # leave the raw value in place; the problem is reported here
            logging.error(f"Could not access key {key}: {str(e)}")


def _attach_image(product, image_url):
    """Download *image_url* and store it as the product image (best effort)."""
    try:
        # context manager releases the streamed connection in every case
        with requests.get(
            image_url,
            stream=True,
            headers=_IMAGE_REQUEST_HEADERS,
            timeout=_IMAGE_REQUEST_TIMEOUT,
        ) as response:
            if response.status_code != 200:
                logging.error(f"Source image [{image_url}] not reachable: {response.status_code}")
                return
            response.raw.decode_content = True
            image = Image.open(response.raw)
            # save using File object
            img_io = BytesIO()
            image.save(img_io, format=image.format)
            product.image.save(
                f"{product.company.company_name}{product.name}.{image.format.lower()}",
                File(img_io),
                save=False,
            )
            product.save()
    except Exception as e:
        logging.error(f"Could not add image to product from [{image_url}]: {str(e)}")


def product_loader(csv_reader, user, company=None):
    """
    Parse csv data and create one Product per valid row.

    csv_reader: iterable of dict-like rows keyed by the CSV column names
    user: creator of the products; its ``company`` is used when *company*
        is not given
    company: company the products belong to

    A HistorySync instance is created for the load, and its ``quantity`` is
    set to the number of products created. Rows with missing required data
    or that fail to load are logged and skipped. A failing image download
    does not prevent the product from being created.

    Returns the count of created products, or None when no company could
    be determined (products cannot be added without a company).
    """
    counter = 0
    # get company
    if company is None and user is not None:
        company = user.company
    if company is None:
        # cannot add products without a company
        return None

    # create historysync instance
    history = HistorySync.objects.create(company=company, sync_date=timezone.now())

    for row in csv_reader:
        _clean_row(row)

        # check required data (empty values were normalised to None above)
        if any(row.get(column) in (None, '') for column in _REQUIRED_COLUMNS):
            logging.error(f"Required data missing: {row}")
            continue

        try:
            # TODO: if tags is empty, auto-generate tags
            # assemble instance data
            product_data = {
                'company': company,
                'sku': row['sku'],
                'name': row['nombre-producto'],
                'description': row['descripcion'],
                # already stripped by _clean_row; may legitimately be None
                'url': row['url'],
                'price': row['precio'],
                'shipping_cost': row['gastos-envio'],
                'shipping_terms': row['cond-envio'],
                'discount': row['descuento'],
                'stock': row['stock'],
                'tags': row['tags'],
                'category': row['categoria'],
                'identifiers': row['identificadores'],
                'history': history,
                'creator': user,
                # 'valid': True
            }
            product = Product.objects.create(**product_data)
            # image logo data
            if row['imagen'] is not None:
                _attach_image(product, row['imagen'])
            logging.info(f"Created Product {product.id}")
            counter += 1
        except Exception as e:
            logging.error(f"Could not parse {counter}: {str(e)}")

    history.quantity = counter
    history.save()
    return counter