import logging
import datetime
from decimal import Decimal
from io import BytesIO

import requests
from PIL import Image

from django.conf import settings
from django.contrib.gis.geos import Point
from django.contrib.gis.measure import D
from django.contrib.postgres.search import SearchQuery, SearchRank, SearchVector, TrigramSimilarity
from django.core.files import File
from django.db.models import Max, Min, Q
from django.utils import timezone

from companies.models import Company
from history.models import HistorySync
from products.models import Product

logging.basicConfig(
    filename='logs/csv-load.log',
    filemode='w',
    format='%(levelname)s:%(message)s',
    level=logging.INFO,
)


def extract_search_filters(result_set):
    """
    Returned object should look something like:
    {
        "tags": {
            'singles': set(),
            'header1': set(),
        },
        "attributes": {
            'singles': set(),
            'header1': set(),
        },
    }
    """
    filter_dict = {
        "tags": {
            'singles': set(),
        },
        "attributes": {
            'singles': set(),
        },
    }
    for item in result_set:
        try:
            # extract tags
            tags = item.tags.all()
            for tag in tags:
                if len(tag.name.split('/')) == 1:
                    filter_dict['tags']['singles'].add(tag.name)
                else:
                    # use the penultimate tag segment as the header
                    chunks = tag.name.split('/')
                    header = chunks[-2]
                    name = chunks[-1]
                    # create the header entry if it does not exist yet
                    entry = filter_dict['tags'].get(header)
                    if entry is None:
                        filter_dict['tags'][header] = set()
                    filter_dict['tags'][header].add(name)

            # extract attributes
            attributes = item.attributes.all()
            for attribute in attributes:
                if len(attribute.name.split('/')) == 1:
                    filter_dict['attributes']['singles'].add(attribute.name)
                else:
                    # use the penultimate segment as the header
                    chunks = attribute.name.split('/')
                    header = chunks[-2]
                    name = chunks[-1]
                    # create the header entry if it does not exist yet
                    entry = filter_dict['attributes'].get(header)
                    if entry is None:
                        filter_dict['attributes'][header] = set()
                    filter_dict['attributes'][header].add(name)
        except Exception as e:
            logging.error(f'Extracting filters for {item} failed: {str(e)}')
    return filter_dict


def get_related_products(product):
    """Run progressively broader queries until up to 10 related products are collected."""
    total_results = []

    # search by category
    category_qs = Product.objects.filter(category=product.category)[:10]
    for item in category_qs:
        total_results.append(item)

    # check size
    if len(total_results) < 10:
        # search by tags
        tags_qs = Product.objects.filter(tags__in=product.tags.all())[:10]
        for item in tags_qs:
            total_results.append(item)

    # check size
    if len(total_results) < 10:
        # search by company
        coop_qs = Product.objects.filter(company=product.company)[:10]
        for item in coop_qs:
            total_results.append(item)

    # check size
    if len(total_results) < 10:
        # search by latest
        latest_qs = Product.objects.order_by('-created')[:10]
        for item in latest_qs:
            total_results.append(item)

    return total_results[:10]
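

# Illustrative sketch (not part of the original module): the nested dict returned by
# extract_search_filters() holds sets, which have no stable ordering; a view that
# renders the filters might flatten them into sorted lists first. The helper name
# and the output shape below are assumptions, not the project's API.
def flatten_search_filters(filter_dict):
    """Convert the set-based filter dict into sorted lists for rendering."""
    return {
        group: {header: sorted(values) for header, values in headers.items()}
        for group, headers in filter_dict.items()
    }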


def ranked_product_search(keywords, shipping_cost=None, discount=None, category=None,
                          tags=None, price_min=None, price_max=None, coordinates=None):
    """
    Ranked product search.

    SearchVector for the fields
    SearchQuery for the value
    SearchRank for relevancy scoring and ranking

    Allows filtering by:
    - shipping cost
    - discount
    - category
    - tags
    - price range
    - coordinates (geolocation)

    Response includes:
    - result_set
    - min_price
    - max_price
    - georesult
    """
    vector = (
        SearchVector('name')
        + SearchVector('description')
        + SearchVector('tags__label')
        + SearchVector('attributes__label')
        + SearchVector('category__label')
        + SearchVector('company__company_name')
    )
    query = SearchQuery(keywords, search_type='plain')
    products_qs = Product.objects.annotate(
        rank=SearchRank(vector, query)
    ).filter(rank__gt=0.05, active=True)

    # geolocation filtering: widen the radius until more than 10 results remain
    if coordinates is not None:
        point = Point(coordinates)
        filtered_qs = products_qs.filter(company__geo__distance_lte=(point, D(km=10)))
        georesult = '10k'
        if filtered_qs.count() <= 10:
            filtered_qs = products_qs.filter(company__geo__distance_lte=(point, D(km=50)))
            georesult = '50k'
        if filtered_qs.count() <= 10:
            filtered_qs = products_qs.filter(company__geo__distance_lte=(point, D(km=200)))
            georesult = '200k'
        if filtered_qs.count() > 10:
            products_qs = filtered_qs
        else:
            georesult = None
    else:
        georesult = None

    # filter by category
    if category is not None:
        products_qs = products_qs.filter(category=category)

    # filter by tags
    if tags is not None:
        products_qs = products_qs.filter(tags=tags)

    # filter by shipping cost
    if shipping_cost is True:
        # only instances with shipping costs
        products_qs = products_qs.filter(
            Q(shipping_cost__isnull=False) &
            Q(shipping_cost__gte=1)
        )
    elif shipping_cost is False:
        # only instances without shipping costs
        products_qs = products_qs.filter(Q(shipping_cost=None) | Q(shipping_cost=0.00))

    # filter by discount
    if discount is True:
        # only instances with a discount
        products_qs = products_qs.filter(
            Q(discount__isnull=False) &
            Q(discount__gte=1)
        )
    elif discount is False:
        # only instances without a discount
        products_qs = products_qs.filter(Q(discount=None) | Q(discount=0.00))

    # filter by price
    if price_min is not None:
        products_qs = products_qs.filter(price__gte=price_min)
    if price_max is not None:
        products_qs = products_qs.filter(price__lte=price_max)

    # get min_price and max_price
    min_price = products_qs.aggregate(Min('price'))
    max_price = products_qs.aggregate(Max('price'))

    return set(products_qs), min_price, max_price, georesult
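

# Illustrative sketch (an assumption, not part of the original module): how a search
# view might combine ranked_product_search() with extract_search_filters() to build a
# template context. The helper name and returned keys are hypothetical; the
# 'price__min'/'price__max' keys are the default names produced by the aggregates above.
def build_search_context(keywords, **search_kwargs):
    """Run a ranked search and assemble results, filters and price bounds."""
    result_set, min_price, max_price, georesult = ranked_product_search(keywords, **search_kwargs)
    return {
        'results': result_set,
        'filters': extract_search_filters(result_set),
        'min_price': min_price.get('price__min'),
        'max_price': max_price.get('price__max'),
        'georesult': georesult,
    }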


def product_loader(csv_reader, user, company=None):
    """
    Parse CSV data and extract:
    - product data

    Returns the number of products created.
    """
    counter = 0

    # get company
    if company is None and user is not None:
        if user.company is not None:
            company = user.company
        else:
            # cannot add products without a company
            return None

    # create HistorySync instance
    history = HistorySync.objects.create(company=company, sync_date=timezone.now())

    for row in csv_reader:
        # trim strings and normalise numeric fields
        for key in row:
            try:
                if row[key]:
                    if 'imagen' in key or 'categoria' in key:
                        row[key] = row[key].strip()
                    elif key in ['precio', 'gastos-envio']:
                        # drop the trailing currency symbol and normalise the decimal separator
                        row[key] = Decimal(row[key][:-1].strip().replace(',', '.'))
                    else:
                        row[key] = row[key].strip()
                else:
                    row[key] = None
            except Exception as e:
                logging.error(f"Could not access key {key}: {str(e)}")
                continue

        # check required data (empty fields were normalised to None above)
        if None in (row['nombre-producto'], row['descripcion'], row['precio'],):
            logging.error(f"Required data missing: {row}")
            continue

        try:
            # TODO: if tags is empty, auto-generate tags
            # assemble instance data
            product_data = {
                'company': company,
                'sku': row['sku'],
                'name': row['nombre-producto'],
                'description': row['descripcion'],
                'url': row['url'],
                'price': row['precio'],
                'shipping_cost': row['gastos-envio'],
                'shipping_terms': row['cond-envio'],
                'discount': row['descuento'],
                'stock': row['stock'],
                'tags': row['tags'],
                'category': row['categoria'],
                'identifiers': row['identificadores'],
                'history': history,
                'creator': user,
                # 'valid': True
            }
            product = Product.objects.create(**product_data)

            # image data
            if row['imagen'] is not None:
                try:
                    # download the source image
                    headers = {"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36"}
                    response = requests.get(row['imagen'], stream=True, headers=headers)
                    assert response.status_code == 200
                    response.raw.decode_content = True
                    image = Image.open(response.raw)
                    # save using a File object
                    img_io = BytesIO()
                    image.save(img_io, format=image.format)
                    product.image.save(
                        f"{product.company.company_name}{product.name}.{image.format.lower()}",
                        File(img_io),
                        save=False,
                    )
                    product.save()
                except AssertionError:
                    logging.error(f"Source image [{row['imagen']}] not reachable: {response.status_code}")
                except Exception as e:
                    logging.error(f"Could not add image to product from [{row['imagen']}]: {str(e)}")

            logging.info(f"Created Product {product.id}")
            counter += 1
        except Exception as e:
            logging.error(f"Could not parse row {counter}: {str(e)}")

    history.quantity = counter
    history.save()
    return counter
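

# Illustrative usage sketch (an assumption, not part of the original module): feeding
# product_loader() from a CSV file on disk. The ';' delimiter, the UTF-8 encoding and
# the helper name are guesses about the export format, not confirmed by the source.
import csv


def load_products_from_file(path, user, company=None, delimiter=';'):
    """Open a CSV export and hand a DictReader to product_loader()."""
    with open(path, newline='', encoding='utf-8') as fh:
        reader = csv.DictReader(fh, delimiter=delimiter)
        return product_loader(reader, user, company=company)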