import datetime
import logging
from decimal import Decimal
from io import BytesIO

import requests
from PIL import Image
from django.conf import settings
from django.contrib.postgres.search import SearchQuery, SearchRank, SearchVector, TrigramSimilarity
from django.core.files import File
from django.db.models import Max, Min
from django.db.models import Q
from django.utils import timezone

from products.models import Product
from companies.models import Company
from history.models import HistorySync


logging.basicConfig(
    filename='logs/csv-load.log',
    filemode='w',
    format='%(levelname)s:%(message)s',
    level=logging.INFO,
)

# Maximum number of related products returned by get_related_products().
_RELATED_PRODUCTS_LIMIT = 10

# CSV columns whose values are parsed into Decimal by product_loader().
_DECIMAL_COLUMNS = ('precio', 'gastos-envio')

# CSV columns that must carry a value for a row to be imported.
_REQUIRED_COLUMNS = ('nombre-producto', 'descripcion', 'precio')

# Seconds to wait on the remote server before giving up on an image download.
_IMAGE_REQUEST_TIMEOUT = 30

# Browser-like User-Agent sent when downloading product images.
_IMAGE_REQUEST_HEADERS = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36",
}


def _collect_filter_names(related_items, bucket):
    """Sort the ``name`` of every item in ``related_items`` into ``bucket``.

    Names without a ``/`` go into ``bucket['singles']``. For names with one
    or more ``/`` the penultimate chunk is used as the header (a key of
    ``bucket``) and the last chunk is added to that header's set.
    """
    for related in related_items:
        chunks = related.name.split('/')
        if len(chunks) == 1:
            bucket['singles'].add(related.name)
        else:
            # penultimate chunk is the header, last chunk is the value
            bucket.setdefault(chunks[-2], set()).add(chunks[-1])


def extract_search_filters(result_set):
    """Build the tag and attribute filters available for ``result_set``.

    Each item is expected to expose ``tags.all()`` and ``attributes.all()``
    returning objects with a ``name`` string.

    Returned object looks like:
        {
            "tags": {
                'singles': set(),
                'header1': set(),
            },
            "attributes": {
                'singles': set(),
                'header1': set(),
            },
        }

    Items that fail while being processed are logged and skipped, so a single
    broken item does not prevent the rest of the filters from being built.
    """
    filter_dict = {
        "tags": {
            'singles': set(),
        },
        "attributes": {
            'singles': set(),
        },
    }
    for item in result_set:
        try:
            _collect_filter_names(item.tags.all(), filter_dict['tags'])
            _collect_filter_names(item.attributes.all(), filter_dict['attributes'])
        except Exception as e:
            # best effort: report the failure (with its cause) and move on
            logging.error(f'Error extracting filters for {item}: {str(e)}')
    return filter_dict


def get_related_products(product):
    """Return up to 10 products related to ``product``.

    Searches are tried in order (same category, shared tags, same company,
    most recently created) and each one is only run while fewer than 10
    results have been gathered.

    NOTE(review): results are not de-duplicated and may include ``product``
    itself; confirm whether callers rely on that.
    """
    # Lambdas keep each search (and the attribute access it needs) lazy, so
    # later searches cost nothing when earlier ones already filled the list.
    searches = (
        lambda: Product.objects.filter(category=product.category),
        lambda: Product.objects.filter(tags__in=product.tags.all()),
        lambda: Product.objects.filter(company=product.company),
        lambda: Product.objects.order_by('-created'),
    )
    total_results = []
    for build_queryset in searches:
        if len(total_results) >= _RELATED_PRODUCTS_LIMIT:
            break
        total_results.extend(build_queryset()[:_RELATED_PRODUCTS_LIMIT])
    return total_results[:_RELATED_PRODUCTS_LIMIT]


def _filter_by_amount_presence(queryset, field_name, wanted):
    """Apply the tri-state filter used for shipping cost and discount.

    - ``wanted is True``: keep rows where ``field_name`` is set and >= 1
    - ``wanted is False``: keep rows where ``field_name`` is NULL or 0
    - anything else: return ``queryset`` untouched

    NOTE(review): values strictly between 0 and 1 match neither branch;
    confirm that the ``>= 1`` threshold is intended.
    """
    if wanted is True:
        return queryset.filter(**{
            f'{field_name}__isnull': False,
            f'{field_name}__gte': 1,
        })
    if wanted is False:
        return queryset.filter(Q(**{field_name: None}) | Q(**{field_name: 0.00}))
    return queryset


def ranked_product_search(keyword, shipping_cost=None, discount=None, category=None, tags=None, price_min=None, price_max=None):
    """Full-text product search with optional filters.

    Uses:
        SearchVector for the searched fields
        SearchQuery for the keyword
        SearchRank for relevancy scoring

    Only active products with a rank above 0.05 are considered.

    Optional filters:
        - category: exact match on ``category``
        - tags: exact match on ``tags``
        - shipping_cost: True -> only with shipping cost, False -> only without
        - discount: True -> only with discount, False -> only without
        - price_min / price_max: exclusive price bounds

    Returns a tuple ``(products, min_price, max_price)`` where ``products`` is
    a set of Product instances, ``min_price`` is ``{'price__min': value}`` and
    ``max_price`` is ``{'price__max': value}``, both computed over the
    filtered results.

    NOTE(review): the result is a set, so the rank ordering is not preserved.
    """
    vector = (
        SearchVector('name')
        + SearchVector('description')
        + SearchVector('tags__label')
        + SearchVector('attributes__label')
        + SearchVector('category__label')
        + SearchVector('company__company_name')
    )
    query = SearchQuery(keyword)

    products_qs = Product.objects.annotate(
        rank=SearchRank(vector, query)
    ).filter(rank__gt=0.05, active=True)

    # filter by category
    if category is not None:
        products_qs = products_qs.filter(category=category)

    # filter by tags
    if tags is not None:
        products_qs = products_qs.filter(tags=tags)

    # filter by presence/absence of shipping cost and discount
    products_qs = _filter_by_amount_presence(products_qs, 'shipping_cost', shipping_cost)
    products_qs = _filter_by_amount_presence(products_qs, 'discount', discount)

    # filter by price (bounds are exclusive)
    if price_min is not None:
        products_qs = products_qs.filter(price__gt=price_min)
    if price_max is not None:
        products_qs = products_qs.filter(price__lt=price_max)

    # Fetch both price bounds in one query, then split them so the returned
    # dictionaries keep the shape callers already receive.
    price_bounds = products_qs.aggregate(Min('price'), Max('price'))
    min_price = {'price__min': price_bounds['price__min']}
    max_price = {'price__max': price_bounds['price__max']}

    return set(products_qs), min_price, max_price


def _parse_amount(raw_value):
    """Convert a CSV amount such as ``'12,50€'`` or ``'12,50 €'`` to Decimal.

    Trailing non-digit characters (currency symbol, whitespace) are dropped
    and a decimal comma is turned into a decimal point. Values without a
    currency suffix are parsed as they are instead of losing their last digit.

    Raises ``decimal.InvalidOperation`` when no valid number is left.
    """
    cleaned = raw_value.strip()
    while cleaned and not cleaned[-1].isdigit():
        cleaned = cleaned[:-1]
    return Decimal(cleaned.replace(',', '.'))


def _clean_row(row):
    """Normalise the values of a CSV ``row`` in place.

    Empty values become ``None``, amount columns are parsed into Decimal and
    every other value is stripped of surrounding whitespace. A value that
    cannot be processed is logged and left as it was.
    """
    for key in row:
        try:
            value = row[key]
            if not value:
                row[key] = None
            elif key in _DECIMAL_COLUMNS:
                row[key] = _parse_amount(value)
            else:
                row[key] = value.strip()
        except Exception as e:
            logging.error(f"Could not access key {key}: {str(e)}")


def _attach_remote_image(product, image_url):
    """Download ``image_url`` and store it as ``product.image``.

    Failures are logged and swallowed: a product without an image is still a
    valid import.
    """
    try:
        # The context manager releases the streamed connection; the timeout
        # keeps one unresponsive host from stalling the whole import.
        with requests.get(
            image_url,
            stream=True,
            headers=_IMAGE_REQUEST_HEADERS,
            timeout=_IMAGE_REQUEST_TIMEOUT,
        ) as response:
            if response.status_code != 200:
                logging.error(f"Source image [{image_url}] not reachable: {response.status_code}")
                return
            # let urllib3 undo any Content-Encoding before PIL reads the stream
            response.raw.decode_content = True
            image = Image.open(response.raw)

            # re-encode into memory and save using a File object
            img_io = BytesIO()
            image.save(img_io, format=image.format)
            product.image.save(
                f"{product.company.company_name}{product.name}.{image.format.lower()}",
                File(img_io),
                save=False,
            )
            product.save()
    except Exception as e:
        logging.error(f"Could not add image to product from [{image_url}]: {str(e)}")


def product_loader(csv_reader, user, company=None):
    """Create products from CSV rows.

    Parameters:
        csv_reader: iterable of dict-like rows keyed by column name
            (e.g. a ``csv.DictReader``); rows are normalised in place
        user: stored as ``creator`` of each product; when ``company`` is not
            given, ``user.company`` is used instead
        company: company the products belong to

    A HistorySync instance is created for the run and linked to every
    product; its ``quantity`` is set to the number of products created.

    Rows missing required data, or failing for any other reason, are logged
    and skipped.

    Returns the number of products created, or ``None`` when ``company`` is
    not given and ``user`` has no company.
    """
    counter = 0

    # get company
    if company is None and user is not None:
        if user.company is None:
            # cannot add products without a company
            return None
        company = user.company

    # create historysync instance
    history = HistorySync.objects.create(company=company, sync_date=timezone.now())

    for row_number, row in enumerate(csv_reader, start=1):
        _clean_row(row)

        # check required data (empty values were normalised to None above)
        if any(row.get(column) in (None, '') for column in _REQUIRED_COLUMNS):
            logging.error(f"Required data missing: {row}")
            continue

        try:
            # TODO: if tags is empty, auto-generate tags

            # assemble instance data
            product_data = {
                'company': company,
                'sku': row['sku'],
                'name': row['nombre-producto'],
                'description': row['descripcion'],
                # already stripped by _clean_row; may be None when empty
                'url': row['url'],
                'price': row['precio'],
                'shipping_cost': row['gastos-envio'],
                'shipping_terms': row['cond-envio'],
                'discount': row['descuento'],
                'stock': row['stock'],
                'tags': row['tags'],
                'category': row['categoria'],
                'identifiers': row['identificadores'],
                'history': history,
                'creator': user,
            }
            product = Product.objects.create(**product_data)

            # image data
            if row['imagen'] is not None:
                _attach_remote_image(product, row['imagen'])

            logging.info(f"Created Product {product.id}")
            counter += 1
        except Exception as e:
            logging.error(f"Could not parse row {row_number}: {str(e)}")

    history.quantity = counter
    history.save()

    return counter