# consumocuidado-server/products/utils.py

import logging
import datetime
from decimal import Decimal

from django.db.models import Q
from django.contrib.postgres.search import SearchQuery, SearchRank, SearchVector, TrigramSimilarity
from django.db.models import Max, Min
from django.conf import settings

import requests

from products.models import Product
from companies.models import Company
from history.models import HistorySync
from PIL import Image
from io import BytesIO
from django.core.files import File


# Configure the root logger for this module's helpers (they all log through
# the module-level ``logging.error`` / ``logging.info`` functions).
# NOTE: ``filemode='w'`` truncates ``logs/csv-load.log`` whenever this
# configuration is applied, and the relative path is resolved against the
# process' current working directory. ``basicConfig`` is a no-op if the root
# logger already has handlers configured.
logging.basicConfig(
    filename='logs/csv-load.log',
    filemode='w',
    format='%(levelname)s:%(message)s',   # e.g. "ERROR:Required data missing: ..."
    level=logging.INFO,
    )


def _collect_filter_names(labels, bucket):
    """Sort the ``name`` of every label into ``bucket`` by its '/' path.

    A name without '/' goes into ``bucket['singles']``. Otherwise the
    penultimate path chunk is used as the header key and the last chunk is
    added to that header's set.
    """
    for label in labels:
        chunks = label.name.split('/')
        if len(chunks) == 1:
            bucket['singles'].add(label.name)
        else:
            # 'a/b/c' -> header 'b', value 'c'
            bucket.setdefault(chunks[-2], set()).add(chunks[-1])


def extract_search_filters(result_set):
    """
    Build the filter options offered for a set of search results.

    Every item in ``result_set`` must expose ``tags.all()`` and
    ``attributes.all()``, each yielding objects with a ``name`` string.

    Returned object looks like:

    {
        "tags": {
            'singles': set(),     # names without a '/' separator
            'header1': set(),     # last chunks grouped by penultimate chunk
        },
        "attributes": {
            'singles': set(),
            'header1': set(),
        },
    }

    Items that raise while being processed are logged and skipped; whatever
    was collected from them before the failure is kept.
    """
    filter_dict = {
        "tags": {
            'singles': set(),
        },
        "attributes": {
            'singles': set(),
        }
    }
    for item in result_set:
        try:
            _collect_filter_names(item.tags.all(), filter_dict['tags'])
            _collect_filter_names(item.attributes.all(), filter_dict['attributes'])
        except Exception as e:
            # best effort: one broken item must not hide the other filters,
            # but record why it failed
            logging.error(f'Extracting filters for {item}: {e}')
    return filter_dict


def find_related_products_v3(keyword):
    """
    Full-text product search by keyword.

    One SearchVector per searched field is combined into a single vector,
    the keyword becomes a SearchQuery, and every product is annotated with
    the resulting SearchRank as ``rank``. Only products ranking above 0.05
    are kept.

    Returns a ``set`` of Product instances, so no ordering is preserved.
    """
    searched_fields = (
        'name',
        'description',
        'tags__label',
        'attributes__label',
        'category__name',
    )
    # fold the per-field vectors left to right into one combined vector
    combined_vector = SearchVector(searched_fields[0])
    for field_name in searched_fields[1:]:
        combined_vector = combined_vector + SearchVector(field_name)

    relevance = SearchRank(combined_vector, SearchQuery(keyword))
    ranked = Product.objects.annotate(rank=relevance)
    # no order_by here: the ordering would be lost when casting to a set
    matches = ranked.filter(rank__gt=0.05)

    return set(matches)


def find_related_products_v6(keyword, shipping_cost=None, discount=None, category=None, tags=None, price_min=None, price_max=None):
    """
    Ranked product search with optional filters.

    SearchVectors for the fields
    SearchQuery for the value
    SearchRank for relevancy scoring and ranking (only rank > 0.05 is kept)

    Optional filters (each is skipped when left as ``None``):
    - shipping_cost: ``True`` keeps products whose shipping cost is >= 1,
      ``False`` keeps products whose shipping cost is NULL or 0
    - discount: ``True`` keeps products whose discount is >= 1,
      ``False`` keeps products whose discount is NULL or 0
    - category: exact match on ``Product.category``
    - tags: exact match on ``Product.tags``
    - price_min / price_max: exclusive lower / upper bound on ``price``

    Returns a 3-tuple ``(products, min_price, max_price)`` where ``products``
    is a ``set`` of Product instances (unordered) and the other two are the
    dicts ``{'price__min': ...}`` and ``{'price__max': ...}`` computed over
    the filtered results.
    """
    vector = SearchVector('name') + SearchVector('description') + SearchVector('tags__label') + SearchVector('attributes__label') + SearchVector('category__name')
    query = SearchQuery(keyword)

    products_qs = Product.objects.annotate(
        rank=SearchRank(vector, query)
    ).filter(rank__gt=0.05)     # no order_by: it would be lost when casting to a set

    # filter by category
    if category is not None:
        products_qs = products_qs.filter(category=category)

    # filter by tags
    if tags is not None:
        products_qs = products_qs.filter(tags=tags)

    # filter by shipping cost
    # NOTE(review): a shipping cost strictly between 0 and 1 matches neither
    # branch below -- confirm whether ``>= 1`` should really be ``> 0``.
    if shipping_cost is True:
        # only instances with shipping costs
        products_qs = products_qs.filter(
            Q(shipping_cost__isnull=False) &
            Q(shipping_cost__gte=1)
        )
    elif shipping_cost is False:
        # only instances without shipping costs
        products_qs = products_qs.filter(Q(shipping_cost=None) | Q(shipping_cost=0.00))

    # filter by discount (same thresholds as shipping cost, same caveat)
    if discount is True:
        # only instances with a discount
        products_qs = products_qs.filter(
            Q(discount__isnull=False) &
            Q(discount__gte=1)
        )
    elif discount is False:
        # only instances without a discount
        products_qs = products_qs.filter(Q(discount=None) | Q(discount=0.00))

    # filter by price (both bounds are exclusive)
    if price_min is not None:
        products_qs = products_qs.filter(price__gt=price_min)
    if price_max is not None:
        products_qs = products_qs.filter(price__lt=price_max)

    # get both price bounds in a single query, then hand them back in the
    # same one-key dict shapes callers already receive
    price_bounds = products_qs.aggregate(Min('price'), Max('price'))
    min_price = {'price__min': price_bounds['price__min']}
    max_price = {'price__max': price_bounds['price__max']}

    return set(products_qs), min_price, max_price


def _parse_amount(raw):
    """Convert a CSV money cell such as ``'12,50€'`` into a Decimal.

    Surrounding whitespace and any trailing non-digit suffix (currency
    symbol) are dropped and a decimal comma is turned into a point.
    Returns None for a blank cell; raises ``decimal.InvalidOperation`` when
    what is left is not a number.
    """
    text = raw.strip()
    if not text:
        return None
    # drop the currency suffix only if there is one, instead of blindly
    # cutting the last character (which would eat a digit of '12,55')
    while text and not text[-1].isdigit():
        text = text[:-1]
    return Decimal(text.replace(',', '.'))


def _clean_row(row):
    """Normalise one CSV row in place.

    Text cells are stripped, the 'precio' and 'gastos-envio' cells become
    Decimals, and cells that end up empty are replaced by None.
    """
    for key in row:
        value = row[key]
        if value:
            if key in ('precio', 'gastos-envio'):
                value = _parse_amount(value)
            else:
                value = value.strip()
        if value == '':
            value = None
        row[key] = value


def _attach_image(product, image_url):
    """Download ``image_url`` and save it as ``product.image`` (best effort).

    Any failure is logged and swallowed so that the product itself, which
    has already been created, is kept.
    """
    headers = {"User-Agent" : "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36"}
    try:
        # a timeout keeps one unresponsive host from hanging the whole import
        response = requests.get(image_url, stream=True, headers=headers, timeout=30)
    except Exception as e:
        logging.error(f"Could not add image to product from [{image_url}]: {str(e)}")
        return
    try:
        # explicit check instead of ``assert``, which is stripped under -O
        if response.status_code != 200:
            logging.error(f"Source image [{image_url}] not reachable: {response.status_code}")
            return
        response.raw.decode_content = True
        image = Image.open(response.raw)
        # re-encode into memory and save using a File object
        img_io = BytesIO()
        image.save(img_io, format=image.format)
        product.image.save(f"{product.company.company_name}{product.name}.{image.format.lower()}", File(img_io), save=False)
        product.save()
    except Exception as e:
        logging.error(f"Could not add image to product from [{image_url}]: {str(e)}")
    finally:
        # streamed responses hold their connection until closed
        response.close()


def product_loader(csv_reader, user, company=None):
    """
    Create one Product per row of ``csv_reader``.

    Each row is a dict read through the keys 'sku', 'nombre-producto',
    'descripcion', 'url', 'precio', 'gastos-envio', 'cond-envio',
    'descuento', 'stock', 'tags', 'categoria', 'identificadores' and
    'imagen'. 'nombre-producto', 'descripcion' and 'precio' are required;
    rows missing any of them, rows with an unparseable amount and rows
    whose Product cannot be created are logged and skipped. When 'imagen'
    is set, the image is downloaded and attached on a best-effort basis.

    A HistorySync record is created for the run and every product is linked
    to it; its ``quantity`` is set to the number of products created.

    ``company`` defaults to ``user.company`` when not given.

    Returns the number of products created, or None when no company was
    given and ``user`` has none (products cannot be added without one).
    """
    from decimal import InvalidOperation

    counter = 0
    # get company
    if company is None and user is not None:
        if user.company is not None:
            company = user.company
        else:
            # cannot add products without a company
            return None

    # create historysync instance
    history = HistorySync.objects.create(company=company, sync_date=datetime.datetime.now())
    required_columns = ('nombre-producto', 'descripcion', 'precio')
    for row in csv_reader:
        # trim strings / convert amounts; a malformed amount only costs
        # this row, not the whole import
        try:
            _clean_row(row)
        except InvalidOperation as e:
            logging.error(f"Invalid amount in row {row}: {str(e)}")
            continue

        # check required data (empty cells were normalised to None above)
        if any(row[column] is None for column in required_columns):
            logging.error(f"Required data missing: {row}")
            continue
        try:
            # TODO: if tags is empty, auto-generate tags
            # assemble instance data
            product_data = {
                'company': company,
                'sku': row['sku'],
                'name': row['nombre-producto'],
                'description': row['descripcion'],
                'url': row['url'],      # already stripped; may be None
                'price': row['precio'],
                'shipping_cost': row['gastos-envio'],
                'shipping_terms': row['cond-envio'],
                'discount': row['descuento'],
                'stock': row['stock'],
                'tags': row['tags'],
                'category': row['categoria'],
                'identifiers': row['identificadores'],
                'history': history,
                'creator': user,
            }

            product = Product.objects.create(**product_data)
            # image logo data
            if row['imagen'] is not None:
                _attach_image(product, row['imagen'])
            logging.info(f"Created Product {product.id}")
            counter += 1
        except Exception as e:
            logging.error(f"Could not parse {counter}: {str(e)}")

    history.quantity = counter
    history.save()
    return counter