# consumocuidado-server/products/utils.py

import logging
import datetime
from decimal import Decimal

from django.db.models import Q
from django.contrib.postgres.search import SearchQuery, SearchRank, SearchVector, TrigramSimilarity
from django.db.models import Max, Min
from django.conf import settings
from django.utils import timezone

import requests

from products.models import Product, CategoryTag
from companies.models import Company
from history.models import HistorySync
from PIL import Image
from io import BytesIO
from django.core.files import File


# Root-logger configuration used by the helpers in this module: records at
# INFO level and above are written as "LEVEL:message" to logs/csv-load.log,
# which is opened in write mode.
logging.basicConfig(
    level=logging.INFO,
    format='%(levelname)s:%(message)s',
    filemode='w',
    filename='logs/csv-load.log',
)


def _collect_filter_names(bucket, names):
    """Sort slash-separated names into ``bucket`` (header -> set of labels).

    A name without '/' is stored under the 'singles' key. Otherwise the last
    path component is stored under the penultimate component, which acts as
    the header.
    """
    for full_name in names:
        chunks = full_name.split('/')
        header = 'singles' if len(chunks) == 1 else chunks[-2]
        bucket.setdefault(header, set()).add(chunks[-1])


def extract_search_filters(result_set):
    """
    Build the filter options available for a set of search results.

    Every item in ``result_set`` must expose ``tags.all()`` and
    ``attributes.all()``, each yielding objects with a ``name`` string.

    Returned object should look something like:

    {
        "tags": {
            'singles': set(),
            'header1': set()
        },
        "attributes": {
            'singles': set(),
            'header1': set()
        },
    }

    Items that raise while being inspected are logged and skipped; anything
    collected from that item before the failure is kept.
    """
    filter_dict = {
        "tags": {
            'singles': set(),
        },
        "attributes": {
            'singles': set(),
        }
    }
    for item in result_set:
        try:
            # tags first, then attributes: a failure on attributes must not
            # discard the tags already collected for this item
            _collect_filter_names(
                filter_dict['tags'],
                (tag.name for tag in item.tags.all()),
            )
            _collect_filter_names(
                filter_dict['attributes'],
                (attribute.name for attribute in item.attributes.all()),
            )
        except Exception as e:
            # best effort: one broken item must not abort the whole extraction,
            # but the cause is now included in the log entry
            logging.error(f'Extacting filters for {item}: {e}')
    return filter_dict


def get_related_products(product):
    """Make different db searches until you get 10 instances to return

    Searches run in this order, and each one only runs while fewer than 10
    results have been gathered:

    1. products sharing ``product.category``
    2. products sharing any of ``product.tags``
    3. products from the same ``product.company``
    4. the most recently created products

    Returns a list with at most 10 Product instances.
    """
    limit = 10
    # Each search is wrapped in a callable so that its queryset (and the
    # attribute access on `product` it needs) only happens if it is reached.
    searches = (
        lambda: Product.objects.filter(category=product.category),
        lambda: Product.objects.filter(tags__in=product.tags.all()),
        lambda: Product.objects.filter(company=product.company),
        # the original built this queryset but then iterated the company one
        # a second time, so the latest products were never added
        lambda: Product.objects.order_by('-created'),
    )

    total_results = []
    for build_queryset in searches:
        if len(total_results) >= limit:
            break
        total_results.extend(build_queryset()[:limit])

    # NOTE(review): results are not de-duplicated, so the same product (and
    # `product` itself) can appear more than once — confirm whether callers
    # expect that.
    return total_results[:limit]


def ranked_product_search(keyword, shipping_cost=None, discount=None, categories=None, tags=None, price_min=None, price_max=None):
    """
    Ranked product search

    SearchVectors for the fields
    SearchQuery for the value
    SearchRank for relevancy scoring and ranking

    Only active products with a rank above 0.05 are considered.

    allow filtering by:
    - categories: iterable of category labels; a product matches when its
      category is one of them or one of their children
    - tags: passed straight to ``filter(tags=...)``
    - shipping cost: True -> shipping_cost >= 1, False -> null or 0
    - discount: True -> discount >= 1, False -> null or 0
    - price_min / price_max: inclusive price bounds

    Returns a tuple ``(products, min_price, max_price)`` where ``products`` is
    a set of Product instances and the other two are the dictionaries
    returned by ``aggregate(Min('price'))`` and ``aggregate(Max('price'))``
    over the filtered queryset.
    """
    vector = (
        SearchVector('name')
        + SearchVector('description')
        + SearchVector('tags__label')
        + SearchVector('attributes__label')
        + SearchVector('category__name')
        + SearchVector('company__company_name')
    )
    query = SearchQuery(keyword)

    products_qs = Product.objects.annotate(
        rank=SearchRank(vector, query)
    ).filter(rank__gt=0.05, active=True)

    # filter by category
    # (merge conflict resolved in favour of the CategoryTag-based lookup)
    if categories is not None:
        descendants = []
        for entry in categories:
            cat = CategoryTag.objects.filter(label__iexact=entry).first()
            if cat is None:
                # unknown label: nothing to add; if no label matches at all
                # the `__in` filter below yields no products
                continue
            # append category tag, and children
            descendants.append(cat)
            descendants.extend(cat.children.all())

        products_qs = products_qs.filter(category__in=descendants)

    # filter by tags
    if tags is not None:
        products_qs = products_qs.filter(tags=tags)

    # filter by shipping cost
    # NOTE(review): values strictly between 0 and 1 match neither branch —
    # confirm that `gte=1` (rather than `gt=0`) is intended.
    if shipping_cost is True:
        # only instances with shipping costs
        products_qs = products_qs.filter(
            Q(shipping_cost__isnull=False) &
            Q(shipping_cost__gte=1)
        )
    elif shipping_cost is False:
        # only instances without shipping costs
        products_qs = products_qs.filter(Q(shipping_cost=None) | Q(shipping_cost=0.00))

    # filter by discount
    if discount is True:
        # only instances with a discount
        products_qs = products_qs.filter(
            Q(discount__isnull=False) &
            Q(discount__gte=1)
        )
    elif discount is False:
        # only instances without a discount
        products_qs = products_qs.filter(Q(discount=None) | Q(discount=0.00))

    # filter by price
    if price_min is not None:
        products_qs = products_qs.filter(price__gte=price_min)
    if price_max is not None:
        products_qs = products_qs.filter(price__lte=price_max)

    # get min_price and max_price of what is left after filtering
    min_price = products_qs.aggregate(Min('price'))
    max_price = products_qs.aggregate(Max('price'))

    # set() collapses rows repeated by the joins on tags/attributes
    return set(products_qs), min_price, max_price


# Seconds to wait for the image host before giving up on a download.
_IMAGE_REQUEST_TIMEOUT = 30

# CSV columns holding amounts such as "12,50€" that are converted to Decimal.
_DECIMAL_COLUMNS = ('precio', 'gastos-envio')


def _parse_amount(raw):
    """Convert a CSV amount such as ``"12,50€"`` or ``"12,50"`` to Decimal."""
    text = raw.strip()
    # drop a single trailing symbol (e.g. the currency sign) only when there
    # is one, so plain numbers do not lose their last digit
    if text and not text[-1].isdigit():
        text = text[:-1].strip()
    return Decimal(text.replace(',', '.'))


def _normalise_csv_row(row):
    """Clean ``row`` in place: empty cells become None, amounts become
    Decimal and every other value is stripped of surrounding whitespace."""
    for key in row:
        try:
            value = row[key]
            if not value:
                row[key] = None
            elif key in _DECIMAL_COLUMNS:
                row[key] = _parse_amount(value)
            else:
                row[key] = value.strip()
        except Exception as e:
            # the cell keeps its raw value; creation of the product will
            # report the problem later if the value is unusable
            logging.error(f"Could not access key {key}: {str(e)}")


def _attach_product_image(product, image_url):
    """Download ``image_url`` and store it as ``product.image``.

    Failures are logged and never raised: the product is kept without image.
    """
    try:
        # get image
        headers = {"User-Agent" : "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36"}
        response = requests.get(
            image_url,
            stream=True,
            headers=headers,
            timeout=_IMAGE_REQUEST_TIMEOUT,
        )
        # explicit check instead of `assert`, which is removed under -O
        if response.status_code != 200:
            logging.error(f"Source image [{image_url}] not reachable: {response.status_code}")
            return
        response.raw.decode_content = True
        image = Image.open(response.raw)
        # save using File object
        img_io = BytesIO()
        image.save(img_io, format=image.format)
        product.image.save(f"{product.company.company_name}{product.name}.{image.format.lower()}", File(img_io), save=False)
        product.save()
    except Exception as e:
        logging.error(f"Could not add image to product from [{image_url}]: {str(e)}")


def product_loader(csv_reader, user, company=None):
    """
    Parse csv data and extract:

    - product data

    ``csv_reader`` yields one mutable mapping per row with the columns
    sku, nombre-producto, descripcion, url, precio, gastos-envio, cond-envio,
    descuento, stock, tags, categoria, identificadores and imagen.

    When ``company`` is not given it is taken from ``user.company``. A
    HistorySync record is created for the load and updated with the number
    of products created.

    Rows without nombre-producto, descripcion or precio are logged and
    skipped, as are rows whose product cannot be created. A failed image
    download does not discard the product.

    Returns count, or None when ``user`` has no company to attach the
    products to.
    """
    counter = 0
    # get company
    if company is None and user is not None:
        if user.company is not None:
            company = user.company
        else:
            # cannot add products without a company
            return None

    # create historysync instance
    history = HistorySync.objects.create(company=company, sync_date=timezone.now())
    for row in csv_reader:
        # trim strings / convert amounts
        _normalise_csv_row(row)

        # check required data: empty cells are None after normalisation and
        # whitespace-only cells are '', so both have to be rejected
        required = (row['nombre-producto'], row['descripcion'], row['precio'])
        if any(value is None or value == '' for value in required):
            logging.error(f"Required data missing: {row}")
            continue
        try:
            # TODO: if tags is empty, auto-generate tags
            # assemble instance data
            product_data = {
                'company': company,
                'sku': row['sku'],
                'name': row['nombre-producto'],
                'description': row['descripcion'],
                # already stripped above; may be None when the cell is empty
                'url': row['url'],
                'price': row['precio'],
                'shipping_cost': row['gastos-envio'],
                'shipping_terms': row['cond-envio'],
                'discount': row['descuento'],
                'stock': row['stock'],
                'tags': row['tags'],
                'category': row['categoria'],
                'identifiers': row['identificadores'],
                'history': history,
                'creator': user,
                # 'valid': True
            }

            product = Product.objects.create(**product_data)
            # image logo data
            if row['imagen'] is not None:
                _attach_product_image(product, row['imagen'])
            logging.info(f"Created Product {product.id}")
            counter += 1
        except Exception as e:
            logging.error(f"Could not parse {counter}: {str(e)}")

    history.quantity = counter
    history.save()
    return counter