# consumocuidado-server/products/utils.py

import logging
import datetime
from decimal import Decimal

from django.db.models import Q
from django.contrib.postgres.search import SearchQuery, SearchRank, SearchVector, TrigramSimilarity
from django.db.models import Max, Min
from django.conf import settings
from django.utils import timezone
from django.contrib.gis.geos import Point
from django.contrib.gis.measure import D

import requests

from products.models import Product
from companies.models import Company
from history.models import HistorySync
from PIL import Image
from io import BytesIO
from django.core.files import File


# Configure the root logger when this module is imported: records at INFO
# level and above are written to logs/csv-load.log as "LEVEL:message".
# filemode='w' opens the log file for writing rather than appending, so
# content from a previous run is not kept.
logging.basicConfig(
    filename='logs/csv-load.log',
    filemode='w',
    format='%(levelname)s:%(message)s',
    level=logging.INFO,
    )


def extract_search_filters(result_set):
    """
    Collect the tag and attribute names present on a set of search results.

    A name without '/' is stored under the 'singles' key. For a path-like
    name such as 'a/b/c', the last chunk ('c') is stored under a key named
    after the penultimate chunk ('b').

    Returned object looks something like:

    {
        "tags": {
            'singles': set(),
            'header1': set()
        },
        "attributes": {
            'singles': set(),
            'header1': set()
        },
    }

    Parameters:
    - result_set: iterable of objects exposing `tags.all()` and
      `attributes.all()`, each yielding objects with a `name` string.

    An item that raises while being read is logged and skipped; names that
    were collected from it before the failure are kept.
    """
    filter_dict = {
        "tags": {
            'singles': set(),
        },
        "attributes": {
            'singles': set(),
        }
    }
    for item in result_set:
        try:
            _collect_filter_names(item.tags.all(), filter_dict['tags'])
            _collect_filter_names(item.attributes.all(), filter_dict['attributes'])
        except Exception as e:
            # best-effort: one broken item must not abort the whole extraction
            logging.error(f'Extracting filters for {item}: {e}')
    return filter_dict


def _collect_filter_names(labels, bucket):
    """Sort each label name into `bucket`, grouped by its penultimate path chunk."""
    for label in labels:
        chunks = label.name.split('/')
        if len(chunks) == 1:
            bucket['singles'].add(label.name)
        else:
            # penultimate chunk is the header, last chunk is the value
            bucket.setdefault(chunks[-2], set()).add(chunks[-1])


def get_related_products(product):
    """Make different db searches until you get 10 instances to return.

    Searches run in this order, each one only if the previous ones did not
    yield 10 results: same category, shared tags, same company, and finally
    the most recently created products.

    Parameters:
    - product: instance providing `category`, `tags.all()` and `company`.

    Returns a list of at most 10 Product instances.

    NOTE(review): results of the individual searches are simply appended, so
    the same product (including `product` itself) can appear more than once.
    """
    limit = 10

    # search by category
    total_results = list(Product.objects.filter(category=product.category)[:limit])

    if len(total_results) < limit:
        # search by tags
        total_results.extend(Product.objects.filter(tags__in=product.tags.all())[:limit])

    if len(total_results) < limit:
        # search by coop
        total_results.extend(Product.objects.filter(company=product.company)[:limit])

    if len(total_results) < limit:
        # search by latest (previously the company queryset was re-added here
        # by mistake, so the latest products never made it into the results)
        total_results.extend(Product.objects.order_by('-created')[:limit])

    return total_results[:limit]


def ranked_product_search(keywords, shipping_cost=None, discount=None, category=None, tags=None, price_min=None,price_max=None, coordinates=None):
    """
    Ranked product search

    SearchVectors for the fields
    SearchQuery for the value
    SearchRank for relevancy scoring and ranking

    Only active products with a rank above 0.05 are considered.

    Parameters:
    - keywords: search text, used as a 'plain' SearchQuery
    - shipping_cost: True -> only products with shipping cost >= 1,
      False -> only products with no (null or 0) shipping cost, None -> no filter
    - discount: True -> only products with discount >= 1,
      False -> only products with no (null or 0) discount, None -> no filter
    - category: value passed to filter(category=...), None -> no filter
    - tags: value passed to filter(tags=...), None -> no filter
    - price_min / price_max: inclusive price bounds, None -> no bound
    - coordinates: value handed to Point(); presumably (longitude, latitude)
      -- confirm with the caller

    Geolocation: when coordinates are given, results are restricted to
    companies within 10 km, then 50 km, then 200 km, using the first radius
    that yields more than 10 rows. If no radius does, no geo filter is
    applied.

    Response includes (as a tuple, in this order):
    - result_set: set of Product instances
    - min_price: aggregate dict, {'price__min': value}
    - max_price: aggregate dict, {'price__max': value}
    - georesult: '10k', '50k', '200k' or None when no geo filter was applied
    """
    vector = SearchVector('name') + SearchVector('description') + SearchVector('tags__label') + SearchVector('attributes__label') + SearchVector('category__label') + SearchVector('company__company_name')

    query = SearchQuery(keywords, search_type='plain')

    products_qs = Product.objects.annotate(
        rank=SearchRank(vector, query)
    ).filter(rank__gt=0.05, active=True)

    # geolocation filtering: widen the radius until there are enough results
    georesult = None
    if coordinates is not None:
        point = Point(coordinates)
        for radius_km, label in ((10, '10k'), (50, '50k'), (200, '200k')):
            nearby_qs = products_qs.filter(company__geo__distance_lte=(point, D(km=radius_km)))
            if nearby_qs.count() > 10:
                products_qs = nearby_qs
                georesult = label
                break

    # filter by category
    if category is not None:
        products_qs = products_qs.filter(category=category)

    # filter by tags
    if tags is not None:
        products_qs = products_qs.filter(tags=tags)

    # filter by shipping cost
    if shipping_cost is True:
        # only instances with shipping costs
        products_qs = products_qs.filter(
            Q(shipping_cost__isnull=False)&
            Q(shipping_cost__gte=1)
        )
    elif shipping_cost is False:
        # only instances without shipping costs
        products_qs = products_qs.filter(Q(shipping_cost=None)|Q(shipping_cost=0.00))

    # filter by discount
    if discount is True:
        # only instances with a discount
        products_qs = products_qs.filter(
            Q(discount__isnull=False)&
            Q(discount__gte=1)
        )
    elif discount is False:
        # only instances without a discount
        products_qs = products_qs.filter(Q(discount=None)|Q(discount=0.00))

    # filter by price
    if price_min is not None:
        products_qs = products_qs.filter(price__gte=price_min)
    if price_max is not None:
        products_qs = products_qs.filter(price__lte=price_max)

    # get min_price and max_price of the filtered results
    min_price = products_qs.aggregate(Min('price'))
    max_price = products_qs.aggregate(Max('price'))

    # set() collapses rows repeated by the joins on tags/attributes
    return set(products_qs), min_price, max_price, georesult


def product_loader(csv_reader, user, company=None):
    """
    Parse csv data and create one Product per valid row.

    Each row is expected to be a dict with the columns: sku,
    nombre-producto, descripcion, url, precio, gastos-envio, cond-envio,
    descuento, stock, tags, categoria, identificadores and imagen.
    nombre-producto, descripcion and precio are required; rows lacking any
    of them are logged and skipped. When `imagen` holds a URL the image is
    downloaded and attached to the product; a failed download is logged but
    the product is still created and counted.

    A HistorySync record is created for the import and updated with the
    number of products created.

    Parameters:
    - csv_reader: iterable of row dicts (rows are normalised in place)
    - user: stored as the creator of each product; its `company` is used
      when `company` is not given
    - company: company the products belong to

    Returns count of created products, or None when neither `company` nor
    `user.company` is set.
    """
    counter = 0
    # get company
    # NOTE(review): when both company and user are None the import proceeds
    # with company=None -- confirm whether that case should also return None
    if company is None and user is not None:
        if user.company is None:
            # cannot add products without a company
            return None
        company = user.company

    required_fields = ('nombre-producto', 'descripcion', 'precio')

    # create historysync instance
    history = HistorySync.objects.create(company=company, sync_date=timezone.now())
    for row_number, row in enumerate(csv_reader, start=1):
        _normalize_product_row(row)
        # check required data: empty cells were normalised to None and
        # whitespace-only cells to '', so both have to be rejected
        if any(row.get(field) in (None, '') for field in required_fields):
            logging.error(f"Required data missing: {row}")
            continue
        try:
            # TODO: if tags is empty, auto-generate tags
            # assemble instance data
            product_data = {
                'company': company,
                'sku': row['sku'],
                'name': row['nombre-producto'],
                'description': row['descripcion'],
                # already stripped (or None) after normalisation
                'url': row['url'],
                'price': row['precio'],
                'shipping_cost': row['gastos-envio'],
                'shipping_terms': row['cond-envio'],
                'discount': row['descuento'],
                'stock': row['stock'],
                'tags': row['tags'],
                'category': row['categoria'],
                'identifiers': row['identificadores'],
                'history': history,
                'creator': user,
                # 'valid': True
            }

            product = Product.objects.create(**product_data)
            # image logo data
            image_url = row.get('imagen')
            if image_url:
                _attach_product_image(product, image_url)
            logging.info(f"Created Product {product.id}")
            counter += 1
        except Exception as e:
            logging.error(f"Could not parse row {row_number}: {str(e)}")

    history.quantity = counter
    history.save()
    return counter


def _normalize_product_row(row):
    """Strip every cell of `row` in place, map empty cells to None and parse prices."""
    for key in row:
        try:
            value = row[key]
            if not value:
                row[key] = None
            elif key in ('precio', 'gastos-envio'):
                # the last character is dropped before parsing -- presumably a
                # trailing currency symbol; confirm against the CSV format.
                # A decimal comma is accepted as well as a decimal point.
                row[key] = Decimal(value[:-1].strip().replace(',', '.'))
            else:
                row[key] = value.strip()
        except Exception as e:
            # leave the cell untouched and carry on with the next one
            logging.error(f"Could not access key {key}: {str(e)}")


def _attach_product_image(product, image_url, timeout=10):
    """Download `image_url` and save it as `product.image`; failures are logged, not raised."""
    headers = {"User-Agent" : "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36"}
    try:
        # context manager releases the streamed connection; the timeout keeps
        # one unresponsive host from stalling the whole import
        with requests.get(image_url, stream=True, headers=headers, timeout=timeout) as response:
            if response.status_code != 200:
                logging.error(f"Source image [{image_url}] not reachable: {response.status_code}")
                return
            response.raw.decode_content = True
            image = Image.open(response.raw)
            # re-encode into memory while the connection is still open
            img_io = BytesIO()
            image.save(img_io, format=image.format)
        # save using File object
        product.image.save(f"{product.company.company_name}{product.name}.{image.format.lower()}", File(img_io), save=False)
        product.save()
    except Exception as e:
        logging.error(f"Could not add image to product from [{image_url}]: {str(e)}")