# consumocuidado-server/products/utils.py

import logging
import datetime

from django.db.models import Q
from django.contrib.postgres.search import SearchQuery, SearchRank, SearchVector, TrigramSimilarity
from django.db.models import Max, Min
from django.conf import settings

import requests

from products.models import Product
from companies.models import Company
from history.models import HistorySync


def _collect_tag_names(tag_objects, bucket):
    """
    Sort the names of ``tag_objects`` into ``bucket`` (mutated in place).

    A name without '/' is added to ``bucket['singles']``.
    A name with '/' is split on it: the penultimate chunk becomes the
    header (a key of ``bucket``) and the last chunk is added to the set
    stored under that header.
    """
    for tag in tag_objects:
        chunks = tag.name.split('/')
        if len(chunks) == 1:
            bucket['singles'].add(tag.name)
        else:
            # penultimate chunk is the header, last chunk is the value
            bucket.setdefault(chunks[-2], set()).add(chunks[-1])


def extract_search_filters(result_set):
    """
    Build the filter options (tags and attributes) present in a result set.

    Every item of ``result_set`` must expose ``tags.all()`` and
    ``attributes.all()``, each yielding objects with a ``name`` string.

    Returned object looks like:

    {
        "tags": {
            'singles': set(),
            'header1': set(),
        },
        "attributes": {
            'singles': set(),
            'header1': set(),
        },
    }

    Items that fail while being inspected are logged (with traceback) and
    skipped; whatever was collected from them before the failure is kept.
    """
    filter_dict = {
        "tags": {
            'singles': set(),
        },
        "attributes": {
            'singles': set(),
        }
    }
    for item in result_set:
        try:
            _collect_tag_names(item.tags.all(), filter_dict['tags'])
            _collect_tag_names(item.attributes.all(), filter_dict['attributes'])
        except Exception:
            # best effort: one broken item must not abort the whole extraction,
            # but keep the traceback so the failure can be diagnosed
            logging.exception(f'Extacting filters for {item}')
    return filter_dict


def find_related_products_v1(keyword):
    """
    Classical approach to the search, built on Q objects.

    First looks up the tag-model rows (tags, category, attributes) whose
    name contains ``keyword`` (case-insensitive), then returns the Product
    queryset matching the keyword in name or description, or related to
    any of those rows.

    Returns a queryset (not a set).
    """
    matching_tags = Product.tags.tag_model.objects.filter(name__icontains=keyword)
    matching_categories = Product.category.tag_model.objects.filter(name__icontains=keyword)
    matching_attributes = Product.attributes.tag_model.objects.filter(name__icontains=keyword)

    # a product qualifies as soon as any single criterion matches
    criteria = (
        Q(name__icontains=keyword)
        | Q(description__icontains=keyword)
        | Q(tags__in=matching_tags)
        | Q(category__in=matching_categories)
        | Q(attributes__in=matching_attributes)
    )
    return Product.objects.filter(criteria)


def find_related_products_v5(keyword):
    """
    Single query solution, using Q objects.

    Matches ``keyword`` (case-insensitive containment) against the product
    name and description, tag and attribute labels, and category name.

    Returns the matching products as a set.
    """
    lookups = (
        'name__icontains',
        'description__icontains',
        'tags__label__icontains',
        'category__name__icontains',
        'attributes__label__icontains',
    )
    conditions = [Q(**{lookup: keyword}) for lookup in lookups]

    # OR the conditions together, left to right
    combined = conditions[0]
    for condition in conditions[1:]:
        combined = combined | condition

    return set(Product.objects.filter(combined))


def find_related_products_v2(keyword):
    """
    More advanced: full-text search through a single SearchVector.

    The vector spans name, description, tag labels, attribute labels and
    category name; products whose vector matches ``keyword`` are returned
    as a set.
    """
    searchable = SearchVector(
        'name',
        'description',
        'tags__label',
        'attributes__label',
        'category__name',
    )
    annotated = Product.objects.annotate(search=searchable)
    matches = annotated.filter(search=keyword)
    return set(matches)


def find_related_products_v3(keyword):
    """
    Ranked product search.

    - one SearchVector per searched field, added together
    - SearchQuery wraps the keyword
    - SearchRank scores each product against the query

    Products with a rank above 0.05 are returned as a set, so no ordering
    by rank survives.
    """
    searched_fields = ('name', 'description', 'tags__label', 'attributes__label', 'category__name')

    # sum the per-field vectors left to right
    vector = SearchVector(searched_fields[0])
    for field_name in searched_fields[1:]:
        vector = vector + SearchVector(field_name)

    relevance = SearchRank(vector, SearchQuery(keyword))
    ranked = Product.objects.annotate(rank=relevance)
    return set(ranked.filter(rank__gt=0.05))


def find_related_products_v6(keyword, shipping_cost=None, discount=None, category=None, tags=None, price_min=None, price_max=None):
    """
    Ranked product search with optional filters.

    Ranking: one SearchVector per field (name, description, tag labels,
    attribute labels, category name) added together, scored with
    SearchRank against SearchQuery(keyword); only rank > 0.05 is kept.

    Optional filters (skipped when the argument is None):
    - category: products with that category
    - tags: products with that tag
    - shipping_cost: True -> non-null and >= 1; False -> null or 0
    - discount: True -> non-null and >= 1; False -> null or 0
    - price_min / price_max: exclusive lower / upper price bounds

    Returns a tuple ``(products, min_price, max_price)`` where ``products``
    is a set and the other two are the dicts produced by
    ``aggregate(Min('price'))`` and ``aggregate(Max('price'))`` on the
    filtered queryset.
    """

    def _by_flag(queryset, field, flag):
        # True keeps rows with a value of at least 1, False keeps rows with
        # no value or zero; anything else leaves the queryset untouched
        if flag is True:
            return queryset.filter(**{f'{field}__isnull': False, f'{field}__gte': 1})
        if flag is False:
            return queryset.filter(Q(**{field: None}) | Q(**{field: 0.00}))
        return queryset

    searched_fields = ('name', 'description', 'tags__label', 'attributes__label', 'category__name')
    vector = SearchVector(searched_fields[0])
    for field_name in searched_fields[1:]:
        vector = vector + SearchVector(field_name)

    relevance = SearchRank(vector, SearchQuery(keyword))
    # no order_by here: ordering would be lost when casting to a set
    matches = Product.objects.annotate(rank=relevance).filter(rank__gt=0.05)

    if category is not None:
        matches = matches.filter(category=category)

    if tags is not None:
        matches = matches.filter(tags=tags)

    matches = _by_flag(matches, 'shipping_cost', shipping_cost)
    matches = _by_flag(matches, 'discount', discount)

    if price_min is not None:
        matches = matches.filter(price__gt=price_min)
    if price_max is not None:
        matches = matches.filter(price__lt=price_max)

    # price range of what is left after filtering
    min_price = matches.aggregate(Min('price'))
    max_price = matches.aggregate(Max('price'))

    return set(matches), min_price, max_price


def find_related_products_v4(keyword):
    """
    Similarity-ranked search using trigrams on the product name.

    Marked as not working by its author. Every product is annotated with
    its trigram similarity to ``keyword`` and ordered by it (descending),
    but the result is returned as a set, which does not keep that order.
    """
    name_similarity = TrigramSimilarity('name', keyword)
    ranked = Product.objects.annotate(similarity=name_similarity)
    ranked = ranked.order_by('-similarity')
    return set(ranked)


def _attach_remote_image(product, image_url):
    """
    Download ``image_url`` and store it as ``product.image``.

    A non-200 response is logged and the product is left without image.
    Any other failure (network error, unreadable image, ...) is raised to
    the caller.
    """
    # Imported here because the module never imported these names.
    # NOTE(review): assumes Pillow is installed (the original code already
    # relied on ``Image.open``) -- confirm against project requirements.
    from io import BytesIO
    from django.core.files import File
    from PIL import Image

    headers = {"User-Agent" : "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36"}
    # timeout: never let one unresponsive host block the whole import;
    # the context manager releases the streamed connection
    with requests.get(image_url, stream=True, headers=headers, timeout=10) as response:
        # explicit check instead of ``assert``, which is stripped under -O
        if response.status_code != 200:
            logging.error(f"Source image [{image_url}] not reachable: {response.status_code}")
            return
        response.raw.decode_content = True
        image = Image.open(response.raw)
        # save using File object
        img_io = BytesIO()
        image.save(img_io, format=image.format)
    product.image.save(f"{product.company.company_name}{product.name}.{image.format.lower()}", File(img_io), save=False)
    product.save()


def product_loader(csv_reader, user):
    """
    Create one Product per valid row of ``csv_reader``.

    ``csv_reader`` yields dict-like rows with (at least) the columns
    'nombre-producto', 'descripcion', 'precio' and 'categoria'; the
    optional 'imagen' column holds the URL of the product image.
    ``user`` becomes the creator and ``user.company`` the owning company.

    String values are stripped and lower-cased in place, except 'imagen',
    which is only stripped because URLs are case-sensitive. Rows missing
    required data are logged and skipped, as are rows that fail to be
    stored. An image that cannot be fetched is logged but does not discard
    the product.

    A HistorySync record is created for the run and its ``quantity`` is
    set to the number of products created, which is also the return value.
    """
    from django.utils import timezone

    required_columns = ('nombre-producto', 'descripcion', 'precio', 'categoria')
    counter = 0
    # create historysync instance; timezone.now() is naive or aware
    # depending on settings.USE_TZ
    history = HistorySync.objects.create(company=user.company, sync_date=timezone.now())

    for row in csv_reader:
        # trim strings (non-string values, e.g. None for short rows, are left alone)
        for key, value in row.items():
            if isinstance(value, str) and value:
                value = value.strip()
                row[key] = value if key == 'imagen' else value.lower()

        # check required data (empty or None)
        if not all(row[column] for column in required_columns):
            logging.error(f"Required data missing: {row}")
            continue

        try:
            # TODO: if tags is empty, auto-generate tags

            # assemble instance data
            product_data = {
                    'company': user.company,
                    'name': row['nombre-producto'],
                    'description': row['descripcion'],
                    # columns not mapped yet:
                    #'url': row['url'].strip(),
                    #'price': row['precio'].strip(),
                    #'shipping_cost': row['gastos-envio'].strip(),
                    #'shipping_terms': row['cond-envio'].strip(),
                    #'discount': row['descuento'].strip(),
                    #'stock': row['stock'].strip(),
                    #'tags': row['tags'].strip(),
                    #'category': row['categoria'].strip(),
                    #'identifiers': row['identificadores'].strip(),
                    #'history': history,
                    'creator': user,
            }

            product = Product.objects.create(**product_data)

            # image is optional: a failure here must not discard the product
            image_url = row.get('imagen')
            if image_url:
                try:
                    _attach_remote_image(product, image_url)
                except Exception as e:
                    logging.error(f"Could not add image to product from [{image_url}]: {str(e)}")
            logging.info(f"Created Product {product.id}")
            counter += 1
        except Exception:
            # keep the traceback; the row is skipped and the import goes on
            logging.exception(f"Could not parse {row}")

    history.quantity = counter
    history.save()
    return counter