From 64c56514b70aa510df485d7373ad47ba36b012d4 Mon Sep 17 00:00:00 2001
From: Sam
Date: Wed, 17 Feb 2021 13:07:07 +0000
Subject: [PATCH] added command to extract top level tags

---
 core/management/commands/extractparenttags.py | 45 +++++++++++++++++++
 datasets/top_tags.txt                         | 22 +++++++++
 2 files changed, 67 insertions(+)
 create mode 100644 core/management/commands/extractparenttags.py
 create mode 100644 datasets/top_tags.txt

diff --git a/core/management/commands/extractparenttags.py b/core/management/commands/extractparenttags.py
new file mode 100644
index 0000000..7e14657
--- /dev/null
+++ b/core/management/commands/extractparenttags.py
@@ -0,0 +1,45 @@
+import logging
+
+from django.core.management.base import BaseCommand
+from django.conf import settings
+
+from core.models import TreeTag
+
+logger = logging.getLogger(__name__)
+
+
+class Command(BaseCommand):
+    """Export the names of all top-level TreeTag instances to a text file."""
+
+    help = 'Extract top level tags'
+
+    def handle(self, *args, **kwargs):
+        """Write the name of every TreeTag without ancestors to a file.
+
+        The names are written one per line, UTF-8 encoded, to
+        ``datasets/top_tags.txt`` one directory above ``settings.BASE_DIR``.
+        Progress is reported through ``self.stdout`` so that callers of
+        ``call_command`` can capture or redirect it.
+        """
+        self.stdout.write("Extracting top-level tags from TreeTag instances")
+        # A tag is top-level when get_ancestors() returns nothing.
+        # NOTE(review): the committed datasets/top_tags.txt contains a
+        # "# Google_Product_Taxonomy_Version: ..." entry, which looks like a
+        # taxonomy header imported as a tag -- confirm against the importer.
+        top_tags = [
+            tag.name
+            for tag in TreeTag.objects.all()
+            if not tag.get_ancestors()
+        ]
+
+        self.stdout.write("Saving top-level tags to file")
+        path = f"{settings.BASE_DIR}/../datasets/top_tags.txt"
+        # Tag names contain non-ASCII characters, so pin the encoding instead
+        # of relying on the platform default.
+        with open(path, 'wt', encoding='utf-8') as f:
+            f.writelines(f"{name}\n" for name in top_tags)
+
+        # Report on stdout so the summary is always visible, and also emit a
+        # log record for whatever handlers the project has configured.
+        self.stdout.write(f"Extracted {len(top_tags)} top-level tags to {path}")
+        logger.info("Extracted %d top-level tags to %s", len(top_tags), path)
diff --git a/datasets/top_tags.txt b/datasets/top_tags.txt
new file mode 100644
index 0000000..294d7cb
--- /dev/null
+++ b/datasets/top_tags.txt
@@ -0,0 +1,22 @@
+Alimentación, bebida y tabaco
+Arte y ocio
+Bebés y niños pequeños
+Bricolaje
+Cámaras y ópticas
+Casa y jardín
+Economía e industria
+Electrónica
+Elementos religiosos y ceremoniales
+Equipamiento deportivo
+# Google_Product_Taxonomy_Version: 2015-02-19
+Juegos y juguetes
+Maletas y bolsos de viaje
+Material de oficina
+Mobiliario
+Multimedia
+Productos para adultos
+Productos para mascotas y animales
+Ropa y accesorios
+Salud y belleza
+Software
+Vehículos y recambios