Added command to extract top-level tags
This commit is contained in:
31
core/management/commands/extractparenttags.py
Normal file
31
core/management/commands/extractparenttags.py
Normal file
@@ -0,0 +1,31 @@
|
||||
import logging
|
||||
|
||||
from django.core.management.base import BaseCommand
|
||||
from django.conf import settings
|
||||
|
||||
from core.models import TreeTag
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Management command that exports the names of top-level ``TreeTag`` rows.

    A tag is treated as top-level when its ``get_ancestors()`` result is
    empty. The names are written, one per line, to ``datasets/top_tags.txt``
    located next to (one directory above) ``settings.BASE_DIR``.
    """

    help = 'Extract top level tags'

    def handle(self, *args, **kwargs):
        """Collect top-level tag names and save them to the dataset file.

        The positional and keyword arguments passed in by Django are accepted
        but not used. Progress and the final summary are written to the
        command's ``self.stdout`` so they are always visible and honour any
        stdout redirection applied to the command.
        """
        self.stdout.write("Extracting top-level tags from TreeTag instances")

        # A tag without ancestors sits at the root of the tree.
        top_tags = [
            tag.name
            for tag in TreeTag.objects.all()
            if not tag.get_ancestors()
        ]

        self.stdout.write("Saving top-level tags to file")

        path = f"{settings.BASE_DIR}/../datasets/top_tags.txt"
        # Tag names may contain non-ASCII characters; pin the encoding so the
        # output does not depend on the platform's locale default.
        with open(path, 'wt', encoding='utf-8') as f:
            f.writelines(f"{name}\n" for name in top_tags)

        # Report through stdout rather than logging.info: the root logger
        # drops INFO records at its default WARNING level.
        self.stdout.write(f"Extracted {len(top_tags)} to {path}")
|
||||
22
datasets/top_tags.txt
Normal file
22
datasets/top_tags.txt
Normal file
@@ -0,0 +1,22 @@
|
||||
Alimentación, bebida y tabaco
|
||||
Arte y ocio
|
||||
Bebés y niños pequeños
|
||||
Bricolaje
|
||||
Cámaras y ópticas
|
||||
Casa y jardín
|
||||
Economía e industria
|
||||
Electrónica
|
||||
Elementos religiosos y ceremoniales
|
||||
Equipamiento deportivo
|
||||
# Google_Product_Taxonomy_Version: 2015-02-19
|
||||
Juegos y juguetes
|
||||
Maletas y bolsos de viaje
|
||||
Material de oficina
|
||||
Mobiliario
|
||||
Multimedia
|
||||
Productos para adultos
|
||||
Productos para mascotas y animales
|
||||
Ropa y accesorios
|
||||
Salud y belleza
|
||||
Software
|
||||
Vehículos y recambios
|
||||
Reference in New Issue
Block a user