Added command to extract top-level tags

This commit is contained in:
Sam
2021-02-17 13:07:07 +00:00
parent e42811300b
commit 64c56514b7
2 changed files with 53 additions and 0 deletions

View File

@@ -0,0 +1,31 @@
import logging
from django.core.management.base import BaseCommand
from django.conf import settings
from core.models import TreeTag
class Command(BaseCommand):
    """Management command that exports the names of top-level tags.

    A tag counts as top-level when ``tag.get_ancestors()`` returns a falsy
    value. The names are written, one per line, to
    ``<BASE_DIR>/../datasets/top_tags.txt``.
    """

    help = 'Extract top level tags'

    def handle(self, *args, **kwargs):
        """Collect top-level tag names and write them to the dataset file.

        Progress and the final summary go through ``self.stdout`` so they
        are visible on the console and capturable via
        ``call_command(..., stdout=...)``.

        Raises:
            OSError: if the output file cannot be opened for writing
                (for example when the ``datasets`` directory is missing).
        """
        self.stdout.write("Extracting top-level tags from TreeTag instances")
        # Keep only tags without ancestors.
        # NOTE(review): get_ancestors() is called once per tag; if it hits
        # the database each time this is an N+1 pattern -- confirm against
        # the TreeTag implementation before running on large tag sets.
        top_tags = [
            tag.name for tag in TreeTag.objects.all()
            if not tag.get_ancestors()
        ]

        self.stdout.write("Saving top-level tags to file")
        # Results are saved to datasets/top_tags.txt next to BASE_DIR.
        path = f"{settings.BASE_DIR}/../datasets/top_tags.txt"
        # Explicit UTF-8 so tag names with non-ASCII characters are written
        # the same way regardless of the platform's locale encoding.
        with open(path, 'wt', encoding='utf-8') as f:
            f.writelines(name + '\n' for name in top_tags)

        summary = f"Extracted {len(top_tags)} to {path}"
        # The root logger drops INFO records unless logging is configured,
        # so keep the log record but also report the result on stdout.
        logging.info(summary)
        self.stdout.write(summary)