Update machine learning ini files

This commit is contained in:
cronopioelectronico
2021-09-07 18:24:04 +02:00
parent 6d6888f201
commit b6f0b1b063
4 changed files with 90 additions and 0 deletions

View File

@@ -0,0 +1,30 @@
# Configuration with four sections: text preprocessing, related-proposal search,
# topic modelling and logging.
[PREPROCESSING]
# Language selection. The active values select English; the commented-out
# lines directly above each key are the Spanish alternatives.
#stanza_model_lang = es
stanza_model_lang = en
#stopwords_lang = spanish
stopwords_lang = english
# NOTE(review): presumably enables lemmatisation of nouns - confirm against the consumer.
noun_lemmatisation = True
# NOTE(review): presumably the minimum number of occurrences for an n-gram to be kept - confirm.
n_gram_min_count = 50
# NOTE(review): presumably control whether the Stanza / NLTK resources are downloaded at run time - confirm.
stanza_download = True
nltk_download = True
[RELATED_PROPOSALS]
# Maximum number of related proposals to find for each proposal:
numb_related_proposals = 2
[TOPIC_MODELLING]
# Number of topics:
numb_topics = 3
# Number of tags/keywords for each topic:
numb_topkeywords_pertopic = 5
# Number of top representative proposals to extract for each topic:
n_top_represent_props = 2
# Consider only the top 'n_features' words of the corpus (ordered by word frequency):
n_features = 10000
# Ignore words that appear in less than a 'min_df_val' fraction of the documents
# (a ratio in the [0.0, 1.0] interval, not a 0-100 percentage):
min_df_val = 0.01
# Ignore words that appear in more than a 'max_df_val' fraction of the documents
# (a ratio in the [0.0, 1.0] interval, not a 0-100 percentage):
max_df_val = 0.9
[LOGGING]
# Logging verbosity.
# NOTE(review): presumably a standard level name such as DEBUG, INFO or WARNING - confirm.
logging_level=INFO

View File

@@ -0,0 +1,15 @@
# Configuration with three sections: text preprocessing, summarisation and logging.
[PREPROCESSING]
# Language selection. The active values select English; the commented-out
# lines directly above each key are the Spanish alternatives.
#stopwords_lang = spanish
stopwords_lang = english
#sent_token_lang = spanish
sent_token_lang = english
# NOTE(review): presumably controls whether the NLTK resources are downloaded at run time - confirm.
nltk_download = True
[SUMMARISATION]
# Word-vector file names, one per language suffix (_es, _en).
# NOTE(review): presumably GloVe embeddings for Spanish and English; the directory
# they are resolved against is not visible here - confirm.
glove_file_es = glove-sbwc.i25.vec
glove_file_en = glove.6B.300d.txt
# NOTE(review): presumably a multiplier applied to a selection threshold - confirm what it scales.
threshold_factor = 1.0
# Upper bound on the size of each summary.
# NOTE(review): the unit (words, sentences, characters) is not visible here - confirm.
max_size_of_summaries = 50
[LOGGING]
# Logging verbosity.
# NOTE(review): presumably a standard level name such as DEBUG, INFO or WARNING - confirm.
logging_level=INFO

View File

@@ -0,0 +1,30 @@
# Configuration with four sections: text preprocessing, related-proposal search,
# topic modelling and logging.
[PREPROCESSING]
# Language selection. The active values select English; the commented-out
# lines directly above each key are the Spanish alternatives.
#stanza_model_lang = es
stanza_model_lang = en
#stopwords_lang = spanish
stopwords_lang = english
# NOTE(review): presumably enables lemmatisation of nouns - confirm against the consumer.
noun_lemmatisation = True
# NOTE(review): presumably the minimum number of occurrences for an n-gram to be kept - confirm.
n_gram_min_count = 50
# NOTE(review): presumably control whether the Stanza / NLTK resources are downloaded at run time - confirm.
stanza_download = True
nltk_download = True
[RELATED_PROPOSALS]
# Maximum number of related proposals to find for each proposal:
numb_related_proposals = 2
[TOPIC_MODELLING]
# Number of topics:
numb_topics = 3
# Number of tags/keywords for each topic:
numb_topkeywords_pertopic = 5
# Number of top representative proposals to extract for each topic:
n_top_represent_props = 2
# Consider only the top 'n_features' words of the corpus (ordered by word frequency):
n_features = 10000
# Ignore words that appear in less than a 'min_df_val' fraction of the documents
# (a ratio in the [0.0, 1.0] interval, not a 0-100 percentage):
min_df_val = 0.01
# Ignore words that appear in more than a 'max_df_val' fraction of the documents
# (a ratio in the [0.0, 1.0] interval, not a 0-100 percentage):
max_df_val = 0.9
[LOGGING]
# Logging verbosity.
# NOTE(review): presumably a standard level name such as DEBUG, INFO or WARNING - confirm.
logging_level=INFO

View File

@@ -0,0 +1,15 @@
# Configuration with three sections: text preprocessing, summarisation and logging.
[PREPROCESSING]
# Language selection. The active values select English; the commented-out
# lines directly above each key are the Spanish alternatives.
#stopwords_lang = spanish
stopwords_lang = english
#sent_token_lang = spanish
sent_token_lang = english
# NOTE(review): presumably controls whether the NLTK resources are downloaded at run time - confirm.
nltk_download = True
[SUMMARISATION]
# Word-vector file names, one per language suffix (_es, _en).
# NOTE(review): presumably GloVe embeddings for Spanish and English; the directory
# they are resolved against is not visible here - confirm.
glove_file_es = glove-sbwc.i25.vec
glove_file_en = glove.6B.300d.txt
# NOTE(review): presumably a multiplier applied to a selection threshold - confirm what it scales.
threshold_factor = 1.0
# Upper bound on the size of each summary.
# NOTE(review): the unit (words, sentences, characters) is not visible here - confirm.
max_size_of_summaries = 50
[LOGGING]
# Logging verbosity.
# NOTE(review): presumably a standard level name such as DEBUG, INFO or WARNING - confirm.
logging_level=INFO