Update machine learning ini files

2021-09-07 18:24:04 +02:00
parent 6d6888f201
commit b6f0b1b063
4 changed files with 90 additions and 0 deletions
--- a/public/machine_learning/scripts/budgets_related_content_and_tags_nmf.ini
+++ b/public/machine_learning/scripts/budgets_related_content_and_tags_nmf.ini
@@ -0,0 +1,30 @@
 # Text preprocessing settings. The English values are active; each commented-out
 # line directly above an active key keeps the Spanish alternative for that key.
 [PREPROCESSING]
 #stanza_model_lang = es
 stanza_model_lang = en
 #stopwords_lang = spanish
 stopwords_lang = english
 # NOTE(review): presumably turns on lemmatisation of nouns during preprocessing - confirm in the consuming script.
 noun_lemmatisation = True
 # NOTE(review): presumably the minimum number of occurrences required for an n-gram to be kept - confirm.
 n_gram_min_count = 50
 # NOTE(review): presumably control whether the Stanza / NLTK resources are downloaded when the script runs - confirm.
 stanza_download = True
 nltk_download = True
 [RELATED_PROPOSALS]
 # Maximum number of related proposals to find for each proposal:
 numb_related_proposals = 2
 [TOPIC_MODELLING]
 # Number of topics:
 numb_topics = 3
 # Number of tags/keywords for each topic:
 numb_topkeywords_pertopic = 5
 # Number of top representative proposals to extract for each topic:
 n_top_represent_props = 2
 # Consider only the top 'n_features' words of the corpus (ordered by word frequency):
 n_features = 10000
 # Ignore words that appear in less than a 'min_df_val' fraction of the documents.
 # The value is a ratio in the [0.0, 1.0] interval, not a percentage (0.01 means 1% of documents):
 min_df_val = 0.01
 # Ignore words that appear in more than a 'max_df_val' fraction of the documents.
 # The value is a ratio in the [0.0, 1.0] interval, not a percentage (0.9 means 90% of documents):
 max_df_val = 0.9
 [LOGGING]
 # NOTE(review): presumably a logging level name understood by the consuming script (e.g. DEBUG, INFO, WARNING) - confirm.
 logging_level=INFO
--- a/public/machine_learning/scripts/budgets_summary_comments_textrank.ini
+++ b/public/machine_learning/scripts/budgets_summary_comments_textrank.ini
@@ -0,0 +1,15 @@
 # Text preprocessing settings. The English values are active; each commented-out
 # line directly above an active key keeps the Spanish alternative for that key.
 [PREPROCESSING]
 #stopwords_lang = spanish
 stopwords_lang = english
 #sent_token_lang = spanish
 sent_token_lang = english
 # NOTE(review): presumably controls whether the NLTK resources are downloaded when the script runs - confirm.
 nltk_download = True
 [SUMMARISATION]
 # NOTE(review): presumably the file names of the GloVe word-embedding files for Spanish (_es) and
 # English (_en); where these files are looked up is not visible here - confirm in the consuming script.
 glove_file_es = glove-sbwc.i25.vec
 glove_file_en = glove.6B.300d.txt
 # NOTE(review): the meaning and units of the next two keys are not visible here (possibly a multiplier
 # for a similarity threshold and an upper bound on summary length) - confirm before changing them.
 threshold_factor = 1.0
 max_size_of_summaries = 50
 [LOGGING]
 # NOTE(review): presumably a logging level name understood by the consuming script (e.g. DEBUG, INFO, WARNING) - confirm.
 logging_level=INFO
--- a/public/machine_learning/scripts/proposals_related_content_and_tags_nmf.ini
+++ b/public/machine_learning/scripts/proposals_related_content_and_tags_nmf.ini
@@ -0,0 +1,30 @@
 # Text preprocessing settings. The English values are active; each commented-out
 # line directly above an active key keeps the Spanish alternative for that key.
 [PREPROCESSING]
 #stanza_model_lang = es
 stanza_model_lang = en
 #stopwords_lang = spanish
 stopwords_lang = english
 # NOTE(review): presumably turns on lemmatisation of nouns during preprocessing - confirm in the consuming script.
 noun_lemmatisation = True
 # NOTE(review): presumably the minimum number of occurrences required for an n-gram to be kept - confirm.
 n_gram_min_count = 50
 # NOTE(review): presumably control whether the Stanza / NLTK resources are downloaded when the script runs - confirm.
 stanza_download = True
 nltk_download = True
 [RELATED_PROPOSALS]
 # Maximum number of related proposals to find for each proposal:
 numb_related_proposals = 2
 [TOPIC_MODELLING]
 # Number of topics:
 numb_topics = 3
 # Number of tags/keywords for each topic:
 numb_topkeywords_pertopic = 5
 # Number of top representative proposals to extract for each topic:
 n_top_represent_props = 2
 # Consider only the top 'n_features' words of the corpus (ordered by word frequency):
 n_features = 10000
 # Ignore words that appear in less than a 'min_df_val' fraction of the documents.
 # The value is a ratio in the [0.0, 1.0] interval, not a percentage (0.01 means 1% of documents):
 min_df_val = 0.01
 # Ignore words that appear in more than a 'max_df_val' fraction of the documents.
 # The value is a ratio in the [0.0, 1.0] interval, not a percentage (0.9 means 90% of documents):
 max_df_val = 0.9
 [LOGGING]
 # NOTE(review): presumably a logging level name understood by the consuming script (e.g. DEBUG, INFO, WARNING) - confirm.
 logging_level=INFO
--- a/public/machine_learning/scripts/proposals_summary_comments_textrank.ini
+++ b/public/machine_learning/scripts/proposals_summary_comments_textrank.ini
@@ -0,0 +1,15 @@
 # Text preprocessing settings. The English values are active; each commented-out
 # line directly above an active key keeps the Spanish alternative for that key.
 [PREPROCESSING]
 #stopwords_lang = spanish
 stopwords_lang = english
 #sent_token_lang = spanish
 sent_token_lang = english
 # NOTE(review): presumably controls whether the NLTK resources are downloaded when the script runs - confirm.
 nltk_download = True
 [SUMMARISATION]
 # NOTE(review): presumably the file names of the GloVe word-embedding files for Spanish (_es) and
 # English (_en); where these files are looked up is not visible here - confirm in the consuming script.
 glove_file_es = glove-sbwc.i25.vec
 glove_file_en = glove.6B.300d.txt
 # NOTE(review): the meaning and units of the next two keys are not visible here (possibly a multiplier
 # for a similarity threshold and an upper bound on summary length) - confirm before changing them.
 threshold_factor = 1.0
 max_size_of_summaries = 50
 [LOGGING]
 # NOTE(review): presumably a logging level name understood by the consuming script (e.g. DEBUG, INFO, WARNING) - confirm.
 logging_level=INFO