From b6f0b1b0633c3aa7d47ab51459858f7a7dc581ae Mon Sep 17 00:00:00 2001
From: cronopioelectronico <cronopioelectronico@gmail.com>
Date: Tue, 7 Sep 2021 18:24:04 +0200
Subject: [PATCH] Update machine learning ini files

---
 .../budgets_related_content_and_tags_nmf.ini  | 30 +++++++++++++++++++
 .../budgets_summary_comments_textrank.ini     | 15 ++++++++++
 ...proposals_related_content_and_tags_nmf.ini | 30 +++++++++++++++++++
 .../proposals_summary_comments_textrank.ini   | 15 ++++++++++
 4 files changed, 90 insertions(+)
 create mode 100644 public/machine_learning/scripts/budgets_related_content_and_tags_nmf.ini
 create mode 100644 public/machine_learning/scripts/budgets_summary_comments_textrank.ini
 create mode 100644 public/machine_learning/scripts/proposals_related_content_and_tags_nmf.ini
 create mode 100644 public/machine_learning/scripts/proposals_summary_comments_textrank.ini

diff --git a/public/machine_learning/scripts/budgets_related_content_and_tags_nmf.ini b/public/machine_learning/scripts/budgets_related_content_and_tags_nmf.ini
new file mode 100644
index 000000000..1ba317e91
--- /dev/null
+++ b/public/machine_learning/scripts/budgets_related_content_and_tags_nmf.ini
@@ -0,0 +1,30 @@
+[PREPROCESSING]
+#stanza_model_lang = es
+stanza_model_lang = en
+#stopwords_lang = spanish
+stopwords_lang = english
+noun_lemmatisation = True
+n_gram_min_count = 50
+stanza_download = True
+nltk_download = True
+
+[RELATED_PROPOSALS]
+# Max number of related proposals to find for each proposal:
+numb_related_proposals = 2
+
+[TOPIC_MODELLING]
+# Number of topics:
+numb_topics = 3
+# Number of tags/keywords for each topic:
+numb_topkeywords_pertopic = 5
+# Number of top representative proposals to extract for each topic:
+n_top_represent_props = 2
+# Consider only the top 'n_features' words of the corpus (ordered by word frequency):
+n_features = 10000
+# Ignore words that appear in less than a 'min_df_val' fraction of the documents (a ratio in the [0.0, 1.0] interval, not a percentage):
+min_df_val = 0.01
+# Ignore words that appear in more than a 'max_df_val' fraction of the documents (a ratio in the [0.0, 1.0] interval, not a percentage):
+max_df_val = 0.9
+
+[LOGGING]
+logging_level=INFO
\ No newline at end of file
diff --git a/public/machine_learning/scripts/budgets_summary_comments_textrank.ini b/public/machine_learning/scripts/budgets_summary_comments_textrank.ini
new file mode 100644
index 000000000..da3915cf7
--- /dev/null
+++ b/public/machine_learning/scripts/budgets_summary_comments_textrank.ini
@@ -0,0 +1,15 @@
+[PREPROCESSING]
+#stopwords_lang = spanish
+stopwords_lang = english
+#sent_token_lang = spanish
+sent_token_lang = english
+nltk_download = True
+
+[SUMMARISATION]
+glove_file_es = glove-sbwc.i25.vec
+glove_file_en = glove.6B.300d.txt
+threshold_factor = 1.0
+max_size_of_summaries = 50
+
+[LOGGING]
+logging_level=INFO
\ No newline at end of file
diff --git a/public/machine_learning/scripts/proposals_related_content_and_tags_nmf.ini b/public/machine_learning/scripts/proposals_related_content_and_tags_nmf.ini
new file mode 100644
index 000000000..1ba317e91
--- /dev/null
+++ b/public/machine_learning/scripts/proposals_related_content_and_tags_nmf.ini
@@ -0,0 +1,30 @@
+[PREPROCESSING]
+#stanza_model_lang = es
+stanza_model_lang = en
+#stopwords_lang = spanish
+stopwords_lang = english
+noun_lemmatisation = True
+n_gram_min_count = 50
+stanza_download = True
+nltk_download = True
+
+[RELATED_PROPOSALS]
+# Max number of related proposals to find for each proposal:
+numb_related_proposals = 2
+
+[TOPIC_MODELLING]
+# Number of topics:
+numb_topics = 3
+# Number of tags/keywords for each topic:
+numb_topkeywords_pertopic = 5
+# Number of top representative proposals to extract for each topic:
+n_top_represent_props = 2
+# Consider only the top 'n_features' words of the corpus (ordered by word frequency):
+n_features = 10000
+# Ignore words that appear in less than a 'min_df_val' fraction of the documents (a ratio in the [0.0, 1.0] interval, not a percentage):
+min_df_val = 0.01
+# Ignore words that appear in more than a 'max_df_val' fraction of the documents (a ratio in the [0.0, 1.0] interval, not a percentage):
+max_df_val = 0.9
+
+[LOGGING]
+logging_level=INFO
\ No newline at end of file
diff --git a/public/machine_learning/scripts/proposals_summary_comments_textrank.ini b/public/machine_learning/scripts/proposals_summary_comments_textrank.ini
new file mode 100644
index 000000000..da3915cf7
--- /dev/null
+++ b/public/machine_learning/scripts/proposals_summary_comments_textrank.ini
@@ -0,0 +1,15 @@
+[PREPROCESSING]
+#stopwords_lang = spanish
+stopwords_lang = english
+#sent_token_lang = spanish
+sent_token_lang = english
+nltk_download = True
+
+[SUMMARISATION]
+glove_file_es = glove-sbwc.i25.vec
+glove_file_en = glove.6B.300d.txt
+threshold_factor = 1.0
+max_size_of_summaries = 50
+
+[LOGGING]
+logging_level=INFO
\ No newline at end of file