From b6f0b1b0633c3aa7d47ab51459858f7a7dc581ae Mon Sep 17 00:00:00 2001 From: cronopioelectronico Date: Tue, 7 Sep 2021 18:24:04 +0200 Subject: [PATCH] Update machine learning ini files --- .../budgets_related_content_and_tags_nmf.ini | 30 +++++++++++++++++++ .../budgets_summary_comments_textrank.ini | 15 ++++++++++ ...proposals_related_content_and_tags_nmf.ini | 30 +++++++++++++++++++ .../proposals_summary_comments_textrank.ini | 15 ++++++++++ 4 files changed, 90 insertions(+) create mode 100644 public/machine_learning/scripts/budgets_related_content_and_tags_nmf.ini create mode 100644 public/machine_learning/scripts/budgets_summary_comments_textrank.ini create mode 100644 public/machine_learning/scripts/proposals_related_content_and_tags_nmf.ini create mode 100644 public/machine_learning/scripts/proposals_summary_comments_textrank.ini diff --git a/public/machine_learning/scripts/budgets_related_content_and_tags_nmf.ini b/public/machine_learning/scripts/budgets_related_content_and_tags_nmf.ini new file mode 100644 index 000000000..1ba317e91 --- /dev/null +++ b/public/machine_learning/scripts/budgets_related_content_and_tags_nmf.ini @@ -0,0 +1,30 @@ +[PREPROCESSING] +#stanza_model_lang = es +stanza_model_lang = en +#stopwords_lang = spanish +stopwords_lang = english +noun_lemmatisation = True +n_gram_min_count = 50 +stanza_download = True +nltk_download = True + +[RELATED_PROPOSALS] +# Max number of related proposals to find for each proposal: +numb_related_proposals = 2 + +[TOPIC_MODELLING] +# Number of topics: +numb_topics = 3 +# Number of tags/keywords for each topic: +numb_topkeywords_pertopic = 5 +# Number of top representative proposals to extract for each topic: +n_top_represent_props = 2 +# Consider only the top 'n_features' words of the corpus (ordered by word frequency): +n_features = 10000 +# Ignore the words that appear in < 'min_df_val' fraction of documents (ratio in [0.0, 1.0] interval): +min_df_val = 0.01 +# Ignore the words that appear in > 
'max_df_val' fraction of documents (ratio in [0.0, 1.0] interval): +max_df_val = 0.9 + +[LOGGING] +logging_level=INFO \ No newline at end of file diff --git a/public/machine_learning/scripts/budgets_summary_comments_textrank.ini b/public/machine_learning/scripts/budgets_summary_comments_textrank.ini new file mode 100644 index 000000000..da3915cf7 --- /dev/null +++ b/public/machine_learning/scripts/budgets_summary_comments_textrank.ini @@ -0,0 +1,15 @@ +[PREPROCESSING] +#stopwords_lang = spanish +stopwords_lang = english +#sent_token_lang = spanish +sent_token_lang = english +nltk_download = True + +[SUMMARISATION] +glove_file_es = glove-sbwc.i25.vec +glove_file_en = glove.6B.300d.txt +threshold_factor = 1.0 +max_size_of_summaries = 50 + +[LOGGING] +logging_level=INFO \ No newline at end of file diff --git a/public/machine_learning/scripts/proposals_related_content_and_tags_nmf.ini b/public/machine_learning/scripts/proposals_related_content_and_tags_nmf.ini new file mode 100644 index 000000000..1ba317e91 --- /dev/null +++ b/public/machine_learning/scripts/proposals_related_content_and_tags_nmf.ini @@ -0,0 +1,30 @@ +[PREPROCESSING] +#stanza_model_lang = es +stanza_model_lang = en +#stopwords_lang = spanish +stopwords_lang = english +noun_lemmatisation = True +n_gram_min_count = 50 +stanza_download = True +nltk_download = True + +[RELATED_PROPOSALS] +# Max number of related proposals to find for each proposal: +numb_related_proposals = 2 + +[TOPIC_MODELLING] +# Number of topics: +numb_topics = 3 +# Number of tags/keywords for each topic: +numb_topkeywords_pertopic = 5 +# Number of top representative proposals to extract for each topic: +n_top_represent_props = 2 +# Consider only the top 'n_features' words of the corpus (ordered by word frequency): +n_features = 10000 +# Ignore the words that appear in < 'min_df_val' fraction of documents (ratio in [0.0, 1.0] interval): +min_df_val = 0.01 +# Ignore the words that appear in > 'max_df_val' fraction of documents (ratio in 
[0.0, 1.0] interval): +max_df_val = 0.9 + +[LOGGING] +logging_level=INFO \ No newline at end of file diff --git a/public/machine_learning/scripts/proposals_summary_comments_textrank.ini b/public/machine_learning/scripts/proposals_summary_comments_textrank.ini new file mode 100644 index 000000000..da3915cf7 --- /dev/null +++ b/public/machine_learning/scripts/proposals_summary_comments_textrank.ini @@ -0,0 +1,15 @@ +[PREPROCESSING] +#stopwords_lang = spanish +stopwords_lang = english +#sent_token_lang = spanish +sent_token_lang = english +nltk_download = True + +[SUMMARISATION] +glove_file_es = glove-sbwc.i25.vec +glove_file_en = glove.6B.300d.txt +threshold_factor = 1.0 +max_size_of_summaries = 50 + +[LOGGING] +logging_level=INFO \ No newline at end of file