Update machine learning ini files
This commit is contained in:
@@ -0,0 +1,30 @@
|
|||||||
|
# Text preprocessing settings.
[PREPROCESSING]
|
||||||
|
# Language switches: the Spanish values are kept commented out as alternatives;
# the English values below them are the active ones.
#stanza_model_lang = es
|
||||||
|
stanza_model_lang = en
|
||||||
|
#stopwords_lang = spanish
|
||||||
|
stopwords_lang = english
|
||||||
|
# NOTE(review): presumably toggles lemmatisation of nouns - confirm against the code that reads it.
noun_lemmatisation = True
|
||||||
|
# NOTE(review): presumably the minimum occurrence count for an n-gram to be kept - confirm.
n_gram_min_count = 50
|
||||||
|
# NOTE(review): presumably control whether Stanza / NLTK resources are downloaded at start-up - confirm.
stanza_download = True
|
||||||
|
nltk_download = True
|
||||||
|
|
||||||
|
# Settings for the search of related proposals.
[RELATED_PROPOSALS]
|
||||||
|
# Maximum number of related proposals to find for each proposal:
|
||||||
|
numb_related_proposals = 2
|
||||||
|
|
||||||
|
# Topic modelling settings.
[TOPIC_MODELLING]
|
||||||
|
# Number of topics:
|
||||||
|
numb_topics = 3
|
||||||
|
# Number of tags/keywords for each topic:
|
||||||
|
numb_topkeywords_pertopic = 5
|
||||||
|
# Number of top representative proposals to extract for each topic:
|
||||||
|
n_top_represent_props = 2
|
||||||
|
# Consider only the top 'n_features' words of the corpus (ordered by word frequency):
|
||||||
|
n_features = 10000
|
||||||
|
# Ignore words that appear in less than a 'min_df_val' fraction of the documents (a ratio in the [0.0, 1.0] interval, not a percentage):
|
||||||
|
min_df_val = 0.01
|
||||||
|
# Ignore words that appear in more than a 'max_df_val' fraction of the documents (a ratio in the [0.0, 1.0] interval, not a percentage):
|
||||||
|
max_df_val = 0.9
|
||||||
|
|
||||||
|
# Logging settings.
[LOGGING]
|
||||||
|
# NOTE(review): presumably a standard log level name (e.g. DEBUG, INFO, WARNING, ERROR) - confirm against the code that reads it.
logging_level = INFO
|
||||||
@@ -0,0 +1,15 @@
|
|||||||
|
# Text preprocessing settings.
[PREPROCESSING]
|
||||||
|
# Language switches: the Spanish values are kept commented out as alternatives;
# the English values below them are the active ones.
#stopwords_lang = spanish
|
||||||
|
stopwords_lang = english
|
||||||
|
#sent_token_lang = spanish
|
||||||
|
sent_token_lang = english
|
||||||
|
# NOTE(review): presumably controls whether NLTK resources are downloaded at start-up - confirm.
nltk_download = True
|
||||||
|
|
||||||
|
# Summarisation settings.
[SUMMARISATION]
|
||||||
|
# Embedding file names, one per language suffix (es / en).
# NOTE(review): no directory is given, so the base path must be resolved by the consuming code - confirm where.
glove_file_es = glove-sbwc.i25.vec
|
||||||
|
glove_file_en = glove.6B.300d.txt
|
||||||
|
# NOTE(review): meaning of the factor is not visible in this file - document what it scales.
threshold_factor = 1.0
|
||||||
|
# NOTE(review): unit is not stated (words, characters or sentences?) - confirm and record it here.
max_size_of_summaries = 50
|
||||||
|
|
||||||
|
# Logging settings.
[LOGGING]
|
||||||
|
# NOTE(review): presumably a standard log level name (e.g. DEBUG, INFO, WARNING, ERROR) - confirm against the code that reads it.
logging_level = INFO
|
||||||
@@ -0,0 +1,30 @@
|
|||||||
|
# Text preprocessing settings.
[PREPROCESSING]
|
||||||
|
# Language switches: the Spanish values are kept commented out as alternatives;
# the English values below them are the active ones.
#stanza_model_lang = es
|
||||||
|
stanza_model_lang = en
|
||||||
|
#stopwords_lang = spanish
|
||||||
|
stopwords_lang = english
|
||||||
|
# NOTE(review): presumably toggles lemmatisation of nouns - confirm against the code that reads it.
noun_lemmatisation = True
|
||||||
|
# NOTE(review): presumably the minimum occurrence count for an n-gram to be kept - confirm.
n_gram_min_count = 50
|
||||||
|
# NOTE(review): presumably control whether Stanza / NLTK resources are downloaded at start-up - confirm.
stanza_download = True
|
||||||
|
nltk_download = True
|
||||||
|
|
||||||
|
# Settings for the search of related proposals.
[RELATED_PROPOSALS]
|
||||||
|
# Maximum number of related proposals to find for each proposal:
|
||||||
|
numb_related_proposals = 2
|
||||||
|
|
||||||
|
# Topic modelling settings.
[TOPIC_MODELLING]
|
||||||
|
# Number of topics:
|
||||||
|
numb_topics = 3
|
||||||
|
# Number of tags/keywords for each topic:
|
||||||
|
numb_topkeywords_pertopic = 5
|
||||||
|
# Number of top representative proposals to extract for each topic:
|
||||||
|
n_top_represent_props = 2
|
||||||
|
# Consider only the top 'n_features' words of the corpus (ordered by word frequency):
|
||||||
|
n_features = 10000
|
||||||
|
# Ignore words that appear in less than a 'min_df_val' fraction of the documents (a ratio in the [0.0, 1.0] interval, not a percentage):
|
||||||
|
min_df_val = 0.01
|
||||||
|
# Ignore words that appear in more than a 'max_df_val' fraction of the documents (a ratio in the [0.0, 1.0] interval, not a percentage):
|
||||||
|
max_df_val = 0.9
|
||||||
|
|
||||||
|
# Logging settings.
[LOGGING]
|
||||||
|
# NOTE(review): presumably a standard log level name (e.g. DEBUG, INFO, WARNING, ERROR) - confirm against the code that reads it.
logging_level = INFO
|
||||||
@@ -0,0 +1,15 @@
|
|||||||
|
# Text preprocessing settings.
[PREPROCESSING]
|
||||||
|
# Language switches: the Spanish values are kept commented out as alternatives;
# the English values below them are the active ones.
#stopwords_lang = spanish
|
||||||
|
stopwords_lang = english
|
||||||
|
#sent_token_lang = spanish
|
||||||
|
sent_token_lang = english
|
||||||
|
# NOTE(review): presumably controls whether NLTK resources are downloaded at start-up - confirm.
nltk_download = True
|
||||||
|
|
||||||
|
# Summarisation settings.
[SUMMARISATION]
|
||||||
|
# Embedding file names, one per language suffix (es / en).
# NOTE(review): no directory is given, so the base path must be resolved by the consuming code - confirm where.
glove_file_es = glove-sbwc.i25.vec
|
||||||
|
glove_file_en = glove.6B.300d.txt
|
||||||
|
# NOTE(review): meaning of the factor is not visible in this file - document what it scales.
threshold_factor = 1.0
|
||||||
|
# NOTE(review): unit is not stated (words, characters or sentences?) - confirm and record it here.
max_size_of_summaries = 50
|
||||||
|
|
||||||
|
# Logging settings.
[LOGGING]
|
||||||
|
# NOTE(review): presumably a standard log level name (e.g. DEBUG, INFO, WARNING, ERROR) - confirm against the code that reads it.
logging_level = INFO
|
||||||
Reference in New Issue
Block a user