Store machine learning data in a per-tenant folder

Summary of the change set below:

* The MachineLearning::DATA_FOLDER constant is removed. MachineLearning.data_folder
  builds the path at call time: public/machine_learning/data when Tenant.default?
  is true, public/tenants/<Tenant.current_schema>/machine_learning/data otherwise.
* run_machine_learning_scripts sets the CONSUL_TENANT environment variable for
  non-default tenants, and the python scripts read it to locate that folder.
* .dockerignore and .gitignore ignore the per-tenant data folders.
* Specs call MachineLearning.data_folder instead of reading the constant.

Review notes:

* Added lines call File.exist? instead of File.exists?. The plural alias is
  deprecated and is no longer defined in Ruby 3.2 and later.
* NOTE(review): this patch arrived with its line breaks collapsed. Indentation
  and blank context lines were reconstructed from the hunk line counts and the
  usual Ruby/Python layout; confirm them against the target tree before applying.

diff --git a/.dockerignore b/.dockerignore
index 41853f5ae..8c75b254c 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -23,6 +23,7 @@ public/sitemap.xml
 public/tenants/*/sitemap.xml
 public/assets/
 public/machine_learning/data/
+public/tenants/*/machine_learning/data/
 
 # Bundler config, cache and gemsets
 **/.bundle/
diff --git a/.gitignore b/.gitignore
index 3ac23e93f..28d1c676c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -25,6 +25,7 @@ tmp/
 /public/tenants/*/sitemap.xml
 /public/assets/
 /public/machine_learning/data/
+/public/tenants/*/machine_learning/data/
 
 # Bundler config, cache and gemsets
 .bundle/
diff --git a/app/models/machine_learning.rb b/app/models/machine_learning.rb
index 76be044b8..0b680e212 100644
--- a/app/models/machine_learning.rb
+++ b/app/models/machine_learning.rb
@@ -3,7 +3,6 @@ class MachineLearning
   attr_accessor :job
 
   SCRIPTS_FOLDER = Rails.root.join("public", "machine_learning", "scripts").freeze
-  DATA_FOLDER = Rails.root.join("public", "machine_learning", "data").freeze
 
   def initialize(job)
     @job = job
@@ -11,6 +10,10 @@ class MachineLearning
     @previous_modified_date = set_previous_modified_date
   end
 
+  def data_folder
+    self.class.data_folder
+  end
+
   def run
     begin
       export_proposals_to_json
@@ -81,17 +84,33 @@ class MachineLearning
       "comments.json"
     end
 
+    def data_folder
+      Rails.root.join("public", tenant_data_folder)
+    end
+
+    def tenant_data_folder
+      File.join(tenant_subfolder, "machine_learning", "data").delete_prefix("/")
+    end
+
+    def tenant_subfolder
+      if Tenant.default?
+        ""
+      else
+        File.join("tenants", Tenant.current_schema)
+      end
+    end
+
     def data_output_files
       files = { tags: [], related_content: [], comments_summary: [] }
 
-      files[:tags] << proposals_tags_filename if File.exists?(DATA_FOLDER.join(proposals_tags_filename))
-      files[:tags] << proposals_taggings_filename if File.exists?(DATA_FOLDER.join(proposals_taggings_filename))
-      files[:tags] << investments_tags_filename if File.exists?(DATA_FOLDER.join(investments_tags_filename))
-      files[:tags] << investments_taggings_filename if File.exists?(DATA_FOLDER.join(investments_taggings_filename))
-      files[:related_content] << proposals_related_filename if File.exists?(DATA_FOLDER.join(proposals_related_filename))
-      files[:related_content] << investments_related_filename if File.exists?(DATA_FOLDER.join(investments_related_filename))
-      files[:comments_summary] << proposals_comments_summary_filename if File.exists?(DATA_FOLDER.join(proposals_comments_summary_filename))
-      files[:comments_summary] << investments_comments_summary_filename if File.exists?(DATA_FOLDER.join(investments_comments_summary_filename))
+      files[:tags] << proposals_tags_filename if File.exist?(data_folder.join(proposals_tags_filename))
+      files[:tags] << proposals_taggings_filename if File.exist?(data_folder.join(proposals_taggings_filename))
+      files[:tags] << investments_tags_filename if File.exist?(data_folder.join(investments_tags_filename))
+      files[:tags] << investments_taggings_filename if File.exist?(data_folder.join(investments_taggings_filename))
+      files[:related_content] << proposals_related_filename if File.exist?(data_folder.join(proposals_related_filename))
+      files[:related_content] << investments_related_filename if File.exist?(data_folder.join(investments_related_filename))
+      files[:comments_summary] << proposals_comments_summary_filename if File.exist?(data_folder.join(proposals_comments_summary_filename))
+      files[:comments_summary] << investments_comments_summary_filename if File.exist?(data_folder.join(investments_comments_summary_filename))
 
       files
     end
@@ -110,10 +129,10 @@ class MachineLearning
         proposals_comments_summary_filename,
         investments_comments_summary_filename
       ]
-      json = Dir[DATA_FOLDER.join("*.json")].map do |full_path_filename|
+      json = Dir[data_folder.join("*.json")].map do |full_path_filename|
         full_path_filename.split("/").last
       end
-      csv = Dir[DATA_FOLDER.join("*.csv")].map do |full_path_filename|
+      csv = Dir[data_folder.join("*.csv")].map do |full_path_filename|
         full_path_filename.split("/").last
       end
       (json + csv - excluded).sort
@@ -152,7 +171,7 @@ class MachineLearning
     end
 
     def data_path(filename)
-      "/machine_learning/data/" + filename
+      "/#{tenant_data_folder}/#{filename}"
     end
 
     def script_kinds
@@ -196,29 +215,35 @@ class MachineLearning
   private
 
     def create_data_folder
-      FileUtils.mkdir_p DATA_FOLDER
+      FileUtils.mkdir_p data_folder
     end
 
     def export_proposals_to_json
       create_data_folder
-      filename = DATA_FOLDER.join(MachineLearning.proposals_filename)
+      filename = data_folder.join(MachineLearning.proposals_filename)
       Proposal::Exporter.new.to_json_file(filename)
     end
 
     def export_budget_investments_to_json
       create_data_folder
-      filename = DATA_FOLDER.join(MachineLearning.investments_filename)
+      filename = data_folder.join(MachineLearning.investments_filename)
       Budget::Investment::Exporter.new(Array.new).to_json_file(filename)
     end
 
     def export_comments_to_json
       create_data_folder
-      filename = DATA_FOLDER.join(MachineLearning.comments_filename)
+      filename = data_folder.join(MachineLearning.comments_filename)
       Comment::Exporter.new.to_json_file(filename)
     end
 
     def run_machine_learning_scripts
-      output = `cd #{SCRIPTS_FOLDER} && python #{job.script} 2>&1`
+      command = if Tenant.default?
+                  "python #{job.script}"
+                else
+                  "CONSUL_TENANT=#{Tenant.current_schema} python #{job.script}"
+                end
+
+      output = `cd #{SCRIPTS_FOLDER} && #{command} 2>&1`
       result = $?.success?
       if result == false
         job.update!(finished_at: Time.current, error: output)
@@ -254,7 +279,7 @@ class MachineLearning
     end
 
     def import_ml_proposals_comments_summary
-      json_file = DATA_FOLDER.join(MachineLearning.proposals_comments_summary_filename)
+      json_file = data_folder.join(MachineLearning.proposals_comments_summary_filename)
       json_data = JSON.parse(File.read(json_file)).each(&:deep_symbolize_keys!)
       json_data.each do |attributes|
         attributes.delete(:id)
@@ -266,7 +291,7 @@ class MachineLearning
     end
 
     def import_ml_investments_comments_summary
-      json_file = DATA_FOLDER.join(MachineLearning.investments_comments_summary_filename)
+      json_file = data_folder.join(MachineLearning.investments_comments_summary_filename)
       json_data = JSON.parse(File.read(json_file)).each(&:deep_symbolize_keys!)
       json_data.each do |attributes|
         attributes.delete(:id)
@@ -278,7 +303,7 @@ class MachineLearning
     end
 
     def import_proposals_related_content
-      json_file = DATA_FOLDER.join(MachineLearning.proposals_related_filename)
+      json_file = data_folder.join(MachineLearning.proposals_related_filename)
       json_data = JSON.parse(File.read(json_file)).each(&:deep_symbolize_keys!)
       json_data.each do |related|
         id = related.delete(:id)
@@ -306,7 +331,7 @@ class MachineLearning
     end
 
     def import_budget_investments_related_content
-      json_file = DATA_FOLDER.join(MachineLearning.investments_related_filename)
+      json_file = data_folder.join(MachineLearning.investments_related_filename)
       json_data = JSON.parse(File.read(json_file)).each(&:deep_symbolize_keys!)
       json_data.each do |related|
         id = related.delete(:id)
@@ -335,7 +360,7 @@ class MachineLearning
 
     def import_ml_proposals_tags
       ids = {}
-      json_file = DATA_FOLDER.join(MachineLearning.proposals_tags_filename)
+      json_file = data_folder.join(MachineLearning.proposals_tags_filename)
       json_data = JSON.parse(File.read(json_file)).each(&:deep_symbolize_keys!)
       json_data.each do |attributes|
         if attributes[:name].present?
@@ -348,7 +373,7 @@ class MachineLearning
         end
       end
 
-      json_file = DATA_FOLDER.join(MachineLearning.proposals_taggings_filename)
+      json_file = data_folder.join(MachineLearning.proposals_taggings_filename)
       json_data = JSON.parse(File.read(json_file)).each(&:deep_symbolize_keys!)
       json_data.each do |attributes|
         if attributes[:tag_id].present?
@@ -365,7 +390,7 @@ class MachineLearning
 
     def import_ml_investments_tags
       ids = {}
-      json_file = DATA_FOLDER.join(MachineLearning.investments_tags_filename)
+      json_file = data_folder.join(MachineLearning.investments_tags_filename)
       json_data = JSON.parse(File.read(json_file)).each(&:deep_symbolize_keys!)
       json_data.each do |attributes|
         if attributes[:name].present?
@@ -378,7 +403,7 @@ class MachineLearning
         end
       end
 
-      json_file = DATA_FOLDER.join(MachineLearning.investments_taggings_filename)
+      json_file = data_folder.join(MachineLearning.investments_taggings_filename)
       json_data = JSON.parse(File.read(json_file)).each(&:deep_symbolize_keys!)
       json_data.each do |attributes|
         if attributes[:tag_id].present?
@@ -421,13 +446,13 @@ class MachineLearning
     end
 
     def last_modified_date_for(filename)
-      return nil unless File.exists? DATA_FOLDER.join(filename)
+      return nil unless File.exist? data_folder.join(filename)
 
-      File.mtime DATA_FOLDER.join(filename)
+      File.mtime data_folder.join(filename)
     end
 
     def updated_file?(filename)
-      return false unless File.exists? DATA_FOLDER.join(filename)
+      return false unless File.exist? data_folder.join(filename)
       return true unless previous_modified_date[filename].present?
 
       last_modified_date_for(filename) > previous_modified_date[filename]
diff --git a/public/machine_learning/scripts/budgets_related_content_and_tags_nmf.py b/public/machine_learning/scripts/budgets_related_content_and_tags_nmf.py
index da40f73b3..d43cfbc70 100644
--- a/public/machine_learning/scripts/budgets_related_content_and_tags_nmf.py
+++ b/public/machine_learning/scripts/budgets_related_content_and_tags_nmf.py
@@ -63,14 +63,17 @@ tqdm_notebook = True
 
 # In[2]:
 
+import os
+if os.environ.get("CONSUL_TENANT"):
+    data_path = '../../tenants/' + os.environ["CONSUL_TENANT"] + '/machine_learning/data'
+else:
+    data_path = '../data'
 
-data_path = '../data'
 config_file = 'budgets_related_content_and_tags_nmf.ini'
 logging_file ='budgets_related_content_and_tags_nmf.log'
 
 # Read the configuration file
 
-import os
 import configparser
 config = configparser.ConfigParser()
 check_file(os.path.join(data_path,config_file))
diff --git a/public/machine_learning/scripts/budgets_summary_comments_textrank.py b/public/machine_learning/scripts/budgets_summary_comments_textrank.py
index 1c0faf07b..a1dec2b6f 100644
--- a/public/machine_learning/scripts/budgets_summary_comments_textrank.py
+++ b/public/machine_learning/scripts/budgets_summary_comments_textrank.py
@@ -60,14 +60,17 @@ tqdm_notebook = True
 
 # In[ ]:
 
+import os
+if os.environ.get("CONSUL_TENANT"):
+    data_path = '../../tenants/' + os.environ["CONSUL_TENANT"] + '/machine_learning/data'
+else:
+    data_path = '../data'
 
-data_path = '../data'
 config_file = 'budgets_summary_comments_textrank.ini'
 logging_file ='budgets_summary_comments_textrank.log'
 
 # Read the configuration file
 
-import os
 import configparser
 config = configparser.ConfigParser()
 check_file(os.path.join(data_path,config_file))
diff --git a/public/machine_learning/scripts/proposals_related_content_and_tags_nmf.py b/public/machine_learning/scripts/proposals_related_content_and_tags_nmf.py
index 4c303ad28..df0af7945 100644
--- a/public/machine_learning/scripts/proposals_related_content_and_tags_nmf.py
+++ b/public/machine_learning/scripts/proposals_related_content_and_tags_nmf.py
@@ -63,14 +63,17 @@ tqdm_notebook = True
 
 # In[2]:
 
+import os
+if os.environ.get("CONSUL_TENANT"):
+    data_path = '../../tenants/' + os.environ["CONSUL_TENANT"] + '/machine_learning/data'
+else:
+    data_path = '../data'
 
-data_path = '../data'
 config_file = 'proposals_related_content_and_tags_nmf.ini'
 logging_file ='proposals_related_content_and_tags_nmf.log'
 
 # Read the configuration file
 
-import os
 import configparser
 config = configparser.ConfigParser()
 check_file(os.path.join(data_path,config_file))
diff --git a/public/machine_learning/scripts/proposals_summary_comments_textrank.py b/public/machine_learning/scripts/proposals_summary_comments_textrank.py
index 440083558..ac5d2569a 100644
--- a/public/machine_learning/scripts/proposals_summary_comments_textrank.py
+++ b/public/machine_learning/scripts/proposals_summary_comments_textrank.py
@@ -60,14 +60,17 @@ tqdm_notebook = True
 
 # In[3]:
 
+import os
+if os.environ.get("CONSUL_TENANT"):
+    data_path = '../../tenants/' + os.environ["CONSUL_TENANT"] + '/machine_learning/data'
+else:
+    data_path = '../data'
 
-data_path = '../data'
 config_file = 'proposals_summary_comments_textrank.ini'
 logging_file ='proposals_summary_comments_textrank.log'
 
 # Read the configuration file
 
-import os
 import configparser
 config = configparser.ConfigParser()
 check_file(os.path.join(data_path,config_file))
diff --git a/spec/models/machine_learning_spec.rb b/spec/models/machine_learning_spec.rb
index da0788e15..f3f8af8f7 100644
--- a/spec/models/machine_learning_spec.rb
+++ b/spec/models/machine_learning_spec.rb
@@ -309,7 +309,7 @@ describe MachineLearning do
       machine_learning = MachineLearning.new(job)
       machine_learning.send(:export_proposals_to_json)
 
-      json_file = MachineLearning::DATA_FOLDER.join("proposals.json")
+      json_file = MachineLearning.data_folder.join("proposals.json")
       json = JSON.parse(File.read(json_file))
 
       expect(json).to be_an Array
@@ -335,7 +335,7 @@ describe MachineLearning do
       machine_learning = MachineLearning.new(job)
       machine_learning.send(:export_budget_investments_to_json)
 
-      json_file = MachineLearning::DATA_FOLDER.join("budget_investments.json")
+      json_file = MachineLearning.data_folder.join("budget_investments.json")
       json = JSON.parse(File.read(json_file))
 
       expect(json).to be_an Array
@@ -359,7 +359,7 @@ describe MachineLearning do
       machine_learning = MachineLearning.new(job)
       machine_learning.send(:export_comments_to_json)
 
-      json_file = MachineLearning::DATA_FOLDER.join("comments.json")
+      json_file = MachineLearning.data_folder.join("comments.json")
       json = JSON.parse(File.read(json_file))
 
       expect(json).to be_an Array
@@ -428,7 +428,7 @@ describe MachineLearning do
       ]
 
       filename = "ml_comments_summaries_proposals.json"
-      json_file = MachineLearning::DATA_FOLDER.join(filename)
+      json_file = MachineLearning.data_folder.join(filename)
       expect(File).to receive(:read).with(json_file).and_return data.to_json
 
       machine_learning.send(:import_ml_proposals_comments_summary)
@@ -450,7 +450,7 @@ describe MachineLearning do
       ]
 
       filename = "ml_comments_summaries_budgets.json"
-      json_file = MachineLearning::DATA_FOLDER.join(filename)
+      json_file = MachineLearning.data_folder.join(filename)
       expect(File).to receive(:read).with(json_file).and_return data.to_json
 
       machine_learning.send(:import_ml_investments_comments_summary)
@@ -476,7 +476,7 @@ describe MachineLearning do
       ]
 
       filename = "ml_related_content_proposals.json"
-      json_file = MachineLearning::DATA_FOLDER.join(filename)
+      json_file = MachineLearning.data_folder.join(filename)
       expect(File).to receive(:read).with(json_file).and_return data.to_json
 
       machine_learning.send(:import_proposals_related_content)
@@ -504,7 +504,7 @@ describe MachineLearning do
       ]
 
       filename = "ml_related_content_budgets.json"
-      json_file = MachineLearning::DATA_FOLDER.join(filename)
+      json_file = MachineLearning.data_folder.join(filename)
       expect(File).to receive(:read).with(json_file).and_return data.to_json
 
       machine_learning.send(:import_budget_investments_related_content)
@@ -538,11 +538,11 @@ describe MachineLearning do
       ]
 
       tags_filename = "ml_tags_proposals.json"
-      tags_json_file = MachineLearning::DATA_FOLDER.join(tags_filename)
+      tags_json_file = MachineLearning.data_folder.join(tags_filename)
       expect(File).to receive(:read).with(tags_json_file).and_return tags_data.to_json
 
       taggings_filename = "ml_taggings_proposals.json"
-      taggings_json_file = MachineLearning::DATA_FOLDER.join(taggings_filename)
+      taggings_json_file = MachineLearning.data_folder.join(taggings_filename)
       expect(File).to receive(:read).with(taggings_json_file).and_return taggings_data.to_json
 
       machine_learning.send(:import_ml_proposals_tags)
@@ -580,11 +580,11 @@ describe MachineLearning do
       ]
 
       tags_filename = "ml_tags_budgets.json"
-      tags_json_file = MachineLearning::DATA_FOLDER.join(tags_filename)
+      tags_json_file = MachineLearning.data_folder.join(tags_filename)
       expect(File).to receive(:read).with(tags_json_file).and_return tags_data.to_json
 
       taggings_filename = "ml_taggings_budgets.json"
-      taggings_json_file = MachineLearning::DATA_FOLDER.join(taggings_filename)
+      taggings_json_file = MachineLearning.data_folder.join(taggings_filename)
       expect(File).to receive(:read).with(taggings_json_file).and_return taggings_data.to_json
 
       machine_learning.send(:import_ml_investments_tags)
diff --git a/spec/system/admin/machine_learning_spec.rb b/spec/system/admin/machine_learning_spec.rb
index 03a476026..d058f8eb9 100644
--- a/spec/system/admin/machine_learning_spec.rb
+++ b/spec/system/admin/machine_learning_spec.rb
@@ -207,7 +207,7 @@ describe "Machine learning" do
   end
 
   scenario "Show output files info on settins page" do
-    FileUtils.mkdir_p MachineLearning::DATA_FOLDER
+    FileUtils.mkdir_p MachineLearning.data_folder
 
     allow_any_instance_of(MachineLearning).to receive(:run) do
       MachineLearningJob.first.update!(finished_at: 2.minutes.from_now)
@@ -215,9 +215,9 @@ describe "Machine learning" do
                                    script: "proposals_summary_comments_textrank.py",
                                    kind: "comments_summary",
                                    updated_at: 2.minutes.from_now)
-      comments_file = MachineLearning::DATA_FOLDER.join(MachineLearning.comments_filename)
+      comments_file = MachineLearning.data_folder.join(MachineLearning.comments_filename)
       File.write(comments_file, [].to_json)
-      proposals_comments_summary_file = MachineLearning::DATA_FOLDER.join(MachineLearning.proposals_comments_summary_filename)
+      proposals_comments_summary_file = MachineLearning.data_folder.join(MachineLearning.proposals_comments_summary_filename)
       File.write(proposals_comments_summary_file, [].to_json)
     end
 