Use a different machine learning folder per tenant
We're using the "tenants" subfolder for consistency with the folder structure we use in ActiveStorage and because some CONSUL installations might have folders inside the `data` folder which might conflict with the folders created by tenants. Note that the Python scripts have a lot of duplication, meaning we need to change all of them. I'm not refactoring them because I'm not familiar enough with these scripts (or with Python, for that matter). Also note that the scripts folder is still shared by all tenants, meaning it isn't possible to have different scripts for different tenants. I'm not sure how this situation should be handled; again, I'm not familiar enough with this feature.
This commit is contained in:
@@ -23,6 +23,7 @@ public/sitemap.xml
|
||||
public/tenants/*/sitemap.xml
|
||||
public/assets/
|
||||
public/machine_learning/data/
|
||||
public/tenants/*/machine_learning/data/
|
||||
|
||||
# Bundler config, cache and gemsets
|
||||
**/.bundle/
|
||||
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -25,6 +25,7 @@ tmp/
|
||||
/public/tenants/*/sitemap.xml
|
||||
/public/assets/
|
||||
/public/machine_learning/data/
|
||||
/public/tenants/*/machine_learning/data/
|
||||
|
||||
# Bundler config, cache and gemsets
|
||||
.bundle/
|
||||
|
||||
@@ -3,7 +3,6 @@ class MachineLearning
|
||||
attr_accessor :job
|
||||
|
||||
SCRIPTS_FOLDER = Rails.root.join("public", "machine_learning", "scripts").freeze
|
||||
DATA_FOLDER = Rails.root.join("public", "machine_learning", "data").freeze
|
||||
|
||||
def initialize(job)
|
||||
@job = job
|
||||
@@ -11,6 +10,10 @@ class MachineLearning
|
||||
@previous_modified_date = set_previous_modified_date
|
||||
end
|
||||
|
||||
# Data folder used by this instance.
#
# Delegates to the class-level data_folder so that instance code and
# class-level callers resolve the same location.
def data_folder
  self.class.data_folder
end
|
||||
|
||||
def run
|
||||
begin
|
||||
export_proposals_to_json
|
||||
@@ -81,17 +84,33 @@ class MachineLearning
|
||||
"comments.json"
|
||||
end
|
||||
|
||||
# Location of the machine learning data folder: Rails.root joined with
# "public" and the relative path returned by tenant_data_folder.
#
# NOTE(review): the specs call this as MachineLearning.data_folder, so it is
# presumably defined at class level (the enclosing scope is outside this
# hunk) - confirm.
def data_folder
  Rails.root.join("public", tenant_data_folder)
end
|
||||
|
||||
# Relative path of the machine learning data folder for the current tenant.
#
# When tenant_subfolder is an empty string, File.join produces a leading "/"
# ("/machine_learning/data"); it is stripped so the result is always a
# relative path: "machine_learning/data" or
# "<tenant_subfolder>/machine_learning/data".
def tenant_data_folder
  File.join(tenant_subfolder, "machine_learning", "data").delete_prefix("/")
end
|
||||
|
||||
# Subfolder that namespaces the current tenant's files.
#
# Returns an empty string when Tenant.default? is true; otherwise returns
# "tenants/<Tenant.current_schema>".
def tenant_subfolder
  return "" if Tenant.default?

  File.join("tenants", Tenant.current_schema)
end
|
||||
|
||||
def data_output_files
|
||||
files = { tags: [], related_content: [], comments_summary: [] }
|
||||
|
||||
files[:tags] << proposals_tags_filename if File.exists?(DATA_FOLDER.join(proposals_tags_filename))
|
||||
files[:tags] << proposals_taggings_filename if File.exists?(DATA_FOLDER.join(proposals_taggings_filename))
|
||||
files[:tags] << investments_tags_filename if File.exists?(DATA_FOLDER.join(investments_tags_filename))
|
||||
files[:tags] << investments_taggings_filename if File.exists?(DATA_FOLDER.join(investments_taggings_filename))
|
||||
files[:related_content] << proposals_related_filename if File.exists?(DATA_FOLDER.join(proposals_related_filename))
|
||||
files[:related_content] << investments_related_filename if File.exists?(DATA_FOLDER.join(investments_related_filename))
|
||||
files[:comments_summary] << proposals_comments_summary_filename if File.exists?(DATA_FOLDER.join(proposals_comments_summary_filename))
|
||||
files[:comments_summary] << investments_comments_summary_filename if File.exists?(DATA_FOLDER.join(investments_comments_summary_filename))
|
||||
files[:tags] << proposals_tags_filename if File.exists?(data_folder.join(proposals_tags_filename))
|
||||
files[:tags] << proposals_taggings_filename if File.exists?(data_folder.join(proposals_taggings_filename))
|
||||
files[:tags] << investments_tags_filename if File.exists?(data_folder.join(investments_tags_filename))
|
||||
files[:tags] << investments_taggings_filename if File.exists?(data_folder.join(investments_taggings_filename))
|
||||
files[:related_content] << proposals_related_filename if File.exists?(data_folder.join(proposals_related_filename))
|
||||
files[:related_content] << investments_related_filename if File.exists?(data_folder.join(investments_related_filename))
|
||||
files[:comments_summary] << proposals_comments_summary_filename if File.exists?(data_folder.join(proposals_comments_summary_filename))
|
||||
files[:comments_summary] << investments_comments_summary_filename if File.exists?(data_folder.join(investments_comments_summary_filename))
|
||||
|
||||
files
|
||||
end
|
||||
@@ -110,10 +129,10 @@ class MachineLearning
|
||||
proposals_comments_summary_filename,
|
||||
investments_comments_summary_filename
|
||||
]
|
||||
json = Dir[DATA_FOLDER.join("*.json")].map do |full_path_filename|
|
||||
json = Dir[data_folder.join("*.json")].map do |full_path_filename|
|
||||
full_path_filename.split("/").last
|
||||
end
|
||||
csv = Dir[DATA_FOLDER.join("*.csv")].map do |full_path_filename|
|
||||
csv = Dir[data_folder.join("*.csv")].map do |full_path_filename|
|
||||
full_path_filename.split("/").last
|
||||
end
|
||||
(json + csv - excluded).sort
|
||||
@@ -152,7 +171,7 @@ class MachineLearning
|
||||
end
|
||||
|
||||
def data_path(filename)
|
||||
"/machine_learning/data/" + filename
|
||||
"/#{tenant_data_folder}/#{filename}"
|
||||
end
|
||||
|
||||
def script_kinds
|
||||
@@ -196,29 +215,35 @@ class MachineLearning
|
||||
private
|
||||
|
||||
def create_data_folder
|
||||
FileUtils.mkdir_p DATA_FOLDER
|
||||
FileUtils.mkdir_p data_folder
|
||||
end
|
||||
|
||||
def export_proposals_to_json
|
||||
create_data_folder
|
||||
filename = DATA_FOLDER.join(MachineLearning.proposals_filename)
|
||||
filename = data_folder.join(MachineLearning.proposals_filename)
|
||||
Proposal::Exporter.new.to_json_file(filename)
|
||||
end
|
||||
|
||||
def export_budget_investments_to_json
|
||||
create_data_folder
|
||||
filename = DATA_FOLDER.join(MachineLearning.investments_filename)
|
||||
filename = data_folder.join(MachineLearning.investments_filename)
|
||||
Budget::Investment::Exporter.new(Array.new).to_json_file(filename)
|
||||
end
|
||||
|
||||
def export_comments_to_json
|
||||
create_data_folder
|
||||
filename = DATA_FOLDER.join(MachineLearning.comments_filename)
|
||||
filename = data_folder.join(MachineLearning.comments_filename)
|
||||
Comment::Exporter.new.to_json_file(filename)
|
||||
end
|
||||
|
||||
def run_machine_learning_scripts
|
||||
output = `cd #{SCRIPTS_FOLDER} && python #{job.script} 2>&1`
|
||||
command = if Tenant.default?
|
||||
"python #{job.script}"
|
||||
else
|
||||
"CONSUL_TENANT=#{Tenant.current_schema} python #{job.script}"
|
||||
end
|
||||
|
||||
output = `cd #{SCRIPTS_FOLDER} && #{command} 2>&1`
|
||||
result = $?.success?
|
||||
if result == false
|
||||
job.update!(finished_at: Time.current, error: output)
|
||||
@@ -254,7 +279,7 @@ class MachineLearning
|
||||
end
|
||||
|
||||
def import_ml_proposals_comments_summary
|
||||
json_file = DATA_FOLDER.join(MachineLearning.proposals_comments_summary_filename)
|
||||
json_file = data_folder.join(MachineLearning.proposals_comments_summary_filename)
|
||||
json_data = JSON.parse(File.read(json_file)).each(&:deep_symbolize_keys!)
|
||||
json_data.each do |attributes|
|
||||
attributes.delete(:id)
|
||||
@@ -266,7 +291,7 @@ class MachineLearning
|
||||
end
|
||||
|
||||
def import_ml_investments_comments_summary
|
||||
json_file = DATA_FOLDER.join(MachineLearning.investments_comments_summary_filename)
|
||||
json_file = data_folder.join(MachineLearning.investments_comments_summary_filename)
|
||||
json_data = JSON.parse(File.read(json_file)).each(&:deep_symbolize_keys!)
|
||||
json_data.each do |attributes|
|
||||
attributes.delete(:id)
|
||||
@@ -278,7 +303,7 @@ class MachineLearning
|
||||
end
|
||||
|
||||
def import_proposals_related_content
|
||||
json_file = DATA_FOLDER.join(MachineLearning.proposals_related_filename)
|
||||
json_file = data_folder.join(MachineLearning.proposals_related_filename)
|
||||
json_data = JSON.parse(File.read(json_file)).each(&:deep_symbolize_keys!)
|
||||
json_data.each do |related|
|
||||
id = related.delete(:id)
|
||||
@@ -306,7 +331,7 @@ class MachineLearning
|
||||
end
|
||||
|
||||
def import_budget_investments_related_content
|
||||
json_file = DATA_FOLDER.join(MachineLearning.investments_related_filename)
|
||||
json_file = data_folder.join(MachineLearning.investments_related_filename)
|
||||
json_data = JSON.parse(File.read(json_file)).each(&:deep_symbolize_keys!)
|
||||
json_data.each do |related|
|
||||
id = related.delete(:id)
|
||||
@@ -335,7 +360,7 @@ class MachineLearning
|
||||
|
||||
def import_ml_proposals_tags
|
||||
ids = {}
|
||||
json_file = DATA_FOLDER.join(MachineLearning.proposals_tags_filename)
|
||||
json_file = data_folder.join(MachineLearning.proposals_tags_filename)
|
||||
json_data = JSON.parse(File.read(json_file)).each(&:deep_symbolize_keys!)
|
||||
json_data.each do |attributes|
|
||||
if attributes[:name].present?
|
||||
@@ -348,7 +373,7 @@ class MachineLearning
|
||||
end
|
||||
end
|
||||
|
||||
json_file = DATA_FOLDER.join(MachineLearning.proposals_taggings_filename)
|
||||
json_file = data_folder.join(MachineLearning.proposals_taggings_filename)
|
||||
json_data = JSON.parse(File.read(json_file)).each(&:deep_symbolize_keys!)
|
||||
json_data.each do |attributes|
|
||||
if attributes[:tag_id].present?
|
||||
@@ -365,7 +390,7 @@ class MachineLearning
|
||||
|
||||
def import_ml_investments_tags
|
||||
ids = {}
|
||||
json_file = DATA_FOLDER.join(MachineLearning.investments_tags_filename)
|
||||
json_file = data_folder.join(MachineLearning.investments_tags_filename)
|
||||
json_data = JSON.parse(File.read(json_file)).each(&:deep_symbolize_keys!)
|
||||
json_data.each do |attributes|
|
||||
if attributes[:name].present?
|
||||
@@ -378,7 +403,7 @@ class MachineLearning
|
||||
end
|
||||
end
|
||||
|
||||
json_file = DATA_FOLDER.join(MachineLearning.investments_taggings_filename)
|
||||
json_file = data_folder.join(MachineLearning.investments_taggings_filename)
|
||||
json_data = JSON.parse(File.read(json_file)).each(&:deep_symbolize_keys!)
|
||||
json_data.each do |attributes|
|
||||
if attributes[:tag_id].present?
|
||||
@@ -421,13 +446,13 @@ class MachineLearning
|
||||
end
|
||||
|
||||
def last_modified_date_for(filename)
|
||||
return nil unless File.exists? DATA_FOLDER.join(filename)
|
||||
return nil unless File.exists? data_folder.join(filename)
|
||||
|
||||
File.mtime DATA_FOLDER.join(filename)
|
||||
File.mtime data_folder.join(filename)
|
||||
end
|
||||
|
||||
def updated_file?(filename)
|
||||
return false unless File.exists? DATA_FOLDER.join(filename)
|
||||
return false unless File.exists? data_folder.join(filename)
|
||||
return true unless previous_modified_date[filename].present?
|
||||
|
||||
last_modified_date_for(filename) > previous_modified_date[filename]
|
||||
|
||||
@@ -63,14 +63,17 @@ tqdm_notebook = True
|
||||
|
||||
|
||||
# In[2]:
|
||||
import os
|
||||
|
||||
if os.environ.get("CONSUL_TENANT"):
|
||||
data_path = '../../tenants/' + os.environ["CONSUL_TENANT"] + '/machine_learning/data'
|
||||
else:
|
||||
data_path = '../data'
|
||||
|
||||
data_path = '../data'
|
||||
config_file = 'budgets_related_content_and_tags_nmf.ini'
|
||||
logging_file ='budgets_related_content_and_tags_nmf.log'
|
||||
|
||||
# Read the configuration file
|
||||
import os
|
||||
import configparser
|
||||
config = configparser.ConfigParser()
|
||||
check_file(os.path.join(data_path,config_file))
|
||||
|
||||
@@ -60,14 +60,17 @@ tqdm_notebook = True
|
||||
|
||||
|
||||
# In[ ]:
|
||||
import os
|
||||
|
||||
if os.environ.get("CONSUL_TENANT"):
|
||||
data_path = '../../tenants/' + os.environ["CONSUL_TENANT"] + '/machine_learning/data'
|
||||
else:
|
||||
data_path = '../data'
|
||||
|
||||
data_path = '../data'
|
||||
config_file = 'budgets_summary_comments_textrank.ini'
|
||||
logging_file ='budgets_summary_comments_textrank.log'
|
||||
|
||||
# Read the configuration file
|
||||
import os
|
||||
import configparser
|
||||
config = configparser.ConfigParser()
|
||||
check_file(os.path.join(data_path,config_file))
|
||||
|
||||
@@ -63,14 +63,17 @@ tqdm_notebook = True
|
||||
|
||||
|
||||
# In[2]:
|
||||
import os
|
||||
|
||||
if os.environ.get("CONSUL_TENANT"):
|
||||
data_path = '../../tenants/' + os.environ["CONSUL_TENANT"] + '/machine_learning/data'
|
||||
else:
|
||||
data_path = '../data'
|
||||
|
||||
data_path = '../data'
|
||||
config_file = 'proposals_related_content_and_tags_nmf.ini'
|
||||
logging_file ='proposals_related_content_and_tags_nmf.log'
|
||||
|
||||
# Read the configuration file
|
||||
import os
|
||||
import configparser
|
||||
config = configparser.ConfigParser()
|
||||
check_file(os.path.join(data_path,config_file))
|
||||
|
||||
@@ -60,14 +60,17 @@ tqdm_notebook = True
|
||||
|
||||
|
||||
# In[3]:
|
||||
import os
|
||||
|
||||
if os.environ.get("CONSUL_TENANT"):
|
||||
data_path = '../../tenants/' + os.environ["CONSUL_TENANT"] + '/machine_learning/data'
|
||||
else:
|
||||
data_path = '../data'
|
||||
|
||||
data_path = '../data'
|
||||
config_file = 'proposals_summary_comments_textrank.ini'
|
||||
logging_file ='proposals_summary_comments_textrank.log'
|
||||
|
||||
# Read the configuration file
|
||||
import os
|
||||
import configparser
|
||||
config = configparser.ConfigParser()
|
||||
check_file(os.path.join(data_path,config_file))
|
||||
|
||||
@@ -309,7 +309,7 @@ describe MachineLearning do
|
||||
machine_learning = MachineLearning.new(job)
|
||||
machine_learning.send(:export_proposals_to_json)
|
||||
|
||||
json_file = MachineLearning::DATA_FOLDER.join("proposals.json")
|
||||
json_file = MachineLearning.data_folder.join("proposals.json")
|
||||
json = JSON.parse(File.read(json_file))
|
||||
|
||||
expect(json).to be_an Array
|
||||
@@ -335,7 +335,7 @@ describe MachineLearning do
|
||||
machine_learning = MachineLearning.new(job)
|
||||
machine_learning.send(:export_budget_investments_to_json)
|
||||
|
||||
json_file = MachineLearning::DATA_FOLDER.join("budget_investments.json")
|
||||
json_file = MachineLearning.data_folder.join("budget_investments.json")
|
||||
json = JSON.parse(File.read(json_file))
|
||||
|
||||
expect(json).to be_an Array
|
||||
@@ -359,7 +359,7 @@ describe MachineLearning do
|
||||
machine_learning = MachineLearning.new(job)
|
||||
machine_learning.send(:export_comments_to_json)
|
||||
|
||||
json_file = MachineLearning::DATA_FOLDER.join("comments.json")
|
||||
json_file = MachineLearning.data_folder.join("comments.json")
|
||||
json = JSON.parse(File.read(json_file))
|
||||
|
||||
expect(json).to be_an Array
|
||||
@@ -428,7 +428,7 @@ describe MachineLearning do
|
||||
]
|
||||
|
||||
filename = "ml_comments_summaries_proposals.json"
|
||||
json_file = MachineLearning::DATA_FOLDER.join(filename)
|
||||
json_file = MachineLearning.data_folder.join(filename)
|
||||
expect(File).to receive(:read).with(json_file).and_return data.to_json
|
||||
|
||||
machine_learning.send(:import_ml_proposals_comments_summary)
|
||||
@@ -450,7 +450,7 @@ describe MachineLearning do
|
||||
]
|
||||
|
||||
filename = "ml_comments_summaries_budgets.json"
|
||||
json_file = MachineLearning::DATA_FOLDER.join(filename)
|
||||
json_file = MachineLearning.data_folder.join(filename)
|
||||
expect(File).to receive(:read).with(json_file).and_return data.to_json
|
||||
|
||||
machine_learning.send(:import_ml_investments_comments_summary)
|
||||
@@ -476,7 +476,7 @@ describe MachineLearning do
|
||||
]
|
||||
|
||||
filename = "ml_related_content_proposals.json"
|
||||
json_file = MachineLearning::DATA_FOLDER.join(filename)
|
||||
json_file = MachineLearning.data_folder.join(filename)
|
||||
expect(File).to receive(:read).with(json_file).and_return data.to_json
|
||||
|
||||
machine_learning.send(:import_proposals_related_content)
|
||||
@@ -504,7 +504,7 @@ describe MachineLearning do
|
||||
]
|
||||
|
||||
filename = "ml_related_content_budgets.json"
|
||||
json_file = MachineLearning::DATA_FOLDER.join(filename)
|
||||
json_file = MachineLearning.data_folder.join(filename)
|
||||
expect(File).to receive(:read).with(json_file).and_return data.to_json
|
||||
|
||||
machine_learning.send(:import_budget_investments_related_content)
|
||||
@@ -538,11 +538,11 @@ describe MachineLearning do
|
||||
]
|
||||
|
||||
tags_filename = "ml_tags_proposals.json"
|
||||
tags_json_file = MachineLearning::DATA_FOLDER.join(tags_filename)
|
||||
tags_json_file = MachineLearning.data_folder.join(tags_filename)
|
||||
expect(File).to receive(:read).with(tags_json_file).and_return tags_data.to_json
|
||||
|
||||
taggings_filename = "ml_taggings_proposals.json"
|
||||
taggings_json_file = MachineLearning::DATA_FOLDER.join(taggings_filename)
|
||||
taggings_json_file = MachineLearning.data_folder.join(taggings_filename)
|
||||
expect(File).to receive(:read).with(taggings_json_file).and_return taggings_data.to_json
|
||||
|
||||
machine_learning.send(:import_ml_proposals_tags)
|
||||
@@ -580,11 +580,11 @@ describe MachineLearning do
|
||||
]
|
||||
|
||||
tags_filename = "ml_tags_budgets.json"
|
||||
tags_json_file = MachineLearning::DATA_FOLDER.join(tags_filename)
|
||||
tags_json_file = MachineLearning.data_folder.join(tags_filename)
|
||||
expect(File).to receive(:read).with(tags_json_file).and_return tags_data.to_json
|
||||
|
||||
taggings_filename = "ml_taggings_budgets.json"
|
||||
taggings_json_file = MachineLearning::DATA_FOLDER.join(taggings_filename)
|
||||
taggings_json_file = MachineLearning.data_folder.join(taggings_filename)
|
||||
expect(File).to receive(:read).with(taggings_json_file).and_return taggings_data.to_json
|
||||
|
||||
machine_learning.send(:import_ml_investments_tags)
|
||||
|
||||
@@ -207,7 +207,7 @@ describe "Machine learning" do
|
||||
end
|
||||
|
||||
scenario "Show output files info on settins page" do
|
||||
FileUtils.mkdir_p MachineLearning::DATA_FOLDER
|
||||
FileUtils.mkdir_p MachineLearning.data_folder
|
||||
|
||||
allow_any_instance_of(MachineLearning).to receive(:run) do
|
||||
MachineLearningJob.first.update!(finished_at: 2.minutes.from_now)
|
||||
@@ -215,9 +215,9 @@ describe "Machine learning" do
|
||||
script: "proposals_summary_comments_textrank.py",
|
||||
kind: "comments_summary",
|
||||
updated_at: 2.minutes.from_now)
|
||||
comments_file = MachineLearning::DATA_FOLDER.join(MachineLearning.comments_filename)
|
||||
comments_file = MachineLearning.data_folder.join(MachineLearning.comments_filename)
|
||||
File.write(comments_file, [].to_json)
|
||||
proposals_comments_summary_file = MachineLearning::DATA_FOLDER.join(MachineLearning.proposals_comments_summary_filename)
|
||||
proposals_comments_summary_file = MachineLearning.data_folder.join(MachineLearning.proposals_comments_summary_filename)
|
||||
File.write(proposals_comments_summary_file, [].to_json)
|
||||
end
|
||||
|
||||
|
||||
Reference in New Issue
Block a user