Use a different machine learning folder per tenant

We're using the "tenants" subfolder for consistency with the folder
structure we use in ActiveStorage and because some CONSUL installations
might have folders inside the `data` folder which might conflict with
the folders created by tenants.

Note that the Python scripts have a lot of duplication, meaning we need
to change all of them. I'm not refactoring them because I'm not familiar
enough with these scripts (or with Python, for that matter).

Also note that the scripts folder is still shared by all tenants,
meaning it isn't possible to have different scripts for different
tenants. I'm not sure how this situation should be handled; again, I'm
not familiar enough with this feature.
This commit is contained in:
Javi Martín
2022-10-19 01:41:07 +02:00
parent 58c9e8462d
commit 2f312bf474
9 changed files with 89 additions and 50 deletions

View File

@@ -23,6 +23,7 @@ public/sitemap.xml
public/tenants/*/sitemap.xml
public/assets/
public/machine_learning/data/
public/tenants/*/machine_learning/data/
# Bundler config, cache and gemsets
**/.bundle/

1
.gitignore vendored
View File

@@ -25,6 +25,7 @@ tmp/
/public/tenants/*/sitemap.xml
/public/assets/
/public/machine_learning/data/
/public/tenants/*/machine_learning/data/
# Bundler config, cache and gemsets
.bundle/

View File

@@ -3,7 +3,6 @@ class MachineLearning
attr_accessor :job
SCRIPTS_FOLDER = Rails.root.join("public", "machine_learning", "scripts").freeze
DATA_FOLDER = Rails.root.join("public", "machine_learning", "data").freeze
def initialize(job)
@job = job
@@ -11,6 +10,10 @@ class MachineLearning
@previous_modified_date = set_previous_modified_date
end
# Instance-level shortcut: returns whatever the `data_folder` method of the
# receiver's class returns.
def data_folder
  owner = self.class
  owner.data_folder
end
def run
begin
export_proposals_to_json
@@ -81,17 +84,33 @@ class MachineLearning
"comments.json"
end
# Location of the data folder: the "public" directory under Rails.root,
# joined with the relative path given by `tenant_data_folder`.
def data_folder
  public_dir = Rails.root.join("public")
  public_dir.join(tenant_data_folder)
end
# Path of the data folder as a string with no leading slash.
# When `tenant_subfolder` is empty, File.join produces a leading "/",
# which is stripped here.
def tenant_data_folder
  joined = File.join(tenant_subfolder, "machine_learning", "data")
  joined.delete_prefix("/")
end
# Subfolder prefix for the current tenant: an empty string when
# Tenant.default? is true, otherwise "tenants/<current schema>".
def tenant_subfolder
  return "" if Tenant.default?

  File.join("tenants", Tenant.current_schema)
end
def data_output_files
files = { tags: [], related_content: [], comments_summary: [] }
files[:tags] << proposals_tags_filename if File.exists?(DATA_FOLDER.join(proposals_tags_filename))
files[:tags] << proposals_taggings_filename if File.exists?(DATA_FOLDER.join(proposals_taggings_filename))
files[:tags] << investments_tags_filename if File.exists?(DATA_FOLDER.join(investments_tags_filename))
files[:tags] << investments_taggings_filename if File.exists?(DATA_FOLDER.join(investments_taggings_filename))
files[:related_content] << proposals_related_filename if File.exists?(DATA_FOLDER.join(proposals_related_filename))
files[:related_content] << investments_related_filename if File.exists?(DATA_FOLDER.join(investments_related_filename))
files[:comments_summary] << proposals_comments_summary_filename if File.exists?(DATA_FOLDER.join(proposals_comments_summary_filename))
files[:comments_summary] << investments_comments_summary_filename if File.exists?(DATA_FOLDER.join(investments_comments_summary_filename))
files[:tags] << proposals_tags_filename if File.exists?(data_folder.join(proposals_tags_filename))
files[:tags] << proposals_taggings_filename if File.exists?(data_folder.join(proposals_taggings_filename))
files[:tags] << investments_tags_filename if File.exists?(data_folder.join(investments_tags_filename))
files[:tags] << investments_taggings_filename if File.exists?(data_folder.join(investments_taggings_filename))
files[:related_content] << proposals_related_filename if File.exists?(data_folder.join(proposals_related_filename))
files[:related_content] << investments_related_filename if File.exists?(data_folder.join(investments_related_filename))
files[:comments_summary] << proposals_comments_summary_filename if File.exists?(data_folder.join(proposals_comments_summary_filename))
files[:comments_summary] << investments_comments_summary_filename if File.exists?(data_folder.join(investments_comments_summary_filename))
files
end
@@ -110,10 +129,10 @@ class MachineLearning
proposals_comments_summary_filename,
investments_comments_summary_filename
]
json = Dir[DATA_FOLDER.join("*.json")].map do |full_path_filename|
json = Dir[data_folder.join("*.json")].map do |full_path_filename|
full_path_filename.split("/").last
end
csv = Dir[DATA_FOLDER.join("*.csv")].map do |full_path_filename|
csv = Dir[data_folder.join("*.csv")].map do |full_path_filename|
full_path_filename.split("/").last
end
(json + csv - excluded).sort
@@ -152,7 +171,7 @@ class MachineLearning
end
def data_path(filename)
"/machine_learning/data/" + filename
"/#{tenant_data_folder}/#{filename}"
end
def script_kinds
@@ -196,29 +215,35 @@ class MachineLearning
private
def create_data_folder
FileUtils.mkdir_p DATA_FOLDER
FileUtils.mkdir_p data_folder
end
def export_proposals_to_json
create_data_folder
filename = DATA_FOLDER.join(MachineLearning.proposals_filename)
filename = data_folder.join(MachineLearning.proposals_filename)
Proposal::Exporter.new.to_json_file(filename)
end
def export_budget_investments_to_json
create_data_folder
filename = DATA_FOLDER.join(MachineLearning.investments_filename)
filename = data_folder.join(MachineLearning.investments_filename)
Budget::Investment::Exporter.new(Array.new).to_json_file(filename)
end
def export_comments_to_json
create_data_folder
filename = DATA_FOLDER.join(MachineLearning.comments_filename)
filename = data_folder.join(MachineLearning.comments_filename)
Comment::Exporter.new.to_json_file(filename)
end
def run_machine_learning_scripts
output = `cd #{SCRIPTS_FOLDER} && python #{job.script} 2>&1`
command = if Tenant.default?
"python #{job.script}"
else
"CONSUL_TENANT=#{Tenant.current_schema} python #{job.script}"
end
output = `cd #{SCRIPTS_FOLDER} && #{command} 2>&1`
result = $?.success?
if result == false
job.update!(finished_at: Time.current, error: output)
@@ -254,7 +279,7 @@ class MachineLearning
end
def import_ml_proposals_comments_summary
json_file = DATA_FOLDER.join(MachineLearning.proposals_comments_summary_filename)
json_file = data_folder.join(MachineLearning.proposals_comments_summary_filename)
json_data = JSON.parse(File.read(json_file)).each(&:deep_symbolize_keys!)
json_data.each do |attributes|
attributes.delete(:id)
@@ -266,7 +291,7 @@ class MachineLearning
end
def import_ml_investments_comments_summary
json_file = DATA_FOLDER.join(MachineLearning.investments_comments_summary_filename)
json_file = data_folder.join(MachineLearning.investments_comments_summary_filename)
json_data = JSON.parse(File.read(json_file)).each(&:deep_symbolize_keys!)
json_data.each do |attributes|
attributes.delete(:id)
@@ -278,7 +303,7 @@ class MachineLearning
end
def import_proposals_related_content
json_file = DATA_FOLDER.join(MachineLearning.proposals_related_filename)
json_file = data_folder.join(MachineLearning.proposals_related_filename)
json_data = JSON.parse(File.read(json_file)).each(&:deep_symbolize_keys!)
json_data.each do |related|
id = related.delete(:id)
@@ -306,7 +331,7 @@ class MachineLearning
end
def import_budget_investments_related_content
json_file = DATA_FOLDER.join(MachineLearning.investments_related_filename)
json_file = data_folder.join(MachineLearning.investments_related_filename)
json_data = JSON.parse(File.read(json_file)).each(&:deep_symbolize_keys!)
json_data.each do |related|
id = related.delete(:id)
@@ -335,7 +360,7 @@ class MachineLearning
def import_ml_proposals_tags
ids = {}
json_file = DATA_FOLDER.join(MachineLearning.proposals_tags_filename)
json_file = data_folder.join(MachineLearning.proposals_tags_filename)
json_data = JSON.parse(File.read(json_file)).each(&:deep_symbolize_keys!)
json_data.each do |attributes|
if attributes[:name].present?
@@ -348,7 +373,7 @@ class MachineLearning
end
end
json_file = DATA_FOLDER.join(MachineLearning.proposals_taggings_filename)
json_file = data_folder.join(MachineLearning.proposals_taggings_filename)
json_data = JSON.parse(File.read(json_file)).each(&:deep_symbolize_keys!)
json_data.each do |attributes|
if attributes[:tag_id].present?
@@ -365,7 +390,7 @@ class MachineLearning
def import_ml_investments_tags
ids = {}
json_file = DATA_FOLDER.join(MachineLearning.investments_tags_filename)
json_file = data_folder.join(MachineLearning.investments_tags_filename)
json_data = JSON.parse(File.read(json_file)).each(&:deep_symbolize_keys!)
json_data.each do |attributes|
if attributes[:name].present?
@@ -378,7 +403,7 @@ class MachineLearning
end
end
json_file = DATA_FOLDER.join(MachineLearning.investments_taggings_filename)
json_file = data_folder.join(MachineLearning.investments_taggings_filename)
json_data = JSON.parse(File.read(json_file)).each(&:deep_symbolize_keys!)
json_data.each do |attributes|
if attributes[:tag_id].present?
@@ -421,13 +446,13 @@ class MachineLearning
end
def last_modified_date_for(filename)
return nil unless File.exists? DATA_FOLDER.join(filename)
return nil unless File.exists? data_folder.join(filename)
File.mtime DATA_FOLDER.join(filename)
File.mtime data_folder.join(filename)
end
def updated_file?(filename)
return false unless File.exists? DATA_FOLDER.join(filename)
return false unless File.exists? data_folder.join(filename)
return true unless previous_modified_date[filename].present?
last_modified_date_for(filename) > previous_modified_date[filename]

View File

@@ -63,14 +63,17 @@ tqdm_notebook = True
# In[2]:
import os
if os.environ.get("CONSUL_TENANT"):
data_path = '../../tenants/' + os.environ["CONSUL_TENANT"] + '/machine_learning/data'
else:
data_path = '../data'
data_path = '../data'
config_file = 'budgets_related_content_and_tags_nmf.ini'
logging_file ='budgets_related_content_and_tags_nmf.log'
# Read the configuration file
import os
import configparser
config = configparser.ConfigParser()
check_file(os.path.join(data_path,config_file))

View File

@@ -60,14 +60,17 @@ tqdm_notebook = True
# In[ ]:
import os
if os.environ.get("CONSUL_TENANT"):
data_path = '../../tenants/' + os.environ["CONSUL_TENANT"] + '/machine_learning/data'
else:
data_path = '../data'
data_path = '../data'
config_file = 'budgets_summary_comments_textrank.ini'
logging_file ='budgets_summary_comments_textrank.log'
# Read the configuration file
import os
import configparser
config = configparser.ConfigParser()
check_file(os.path.join(data_path,config_file))

View File

@@ -63,14 +63,17 @@ tqdm_notebook = True
# In[2]:
import os
if os.environ.get("CONSUL_TENANT"):
data_path = '../../tenants/' + os.environ["CONSUL_TENANT"] + '/machine_learning/data'
else:
data_path = '../data'
data_path = '../data'
config_file = 'proposals_related_content_and_tags_nmf.ini'
logging_file ='proposals_related_content_and_tags_nmf.log'
# Read the configuration file
import os
import configparser
config = configparser.ConfigParser()
check_file(os.path.join(data_path,config_file))

View File

@@ -60,14 +60,17 @@ tqdm_notebook = True
# In[3]:
import os
if os.environ.get("CONSUL_TENANT"):
data_path = '../../tenants/' + os.environ["CONSUL_TENANT"] + '/machine_learning/data'
else:
data_path = '../data'
data_path = '../data'
config_file = 'proposals_summary_comments_textrank.ini'
logging_file ='proposals_summary_comments_textrank.log'
# Read the configuration file
import os
import configparser
config = configparser.ConfigParser()
check_file(os.path.join(data_path,config_file))

View File

@@ -309,7 +309,7 @@ describe MachineLearning do
machine_learning = MachineLearning.new(job)
machine_learning.send(:export_proposals_to_json)
json_file = MachineLearning::DATA_FOLDER.join("proposals.json")
json_file = MachineLearning.data_folder.join("proposals.json")
json = JSON.parse(File.read(json_file))
expect(json).to be_an Array
@@ -335,7 +335,7 @@ describe MachineLearning do
machine_learning = MachineLearning.new(job)
machine_learning.send(:export_budget_investments_to_json)
json_file = MachineLearning::DATA_FOLDER.join("budget_investments.json")
json_file = MachineLearning.data_folder.join("budget_investments.json")
json = JSON.parse(File.read(json_file))
expect(json).to be_an Array
@@ -359,7 +359,7 @@ describe MachineLearning do
machine_learning = MachineLearning.new(job)
machine_learning.send(:export_comments_to_json)
json_file = MachineLearning::DATA_FOLDER.join("comments.json")
json_file = MachineLearning.data_folder.join("comments.json")
json = JSON.parse(File.read(json_file))
expect(json).to be_an Array
@@ -428,7 +428,7 @@ describe MachineLearning do
]
filename = "ml_comments_summaries_proposals.json"
json_file = MachineLearning::DATA_FOLDER.join(filename)
json_file = MachineLearning.data_folder.join(filename)
expect(File).to receive(:read).with(json_file).and_return data.to_json
machine_learning.send(:import_ml_proposals_comments_summary)
@@ -450,7 +450,7 @@ describe MachineLearning do
]
filename = "ml_comments_summaries_budgets.json"
json_file = MachineLearning::DATA_FOLDER.join(filename)
json_file = MachineLearning.data_folder.join(filename)
expect(File).to receive(:read).with(json_file).and_return data.to_json
machine_learning.send(:import_ml_investments_comments_summary)
@@ -476,7 +476,7 @@ describe MachineLearning do
]
filename = "ml_related_content_proposals.json"
json_file = MachineLearning::DATA_FOLDER.join(filename)
json_file = MachineLearning.data_folder.join(filename)
expect(File).to receive(:read).with(json_file).and_return data.to_json
machine_learning.send(:import_proposals_related_content)
@@ -504,7 +504,7 @@ describe MachineLearning do
]
filename = "ml_related_content_budgets.json"
json_file = MachineLearning::DATA_FOLDER.join(filename)
json_file = MachineLearning.data_folder.join(filename)
expect(File).to receive(:read).with(json_file).and_return data.to_json
machine_learning.send(:import_budget_investments_related_content)
@@ -538,11 +538,11 @@ describe MachineLearning do
]
tags_filename = "ml_tags_proposals.json"
tags_json_file = MachineLearning::DATA_FOLDER.join(tags_filename)
tags_json_file = MachineLearning.data_folder.join(tags_filename)
expect(File).to receive(:read).with(tags_json_file).and_return tags_data.to_json
taggings_filename = "ml_taggings_proposals.json"
taggings_json_file = MachineLearning::DATA_FOLDER.join(taggings_filename)
taggings_json_file = MachineLearning.data_folder.join(taggings_filename)
expect(File).to receive(:read).with(taggings_json_file).and_return taggings_data.to_json
machine_learning.send(:import_ml_proposals_tags)
@@ -580,11 +580,11 @@ describe MachineLearning do
]
tags_filename = "ml_tags_budgets.json"
tags_json_file = MachineLearning::DATA_FOLDER.join(tags_filename)
tags_json_file = MachineLearning.data_folder.join(tags_filename)
expect(File).to receive(:read).with(tags_json_file).and_return tags_data.to_json
taggings_filename = "ml_taggings_budgets.json"
taggings_json_file = MachineLearning::DATA_FOLDER.join(taggings_filename)
taggings_json_file = MachineLearning.data_folder.join(taggings_filename)
expect(File).to receive(:read).with(taggings_json_file).and_return taggings_data.to_json
machine_learning.send(:import_ml_investments_tags)

View File

@@ -207,7 +207,7 @@ describe "Machine learning" do
end
scenario "Show output files info on settins page" do
FileUtils.mkdir_p MachineLearning::DATA_FOLDER
FileUtils.mkdir_p MachineLearning.data_folder
allow_any_instance_of(MachineLearning).to receive(:run) do
MachineLearningJob.first.update!(finished_at: 2.minutes.from_now)
@@ -215,9 +215,9 @@ describe "Machine learning" do
script: "proposals_summary_comments_textrank.py",
kind: "comments_summary",
updated_at: 2.minutes.from_now)
comments_file = MachineLearning::DATA_FOLDER.join(MachineLearning.comments_filename)
comments_file = MachineLearning.data_folder.join(MachineLearning.comments_filename)
File.write(comments_file, [].to_json)
proposals_comments_summary_file = MachineLearning::DATA_FOLDER.join(MachineLearning.proposals_comments_summary_filename)
proposals_comments_summary_file = MachineLearning.data_folder.join(MachineLearning.proposals_comments_summary_filename)
File.write(proposals_comments_summary_file, [].to_json)
end