Files
grecia/app/models/machine_learning.rb
Javi Martín edd47877c2 Extract methods to get tenant subfolder/file paths
We were using the same logic in many different places, so we're
simplifying the code. I'm not convinced about the method names, though,
so we might change them in the future.

Note using this method for the default tenant in the `TenantDiskService`
class resulted in a `//` in the path, which is probably harmless but
very ugly and it also generates a different key than the one we got
until now. I've added an extra test to make sure that isn't the case.
2022-11-11 01:41:14 +01:00

461 lines
16 KiB
Ruby

class MachineLearning
attr_reader :user, :script, :previous_modified_date
attr_accessor :job
SCRIPTS_FOLDER = Rails.root.join("public", "machine_learning", "scripts").freeze
# Builds a runner for the given job.
#
# Keeps the job and its user, and stores the value returned by
# set_previous_modified_date so updated_file? can later compare the output
# files against the state they had when this object was created.
def initialize(job)
@job = job
@user = job.user
@previous_modified_date = set_previous_modified_date
end
# Instance-level shortcut for the class-level data_folder.
def data_folder
self.class.data_folder
end
# Exports the input data, runs the job's script and imports its results.
#
# Steps, in order:
#   1. Export proposals, budget investments and comments to JSON.
#   2. Run the script; stop silently when run_machine_learning_scripts
#      returns a falsy value (that method records the failure itself).
#   3. For every output file (or pair of files) that updated_file? reports
#      as changed, clean up the previously imported records, import the new
#      ones and refresh the matching MachineLearningInfo entry.
#   4. Mark the job as finished and email the user.
#
# Any exception is passed to handle_error and then re-raised.
def run
  export_proposals_to_json
  export_budget_investments_to_json
  export_comments_to_json
  return unless run_machine_learning_scripts

  # Each entry: filename readers on MachineLearning, cleanup method,
  # import method and the kind passed to update_machine_learning_info_for.
  import_steps = [
    [%i[proposals_taggings_filename proposals_tags_filename],
     :cleanup_proposals_tags!, :import_ml_proposals_tags, "tags"],
    [%i[investments_taggings_filename investments_tags_filename],
     :cleanup_investments_tags!, :import_ml_investments_tags, "tags"],
    [%i[proposals_related_filename],
     :cleanup_proposals_related_content!, :import_proposals_related_content, "related_content"],
    [%i[investments_related_filename],
     :cleanup_investments_related_content!, :import_budget_investments_related_content, "related_content"],
    [%i[proposals_comments_summary_filename],
     :cleanup_proposals_comments_summary!, :import_ml_proposals_comments_summary, "comments_summary"],
    [%i[investments_comments_summary_filename],
     :cleanup_investments_comments_summary!, :import_ml_investments_comments_summary, "comments_summary"]
  ]

  import_steps.each do |filename_readers, cleanup, import, kind|
    # all? stops at the first file that was not updated
    next unless filename_readers.all? { |reader| updated_file?(MachineLearning.public_send(reader)) }

    send(cleanup)
    send(import)
    update_machine_learning_info_for(kind)
  end

  job.update!(finished_at: Time.current)
  Mailer.machine_learning_success(user).deliver_later
rescue Exception => error
  # NOTE(review): rescuing Exception (not just StandardError) looks deliberate
  # here, since the error is stored on the job and then re-raised - confirm.
  handle_error(error)
  raise error
end
handle_asynchronously :run, queue: "machine_learning"
class << self
# Returns whether the "feature.machine_learning" setting has a present
# (non-blank) value.
def enabled?
Setting["feature.machine_learning"].present?
end
# Name of the JSON file the proposals are exported to.
def proposals_filename
"proposals.json"
end
# Name of the JSON file the budget investments are exported to.
def investments_filename
"budget_investments.json"
end
# Name of the JSON file the comments are exported to.
def comments_filename
"comments.json"
end
# Path of the data folder: tenant_data_folder joined under the application's
# "public" directory.
def data_folder
Rails.root.join("public", tenant_data_folder)
end
# Relative folder for the machine learning data, as returned by
# Tenant.path_with_subfolder for "machine_learning/data".
# NOTE(review): presumably this differs per tenant; see Tenant to confirm.
def tenant_data_folder
Tenant.path_with_subfolder("machine_learning/data")
end
# Lists the output files currently present in the data folder, grouped by kind.
#
# Returns a hash with the keys :tags, :related_content and :comments_summary;
# each value is an array with the names (not full paths) of the files of that
# kind that exist in data_folder. Kinds without files get an empty array.
def data_output_files
  candidates = {
    tags: [
      proposals_tags_filename,
      proposals_taggings_filename,
      investments_tags_filename,
      investments_taggings_filename
    ],
    related_content: [
      proposals_related_filename,
      investments_related_filename
    ],
    comments_summary: [
      proposals_comments_summary_filename,
      investments_comments_summary_filename
    ]
  }

  candidates.transform_values do |filenames|
    # File.exist? replaces the deprecated File.exists?, removed in Ruby 3.2
    filenames.select { |filename| File.exist?(data_folder.join(filename)) }
  end
end
# Lists the intermediate files found in the data folder.
#
# Returns the sorted names of every "*.json" and "*.csv" file in data_folder
# that is neither one of the exported input files nor one of the known
# output files.
def data_intermediate_files
  known_files = [
    proposals_filename,
    investments_filename,
    comments_filename,
    proposals_tags_filename,
    proposals_taggings_filename,
    investments_tags_filename,
    investments_taggings_filename,
    proposals_related_filename,
    investments_related_filename,
    proposals_comments_summary_filename,
    investments_comments_summary_filename
  ]

  found_files = %w[*.json *.csv].flat_map do |pattern|
    Dir[data_folder.join(pattern)].map { |path| File.basename(path) }
  end

  (found_files - known_files).sort
end
# Name of the output file with the tags for proposals.
def proposals_tags_filename
"ml_tags_proposals.json"
end
# Name of the output file with the taggings for proposals.
def proposals_taggings_filename
"ml_taggings_proposals.json"
end
# Name of the output file with the tags for budget investments.
def investments_tags_filename
"ml_tags_budgets.json"
end
# Name of the output file with the taggings for budget investments.
def investments_taggings_filename
"ml_taggings_budgets.json"
end
# Name of the output file with the related content for proposals.
def proposals_related_filename
"ml_related_content_proposals.json"
end
# Name of the output file with the related content for budget investments.
def investments_related_filename
"ml_related_content_budgets.json"
end
# Name of the output file with the comments summaries for proposals.
def proposals_comments_summary_filename
"ml_comments_summaries_proposals.json"
end
# Name of the output file with the comments summaries for budget investments.
def investments_comments_summary_filename
"ml_comments_summaries_budgets.json"
end
# Builds the string "/<tenant_data_folder>/<filename>" for the given file.
# NOTE(review): presumably the URL path the file is served from, since
# data_folder lives under "public" - confirm against callers.
def data_path(filename)
"/#{tenant_data_folder}/#{filename}"
end
# Kinds of results handled by this class; these are the same strings
# passed as `kind` to update_machine_learning_info_for.
def script_kinds
%w[tags related_content comments_summary]
end
# Describes the Python scripts available in SCRIPTS_FOLDER.
#
# Returns an array of hashes, sorted by script name, each one with:
#   :name        - the script's file name (without directories)
#   :description - the text returned by description_from for that script
def scripts_info
  script_paths = Dir[SCRIPTS_FOLDER.join("*.py")]

  entries = script_paths.map do |script_path|
    {
      name: File.basename(script_path),
      description: description_from(script_path)
    }
  end

  entries.sort_by { |entry| entry[:name] }
end
# Extracts the description of a script from its first triple-quoted comment.
#
# script_filename - path of the file to read.
#
# The comment starts at the first line beginning with three double quotes and
# ends at the next line containing them. Lines are stripped and joined with
# "<br>"; a trailing "<br>" is removed. A comment opened and closed on the
# same line (e.g. """Summary""") yields just the text between the delimiters;
# previously the closing delimiter leaked into the description and the code
# following it was included as well.
#
# Returns the description as a String ("" when no comment is found).
def description_from(script_filename)
  delimiter = '"""'
  break_line = "<br>"
  description = +""
  comment_found = false

  # foreach reads line by line, so we stop reading once the comment is closed
  File.foreach(script_filename) do |line|
    if line.start_with?(delimiter) && !comment_found
      comment_found = true
      text, closing, = line.delete_prefix(delimiter).partition(delimiter)

      unless closing.empty?
        # Opening and closing delimiters on the same line
        description << text.strip
        break
      end

      # Skip the line break when there is no text after the delimiter
      description << text.strip << break_line if text.match?(/[^[:space:]]/)
    elsif line.include?(delimiter)
      # Also reached by a delimiter that is not at the start of a line before
      # any comment was found (behavior kept from the original code)
      description << line.sub(delimiter, "").strip
      break
    elsif comment_found
      description << line.strip << break_line
    end
  end

  description.delete_suffix(break_line)
end
end
private
# Creates data_folder, including any missing parent directories.
def create_data_folder
FileUtils.mkdir_p data_folder
end
# Writes the proposals export to proposals_filename inside data_folder,
# creating the folder first.
def export_proposals_to_json
  create_data_folder

  target_path = data_folder.join(MachineLearning.proposals_filename)
  exporter = Proposal::Exporter.new
  exporter.to_json_file(target_path)
end
# Writes the budget investments export to investments_filename inside
# data_folder, creating the folder first.
def export_budget_investments_to_json
  create_data_folder

  target_path = data_folder.join(MachineLearning.investments_filename)
  # The exporter is built with an empty array, as in the original call
  exporter = Budget::Investment::Exporter.new([])
  exporter.to_json_file(target_path)
end
# Writes the comments export to comments_filename inside data_folder,
# creating the folder first.
def export_comments_to_json
  create_data_folder

  target_path = data_folder.join(MachineLearning.comments_filename)
  exporter = Comment::Exporter.new
  exporter.to_json_file(target_path)
end
# Runs the job's script with python from SCRIPTS_FOLDER.
#
# For non-default tenants the CONSUL_TENANT environment variable is set to
# the current schema. Standard output and standard error are captured
# together.
#
# Returns true when the command exits successfully. Otherwise stores the
# captured output as the job's error, marks the job as finished, emails the
# user and returns false.
def run_machine_learning_scripts
  # NOTE(review): job.script and the tenant schema are interpolated into a
  # shell command without escaping; confirm callers only allow known script
  # names.
  command = if Tenant.default?
              "python #{job.script}"
            else
              "CONSUL_TENANT=#{Tenant.current_schema} python #{job.script}"
            end

  output = `cd #{SCRIPTS_FOLDER} && #{command} 2>&1`

  # Process::Status#success? returns nil (not false) when the process was
  # terminated by a signal, so check for success instead of comparing
  # against false; otherwise a killed script would leave the job unfinished
  # and without an error.
  return true if $?.success?

  job.update!(finished_at: Time.current, error: output)
  Mailer.machine_learning_error(user).deliver_later
  false
end
# Destroys every "ml_tags" tagging on proposals, then destroys every tag
# left without any tagging at all.
def cleanup_proposals_tags!
  ml_taggings = Tagging.where(context: "ml_tags", taggable_type: "Proposal")
  ml_taggings.find_each { |tagging| tagging.destroy! }

  Tag.find_each do |tag|
    next unless Tagging.where(tag: tag).empty?

    tag.destroy!
  end
end
# Destroys every "ml_tags" tagging on budget investments, then destroys every
# tag left without any tagging at all.
def cleanup_investments_tags!
  ml_taggings = Tagging.where(context: "ml_tags", taggable_type: "Budget::Investment")
  ml_taggings.find_each { |tagging| tagging.destroy! }

  Tag.find_each do |tag|
    next unless Tagging.where(tag: tag).empty?

    tag.destroy!
  end
end
# Calls really_destroy! on every machine learning related content record for
# proposals, hidden ones included.
def cleanup_proposals_related_content!
  ml_related_contents = RelatedContent.with_hidden.for_proposals.from_machine_learning
  ml_related_contents.find_each { |related_content| related_content.really_destroy! }
end
# Calls really_destroy! on every machine learning related content record for
# investments, hidden ones included.
def cleanup_investments_related_content!
  ml_related_contents = RelatedContent.with_hidden.for_investments.from_machine_learning
  ml_related_contents.find_each { |related_content| related_content.really_destroy! }
end
# Destroys every comments summary whose commentable type is "Proposal".
def cleanup_proposals_comments_summary!
  summaries = MlSummaryComment.where(commentable_type: "Proposal")
  summaries.find_each { |summary| summary.destroy! }
end
# Destroys every comments summary whose commentable type is
# "Budget::Investment".
def cleanup_investments_comments_summary!
  summaries = MlSummaryComment.where(commentable_type: "Budget::Investment")
  summaries.find_each { |summary| summary.destroy! }
end
# Creates an MlSummaryComment for each entry of the proposals comments
# summary file.
#
# The :id of each entry is discarded. Entries whose commentable_id already
# has a summary with commentable type "Proposal" are skipped.
def import_ml_proposals_comments_summary
  summaries_path = data_folder.join(MachineLearning.proposals_comments_summary_filename)
  summaries = JSON.parse(File.read(summaries_path)).each(&:deep_symbolize_keys!)

  summaries.each do |summary_attributes|
    summary_attributes.delete(:id)

    existing_summary = MlSummaryComment.find_by(commentable_id: summary_attributes[:commentable_id],
                                                commentable_type: "Proposal")
    next if existing_summary

    MlSummaryComment.create!(summary_attributes)
  end
end
# Creates an MlSummaryComment for each entry of the investments comments
# summary file.
#
# The :id of each entry is discarded. Entries whose commentable_id already
# has a summary with commentable type "Budget::Investment" are skipped.
def import_ml_investments_comments_summary
  summaries_path = data_folder.join(MachineLearning.investments_comments_summary_filename)
  summaries = JSON.parse(File.read(summaries_path)).each(&:deep_symbolize_keys!)

  summaries.each do |summary_attributes|
    summary_attributes.delete(:id)

    existing_summary = MlSummaryComment.find_by(commentable_id: summary_attributes[:commentable_id],
                                                commentable_type: "Budget::Investment")
    next if existing_summary

    MlSummaryComment.create!(summary_attributes)
  end
end
# Imports the related content between proposals from the proposals related
# content file.
#
# Each entry has an :id (the parent proposal) plus a list of related ids.
# The first related id gets a score equal to the number of related entries,
# and the score decreases by one per position, blank ids included. Existing
# relations get their score updated; new ones are created as machine
# learning content authored by the job's user.
def import_proposals_related_content
  related_path = data_folder.join(MachineLearning.proposals_related_filename)
  entries = JSON.parse(File.read(related_path)).each(&:deep_symbolize_keys!)

  entries.each do |entry|
    parent_id = entry.delete(:id)
    highest_score = entry.size

    entry.values.each_with_index do |related_id, position|
      # Blank ids still use up a position, so later ids get a lower score
      next if related_id.blank?

      score = highest_score - position
      relation = {
        parent_relationable_id: parent_id,
        parent_relationable_type: "Proposal",
        child_relationable_id: related_id,
        child_relationable_type: "Proposal"
      }

      existing = RelatedContent.find_by(relation)

      if existing
        existing.update!(machine_learning_score: score)
      else
        RelatedContent.create!(relation.merge(machine_learning: true,
                                              author: user,
                                              machine_learning_score: score))
      end
    end
  end
end
# Imports the related content between budget investments from the investments
# related content file.
#
# Each entry has an :id (the parent investment) plus a list of related ids.
# The first related id gets a score equal to the number of related entries,
# and the score decreases by one per position, blank ids included. Existing
# relations get their score updated; new ones are created as machine
# learning content authored by the job's user.
def import_budget_investments_related_content
  related_path = data_folder.join(MachineLearning.investments_related_filename)
  entries = JSON.parse(File.read(related_path)).each(&:deep_symbolize_keys!)

  entries.each do |entry|
    parent_id = entry.delete(:id)
    highest_score = entry.size

    entry.values.each_with_index do |related_id, position|
      # Blank ids still use up a position, so later ids get a lower score
      next if related_id.blank?

      score = highest_score - position
      relation = {
        parent_relationable_id: parent_id,
        parent_relationable_type: "Budget::Investment",
        child_relationable_id: related_id,
        child_relationable_type: "Budget::Investment"
      }

      existing = RelatedContent.find_by(relation)

      if existing
        existing.update!(machine_learning_score: score)
      else
        RelatedContent.create!(relation.merge(machine_learning: true,
                                              author: user,
                                              machine_learning_score: score))
      end
    end
  end
end
# Imports the tags and taggings generated for proposals.
#
# First pass (tags file): finds or creates a Tag for every entry with a
# name, truncating names of 150 characters or more with truncate(150), and
# remembers which Tag id corresponds to each id used in the file.
#
# Second pass (taggings file): for every entry with a tag_id that maps to an
# existing Tag and with a taggable_id, creates a Tagging on "Proposal" with
# the "ml_tags" context.
def import_ml_proposals_tags
  tag_ids_by_file_id = {}

  tags_path = data_folder.join(MachineLearning.proposals_tags_filename)
  tag_entries = JSON.parse(File.read(tags_path)).each(&:deep_symbolize_keys!)

  tag_entries.each do |tag_attributes|
    next if tag_attributes[:name].blank?

    tag_attributes.delete(:taggings_count)
    tag_attributes[:name] = tag_attributes[:name].truncate(150) if tag_attributes[:name].length >= 150

    tag = Tag.find_or_create_by!(name: tag_attributes[:name])
    tag_ids_by_file_id[tag_attributes[:id]] = tag.id
  end

  taggings_path = data_folder.join(MachineLearning.proposals_taggings_filename)
  tagging_entries = JSON.parse(File.read(taggings_path)).each(&:deep_symbolize_keys!)

  tagging_entries.each do |tagging_attributes|
    next if tagging_attributes[:tag_id].blank?

    tag_id = tag_ids_by_file_id[tagging_attributes[:tag_id]]
    next unless Tag.find_by(id: tag_id) && tagging_attributes[:taggable_id].present?

    tagging_attributes[:tag_id] = tag_id
    tagging_attributes[:taggable_type] = "Proposal"
    tagging_attributes[:context] = "ml_tags"
    Tagging.create!(tagging_attributes)
  end
end
# Imports the tags and taggings generated for budget investments.
#
# First pass (tags file): finds or creates a Tag for every entry with a
# name, truncating names of 150 characters or more with truncate(150), and
# remembers which Tag id corresponds to each id used in the file.
#
# Second pass (taggings file): for every entry with a tag_id that maps to an
# existing Tag and with a taggable_id, creates a Tagging on
# "Budget::Investment" with the "ml_tags" context.
def import_ml_investments_tags
  tag_ids_by_file_id = {}

  tags_path = data_folder.join(MachineLearning.investments_tags_filename)
  tag_entries = JSON.parse(File.read(tags_path)).each(&:deep_symbolize_keys!)

  tag_entries.each do |tag_attributes|
    next if tag_attributes[:name].blank?

    tag_attributes.delete(:taggings_count)
    tag_attributes[:name] = tag_attributes[:name].truncate(150) if tag_attributes[:name].length >= 150

    tag = Tag.find_or_create_by!(name: tag_attributes[:name])
    tag_ids_by_file_id[tag_attributes[:id]] = tag.id
  end

  taggings_path = data_folder.join(MachineLearning.investments_taggings_filename)
  tagging_entries = JSON.parse(File.read(taggings_path)).each(&:deep_symbolize_keys!)

  tagging_entries.each do |tagging_attributes|
    next if tagging_attributes[:tag_id].blank?

    tag_id = tag_ids_by_file_id[tagging_attributes[:tag_id]]
    next unless Tag.find_by(id: tag_id) && tagging_attributes[:taggable_id].present?

    tagging_attributes[:tag_id] = tag_id
    tagging_attributes[:taggable_type] = "Budget::Investment"
    tagging_attributes[:context] = "ml_tags"
    Tagging.create!(tagging_attributes)
  end
end
# Records which script generated the data of the given kind and when.
#
# kind - one of the strings used by this class ("tags", "related_content",
#        "comments_summary").
#
# Finds or creates the MachineLearningInfo for that kind and sets its
# generated_at to the job's started_at and its script to the job's script.
def update_machine_learning_info_for(kind)
  info = MachineLearningInfo.find_or_create_by!(kind: kind)
  info.update!(generated_at: job.started_at, script: job.script)
end
# Builds a hash mapping each known output filename to the value returned by
# last_modified_date_for at the time of the call.
#
# Despite its name this method does not assign anything; initialize stores
# the returned hash.
def set_previous_modified_date
  output_filenames = [
    MachineLearning.proposals_tags_filename,
    MachineLearning.proposals_taggings_filename,
    MachineLearning.investments_tags_filename,
    MachineLearning.investments_taggings_filename,
    MachineLearning.proposals_related_filename,
    MachineLearning.investments_related_filename,
    MachineLearning.proposals_comments_summary_filename,
    MachineLearning.investments_comments_summary_filename
  ]

  output_filenames.each_with_object({}) do |filename, modified_dates|
    modified_dates[filename] = last_modified_date_for(filename)
  end
end
# Returns the modification time of the given file inside data_folder, or nil
# when the file does not exist.
#
# filename - name of the file, relative to data_folder.
def last_modified_date_for(filename)
  path = data_folder.join(filename)

  # File.exist? replaces the deprecated File.exists?, removed in Ruby 3.2
  File.mtime(path) if File.exist?(path)
end
# Tells whether the given output file changed since this object was built.
#
# filename - name of the file, relative to data_folder.
#
# Returns false when the file does not exist, true when it exists but had no
# recorded modification date (it did not exist before), and otherwise whether
# its current modification date is later than the recorded one.
def updated_file?(filename)
  # File.exist? replaces the deprecated File.exists?, removed in Ruby 3.2
  return false unless File.exist?(data_folder.join(filename))

  previous_date = previous_modified_date[filename]
  # Recorded values are either nil or a Time, so a nil check is enough
  return true if previous_date.nil?

  last_modified_date_for(filename) > previous_date
end
# Records a failure on the job and notifies the user by email.
#
# error - the exception that was raised.
#
# The stored error is the exception message followed by the backtrace lines
# that mention "machine_learning.rb", joined with "<br>".
def handle_error(error)
  summary = error.message
  own_frames = error.backtrace.select { |frame| frame.include?("machine_learning.rb") }
  full_error = [summary, *own_frames].join("<br>")

  job.update!(finished_at: Time.current, error: full_error)
  Mailer.machine_learning_error(user).deliver_later
end
end