From 80dcbfc23c218366678f0e87e4bf3b6e5bed6312 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javi=20Mart=C3=ADn?= Date: Sun, 7 Apr 2024 19:17:44 +0200 Subject: [PATCH] Improve performance generating stats Debugging shows that the bottleneck in the stats calculation is the number of times we're querying the users table using the same array of IDs in the `where` condition but each time combined with other conditions. So we're inserting the results of querying the users table with the array of IDs in a temporary table and using this temporary table for the other calculations. When querying this temporary table, there's no need to filter for IDs anymore. For budget stats, the `generate` method is now about 10-20 times faster for a budget with 20,000 participants. For budgets with only a few dozen participants, there's no significant difference in performance. I thought about modifying the `participants` method and use the temporary table there. The problem, however, is that in this case it isn't clear when to drop the temporary table, and we could end up with thousands of temporary tables in the database if we don't do it right. Creating and dropping the temporary table in the same transaction, on the other hand, guarantees that won't be the case. Note there's no risk of duplicate tables since they're created and dropped inside a transaction, so we're always using the same table name for the same resource. We're adding a test that fails with a `PG::DuplicateTable: ERROR: relation "participants__1"` error if we don't use a transaction. --- app/models/concerns/statisticable.rb | 39 +++++++++++++++++++++++++++- spec/models/statisticable_spec.rb | 29 ++++++++++++++++++++- 2 files changed, 66 insertions(+), 2 deletions(-) diff --git a/app/models/concerns/statisticable.rb b/app/models/concerns/statisticable.rb index c3ee15139..826d7dc68 100644 --- a/app/models/concerns/statisticable.rb +++ b/app/models/concerns/statisticable.rb @@ -47,7 +47,23 @@ module Statisticable end def generate - stats_methods.each { |stat_name| send(stat_name) } + User.transaction do + create_participants_table + + begin + define_singleton_method :participants do + participants_class.all + end + + stats_methods.each { |stat_name| send(stat_name) } + ensure + define_singleton_method :participants do + participants_from_original_table + end + end + + drop_participants_table + end end def stats_methods @@ -77,6 +93,7 @@ module Statisticable def participants @participants ||= User.unscoped.where(id: participant_ids) end + alias_method :participants_from_original_table, :participants def total_male_participants participants.male.count @@ -151,6 +168,26 @@ module Statisticable participations.map { |participation| self.class.send("#{participation}_methods") }.flatten end + def create_participants_table + User.connection.create_table( + participants_table_name, + temporary: true, + as: participants_from_original_table.to_sql + ) + end + + def drop_participants_table + User.connection.drop_table(participants_table_name, if_exists: true, temporary: true) + end + + def participants_table_name + @participants_table_name ||= "participants_#{resource.class.table_name}_#{resource.id}" + end + + def participants_class + @participants_class ||= Class.new(User).tap { |klass| klass.table_name = participants_table_name } + end + def total_participants_with_gender @total_participants_with_gender ||= participants.where.not(gender: nil).distinct.count end diff --git a/spec/models/statisticable_spec.rb b/spec/models/statisticable_spec.rb index 9768b39ee..f1817fbbd 100644 --- a/spec/models/statisticable_spec.rb +++ b/spec/models/statisticable_spec.rb @@ -8,6 +8,11 @@ describe Statisticable do def participants User.all end + alias_method :participants_from_original_table, :participants + + def total_participants + User.count + end def participation_date Time.current @@ -17,7 +22,7 @@ describe Statisticable do stub_const("DummyStats", dummy_stats) end - let(:stats) { DummyStats.new(nil) } + let(:stats) { DummyStats.new(double(id: 1, class: double(table_name: ""))) } describe "#gender?" do context "No participants" do @@ -194,4 +199,26 @@ describe Statisticable do end end end + + describe "#generate" do + it "drops the temporary table after finishing" do + stats.generate + + expect { stats.send(:participants_class).first }.to raise_exception(ActiveRecord::StatementInvalid) + end + + it "can be executed twice without errors" do + stats.generate + + expect { stats.generate }.not_to raise_exception + end + + it "can be executed twice in parallel since it uses a transaction" do + other_stats = DummyStats.new(stats.resource) + + [stats, other_stats].map do |stat| + Thread.new { stat.generate } + end.each(&:join) + end + end end