Improve performance generating stats

Debugging shows that the bottleneck in the stats calculation is the
number of times we're querying the users table using the same array of
IDs in the `where` condition but each time combined with other
conditions.

So we're inserting the results of querying the users table with the
array of IDs in a temporary table and using this temporary table for the
other calculations. When querying this temporary table, there's no need
to filter for IDs anymore.

For budget stats, the `generate` method is now about 10-20 times faster
for a budget with 20,000 participants. For budgets with only a few dozen
participants, there's no significant difference in performance.

I thought about modifying the `participants` method to use the
temporary table there. The problem, however, is that in this case it
isn't clear when to drop the temporary table, and we could end up with
thousands of temporary tables in the database if we don't do it right.
Creating and dropping the temporary table in the same transaction, on
the other hand, guarantees that won't be the case.

Note there's no risk of duplicate tables since they're created and
dropped inside a transaction, so we can always use the same table name
for the same resource. We're adding a test that fails with a
`PG::DuplicateTable: ERROR:  relation "participants__1" already exists`
error if we don't use a transaction.
This commit is contained in:
Javi Martín
2024-04-07 19:17:44 +02:00
parent 6f0c27c0fb
commit 80dcbfc23c
2 changed files with 66 additions and 2 deletions

View File

@@ -47,7 +47,23 @@ module Statisticable
end
# Calls every method named in `stats_methods`. While they run, `participants`
# is pointed at a temporary table filled from the users-table query, so the
# individual stats don't each filter the users table by the same list of IDs.
def generate
# NOTE(review): this looks like the pre-change line of the diff, shown here
# without its removal marker; the transaction block below supersedes it.
stats_methods.each { |stat_name| send(stat_name) }
# The table is created and dropped inside one transaction; per the commit
# rationale this is what guarantees no temporary table is left behind.
User.transaction do
create_participants_table
begin
# Per-instance override: while the stats run, `participants` returns all
# rows of the temporary table, so no ID filter is needed.
define_singleton_method :participants do
participants_class.all
end
stats_methods.each { |stat_name| send(stat_name) }
ensure
# Point `participants` back at the users-table query even if a stat raised.
define_singleton_method :participants do
participants_from_original_table
end
end
# Only reached when no stat raised; on failure the exception leaves the
# transaction block instead (presumably rolling the table creation back).
drop_participants_table
end
end
def stats_methods
@@ -77,6 +93,7 @@ module Statisticable
# Users who took part in the resource, looked up in the users table by ID
# with `unscoped`. The relation is memoized in `@participants`.
def participants
  return @participants if @participants

  @participants = User.unscoped.where(id: participant_ids)
end

# Second name for the users-table implementation: `generate` redefines
# `participants` per instance while the stats run, and uses this name both to
# fill the temporary table and to restore the method afterwards.
alias_method :participants_from_original_table, :participants
def total_male_participants
participants.male.count
@@ -151,6 +168,26 @@ module Statisticable
participations.map { |participation| self.class.send("#{participation}_methods") }.flatten
end
# Creates the temporary participants table, populated with the rows returned
# by the users-table participants query.
def create_participants_table
  connection = User.connection
  source_query = participants_from_original_table.to_sql

  connection.create_table(participants_table_name, temporary: true, as: source_query)
end
# Drops the temporary participants table. `if_exists: true` makes this a no-op
# when the table is already gone.
def drop_participants_table
  connection = User.connection

  connection.drop_table(
    participants_table_name,
    if_exists: true,
    temporary: true
  )
end
# Name of the temporary table for this resource, built from the resource's
# own table name and ID (e.g. "participants__1" when the table name is empty
# and the ID is 1). Memoized.
def participants_table_name
  return @participants_table_name if @participants_table_name

  resource_table = resource.class.table_name
  @participants_table_name = "participants_#{resource_table}_#{resource.id}"
end
# Anonymous subclass of User whose table is the temporary participants table,
# so the stats can keep calling User scopes on it. Memoized.
def participants_class
  @participants_class ||= begin
    temporary_model = Class.new(User)
    temporary_model.table_name = participants_table_name
    temporary_model
  end
end
# Number of distinct participants whose gender is set. Memoized.
def total_participants_with_gender
  @total_participants_with_gender ||= begin
    with_gender = participants.where.not(gender: nil)
    with_gender.distinct.count
  end
end

View File

@@ -8,6 +8,11 @@ describe Statisticable do
# Returns all users: in this dummy stats class every user counts as a
# participant.
def participants
User.all
end
# `generate` calls `participants_from_original_table` by name, so the alias is
# declared here to bind it to the `participants` defined just above.
alias_method :participants_from_original_table, :participants
# Counts every user, in line with `participants` returning all users.
def total_participants
User.count
end
def participation_date
Time.current
@@ -17,7 +22,7 @@ describe Statisticable do
stub_const("DummyStats", dummy_stats)
end
# NOTE(review): the first `let` looks like the pre-change line of the diff,
# shown without its removal marker; the definition below it is the current one.
let(:stats) { DummyStats.new(nil) }
# The resource double answers `id` and `class.table_name`, the two values
# `participants_table_name` interpolates (giving "participants__1" here).
let(:stats) { DummyStats.new(double(id: 1, class: double(table_name: ""))) }
describe "#gender?" do
context "No participants" do
@@ -194,4 +199,26 @@ describe Statisticable do
end
end
end
# Specs for `generate`: the temporary participants table must not outlive the
# call, and repeated or concurrent calls must not clash on the table name.
describe "#generate" do
  it "drops the temporary table after finishing" do
    stats.generate

    # Querying the model backed by the temporary table must fail once the
    # table is gone.
    expect do
      stats.send(:participants_class).first
    end.to raise_exception(ActiveRecord::StatementInvalid)
  end

  it "can be executed twice without errors" do
    stats.generate

    expect do
      stats.generate
    end.not_to raise_exception
  end

  it "can be executed twice in parallel since it uses a transaction" do
    other_stats = DummyStats.new(stats.resource)

    # `join` re-raises any exception from a thread, so a failing `generate`
    # fails the example.
    threads = [stats, other_stats].map do |stat|
      Thread.new { stat.generate }
    end

    threads.each(&:join)
  end
end
end