Improve performance generating stats
Debugging shows that the bottleneck in the stats calculation is the number of times we're querying the users table using the same array of IDs in the `where` condition but each time combined with other conditions. So we're inserting the results of querying the users table with the array of IDs in a temporary table and using this temporary table for the other calculations. When querying this temporary table, there's no need to filter for IDs anymore. For budget stats, the `generate` method is now about 10-20 times faster for a budget with 20,000 participants. For budgets with only a few dozen participants, there's no significant difference in performance. I thought about modifying the `participants` method and use the temporary table there. The problem, however, is that in this case it isn't clear when to drop the temporary table, and we could end up with thousands of temporary tables in the database if we don't do it right. Creating and dropping the temporary table in the same transaction, on the other hand, guarantees that won't be the case. Note there's no risk of duplicate tables since they're created and dropped inside a transaction, so we're always using the same table name for the same resource. We're adding a test that fails with a `PG::DuplicateTable: ERROR: relation "participants__1"` error if we don't use a transaction.
This commit is contained in:
@@ -47,7 +47,23 @@ module Statisticable
|
||||
end
|
||||
|
||||
def generate
|
||||
stats_methods.each { |stat_name| send(stat_name) }
|
||||
User.transaction do
|
||||
create_participants_table
|
||||
|
||||
begin
|
||||
define_singleton_method :participants do
|
||||
participants_class.all
|
||||
end
|
||||
|
||||
stats_methods.each { |stat_name| send(stat_name) }
|
||||
ensure
|
||||
define_singleton_method :participants do
|
||||
participants_from_original_table
|
||||
end
|
||||
end
|
||||
|
||||
drop_participants_table
|
||||
end
|
||||
end
|
||||
|
||||
def stats_methods
|
||||
@@ -77,6 +93,7 @@ module Statisticable
|
||||
def participants
|
||||
@participants ||= User.unscoped.where(id: participant_ids)
|
||||
end
|
||||
alias_method :participants_from_original_table, :participants
|
||||
|
||||
def total_male_participants
|
||||
participants.male.count
|
||||
@@ -151,6 +168,26 @@ module Statisticable
|
||||
participations.map { |participation| self.class.send("#{participation}_methods") }.flatten
|
||||
end
|
||||
|
||||
def create_participants_table
|
||||
User.connection.create_table(
|
||||
participants_table_name,
|
||||
temporary: true,
|
||||
as: participants_from_original_table.to_sql
|
||||
)
|
||||
end
|
||||
|
||||
def drop_participants_table
|
||||
User.connection.drop_table(participants_table_name, if_exists: true, temporary: true)
|
||||
end
|
||||
|
||||
def participants_table_name
|
||||
@participants_table_name ||= "participants_#{resource.class.table_name}_#{resource.id}"
|
||||
end
|
||||
|
||||
def participants_class
|
||||
@participants_class ||= Class.new(User).tap { |klass| klass.table_name = participants_table_name }
|
||||
end
|
||||
|
||||
def total_participants_with_gender
|
||||
@total_participants_with_gender ||= participants.where.not(gender: nil).distinct.count
|
||||
end
|
||||
|
||||
@@ -8,6 +8,11 @@ describe Statisticable do
|
||||
def participants
|
||||
User.all
|
||||
end
|
||||
alias_method :participants_from_original_table, :participants
|
||||
|
||||
def total_participants
|
||||
User.count
|
||||
end
|
||||
|
||||
def participation_date
|
||||
Time.current
|
||||
@@ -17,7 +22,7 @@ describe Statisticable do
|
||||
stub_const("DummyStats", dummy_stats)
|
||||
end
|
||||
|
||||
let(:stats) { DummyStats.new(nil) }
|
||||
let(:stats) { DummyStats.new(double(id: 1, class: double(table_name: ""))) }
|
||||
|
||||
describe "#gender?" do
|
||||
context "No participants" do
|
||||
@@ -194,4 +199,26 @@ describe Statisticable do
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
describe "#generate" do
|
||||
it "drops the temporary table after finishing" do
|
||||
stats.generate
|
||||
|
||||
expect { stats.send(:participants_class).first }.to raise_exception(ActiveRecord::StatementInvalid)
|
||||
end
|
||||
|
||||
it "can be executed twice without errors" do
|
||||
stats.generate
|
||||
|
||||
expect { stats.generate }.not_to raise_exception
|
||||
end
|
||||
|
||||
it "can be executed twice in parallel since it uses a transaction" do
|
||||
other_stats = DummyStats.new(stats.resource)
|
||||
|
||||
[stats, other_stats].map do |stat|
|
||||
Thread.new { stat.generate }
|
||||
end.each(&:join)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
Reference in New Issue
Block a user