From 610200de9fbb5c68f8687dddf3beddd25d05c808 Mon Sep 17 00:00:00 2001 From: Nelson Jovel Date: Thu, 2 Jan 2025 10:40:59 -0800 Subject: [PATCH] Speed up survey item by item report from 75 to 23 seconds. Reduce number of queries to the database. Create a second thread of execution --- app/models/report/survey_item_by_item.rb | 179 +++++++++++------------ app/models/survey_item.rb | 6 + 2 files changed, 91 insertions(+), 94 deletions(-) diff --git a/app/models/report/survey_item_by_item.rb b/app/models/report/survey_item_by_item.rb index e6fea760..84bf5128 100644 --- a/app/models/report/survey_item_by_item.rb +++ b/app/models/report/survey_item_by_item.rb @@ -60,79 +60,91 @@ module Report data = [] data << headers + survey_items_by_id = ::SurveyItem.by_id_includes_all academic_years.each do |academic_year| - schools.each do |school| - # for each survey item id - survey_ids_to_grades.sort_by { |id, _value| ::SurveyItem.find(id).prompt }.each do |id, school_grades| - school_grades = school_grades.reject(&:nil?) - row = [] - survey_item = survey_item_for_id(id) - row.concat(survey_item_info(survey_item:)) # fills prompt + categories - row.append("Students") - row.append(school.name) - row.append(academic_year.range) - - # add padding before grade average section - starting_grade = school_grades.sort.first - starting_grade = grades.index(starting_grade) || 0 - padding = Array.new(starting_grade) { "" } - row.concat(padding) - - school_grades.sort.each do |grade| - next if grade == -1 - - if school_grades.include?(grade) - # we already know grade has sufficient responses - row.append("#{survey_item.survey_item_responses.where(school:, academic_year:, - grade:).average(:likert_score).to_f.round(2)}") - else - row.append("N/A") + mutex = Thread::Mutex.new + + pool_size = 2 + jobs = Queue.new + schools.each { |school| jobs << school } + + workers = pool_size.times.map do + Thread.new do + while school = jobs.pop(true) + # for each survey item id + survey_ids_to_grades.sort_by do |id, _value| + survey_items_by_id[id].prompt + end.each do |id, school_grades| + school_grades = school_grades.reject(&:nil?) + row = [] + survey_item = survey_items_by_id[id] + row.concat(survey_item_info(survey_item:)) # fills prompt + categories + row.append("Students") + row.append(school.name) + row.append(academic_year.range) + + # add padding before grade average section + starting_grade = school_grades.sort.first + starting_grade = grades.index(starting_grade) || 0 + padding = Array.new(starting_grade) { "" } + row.concat(padding) + + school_grades.sort.each do |grade| + next if grade == -1 + + if school_grades.include?(grade) + # we already know grade has sufficient responses + row.append("#{survey_item.survey_item_responses.where(school:, academic_year:, + grade:).average(:likert_score).to_f.round(2)}") + else + row.append("N/A") + end + end + + # add padding after the grade average section + ending_grade = school_grades.sort.last + ending_grade = grades.index(ending_grade) + 1 || 0 + padding = Array.new(grades.length - ending_grade) { "" } + row.concat(padding) + + # filter out response rate at subcategory level <24.5% for school average + if ::StudentResponseRateCalculator.new(subcategory: survey_item.subcategory, school:, + academic_year:).meets_student_threshold? + row.append("#{survey_item.survey_item_responses.where( + # We allow the nil (unknown) grades in the school survey item average + # also filter less than 10 responses in the whole school + 'school_id = ? and academic_year_id = ? and (grade IS NULL or grade IN (?))', school.id, academic_year.id, school.grades(academic_year:) + ).group('survey_item_id').having('count(*) >= 10').average(:likert_score).values[0].to_f.round(2)}") + else + row.append("N/A") + end + data << row + end + # Next up is teacher data + # each key is a survey item id + ::SurveyItemResponse.teacher_survey_items_with_sufficient_responses(school:, + academic_year:).keys.sort_by do |id| + survey_items_by_id[id].prompt + end.each do |key| + row = [] + survey_item = survey_items_by_id[key] + row.concat(survey_item_info(survey_item:)) + row.append("Teacher") + row.append(school.name) + row.append(academic_year.range) + # we need to add padding to skip the grades columns and the 'all school' column + padding = Array.new(grades.length + 1) { "" } + row.concat(padding) + # we already know that the survey item we are looking at has sufficient responses + row.append("#{survey_item.survey_item_responses.where(school:, + academic_year:).average(:likert_score).to_f.round(2)}") + data << row end end - - # add padding after the grade average section - ending_grade = school_grades.sort.last - ending_grade = grades.index(ending_grade) + 1 || 0 - padding = Array.new(grades.length - ending_grade) { "" } - row.concat(padding) - - # filter out response rate at subcategory level <24.5% for school average - subcategory = scale_for_id(survey_item.scale_id).measure.subcategory - # measure = ::Measure.find_by_id(scale.measure_id) - # subcategory = ::Subcategory.find_by_id(measure.subcategory_id) - if response_rate(subcategory:, school:, academic_year:).meets_student_threshold? - row.append("#{survey_item.survey_item_responses.where( - # We allow the nil (unknown) grades in the school survey item average - # also filter less than 10 responses in the whole school - 'school_id = ? and academic_year_id = ? and (grade IS NULL or grade IN (?))', school.id, academic_year.id, school.grades(academic_year:) - ).group('survey_item_id').having('count(*) >= 10').average(:likert_score).values[0].to_f.round(2)}") - - else - row.append("N/A") - end - data << row - end - # Next up is teacher data - # each key is a survey item id - ::SurveyItemResponse.teacher_survey_items_with_sufficient_responses(school:, - academic_year:).keys.sort_by do |id| - ::SurveyItem.find(id).prompt - end.each do |key| - row = [] - survey_item = survey_item_for_id(key) - row.concat(survey_item_info(survey_item:)) - row.append("Teacher") - row.append(school.name) - row.append(academic_year.range) - # we need to add padding to skip the grades columns and the 'all school' column - padding = Array.new(grades.length + 1) { "" } - row.concat(padding) - # we already know that the survey item we are looking at has sufficient responses - row.append("#{survey_item.survey_item_responses.where(school:, - academic_year:).average(:likert_score).to_f.round(2)}") - data << row + rescue ThreadError end end + workers.each(&:join) end CSV.generate do |csv| @@ -142,37 +154,16 @@ module Report end end - def self.response_rate(subcategory:, school:, academic_year:) - @response_rate ||= Hash.new do |memo, subcategory, school, academic_year| - memo[[subcategory, school, academic_year]] = - ::StudentResponseRateCalculator.new(subcategory:, school:, academic_year:) - end - @response_rate[[subcategory, school, academic_year]] - end - - def self.survey_item_for_id(survey_item_id) - @survey_items ||= ::SurveyItem.all.map do |survey_item| - [survey_item.id, survey_item] - end.to_h - @survey_items[survey_item_id] - end - - def self.scale_for_id(scale_id) - @scales ||= Scale.includes([measure: :subcategory]).all.map { |scale| [scale.id, scale] }.to_h - @scales[scale_id] - end - def self.write_csv(csv:, filepath:) File.write(filepath, csv) end def self.survey_item_info(survey_item:) - prompt = survey_item.prompt - scale = Scale.find_by_id(survey_item.scale_id) - measure = ::Measure.find_by_id(scale.measure_id) - subcategory = ::Subcategory.find_by_id(measure.subcategory_id) - category = Category.find_by_id(subcategory.category_id) - [prompt, category.name, subcategory.name, measure.name, scale.scale_id] + [survey_item.prompt, + survey_item.category.name, + survey_item.subcategory.name, + survey_item.measure.name, + survey_item.scale.scale_id] end end end diff --git a/app/models/survey_item.rb b/app/models/survey_item.rb index ec025abf..7b2a1159 100644 --- a/app/models/survey_item.rb +++ b/app/models/survey_item.rb @@ -4,10 +4,16 @@ class SurveyItem < ActiveRecord::Base belongs_to :scale, counter_cache: true has_one :measure, through: :scale has_one :subcategory, through: :measure + has_one :category, through: :subcategory has_many :survey_item_responses validates :survey_item_id, uniqueness: true + scope :by_id_includes_all, lambda { + all.includes(%i[scale measure subcategory category]).map do |survey_item| + [survey_item.id, survey_item] + end.to_h + } def score(school:, academic_year:) @score ||= Hash.new do |memo, (school, academic_year)|