From 43bc55ed32ac203b47276e0955c882c71567294e Mon Sep 17 00:00:00 2001 From: Nelson Jovel Date: Mon, 28 Nov 2022 11:58:58 -0800 Subject: [PATCH] Query for survey items once instead of on each row --- app/services/survey_responses_data_loader.rb | 33 ++++++++++---------- db/schema.rb | 2 ++ 2 files changed, 19 insertions(+), 16 deletions(-) diff --git a/app/services/survey_responses_data_loader.rb b/app/services/survey_responses_data_loader.rb index 7e935f8c..9f64d00b 100644 --- a/app/services/survey_responses_data_loader.rb +++ b/app/services/survey_responses_data_loader.rb @@ -7,10 +7,11 @@ class SurveyResponsesDataLoader File.open(filepath) do |file| headers = file.first genders_hash = genders + all_survey_items = survey_items(headers:) file.lazy.each_slice(500) do |lines| survey_item_responses = CSV.parse(lines.join, headers:).map do |row| - process_row row: Values.new(row:, headers:, genders: genders_hash) + process_row row: Values.new(row:, headers:, genders: genders_hash, survey_items: all_survey_items) end SurveyItemResponse.import survey_item_responses.compact.flatten, batch_size: 500 @@ -61,19 +62,32 @@ class SurveyResponsesDataLoader gender_hash end + def self.survey_items(headers:) + SurveyItem.where(survey_item_id: get_survey_item_ids_from_headers(headers:)) + end + + def self.get_survey_item_ids_from_headers(headers:) + CSV.parse(headers, headers: true).headers + .filter(&:present?) + .filter { |header| header.start_with? 't-' or header.start_with? 's-' } + end + private_class_method :process_row private_class_method :process_survey_items private_class_method :create_or_update_response private_class_method :genders + private_class_method :survey_items + private_class_method :get_survey_item_ids_from_headers end class Values - attr_reader :row, :headers, :genders + attr_reader :row, :headers, :genders, :survey_items - def initialize(row:, headers:, genders:) + def initialize(row:, headers:, genders:, survey_items:) @row = row @headers = headers @genders = genders + @survey_items = survey_items end def dese_id? @@ -124,11 +138,6 @@ class Values @school ||= School.find_by_dese_id(dese_id) end - # TODO: pass survey_items as an argument so we're not looking it up for every row. The set of survey items only needs to be determined once from the file headers. - def survey_items - @survey_items ||= SurveyItem.where(survey_item_id: get_survey_item_ids_from_headers(headers:)) - end - def grade @grade ||= begin raw_grade = (row['grade'] || row['Grade'] || row['What grade are you in?']).to_i @@ -143,14 +152,6 @@ class Values gender_code = 99 if gender_code.zero? genders[gender_code] end - - private - - def get_survey_item_ids_from_headers(headers:) - CSV.parse(headers, headers: true).headers - .filter(&:present?) - .filter { |header| header.start_with? 't-' or header.start_with? 's-' } - end end module StringMonkeyPatches diff --git a/db/schema.rb b/db/schema.rb index e93d00e6..490031be 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -346,6 +346,7 @@ ActiveRecord::Schema[7.0].define(version: 2022_10_22_225523) do t.datetime "updated_at", null: false t.index ["academic_year_id"], name: "index_response_rates_on_academic_year_id" t.index ["school_id", "subcategory_id"], name: "index_response_rates_on_school_id_and_subcategory_id" + t.index ["school_id"], name: "index_response_rates_on_school_id" t.index ["subcategory_id"], name: "index_response_rates_on_subcategory_id" end @@ -435,6 +436,7 @@ ActiveRecord::Schema[7.0].define(version: 2022_10_22_225523) do t.index ["response_id"], name: "index_survey_item_responses_on_response_id" t.index ["school_id", "academic_year_id"], name: "index_survey_item_responses_on_school_id_and_academic_year_id" t.index ["school_id", "survey_item_id", "academic_year_id", "grade"], name: "index_survey_responses_on_grade" + t.index ["school_id"], name: "index_survey_item_responses_on_school_id" t.index ["student_id"], name: "index_survey_item_responses_on_student_id" t.index ["survey_item_id"], name: "index_survey_item_responses_on_survey_item_id" end