# Streams the survey-response CSV at +filepath+ and bulk-imports the
# SurveyItemResponse records built from its rows.
#
# Rows are pulled through the CSV parser and grouped into batches of 1,000
# parsed rows. Batching parsed rows (rather than raw text lines) means a
# quoted field that contains a line break is never split across two batches,
# and the file is not read into memory in one piece.
#
# An empty or header-only file imports nothing.
#
# @param filepath [String] path to a CSV file whose first row is the header
# @return [void]
def self.load_data(filepath:)
  batch_size = 1_000
  survey_items = nil

  # NOTE(review): presumably the import library calls this with four
  # arguments (row count, batch count, current batch, duration) — confirm.
  # A lambda enforces arity, so the unused fourth parameter must stay.
  batch_progress = lambda do |rows_size, num_batches, current_batch_number, _batch_duration_in_secs|
    puts "======================> Number of survey item responses: #{rows_size}, Number of batches: #{num_batches}, Current batch number: #{current_batch_number}"
  end

  CSV.foreach(filepath, headers: true).each_slice(batch_size) do |rows|
    # Header columns prefixed with 't-' or 's-' are looked up as survey item
    # ids. Resolved once, on the first batch; unknown ids stay in the list as
    # whatever the lookup returns.
    survey_items ||= rows.first.headers.compact
                         .select { |header| header.start_with?('t-', 's-') }
                         .map { |survey_item_id| SurveyItem.find_by_survey_item_id(survey_item_id) }

    survey_item_responses = rows.map do |row|
      process_row row: row, survey_items: survey_items
    end

    SurveyItemResponse.import survey_item_responses.compact.flatten,
                              batch_size: batch_size,
                              batch_progress: batch_progress
  end
end