Process CSV file rows in batches

pull/1/head
Alex Basson 4 years ago
parent a269bb0ec1
commit b4f6daa406

@ -3,23 +3,26 @@ require 'csv'
class SurveyResponsesDataLoader
# NOTE(review): this listing looks like a rendered diff whose +/- markers and
# indentation were lost, so two variants of this method's body appear
# interleaved below (a whole-file read and a batched read). It is not runnable
# as shown -- confirm the real method body against the repository.
#
# Loads survey responses from the CSV file at +filepath+: header columns
# starting with 't-' or 's-' are looked up via
# SurveyItem.find_by_survey_item_id, each data row is passed to process_row,
# and the results are handed to SurveyItemResponse.import.
def self.load_data(filepath:)
# Whole-file variant: the entire file is read into one string.
csv_file = File.read(filepath)
# Batched variant: the file stays open and its first line is taken as the
# header row.
File.open(filepath) do |file|
headers = file.first
# Whole-file variant: parse everything at once, then derive survey items from
# the parsed headers.
parsed_csv_file = CSV.parse(csv_file, headers: true)
survey_items = parsed_csv_file.headers
.filter { |header| !header.nil? }
.filter { |header| header.start_with? 't-' or header.start_with? 's-' }
.map { |survey_item_id| SurveyItem.find_by_survey_item_id survey_item_id }
# Batched variant: parse only the header line to derive the same survey items.
survey_items = CSV.parse(headers, headers: true).headers
.filter { |header| !header.nil? }
.filter { |header| header.start_with? 't-' or header.start_with? 's-' }
.map { |survey_item_id| SurveyItem.find_by_survey_item_id survey_item_id }
# Whole-file variant: map every parsed row through process_row.
survey_item_responses = parsed_csv_file.map do |row|
process_row row: row, survey_items: survey_items
end
# Progress callback passed to SurveyItemResponse.import below; it prints the
# counts it receives. batch_duration_in_secs is accepted but not printed.
batch_progress = ->(rows_size, num_batches, current_batch_number, batch_duration_in_secs) {
puts "======================> Number of survey item responses: #{rows_size}, Number of batches: #{num_batches}, Current batch number: #{current_batch_number}"
}
# Second definition of the same callback; only the message wording differs.
batch_progress = ->(rows_size, num_batches, current_batch_number, batch_duration_in_secs) {
puts "======================> Row size: #{rows_size}, Number of batches: #{num_batches}, Current batch number: #{current_batch_number}"
}
# Batched variant: consume the remaining lines lazily, 1000 at a time, and
# parse each slice using the header line read earlier.
# NOTE(review): slicing by physical lines presumably assumes no quoted field
# contains an embedded newline -- confirm against the input data.
file.lazy.each_slice(1000) do |lines|
survey_item_responses = CSV.parse(lines.join, headers: headers).map do |row|
process_row row: row, survey_items: survey_items
end
# compact/flatten imply process_row may return nil or an array per row
# (its definition is not visible here -- confirm).
# Two import calls appear, differing only in batch_size (10 vs 1000).
SurveyItemResponse.import survey_item_responses.compact.flatten, batch_size: 10, batch_progress: batch_progress
SurveyItemResponse.import survey_item_responses.compact.flatten, batch_size: 1000, batch_progress: batch_progress
end
end
end
private

Loading…
Cancel
Save