From deb70d4b88ae76848220a5a786d5bd55a691e295 Mon Sep 17 00:00:00 2001
From: Gabe Farrell
Date: Thu, 18 Apr 2024 17:13:03 -0400
Subject: [PATCH] Memoize admin data loader

---
Review notes (text between the "---" marker and the first diff is commentary,
not part of the commit):
 * Memo keys are arrays such as [:admin, id] rather than "admin" + id.
   String#+ raises TypeError for a nil id, and create_admin_data_value calls
   the finders before its nil/blank guard, so rows without an item id would
   have crashed the load instead of being skipped.
 * Each finder uses a single Hash#fetch with a block; nil lookups are still
   cached, so a missing record is only queried once per file.
 * The rake task drains the queue with Queue#empty? instead of using the
   ThreadError raised by pop(true) as the loop exit.
 * Added lines were amended during review; the blob ids on the "index" lines
   were not regenerated. Context-line indentation is reconstructed.

 app/services/dese/loader.rb | 29 +++++++++++++++++++++++------
 lib/tasks/data.rake         |  5 ++++-
 2 files changed, 27 insertions(+), 7 deletions(-)

diff --git a/app/services/dese/loader.rb b/app/services/dese/loader.rb
index 99c59c07..8f2f76bc 100644
--- a/app/services/dese/loader.rb
+++ b/app/services/dese/loader.rb
@@ -1,7 +1,9 @@
 module Dese
   class Loader
+    @memo = {}
     def self.load_data(filepath:)
       admin_data_values = []
+      @memo = {}
       CSV.parse(File.read(filepath), headers: true) do |row|
         score = likert_score(row:)
         next unless valid_likert_score(likert_score: score)
@@ -35,16 +37,31 @@ module Dese
       row['Admin Data Item'] || row['Item ID'] || row['Item Id'] || row['Item ID']
     end
 
+    # Memoized finders: @memo is reset per file in load_data; nil results are cached as well
+    def self.find_school(dese_id:)
+      key = [:school, dese_id]
+      @memo.fetch(key) { @memo[key] = School.find_by_dese_id(dese_id.to_i) }
+    end
+    def self.find_admin_data_item(admin_data_item_id:)
+      key = [:admin, admin_data_item_id]
+      @memo.fetch(key) { @memo[key] = AdminDataItem.find_by_admin_data_item_id(admin_data_item_id) }
+    end
+    def self.find_ay(ay:)
+      key = [:year, ay]
+      @memo.fetch(key) { @memo[key] = AcademicYear.find_by_range(ay) }
+    end
+
     def self.create_admin_data_value(row:, score:)
-      school = School.find_by_dese_id(dese_id(row:).to_i)
+      school = find_school(dese_id: dese_id(row:))
       admin_data_item_id = admin_data_item(row:)
+      admin_data_item = find_admin_data_item(admin_data_item_id:)
+      academic_year = find_ay(ay: ay(row:))
 
       return if school.nil?
       return if admin_data_item_id.nil? || admin_data_item_id.blank?
 
-      admin_data_value = AdminDataValue.find_by(academic_year: AcademicYear.find_by_range(ay(row:)),
-                                                school:,
-                                                admin_data_item: AdminDataItem.find_by_admin_data_item_id(admin_data_item_id))
+      admin_data_value = AdminDataValue.find_by(academic_year:, school:, admin_data_item:)
+
       if admin_data_value.present?
         admin_data_value.likert_score = score
         admin_data_value.save
@@ -52,9 +69,9 @@ module Dese
       else
         AdminDataValue.new(
           likert_score: score,
-          academic_year: AcademicYear.find_by_range(ay(row:)),
+          academic_year:,
           school:,
-          admin_data_item: AdminDataItem.find_by_admin_data_item_id(admin_data_item(row:))
+          admin_data_item:,
         )
       end
     end
diff --git a/lib/tasks/data.rake b/lib/tasks/data.rake
index 45ebaac6..44ed6790 100644
--- a/lib/tasks/data.rake
+++ b/lib/tasks/data.rake
@@ -46,10 +46,13 @@ namespace :data do
   desc "load admin_data"
   task load_admin_data: :environment do
     original_count = AdminDataValue.count
-    Dir.glob(Rails.root.join("data", "admin_data", "dese", "*.csv")).each do |filepath|
+    jobs = Queue.new
+    Dir.glob(Rails.root.join("data", "admin_data", "dese", "*.csv")).each { |filepath| jobs << filepath }
+    until jobs.empty?
+      filepath = jobs.pop
       puts "=====================> Loading data from csv at path: #{filepath}"
       Dese::Loader.load_data filepath:
     end
     puts "=====================> Completed loading #{AdminDataValue.count - original_count} admin data values"
   end
 