From 3f493727b3bbefdce5cf53c44afe217884f161d3 Mon Sep 17 00:00:00 2001 From: rebuilt Date: Mon, 22 Aug 2022 11:29:38 -0700 Subject: [PATCH] Load a single year of student demographic data and race scores at a time --- app/services/race_score_loader.rb | 40 +++++++++++++++++++++---------- app/services/student_loader.rb | 2 +- lib/tasks/one_off.rake | 5 ++-- 3 files changed, 31 insertions(+), 16 deletions(-) diff --git a/app/services/race_score_loader.rb b/app/services/race_score_loader.rb index 408bbca6..16b66f4e 100644 --- a/app/services/race_score_loader.rb +++ b/app/services/race_score_loader.rb @@ -1,25 +1,39 @@ class RaceScoreLoader - def self.reset(schools: School.all, academic_years: AcademicYear.all, measures: Measure.all, races: Race.all) + def self.reset(schools: School.all, academic_years: AcademicYear.all, measures: Measure.all, races: Race.all, fast_processing: true) RaceScore.where(school: schools, academic_year: academic_years, measure: measures, race: races).delete_all measures.each do |measure| - schools.each do |school| - loadable_race_scores = [] - loadable_race_scores = academic_years.map do |academic_year| - races.map do |race| - process_score(measure:, school:, academic_year:, race:) - end - end - RaceScore.import(loadable_race_scores.flatten.compact, batch_size: 1_000, on_duplicate_key_update: :all) - @grouped_responses = nil - @total_responses = nil - @response_rate = nil - @sufficient_responses = nil + if fast_processing + large_memory_use(measure:, schools:, academic_years:, races:) + else + slow_loading_time(measure:, schools:, academic_years:, races:) end end end private + def self.large_memory_use(measure:, schools:, academic_years:, races:) + loadable_race_scores = schools.map do |school| + academic_years.map do |academic_year| + races.map do |race| + process_score(measure:, school:, academic_year:, race:) + end + end + end + RaceScore.import(loadable_race_scores.flatten.compact, batch_size: 1_000, on_duplicate_key_update: :all) + end + + def self.slow_loading_time(measure:, schools:, academic_years:, races:) + schools.each do |school| + loadable_race_scores = academic_years.map do |academic_year| + races.map do |race| + process_score(measure:, school:, academic_year:, race:) + end + end + RaceScore.import(loadable_race_scores.flatten.compact, batch_size: 1_000, on_duplicate_key_update: :all) + end + end + def self.process_score(measure:, school:, academic_year:, race:) score = race_score(measure:, school:, academic_year:, race:) { measure_id: measure.id, school_id: school.id, academic_year_id: academic_year.id, race_id: race.id, average: score.average, diff --git a/app/services/student_loader.rb b/app/services/student_loader.rb index 09ae5ee3..052623e6 100644 --- a/app/services/student_loader.rb +++ b/app/services/student_loader.rb @@ -6,7 +6,7 @@ require 'csv' class StudentLoader - def self.load_data(filepath:, reinitialize: false) + def self.load_data(filepath:, reinitialize: true) destroy_students if reinitialize File.open(filepath) do |file| diff --git a/lib/tasks/one_off.rake b/lib/tasks/one_off.rake index 8aff4450..451c56ec 100644 --- a/lib/tasks/one_off.rake +++ b/lib/tasks/one_off.rake @@ -71,7 +71,7 @@ namespace :one_off do desc 'load students' task load_students: :environment do - Dir.glob(Rails.root.join('data', 'survey_responses', '2019-20_*student*.csv')).each do |file| + Dir.glob(Rails.root.join('data', 'survey_responses', '2021-22_*student*.csv')).each do |file| puts "=====================> Loading student data from csv at path: #{file}" StudentLoader.load_data filepath: file end @@ -119,7 +119,8 @@ namespace :one_off do desc 'reset race score calculations' task reset_race_scores: :environment do puts 'Resetting race scores' - RaceScoreLoader.reset(schools: [School.find_by_slug('a-irvin-studley-elementary-school')]) + academic_years = [AcademicYear.find_by_range('2021-22')] + RaceScoreLoader.reset(academic_years:) Rails.cache.clear puts "=====================> Completed loading #{RaceScore.count} race scores" end