Load a single year of student demographic data and race scores at a time

pull/1/head
rebuilt 3 years ago
parent 4a32ea3332
commit 3f493727b3

@ -1,25 +1,39 @@
class RaceScoreLoader
def self.reset(schools: School.all, academic_years: AcademicYear.all, measures: Measure.all, races: Race.all)
def self.reset(schools: School.all, academic_years: AcademicYear.all, measures: Measure.all, races: Race.all, fast_processing: true)
RaceScore.where(school: schools, academic_year: academic_years, measure: measures, race: races).delete_all
measures.each do |measure|
schools.each do |school|
loadable_race_scores = []
loadable_race_scores = academic_years.map do |academic_year|
races.map do |race|
process_score(measure:, school:, academic_year:, race:)
end
end
RaceScore.import(loadable_race_scores.flatten.compact, batch_size: 1_000, on_duplicate_key_update: :all)
@grouped_responses = nil
@total_responses = nil
@response_rate = nil
@sufficient_responses = nil
if fast_processing
large_memory_use(measure:, schools:, academic_years:, races:)
else
slow_loading_time(measure:, schools:, academic_years:, races:)
end
end
end
private
# Computes the race score rows for one measure across every combination of
# school, academic year and race, then writes them with a single
# RaceScore.import call.
#
# All rows for the measure are collected in memory before the import runs;
# slow_loading_time imports once per school instead.
#
# @param measure the measure forwarded to process_score
# @param schools [Enumerable] schools to score
# @param academic_years [Enumerable] academic years to score
# @param races [Enumerable] races to score
# @return the result of RaceScore.import
def self.large_memory_use(measure:, schools:, academic_years:, races:)
  # flat_map yields one flat list directly instead of building three levels of
  # nested arrays and flattening them afterwards.
  # NOTE(review): assumes process_score returns a single row (hash) or nil,
  # never an array -- confirm against process_score.
  loadable_race_scores = schools.flat_map do |school|
    academic_years.flat_map do |academic_year|
      races.map do |race|
        process_score(measure:, school:, academic_year:, race:)
      end
    end
  end

  # compact drops any nil results before they reach the import.
  RaceScore.import(loadable_race_scores.compact, batch_size: 1_000, on_duplicate_key_update: :all)
end
# Computes and imports the race score rows for one measure one school at a
# time: each school's rows (every academic year x race) are built and passed to
# RaceScore.import before the next school is processed.
#
# Only one school's rows are held at once, at the cost of one import call per
# school; large_memory_use issues a single import for all schools instead.
#
# @param measure the measure forwarded to process_score
# @param schools [Enumerable] schools to score
# @param academic_years [Enumerable] academic years to score
# @param races [Enumerable] races to score
# @return the value of schools.each (the schools collection)
def self.slow_loading_time(measure:, schools:, academic_years:, races:)
  schools.each do |school|
    # flat_map yields the school's rows as one flat list, with no nested arrays
    # to flatten afterwards.
    # NOTE(review): assumes process_score returns a single row (hash) or nil,
    # never an array -- confirm against process_score.
    loadable_race_scores = academic_years.flat_map do |academic_year|
      races.map do |race|
        process_score(measure:, school:, academic_year:, race:)
      end
    end

    # compact drops any nil results before they reach the import.
    RaceScore.import(loadable_race_scores.compact, batch_size: 1_000, on_duplicate_key_update: :all)
  end
end
def self.process_score(measure:, school:, academic_year:, race:)
score = race_score(measure:, school:, academic_year:, race:)
{ measure_id: measure.id, school_id: school.id, academic_year_id: academic_year.id, race_id: race.id, average: score.average,

@ -6,7 +6,7 @@
require 'csv'
class StudentLoader
def self.load_data(filepath:, reinitialize: false)
def self.load_data(filepath:, reinitialize: true)
destroy_students if reinitialize
File.open(filepath) do |file|

@ -71,7 +71,7 @@ namespace :one_off do
desc 'load students'
task load_students: :environment do
Dir.glob(Rails.root.join('data', 'survey_responses', '2019-20_*student*.csv')).each do |file|
Dir.glob(Rails.root.join('data', 'survey_responses', '2021-22_*student*.csv')).each do |file|
puts "=====================> Loading student data from csv at path: #{file}"
StudentLoader.load_data filepath: file
end
@ -119,7 +119,8 @@ namespace :one_off do
desc 'reset race score calculations'
task reset_race_scores: :environment do
puts 'Resetting race scores'
RaceScoreLoader.reset(schools: [School.find_by_slug('a-irvin-studley-elementary-school')])
academic_years = [AcademicYear.find_by_range('2021-22')]
RaceScoreLoader.reset(academic_years:)
Rails.cache.clear
puts "=====================> Completed loading #{RaceScore.count} race scores"
end

Loading…
Cancel
Save