From 1a707eb6bc1345207da21ae1f5af4a49a5fe586e Mon Sep 17 00:00:00 2001
From: rebuilt <memoryman51@hotmail.com>
Date: Thu, 2 Nov 2023 09:45:30 -0700
Subject: [PATCH] feat: load student responses in the same pass as loading the
 survey responses

chore: remove student loader since loading students is now done with the survey response loader
---
 app/services/student_loader.rb                |  64 -----
 app/services/survey_item_values.rb            |  26 +-
 app/services/survey_responses_data_loader.rb  |  38 +--
 lib/tasks/data.rake                           |  24 --
 lib/tasks/one_off.rake                        | 245 ++----------------
 spec/services/student_loader_spec.rb          | 160 ------------
 .../survey_responses_data_loader_spec.rb      |  23 ++
 7 files changed, 76 insertions(+), 504 deletions(-)
 delete mode 100644 app/services/student_loader.rb
 delete mode 100644 spec/services/student_loader_spec.rb

diff --git a/app/services/student_loader.rb b/app/services/student_loader.rb
deleted file mode 100644
index 97163a78..00000000
--- a/app/services/student_loader.rb
+++ /dev/null
@@ -1,64 +0,0 @@
-# frozen_string_literal: true
-
-class StudentLoader
-  def self.load_data(filepath:, rules: [])
-    File.open(filepath) do |file|
-      headers = file.first
-      headers_array = headers.split(",")
-
-      file.lazy.each_slice(1_000) do |lines|
-        CSV.parse(lines.join, headers:).map do |row|
-          row = SurveyItemValues.new(row:, headers: headers_array, genders: nil, survey_items: nil, schools:)
-          next if rules.any? do |rule|
-                    rule.new(row:).skip_row?
-                  end
-
-          process_row(row:)
-        end
-      end
-    end
-  end
-
-  def self.from_file(file:, rules: [])
-    headers = file.gets
-    headers_array = headers.split(",")
-
-    until file.eof?
-      line = file.gets
-      next unless line.present?
-
-      CSV.parse(line, headers:).map do |row|
-        row = SurveyItemValues.new(row:, headers: headers_array, genders: nil, survey_items: nil, schools:)
-        next if rules.any? do |rule|
-                  rule.new(row:).skip_row?
-                end
-
-        process_row(row:)
-      end
-    end
-  end
-
-  def self.process_row(row:)
-    student = Student.find_or_create_by(response_id: row.response_id, lasid: row.lasid)
-    student.races.delete_all
-    races = row.races
-    races.map do |race|
-      student.races << race
-    end
-    assign_student_to_responses(student:, response_id: row.response_id)
-  end
-
-  def self.schools
-    @schools ||= School.all.map { |school| [school.dese_id, school] }.to_h
-  end
-
-  def self.assign_student_to_responses(student:, response_id:)
-    responses = SurveyItemResponse.where(response_id:)
-    loadable_responses = responses.map do |response|
-      response.student = student
-      response
-    end
-
-    SurveyItemResponse.import(loadable_responses.flatten.compact, batch_size: 1_000, on_duplicate_key_update: :all)
-  end
-end
diff --git a/app/services/survey_item_values.rb b/app/services/survey_item_values.rb
index 3da94fb5..0139009b 100644
--- a/app/services/survey_item_values.rb
+++ b/app/services/survey_item_values.rb
@@ -116,20 +116,24 @@ class SurveyItemValues
   end
 
   def gender
-    gender_code = value_from(pattern: /Gender|What is your gender?|What is your gender? - Selected Choice/i)
-    gender_code ||= 99
-    gender_code = gender_code.to_i
-    gender_code = 4 if gender_code == 3
-    gender_code = 99 if gender_code.zero?
-    genders[gender_code]
+    @gender ||= begin
+      gender_code = value_from(pattern: /Gender|What is your gender?|What is your gender? - Selected Choice/i)
+      gender_code ||= 99
+      gender_code = gender_code.to_i
+      gender_code = 4 if gender_code == 3
+      gender_code = 99 if gender_code.zero?
+      genders[gender_code]
+    end
   end
 
   def races
-    race_codes = value_from(pattern: /RACE/i)
-    race_codes ||= value_from(pattern: %r{What is your race/ethnicity?(Please select all that apply) - Selected Choice}i)
-    race_codes ||= value_from(pattern: /Race Secondary/i) || ""
-    race_codes = race_codes.split(",").map(&:to_i) || []
-    process_races(codes: race_codes)
+    @races ||= begin
+      race_codes = value_from(pattern: /RACE/i)
+      race_codes ||= value_from(pattern: %r{What is your race/ethnicity?(Please select all that apply) - Selected Choice}i)
+      race_codes ||= value_from(pattern: /Race Secondary/i) || ""
+      race_codes = race_codes.split(",").map(&:to_i) || []
+      process_races(codes: race_codes)
+    end
   end
 
   def lasid
diff --git a/app/services/survey_responses_data_loader.rb b/app/services/survey_responses_data_loader.rb
index f7abce63..0917d73c 100644
--- a/app/services/survey_responses_data_loader.rb
+++ b/app/services/survey_responses_data_loader.rb
@@ -12,7 +12,7 @@ class SurveyResponsesDataLoader
           process_row(row: SurveyItemValues.new(row:, headers: headers_array, genders:, survey_items: all_survey_items, schools:),
                       rules:)
         end
-        SurveyItemResponse.import survey_item_responses.compact.flatten, batch_size: 500
+        SurveyItemResponse.import survey_item_responses.compact.flatten, batch_size: 500, on_duplicate_key_update: :all
       end
     end
   end
@@ -23,7 +23,6 @@ class SurveyResponsesDataLoader
     all_survey_items = survey_items(headers:)
 
     survey_item_responses = []
-    row_count = 0
     until file.eof?
       line = file.gets
       next unless line.present?
@@ -32,16 +31,9 @@ class SurveyResponsesDataLoader
         survey_item_responses << process_row(row: SurveyItemValues.new(row:, headers: headers_array, genders:, survey_items: all_survey_items, schools:),
                                              rules:)
       end
-
-      row_count += 1
-      next unless row_count == 1000
-
-      SurveyItemResponse.import survey_item_responses.compact.flatten, batch_size: 1000
-      survey_item_responses = []
-      row_count = 0
     end
 
-    SurveyItemResponse.import survey_item_responses.compact.flatten, batch_size: 1000
+    SurveyItemResponse.import survey_item_responses.compact.flatten, batch_size: 1000, on_duplicate_key_update: :all
   end
 
   private
@@ -78,6 +70,13 @@ class SurveyResponsesDataLoader
   end
 
   def process_survey_items(row:)
+    student = Student.find_or_create_by(response_id: row.response_id, lasid: row.lasid)
+    student.races.delete_all
+    races = row.races
+    races.map do |race|
+      student.races << race
+    end
+
     row.survey_items.map do |survey_item|
       likert_score = row.likert_score(survey_item_id: survey_item.survey_item_id) || next
 
@@ -86,23 +85,30 @@ class SurveyResponsesDataLoader
         next
       end
       response = row.survey_item_response(survey_item:)
-      create_or_update_response(survey_item_response: response, likert_score:, row:, survey_item:)
+      create_or_update_response(survey_item_response: response, likert_score:, row:, survey_item:, student:)
     end.compact
   end
 
-  def create_or_update_response(survey_item_response:, likert_score:, row:, survey_item:)
+  def create_or_update_response(survey_item_response:, likert_score:, row:, survey_item:, student:)
     gender = row.gender
     grade = row.grade
     income = incomes[row.income.parameterize]
     ell = ells[row.ell]
     sped = speds[row.sped]
+
     if survey_item_response.present?
-      survey_item_response.update!(likert_score:, grade:, gender:, recorded_date: row.recorded_date, income:, ell:,
-                                   sped:)
-      []
+      survey_item_response.likert_score = likert_score
+      survey_item_response.grade = grade
+      survey_item_response.gender = gender
+      survey_item_response.recorded_date = row.recorded_date
+      survey_item_response.income = income
+      survey_item_response.ell = ell
+      survey_item_response.sped = sped
+      survey_item_response.student = student
+      survey_item_response
     else
       SurveyItemResponse.new(response_id: row.response_id, academic_year: row.academic_year, school: row.school, survey_item:,
-                             likert_score:, grade:, gender:, recorded_date: row.recorded_date, income:, ell:, sped:)
+                             likert_score:, grade:, gender:, recorded_date: row.recorded_date, income:, ell:, sped:, student:)
     end
   end
 
diff --git a/lib/tasks/data.rake b/lib/tasks/data.rake
index 73223ff9..495b4a2d 100644
--- a/lib/tasks/data.rake
+++ b/lib/tasks/data.rake
@@ -9,11 +9,6 @@ namespace :data do
     end
     puts "=====================> Completed loading #{SurveyItemResponse.count - survey_item_response_count} survey responses. #{SurveyItemResponse.count} total responses in the database"
 
-    Sftp::Directory.open(path:) do |file|
-      StudentLoader.from_file(file:, rules: [])
-    end
-    puts "=====================> Completed loading #{Student.count - student_count} students. #{Student.count} total students"
-
     Rails.cache.clear
   end
 
@@ -27,11 +22,6 @@ namespace :data do
     end
     puts "=====================> Completed loading #{SurveyItemResponse.count - survey_item_response_count} survey responses. #{SurveyItemResponse.count} total responses in the database"
 
-    Sftp::Directory.open(path:) do |file|
-      StudentLoader.from_file(file:, rules: [])
-    end
-    puts "=====================> Completed loading #{Student.count - student_count} students. #{Student.count} total students"
-
     Rails.cache.clear
   end
 
@@ -50,20 +40,6 @@ namespace :data do
     puts "=====================> Completed loading #{AdminDataValue.count - original_count} admin data values"
   end
 
-  desc "load students"
-  task load_students: :environment do
-    SurveyItemResponse.update_all(student_id: nil)
-    StudentRace.delete_all
-    Student.delete_all
-    Dir.glob(Rails.root.join("data", "survey_responses", "*student*.csv")).each do |file|
-      puts "=====================> Loading student data from csv at path: #{file}"
-      StudentLoader.load_data filepath: file
-    end
-    puts "=====================> Completed loading #{Student.count} students"
-
-    Rails.cache.clear
-  end
-
   desc "reset all cache counters"
   task reset_cache_counters: :environment do
     puts "=====================> Resetting Category counters"
diff --git a/lib/tasks/one_off.rake b/lib/tasks/one_off.rake
index c5ac715a..d02ab053 100644
--- a/lib/tasks/one_off.rake
+++ b/lib/tasks/one_off.rake
@@ -5,10 +5,10 @@ namespace :one_off do
 
     qualtrics_schools = {}
 
-    csv_file = Rails.root.join('data', 'master_list_of_schools_and_districts.csv')
+    csv_file = Rails.root.join("data", "master_list_of_schools_and_districts.csv")
     CSV.parse(File.read(csv_file), headers: true) do |row|
-      district_id = row['District Code'].to_i
-      school_id = row['School Code'].to_i
+      district_id = row["District Code"].to_i
+      school_id = row["School Code"].to_i
 
       if qualtrics_schools[[district_id, school_id]].present?
         puts "Duplicate entry row #{row}"
@@ -24,14 +24,14 @@ namespace :one_off do
       end
 
       if school.nil?
-        school_name = csv_row['School Name'].strip
+        school_name = csv_row["School Name"].strip
         puts "Could not find school '#{school_name}' with district id: #{district_id}, school id: #{school_id}"
-        potential_school_ids = School.where('name like ?', "%#{school_name}%").map(&:id)
+        potential_school_ids = School.where("name like ?", "%#{school_name}%").map(&:id)
         puts "Potential ID matches: #{potential_school_ids}" if potential_school_ids.present?
         next
       end
 
-      school.update!(dese_id: csv_row['DESE School ID'])
+      school.update!(dese_id: csv_row["DESE School ID"])
       updated_schools << school.id
     end
 
@@ -40,7 +40,7 @@ namespace :one_off do
     end
   end
 
-  desc 'list scales that have no survey responses'
+  desc "list scales that have no survey responses"
   task list_scales_that_lack_survey_responses: :environment do
     output = AcademicYear.all.map do |academic_year|
       Scale.all.map do |scale|
@@ -48,11 +48,11 @@ namespace :one_off do
       end
     end
 
-    output = output.map { |year| year.reject { |scale| scale[2] > 0 || scale[1].starts_with?('a-') } }
+    output = output.map { |year| year.reject { |scale| scale[2] > 0 || scale[1].starts_with?("a-") } }
     pp output
   end
 
-  desc 'list survey_items that have no survey responses by district'
+  desc "list survey_items that have no survey responses by district"
   task list_survey_items_that_lack_responses: :environment do
     output = AcademicYear.all.map do |academic_year|
       District.all.map do |district|
@@ -66,14 +66,14 @@ namespace :one_off do
     output = output.map do |year|
       year.map do |district|
         district.reject do |survey_item|
-          survey_item[2] > 0 || survey_item[1].starts_with?('a-')
+          survey_item[2] > 0 || survey_item[1].starts_with?("a-")
         end
       end
     end
     pp output
   end
 
-  desc 'list the most recent admin data values'
+  desc "list the most recent admin data values"
   task list_recent_admin_data_values: :environment do
     range = 4.weeks.ago..1.second.ago
     values = AdminDataValue.where(updated_at: range).group(:admin_data_item).count.map do |item|
@@ -82,240 +82,27 @@ namespace :one_off do
     puts values
   end
 
-  desc 'delete 2022-23 survey responses'
+  desc "delete 2022-23 survey responses"
   task delete_survey_responses_2022_23: :environment do
     response_count = SurveyItemResponse.all.count
-    SurveyItemResponse.where(academic_year: AcademicYear.find_by_range('2022-23')).delete_all
+    SurveyItemResponse.where(academic_year: AcademicYear.find_by_range("2022-23")).delete_all
 
     puts "=====================> Deleted #{response_count - SurveyItemResponse.all.count} survey responses"
     # should be somewhere near 295738
   end
 
-  desc 'load survey responses'
+  desc "load survey responses"
   task load_survey_responses: :environment do
     survey_item_response_count = SurveyItemResponse.count
     student_count = Student.count
-    path = '/data/survey_responses/clean/'
-    schools = District.find_by_slug('maynard-public-schools').schools
+    path = "/data/survey_responses/clean/"
+    schools = District.find_by_slug("maynard-public-schools").schools
 
     Sftp::Directory.open(path:) do |file|
       SurveyResponsesDataLoader.new.from_file(file:)
     end
     puts "=====================> Completed loading #{SurveyItemResponse.count - survey_item_response_count} survey responses. #{SurveyItemResponse.count} total responses in the database"
 
-    Sftp::Directory.open(path:) do |file|
-      StudentLoader.from_file(file:, rules: [])
-    end
-    puts "=====================> Completed loading #{Student.count - student_count} students. #{Student.count} total students"
-
     Rails.cache.clear
   end
-
-  desc "Generate CSV report of teacher survey item responses"
-  task teacher_survey_questions_csv: :environment do
-    headers = ['School ID', 'Academic Year', 'Survey Item', 'Count','Percentage_diff']
-    output_rows = []
-    counts={}
-    avg=0
-    School.all.each do |sc|
-      AcademicYear.all.each do |ay|
-        sum=0
-        nof_si=0
-        threshold = Respondent.where(school: sc, academic_year: ay).pluck(:total_teachers)
-        SurveyItem.teacher_survey_items.all.each do |si|
-          c = SurveyItemResponse.where(school: sc, academic_year: ay, survey_item: si).group(:school_id).count
-          if c.any? && (c.values.first >= 10 || c.values.first > threshold/4)
-            counts[[sc.id, ay.id, si.id]] = c
-            sum+=c.values.first
-            nof_si+=1
-          end
-        end
-
-
-          avg = sum.to_f/ nof_si
-        counts.each do |key, value|
-          if key[0] == sc.id && key[1] == ay.id
-            count = value.values.first
-            percentage_diff = ((count-avg) / avg) * 100
-            counts[key] = { count: count, percentage_diff: percentage_diff }
-        end
-      end
-    end  end
-
-    counts.each do |key, value|
-      output_rows << [School.where(id:key[0]).pluck(:name) , AcademicYear.where(id:key[1]).pluck(:range) , SurveyItem.where(id:key[2]).pluck(:survey_item_id), SurveyItem.where(id:key[2]).pluck(:prompt), value[:count], value[:percentage_diff]]
-    end
-
-    file = File.new('teacher_survey_questions.csv', 'w')
-    CSV.open(file, 'w', write_headers: true, headers: headers) do |csv|
-      output_rows.each do |row|
-        csv << row
-      end
-    end
-
-    file.close
-    puts "CSV report of teacher survey item responses with removed stray responses.csv"
-  end
-
-
-  desc "Generate CSV report of short survey item responses"
-  task short_survey_questions_csv: :environment do
-    headers = ['School ID', 'Academic Year', 'Survey Item', 'Count','Percentage_diff']
-    output_rows = []
-    counts={}
-    avg=0
-    temp=0
-    count_response_grades_not_in_grades=0
-    School.all.each do |sc|
-      if Respondent.where(school: sc).any?
-      grades_count=Respondent.where(school:sc).first.counts_by_grade
-      grades= grades_count.keys
-      AcademicYear.all.each do |ay|
-        sum=0
-        nof_si=0
-        threshold=0
-        threshold = Respondent.where(school: sc, academic_year: ay).pluck(:total_students)
-        SurveyItem.short_form_items.all.each do |si|
-          c = SurveyItemResponse.where(school: sc, academic_year: ay, survey_item: si).group(:school_id).count
-          response_grades = SurveyItemResponse.where(school: sc, academic_year: ay, survey_item: si).pluck(:grade)
-          count_response_grades_not_in_grades = response_grades.count { |grade| !grades.include?(grade) }
-           if !count_response_grades_not_in_grades.nil? && c.any?
-            c[c.keys.first] = c.values.first - count_response_grades_not_in_grades
-          end
-
-           if threshold.any?
-          if c.any? && (c.values.first >= 10 || c.values.first > threshold.first/4)
-            counts[[sc.id, ay.id, si.id]] = c
-            sum+=c.values.first
-            nof_si+=1
-          end
-        end
-        end
-
-
-          avg = sum.to_f/ nof_si
-        counts.each do |key, value|
-          if key[0] == sc.id && key[1] == ay.id
-            count = value.values.first
-            percentage_diff = ((count-avg) / avg) * 100
-            counts[key] = { count: count, percentage_diff: percentage_diff }
-        end
-      end
-    end  end end
-
-    counts.each do |key, value|
-      output_rows << [School.where(id:key[0]).pluck(:name) , AcademicYear.where(id:key[1]).pluck(:range) , SurveyItem.where(id:key[2]).pluck(:survey_item_id), SurveyItem.where(id:key[2]).pluck(:prompt), value[:count], value[:percentage_diff]]
-    end
-
-    file = File.new('short_survey_questions.csv', 'w')
-    CSV.open(file, 'w', write_headers: true, headers: headers) do |csv|
-      output_rows.each do |row|
-        csv << row
-      end
-    end
-
-    file.close
-    puts "CSV report of short survey item responses with removed stray responses.csv"
-  end
-
-
-    desc "Generate CSV report of early_education_surveysitem responses"
-  task early_education_survey_questions_csv: :environment do
-    headers = ['School ID', 'Academic Year', 'Survey Item', 'Count','Percentage_diff']
-    output_rows = []
-    counts={}
-    avg=0
-    temp=0
-    count_response_grades_not_in_grades=0
-    School.all.each do |sc|
-      if Respondent.where(school: sc).any?
-      grades_count=Respondent.where(school:sc).first.counts_by_grade
-      grades= grades_count.keys
-      AcademicYear.all.each do |ay|
-        sum=0
-        nof_si=0
-        threshold=0
-        threshold = Respondent.where(school: sc, academic_year: ay).pluck(:total_students)
-        SurveyItem.early_education_surveys.all.each do |si|
-          c = SurveyItemResponse.where(school: sc, academic_year: ay, survey_item: si).group(:school_id).count
-          response_grades = SurveyItemResponse.where(school: sc, academic_year: ay, survey_item: si).pluck(:grade)
-          count_response_grades_not_in_grades = response_grades.count { |grade| !grades.include?(grade) }
-           if !count_response_grades_not_in_grades.nil? && c.any?
-            c[c.keys.first] = c.values.first - count_response_grades_not_in_grades
-          end
-
-           if threshold.any?
-          if c.any? && (c.values.first >= 10 || c.values.first > threshold.first/4)
-            counts[[sc.id, ay.id, si.id]] = c
-            sum+=c.values.first
-            nof_si+=1
-          end
-        end
-        end
-
-
-          avg = sum.to_f/ nof_si
-        counts.each do |key, value|
-          if key[0] == sc.id && key[1] == ay.id
-            count = value.values.first
-            percentage_diff = ((count-avg) / avg) * 100
-            counts[key] = { count: count, percentage_diff: percentage_diff }
-        end
-      end
-    end  end end
-
-    counts.each do |key, value|
-      output_rows << [School.where(id:key[0]).pluck(:name) , AcademicYear.where(id:key[1]).pluck(:range) , SurveyItem.where(id:key[2]).pluck(:survey_item_id), SurveyItem.where(id:key[2]).pluck(:prompt), value[:count], value[:percentage_diff]]
-    end
-
-    file = File.new('early_education_surveys_questions.csv', 'w')
-    CSV.open(file, 'w', write_headers: true, headers: headers) do |csv|
-      output_rows.each do |row|
-        csv << row
-      end
-    end
-
-    file.close
-    puts "CSV report of early_education_surveys items with removed stray responses.csv"
-  end
-
-
-  desc "Generate CSV report of survey item responses"
-  task stray_responses: :environment do
-
-    headers = ['School ID', 'Academic Year', 'Survey Item', 'Count','SurveyItemResponse ids']
-
-
-    output_rows = []
-    sir_ids=[]
-
-
-      School.all.each do |sc|
-      AcademicYear.all.each do |ay|
-        SurveyItem.all.each do |si|
-          count = SurveyItemResponse.where(school: sc, academic_year: ay, survey_item: si).count
-          sir_ids= SurveyItemResponse.where(school: sc, academic_year: ay, survey_item: si).pluck(:response_id)
-
-          if count > 0 && count < 10
-
-            output_rows << [sc.name, ay.range, si.survey_item_id, count,sir_ids]
-          end
-        end
-      end
-    end
-
-
-    file = File.new('stray_responses.csv', 'w')
-
-    CSV.open(file, 'w', write_headers: true, headers: headers) do |csv|
-      output_rows.each do |row|
-        csv << row
-      end
-    end
-
-
-    file.close
-
-    puts "CSV report of survey item responses created in stray_responses.csv"
-  end
 end
diff --git a/spec/services/student_loader_spec.rb b/spec/services/student_loader_spec.rb
deleted file mode 100644
index e998ea7f..00000000
--- a/spec/services/student_loader_spec.rb
+++ /dev/null
@@ -1,160 +0,0 @@
-require "rails_helper"
-
-describe StudentLoader do
-  let(:path_to_student_responses) { Rails.root.join("spec", "fixtures", "test_2020-21_student_survey_responses.csv") }
-  let(:american_indian) { create(:race, qualtrics_code: 1) }
-  let(:asian)           { create(:race, qualtrics_code: 2) }
-  let(:black)           { create(:race, qualtrics_code: 3) }
-  let(:latinx)          { create(:race, qualtrics_code: 4) }
-  let(:white)           { create(:race, qualtrics_code: 5) }
-  let(:middle_eastern)  { create(:race, qualtrics_code: 8) }
-  let(:unknown_race)    { create(:race, qualtrics_code: 99) }
-  let(:multiracial)     { create(:race, qualtrics_code: 100) }
-  let(:female)          { create(:gender, qualtrics_code: 1) }
-  let(:male)            { create(:gender, qualtrics_code: 2) }
-  let(:another_gender)  { create(:gender, qualtrics_code: 3) }
-  let(:non_binary)      { create(:gender, qualtrics_code: 4) }
-  let(:unknown_gender)  { create(:gender, qualtrics_code: 99) }
-
-  before :each do
-    american_indian
-    asian
-    black
-    latinx
-    white
-    middle_eastern
-    unknown_race
-    multiracial
-    female
-    male
-    another_gender
-    non_binary
-    unknown_gender
-  end
-
-  after :each do
-    DatabaseCleaner.clean
-  end
-  xdescribe "#process_races" do
-    context "as a standalone function" do
-      it "race codes of 6 or 7 get classified as an unknown race" do
-        codes = ["NA"]
-        expect(StudentLoader.process_races(codes:)).to eq [unknown_race]
-        codes = []
-        expect(StudentLoader.process_races(codes:)).to eq [unknown_race]
-        codes = [1]
-        expect(StudentLoader.process_races(codes:)).to eq [american_indian]
-        codes = [2]
-        expect(StudentLoader.process_races(codes:)).to eq [asian]
-        codes = [3]
-        expect(StudentLoader.process_races(codes:)).to eq [black]
-        codes = [4]
-        expect(StudentLoader.process_races(codes:)).to eq [latinx]
-        codes = [5]
-        expect(StudentLoader.process_races(codes:)).to eq [white]
-        codes = [8]
-        expect(StudentLoader.process_races(codes:)).to eq [middle_eastern]
-        codes = [6]
-        expect(StudentLoader.process_races(codes:)).to eq [unknown_race]
-        codes = [7]
-        expect(StudentLoader.process_races(codes:)).to eq [unknown_race]
-        codes = [6, 7]
-        expect(StudentLoader.process_races(codes:)).to eq [unknown_race]
-        codes = [1, 6, 7]
-        expect(StudentLoader.process_races(codes:)).to eq [american_indian]
-        codes = [1, 6, 7, 2]
-        expect(StudentLoader.process_races(codes:)).to eq [american_indian, asian, multiracial]
-        codes = [3, 6, 7, 6, 6, 7, 7, 6, 2]
-        expect(StudentLoader.process_races(codes:)).to eq [black, asian, multiracial]
-        codes = [8, 2]
-        expect(StudentLoader.process_races(codes:)).to eq [middle_eastern, asian, multiracial]
-      end
-    end
-  end
-
-  xdescribe "#add_multiracial_designation" do
-    it "adds the multiracial race code to the list of races" do
-      races = [unknown_race]
-      expect(StudentLoader.add_multiracial_designation(races:)).to eq [unknown_race]
-      races = [american_indian, asian]
-      expect(StudentLoader.add_multiracial_designation(races:)).to eq [american_indian, asian, multiracial]
-      races = [white]
-      expect(StudentLoader.add_multiracial_designation(races:)).to eq [white]
-    end
-  end
-
-  # This fails in CI because github does not know what the key derivation salt is.
-  # I'm not sure how to securely set the key derivation salt as an environment variable in CI
-  describe "self.load_data" do
-    context "load student data for all schools" do
-      before :each do
-        SurveyResponsesDataLoader.new.load_data filepath: path_to_student_responses
-        StudentLoader.load_data filepath: path_to_student_responses
-      end
-
-      it "ensures student responses load correctly" do
-        assigns_student_to_the_survey_item_responses
-        assigns_races_to_students
-        is_idempotent_for_students
-      end
-    end
-
-    # TODO: get this test to run correctly.  Since we are no longer seeding, we need to define schools, and districts; some Lowell, some not
-    xcontext "When using the rule to skip non Lowell schools" do
-      before :each do
-        SurveyResponsesDataLoader.new.load_data filepath: path_to_student_responses
-        StudentLoader.load_data filepath: path_to_student_responses, rules: [Rule::SkipNonLowellSchools]
-      end
-
-      it "only loads student data for lowell" do
-        expect(Student.find_by_response_id("student_survey_response_1")).to eq nil
-        expect(Student.find_by_response_id("student_survey_response_3").races).to eq [unknown_race]
-        expect(Student.find_by_response_id("student_survey_response_4").races).to eq [unknown_race]
-        expect(Student.find_by_response_id("student_survey_response_5").races).to eq [american_indian, asian, black, latinx, white,
-                                                                                      middle_eastern, multiracial]
-        expect(Student.find_by_response_id("student_survey_response_6").races).to eq [american_indian, asian, black, latinx, white,
-                                                                                      middle_eastern, multiracial]
-        expect(Student.find_by_response_id("student_survey_response_7").races).to eq [unknown_race]
-      end
-    end
-  end
-end
-
-def assigns_student_to_the_survey_item_responses
-  # The csv file has no responses for `student_survey_response_2` so we can't assign a student to nil responses
-  expect(SurveyItemResponse.find_by_response_id("student_survey_response_2")).to eq nil
-
-  response_ids = %w[student_survey_response_1 student_survey_response_3
-                    student_survey_response_4
-                    student_survey_response_5
-                    student_survey_response_6
-                    student_survey_response_7]
-
-  response_ids.each do |response_id|
-    responses = SurveyItemResponse.where(response_id:)
-    responses.each do |response|
-      expect(response.student).not_to eq nil
-      expect(response.student).to eq Student.find_by_response_id(response_id)
-    end
-  end
-end
-
-def assigns_races_to_students
-  expect(Student.find_by_response_id("student_survey_response_1").races).to eq [american_indian]
-  expect(Student.find_by_response_id("student_survey_response_2").races).to eq [asian, black, latinx, multiracial]
-  expect(Student.find_by_response_id("student_survey_response_3").races).to eq [unknown_race]
-  expect(Student.find_by_response_id("student_survey_response_4").races).to eq [unknown_race]
-  expect(Student.find_by_response_id("student_survey_response_5").races).to eq [american_indian, asian, black, latinx, white,
-                                                                                middle_eastern, multiracial]
-  expect(Student.find_by_response_id("student_survey_response_6").races).to eq [american_indian, asian, black, latinx, white,
-                                                                                middle_eastern, multiracial]
-  expect(Student.find_by_response_id("student_survey_response_7").races).to eq [unknown_race]
-end
-
-def is_idempotent_for_students
-  number_of_students = Student.count
-  number_of_responses = SurveyItemResponse.count
-  StudentLoader.load_data filepath: path_to_student_responses
-  expect(Student.count).to eq number_of_students
-  expect(SurveyItemResponse.count).to eq number_of_responses
-end
diff --git a/spec/services/survey_responses_data_loader_spec.rb b/spec/services/survey_responses_data_loader_spec.rb
index d0d70fae..9e2931b6 100644
--- a/spec/services/survey_responses_data_loader_spec.rb
+++ b/spec/services/survey_responses_data_loader_spec.rb
@@ -51,13 +51,24 @@ describe SurveyResponsesDataLoader do
   let(:another_gender) { create(:gender, qualtrics_code: 3) }
   let(:non_binary) { create(:gender, qualtrics_code: 4) }
   let(:unknown_gender) { create(:gender, qualtrics_code: 99) }
+
   let(:low_income) { create(:income, designation: "Economically Disadvantaged – Y") }
   let(:high_income) { create(:income, designation: "Economically Disadvantaged – N") }
   let(:unknown_income) { create(:income, designation: "Unknown") }
+
   let(:yes_ell) { create(:ell, designation: "ELL") }
   let(:not_ell) { create(:ell, designation: "Not ELL") }
   let(:unknown_ell) { create(:ell, designation: "Unknown") }
 
+  let(:american_indian) { create(:race, qualtrics_code: 1) }
+  let(:asian)           { create(:race, qualtrics_code: 2) }
+  let(:black)           { create(:race, qualtrics_code: 3) }
+  let(:latinx)          { create(:race, qualtrics_code: 4) }
+  let(:white)           { create(:race, qualtrics_code: 5) }
+  let(:middle_eastern)  { create(:race, qualtrics_code: 8) }
+  let(:unknown_race)    { create(:race, qualtrics_code: 99) }
+  let(:multiracial)     { create(:race, qualtrics_code: 100) }
+
   let(:setup) do
     ay_2020_21
     ay_2022_23
@@ -87,17 +98,29 @@ describe SurveyResponsesDataLoader do
     s_emsa_q1
     s_emsa_q2
     s_emsa_q3
+
     female
     male
     another_gender
     non_binary
     unknown_gender
+
     low_income
     high_income
     unknown_income
+
     yes_ell
     not_ell
     unknown_ell
+
+    american_indian
+    asian
+    black
+    latinx
+    white
+    middle_eastern
+    unknown_race
+    multiracial
   end
 
   before :each do