feat: load student responses in the same pass as loading the survey responses

chore: remove student loader since loading students is now done with the survey response loader
rpp-main
rebuilt 2 years ago
parent 9110be31c6
commit 019b954ffa

@ -1,64 +0,0 @@
# frozen_string_literal: true
class StudentLoader
def self.load_data(filepath:, rules: [])
File.open(filepath) do |file|
headers = file.first
headers_array = headers.split(",")
file.lazy.each_slice(1_000) do |lines|
CSV.parse(lines.join, headers:).map do |row|
row = SurveyItemValues.new(row:, headers: headers_array, genders: nil, survey_items: nil, schools:)
next if rules.any? do |rule|
rule.new(row:).skip_row?
end
process_row(row:)
end
end
end
end
def self.from_file(file:, rules: [])
headers = file.gets
headers_array = headers.split(",")
until file.eof?
line = file.gets
next unless line.present?
CSV.parse(line, headers:).map do |row|
row = SurveyItemValues.new(row:, headers: headers_array, genders: nil, survey_items: nil, schools:)
next if rules.any? do |rule|
rule.new(row:).skip_row?
end
process_row(row:)
end
end
end
def self.process_row(row:)
student = Student.find_or_create_by(response_id: row.response_id, lasid: row.lasid)
student.races.delete_all
races = row.races
races.map do |race|
student.races << race
end
assign_student_to_responses(student:, response_id: row.response_id)
end
def self.schools
@schools ||= School.all.map { |school| [school.dese_id, school] }.to_h
end
def self.assign_student_to_responses(student:, response_id:)
responses = SurveyItemResponse.where(response_id:)
loadable_responses = responses.map do |response|
response.student = student
response
end
SurveyItemResponse.import(loadable_responses.flatten.compact, batch_size: 1_000, on_duplicate_key_update: :all)
end
end

@ -116,20 +116,24 @@ class SurveyItemValues
end
def gender
gender_code = value_from(pattern: /Gender|What is your gender?|What is your gender? - Selected Choice/i)
gender_code ||= 99
gender_code = gender_code.to_i
gender_code = 4 if gender_code == 3
gender_code = 99 if gender_code.zero?
genders[gender_code]
@gender ||= begin
gender_code = value_from(pattern: /Gender|What is your gender?|What is your gender? - Selected Choice/i)
gender_code ||= 99
gender_code = gender_code.to_i
gender_code = 4 if gender_code == 3
gender_code = 99 if gender_code.zero?
genders[gender_code]
end
end
def races
race_codes = value_from(pattern: /RACE/i)
race_codes ||= value_from(pattern: %r{What is your race/ethnicity?(Please select all that apply) - Selected Choice}i)
race_codes ||= value_from(pattern: /Race Secondary/i) || ""
race_codes = race_codes.split(",").map(&:to_i) || []
process_races(codes: race_codes)
@races ||= begin
race_codes = value_from(pattern: /RACE/i)
race_codes ||= value_from(pattern: %r{What is your race/ethnicity?(Please select all that apply) - Selected Choice}i)
race_codes ||= value_from(pattern: /Race Secondary/i) || ""
race_codes = race_codes.split(",").map(&:to_i) || []
process_races(codes: race_codes)
end
end
def lasid

@ -12,7 +12,7 @@ class SurveyResponsesDataLoader
process_row(row: SurveyItemValues.new(row:, headers: headers_array, genders:, survey_items: all_survey_items, schools:),
rules:)
end
SurveyItemResponse.import survey_item_responses.compact.flatten, batch_size: 500
SurveyItemResponse.import survey_item_responses.compact.flatten, batch_size: 500, on_duplicate_key_update: :all
end
end
end
@ -23,7 +23,6 @@ class SurveyResponsesDataLoader
all_survey_items = survey_items(headers:)
survey_item_responses = []
row_count = 0
until file.eof?
line = file.gets
next unless line.present?
@ -32,16 +31,9 @@ class SurveyResponsesDataLoader
survey_item_responses << process_row(row: SurveyItemValues.new(row:, headers: headers_array, genders:, survey_items: all_survey_items, schools:),
rules:)
end
row_count += 1
next unless row_count == 1000
SurveyItemResponse.import survey_item_responses.compact.flatten, batch_size: 1000
survey_item_responses = []
row_count = 0
end
SurveyItemResponse.import survey_item_responses.compact.flatten, batch_size: 1000
SurveyItemResponse.import survey_item_responses.compact.flatten, batch_size: 1000, on_duplicate_key_update: :all
end
private
@ -78,6 +70,13 @@ class SurveyResponsesDataLoader
end
def process_survey_items(row:)
student = Student.find_or_create_by(response_id: row.response_id, lasid: row.lasid)
student.races.delete_all
races = row.races
races.map do |race|
student.races << race
end
row.survey_items.map do |survey_item|
likert_score = row.likert_score(survey_item_id: survey_item.survey_item_id) || next
@ -86,23 +85,30 @@ class SurveyResponsesDataLoader
next
end
response = row.survey_item_response(survey_item:)
create_or_update_response(survey_item_response: response, likert_score:, row:, survey_item:)
create_or_update_response(survey_item_response: response, likert_score:, row:, survey_item:, student:)
end.compact
end
def create_or_update_response(survey_item_response:, likert_score:, row:, survey_item:)
def create_or_update_response(survey_item_response:, likert_score:, row:, survey_item:, student:)
gender = row.gender
grade = row.grade
income = incomes[row.income.parameterize]
ell = ells[row.ell]
sped = speds[row.sped]
if survey_item_response.present?
survey_item_response.update!(likert_score:, grade:, gender:, recorded_date: row.recorded_date, income:, ell:,
sped:)
[]
survey_item_response.likert_score = likert_score
survey_item_response.grade = grade
survey_item_response.gender = gender
survey_item_response.recorded_date = row.recorded_date
survey_item_response.income = income
survey_item_response.ell = ell
survey_item_response.sped = sped
survey_item_response.student = student
survey_item_response
else
SurveyItemResponse.new(response_id: row.response_id, academic_year: row.academic_year, school: row.school, survey_item:,
likert_score:, grade:, gender:, recorded_date: row.recorded_date, income:, ell:, sped:)
likert_score:, grade:, gender:, recorded_date: row.recorded_date, income:, ell:, sped:, student:)
end
end

@ -11,15 +11,6 @@ namespace :data do
end
puts "=====================> Completed loading #{SurveyItemResponse.count} survey responses"
Sftp::Directory.open(path:) do |file|
StudentLoader.from_file(file:, rules: [Rule::SkipNonLowellSchools])
end
puts "=====================> Completed loading #{Student.count - student_count} students. #{Student.count} total students"
puts "Resetting race scores"
RaceScoreLoader.reset(fast_processing: false)
puts "=====================> Completed loading #{RaceScore.count} race scores"
Rails.cache.clear
end
@ -33,11 +24,6 @@ namespace :data do
end
puts "=====================> Completed loading #{SurveyItemResponse.count - survey_item_response_count} survey responses. #{SurveyItemResponse.count} total responses in the database"
Sftp::Directory.open(path:) do |file|
StudentLoader.from_file(file:, rules: [])
end
puts "=====================> Completed loading #{Student.count - student_count} students. #{Student.count} total students"
Rails.cache.clear
end
@ -67,24 +53,6 @@ namespace :data do
puts "=====================> Completed loading #{AdminDataValue.count - original_count} admin data values"
end
desc "load students"
task load_students: :environment do
SurveyItemResponse.update_all(student_id: nil)
StudentRace.delete_all
Student.delete_all
Dir.glob(Rails.root.join("data", "survey_responses", "*student*.csv")).each do |file|
puts "=====================> Loading student data from csv at path: #{file}"
StudentLoader.load_data filepath: file
end
puts "=====================> Completed loading #{Student.count} students"
puts "Resetting race scores"
RaceScoreLoader.reset(fast_processing: false)
puts "=====================> Completed loading #{RaceScore.count} survey responses"
Rails.cache.clear
end
desc "reset all cache counters"
task reset_cache_counters: :environment do
puts "=====================> Resetting Category counters"

@ -91,34 +91,18 @@ namespace :one_off do
# should be somewhere near 295738
end
desc "delete 2023-24 survey responses"
task delete_survey_responses_2023_24: :environment do
response_count = SurveyItemResponse.all.count
SurveyItemResponse.where(academic_year: AcademicYear.find_by_range("2023-24")).delete_all
puts "=====================> Deleted #{response_count - SurveyItemResponse.all.count} survey responses"
end
desc "load survey responses for lowell schools 2022-23"
task load_survey_responses_for_lowell_2022_23: :environment do
desc "load survey responses"
task load_survey_responses: :environment do
survey_item_response_count = SurveyItemResponse.count
student_count = Student.count
path = "/data/survey_responses/2022-23/"
path = "/data/survey_responses/clean/"
schools = District.find_by_slug("maynard-public-schools").schools
Sftp::Directory.open(path:) do |file|
SurveyResponsesDataLoader.new.from_file(file:)
end
puts "=====================> Completed loading #{SurveyItemResponse.count - survey_item_response_count} survey responses. #{SurveyItemResponse.count} total responses in the database"
Sftp::Directory.open(path:) do |file|
StudentLoader.from_file(file:, rules: [Rule::SkipNonLowellSchools])
end
puts "=====================> Completed loading #{Student.count - student_count} students. #{Student.count} total students"
puts "Resetting race scores"
RaceScoreLoader.reset(fast_processing: false, academic_years: [AcademicYear.find_by_range("2022-23")])
puts "=====================> Completed loading #{RaceScore.count} race scores"
Rails.cache.clear
end
end

@ -1,160 +0,0 @@
require "rails_helper"
describe StudentLoader do
let(:path_to_student_responses) { Rails.root.join("spec", "fixtures", "test_2020-21_student_survey_responses.csv") }
let(:american_indian) { create(:race, qualtrics_code: 1) }
let(:asian) { create(:race, qualtrics_code: 2) }
let(:black) { create(:race, qualtrics_code: 3) }
let(:latinx) { create(:race, qualtrics_code: 4) }
let(:white) { create(:race, qualtrics_code: 5) }
let(:middle_eastern) { create(:race, qualtrics_code: 8) }
let(:unknown_race) { create(:race, qualtrics_code: 99) }
let(:multiracial) { create(:race, qualtrics_code: 100) }
let(:female) { create(:gender, qualtrics_code: 1) }
let(:male) { create(:gender, qualtrics_code: 2) }
let(:another_gender) { create(:gender, qualtrics_code: 3) }
let(:non_binary) { create(:gender, qualtrics_code: 4) }
let(:unknown_gender) { create(:gender, qualtrics_code: 99) }
before :each do
american_indian
asian
black
latinx
white
middle_eastern
unknown_race
multiracial
female
male
another_gender
non_binary
unknown_gender
end
after :each do
DatabaseCleaner.clean
end
xdescribe "#process_races" do
context "as a standalone function" do
it "race codes of 6 or 7 get classified as an unknown race" do
codes = ["NA"]
expect(StudentLoader.process_races(codes:)).to eq [unknown_race]
codes = []
expect(StudentLoader.process_races(codes:)).to eq [unknown_race]
codes = [1]
expect(StudentLoader.process_races(codes:)).to eq [american_indian]
codes = [2]
expect(StudentLoader.process_races(codes:)).to eq [asian]
codes = [3]
expect(StudentLoader.process_races(codes:)).to eq [black]
codes = [4]
expect(StudentLoader.process_races(codes:)).to eq [latinx]
codes = [5]
expect(StudentLoader.process_races(codes:)).to eq [white]
codes = [8]
expect(StudentLoader.process_races(codes:)).to eq [middle_eastern]
codes = [6]
expect(StudentLoader.process_races(codes:)).to eq [unknown_race]
codes = [7]
expect(StudentLoader.process_races(codes:)).to eq [unknown_race]
codes = [6, 7]
expect(StudentLoader.process_races(codes:)).to eq [unknown_race]
codes = [1, 6, 7]
expect(StudentLoader.process_races(codes:)).to eq [american_indian]
codes = [1, 6, 7, 2]
expect(StudentLoader.process_races(codes:)).to eq [american_indian, asian, multiracial]
codes = [3, 6, 7, 6, 6, 7, 7, 6, 2]
expect(StudentLoader.process_races(codes:)).to eq [black, asian, multiracial]
codes = [8, 2]
expect(StudentLoader.process_races(codes:)).to eq [middle_eastern, asian, multiracial]
end
end
end
xdescribe "#add_multiracial_designation" do
it "adds the multiracial race code to the list of races" do
races = [unknown_race]
expect(StudentLoader.add_multiracial_designation(races:)).to eq [unknown_race]
races = [american_indian, asian]
expect(StudentLoader.add_multiracial_designation(races:)).to eq [american_indian, asian, multiracial]
races = [white]
expect(StudentLoader.add_multiracial_designation(races:)).to eq [white]
end
end
# This fails in CI because github does not know what the key derivation salt is.
# I'm not sure how to securely set the key derivation salt as an environment variable in CI
describe "self.load_data" do
context "load student data for all schools" do
before :each do
SurveyResponsesDataLoader.new.load_data filepath: path_to_student_responses
StudentLoader.load_data filepath: path_to_student_responses
end
it "ensures student responses load correctly" do
assigns_student_to_the_survey_item_responses
assigns_races_to_students
is_idempotent_for_students
end
end
# TODO: get this test to run correctly. Since we are no longer seeding, we need to define schools, and districts; some Lowell, some not
xcontext "When using the rule to skip non Lowell schools" do
before :each do
SurveyResponsesDataLoader.new.load_data filepath: path_to_student_responses
StudentLoader.load_data filepath: path_to_student_responses, rules: [Rule::SkipNonLowellSchools]
end
it "only loads student data for lowell" do
expect(Student.find_by_response_id("student_survey_response_1")).to eq nil
expect(Student.find_by_response_id("student_survey_response_3").races).to eq [unknown_race]
expect(Student.find_by_response_id("student_survey_response_4").races).to eq [unknown_race]
expect(Student.find_by_response_id("student_survey_response_5").races).to eq [american_indian, asian, black, latinx, white,
middle_eastern, multiracial]
expect(Student.find_by_response_id("student_survey_response_6").races).to eq [american_indian, asian, black, latinx, white,
middle_eastern, multiracial]
expect(Student.find_by_response_id("student_survey_response_7").races).to eq [unknown_race]
end
end
end
end
def assigns_student_to_the_survey_item_responses
# The csv file has no responses for `student_survey_response_2` so we can't assign a student to nil responses
expect(SurveyItemResponse.find_by_response_id("student_survey_response_2")).to eq nil
response_ids = %w[student_survey_response_1 student_survey_response_3
student_survey_response_4
student_survey_response_5
student_survey_response_6
student_survey_response_7]
response_ids.each do |response_id|
responses = SurveyItemResponse.where(response_id:)
responses.each do |response|
expect(response.student).not_to eq nil
expect(response.student).to eq Student.find_by_response_id(response_id)
end
end
end
def assigns_races_to_students
expect(Student.find_by_response_id("student_survey_response_1").races).to eq [american_indian]
expect(Student.find_by_response_id("student_survey_response_2").races).to eq [asian, black, latinx, multiracial]
expect(Student.find_by_response_id("student_survey_response_3").races).to eq [unknown_race]
expect(Student.find_by_response_id("student_survey_response_4").races).to eq [unknown_race]
expect(Student.find_by_response_id("student_survey_response_5").races).to eq [american_indian, asian, black, latinx, white,
middle_eastern, multiracial]
expect(Student.find_by_response_id("student_survey_response_6").races).to eq [american_indian, asian, black, latinx, white,
middle_eastern, multiracial]
expect(Student.find_by_response_id("student_survey_response_7").races).to eq [unknown_race]
end
def is_idempotent_for_students
number_of_students = Student.count
number_of_responses = SurveyItemResponse.count
StudentLoader.load_data filepath: path_to_student_responses
expect(Student.count).to eq number_of_students
expect(SurveyItemResponse.count).to eq number_of_responses
end

@ -51,13 +51,24 @@ describe SurveyResponsesDataLoader do
let(:another_gender) { create(:gender, qualtrics_code: 3) }
let(:non_binary) { create(:gender, qualtrics_code: 4) }
let(:unknown_gender) { create(:gender, qualtrics_code: 99) }
let(:low_income) { create(:income, designation: "Economically Disadvantaged Y") }
let(:high_income) { create(:income, designation: "Economically Disadvantaged N") }
let(:unknown_income) { create(:income, designation: "Unknown") }
let(:yes_ell) { create(:ell, designation: "ELL") }
let(:not_ell) { create(:ell, designation: "Not ELL") }
let(:unknown_ell) { create(:ell, designation: "Unknown") }
let(:american_indian) { create(:race, qualtrics_code: 1) }
let(:asian) { create(:race, qualtrics_code: 2) }
let(:black) { create(:race, qualtrics_code: 3) }
let(:latinx) { create(:race, qualtrics_code: 4) }
let(:white) { create(:race, qualtrics_code: 5) }
let(:middle_eastern) { create(:race, qualtrics_code: 8) }
let(:unknown_race) { create(:race, qualtrics_code: 99) }
let(:multiracial) { create(:race, qualtrics_code: 100) }
let(:setup) do
ay_2020_21
ay_2022_23
@ -87,17 +98,29 @@ describe SurveyResponsesDataLoader do
s_emsa_q1
s_emsa_q2
s_emsa_q3
female
male
another_gender
non_binary
unknown_gender
low_income
high_income
unknown_income
yes_ell
not_ell
unknown_ell
american_indian
asian
black
latinx
white
middle_eastern
unknown_race
multiracial
end
before :each do

Loading…
Cancel
Save