mirror of
https://github.com/edcommonwealth/sqm-dashboards.git
synced 2026-03-07 21:48:16 -08:00
feat: load student responses in the same pass as loading the survey responses
chore: remove student loader since loading students is now done with the survey response loader
This commit is contained in:
parent
9110be31c6
commit
019b954ffa
7 changed files with 64 additions and 303 deletions
|
|
@ -1,64 +0,0 @@
|
|||
# frozen_string_literal: true
|
||||
|
||||
class StudentLoader
|
||||
def self.load_data(filepath:, rules: [])
|
||||
File.open(filepath) do |file|
|
||||
headers = file.first
|
||||
headers_array = headers.split(",")
|
||||
|
||||
file.lazy.each_slice(1_000) do |lines|
|
||||
CSV.parse(lines.join, headers:).map do |row|
|
||||
row = SurveyItemValues.new(row:, headers: headers_array, genders: nil, survey_items: nil, schools:)
|
||||
next if rules.any? do |rule|
|
||||
rule.new(row:).skip_row?
|
||||
end
|
||||
|
||||
process_row(row:)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def self.from_file(file:, rules: [])
|
||||
headers = file.gets
|
||||
headers_array = headers.split(",")
|
||||
|
||||
until file.eof?
|
||||
line = file.gets
|
||||
next unless line.present?
|
||||
|
||||
CSV.parse(line, headers:).map do |row|
|
||||
row = SurveyItemValues.new(row:, headers: headers_array, genders: nil, survey_items: nil, schools:)
|
||||
next if rules.any? do |rule|
|
||||
rule.new(row:).skip_row?
|
||||
end
|
||||
|
||||
process_row(row:)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def self.process_row(row:)
|
||||
student = Student.find_or_create_by(response_id: row.response_id, lasid: row.lasid)
|
||||
student.races.delete_all
|
||||
races = row.races
|
||||
races.map do |race|
|
||||
student.races << race
|
||||
end
|
||||
assign_student_to_responses(student:, response_id: row.response_id)
|
||||
end
|
||||
|
||||
def self.schools
|
||||
@schools ||= School.all.map { |school| [school.dese_id, school] }.to_h
|
||||
end
|
||||
|
||||
def self.assign_student_to_responses(student:, response_id:)
|
||||
responses = SurveyItemResponse.where(response_id:)
|
||||
loadable_responses = responses.map do |response|
|
||||
response.student = student
|
||||
response
|
||||
end
|
||||
|
||||
SurveyItemResponse.import(loadable_responses.flatten.compact, batch_size: 1_000, on_duplicate_key_update: :all)
|
||||
end
|
||||
end
|
||||
|
|
@ -116,20 +116,24 @@ class SurveyItemValues
|
|||
end
|
||||
|
||||
def gender
|
||||
gender_code = value_from(pattern: /Gender|What is your gender?|What is your gender? - Selected Choice/i)
|
||||
gender_code ||= 99
|
||||
gender_code = gender_code.to_i
|
||||
gender_code = 4 if gender_code == 3
|
||||
gender_code = 99 if gender_code.zero?
|
||||
genders[gender_code]
|
||||
@gender ||= begin
|
||||
gender_code = value_from(pattern: /Gender|What is your gender?|What is your gender? - Selected Choice/i)
|
||||
gender_code ||= 99
|
||||
gender_code = gender_code.to_i
|
||||
gender_code = 4 if gender_code == 3
|
||||
gender_code = 99 if gender_code.zero?
|
||||
genders[gender_code]
|
||||
end
|
||||
end
|
||||
|
||||
def races
|
||||
race_codes = value_from(pattern: /RACE/i)
|
||||
race_codes ||= value_from(pattern: %r{What is your race/ethnicity?(Please select all that apply) - Selected Choice}i)
|
||||
race_codes ||= value_from(pattern: /Race Secondary/i) || ""
|
||||
race_codes = race_codes.split(",").map(&:to_i) || []
|
||||
process_races(codes: race_codes)
|
||||
@races ||= begin
|
||||
race_codes = value_from(pattern: /RACE/i)
|
||||
race_codes ||= value_from(pattern: %r{What is your race/ethnicity?(Please select all that apply) - Selected Choice}i)
|
||||
race_codes ||= value_from(pattern: /Race Secondary/i) || ""
|
||||
race_codes = race_codes.split(",").map(&:to_i) || []
|
||||
process_races(codes: race_codes)
|
||||
end
|
||||
end
|
||||
|
||||
def lasid
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ class SurveyResponsesDataLoader
|
|||
process_row(row: SurveyItemValues.new(row:, headers: headers_array, genders:, survey_items: all_survey_items, schools:),
|
||||
rules:)
|
||||
end
|
||||
SurveyItemResponse.import survey_item_responses.compact.flatten, batch_size: 500
|
||||
SurveyItemResponse.import survey_item_responses.compact.flatten, batch_size: 500, on_duplicate_key_update: :all
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
@ -23,7 +23,6 @@ class SurveyResponsesDataLoader
|
|||
all_survey_items = survey_items(headers:)
|
||||
|
||||
survey_item_responses = []
|
||||
row_count = 0
|
||||
until file.eof?
|
||||
line = file.gets
|
||||
next unless line.present?
|
||||
|
|
@ -32,16 +31,9 @@ class SurveyResponsesDataLoader
|
|||
survey_item_responses << process_row(row: SurveyItemValues.new(row:, headers: headers_array, genders:, survey_items: all_survey_items, schools:),
|
||||
rules:)
|
||||
end
|
||||
|
||||
row_count += 1
|
||||
next unless row_count == 1000
|
||||
|
||||
SurveyItemResponse.import survey_item_responses.compact.flatten, batch_size: 1000
|
||||
survey_item_responses = []
|
||||
row_count = 0
|
||||
end
|
||||
|
||||
SurveyItemResponse.import survey_item_responses.compact.flatten, batch_size: 1000
|
||||
SurveyItemResponse.import survey_item_responses.compact.flatten, batch_size: 1000, on_duplicate_key_update: :all
|
||||
end
|
||||
|
||||
private
|
||||
|
|
@ -78,6 +70,13 @@ class SurveyResponsesDataLoader
|
|||
end
|
||||
|
||||
def process_survey_items(row:)
|
||||
student = Student.find_or_create_by(response_id: row.response_id, lasid: row.lasid)
|
||||
student.races.delete_all
|
||||
races = row.races
|
||||
races.map do |race|
|
||||
student.races << race
|
||||
end
|
||||
|
||||
row.survey_items.map do |survey_item|
|
||||
likert_score = row.likert_score(survey_item_id: survey_item.survey_item_id) || next
|
||||
|
||||
|
|
@ -86,23 +85,30 @@ class SurveyResponsesDataLoader
|
|||
next
|
||||
end
|
||||
response = row.survey_item_response(survey_item:)
|
||||
create_or_update_response(survey_item_response: response, likert_score:, row:, survey_item:)
|
||||
create_or_update_response(survey_item_response: response, likert_score:, row:, survey_item:, student:)
|
||||
end.compact
|
||||
end
|
||||
|
||||
def create_or_update_response(survey_item_response:, likert_score:, row:, survey_item:)
|
||||
def create_or_update_response(survey_item_response:, likert_score:, row:, survey_item:, student:)
|
||||
gender = row.gender
|
||||
grade = row.grade
|
||||
income = incomes[row.income.parameterize]
|
||||
ell = ells[row.ell]
|
||||
sped = speds[row.sped]
|
||||
|
||||
if survey_item_response.present?
|
||||
survey_item_response.update!(likert_score:, grade:, gender:, recorded_date: row.recorded_date, income:, ell:,
|
||||
sped:)
|
||||
[]
|
||||
survey_item_response.likert_score = likert_score
|
||||
survey_item_response.grade = grade
|
||||
survey_item_response.gender = gender
|
||||
survey_item_response.recorded_date = row.recorded_date
|
||||
survey_item_response.income = income
|
||||
survey_item_response.ell = ell
|
||||
survey_item_response.sped = sped
|
||||
survey_item_response.student = student
|
||||
survey_item_response
|
||||
else
|
||||
SurveyItemResponse.new(response_id: row.response_id, academic_year: row.academic_year, school: row.school, survey_item:,
|
||||
likert_score:, grade:, gender:, recorded_date: row.recorded_date, income:, ell:, sped:)
|
||||
likert_score:, grade:, gender:, recorded_date: row.recorded_date, income:, ell:, sped:, student:)
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
|||
|
|
@ -11,15 +11,6 @@ namespace :data do
|
|||
end
|
||||
puts "=====================> Completed loading #{SurveyItemResponse.count} survey responses"
|
||||
|
||||
Sftp::Directory.open(path:) do |file|
|
||||
StudentLoader.from_file(file:, rules: [Rule::SkipNonLowellSchools])
|
||||
end
|
||||
puts "=====================> Completed loading #{Student.count - student_count} students. #{Student.count} total students"
|
||||
|
||||
puts "Resetting race scores"
|
||||
RaceScoreLoader.reset(fast_processing: false)
|
||||
puts "=====================> Completed loading #{RaceScore.count} race scores"
|
||||
|
||||
Rails.cache.clear
|
||||
end
|
||||
|
||||
|
|
@ -33,11 +24,6 @@ namespace :data do
|
|||
end
|
||||
puts "=====================> Completed loading #{SurveyItemResponse.count - survey_item_response_count} survey responses. #{SurveyItemResponse.count} total responses in the database"
|
||||
|
||||
Sftp::Directory.open(path:) do |file|
|
||||
StudentLoader.from_file(file:, rules: [])
|
||||
end
|
||||
puts "=====================> Completed loading #{Student.count - student_count} students. #{Student.count} total students"
|
||||
|
||||
Rails.cache.clear
|
||||
end
|
||||
|
||||
|
|
@ -67,24 +53,6 @@ namespace :data do
|
|||
puts "=====================> Completed loading #{AdminDataValue.count - original_count} admin data values"
|
||||
end
|
||||
|
||||
desc "load students"
|
||||
task load_students: :environment do
|
||||
SurveyItemResponse.update_all(student_id: nil)
|
||||
StudentRace.delete_all
|
||||
Student.delete_all
|
||||
Dir.glob(Rails.root.join("data", "survey_responses", "*student*.csv")).each do |file|
|
||||
puts "=====================> Loading student data from csv at path: #{file}"
|
||||
StudentLoader.load_data filepath: file
|
||||
end
|
||||
puts "=====================> Completed loading #{Student.count} students"
|
||||
|
||||
puts "Resetting race scores"
|
||||
RaceScoreLoader.reset(fast_processing: false)
|
||||
puts "=====================> Completed loading #{RaceScore.count} survey responses"
|
||||
|
||||
Rails.cache.clear
|
||||
end
|
||||
|
||||
desc "reset all cache counters"
|
||||
task reset_cache_counters: :environment do
|
||||
puts "=====================> Resetting Category counters"
|
||||
|
|
|
|||
|
|
@ -91,34 +91,18 @@ namespace :one_off do
|
|||
# should be somewhere near 295738
|
||||
end
|
||||
|
||||
desc "delete 2023-24 survey responses"
|
||||
task delete_survey_responses_2023_24: :environment do
|
||||
response_count = SurveyItemResponse.all.count
|
||||
SurveyItemResponse.where(academic_year: AcademicYear.find_by_range("2023-24")).delete_all
|
||||
|
||||
puts "=====================> Deleted #{response_count - SurveyItemResponse.all.count} survey responses"
|
||||
end
|
||||
|
||||
desc "load survey responses for lowell schools 2022-23"
|
||||
task load_survey_responses_for_lowell_2022_23: :environment do
|
||||
desc "load survey responses"
|
||||
task load_survey_responses: :environment do
|
||||
survey_item_response_count = SurveyItemResponse.count
|
||||
student_count = Student.count
|
||||
path = "/data/survey_responses/2022-23/"
|
||||
path = "/data/survey_responses/clean/"
|
||||
schools = District.find_by_slug("maynard-public-schools").schools
|
||||
|
||||
Sftp::Directory.open(path:) do |file|
|
||||
SurveyResponsesDataLoader.new.from_file(file:)
|
||||
end
|
||||
puts "=====================> Completed loading #{SurveyItemResponse.count - survey_item_response_count} survey responses. #{SurveyItemResponse.count} total responses in the database"
|
||||
|
||||
Sftp::Directory.open(path:) do |file|
|
||||
StudentLoader.from_file(file:, rules: [Rule::SkipNonLowellSchools])
|
||||
end
|
||||
puts "=====================> Completed loading #{Student.count - student_count} students. #{Student.count} total students"
|
||||
|
||||
puts "Resetting race scores"
|
||||
RaceScoreLoader.reset(fast_processing: false, academic_years: [AcademicYear.find_by_range("2022-23")])
|
||||
puts "=====================> Completed loading #{RaceScore.count} race scores"
|
||||
|
||||
Rails.cache.clear
|
||||
end
|
||||
end
|
||||
|
|
|
|||
|
|
@ -1,160 +0,0 @@
|
|||
require "rails_helper"
|
||||
|
||||
describe StudentLoader do
|
||||
let(:path_to_student_responses) { Rails.root.join("spec", "fixtures", "test_2020-21_student_survey_responses.csv") }
|
||||
let(:american_indian) { create(:race, qualtrics_code: 1) }
|
||||
let(:asian) { create(:race, qualtrics_code: 2) }
|
||||
let(:black) { create(:race, qualtrics_code: 3) }
|
||||
let(:latinx) { create(:race, qualtrics_code: 4) }
|
||||
let(:white) { create(:race, qualtrics_code: 5) }
|
||||
let(:middle_eastern) { create(:race, qualtrics_code: 8) }
|
||||
let(:unknown_race) { create(:race, qualtrics_code: 99) }
|
||||
let(:multiracial) { create(:race, qualtrics_code: 100) }
|
||||
let(:female) { create(:gender, qualtrics_code: 1) }
|
||||
let(:male) { create(:gender, qualtrics_code: 2) }
|
||||
let(:another_gender) { create(:gender, qualtrics_code: 3) }
|
||||
let(:non_binary) { create(:gender, qualtrics_code: 4) }
|
||||
let(:unknown_gender) { create(:gender, qualtrics_code: 99) }
|
||||
|
||||
before :each do
|
||||
american_indian
|
||||
asian
|
||||
black
|
||||
latinx
|
||||
white
|
||||
middle_eastern
|
||||
unknown_race
|
||||
multiracial
|
||||
female
|
||||
male
|
||||
another_gender
|
||||
non_binary
|
||||
unknown_gender
|
||||
end
|
||||
|
||||
after :each do
|
||||
DatabaseCleaner.clean
|
||||
end
|
||||
xdescribe "#process_races" do
|
||||
context "as a standalone function" do
|
||||
it "race codes of 6 or 7 get classified as an unknown race" do
|
||||
codes = ["NA"]
|
||||
expect(StudentLoader.process_races(codes:)).to eq [unknown_race]
|
||||
codes = []
|
||||
expect(StudentLoader.process_races(codes:)).to eq [unknown_race]
|
||||
codes = [1]
|
||||
expect(StudentLoader.process_races(codes:)).to eq [american_indian]
|
||||
codes = [2]
|
||||
expect(StudentLoader.process_races(codes:)).to eq [asian]
|
||||
codes = [3]
|
||||
expect(StudentLoader.process_races(codes:)).to eq [black]
|
||||
codes = [4]
|
||||
expect(StudentLoader.process_races(codes:)).to eq [latinx]
|
||||
codes = [5]
|
||||
expect(StudentLoader.process_races(codes:)).to eq [white]
|
||||
codes = [8]
|
||||
expect(StudentLoader.process_races(codes:)).to eq [middle_eastern]
|
||||
codes = [6]
|
||||
expect(StudentLoader.process_races(codes:)).to eq [unknown_race]
|
||||
codes = [7]
|
||||
expect(StudentLoader.process_races(codes:)).to eq [unknown_race]
|
||||
codes = [6, 7]
|
||||
expect(StudentLoader.process_races(codes:)).to eq [unknown_race]
|
||||
codes = [1, 6, 7]
|
||||
expect(StudentLoader.process_races(codes:)).to eq [american_indian]
|
||||
codes = [1, 6, 7, 2]
|
||||
expect(StudentLoader.process_races(codes:)).to eq [american_indian, asian, multiracial]
|
||||
codes = [3, 6, 7, 6, 6, 7, 7, 6, 2]
|
||||
expect(StudentLoader.process_races(codes:)).to eq [black, asian, multiracial]
|
||||
codes = [8, 2]
|
||||
expect(StudentLoader.process_races(codes:)).to eq [middle_eastern, asian, multiracial]
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
xdescribe "#add_multiracial_designation" do
|
||||
it "adds the multiracial race code to the list of races" do
|
||||
races = [unknown_race]
|
||||
expect(StudentLoader.add_multiracial_designation(races:)).to eq [unknown_race]
|
||||
races = [american_indian, asian]
|
||||
expect(StudentLoader.add_multiracial_designation(races:)).to eq [american_indian, asian, multiracial]
|
||||
races = [white]
|
||||
expect(StudentLoader.add_multiracial_designation(races:)).to eq [white]
|
||||
end
|
||||
end
|
||||
|
||||
# This fails in CI because github does not know what the key derivation salt is.
|
||||
# I'm not sure how to securely set the key derivation salt as an environment variable in CI
|
||||
describe "self.load_data" do
|
||||
context "load student data for all schools" do
|
||||
before :each do
|
||||
SurveyResponsesDataLoader.new.load_data filepath: path_to_student_responses
|
||||
StudentLoader.load_data filepath: path_to_student_responses
|
||||
end
|
||||
|
||||
it "ensures student responses load correctly" do
|
||||
assigns_student_to_the_survey_item_responses
|
||||
assigns_races_to_students
|
||||
is_idempotent_for_students
|
||||
end
|
||||
end
|
||||
|
||||
# TODO: get this test to run correctly. Since we are no longer seeding, we need to define schools, and districts; some Lowell, some not
|
||||
xcontext "When using the rule to skip non Lowell schools" do
|
||||
before :each do
|
||||
SurveyResponsesDataLoader.new.load_data filepath: path_to_student_responses
|
||||
StudentLoader.load_data filepath: path_to_student_responses, rules: [Rule::SkipNonLowellSchools]
|
||||
end
|
||||
|
||||
it "only loads student data for lowell" do
|
||||
expect(Student.find_by_response_id("student_survey_response_1")).to eq nil
|
||||
expect(Student.find_by_response_id("student_survey_response_3").races).to eq [unknown_race]
|
||||
expect(Student.find_by_response_id("student_survey_response_4").races).to eq [unknown_race]
|
||||
expect(Student.find_by_response_id("student_survey_response_5").races).to eq [american_indian, asian, black, latinx, white,
|
||||
middle_eastern, multiracial]
|
||||
expect(Student.find_by_response_id("student_survey_response_6").races).to eq [american_indian, asian, black, latinx, white,
|
||||
middle_eastern, multiracial]
|
||||
expect(Student.find_by_response_id("student_survey_response_7").races).to eq [unknown_race]
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def assigns_student_to_the_survey_item_responses
|
||||
# The csv file has no responses for `student_survey_response_2` so we can't assign a student to nil responses
|
||||
expect(SurveyItemResponse.find_by_response_id("student_survey_response_2")).to eq nil
|
||||
|
||||
response_ids = %w[student_survey_response_1 student_survey_response_3
|
||||
student_survey_response_4
|
||||
student_survey_response_5
|
||||
student_survey_response_6
|
||||
student_survey_response_7]
|
||||
|
||||
response_ids.each do |response_id|
|
||||
responses = SurveyItemResponse.where(response_id:)
|
||||
responses.each do |response|
|
||||
expect(response.student).not_to eq nil
|
||||
expect(response.student).to eq Student.find_by_response_id(response_id)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def assigns_races_to_students
|
||||
expect(Student.find_by_response_id("student_survey_response_1").races).to eq [american_indian]
|
||||
expect(Student.find_by_response_id("student_survey_response_2").races).to eq [asian, black, latinx, multiracial]
|
||||
expect(Student.find_by_response_id("student_survey_response_3").races).to eq [unknown_race]
|
||||
expect(Student.find_by_response_id("student_survey_response_4").races).to eq [unknown_race]
|
||||
expect(Student.find_by_response_id("student_survey_response_5").races).to eq [american_indian, asian, black, latinx, white,
|
||||
middle_eastern, multiracial]
|
||||
expect(Student.find_by_response_id("student_survey_response_6").races).to eq [american_indian, asian, black, latinx, white,
|
||||
middle_eastern, multiracial]
|
||||
expect(Student.find_by_response_id("student_survey_response_7").races).to eq [unknown_race]
|
||||
end
|
||||
|
||||
def is_idempotent_for_students
|
||||
number_of_students = Student.count
|
||||
number_of_responses = SurveyItemResponse.count
|
||||
StudentLoader.load_data filepath: path_to_student_responses
|
||||
expect(Student.count).to eq number_of_students
|
||||
expect(SurveyItemResponse.count).to eq number_of_responses
|
||||
end
|
||||
|
|
@ -51,13 +51,24 @@ describe SurveyResponsesDataLoader do
|
|||
let(:another_gender) { create(:gender, qualtrics_code: 3) }
|
||||
let(:non_binary) { create(:gender, qualtrics_code: 4) }
|
||||
let(:unknown_gender) { create(:gender, qualtrics_code: 99) }
|
||||
|
||||
let(:low_income) { create(:income, designation: "Economically Disadvantaged – Y") }
|
||||
let(:high_income) { create(:income, designation: "Economically Disadvantaged – N") }
|
||||
let(:unknown_income) { create(:income, designation: "Unknown") }
|
||||
|
||||
let(:yes_ell) { create(:ell, designation: "ELL") }
|
||||
let(:not_ell) { create(:ell, designation: "Not ELL") }
|
||||
let(:unknown_ell) { create(:ell, designation: "Unknown") }
|
||||
|
||||
let(:american_indian) { create(:race, qualtrics_code: 1) }
|
||||
let(:asian) { create(:race, qualtrics_code: 2) }
|
||||
let(:black) { create(:race, qualtrics_code: 3) }
|
||||
let(:latinx) { create(:race, qualtrics_code: 4) }
|
||||
let(:white) { create(:race, qualtrics_code: 5) }
|
||||
let(:middle_eastern) { create(:race, qualtrics_code: 8) }
|
||||
let(:unknown_race) { create(:race, qualtrics_code: 99) }
|
||||
let(:multiracial) { create(:race, qualtrics_code: 100) }
|
||||
|
||||
let(:setup) do
|
||||
ay_2020_21
|
||||
ay_2022_23
|
||||
|
|
@ -87,17 +98,29 @@ describe SurveyResponsesDataLoader do
|
|||
s_emsa_q1
|
||||
s_emsa_q2
|
||||
s_emsa_q3
|
||||
|
||||
female
|
||||
male
|
||||
another_gender
|
||||
non_binary
|
||||
unknown_gender
|
||||
|
||||
low_income
|
||||
high_income
|
||||
unknown_income
|
||||
|
||||
yes_ell
|
||||
not_ell
|
||||
unknown_ell
|
||||
|
||||
american_indian
|
||||
asian
|
||||
black
|
||||
latinx
|
||||
white
|
||||
middle_eastern
|
||||
unknown_race
|
||||
multiracial
|
||||
end
|
||||
|
||||
before :each do
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue