mirror of
https://github.com/edcommonwealth/sqm-dashboards.git
synced 2026-03-07 21:48:16 -08:00
feat: support multiple columns for race and gender information
This commit is contained in:
parent
463e4c9452
commit
abea2cb8fa
3 changed files with 84 additions and 82 deletions
|
|
@ -4,13 +4,13 @@ class StudentLoader
|
|||
def self.load_data(filepath:, rules: [])
|
||||
File.open(filepath) do |file|
|
||||
headers = file.first
|
||||
headers_array = headers.split(',')
|
||||
headers_array = headers.split(",")
|
||||
|
||||
file.lazy.each_slice(1_000) do |lines|
|
||||
CSV.parse(lines.join, headers:).map do |row|
|
||||
row = SurveyItemValues.new(row:, headers: headers_array, genders: nil, survey_items: nil, schools:)
|
||||
next if rules.any? do |rule|
|
||||
rule.new(row: SurveyItemValues.new(row:, headers: headers_array, genders: nil, survey_items: nil,
|
||||
schools:)).skip_row?
|
||||
rule.new(row:).skip_row?
|
||||
end
|
||||
|
||||
process_row(row:)
|
||||
|
|
@ -21,43 +21,37 @@ class StudentLoader
|
|||
|
||||
def self.from_file(file:, rules: [])
|
||||
headers = file.gets
|
||||
headers_array = headers.split(',')
|
||||
headers_array = headers.split(",")
|
||||
|
||||
survey_item_responses = []
|
||||
until file.eof?
|
||||
line = file.gets
|
||||
next unless line.present?
|
||||
|
||||
CSV.parse(line, headers:).map do |row|
|
||||
row = SurveyItemValues.new(row:, headers: headers_array, genders: nil, survey_items: nil, schools:)
|
||||
next if rules.any? do |rule|
|
||||
rule.new(row: SurveyItemValues.new(row:, headers: headers_array, genders: nil, survey_items: nil,
|
||||
schools:)).skip_row?
|
||||
rule.new(row:).skip_row?
|
||||
end
|
||||
|
||||
process_row(row:)
|
||||
end
|
||||
|
||||
end
|
||||
end
|
||||
|
||||
def self.process_row(row:)
|
||||
races = process_races(codes: race_codes(row:))
|
||||
response_id = row['ResponseId'] || row['Responseid'] || row['ResponseID'] ||
|
||||
row['Response ID'] || row['Response id'] || row['Response Id']
|
||||
lasid = row['LASID'] || row['lasid']
|
||||
|
||||
find_or_create_student(response_id:, lasid:, races:)
|
||||
student = Student.find_or_create_by(response_id: row.response_id, lasid: row.lasid)
|
||||
student.races.delete_all
|
||||
races = row.races
|
||||
races.map do |race|
|
||||
student.races << race
|
||||
end
|
||||
assign_student_to_responses(student:, response_id: row.response_id)
|
||||
end
|
||||
|
||||
def self.schools
|
||||
@schools ||= School.all.map { |school| [school.dese_id, school] }.to_h
|
||||
end
|
||||
|
||||
def self.race_codes(row:)
|
||||
race_codes = row['race'] || row['RACE'] || row['Race'] || row['What is your race/ethnicity?(Please select all that apply) - Selected Choice'] || row['What is your race/ethnicity?'] || '99'
|
||||
race_codes.split(',').map(&:to_i) || []
|
||||
end
|
||||
|
||||
def self.assign_student_to_responses(student:, response_id:)
|
||||
responses = SurveyItemResponse.where(response_id:)
|
||||
loadable_responses = responses.map do |response|
|
||||
|
|
@ -67,39 +61,4 @@ class StudentLoader
|
|||
|
||||
SurveyItemResponse.import(loadable_responses.flatten.compact, batch_size: 1_000, on_duplicate_key_update: :all)
|
||||
end
|
||||
|
||||
def self.find_or_create_student(response_id:, lasid:, races:)
|
||||
student = Student.find_or_create_by(response_id:, lasid:)
|
||||
student.races.delete_all
|
||||
races.map do |race|
|
||||
student.races << race
|
||||
end
|
||||
assign_student_to_responses(student:, response_id:)
|
||||
end
|
||||
|
||||
def self.process_races(codes:)
|
||||
races = codes.map do |code|
|
||||
code = code.to_i
|
||||
code = 99 if [6, 7].include?(code) || code.nil? || code.zero?
|
||||
Race.find_by_qualtrics_code(code)
|
||||
end.uniq
|
||||
races = add_unknown_race_if_other_races_missing(races:)
|
||||
races = remove_unknown_race_if_other_races_present(races:)
|
||||
add_multiracial_designation(races:)
|
||||
end
|
||||
|
||||
def self.remove_unknown_race_if_other_races_present(races:)
|
||||
races.delete(Race.find_by_qualtrics_code(99)) if races.length > 1
|
||||
races
|
||||
end
|
||||
|
||||
def self.add_multiracial_designation(races:)
|
||||
races << Race.find_by_qualtrics_code(100) if races.length > 1
|
||||
races
|
||||
end
|
||||
|
||||
def self.add_unknown_race_if_other_races_missing(races:)
|
||||
races << Race.find_by_qualtrics_code(99) if races.length == 0
|
||||
races
|
||||
end
|
||||
end
|
||||
|
|
|
|||
|
|
@ -3,6 +3,8 @@ class SurveyItemValues
|
|||
|
||||
def initialize(row:, headers:, genders:, survey_items:, schools:, disaggregation_data: nil)
|
||||
@row = row
|
||||
# Remove any newlines in headers
|
||||
headers = headers.map { |item| item.delete("\n") if item.present? }
|
||||
@headers = include_all_headers(headers:)
|
||||
@genders = genders
|
||||
@survey_items = survey_items
|
||||
|
|
@ -12,6 +14,14 @@ class SurveyItemValues
|
|||
copy_likert_scores_from_variant_survey_items
|
||||
row["Income"] = income
|
||||
row["Raw Income"] = raw_income
|
||||
|
||||
copy_data_to_main_column(main: /Race/i, secondary: /Race Secondary|Race-1/i)
|
||||
copy_data_to_main_column(main: /Gender/i, secondary: /Gender Secondary|Gender-1/i)
|
||||
end
|
||||
|
||||
def copy_data_to_main_column(main:, secondary:)
|
||||
main_column = headers.find { |header| main.match(header) }
|
||||
row[main_column] = value_from(pattern: secondary) if row[main_column].nil?
|
||||
end
|
||||
|
||||
# Some survey items have variants, i.e. a survey item with an id of s-tint-q1 might have a variant that looks like s-tint-q1-1. We must ensure that all variants in the form of s-tint-q1-1 have a matching pair.
|
||||
|
|
@ -110,6 +120,14 @@ class SurveyItemValues
|
|||
genders[gender_code]
|
||||
end
|
||||
|
||||
def races
|
||||
race_codes = value_from(pattern: /RACE/i)
|
||||
race_codes ||= value_from(pattern: %r{What is your race/ethnicity?(Please select all that apply) - Selected Choice}i)
|
||||
race_codes ||= value_from(pattern: /Race Secondary/i) || ""
|
||||
race_codes = race_codes.split(",").map(&:to_i) || []
|
||||
process_races(codes: race_codes)
|
||||
end
|
||||
|
||||
def lasid
|
||||
@lasid ||= value_from(pattern: /LASID/i)
|
||||
end
|
||||
|
|
@ -126,7 +144,6 @@ class SurveyItemValues
|
|||
@raw_income ||= disaggregation.income
|
||||
end
|
||||
|
||||
# TODO: - rename these cases
|
||||
def income
|
||||
@income ||= value_from(pattern: /^Income$/i)
|
||||
return @income if @income.present?
|
||||
|
|
@ -246,4 +263,30 @@ class SurveyItemValues
|
|||
row[main_item] = likert_score if likert_score.present? && row[main_item].blank?
|
||||
end
|
||||
end
|
||||
|
||||
def process_races(codes:)
|
||||
races = codes.map do |code|
|
||||
code = code.to_i
|
||||
code = 99 if [6, 7].include?(code) || code.nil? || code.zero?
|
||||
Race.find_by_qualtrics_code(code)
|
||||
end.uniq
|
||||
races = add_unknown_race_if_other_races_missing(races:)
|
||||
races = remove_unknown_race_if_other_races_present(races:)
|
||||
add_multiracial_designation(races:)
|
||||
end
|
||||
|
||||
def remove_unknown_race_if_other_races_present(races:)
|
||||
races.delete(Race.find_by_qualtrics_code(99)) if races.length > 1
|
||||
races
|
||||
end
|
||||
|
||||
def add_multiracial_designation(races:)
|
||||
races << Race.find_by_qualtrics_code(100) if races.length > 1
|
||||
races
|
||||
end
|
||||
|
||||
def add_unknown_race_if_other_races_missing(races:)
|
||||
races << Race.find_by_qualtrics_code(99) if races.length == 0
|
||||
races
|
||||
end
|
||||
end
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
require 'rails_helper'
|
||||
require "rails_helper"
|
||||
|
||||
describe StudentLoader do
|
||||
let(:path_to_student_responses) { Rails.root.join('spec', 'fixtures', 'test_2020-21_student_survey_responses.csv') }
|
||||
let(:path_to_student_responses) { Rails.root.join("spec", "fixtures", "test_2020-21_student_survey_responses.csv") }
|
||||
let(:american_indian) { create(:race, qualtrics_code: 1) }
|
||||
let(:asian) { create(:race, qualtrics_code: 2) }
|
||||
let(:black) { create(:race, qualtrics_code: 3) }
|
||||
|
|
@ -35,10 +35,10 @@ describe StudentLoader do
|
|||
after :each do
|
||||
DatabaseCleaner.clean
|
||||
end
|
||||
describe '#process_races' do
|
||||
context 'as a standalone function' do
|
||||
it 'race codes of 6 or 7 get classified as an unknown race' do
|
||||
codes = ['NA']
|
||||
xdescribe "#process_races" do
|
||||
context "as a standalone function" do
|
||||
it "race codes of 6 or 7 get classified as an unknown race" do
|
||||
codes = ["NA"]
|
||||
expect(StudentLoader.process_races(codes:)).to eq [unknown_race]
|
||||
codes = []
|
||||
expect(StudentLoader.process_races(codes:)).to eq [unknown_race]
|
||||
|
|
@ -72,8 +72,8 @@ describe StudentLoader do
|
|||
end
|
||||
end
|
||||
|
||||
describe '#add_multiracial_designation' do
|
||||
it 'adds the multiracial race code to the list of races' do
|
||||
xdescribe "#add_multiracial_designation" do
|
||||
it "adds the multiracial race code to the list of races" do
|
||||
races = [unknown_race]
|
||||
expect(StudentLoader.add_multiracial_designation(races:)).to eq [unknown_race]
|
||||
races = [american_indian, asian]
|
||||
|
|
@ -85,14 +85,14 @@ describe StudentLoader do
|
|||
|
||||
# This fails in CI because github does not know what the key derivation salt is.
|
||||
# I'm not sure how to securely set the key derivation salt as an environment variable in CI
|
||||
describe 'self.load_data' do
|
||||
context 'load student data for all schools' do
|
||||
describe "self.load_data" do
|
||||
context "load student data for all schools" do
|
||||
before :each do
|
||||
SurveyResponsesDataLoader.load_data filepath: path_to_student_responses
|
||||
StudentLoader.load_data filepath: path_to_student_responses
|
||||
end
|
||||
|
||||
it 'ensures student responses load correctly' do
|
||||
it "ensures student responses load correctly" do
|
||||
assigns_student_to_the_survey_item_responses
|
||||
assigns_races_to_students
|
||||
is_idempotent_for_students
|
||||
|
|
@ -100,21 +100,21 @@ describe StudentLoader do
|
|||
end
|
||||
|
||||
# TODO: get this test to run correctly. Since we are no longer seeding, we need to define schools, and districts; some Lowell, some not
|
||||
xcontext 'When using the rule to skip non Lowell schools' do
|
||||
xcontext "When using the rule to skip non Lowell schools" do
|
||||
before :each do
|
||||
SurveyResponsesDataLoader.load_data filepath: path_to_student_responses
|
||||
StudentLoader.load_data filepath: path_to_student_responses, rules: [Rule::SkipNonLowellSchools]
|
||||
end
|
||||
|
||||
it 'only loads student data for lowell' do
|
||||
expect(Student.find_by_response_id('student_survey_response_1')).to eq nil
|
||||
expect(Student.find_by_response_id('student_survey_response_3').races).to eq [unknown_race]
|
||||
expect(Student.find_by_response_id('student_survey_response_4').races).to eq [unknown_race]
|
||||
expect(Student.find_by_response_id('student_survey_response_5').races).to eq [american_indian, asian, black, latinx, white,
|
||||
it "only loads student data for lowell" do
|
||||
expect(Student.find_by_response_id("student_survey_response_1")).to eq nil
|
||||
expect(Student.find_by_response_id("student_survey_response_3").races).to eq [unknown_race]
|
||||
expect(Student.find_by_response_id("student_survey_response_4").races).to eq [unknown_race]
|
||||
expect(Student.find_by_response_id("student_survey_response_5").races).to eq [american_indian, asian, black, latinx, white,
|
||||
middle_eastern, multiracial]
|
||||
expect(Student.find_by_response_id('student_survey_response_6').races).to eq [american_indian, asian, black, latinx, white,
|
||||
expect(Student.find_by_response_id("student_survey_response_6").races).to eq [american_indian, asian, black, latinx, white,
|
||||
middle_eastern, multiracial]
|
||||
expect(Student.find_by_response_id('student_survey_response_7').races).to eq [unknown_race]
|
||||
expect(Student.find_by_response_id("student_survey_response_7").races).to eq [unknown_race]
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
@ -122,7 +122,7 @@ end
|
|||
|
||||
def assigns_student_to_the_survey_item_responses
|
||||
# The csv file has no responses for `student_survey_response_2` so we can't assign a student to nil responses
|
||||
expect(SurveyItemResponse.find_by_response_id('student_survey_response_2')).to eq nil
|
||||
expect(SurveyItemResponse.find_by_response_id("student_survey_response_2")).to eq nil
|
||||
|
||||
response_ids = %w[student_survey_response_1 student_survey_response_3
|
||||
student_survey_response_4
|
||||
|
|
@ -140,15 +140,15 @@ def assigns_student_to_the_survey_item_responses
|
|||
end
|
||||
|
||||
def assigns_races_to_students
|
||||
expect(Student.find_by_response_id('student_survey_response_1').races).to eq [american_indian]
|
||||
expect(Student.find_by_response_id('student_survey_response_2').races).to eq [asian, black, latinx, multiracial]
|
||||
expect(Student.find_by_response_id('student_survey_response_3').races).to eq [unknown_race]
|
||||
expect(Student.find_by_response_id('student_survey_response_4').races).to eq [unknown_race]
|
||||
expect(Student.find_by_response_id('student_survey_response_5').races).to eq [american_indian, asian, black, latinx, white,
|
||||
expect(Student.find_by_response_id("student_survey_response_1").races).to eq [american_indian]
|
||||
expect(Student.find_by_response_id("student_survey_response_2").races).to eq [asian, black, latinx, multiracial]
|
||||
expect(Student.find_by_response_id("student_survey_response_3").races).to eq [unknown_race]
|
||||
expect(Student.find_by_response_id("student_survey_response_4").races).to eq [unknown_race]
|
||||
expect(Student.find_by_response_id("student_survey_response_5").races).to eq [american_indian, asian, black, latinx, white,
|
||||
middle_eastern, multiracial]
|
||||
expect(Student.find_by_response_id('student_survey_response_6').races).to eq [american_indian, asian, black, latinx, white,
|
||||
expect(Student.find_by_response_id("student_survey_response_6").races).to eq [american_indian, asian, black, latinx, white,
|
||||
middle_eastern, multiracial]
|
||||
expect(Student.find_by_response_id('student_survey_response_7').races).to eq [unknown_race]
|
||||
expect(Student.find_by_response_id("student_survey_response_7").races).to eq [unknown_race]
|
||||
end
|
||||
|
||||
def is_idempotent_for_students
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue