Convert gender and race text into Qualtrics codes during cleaning. Abide by 'prefer not to disclose' for self-reported race. Give priority to self-reported data, but use SIS information as a backup.

rpp-main
Nelson Jovel 2 years ago
parent 97ddb09167
commit f27a590c5a

@ -2,4 +2,21 @@ class Gender < ApplicationRecord
scope :by_qualtrics_code, lambda { scope :by_qualtrics_code, lambda {
all.map { |gender| [gender.qualtrics_code, gender] }.to_h all.map { |gender| [gender.qualtrics_code, gender] }.to_h
} }
# Translates a raw gender value (free text, a one-letter abbreviation, or a
# numeric code given as text) into the numeric Qualtrics gender code.
#
# Mapping:
#   "Female", "F", "1"                                           -> 1
#   "Male", "M", "2"                                             -> 2
#   "Another Gender", "Gender Identity not listed above", "3"    -> 4
#   "Non-Binary", "N", "4"                                       -> 4
#   "N/A", "NA", "#N/A"                                          -> nil
#   anything else (including nil / blank)                        -> 99
#
# One-letter abbreviations and digits must be the whole value. Previously
# they were matched anywhere in the text, so any word containing "n"
# (including "N/A" and "Unknown") was coded 4, anything containing "f" was
# coded 1, and the nil branch could never be reached.
#
# @param word [String, Integer, nil] raw gender value
# @return [Integer, nil] the Qualtrics code, or nil for an explicit N/A
def self.qualtrics_code_from(word)
  # Normalise: accept non-String input, ignore surrounding whitespace, and
  # tolerate an integer code rendered with a trailing ".0" (e.g. "1.0").
  text = word.to_s.strip.sub(/\.0+\z/, "")

  case text
  when %r{\A#*N/*A\z}i
    # Must be tested before the "N" abbreviation below.
    nil
  when /Female/i, /\A[F1]\z/i
    # "Female" is tested before "Male" because it contains "male".
    1
  when /Male/i, /\A[M2]\z/i
    2
  when /Another\s*Gender|Gender Identity not listed above/i, "3"
    # Code 3 is folded into 4, the same code used for non-binary.
    4
  when /Non[-\s]*Binary/i, /\A[N4]\z/i
    4
  else
    99
  end
end
end end

@ -3,4 +3,29 @@ class Race < ApplicationRecord
has_many :student_races has_many :student_races
has_many :students, through: :student_races has_many :students, through: :student_races
friendly_id :designation, use: [:slugged] friendly_id :designation, use: [:slugged]
# Translates a single raw race/ethnicity value (free text or a numeric code
# given as text) into the numeric Qualtrics race code.
#
# Mapping:
#   Native American / American Indian / Alaskan Native, "1"  -> 1
#   Asian / Pacific Islander, "2"                            -> 2
#   Black / African American, "3"                            -> 3
#   Hispanic / Latino / Latina / Latinx / Latine, "4"        -> 4
#   White / Caucasian, "5"                                   -> 5
#   Prefer not to disclose, "6"                              -> 6
#   Prefer to self-describe, "7"                             -> 7
#   Middle Eastern / North African, "8"                      -> 8
#   "N/A", "NA", "#N/A"                                      -> nil
#   anything else (including nil / blank)                    -> 99
#
# Numeric codes must be the whole value; previously a digit anywhere in the
# text selected that code. "Asian" must start at a word boundary so that
# "Caucasian" (which ends in "asian") is no longer coded as 2.
#
# @param word [String, Integer, nil] one raw race value (not a comma list)
# @return [Integer, nil] the Qualtrics code, or nil for an explicit N/A
def self.qualtrics_code_from(word)
  # Normalise: accept non-String input, ignore surrounding whitespace, and
  # tolerate an integer code rendered with a trailing ".0" (e.g. "5.0").
  text = word.to_s.strip.sub(/\.0+\z/, "")

  case text
  when %r{\A#*N/*A\z}i
    nil
  when /Native\s*American|American\s*Indian|Alaskan\s*Native/i, "1"
    1
  when /\bAsian|Pacific\s*Island/i, "2"
    2
  when /Black|African\s*American/i, "3"
    3
  when /Hispanic|Latin[oaxe]/i, "4"
    4
  when /White|Caucasian/i, "5"
    5
  when /Prefer not to disclose/i, "6"
    6
  when /Prefer to self-describe/i, "7"
    7
  when /Middle\s*Eastern|North\s*African/i, "8"
    8
  else
    99
  end
end
end end

@ -35,7 +35,15 @@ class Cleaner
row.district.short_name row.district.short_name
end.to_set.to_a end.to_set.to_a
districts.join(".").to_s + "." + survey_type.to_s + "." + range + ".csv" schools = data.map do |row|
row.school.name
end.to_set
# Only add school to filename when there's a single school
school_name = ""
school_name = schools.first.parameterize + "." if schools.length == 1
districts.join(".").to_s + "." + school_name + survey_type.to_s + "." + range + ".csv"
end end
def process_raw_file(file:) def process_raw_file(file:)

@ -114,27 +114,30 @@ class SurveyItemValues
def gender def gender
@gender ||= begin @gender ||= begin
gender_code ||= value_from(pattern: /Gender self report/i)
gender_code ||= value_from(pattern: /^Gender$/i) gender_code ||= value_from(pattern: /^Gender$/i)
gender_code ||= value_from(pattern: /What is your gender?|What is your gender? - Selected Choice/i) gender_code ||= value_from(pattern: /What is your gender?|What is your gender? - Selected Choice/i)
gender_code ||= value_from(pattern: /Gender-\s*Qcode/i) gender_code ||= value_from(pattern: /Gender - do not use/i)
gender_code ||= value_from(pattern: /Gender/i) gender_code ||= value_from(pattern: /Gender/i)
gender_code ||= 99 gender_code ||= value_from(pattern: /Gender-\s*SIS/i)
gender_code = gender_code.to_i gender_code ||= value_from(pattern: /Gender-\s*Qcode/i)
gender_code = 4 if gender_code == 3 gender_code = Gender.qualtrics_code_from(gender_code)
gender_code = 99 if gender_code.zero?
genders[gender_code] if genders genders[gender_code] if genders
end end
end end
def races def races
@races ||= begin @races ||= begin
race_codes = value_from(pattern: /^RACE$/i) hispanic = value_from(pattern: /Hispanic\s*Latino/i)&.downcase
race_codes ||= value_from(pattern: /Race\s*self\s*report/i)
race_codes ||= value_from(pattern: /^RACE$/i)
race_codes ||= value_from(pattern: %r{What is your race/ethnicity?(Please select all that apply) - Selected Choice}i) race_codes ||= value_from(pattern: %r{What is your race/ethnicity?(Please select all that apply) - Selected Choice}i)
race_codes ||= value_from(pattern: /Race Secondary/i) race_codes ||= value_from(pattern: /Race Secondary/i)
race_codes ||= value_from(pattern: /Race-\s*SIS/i)
race_codes ||= value_from(pattern: /Race\s*-\s*Qcodes/i) race_codes ||= value_from(pattern: /Race\s*-\s*Qcodes/i)
race_codes ||= value_from(pattern: /RACE/i) || "" race_codes ||= value_from(pattern: /RACE/i) || ""
hispanic = value_from(pattern: /Hispanic\s*Latino/i)&.downcase race_codes ||= []
race_codes = race_codes.split(",").map(&:to_i) || [] race_codes = race_codes.split(",").map { |race| Race.qualtrics_code_from(race) }.map(&:to_i)
race_codes = race_codes.reject { |code| code == 5 } if hispanic == "true" && race_codes.count == 1 race_codes = race_codes.reject { |code| code == 5 } if hispanic == "true" && race_codes.count == 1
race_codes = race_codes.push(4) if hispanic == "true" race_codes = race_codes.push(4) if hispanic == "true"
process_races(codes: race_codes) process_races(codes: race_codes)
@ -198,6 +201,9 @@ class SurveyItemValues
matches.each do |match| matches.each do |match|
output ||= row[match] output ||= row[match]
end end
return nil if output&.match?(%r{^#*N/*A}i) || output.blank?
output output
end end

@ -7,7 +7,7 @@ namespace :data do
student_count = Student.count student_count = Student.count
path = "/data/survey_responses/clean/" path = "/data/survey_responses/clean/"
Sftp::Directory.open(path:) do |file| Sftp::Directory.open(path:) do |file|
SurveyResponsesDataLoader.from_file(file:) SurveyResponsesDataLoader.new.from_file(file:)
end end
puts "=====================> Completed loading #{SurveyItemResponse.count} survey responses" puts "=====================> Completed loading #{SurveyItemResponse.count} survey responses"

@ -4,7 +4,7 @@ require "fileutils"
RSpec.describe Cleaner do RSpec.describe Cleaner do
let(:district) { create(:district, name: "Maynard Public Schools") } let(:district) { create(:district, name: "Maynard Public Schools") }
let(:second_district) { create(:district, name: "District2") } let(:second_district) { create(:district, name: "District2") }
let(:school) { create(:school, dese_id: 1_740_505, district:) } let(:school) { create(:school, dese_id: 1_740_505, district:, name: "Maynard High School") }
let(:second_school) { create(:school, dese_id: 1_740_305, district:) } let(:second_school) { create(:school, dese_id: 1_740_305, district:) }
let(:third_school) { create(:school, dese_id: 222_222, district: second_district) } let(:third_school) { create(:school, dese_id: 222_222, district: second_district) }
@ -156,7 +156,7 @@ RSpec.describe Cleaner do
filename = Cleaner.new(input_filepath:, output_filepath:, log_filepath:).filename( filename = Cleaner.new(input_filepath:, output_filepath:, log_filepath:).filename(
headers: standard_survey_items, data: headers: standard_survey_items, data:
) )
expect(filename).to eq "maynard.standard.2022-23.csv" expect(filename).to eq "maynard.maynard-high-school.standard.2022-23.csv"
end end
context "when the file is based on short form survey items" do context "when the file is based on short form survey items" do
@ -168,7 +168,7 @@ RSpec.describe Cleaner do
filename = Cleaner.new(input_filepath:, output_filepath:, log_filepath:).filename( filename = Cleaner.new(input_filepath:, output_filepath:, log_filepath:).filename(
headers: short_form_survey_items, data: headers: short_form_survey_items, data:
) )
expect(filename).to eq "maynard.short_form.2022-23.csv" expect(filename).to eq "maynard.maynard-high-school.short_form.2022-23.csv"
end end
end end
@ -181,7 +181,7 @@ RSpec.describe Cleaner do
filename = Cleaner.new(input_filepath:, output_filepath:, log_filepath:).filename( filename = Cleaner.new(input_filepath:, output_filepath:, log_filepath:).filename(
headers: early_education_survey_items, data: headers: early_education_survey_items, data:
) )
expect(filename).to eq "maynard.early_education.2022-23.csv" expect(filename).to eq "maynard.maynard-high-school.early_education.2022-23.csv"
end end
end end
context "when the file is based on teacher survey items" do context "when the file is based on teacher survey items" do
@ -193,7 +193,7 @@ RSpec.describe Cleaner do
filename = Cleaner.new(input_filepath:, output_filepath:, log_filepath:).filename( filename = Cleaner.new(input_filepath:, output_filepath:, log_filepath:).filename(
headers: teacher_survey_items, data: headers: teacher_survey_items, data:
) )
expect(filename).to eq "maynard.teacher.2022-23.csv" expect(filename).to eq "maynard.maynard-high-school.teacher.2022-23.csv"
end end
end end

@ -285,15 +285,15 @@ RSpec.describe SurveyItemValues, type: :model do
expect(values.sped).to eq "Not Special Education" expect(values.sped).to eq "Not Special Education"
end end
it 'tranlsates NA into "Unknown"' do it 'tranlsates NA into "Not Special Education"' do
headers = ["Raw SpEd"] headers = ["Raw SpEd"]
row = { "Raw SpEd" => "NA" } row = { "Raw SpEd" => "NA" }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:) values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.sped).to eq "Unknown" expect(values.sped).to eq "Not Special Education"
row = { "Raw SpEd" => "#NA" } row = { "Raw SpEd" => "#NA" }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:) values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.sped).to eq "Unknown" expect(values.sped).to eq "Not Special Education"
end end
end end

Loading…
Cancel
Save