Convert gender and race text into Qualtrics codes during cleaning. Abide by 'prefer not to disclose' for self-reported race. Give priority to self-reported data, but use SIS information as a backup.

rpp-main
Nelson Jovel 2 years ago
parent 97ddb09167
commit f27a590c5a

@ -2,4 +2,21 @@ class Gender < ApplicationRecord
# Lookup of every Gender record keyed by its qualtrics_code.
# When two records share a code, the one iterated last wins.
scope :by_qualtrics_code, lambda {
  all.each_with_object({}) do |gender, lookup|
    lookup[gender.qualtrics_code] = gender
  end
}
# Translates a raw gender value (free text, a one-letter abbreviation, or a
# numeric code) into the Qualtrics gender code.
#
# Abbreviations ("F", "M", "N") and digit codes must make up the whole value
# (ignoring surrounding whitespace); they are not matched as substrings.
# Otherwise unrelated text such as "Prefer not to disclose" (contains "f") or
# "Unknown" (contains "n") would be classified as a gender.
#
# @param word [String, Symbol, Integer, nil] raw cell value
# @return [Integer, nil] 1 (female), 2 (male), 4 (non-binary or another
#   gender), nil for an N/A marker, 99 for anything unrecognised
#   (including nil and blank input)
def self.qualtrics_code_from(word)
  text = word.to_s.strip

  case text
  when %r{\A#*N/*A\z}i
    # Checked first so "N/A" / "#NA" can never be mistaken for the "N" abbreviation.
    nil
  when /\bFemale\b|\A(?:F|1)\z/i
    1
  when /\bMale\b|\A(?:M|2)\z/i
    2
  when /Another\s*Gender|Gender Identity not listed above|\A3\z/i
    # Code 3 / "another gender" is reported under the same code as non-binary.
    4
  when /Non[-\s]*Binary|\A(?:N|4)\z/i
    4
  else
    99
  end
end
end

@ -3,4 +3,29 @@ class Race < ApplicationRecord
has_many :student_races
has_many :students, through: :student_races
friendly_id :designation, use: [:slugged]
# Translates a raw race/ethnicity value (free text or a numeric code) into the
# Qualtrics race code.
#
# "Asian" is matched only at a word boundary because "Caucasian" ends in
# "asian" and would otherwise be coded as 2 instead of 5. Digit codes must
# make up the whole value (ignoring surrounding whitespace), so "10" or a
# label that merely contains a digit is not mistaken for a code.
#
# @param word [String, Symbol, Integer, nil] raw value for a single race
# @return [Integer, nil] 1-8 for a recognised category, nil for an N/A
#   marker, 99 for anything unrecognised (including nil and blank input)
def self.qualtrics_code_from(word)
  text = word.to_s.strip

  case text
  when %r{\A#*N/*A\z}i
    nil
  when /Native\s*American|American\s*Indian|Alaskan\s*Native|\A1\z/i
    1
  when /\bAsian|Pacific\s*Island|\A2\z/i
    2
  when /Black|African\s*American|\A3\z/i
    3
  when /Hispanic|Latinx|\A4\z/i
    4
  when /White|Caucasian|\A5\z/i
    5
  when /Prefer not to disclose|\A6\z/i
    6
  when /Prefer to self-describe|\A7\z/i
    7
  when /Middle\s*Eastern|North\s*African|\A8\z/i
    8
  else
    99
  end
end
end

@ -35,7 +35,15 @@ class Cleaner
row.district.short_name
end.to_set.to_a
districts.join(".").to_s + "." + survey_type.to_s + "." + range + ".csv"
schools = data.map do |row|
row.school.name
end.to_set
# Only add school to filename when there's a single school
school_name = ""
school_name = schools.first.parameterize + "." if schools.length == 1
districts.join(".").to_s + "." + school_name + survey_type.to_s + "." + range + ".csv"
end
def process_raw_file(file:)

@ -114,27 +114,30 @@ class SurveyItemValues
def gender
@gender ||= begin
gender_code ||= value_from(pattern: /Gender self report/i)
gender_code ||= value_from(pattern: /^Gender$/i)
gender_code ||= value_from(pattern: /What is your gender?|What is your gender? - Selected Choice/i)
gender_code ||= value_from(pattern: /Gender-\s*Qcode/i)
gender_code ||= value_from(pattern: /Gender - do not use/i)
gender_code ||= value_from(pattern: /Gender/i)
gender_code ||= 99
gender_code = gender_code.to_i
gender_code = 4 if gender_code == 3
gender_code = 99 if gender_code.zero?
gender_code ||= value_from(pattern: /Gender-\s*SIS/i)
gender_code ||= value_from(pattern: /Gender-\s*Qcode/i)
gender_code = Gender.qualtrics_code_from(gender_code)
genders[gender_code] if genders
end
end
def races
@races ||= begin
race_codes = value_from(pattern: /^RACE$/i)
hispanic = value_from(pattern: /Hispanic\s*Latino/i)&.downcase
race_codes ||= value_from(pattern: /Race\s*self\s*report/i)
race_codes ||= value_from(pattern: /^RACE$/i)
race_codes ||= value_from(pattern: %r{What is your race/ethnicity?(Please select all that apply) - Selected Choice}i)
race_codes ||= value_from(pattern: /Race Secondary/i)
race_codes ||= value_from(pattern: /Race-\s*SIS/i)
race_codes ||= value_from(pattern: /Race\s*-\s*Qcodes/i)
race_codes ||= value_from(pattern: /RACE/i) || ""
hispanic = value_from(pattern: /Hispanic\s*Latino/i)&.downcase
race_codes = race_codes.split(",").map(&:to_i) || []
race_codes ||= []
race_codes = race_codes.split(",").map { |race| Race.qualtrics_code_from(race) }.map(&:to_i)
race_codes = race_codes.reject { |code| code == 5 } if hispanic == "true" && race_codes.count == 1
race_codes = race_codes.push(4) if hispanic == "true"
process_races(codes: race_codes)
@ -198,6 +201,9 @@ class SurveyItemValues
matches.each do |match|
output ||= row[match]
end
return nil if output&.match?(%r{^#*N/*A}i) || output.blank?
output
end

@ -7,7 +7,7 @@ namespace :data do
student_count = Student.count
path = "/data/survey_responses/clean/"
Sftp::Directory.open(path:) do |file|
SurveyResponsesDataLoader.from_file(file:)
SurveyResponsesDataLoader.new.from_file(file:)
end
puts "=====================> Completed loading #{SurveyItemResponse.count} survey responses"

@ -4,7 +4,7 @@ require "fileutils"
RSpec.describe Cleaner do
let(:district) { create(:district, name: "Maynard Public Schools") }
let(:second_district) { create(:district, name: "District2") }
let(:school) { create(:school, dese_id: 1_740_505, district:) }
let(:school) { create(:school, dese_id: 1_740_505, district:, name: "Maynard High School") }
let(:second_school) { create(:school, dese_id: 1_740_305, district:) }
let(:third_school) { create(:school, dese_id: 222_222, district: second_district) }
@ -156,7 +156,7 @@ RSpec.describe Cleaner do
filename = Cleaner.new(input_filepath:, output_filepath:, log_filepath:).filename(
headers: standard_survey_items, data:
)
expect(filename).to eq "maynard.standard.2022-23.csv"
expect(filename).to eq "maynard.maynard-high-school.standard.2022-23.csv"
end
context "when the file is based on short form survey items" do
@ -168,7 +168,7 @@ RSpec.describe Cleaner do
filename = Cleaner.new(input_filepath:, output_filepath:, log_filepath:).filename(
headers: short_form_survey_items, data:
)
expect(filename).to eq "maynard.short_form.2022-23.csv"
expect(filename).to eq "maynard.maynard-high-school.short_form.2022-23.csv"
end
end
@ -181,7 +181,7 @@ RSpec.describe Cleaner do
filename = Cleaner.new(input_filepath:, output_filepath:, log_filepath:).filename(
headers: early_education_survey_items, data:
)
expect(filename).to eq "maynard.early_education.2022-23.csv"
expect(filename).to eq "maynard.maynard-high-school.early_education.2022-23.csv"
end
end
context "when the file is based on teacher survey items" do
@ -193,7 +193,7 @@ RSpec.describe Cleaner do
filename = Cleaner.new(input_filepath:, output_filepath:, log_filepath:).filename(
headers: teacher_survey_items, data:
)
expect(filename).to eq "maynard.teacher.2022-23.csv"
expect(filename).to eq "maynard.maynard-high-school.teacher.2022-23.csv"
end
end

@ -285,15 +285,15 @@ RSpec.describe SurveyItemValues, type: :model do
expect(values.sped).to eq "Not Special Education"
end
it 'tranlsates NA into "Unknown"' do
it 'tranlsates NA into "Not Special Education"' do
headers = ["Raw SpEd"]
row = { "Raw SpEd" => "NA" }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.sped).to eq "Unknown"
expect(values.sped).to eq "Not Special Education"
row = { "Raw SpEd" => "#NA" }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.sped).to eq "Unknown"
expect(values.sped).to eq "Not Special Education"
end
end

Loading…
Cancel
Save