chore: various fixes for race and gender categorization during cleaning.

Also add tests for race and gender categorization
speedup-admin-data
Nelson Jovel 2 years ago
parent 242192e2f3
commit 3f44613085

@ -5,9 +5,9 @@ class Gender < ApplicationRecord
def self.qualtrics_code_from(word)
case word
when /Female|F|1/i
when /Female|^F|1/i
1
when /Male|M|2/i
when /Male|^M|2/i
2
when /Another\s*Gender|Gender Identity not listed above|3|7/i
4 # We categorize any self reported gender as non-binary

@ -4,13 +4,17 @@ class Race < ApplicationRecord
has_many :students, through: :student_races
friendly_id :designation, use: [:slugged]
# Returns a Hash that maps each race's qualtrics_code to the race record.
# If several races share a code, the one enumerated last is kept.
scope :by_qualtrics_code, lambda {
  all.each_with_object({}) do |race, races_by_code|
    races_by_code[race.qualtrics_code] = race
  end
}
# TODO: look for Alaska Native
# TODO: split up possibilities first by a comma and then by the word "and"
def self.qualtrics_code_from(word)
case word
when /Native\s*American|American\s*Indian|Alaskan\s*Native|1/i
1
when /Asian|Pacific\s*Island|Hawaiian|2/i
when /^Asian|Pacific\s*Island|Hawaiian|2/i
2
when /Black|African\s*American|3/i
3

@ -118,10 +118,10 @@ class SurveyItemValues
gender_code ||= value_from(pattern: /Gender self report/i)
gender_code ||= value_from(pattern: /^Gender$/i)
gender_code ||= value_from(pattern: /What is your gender?|What is your gender? - Selected Choice/i)
gender_code ||= value_from(pattern: /Gender - do not use/i)
gender_code ||= value_from(pattern: /Gender/i)
gender_code ||= value_from(pattern: /Gender-\s*SIS/i)
gender_code ||= value_from(pattern: /Gender-\s*Qcode/i)
gender_code ||= value_from(pattern: /Gender - do not use/i)
gender_code ||= value_from(pattern: /Gender/i)
gender_code = Gender.qualtrics_code_from(gender_code)
genders[gender_code] if genders
end
@ -140,7 +140,7 @@ class SurveyItemValues
race_codes ||= []
race_codes = race_codes.split(",")
.map do |word|
word.split("and")
word.split(/\s+and\s+/i)
end.flatten
.reject(&:blank?)
.map { |race| Race.qualtrics_code_from(race) }.map(&:to_i)
@ -155,7 +155,7 @@ class SurveyItemValues
end
# Memoized raw income value obtained through value_from.
# Headers matching "Low Income" / "Raw Income" are tried first; only when that
# lookup yields nothing is a second lookup made with a pattern that also
# accepts "SES- SIS" style headers.
def raw_income
  return @raw_income if @raw_income

  @raw_income = value_from(pattern: /Low\s*Income|Raw\s*Income/i) ||
                value_from(pattern: /Low\s*Income|Raw\s*Income|SES-\s*SIS/i)
end
def income
@ -163,7 +163,7 @@ class SurveyItemValues
end
# Memoized raw ELL value obtained through value_from.
# Headers matching "EL Student First Year" / "Raw ELL" are tried first; only
# when that lookup yields nothing is a second lookup made with a pattern that
# also accepts "ELL- SIS" style headers.
def raw_ell
  return @raw_ell if @raw_ell

  @raw_ell = value_from(pattern: /EL Student First Year|Raw\s*ELL/i) ||
             value_from(pattern: /EL Student First Year|Raw\s*ELL|ELL-\s*SIS/i)
end
def ell
@ -171,7 +171,7 @@ class SurveyItemValues
end
# Memoized raw special-education value obtained through value_from.
# Headers matching "Special Ed Status" / "Raw SpEd" are tried first; only when
# that lookup yields nothing is a second lookup made with a pattern that also
# accepts "SpEd- SIS" style headers.
def raw_sped
  return @raw_sped if @raw_sped

  @raw_sped = value_from(pattern: /Special\s*Ed\s*Status|Raw\s*SpEd/i) ||
              value_from(pattern: /Special\s*Ed\s*Status|Raw\s*SpEd|SpEd-\s*SIS/i)
end
def sped
@ -183,11 +183,12 @@ class SurveyItemValues
matches = headers.select do |header|
pattern.match(header)
end.map { |item| item.delete("\n") }
matches.each do |match|
output ||= row[match]
output ||= row[match]&.strip
end
return nil if output&.match?(%r{^#*N/*A}i) || output.blank?
return nil if output&.match?(%r{^#*N/*A$}i) || output.blank?
output
end

@ -7,8 +7,25 @@ RSpec.describe SurveyItemValues, type: :model do
end
# Creates one gender per qualtrics code used by the examples (1, 2, 4, 99)
# and returns the result of Gender.by_qualtrics_code.
let(:genders) do
  [1, 2, 4, 99].each { |qualtrics_code| create(:gender, qualtrics_code:) }
  Gender.by_qualtrics_code
end
# Creates one race per qualtrics code used by the examples (1 through 8, plus 99)
# and returns the result of Race.by_qualtrics_code.
let(:races) do
  [1, 2, 3, 4, 5, 6, 7, 8, 99].each { |qualtrics_code| create(:race, qualtrics_code:) }
  Race.by_qualtrics_code
end
# Empty survey item list handed to SurveyItemValues.new by the examples.
let(:survey_items) { [] }
# District record created with the name "Attleboro".
let(:district) { create(:district, name: "Attleboro") }
let(:attleboro) do
@ -118,11 +135,248 @@ RSpec.describe SurveyItemValues, type: :model do
expect(values.grade).to eq 1
end
end
context ".gender" do
it "returns the grade that maps to the grade provided" do
row = { "Gender" => "1" }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.gender.qualtrics_code).to eq 1
context "when the gender is female" do
  # Every raw spelling listed below is expected to resolve to qualtrics code 1.
  it "returns the gender that maps to the gender provided" do
    ["1", "Female", "F"].each do |raw_gender|
      row = { "Gender" => raw_gender }
      values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
      expect(values.gender.qualtrics_code).to eq 1
    end
  end
end
context "when the gender is male" do
  # Every raw spelling listed below is expected to resolve to qualtrics code 2.
  it "returns the gender that maps to the gender provided" do
    ["2", "Male", "M"].each do |raw_gender|
      row = { "Gender" => raw_gender }
      values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
      expect(values.gender.qualtrics_code).to eq 2
    end
  end
end
context "when the gender is non-binary" do
  # Every raw spelling listed below is expected to resolve to qualtrics code 4.
  it "returns the gender that maps to the gender provided" do
    ["4", "N - Non-Binary", "N"].each do |raw_gender|
      row = { "Gender" => raw_gender }
      values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
      expect(values.gender.qualtrics_code).to eq 4
    end
  end
end
context "when the gender is not known" do
  # N/A-style markers, an explicit refusal and a blank cell are all expected
  # to resolve to qualtrics code 99.
  it "returns the gender that maps to the gender provided" do
    ["N/A", "NA", "#N/A", "#NA", "Prefer not to disclose", ""].each do |raw_gender|
      row = { "Gender" => raw_gender }
      values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
      expect(values.gender.qualtrics_code).to eq 99
    end
  end
end
end
context ".races" do
  # Force creation of the race records before each example; the examples
  # themselves never reference `races` directly.
  before do
    races
  end

  # Builds a SurveyItemValues for a row whose only column is "Race" and
  # returns the qualtrics codes of the races it resolves to.
  def race_codes_for(raw_race)
    row = { "Race" => raw_race }
    values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
    values.races.map { |race| race&.qualtrics_code }
  end

  context "when the race is Native American" do
    it "returns the race that maps to the race provided" do
      ["1", "Native American", "American Indian or Alaskan Native"].each do |raw_race|
        expect(race_codes_for(raw_race)).to eq [1]
      end
    end
  end

  context "when the race is Asian" do
    it "returns the race that maps to the race provided" do
      ["2", "Asian", "Pacific Islander", "Pacific Island or Hawaiian Native"].each do |raw_race|
        expect(race_codes_for(raw_race)).to eq [2]
      end
    end
  end

  context "when the race is Black" do
    it "returns the race that maps to the race provided" do
      ["3", "Black", "African American"].each do |raw_race|
        expect(race_codes_for(raw_race)).to eq [3]
      end
    end
  end

  context "when the race is Hispanic" do
    it "returns the race that maps to the race provided" do
      ["4", "Hispanic", "Latinx"].each do |raw_race|
        expect(race_codes_for(raw_race)).to eq [4]
      end
    end
  end

  context "when the race is White" do
    it "returns the race that maps to the race provided" do
      ["5", "White", "Caucasian"].each do |raw_race|
        expect(race_codes_for(raw_race)).to eq [5]
      end
    end
  end

  # This context previously appeared twice with identical contents; one copy is kept.
  context "when the race is not disclosed" do
    it "returns the race that maps to the race provided" do
      ["6", "Prefer not to disclose"].each do |raw_race|
        expect(race_codes_for(raw_race)).to eq [99]
      end
    end
  end

  context "when the race is self described" do
    it "returns the race that maps to the race provided" do
      ["7", "Prefer to self-describe"].each do |raw_race|
        expect(race_codes_for(raw_race)).to eq [99]
      end
    end
  end

  context "when the race is Middle Eastern" do
    it "returns the race that maps to the race provided" do
      ["8", "Middle Eastern", "North African"].each do |raw_race|
        expect(race_codes_for(raw_race)).to eq [8]
      end
    end
  end

  context "when the race is unknown" do
    it "returns the race that maps to the race provided" do
      ["NA", "#N/A", "n/a", "#na", ""].each do |raw_race|
        expect(race_codes_for(raw_race)).to eq [99]
      end
    end
  end
end

Loading…
Cancel
Save