chore: various fixes for race and gender categorization during cleaning.

Also add tests for race and gender categorization
mciea-main
Nelson Jovel 2 years ago
parent 267319604c
commit fe039e3d04

@ -2,4 +2,23 @@ class Gender < ApplicationRecord
scope :by_qualtrics_code, lambda { scope :by_qualtrics_code, lambda {
all.map { |gender| [gender.qualtrics_code, gender] }.to_h all.map { |gender| [gender.qualtrics_code, gender] }.to_h
} }
# Translates a raw gender value (free text, single-letter abbreviation or
# numeric code as a String) into the gender code used for lookups.
#
# Branches are evaluated top to bottom and the first match wins, so order
# matters:
# * The N/A sentinel is tested first. It begins with "N", so it would
#   otherwise be captured by the `^N` alternative of the non-binary branch
#   and be reported as 4.
# * "Female" is tested before "Male" because /Male/i also matches "Female".
#
# @param word [String, nil] the raw value; anything that is not a String or
#   Symbol (nil, Integer, ...) matches no pattern and yields 99
# @return [Integer, nil] 1, 2, 4 or 99; nil when the value is an explicit
#   N/A marker such as "N/A", "NA", "#N/A" (case-insensitive)
def self.qualtrics_code_from(word)
  case word
  when %r{^#*N/*A$}i
    nil
  when /Female|^F|1/i
    1
  when /Male|^M|2/i
    2
  when /Another\s*Gender|Gender Identity not listed above|3|7/i
    4 # We categorize any self reported gender as non-binary
  when /Non-Binary|^N|4/i
    4
  when /Prefer not to disclose|6/i
    99
  else
    99
  end
end
end end

@ -3,4 +3,33 @@ class Race < ApplicationRecord
has_many :student_races has_many :student_races
has_many :students, through: :student_races has_many :students, through: :student_races
friendly_id :designation, use: [:slugged] friendly_id :designation, use: [:slugged]
scope :by_qualtrics_code, lambda {
all.map { |race| [race.qualtrics_code, race] }.to_h
}
# Ordered [pattern, code] pairs consulted by qualtrics_code_from.
# The first pattern that matches decides the code, so the order of the rows
# is significant. The final row maps an explicit N/A marker to nil.
QUALTRICS_CODE_PATTERNS = [
  [/Native\s*American|American\s*Indian|Alaskan\s*Native|1/i, 1],
  [/^Asian|Pacific\s*Island|Hawaiian|2/i, 2],
  [/Black|African\s*American|3/i, 3],
  [/Hispanic|Latinx|4/i, 4],
  [/White|Caucasian|5/i, 5],
  [/Prefer not to disclose|6/i, 6],
  [/Prefer to self-describe|7/i, 7],
  [/Middle\s*Eastern|North\s*African|8/i, 8],
  [%r{^#*N/*A$}i, nil]
].freeze

# Translates a raw race value (free text or numeric code as a String) into
# a race code.
#
# @param word [String, nil] the raw value; anything that is not a String or
#   Symbol matches no pattern and yields 99
# @return [Integer, nil] a code from 1 to 8, nil for an explicit N/A marker,
#   or 99 when nothing matches
def self.qualtrics_code_from(word)
  # Regexp#=== is what `case/when` uses; unlike match? it returns false
  # instead of raising for non-String input.
  hit = QUALTRICS_CODE_PATTERNS.find { |pattern, _code| pattern === word }
  hit ? hit.last : 99
end
end end

@ -116,28 +116,48 @@ class SurveyItemValues
end end
def gender def gender
gender_code = value_from(pattern: /Gender|What is your gender?|What is your gender? - Selected Choice/i) @gender ||= begin
gender_code ||= 99 gender_code ||= value_from(pattern: /Gender self report/i)
gender_code = gender_code.to_i gender_code ||= value_from(pattern: /^Gender$/i)
gender_code = 4 if gender_code == 3 gender_code ||= value_from(pattern: /What is your gender?|What is your gender? - Selected Choice/i)
gender_code = 99 if gender_code.zero? gender_code ||= value_from(pattern: /Gender-\s*SIS/i)
genders[gender_code] gender_code ||= value_from(pattern: /Gender-\s*Qcode/i)
gender_code ||= value_from(pattern: /Gender - do not use/i)
gender_code ||= value_from(pattern: /Gender/i)
gender_code = Gender.qualtrics_code_from(gender_code)
genders[gender_code] if genders
end
end end
def races def races
race_codes = value_from(pattern: /RACE/i) @races ||= begin
hispanic = value_from(pattern: /Hispanic\s*Latino/i)&.downcase
race_codes ||= value_from(pattern: /Race\s*self\s*report/i)
race_codes ||= value_from(pattern: /^RACE$/i)
race_codes ||= value_from(pattern: %r{What is your race/ethnicity?(Please select all that apply) - Selected Choice}i) race_codes ||= value_from(pattern: %r{What is your race/ethnicity?(Please select all that apply) - Selected Choice}i)
race_codes ||= value_from(pattern: /Race Secondary/i) || "" race_codes ||= value_from(pattern: /Race Secondary/i)
race_codes = race_codes.split(",").map(&:to_i) || [] race_codes ||= value_from(pattern: /Race-\s*SIS/i)
race_codes ||= value_from(pattern: /Race\s*-\s*Qcodes/i)
race_codes ||= value_from(pattern: /RACE/i) || ""
race_codes ||= []
race_codes = race_codes.split(",")
.map do |word|
word.split(/\s+and\s+/i)
end.flatten
.reject(&:blank?)
.map { |race| Race.qualtrics_code_from(race) }.map(&:to_i)
race_codes = race_codes.reject { |code| code == 5 } if hispanic == "true" && race_codes.count == 1
race_codes = race_codes.push(4) if hispanic == "true"
process_races(codes: race_codes) process_races(codes: race_codes)
end end
end
def lasid def lasid
@lasid ||= value_from(pattern: /LASID/i) @lasid ||= value_from(pattern: /LASID/i)
end end
def raw_income def raw_income
@raw_income ||= value_from(pattern: /Low\s*Income|Raw\s*Income/i) @raw_income ||= value_from(pattern: /Low\s*Income|Raw\s*Income|SES-\s*SIS/i)
end end
def income def income
@ -152,7 +172,7 @@ class SurveyItemValues
end end
def raw_ell def raw_ell
@raw_ell ||= value_from(pattern: /EL Student First Year|Raw\s*ELL/i) @raw_ell ||= value_from(pattern: /EL Student First Year|Raw\s*ELL|ELL-\s*SIS/i)
end end
def ell def ell
@ -167,7 +187,7 @@ class SurveyItemValues
end end
def raw_sped def raw_sped
@raw_sped ||= value_from(pattern: /Special\s*Ed\s*Status|Raw\s*SpEd/i) @raw_sped ||= value_from(pattern: /Special\s*Ed\s*Status|Raw\s*SpEd|SpEd-\s*SIS/i)
end end
def sped def sped
@ -186,9 +206,13 @@ class SurveyItemValues
matches = headers.select do |header| matches = headers.select do |header|
pattern.match(header) pattern.match(header)
end.map { |item| item.delete("\n") } end.map { |item| item.delete("\n") }
matches.each do |match| matches.each do |match|
output ||= row[match] output ||= row[match]&.strip
end end
return nil if output&.match?(%r{^#*N/*A$}i) || output.blank?
output output
end end

@ -7,8 +7,25 @@ RSpec.describe SurveyItemValues, type: :model do
end end
let(:genders) do let(:genders) do
create(:gender, qualtrics_code: 1) create(:gender, qualtrics_code: 1)
create(:gender, qualtrics_code: 2)
create(:gender, qualtrics_code: 4)
create(:gender, qualtrics_code: 99)
Gender.by_qualtrics_code Gender.by_qualtrics_code
end end
let(:races) do
create(:race, qualtrics_code: 1)
create(:race, qualtrics_code: 2)
create(:race, qualtrics_code: 3)
create(:race, qualtrics_code: 4)
create(:race, qualtrics_code: 5)
create(:race, qualtrics_code: 6)
create(:race, qualtrics_code: 7)
create(:race, qualtrics_code: 8)
create(:race, qualtrics_code: 99)
Race.by_qualtrics_code
end
let(:survey_items) { [] } let(:survey_items) { [] }
let(:district) { create(:district, name: "Attleboro") } let(:district) { create(:district, name: "Attleboro") }
let(:attleboro) do let(:attleboro) do
@ -117,11 +134,248 @@ RSpec.describe SurveyItemValues, type: :model do
expect(values.grade).to eq 1 expect(values.grade).to eq 1
end end
end end
context ".gender" do context ".gender" do
it "returns the grade that maps to the grade provided" do context "when the gender is female" do
it "returns the gender that maps to the gender provided" do
row = { "Gender" => "1" } row = { "Gender" => "1" }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:) values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.gender.qualtrics_code).to eq 1 expect(values.gender.qualtrics_code).to eq 1
row = { "Gender" => "Female" }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.gender.qualtrics_code).to eq 1
row = { "Gender" => "F" }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.gender.qualtrics_code).to eq 1
end
end
context "when the gender is male" do
it "returns the gender that maps to the gender provided" do
row = { "Gender" => "2" }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.gender.qualtrics_code).to eq 2
row = { "Gender" => "Male" }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.gender.qualtrics_code).to eq 2
row = { "Gender" => "M" }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.gender.qualtrics_code).to eq 2
end
end
context "when the gender is non-binary" do
it "returns the gender that maps to the gender provided" do
row = { "Gender" => "4" }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.gender.qualtrics_code).to eq 4
row = { "Gender" => "N - Non-Binary" }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.gender.qualtrics_code).to eq 4
row = { "Gender" => "N" }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.gender.qualtrics_code).to eq 4
end
end
context "when the gender is not known" do
it "returns the gender that maps to the gender provided" do
row = { "Gender" => "N/A" }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.gender.qualtrics_code).to eq 99
row = { "Gender" => "NA" }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.gender.qualtrics_code).to eq 99
row = { "Gender" => "#N/A" }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.gender.qualtrics_code).to eq 99
row = { "Gender" => "#NA" }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.gender.qualtrics_code).to eq 99
row = { "Gender" => "Prefer not to disclose" }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.gender.qualtrics_code).to eq 99
row = { "Gender" => "" }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.gender.qualtrics_code).to eq 99
end
end
end
context ".races" do
before do
races
end
context "when the race is Native American" do
it "returns the gender that maps to the gender provided" do
row = { "Race" => "1" }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.races.map { |race| race&.qualtrics_code}).to eq [1]
row = { "Race" => "Native American" }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.races.map { |race| race&.qualtrics_code}).to eq [1]
row = { "Race" => "American Indian or Alaskan Native" }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.races.map { |race| race&.qualtrics_code}).to eq [1]
end
end
context "when the race is Asian" do
it "returns the gender that maps to the gender provided" do
row = { "Race" => "2" }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.races.map { |race| race&.qualtrics_code}).to eq [2]
row = { "Race" => "Asian" }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.races.map { |race| race&.qualtrics_code}).to eq [2]
row = { "Race" => "Pacific Islander" }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.races.map { |race| race&.qualtrics_code}).to eq [2]
row = { "Race" => "Pacific Island or Hawaiian Native" }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.races.map { |race| race&.qualtrics_code}).to eq [2]
end
end
context "when the race is Black" do
it "returns the gender that maps to the gender provided" do
row = { "Race" => "3" }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.races.map { |race| race&.qualtrics_code}).to eq [3]
row = { "Race" => "Black" }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.races.map { |race| race&.qualtrics_code}).to eq [3]
row = { "Race" => "African American" }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.races.map { |race| race&.qualtrics_code}).to eq [3]
end
end
context "when the race is Hispanic" do
it "returns the gender that maps to the gender provided" do
row = { "Race" => "4" }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.races.map { |race| race&.qualtrics_code}).to eq [4]
row = { "Race" => "Hispanic" }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.races.map { |race| race&.qualtrics_code}).to eq [4]
row = { "Race" => "Latinx" }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.races.map { |race| race&.qualtrics_code}).to eq [4]
end
end
context "when the race is White" do
it "returns the gender that maps to the gender provided" do
row = { "Race" => "5" }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.races.map { |race| race&.qualtrics_code}).to eq [5]
row = { "Race" => "White" }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.races.map { |race| race&.qualtrics_code}).to eq [5]
row = { "Race" => "Caucasian" }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.races.map { |race| race&.qualtrics_code}).to eq [5]
end
end
context "when the race is not disclosed" do
it "returns the gender that maps to the gender provided" do
row = { "Race" => "6" }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.races.map { |race| race&.qualtrics_code}).to eq [99]
row = { "Race" => "Prefer not to disclose" }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.races.map { |race| race&.qualtrics_code}).to eq [99]
end
end
context "when the race is not disclosed" do
it "returns the gender that maps to the gender provided" do
row = { "Race" => "6" }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.races.map { |race| race&.qualtrics_code}).to eq [99]
row = { "Race" => "Prefer not to disclose" }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.races.map { |race| race&.qualtrics_code}).to eq [99]
end
end
context "when the race is self described" do
it "returns the gender that maps to the gender provided" do
row = { "Race" => "7" }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.races.map { |race| race&.qualtrics_code}).to eq [99]
row = { "Race" => "Prefer to self-describe" }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.races.map { |race| race&.qualtrics_code}).to eq [99]
end
end
context "when the race is Middle Eastern" do
it "returns the gender that maps to the gender provided" do
row = { "Race" => "8" }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.races.map { |race| race&.qualtrics_code}).to eq [8]
row = { "Race" => "Middle Eastern" }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.races.map { |race| race&.qualtrics_code}).to eq [8]
row = { "Race" => "North African" }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.races.map { |race| race&.qualtrics_code}).to eq [8]
end
end
context "when the race is unknown" do
it "returns the gender that maps to the gender provided" do
row = { "Race" => "NA" }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.races.map { |race| race&.qualtrics_code}).to eq [99]
row = { "Race" => "#N/A" }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.races.map { |race| race&.qualtrics_code}).to eq [99]
row = { "Race" => "n/a" }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.races.map { |race| race&.qualtrics_code}).to eq [99]
row = { "Race" => "#na" }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.races.map { |race| race&.qualtrics_code}).to eq [99]
row = { "Race" => "" }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.races.map { |race| race&.qualtrics_code}).to eq [99]
end
end end
end end

Loading…
Cancel
Save