chore: various fixes for race and gender categorization during cleaning.

Also add tests for race and gender categorization
speedup-admin-data
Nelson Jovel 2 years ago
parent 242192e2f3
commit 3f44613085

@ -5,9 +5,9 @@ class Gender < ApplicationRecord
def self.qualtrics_code_from(word) def self.qualtrics_code_from(word)
case word case word
when /Female|F|1/i when /Female|^F|1/i
1 1
when /Male|M|2/i when /Male|^M|2/i
2 2
when /Another\s*Gender|Gender Identity not listed above|3|7/i when /Another\s*Gender|Gender Identity not listed above|3|7/i
4 # We categorize any self reported gender as non-binary 4 # We categorize any self reported gender as non-binary

@ -4,13 +4,17 @@ class Race < ApplicationRecord
has_many :students, through: :student_races has_many :students, through: :student_races
friendly_id :designation, use: [:slugged] friendly_id :designation, use: [:slugged]
# Lookup of every race keyed by its qualtrics_code.
# NOTE(review): the lambda returns a Hash rather than a relation, so the
# result presumably cannot be chained with other scopes - confirm with callers.
scope :by_qualtrics_code, lambda {
  all.each_with_object({}) do |race, lookup|
    lookup[race.qualtrics_code] = race
  end
}
# TODO: look for alaska native # TODO: look for alaska native
# Todo: split up possibilities by first a comma and then the word and # Todo: split up possibilities by first a comma and then the word and
def self.qualtrics_code_from(word) def self.qualtrics_code_from(word)
case word case word
when /Native\s*American|American\s*Indian|Alaskan\s*Native|1/i when /Native\s*American|American\s*Indian|Alaskan\s*Native|1/i
1 1
when /Asian|Pacific\s*Island|Hawaiian|2/i when /^Asian|Pacific\s*Island|Hawaiian|2/i
2 2
when /Black|African\s*American|3/i when /Black|African\s*American|3/i
3 3

@ -118,10 +118,10 @@ class SurveyItemValues
gender_code ||= value_from(pattern: /Gender self report/i) gender_code ||= value_from(pattern: /Gender self report/i)
gender_code ||= value_from(pattern: /^Gender$/i) gender_code ||= value_from(pattern: /^Gender$/i)
gender_code ||= value_from(pattern: /What is your gender?|What is your gender? - Selected Choice/i) gender_code ||= value_from(pattern: /What is your gender?|What is your gender? - Selected Choice/i)
gender_code ||= value_from(pattern: /Gender - do not use/i)
gender_code ||= value_from(pattern: /Gender/i)
gender_code ||= value_from(pattern: /Gender-\s*SIS/i) gender_code ||= value_from(pattern: /Gender-\s*SIS/i)
gender_code ||= value_from(pattern: /Gender-\s*Qcode/i) gender_code ||= value_from(pattern: /Gender-\s*Qcode/i)
gender_code ||= value_from(pattern: /Gender - do not use/i)
gender_code ||= value_from(pattern: /Gender/i)
gender_code = Gender.qualtrics_code_from(gender_code) gender_code = Gender.qualtrics_code_from(gender_code)
genders[gender_code] if genders genders[gender_code] if genders
end end
@ -140,7 +140,7 @@ class SurveyItemValues
race_codes ||= [] race_codes ||= []
race_codes = race_codes.split(",") race_codes = race_codes.split(",")
.map do |word| .map do |word|
word.split("and") word.split(/\s+and\s+/i)
end.flatten end.flatten
.reject(&:blank?) .reject(&:blank?)
.map { |race| Race.qualtrics_code_from(race) }.map(&:to_i) .map { |race| Race.qualtrics_code_from(race) }.map(&:to_i)
@ -155,7 +155,7 @@ class SurveyItemValues
end end
def raw_income def raw_income
@raw_income ||= value_from(pattern: /Low\s*Income|Raw\s*Income/i) @raw_income ||= value_from(pattern: /Low\s*Income|Raw\s*Income|SES-\s*SIS/i)
end end
def income def income
@ -163,7 +163,7 @@ class SurveyItemValues
end end
def raw_ell def raw_ell
@raw_ell ||= value_from(pattern: /EL Student First Year|Raw\s*ELL/i) @raw_ell ||= value_from(pattern: /EL Student First Year|Raw\s*ELL|ELL-\s*SIS/i)
end end
def ell def ell
@ -171,7 +171,7 @@ class SurveyItemValues
end end
def raw_sped def raw_sped
@raw_sped ||= value_from(pattern: /Special\s*Ed\s*Status|Raw\s*SpEd/i) @raw_sped ||= value_from(pattern: /Special\s*Ed\s*Status|Raw\s*SpEd|SpEd-\s*SIS/i)
end end
def sped def sped
@ -183,11 +183,12 @@ class SurveyItemValues
matches = headers.select do |header| matches = headers.select do |header|
pattern.match(header) pattern.match(header)
end.map { |item| item.delete("\n") } end.map { |item| item.delete("\n") }
matches.each do |match| matches.each do |match|
output ||= row[match] output ||= row[match]&.strip
end end
return nil if output&.match?(%r{^#*N/*A}i) || output.blank? return nil if output&.match?(%r{^#*N/*A$}i) || output.blank?
output output
end end

@ -7,8 +7,25 @@ RSpec.describe SurveyItemValues, type: :model do
end end
let(:genders) do let(:genders) do
create(:gender, qualtrics_code: 1) create(:gender, qualtrics_code: 1)
create(:gender, qualtrics_code: 2)
create(:gender, qualtrics_code: 4)
create(:gender, qualtrics_code: 99)
Gender.by_qualtrics_code Gender.by_qualtrics_code
end end
# Creates one Race per qualtrics code listed below, then returns whatever
# Race.by_qualtrics_code produces.
let(:races) do
  [1, 2, 3, 4, 5, 6, 7, 8, 99].each do |code|
    create(:race, qualtrics_code: code)
  end
  Race.by_qualtrics_code
end
let(:survey_items) { [] } let(:survey_items) { [] }
let(:district) { create(:district, name: "Attleboro") } let(:district) { create(:district, name: "Attleboro") }
let(:attleboro) do let(:attleboro) do
@ -118,11 +135,248 @@ RSpec.describe SurveyItemValues, type: :model do
expect(values.grade).to eq 1 expect(values.grade).to eq 1
end end
end end
context ".gender" do context ".gender" do
it "returns the grade that maps to the grade provided" do context "when the gender is female" do
it "returns the gender that maps to the gender provided" do
row = { "Gender" => "1" } row = { "Gender" => "1" }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:) values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.gender.qualtrics_code).to eq 1 expect(values.gender.qualtrics_code).to eq 1
row = { "Gender" => "Female" }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.gender.qualtrics_code).to eq 1
row = { "Gender" => "F" }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.gender.qualtrics_code).to eq 1
end
end
context "when the gender is male" do
  it "returns the gender that maps to the gender provided" do
    # Numeric code, full word, and single-letter abbreviation are each expected to give code 2.
    %w[2 Male M].each do |reported|
      row = { "Gender" => reported }
      values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
      expect(values.gender.qualtrics_code).to eq 2
    end
  end
end
context "when the gender is non-binary" do
  it "returns the gender that maps to the gender provided" do
    # Each of these reported values is expected to give code 4.
    ["4", "N - Non-Binary", "N"].each do |reported|
      row = { "Gender" => reported }
      values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
      expect(values.gender.qualtrics_code).to eq 4
    end
  end
end
context "when the gender is not known" do
  it "returns the gender that maps to the gender provided" do
    # N/A spellings, an explicit refusal, and a blank cell are each expected to give code 99.
    ["N/A", "NA", "#N/A", "#NA", "Prefer not to disclose", ""].each do |reported|
      row = { "Gender" => reported }
      values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
      expect(values.gender.qualtrics_code).to eq 99
    end
  end
end
end
context ".races" do
# Force the lazy `races` let so the Race records exist before each example.
before { races }
context "when the race is Native American" do
  # Description previously said "gender" (copy-paste from the gender specs).
  it "returns the race that maps to the race provided" do
    ["1", "Native American", "American Indian or Alaskan Native"].each do |reported|
      row = { "Race" => reported }
      values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
      expect(values.races.map { |race| race&.qualtrics_code }).to eq [1]
    end
  end
end
context "when the race is Asian" do
  # Description previously said "gender" (copy-paste from the gender specs).
  it "returns the race that maps to the race provided" do
    ["2", "Asian", "Pacific Islander", "Pacific Island or Hawaiian Native"].each do |reported|
      row = { "Race" => reported }
      values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
      expect(values.races.map { |race| race&.qualtrics_code }).to eq [2]
    end
  end
end
context "when the race is Black" do
  # Description previously said "gender" (copy-paste from the gender specs).
  it "returns the race that maps to the race provided" do
    ["3", "Black", "African American"].each do |reported|
      row = { "Race" => reported }
      values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
      expect(values.races.map { |race| race&.qualtrics_code }).to eq [3]
    end
  end
end
context "when the race is Hispanic" do
  # Description previously said "gender" (copy-paste from the gender specs).
  it "returns the race that maps to the race provided" do
    %w[4 Hispanic Latinx].each do |reported|
      row = { "Race" => reported }
      values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
      expect(values.races.map { |race| race&.qualtrics_code }).to eq [4]
    end
  end
end
context "when the race is White" do
  # Description previously said "gender" (copy-paste from the gender specs).
  it "returns the race that maps to the race provided" do
    %w[5 White Caucasian].each do |reported|
      row = { "Race" => reported }
      values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
      expect(values.races.map { |race| race&.qualtrics_code }).to eq [5]
    end
  end
end
# This context appeared twice, word for word; a single copy is kept because the
# second one ran the same inputs against the same expectations.
context "when the race is not disclosed" do
  # Description previously said "gender" (copy-paste from the gender specs).
  it "returns the race that maps to the race provided" do
    # Both the numeric code 6 and the refusal text are expected to give code 99.
    ["6", "Prefer not to disclose"].each do |reported|
      row = { "Race" => reported }
      values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
      expect(values.races.map { |race| race&.qualtrics_code }).to eq [99]
    end
  end
end
context "when the race is self described" do
  # Description previously said "gender" (copy-paste from the gender specs).
  it "returns the race that maps to the race provided" do
    # Both the numeric code 7 and the self-describe text are expected to give code 99.
    ["7", "Prefer to self-describe"].each do |reported|
      row = { "Race" => reported }
      values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
      expect(values.races.map { |race| race&.qualtrics_code }).to eq [99]
    end
  end
end
context "when the race is Middle Eastern" do
  # Description previously said "gender" (copy-paste from the gender specs).
  it "returns the race that maps to the race provided" do
    ["8", "Middle Eastern", "North African"].each do |reported|
      row = { "Race" => reported }
      values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
      expect(values.races.map { |race| race&.qualtrics_code }).to eq [8]
    end
  end
end
context "when the race is unknown" do
it "returns the gender that maps to the gender provided" do
row = { "Race" => "NA" }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.races.map { |race| race&.qualtrics_code}).to eq [99]
row = { "Race" => "#N/A" }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.races.map { |race| race&.qualtrics_code}).to eq [99]
row = { "Race" => "n/a" }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.races.map { |race| race&.qualtrics_code}).to eq [99]
row = { "Race" => "#na" }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.races.map { |race| race&.qualtrics_code}).to eq [99]
row = { "Race" => "" }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.races.map { |race| race&.qualtrics_code}).to eq [99]
end
end end
end end

Loading…
Cancel
Save