From 8a0ba0dbea566e01f2a242fb8fbf5de2b5eca792 Mon Sep 17 00:00:00 2001 From: Nelson Jovel Date: Fri, 8 Dec 2023 13:12:19 -0800 Subject: [PATCH] chore: various fixes for race and gender categorization during cleaning. Also add tests for race and gender categorization --- app/models/gender.rb | 4 +- app/models/race.rb | 6 +- app/services/survey_item_values.rb | 17 +- spec/services/survey_item_values_spec.rb | 262 ++++++++++++++++++++++- 4 files changed, 274 insertions(+), 15 deletions(-) diff --git a/app/models/gender.rb b/app/models/gender.rb index 31165bc0..f865d8ef 100644 --- a/app/models/gender.rb +++ b/app/models/gender.rb @@ -5,9 +5,9 @@ class Gender < ApplicationRecord def self.qualtrics_code_from(word) case word - when /Female|F|1/i + when /Female|^F|1/i 1 - when /Male|M|2/i + when /Male|^M|2/i 2 when /Another\s*Gender|Gender Identity not listed above|3|7/i 4 # We categorize any self reported gender as non-binary diff --git a/app/models/race.rb b/app/models/race.rb index da3fcfa2..42593a2c 100644 --- a/app/models/race.rb +++ b/app/models/race.rb @@ -4,13 +4,17 @@ class Race < ApplicationRecord has_many :students, through: :student_races friendly_id :designation, use: [:slugged] + scope :by_qualtrics_code, lambda { + all.map { |race| [race.qualtrics_code, race] }.to_h + } + # TODO: look for alaska native # Todo: split up possibilities by first a comma and then the word and def self.qualtrics_code_from(word) case word when /Native\s*American|American\s*Indian|Alaskan\s*Native|1/i 1 - when /Asian|Pacific\s*Island|Hawaiian|2/i + when /^Asian|Pacific\s*Island|Hawaiian|2/i 2 when /Black|African\s*American|3/i 3 diff --git a/app/services/survey_item_values.rb b/app/services/survey_item_values.rb index 9250b29d..f00a2369 100644 --- a/app/services/survey_item_values.rb +++ b/app/services/survey_item_values.rb @@ -117,10 +117,10 @@ class SurveyItemValues gender_code ||= value_from(pattern: /Gender self report/i) gender_code ||= value_from(pattern: /^Gender$/i) gender_code ||= value_from(pattern: /What is your gender?|What is your gender? - Selected Choice/i) - gender_code ||= value_from(pattern: /Gender - do not use/i) - gender_code ||= value_from(pattern: /Gender/i) gender_code ||= value_from(pattern: /Gender-\s*SIS/i) gender_code ||= value_from(pattern: /Gender-\s*Qcode/i) + gender_code ||= value_from(pattern: /Gender - do not use/i) + gender_code ||= value_from(pattern: /Gender/i) gender_code = Gender.qualtrics_code_from(gender_code) genders[gender_code] if genders end @@ -139,7 +139,7 @@ class SurveyItemValues race_codes ||= [] race_codes = race_codes.split(",") .map do |word| - word.split("and") + word.split(/\s+and\s+/i) end.flatten .reject(&:blank?) .map { |race| Race.qualtrics_code_from(race) }.map(&:to_i) @@ -154,7 +154,7 @@ class SurveyItemValues end def raw_income - @raw_income ||= value_from(pattern: /Low\s*Income|Raw\s*Income/i) + @raw_income ||= value_from(pattern: /Low\s*Income|Raw\s*Income|SES-\s*SIS/i) end def income @@ -162,7 +162,7 @@ class SurveyItemValues end def raw_ell - @raw_ell ||= value_from(pattern: /EL Student First Year|Raw\s*ELL/i) + @raw_ell ||= value_from(pattern: /EL Student First Year|Raw\s*ELL|ELL-\s*SIS/i) end def ell @@ -170,7 +170,7 @@ class SurveyItemValues end def raw_sped - @raw_sped ||= value_from(pattern: /Special\s*Ed\s*Status|Raw\s*SpEd/i) + @raw_sped ||= value_from(pattern: /Special\s*Ed\s*Status|Raw\s*SpEd|SpEd-\s*SIS/i) end def sped @@ -182,11 +182,12 @@ class SurveyItemValues matches = headers.select do |header| pattern.match(header) end.map { |item| item.delete("\n") } + matches.each do |match| - output ||= row[match] + output ||= row[match]&.strip end - return nil if output&.match?(%r{^#*N/*A}i) || output.blank? + return nil if output&.match?(%r{^#*N/*A$}i) || output.blank? output end diff --git a/spec/services/survey_item_values_spec.rb b/spec/services/survey_item_values_spec.rb index 80b01d0c..0df44d1d 100644 --- a/spec/services/survey_item_values_spec.rb +++ b/spec/services/survey_item_values_spec.rb @@ -7,8 +7,25 @@ RSpec.describe SurveyItemValues, type: :model do end let(:genders) do create(:gender, qualtrics_code: 1) + create(:gender, qualtrics_code: 2) + create(:gender, qualtrics_code: 4) + create(:gender, qualtrics_code: 99) Gender.by_qualtrics_code end + + let(:races) do + create(:race, qualtrics_code: 1) + create(:race, qualtrics_code: 2) + create(:race, qualtrics_code: 3) + create(:race, qualtrics_code: 4) + create(:race, qualtrics_code: 5) + create(:race, qualtrics_code: 6) + create(:race, qualtrics_code: 7) + create(:race, qualtrics_code: 8) + create(:race, qualtrics_code: 99) + Race.by_qualtrics_code + end + let(:survey_items) { [] } let(:district) { create(:district, name: "Attleboro") } let(:attleboro) do @@ -118,11 +135,248 @@ RSpec.describe SurveyItemValues, type: :model do expect(values.grade).to eq 1 end end + context ".gender" do - it "returns the grade that maps to the grade provided" do - row = { "Gender" => "1" } - values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:) - expect(values.gender.qualtrics_code).to eq 1 + context "when the gender is female" do + it "returns the gender that maps to the gender provided" do + row = { "Gender" => "1" } + values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:) + expect(values.gender.qualtrics_code).to eq 1 + + row = { "Gender" => "Female" } + values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:) + expect(values.gender.qualtrics_code).to eq 1 + + row = { "Gender" => "F" } + values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:) + expect(values.gender.qualtrics_code).to eq 1 + end + end + + context "when the gender is male" do + it "returns the gender that maps to the gender provided" do + row = { "Gender" => "2" } + values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:) + expect(values.gender.qualtrics_code).to eq 2 + + row = { "Gender" => "Male" } + values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:) + expect(values.gender.qualtrics_code).to eq 2 + + row = { "Gender" => "M" } + values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:) + expect(values.gender.qualtrics_code).to eq 2 + end + end + + context "when the gender is non-binary" do + it "returns the gender that maps to the gender provided" do + row = { "Gender" => "4" } + values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:) + expect(values.gender.qualtrics_code).to eq 4 + + row = { "Gender" => "N - Non-Binary" } + values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:) + expect(values.gender.qualtrics_code).to eq 4 + + row = { "Gender" => "N" } + values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:) + expect(values.gender.qualtrics_code).to eq 4 + end + end + + context "when the gender is not known" do + it "returns the gender that maps to the gender provided" do + row = { "Gender" => "N/A" } + values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:) + expect(values.gender.qualtrics_code).to eq 99 + + row = { "Gender" => "NA" } + values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:) + expect(values.gender.qualtrics_code).to eq 99 + + row = { "Gender" => "#N/A" } + values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:) + expect(values.gender.qualtrics_code).to eq 99 + + row = { "Gender" => "#NA" } + values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:) + expect(values.gender.qualtrics_code).to eq 99 + + row = { "Gender" => "Prefer not to disclose" } + values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:) + expect(values.gender.qualtrics_code).to eq 99 + + row = { "Gender" => "" } + values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:) + expect(values.gender.qualtrics_code).to eq 99 + end + end + end + + context ".races" do + before do + races + end + + context "when the race is Native American" do + it "returns the gender that maps to the gender provided" do + row = { "Race" => "1" } + values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:) + expect(values.races.map { |race| race&.qualtrics_code}).to eq [1] + + row = { "Race" => "Native American" } + values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:) + expect(values.races.map { |race| race&.qualtrics_code}).to eq [1] + + row = { "Race" => "American Indian or Alaskan Native" } + values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:) + expect(values.races.map { |race| race&.qualtrics_code}).to eq [1] + end + end + + context "when the race is Asian" do + it "returns the gender that maps to the gender provided" do + row = { "Race" => "2" } + values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:) + expect(values.races.map { |race| race&.qualtrics_code}).to eq [2] + + row = { "Race" => "Asian" } + values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:) + expect(values.races.map { |race| race&.qualtrics_code}).to eq [2] + + row = { "Race" => "Pacific Islander" } + values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:) + expect(values.races.map { |race| race&.qualtrics_code}).to eq [2] + + row = { "Race" => "Pacific Island or Hawaiian Native" } + values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:) + expect(values.races.map { |race| race&.qualtrics_code}).to eq [2] + end + end + + context "when the race is Black" do + it "returns the gender that maps to the gender provided" do + row = { "Race" => "3" } + values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:) + expect(values.races.map { |race| race&.qualtrics_code}).to eq [3] + + row = { "Race" => "Black" } + values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:) + expect(values.races.map { |race| race&.qualtrics_code}).to eq [3] + + row = { "Race" => "African American" } + values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:) + expect(values.races.map { |race| race&.qualtrics_code}).to eq [3] + end + end + + context "when the race is Hispanic" do + it "returns the gender that maps to the gender provided" do + row = { "Race" => "4" } + values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:) + expect(values.races.map { |race| race&.qualtrics_code}).to eq [4] + + row = { "Race" => "Hispanic" } + values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:) + expect(values.races.map { |race| race&.qualtrics_code}).to eq [4] + + row = { "Race" => "Latinx" } + values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:) + expect(values.races.map { |race| race&.qualtrics_code}).to eq [4] + end + end + + context "when the race is White" do + it "returns the gender that maps to the gender provided" do + row = { "Race" => "5" } + values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:) + expect(values.races.map { |race| race&.qualtrics_code}).to eq [5] + + row = { "Race" => "White" } + values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:) + expect(values.races.map { |race| race&.qualtrics_code}).to eq [5] + + row = { "Race" => "Caucasian" } + values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:) + expect(values.races.map { |race| race&.qualtrics_code}).to eq [5] + end + end + + context "when the race is not disclosed" do + it "returns the gender that maps to the gender provided" do + row = { "Race" => "6" } + values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:) + expect(values.races.map { |race| race&.qualtrics_code}).to eq [99] + + row = { "Race" => "Prefer not to disclose" } + values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:) + expect(values.races.map { |race| race&.qualtrics_code}).to eq [99] + end + end + + context "when the race is not disclosed" do + it "returns the gender that maps to the gender provided" do + row = { "Race" => "6" } + values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:) + expect(values.races.map { |race| race&.qualtrics_code}).to eq [99] + + row = { "Race" => "Prefer not to disclose" } + values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:) + expect(values.races.map { |race| race&.qualtrics_code}).to eq [99] + end + end + + context "when the race is self described" do + it "returns the gender that maps to the gender provided" do + row = { "Race" => "7" } + values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:) + expect(values.races.map { |race| race&.qualtrics_code}).to eq [99] + + row = { "Race" => "Prefer to self-describe" } + values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:) + expect(values.races.map { |race| race&.qualtrics_code}).to eq [99] + end + end + + context "when the race is Middle Eastern" do + it "returns the gender that maps to the gender provided" do + row = { "Race" => "8" } + values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:) + expect(values.races.map { |race| race&.qualtrics_code}).to eq [8] + + row = { "Race" => "Middle Eastern" } + values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:) + expect(values.races.map { |race| race&.qualtrics_code}).to eq [8] + + row = { "Race" => "North African" } + values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:) + expect(values.races.map { |race| race&.qualtrics_code}).to eq [8] + end + end + + context "when the race is unknown" do + it "returns the gender that maps to the gender provided" do + row = { "Race" => "NA" } + values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:) + expect(values.races.map { |race| race&.qualtrics_code}).to eq [99] + + row = { "Race" => "#N/A" } + values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:) + expect(values.races.map { |race| race&.qualtrics_code}).to eq [99] + + row = { "Race" => "n/a" } + values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:) + expect(values.races.map { |race| race&.qualtrics_code}).to eq [99] + + row = { "Race" => "#na" } + values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:) + expect(values.races.map { |race| race&.qualtrics_code}).to eq [99] + + row = { "Race" => "" } + values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:) + expect(values.races.map { |race| race&.qualtrics_code}).to eq [99] + end end end