From fe039e3d0446a8a52261be37927f4ecb6a8da29e Mon Sep 17 00:00:00 2001
From: Nelson Jovel
Date: Fri, 8 Dec 2023 13:12:19 -0800
Subject: [PATCH] chore: various fixes for race and gender categorization during cleaning. Also add tests for race and gender categorization

---
 Reviewer note: the hunks below were amended after export (N/A is now tested
 before the ^N non-binary branch in Gender.qualtrics_code_from, the survey
 question header regexes are escaped, "Latino"/"Latina" are recognised, and a
 duplicated spec context was removed). The blob ids on the index lines still
 refer to the original commit.

 app/models/gender.rb                     |  25 ++
 app/models/race.rb                       |  33 +++
 app/services/survey_item_values.rb       |  55 +++--
 spec/services/survey_item_values_spec.rb | 254 ++++++++++++++++++++++-
 4 files changed, 348 insertions(+), 19 deletions(-)

diff --git a/app/models/gender.rb b/app/models/gender.rb
index 50f4d74c..f865d8ef 100644
--- a/app/models/gender.rb
+++ b/app/models/gender.rb
@@ -2,4 +2,29 @@ class Gender < ApplicationRecord
   scope :by_qualtrics_code, lambda {
     all.map { |gender| [gender.qualtrics_code, gender] }.to_h
   }
+
+  # Maps a raw gender value (a Qualtrics code or a free-text label) to a
+  # Qualtrics gender code.
+  #
+  # Returns 1 (female), 2 (male), 4 (non-binary, including any self-reported
+  # gender), 99 (undisclosed or unrecognized), or nil for N/A placeholders.
+  def self.qualtrics_code_from(word)
+    case word
+    # Checked first: "N/A" and "NA" would otherwise hit the ^N non-binary branch.
+    when %r{^#*N/*A$}i
+      nil
+    when /Female|^F|1/i
+      1
+    when /Male|^M|2/i
+      2
+    when /Another\s*Gender|Gender Identity not listed above|3|7/i
+      4 # We categorize any self reported gender as non-binary
+    when /Non-Binary|^N|4/i
+      4
+    when /Prefer not to disclose|6/i
+      99
+    else
+      99
+    end
+  end
 end
diff --git a/app/models/race.rb b/app/models/race.rb
index a99ee413..3cbf0b09 100644
--- a/app/models/race.rb
+++ b/app/models/race.rb
@@ -3,4 +3,37 @@ class Race < ApplicationRecord
   has_many :student_races
   has_many :students, through: :student_races
   friendly_id :designation, use: [:slugged]
+
+  # Hash of every race keyed by its qualtrics_code.
+  scope :by_qualtrics_code, lambda {
+    all.map { |race| [race.qualtrics_code, race] }.to_h
+  }
+
+  # Maps a raw race value (a Qualtrics code or a free-text label) to a
+  # Qualtrics race code. Returns nil for N/A placeholders and 99 for anything
+  # unrecognized.
+  def self.qualtrics_code_from(word)
+    case word
+    when /Native\s*American|American\s*Indian|Alaskan\s*Native|1/i
+      1
+    when /^Asian|Pacific\s*Island|Hawaiian|2/i
+      2
+    when /Black|African\s*American|3/i
+      3
+    when /Hispanic|Latin[oax]|4/i # accepts Latino, Latina and Latinx
+      4
+    when /White|Caucasian|5/i
+      5
+    when /Prefer not to disclose|6/i
+      6
+    when /Prefer to self-describe|7/i
+      7
+    when /Middle\s*Eastern|North\s*African|8/i
+      8
+    when %r{^#*N/*A$}i
+      nil
+    else
+      99
+    end
+  end
 end
diff --git a/app/services/survey_item_values.rb b/app/services/survey_item_values.rb
index 3da94fb5..a5139678 100644
--- a/app/services/survey_item_values.rb
+++ b/app/services/survey_item_values.rb
@@ -116,20 +116,41 @@ class SurveyItemValues
   end
 
   def gender
-    gender_code = value_from(pattern: /Gender|What is your gender?|What is your gender? - Selected Choice/i)
-    gender_code ||= 99
-    gender_code = gender_code.to_i
-    gender_code = 4 if gender_code == 3
-    gender_code = 99 if gender_code.zero?
-    genders[gender_code]
+    # Header patterns are tried in order; the first column holding a value
+    # wins, with the bare /Gender/ pattern as the catch-all.
+    @gender ||= begin
+      gender_code ||= value_from(pattern: /Gender self report/i)
+      gender_code ||= value_from(pattern: /^Gender$/i)
+      gender_code ||= value_from(pattern: /What is your gender\?/i)
+      gender_code ||= value_from(pattern: /Gender-\s*SIS/i)
+      gender_code ||= value_from(pattern: /Gender-\s*Qcode/i)
+      gender_code ||= value_from(pattern: /Gender - do not use/i)
+      gender_code ||= value_from(pattern: /Gender/i)
+      gender_code = Gender.qualtrics_code_from(gender_code)
+      genders[gender_code] if genders
+    end
   end
 
   def races
-    race_codes = value_from(pattern: /RACE/i)
-    race_codes ||= value_from(pattern: %r{What is your race/ethnicity?(Please select all that apply) - Selected Choice}i)
-    race_codes ||= value_from(pattern: /Race Secondary/i) || ""
-    race_codes = race_codes.split(",").map(&:to_i) || []
-    process_races(codes: race_codes)
+    @races ||= begin
+      hispanic = value_from(pattern: /Hispanic\s*Latino/i)&.downcase
+      race_codes ||= value_from(pattern: /Race\s*self\s*report/i)
+      race_codes ||= value_from(pattern: /^RACE$/i)
+      race_codes ||= value_from(pattern: %r{What is your race/ethnicity\?\s*\(Please select all that apply\) - Selected Choice}i)
+      race_codes ||= value_from(pattern: /Race Secondary/i)
+      race_codes ||= value_from(pattern: /Race-\s*SIS/i)
+      race_codes ||= value_from(pattern: /Race\s*-\s*Qcodes/i)
+      race_codes ||= value_from(pattern: /RACE/i) || ""
+      # A cell may list several races, e.g. "White, Asian" or "Black and Hispanic".
+      race_codes = race_codes.split(",")
+                             .flat_map { |word| word.split(/\s+and\s+/i) }
+                             .reject(&:blank?)
+                             .map { |race| Race.qualtrics_code_from(race).to_i }
+      # A Hispanic flag replaces a lone "White" (5) answer; otherwise 4 is added once.
+      race_codes = race_codes.reject { |code| code == 5 } if hispanic == "true" && race_codes.count == 1
+      race_codes.push(4) if hispanic == "true" && !race_codes.include?(4)
+      process_races(codes: race_codes)
+    end
   end
 
   def lasid
@@ -137,7 +158,7 @@ class SurveyItemValues
   end
 
   def raw_income
-    @raw_income ||= value_from(pattern: /Low\s*Income|Raw\s*Income/i)
+    @raw_income ||= value_from(pattern: /Low\s*Income|Raw\s*Income|SES-\s*SIS/i)
   end
 
   def income
@@ -152,7 +173,7 @@ class SurveyItemValues
   end
 
   def raw_ell
-    @raw_ell ||= value_from(pattern: /EL Student First Year|Raw\s*ELL/i)
+    @raw_ell ||= value_from(pattern: /EL Student First Year|Raw\s*ELL|ELL-\s*SIS/i)
   end
 
   def ell
@@ -167,7 +188,7 @@ class SurveyItemValues
   end
 
   def raw_sped
-    @raw_sped ||= value_from(pattern: /Special\s*Ed\s*Status|Raw\s*SpEd/i)
+    @raw_sped ||= value_from(pattern: /Special\s*Ed\s*Status|Raw\s*SpEd|SpEd-\s*SIS/i)
   end
 
   def sped
@@ -186,9 +207,13 @@ class SurveyItemValues
     matches = headers.select do |header|
       pattern.match(header)
     end.map { |item| item.delete("\n") }
+
     matches.each do |match|
-      output ||= row[match]
+      output ||= row[match]&.strip
     end
+
+    return nil if output&.match?(%r{^#*N/*A$}i) || output.blank?
+
     output
   end
 
diff --git a/spec/services/survey_item_values_spec.rb b/spec/services/survey_item_values_spec.rb
index 7e8cb511..1977d87a 100644
--- a/spec/services/survey_item_values_spec.rb
+++ b/spec/services/survey_item_values_spec.rb
@@ -7,8 +7,25 @@ RSpec.describe SurveyItemValues, type: :model do
   end
   let(:genders) do
     create(:gender, qualtrics_code: 1)
+    create(:gender, qualtrics_code: 2)
+    create(:gender, qualtrics_code: 4)
+    create(:gender, qualtrics_code: 99)
     Gender.by_qualtrics_code
   end
+
+  let(:races) do
+    create(:race, qualtrics_code: 1)
+    create(:race, qualtrics_code: 2)
+    create(:race, qualtrics_code: 3)
+    create(:race, qualtrics_code: 4)
+    create(:race, qualtrics_code: 5)
+    create(:race, qualtrics_code: 6)
+    create(:race, qualtrics_code: 7)
+    create(:race, qualtrics_code: 8)
+    create(:race, qualtrics_code: 99)
+    Race.by_qualtrics_code
+  end
+
   let(:survey_items) { [] }
   let(:district) { create(:district, name: "Attleboro") }
   let(:attleboro) do
@@ -117,11 +134,240 @@ RSpec.describe SurveyItemValues, type: :model do
       expect(values.grade).to eq 1
     end
   end
+
   context ".gender" do
-    it "returns the grade that maps to the grade provided" do
-      row = { "Gender" => "1" }
-      values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
-      expect(values.gender.qualtrics_code).to eq 1
+    context "when the gender is female" do
+      it "returns the gender that maps to the gender provided" do
+        row = { "Gender" => "1" }
+        values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
+        expect(values.gender.qualtrics_code).to eq 1
+
+        row = { "Gender" => "Female" }
+        values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
+        expect(values.gender.qualtrics_code).to eq 1
+
+        row = { "Gender" => "F" }
+        values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
+        expect(values.gender.qualtrics_code).to eq 1
+      end
+    end
+
+    context "when the gender is male" do
+      it "returns the gender that maps to the gender provided" do
+        row = { "Gender" => "2" }
+        values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
+        expect(values.gender.qualtrics_code).to eq 2
+
+        row = { "Gender" => "Male" }
+        values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
+        expect(values.gender.qualtrics_code).to eq 2
+
+        row = { "Gender" => "M" }
+        values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
+        expect(values.gender.qualtrics_code).to eq 2
+      end
+    end
+
+    context "when the gender is non-binary" do
+      it "returns the gender that maps to the gender provided" do
+        row = { "Gender" => "4" }
+        values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
+        expect(values.gender.qualtrics_code).to eq 4
+
+        row = { "Gender" => "N - Non-Binary" }
+        values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
+        expect(values.gender.qualtrics_code).to eq 4
+
+        row = { "Gender" => "N" }
+        values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
+        expect(values.gender.qualtrics_code).to eq 4
+      end
+    end
+
+    context "when the gender is not known" do
+      it "returns the gender that maps to the gender provided" do
+        row = { "Gender" => "N/A" }
+        values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
+        expect(values.gender.qualtrics_code).to eq 99
+
+        row = { "Gender" => "NA" }
+        values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
+        expect(values.gender.qualtrics_code).to eq 99
+
+        row = { "Gender" => "#N/A" }
+        values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
+        expect(values.gender.qualtrics_code).to eq 99
+
+        row = { "Gender" => "#NA" }
+        values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
+        expect(values.gender.qualtrics_code).to eq 99
+
+        row = { "Gender" => "Prefer not to disclose" }
+        values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
+        expect(values.gender.qualtrics_code).to eq 99
+
+        row = { "Gender" => "" }
+        values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
+        expect(values.gender.qualtrics_code).to eq 99
+      end
+    end
+  end
+
+  context ".races" do
+    before do
+      races
+    end
+
+    context "when the race is Native American" do
+      it "returns the race that maps to the race provided" do
+        row = { "Race" => "1" }
+        values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
+        expect(values.races.map { |race| race&.qualtrics_code }).to eq [1]
+
+        row = { "Race" => "Native American" }
+        values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
+        expect(values.races.map { |race| race&.qualtrics_code }).to eq [1]
+
+        row = { "Race" => "American Indian or Alaskan Native" }
+        values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
+        expect(values.races.map { |race| race&.qualtrics_code }).to eq [1]
+      end
+    end
+
+    context "when the race is Asian" do
+      it "returns the race that maps to the race provided" do
+        row = { "Race" => "2" }
+        values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
+        expect(values.races.map { |race| race&.qualtrics_code }).to eq [2]
+
+        row = { "Race" => "Asian" }
+        values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
+        expect(values.races.map { |race| race&.qualtrics_code }).to eq [2]
+
+        row = { "Race" => "Pacific Islander" }
+        values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
+        expect(values.races.map { |race| race&.qualtrics_code }).to eq [2]
+
+        row = { "Race" => "Pacific Island or Hawaiian Native" }
+        values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
+        expect(values.races.map { |race| race&.qualtrics_code }).to eq [2]
+      end
+    end
+
+    context "when the race is Black" do
+      it "returns the race that maps to the race provided" do
+        row = { "Race" => "3" }
+        values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
+        expect(values.races.map { |race| race&.qualtrics_code }).to eq [3]
+
+        row = { "Race" => "Black" }
+        values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
+        expect(values.races.map { |race| race&.qualtrics_code }).to eq [3]
+
+        row = { "Race" => "African American" }
+        values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
+        expect(values.races.map { |race| race&.qualtrics_code }).to eq [3]
+      end
+    end
+
+    context "when the race is Hispanic" do
+      it "returns the race that maps to the race provided" do
+        row = { "Race" => "4" }
+        values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
+        expect(values.races.map { |race| race&.qualtrics_code }).to eq [4]
+
+        row = { "Race" => "Hispanic" }
+        values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
+        expect(values.races.map { |race| race&.qualtrics_code }).to eq [4]
+
+        row = { "Race" => "Latinx" }
+        values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
+        expect(values.races.map { |race| race&.qualtrics_code }).to eq [4]
+
+        row = { "Race" => "Latino" }
+        values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
+        expect(values.races.map { |race| race&.qualtrics_code }).to eq [4]
+      end
+    end
+
+    context "when the race is White" do
+      it "returns the race that maps to the race provided" do
+        row = { "Race" => "5" }
+        values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
+        expect(values.races.map { |race| race&.qualtrics_code }).to eq [5]
+
+        row = { "Race" => "White" }
+        values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
+        expect(values.races.map { |race| race&.qualtrics_code }).to eq [5]
+
+        row = { "Race" => "Caucasian" }
+        values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
+        expect(values.races.map { |race| race&.qualtrics_code }).to eq [5]
+      end
+    end
+
+    context "when the race is not disclosed" do
+      it "returns the race that maps to the race provided" do
+        row = { "Race" => "6" }
+        values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
+        expect(values.races.map { |race| race&.qualtrics_code }).to eq [99]
+
+        row = { "Race" => "Prefer not to disclose" }
+        values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
+        expect(values.races.map { |race| race&.qualtrics_code }).to eq [99]
+      end
+    end
+
+    context "when the race is self described" do
+      it "returns the race that maps to the race provided" do
+        row = { "Race" => "7" }
+        values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
+        expect(values.races.map { |race| race&.qualtrics_code }).to eq [99]
+
+        row = { "Race" => "Prefer to self-describe" }
+        values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
+        expect(values.races.map { |race| race&.qualtrics_code }).to eq [99]
+      end
+    end
+
+    context "when the race is Middle Eastern" do
+      it "returns the race that maps to the race provided" do
+        row = { "Race" => "8" }
+        values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
+        expect(values.races.map { |race| race&.qualtrics_code }).to eq [8]
+
+        row = { "Race" => "Middle Eastern" }
+        values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
+        expect(values.races.map { |race| race&.qualtrics_code }).to eq [8]
+
+        row = { "Race" => "North African" }
+        values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
+        expect(values.races.map { |race| race&.qualtrics_code }).to eq [8]
+      end
+    end
+
+    context "when the race is unknown" do
+      it "returns the race that maps to the race provided" do
+        row = { "Race" => "NA" }
+        values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
+        expect(values.races.map { |race| race&.qualtrics_code }).to eq [99]
+
+        row = { "Race" => "#N/A" }
+        values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
+        expect(values.races.map { |race| race&.qualtrics_code }).to eq [99]
+
+        row = { "Race" => "n/a" }
+        values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
+        expect(values.races.map { |race| race&.qualtrics_code }).to eq [99]
+
+        row = { "Race" => "#na" }
+        values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
+        expect(values.races.map { |race| race&.qualtrics_code }).to eq [99]
+
+        row = { "Race" => "" }
+        values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
+        expect(values.races.map { |race| race&.qualtrics_code }).to eq [99]
+      end
     end
   end
 