From f27a590c5a2fdacf40e7d0274553f2753dc386db Mon Sep 17 00:00:00 2001
From: Nelson Jovel
Date: Thu, 30 Nov 2023 20:57:04 -0800
Subject: [PATCH] Convert gender and race text into qualtrics codes during
 cleaning. Abide by 'prefer not to disclose' for self reported race. Give
 priority to self reported data but use SIS information as backup

---
 app/models/gender.rb                     | 17 ++++++++++++++++
 app/models/race.rb                       | 25 ++++++++++++++++++++++++
 app/services/cleaner.rb                  | 10 +++++++++-
 app/services/survey_item_values.rb       | 22 +++++++++++++--------
 lib/tasks/data.rake                      |  2 +-
 spec/services/cleaner_spec.rb            | 10 +++++-----
 spec/services/survey_item_values_spec.rb |  6 +++---
 7 files changed, 74 insertions(+), 18 deletions(-)

diff --git a/app/models/gender.rb b/app/models/gender.rb
index 50f4d74c..c0cb89a2 100644
--- a/app/models/gender.rb
+++ b/app/models/gender.rb
@@ -2,4 +2,21 @@ class Gender < ApplicationRecord
   scope :by_qualtrics_code, lambda {
     all.map { |gender| [gender.qualtrics_code, gender] }.to_h
   }
+
+  # Translates a gender value into its Qualtrics gender code.
+  #
+  # @param word [#to_s] free text ("Female"), an abbreviation ("F") or a code ("1")
+  # @return [Integer, nil] 1, 2 or 4; 99 when unrecognised; nil for N/A markers
+  def self.qualtrics_code_from(word)
+    case word.to_s.strip
+    # N/A is tested first; otherwise the "N" abbreviation would claim it.
+    when %r{\A#*N/*A\z}i then nil
+    # Abbreviations and numeric codes only match as the entire value.
+    when /Female|\bWoman\b|\A(?:F|1)\z/i then 1
+    when /Male|\bMan\b|\A(?:M|2)\z/i then 2
+    # Code 3 ("another gender") is reported as 4, together with non-binary.
+    when /Another\s*Gender|Gender Identity not listed above|Non[-\s]*Binary|\A[N34]\z/i then 4
+    else 99
+    end
+  end
 end
diff --git a/app/models/race.rb b/app/models/race.rb
index a99ee413..6c2e1d82 100644
--- a/app/models/race.rb
+++ b/app/models/race.rb
@@ -3,4 +3,29 @@ class Race < ApplicationRecord
   has_many :student_races
   has_many :students, through: :student_races
   friendly_id :designation, use: [:slugged]
+
+  # Translates one race/ethnicity value into its Qualtrics race code.
+  #
+  # Matching is case-insensitive and ignores surrounding whitespace, so the
+  # pieces of a comma-separated list can be passed in unchanged.
+  #
+  # @param word [#to_s] free text ("White") or a numeric code ("5")
+  # @return [Integer, nil] 1-8; 99 when unrecognised; nil for N/A markers
+  def self.qualtrics_code_from(word)
+    case word.to_s.strip
+    # N/A markers (NA, N/A, #N/A) carry no value.
+    when %r{\A#*N/*A\z}i then nil
+    # Numeric codes only match as the entire value, never as a stray digit.
+    when /Native\s*American|American\s*Indian|Alaskan\s*Native|\A1\z/i then 1
+    # \b keeps "Caucasian" from being read as "Asian".
+    when /\bAsian|Pacific\s*Island|\A2\z/i then 2
+    when /Black|African\s*American|\A3\z/i then 3
+    when /Hispanic|Latin[oax]|\A4\z/i then 4
+    when /White|Caucasian|\A5\z/i then 5
+    when /Prefer not to disclose|\A6\z/i then 6
+    when /Prefer to self-describe|\A7\z/i then 7
+    when /Middle\s*Eastern|North\s*African|\A8\z/i then 8
+    else 99
+    end
+  end
 end
diff --git a/app/services/cleaner.rb b/app/services/cleaner.rb
index c5fb2d96..a12d5737 100644
--- a/app/services/cleaner.rb
+++ b/app/services/cleaner.rb
@@ -35,7 +35,15 @@ class Cleaner
       row.district.short_name
     end.to_set.to_a
 
-    districts.join(".").to_s + "." + survey_type.to_s + "." + range + ".csv"
+    schools = data.map do |row|
+      row.school.name
+    end.to_set
+
+    # Only add school to filename when there's a single school
+    school_name = ""
+    school_name = schools.first.parameterize + "." if schools.length == 1
+
+    districts.join(".").to_s + "." + school_name + survey_type.to_s + "." + range + ".csv"
   end
 
   def process_raw_file(file:)
diff --git a/app/services/survey_item_values.rb b/app/services/survey_item_values.rb
index c25db216..0a76e21f 100644
--- a/app/services/survey_item_values.rb
+++ b/app/services/survey_item_values.rb
@@ -114,27 +114,30 @@ class SurveyItemValues
 
   def gender
     @gender ||= begin
+      gender_code ||= value_from(pattern: /Gender self report/i)
       gender_code ||= value_from(pattern: /^Gender$/i)
       gender_code ||= value_from(pattern: /What is your gender?|What is your gender? - Selected Choice/i)
-      gender_code ||= value_from(pattern: /Gender-\s*Qcode/i)
+      gender_code ||= value_from(pattern: /Gender - do not use/i)
       gender_code ||= value_from(pattern: /Gender/i)
-      gender_code ||= 99
-      gender_code = gender_code.to_i
-      gender_code = 4 if gender_code == 3
-      gender_code = 99 if gender_code.zero?
+      gender_code ||= value_from(pattern: /Gender-\s*SIS/i)
+      gender_code ||= value_from(pattern: /Gender-\s*Qcode/i)
+      gender_code = Gender.qualtrics_code_from(gender_code)
       genders[gender_code] if genders
     end
   end
 
   def races
     @races ||= begin
-      race_codes = value_from(pattern: /^RACE$/i)
+      hispanic = value_from(pattern: /Hispanic\s*Latino/i)&.downcase
+      race_codes ||= value_from(pattern: /Race\s*self\s*report/i)
+      race_codes ||= value_from(pattern: /^RACE$/i)
       race_codes ||= value_from(pattern: %r{What is your race/ethnicity?(Please select all that apply) - Selected Choice}i)
       race_codes ||= value_from(pattern: /Race Secondary/i)
+      race_codes ||= value_from(pattern: /Race-\s*SIS/i)
       race_codes ||= value_from(pattern: /Race\s*-\s*Qcodes/i)
       race_codes ||= value_from(pattern: /RACE/i) || ""
-      hispanic = value_from(pattern: /Hispanic\s*Latino/i)&.downcase
-      race_codes = race_codes.split(",").map(&:to_i) || []
+      # race_codes is never nil here; it falls back to "" when no header has a value.
+      race_codes = race_codes.split(",").map { |race| Race.qualtrics_code_from(race) }.map(&:to_i)
       race_codes = race_codes.reject { |code| code == 5 } if hispanic == "true" && race_codes.count == 1
       race_codes = race_codes.push(4) if hispanic == "true"
       process_races(codes: race_codes)
@@ -198,6 +201,9 @@ class SurveyItemValues
     matches.each do |match|
       output ||= row[match]
     end
+    # Whole-value N/A only: a prefix test would also discard "Native American".
+    return nil if output&.match?(%r{^#*N/*A$}i) || output.blank?
+
     output
   end
 
diff --git a/lib/tasks/data.rake b/lib/tasks/data.rake
index 6874628e..45ebaac6 100644
--- a/lib/tasks/data.rake
+++ b/lib/tasks/data.rake
@@ -7,7 +7,7 @@ namespace :data do
     student_count = Student.count
     path = "/data/survey_responses/clean/"
     Sftp::Directory.open(path:) do |file|
-      SurveyResponsesDataLoader.from_file(file:)
+      SurveyResponsesDataLoader.new.from_file(file:)
     end
     puts "=====================> Completed loading #{SurveyItemResponse.count} survey responses"
 
diff --git a/spec/services/cleaner_spec.rb b/spec/services/cleaner_spec.rb
index fbe39261..33aa5e87 100644
--- a/spec/services/cleaner_spec.rb
+++ b/spec/services/cleaner_spec.rb
@@ -4,7 +4,7 @@ require "fileutils"
 RSpec.describe Cleaner do
   let(:district) { create(:district, name: "Maynard Public Schools") }
   let(:second_district) { create(:district, name: "District2") }
-  let(:school) { create(:school, dese_id: 1_740_505, district:) }
+  let(:school) { create(:school, dese_id: 1_740_505, district:, name: "Maynard High School") }
   let(:second_school) { create(:school, dese_id: 1_740_305, district:) }
   let(:third_school) { create(:school, dese_id: 222_222, district: second_district) }
 
@@ -156,7 +156,7 @@ RSpec.describe Cleaner do
         filename = Cleaner.new(input_filepath:, output_filepath:, log_filepath:).filename(
           headers: standard_survey_items, data:
         )
-        expect(filename).to eq "maynard.standard.2022-23.csv"
+        expect(filename).to eq "maynard.maynard-high-school.standard.2022-23.csv"
       end
 
       context "when the file is based on short form survey items" do
@@ -168,7 +168,7 @@ RSpec.describe Cleaner do
           filename = Cleaner.new(input_filepath:, output_filepath:, log_filepath:).filename(
             headers: short_form_survey_items, data:
           )
-          expect(filename).to eq "maynard.short_form.2022-23.csv"
+          expect(filename).to eq "maynard.maynard-high-school.short_form.2022-23.csv"
         end
       end
 
@@ -181,7 +181,7 @@ RSpec.describe Cleaner do
           filename = Cleaner.new(input_filepath:, output_filepath:, log_filepath:).filename(
             headers: early_education_survey_items, data:
           )
-          expect(filename).to eq "maynard.early_education.2022-23.csv"
+          expect(filename).to eq "maynard.maynard-high-school.early_education.2022-23.csv"
         end
       end
       context "when the file is based on teacher survey items" do
@@ -193,7 +193,7 @@ RSpec.describe Cleaner do
           filename = Cleaner.new(input_filepath:, output_filepath:, log_filepath:).filename(
             headers: teacher_survey_items, data:
           )
-          expect(filename).to eq "maynard.teacher.2022-23.csv"
+          expect(filename).to eq "maynard.maynard-high-school.teacher.2022-23.csv"
         end
       end
 
diff --git a/spec/services/survey_item_values_spec.rb b/spec/services/survey_item_values_spec.rb
index 3b760c55..d4e6e2b4 100644
--- a/spec/services/survey_item_values_spec.rb
+++ b/spec/services/survey_item_values_spec.rb
@@ -285,15 +285,15 @@ RSpec.describe SurveyItemValues, type: :model do
       expect(values.sped).to eq "Not Special Education"
     end
 
-    it 'tranlsates NA into "Unknown"' do
+    it 'tranlsates NA into "Not Special Education"' do
       headers = ["Raw SpEd"]
       row = { "Raw SpEd" => "NA" }
       values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
-      expect(values.sped).to eq "Unknown"
+      expect(values.sped).to eq "Not Special Education"
 
       row = { "Raw SpEd" => "#NA" }
       values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
-      expect(values.sped).to eq "Unknown"
+      expect(values.sped).to eq "Not Special Education"
     end
   end
 