From 6d84204f83575312236d7f7b8d53094c85e5ed5a Mon Sep 17 00:00:00 2001 From: Nelson Jovel Date: Mon, 6 Nov 2023 20:30:51 -0800 Subject: [PATCH] Add race and gender columns to cleaned csv files when those headers are missing --- app/services/cleaner.rb | 9 +++++---- app/services/survey_item_values.rb | 19 ++++++++++++++----- spec/services/cleaner_spec.rb | 25 ++++++++++--------------- 3 files changed, 29 insertions(+), 24 deletions(-) diff --git a/app/services/cleaner.rb b/app/services/cleaner.rb index 76067e39..91835122 100644 --- a/app/services/cleaner.rb +++ b/app/services/cleaner.rb @@ -43,7 +43,9 @@ class Cleaner log_csv = [] data = [] - headers = CSV.parse(file.first).first.push("Raw Income").push("Income").push("Raw ELL").push("ELL").push("Raw SpEd").push("SpEd").push("Progress Count").uniq + headers = CSV.parse(file.first).first.to_set + headers = headers.merge(Set.new(["Raw Income", "Income", "Raw ELL", "ELL", "Raw SpEd", "SpEd", "Progress Count", + "Race", "Gender"])).to_a filtered_headers = include_all_headers(headers:) filtered_headers = remove_unwanted_headers(headers: filtered_headers) log_headers = (filtered_headers + ["Valid Duration?", "Valid Progress?", "Valid Grade?", @@ -70,7 +72,7 @@ class Cleaner def include_all_headers(headers:) alternates = headers.filter(&:present?) - .filter { |header| header.match? /^[st]-\w*-\w*-1$/i } + .filter { |header| header.match?(/^[st]-\w*-\w*-1$/i) } alternates.each do |header| main = header.sub(/-1\z/, "") headers.push(main) unless headers.include?(main) @@ -86,7 +88,7 @@ class Cleaner def remove_unwanted_headers(headers:) headers.to_set.to_a.compact.reject do |item| item.start_with? "Q" - end.reject { |header| header.match?
/^[st]-\w*-\w*-1$/i } + end.reject { |header| header.match?(/^[st]-\w*-\w*-1$/i) } end def write_csv(data:, output_filepath:, filename:, prefix: "") @@ -121,4 +123,3 @@ class Cleaner FileUtils.mkdir_p log_filepath end end - diff --git a/app/services/survey_item_values.rb b/app/services/survey_item_values.rb index 7764a756..dd378455 100644 --- a/app/services/survey_item_values.rb +++ b/app/services/survey_item_values.rb @@ -18,6 +18,8 @@ class SurveyItemValues row["Raw SpEd"] = raw_sped row["SpEd"] = sped row["Progress Count"] = progress + row["Race"] ||= races.map { |race| race&.qualtrics_code }.join(",") + row["Gender"] ||= gender&.qualtrics_code copy_data_to_main_column(main: /Race/i, secondary: /Race Secondary|Race-1/i) copy_data_to_main_column(main: /Gender/i, secondary: /Gender Secondary|Gender-1/i) @@ -84,7 +86,6 @@ class SurveyItemValues dese_id = value_from(pattern: /Dese\s*ID/i) dese_id ||= value_from(pattern: /^School$/i) dese_id ||= value_from(pattern: /School-\s*\w/i) - dese_id.to_i end end @@ -113,21 +114,29 @@ class SurveyItemValues def gender @gender ||= begin - gender_code = value_from(pattern: /Gender|What is your gender?|What is your gender? - Selected Choice/i) + gender_code ||= value_from(pattern: /^Gender$/i) + gender_code ||= value_from(pattern: /What is your gender?|What is your gender? - Selected Choice/i) + gender_code ||= value_from(pattern: /Gender\s*-\s*Qcodes/i) + gender_code ||= value_from(pattern: /Gender/i) gender_code ||= 99 gender_code = gender_code.to_i gender_code = 4 if gender_code == 3 gender_code = 99 if gender_code.zero? 
- genders[gender_code] + genders[gender_code] if genders end end def races @races ||= begin - race_codes = value_from(pattern: /RACE/i) + race_codes = value_from(pattern: /^RACE$/i) race_codes ||= value_from(pattern: %r{What is your race/ethnicity?(Please select all that apply) - Selected Choice}i) - race_codes ||= value_from(pattern: /Race Secondary/i) || "" + race_codes ||= value_from(pattern: /Race Secondary/i) + race_codes ||= value_from(pattern: /Race\s*-\s*Qcodes/i) + race_codes ||= value_from(pattern: /RACE/i) || "" + hispanic = value_from(pattern: /Hispanic\s*Latino/i)&.downcase race_codes = race_codes.split(",").map(&:to_i) || [] + race_codes = race_codes.reject { |code| code == 5 } if hispanic == "true" && race_codes.count == 1 + race_codes = race_codes.push(4) if hispanic == "true" process_races(codes: race_codes) end end diff --git a/spec/services/cleaner_spec.rb b/spec/services/cleaner_spec.rb index deecf3e1..004f476d 100644 --- a/spec/services/cleaner_spec.rb +++ b/spec/services/cleaner_spec.rb @@ -206,20 +206,16 @@ end def reads_headers_from_raw_csv(processed_data) processed_data in [headers, clean_csv, log_csv, data] - expect(headers.to_set.sort).to eq ["StartDate", "EndDate", "Status", "IPAddress", "Progress", "Duration (in seconds)", - "Finished", "RecordedDate", "ResponseId", "District", "School", - "LASID", "Gender", "Race", "What grade are you in?", "s-emsa-q1", "s-emsa-q2", "s-emsa-q3", "s-tint-q1", - "s-tint-q2", "s-tint-q3", "s-tint-q4", "s-tint-q5", "s-acpr-q1", "s-acpr-q2", - "s-acpr-q3", "s-acpr-q4", "s-cure-q1", "s-cure-q2", "s-cure-q3", "s-cure-q4", "s-sten-q1", "s-sten-q2", - "s-sten-q3", "s-sper-q1", "s-sper-q2", "s-sper-q3", "s-sper-q4", "s-civp-q1", "s-civp-q2", "s-civp-q3", - "s-civp-q4", "s-grmi-q1", "s-grmi-q2", "s-grmi-q3", "s-grmi-q4", "s-appa-q1", "s-appa-q2", "s-appa-q3", - "s-peff-q1", "s-peff-q2", "s-peff-q3", "s-peff-q4", "s-peff-q5", "s-peff-q6", "s-sbel-q1", "s-sbel-q2", - "s-sbel-q3", "s-sbel-q4", "s-sbel-q5", 
"s-phys-q1", "s-phys-q2", "s-phys-q3", "s-phys-q4", "s-vale-q1", - "s-vale-q2", "s-vale-q3", "s-vale-q4", "s-acst-q1", "s-acst-q2", "s-acst-q3", "s-sust-q1", "s-sust-q2", - "s-grit-q1", "s-grit-q2", "s-grit-q3", "s-grit-q4", "s-expa-q1", "s-poaf-q1", "s-poaf-q2", "s-poaf-q3", - "s-poaf-q4", "s-tint-q1-1", "s-tint-q2-1", "s-tint-q3-1", "s-tint-q4-1", "s-tint-q5-1", "s-acpr-q1-1", - "s-acpr-q2-1", "s-acpr-q3-1", "s-acpr-q4-1", "s-peff-q1-1", "s-peff-q2-1", "s-peff-q3-1", "s-peff-q4-1", - "s-peff-q5-1", "s-peff-q6-1", "Raw Income", "Income", "Raw ELL", "ELL", "Raw SpEd", "SpEd", "Progress Count"].to_set.sort + expect(headers.to_set.sort).to eq ["StartDate", "EndDate", "Status", "IPAddress", "Progress", "Duration (in seconds)", "Finished", "RecordedDate", + "ResponseId", "District", "School", "LASID", "Gender", "Race", "What grade are you in?", "s-emsa-q1", "s-emsa-q2", "s-emsa-q3", "s-tint-q1", + "s-tint-q2", "s-tint-q3", "s-tint-q4", "s-tint-q5", "s-acpr-q1", "s-acpr-q2", "s-acpr-q3", "s-acpr-q4", "s-cure-q1", "s-cure-q2", "s-cure-q3", + "s-cure-q4", "s-sten-q1", "s-sten-q2", "s-sten-q3", "s-sper-q1", "s-sper-q2", "s-sper-q3", "s-sper-q4", "s-civp-q1", "s-civp-q2", "s-civp-q3", + "s-civp-q4", "s-grmi-q1", "s-grmi-q2", "s-grmi-q3", "s-grmi-q4", "s-appa-q1", "s-appa-q2", "s-appa-q3", "s-peff-q1", "s-peff-q2", "s-peff-q3", + "s-peff-q4", "s-peff-q5", "s-sbel-q1", "s-sbel-q2", "s-sbel-q3", "s-sbel-q4", "s-sbel-q5", "s-phys-q1", "s-phys-q2", "s-phys-q3", "s-phys-q4", + "s-vale-q1", "s-vale-q2", "s-vale-q3", "s-vale-q4", "s-acst-q1", "s-acst-q2", "s-acst-q3", "s-sust-q1", "s-sust-q2", "s-grit-q1", "s-grit-q2", + "s-grit-q3", "s-grit-q4", "s-expa-q1", "s-poaf-q1", "s-poaf-q2", "s-poaf-q3", "s-poaf-q4", "s-tint-q1-1", "s-tint-q2-1", "s-tint-q3-1", "s-tint-q4-1", + "s-tint-q5-1", "s-acpr-q1-1", "s-acpr-q2-1", "s-acpr-q3-1", "s-acpr-q4-1", "s-peff-q1-1", "s-peff-q2-1", "s-peff-q3-1", "s-peff-q4-1", "s-peff-q5-1", + "s-peff-q6-1", "Raw Income", "Income", "Raw ELL", "ELL", "Raw 
SpEd", "SpEd", "Progress Count", "s-peff-q6"].to_set.sort end def invalid_rows_are_rejected_for_the_correct_reasons(data) @@ -302,4 +298,3 @@ def csv_contains_the_correct_rows(csv, rows) expect(csv[index + 1][response_id]).to eq row end end -