Add race and gender columns to cleaned CSV files when those headers are missing
rpp-main
Nelson Jovel 2 years ago
parent a11a134805
commit 6d84204f83

@ -43,7 +43,9 @@ class Cleaner
log_csv = [] log_csv = []
data = [] data = []
headers = CSV.parse(file.first).first.push("Raw Income").push("Income").push("Raw ELL").push("ELL").push("Raw SpEd").push("SpEd").push("Progress Count").uniq headers = CSV.parse(file.first).first.to_set
headers = headers.merge(Set.new(["Raw Income", "Income", "Raw ELL", "ELL", "Raw SpEd", "SpEd", "Progress Count",
"Race", "Gender"])).to_a
filtered_headers = include_all_headers(headers:) filtered_headers = include_all_headers(headers:)
filtered_headers = remove_unwanted_headers(headers: filtered_headers) filtered_headers = remove_unwanted_headers(headers: filtered_headers)
log_headers = (filtered_headers + ["Valid Duration?", "Valid Progress?", "Valid Grade?", log_headers = (filtered_headers + ["Valid Duration?", "Valid Progress?", "Valid Grade?",
@ -70,7 +72,7 @@ class Cleaner
def include_all_headers(headers:) def include_all_headers(headers:)
alternates = headers.filter(&:present?) alternates = headers.filter(&:present?)
.filter { |header| header.match? /^[st]-\w*-\w*-1$/i } .filter { |header| header.match?(/^[st]-\w*-\w*-1$/i) }
alternates.each do |header| alternates.each do |header|
main = header.sub(/-1\z/, "") main = header.sub(/-1\z/, "")
headers.push(main) unless headers.include?(main) headers.push(main) unless headers.include?(main)
@ -86,7 +88,7 @@ class Cleaner
def remove_unwanted_headers(headers:) def remove_unwanted_headers(headers:)
headers.to_set.to_a.compact.reject do |item| headers.to_set.to_a.compact.reject do |item|
item.start_with? "Q" item.start_with? "Q"
end.reject { |header| header.match? /^[st]-\w*-\w*-1$/i } end.reject { |header| header.match?(/^[st]-\w*-\w*-1$/i) }
end end
def write_csv(data:, output_filepath:, filename:, prefix: "") def write_csv(data:, output_filepath:, filename:, prefix: "")
@ -121,4 +123,3 @@ class Cleaner
FileUtils.mkdir_p log_filepath FileUtils.mkdir_p log_filepath
end end
end end

@ -18,6 +18,8 @@ class SurveyItemValues
row["Raw SpEd"] = raw_sped row["Raw SpEd"] = raw_sped
row["SpEd"] = sped row["SpEd"] = sped
row["Progress Count"] = progress row["Progress Count"] = progress
row["Race"] ||= races.map { |race| race&.qualtrics_code }.join(",")
row["Gender"] ||= gender&.qualtrics_code
copy_data_to_main_column(main: /Race/i, secondary: /Race Secondary|Race-1/i) copy_data_to_main_column(main: /Race/i, secondary: /Race Secondary|Race-1/i)
copy_data_to_main_column(main: /Gender/i, secondary: /Gender Secondary|Gender-1/i) copy_data_to_main_column(main: /Gender/i, secondary: /Gender Secondary|Gender-1/i)
@ -84,7 +86,6 @@ class SurveyItemValues
dese_id = value_from(pattern: /Dese\s*ID/i) dese_id = value_from(pattern: /Dese\s*ID/i)
dese_id ||= value_from(pattern: /^School$/i) dese_id ||= value_from(pattern: /^School$/i)
dese_id ||= value_from(pattern: /School-\s*\w/i) dese_id ||= value_from(pattern: /School-\s*\w/i)
dese_id.to_i dese_id.to_i
end end
end end
@ -113,21 +114,29 @@ class SurveyItemValues
def gender def gender
@gender ||= begin @gender ||= begin
gender_code = value_from(pattern: /Gender|What is your gender?|What is your gender? - Selected Choice/i) gender_code ||= value_from(pattern: /^Gender$/i)
gender_code ||= value_from(pattern: /What is your gender?|What is your gender? - Selected Choice/i)
gender_code ||= value_from(pattern: /Gender\s*-\s*Qcodes/i)
gender_code ||= value_from(pattern: /Gender/i)
gender_code ||= 99 gender_code ||= 99
gender_code = gender_code.to_i gender_code = gender_code.to_i
gender_code = 4 if gender_code == 3 gender_code = 4 if gender_code == 3
gender_code = 99 if gender_code.zero? gender_code = 99 if gender_code.zero?
genders[gender_code] genders[gender_code] if genders
end end
end end
def races def races
@races ||= begin @races ||= begin
race_codes = value_from(pattern: /RACE/i) race_codes = value_from(pattern: /^RACE$/i)
race_codes ||= value_from(pattern: %r{What is your race/ethnicity?(Please select all that apply) - Selected Choice}i) race_codes ||= value_from(pattern: %r{What is your race/ethnicity?(Please select all that apply) - Selected Choice}i)
race_codes ||= value_from(pattern: /Race Secondary/i) || "" race_codes ||= value_from(pattern: /Race Secondary/i)
race_codes ||= value_from(pattern: /Race\s*-\s*Qcodes/i)
race_codes ||= value_from(pattern: /RACE/i) || ""
hispanic = value_from(pattern: /Hispanic\s*Latino/i)&.downcase
race_codes = race_codes.split(",").map(&:to_i) || [] race_codes = race_codes.split(",").map(&:to_i) || []
race_codes = race_codes.reject { |code| code == 5 } if hispanic == "true" && race_codes.count == 1
race_codes = race_codes.push(4) if hispanic == "true"
process_races(codes: race_codes) process_races(codes: race_codes)
end end
end end

@ -206,20 +206,16 @@ end
def reads_headers_from_raw_csv(processed_data) def reads_headers_from_raw_csv(processed_data)
processed_data in [headers, clean_csv, log_csv, data] processed_data in [headers, clean_csv, log_csv, data]
expect(headers.to_set.sort).to eq ["StartDate", "EndDate", "Status", "IPAddress", "Progress", "Duration (in seconds)", expect(headers.to_set.sort).to eq ["StartDate", "EndDate", "Status", "IPAddress", "Progress", "Duration (in seconds)", "Finished", "RecordedDate",
"Finished", "RecordedDate", "ResponseId", "District", "School", "ResponseId", "District", "School", "LASID", "Gender", "Race", "What grade are you in?", "s-emsa-q1", "s-emsa-q2", "s-emsa-q3", "s-tint-q1",
"LASID", "Gender", "Race", "What grade are you in?", "s-emsa-q1", "s-emsa-q2", "s-emsa-q3", "s-tint-q1", "s-tint-q2", "s-tint-q3", "s-tint-q4", "s-tint-q5", "s-acpr-q1", "s-acpr-q2", "s-acpr-q3", "s-acpr-q4", "s-cure-q1", "s-cure-q2", "s-cure-q3",
"s-tint-q2", "s-tint-q3", "s-tint-q4", "s-tint-q5", "s-acpr-q1", "s-acpr-q2", "s-cure-q4", "s-sten-q1", "s-sten-q2", "s-sten-q3", "s-sper-q1", "s-sper-q2", "s-sper-q3", "s-sper-q4", "s-civp-q1", "s-civp-q2", "s-civp-q3",
"s-acpr-q3", "s-acpr-q4", "s-cure-q1", "s-cure-q2", "s-cure-q3", "s-cure-q4", "s-sten-q1", "s-sten-q2", "s-civp-q4", "s-grmi-q1", "s-grmi-q2", "s-grmi-q3", "s-grmi-q4", "s-appa-q1", "s-appa-q2", "s-appa-q3", "s-peff-q1", "s-peff-q2", "s-peff-q3",
"s-sten-q3", "s-sper-q1", "s-sper-q2", "s-sper-q3", "s-sper-q4", "s-civp-q1", "s-civp-q2", "s-civp-q3", "s-peff-q4", "s-peff-q5", "s-sbel-q1", "s-sbel-q2", "s-sbel-q3", "s-sbel-q4", "s-sbel-q5", "s-phys-q1", "s-phys-q2", "s-phys-q3", "s-phys-q4",
"s-civp-q4", "s-grmi-q1", "s-grmi-q2", "s-grmi-q3", "s-grmi-q4", "s-appa-q1", "s-appa-q2", "s-appa-q3", "s-vale-q1", "s-vale-q2", "s-vale-q3", "s-vale-q4", "s-acst-q1", "s-acst-q2", "s-acst-q3", "s-sust-q1", "s-sust-q2", "s-grit-q1", "s-grit-q2",
"s-peff-q1", "s-peff-q2", "s-peff-q3", "s-peff-q4", "s-peff-q5", "s-peff-q6", "s-sbel-q1", "s-sbel-q2", "s-grit-q3", "s-grit-q4", "s-expa-q1", "s-poaf-q1", "s-poaf-q2", "s-poaf-q3", "s-poaf-q4", "s-tint-q1-1", "s-tint-q2-1", "s-tint-q3-1", "s-tint-q4-1",
"s-sbel-q3", "s-sbel-q4", "s-sbel-q5", "s-phys-q1", "s-phys-q2", "s-phys-q3", "s-phys-q4", "s-vale-q1", "s-tint-q5-1", "s-acpr-q1-1", "s-acpr-q2-1", "s-acpr-q3-1", "s-acpr-q4-1", "s-peff-q1-1", "s-peff-q2-1", "s-peff-q3-1", "s-peff-q4-1", "s-peff-q5-1",
"s-vale-q2", "s-vale-q3", "s-vale-q4", "s-acst-q1", "s-acst-q2", "s-acst-q3", "s-sust-q1", "s-sust-q2", "s-peff-q6-1", "Raw Income", "Income", "Raw ELL", "ELL", "Raw SpEd", "SpEd", "Progress Count", "s-peff-q6"].to_set.sort
"s-grit-q1", "s-grit-q2", "s-grit-q3", "s-grit-q4", "s-expa-q1", "s-poaf-q1", "s-poaf-q2", "s-poaf-q3",
"s-poaf-q4", "s-tint-q1-1", "s-tint-q2-1", "s-tint-q3-1", "s-tint-q4-1", "s-tint-q5-1", "s-acpr-q1-1",
"s-acpr-q2-1", "s-acpr-q3-1", "s-acpr-q4-1", "s-peff-q1-1", "s-peff-q2-1", "s-peff-q3-1", "s-peff-q4-1",
"s-peff-q5-1", "s-peff-q6-1", "Raw Income", "Income", "Raw ELL", "ELL", "Raw SpEd", "SpEd", "Progress Count"].to_set.sort
end end
def invalid_rows_are_rejected_for_the_correct_reasons(data) def invalid_rows_are_rejected_for_the_correct_reasons(data)
@ -302,4 +298,3 @@ def csv_contains_the_correct_rows(csv, rows)
expect(csv[index + 1][response_id]).to eq row expect(csv[index + 1][response_id]).to eq row
end end
end end

Loading…
Cancel
Save