diff --git a/app/services/cleaner.rb b/app/services/cleaner.rb index 1b982e42..307c177e 100644 --- a/app/services/cleaner.rb +++ b/app/services/cleaner.rb @@ -1,4 +1,4 @@ -require 'fileutils' +require "fileutils" class Cleaner attr_reader :input_filepath, :output_filepath, :log_filepath, :disaggregation_filepath @@ -11,7 +11,7 @@ class Cleaner end def clean - Dir.glob(Rails.root.join(input_filepath, '*.csv')).each do |filepath| + Dir.glob(Rails.root.join(input_filepath, "*.csv")).each do |filepath| puts filepath File.open(filepath) do |file| processed_data = process_raw_file(file:, disaggregation_data:) @@ -20,7 +20,7 @@ class Cleaner filename = filename(headers:, data:) write_csv(data: clean_csv, output_filepath:, filename:) - write_csv(data: log_csv, output_filepath: log_filepath, prefix: 'removed.', filename:) + write_csv(data: log_csv, output_filepath: log_filepath, prefix: "removed.", filename:) end end end @@ -31,8 +31,8 @@ class Cleaner def filename(headers:, data:) survey_item_ids = headers.filter(&:present?).filter do |header| - header.start_with?('s-', 't-') - end.reject { |item| item.end_with? '-1' } + header.start_with?("s-", "t-") + end.reject { |item| item.end_with? "-1" } survey_type = SurveyItem.survey_type(survey_item_ids:) range = data.first.academic_year.range @@ -40,7 +40,7 @@ class Cleaner row.district.short_name end.to_set.to_a - districts.join('.').to_s + '.' + survey_type.to_s + '.' + range + '.csv' + districts.join(".").to_s + "." + survey_type.to_s + "." + range + ".csv" end def process_raw_file(file:, disaggregation_data:) @@ -48,10 +48,11 @@ class Cleaner log_csv = [] data = [] - headers = (CSV.parse(file.first).first << 'Raw Income') << 'Income' - filtered_headers = remove_unwanted_headers(headers:) - log_headers = (filtered_headers + ['Valid Duration?', 'Valid Progress?', 'Valid Grade?', - 'Valid Standard Deviation?']).flatten + headers = (CSV.parse(file.first).first << "Raw Income") << "Income" + filtered_headers = include_all_headers(headers:) + filtered_headers = remove_unwanted_headers(headers: filtered_headers) + log_headers = (filtered_headers + ["Valid Duration?", "Valid Progress?", "Valid Grade?", + "Valid Standard Deviation?"]).flatten clean_csv << filtered_headers log_csv << log_headers @@ -73,6 +74,16 @@ class Cleaner private + def include_all_headers(headers:) + alternates = headers.filter(&:present?) + .filter { |header| header.end_with? "-1" } + alternates.each do |header| + main = header.sub(/-1\z/, "") + headers.push(main) unless headers.include?(main) + end + headers + end + def initialize_directories create_ouput_directory create_log_directory @@ -80,11 +91,11 @@ class Cleaner def remove_unwanted_headers(headers:) headers.to_set.to_a.compact.reject do |item| - item.start_with? 'Q' - end.reject { |item| item.end_with? '-1' } + item.start_with? "Q" + end.reject { |item| item.end_with? "-1" } end - def write_csv(data:, output_filepath:, filename:, prefix: '') + def write_csv(data:, output_filepath:, filename:, prefix: "") csv = CSV.generate do |csv| data.each do |row| csv << row @@ -104,7 +115,7 @@ class Cleaner def survey_items(headers:) survey_item_ids = headers .filter(&:present?) - .filter { |header| header.start_with? 't-', 's-' } + .filter { |header| header.start_with? "t-", "s-" } @survey_items ||= SurveyItem.where(survey_item_id: survey_item_ids) end diff --git a/app/services/survey_item_values.rb b/app/services/survey_item_values.rb index ea823abb..54aac257 100644 --- a/app/services/survey_item_values.rb +++ b/app/services/survey_item_values.rb @@ -8,6 +8,10 @@ class SurveyItemValues @survey_items = survey_items @schools = schools @disaggregation_data = disaggregation_data + + copy_likert_scores_from_variant_survey_items + row["Income"] = income + row["Raw Income"] = raw_income end # Some survey items have variants, i.e. a survey item with an id of s-tint-q1 might have a variant that looks like s-tint-q1-1. We must ensure that all variants in the form of s-tint-q1-1 have a matching pair. @@ -115,6 +119,7 @@ class SurveyItemValues return @raw_income if @raw_income.present? return "Unknown" unless disaggregation_data.present? + byebug disaggregation = disaggregation_data[[lasid, district.name, academic_year.range]] return "Unknown" unless disaggregation.present? @@ -149,9 +154,6 @@ class SurveyItemValues end def to_a - copy_likert_scores_from_variant_survey_items - row["Income"] = income - row["Raw Income"] = raw_income headers.select(&:present?) .reject { |key, _value| key.start_with? "Q" } .reject { |key, _value| key.end_with? "-1" } @@ -242,7 +244,7 @@ class SurveyItemValues headers.filter(&:present?).filter { |header| header.end_with? "-1" }.each do |header| likert_score = row[header] main_item = header.gsub("-1", "") - row[main_item] = likert_score if likert_score.present? + row[main_item] = likert_score if likert_score.present? && row[main_item].blank? end end end diff --git a/spec/fixtures/raw/sample_maynard_raw_student_survey.csv b/spec/fixtures/raw/sample_maynard_raw_student_survey.csv index 1a2b72e6..0317422c 100644 --- a/spec/fixtures/raw/sample_maynard_raw_student_survey.csv +++ b/spec/fixtures/raw/sample_maynard_raw_student_survey.csv @@ -1,36 +1,36 @@ -StartDate,EndDate,Status,IPAddress,Progress,Duration (in seconds),Finished,RecordedDate,ResponseId,District,School,LASID,Gender,Race,What grade are you in?,s-emsa-q1,s-emsa-q2,s-emsa-q3,s-tint-q1,s-tint-q2,s-tint-q3,s-tint-q4,s-tint-q5,s-acpr-q1,s-acpr-q2,s-acpr-q3,s-acpr-q4,s-cure-q1,s-cure-q2,s-cure-q3,s-cure-q4,s-sten-q1,s-sten-q2,s-sten-q3,s-sper-q1,s-sper-q2,s-sper-q3,s-sper-q4,s-civp-q1,s-civp-q2,s-civp-q3,s-civp-q4,s-grmi-q1,s-grmi-q2,s-grmi-q3,s-grmi-q4,s-appa-q1,s-appa-q2,s-appa-q3,s-peff-q1,s-peff-q2,s-peff-q3,s-peff-q4,s-peff-q5,s-peff-q6,s-sbel-q1,s-sbel-q2,s-sbel-q3,s-sbel-q4,s-sbel-q5,s-phys-q1,s-phys-q2,s-phys-q3,s-phys-q4,s-vale-q1,s-vale-q2,s-vale-q3,s-vale-q4,s-acst-q1,s-acst-q2,s-acst-q3,s-sust-q1,s-sust-q2,s-grit-q1,s-grit-q2,s-grit-q3,s-grit-q4,s-expa-q1,s-poaf-q1,s-poaf-q2,s-poaf-q3,s-poaf-q4,s-tint-q1-1,s-tint-q2-1,s-tint-q3-1,s-tint-q4-1,s-tint-q5-1,s-acpr-q1-1,s-acpr-q2-1,s-acpr-q3-1,s-acpr-q4-1,s-peff-q1-1,s-peff-q2-1,s-peff-q3-1,s-peff-q4-1,s-peff-q5-1,s-peff-q6-1 -2023-03-17 7:57:47,2023-03-17 8:09:15,0,71.174.81.214,100,1000,1,2023-03-17T8:9:15,1000,2,1740505,1,2,4,9,3,5,5,,,,,,,,,,,,,,,,,,,,,,,,,4,4,3,5,,,,,,,,,,4,4,2,3,2,5,5,5,5,4,2,2,4,3,2,3,3,5,4,4,3,5,2,3,3,4,4,4,1,2,5,5,,,,,4,4,4,3,4,5 -2023-03-17 8:02:15,2023-03-17 8:08:02,0,71.174.81.214,25,1000,1,2023-03-17T8:8:3,1001,2,1740505,2,1,5,10,,,,,,,,,,,,,,,,,,,,5,4,4,4,,,,,,,,,2,3,2,,,,,,,4,3,2,4,3,5,5,4,4,4,4,3,5,3,4,3,2,4,3,4,3,3,1,2,2,2,3,,,,,,5,4,4,5,4,4,5,3,3,4 -2023-03-17 8:00:05,2023-03-17 8:07:39,0,71.174.81.214,24,1000,1,2023-03-17T8:7:39,1002,2,1740505,3,,,9,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5,4,4,5,3,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, -2023-03-17 8:03:35,2023-03-17 8:15:38,0,71.174.81.214,0,1000,1,2023-03-17T8:15:38,1003,2,1740505,4,1,"1,4",10,4,5,5,,,,,,,,,,3,4,5,5,4,4,3,5,4,3,4,4,3,3,3,2,1,3,2,3,1,1,,,,,,,,,,,,,,,,,,,,,,,5,4,3,5,3,4,3,3,3,5,4,5,4,4,5,5,5,5,5,5,,,,,, -2023-03-17 7:57:09,2023-03-17 8:12:26,0,71.174.81.214,,1000,1,2023-03-17T8:12:27,1004,2,1740505,5,1,"5,4",9,4,4,5,,,,,,,,,,,,,,,,,,,,,,,,,4,4,2,4,,,,,,,,,,3,3,3,3,3,5,5,4,5,5,3,3,4,2,2,1,3,2,4,5,4,5,4,3,1,3,4,3,1,3,3,1,,,,,4,4,3,4,2,4 -2023-03-17 8:01:50,2023-03-17 8:17:51,0,71.174.81.214,100,240,1,2023-03-17T8:17:52,1005,2,1740505,6,1,"5,4",9,4,3,4,,,,,,,,,,4,4,3,3,3,4,4,4,5,2,5,4,4,4,4,4,4,5,3,4,3,5,,,,,,,4,4,4,5,2,,,,,,,,,,,,,,,,,,,,,,,4,1,4,4,4,5,4,4,5,4,5,5,5,5,4 -2023-03-17 8:01:45,2023-03-17 8:07:59,0,71.174.81.214,100,239,1,2023-03-17T8:8:0,1006,2,1740505,7,1,5,10,,,,,,,,,,,,,5,3,3,5,2,3,3,,,,,4,4,4,4,,,,,,,,,,,,,,4,5,3,4,3,5,5,4,5,5,4,4,4,2,1,1,4,5,4,4,3,4,2,2,3,2,3,,,,,,,,,,4,4,5,4,4,5 -2023-03-17 9:07:09,2023-03-17 9:20:10,0,71.174.81.214,100,0,1,2023-03-17T9:20:11,1007,2,1740305,8,2,5,7,,,,,,,,,,,,,4,5,5,4,,4,3,,,,,5,4,4,5,,,,,,,,,,,,,,5,5,4,5,5,5,5,5,5,4,5,5,5,3,2,3,3,3,5,4,4,4,3,3,4,,4,,,,,,,,,,4,5,5,5,5,5 -2023-03-17 8:02:11,2023-03-17 8:29:53,0,71.174.81.214,100,,1,2023-03-17T8:29:53,1008,2,1740505,9,1,"5,4",10,,,,,,,,,,,,,,,,,,,,3,3,2,3,,,,,,,,,2,3,3,,,,,,,3,4,3,3,3,5,5,5,5,4,4,2,3,2,3,2,2,5,2,4,3,3,1,3,3,3,4,,,,,,4,4,4,4,4,4,4,4,2,4 -2023-03-17 8:00:42,2023-03-17 8:12:00,0,71.174.81.214,100,1000,1,2023-03-17T8:12:0,1009,2,1740505,10,2,5,1,,,,,,,,,,,,,1,4,3,2,1,3,2,,,,,3,3,4,4,,,,,,,,,,,,,,5,5,2,4,5,4,4,5,4,2,2,1,2,2,4,3,5,5,4,4,1,4,1,2,1,3,3,,,,,,,,,,3,4,3,1,1,1 -2023-03-17 8:03:09,2023-03-17 8:13:27,0,71.174.81.214,100,1000,1,2023-03-17T8:13:28,1010,2,1740505,11,1,5,2,,,,,,,,,,,,,5,3,2,4,2,2,3,,,,,4,5,5,5,,,,,,,,,,,,,,4,4,3,4,4,4,4,4,5,4,3,5,4,2,1,2,4,4,5,4,3,5,4,1,3,3,3,,,,,,,,,,4,3,4,4,4,4 -2023-03-17 8:23:20,2023-03-17 8:34:00,0,71.174.81.214,100,1000,1,2023-03-17T8:34:0,1011,2,1740505,12,2,3,3,1,2,2,2,3,2,4,2,5,5,3,5,3,3,3,2,3,4,4,2,4,3,5,4,4,4,3,4,3,3,4,3,1,3,,,,,,,,,,,,,,,,,,,,,,,1,2,4,4,3,4,3,2,2,4,4,,,,,,,,,,,,,,, -2023-03-17 8:36:36,2023-03-17 8:47:33,0,71.174.81.214,100,1000,1,2023-03-17T8:47:34,1012,2,1740505,13,1,3,4,4,5,4,,,,,,,,,,4,2,3,2,2,3,4,4,5,3,4,2,4,2,3,3,4,3,3,2,1,1,,,,,,,,,,,,5,5,2,4,2,3,3,4,5,4,5,,,,,,,,,,,,4,2,3,3,2,4,4,3,3,,,,,, -2023-03-17 8:01:10,2023-03-17 8:09:17,0,71.174.81.214,100,1000,1,2023-03-17T8:9:18,1013,2,1740505,14,1,4,5,4,3,5,,,,,,,,,,3,4,3,4,3,4,4,2,4,4,2,3,1,2,2,4,2,3,5,2,2,1,,,,,,,4,4,3,3,4,,,,,,,,,,,,,,,,,,,,,,,2,1,3,2,5,4,5,2,2,2,3,4,3,3,4 -2023-03-17 10:06:07,2023-03-17 10:12:54,0,71.174.81.214,100,1000,1,2023-03-17T10:12:56,1014,2,1740505,15,1,5,6,,,,,,,,,,,,,3,3,4,2,1,2,4,,,,,2,2,2,2,,,,,,,,,,,,,,1,2,1,2,3,4,5,3,5,3,1,2,3,2,2,1,2,4,3,2,3,2,4,2,3,2,2,,,,,,,,,,4,4,4,4,4,5 -2023-03-17 7:57:13,2023-03-17 8:05:02,0,71.174.81.214,100,1000,1,2023-03-17T8:5:2,1015,2,1740505,16,4,5,7,,,,,,,,,,,,,,,,,,,,3,3,3,3,,,,,,,,,3,5,2,,,,,,,2,1,3,2,4,4,4,3,3,2,2,3,4,3,5,5,5,4,3,4,2,4,5,3,1,3,2,,,,,,4,4,3,4,4,5,4,5,5,5 -2023-03-17 7:57:50,2023-03-17 8:02:53,0,71.174.81.214,100,1000,1,2023-03-17T8:2:54,1016,2,1740505,17,2,5,8,1,1,1,,,,,,,,,,,,,,,,,,,,,,,,,5,2,1,1,,,,,,,,,,5,4,3,3,3,2,4,3,3,5,1,1,1,1,3,5,1,1,4,5,4,1,3,1,3,2,1,1,1,1,1,1,,,,,1,1,1,1,1,2 -2023-03-17 8:40:22,2023-03-17 8:53:19,0,71.174.81.214,100,1000,0,2023-03-18T8:53:20,1017,2,1740505,18,2,5,9,,,,,,,,,,,,,,,,,3,3,4,,,,,1,1,1,2,4,3,2,4,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2,2,2,1,3,2,3,2,4,,,,,, -2023-03-17 8:37:13,2023-03-17 8:43:34,0,71.174.81.214,100,1000,1,2023-03-17T8:43:35,1018,2,1740505,19,2,2,10,,,,,,,,,,,,,,,,,,,,3,3,2,3,,,,,,,,,2,2,1,,,,,,,4,4,4,4,4,4,5,4,5,3,4,3,3,4,5,4,,,3,2,3,4,1,4,2,3,4,,,,,,4,5,5,4,4,4,5,4,3,4 -2023-03-17 8:36:27,2023-03-17 8:44:07,0,71.174.81.214,100,1000,1,2023-03-17T8:44:8,1019,2,1740505,20,1,2,11,3,4,3,,,,,,,,,,2,3,3,2,4,5,4,3,4,3,5,3,4,4,5,4,5,3,4,5,4,4,,,,,,,3,4,3,4,3,,,,,,,,,,,,,,,,,,,,,,,4,2,,2,4,4,5,2,4,5,5,4,3,3,4 -2023-03-17 8:33:55,2023-03-17 8:43:13,0,71.174.81.214,100,1000,1,2023-03-17T8:43:14,1020,2,1740505,21,1,5,12,3,1,3,,,,,,,,,,,,,,2,3,3,,4,,4,,4,,4,4,3,4,3,1,5,2,,,,,,,3,4,2,3,4,,,,,,,,,,,,,,,,,,,,,,,,,,,,5,4,5,5,5,4,,4,4,4 -2023-03-17 8:50:49,2023-03-17 9:13:18,0,71.174.81.214,100,1000,1,2023-03-17T9:13:19,1021,2,1740305,22,2,"5,2",1,,,,,,,,,,,,,2,3,3,3,2,3,3,,,,,3,4,4,4,,,,,,,,,,,,,,2,2,1,3,2,5,5,4,5,4,2,2,2,1,1,1,1,3,3,4,3,4,4,1,1,1,3,,,,,,,,,,4,4,4,4,3,4 -2023-03-17 7:57:37,2023-03-17 8:04:25,0,71.174.81.214,100,1000,1,2023-03-17T8:4:25,1022,2,1740305,23,1,5,2,,,,,,,,,,,,,,,,,,,,4,4,4,5,,,,,,,,,4,4,5,,,,,,,3,3,4,4,4,4,5,4,5,4,3,3,3,2,2,2,3,3,3,2,3,4,4,3,3,2,4,,,,,,3,2,4,3,3,3,3,4,2,2 -2023-03-17 8:01:47,2023-03-17 8:08:39,0,71.174.81.214,100,1000,1,2023-03-17T8:8:39,1023,2,1740305,24,1,"2,4",3,4,3,5,,,,,,,,,,2,2,2,2,1,2,2,4,3,2,3,2,3,3,4,3,4,2,3,4,3,2,,,,,,,,,,,,5,4,3,5,3,1,2,2,2,2,4,,,,,,,,,,,,1,2,4,2,2,5,4,1,4,,,,,, -2023-03-17 8:37:21,2023-03-17 8:58:16,0,71.174.81.214,100,1000,1,2023-03-17T8:58:16,1024,2,1740305,25,1,5,4,3,3,3,,,,,,,,,,,,,,,,,,,,,,,,,4,4,2,3,,,,,,,,,,3,4,4,3,2,5,5,4,5,3,3,2,3,4,3,3,2,4,4,3,3,3,2,2,3,3,4,3,3,3,3,3,,,,,4,3,2,3,3,3 -2023-03-17 8:02:25,2023-03-17 8:11:16,0,71.174.81.214,100,1000,0,2023-03-18T8:11:21,1025,2,1740305,26,2,5,5,,,,,,,,,,,,,,,,,3,3,3,4,4,4,4,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3,,1,3,,,,,,4,3,3,3,3,,,,,,,,,, -2023-03-17 9:33:54,2023-03-17 9:57:21,0,71.174.81.214,100,1000,1,2023-03-17T9:57:22,1026,2,1740305,27,2,5,6,,,,,,,,,5,5,4,5,,,,,,,,1,2,2,2,,,,,,,,,1,1,2,5,4,4,4,3,5,5,4,,4,2,5,4,4,3,5,3,2,2,1,1,5,3,2,2,3,1,1,1,2,1,3,3,,,,,,,,,,,,,,, -2023-03-17 9:48:38,2023-03-17 9:58:45,0,71.174.81.214,100,1000,1,2023-03-17T9:58:45,1027,2,1740305,28,1,5,7,,,,,,,,,2,4,4,2,,,,,,,,3,3,3,5,,,,,,,,,2,3,3,4,3,2,2,3,2,1,3,2,3,2,3,3,3,2,4,3,4,2,2,1,1,5,5,2,3,2,3,5,4,3,2,2,,,,,,,,,,,,,,, -2023-03-17 8:36:40,2023-03-17 8:43:21,0,71.174.81.214,100,1000,1,2023-03-17T8:43:22,1028,2,1740305,29,2,5,8,,,,,,,,,,,,,,,,,,,,3,3,2,3,,,,,,,,,4,3,4,,,,,,,3,3,2,4,3,5,4,4,5,4,4,2,3,4,2,5,4,4,3,3,3,3,2,2,3,1,4,,,,,,3,4,4,4,2,4,5,4,3,4 -2023-03-17 9:40:56,2023-03-17 9:52:52,0,71.174.81.214,100,1000,1,2023-03-17T9:52:52,1029,2,1740305,30,2,5,9,3,4,3,5,3,5,5,5,5,4,4,5,4,4,4,4,2,3,2,4,4,3,4,3,4,3,5,5,5,4,3,5,4,4,,,,,,,,,,,,1,5,4,4,5,4,3,4,3,1,4,,,,,,,,,,,,,,,,,,,,,,,,,, -2023-03-17 9:33:58,2023-03-17 9:48:33,0,71.174.81.214,100,1000,1,2023-03-17T9:48:33,1030,2,1740305,31,2,5,10,,,,,,,,,4,3,5,2,,,,,,,,5,4,4,5,,,,,,,,,5,2,4,4,4,2,2,3,4,2,2,1,1,2,1,2,2,5,5,5,4,4,3,1,1,1,1,3,2,3,3,3,4,4,4,2,,,,,,,,,,,,,,, -2023-03-17 8:03:04,2023-03-17 8:23:33,0,71.174.81.214,100,1000,1,2023-03-17T8:23:33,1031,2,1740305,32,1,5,11,1,1,1,3,4,2,4,3,,,,,,,,,,,,,,,,,,,,5,5,5,5,,,,4,4,3,2,3,3,2,3,3,1,3,3,5,3,4,5,3,3,5,1,2,2,1,1,4,5,3,5,4,2,4,2,3,,,,,,,,,,,,,,, -2023-03-17 8:33:14,2023-03-17 8:41:01,0,71.174.81.214,100,1000,1,2023-03-17T8:41:2,1032,2,1740305,33,1,5,12,,,,,,,,,,,,,2,1,1,1,2,2,3,,,,,2,1,3,2,,,,,,,,,,,,,,1,1,1,1,1,4,3,4,5,4,1,1,2,3,2,3,4,2,2,3,3,2,2,2,1,2,3,,,,,,,,,,3,2,2,1,2,2 -2023-03-17 7:57:06,2023-03-17 8:08:35,0,71.174.81.214,100,1000,1,2023-03-17T8:8:35,1033,2,1740505,34,2,5,9,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2,2,2,2,2,2,2,2,2,2 -2023-03-17 7:58:38,2023-03-17 8:12:04,0,71.174.81.214,100,1000,1,2023-03-17T8:12:5,1034,2,1740505,35,2,"5,4",12,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +StartDate,EndDate,Status,IPAddress,Progress,Duration (in seconds),Finished,RecordedDate,ResponseId,District,School,LASID,Gender,Race,What grade are you in?,s-emsa-q1,s-emsa-q2,s-emsa-q3,s-tint-q1,s-tint-q2,s-tint-q3,s-tint-q4,s-tint-q5,s-acpr-q1,s-acpr-q2,s-acpr-q3,s-acpr-q4,s-cure-q1,s-cure-q2,s-cure-q3,s-cure-q4,s-sten-q1,s-sten-q2,s-sten-q3,s-sper-q1,s-sper-q2,s-sper-q3,s-sper-q4,s-civp-q1,s-civp-q2,s-civp-q3,s-civp-q4,s-grmi-q1,s-grmi-q2,s-grmi-q3,s-grmi-q4,s-appa-q1,s-appa-q2,s-appa-q3,s-peff-q1,s-peff-q2,s-peff-q3,s-peff-q4,s-peff-q5,s-sbel-q1,s-sbel-q2,s-sbel-q3,s-sbel-q4,s-sbel-q5,s-phys-q1,s-phys-q2,s-phys-q3,s-phys-q4,s-vale-q1,s-vale-q2,s-vale-q3,s-vale-q4,s-acst-q1,s-acst-q2,s-acst-q3,s-sust-q1,s-sust-q2,s-grit-q1,s-grit-q2,s-grit-q3,s-grit-q4,s-expa-q1,s-poaf-q1,s-poaf-q2,s-poaf-q3,s-poaf-q4,s-tint-q1-1,s-tint-q2-1,s-tint-q3-1,s-tint-q4-1,s-tint-q5-1,s-acpr-q1-1,s-acpr-q2-1,s-acpr-q3-1,s-acpr-q4-1,s-peff-q1-1,s-peff-q2-1,s-peff-q3-1,s-peff-q4-1,s-peff-q5-1,s-peff-q6-1 +2023-03-17 7:57:47,2023-03-17 8:09:15,0,71.174.81.214,100,1000,1,2023-03-17T8:9:15,1000,2,1740505,1,2,4,9,3,5,5,,,,,,,,,,,,,,,,,,,,,,,,,4,4,3,5,,,,,,,,,4,4,2,3,2,5,5,5,5,4,2,2,4,3,2,3,3,5,4,4,3,5,2,3,3,4,4,4,1,2,5,5,,,,,4,4,4,3,4,5 +2023-03-17 8:02:15,2023-03-17 8:08:02,0,71.174.81.214,25,1000,1,2023-03-17T8:8:3,1001,2,1740505,2,1,5,10,,,,,,,,,,,,,,,,,,,,5,4,4,4,,,,,,,,,2,3,2,,,,,,4,3,2,4,3,5,5,4,4,4,4,3,5,3,4,3,2,4,3,4,3,3,1,2,2,2,3,,,,,,5,4,4,5,4,4,5,3,3,4 +2023-03-17 8:00:05,2023-03-17 8:07:39,0,71.174.81.214,24,1000,1,2023-03-17T8:7:39,1002,2,1740505,3,,,9,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5,4,4,5,3,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +2023-03-17 8:03:35,2023-03-17 8:15:38,0,71.174.81.214,0,1000,1,2023-03-17T8:15:38,1003,2,1740505,4,1,"1,4",10,4,5,5,,,,,,,,,,3,4,5,5,4,4,3,5,4,3,4,4,3,3,3,2,1,3,2,3,1,1,,,,,,,,,,,,,,,,,,,,,,5,4,3,5,3,4,3,3,3,5,4,5,4,4,5,5,5,5,5,5,,,,,, +2023-03-17 7:57:09,2023-03-17 8:12:26,0,71.174.81.214,,1000,1,2023-03-17T8:12:27,1004,2,1740505,5,1,"5,4",9,4,4,5,,,,,,,,,,,,,,,,,,,,,,,,,4,4,2,4,,,,,,,,,3,3,3,3,3,5,5,4,5,5,3,3,4,2,2,1,3,2,4,5,4,5,4,3,1,3,4,3,1,3,3,1,,,,,4,4,3,4,2,4 +2023-03-17 8:01:50,2023-03-17 8:17:51,0,71.174.81.214,100,240,1,2023-03-17T8:17:52,1005,2,1740505,6,1,"5,4",9,4,3,4,,,,,,,,,,4,4,3,3,3,4,4,4,5,2,5,4,4,4,4,4,4,5,3,4,3,5,,,,,,4,4,4,5,2,,,,,,,,,,,,,,,,,,,,,,,4,1,4,4,4,5,4,4,5,4,5,5,5,5,4 +2023-03-17 8:01:45,2023-03-17 8:07:59,0,71.174.81.214,100,239,1,2023-03-17T8:8:0,1006,2,1740505,7,1,5,10,,,,,,,,,,,,,5,3,3,5,2,3,3,,,,,4,4,4,4,,,,,,,,,,,,,4,5,3,4,3,5,5,4,5,5,4,4,4,2,1,1,4,5,4,4,3,4,2,2,3,2,3,,,,,,,,,,4,4,5,4,4,5 +2023-03-17 9:07:09,2023-03-17 9:20:10,0,71.174.81.214,100,0,1,2023-03-17T9:20:11,1007,2,1740305,8,2,5,7,,,,,,,,,,,,,4,5,5,4,,4,3,,,,,5,4,4,5,,,,,,,,,,,,,5,5,4,5,5,5,5,5,5,4,5,5,5,3,2,3,3,3,5,4,4,4,3,3,4,,4,,,,,,,,,,4,5,5,5,5,5 +2023-03-17 8:02:11,2023-03-17 8:29:53,0,71.174.81.214,100,,1,2023-03-17T8:29:53,1008,2,1740505,9,1,"5,4",10,,,,,,,,,,,,,,,,,,,,3,3,2,3,,,,,,,,,2,3,3,,,,,,3,4,3,3,3,5,5,5,5,4,4,2,3,2,3,2,2,5,2,4,3,3,1,3,3,3,4,,,,,,4,4,4,4,4,4,4,4,2,4 +2023-03-17 8:00:42,2023-03-17 8:12:00,0,71.174.81.214,100,1000,1,2023-03-17T8:12:0,1009,2,1740505,10,2,5,1,,,,,,,,,,,,,1,4,3,2,1,3,2,,,,,3,3,4,4,,,,,,,,,,,,,5,5,2,4,5,4,4,5,4,2,2,1,2,2,4,3,5,5,4,4,1,4,1,2,1,3,3,,,,,,,,,,3,4,3,1,1,1 +2023-03-17 8:03:09,2023-03-17 8:13:27,0,71.174.81.214,100,1000,1,2023-03-17T8:13:28,1010,2,1740505,11,1,5,2,,,,,,,,,,,,,5,3,2,4,2,2,3,,,,,4,5,5,5,,,,,,,,,,,,,4,4,3,4,4,4,4,4,5,4,3,5,4,2,1,2,4,4,5,4,3,5,4,1,3,3,3,,,,,,,,,,4,3,4,4,4,4 +2023-03-17 8:23:20,2023-03-17 8:34:00,0,71.174.81.214,100,1000,1,2023-03-17T8:34:0,1011,2,1740505,12,2,3,3,1,2,2,2,3,2,4,2,5,5,3,5,3,3,3,2,3,4,4,2,4,3,5,4,4,4,3,4,3,3,4,3,1,3,,,,,,,,,,,,,,,,,,,,,,1,2,4,4,3,4,3,2,2,4,4,,,,,,,,,,,,,,, +2023-03-17 8:36:36,2023-03-17 8:47:33,0,71.174.81.214,100,1000,1,2023-03-17T8:47:34,1012,2,1740505,13,1,3,4,4,5,4,,,,,,,,,,4,2,3,2,2,3,4,4,5,3,4,2,4,2,3,3,4,3,3,2,1,1,,,,,,,,,,,5,5,2,4,2,3,3,4,5,4,5,,,,,,,,,,,,4,2,3,3,2,4,4,3,3,,,,,, +2023-03-17 8:01:10,2023-03-17 8:09:17,0,71.174.81.214,100,1000,1,2023-03-17T8:9:18,1013,2,1740505,14,1,4,5,4,3,5,,,,,,,,,,3,4,3,4,3,4,4,2,4,4,2,3,1,2,2,4,2,3,5,2,2,1,,,,,,4,4,3,3,4,,,,,,,,,,,,,,,,,,,,,,,2,1,3,2,5,4,5,2,2,2,3,4,3,3,4 +2023-03-17 10:06:07,2023-03-17 10:12:54,0,71.174.81.214,100,1000,1,2023-03-17T10:12:56,1014,2,1740505,15,1,5,6,,,,,,,,,,,,,3,3,4,2,1,2,4,,,,,2,2,2,2,,,,,,,,,,,,,1,2,1,2,3,4,5,3,5,3,1,2,3,2,2,1,2,4,3,2,3,2,4,2,3,2,2,,,,,,,,,,4,4,4,4,4,5 +2023-03-17 7:57:13,2023-03-17 8:05:02,0,71.174.81.214,100,1000,1,2023-03-17T8:5:2,1015,2,1740505,16,4,5,7,,,,,,,,,,,,,,,,,,,,3,3,3,3,,,,,,,,,3,5,2,,,,,,2,1,3,2,4,4,4,3,3,2,2,3,4,3,5,5,5,4,3,4,2,4,5,3,1,3,2,,,,,,4,4,3,4,4,5,4,5,5,5 +2023-03-17 7:57:50,2023-03-17 8:02:53,0,71.174.81.214,100,1000,1,2023-03-17T8:2:54,1016,2,1740505,17,2,5,8,1,1,1,,,,,,,,,,,,,,,,,,,,,,,,,5,2,1,1,,,,,,,,,5,4,3,3,3,2,4,3,3,5,1,1,1,1,3,5,1,1,4,5,4,1,3,1,3,2,1,1,1,1,1,1,,,,,1,1,1,1,1,2 +2023-03-17 8:40:22,2023-03-17 8:53:19,0,71.174.81.214,100,1000,0,2023-03-18T8:53:20,1017,2,1740505,18,2,5,9,,,,,,,,,,,,,,,,,3,3,4,,,,,1,1,1,2,4,3,2,4,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2,2,2,1,3,2,3,2,4,,,,,, +2023-03-17 8:37:13,2023-03-17 8:43:34,0,71.174.81.214,100,1000,1,2023-03-17T8:43:35,1018,2,1740505,19,2,2,10,,,,,,,,,,,,,,,,,,,,3,3,2,3,,,,,,,,,2,2,1,,,,,,4,4,4,4,4,4,5,4,5,3,4,3,3,4,5,4,,,3,2,3,4,1,4,2,3,4,,,,,,4,5,5,4,4,4,5,4,3,4 +2023-03-17 8:36:27,2023-03-17 8:44:07,0,71.174.81.214,100,1000,1,2023-03-17T8:44:8,1019,2,1740505,20,1,2,11,3,4,3,,,,,,,,,,2,3,3,2,4,5,4,3,4,3,5,3,4,4,5,4,5,3,4,5,4,4,,,,,,3,4,3,4,3,,,,,,,,,,,,,,,,,,,,,,,4,2,,2,4,4,5,2,4,5,5,4,3,3,4 +2023-03-17 8:33:55,2023-03-17 8:43:13,0,71.174.81.214,100,1000,1,2023-03-17T8:43:14,1020,2,1740505,21,1,5,12,3,1,3,,,,,,,,,,,,,,2,3,3,,4,,4,,4,,4,4,3,4,3,1,5,2,,,,,,3,4,2,3,4,,,,,,,,,,,,,,,,,,,,,,,,,,,,5,4,5,5,5,4,,4,4,4 +2023-03-17 8:50:49,2023-03-17 9:13:18,0,71.174.81.214,100,1000,1,2023-03-17T9:13:19,1021,2,1740305,22,2,"5,2",1,,,,,,,,,,,,,2,3,3,3,2,3,3,,,,,3,4,4,4,,,,,,,,,,,,,2,2,1,3,2,5,5,4,5,4,2,2,2,1,1,1,1,3,3,4,3,4,4,1,1,1,3,,,,,,,,,,4,4,4,4,3,4 +2023-03-17 7:57:37,2023-03-17 8:04:25,0,71.174.81.214,100,1000,1,2023-03-17T8:4:25,1022,2,1740305,23,1,5,2,,,,,,,,,,,,,,,,,,,,4,4,4,5,,,,,,,,,4,4,5,,,,,,3,3,4,4,4,4,5,4,5,4,3,3,3,2,2,2,3,3,3,2,3,4,4,3,3,2,4,,,,,,3,2,4,3,3,3,3,4,2,2 +2023-03-17 8:01:47,2023-03-17 8:08:39,0,71.174.81.214,100,1000,1,2023-03-17T8:8:39,1023,2,1740305,24,1,"2,4",3,4,3,5,,,,,,,,,,2,2,2,2,1,2,2,4,3,2,3,2,3,3,4,3,4,2,3,4,3,2,,,,,,,,,,,5,4,3,5,3,1,2,2,2,2,4,,,,,,,,,,,,1,2,4,2,2,5,4,1,4,,,,,, +2023-03-17 8:37:21,2023-03-17 8:58:16,0,71.174.81.214,100,1000,1,2023-03-17T8:58:16,1024,2,1740305,25,1,5,4,3,3,3,,,,,,,,,,,,,,,,,,,,,,,,,4,4,2,3,,,,,,,,,3,4,4,3,2,5,5,4,5,3,3,2,3,4,3,3,2,4,4,3,3,3,2,2,3,3,4,3,3,3,3,3,,,,,4,3,2,3,3,3 +2023-03-17 8:02:25,2023-03-17 8:11:16,0,71.174.81.214,100,1000,0,2023-03-18T8:11:21,1025,2,1740305,26,2,5,5,,,,,,,,,,,,,,,,,3,3,3,4,4,4,4,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3,,1,3,,,,,,4,3,3,3,3,,,,,,,,,, +2023-03-17 9:33:54,2023-03-17 9:57:21,0,71.174.81.214,100,1000,1,2023-03-17T9:57:22,1026,2,1740305,27,2,5,6,,,,,,,,,5,5,4,5,,,,,,,,1,2,2,2,,,,,,,,,1,1,2,5,4,4,4,3,5,4,,4,2,5,4,4,3,5,3,2,2,1,1,5,3,2,2,3,1,1,1,2,1,3,3,,,,,,,,,,,,,,, +2023-03-17 9:48:38,2023-03-17 9:58:45,0,71.174.81.214,100,1000,1,2023-03-17T9:58:45,1027,2,1740305,28,1,5,7,,,,,,,,,2,4,4,2,,,,,,,,3,3,3,5,,,,,,,,,2,3,3,4,3,2,2,3,1,3,2,3,2,3,3,3,2,4,3,4,2,2,1,1,5,5,2,3,2,3,5,4,3,2,2,,,,,,,,,,,,,,, +2023-03-17 8:36:40,2023-03-17 8:43:21,0,71.174.81.214,100,1000,1,2023-03-17T8:43:22,1028,2,1740305,29,2,5,8,,,,,,,,,,,,,,,,,,,,3,3,2,3,,,,,,,,,4,3,4,,,,,,3,3,2,4,3,5,4,4,5,4,4,2,3,4,2,5,4,4,3,3,3,3,2,2,3,1,4,,,,,,3,4,4,4,2,4,5,4,3,4 +2023-03-17 9:40:56,2023-03-17 9:52:52,0,71.174.81.214,100,1000,1,2023-03-17T9:52:52,1029,2,1740305,30,2,5,9,3,4,3,5,3,5,5,5,5,4,4,5,4,4,4,4,2,3,2,4,4,3,4,3,4,3,5,5,5,4,3,5,4,4,,,,,,,,,,,1,5,4,4,5,4,3,4,3,1,4,,,,,,,,,,,,,,,,,,,,,,,,,, +2023-03-17 9:33:58,2023-03-17 9:48:33,0,71.174.81.214,100,1000,1,2023-03-17T9:48:33,1030,2,1740305,31,2,5,10,,,,,,,,,4,3,5,2,,,,,,,,5,4,4,5,,,,,,,,,5,2,4,4,4,2,2,3,2,2,1,1,2,1,2,2,5,5,5,4,4,3,1,1,1,1,3,2,3,3,3,4,4,4,2,,,,,,,,,,,,,,, +2023-03-17 8:03:04,2023-03-17 8:23:33,0,71.174.81.214,100,1000,1,2023-03-17T8:23:33,1031,2,1740305,32,1,5,11,1,1,1,3,4,2,4,3,,,,,,,,,,,,,,,,,,,,5,5,5,5,,,,4,4,3,2,3,2,3,3,1,3,3,5,3,4,5,3,3,5,1,2,2,1,1,4,5,3,5,4,2,4,2,3,,,,,,,,,,,,,,, +2023-03-17 8:33:14,2023-03-17 8:41:01,0,71.174.81.214,100,1000,1,2023-03-17T8:41:2,1032,2,1740305,33,1,5,12,,,,,,,,,,,,,2,1,1,1,2,2,3,,,,,2,1,3,2,,,,,,,,,,,,,1,1,1,1,1,4,3,4,5,4,1,1,2,3,2,3,4,2,2,3,3,2,2,2,1,2,3,,,,,,,,,,3,2,2,1,2,2 +2023-03-17 7:57:06,2023-03-17 8:08:35,0,71.174.81.214,100,1000,1,2023-03-17T8:8:35,1033,2,1740505,34,2,5,9,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2,2,2,2,2,2,2,2,2,2 +2023-03-17 7:58:38,2023-03-17 8:12:04,0,71.174.81.214,100,1000,1,2023-03-17T8:12:5,1034,2,1740505,35,2,"5,4",12,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, diff --git a/spec/services/cleaner_spec.rb b/spec/services/cleaner_spec.rb index 4372faa2..dec5e07e 100644 --- a/spec/services/cleaner_spec.rb +++ b/spec/services/cleaner_spec.rb @@ -147,7 +147,8 @@ RSpec.describe Cleaner do file: path_to_sample_raw_file, disaggregation_data: cleaner.disaggregation_data ) processed_data in [headers, clean_csv, log_csv, data] - expect(clean_csv.second.last).to eq "Economically Disadvantaged - Y" + index_of_income = clean_csv.first.index("Income") + expect(clean_csv.second[index_of_income]).to eq "Economically Disadvantaged - Y" one_thousand = data.find { |row| row.response_id == "1000" } expect(one_thousand.income).to eq "Economically Disadvantaged - Y" @@ -229,20 +230,20 @@ end def reads_headers_from_raw_csv(processed_data) processed_data in [headers, clean_csv, log_csv, data] - expect(headers).to eq ["StartDate", "EndDate", "Status", "IPAddress", "Progress", "Duration (in seconds)", - "Finished", "RecordedDate", "ResponseId", "District", "School", - "LASID", "Gender", "Race", "What grade are you in?", "s-emsa-q1", "s-emsa-q2", "s-emsa-q3", "s-tint-q1", - "s-tint-q2", "s-tint-q3", "s-tint-q4", "s-tint-q5", "s-acpr-q1", "s-acpr-q2", - "s-acpr-q3", "s-acpr-q4", "s-cure-q1", "s-cure-q2", "s-cure-q3", "s-cure-q4", "s-sten-q1", "s-sten-q2", - "s-sten-q3", "s-sper-q1", "s-sper-q2", "s-sper-q3", "s-sper-q4", "s-civp-q1", "s-civp-q2", "s-civp-q3", - "s-civp-q4", "s-grmi-q1", "s-grmi-q2", "s-grmi-q3", "s-grmi-q4", "s-appa-q1", "s-appa-q2", "s-appa-q3", - "s-peff-q1", "s-peff-q2", "s-peff-q3", "s-peff-q4", "s-peff-q5", "s-peff-q6", "s-sbel-q1", "s-sbel-q2", - "s-sbel-q3", "s-sbel-q4", "s-sbel-q5", "s-phys-q1", "s-phys-q2", "s-phys-q3", "s-phys-q4", "s-vale-q1", - "s-vale-q2", "s-vale-q3", "s-vale-q4", "s-acst-q1", "s-acst-q2", "s-acst-q3", "s-sust-q1", "s-sust-q2", - "s-grit-q1", "s-grit-q2", "s-grit-q3", "s-grit-q4", "s-expa-q1", "s-poaf-q1", "s-poaf-q2", "s-poaf-q3", - "s-poaf-q4", "s-tint-q1-1", "s-tint-q2-1", "s-tint-q3-1", "s-tint-q4-1", "s-tint-q5-1", "s-acpr-q1-1", - "s-acpr-q2-1", "s-acpr-q3-1", "s-acpr-q4-1", "s-peff-q1-1", "s-peff-q2-1", "s-peff-q3-1", "s-peff-q4-1", - "s-peff-q5-1", "s-peff-q6-1", "Raw Income", "Income"] + expect(headers.to_set.sort).to eq ["StartDate", "EndDate", "Status", "IPAddress", "Progress", "Duration (in seconds)", + "Finished", "RecordedDate", "ResponseId", "District", "School", + "LASID", "Gender", "Race", "What grade are you in?", "s-emsa-q1", "s-emsa-q2", "s-emsa-q3", "s-tint-q1", + "s-tint-q2", "s-tint-q3", "s-tint-q4", "s-tint-q5", "s-acpr-q1", "s-acpr-q2", + "s-acpr-q3", "s-acpr-q4", "s-cure-q1", "s-cure-q2", "s-cure-q3", "s-cure-q4", "s-sten-q1", "s-sten-q2", + "s-sten-q3", "s-sper-q1", "s-sper-q2", "s-sper-q3", "s-sper-q4", "s-civp-q1", "s-civp-q2", "s-civp-q3", + "s-civp-q4", "s-grmi-q1", "s-grmi-q2", "s-grmi-q3", "s-grmi-q4", "s-appa-q1", "s-appa-q2", "s-appa-q3", + "s-peff-q1", "s-peff-q2", "s-peff-q3", "s-peff-q4", "s-peff-q5", "s-peff-q6", "s-sbel-q1", "s-sbel-q2", + "s-sbel-q3", "s-sbel-q4", "s-sbel-q5", "s-phys-q1", "s-phys-q2", "s-phys-q3", "s-phys-q4", "s-vale-q1", + "s-vale-q2", "s-vale-q3", "s-vale-q4", "s-acst-q1", "s-acst-q2", "s-acst-q3", "s-sust-q1", "s-sust-q2", + "s-grit-q1", "s-grit-q2", "s-grit-q3", "s-grit-q4", "s-expa-q1", "s-poaf-q1", "s-poaf-q2", "s-poaf-q3", + "s-poaf-q4", "s-tint-q1-1", "s-tint-q2-1", "s-tint-q3-1", "s-tint-q4-1", "s-tint-q5-1", "s-acpr-q1-1", + "s-acpr-q2-1", "s-acpr-q3-1", "s-acpr-q4-1", "s-peff-q1-1", "s-peff-q2-1", "s-peff-q3-1", "s-peff-q4-1", + "s-peff-q5-1", "s-peff-q6-1", "Raw Income", "Income"].to_set.sort end def invalid_rows_are_rejected_for_the_correct_reasons(data)