fix: ensure cleaner outputs columns for all survey items. Before the fix, if a survey item variant (ending in -1, e.g. s-tint-q1-1) did not have a matching survey item s-tint-q1, the resulting CSV would not include that column.
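
The mechanics of the fix, as a minimal standalone sketch (plain Ruby; the logic mirrors the new include_all_headers method in the diff below, and the sample headers here are invented for illustration):

    # One variant ("s-tint-q1-1") arrives with no matching main item.
    headers = ["ResponseId", "s-tint-q1-1", "s-peff-q1"]

    # Backfill a main id for every "-1" variant before variants are dropped.
    headers.select { |h| h.end_with?("-1") }.each do |header|
      main = header.sub(/-1\z/, "")
      headers.push(main) unless headers.include?(main)
    end

    headers.reject { |h| h.end_with?("-1") }
    # => ["ResponseId", "s-peff-q1", "s-tint-q1"]  (the s-tint-q1 column survives)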

rpp-main
rebuilt 2 years ago
parent 2c9df34fac
commit 7bd7923d41

.ruby-version
@@ -1 +1 @@
-3.2.1
+3.2.2

@ -1,5 +1,5 @@
source "https://rubygems.org" source "https://rubygems.org"
ruby "3.2.1" ruby "3.2.2"
git_source(:github) do |repo_name| git_source(:github) do |repo_name|
repo_name = "#{repo_name}/#{repo_name}" unless repo_name.include?("/") repo_name = "#{repo_name}/#{repo_name}" unless repo_name.include?("/")

Gemfile.lock
@@ -506,7 +506,7 @@ DEPENDENCIES
   watir

 RUBY VERSION
-   ruby 3.2.1p31
+   ruby 3.2.2p53

 BUNDLED WITH
   2.3.3

cleaner.rb
@@ -1,4 +1,4 @@
-require 'fileutils'
+require "fileutils"

 class Cleaner
   attr_reader :input_filepath, :output_filepath, :log_filepath, :clean_csv, :log_csv
@@ -15,17 +15,44 @@ class Cleaner
   end

   def clean
-    Dir.glob(Rails.root.join(input_filepath, '*.csv')).each do |filepath|
+    Dir.glob(Rails.root.join(input_filepath, "*.csv")).each do |filepath|
       puts filepath
       File.open(filepath) do |file|
-        clean_csv = []
-        log_csv = []
-        data = []
-        headers = CSV.parse(file.first).first
-        filtered_headers = remove_unwanted_headers(headers:)
-        log_headers = (filtered_headers + ['Valid Duration?', 'Valid Progress?', 'Valid Grade?',
-                                           'Valid Standard Deviation?']).flatten
+        headers, clean_csv, log_csv, data = process_raw_file(file:, disaggregation_data:)
+        next if data.empty?
+
+        filename = filename(headers:, data:)
+        write_csv(data: clean_csv, output_filepath:, filename:)
+        write_csv(data: log_csv, output_filepath: log_filepath, prefix: "removed.", filename:)
+      end
+    end
+  end
+
+  def filename(headers:, data:)
+    survey_item_ids = headers.filter(&:present?).filter do |header|
+      header.start_with?("s-", "t-")
+    end.reject { |item| item.end_with? "-1" }
+    survey_type = SurveyItem.survey_type(survey_item_ids:)
+    range = data.first.academic_year.range
+    districts = data.map do |row|
+      row.district.name
+    end.to_set.to_a
+
+    districts.join(".").to_s + "." + survey_type.to_s + "." + range + ".csv"
+  end
+
+  def process_raw_file(file:, disaggregation_data:)
+    clean_csv = []
+    log_csv = []
+    data = []
+    headers = (CSV.parse(file.first).first << "Raw Income") << "Income"
+    filtered_headers = include_all_headers(headers:)
+    filtered_headers = remove_unwanted_headers(headers: filtered_headers)
+    log_headers = (filtered_headers + ["Valid Duration?", "Valid Progress?", "Valid Grade?",
+                                       "Valid Standard Deviation?"]).flatten
     clean_csv << filtered_headers
     log_csv << log_headers
@@ -35,44 +62,40 @@ class Cleaner
     file.lazy.each_slice(1000) do |lines|
       CSV.parse(lines.join, headers:).map do |row|
         values = SurveyItemValues.new(row:, headers:, genders:,
-                                      survey_items: all_survey_items, schools:)
+                                      survey_items: all_survey_items, schools:, disaggregation_data:)
         next unless values.valid_school?

         data << values
         values.valid? ? clean_csv << values.to_a : log_csv << (values.to_a << values.valid_duration?.to_s << values.valid_progress?.to_s << values.valid_grade?.to_s << values.valid_sd?.to_s)
       end
     end
-
-    unless data.empty?
-      filename = filename(headers:, data:)
-      write_csv(data: clean_csv, output_filepath:, filename:)
-      write_csv(data: log_csv, output_filepath: log_filepath, prefix: 'removed.', filename:)
-    end
-    end
-  end
+    [headers, clean_csv, log_csv, data]
   end

-  def filename(headers:, data:)
-    survey_item_ids = headers.filter(&:present?).filter do |header|
-      header.start_with?('s-', 't-')
-    end.reject { |item| item.end_with? '-1' }
-    survey_type = SurveyItem.survey_type(survey_item_ids:)
-    range = data.first.academic_year.range
-    districts = data.map do |row|
-      row.district.name
-    end.to_set.to_a
-
-    districts.join('.').to_s + '.' + survey_type.to_s + '.' + range + '.csv'
-  end
+  private
+
+  def include_all_headers(headers:)
+    alternates = headers.filter(&:present?)
+                        .filter { |header| header.end_with? "-1" }
+    alternates.each do |header|
+      main = header.sub(/-1\z/, "")
+      headers.push(main) unless headers.include?(main)
+    end
+    headers
+  end
+
+  def initialize_directories
+    create_ouput_directory
+    create_log_directory
+  end

   def remove_unwanted_headers(headers:)
     headers.to_set.to_a.compact.reject do |item|
-      item.start_with? 'Q'
-    end.reject { |item| item.end_with? '-1' }
+      item.start_with? "Q"
+    end.reject { |item| item.end_with? "-1" }
   end

-  def write_csv(data:, output_filepath:, filename:, prefix: '')
+  def write_csv(data:, output_filepath:, filename:, prefix: "")
     csv = CSV.generate do |csv|
       data.each do |row|
         csv << row
@@ -102,13 +125,10 @@ class Cleaner
   end

   def survey_items(headers:)
-    @survey_items ||= SurveyItem.where(survey_item_id: get_survey_item_ids_from_headers(headers:))
-  end
-
-  def get_survey_item_ids_from_headers(headers:)
-    headers
-      .filter(&:present?)
-      .filter { |header| header.start_with? 't-', 's-' }
+    survey_item_ids = headers
+                      .filter(&:present?)
+                      .filter { |header| header.start_with? "t-", "s-" }
+    @survey_items ||= SurveyItem.where(survey_item_id: survey_item_ids)
   end

   def create_ouput_directory

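The filename scheme that clean relies on is easier to read with concrete values plugged in. A hypothetical illustration of the concatenation in filename above (the district names, survey type, and year are invented):

    districts = ["District1", "District2"]
    survey_type = :teacher
    range = "2022-23"
    districts.join(".") + "." + survey_type.to_s + "." + range + ".csv"
    # => "District1.District2.teacher.2022-23.csv"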
survey_item_values.rb
@@ -7,6 +7,11 @@ class SurveyItemValues
     @genders = genders
     @survey_items = survey_items
     @schools = schools
+    @disaggregation_data = disaggregation_data
+
+    copy_likert_scores_from_variant_survey_items
+    row["Income"] = income
+    row["Raw Income"] = raw_income
   end

   # Some survey items have variants, i.e. a survey item with an id of s-tint-q1 might have a variant that looks like s-tint-q1-1. We must ensure that all variants in the form of s-tint-q1-1 have a matching pair.
@@ -115,6 +120,8 @@ class SurveyItemValues
     return "Unknown" unless disaggregation_data.present?

+    byebug
+
     disaggregation = disaggregation_data[[lasid, district.name, academic_year.range]]
     return "Unknown" unless disaggregation.present?
@@ -147,7 +154,6 @@ class SurveyItemValues
   end

   def to_a
-    copy_likert_scores_from_variant_survey_items
     headers.select(&:present?)
            .reject { |key, _value| key.start_with? "Q" }
            .reject { |key, _value| key.end_with? "-1" }
@@ -238,7 +244,8 @@ class SurveyItemValues
     headers.filter(&:present?).filter { |header| header.end_with? "-1" }.each do |header|
       likert_score = row[header]
       main_item = header.gsub("-1", "")
-      row[main_item] = likert_score if likert_score.present?
+      row[main_item] = likert_score if likert_score.present? && row[main_item].blank?
     end
   end
 end
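
On the value side, the last hunk above is the other half of the fix: a score recorded under a variant id must land in the main column without overwriting a real answer. A standalone sketch of that rule (the row is modeled as a plain Hash here; the real code operates on a CSV row and uses Rails' present?/blank?):

    row = { "s-tint-q1" => nil, "s-tint-q1-1" => "4" }

    row.keys.select { |h| h.end_with?("-1") }.each do |header|
      likert_score = row[header]
      main_item = header.sub(/-1\z/, "")
      # Copy the variant's score over, but never clobber an existing answer.
      row[main_item] = likert_score if likert_score && row[main_item].nil?
    end

    row["s-tint-q1"] # => "4"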

grouped bar column view (ERB partial)
@@ -1,14 +1,6 @@
 <g class="grouped-bar-column" data-for-measure-id="<%= column.measure.measure_id %>">
   <% score_label_y = [5, 10, 15, 5, 10, 15 ] %>
   <% column.bars.each_with_index do |bar, index| %>
-<<<<<<< HEAD
-    <rect data-for-academic-year="<%= bar.academic_year.range %>" x="<%= bar.x_position %>%" y="<%= bar.y_offset %>%" width="<%= column.bar_width %>%" height="<%= bar.bar_height_percentage %>%" fill="<%= bar.color %>" />
-    <% if ENV["SCORES"].present? && ENV["SCORES"].upcase == "SHOW" %>
-      <text x="<%= bar.x_position + (column.bar_width * 0.5) %>%" y="<%= score_label_y[index] %>%" text-anchor="middle" dominant-baseline="middle">
-        <%= bar.average %>
-      </text>
-=======
     <% if column.sufficient_data?(index) %>
       <rect
         <% if column.show_popover? %>
@@ -29,7 +21,6 @@
       </text>
     <% end %>
->>>>>>> 67e469a6 (feat: add popover to analyze graphs that displays the n-size of the different columns. Make sure to only calculate a score for a race if there are more than 10 respondents to a question.)
   <% end %>
 <% end %>

spec/fixtures/raw sample survey CSV (new file)
@@ -0,0 +1,36 @@
StartDate,EndDate,Status,IPAddress,Progress,Duration (in seconds),Finished,RecordedDate,ResponseId,District,School,LASID,Gender,Race,What grade are you in?,s-emsa-q1,s-emsa-q2,s-emsa-q3,s-tint-q1,s-tint-q2,s-tint-q3,s-tint-q4,s-tint-q5,s-acpr-q1,s-acpr-q2,s-acpr-q3,s-acpr-q4,s-cure-q1,s-cure-q2,s-cure-q3,s-cure-q4,s-sten-q1,s-sten-q2,s-sten-q3,s-sper-q1,s-sper-q2,s-sper-q3,s-sper-q4,s-civp-q1,s-civp-q2,s-civp-q3,s-civp-q4,s-grmi-q1,s-grmi-q2,s-grmi-q3,s-grmi-q4,s-appa-q1,s-appa-q2,s-appa-q3,s-peff-q1,s-peff-q2,s-peff-q3,s-peff-q4,s-peff-q5,s-sbel-q1,s-sbel-q2,s-sbel-q3,s-sbel-q4,s-sbel-q5,s-phys-q1,s-phys-q2,s-phys-q3,s-phys-q4,s-vale-q1,s-vale-q2,s-vale-q3,s-vale-q4,s-acst-q1,s-acst-q2,s-acst-q3,s-sust-q1,s-sust-q2,s-grit-q1,s-grit-q2,s-grit-q3,s-grit-q4,s-expa-q1,s-poaf-q1,s-poaf-q2,s-poaf-q3,s-poaf-q4,s-tint-q1-1,s-tint-q2-1,s-tint-q3-1,s-tint-q4-1,s-tint-q5-1,s-acpr-q1-1,s-acpr-q2-1,s-acpr-q3-1,s-acpr-q4-1,s-peff-q1-1,s-peff-q2-1,s-peff-q3-1,s-peff-q4-1,s-peff-q5-1,s-peff-q6-1
2023-03-17 7:57:47,2023-03-17 8:09:15,0,71.174.81.214,100,1000,1,2023-03-17T8:9:15,1000,2,1740505,1,2,4,9,3,5,5,,,,,,,,,,,,,,,,,,,,,,,,,4,4,3,5,,,,,,,,,4,4,2,3,2,5,5,5,5,4,2,2,4,3,2,3,3,5,4,4,3,5,2,3,3,4,4,4,1,2,5,5,,,,,4,4,4,3,4,5
2023-03-17 8:02:15,2023-03-17 8:08:02,0,71.174.81.214,25,1000,1,2023-03-17T8:8:3,1001,2,1740505,2,1,5,10,,,,,,,,,,,,,,,,,,,,5,4,4,4,,,,,,,,,2,3,2,,,,,,4,3,2,4,3,5,5,4,4,4,4,3,5,3,4,3,2,4,3,4,3,3,1,2,2,2,3,,,,,,5,4,4,5,4,4,5,3,3,4
2023-03-17 8:00:05,2023-03-17 8:07:39,0,71.174.81.214,24,1000,1,2023-03-17T8:7:39,1002,2,1740505,3,,,9,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5,4,4,5,3,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2023-03-17 8:03:35,2023-03-17 8:15:38,0,71.174.81.214,0,1000,1,2023-03-17T8:15:38,1003,2,1740505,4,1,"1,4",10,4,5,5,,,,,,,,,,3,4,5,5,4,4,3,5,4,3,4,4,3,3,3,2,1,3,2,3,1,1,,,,,,,,,,,,,,,,,,,,,,5,4,3,5,3,4,3,3,3,5,4,5,4,4,5,5,5,5,5,5,,,,,,
2023-03-17 7:57:09,2023-03-17 8:12:26,0,71.174.81.214,,1000,1,2023-03-17T8:12:27,1004,2,1740505,5,1,"5,4",9,4,4,5,,,,,,,,,,,,,,,,,,,,,,,,,4,4,2,4,,,,,,,,,3,3,3,3,3,5,5,4,5,5,3,3,4,2,2,1,3,2,4,5,4,5,4,3,1,3,4,3,1,3,3,1,,,,,4,4,3,4,2,4
2023-03-17 8:01:50,2023-03-17 8:17:51,0,71.174.81.214,100,240,1,2023-03-17T8:17:52,1005,2,1740505,6,1,"5,4",9,4,3,4,,,,,,,,,,4,4,3,3,3,4,4,4,5,2,5,4,4,4,4,4,4,5,3,4,3,5,,,,,,4,4,4,5,2,,,,,,,,,,,,,,,,,,,,,,,4,1,4,4,4,5,4,4,5,4,5,5,5,5,4
2023-03-17 8:01:45,2023-03-17 8:07:59,0,71.174.81.214,100,239,1,2023-03-17T8:8:0,1006,2,1740505,7,1,5,10,,,,,,,,,,,,,5,3,3,5,2,3,3,,,,,4,4,4,4,,,,,,,,,,,,,4,5,3,4,3,5,5,4,5,5,4,4,4,2,1,1,4,5,4,4,3,4,2,2,3,2,3,,,,,,,,,,4,4,5,4,4,5
2023-03-17 9:07:09,2023-03-17 9:20:10,0,71.174.81.214,100,0,1,2023-03-17T9:20:11,1007,2,1740305,8,2,5,7,,,,,,,,,,,,,4,5,5,4,,4,3,,,,,5,4,4,5,,,,,,,,,,,,,5,5,4,5,5,5,5,5,5,4,5,5,5,3,2,3,3,3,5,4,4,4,3,3,4,,4,,,,,,,,,,4,5,5,5,5,5
2023-03-17 8:02:11,2023-03-17 8:29:53,0,71.174.81.214,100,,1,2023-03-17T8:29:53,1008,2,1740505,9,1,"5,4",10,,,,,,,,,,,,,,,,,,,,3,3,2,3,,,,,,,,,2,3,3,,,,,,3,4,3,3,3,5,5,5,5,4,4,2,3,2,3,2,2,5,2,4,3,3,1,3,3,3,4,,,,,,4,4,4,4,4,4,4,4,2,4
2023-03-17 8:00:42,2023-03-17 8:12:00,0,71.174.81.214,100,1000,1,2023-03-17T8:12:0,1009,2,1740505,10,2,5,1,,,,,,,,,,,,,1,4,3,2,1,3,2,,,,,3,3,4,4,,,,,,,,,,,,,5,5,2,4,5,4,4,5,4,2,2,1,2,2,4,3,5,5,4,4,1,4,1,2,1,3,3,,,,,,,,,,3,4,3,1,1,1
2023-03-17 8:03:09,2023-03-17 8:13:27,0,71.174.81.214,100,1000,1,2023-03-17T8:13:28,1010,2,1740505,11,1,5,2,,,,,,,,,,,,,5,3,2,4,2,2,3,,,,,4,5,5,5,,,,,,,,,,,,,4,4,3,4,4,4,4,4,5,4,3,5,4,2,1,2,4,4,5,4,3,5,4,1,3,3,3,,,,,,,,,,4,3,4,4,4,4
2023-03-17 8:23:20,2023-03-17 8:34:00,0,71.174.81.214,100,1000,1,2023-03-17T8:34:0,1011,2,1740505,12,2,3,3,1,2,2,2,3,2,4,2,5,5,3,5,3,3,3,2,3,4,4,2,4,3,5,4,4,4,3,4,3,3,4,3,1,3,,,,,,,,,,,,,,,,,,,,,,1,2,4,4,3,4,3,2,2,4,4,,,,,,,,,,,,,,,
2023-03-17 8:36:36,2023-03-17 8:47:33,0,71.174.81.214,100,1000,1,2023-03-17T8:47:34,1012,2,1740505,13,1,3,4,4,5,4,,,,,,,,,,4,2,3,2,2,3,4,4,5,3,4,2,4,2,3,3,4,3,3,2,1,1,,,,,,,,,,,5,5,2,4,2,3,3,4,5,4,5,,,,,,,,,,,,4,2,3,3,2,4,4,3,3,,,,,,
2023-03-17 8:01:10,2023-03-17 8:09:17,0,71.174.81.214,100,1000,1,2023-03-17T8:9:18,1013,2,1740505,14,1,4,5,4,3,5,,,,,,,,,,3,4,3,4,3,4,4,2,4,4,2,3,1,2,2,4,2,3,5,2,2,1,,,,,,4,4,3,3,4,,,,,,,,,,,,,,,,,,,,,,,2,1,3,2,5,4,5,2,2,2,3,4,3,3,4
2023-03-17 10:06:07,2023-03-17 10:12:54,0,71.174.81.214,100,1000,1,2023-03-17T10:12:56,1014,2,1740505,15,1,5,6,,,,,,,,,,,,,3,3,4,2,1,2,4,,,,,2,2,2,2,,,,,,,,,,,,,1,2,1,2,3,4,5,3,5,3,1,2,3,2,2,1,2,4,3,2,3,2,4,2,3,2,2,,,,,,,,,,4,4,4,4,4,5
2023-03-17 7:57:13,2023-03-17 8:05:02,0,71.174.81.214,100,1000,1,2023-03-17T8:5:2,1015,2,1740505,16,4,5,7,,,,,,,,,,,,,,,,,,,,3,3,3,3,,,,,,,,,3,5,2,,,,,,2,1,3,2,4,4,4,3,3,2,2,3,4,3,5,5,5,4,3,4,2,4,5,3,1,3,2,,,,,,4,4,3,4,4,5,4,5,5,5
2023-03-17 7:57:50,2023-03-17 8:02:53,0,71.174.81.214,100,1000,1,2023-03-17T8:2:54,1016,2,1740505,17,2,5,8,1,1,1,,,,,,,,,,,,,,,,,,,,,,,,,5,2,1,1,,,,,,,,,5,4,3,3,3,2,4,3,3,5,1,1,1,1,3,5,1,1,4,5,4,1,3,1,3,2,1,1,1,1,1,1,,,,,1,1,1,1,1,2
2023-03-17 8:40:22,2023-03-17 8:53:19,0,71.174.81.214,100,1000,0,2023-03-18T8:53:20,1017,2,1740505,18,2,5,9,,,,,,,,,,,,,,,,,3,3,4,,,,,1,1,1,2,4,3,2,4,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2,2,2,1,3,2,3,2,4,,,,,,
2023-03-17 8:37:13,2023-03-17 8:43:34,0,71.174.81.214,100,1000,1,2023-03-17T8:43:35,1018,2,1740505,19,2,2,10,,,,,,,,,,,,,,,,,,,,3,3,2,3,,,,,,,,,2,2,1,,,,,,4,4,4,4,4,4,5,4,5,3,4,3,3,4,5,4,,,3,2,3,4,1,4,2,3,4,,,,,,4,5,5,4,4,4,5,4,3,4
2023-03-17 8:36:27,2023-03-17 8:44:07,0,71.174.81.214,100,1000,1,2023-03-17T8:44:8,1019,2,1740505,20,1,2,11,3,4,3,,,,,,,,,,2,3,3,2,4,5,4,3,4,3,5,3,4,4,5,4,5,3,4,5,4,4,,,,,,3,4,3,4,3,,,,,,,,,,,,,,,,,,,,,,,4,2,,2,4,4,5,2,4,5,5,4,3,3,4
2023-03-17 8:33:55,2023-03-17 8:43:13,0,71.174.81.214,100,1000,1,2023-03-17T8:43:14,1020,2,1740505,21,1,5,12,3,1,3,,,,,,,,,,,,,,2,3,3,,4,,4,,4,,4,4,3,4,3,1,5,2,,,,,,3,4,2,3,4,,,,,,,,,,,,,,,,,,,,,,,,,,,,5,4,5,5,5,4,,4,4,4
2023-03-17 8:50:49,2023-03-17 9:13:18,0,71.174.81.214,100,1000,1,2023-03-17T9:13:19,1021,2,1740305,22,2,"5,2",1,,,,,,,,,,,,,2,3,3,3,2,3,3,,,,,3,4,4,4,,,,,,,,,,,,,2,2,1,3,2,5,5,4,5,4,2,2,2,1,1,1,1,3,3,4,3,4,4,1,1,1,3,,,,,,,,,,4,4,4,4,3,4
2023-03-17 7:57:37,2023-03-17 8:04:25,0,71.174.81.214,100,1000,1,2023-03-17T8:4:25,1022,2,1740305,23,1,5,2,,,,,,,,,,,,,,,,,,,,4,4,4,5,,,,,,,,,4,4,5,,,,,,3,3,4,4,4,4,5,4,5,4,3,3,3,2,2,2,3,3,3,2,3,4,4,3,3,2,4,,,,,,3,2,4,3,3,3,3,4,2,2
2023-03-17 8:01:47,2023-03-17 8:08:39,0,71.174.81.214,100,1000,1,2023-03-17T8:8:39,1023,2,1740305,24,1,"2,4",3,4,3,5,,,,,,,,,,2,2,2,2,1,2,2,4,3,2,3,2,3,3,4,3,4,2,3,4,3,2,,,,,,,,,,,5,4,3,5,3,1,2,2,2,2,4,,,,,,,,,,,,1,2,4,2,2,5,4,1,4,,,,,,
2023-03-17 8:37:21,2023-03-17 8:58:16,0,71.174.81.214,100,1000,1,2023-03-17T8:58:16,1024,2,1740305,25,1,5,4,3,3,3,,,,,,,,,,,,,,,,,,,,,,,,,4,4,2,3,,,,,,,,,3,4,4,3,2,5,5,4,5,3,3,2,3,4,3,3,2,4,4,3,3,3,2,2,3,3,4,3,3,3,3,3,,,,,4,3,2,3,3,3
2023-03-17 8:02:25,2023-03-17 8:11:16,0,71.174.81.214,100,1000,0,2023-03-18T8:11:21,1025,2,1740305,26,2,5,5,,,,,,,,,,,,,,,,,3,3,3,4,4,4,4,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3,,1,3,,,,,,4,3,3,3,3,,,,,,,,,,
2023-03-17 9:33:54,2023-03-17 9:57:21,0,71.174.81.214,100,1000,1,2023-03-17T9:57:22,1026,2,1740305,27,2,5,6,,,,,,,,,5,5,4,5,,,,,,,,1,2,2,2,,,,,,,,,1,1,2,5,4,4,4,3,5,4,,4,2,5,4,4,3,5,3,2,2,1,1,5,3,2,2,3,1,1,1,2,1,3,3,,,,,,,,,,,,,,,
2023-03-17 9:48:38,2023-03-17 9:58:45,0,71.174.81.214,100,1000,1,2023-03-17T9:58:45,1027,2,1740305,28,1,5,7,,,,,,,,,2,4,4,2,,,,,,,,3,3,3,5,,,,,,,,,2,3,3,4,3,2,2,3,1,3,2,3,2,3,3,3,2,4,3,4,2,2,1,1,5,5,2,3,2,3,5,4,3,2,2,,,,,,,,,,,,,,,
2023-03-17 8:36:40,2023-03-17 8:43:21,0,71.174.81.214,100,1000,1,2023-03-17T8:43:22,1028,2,1740305,29,2,5,8,,,,,,,,,,,,,,,,,,,,3,3,2,3,,,,,,,,,4,3,4,,,,,,3,3,2,4,3,5,4,4,5,4,4,2,3,4,2,5,4,4,3,3,3,3,2,2,3,1,4,,,,,,3,4,4,4,2,4,5,4,3,4
2023-03-17 9:40:56,2023-03-17 9:52:52,0,71.174.81.214,100,1000,1,2023-03-17T9:52:52,1029,2,1740305,30,2,5,9,3,4,3,5,3,5,5,5,5,4,4,5,4,4,4,4,2,3,2,4,4,3,4,3,4,3,5,5,5,4,3,5,4,4,,,,,,,,,,,1,5,4,4,5,4,3,4,3,1,4,,,,,,,,,,,,,,,,,,,,,,,,,,
2023-03-17 9:33:58,2023-03-17 9:48:33,0,71.174.81.214,100,1000,1,2023-03-17T9:48:33,1030,2,1740305,31,2,5,10,,,,,,,,,4,3,5,2,,,,,,,,5,4,4,5,,,,,,,,,5,2,4,4,4,2,2,3,2,2,1,1,2,1,2,2,5,5,5,4,4,3,1,1,1,1,3,2,3,3,3,4,4,4,2,,,,,,,,,,,,,,,
2023-03-17 8:03:04,2023-03-17 8:23:33,0,71.174.81.214,100,1000,1,2023-03-17T8:23:33,1031,2,1740305,32,1,5,11,1,1,1,3,4,2,4,3,,,,,,,,,,,,,,,,,,,,5,5,5,5,,,,4,4,3,2,3,2,3,3,1,3,3,5,3,4,5,3,3,5,1,2,2,1,1,4,5,3,5,4,2,4,2,3,,,,,,,,,,,,,,,
2023-03-17 8:33:14,2023-03-17 8:41:01,0,71.174.81.214,100,1000,1,2023-03-17T8:41:2,1032,2,1740305,33,1,5,12,,,,,,,,,,,,,2,1,1,1,2,2,3,,,,,2,1,3,2,,,,,,,,,,,,,1,1,1,1,1,4,3,4,5,4,1,1,2,3,2,3,4,2,2,3,3,2,2,2,1,2,3,,,,,,,,,,3,2,2,1,2,2
2023-03-17 7:57:06,2023-03-17 8:08:35,0,71.174.81.214,100,1000,1,2023-03-17T8:8:35,1033,2,1740505,34,2,5,9,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2,2,2,2,2,2,2,2,2,2
2023-03-17 7:58:38,2023-03-17 8:12:04,0,71.174.81.214,100,1000,1,2023-03-17T8:12:5,1034,2,1740505,35,2,"5,4",12,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,

cleaner_spec.rb
@@ -1,29 +1,29 @@
-require 'rails_helper'
-require 'fileutils'
+require "rails_helper"
+require "fileutils"

 RSpec.describe Cleaner do
-  let(:district) { create(:district, name: 'District1') }
-  let(:second_district) { create(:district, name: 'District2') }
+  let(:district) { create(:district, name: "District1") }
+  let(:second_district) { create(:district, name: "District2") }
   let(:school) { create(:school, dese_id: 1_740_505, district:) }
   let(:second_school) { create(:school, dese_id: 222_222, district: second_district) }
-  let(:academic_year) { create(:academic_year, range: '2022-23') }
+  let(:academic_year) { create(:academic_year, range: "2022-23") }
   let(:respondents) { create(:respondent, school:, academic_year:, nine: 40, ten: 40, eleven: 40, twelve: 40) }
-  let(:recorded_date) { '2023-04-01' }
+  let(:recorded_date) { "2023-04-01" }

   let(:input_filepath) do
-    Rails.root.join('spec', 'fixtures', 'raw')
+    Rails.root.join("spec", "fixtures", "raw")
   end

   let(:output_filepath) do
-    Rails.root.join('tmp', 'spec', 'clean')
+    Rails.root.join("tmp", "spec", "clean")
   end

   let(:log_filepath) do
-    Rails.root.join('tmp', 'spec', 'removed')
+    Rails.root.join("tmp", "spec", "removed")
   end

   let(:common_headers) do
-    ['Recorded Date', 'Dese ID', 'ResponseID']
+    ["Recorded Date", "Dese ID", "ResponseID"]
   end

   let(:standard_survey_items) do
@@ -41,16 +41,16 @@ RSpec.describe Cleaner do
   end

   let(:short_form_survey_items) do
-    ([create(:survey_item, survey_item_id: 's-phys-q1', on_short_form: true),
-      create(:survey_item, survey_item_id: 's-phys-q2', on_short_form: true),
-      create(:survey_item, survey_item_id: 's-phys-q3',
+    ([create(:survey_item, survey_item_id: "s-phys-q1", on_short_form: true),
+      create(:survey_item, survey_item_id: "s-phys-q2", on_short_form: true),
+      create(:survey_item, survey_item_id: "s-phys-q3",
              on_short_form: true)].map(&:survey_item_id) << common_headers).flatten
   end

   let(:early_education_survey_items) do
-    ([create(:survey_item, survey_item_id: 's-emsa-es1'),
-      create(:survey_item, survey_item_id: 's-emsa-es2'),
-      create(:survey_item, survey_item_id: 's-emsa-es3')].map(&:survey_item_id) << common_headers).flatten
+    ([create(:survey_item, survey_item_id: "s-emsa-es1"),
+      create(:survey_item, survey_item_id: "s-emsa-es2"),
+      create(:survey_item, survey_item_id: "s-emsa-es3")].map(&:survey_item_id) << common_headers).flatten
   end

   let(:teacher_survey_items) do
@@ -79,84 +79,232 @@ RSpec.describe Cleaner do
     respondents
   end

-  context 'Creating a new Cleaner' do
-    it 'creates a directory for the clean data' do
+  context "Creating a new Cleaner" do
+    it "creates a directory for the clean data" do
       Cleaner.new(input_filepath:, output_filepath:, log_filepath:).clean
       expect(output_filepath).to exist
     end

-    it 'creates a directory for the removed data' do
+    it "creates a directory for the removed data" do
       Cleaner.new(input_filepath:, output_filepath:, log_filepath:).clean
       expect(log_filepath).to exist
     end
   end

+  context ".process_raw_file" do
+    it "sorts data into valid and invalid csvs" do
+      cleaner = Cleaner.new(input_filepath:, output_filepath:, log_filepath:, disaggregation_filepath:)
+      processed_data = cleaner.process_raw_file(
+        file: path_to_sample_raw_file, disaggregation_data: cleaner.disaggregation_data
+      )
+      processed_data in [headers, clean_csv, log_csv, data]
+      reads_headers_from_raw_csv(processed_data)
+
+      valid_rows = %w[1000 1001 1004 1005 1008 1017 1018 1019 1020 1024 1025 1026
+                      1027 1028]
+      valid_rows.each do |response_id|
+        valid_row = data.find { |row| row.response_id == response_id }
+        expect(valid_row.valid?).to eq true
+      end
+
+      invalid_rows = %w[1002 1003 1006 1007 1009 1010 1011 1012 1013 1014 1015 1016 1021 1022 1023 1029 1030 1031 1032
+                        1033 1034]
+      invalid_rows.each do |response_id|
+        invalid_row = data.find { |row| row.response_id == response_id }
+        expect(invalid_row.valid?).to eq false
+      end
+
+      expect(clean_csv.length).to eq valid_rows.length + 1 # headers + rows
+      expect(log_csv.length).to eq invalid_rows.length + 1 # headers + rows
+      csv_contains_the_correct_rows(clean_csv, valid_rows)
+      csv_contains_the_correct_rows(log_csv, invalid_rows)
+      invalid_rows_are_rejected_for_the_correct_reasons(data)
+    end
+
+    it "adds disaggregation data to the cleaned file" do
+      cleaner = Cleaner.new(input_filepath:, output_filepath:, log_filepath:, disaggregation_filepath:)
+      processed_data = cleaner.process_raw_file(
+        file: path_to_sample_raw_file, disaggregation_data: cleaner.disaggregation_data
+      )
+      processed_data in [headers, clean_csv, log_csv, data]
+
+      index_of_income = clean_csv.first.index("Income")
+      expect(clean_csv.second[index_of_income]).to eq "Economically Disadvantaged - Y"
+
+      one_thousand = data.find { |row| row.response_id == "1000" }
+      expect(one_thousand.income).to eq "Economically Disadvantaged - Y"
+      one_thousand_one = data.find { |row| row.response_id == "1001" }
+      expect(one_thousand_one.income).to eq "Economically Disadvantaged - N"
+    end
+  end
+
-  context '.filename' do
-    context 'defines a filename in the format: [district].[early_ed/short_form/standard/teacher].[year as 2022-23]' do
-      context 'when the file is based on standard survey items' do
-        it 'adds the survey type as standard to the filename' do
+  context ".filename" do
+    context "defines a filename in the format: [district].[early_ed/short_form/standard/teacher].[year as 2022-23]" do
+      context "when the file is based on standard survey items" do
+        it "adds the survey type as standard to the filename" do
           survey_items = SurveyItem.where(survey_item_id: standard_survey_items)
-          data = [SurveyItemValues.new(row: { 'Recorded Date' => recorded_date, 'Dese ID' => '1_740_505' }, headers: standard_survey_items, genders: nil, survey_items:,
+          data = [SurveyItemValues.new(row: { "Recorded Date" => recorded_date, "Dese ID" => "1_740_505" }, headers: standard_survey_items, genders: nil, survey_items:,
                                        schools: School.school_hash)]
           filename = Cleaner.new(input_filepath:, output_filepath:, log_filepath:).filename(
             headers: standard_survey_items, data:
           )
-          expect(filename).to eq 'District1.standard.2022-23.csv'
+          expect(filename).to eq "District1.standard.2022-23.csv"
         end

-        context 'when the file is based on short form survey items' do
-          it 'adds the survey type as short form to the filename' do
+        context "when the file is based on short form survey items" do
+          it "adds the survey type as short form to the filename" do
             survey_items = SurveyItem.where(survey_item_id: short_form_survey_items)
-            data = [SurveyItemValues.new(row: { 'Recorded Date' => recorded_date, 'Dese ID' => '1_740_505' }, headers: short_form_survey_items, genders: nil, survey_items:,
+            data = [SurveyItemValues.new(row: { "Recorded Date" => recorded_date, "Dese ID" => "1_740_505" }, headers: short_form_survey_items, genders: nil, survey_items:,
                                          schools: School.school_hash)]
             filename = Cleaner.new(input_filepath:, output_filepath:, log_filepath:).filename(
               headers: short_form_survey_items, data:
             )
-            expect(filename).to eq 'District1.short_form.2022-23.csv'
+            expect(filename).to eq "District1.short_form.2022-23.csv"
           end
         end

-        context 'when the file is based on early education survey items' do
-          it 'adds the survey type as early education to the filename' do
+        context "when the file is based on early education survey items" do
+          it "adds the survey type as early education to the filename" do
             survey_items = SurveyItem.where(survey_item_id: early_education_survey_items)
-            data = [SurveyItemValues.new(row: { 'Recorded Date' => recorded_date, 'Dese ID' => '1_740_505' }, headers: early_education_survey_items, genders: nil, survey_items:,
+            data = [SurveyItemValues.new(row: { "Recorded Date" => recorded_date, "Dese ID" => "1_740_505" }, headers: early_education_survey_items, genders: nil, survey_items:,
                                          schools: School.school_hash)]
             filename = Cleaner.new(input_filepath:, output_filepath:, log_filepath:).filename(
               headers: early_education_survey_items, data:
             )
-            expect(filename).to eq 'District1.early_education.2022-23.csv'
+            expect(filename).to eq "District1.early_education.2022-23.csv"
           end
         end

-        context 'when the file is based on teacher survey items' do
-          it 'adds the survey type as teacher to the filename' do
+        context "when the file is based on teacher survey items" do
+          it "adds the survey type as teacher to the filename" do
             survey_items = SurveyItem.where(survey_item_id: teacher_survey_items)
-            data = [SurveyItemValues.new(row: { 'Recorded Date' => recorded_date, 'Dese ID' => '1_740_505' }, headers: teacher_survey_items, genders: nil, survey_items:,
+            data = [SurveyItemValues.new(row: { "Recorded Date" => recorded_date, "Dese ID" => "1_740_505" }, headers: teacher_survey_items, genders: nil, survey_items:,
                                          schools: School.school_hash)]
             filename = Cleaner.new(input_filepath:, output_filepath:, log_filepath:).filename(
               headers: teacher_survey_items, data:
             )
-            expect(filename).to eq 'District1.teacher.2022-23.csv'
+            expect(filename).to eq "District1.teacher.2022-23.csv"
           end
         end

-        context 'when there is more than one district' do
-          it 'adds all districts to the filename' do
+        context "when there is more than one district" do
+          it "adds all districts to the filename" do
             survey_items = SurveyItem.where(survey_item_id: teacher_survey_items)
-            data = [SurveyItemValues.new(row: { 'Recorded Date' => recorded_date, 'Dese ID' => '1_740_505' }, headers: teacher_survey_items, genders: nil, survey_items:, schools: School.school_hash),
-                    SurveyItemValues.new(row: { 'Recorded Date' => recorded_date, 'Dese ID' => '222_222' },
+            data = [SurveyItemValues.new(row: { "Recorded Date" => recorded_date, "Dese ID" => "1_740_505" }, headers: teacher_survey_items, genders: nil, survey_items:, schools: School.school_hash),
+                    SurveyItemValues.new(row: { "Recorded Date" => recorded_date, "Dese ID" => "222_222" },
                                          headers: teacher_survey_items, genders: nil, survey_items:, schools: School.school_hash)]
             filename = Cleaner.new(input_filepath:, output_filepath:, log_filepath:).filename(
               headers: teacher_survey_items, data:
             )
-            expect(filename).to eq 'District1.District2.teacher.2022-23.csv'
+            expect(filename).to eq "District1.District2.teacher.2022-23.csv"
           end
         end
       end
     end
   end

+  def reads_headers_from_raw_csv(processed_data)
+    processed_data in [headers, clean_csv, log_csv, data]
+    expect(headers.to_set.sort).to eq ["StartDate", "EndDate", "Status", "IPAddress", "Progress", "Duration (in seconds)",
+                                       "Finished", "RecordedDate", "ResponseId", "District", "School",
+                                       "LASID", "Gender", "Race", "What grade are you in?", "s-emsa-q1", "s-emsa-q2", "s-emsa-q3", "s-tint-q1",
+                                       "s-tint-q2", "s-tint-q3", "s-tint-q4", "s-tint-q5", "s-acpr-q1", "s-acpr-q2",
+                                       "s-acpr-q3", "s-acpr-q4", "s-cure-q1", "s-cure-q2", "s-cure-q3", "s-cure-q4", "s-sten-q1", "s-sten-q2",
+                                       "s-sten-q3", "s-sper-q1", "s-sper-q2", "s-sper-q3", "s-sper-q4", "s-civp-q1", "s-civp-q2", "s-civp-q3",
+                                       "s-civp-q4", "s-grmi-q1", "s-grmi-q2", "s-grmi-q3", "s-grmi-q4", "s-appa-q1", "s-appa-q2", "s-appa-q3",
+                                       "s-peff-q1", "s-peff-q2", "s-peff-q3", "s-peff-q4", "s-peff-q5", "s-peff-q6", "s-sbel-q1", "s-sbel-q2",
+                                       "s-sbel-q3", "s-sbel-q4", "s-sbel-q5", "s-phys-q1", "s-phys-q2", "s-phys-q3", "s-phys-q4", "s-vale-q1",
+                                       "s-vale-q2", "s-vale-q3", "s-vale-q4", "s-acst-q1", "s-acst-q2", "s-acst-q3", "s-sust-q1", "s-sust-q2",
+                                       "s-grit-q1", "s-grit-q2", "s-grit-q3", "s-grit-q4", "s-expa-q1", "s-poaf-q1", "s-poaf-q2", "s-poaf-q3",
+                                       "s-poaf-q4", "s-tint-q1-1", "s-tint-q2-1", "s-tint-q3-1", "s-tint-q4-1", "s-tint-q5-1", "s-acpr-q1-1",
+                                       "s-acpr-q2-1", "s-acpr-q3-1", "s-acpr-q4-1", "s-peff-q1-1", "s-peff-q2-1", "s-peff-q3-1", "s-peff-q4-1",
+                                       "s-peff-q5-1", "s-peff-q6-1", "Raw Income", "Income"].to_set.sort
+  end
+
+  def invalid_rows_are_rejected_for_the_correct_reasons(data)
+    one_thousand_two = data.find { |row| row.response_id == "1002" }
+    expect(one_thousand_two.valid_progress?).to eq false
+    expect(one_thousand_two.valid_duration?).to eq true
+    expect(one_thousand_two.valid_grade?).to eq true
+    expect(one_thousand_two.valid_sd?).to eq true
+
+    one_thousand_three = data.find { |row| row.response_id == "1003" }
+    expect(one_thousand_three.valid_progress?).to eq false
+    expect(one_thousand_three.valid_duration?).to eq true
+    expect(one_thousand_three.valid_grade?).to eq true
+    expect(one_thousand_three.valid_sd?).to eq true
+
+    one_thousand_six = data.find { |row| row.response_id == "1006" }
+    expect(one_thousand_six.valid_progress?).to eq true
+    expect(one_thousand_six.valid_duration?).to eq false
+    expect(one_thousand_six.valid_grade?).to eq true
+    expect(one_thousand_six.valid_sd?).to eq true
+
+    one_thousand_seven = data.find { |row| row.response_id == "1007" }
+    expect(one_thousand_seven.valid_progress?).to eq true
+    expect(one_thousand_seven.valid_duration?).to eq false
+    expect(one_thousand_seven.valid_grade?).to eq true
+    expect(one_thousand_seven.valid_sd?).to eq true
+
+    one_thousand_nine = data.find { |row| row.response_id == "1009" }
+    expect(one_thousand_nine.valid_progress?).to eq true
+    expect(one_thousand_nine.valid_duration?).to eq true
+    expect(one_thousand_nine.valid_grade?).to eq false
+    expect(one_thousand_nine.valid_sd?).to eq true
+
+    one_thousand_ten = data.find { |row| row.response_id == "1010" }
+    expect(one_thousand_ten.valid_progress?).to eq true
+    expect(one_thousand_ten.valid_duration?).to eq true
+    expect(one_thousand_ten.valid_grade?).to eq false
+    expect(one_thousand_ten.valid_sd?).to eq true
+
+    one_thousand_eleven = data.find { |row| row.response_id == "1011" }
+    expect(one_thousand_eleven.valid_progress?).to eq true
+    expect(one_thousand_eleven.valid_duration?).to eq true
+    expect(one_thousand_eleven.valid_grade?).to eq false
+    expect(one_thousand_eleven.valid_sd?).to eq true
+
+    one_thousand_twenty_two = data.find { |row| row.response_id == "1022" }
+    expect(one_thousand_twenty_two.valid_progress?).to eq true
+    expect(one_thousand_twenty_two.valid_duration?).to eq true
+    expect(one_thousand_twenty_two.valid_grade?).to eq false
+    expect(one_thousand_twenty_two.valid_sd?).to eq true
+
+    one_thousand_twenty_three = data.find { |row| row.response_id == "1023" }
+    expect(one_thousand_twenty_three.valid_progress?).to eq true
+    expect(one_thousand_twenty_three.valid_duration?).to eq true
+    expect(one_thousand_twenty_three.valid_grade?).to eq false
+    expect(one_thousand_twenty_three.valid_sd?).to eq true
+
+    one_thousand_thirty_three = data.find { |row| row.response_id == "1033" }
+    expect(one_thousand_thirty_three.valid_progress?).to eq true
+    expect(one_thousand_thirty_three.valid_duration?).to eq true
+    expect(one_thousand_thirty_three.valid_grade?).to eq true
+    expect(one_thousand_thirty_three.valid_sd?).to eq false
+
+    one_thousand_thirty_four = data.find { |row| row.response_id == "1034" }
+    expect(one_thousand_thirty_four.valid_progress?).to eq true
+    expect(one_thousand_thirty_four.valid_duration?).to eq true
+    expect(one_thousand_thirty_four.valid_grade?).to eq true
+    expect(one_thousand_thirty_four.valid_sd?).to eq false
+  end
+
+  def csv_contains_the_correct_rows(csv, rows)
+    rows.each_with_index do |row, index|
+      response_id = 8 # index of the ResponseId column
+      expect(csv[index + 1][response_id]).to eq row
+    end
+  end
 end
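
For orientation, an end-to-end run as these specs exercise it might look like the following. This is a hedged sketch: the disaggregation fixture path is assumed, and the output filenames shown are examples of the scheme the specs assert.

    cleaner = Cleaner.new(
      input_filepath: Rails.root.join("spec", "fixtures", "raw"),
      output_filepath: Rails.root.join("tmp", "spec", "clean"),
      log_filepath: Rails.root.join("tmp", "spec", "removed"),
      disaggregation_filepath: Rails.root.join("spec", "fixtures", "disaggregation") # assumed path
    )
    cleaner.clean
    # e.g. tmp/spec/clean/District1.standard.2022-23.csv
    #      tmp/spec/removed/removed.District1.standard.2022-23.csv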
