mirror of
https://github.com/edcommonwealth/sqm-dashboards.git
synced 2026-03-13 09:20:38 -07:00
fix: ensure cleaner outputs columns for all survey items. Before the fix, if a survey item variant (ending in -1, e.g. s-tint-q1-1) did not have a matching survey item s-tint-q1, the resulting CSV would not include that column.
This commit is contained in:
parent
2c9df34fac
commit
7bd7923d41
8 changed files with 300 additions and 98 deletions
|
|
@ -1,4 +1,4 @@
|
|||
require 'fileutils'
|
||||
require "fileutils"
|
||||
class Cleaner
|
||||
attr_reader :input_filepath, :output_filepath, :log_filepath, :clean_csv, :log_csv
|
||||
|
||||
|
|
@ -15,47 +15,24 @@ class Cleaner
|
|||
end
|
||||
|
||||
def clean
|
||||
Dir.glob(Rails.root.join(input_filepath, '*.csv')).each do |filepath|
|
||||
Dir.glob(Rails.root.join(input_filepath, "*.csv")).each do |filepath|
|
||||
puts filepath
|
||||
File.open(filepath) do |file|
|
||||
File.open(filepath) do |_file|
|
||||
clean_csv = []
|
||||
log_csv = []
|
||||
data = []
|
||||
|
||||
headers = CSV.parse(file.first).first
|
||||
filtered_headers = remove_unwanted_headers(headers:)
|
||||
log_headers = (filtered_headers + ['Valid Duration?', 'Valid Progress?', 'Valid Grade?',
|
||||
'Valid Standard Deviation?']).flatten
|
||||
|
||||
clean_csv << filtered_headers
|
||||
log_csv << log_headers
|
||||
|
||||
all_survey_items = survey_items(headers:)
|
||||
|
||||
file.lazy.each_slice(1000) do |lines|
|
||||
CSV.parse(lines.join, headers:).map do |row|
|
||||
values = SurveyItemValues.new(row:, headers:, genders:,
|
||||
survey_items: all_survey_items, schools:)
|
||||
next unless values.valid_school?
|
||||
|
||||
data << values
|
||||
values.valid? ? clean_csv << values.to_a : log_csv << (values.to_a << values.valid_duration?.to_s << values.valid_progress?.to_s << values.valid_grade?.to_s << values.valid_sd?.to_s)
|
||||
end
|
||||
end
|
||||
|
||||
unless data.empty?
|
||||
filename = filename(headers:, data:)
|
||||
write_csv(data: clean_csv, output_filepath:, filename:)
|
||||
write_csv(data: log_csv, output_filepath: log_filepath, prefix: 'removed.', filename:)
|
||||
end
|
||||
filename = filename(headers:, data:)
|
||||
write_csv(data: clean_csv, output_filepath:, filename:)
|
||||
write_csv(data: log_csv, output_filepath: log_filepath, prefix: "removed.", filename:)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def filename(headers:, data:)
|
||||
survey_item_ids = headers.filter(&:present?).filter do |header|
|
||||
header.start_with?('s-', 't-')
|
||||
end.reject { |item| item.end_with? '-1' }
|
||||
header.start_with?("s-", "t-")
|
||||
end.reject { |item| item.end_with? "-1" }
|
||||
survey_type = SurveyItem.survey_type(survey_item_ids:)
|
||||
range = data.first.academic_year.range
|
||||
|
||||
|
|
@ -63,16 +40,62 @@ class Cleaner
|
|||
row.district.name
|
||||
end.to_set.to_a
|
||||
|
||||
districts.join('.').to_s + '.' + survey_type.to_s + '.' + range + '.csv'
|
||||
districts.join(".").to_s + "." + survey_type.to_s + "." + range + ".csv"
|
||||
end
|
||||
|
||||
# Parses one raw survey CSV and splits its rows into a clean set and a
# rejected ("log") set.
#
# The first line of +file+ is read as the header row, and "Raw Income" and
# "Income" are appended to it. include_all_headers then appends the base id of
# any "-1" variant column that has no base column of its own. That full list
# is what the remaining lines are parsed against and what is handed to
# SurveyItemValues. The header row written to the outputs is the same list
# after remove_unwanted_headers has been applied to it.
#
# @param file an enumerable of CSV lines (must respond to +first+ and +lazy+);
#   presumably an open File — confirm against the caller
# @param disaggregation_data passed through unchanged to SurveyItemValues
# @return [Array] four elements, in this order:
#   - headers:   the full header list described above
#   - clean_csv: the output header row followed by +to_a+ of every valid row
#   - log_csv:   the output header row plus four "Valid ...?" columns, followed
#                by every invalid row with its four validity flags appended
#   - data:      every SurveyItemValues whose school is valid, valid or not
def process_raw_file(file:, disaggregation_data:)
  headers = CSV.parse(file.first).first
  headers << "Raw Income"
  headers << "Income"

  # include_all_headers appends to (and returns) the very same array, so
  # `headers` below already contains any base ids it added.
  output_headers = remove_unwanted_headers(headers: include_all_headers(headers:))
  validity_columns = ["Valid Duration?", "Valid Progress?", "Valid Grade?", "Valid Standard Deviation?"]

  clean_csv = [output_headers]
  log_csv = [output_headers + validity_columns]
  data = []

  all_survey_items = survey_items(headers:)

  # The remaining lines are parsed 1000 at a time.
  # NOTE(review): a quoted field containing a newline that straddles a slice
  # boundary would presumably not parse as one record — confirm whether the
  # input can contain such fields.
  file.lazy.each_slice(1000) do |lines|
    CSV.parse(lines.join, headers:).each do |row|
      values = SurveyItemValues.new(row:, headers:, genders:,
                                    survey_items: all_survey_items, schools:, disaggregation_data:)
      # Rows without a valid school appear in none of the outputs.
      next unless values.valid_school?

      data << values

      if values.valid?
        clean_csv << values.to_a
      else
        rejected_row = values.to_a
        rejected_row << values.valid_duration?.to_s
        rejected_row << values.valid_progress?.to_s
        rejected_row << values.valid_grade?.to_s
        rejected_row << values.valid_sd?.to_s
        log_csv << rejected_row
      end
    end
  end

  [headers, clean_csv, log_csv, data]
end
|
||||
|
||||
private
|
||||
|
||||
# Makes sure every variant column (a header ending in "-1") has its base
# column in the header list, e.g. "s-tint-q1" for "s-tint-q1-1".
#
# Missing base ids are appended to the end of +headers+. The array is changed
# in place and the same array is returned; process_raw_file keeps using its
# own +headers+ local after calling this, so it sees the appended ids.
#
# @param headers [Array] header names; blank entries are ignored
# @return [Array] the same +headers+ array, possibly with base ids appended
def include_all_headers(headers:)
  # Collected up front so that appending below does not affect the iteration.
  variant_ids = headers.select { |name| name.present? && name.end_with?("-1") }

  variant_ids.each do |variant_id|
    base_id = variant_id.delete_suffix("-1")
    headers << base_id unless headers.include?(base_id)
  end

  headers
end
|
||||
|
||||
# Runs the two directory set-up helpers: the output directory first, then the
# log directory. Returns whatever create_log_directory returns.
def initialize_directories
  create_ouput_directory
  create_log_directory
end
|
||||
|
||||
def remove_unwanted_headers(headers:)
|
||||
headers.to_set.to_a.compact.reject do |item|
|
||||
item.start_with? 'Q'
|
||||
end.reject { |item| item.end_with? '-1' }
|
||||
item.start_with? "Q"
|
||||
end.reject { |item| item.end_with? "-1" }
|
||||
end
|
||||
|
||||
def write_csv(data:, output_filepath:, filename:, prefix: '')
|
||||
def write_csv(data:, output_filepath:, filename:, prefix: "")
|
||||
csv = CSV.generate do |csv|
|
||||
data.each do |row|
|
||||
csv << row
|
||||
|
|
@ -102,13 +125,10 @@ class Cleaner
|
|||
end
|
||||
|
||||
def survey_items(headers:)
|
||||
@survey_items ||= SurveyItem.where(survey_item_id: get_survey_item_ids_from_headers(headers:))
|
||||
end
|
||||
|
||||
def get_survey_item_ids_from_headers(headers:)
|
||||
headers
|
||||
.filter(&:present?)
|
||||
.filter { |header| header.start_with? 't-', 's-' }
|
||||
survey_item_ids = headers
|
||||
.filter(&:present?)
|
||||
.filter { |header| header.start_with? "t-", "s-" }
|
||||
@survey_items ||= SurveyItem.where(survey_item_id: survey_item_ids)
|
||||
end
|
||||
|
||||
def create_ouput_directory
|
||||
|
|
|
|||
|
|
@ -7,6 +7,11 @@ class SurveyItemValues
|
|||
@genders = genders
|
||||
@survey_items = survey_items
|
||||
@schools = schools
|
||||
@disaggregation_data = disaggregation_data
|
||||
|
||||
copy_likert_scores_from_variant_survey_items
|
||||
row["Income"] = income
|
||||
row["Raw Income"] = raw_income
|
||||
end
|
||||
|
||||
# Some survey items have variants, e.g. a survey item with an id of s-tint-q1 might have a variant that looks like s-tint-q1-1. We must ensure that every variant of the form s-tint-q1-1 has a matching main survey item (s-tint-q1).
|
||||
|
|
@ -115,6 +120,8 @@ class SurveyItemValues
|
|||
|
||||
return "Unknown" unless disaggregation_data.present?
|
||||
|
||||
byebug
|
||||
|
||||
disaggregation = disaggregation_data[[lasid, district.name, academic_year.range]]
|
||||
return "Unknown" unless disaggregation.present?
|
||||
|
||||
|
|
@ -147,7 +154,6 @@ class SurveyItemValues
|
|||
end
|
||||
|
||||
def to_a
|
||||
copy_likert_scores_from_variant_survey_items
|
||||
headers.select(&:present?)
|
||||
.reject { |key, _value| key.start_with? "Q" }
|
||||
.reject { |key, _value| key.end_with? "-1" }
|
||||
|
|
@ -238,7 +244,8 @@ class SurveyItemValues
|
|||
headers.filter(&:present?).filter { |header| header.end_with? "-1" }.each do |header|
|
||||
likert_score = row[header]
|
||||
main_item = header.gsub("-1", "")
|
||||
row[main_item] = likert_score if likert_score.present?
|
||||
row[main_item] = likert_score if likert_score.present? && row[main_item].blank?
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
|||
|
|
@ -1,14 +1,6 @@
|
|||
<g class="grouped-bar-column" data-for-measure-id="<%= column.measure.measure_id %>">
|
||||
<% score_label_y = [5, 10, 15, 5, 10, 15 ] %>
|
||||
<% column.bars.each_with_index do |bar, index| %>
|
||||
<<<<<<< HEAD
|
||||
<rect data-for-academic-year="<%= bar.academic_year.range %>" x="<%= bar.x_position %>%" y="<%= bar.y_offset %>%" width="<%= column.bar_width %>%" height="<%= bar.bar_height_percentage %>%" fill="<%= bar.color %>" />
|
||||
|
||||
<% if ENV["SCORES"].present? && ENV["SCORES"].upcase == "SHOW" %>
|
||||
<text x="<%= bar.x_position + (column.bar_width * 0.5) %>%" y="<%= score_label_y[index] %>%" text-anchor="middle" dominant-baseline="middle">
|
||||
<%= bar.average %>
|
||||
</text>
|
||||
=======
|
||||
<% if column.sufficient_data?(index) %>
|
||||
<rect
|
||||
<% if column.show_popover? %>
|
||||
|
|
@ -29,7 +21,6 @@
|
|||
</text>
|
||||
<% end %>
|
||||
|
||||
>>>>>>> 67e469a6 (feat: add popover to analyze graphs that displays the n-size of the different columns. Make sure to only calculate a score for a race if there are more than 10 respondents to a question.)
|
||||
<% end %>
|
||||
<% end %>
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue