mirror of
https://github.com/edcommonwealth/sqm-dashboards.git
synced 2026-03-09 15:38:21 -07:00
Update the demographics table with language options. Create a language table to hold the new languages. Update the demographic loader to input languages into the database. Update the cleaner to read the language column. Update the parent table to hold a reference to a language. Update the data uploader script to read the language from the CSV and update the language information for any parent items that already exist (or create database entries if none already exist). Update the analyze interface to add controls for selecting ‘parents by group’ and a dropdown for ‘parent by language’. Update the analyze controller to read the parent-by-group parameter. Create a graph for the parent-by-group view. Bubble up averages for language calculations. Make sure n-size only counts responses for a given measure.
166 lines
5.4 KiB
Ruby
166 lines
5.4 KiB
Ruby
require "fileutils"
|
|
# Reads raw survey CSV exports from an input directory, sorts every row into
# a "clean" set or a "removed" set, and writes each set to its own CSV file.
class Cleaner
  # Headers that are always added to the parsed header list (when missing)
  # before rows are read.
  ADDITIONAL_HEADERS = [
    "Raw Income", "Income", "Raw ELL", "ELL", "Raw SpEd", "SpEd", "Progress Count",
    "Race", "Gender", "Raw Housing Status", "Housing Status", "Home Language", "Home Languages"
  ].freeze

  # Extra columns appended to every row written to the "removed" log.
  VALIDITY_HEADERS = [
    "Valid Duration?", "Valid Progress?", "Valid Grade?", "Valid Standard Deviation?"
  ].freeze

  attr_reader :input_filepath, :output_filepath, :log_filepath

  # @param input_filepath  directory, joined onto Rails.root, that is searched for "*.csv"
  # @param output_filepath directory that receives the cleaned CSV files
  # @param log_filepath    directory that receives the "removed." CSV files
  #
  # Both output directories are created immediately. `write_csv` calls `#join`
  # on them, so they must respond to it (e.g. Pathname).
  def initialize(input_filepath:, output_filepath:, log_filepath:)
    @input_filepath = input_filepath
    @output_filepath = output_filepath
    @log_filepath = log_filepath
    initialize_directories
  end

  # Processes every "*.csv" under the input directory and writes one cleaned
  # file plus one "removed." file for each input that produced data.
  def clean
    Dir.glob(Rails.root.join(input_filepath, "*.csv")).each do |filepath|
      puts filepath
      File.open(filepath) do |file|
        headers, clean_csv, log_csv, data = process_raw_file(file:)
        # Skip only this file. A `return` here would leave `clean` altogether
        # and silently drop every remaining CSV in the directory.
        next if data.empty?

        output_name = filename(headers:, data:, filepath:)
        write_csv(data: clean_csv, output_filepath:, filename: output_name)
        write_csv(data: log_csv, output_filepath: log_filepath, prefix: "removed.", filename: output_name)
      end
    end
  end

  # Builds the dot-separated output file name:
  # districts, school (only when all rows share one school), survey type,
  # optional "Part-X" label taken from the source path, academic-year range, "csv".
  #
  # @param headers  header names of the source file
  # @param data     non-empty list of row objects responding to
  #                 `academic_year`, `district` and `school`
  # @param filepath path of the source file (may be nil)
  # @return [String]
  def filename(headers:, data:, filepath:)
    survey_item_ids = headers.filter(&:present?)
                             .filter { |header| header.start_with?("s-", "t-", "p-") }
                             .reject { |item| item.end_with?("-1") }
    survey_type = SurveyItem.survey_type(survey_item_ids:)
    range = data.first.academic_year.range

    districts = data.map { |row| row.district.short_name }.uniq
    schools = data.map { |row| row.school.name }.uniq
    part = filepath&.match(/[\b\s_.]+(part)[\W*_](?<label>[\w\d])/i)&.named_captures&.[]("label")&.upcase

    output = []
    output << districts.sort.join(".")
    # The school name is only looked at when it is actually part of the name.
    output << schools.first.parameterize if schools.length == 1
    output << survey_type.to_s
    output << "Part-#{part}" unless part.nil?
    output << range.parameterize
    output << "csv"
    output.join(".")
  end

  # Parses one raw survey file.
  #
  # Calls `exit` (ending the process) when a file with "s-" headers has no
  # header matching /grade/i, or when a header name occurs more than once.
  #
  # @param file an open, line-enumerable IO positioned at the header row
  # @return [Array] `[headers, clean_csv, log_csv, data]` where `clean_csv` and
  #   `log_csv` are arrays of rows (header row first) and `data` holds one
  #   SurveyItemValues per row whose school was found. All four are empty for
  #   a file without a header line.
  def process_raw_file(file:)
    clean_csv = []
    log_csv = []
    data = []

    header_line = file.first
    # A zero-byte file has no header row; report "no data" instead of crashing.
    return [[], clean_csv, log_csv, data] if header_line.nil?

    headers = CSV.parse(header_line).first

    # If this is a student survey
    # Make sure it includes a 'Grade' header
    named_headers = headers.filter(&:present?)
    is_student_survey = named_headers.any? { |header| header.start_with?("s-") }
    has_grade_header = named_headers.any? { |header| header.match?(/grade/i) }
    if is_student_survey && !has_grade_header
      puts "could not find the Grade header. Stopping execution"
      exit
    end

    duplicate_header = headers.detect { |header| headers.count(header) > 1 }
    unless duplicate_header.nil?
      puts "\n>>>>>>>>>>>>>>>>>> Duplicate header found. This will misalign column headings. Please delete or rename the duplicate column: #{duplicate_header} \n>>>>>>>>>>>>>> \n"
      exit
    end

    # Union keeps the file's column order and appends only the missing extras.
    headers |= ADDITIONAL_HEADERS
    # `include_all_headers` appends to `headers` in place, so the list used for
    # parsing below also carries the added names.
    filtered_headers = remove_unwanted_headers(headers: include_all_headers(headers:))
    clean_csv << filtered_headers
    log_csv << (filtered_headers + VALIDITY_HEADERS)

    all_survey_items = survey_items(headers:)

    file.lazy.each_slice(1000) do |lines|
      CSV.parse(lines.join, headers:).each do |row|
        values = SurveyItemValues.new(row:, headers:,
                                      survey_items: all_survey_items, schools:, academic_years:)
        # Rows whose school cannot be found are dropped without being logged.
        next unless values.valid_school?

        data << values
        if values.valid?
          clean_csv << values.to_a
        else
          log_csv << (values.to_a << values.valid_duration?.to_s << values.valid_progress?.to_s <<
                      values.valid_grade?.to_s << values.valid_sd?.to_s)
        end
      end
    end

    [headers, clean_csv, log_csv, data]
  end

  private

  # Memoized `AcademicYear.all`.
  def academic_years
    @academic_years ||= AcademicYear.all
  end

  # For every header shaped like "s-xxx-yyy-1" / "t-xxx-yyy-1", appends the
  # name without the trailing "-1" unless it is already present.
  # Mutates and returns the array that was passed in.
  def include_all_headers(headers:)
    alternates = headers.filter(&:present?)
                        .filter { |header| header.match?(/^[st]-\w*-\w*-1$/i) }
    alternates.each do |header|
      main = header.sub(/-1\z/, "")
      headers.push(main) unless headers.include?(main)
    end
    headers
  end

  def initialize_directories
    create_ouput_directory
    create_log_directory
  end

  # Returns a new list without duplicates, nils, headers starting with "Q",
  # and "s-/t-/p-" headers that end in "-1".
  def remove_unwanted_headers(headers:)
    headers.uniq.compact
           .reject { |item| item.start_with?("Q") }
           .reject { |header| header.match?(/^[stp]-\w*-\w*-1$/i) }
  end

  # Serializes `data` (an array of rows) as CSV and writes it to
  # `output_filepath.join(prefix + filename)`.
  def write_csv(data:, output_filepath:, filename:, prefix: "")
    contents = CSV.generate do |csv|
      data.each { |row| csv << row }
    end
    File.write(output_filepath.join(prefix + filename), contents)
  end

  # Memoized `School.by_dese_id`.
  def schools
    @schools ||= School.by_dese_id
  end

  # Memoized `Gender.by_qualtrics_code`.
  def genders
    @genders ||= Gender.by_qualtrics_code
  end

  # Survey items whose id appears among the "t-", "s-" or "p-" headers.
  # Cached per id list: one Cleaner handles many files, and a single shared
  # memo would hand the first file's items to every later file.
  def survey_items(headers:)
    survey_item_ids = headers
                      .filter(&:present?)
                      .filter { |header| header.start_with?("t-", "s-", "p-") }
    @survey_items ||= {}
    @survey_items[survey_item_ids] ||= SurveyItem.where(survey_item_id: survey_item_ids)
  end

  def create_ouput_directory
    FileUtils.mkdir_p output_filepath
  end

  def create_log_directory
    FileUtils.mkdir_p log_filepath
  end
end
|