You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
sqm-dashboards/app/services/cleaner.rb

126 lines
3.5 KiB

require 'fileutils'
class Cleaner
attr_reader :input_filepath, :output_filepath, :log_filepath, :clean_csv, :log_csv
def initialize(input_filepath:, output_filepath:, log_filepath:)
@input_filepath = input_filepath
@output_filepath = output_filepath
@log_filepath = log_filepath
initialize_directories
end
def initialize_directories
create_ouput_directory
create_log_directory
end
def clean
Dir.glob(Rails.root.join(input_filepath, '*.csv')).each do |filepath|
puts filepath
File.open(filepath) do |file|
clean_csv = []
log_csv = []
data = []
headers = CSV.parse(file.first).first
filtered_headers = remove_unwanted_headers(headers:)
log_headers = (filtered_headers + ['Valid Duration?', 'Valid Progress?', 'Valid Grade?',
'Valid Standard Deviation?']).flatten
clean_csv << filtered_headers
log_csv << log_headers
all_survey_items = survey_items(headers:)
file.lazy.each_slice(1000) do |lines|
CSV.parse(lines.join, headers:).map do |row|
values = SurveyItemValues.new(row:, headers:, genders:,
survey_items: all_survey_items, schools:)
next unless values.valid_school?
data << values
values.valid? ? clean_csv << values.to_a : log_csv << (values.to_a << values.valid_duration?.to_s << values.valid_progress?.to_s << values.valid_grade?.to_s << values.valid_sd?.to_s)
end
end
unless data.empty?
filename = filename(headers:, data:)
write_csv(data: clean_csv, output_filepath:, filename:)
write_csv(data: log_csv, output_filepath: log_filepath, prefix: 'removed.', filename:)
end
end
end
end
def filename(headers:, data:)
survey_item_ids = headers.filter(&:present?).filter do |header|
header.start_with?('s-', 't-')
end.reject { |item| item.end_with? '-1' }
survey_type = SurveyItem.survey_type(survey_item_ids:)
range = data.first.academic_year.range
districts = data.map do |row|
row.district.name
end.to_set.to_a
districts.join('.').to_s + '.' + survey_type.to_s + '.' + range + '.csv'
end
def remove_unwanted_headers(headers:)
headers.to_set.to_a.compact.reject do |item|
item.start_with? 'Q'
end.reject { |item| item.end_with? '-1' }
end
def write_csv(data:, output_filepath:, filename:, prefix: '')
csv = CSV.generate do |csv|
data.each do |row|
csv << row
end
end
File.write(output_filepath.join(prefix + filename), csv)
end
def process_row(row:)
clean_csv << row.to_csv
log_csv << row.to_csv
end
def schools
@schools ||= School.school_hash
end
def genders
@genders ||= begin
gender_hash = {}
Gender.all.each do |gender|
gender_hash[gender.qualtrics_code] = gender
end
gender_hash
end
end
def survey_items(headers:)
@survey_items ||= SurveyItem.where(survey_item_id: get_survey_item_ids_from_headers(headers:))
end
def get_survey_item_ids_from_headers(headers:)
headers
.filter(&:present?)
.filter { |header| header.start_with? 't-', 's-' }
end
def create_ouput_directory
FileUtils.mkdir_p output_filepath
end
def create_log_directory
FileUtils.mkdir_p log_filepath
end
def create_file(path:, filename:)
FileUtils.touch path.join(filename)
end
end