diff --git a/app/lib/dashboard/seeder.rb b/app/lib/dashboard/seeder.rb index ff7a570..9bca426 100644 --- a/app/lib/dashboard/seeder.rb +++ b/app/lib/dashboard/seeder.rb @@ -6,7 +6,7 @@ module Dashboard academic_years << { range: } end - AcademicYear.upsert_all(academic_years) + AcademicYear.upsert_all(academic_years, unique_by: :range) end def seed_districts_and_schools(csv_file) @@ -30,7 +30,7 @@ module Dashboard is_hs: marked?(hs), slug: school_name.parameterize } end - School.insert_all(schools) + School.insert_all(schools, unique_by: :dese_id) Respondent.joins(:school).where.not("school.dese_id": dese_ids).destroy_all School.where.not(dese_id: dese_ids).destroy_all @@ -97,9 +97,9 @@ module Dashboard AdminDataItem.where.not(id: admin_data_item_ids).delete_all end - # def seed_demographics(csv_file) - # DemographicLoader.load_data(filepath: csv_file) - # end + def seed_demographics(csv_file) + DemographicLoader.load_data(filepath: csv_file) + end # def seed_enrollment(csv_file) # EnrollmentLoader.load_data(filepath: csv_file) diff --git a/app/services/dashboard/cleaner.rb b/app/services/dashboard/cleaner.rb new file mode 100644 index 0000000..4b11a66 --- /dev/null +++ b/app/services/dashboard/cleaner.rb @@ -0,0 +1,146 @@ +require "fileutils" +module Dashboard + class Cleaner + attr_reader :input_filepath, :output_filepath, :log_filepath + + def initialize(input_filepath:, output_filepath:, log_filepath:) + @input_filepath = input_filepath + @output_filepath = output_filepath + @log_filepath = log_filepath + initialize_directories + end + + def clean + Dir.glob(Rails.root.join(input_filepath, "*.csv")).each do |filepath| + puts filepath + File.open(filepath) do |file| + processed_data = process_raw_file(file:) + processed_data in [headers, clean_csv, log_csv, data] + next if data.empty? 
+ + filename = filename(headers:, data:, filepath:) + write_csv(data: clean_csv, output_filepath:, filename:) + write_csv(data: log_csv, output_filepath: log_filepath, prefix: "removed.", filename:) + end + end + end + + def filename(headers:, data:, filepath:) + output = [] + survey_item_ids = headers.filter(&:present?).filter do |header| + header.start_with?("s-", "t-") + end.reject { |item| item.end_with? "-1" } + survey_type = SurveyItem.survey_type(survey_item_ids:) + range = data.first.academic_year.range + + districts = data.map do |row| + row.district.short_name + end.to_set.to_a + + schools = data.map do |row| + row.school.name + end.to_set + + part = filepath&.match(/[\b\s_.]+(part|form)[\W*_](?