mirror of
https://github.com/edcommonwealth/sqm-dashboards.git
synced 2026-03-07 13:38:18 -08:00
Add automated data cleaning. Modify the SurveyItemValues class to use regexes
instead of hard-coded values. Produce a clean CSV and a CSV with all the removed values and columns, with the reasons for removal. Add a script for running the cleaning for each project.
This commit is contained in:
parent
5cf5a5f383
commit
4509c157fa
20 changed files with 1148 additions and 154 deletions
33
lib/tasks/clean.rake
Normal file
33
lib/tasks/clean.rake
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
namespace :clean do
  # These tasks must be run in their respective project so the correct schools are in the database

  # Task name => directories (path segments below tmp/data) handed to Cleaner
  # as input_filepath, output_filepath and log_filepath respectively.
  # Note that the prepped data lives under ecp_data, and that its output and
  # log directories are nested inside its input directory.
  # NOTE(review): Cleaner is defined elsewhere; what it reads from and writes
  # to each directory should be confirmed against that class.
  cleaning_jobs = {
    ecp: {
      input: %w[ecp_data raw],
      output: %w[ecp_data clean],
      log: %w[ecp_data removed]
    },
    prepped: {
      input: %w[ecp_data prepped],
      output: %w[ecp_data prepped clean],
      log: %w[ecp_data prepped removed]
    },
    mciea: {
      input: %w[mciea_data raw],
      output: %w[mciea_data clean],
      log: %w[mciea_data removed]
    },
    rpp: {
      input: %w[rpp_data raw],
      output: %w[rpp_data clean],
      log: %w[rpp_data removed]
    }
  }.freeze

  # Define one `clean:<name>` task per entry. Each task depends on
  # :environment so that Rails and Cleaner are loaded before the body runs.
  cleaning_jobs.each do |name, dirs|
    desc "clean #{name} data"
    task name => :environment do
      # Rails.root is resolved inside the task body (not at definition time)
      # because Rails is only guaranteed to be loaded once :environment has run.
      Cleaner.new(
        input_filepath: Rails.root.join('tmp', 'data', *dirs[:input]),
        output_filepath: Rails.root.join('tmp', 'data', *dirs[:output]),
        log_filepath: Rails.root.join('tmp', 'data', *dirs[:log])
      ).clean
    end
  end
end
|
||||
Loading…
Add table
Add a link
Reference in a new issue