From ece1f61aecd765bd560c3afc72df6fc2d71c75db Mon Sep 17 00:00:00 2001 From: rebuilt Date: Mon, 30 Oct 2023 19:44:52 -0700 Subject: [PATCH] feat: add command for loading survey responses from arbitrary sftp directory --- README.md | 25 ++++++++++---- lib/tasks/data.rake | 81 +++++++++++++-------------------------------- 2 files changed, 42 insertions(+), 64 deletions(-) diff --git a/README.md b/README.md index 4ab2a16c..491a55f8 100644 --- a/README.md +++ b/README.md @@ -124,30 +124,43 @@ How to run the data loading task: ```bash # locally -$ bundle exec rake data:load_survey_responses +bundle exec rake data:load_survey_responses # on heroku staging environment -$ heroku run:detached -a mciea-beta bundle exec rake data:load_survey_responses +heroku run:detached -a mciea-beta bundle exec rake data:load_survey_responses # on heroku production environment -$ heroku run:detached -a mciea-dashboard bundle exec rake data:load_survey_responses +heroku run:detached -a mciea-dashboard bundle exec rake data:load_survey_responses +``` + +Or if you want to load data from a specific directory + +```bash +# locally +SFTP_PATH=/data/survey_responses/2022_23 bundle exec rake data:load_survey_responses_from_path + +# on heroku staging environment +heroku run:detached -a mciea-beta SFTP_PATH=/data/survey_responses/2022_23 bundle exec rake data:load_survey_responses_from_path + +# on heroku production environment +heroku run:detached -a mciea-dashboard SFTP_PATH=/data/survey_responses/2022_23 bundle exec rake data:load_survey_responses_from_path ``` Or if you only want to load data for Lowell ```bash # locally -$ bundle exec rake data:load_survey_responses_for_lowell +bundle exec rake data:load_survey_responses_for_lowell ``` For convenience, you can use the following script for loading data on Heroku: ```bash # on heroku staging environment -$ ./scripts/load_survey_responses_on_heroku beta +./scripts/load_survey_responses_on_heroku beta # on heroku production environment -$ ./scripts/load_survey_responses_on_heroku dashboard +./scripts/load_survey_responses_on_heroku dashboard ``` There is also an example one-off task to load a single csv at a time. diff --git a/lib/tasks/data.rake b/lib/tasks/data.rake index 4075a32d..dd233f62 100644 --- a/lib/tasks/data.rake +++ b/lib/tasks/data.rake @@ -1,11 +1,11 @@ -require 'csv' +require "csv" namespace :data do - desc 'load survey responses' + desc "load survey responses" task load_survey_responses: :environment do survey_item_response_count = SurveyItemResponse.count student_count = Student.count - path = '/data/survey_responses/clean/' + path = "/data/survey_responses/clean/" Sftp::Directory.open(path:) do |file| SurveyResponsesDataLoader.from_file(file:) end @@ -16,129 +16,94 @@ namespace :data do end puts "=====================> Completed loading #{Student.count - student_count} students. #{Student.count} total students" - puts 'Resetting race scores' + puts "Resetting race scores" RaceScoreLoader.reset(fast_processing: false) puts "=====================> Completed loading #{RaceScore.count} race scores" Rails.cache.clear end - desc 'load survey responses for lowell schools' - task load_survey_responses_for_lowell: :environment do + desc "load survey responses from a specific directory" + task load_survey_responses_from_path: :environment do survey_item_response_count = SurveyItemResponse.count student_count = Student.count - path = '/data/survey_responses/clean/' + path = "#{ENV['SFTP_PATH']}" Sftp::Directory.open(path:) do |file| SurveyResponsesDataLoader.new.from_file(file:) end puts "=====================> Completed loading #{SurveyItemResponse.count - survey_item_response_count} survey responses. #{SurveyItemResponse.count} total responses in the database" Sftp::Directory.open(path:) do |file| - StudentLoader.from_file(file:, rules: [Rule::SkipNonLowellSchools]) + StudentLoader.from_file(file:, rules: []) end puts "=====================> Completed loading #{Student.count - student_count} students. #{Student.count} total students" - puts 'Resetting race scores' - RaceScoreLoader.reset(fast_processing: false) - puts "=====================> Completed loading #{RaceScore.count} race scores" - Rails.cache.clear end - desc 'delete non-lowell schools and districts' - task delete_non_lowell: :environment do - schools = School.all.reject { |s| s.district.name == 'Lowell' } - ResponseRate.where(school: schools).delete_all - RaceScore.where(school: schools).delete_all - Respondent.where(school: schools).delete_all - schools.each { |school| school.delete } - districts = District.all.reject { |district| district.name == 'Lowell' } - districts.each { |district| district.delete } - end - - - desc 'load students for lowell' - task load_students_for_lowell: :environment do - student_count = Student.count - SurveyItemResponse.update_all(student_id: nil) - StudentRace.delete_all - Student.delete_all - - Sftp::Directory.open(path: '/data/survey_responses/clean/') do |file| - StudentLoader.from_file(file:, rules: [Rule::SkipNonLowellSchools]) - end - puts "=====================> Completed loading #{Student.count - student_count} students. #{Student.count} total students" - - puts 'Resetting race scores' - RaceScoreLoader.reset(fast_processing: false) - puts "=====================> Completed loading #{RaceScore.count} survey responses" - - Rails.cache.clear - end - - desc 'reset response rate values' + desc "reset response rate values" task reset_response_rates: :environment do - puts 'Resetting response rates' + puts "Resetting response rates" ResponseRateLoader.reset Rails.cache.clear puts "=====================> Completed loading #{ResponseRate.count} survey responses" end - desc 'reset race score calculations' + desc "reset race score calculations" task reset_race_scores: :environment do - puts 'Resetting race scores' + puts "Resetting race scores" RaceScoreLoader.reset(fast_processing: false) Rails.cache.clear puts "=====================> Completed loading #{RaceScore.count} survey responses" end - desc 'load admin_data' + desc "load admin_data" task load_admin_data: :environment do original_count = AdminDataValue.count - Dir.glob(Rails.root.join('data', 'admin_data', 'dese', '*.csv')).each do |filepath| + Dir.glob(Rails.root.join("data", "admin_data", "dese", "*.csv")).each do |filepath| puts "=====================> Loading data from csv at path: #{filepath}" Dese::Loader.load_data filepath: end puts "=====================> Completed loading #{AdminDataValue.count - original_count} admin data values" end - desc 'load students' + desc "load students" task load_students: :environment do SurveyItemResponse.update_all(student_id: nil) StudentRace.delete_all Student.delete_all - Dir.glob(Rails.root.join('data', 'survey_responses', '*student*.csv')).each do |file| + Dir.glob(Rails.root.join("data", "survey_responses", "*student*.csv")).each do |file| puts "=====================> Loading student data from csv at path: #{file}" StudentLoader.load_data filepath: file end puts "=====================> Completed loading #{Student.count} students" - puts 'Resetting race scores' + puts "Resetting race scores" RaceScoreLoader.reset(fast_processing: false) puts "=====================> Completed loading #{RaceScore.count} survey responses" Rails.cache.clear end - desc 'reset all cache counters' + desc "reset all cache counters" task reset_cache_counters: :environment do - puts '=====================> Resetting Category counters' + puts "=====================> Resetting Category counters" Category.all.each do |category| Category.reset_counters(category.id, :subcategories) end - puts '=====================> Resetting Subcategory counters' + puts "=====================> Resetting Subcategory counters" Subcategory.all.each do |subcategory| Subcategory.reset_counters(subcategory.id, :measures) end - puts '=====================> Resetting Measure counters' + puts "=====================> Resetting Measure counters" Measure.all.each do |measure| Measure.reset_counters(measure.id, :scales) end - puts '=====================> Resetting Scale counters' + puts "=====================> Resetting Scale counters" Scale.all.each do |scale| Scale.reset_counters(scale.id, :survey_items) end - puts '=====================> Resetting SurveyItem counters' + puts "=====================> Resetting SurveyItem counters" SurveyItem.all.each do |survey_item| SurveyItem.reset_counters(survey_item.id, :survey_item_responses) end