feat: add command for loading survey responses from arbitrary sftp directory

rpp-main
rebuilt 2 years ago
parent 91ab2dd82e
commit ece1f61aec

@ -124,30 +124,43 @@ How to run the data loading task:
```bash
# locally
$ bundle exec rake data:load_survey_responses
bundle exec rake data:load_survey_responses
# on heroku staging environment
$ heroku run:detached -a mciea-beta bundle exec rake data:load_survey_responses
heroku run:detached -a mciea-beta bundle exec rake data:load_survey_responses
# on heroku production environment
$ heroku run:detached -a mciea-dashboard bundle exec rake data:load_survey_responses
heroku run:detached -a mciea-dashboard bundle exec rake data:load_survey_responses
```
Or, if you want to load data from a specific directory on the SFTP server, pass it in the `SFTP_PATH` environment variable:
```bash
# locally
SFTP_PATH=/data/survey_responses/2022_23 bundle exec rake data:load_survey_responses_from_path
# on heroku staging environment
heroku run:detached -a mciea-beta SFTP_PATH=/data/survey_responses/2022_23 bundle exec rake data:load_survey_responses_from_path
# on heroku production environment
heroku run:detached -a mciea-dashboard SFTP_PATH=/data/survey_responses/2022_23 bundle exec rake data:load_survey_responses_from_path
```
Or, if you only want to load data for Lowell:
```bash
# locally
$ bundle exec rake data:load_survey_responses_for_lowell
bundle exec rake data:load_survey_responses_for_lowell
```
For convenience, you can use the following script for loading data on Heroku:
```bash
# on heroku staging environment
$ ./scripts/load_survey_responses_on_heroku beta
./scripts/load_survey_responses_on_heroku beta
# on heroku production environment
$ ./scripts/load_survey_responses_on_heroku dashboard
./scripts/load_survey_responses_on_heroku dashboard
```
There is also an example one-off task to load a single CSV file at a time.

@ -1,11 +1,11 @@
require 'csv'
require "csv"
namespace :data do
desc 'load survey responses'
desc "load survey responses"
task load_survey_responses: :environment do
survey_item_response_count = SurveyItemResponse.count
student_count = Student.count
path = '/data/survey_responses/clean/'
path = "/data/survey_responses/clean/"
Sftp::Directory.open(path:) do |file|
SurveyResponsesDataLoader.from_file(file:)
end
@ -16,129 +16,94 @@ namespace :data do
end
puts "=====================> Completed loading #{Student.count - student_count} students. #{Student.count} total students"
puts 'Resetting race scores'
puts "Resetting race scores"
RaceScoreLoader.reset(fast_processing: false)
puts "=====================> Completed loading #{RaceScore.count} race scores"
Rails.cache.clear
end
# Loads survey responses and students from an SFTP directory, then resets race scores
# and clears the Rails cache.
# NOTE(review): the two desc/task headers and the paired lines below (path = ..., StudentLoader...)
# look like the old and new sides of a diff shown without +/- markers — confirm which lines are
# live before editing this task.
desc 'load survey responses for lowell schools'
task load_survey_responses_for_lowell: :environment do
desc "load survey responses from a specific directory"
task load_survey_responses_from_path: :environment do
# Snapshot the row counts so the summaries below can report the difference after loading.
survey_item_response_count = SurveyItemResponse.count
student_count = Student.count
path = '/data/survey_responses/clean/'
# NOTE(review): if SFTP_PATH is unset, ENV['SFTP_PATH'] is nil and this interpolation yields "",
# so the task proceeds with an empty path; consider ENV.fetch("SFTP_PATH") to fail loudly instead.
path = "#{ENV['SFTP_PATH']}"
# First pass over the directory: each yielded file is handed to the survey response loader.
Sftp::Directory.open(path:) do |file|
# NOTE(review): the load_survey_responses task calls SurveyResponsesDataLoader.from_file (no .new);
# confirm whether from_file is a class method or an instance method.
SurveyResponsesDataLoader.new.from_file(file:)
end
puts "=====================> Completed loading #{SurveyItemResponse.count - survey_item_response_count} survey responses. #{SurveyItemResponse.count} total responses in the database"
# Second pass over the same directory: each yielded file is handed to the student loader.
Sftp::Directory.open(path:) do |file|
# NOTE(review): the next two lines differ only in `rules:`; if both were live, every file would be
# processed twice (once skipping non-Lowell schools, once with no rules).
StudentLoader.from_file(file:, rules: [Rule::SkipNonLowellSchools])
StudentLoader.from_file(file:, rules: [])
end
puts "=====================> Completed loading #{Student.count - student_count} students. #{Student.count} total students"
puts 'Resetting race scores'
RaceScoreLoader.reset(fast_processing: false)
puts "=====================> Completed loading #{RaceScore.count} race scores"
Rails.cache.clear
end
# Deletes every school whose district is not named 'Lowell', together with the ResponseRate,
# RaceScore and Respondent rows that reference those schools, and then deletes every district
# that is not named 'Lowell'.
desc 'delete non-lowell schools and districts'
task delete_non_lowell: :environment do
# The filter runs in Ruby (reject on School.all), reading s.district for each school.
schools = School.all.reject { |s| s.district.name == 'Lowell' }
# Dependent rows are removed before the schools themselves.
# NOTE(review): presumably required by foreign keys from these tables to schools — confirm.
ResponseRate.where(school: schools).delete_all
RaceScore.where(school: schools).delete_all
Respondent.where(school: schools).delete_all
schools.each { |school| school.delete }
districts = District.all.reject { |district| district.name == 'Lowell' }
districts.each { |district| district.delete }
end
# Wipes all students (and their links from survey item responses and student races), reloads
# students from the SFTP directory using the SkipNonLowellSchools rule, then resets race scores
# and clears the Rails cache.
desc 'load students for lowell'
task load_students_for_lowell: :environment do
# NOTE(review): this count is taken BEFORE Student.delete_all below, so the
# "Student.count - student_count" figure printed later is the net change relative to the
# pre-wipe total, not the number of students loaded by this run.
student_count = Student.count
# Detach responses from students and drop student races before deleting the students themselves.
SurveyItemResponse.update_all(student_id: nil)
StudentRace.delete_all
Student.delete_all
Sftp::Directory.open(path: '/data/survey_responses/clean/') do |file|
StudentLoader.from_file(file:, rules: [Rule::SkipNonLowellSchools])
end
puts "=====================> Completed loading #{Student.count - student_count} students. #{Student.count} total students"
puts 'Resetting race scores'
RaceScoreLoader.reset(fast_processing: false)
# NOTE(review): the value printed is RaceScore.count, but the message labels it "survey responses".
puts "=====================> Completed loading #{RaceScore.count} survey responses"
Rails.cache.clear
end
# Recomputes response rates via ResponseRateLoader.reset, clears the Rails cache, and prints
# the resulting ResponseRate row count.
# NOTE(review): the paired desc/puts lines differ only in quote style and look like the old and
# new sides of a diff — confirm which one is live.
desc 'reset response rate values'
desc "reset response rate values"
task reset_response_rates: :environment do
puts 'Resetting response rates'
puts "Resetting response rates"
ResponseRateLoader.reset
Rails.cache.clear
# NOTE(review): the value printed is ResponseRate.count, but the message labels it "survey responses".
puts "=====================> Completed loading #{ResponseRate.count} survey responses"
end
# Recomputes race scores via RaceScoreLoader.reset (with fast_processing disabled), clears the
# Rails cache, and prints the resulting RaceScore row count.
# NOTE(review): the paired desc/puts lines differ only in quote style and look like the old and
# new sides of a diff — confirm which one is live.
desc 'reset race score calculations'
desc "reset race score calculations"
task reset_race_scores: :environment do
puts 'Resetting race scores'
puts "Resetting race scores"
RaceScoreLoader.reset(fast_processing: false)
Rails.cache.clear
# NOTE(review): the value printed is RaceScore.count, but the message labels it "survey responses".
puts "=====================> Completed loading #{RaceScore.count} survey responses"
end
# Loads every CSV under data/admin_data/dese (relative to Rails.root) through Dese::Loader and
# reports how many AdminDataValue rows were added.
# NOTE(review): the paired desc/Dir.glob lines differ only in quote style and look like the old
# and new sides of a diff — as written the two `do` openers share a single `end`; confirm which
# line is live.
desc 'load admin_data'
desc "load admin_data"
task load_admin_data: :environment do
# Snapshot the row count so the summary can report the number of newly added values.
original_count = AdminDataValue.count
Dir.glob(Rails.root.join('data', 'admin_data', 'dese', '*.csv')).each do |filepath|
Dir.glob(Rails.root.join("data", "admin_data", "dese", "*.csv")).each do |filepath|
puts "=====================> Loading data from csv at path: #{filepath}"
# `filepath:` is the shorthand for `filepath: filepath`.
Dese::Loader.load_data filepath:
end
puts "=====================> Completed loading #{AdminDataValue.count - original_count} admin data values"
end
# Wipes all students (and their links from survey item responses and student races), reloads
# them from the local CSVs matching data/survey_responses/*student*.csv, then resets race scores
# and clears the Rails cache.
# NOTE(review): the paired desc/Dir.glob/puts lines differ only in quote style and look like the
# old and new sides of a diff — as written the two `do` openers share a single `end`; confirm
# which line is live.
desc 'load students'
desc "load students"
task load_students: :environment do
# Detach responses from students and drop student races before deleting the students themselves.
SurveyItemResponse.update_all(student_id: nil)
StudentRace.delete_all
Student.delete_all
Dir.glob(Rails.root.join('data', 'survey_responses', '*student*.csv')).each do |file|
Dir.glob(Rails.root.join("data", "survey_responses", "*student*.csv")).each do |file|
puts "=====================> Loading student data from csv at path: #{file}"
StudentLoader.load_data filepath: file
end
puts "=====================> Completed loading #{Student.count} students"
puts 'Resetting race scores'
puts "Resetting race scores"
RaceScoreLoader.reset(fast_processing: false)
# NOTE(review): the value printed is RaceScore.count, but the message labels it "survey responses".
puts "=====================> Completed loading #{RaceScore.count} survey responses"
Rails.cache.clear
end
desc 'reset all cache counters'
desc "reset all cache counters"
task reset_cache_counters: :environment do
puts '=====================> Resetting Category counters'
puts "=====================> Resetting Category counters"
Category.all.each do |category|
Category.reset_counters(category.id, :subcategories)
end
puts '=====================> Resetting Subcategory counters'
puts "=====================> Resetting Subcategory counters"
Subcategory.all.each do |subcategory|
Subcategory.reset_counters(subcategory.id, :measures)
end
puts '=====================> Resetting Measure counters'
puts "=====================> Resetting Measure counters"
Measure.all.each do |measure|
Measure.reset_counters(measure.id, :scales)
end
puts '=====================> Resetting Scale counters'
puts "=====================> Resetting Scale counters"
Scale.all.each do |scale|
Scale.reset_counters(scale.id, :survey_items)
end
puts '=====================> Resetting SurveyItem counters'
puts "=====================> Resetting SurveyItem counters"
SurveyItem.all.each do |survey_item|
SurveyItem.reset_counters(survey_item.id, :survey_item_responses)
end

Loading…
Cancel
Save