It's possible for admin data Likert score values to be above 5. If that happens, we
cap the Likert score at 5. This was already happening at the scraper
level, but it is now also done by the admin data loader for safety.
Also make sure to update existing admin data instead of deleting and
reloading all values on each load. Add tests to confirm this behavior.
This commit is contained in:
rebuilt 2023-06-03 15:05:24 -07:00
parent 3589878700
commit 904d0d2f2c
4 changed files with 97 additions and 58 deletions

View file

@ -1,11 +1,11 @@
require "csv"
require 'csv'
namespace :data do
desc "load survey responses"
desc 'load survey responses'
task load_survey_responses: :environment do
survey_item_response_count = SurveyItemResponse.count
student_count = Student.count
path = "/data/survey_responses/clean/"
path = '/data/survey_responses/clean/'
Sftp::Directory.open(path:) do |file|
SurveyResponsesDataLoader.from_file(file:)
end
@ -16,30 +16,30 @@ namespace :data do
end
puts "=====================> Completed loading #{Student.count - student_count} students. #{Student.count} total students"
puts "Resetting race scores"
puts 'Resetting race scores'
RaceScoreLoader.reset(fast_processing: false)
puts "=====================> Completed loading #{RaceScore.count} race scores"
Rails.cache.clear
end
# Rake task `seed_only_lowell`: builds a Seeder with the Rule::SeedOnlyLowell rule and
# runs its seed_* steps against CSV files located under Rails.root/data.
# NOTE(review): this region is a rendered diff with the +/- markers stripped, so each
# changed statement appears twice (double-quoted form, then single-quoted form);
# presumably only the single-quoted lines are in the committed file — confirm.
desc "seed only lowell"
desc 'seed only lowell'
task seed_only_lowell: :environment do
seeder = Seeder.new rules: [Rule::SeedOnlyLowell]
# Double-quoted variants of the seeding calls.
seeder.seed_academic_years "2016-17", "2017-18", "2018-19", "2019-20", "2020-21", "2021-22", "2022-23"
seeder.seed_districts_and_schools Rails.root.join("data", "master_list_of_schools_and_districts.csv")
seeder.seed_surveys Rails.root.join("data", "master_list_of_schools_and_districts.csv")
seeder.seed_respondents Rails.root.join("data", "master_list_of_schools_and_districts.csv")
seeder.seed_sqm_framework Rails.root.join("data", "sqm_framework.csv")
seeder.seed_demographics Rails.root.join("data", "demographics.csv")
# Single-quoted variants of the same calls; districts/schools, surveys and respondents
# all read the same master_list_of_schools_and_districts.csv file.
seeder.seed_academic_years '2016-17', '2017-18', '2018-19', '2019-20', '2020-21', '2021-22', '2022-23'
seeder.seed_districts_and_schools Rails.root.join('data', 'master_list_of_schools_and_districts.csv')
seeder.seed_surveys Rails.root.join('data', 'master_list_of_schools_and_districts.csv')
seeder.seed_respondents Rails.root.join('data', 'master_list_of_schools_and_districts.csv')
seeder.seed_sqm_framework Rails.root.join('data', 'sqm_framework.csv')
seeder.seed_demographics Rails.root.join('data', 'demographics.csv')
end
desc "load survey responses for lowell schools"
desc 'load survey responses for lowell schools'
task load_survey_responses_for_lowell: :environment do
survey_item_response_count = SurveyItemResponse.count
student_count = Student.count
path = "/data/survey_responses/clean/"
path = '/data/survey_responses/clean/'
Sftp::Directory.open(path:) do |file|
SurveyResponsesDataLoader.from_file(file:)
end
@ -50,44 +50,44 @@ namespace :data do
end
puts "=====================> Completed loading #{Student.count - student_count} students. #{Student.count} total students"
puts "Resetting race scores"
puts 'Resetting race scores'
RaceScoreLoader.reset(fast_processing: false)
puts "=====================> Completed loading #{RaceScore.count} race scores"
Rails.cache.clear
end
# Rake task `load_students_for_lowell`: detaches students from survey item responses,
# deletes all StudentRace and Student rows, reloads students from the SFTP directory
# /data/survey_responses/clean/ using Rule::SkipNonLowellSchools, then resets race
# scores and clears the Rails cache.
# NOTE(review): this region is a rendered diff with the +/- markers stripped, so each
# changed statement appears twice (double-quoted form, then single-quoted form).
desc "load students for lowell"
desc 'load students for lowell'
task load_students_for_lowell: :environment do
# Null out the student reference first so the Student rows can be deleted.
SurveyItemResponse.update_all(student_id: nil)
StudentRace.delete_all
Student.delete_all
Sftp::Directory.open(path: "/data/survey_responses/clean/") do |file|
Sftp::Directory.open(path: '/data/survey_responses/clean/') do |file|
StudentLoader.from_file(file:, rules: [Rule::SkipNonLowellSchools])
end
# FIXME(review): `student_count` is never assigned inside this task (the sibling tasks
# set `student_count = Student.count` before loading); unless it is defined elsewhere,
# this line would raise NameError — confirm and capture the count before the deletes/load.
puts "=====================> Completed loading #{Student.count - student_count} students. #{Student.count} total students"
puts "Resetting race scores"
puts 'Resetting race scores'
RaceScoreLoader.reset(fast_processing: false)
# NOTE(review): the message says "survey responses" but the value printed is RaceScore.count.
puts "=====================> Completed loading #{RaceScore.count} survey responses"
Rails.cache.clear
end
# Rake task `delete_non_lowell`: removes every school whose district is not named
# "Lowell", along with the ResponseRate, Respondent and Survey rows for those schools,
# and then removes every district not named "Lowell".
# NOTE(review): this region is a rendered diff with the +/- markers stripped, so each
# changed statement appears twice (double-quoted form, then single-quoted form).
desc "delete non-lowell schools and districts"
desc 'delete non-lowell schools and districts'
task delete_non_lowell: :environment do
# Filtering happens in Ruby (reject) on the loaded schools, comparing the district name.
schools = School.all.reject { |s| s.district.name == "Lowell" }
schools = School.all.reject { |s| s.district.name == 'Lowell' }
# Dependent rows are removed before the schools themselves.
ResponseRate.where(school: schools).delete_all
Respondent.where(school: schools).delete_all
Survey.where(school: schools).delete_all
schools.each { |school| school.delete }
districts = District.all.reject { |district| district.name == "Lowell" }
districts = District.all.reject { |district| district.name == 'Lowell' }
districts.each { |district| district.delete }
end
task load_survey_responses_21_22: :environment do
Dir.glob(Rails.root.join("data", "survey_responses", "*2021-22*.csv")).each do |filepath|
Dir.glob(Rails.root.join('data', 'survey_responses', '*2021-22*.csv')).each do |filepath|
puts "=====================> Loading data from csv at path: #{filepath}"
SurveyResponsesDataLoader.load_data filepath:
end
@ -95,7 +95,7 @@ namespace :data do
end
task load_survey_responses_20_21: :environment do
Dir.glob(Rails.root.join("data", "survey_responses", "*2020-21*.csv")).each do |filepath|
Dir.glob(Rails.root.join('data', 'survey_responses', '*2020-21*.csv')).each do |filepath|
puts "=====================> Loading data from csv at path: #{filepath}"
SurveyResponsesDataLoader.load_data filepath:
end
@ -103,7 +103,7 @@ namespace :data do
end
task load_survey_responses_19_20: :environment do
Dir.glob(Rails.root.join("data", "survey_responses", "*2019-20*.csv")).each do |filepath|
Dir.glob(Rails.root.join('data', 'survey_responses', '*2019-20*.csv')).each do |filepath|
puts "=====================> Loading data from csv at path: #{filepath}"
SurveyResponsesDataLoader.load_data filepath:
end
@ -111,7 +111,7 @@ namespace :data do
end
task load_survey_responses_18_19: :environment do
Dir.glob(Rails.root.join("data", "survey_responses", "*2018-19*.csv")).each do |filepath|
Dir.glob(Rails.root.join('data', 'survey_responses', '*2018-19*.csv')).each do |filepath|
puts "=====================> Loading data from csv at path: #{filepath}"
SurveyResponsesDataLoader.load_data filepath:
end
@ -119,7 +119,7 @@ namespace :data do
end
task load_survey_responses_17_18: :environment do
Dir.glob(Rails.root.join("data", "survey_responses", "*2017-18*.csv")).each do |filepath|
Dir.glob(Rails.root.join('data', 'survey_responses', '*2017-18*.csv')).each do |filepath|
puts "=====================> Loading data from csv at path: #{filepath}"
SurveyResponsesDataLoader.load_data filepath:
end
@ -127,86 +127,86 @@ namespace :data do
end
# Rake task `load_survey_responses_16_17`: loads every CSV under data/survey_responses
# whose filename contains "2016-17" through SurveyResponsesDataLoader.load_data.
# NOTE(review): this region is a rendered diff with the +/- markers stripped, so the
# Dir.glob line appears twice (double-quoted form, then single-quoted form).
task load_survey_responses_16_17: :environment do
Dir.glob(Rails.root.join("data", "survey_responses", "*2016-17*.csv")).each do |filepath|
Dir.glob(Rails.root.join('data', 'survey_responses', '*2016-17*.csv')).each do |filepath|
puts "=====================> Loading data from csv at path: #{filepath}"
SurveyResponsesDataLoader.load_data filepath:
end
# Prints the total SurveyItemResponse row count, not the number added by this run.
puts "=====================> Completed loading #{SurveyItemResponse.count} survey responses"
end
# Rake task `reset_response_rates`: calls ResponseRateLoader.reset, clears the Rails
# cache, and prints the resulting ResponseRate row count.
# NOTE(review): this region is a rendered diff with the +/- markers stripped, so each
# changed statement appears twice (double-quoted form, then single-quoted form).
desc "reset response rate values"
desc 'reset response rate values'
task reset_response_rates: :environment do
puts "Resetting response rates"
puts 'Resetting response rates'
ResponseRateLoader.reset
Rails.cache.clear
# NOTE(review): the message says "survey responses" but the value printed is ResponseRate.count.
puts "=====================> Completed loading #{ResponseRate.count} survey responses"
end
# Rake task `reset_race_scores`: calls RaceScoreLoader.reset with fast_processing: false,
# clears the Rails cache, and prints the resulting RaceScore row count.
# NOTE(review): this region is a rendered diff with the +/- markers stripped, so each
# changed statement appears twice (double-quoted form, then single-quoted form).
desc "reset race score calculations"
desc 'reset race score calculations'
task reset_race_scores: :environment do
puts "Resetting race scores"
puts 'Resetting race scores'
RaceScoreLoader.reset(fast_processing: false)
Rails.cache.clear
# NOTE(review): the message says "survey responses" but the value printed is RaceScore.count.
puts "=====================> Completed loading #{RaceScore.count} survey responses"
end
# Rake task `load_admin_data`: loads every CSV under data/admin_data/dese through
# Dese::Loader.load_data and reports how many AdminDataValue rows exist afterwards.
# NOTE(review): this region is a rendered diff with the +/- markers stripped, so the
# pre-change and post-change lines are interleaved. Going by the commit message
# ("update admin data instead of deleting and reloading"), `AdminDataValue.delete_all`
# and the "survey responses" message are presumably the removed lines, while
# `original_count` and the "admin data values" message are the added ones — confirm.
desc "load admin_data"
desc 'load admin_data'
task load_admin_data: :environment do
# Pre-change behavior: wipe all admin data values before reloading.
AdminDataValue.delete_all
Dir.glob(Rails.root.join("data", "admin_data", "dese", "*.csv")).each do |filepath|
# Post-change behavior: remember the starting row count so only the delta is reported.
original_count = AdminDataValue.count
Dir.glob(Rails.root.join('data', 'admin_data', 'dese', '*.csv')).each do |filepath|
puts "=====================> Loading data from csv at path: #{filepath}"
Dese::Loader.load_data filepath:
end
puts "=====================> Completed loading #{AdminDataValue.count} survey responses"
# The delta counts newly created rows only; rows updated in place do not change it.
puts "=====================> Completed loading #{AdminDataValue.count - original_count} admin data values"
end
# Rake task `load_students`: detaches students from survey item responses, deletes all
# StudentRace and Student rows, reloads students from every CSV under
# data/survey_responses whose filename contains "student", then resets race scores and
# clears the Rails cache.
# NOTE(review): this region is a rendered diff with the +/- markers stripped, so each
# changed statement appears twice (double-quoted form, then single-quoted form).
desc "load students"
desc 'load students'
task load_students: :environment do
# Null out the student reference first so the Student rows can be deleted.
SurveyItemResponse.update_all(student_id: nil)
StudentRace.delete_all
Student.delete_all
Dir.glob(Rails.root.join("data", "survey_responses", "*student*.csv")).each do |file|
Dir.glob(Rails.root.join('data', 'survey_responses', '*student*.csv')).each do |file|
puts "=====================> Loading student data from csv at path: #{file}"
StudentLoader.load_data filepath: file
end
puts "=====================> Completed loading #{Student.count} students"
puts "Resetting race scores"
puts 'Resetting race scores'
RaceScoreLoader.reset(fast_processing: false)
# NOTE(review): the message says "survey responses" but the value printed is RaceScore.count.
puts "=====================> Completed loading #{RaceScore.count} survey responses"
Rails.cache.clear
end
# Rake task `reset_cache_counters`: walks Category, Subcategory, Measure, Scale and
# SurveyItem records and calls reset_counters on each one for its child association
# (:subcategories, :measures, :scales, :survey_items, :survey_item_responses).
# NOTE(review): this region is a rendered diff with the +/- markers stripped, so each
# changed `puts` appears twice (double-quoted form, then single-quoted form).
desc "reset all cache counters"
desc 'reset all cache counters'
task reset_cache_counters: :environment do
puts "=====================> Resetting Category counters"
puts '=====================> Resetting Category counters'
# One reset_counters call per record, keyed by id.
Category.all.each do |category|
Category.reset_counters(category.id, :subcategories)
end
puts "=====================> Resetting Subcategory counters"
puts '=====================> Resetting Subcategory counters'
Subcategory.all.each do |subcategory|
Subcategory.reset_counters(subcategory.id, :measures)
end
puts "=====================> Resetting Measure counters"
puts '=====================> Resetting Measure counters'
Measure.all.each do |measure|
Measure.reset_counters(measure.id, :scales)
end
puts "=====================> Resetting Scale counters"
puts '=====================> Resetting Scale counters'
Scale.all.each do |scale|
Scale.reset_counters(scale.id, :survey_items)
end
puts "=====================> Resetting SurveyItem counters"
puts '=====================> Resetting SurveyItem counters'
SurveyItem.all.each do |survey_item|
SurveyItem.reset_counters(survey_item.id, :survey_item_responses)
end
end
desc "scrape dese site for admin data"
desc 'scrape dese site for admin data'
task scrape_all: :environment do
puts "scraping data from dese"
puts 'scraping data from dese'
scrapers = [Dese::OneAOne, Dese::OneAThree, Dese::TwoAOne, Dese::TwoCOne, Dese::ThreeAOne, Dese::ThreeATwo,
Dese::ThreeBOne, Dese::ThreeBTwo, Dese::FourAOne, Dese::FourBTwo, Dese::FourDOne, Dese::FiveCOne, Dese::FiveDTwo]
Dese::ThreeBOne, Dese::ThreeBTwo, Dese::FourAOne, Dese::FourBTwo, Dese::FourDOne, Dese::FiveCOne, Dese::FiveDTwo]
scrapers.each do |scraper|
scraper.new.run_all
end