From 904d0d2f2cdeb2b1b3aa7bff7485f77b78a7d8b7 Mon Sep 17 00:00:00 2001 From: rebuilt Date: Sat, 3 Jun 2023 15:05:24 -0700 Subject: [PATCH] It's possible for admin data likert score values to be above 5. If that happens, we cap the likert score at 5. This was happening already at the scraper level but it's also now being done by the admin data loader for safety. Also make sure to just update admin data instead of deleting and reloading all values each load. Add tests to confirm this behavior --- app/services/admin_data_loader.rb | 24 +++-- lib/tasks/data.rake | 92 +++++++++---------- spec/fixtures/secondary_sample_admin_data.csv | 13 +++ spec/services/admin_data_loader_spec.rb | 26 ++++-- 4 files changed, 97 insertions(+), 58 deletions(-) create mode 100644 spec/fixtures/secondary_sample_admin_data.csv diff --git a/app/services/admin_data_loader.rb b/app/services/admin_data_loader.rb index 99cfa00e..aba0ad06 100644 --- a/app/services/admin_data_loader.rb +++ b/app/services/admin_data_loader.rb @@ -4,6 +4,7 @@ require 'csv' class AdminDataLoader def self.load_data(filepath:) + admin_data_values = [] CSV.parse(File.read(filepath), headers: true) do |row| score = likert_score(row:) unless valid_likert_score(likert_score: score) @@ -12,8 +13,10 @@ class AdminDataLoader admin data item #{admin_data_item(row:)} " next end - create_admin_data_value(row:, score:) + admin_data_values << create_admin_data_value(row:, score:) end + + AdminDataValue.import(admin_data_values.flatten.compact, on_duplicate_key_update: :all) end private @@ -24,7 +27,8 @@ class AdminDataLoader def self.likert_score(row:) likert_score = (row['LikertScore'] || row['Likert Score'] || row['Likert_Score']).to_f - round_up_to_one(likert_score:) + likert_score = round_up_to_one(likert_score:) + round_down_to_five(likert_score:) end def self.round_up_to_one(likert_score:) @@ -32,6 +36,11 @@ class AdminDataLoader likert_score end + def self.round_down_to_five(likert_score:) + likert_score = 5 if likert_score > 5 + likert_score + end + def self.ay(row:) row['Academic Year'] || row['AcademicYear'] end @@ -45,10 +54,13 @@ class AdminDataLoader end def self.create_admin_data_value(row:, score:) - AdminDataValue.create!(likert_score: score, - academic_year: AcademicYear.find_by_range(ay(row:)), - school: School.find_by_dese_id(dese_id(row:).to_i), - admin_data_item: AdminDataItem.find_by_admin_data_item_id(admin_data_item(row:))) + admin_data_value = AdminDataValue.find_or_initialize_by(school: School.find_by_dese_id(dese_id(row:).to_i), + academic_year: AcademicYear.find_by_range(ay(row:)), + admin_data_item: AdminDataItem.find_by_admin_data_item_id(admin_data_item(row:))) + return nil if admin_data_value.likert_score == score + + admin_data_value.likert_score = score + admin_data_value end private_class_method :valid_likert_score diff --git a/lib/tasks/data.rake b/lib/tasks/data.rake index 2a2cb1f6..15198bfe 100644 --- a/lib/tasks/data.rake +++ b/lib/tasks/data.rake @@ -1,11 +1,11 @@ -require "csv" +require 'csv' namespace :data do - desc "load survey responses" + desc 'load survey responses' task load_survey_responses: :environment do survey_item_response_count = SurveyItemResponse.count student_count = Student.count - path = "/data/survey_responses/clean/" + path = '/data/survey_responses/clean/' Sftp::Directory.open(path:) do |file| SurveyResponsesDataLoader.from_file(file:) end @@ -16,30 +16,30 @@ namespace :data do end puts "=====================> Completed loading #{Student.count - student_count} students. #{Student.count} total students" - puts "Resetting race scores" + puts 'Resetting race scores' RaceScoreLoader.reset(fast_processing: false) puts "=====================> Completed loading #{RaceScore.count} race scores" Rails.cache.clear end - desc "seed only lowell" + desc 'seed only lowell' task seed_only_lowell: :environment do seeder = Seeder.new rules: [Rule::SeedOnlyLowell] - seeder.seed_academic_years "2016-17", "2017-18", "2018-19", "2019-20", "2020-21", "2021-22", "2022-23" - seeder.seed_districts_and_schools Rails.root.join("data", "master_list_of_schools_and_districts.csv") - seeder.seed_surveys Rails.root.join("data", "master_list_of_schools_and_districts.csv") - seeder.seed_respondents Rails.root.join("data", "master_list_of_schools_and_districts.csv") - seeder.seed_sqm_framework Rails.root.join("data", "sqm_framework.csv") - seeder.seed_demographics Rails.root.join("data", "demographics.csv") + seeder.seed_academic_years '2016-17', '2017-18', '2018-19', '2019-20', '2020-21', '2021-22', '2022-23' + seeder.seed_districts_and_schools Rails.root.join('data', 'master_list_of_schools_and_districts.csv') + seeder.seed_surveys Rails.root.join('data', 'master_list_of_schools_and_districts.csv') + seeder.seed_respondents Rails.root.join('data', 'master_list_of_schools_and_districts.csv') + seeder.seed_sqm_framework Rails.root.join('data', 'sqm_framework.csv') + seeder.seed_demographics Rails.root.join('data', 'demographics.csv') end - desc "load survey responses for lowell schools" + desc 'load survey responses for lowell schools' task load_survey_responses_for_lowell: :environment do survey_item_response_count = SurveyItemResponse.count student_count = Student.count - path = "/data/survey_responses/clean/" + path = '/data/survey_responses/clean/' Sftp::Directory.open(path:) do |file| SurveyResponsesDataLoader.from_file(file:) end @@ -50,44 +50,44 @@ namespace :data do end puts "=====================> Completed loading #{Student.count - student_count} students. #{Student.count} total students" - puts "Resetting race scores" + puts 'Resetting race scores' RaceScoreLoader.reset(fast_processing: false) puts "=====================> Completed loading #{RaceScore.count} race scores" Rails.cache.clear end - desc "load students for lowell" + desc 'load students for lowell' task load_students_for_lowell: :environment do SurveyItemResponse.update_all(student_id: nil) StudentRace.delete_all Student.delete_all - Sftp::Directory.open(path: "/data/survey_responses/clean/") do |file| + Sftp::Directory.open(path: '/data/survey_responses/clean/') do |file| StudentLoader.from_file(file:, rules: [Rule::SkipNonLowellSchools]) end puts "=====================> Completed loading #{Student.count - student_count} students. #{Student.count} total students" - puts "Resetting race scores" + puts 'Resetting race scores' RaceScoreLoader.reset(fast_processing: false) puts "=====================> Completed loading #{RaceScore.count} survey responses" Rails.cache.clear end - desc "delete non-lowell schools and districts" + desc 'delete non-lowell schools and districts' task delete_non_lowell: :environment do - schools = School.all.reject { |s| s.district.name == "Lowell" } + schools = School.all.reject { |s| s.district.name == 'Lowell' } ResponseRate.where(school: schools).delete_all Respondent.where(school: schools).delete_all Survey.where(school: schools).delete_all schools.each { |school| school.delete } - districts = District.all.reject { |district| district.name == "Lowell" } + districts = District.all.reject { |district| district.name == 'Lowell' } districts.each { |district| district.delete } end task load_survey_responses_21_22: :environment do - Dir.glob(Rails.root.join("data", "survey_responses", "*2021-22*.csv")).each do |filepath| + Dir.glob(Rails.root.join('data', 'survey_responses', '*2021-22*.csv')).each do |filepath| puts "=====================> Loading data from csv at path: #{filepath}" SurveyResponsesDataLoader.load_data filepath: end @@ -95,7 +95,7 @@ namespace :data do end task load_survey_responses_20_21: :environment do - Dir.glob(Rails.root.join("data", "survey_responses", "*2020-21*.csv")).each do |filepath| + Dir.glob(Rails.root.join('data', 'survey_responses', '*2020-21*.csv')).each do |filepath| puts "=====================> Loading data from csv at path: #{filepath}" SurveyResponsesDataLoader.load_data filepath: end @@ -103,7 +103,7 @@ namespace :data do end task load_survey_responses_19_20: :environment do - Dir.glob(Rails.root.join("data", "survey_responses", "*2019-20*.csv")).each do |filepath| + Dir.glob(Rails.root.join('data', 'survey_responses', '*2019-20*.csv')).each do |filepath| puts "=====================> Loading data from csv at path: #{filepath}" SurveyResponsesDataLoader.load_data filepath: end @@ -111,7 +111,7 @@ namespace :data do end task load_survey_responses_18_19: :environment do - Dir.glob(Rails.root.join("data", "survey_responses", "*2018-19*.csv")).each do |filepath| + Dir.glob(Rails.root.join('data', 'survey_responses', '*2018-19*.csv')).each do |filepath| puts "=====================> Loading data from csv at path: #{filepath}" SurveyResponsesDataLoader.load_data filepath: end @@ -119,7 +119,7 @@ namespace :data do end task load_survey_responses_17_18: :environment do - Dir.glob(Rails.root.join("data", "survey_responses", "*2017-18*.csv")).each do |filepath| + Dir.glob(Rails.root.join('data', 'survey_responses', '*2017-18*.csv')).each do |filepath| puts "=====================> Loading data from csv at path: #{filepath}" SurveyResponsesDataLoader.load_data filepath: end @@ -127,86 +127,86 @@ namespace :data do end task load_survey_responses_16_17: :environment do - Dir.glob(Rails.root.join("data", "survey_responses", "*2016-17*.csv")).each do |filepath| + Dir.glob(Rails.root.join('data', 'survey_responses', '*2016-17*.csv')).each do |filepath| puts "=====================> Loading data from csv at path: #{filepath}" SurveyResponsesDataLoader.load_data filepath: end puts "=====================> Completed loading #{SurveyItemResponse.count} survey responses" end - desc "reset response rate values" + desc 'reset response rate values' task reset_response_rates: :environment do - puts "Resetting response rates" + puts 'Resetting response rates' ResponseRateLoader.reset Rails.cache.clear puts "=====================> Completed loading #{ResponseRate.count} survey responses" end - desc "reset race score calculations" + desc 'reset race score calculations' task reset_race_scores: :environment do - puts "Resetting race scores" + puts 'Resetting race scores' RaceScoreLoader.reset(fast_processing: false) Rails.cache.clear puts "=====================> Completed loading #{RaceScore.count} survey responses" end - desc "load admin_data" + desc 'load admin_data' task load_admin_data: :environment do - AdminDataValue.delete_all - Dir.glob(Rails.root.join("data", "admin_data", "dese", "*.csv")).each do |filepath| + original_count = AdminDataValue.count + Dir.glob(Rails.root.join('data', 'admin_data', 'dese', '*.csv')).each do |filepath| puts "=====================> Loading data from csv at path: #{filepath}" Dese::Loader.load_data filepath: end - puts "=====================> Completed loading #{AdminDataValue.count} survey responses" + puts "=====================> Completed loading #{AdminDataValue.count - original_count} admin data values" end - desc "load students" + desc 'load students' task load_students: :environment do SurveyItemResponse.update_all(student_id: nil) StudentRace.delete_all Student.delete_all - Dir.glob(Rails.root.join("data", "survey_responses", "*student*.csv")).each do |file| + Dir.glob(Rails.root.join('data', 'survey_responses', '*student*.csv')).each do |file| puts "=====================> Loading student data from csv at path: #{file}" StudentLoader.load_data filepath: file end puts "=====================> Completed loading #{Student.count} students" - puts "Resetting race scores" + puts 'Resetting race scores' RaceScoreLoader.reset(fast_processing: false) puts "=====================> Completed loading #{RaceScore.count} survey responses" Rails.cache.clear end - desc "reset all cache counters" + desc 'reset all cache counters' task reset_cache_counters: :environment do - puts "=====================> Resetting Category counters" + puts '=====================> Resetting Category counters' Category.all.each do |category| Category.reset_counters(category.id, :subcategories) end - puts "=====================> Resetting Subcategory counters" + puts '=====================> Resetting Subcategory counters' Subcategory.all.each do |subcategory| Subcategory.reset_counters(subcategory.id, :measures) end - puts "=====================> Resetting Measure counters" + puts '=====================> Resetting Measure counters' Measure.all.each do |measure| Measure.reset_counters(measure.id, :scales) end - puts "=====================> Resetting Scale counters" + puts '=====================> Resetting Scale counters' Scale.all.each do |scale| Scale.reset_counters(scale.id, :survey_items) end - puts "=====================> Resetting SurveyItem counters" + puts '=====================> Resetting SurveyItem counters' SurveyItem.all.each do |survey_item| SurveyItem.reset_counters(survey_item.id, :survey_item_responses) end end - desc "scrape dese site for admin data" + desc 'scrape dese site for admin data' task scrape_all: :environment do - puts "scraping data from dese" + puts 'scraping data from dese' scrapers = [Dese::OneAOne, Dese::OneAThree, Dese::TwoAOne, Dese::TwoCOne, Dese::ThreeAOne, Dese::ThreeATwo, - Dese::ThreeBOne, Dese::ThreeBTwo, Dese::FourAOne, Dese::FourBTwo, Dese::FourDOne, Dese::FiveCOne, Dese::FiveDTwo] + Dese::ThreeBOne, Dese::ThreeBTwo, Dese::FourAOne, Dese::FourBTwo, Dese::FourDOne, Dese::FiveCOne, Dese::FiveDTwo] scrapers.each do |scraper| scraper.new.run_all end diff --git a/spec/fixtures/secondary_sample_admin_data.csv b/spec/fixtures/secondary_sample_admin_data.csv new file mode 100644 index 00000000..8cdc3c2d --- /dev/null +++ b/spec/fixtures/secondary_sample_admin_data.csv @@ -0,0 +1,13 @@ +District,School,DESE ID,Category,Item ID,NonLikert Title,NL_Value,LikertScore,Benchmark,Data Type,Academic Year +Attleboro,Attleboro High School,160505,2-C-i,a-vale-i1,Chronic absence rate,19.7,1,10,%,2018-19 +Milford,Woodland Elementary School,1850090,2-C-i,a-vale-i1,Chronic absence rate,6.8,1,10,%,2018-19 +Revere,Beachmont Elementary School,2480013,2-C-i,a-vale-i1,Chronic absence rate,4.2,1,10,%,2018-19 +Winchester,Winchester High School,3440505,2-C-i,a-vale-i1,Chronic absence rate,7.2,1,10,%,2018-19 +Attleboro,Attleboro High School,160505,3-A-i,a-reso-i1,Average class size,20.6,2,20,,2018-19 +Milford,Woodland Elementary School,1850090,3-A-i,a-reso-i1,Average class size,22.5,2,20,,2018-19 +Revere,Beachmont Elementary School,2480013,3-A-i,a-reso-i1,Average class size,17,2,20,,2018-19 +Winchester,Winchester High School,3440505,3-A-i,a-reso-i1,Average class size,17,2,20,,2018-19 +Attleboro,Attleboro High School,160505,3-A-ii,a-sust-i3,Student to instructional support staff ratio,15.28896673,3,43.4,,2018-19 +Milford,Woodland Elementary School,1850090,3-A-ii,a-sust-i3,Student to instructional support staff ratio,22.85714286,3,43.4,,2018-19 +Revere,Beachmont Elementary School,2480013,3-A-ii,a-sust-i3,Student to instructional support staff ratio,38,3,43.4,,2018-19 +Winchester,Winchester High School,3440505,3-A-ii,a-sust-i3,Student to instructional support staff ratio,135.9,3,43.4,,2018-19 diff --git a/spec/services/admin_data_loader_spec.rb b/spec/services/admin_data_loader_spec.rb index 09f10a26..b7ca5c3d 100644 --- a/spec/services/admin_data_loader_spec.rb +++ b/spec/services/admin_data_loader_spec.rb @@ -2,6 +2,7 @@ require 'rails_helper' describe AdminDataLoader do let(:path_to_admin_data) { Rails.root.join('spec', 'fixtures', 'sample_admin_data.csv') } + let(:path_to_secondary_admin_data) { Rails.root.join('spec', 'fixtures', 'secondary_sample_admin_data.csv') } let(:ay_2018_19) { AcademicYear.find_by_range '2018-19' } let(:attleboro) { School.find_by_dese_id 160_505 } let(:winchester) { School.find_by_dese_id 3_440_505 } @@ -29,7 +30,7 @@ describe AdminDataLoader do # it 'assigns the school to the admin data value' do expect(AdminDataValue.first.school).to eq attleboro - expect(AdminDataValue.last.school).to eq beachmont + expect(AdminDataValue.last.school).to eq winchester # end # it 'links the admin data value to the correct admin data item' do @@ -38,7 +39,7 @@ describe AdminDataLoader do # end # it 'loads all the admin data values in the target csv file' do - expect(AdminDataValue.count).to eq 10 + expect(AdminDataValue.count).to eq 11 # end # it 'captures the likert score ' do @@ -46,6 +47,8 @@ describe AdminDataLoader do admin_data_item: chronic_absense_rate).likert_score).to eq 3.03 expect(AdminDataValue.find_by(school: beachmont, academic_year: ay_2018_19, admin_data_item: student_to_instructor_ratio).likert_score).to eq 3.5 + expect(AdminDataValue.find_by(school: winchester, academic_year: ay_2018_19, + admin_data_item: student_to_instructor_ratio).likert_score).to eq 5 # end # it 'rounds up any likert_scores between 0 and 1 (non-inclusive) to 1' do @@ -55,16 +58,27 @@ describe AdminDataLoader do # it 'rejects importing rows with a value of 0' do expect(AdminDataValue.where(school: attleboro, academic_year: ay_2018_19, admin_data_item: AdminDataItem.find_by_admin_data_item_id('a-reso-i1'))).not_to exist - expect(AdminDataValue.where(school: winchester, academic_year: ay_2018_19, - admin_data_item: AdminDataItem.find_by_admin_data_item_id('a-sust-i3'))).not_to exist # end end + + context 'when a second file exists' do + before :each do + AdminDataLoader.load_data filepath: path_to_secondary_admin_data + end + + it 'updates likert scores to match the new file' do + expect(AdminDataValue.find_by(school: attleboro, academic_year: ay_2018_19, + admin_data_item: chronic_absense_rate).likert_score).to eq 1 + expect(AdminDataValue.find_by(school: beachmont, academic_year: ay_2018_19, + admin_data_item: student_to_instructor_ratio).likert_score).to eq 3 + end + end end describe 'output to console' do it 'outputs a messsage saying a value has been rejected' do - output = capture_stdout { AdminDataLoader.load_data filepath: path_to_admin_data }.gsub("\n", '') - expect(output).to eq 'Invalid score: 0.0 for school: Attleboro High School admin data item a-reso-i1 Invalid score: 100.0 for school: Winchester High School admin data item a-sust-i3 ' + output = capture_stdout { AdminDataLoader.load_data filepath: path_to_admin_data }.delete("\n") + expect(output).to eq 'Invalid score: 0.0 for school: Attleboro High School admin data item a-reso-i1 ' end end end