From 30285efd693c62c5f245860c25ac7ee0c2373c65 Mon Sep 17 00:00:00 2001
From: rebuilt
Date: Sat, 3 Jun 2023 15:05:24 -0700
Subject: [PATCH] It's possible for admin data likert score values to be above
 5. If that happens, we cap the likert score at 5. This was happening already
 at the scraper level but it's also now being done by the admin data loader
 for safety. Also make sure to just update admin data instead of deleting and
 reloading all values each load. Add tests to confirm this behavior

---
 app/services/admin_data_loader.rb             |  84 +++++++++++++++
 lib/tasks/data.rake                           |   4 +-
 spec/fixtures/secondary_sample_admin_data.csv |  13 +++
 spec/services/admin_data_loader_spec.rb       | 102 ++++++++++++++++++
 4 files changed, 201 insertions(+), 2 deletions(-)
 create mode 100644 app/services/admin_data_loader.rb
 create mode 100644 spec/fixtures/secondary_sample_admin_data.csv
 create mode 100644 spec/services/admin_data_loader_spec.rb

diff --git a/app/services/admin_data_loader.rb b/app/services/admin_data_loader.rb
new file mode 100644
index 00000000..aba0ad06
--- /dev/null
+++ b/app/services/admin_data_loader.rb
@@ -0,0 +1,84 @@
+# frozen_string_literal: true
+
+require 'csv'
+
+# Loads admin data likert scores from a CSV file into AdminDataValue records.
+# Scores between 0 and 1 are raised to 1, scores above 5 are capped at 5, and
+# rows whose resulting score is still outside 1..5 are reported and skipped.
+class AdminDataLoader
+  # Parses the CSV at filepath and imports every changed, valid row.
+  def self.load_data(filepath:)
+    admin_data_values = []
+    CSV.parse(File.read(filepath), headers: true) do |row|
+      score = likert_score(row:)
+      unless valid_likert_score(likert_score: score)
+        # Separate arguments keep source indentation out of the message; the
+        # school is looked up via dese_id so all header spellings work, and
+        # &. keeps an unknown school from raising while reporting.
+        puts "Invalid score: #{score}",
+             " for school: #{School.find_by_dese_id(dese_id(row:))&.name}",
+             " admin data item #{admin_data_item(row:)} "
+        next
+      end
+      admin_data_values << create_admin_data_value(row:, score:)
+    end
+
+    AdminDataValue.import(admin_data_values.flatten.compact, on_duplicate_key_update: :all)
+  end
+
+  # The helpers below are hidden with private_class_method at the end of the
+  # class, because a bare `private` does not apply to `def self.` methods.
+
+  def self.valid_likert_score(likert_score:)
+    likert_score >= 1 && likert_score <= 5
+  end
+
+  # Reads the score under any supported header spelling and clamps it.
+  def self.likert_score(row:)
+    likert_score = (row['LikertScore'] || row['Likert Score'] || row['Likert_Score']).to_f
+    likert_score = round_up_to_one(likert_score:)
+    round_down_to_five(likert_score:)
+  end
+
+  def self.round_up_to_one(likert_score:)
+    likert_score = 1 if likert_score.positive? && likert_score < 1
+    likert_score
+  end
+
+  def self.round_down_to_five(likert_score:)
+    likert_score = 5 if likert_score > 5
+    likert_score
+  end
+
+  def self.ay(row:)
+    row['Academic Year'] || row['AcademicYear']
+  end
+
+  def self.dese_id(row:)
+    row['DESE ID'] || row['Dese ID'] || row['Dese Id']
+  end
+
+  def self.admin_data_item(row:)
+    row['Item ID'] || row['Item Id']
+  end
+
+  # Returns the record to import, or nil when the stored score already matches.
+  def self.create_admin_data_value(row:, score:)
+    admin_data_value = AdminDataValue.find_or_initialize_by(school: School.find_by_dese_id(dese_id(row:).to_i),
+                                                            academic_year: AcademicYear.find_by_range(ay(row:)),
+                                                            admin_data_item: AdminDataItem.find_by_admin_data_item_id(admin_data_item(row:)))
+    return nil if admin_data_value.likert_score == score
+
+    admin_data_value.likert_score = score
+    admin_data_value
+  end
+
+  private_class_method :valid_likert_score
+  private_class_method :likert_score
+  private_class_method :round_up_to_one
+  private_class_method :round_down_to_five
+  private_class_method :ay
+  private_class_method :dese_id
+  private_class_method :admin_data_item
+  private_class_method :create_admin_data_value
+end
diff --git a/lib/tasks/data.rake b/lib/tasks/data.rake
index 6f43a016..b14d82c3 100644
--- a/lib/tasks/data.rake
+++ b/lib/tasks/data.rake
@@ -152,12 +152,12 @@ namespace :data do
 
   desc 'load admin_data'
   task load_admin_data: :environment do
-    AdminDataValue.delete_all
+    original_count = AdminDataValue.count
     Dir.glob(Rails.root.join('data', 'admin_data', 'dese', '*.csv')).each do |filepath|
       puts "=====================> Loading data from csv at path: #{filepath}"
       Dese::Loader.load_data filepath:
     end
-    puts "=====================> Completed loading #{AdminDataValue.count} survey responses"
+    puts "=====================> Completed loading #{AdminDataValue.count - original_count} admin data values"
   end
 
   desc 'load students'
diff --git a/spec/fixtures/secondary_sample_admin_data.csv b/spec/fixtures/secondary_sample_admin_data.csv
new file mode 100644
index 00000000..8cdc3c2d
--- /dev/null
+++ b/spec/fixtures/secondary_sample_admin_data.csv
@@ -0,0 +1,13 @@
+District,School,DESE ID,Category,Item ID,NonLikert Title,NL_Value,LikertScore,Benchmark,Data Type,Academic Year
+Attleboro,Attleboro High School,160505,2-C-i,a-vale-i1,Chronic absence rate,19.7,1,10,%,2018-19
+Milford,Woodland Elementary School,1850090,2-C-i,a-vale-i1,Chronic absence rate,6.8,1,10,%,2018-19
+Revere,Beachmont Elementary School,2480013,2-C-i,a-vale-i1,Chronic absence rate,4.2,1,10,%,2018-19
+Winchester,Winchester High School,3440505,2-C-i,a-vale-i1,Chronic absence rate,7.2,1,10,%,2018-19
+Attleboro,Attleboro High School,160505,3-A-i,a-reso-i1,Average class size,20.6,2,20,,2018-19
+Milford,Woodland Elementary School,1850090,3-A-i,a-reso-i1,Average class size,22.5,2,20,,2018-19
+Revere,Beachmont Elementary School,2480013,3-A-i,a-reso-i1,Average class size,17,2,20,,2018-19
+Winchester,Winchester High School,3440505,3-A-i,a-reso-i1,Average class size,17,2,20,,2018-19
+Attleboro,Attleboro High School,160505,3-A-ii,a-sust-i3,Student to instructional support staff ratio,15.28896673,3,43.4,,2018-19
+Milford,Woodland Elementary School,1850090,3-A-ii,a-sust-i3,Student to instructional support staff ratio,22.85714286,3,43.4,,2018-19
+Revere,Beachmont Elementary School,2480013,3-A-ii,a-sust-i3,Student to instructional support staff ratio,38,3,43.4,,2018-19
+Winchester,Winchester High School,3440505,3-A-ii,a-sust-i3,Student to instructional support staff ratio,135.9,3,43.4,,2018-19
diff --git a/spec/services/admin_data_loader_spec.rb b/spec/services/admin_data_loader_spec.rb
new file mode 100644
index 00000000..7361da96
--- /dev/null
+++ b/spec/services/admin_data_loader_spec.rb
@@ -0,0 +1,102 @@
+require 'rails_helper'
+
+describe AdminDataLoader do
+  let(:path_to_admin_data) { Rails.root.join('spec', 'fixtures', 'sample_admin_data.csv') }
+  let(:path_to_secondary_admin_data) { Rails.root.join('spec', 'fixtures', 'secondary_sample_admin_data.csv') }
+  let(:ay_2018_19) { create(:academic_year, range: '2018-19') }
+  let(:attleboro) { create(:school, name: 'Attleboro High School', dese_id: 160_505) }
+  let(:winchester) { create(:school, name: 'Winchester High School', dese_id: 3_440_505) }
+  let(:beachmont) { create(:school, dese_id: 2_480_013) }
+  let(:woodland) { create(:school, dese_id: 1_850_090) } # not explicitly tested
+  let(:chronic_absense_rate) { create(:admin_data_item, admin_data_item_id: 'a-vale-i1') }
+  let(:student_to_instructor_ratio) { create(:admin_data_item, admin_data_item_id: 'a-sust-i3') }
+  let(:a_reso) { create(:admin_data_item, admin_data_item_id: 'a-reso-i1') } # not explicitly tested
+
+  before :each do
+    ay_2018_19
+    attleboro
+    winchester
+    beachmont
+    woodland
+    chronic_absense_rate
+    student_to_instructor_ratio
+    a_reso
+    AdminDataLoader.load_data filepath: path_to_admin_data
+  end
+
+  after :each do
+    DatabaseCleaner.clean
+  end
+
+  describe 'self.load_data' do
+    it 'loads the correct admin data values' do
+      # it 'assigns the academic year to admin data value' do
+      expect(AdminDataValue.where(school: attleboro,
+                                  admin_data_item: chronic_absense_rate).first.academic_year).to eq ay_2018_19
+      # end
+
+      # it 'assigns the school to the admin data value' do
+      expect(AdminDataValue.first.school).to eq attleboro
+      expect(AdminDataValue.last.school).to eq winchester
+      # end
+
+      # it 'links the admin data value to the correct admin data item' do
+      expect(AdminDataValue.first.admin_data_item).to eq chronic_absense_rate
+      expect(AdminDataValue.last.admin_data_item).to eq student_to_instructor_ratio
+      # end
+
+      # it 'loads all the admin data values in the target csv file' do
+      expect(AdminDataValue.count).to eq 11
+      # end
+
+      # it 'captures the likert score ' do
+      expect(AdminDataValue.find_by(school: attleboro, academic_year: ay_2018_19,
+                                    admin_data_item: chronic_absense_rate).likert_score).to eq 3.03
+      expect(AdminDataValue.find_by(school: beachmont, academic_year: ay_2018_19,
+                                    admin_data_item: student_to_instructor_ratio).likert_score).to eq 3.5
+      expect(AdminDataValue.find_by(school: winchester, academic_year: ay_2018_19,
+                                    admin_data_item: student_to_instructor_ratio).likert_score).to eq 5
+      # end
+
+      # it 'rounds up any likert_scores between 0 and 1 (non-inclusive) to 1' do
+      expect(AdminDataValue.where(school: attleboro, academic_year: ay_2018_19,
+                                  admin_data_item: AdminDataItem.find_by_admin_data_item_id('a-sust-i3')).first.likert_score).to eq 1
+      # end
+      # it 'rejects importing rows with a value of 0' do
+      expect(AdminDataValue.where(school: attleboro, academic_year: ay_2018_19,
+                                  admin_data_item: AdminDataItem.find_by_admin_data_item_id('a-reso-i1'))).not_to exist
+      # end
+    end
+
+    context 'when a second file exists' do
+      before :each do
+        AdminDataLoader.load_data filepath: path_to_secondary_admin_data
+      end
+
+      it 'updates likert scores to match the new file' do
+        expect(AdminDataValue.find_by(school: attleboro, academic_year: ay_2018_19,
+                                      admin_data_item: chronic_absense_rate).likert_score).to eq 1
+        expect(AdminDataValue.find_by(school: beachmont, academic_year: ay_2018_19,
+                                      admin_data_item: student_to_instructor_ratio).likert_score).to eq 3
+      end
+    end
+  end
+
+  describe 'output to console' do
+    it 'outputs a message saying a value has been rejected' do
+      output = capture_stdout { AdminDataLoader.load_data filepath: path_to_admin_data }.delete("\n")
+      expect(output).to eq 'Invalid score: 0.0 for school: Attleboro High School admin data item a-reso-i1 '
+    end
+  end
+end
+
+def capture_stdout
+  original_stdout = $stdout
+  $stdout = fake = StringIO.new
+  begin
+    yield
+  ensure
+    $stdout = original_stdout
+  end
+  fake.string
+end