diff --git a/app/services/admin_data_loader.rb b/app/services/admin_data_loader.rb index 99cfa00e..aba0ad06 100644 --- a/app/services/admin_data_loader.rb +++ b/app/services/admin_data_loader.rb @@ -4,6 +4,7 @@ require 'csv' class AdminDataLoader def self.load_data(filepath:) + admin_data_values = [] CSV.parse(File.read(filepath), headers: true) do |row| score = likert_score(row:) unless valid_likert_score(likert_score: score) @@ -12,8 +13,10 @@ class AdminDataLoader admin data item #{admin_data_item(row:)} " next end - create_admin_data_value(row:, score:) + admin_data_values << create_admin_data_value(row:, score:) end + + AdminDataValue.import(admin_data_values.flatten.compact, on_duplicate_key_update: :all) end private @@ -24,7 +27,8 @@ class AdminDataLoader def self.likert_score(row:) likert_score = (row['LikertScore'] || row['Likert Score'] || row['Likert_Score']).to_f - round_up_to_one(likert_score:) + likert_score = round_up_to_one(likert_score:) + round_down_to_five(likert_score:) end def self.round_up_to_one(likert_score:) @@ -32,6 +36,11 @@ class AdminDataLoader likert_score end + def self.round_down_to_five(likert_score:) + likert_score = 5 if likert_score > 5 + likert_score + end + def self.ay(row:) row['Academic Year'] || row['AcademicYear'] end @@ -45,10 +54,13 @@ class AdminDataLoader end def self.create_admin_data_value(row:, score:) - AdminDataValue.create!(likert_score: score, - academic_year: AcademicYear.find_by_range(ay(row:)), - school: School.find_by_dese_id(dese_id(row:).to_i), - admin_data_item: AdminDataItem.find_by_admin_data_item_id(admin_data_item(row:))) + admin_data_value = AdminDataValue.find_or_initialize_by(school: School.find_by_dese_id(dese_id(row:).to_i), + academic_year: AcademicYear.find_by_range(ay(row:)), + admin_data_item: AdminDataItem.find_by_admin_data_item_id(admin_data_item(row:))) + return nil if admin_data_value.likert_score == score + + admin_data_value.likert_score = score + admin_data_value end private_class_method :valid_likert_score diff --git a/lib/tasks/data.rake b/lib/tasks/data.rake index 2a2cb1f6..15198bfe 100644 --- a/lib/tasks/data.rake +++ b/lib/tasks/data.rake @@ -1,11 +1,11 @@ -require "csv" +require 'csv' namespace :data do - desc "load survey responses" + desc 'load survey responses' task load_survey_responses: :environment do survey_item_response_count = SurveyItemResponse.count student_count = Student.count - path = "/data/survey_responses/clean/" + path = '/data/survey_responses/clean/' Sftp::Directory.open(path:) do |file| SurveyResponsesDataLoader.from_file(file:) end @@ -16,30 +16,30 @@ namespace :data do end puts "=====================> Completed loading #{Student.count - student_count} students. #{Student.count} total students" - puts "Resetting race scores" + puts 'Resetting race scores' RaceScoreLoader.reset(fast_processing: false) puts "=====================> Completed loading #{RaceScore.count} race scores" Rails.cache.clear end - desc "seed only lowell" + desc 'seed only lowell' task seed_only_lowell: :environment do seeder = Seeder.new rules: [Rule::SeedOnlyLowell] - seeder.seed_academic_years "2016-17", "2017-18", "2018-19", "2019-20", "2020-21", "2021-22", "2022-23" - seeder.seed_districts_and_schools Rails.root.join("data", "master_list_of_schools_and_districts.csv") - seeder.seed_surveys Rails.root.join("data", "master_list_of_schools_and_districts.csv") - seeder.seed_respondents Rails.root.join("data", "master_list_of_schools_and_districts.csv") - seeder.seed_sqm_framework Rails.root.join("data", "sqm_framework.csv") - seeder.seed_demographics Rails.root.join("data", "demographics.csv") + seeder.seed_academic_years '2016-17', '2017-18', '2018-19', '2019-20', '2020-21', '2021-22', '2022-23' + seeder.seed_districts_and_schools Rails.root.join('data', 'master_list_of_schools_and_districts.csv') + seeder.seed_surveys Rails.root.join('data', 'master_list_of_schools_and_districts.csv') + seeder.seed_respondents Rails.root.join('data', 'master_list_of_schools_and_districts.csv') + seeder.seed_sqm_framework Rails.root.join('data', 'sqm_framework.csv') + seeder.seed_demographics Rails.root.join('data', 'demographics.csv') end - desc "load survey responses for lowell schools" + desc 'load survey responses for lowell schools' task load_survey_responses_for_lowell: :environment do survey_item_response_count = SurveyItemResponse.count student_count = Student.count - path = "/data/survey_responses/clean/" + path = '/data/survey_responses/clean/' Sftp::Directory.open(path:) do |file| SurveyResponsesDataLoader.from_file(file:) end @@ -50,44 +50,44 @@ namespace :data do end puts "=====================> Completed loading #{Student.count - student_count} students. #{Student.count} total students" - puts "Resetting race scores" + puts 'Resetting race scores' RaceScoreLoader.reset(fast_processing: false) puts "=====================> Completed loading #{RaceScore.count} race scores" Rails.cache.clear end - desc "load students for lowell" + desc 'load students for lowell' task load_students_for_lowell: :environment do SurveyItemResponse.update_all(student_id: nil) StudentRace.delete_all Student.delete_all - Sftp::Directory.open(path: "/data/survey_responses/clean/") do |file| + Sftp::Directory.open(path: '/data/survey_responses/clean/') do |file| StudentLoader.from_file(file:, rules: [Rule::SkipNonLowellSchools]) end puts "=====================> Completed loading #{Student.count - student_count} students. #{Student.count} total students" - puts "Resetting race scores" + puts 'Resetting race scores' RaceScoreLoader.reset(fast_processing: false) puts "=====================> Completed loading #{RaceScore.count} survey responses" Rails.cache.clear end - desc "delete non-lowell schools and districts" + desc 'delete non-lowell schools and districts' task delete_non_lowell: :environment do - schools = School.all.reject { |s| s.district.name == "Lowell" } + schools = School.all.reject { |s| s.district.name == 'Lowell' } ResponseRate.where(school: schools).delete_all Respondent.where(school: schools).delete_all Survey.where(school: schools).delete_all schools.each { |school| school.delete } - districts = District.all.reject { |district| district.name == "Lowell" } + districts = District.all.reject { |district| district.name == 'Lowell' } districts.each { |district| district.delete } end task load_survey_responses_21_22: :environment do - Dir.glob(Rails.root.join("data", "survey_responses", "*2021-22*.csv")).each do |filepath| + Dir.glob(Rails.root.join('data', 'survey_responses', '*2021-22*.csv')).each do |filepath| puts "=====================> Loading data from csv at path: #{filepath}" SurveyResponsesDataLoader.load_data filepath: end @@ -95,7 +95,7 @@ namespace :data do end task load_survey_responses_20_21: :environment do - Dir.glob(Rails.root.join("data", "survey_responses", "*2020-21*.csv")).each do |filepath| + Dir.glob(Rails.root.join('data', 'survey_responses', '*2020-21*.csv')).each do |filepath| puts "=====================> Loading data from csv at path: #{filepath}" SurveyResponsesDataLoader.load_data filepath: end @@ -103,7 +103,7 @@ namespace :data do end task load_survey_responses_19_20: :environment do - Dir.glob(Rails.root.join("data", "survey_responses", "*2019-20*.csv")).each do |filepath| + Dir.glob(Rails.root.join('data', 'survey_responses', '*2019-20*.csv')).each do |filepath| puts "=====================> Loading data from csv at path: #{filepath}" SurveyResponsesDataLoader.load_data filepath: end @@ -111,7 +111,7 @@ namespace :data do end task load_survey_responses_18_19: :environment do - Dir.glob(Rails.root.join("data", "survey_responses", "*2018-19*.csv")).each do |filepath| + Dir.glob(Rails.root.join('data', 'survey_responses', '*2018-19*.csv')).each do |filepath| puts "=====================> Loading data from csv at path: #{filepath}" SurveyResponsesDataLoader.load_data filepath: end @@ -119,7 +119,7 @@ namespace :data do end task load_survey_responses_17_18: :environment do - Dir.glob(Rails.root.join("data", "survey_responses", "*2017-18*.csv")).each do |filepath| + Dir.glob(Rails.root.join('data', 'survey_responses', '*2017-18*.csv')).each do |filepath| puts "=====================> Loading data from csv at path: #{filepath}" SurveyResponsesDataLoader.load_data filepath: end @@ -127,86 +127,86 @@ namespace :data do end task load_survey_responses_16_17: :environment do - Dir.glob(Rails.root.join("data", "survey_responses", "*2016-17*.csv")).each do |filepath| + Dir.glob(Rails.root.join('data', 'survey_responses', '*2016-17*.csv')).each do |filepath| puts "=====================> Loading data from csv at path: #{filepath}" SurveyResponsesDataLoader.load_data filepath: end puts "=====================> Completed loading #{SurveyItemResponse.count} survey responses" end - desc "reset response rate values" + desc 'reset response rate values' task reset_response_rates: :environment do - puts "Resetting response rates" + puts 'Resetting response rates' ResponseRateLoader.reset Rails.cache.clear puts "=====================> Completed loading #{ResponseRate.count} survey responses" end - desc "reset race score calculations" + desc 'reset race score calculations' task reset_race_scores: :environment do - puts "Resetting race scores" + puts 'Resetting race scores' RaceScoreLoader.reset(fast_processing: false) Rails.cache.clear puts "=====================> Completed loading #{RaceScore.count} survey responses" end - desc "load admin_data" + desc 'load admin_data' task load_admin_data: :environment do - AdminDataValue.delete_all - Dir.glob(Rails.root.join("data", "admin_data", "dese", "*.csv")).each do |filepath| + original_count = AdminDataValue.count + Dir.glob(Rails.root.join('data', 'admin_data', 'dese', '*.csv')).each do |filepath| puts "=====================> Loading data from csv at path: #{filepath}" Dese::Loader.load_data filepath: end - puts "=====================> Completed loading #{AdminDataValue.count} survey responses" + puts "=====================> Completed loading #{AdminDataValue.count - original_count} admin data values" end - desc "load students" + desc 'load students' task load_students: :environment do SurveyItemResponse.update_all(student_id: nil) StudentRace.delete_all Student.delete_all - Dir.glob(Rails.root.join("data", "survey_responses", "*student*.csv")).each do |file| + Dir.glob(Rails.root.join('data', 'survey_responses', '*student*.csv')).each do |file| puts "=====================> Loading student data from csv at path: #{file}" StudentLoader.load_data filepath: file end puts "=====================> Completed loading #{Student.count} students" - puts "Resetting race scores" + puts 'Resetting race scores' RaceScoreLoader.reset(fast_processing: false) puts "=====================> Completed loading #{RaceScore.count} survey responses" Rails.cache.clear end - desc "reset all cache counters" + desc 'reset all cache counters' task reset_cache_counters: :environment do - puts "=====================> Resetting Category counters" + puts '=====================> Resetting Category counters' Category.all.each do |category| Category.reset_counters(category.id, :subcategories) end - puts "=====================> Resetting Subcategory counters" + puts '=====================> Resetting Subcategory counters' Subcategory.all.each do |subcategory| Subcategory.reset_counters(subcategory.id, :measures) end - puts "=====================> Resetting Measure counters" + puts '=====================> Resetting Measure counters' Measure.all.each do |measure| Measure.reset_counters(measure.id, :scales) end - puts "=====================> Resetting Scale counters" + puts '=====================> Resetting Scale counters' Scale.all.each do |scale| Scale.reset_counters(scale.id, :survey_items) end - puts "=====================> Resetting SurveyItem counters" + puts '=====================> Resetting SurveyItem counters' SurveyItem.all.each do |survey_item| SurveyItem.reset_counters(survey_item.id, :survey_item_responses) end end - desc "scrape dese site for admin data" + desc 'scrape dese site for admin data' task scrape_all: :environment do - puts "scraping data from dese" + puts 'scraping data from dese' scrapers = [Dese::OneAOne, Dese::OneAThree, Dese::TwoAOne, Dese::TwoCOne, Dese::ThreeAOne, Dese::ThreeATwo, - Dese::ThreeBOne, Dese::ThreeBTwo, Dese::FourAOne, Dese::FourBTwo, Dese::FourDOne, Dese::FiveCOne, Dese::FiveDTwo] + Dese::ThreeBOne, Dese::ThreeBTwo, Dese::FourAOne, Dese::FourBTwo, Dese::FourDOne, Dese::FiveCOne, Dese::FiveDTwo] scrapers.each do |scraper| scraper.new.run_all end diff --git a/spec/fixtures/secondary_sample_admin_data.csv b/spec/fixtures/secondary_sample_admin_data.csv new file mode 100644 index 00000000..8cdc3c2d --- /dev/null +++ b/spec/fixtures/secondary_sample_admin_data.csv @@ -0,0 +1,13 @@ +District,School,DESE ID,Category,Item ID,NonLikert Title,NL_Value,LikertScore,Benchmark,Data Type,Academic Year +Attleboro,Attleboro High School,160505,2-C-i,a-vale-i1,Chronic absence rate,19.7,1,10,%,2018-19 +Milford,Woodland Elementary School,1850090,2-C-i,a-vale-i1,Chronic absence rate,6.8,1,10,%,2018-19 +Revere,Beachmont Elementary School,2480013,2-C-i,a-vale-i1,Chronic absence rate,4.2,1,10,%,2018-19 +Winchester,Winchester High School,3440505,2-C-i,a-vale-i1,Chronic absence rate,7.2,1,10,%,2018-19 +Attleboro,Attleboro High School,160505,3-A-i,a-reso-i1,Average class size,20.6,2,20,,2018-19 +Milford,Woodland Elementary School,1850090,3-A-i,a-reso-i1,Average class size,22.5,2,20,,2018-19 +Revere,Beachmont Elementary School,2480013,3-A-i,a-reso-i1,Average class size,17,2,20,,2018-19 +Winchester,Winchester High School,3440505,3-A-i,a-reso-i1,Average class size,17,2,20,,2018-19 +Attleboro,Attleboro High School,160505,3-A-ii,a-sust-i3,Student to instructional support staff ratio,15.28896673,3,43.4,,2018-19 +Milford,Woodland Elementary School,1850090,3-A-ii,a-sust-i3,Student to instructional support staff ratio,22.85714286,3,43.4,,2018-19 +Revere,Beachmont Elementary School,2480013,3-A-ii,a-sust-i3,Student to instructional support staff ratio,38,3,43.4,,2018-19 +Winchester,Winchester High School,3440505,3-A-ii,a-sust-i3,Student to instructional support staff ratio,135.9,3,43.4,,2018-19 diff --git a/spec/services/admin_data_loader_spec.rb b/spec/services/admin_data_loader_spec.rb index 09f10a26..b7ca5c3d 100644 --- a/spec/services/admin_data_loader_spec.rb +++ b/spec/services/admin_data_loader_spec.rb @@ -2,6 +2,7 @@ require 'rails_helper' describe AdminDataLoader do let(:path_to_admin_data) { Rails.root.join('spec', 'fixtures', 'sample_admin_data.csv') } + let(:path_to_secondary_admin_data) { Rails.root.join('spec', 'fixtures', 'secondary_sample_admin_data.csv') } let(:ay_2018_19) { AcademicYear.find_by_range '2018-19' } let(:attleboro) { School.find_by_dese_id 160_505 } let(:winchester) { School.find_by_dese_id 3_440_505 } @@ -29,7 +30,7 @@ describe AdminDataLoader do # it 'assigns the school to the admin data value' do expect(AdminDataValue.first.school).to eq attleboro - expect(AdminDataValue.last.school).to eq beachmont + expect(AdminDataValue.last.school).to eq winchester # end # it 'links the admin data value to the correct admin data item' do @@ -38,7 +39,7 @@ describe AdminDataLoader do # end # it 'loads all the admin data values in the target csv file' do - expect(AdminDataValue.count).to eq 10 + expect(AdminDataValue.count).to eq 11 # end # it 'captures the likert score ' do @@ -46,6 +47,8 @@ describe AdminDataLoader do admin_data_item: chronic_absense_rate).likert_score).to eq 3.03 expect(AdminDataValue.find_by(school: beachmont, academic_year: ay_2018_19, admin_data_item: student_to_instructor_ratio).likert_score).to eq 3.5 + expect(AdminDataValue.find_by(school: winchester, academic_year: ay_2018_19, + admin_data_item: student_to_instructor_ratio).likert_score).to eq 5 # end # it 'rounds up any likert_scores between 0 and 1 (non-inclusive) to 1' do @@ -55,16 +58,27 @@ describe AdminDataLoader do # it 'rejects importing rows with a value of 0' do expect(AdminDataValue.where(school: attleboro, academic_year: ay_2018_19, admin_data_item: AdminDataItem.find_by_admin_data_item_id('a-reso-i1'))).not_to exist - expect(AdminDataValue.where(school: winchester, academic_year: ay_2018_19, - admin_data_item: AdminDataItem.find_by_admin_data_item_id('a-sust-i3'))).not_to exist # end end + + context 'when a second file exists' do + before :each do + AdminDataLoader.load_data filepath: path_to_secondary_admin_data + end + + it 'updates likert scores to match the new file' do + expect(AdminDataValue.find_by(school: attleboro, academic_year: ay_2018_19, + admin_data_item: chronic_absense_rate).likert_score).to eq 1 + expect(AdminDataValue.find_by(school: beachmont, academic_year: ay_2018_19, + admin_data_item: student_to_instructor_ratio).likert_score).to eq 3 + end + end end describe 'output to console' do it 'outputs a messsage saying a value has been rejected' do - output = capture_stdout { AdminDataLoader.load_data filepath: path_to_admin_data }.gsub("\n", '') - expect(output).to eq 'Invalid score: 0.0 for school: Attleboro High School admin data item a-reso-i1 Invalid score: 100.0 for school: Winchester High School admin data item a-sust-i3 ' + output = capture_stdout { AdminDataLoader.load_data filepath: path_to_admin_data }.delete("\n") + expect(output).to eq 'Invalid score: 0.0 for school: Attleboro High School admin data item a-reso-i1 ' end end end