It's possible for admin data likert score values to be above 5. If that happens, we
cap the likert score at 5. This was already happening at the scraper
level, but it is now also done by the admin data loader for safety.
Also make sure to update existing admin data values in place instead of
deleting and reloading all values on each load. Add tests to confirm this behavior.
mciea-main
rebuilt 3 years ago
parent 3589878700
commit 904d0d2f2c

@ -4,6 +4,7 @@ require 'csv'
class AdminDataLoader
def self.load_data(filepath:)
admin_data_values = []
CSV.parse(File.read(filepath), headers: true) do |row|
score = likert_score(row:)
unless valid_likert_score(likert_score: score)
@ -12,8 +13,10 @@ class AdminDataLoader
admin data item #{admin_data_item(row:)} "
next
end
create_admin_data_value(row:, score:)
admin_data_values << create_admin_data_value(row:, score:)
end
AdminDataValue.import(admin_data_values.flatten.compact, on_duplicate_key_update: :all)
end
private
@ -24,7 +27,8 @@ class AdminDataLoader
# Extracts the Likert score for a CSV row and bounds it.
#
# The score is read from the first present header among 'LikertScore',
# 'Likert Score' and 'Likert_Score', then converted with #to_f (so a row with
# none of these headers yields 0.0). The value is passed through
# round_up_to_one and then round_down_to_five; the result of the second call
# is returned.
#
# row: - a row that responds to [] with the header name (e.g. a CSV::Row).
def self.likert_score(row:)
  raw_score = (row['LikertScore'] || row['Likert Score'] || row['Likert_Score']).to_f
  # Apply the lower adjustment exactly once, then cap the upper end.
  round_down_to_five(likert_score: round_up_to_one(likert_score: raw_score))
end
def self.round_up_to_one(likert_score:)
@ -32,6 +36,11 @@ class AdminDataLoader
likert_score
end
# Caps a Likert score at 5: any value greater than 5 becomes 5, every other
# value is returned untouched.
def self.round_down_to_five(likert_score:)
  likert_score > 5 ? 5 : likert_score
end
# Returns the academic year cell of a row, preferring the 'Academic Year'
# header and falling back to 'AcademicYear' when the former is missing.
def self.ay(row:)
  spaced_header_value = row['Academic Year']
  return spaced_header_value if spaced_header_value

  row['AcademicYear']
end
@ -45,10 +54,13 @@ class AdminDataLoader
end
# Prepares (without saving) the AdminDataValue for one CSV row.
#
# The record is looked up by school, academic year and admin data item, or
# initialized when no such record exists. Nothing is written here: the caller
# is expected to persist the returned records itself (load_data collects them
# and hands them to AdminDataValue.import).
#
# row:   - the CSV row supplying the DESE id, academic year and item id.
# score: - the Likert score to store on the record.
#
# Returns the record with likert_score assigned, or nil when the record
# already carries exactly this score and therefore needs no update.
def self.create_admin_data_value(row:, score:)
  admin_data_value = AdminDataValue.find_or_initialize_by(
    school: School.find_by_dese_id(dese_id(row:).to_i),
    academic_year: AcademicYear.find_by_range(ay(row:)),
    admin_data_item: AdminDataItem.find_by_admin_data_item_id(admin_data_item(row:))
  )
  # Skip unchanged rows so the bulk import only touches records that differ.
  return nil if admin_data_value.likert_score == score

  admin_data_value.likert_score = score
  admin_data_value
end
private_class_method :valid_likert_score

@ -1,11 +1,11 @@
require "csv"
require 'csv'
namespace :data do
desc "load survey responses"
desc 'load survey responses'
task load_survey_responses: :environment do
survey_item_response_count = SurveyItemResponse.count
student_count = Student.count
path = "/data/survey_responses/clean/"
path = '/data/survey_responses/clean/'
Sftp::Directory.open(path:) do |file|
SurveyResponsesDataLoader.from_file(file:)
end
@ -16,30 +16,30 @@ namespace :data do
end
puts "=====================> Completed loading #{Student.count - student_count} students. #{Student.count} total students"
puts "Resetting race scores"
puts 'Resetting race scores'
RaceScoreLoader.reset(fast_processing: false)
puts "=====================> Completed loading #{RaceScore.count} race scores"
Rails.cache.clear
end
desc "seed only lowell"
desc 'seed only lowell'
task seed_only_lowell: :environment do
seeder = Seeder.new rules: [Rule::SeedOnlyLowell]
seeder.seed_academic_years "2016-17", "2017-18", "2018-19", "2019-20", "2020-21", "2021-22", "2022-23"
seeder.seed_districts_and_schools Rails.root.join("data", "master_list_of_schools_and_districts.csv")
seeder.seed_surveys Rails.root.join("data", "master_list_of_schools_and_districts.csv")
seeder.seed_respondents Rails.root.join("data", "master_list_of_schools_and_districts.csv")
seeder.seed_sqm_framework Rails.root.join("data", "sqm_framework.csv")
seeder.seed_demographics Rails.root.join("data", "demographics.csv")
seeder.seed_academic_years '2016-17', '2017-18', '2018-19', '2019-20', '2020-21', '2021-22', '2022-23'
seeder.seed_districts_and_schools Rails.root.join('data', 'master_list_of_schools_and_districts.csv')
seeder.seed_surveys Rails.root.join('data', 'master_list_of_schools_and_districts.csv')
seeder.seed_respondents Rails.root.join('data', 'master_list_of_schools_and_districts.csv')
seeder.seed_sqm_framework Rails.root.join('data', 'sqm_framework.csv')
seeder.seed_demographics Rails.root.join('data', 'demographics.csv')
end
desc "load survey responses for lowell schools"
desc 'load survey responses for lowell schools'
task load_survey_responses_for_lowell: :environment do
survey_item_response_count = SurveyItemResponse.count
student_count = Student.count
path = "/data/survey_responses/clean/"
path = '/data/survey_responses/clean/'
Sftp::Directory.open(path:) do |file|
SurveyResponsesDataLoader.from_file(file:)
end
@ -50,44 +50,44 @@ namespace :data do
end
puts "=====================> Completed loading #{Student.count - student_count} students. #{Student.count} total students"
puts "Resetting race scores"
puts 'Resetting race scores'
RaceScoreLoader.reset(fast_processing: false)
puts "=====================> Completed loading #{RaceScore.count} race scores"
Rails.cache.clear
end
desc "load students for lowell"
desc 'load students for lowell'
task load_students_for_lowell: :environment do
SurveyItemResponse.update_all(student_id: nil)
StudentRace.delete_all
Student.delete_all
Sftp::Directory.open(path: "/data/survey_responses/clean/") do |file|
Sftp::Directory.open(path: '/data/survey_responses/clean/') do |file|
StudentLoader.from_file(file:, rules: [Rule::SkipNonLowellSchools])
end
puts "=====================> Completed loading #{Student.count - student_count} students. #{Student.count} total students"
puts "Resetting race scores"
puts 'Resetting race scores'
RaceScoreLoader.reset(fast_processing: false)
puts "=====================> Completed loading #{RaceScore.count} survey responses"
Rails.cache.clear
end
desc "delete non-lowell schools and districts"
desc 'delete non-lowell schools and districts'
task delete_non_lowell: :environment do
schools = School.all.reject { |s| s.district.name == "Lowell" }
schools = School.all.reject { |s| s.district.name == 'Lowell' }
ResponseRate.where(school: schools).delete_all
Respondent.where(school: schools).delete_all
Survey.where(school: schools).delete_all
schools.each { |school| school.delete }
districts = District.all.reject { |district| district.name == "Lowell" }
districts = District.all.reject { |district| district.name == 'Lowell' }
districts.each { |district| district.delete }
end
task load_survey_responses_21_22: :environment do
Dir.glob(Rails.root.join("data", "survey_responses", "*2021-22*.csv")).each do |filepath|
Dir.glob(Rails.root.join('data', 'survey_responses', '*2021-22*.csv')).each do |filepath|
puts "=====================> Loading data from csv at path: #{filepath}"
SurveyResponsesDataLoader.load_data filepath:
end
@ -95,7 +95,7 @@ namespace :data do
end
task load_survey_responses_20_21: :environment do
Dir.glob(Rails.root.join("data", "survey_responses", "*2020-21*.csv")).each do |filepath|
Dir.glob(Rails.root.join('data', 'survey_responses', '*2020-21*.csv')).each do |filepath|
puts "=====================> Loading data from csv at path: #{filepath}"
SurveyResponsesDataLoader.load_data filepath:
end
@ -103,7 +103,7 @@ namespace :data do
end
task load_survey_responses_19_20: :environment do
Dir.glob(Rails.root.join("data", "survey_responses", "*2019-20*.csv")).each do |filepath|
Dir.glob(Rails.root.join('data', 'survey_responses', '*2019-20*.csv')).each do |filepath|
puts "=====================> Loading data from csv at path: #{filepath}"
SurveyResponsesDataLoader.load_data filepath:
end
@ -111,7 +111,7 @@ namespace :data do
end
task load_survey_responses_18_19: :environment do
Dir.glob(Rails.root.join("data", "survey_responses", "*2018-19*.csv")).each do |filepath|
Dir.glob(Rails.root.join('data', 'survey_responses', '*2018-19*.csv')).each do |filepath|
puts "=====================> Loading data from csv at path: #{filepath}"
SurveyResponsesDataLoader.load_data filepath:
end
@ -119,7 +119,7 @@ namespace :data do
end
task load_survey_responses_17_18: :environment do
Dir.glob(Rails.root.join("data", "survey_responses", "*2017-18*.csv")).each do |filepath|
Dir.glob(Rails.root.join('data', 'survey_responses', '*2017-18*.csv')).each do |filepath|
puts "=====================> Loading data from csv at path: #{filepath}"
SurveyResponsesDataLoader.load_data filepath:
end
@ -127,86 +127,86 @@ namespace :data do
end
task load_survey_responses_16_17: :environment do
Dir.glob(Rails.root.join("data", "survey_responses", "*2016-17*.csv")).each do |filepath|
Dir.glob(Rails.root.join('data', 'survey_responses', '*2016-17*.csv')).each do |filepath|
puts "=====================> Loading data from csv at path: #{filepath}"
SurveyResponsesDataLoader.load_data filepath:
end
puts "=====================> Completed loading #{SurveyItemResponse.count} survey responses"
end
desc "reset response rate values"
desc 'reset response rate values'
task reset_response_rates: :environment do
puts "Resetting response rates"
puts 'Resetting response rates'
ResponseRateLoader.reset
Rails.cache.clear
puts "=====================> Completed loading #{ResponseRate.count} survey responses"
end
desc "reset race score calculations"
desc 'reset race score calculations'
task reset_race_scores: :environment do
puts "Resetting race scores"
puts 'Resetting race scores'
RaceScoreLoader.reset(fast_processing: false)
Rails.cache.clear
puts "=====================> Completed loading #{RaceScore.count} survey responses"
end
desc "load admin_data"
desc 'load admin_data'
task load_admin_data: :environment do
AdminDataValue.delete_all
Dir.glob(Rails.root.join("data", "admin_data", "dese", "*.csv")).each do |filepath|
original_count = AdminDataValue.count
Dir.glob(Rails.root.join('data', 'admin_data', 'dese', '*.csv')).each do |filepath|
puts "=====================> Loading data from csv at path: #{filepath}"
Dese::Loader.load_data filepath:
end
puts "=====================> Completed loading #{AdminDataValue.count} survey responses"
puts "=====================> Completed loading #{AdminDataValue.count - original_count} admin data values"
end
desc "load students"
desc 'load students'
task load_students: :environment do
SurveyItemResponse.update_all(student_id: nil)
StudentRace.delete_all
Student.delete_all
Dir.glob(Rails.root.join("data", "survey_responses", "*student*.csv")).each do |file|
Dir.glob(Rails.root.join('data', 'survey_responses', '*student*.csv')).each do |file|
puts "=====================> Loading student data from csv at path: #{file}"
StudentLoader.load_data filepath: file
end
puts "=====================> Completed loading #{Student.count} students"
puts "Resetting race scores"
puts 'Resetting race scores'
RaceScoreLoader.reset(fast_processing: false)
puts "=====================> Completed loading #{RaceScore.count} survey responses"
Rails.cache.clear
end
desc "reset all cache counters"
desc 'reset all cache counters'
task reset_cache_counters: :environment do
puts "=====================> Resetting Category counters"
puts '=====================> Resetting Category counters'
Category.all.each do |category|
Category.reset_counters(category.id, :subcategories)
end
puts "=====================> Resetting Subcategory counters"
puts '=====================> Resetting Subcategory counters'
Subcategory.all.each do |subcategory|
Subcategory.reset_counters(subcategory.id, :measures)
end
puts "=====================> Resetting Measure counters"
puts '=====================> Resetting Measure counters'
Measure.all.each do |measure|
Measure.reset_counters(measure.id, :scales)
end
puts "=====================> Resetting Scale counters"
puts '=====================> Resetting Scale counters'
Scale.all.each do |scale|
Scale.reset_counters(scale.id, :survey_items)
end
puts "=====================> Resetting SurveyItem counters"
puts '=====================> Resetting SurveyItem counters'
SurveyItem.all.each do |survey_item|
SurveyItem.reset_counters(survey_item.id, :survey_item_responses)
end
end
desc "scrape dese site for admin data"
desc 'scrape dese site for admin data'
task scrape_all: :environment do
puts "scraping data from dese"
puts 'scraping data from dese'
scrapers = [Dese::OneAOne, Dese::OneAThree, Dese::TwoAOne, Dese::TwoCOne, Dese::ThreeAOne, Dese::ThreeATwo,
Dese::ThreeBOne, Dese::ThreeBTwo, Dese::FourAOne, Dese::FourBTwo, Dese::FourDOne, Dese::FiveCOne, Dese::FiveDTwo]
Dese::ThreeBOne, Dese::ThreeBTwo, Dese::FourAOne, Dese::FourBTwo, Dese::FourDOne, Dese::FiveCOne, Dese::FiveDTwo]
scrapers.each do |scraper|
scraper.new.run_all
end

@ -0,0 +1,13 @@
District,School,DESE ID,Category,Item ID,NonLikert Title,NL_Value,LikertScore,Benchmark,Data Type,Academic Year
Attleboro,Attleboro High School,160505,2-C-i,a-vale-i1,Chronic absence rate,19.7,1,10,%,2018-19
Milford,Woodland Elementary School,1850090,2-C-i,a-vale-i1,Chronic absence rate,6.8,1,10,%,2018-19
Revere,Beachmont Elementary School,2480013,2-C-i,a-vale-i1,Chronic absence rate,4.2,1,10,%,2018-19
Winchester,Winchester High School,3440505,2-C-i,a-vale-i1,Chronic absence rate,7.2,1,10,%,2018-19
Attleboro,Attleboro High School,160505,3-A-i,a-reso-i1,Average class size,20.6,2,20,,2018-19
Milford,Woodland Elementary School,1850090,3-A-i,a-reso-i1,Average class size,22.5,2,20,,2018-19
Revere,Beachmont Elementary School,2480013,3-A-i,a-reso-i1,Average class size,17,2,20,,2018-19
Winchester,Winchester High School,3440505,3-A-i,a-reso-i1,Average class size,17,2,20,,2018-19
Attleboro,Attleboro High School,160505,3-A-ii,a-sust-i3,Student to instructional support staff ratio,15.28896673,3,43.4,,2018-19
Milford,Woodland Elementary School,1850090,3-A-ii,a-sust-i3,Student to instructional support staff ratio,22.85714286,3,43.4,,2018-19
Revere,Beachmont Elementary School,2480013,3-A-ii,a-sust-i3,Student to instructional support staff ratio,38,3,43.4,,2018-19
Winchester,Winchester High School,3440505,3-A-ii,a-sust-i3,Student to instructional support staff ratio,135.9,3,43.4,,2018-19
1 District School DESE ID Category Item ID NonLikert Title NL_Value LikertScore Benchmark Data Type Academic Year
2 Attleboro Attleboro High School 160505 2-C-i a-vale-i1 Chronic absence rate 19.7 1 10 % 2018-19
3 Milford Woodland Elementary School 1850090 2-C-i a-vale-i1 Chronic absence rate 6.8 1 10 % 2018-19
4 Revere Beachmont Elementary School 2480013 2-C-i a-vale-i1 Chronic absence rate 4.2 1 10 % 2018-19
5 Winchester Winchester High School 3440505 2-C-i a-vale-i1 Chronic absence rate 7.2 1 10 % 2018-19
6 Attleboro Attleboro High School 160505 3-A-i a-reso-i1 Average class size 20.6 2 20 2018-19
7 Milford Woodland Elementary School 1850090 3-A-i a-reso-i1 Average class size 22.5 2 20 2018-19
8 Revere Beachmont Elementary School 2480013 3-A-i a-reso-i1 Average class size 17 2 20 2018-19
9 Winchester Winchester High School 3440505 3-A-i a-reso-i1 Average class size 17 2 20 2018-19
10 Attleboro Attleboro High School 160505 3-A-ii a-sust-i3 Student to instructional support staff ratio 15.28896673 3 43.4 2018-19
11 Milford Woodland Elementary School 1850090 3-A-ii a-sust-i3 Student to instructional support staff ratio 22.85714286 3 43.4 2018-19
12 Revere Beachmont Elementary School 2480013 3-A-ii a-sust-i3 Student to instructional support staff ratio 38 3 43.4 2018-19
13 Winchester Winchester High School 3440505 3-A-ii a-sust-i3 Student to instructional support staff ratio 135.9 3 43.4 2018-19

@ -2,6 +2,7 @@ require 'rails_helper'
describe AdminDataLoader do
let(:path_to_admin_data) { Rails.root.join('spec', 'fixtures', 'sample_admin_data.csv') }
let(:path_to_secondary_admin_data) { Rails.root.join('spec', 'fixtures', 'secondary_sample_admin_data.csv') }
let(:ay_2018_19) { AcademicYear.find_by_range '2018-19' }
let(:attleboro) { School.find_by_dese_id 160_505 }
let(:winchester) { School.find_by_dese_id 3_440_505 }
@ -29,7 +30,7 @@ describe AdminDataLoader do
# it 'assigns the school to the admin data value' do
expect(AdminDataValue.first.school).to eq attleboro
expect(AdminDataValue.last.school).to eq beachmont
expect(AdminDataValue.last.school).to eq winchester
# end
# it 'links the admin data value to the correct admin data item' do
@ -38,7 +39,7 @@ describe AdminDataLoader do
# end
# it 'loads all the admin data values in the target csv file' do
expect(AdminDataValue.count).to eq 10
expect(AdminDataValue.count).to eq 11
# end
# it 'captures the likert score ' do
@ -46,6 +47,8 @@ describe AdminDataLoader do
admin_data_item: chronic_absense_rate).likert_score).to eq 3.03
expect(AdminDataValue.find_by(school: beachmont, academic_year: ay_2018_19,
admin_data_item: student_to_instructor_ratio).likert_score).to eq 3.5
expect(AdminDataValue.find_by(school: winchester, academic_year: ay_2018_19,
admin_data_item: student_to_instructor_ratio).likert_score).to eq 5
# end
# it 'rounds up any likert_scores between 0 and 1 (non-inclusive) to 1' do
@ -55,16 +58,27 @@ describe AdminDataLoader do
# it 'rejects importing rows with a value of 0' do
expect(AdminDataValue.where(school: attleboro, academic_year: ay_2018_19,
admin_data_item: AdminDataItem.find_by_admin_data_item_id('a-reso-i1'))).not_to exist
expect(AdminDataValue.where(school: winchester, academic_year: ay_2018_19,
admin_data_item: AdminDataItem.find_by_admin_data_item_id('a-sust-i3'))).not_to exist
# end
end
context 'when a second file exists' do
before :each do
AdminDataLoader.load_data filepath: path_to_secondary_admin_data
end
it 'updates likert scores to match the new file' do
expect(AdminDataValue.find_by(school: attleboro, academic_year: ay_2018_19,
admin_data_item: chronic_absense_rate).likert_score).to eq 1
expect(AdminDataValue.find_by(school: beachmont, academic_year: ay_2018_19,
admin_data_item: student_to_instructor_ratio).likert_score).to eq 3
end
end
end
describe 'output to console' do
it 'outputs a messsage saying a value has been rejected' do
output = capture_stdout { AdminDataLoader.load_data filepath: path_to_admin_data }.gsub("\n", '')
expect(output).to eq 'Invalid score: 0.0 for school: Attleboro High School admin data item a-reso-i1 Invalid score: 100.0 for school: Winchester High School admin data item a-sust-i3 '
output = capture_stdout { AdminDataLoader.load_data filepath: path_to_admin_data }.delete("\n")
expect(output).to eq 'Invalid score: 0.0 for school: Attleboro High School admin data item a-reso-i1 '
end
end
end

Loading…
Cancel
Save