mirror of
https://github.com/edcommonwealth/sqm-dashboards.git
synced 2026-03-08 23:18:18 -07:00
It's possible for admin data likert score values to be above 5. If that happens, we
cap the likert score at 5. This was already happening at the scraper level, but it is now also done by the admin data loader for safety. Also, admin data values are now updated in place instead of being deleted and reloaded on every load. Tests were added to confirm this behavior.
This commit is contained in:
parent
3589878700
commit
904d0d2f2c
4 changed files with 97 additions and 58 deletions
|
|
@ -4,6 +4,7 @@ require 'csv'
|
|||
|
||||
class AdminDataLoader
|
||||
def self.load_data(filepath:)
|
||||
admin_data_values = []
|
||||
CSV.parse(File.read(filepath), headers: true) do |row|
|
||||
score = likert_score(row:)
|
||||
unless valid_likert_score(likert_score: score)
|
||||
|
|
@ -12,8 +13,10 @@ class AdminDataLoader
|
|||
admin data item #{admin_data_item(row:)} "
|
||||
next
|
||||
end
|
||||
create_admin_data_value(row:, score:)
|
||||
admin_data_values << create_admin_data_value(row:, score:)
|
||||
end
|
||||
|
||||
AdminDataValue.import(admin_data_values.flatten.compact, on_duplicate_key_update: :all)
|
||||
end
|
||||
|
||||
private
|
||||
|
|
@ -24,7 +27,8 @@ class AdminDataLoader
|
|||
|
||||
def self.likert_score(row:)
|
||||
likert_score = (row['LikertScore'] || row['Likert Score'] || row['Likert_Score']).to_f
|
||||
round_up_to_one(likert_score:)
|
||||
likert_score = round_up_to_one(likert_score:)
|
||||
round_down_to_five(likert_score:)
|
||||
end
|
||||
|
||||
def self.round_up_to_one(likert_score:)
|
||||
|
|
@ -32,6 +36,11 @@ class AdminDataLoader
|
|||
likert_score
|
||||
end
|
||||
|
||||
# Caps a likert score at 5.
# Returns 5 when the given likert_score is greater than 5; otherwise returns
# the given likert_score unchanged.
def self.round_down_to_five(likert_score:)
|
||||
likert_score = 5 if likert_score > 5
|
||||
likert_score
|
||||
end
|
||||
|
||||
# Reads the academic year value from a row.
# Looks up the 'Academic Year' header first and falls back to 'AcademicYear'
# when the first lookup is nil.
def self.ay(row:)
|
||||
row['Academic Year'] || row['AcademicYear']
|
||||
end
|
||||
|
|
@ -45,10 +54,13 @@ class AdminDataLoader
|
|||
end
|
||||
|
||||
def self.create_admin_data_value(row:, score:)
|
||||
AdminDataValue.create!(likert_score: score,
|
||||
academic_year: AcademicYear.find_by_range(ay(row:)),
|
||||
school: School.find_by_dese_id(dese_id(row:).to_i),
|
||||
admin_data_item: AdminDataItem.find_by_admin_data_item_id(admin_data_item(row:)))
|
||||
admin_data_value = AdminDataValue.find_or_initialize_by(school: School.find_by_dese_id(dese_id(row:).to_i),
|
||||
academic_year: AcademicYear.find_by_range(ay(row:)),
|
||||
admin_data_item: AdminDataItem.find_by_admin_data_item_id(admin_data_item(row:)))
|
||||
return nil if admin_data_value.likert_score == score
|
||||
|
||||
admin_data_value.likert_score = score
|
||||
admin_data_value
|
||||
end
|
||||
|
||||
private_class_method :valid_likert_score
|
||||
|
|
|
|||
|
|
@ -1,11 +1,11 @@
|
|||
require "csv"
|
||||
require 'csv'
|
||||
|
||||
namespace :data do
|
||||
desc "load survey responses"
|
||||
desc 'load survey responses'
|
||||
task load_survey_responses: :environment do
|
||||
survey_item_response_count = SurveyItemResponse.count
|
||||
student_count = Student.count
|
||||
path = "/data/survey_responses/clean/"
|
||||
path = '/data/survey_responses/clean/'
|
||||
Sftp::Directory.open(path:) do |file|
|
||||
SurveyResponsesDataLoader.from_file(file:)
|
||||
end
|
||||
|
|
@ -16,30 +16,30 @@ namespace :data do
|
|||
end
|
||||
puts "=====================> Completed loading #{Student.count - student_count} students. #{Student.count} total students"
|
||||
|
||||
puts "Resetting race scores"
|
||||
puts 'Resetting race scores'
|
||||
RaceScoreLoader.reset(fast_processing: false)
|
||||
puts "=====================> Completed loading #{RaceScore.count} race scores"
|
||||
|
||||
Rails.cache.clear
|
||||
end
|
||||
|
||||
desc "seed only lowell"
|
||||
desc 'seed only lowell'
|
||||
task seed_only_lowell: :environment do
|
||||
seeder = Seeder.new rules: [Rule::SeedOnlyLowell]
|
||||
|
||||
seeder.seed_academic_years "2016-17", "2017-18", "2018-19", "2019-20", "2020-21", "2021-22", "2022-23"
|
||||
seeder.seed_districts_and_schools Rails.root.join("data", "master_list_of_schools_and_districts.csv")
|
||||
seeder.seed_surveys Rails.root.join("data", "master_list_of_schools_and_districts.csv")
|
||||
seeder.seed_respondents Rails.root.join("data", "master_list_of_schools_and_districts.csv")
|
||||
seeder.seed_sqm_framework Rails.root.join("data", "sqm_framework.csv")
|
||||
seeder.seed_demographics Rails.root.join("data", "demographics.csv")
|
||||
seeder.seed_academic_years '2016-17', '2017-18', '2018-19', '2019-20', '2020-21', '2021-22', '2022-23'
|
||||
seeder.seed_districts_and_schools Rails.root.join('data', 'master_list_of_schools_and_districts.csv')
|
||||
seeder.seed_surveys Rails.root.join('data', 'master_list_of_schools_and_districts.csv')
|
||||
seeder.seed_respondents Rails.root.join('data', 'master_list_of_schools_and_districts.csv')
|
||||
seeder.seed_sqm_framework Rails.root.join('data', 'sqm_framework.csv')
|
||||
seeder.seed_demographics Rails.root.join('data', 'demographics.csv')
|
||||
end
|
||||
|
||||
desc "load survey responses for lowell schools"
|
||||
desc 'load survey responses for lowell schools'
|
||||
task load_survey_responses_for_lowell: :environment do
|
||||
survey_item_response_count = SurveyItemResponse.count
|
||||
student_count = Student.count
|
||||
path = "/data/survey_responses/clean/"
|
||||
path = '/data/survey_responses/clean/'
|
||||
Sftp::Directory.open(path:) do |file|
|
||||
SurveyResponsesDataLoader.from_file(file:)
|
||||
end
|
||||
|
|
@ -50,44 +50,44 @@ namespace :data do
|
|||
end
|
||||
puts "=====================> Completed loading #{Student.count - student_count} students. #{Student.count} total students"
|
||||
|
||||
puts "Resetting race scores"
|
||||
puts 'Resetting race scores'
|
||||
RaceScoreLoader.reset(fast_processing: false)
|
||||
puts "=====================> Completed loading #{RaceScore.count} race scores"
|
||||
|
||||
Rails.cache.clear
|
||||
end
|
||||
|
||||
desc "load students for lowell"
|
||||
desc 'load students for lowell'
|
||||
task load_students_for_lowell: :environment do
|
||||
SurveyItemResponse.update_all(student_id: nil)
|
||||
StudentRace.delete_all
|
||||
Student.delete_all
|
||||
|
||||
Sftp::Directory.open(path: "/data/survey_responses/clean/") do |file|
|
||||
Sftp::Directory.open(path: '/data/survey_responses/clean/') do |file|
|
||||
StudentLoader.from_file(file:, rules: [Rule::SkipNonLowellSchools])
|
||||
end
|
||||
puts "=====================> Completed loading #{Student.count - student_count} students. #{Student.count} total students"
|
||||
|
||||
puts "Resetting race scores"
|
||||
puts 'Resetting race scores'
|
||||
RaceScoreLoader.reset(fast_processing: false)
|
||||
puts "=====================> Completed loading #{RaceScore.count} survey responses"
|
||||
|
||||
Rails.cache.clear
|
||||
end
|
||||
|
||||
desc "delete non-lowell schools and districts"
|
||||
desc 'delete non-lowell schools and districts'
|
||||
task delete_non_lowell: :environment do
|
||||
schools = School.all.reject { |s| s.district.name == "Lowell" }
|
||||
schools = School.all.reject { |s| s.district.name == 'Lowell' }
|
||||
ResponseRate.where(school: schools).delete_all
|
||||
Respondent.where(school: schools).delete_all
|
||||
Survey.where(school: schools).delete_all
|
||||
schools.each { |school| school.delete }
|
||||
districts = District.all.reject { |district| district.name == "Lowell" }
|
||||
districts = District.all.reject { |district| district.name == 'Lowell' }
|
||||
districts.each { |district| district.delete }
|
||||
end
|
||||
|
||||
task load_survey_responses_21_22: :environment do
|
||||
Dir.glob(Rails.root.join("data", "survey_responses", "*2021-22*.csv")).each do |filepath|
|
||||
Dir.glob(Rails.root.join('data', 'survey_responses', '*2021-22*.csv')).each do |filepath|
|
||||
puts "=====================> Loading data from csv at path: #{filepath}"
|
||||
SurveyResponsesDataLoader.load_data filepath:
|
||||
end
|
||||
|
|
@ -95,7 +95,7 @@ namespace :data do
|
|||
end
|
||||
|
||||
task load_survey_responses_20_21: :environment do
|
||||
Dir.glob(Rails.root.join("data", "survey_responses", "*2020-21*.csv")).each do |filepath|
|
||||
Dir.glob(Rails.root.join('data', 'survey_responses', '*2020-21*.csv')).each do |filepath|
|
||||
puts "=====================> Loading data from csv at path: #{filepath}"
|
||||
SurveyResponsesDataLoader.load_data filepath:
|
||||
end
|
||||
|
|
@ -103,7 +103,7 @@ namespace :data do
|
|||
end
|
||||
|
||||
task load_survey_responses_19_20: :environment do
|
||||
Dir.glob(Rails.root.join("data", "survey_responses", "*2019-20*.csv")).each do |filepath|
|
||||
Dir.glob(Rails.root.join('data', 'survey_responses', '*2019-20*.csv')).each do |filepath|
|
||||
puts "=====================> Loading data from csv at path: #{filepath}"
|
||||
SurveyResponsesDataLoader.load_data filepath:
|
||||
end
|
||||
|
|
@ -111,7 +111,7 @@ namespace :data do
|
|||
end
|
||||
|
||||
task load_survey_responses_18_19: :environment do
|
||||
Dir.glob(Rails.root.join("data", "survey_responses", "*2018-19*.csv")).each do |filepath|
|
||||
Dir.glob(Rails.root.join('data', 'survey_responses', '*2018-19*.csv')).each do |filepath|
|
||||
puts "=====================> Loading data from csv at path: #{filepath}"
|
||||
SurveyResponsesDataLoader.load_data filepath:
|
||||
end
|
||||
|
|
@ -119,7 +119,7 @@ namespace :data do
|
|||
end
|
||||
|
||||
task load_survey_responses_17_18: :environment do
|
||||
Dir.glob(Rails.root.join("data", "survey_responses", "*2017-18*.csv")).each do |filepath|
|
||||
Dir.glob(Rails.root.join('data', 'survey_responses', '*2017-18*.csv')).each do |filepath|
|
||||
puts "=====================> Loading data from csv at path: #{filepath}"
|
||||
SurveyResponsesDataLoader.load_data filepath:
|
||||
end
|
||||
|
|
@ -127,86 +127,86 @@ namespace :data do
|
|||
end
|
||||
|
||||
task load_survey_responses_16_17: :environment do
|
||||
Dir.glob(Rails.root.join("data", "survey_responses", "*2016-17*.csv")).each do |filepath|
|
||||
Dir.glob(Rails.root.join('data', 'survey_responses', '*2016-17*.csv')).each do |filepath|
|
||||
puts "=====================> Loading data from csv at path: #{filepath}"
|
||||
SurveyResponsesDataLoader.load_data filepath:
|
||||
end
|
||||
puts "=====================> Completed loading #{SurveyItemResponse.count} survey responses"
|
||||
end
|
||||
|
||||
desc "reset response rate values"
|
||||
desc 'reset response rate values'
|
||||
task reset_response_rates: :environment do
|
||||
puts "Resetting response rates"
|
||||
puts 'Resetting response rates'
|
||||
ResponseRateLoader.reset
|
||||
Rails.cache.clear
|
||||
puts "=====================> Completed loading #{ResponseRate.count} survey responses"
|
||||
end
|
||||
|
||||
desc "reset race score calculations"
|
||||
desc 'reset race score calculations'
|
||||
task reset_race_scores: :environment do
|
||||
puts "Resetting race scores"
|
||||
puts 'Resetting race scores'
|
||||
RaceScoreLoader.reset(fast_processing: false)
|
||||
Rails.cache.clear
|
||||
puts "=====================> Completed loading #{RaceScore.count} survey responses"
|
||||
end
|
||||
|
||||
desc "load admin_data"
|
||||
desc 'load admin_data'
|
||||
task load_admin_data: :environment do
|
||||
AdminDataValue.delete_all
|
||||
Dir.glob(Rails.root.join("data", "admin_data", "dese", "*.csv")).each do |filepath|
|
||||
original_count = AdminDataValue.count
|
||||
Dir.glob(Rails.root.join('data', 'admin_data', 'dese', '*.csv')).each do |filepath|
|
||||
puts "=====================> Loading data from csv at path: #{filepath}"
|
||||
Dese::Loader.load_data filepath:
|
||||
end
|
||||
puts "=====================> Completed loading #{AdminDataValue.count} survey responses"
|
||||
puts "=====================> Completed loading #{AdminDataValue.count - original_count} admin data values"
|
||||
end
|
||||
|
||||
desc "load students"
|
||||
desc 'load students'
|
||||
task load_students: :environment do
|
||||
SurveyItemResponse.update_all(student_id: nil)
|
||||
StudentRace.delete_all
|
||||
Student.delete_all
|
||||
Dir.glob(Rails.root.join("data", "survey_responses", "*student*.csv")).each do |file|
|
||||
Dir.glob(Rails.root.join('data', 'survey_responses', '*student*.csv')).each do |file|
|
||||
puts "=====================> Loading student data from csv at path: #{file}"
|
||||
StudentLoader.load_data filepath: file
|
||||
end
|
||||
puts "=====================> Completed loading #{Student.count} students"
|
||||
|
||||
puts "Resetting race scores"
|
||||
puts 'Resetting race scores'
|
||||
RaceScoreLoader.reset(fast_processing: false)
|
||||
puts "=====================> Completed loading #{RaceScore.count} survey responses"
|
||||
|
||||
Rails.cache.clear
|
||||
end
|
||||
|
||||
desc "reset all cache counters"
|
||||
desc 'reset all cache counters'
|
||||
task reset_cache_counters: :environment do
|
||||
puts "=====================> Resetting Category counters"
|
||||
puts '=====================> Resetting Category counters'
|
||||
Category.all.each do |category|
|
||||
Category.reset_counters(category.id, :subcategories)
|
||||
end
|
||||
puts "=====================> Resetting Subcategory counters"
|
||||
puts '=====================> Resetting Subcategory counters'
|
||||
Subcategory.all.each do |subcategory|
|
||||
Subcategory.reset_counters(subcategory.id, :measures)
|
||||
end
|
||||
puts "=====================> Resetting Measure counters"
|
||||
puts '=====================> Resetting Measure counters'
|
||||
Measure.all.each do |measure|
|
||||
Measure.reset_counters(measure.id, :scales)
|
||||
end
|
||||
puts "=====================> Resetting Scale counters"
|
||||
puts '=====================> Resetting Scale counters'
|
||||
Scale.all.each do |scale|
|
||||
Scale.reset_counters(scale.id, :survey_items)
|
||||
end
|
||||
puts "=====================> Resetting SurveyItem counters"
|
||||
puts '=====================> Resetting SurveyItem counters'
|
||||
SurveyItem.all.each do |survey_item|
|
||||
SurveyItem.reset_counters(survey_item.id, :survey_item_responses)
|
||||
end
|
||||
end
|
||||
|
||||
desc "scrape dese site for admin data"
|
||||
desc 'scrape dese site for admin data'
|
||||
task scrape_all: :environment do
|
||||
puts "scraping data from dese"
|
||||
puts 'scraping data from dese'
|
||||
scrapers = [Dese::OneAOne, Dese::OneAThree, Dese::TwoAOne, Dese::TwoCOne, Dese::ThreeAOne, Dese::ThreeATwo,
|
||||
Dese::ThreeBOne, Dese::ThreeBTwo, Dese::FourAOne, Dese::FourBTwo, Dese::FourDOne, Dese::FiveCOne, Dese::FiveDTwo]
|
||||
Dese::ThreeBOne, Dese::ThreeBTwo, Dese::FourAOne, Dese::FourBTwo, Dese::FourDOne, Dese::FiveCOne, Dese::FiveDTwo]
|
||||
scrapers.each do |scraper|
|
||||
scraper.new.run_all
|
||||
end
|
||||
|
|
|
|||
13
spec/fixtures/secondary_sample_admin_data.csv
vendored
Normal file
13
spec/fixtures/secondary_sample_admin_data.csv
vendored
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
District,School,DESE ID,Category,Item ID,NonLikert Title,NL_Value,LikertScore,Benchmark,Data Type,Academic Year
|
||||
Attleboro,Attleboro High School,160505,2-C-i,a-vale-i1,Chronic absence rate,19.7,1,10,%,2018-19
|
||||
Milford,Woodland Elementary School,1850090,2-C-i,a-vale-i1,Chronic absence rate,6.8,1,10,%,2018-19
|
||||
Revere,Beachmont Elementary School,2480013,2-C-i,a-vale-i1,Chronic absence rate,4.2,1,10,%,2018-19
|
||||
Winchester,Winchester High School,3440505,2-C-i,a-vale-i1,Chronic absence rate,7.2,1,10,%,2018-19
|
||||
Attleboro,Attleboro High School,160505,3-A-i,a-reso-i1,Average class size,20.6,2,20,,2018-19
|
||||
Milford,Woodland Elementary School,1850090,3-A-i,a-reso-i1,Average class size,22.5,2,20,,2018-19
|
||||
Revere,Beachmont Elementary School,2480013,3-A-i,a-reso-i1,Average class size,17,2,20,,2018-19
|
||||
Winchester,Winchester High School,3440505,3-A-i,a-reso-i1,Average class size,17,2,20,,2018-19
|
||||
Attleboro,Attleboro High School,160505,3-A-ii,a-sust-i3,Student to instructional support staff ratio,15.28896673,3,43.4,,2018-19
|
||||
Milford,Woodland Elementary School,1850090,3-A-ii,a-sust-i3,Student to instructional support staff ratio,22.85714286,3,43.4,,2018-19
|
||||
Revere,Beachmont Elementary School,2480013,3-A-ii,a-sust-i3,Student to instructional support staff ratio,38,3,43.4,,2018-19
|
||||
Winchester,Winchester High School,3440505,3-A-ii,a-sust-i3,Student to instructional support staff ratio,135.9,3,43.4,,2018-19
|
||||
|
|
|
@ -2,6 +2,7 @@ require 'rails_helper'
|
|||
|
||||
describe AdminDataLoader do
|
||||
let(:path_to_admin_data) { Rails.root.join('spec', 'fixtures', 'sample_admin_data.csv') }
|
||||
let(:path_to_secondary_admin_data) { Rails.root.join('spec', 'fixtures', 'secondary_sample_admin_data.csv') }
|
||||
let(:ay_2018_19) { AcademicYear.find_by_range '2018-19' }
|
||||
let(:attleboro) { School.find_by_dese_id 160_505 }
|
||||
let(:winchester) { School.find_by_dese_id 3_440_505 }
|
||||
|
|
@ -29,7 +30,7 @@ describe AdminDataLoader do
|
|||
|
||||
# it 'assigns the school to the admin data value' do
|
||||
expect(AdminDataValue.first.school).to eq attleboro
|
||||
expect(AdminDataValue.last.school).to eq beachmont
|
||||
expect(AdminDataValue.last.school).to eq winchester
|
||||
# end
|
||||
|
||||
# it 'links the admin data value to the correct admin data item' do
|
||||
|
|
@ -38,7 +39,7 @@ describe AdminDataLoader do
|
|||
# end
|
||||
|
||||
# it 'loads all the admin data values in the target csv file' do
|
||||
expect(AdminDataValue.count).to eq 10
|
||||
expect(AdminDataValue.count).to eq 11
|
||||
# end
|
||||
|
||||
# it 'captures the likert score ' do
|
||||
|
|
@ -46,6 +47,8 @@ describe AdminDataLoader do
|
|||
admin_data_item: chronic_absense_rate).likert_score).to eq 3.03
|
||||
expect(AdminDataValue.find_by(school: beachmont, academic_year: ay_2018_19,
|
||||
admin_data_item: student_to_instructor_ratio).likert_score).to eq 3.5
|
||||
expect(AdminDataValue.find_by(school: winchester, academic_year: ay_2018_19,
|
||||
admin_data_item: student_to_instructor_ratio).likert_score).to eq 5
|
||||
# end
|
||||
|
||||
# it 'rounds up any likert_scores between 0 and 1 (non-inclusive) to 1' do
|
||||
|
|
@ -55,16 +58,27 @@ describe AdminDataLoader do
|
|||
# it 'rejects importing rows with a value of 0' do
|
||||
expect(AdminDataValue.where(school: attleboro, academic_year: ay_2018_19,
|
||||
admin_data_item: AdminDataItem.find_by_admin_data_item_id('a-reso-i1'))).not_to exist
|
||||
expect(AdminDataValue.where(school: winchester, academic_year: ay_2018_19,
|
||||
admin_data_item: AdminDataItem.find_by_admin_data_item_id('a-sust-i3'))).not_to exist
|
||||
# end
|
||||
end
|
||||
|
||||
context 'when a second file exists' do
|
||||
before :each do
|
||||
AdminDataLoader.load_data filepath: path_to_secondary_admin_data
|
||||
end
|
||||
|
||||
it 'updates likert scores to match the new file' do
|
||||
expect(AdminDataValue.find_by(school: attleboro, academic_year: ay_2018_19,
|
||||
admin_data_item: chronic_absense_rate).likert_score).to eq 1
|
||||
expect(AdminDataValue.find_by(school: beachmont, academic_year: ay_2018_19,
|
||||
admin_data_item: student_to_instructor_ratio).likert_score).to eq 3
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
describe 'output to console' do
|
||||
it 'outputs a messsage saying a value has been rejected' do
|
||||
output = capture_stdout { AdminDataLoader.load_data filepath: path_to_admin_data }.gsub("\n", '')
|
||||
expect(output).to eq 'Invalid score: 0.0 for school: Attleboro High School admin data item a-reso-i1 Invalid score: 100.0 for school: Winchester High School admin data item a-sust-i3 '
|
||||
output = capture_stdout { AdminDataLoader.load_data filepath: path_to_admin_data }.delete("\n")
|
||||
expect(output).to eq 'Invalid score: 0.0 for school: Attleboro High School admin data item a-reso-i1 '
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue