It's possible for admin data likert score values to be above 5. If that happens, we

cap the likert score at 5. This was already happening at the scraper
level, but it is now also done by the admin data loader for safety.
Also make sure to update existing admin data values in place instead of
deleting and reloading all values on each load. Add tests to confirm this behavior.
rpp-main
rebuilt 3 years ago
parent c789c46032
commit 0f23053294

@ -2,6 +2,7 @@
class AdminDataLoader
def self.load_data(filepath:)
admin_data_values = []
CSV.parse(File.read(filepath), headers: true) do |row|
score = likert_score(row:)
unless valid_likert_score(likert_score: score)
@ -10,8 +11,10 @@ class AdminDataLoader
admin data item #{admin_data_item(row:)} "
next
end
create_admin_data_value(row:, score:)
admin_data_values << create_admin_data_value(row:, score:)
end
AdminDataValue.import(admin_data_values.flatten.compact, on_duplicate_key_update: :all)
end
private
@ -22,7 +25,8 @@ class AdminDataLoader
# Reads the Likert score from a row and normalizes it.
#
# The score column may be headed 'LikertScore', 'Likert Score' or
# 'Likert_Score'; the first truthy value wins. The raw value is converted
# with `to_f` (a missing column therefore becomes 0.0), passed through
# `round_up_to_one`, and finally capped by `round_down_to_five`.
#
# @param row [#[]] a CSV row (or hash) keyed by column header
# @return [Numeric] the normalized score
def self.likert_score(row:)
  raw_score = (row['LikertScore'] || row['Likert Score'] || row['Likert_Score']).to_f
  # Each helper is applied exactly once; the lower bound is handled before the upper cap.
  round_down_to_five(likert_score: round_up_to_one(likert_score: raw_score))
end
def self.round_up_to_one(likert_score:)
@ -30,6 +34,11 @@ class AdminDataLoader
likert_score
end
# Caps a Likert score at 5.
#
# @param likert_score [Numeric] the score to cap
# @return [Numeric] 5 when the score is greater than 5, otherwise the score unchanged
def self.round_down_to_five(likert_score:)
  likert_score > 5 ? 5 : likert_score
end
# Fetches the academic year from a row, accepting either header spelling.
#
# @param row [#[]] a CSV row (or hash) keyed by column header
# @return [Object, nil] the value under 'Academic Year' when truthy,
#   otherwise whatever is stored under 'AcademicYear'
def self.ay(row:)
  spaced_header_value = row['Academic Year']
  return spaced_header_value if spaced_header_value

  row['AcademicYear']
end
@ -43,11 +52,13 @@ class AdminDataLoader
end
# Finds or builds (without saving) the AdminDataValue for one row and sets its score.
#
# The record is identified by school, academic year and admin data item, all
# resolved from the row. Nothing is persisted here: the caller collects the
# returned records and imports them in bulk.
#
# @param row [#[]] a CSV row keyed by column header
# @param score [Numeric] the already-normalized Likert score for the row
# @return [AdminDataValue, nil] the record carrying the new score, or nil when
#   the record's current score already equals `score` (nothing to write)
def self.create_admin_data_value(row:, score:)
  admin_data_value = AdminDataValue.find_or_initialize_by(
    school: School.find_by_dese_id(dese_id(row:).to_i),
    academic_year: AcademicYear.find_by_range(ay(row:)),
    admin_data_item: AdminDataItem.find_by_admin_data_item_id(admin_data_item(row:))
  )
  # Unchanged rows are dropped so the bulk import only touches new or changed values.
  return nil if admin_data_value.likert_score == score

  admin_data_value.likert_score = score
  admin_data_value
end
private_class_method :valid_likert_score

@ -117,7 +117,7 @@ class SurveyItemValues
valid_duration? && valid_progress? && valid_grade? && valid_sd?
end
def survey_type
def respondent_type
return :teacher if headers
.filter(&:present?)
.filter { |header| header.start_with? 't-' }.count > 0

@ -1,9 +1,13 @@
require 'csv'
namespace :data do
desc 'load survey responses'
task load_survey_responses: :environment do
Dir.glob(Rails.root.join('data', 'survey_responses', '*.csv')).each do |filepath|
puts "=====================> Loading data from csv at path: #{filepath}"
SurveyResponsesDataLoader.load_data filepath:
survey_item_response_count = SurveyItemResponse.count
student_count = Student.count
path = '/data/survey_responses/clean/'
Sftp::Directory.open(path:) do |file|
SurveyResponsesDataLoader.from_file(file:)
end
puts "=====================> Completed loading #{SurveyItemResponse.count} survey responses"
@ -11,6 +15,58 @@ namespace :data do
ResponseRateLoader.reset
puts "=====================> Completed loading #{ResponseRate.count} survey responses"
puts 'Resetting race scores'
RaceScoreLoader.reset(fast_processing: false)
puts "=====================> Completed loading #{RaceScore.count} race scores"
Rails.cache.clear
end
desc 'seed only lowell'
task seed_only_lowell: :environment do
  seeder = Seeder.new(rules: [Rule::SeedOnlyLowell])

  # Districts/schools, surveys and respondents are all seeded from the same master list.
  master_list = Rails.root.join('data', 'master_list_of_schools_and_districts.csv')
  academic_years = %w[2016-17 2017-18 2018-19 2019-20 2020-21 2021-22 2022-23]

  seeder.seed_academic_years(*academic_years)
  seeder.seed_districts_and_schools(master_list)
  seeder.seed_surveys(master_list)
  seeder.seed_respondents(master_list)
  seeder.seed_sqm_framework(Rails.root.join('data', 'sqm_framework.csv'))
  seeder.seed_demographics(Rails.root.join('data', 'demographics.csv'))
end
# Loads survey responses and then students from the cleaned SFTP directory,
# resets race scores, and clears the Rails cache.
desc 'load survey responses for lowell schools'
task load_survey_responses_for_lowell: :environment do
# Snapshot both counts before loading so the messages below can report how many rows this run added.
survey_item_response_count = SurveyItemResponse.count
student_count = Student.count
path = '/data/survey_responses/clean/'
# NOTE(review): unlike the student load below, this call passes no rules —
# confirm that loading responses without a Lowell filter is intended here.
Sftp::Directory.open(path:) do |file|
SurveyResponsesDataLoader.from_file(file:)
end
puts "=====================> Completed loading #{SurveyItemResponse.count - survey_item_response_count} survey responses. #{SurveyItemResponse.count} total responses in the database"
# Second pass over the same path, this time loading students with the SkipNonLowellSchools rule.
Sftp::Directory.open(path:) do |file|
StudentLoader.from_file(file:, rules: [Rule::SkipNonLowellSchools])
end
puts "=====================> Completed loading #{Student.count - student_count} students. #{Student.count} total students"
puts 'Resetting race scores'
RaceScoreLoader.reset(fast_processing: false)
puts "=====================> Completed loading #{RaceScore.count} race scores"
Rails.cache.clear
end
desc 'load students for lowell'
task load_students_for_lowell: :environment do
SurveyItemResponse.update_all(student_id: nil)
StudentRace.delete_all
Student.delete_all
Sftp::Directory.open(path: '/data/survey_responses/clean/') do |file|
StudentLoader.from_file(file:, rules: [Rule::SkipNonLowellSchools])
end
puts "=====================> Completed loading #{Student.count - student_count} students. #{Student.count} total students"
puts 'Resetting race scores'
RaceScoreLoader.reset(fast_processing: false)
puts "=====================> Completed loading #{RaceScore.count} survey responses"
@ -152,12 +208,12 @@ namespace :data do
desc 'load admin_data'
task load_admin_data: :environment do
  # Admin data is updated in place rather than wiped and reloaded, so nothing is
  # deleted up front; the starting count lets the summary report only what this run added.
  # NOTE(review): assumes Dese::Loader updates existing values instead of inserting duplicates — confirm.
  original_count = AdminDataValue.count
  Dir.glob(Rails.root.join('data', 'admin_data', 'dese', '*.csv')).each do |filepath|
    puts "=====================> Loading data from csv at path: #{filepath}"
    Dese::Loader.load_data(filepath:)
  end
  puts "=====================> Completed loading #{AdminDataValue.count - original_count} admin data values"
end
desc 'load students'

@ -0,0 +1,13 @@
District,School,DESE ID,Category,Item ID,NonLikert Title,NL_Value,LikertScore,Benchmark,Data Type,Academic Year
Attleboro,Attleboro High School,160505,2-C-i,a-vale-i1,Chronic absence rate,19.7,1,10,%,2018-19
Milford,Woodland Elementary School,1850090,2-C-i,a-vale-i1,Chronic absence rate,6.8,1,10,%,2018-19
Revere,Beachmont Elementary School,2480013,2-C-i,a-vale-i1,Chronic absence rate,4.2,1,10,%,2018-19
Winchester,Winchester High School,3440505,2-C-i,a-vale-i1,Chronic absence rate,7.2,1,10,%,2018-19
Attleboro,Attleboro High School,160505,3-A-i,a-reso-i1,Average class size,20.6,2,20,,2018-19
Milford,Woodland Elementary School,1850090,3-A-i,a-reso-i1,Average class size,22.5,2,20,,2018-19
Revere,Beachmont Elementary School,2480013,3-A-i,a-reso-i1,Average class size,17,2,20,,2018-19
Winchester,Winchester High School,3440505,3-A-i,a-reso-i1,Average class size,17,2,20,,2018-19
Attleboro,Attleboro High School,160505,3-A-ii,a-sust-i3,Student to instructional support staff ratio,15.28896673,3,43.4,,2018-19
Milford,Woodland Elementary School,1850090,3-A-ii,a-sust-i3,Student to instructional support staff ratio,22.85714286,3,43.4,,2018-19
Revere,Beachmont Elementary School,2480013,3-A-ii,a-sust-i3,Student to instructional support staff ratio,38,3,43.4,,2018-19
Winchester,Winchester High School,3440505,3-A-ii,a-sust-i3,Student to instructional support staff ratio,135.9,3,43.4,,2018-19
1 District School DESE ID Category Item ID NonLikert Title NL_Value LikertScore Benchmark Data Type Academic Year
2 Attleboro Attleboro High School 160505 2-C-i a-vale-i1 Chronic absence rate 19.7 1 10 % 2018-19
3 Milford Woodland Elementary School 1850090 2-C-i a-vale-i1 Chronic absence rate 6.8 1 10 % 2018-19
4 Revere Beachmont Elementary School 2480013 2-C-i a-vale-i1 Chronic absence rate 4.2 1 10 % 2018-19
5 Winchester Winchester High School 3440505 2-C-i a-vale-i1 Chronic absence rate 7.2 1 10 % 2018-19
6 Attleboro Attleboro High School 160505 3-A-i a-reso-i1 Average class size 20.6 2 20 2018-19
7 Milford Woodland Elementary School 1850090 3-A-i a-reso-i1 Average class size 22.5 2 20 2018-19
8 Revere Beachmont Elementary School 2480013 3-A-i a-reso-i1 Average class size 17 2 20 2018-19
9 Winchester Winchester High School 3440505 3-A-i a-reso-i1 Average class size 17 2 20 2018-19
10 Attleboro Attleboro High School 160505 3-A-ii a-sust-i3 Student to instructional support staff ratio 15.28896673 3 43.4 2018-19
11 Milford Woodland Elementary School 1850090 3-A-ii a-sust-i3 Student to instructional support staff ratio 22.85714286 3 43.4 2018-19
12 Revere Beachmont Elementary School 2480013 3-A-ii a-sust-i3 Student to instructional support staff ratio 38 3 43.4 2018-19
13 Winchester Winchester High School 3440505 3-A-ii a-sust-i3 Student to instructional support staff ratio 135.9 3 43.4 2018-19

@ -2,6 +2,7 @@ require 'rails_helper'
describe AdminDataLoader do
let(:path_to_admin_data) { Rails.root.join('spec', 'fixtures', 'sample_admin_data.csv') }
let(:path_to_secondary_admin_data) { Rails.root.join('spec', 'fixtures', 'secondary_sample_admin_data.csv') }
let(:ay_2018_19) { create(:academic_year, range: '2018-19') }
let(:attleboro) { create(:school, name: 'Attleboro High School', dese_id: 160_505) }
let(:winchester) { create(:school, name: 'Winchester High School', dese_id: 3_440_505) }
@ -36,7 +37,7 @@ describe AdminDataLoader do
# it 'assigns the school to the admin data value' do
expect(AdminDataValue.first.school).to eq attleboro
expect(AdminDataValue.last.school).to eq beachmont
expect(AdminDataValue.last.school).to eq winchester
# end
# it 'links the admin data value to the correct admin data item' do
@ -45,7 +46,7 @@ describe AdminDataLoader do
# end
# it 'loads all the admin data values in the target csv file' do
expect(AdminDataValue.count).to eq 10
expect(AdminDataValue.count).to eq 11
# end
# it 'captures the likert score ' do
@ -53,6 +54,8 @@ describe AdminDataLoader do
admin_data_item: chronic_absense_rate).likert_score).to eq 3.03
expect(AdminDataValue.find_by(school: beachmont, academic_year: ay_2018_19,
admin_data_item: student_to_instructor_ratio).likert_score).to eq 3.5
expect(AdminDataValue.find_by(school: winchester, academic_year: ay_2018_19,
admin_data_item: student_to_instructor_ratio).likert_score).to eq 5
# end
# it 'rounds up any likert_scores between 0 and 1 (non-inclusive) to 1' do
@ -62,16 +65,27 @@ describe AdminDataLoader do
# it 'rejects importing rows with a value of 0' do
expect(AdminDataValue.where(school: attleboro, academic_year: ay_2018_19,
admin_data_item: AdminDataItem.find_by_admin_data_item_id('a-reso-i1'))).not_to exist
expect(AdminDataValue.where(school: winchester, academic_year: ay_2018_19,
admin_data_item: AdminDataItem.find_by_admin_data_item_id('a-sust-i3'))).not_to exist
# end
end
context 'when a second file exists' do
before :each do
AdminDataLoader.load_data filepath: path_to_secondary_admin_data
end
it 'updates likert scores to match the new file' do
expect(AdminDataValue.find_by(school: attleboro, academic_year: ay_2018_19,
admin_data_item: chronic_absense_rate).likert_score).to eq 1
expect(AdminDataValue.find_by(school: beachmont, academic_year: ay_2018_19,
admin_data_item: student_to_instructor_ratio).likert_score).to eq 3
end
end
end
describe 'output to console' do
it 'outputs a messsage saying a value has been rejected' do
output = capture_stdout { AdminDataLoader.load_data filepath: path_to_admin_data }.gsub("\n", '')
expect(output).to eq 'Invalid score: 0.0 for school: Attleboro High School admin data item a-reso-i1 Invalid score: 100.0 for school: Winchester High School admin data item a-sust-i3 '
output = capture_stdout { AdminDataLoader.load_data filepath: path_to_admin_data }.delete("\n")
expect(output).to eq 'Invalid score: 0.0 for school: Attleboro High School admin data item a-reso-i1 '
end
end
end

@ -27,6 +27,60 @@ RSpec.describe SurveyItemValues, type: :model do
create(:academic_year, range: '2022-23')
end
let(:common_headers) do
['Recorded Date', 'DeseID', 'ResponseID', 'Duration (in seconds)', 'Gender', 'Grade']
end
# Creates one survey item per standard student survey item id and returns
# those ids followed by the common headers as a single flat array.
let(:standard_survey_items) do
  item_ids = %w[s-peff-q1 s-peff-q2 s-peff-q3 s-peff-q4 s-peff-q5 s-peff-q6 s-phys-q1 s-phys-q2 s-phys-q3 s-phys-q4
                s-emsa-q1 s-emsa-q2 s-emsa-q3 s-sbel-q1 s-sbel-q2 s-sbel-q3 s-sbel-q4 s-sbel-q5 s-tint-q1 s-tint-q2
                s-tint-q3 s-tint-q4 s-tint-q5 s-vale-q1 s-vale-q2 s-vale-q3 s-vale-q4 s-acpr-q1 s-acpr-q2 s-acpr-q3
                s-acpr-q4 s-sust-q1 s-sust-q2 s-cure-q1 s-cure-q2 s-cure-q3 s-cure-q4 s-sten-q1 s-sten-q2 s-sten-q3
                s-sper-q1 s-sper-q2 s-sper-q3 s-sper-q4 s-civp-q1 s-civp-q2 s-civp-q3 s-civp-q4 s-grit-q1 s-grit-q2
                s-grit-q3 s-grit-q4 s-grmi-q1 s-grmi-q2 s-grmi-q3 s-grmi-q4 s-expa-q1 s-appa-q1 s-appa-q2 s-appa-q3
                s-acst-q1 s-acst-q2 s-acst-q3 s-poaf-q1 s-poaf-q2 s-poaf-q3 s-poaf-q4]
  item_ids.each { |item_id| create(:survey_item, survey_item_id: item_id) }
  item_ids + common_headers
end
# Header list for a short-form survey: the ids of three survey items created
# with on_short_form: true, followed by the common headers.
let(:short_form_survey_items) do
# Create the three short-form items and keep only their survey_item_id values.
survey_item_ids = [create(:survey_item, survey_item_id: 's-phys-q1', on_short_form: true),
create(:survey_item, survey_item_id: 's-phys-q2', on_short_form: true),
create(:survey_item, survey_item_id: 's-phys-q3',
on_short_form: true)].map(&:survey_item_id)
# NOTE(review): this calls create a second time for each id already created above,
# now without passing on_short_form — confirm the second set of records is intended.
survey_item_ids.map do |survey_item_id|
create(:survey_item, survey_item_id:)
end
# Append the common headers and flatten so the result is one flat array of header names.
(survey_item_ids << common_headers).flatten
end
# Header list for an early-education survey: the ids of three 's-emsa-es*'
# survey items, followed by the common headers.
let(:early_education_survey_items) do
# Create the three items and keep only their survey_item_id values.
survey_item_ids = [create(:survey_item, survey_item_id: 's-emsa-es1'),
create(:survey_item, survey_item_id: 's-emsa-es2'),
create(:survey_item, survey_item_id: 's-emsa-es3')].map(&:survey_item_id)
# NOTE(review): this calls create a second time for each id already created above —
# confirm the second set of records is intended.
survey_item_ids.map do |survey_item_id|
create(:survey_item, survey_item_id:)
end
# Append the common headers and flatten so the result is one flat array of header names.
(survey_item_ids << common_headers).flatten
end
# Creates one survey item per teacher survey item id and returns those ids
# followed by the common headers as a single flat array.
let(:teacher_survey_items) do
  item_ids = %w[t-prep-q1 t-prep-q2 t-prep-q3 t-ieff-q1 t-ieff-q2 t-ieff-q3 t-ieff-q4 t-pcom-q1 t-pcom-q2 t-pcom-q3
                t-pcom-q4 t-pcom-q5 t-inle-q1 t-inle-q2 t-inle-q3 t-prtr-q1 t-prtr-q2 t-prtr-q3 t-coll-q1 t-coll-q2
                t-coll-q3 t-qupd-q1 t-qupd-q2 t-qupd-q3 t-qupd-q4 t-pvic-q1 t-pvic-q2 t-pvic-q3 t-psup-q1 t-psup-q2
                t-psup-q3 t-psup-q4 t-acch-q1 t-acch-q2 t-acch-q3 t-reso-q1 t-reso-q2 t-reso-q3 t-reso-q4 t-reso-q5
                t-sust-q1 t-sust-q2 t-sust-q3 t-sust-q4 t-curv-q1 t-curv-q2 t-curv-q3 t-curv-q4 t-cure-q1 t-cure-q2
                t-cure-q3 t-cure-q4 t-peng-q1 t-peng-q2 t-peng-q3 t-peng-q4 t-ceng-q1 t-ceng-q2 t-ceng-q3 t-ceng-q4
                t-sach-q1 t-sach-q2 t-sach-q3 t-psol-q1 t-psol-q2 t-psol-q3 t-expa-q2 t-expa-q3 t-phya-q2 t-phya-q3]
  item_ids.each { |item_id| create(:survey_item, survey_item_id: item_id) }
  item_ids + common_headers
end
context '.recorded_date' do
it 'returns the recorded date' do
row = { 'RecordedDate' => '2017-01-01' }
@ -43,18 +97,11 @@ RSpec.describe SurveyItemValues, type: :model do
context '.school' do
it 'returns the school that maps to the dese id provided' do
attleboro
headers = ['Dese ID']
row = { 'Dese ID' => '1234' }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.school).to eq attleboro
headers = ['School']
row = { 'School' => '1234' }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.school).to eq attleboro
headers = ['School- Attleboro']
row = { 'School- Attleboro' => '1234' }
row = { 'DeseID' => '1234' }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.school).to eq attleboro
end
@ -67,7 +114,6 @@ RSpec.describe SurveyItemValues, type: :model do
expect(values.grade).to eq 1
end
end
context '.gender' do
it 'returns the grade that maps to the grade provided' do
row = { 'Gender' => '1' }
@ -76,40 +122,60 @@ RSpec.describe SurveyItemValues, type: :model do
end
end
context '.dese_id' do
it 'returns the dese id for the id provided' do
headers = ['Dese ID']
row = { 'Dese ID' => '11' }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.dese_id).to eq 11
headers = ['School']
row = { 'School' => '22' }
values = SurveyItemValues.new(row:, headers:, genders:, survey_items:, schools:)
expect(values.dese_id).to eq 22
end
end
context '.survey_type' do
context '.respondent_type' do
it 'reads header to find the survey type' do
headers = %w[s-sbel-q5 s-phys-q2 RecordedDate]
values = SurveyItemValues.new(row: {}, headers:, genders:, survey_items:, schools:)
expect(values.survey_type).to eq :student
expect(values.respondent_type).to eq :student
headers = %w[t-sbel-q5 t-phys-q2]
values = SurveyItemValues.new(row: {}, headers:, genders:, survey_items:, schools:)
expect(values.respondent_type).to eq :teacher
end
end
context '.survey_type' do
context 'when survey type is standard form' do
it 'returns the survey type' do
headers = standard_survey_items
values = SurveyItemValues.new(row: {}, headers:, genders:, survey_items:, schools:)
expect(values.survey_type).to eq :standard
end
end
context 'when survey type is teacher form' do
it 'returns the survey type' do
headers = teacher_survey_items
values = SurveyItemValues.new(row: {}, headers:, genders:, survey_items:, schools:)
expect(values.survey_type).to eq :teacher
end
end
context 'when survey type is short form' do
it 'returns the survey type' do
headers = short_form_survey_items
values = SurveyItemValues.new(row: {}, headers:, genders:, survey_items:, schools:)
expect(values.survey_type).to eq :short_form
end
end
context 'when survey type is early education' do
it 'returns the survey type' do
headers = early_education_survey_items
values = SurveyItemValues.new(row: {}, headers:, genders:, survey_items:, schools:)
expect(values.survey_type).to eq :early_education
end
end
end
context '.valid_duration' do
context 'when duration is valid' do
it 'returns true' do
headers = ['s-sbel-q5', 's-phys-q2', 'RecordedDate', 'Duration (in seconds)']
values = SurveyItemValues.new(row: { 'Duration (in seconds)' => '240' }, headers:, genders:, survey_items:,
headers = standard_survey_items
values = SurveyItemValues.new(row: { 'Duration (in seconds)' => '240', 'Gender' => 'Male' }, headers:, genders:, survey_items:,
schools:)
expect(values.valid_duration?).to eq true
headers = ['t-sbel-q5', 't-phys-q2', 'Duration (in seconds)']
headers = teacher_survey_items
values = SurveyItemValues.new(row: { 'Duration (in seconds)' => '300' }, headers:, genders:, survey_items:,
schools:)
expect(values.valid_duration?).to eq true
@ -139,15 +205,19 @@ RSpec.describe SurveyItemValues, type: :model do
context 'when duration is invalid' do
it 'returns false' do
headers = ['s-sbel-q5', 's-phys-q2', 'RecordedDate', 'Duration (in seconds)']
headers = standard_survey_items
values = SurveyItemValues.new(row: { 'Duration (in seconds)' => '239' }, headers:, genders:, survey_items:,
schools:)
expect(values.valid_duration?).to eq false
headers = ['t-sbel-q5', 't-phys-q2', 'Duration (in seconds)']
headers = teacher_survey_items
values = SurveyItemValues.new(row: { 'Duration (in seconds)' => '299' }, headers:, genders:, survey_items:,
schools:)
expect(values.valid_duration?).to eq false
headers = short_form_survey_items
values = SurveyItemValues.new(row: { 'Duration (in seconds)' => '99' }, headers:, genders:, survey_items:,
schools:)
expect(values.valid_duration?).to eq false
end
end
end
@ -188,7 +258,7 @@ RSpec.describe SurveyItemValues, type: :model do
end
end
xcontext '.valid_grade?' do
context '.valid_grade?' do
context 'when grade is valid' do
before :each do
attleboro
@ -201,7 +271,7 @@ RSpec.describe SurveyItemValues, type: :model do
expect(values.valid_grade?).to eq true
end
xit 'returns true for teachers' do
it 'returns true for teachers' do
headers = %w[t-sbel-q5 t-phys-q2 grade RecordedDate]
values = SurveyItemValues.new(row: { 'RecordedDate' => recorded_date, 'Dese ID' => '1234' }, headers:, genders:, survey_items:,
schools:)
@ -209,7 +279,7 @@ RSpec.describe SurveyItemValues, type: :model do
end
end
xcontext 'when grade is invalid' do
context 'when grade is invalid' do
before :each do
attleboro
attleboro_respondents
@ -242,13 +312,13 @@ RSpec.describe SurveyItemValues, type: :model do
context 'when the standard deviation is invalid' do
it 'returns false for student questions' do
headers = %w[s-sbel-q5 s-phys-q1 s-phys-q2 RecordedDate]
values = SurveyItemValues.new(row: { 'RecordedDate' => recorded_date, 'Dese ID' => '1234', 's-sbel-q5' => '1', 's-phys-q2' => '', 's-phys-q3' => '1' }, headers:, genders:, survey_items:,
values = SurveyItemValues.new(row: { 'RecordedDate' => recorded_date, 'Dese ID' => '1234', 's-sbel-q5' => '1', 's-phys-q2' => '', 's-phys-q2' => '1' }, headers:, genders:, survey_items:,
schools: School.school_hash)
expect(values.valid_sd?).to eq false
end
it 'returns false for teacher questions' do
headers = %w[t-sbel-q5 t-phys-q1 t-phys-q2 RecordedDate]
values = SurveyItemValues.new(row: { 'RecordedDate' => recorded_date, 'Dese ID' => '1234', 't-sbel-q5' => '1', 't-phys-q2' => '', 't-phys-q3' => '1' }, headers:, genders:, survey_items:,
values = SurveyItemValues.new(row: { 'RecordedDate' => recorded_date, 'Dese ID' => '1234', 't-sbel-q5' => '1', 't-phys-q2' => '', 't-phys-q2' => '1' }, headers:, genders:, survey_items:,
schools: School.school_hash)
expect(values.valid_sd?).to eq false
end

Loading…
Cancel
Save