Fix problem with the DESE scraper lumping in 2021-22 data as 2022-23 data.

Deleted unused CSVs. Turned off puts statements in admin loader.
Removed old, now-unused admin data loader class.
pull/1/head
rebuilt 3 years ago
parent ca31bd3217
commit 3f2a7dff50

@ -1,60 +0,0 @@
# frozen_string_literal: true
# Loads admin data values from a CSV file into AdminDataValue records.
#
# Every row supplies a likert score, an academic year range, a school DESE id
# and an admin data item id. Column headers are spelled differently across
# files, so each value is looked up under all of the known header variants.
class AdminDataLoader
  # Name reported for a rejected row whose DESE id matches no known school.
  UNKNOWN_SCHOOL_NAME = 'School not in consortium'
  private_constant :UNKNOWN_SCHOOL_NAME

  # Reads the CSV at +filepath+ and creates one AdminDataValue per valid row.
  # Rows whose (rounded) likert score falls outside 1..5 are skipped and a
  # message describing the rejected row is written to stdout.
  #
  # @param filepath [String, Pathname] path to a CSV file with a header row
  def self.load_data(filepath:)
    CSV.parse(File.read(filepath), headers: true) do |row|
      score = likert_score(row:)
      unless valid_likert_score(likert_score: score)
        report_invalid_score(row:, score:)
        next
      end
      create_admin_data_value(row:, score:)
    end
  end

  # @return [Boolean] true when the score lies in the inclusive range 1..5
  def self.valid_likert_score(likert_score:)
    likert_score >= 1 && likert_score <= 5
  end

  # Reads the score from whichever header variant is present and converts it
  # with to_f, so a missing or non-numeric cell becomes 0.0 (and is rejected).
  def self.likert_score(row:)
    likert_score = (row['LikertScore'] || row['Likert Score'] || row['Likert_Score']).to_f
    round_up_to_one(likert_score:)
  end

  # Scores strictly between 0 and 1 are raised to 1; all others pass through.
  def self.round_up_to_one(likert_score:)
    likert_score = 1 if likert_score.positive? && likert_score < 1
    likert_score
  end

  # @return [String, nil] the academic year range cell of the row
  def self.ay(row:)
    row['Academic Year'] || row['AcademicYear']
  end

  # @return [String, nil] the DESE id cell of the row
  def self.dese_id(row:)
    row['DESE ID'] || row['Dese ID'] || row['Dese Id']
  end

  # @return [String, nil] the admin data item id cell of the row
  def self.admin_data_item(row:)
    row['Item ID'] || row['Item Id']
  end

  # Prints the rejection message for a row with an out-of-range score.
  # The school is resolved through dese_id so every header variant works, and
  # an unknown school is reported by a placeholder name instead of raising
  # NoMethodError on nil.
  def self.report_invalid_score(row:, score:)
    school = School.find_by_dese_id(dese_id(row:).to_i)
    school_name = school ? school.name : UNKNOWN_SCHOOL_NAME
    puts "Invalid score: #{score}\nfor school: #{school_name}\nadmin data item #{admin_data_item(row:)} "
  end

  # Persists a single admin data value; create! raises if the record is invalid.
  def self.create_admin_data_value(row:, score:)
    AdminDataValue.create!(likert_score: score,
                           academic_year: AcademicYear.find_by_range(ay(row:)),
                           school: School.find_by_dese_id(dese_id(row:).to_i),
                           admin_data_item: AdminDataItem.find_by_admin_data_item_id(admin_data_item(row:)))
  end

  # A bare `private` does not apply to `def self.` methods, so the helpers are
  # hidden explicitly.
  private_class_method :valid_likert_score
  private_class_method :likert_score
  private_class_method :round_up_to_one
  private_class_method :ay
  private_class_method :dese_id
  private_class_method :admin_data_item
  private_class_method :report_invalid_score
  private_class_method :create_admin_data_value
end

@ -6,7 +6,7 @@ module Dese
include Dese::Enrollments
attr_reader :filepaths
def initialize(filepaths: [Rails.root.join('data', 'admin_data', 'dese', '5D_2_enrollments.csv'),
def initialize(filepaths: [Rails.root.join('data', 'admin_data', 'dese', 'enrollments.csv'),
Rails.root.join('data', 'admin_data', 'dese', '5D_2_age_staffing.csv')])
@filepaths = filepaths
end

@ -1,18 +1,22 @@
module Dese
class Loader
def self.load_data(filepath:)
admin_data_values = []
CSV.parse(File.read(filepath), headers: true) do |row|
score = likert_score(row:)
unless valid_likert_score(likert_score: score)
school = School.find_by_dese_id(row['DESE ID']) || School.new(name: 'School not in consortium',
dese_id: row['DESE ID'])
puts "Invalid score: #{score}
for school: #{school.name}
admin data item #{admin_data_item(row:)} "
# school = School.find_by_dese_id(row['DESE ID']) || School.new(name: 'School not in consortium',
# dese_id: row['DESE ID'])
# puts "Invalid score: #{score}
# for school: #{school.name}
# admin data item #{admin_data_item(row:)} "
next
end
create_admin_data_value(row:, score:)
admin_data_values << create_admin_data_value(row:, score:)
end
AdminDataValue.import(admin_data_values.flatten.compact, batch_size: 1_000, on_duplicate_key_update: :all)
end
private
@ -56,8 +60,9 @@ module Dese
if admin_data_value.present?
admin_data_value.likert_score = score
admin_data_value.save
nil
else
AdminDataValue.create!(
AdminDataValue.new(
likert_score: score,
academic_year: AcademicYear.find_by_range(ay(row:)),
school:,

@ -35,7 +35,7 @@ module Dese
browser.goto(url)
selectors.each do |key, value|
next unless browser.option(text: value).present?
return unless browser.option(text: value).present?
browser.select(id: key).select(text: value)
end

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -1,88 +0,0 @@
require 'rails_helper'
# Specs for AdminDataLoader.load_data, driven by the sample_admin_data.csv fixture.
describe AdminDataLoader do
let(:path_to_admin_data) { Rails.root.join('spec', 'fixtures', 'sample_admin_data.csv') }
let(:ay_2018_19) { create(:academic_year, range: '2018-19') }
let(:attleboro) { create(:school, name: 'Attleboro High School', dese_id: 160_505) }
let(:winchester) { create(:school, name: 'Winchester High School', dese_id: 3_440_505) }
let(:beachmont) { create(:school, dese_id: 2_480_013) }
let(:woodland) { create(:school, dese_id: 1_850_090) } # not explicitly tested
let(:chronic_absense_rate) { create(:admin_data_item, admin_data_item_id: 'a-vale-i1') }
let(:student_to_instructor_ratio) { create(:admin_data_item, admin_data_item_id: 'a-sust-i3') }
let(:a_reso) { create(:admin_data_item, admin_data_item_id: 'a-reso-i1') } # not explicitly tested
# Reference every `let` by name so the records are created before the loader
# runs, then load the fixture once for each example.
before :each do
ay_2018_19
attleboro
winchester
beachmont
woodland
chronic_absense_rate
student_to_instructor_ratio
a_reso
AdminDataLoader.load_data filepath: path_to_admin_data
end
after :each do
DatabaseCleaner.clean
end
describe 'self.load_data' do
# All expectations run inside this single example; the commented-out `it`
# lines below only label each group of expectations.
it 'loads the correct admin data values' do
# it 'assigns the academic year to admin data value' do
expect(AdminDataValue.where(school: attleboro,
admin_data_item: chronic_absense_rate).first.academic_year).to eq ay_2018_19
# end
# it 'assigns the school to the admin data value' do
expect(AdminDataValue.first.school).to eq attleboro
expect(AdminDataValue.last.school).to eq beachmont
# end
# it 'links the admin data value to the correct admin data item' do
expect(AdminDataValue.first.admin_data_item).to eq chronic_absense_rate
expect(AdminDataValue.last.admin_data_item).to eq student_to_instructor_ratio
# end
# it 'loads all the admin data values in the target csv file' do
expect(AdminDataValue.count).to eq 10
# end
# it 'captures the likert score ' do
expect(AdminDataValue.find_by(school: attleboro, academic_year: ay_2018_19,
admin_data_item: chronic_absense_rate).likert_score).to eq 3.03
expect(AdminDataValue.find_by(school: beachmont, academic_year: ay_2018_19,
admin_data_item: student_to_instructor_ratio).likert_score).to eq 3.5
# end
# it 'rounds up any likert_scores between 0 and 1 (non-inclusive) to 1' do
expect(AdminDataValue.where(school: attleboro, academic_year: ay_2018_19,
admin_data_item: AdminDataItem.find_by_admin_data_item_id('a-sust-i3')).first.likert_score).to eq 1
# end
# it 'rejects importing rows with a value of 0' do
expect(AdminDataValue.where(school: attleboro, academic_year: ay_2018_19,
admin_data_item: AdminDataItem.find_by_admin_data_item_id('a-reso-i1'))).not_to exist
expect(AdminDataValue.where(school: winchester, academic_year: ay_2018_19,
admin_data_item: AdminDataItem.find_by_admin_data_item_id('a-sust-i3'))).not_to exist
# end
end
end
describe 'output to console' do
# The before hook has already loaded the fixture once; this example loads it a
# second time and captures only the output of that second run. Newlines are
# stripped before comparing, so the expected text is a single line.
it 'outputs a messsage saying a value has been rejected' do
output = capture_stdout { AdminDataLoader.load_data filepath: path_to_admin_data }.gsub("\n", '')
expect(output).to eq 'Invalid score: 0.0 for school: Attleboro High School admin data item a-reso-i1 Invalid score: 100.0 for school: Winchester High School admin data item a-sust-i3 '
end
end
end
# Runs the given block with $stdout temporarily replaced by an in-memory
# buffer and returns everything the block wrote to it as a String.
# The previous $stdout is restored even when the block raises.
def capture_stdout
  previous = $stdout
  buffer = StringIO.new
  $stdout = buffer
  yield
  buffer.string
ensure
  $stdout = previous
end
Loading…
Cancel
Save