Add admin data for 4D. Finishes #182818489 and #182793402

pull/1/head
rebuilt 3 years ago
parent 1e349519bc
commit 048aacd641

@ -52,6 +52,10 @@ gem 'turbo-rails'
gem 'stimulus-rails'
gem 'watir'
gem 'selenium-webdriver', '~> 4.4'
group :development, :test do
# Call 'byebug' anywhere in the code to stop execution and get a debugger console
gem 'byebug', platform: :mri

@ -109,6 +109,7 @@ GEM
rack-test (>= 0.6.3)
regexp_parser (>= 1.5, < 3.0)
xpath (~> 3.2)
childprocess (4.1.0)
concurrent-ruby (1.1.10)
crass (1.0.6)
cssbundling-rails (1.1.0)
@ -318,9 +319,15 @@ GEM
rubocop (>= 1.7.0, < 2.0)
rubocop-ast (>= 0.4.0)
ruby-progressbar (1.11.0)
rubyzip (2.3.2)
seed_dump (3.3.1)
activerecord (>= 4)
activesupport (>= 4)
selenium-webdriver (4.4.0)
childprocess (>= 0.5, < 5.0)
rexml (~> 3.2, >= 3.2.5)
rubyzip (>= 1.2.2, < 3.0)
websocket (~> 1.0)
simplecov (0.21.2)
docile (~> 1.1)
simplecov-html (~> 0.11)
@ -381,12 +388,16 @@ GEM
uniform_notifier (1.16.0)
warden (1.2.9)
rack (>= 2.0.9)
watir (7.1.0)
regexp_parser (>= 1.2, < 3)
selenium-webdriver (~> 4.0)
web-console (4.2.0)
actionview (>= 6.0.0)
activemodel (>= 6.0.0)
bindex (>= 0.4.0)
railties (>= 6.0.0)
webrick (1.7.0)
websocket (1.2.9)
websocket-driver (0.7.5)
websocket-extensions (>= 0.1.0)
websocket-extensions (0.1.5)
@ -436,6 +447,7 @@ DEPENDENCIES
rspec-rails (~> 5.1.0)
rubocop
seed_dump
selenium-webdriver (~> 4.4)
simplecov
solargraph-reek
spring
@ -447,6 +459,7 @@ DEPENDENCIES
twilio-ruby (~> 4.11.1)
tzinfo-data
uglifier (>= 1.3.0)
watir
web-console
RUBY VERSION

@ -0,0 +1,74 @@
require 'csv'
module Dese
  # Loads likert scores from a CSV file into AdminDataValue records.
  #
  # Every row is matched to a School (by DESE id), an AcademicYear (by range)
  # and an AdminDataItem (by admin data item id). Several header spellings are
  # accepted for each of those columns; see the private accessors below.
  class FourDLoader
    # Reads the CSV at +filepath+ (the first line is treated as the header row)
    # and creates or updates one AdminDataValue per valid row.
    #
    # Rows whose score falls outside 1..5 are reported on stdout and skipped.
    # Rows whose school cannot be found are skipped silently.
    #
    # @param filepath [String, Pathname] location of the CSV file
    def self.load_data(filepath:)
      # Stream the file row by row instead of reading it into memory first.
      CSV.foreach(filepath, headers: true) do |row|
        score = likert_score(row:)
        unless valid_likert_score(likert_score: score)
          puts "Invalid score: #{score}\n" \
               "for school: #{school_label(row:)}\n" \
               "admin data item #{admin_data_item(row:)} "
          next
        end
        create_admin_data_value(row:, score:)
      end
    end

    # @return [Boolean] true when the score lies in the inclusive range 1..5
    def self.valid_likert_score(likert_score:)
      likert_score.between?(1, 5)
    end

    # Extracts the score from the row (tolerating three header spellings) as a
    # float. A missing or non-numeric cell becomes 0.0, which is later rejected
    # by the validity check.
    def self.likert_score(row:)
      likert_score = (row['Likert Score'] || row['LikertScore'] || row['Likert_Score']).to_f
      round_up_to_one(likert_score:)
    end

    # Scores strictly between 0 and 1 are raised to 1; everything else is
    # returned unchanged.
    def self.round_up_to_one(likert_score:)
      likert_score.positive? && likert_score < 1 ? 1 : likert_score
    end

    # Academic year range of the row, e.g. the value under 'Academic Year'.
    def self.ay(row:)
      row['Academic Year'] || row['AcademicYear']
    end

    # Raw DESE id cell of the row (not yet converted to an integer).
    def self.dese_id(row:)
      row['DESE ID'] || row['Dese ID'] || row['Dese Id']
    end

    # Admin data item identifier of the row.
    def self.admin_data_item(row:)
      row['Admin Data Item'] || row['Item ID'] || row['Item Id']
    end

    # Human readable school description for log output. Uses the same header
    # fallbacks as the loader itself and never raises when the school is
    # unknown, so one bad row cannot abort the whole import.
    def self.school_label(row:)
      id = dese_id(row:)
      School.find_by_dese_id(id.to_i)&.name || "unknown (DESE ID: #{id})"
    end

    # Creates the AdminDataValue for the row, or updates the score of the
    # existing one, so running the loader twice does not duplicate records.
    # Does nothing when the school is unknown.
    #
    # @raise whatever +save!+ raises when the record cannot be persisted; the
    #   update path now fails as loudly as the create path always did.
    def self.create_admin_data_value(row:, score:)
      school = School.find_by_dese_id(dese_id(row:).to_i)
      return if school.nil?

      admin_data_value = AdminDataValue.find_or_initialize_by(
        academic_year: AcademicYear.find_by_range(ay(row:)),
        school:,
        admin_data_item: AdminDataItem.find_by_admin_data_item_id(admin_data_item(row:))
      )
      admin_data_value.likert_score = score
      admin_data_value.save!
    end

    # A bare `private` does not affect `def self.` methods, so the helpers are
    # hidden explicitly.
    private_class_method :valid_likert_score,
                         :likert_score,
                         :round_up_to_one,
                         :ay,
                         :dese_id,
                         :admin_data_item,
                         :school_label,
                         :create_admin_data_value
  end
end

@ -0,0 +1,65 @@
require 'watir'
require 'csv'
module Dese
  # Scrapes a DESE state report page with a Watir-driven browser and writes one
  # CSV row per school and academic year, including a derived likert score.
  #
  # All the work happens in the constructor: building an instance performs the
  # scrape and writes the file.
  class FourDScraper
    REPORT_URL = 'https://profiles.doe.mass.edu/statereport/plansofhsgrads.aspx'
    ADMIN_DATA_ITEM_ID = 'a-cgpr-i1'
    HEADERS = ['School Name', 'DESE ID', '4 Year Private College', '4 Year Public College',
               '2 Year Private College', '2 Year Public College', 'Other Post Secondary',
               'Apprenticeship', 'Work', 'Military', 'Other', 'Unknown', 'Total',
               'Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year'].freeze
    # Indexes of the cells that are summed by #calculate.
    SCORED_CELLS = (2..8)

    # Scrapes every AcademicYear and writes the results to +filepath+,
    # overwriting any existing file.
    #
    # @param filepath [String, Pathname] destination CSV file
    def initialize(filepath: Rails.root.join('data', 'admin_data', 'dese', 'four_d.csv'))
      browser = Watir::Browser.new
      write_headers(filepath:)
      AcademicYear.all.each do |academic_year|
        table = scrape(browser:, url: REPORT_URL, range: academic_year.range)
        next if table.nil?

        write_csv(table:, filepath:, range: academic_year.range, id: ADMIN_DATA_ITEM_ID)
      end
    ensure
      # Close the browser even when scraping or writing fails, so no browser
      # process is left behind.
      browser&.close
    end

    # Loads the school-level report for one academic year.
    #
    # @param browser [Watir::Browser] browser used to drive the page
    # @param url [String] address of the report page
    # @param range [String] academic year range to select, e.g. '2020-21'
    # @return the +tr+ nodes of the rendered page, or nil when the page offers
    #   no option whose text equals +range+
    def scrape(browser:, url:, range:)
      browser.goto(url)
      return unless browser.option(text: range).present?

      browser.select(id: 'ctl00_ContentPlaceHolder1_ddReportType').select(/School/)
      browser.select(id: 'ctl00_ContentPlaceHolder1_ddYear').select(text: range)
      browser.button(id: 'btnViewReport').click
      sleep 5 # Sleep to prevent hitting mass.edu with too many requests
      Nokogiri::HTML(browser.html).css('tr')
    end

    # Creates (or truncates) the CSV file and writes the header row.
    def write_headers(filepath:)
      CSV.open(filepath, 'w') { |csv| csv << HEADERS }
    end

    # Appends one CSV line per table row that carries a non-zero numeric DESE id
    # in its second cell. Rows without such an id (for example rows with no
    # +td+ cells) are skipped.
    #
    # Each written line is the row's cell texts followed by the raw score, the
    # score clamped to 1..5 and rounded to two decimals, +id+ and +range+.
    def write_csv(table:, filepath:, range:, id:)
      CSV.open(filepath, 'a') do |csv|
        table.each do |row|
          items = row.css('td').map(&:text)
          # nil.to_i and non-numeric text both yield 0, so one check suffices.
          next if items[1].to_i.zero?

          raw_likert_score = calculate(cells: items)
          likert_score = raw_likert_score.clamp(1, 5).round(2)
          csv << (items + [raw_likert_score, likert_score, id, range])
        end
      end
    end

    # Sums cells 2 through 8 (missing or non-numeric cells count as 0.0) and
    # scales the total by 4/75, so a sum of 75 yields a raw score of 4.
    # NOTE(review): the cells presumably line up with the '4 Year Private
    # College' .. 'Work' headers; confirm against the scraped table layout.
    #
    # @param cells [Array<String>] cell texts of one table row
    # @return [Float] unclamped likert score
    def calculate(cells:)
      # Plain left-to-right addition keeps results bit-identical to adding the
      # cells one by one (Enumerable#sum would use compensated summation).
      SCORED_CELLS.map { |index| cells[index].to_f }.reduce(:+) * 4 / 75
    end
  end
end

File diff suppressed because it is too large Load Diff

@ -67,9 +67,10 @@ namespace :data do
# Rake task that loads admin data CSV files; it depends on the Rails :environment task.
# NOTE(review): this listing appears to be a diff hunk whose +/- markers were stripped, so
# replaced and replacement lines sit side by side (two `Dir.glob ... do` lines and two
# loader calls for a single `end`). Confirm against the real Rakefile before relying on it.
desc 'load admin_data'
task load_admin_data: :environment do
# Iterates the CSV files directly under data/admin_data.
Dir.glob(Rails.root.join('data', 'admin_data', '*.csv')).each do |filepath|
# Clears the AdminDataValue table before loading.
AdminDataValue.delete_all
# Iterates the CSV files under data/admin_data/dese.
Dir.glob(Rails.root.join('data', 'admin_data', 'dese', '*.csv')).each do |filepath|
puts "=====================> Loading data from csv at path: #{filepath}"
AdminDataLoader.load_data filepath:
Dese::FourDLoader.load_data filepath:
end
# NOTE(review): the message says "survey responses" but the count is of AdminDataValue rows.
puts "=====================> Completed loading #{AdminDataValue.count} survey responses"
end

File diff suppressed because it is too large Load Diff

@ -0,0 +1,50 @@
require 'rails_helper'
# Loads the sample fixture through Dese::FourDLoader against a seeded database
# and checks the stored likert scores, the record count and that a second run
# does not add records.
RSpec.describe Dese::FourDLoader, type: :model do
  let(:path_to_admin_data) { Rails.root.join('spec', 'fixtures', 'sample_four_d_data.csv') }

  let(:ay_2020_21) { AcademicYear.find_by_range('2020-21') }
  let(:ay_2018_19) { AcademicYear.find_by_range('2018-19') }
  let(:ay_2017_18) { AcademicYear.find_by_range('2017-18') }
  let(:ay_2016_17) { AcademicYear.find_by_range('2016-17') }

  let(:four_d) { AdminDataItem.find_by_admin_data_item_id('a-cgpr-i1') }

  let(:attleboro) { School.find_by_dese_id(160_505) }
  let(:winchester) { School.find_by_dese_id(3_440_505) }
  let(:milford) { School.find_by_dese_id(1_850_505) }
  let(:seacoast) { School.find_by_dese_id(2_480_520) }
  let(:next_wave) { School.find_by_dese_id(2_740_510) }

  # Stored likert score of the four_d item for the given school and year.
  def stored_score(school, academic_year)
    AdminDataValue.find_by(school:, admin_data_item: four_d, academic_year:).likert_score
  end

  before(:each) { Rails.application.load_seed }

  after(:each) { DatabaseCleaner.clean }

  context 'when running the loader' do
    before(:each) { described_class.load_data(filepath: path_to_admin_data) }

    it 'load the correct admin data values' do
      expect(stored_score(winchester, ay_2016_17)).to eq 5
      expect(stored_score(attleboro, ay_2018_19)).to eq 5
      expect(stored_score(milford, ay_2017_18)).to eq 4.92
      expect(stored_score(seacoast, ay_2020_21)).to eq 3.84
      expect(stored_score(next_wave, ay_2020_21)).to eq 4.8
    end

    it 'loads the correct number of items' do
      expect(AdminDataValue.count).to eq 230
    end

    it 'is idempotent' do
      # The loader already ran once in the before hook; run it a second time.
      described_class.load_data(filepath: path_to_admin_data)
      expect(AdminDataValue.count).to eq 230
    end
  end
end

@ -0,0 +1,24 @@
require 'rails_helper'
require 'fileutils'
# Spec for Dese::FourDScraper. Naming the described class gives the example
# group a description in the output and makes +described_class+ available.
RSpec.describe Dese::FourDScraper, type: :model do
  let(:academic_years) do
    %w[2020-21 2019-20 2018-19 2017-18 2016-17].map do |range|
      create(:academic_year, range:)
    end
  end

  # Force creation of the academic years before every example.
  before :each do
    academic_years
  end

  # Disabled via xcontext: constructing the scraper runs a full scrape.
  # NOTE(review): presumably skipped because it needs a real browser and network access.
  xcontext 'Creating a new FourDScraper' do
    it 'creates a csv file with the scraped data' do
      file = Rails.root.join('tmp', 'spec', 'dese', 'four_d.csv')
      # Create the directory from the same rooted path the file uses, so the
      # example does not depend on the current working directory.
      FileUtils.mkdir_p(file.dirname)
      described_class.new(filepath: file)
      expect(file).to exist
    end
  end
end
Loading…
Cancel
Save