mirror of
https://github.com/edcommonwealth/sqm-dashboards.git
synced 2026-03-07 21:48:16 -08:00
Add admin data for 4D. Finishes #182818489 and #182793402
This commit is contained in:
parent
1e349519bc
commit
048aacd641
9 changed files with 4191 additions and 2 deletions
4
Gemfile
4
Gemfile
|
|
@ -52,6 +52,10 @@ gem 'turbo-rails'
|
||||||
|
|
||||||
gem 'stimulus-rails'
|
gem 'stimulus-rails'
|
||||||
|
|
||||||
|
gem 'watir'
|
||||||
|
|
||||||
|
gem 'selenium-webdriver', '~> 4.4'
|
||||||
|
|
||||||
group :development, :test do
|
group :development, :test do
|
||||||
# Call 'byebug' anywhere in the code to stop execution and get a debugger console
|
# Call 'byebug' anywhere in the code to stop execution and get a debugger console
|
||||||
gem 'byebug', platform: :mri
|
gem 'byebug', platform: :mri
|
||||||
|
|
|
||||||
13
Gemfile.lock
13
Gemfile.lock
|
|
@ -109,6 +109,7 @@ GEM
|
||||||
rack-test (>= 0.6.3)
|
rack-test (>= 0.6.3)
|
||||||
regexp_parser (>= 1.5, < 3.0)
|
regexp_parser (>= 1.5, < 3.0)
|
||||||
xpath (~> 3.2)
|
xpath (~> 3.2)
|
||||||
|
childprocess (4.1.0)
|
||||||
concurrent-ruby (1.1.10)
|
concurrent-ruby (1.1.10)
|
||||||
crass (1.0.6)
|
crass (1.0.6)
|
||||||
cssbundling-rails (1.1.0)
|
cssbundling-rails (1.1.0)
|
||||||
|
|
@ -318,9 +319,15 @@ GEM
|
||||||
rubocop (>= 1.7.0, < 2.0)
|
rubocop (>= 1.7.0, < 2.0)
|
||||||
rubocop-ast (>= 0.4.0)
|
rubocop-ast (>= 0.4.0)
|
||||||
ruby-progressbar (1.11.0)
|
ruby-progressbar (1.11.0)
|
||||||
|
rubyzip (2.3.2)
|
||||||
seed_dump (3.3.1)
|
seed_dump (3.3.1)
|
||||||
activerecord (>= 4)
|
activerecord (>= 4)
|
||||||
activesupport (>= 4)
|
activesupport (>= 4)
|
||||||
|
selenium-webdriver (4.4.0)
|
||||||
|
childprocess (>= 0.5, < 5.0)
|
||||||
|
rexml (~> 3.2, >= 3.2.5)
|
||||||
|
rubyzip (>= 1.2.2, < 3.0)
|
||||||
|
websocket (~> 1.0)
|
||||||
simplecov (0.21.2)
|
simplecov (0.21.2)
|
||||||
docile (~> 1.1)
|
docile (~> 1.1)
|
||||||
simplecov-html (~> 0.11)
|
simplecov-html (~> 0.11)
|
||||||
|
|
@ -381,12 +388,16 @@ GEM
|
||||||
uniform_notifier (1.16.0)
|
uniform_notifier (1.16.0)
|
||||||
warden (1.2.9)
|
warden (1.2.9)
|
||||||
rack (>= 2.0.9)
|
rack (>= 2.0.9)
|
||||||
|
watir (7.1.0)
|
||||||
|
regexp_parser (>= 1.2, < 3)
|
||||||
|
selenium-webdriver (~> 4.0)
|
||||||
web-console (4.2.0)
|
web-console (4.2.0)
|
||||||
actionview (>= 6.0.0)
|
actionview (>= 6.0.0)
|
||||||
activemodel (>= 6.0.0)
|
activemodel (>= 6.0.0)
|
||||||
bindex (>= 0.4.0)
|
bindex (>= 0.4.0)
|
||||||
railties (>= 6.0.0)
|
railties (>= 6.0.0)
|
||||||
webrick (1.7.0)
|
webrick (1.7.0)
|
||||||
|
websocket (1.2.9)
|
||||||
websocket-driver (0.7.5)
|
websocket-driver (0.7.5)
|
||||||
websocket-extensions (>= 0.1.0)
|
websocket-extensions (>= 0.1.0)
|
||||||
websocket-extensions (0.1.5)
|
websocket-extensions (0.1.5)
|
||||||
|
|
@ -436,6 +447,7 @@ DEPENDENCIES
|
||||||
rspec-rails (~> 5.1.0)
|
rspec-rails (~> 5.1.0)
|
||||||
rubocop
|
rubocop
|
||||||
seed_dump
|
seed_dump
|
||||||
|
selenium-webdriver (~> 4.4)
|
||||||
simplecov
|
simplecov
|
||||||
solargraph-reek
|
solargraph-reek
|
||||||
spring
|
spring
|
||||||
|
|
@ -447,6 +459,7 @@ DEPENDENCIES
|
||||||
twilio-ruby (~> 4.11.1)
|
twilio-ruby (~> 4.11.1)
|
||||||
tzinfo-data
|
tzinfo-data
|
||||||
uglifier (>= 1.3.0)
|
uglifier (>= 1.3.0)
|
||||||
|
watir
|
||||||
web-console
|
web-console
|
||||||
|
|
||||||
RUBY VERSION
|
RUBY VERSION
|
||||||
|
|
|
||||||
74
app/services/dese/four_d_loader.rb
Normal file
74
app/services/dese/four_d_loader.rb
Normal file
|
|
@ -0,0 +1,74 @@
|
||||||
|
require 'csv'
|
||||||
|
|
||||||
|
module Dese
  # Loads "4D" admin data from a CSV file into AdminDataValue records.
  #
  # Each CSV row supplies a school (by DESE id), an admin data item id, an
  # academic year range and a likert score. Several header spellings are
  # accepted for each column; see the private reader helpers below.
  class FourDLoader
    # Reads the CSV at +filepath+ row by row and creates or updates one
    # AdminDataValue per row whose likert score lies between 1 and 5.
    # Rows with a score outside that range are reported on stdout and skipped.
    #
    # @param filepath [String, Pathname] path of a CSV file with a header row
    def self.load_data(filepath:)
      # Stream the file instead of reading it into memory in one piece.
      CSV.foreach(filepath, headers: true) do |row|
        score = likert_score(row:)
        if valid_likert_score(likert_score: score)
          create_admin_data_value(row:, score:)
        else
          report_invalid_score(row:, score:)
        end
      end
    end

    # Prints a notice for a row that is skipped because of its score.
    # The school is looked up through the same header variants as the rest of
    # the loader, and an unknown school no longer raises NoMethodError.
    def self.report_invalid_score(row:, score:)
      id = dese_id(row:)
      school_name = School.find_by_dese_id(id.to_i)&.name || "unknown (DESE ID #{id})"
      puts "Invalid score: #{score}\n" \
           "for school: #{school_name}\n" \
           "admin data item #{admin_data_item(row:)} "
    end

    # True when the score lies in the inclusive range 1..5.
    def self.valid_likert_score(likert_score:)
      likert_score.between?(1, 5)
    end

    # Reads the likert score of a row as a number. A missing or non-numeric
    # cell becomes 0.0 (String#to_f / NilClass#to_f), which is then rejected
    # by valid_likert_score.
    def self.likert_score(row:)
      raw = (row['Likert Score'] || row['LikertScore'] || row['Likert_Score']).to_f
      round_up_to_one(likert_score: raw)
    end

    # Raises scores strictly between 0 and 1 to 1; other values pass through.
    def self.round_up_to_one(likert_score:)
      likert_score.positive? && likert_score < 1 ? 1 : likert_score
    end

    # Academic year range of a row, e.g. the value handed to
    # AcademicYear.find_by_range.
    def self.ay(row:)
      row['Academic Year'] || row['AcademicYear']
    end

    # DESE id cell of a row (still a string as read from the CSV).
    def self.dese_id(row:)
      row['DESE ID'] || row['Dese ID'] || row['Dese Id']
    end

    # Admin data item id cell of a row.
    def self.admin_data_item(row:)
      row['Admin Data Item'] || row['Item ID'] || row['Item Id']
    end

    # Creates the AdminDataValue for the row's school / item / academic year,
    # or updates its likert score when one already exists. Rows whose school
    # cannot be found are skipped silently.
    #
    # Both paths now persist through update!, so a failed update raises just
    # as a failed create always did, instead of being dropped without notice.
    def self.create_admin_data_value(row:, score:)
      school = School.find_by_dese_id(dese_id(row:).to_i)
      return if school.nil?

      # Look each association up once and reuse it for find and create.
      academic_year = AcademicYear.find_by_range(ay(row:))
      item = AdminDataItem.find_by_admin_data_item_id(admin_data_item(row:))

      AdminDataValue
        .find_or_initialize_by(academic_year:, school:, admin_data_item: item)
        .update!(likert_score: score)
    end

    # A bare `private` does not affect `def self.` methods, so the helpers are
    # hidden explicitly.
    private_class_method :report_invalid_score,
                         :valid_likert_score,
                         :likert_score,
                         :round_up_to_one,
                         :ay,
                         :dese_id,
                         :admin_data_item,
                         :create_admin_data_value
  end
end
|
||||||
65
app/services/dese/four_d_scraper.rb
Normal file
65
app/services/dese/four_d_scraper.rb
Normal file
|
|
@ -0,0 +1,65 @@
|
||||||
|
require 'watir'
|
||||||
|
require 'csv'
|
||||||
|
|
||||||
|
module Dese
  # Scrapes the DESE "plans of high school graduates" state report for every
  # AcademicYear and writes the rows, plus a derived likert score, to a CSV.
  #
  # All work happens in the constructor: Dese::FourDScraper.new(filepath: ...)
  # drives a Watir browser and (re)writes the file at +filepath+.
  class FourDScraper
    REPORT_URL = 'https://profiles.doe.mass.edu/statereport/plansofhsgrads.aspx'
    # Admin data item id written into every CSV row.
    ADMIN_DATA_ITEM_ID = 'a-cgpr-i1'
    HEADERS = ['School Name', 'DESE ID', '4 Year Private College', '4 Year Public College',
               '2 Year Private College', '2 Year Public College', 'Other Post Secondary',
               'Apprenticeship', 'Work', 'Military', 'Other', 'Unknown', 'Total',
               'Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year'].freeze
    # Cell positions summed by #calculate. Going by HEADERS these are the four
    # college columns, other post secondary, apprenticeship and work.
    # NOTE(review): assumes the scraped table columns line up with HEADERS - confirm.
    SCORED_COLUMNS = (2..8)

    # @param filepath [String, Pathname] CSV file to overwrite with the scraped data
    def initialize(filepath: Rails.root.join('data', 'admin_data', 'dese', 'four_d.csv'))
      browser = Watir::Browser.new
      begin
        write_headers(filepath:)
        AcademicYear.all.each do |academic_year|
          table = scrape(browser:, url: REPORT_URL, range: academic_year.range)
          write_csv(table:, filepath:, range: academic_year.range, id: ADMIN_DATA_ITEM_ID) unless table.nil?
        end
      ensure
        # Close the browser even when scraping or writing raises, so no
        # browser process is left behind.
        browser.close
      end
    end

    # Loads the report page, selects the school-level report for +range+ and
    # returns every <tr> node of the resulting page.
    #
    # @return [Nokogiri::XML::NodeSet, nil] nil when the page offers no option
    #   whose text equals +range+
    def scrape(browser:, url:, range:)
      browser.goto(url)

      return unless browser.option(text: range).present?

      browser.select(id: 'ctl00_ContentPlaceHolder1_ddReportType').select(/School/)
      browser.select(id: 'ctl00_ContentPlaceHolder1_ddYear').select(text: range)
      browser.button(id: 'btnViewReport').click
      sleep 5 # Sleep to prevent hitting mass.edu with too many requests
      Nokogiri::HTML(browser.html).css('tr')
    end

    # Truncates the file at +filepath+ and writes the CSV header row.
    def write_headers(filepath:)
      CSV.open(filepath, 'w') { |csv| csv << HEADERS }
    end

    # Appends one CSV line per table row that has a non-zero DESE id in its
    # second cell. Each line holds the row's cell texts followed by the raw
    # likert calculation, the score limited to 1..5 and rounded to two
    # decimals, +id+ and +range+.
    def write_csv(table:, filepath:, range:, id:)
      CSV.open(filepath, 'a') do |csv|
        table.each do |row|
          items = row.css('td').map(&:text)
          # Rows without cells (e.g. header rows) or without a numeric id give
          # 0 here, because nil.to_i and non-numeric strings both convert to 0.
          next if items[1].to_i.zero?

          raw_likert_score = calculate(cells: items)
          likert_score = raw_likert_score.clamp(1, 5).round(2)
          csv << [*items, raw_likert_score, likert_score, id, range]
        end
      end
    end

    # Raw likert calculation for one row: the sum of the SCORED_COLUMNS cells
    # (missing or non-numeric cells count as 0.0) multiplied by 4 and divided by 75.
    #
    # @param cells [Array<String>] cell texts of one table row
    # @return [Float]
    def calculate(cells:)
      SCORED_COLUMNS.sum { |index| cells[index].to_f } * 4 / 75
    end
  end
end
|
||||||
1979
data/admin_data/dese/four_d.csv
Normal file
1979
data/admin_data/dese/four_d.csv
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -67,9 +67,10 @@ namespace :data do
|
||||||
|
|
||||||
desc 'load admin_data'
|
desc 'load admin_data'
|
||||||
task load_admin_data: :environment do
|
task load_admin_data: :environment do
|
||||||
Dir.glob(Rails.root.join('data', 'admin_data', '*.csv')).each do |filepath|
|
AdminDataValue.delete_all
|
||||||
|
Dir.glob(Rails.root.join('data', 'admin_data', 'dese', '*.csv')).each do |filepath|
|
||||||
puts "=====================> Loading data from csv at path: #{filepath}"
|
puts "=====================> Loading data from csv at path: #{filepath}"
|
||||||
AdminDataLoader.load_data filepath:
|
Dese::FourDLoader.load_data filepath:
|
||||||
end
|
end
|
||||||
puts "=====================> Completed loading #{AdminDataValue.count} survey responses"
|
puts "=====================> Completed loading #{AdminDataValue.count} survey responses"
|
||||||
end
|
end
|
||||||
|
|
|
||||||
1979
spec/fixtures/sample_four_d_data.csv
vendored
Normal file
1979
spec/fixtures/sample_four_d_data.csv
vendored
Normal file
File diff suppressed because it is too large
Load diff
50
spec/services/dese/four_d_loader_spec.rb
Normal file
50
spec/services/dese/four_d_loader_spec.rb
Normal file
|
|
@ -0,0 +1,50 @@
|
||||||
|
require 'rails_helper'
|
||||||
|
RSpec.describe Dese::FourDLoader, type: :model do
  # CSV fixture handed to the loader in every example.
  let(:path_to_admin_data) { Rails.root.join('spec', 'fixtures', 'sample_four_d_data.csv') }

  # Records looked up from the data loaded by Rails.application.load_seed.
  let(:ay_2020_21) { AcademicYear.find_by_range('2020-21') }
  let(:ay_2018_19) { AcademicYear.find_by_range('2018-19') }
  let(:ay_2017_18) { AcademicYear.find_by_range('2017-18') }
  let(:ay_2016_17) { AcademicYear.find_by_range('2016-17') }
  let(:four_d) { AdminDataItem.find_by_admin_data_item_id('a-cgpr-i1') }
  let(:attleboro) { School.find_by_dese_id(160_505) }
  let(:winchester) { School.find_by_dese_id(3_440_505) }
  let(:milford) { School.find_by_dese_id(1_850_505) }
  let(:seacoast) { School.find_by_dese_id(2_480_520) }
  let(:next_wave) { School.find_by_dese_id(2_740_510) }

  before { Rails.application.load_seed }

  after { DatabaseCleaner.clean }

  context 'when running the loader' do
    before { Dese::FourDLoader.load_data(filepath: path_to_admin_data) }

    it 'load the correct admin data values' do
      # [school, academic year, expected likert score]
      expected_scores = [
        [winchester, ay_2016_17, 5],
        [attleboro, ay_2018_19, 5],
        [milford, ay_2017_18, 4.92],
        [seacoast, ay_2020_21, 3.84],
        [next_wave, ay_2020_21, 4.8]
      ]

      expected_scores.each do |school, academic_year, score|
        value = AdminDataValue.find_by(school:, admin_data_item: four_d, academic_year:)
        expect(value.likert_score).to eq score
      end
    end

    it 'loads the correct number of items' do
      expect(AdminDataValue.count).to eq 230
    end

    it 'is idempotent' do
      # Second load on top of the one done in the before hook.
      Dese::FourDLoader.load_data(filepath: path_to_admin_data)

      expect(AdminDataValue.count).to eq 230
    end
  end
end
|
||||||
24
spec/services/dese/four_d_scraper_spec.rb
Normal file
24
spec/services/dese/four_d_scraper_spec.rb
Normal file
|
|
@ -0,0 +1,24 @@
|
||||||
|
require 'rails_helper'
|
||||||
|
require 'fileutils'
|
||||||
|
# Names the class under test so the example group has a description and
# described_class is available.
RSpec.describe Dese::FourDScraper, type: :model do
  # The scraper iterates over AcademicYear.all, so the years must exist first.
  let(:academic_years) do
    [
      create(:academic_year, range: '2020-21'),
      create(:academic_year, range: '2019-20'),
      create(:academic_year, range: '2018-19'),
      create(:academic_year, range: '2017-18'),
      create(:academic_year, range: '2016-17')
    ]
  end

  before :each do
    academic_years
  end

  # Disabled with xcontext in the original commit; the scraper drives a real
  # browser against an external site.
  xcontext 'Creating a new FourDScraper' do
    it 'creates a csv file with the scraped data' do
      # Build directory and file from the same root so the example does not
      # depend on the current working directory.
      output_dir = Rails.root.join('tmp', 'spec', 'dese')
      FileUtils.mkdir_p output_dir
      file = output_dir.join('four_d.csv')
      # Remove leftovers from earlier runs; otherwise the expectation below
      # would pass even if the scraper wrote nothing.
      FileUtils.rm_f file

      described_class.new(filepath: file)

      expect(file).to exist
    end
  end
end
|
||||||
Loading…
Add table
Add a link
Reference in a new issue