Add admin data for 4D. Finishes #182818489 and #182793402

This commit is contained in:
rebuilt 2022-08-30 21:46:23 -07:00
parent 1e349519bc
commit 048aacd641
9 changed files with 4191 additions and 2 deletions

View file

@ -0,0 +1,74 @@
require 'csv'
module Dese
  # Imports likert scores from a CSV file into AdminDataValue records.
  #
  # Each CSV row must identify a school (DESE id), an admin data item, an
  # academic year and a likert score. Several spellings of each column header
  # are accepted; see the private accessor methods below.
  class FourDLoader
    # Reads the CSV at +filepath+ (the first line is treated as the header row)
    # and creates or updates one AdminDataValue per valid row.
    #
    # Rows whose likert score falls outside 1..5 are reported on stdout and
    # skipped. Rows whose school cannot be found are skipped silently.
    #
    # @param filepath [String, Pathname] location of the CSV file to import
    def self.load_data(filepath:)
      CSV.foreach(filepath, headers: true) do |row|
        score = likert_score(row:)
        unless valid_likert_score(likert_score: score)
          report_invalid_score(row:, score:)
          next
        end
        create_admin_data_value(row:, score:)
      end
    end

    # Prints a diagnostic for a row whose score is out of range.
    # The school is looked up through the same header-tolerant helper used when
    # saving, and a missing school is reported instead of raising, so that one
    # bad row cannot abort the whole import.
    def self.report_invalid_score(row:, score:)
      id = dese_id(row:)
      school = School.find_by_dese_id(id.to_i)
      school_name = school ? school.name : "unknown (DESE ID: #{id})"
      puts "Invalid score: #{score}\n" \
           "for school: #{school_name}\n" \
           "admin data item #{admin_data_item(row:)} "
    end

    # True when the score lies in the inclusive range 1..5.
    def self.valid_likert_score(likert_score:)
      likert_score.between?(1, 5)
    end

    # Extracts the likert score from the row as a number. A missing or
    # non-numeric cell becomes 0.0 (String#to_f / NilClass#to_f semantics),
    # which is subsequently rejected as invalid.
    def self.likert_score(row:)
      raw = (row['Likert Score'] || row['LikertScore'] || row['Likert_Score']).to_f
      round_up_to_one(likert_score: raw)
    end

    # Raises scores strictly between 0 and 1 up to 1; other values pass through.
    def self.round_up_to_one(likert_score:)
      likert_score.positive? && likert_score < 1 ? 1 : likert_score
    end

    # Academic year cell of the row (value is passed to AcademicYear.find_by_range).
    def self.ay(row:)
      row['Academic Year'] || row['AcademicYear']
    end

    # DESE id cell of the row, tolerating different header capitalisations.
    def self.dese_id(row:)
      row['DESE ID'] || row['Dese ID'] || row['Dese Id']
    end

    # Admin data item id cell of the row, tolerating alternative header names.
    def self.admin_data_item(row:)
      row['Admin Data Item'] || row['Item ID'] || row['Item Id']
    end

    # Creates the AdminDataValue for the row's school / year / item, or updates
    # the likert score of the existing one. Does nothing when the school is
    # not found.
    def self.create_admin_data_value(row:, score:)
      school = School.find_by_dese_id(dese_id(row:).to_i)
      return if school.nil?

      # Resolve the associations once and reuse them for both lookup and create.
      academic_year = AcademicYear.find_by_range(ay(row:))
      item = AdminDataItem.find_by_admin_data_item_id(admin_data_item(row:))

      existing = AdminDataValue.find_by(academic_year:, school:, admin_data_item: item)
      if existing
        existing.likert_score = score
        # NOTE(review): non-bang save ignores validation failures, unlike the
        # create! below - confirm that silent failure on update is intended.
        existing.save
      else
        AdminDataValue.create!(likert_score: score, academic_year:, school:, admin_data_item: item)
      end
    end

    private_class_method :report_invalid_score,
                         :valid_likert_score,
                         :likert_score,
                         :round_up_to_one,
                         :ay,
                         :dese_id,
                         :admin_data_item,
                         :create_admin_data_value
  end
end

View file

@ -0,0 +1,65 @@
require 'watir'
require 'csv'
module Dese
  # Scrapes the "plans of high school graduates" state report from
  # profiles.doe.mass.edu with a Watir-driven browser and writes one CSV row
  # per school and academic year, including a derived likert score.
  #
  # All work happens in the constructor: instantiating the class runs the scrape.
  class FourDScraper
    REPORT_URL = 'https://profiles.doe.mass.edu/statereport/plansofhsgrads.aspx'
    ADMIN_DATA_ITEM_ID = 'a-cgpr-i1'

    # Truncates the CSV at +filepath+, writes the header row, then appends the
    # scraped rows for every AcademicYear that the report offers.
    #
    # @param filepath [String, Pathname] destination CSV file
    def initialize(filepath: Rails.root.join('data', 'admin_data', 'dese', 'four_d.csv'))
      browser = Watir::Browser.new
      begin
        write_headers(filepath:)
        AcademicYear.all.each do |academic_year|
          range = academic_year.range
          table = scrape(browser:, url: REPORT_URL, range:)
          write_csv(table:, filepath:, range:, id: ADMIN_DATA_ITEM_ID) unless table.nil?
        end
      ensure
        # Always release the browser, even when scraping or writing raises.
        browser.close
      end
    end

    # Loads the school-level report for the given year range.
    #
    # @return the `tr` nodes of the resulting page, or nil when the page has no
    #   option whose text equals +range+
    def scrape(browser:, url:, range:)
      browser.goto(url)
      return unless browser.option(text: range).present?

      browser.select(id: 'ctl00_ContentPlaceHolder1_ddReportType').select(/School/)
      browser.select(id: 'ctl00_ContentPlaceHolder1_ddYear').select(text: range)
      browser.button(id: 'btnViewReport').click
      sleep 5 # Sleep to prevent hitting mass.edu with too many requests
      Nokogiri::HTML(browser.html).css('tr')
    end

    # Creates (or truncates) the CSV at +filepath+ and writes the header row.
    def write_headers(filepath:)
      headers = ['School Name', 'DESE ID', '4 Year Private College', '4 Year Public College',
                 '2 Year Private College', '2 Year Public College', 'Other Post Secondary',
                 'Apprenticeship', 'Work', 'Military', 'Other', 'Unknown', 'Total',
                 'Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year']
      CSV.open(filepath, 'w') { |csv| csv << headers }
    end

    # Appends one CSV line per table row that carries a non-zero DESE id in its
    # second cell. Each line holds the scraped cell texts followed by the raw
    # likert value, the likert score limited to 1..5 and rounded to two
    # decimals, the admin data item +id+ and the academic year +range+.
    def write_csv(table:, filepath:, range:, id:)
      CSV.open(filepath, 'a') do |csv|
        table.each do |row|
          items = row.css('td').map(&:text)
          # Rows without `td` cells or without a numeric id yield 0 and are skipped.
          next if items[1].to_i.zero?

          raw_likert_score = calculate(cells: items)
          likert_score = raw_likert_score.clamp(1, 5).round(2)
          csv << [*items, raw_likert_score, likert_score, id, range]
        end
      end
    end

    # Sums cells 2 through 8 (per the header row: the four college columns,
    # other post secondary, apprenticeship and work) and scales the total by
    # 4/75. Missing or non-numeric cells count as 0.0.
    # NOTE(review): presumably the cells are percentages and a total of 75 is the
    # benchmark that maps to a likert score of 4 - confirm.
    def calculate(cells:)
      (2..8).map { |index| cells[index].to_f }.reduce(:+) * 4 / 75
    end
  end
end