mirror of
https://github.com/edcommonwealth/sqm-dashboards.git
synced 2026-03-07 21:48:16 -08:00
Add admin data for 4D. Finishes #182818489 and #182793402
This commit is contained in:
parent
1e349519bc
commit
048aacd641
9 changed files with 4191 additions and 2 deletions
74
app/services/dese/four_d_loader.rb
Normal file
74
app/services/dese/four_d_loader.rb
Normal file
|
|
@ -0,0 +1,74 @@
|
|||
require 'csv'
|
||||
|
||||
module Dese
  # Loads likert scores from a CSV file into AdminDataValue records.
  #
  # Every row is matched to a School (by DESE id), an AcademicYear (by range)
  # and an AdminDataItem (by admin data item id). Several header spellings are
  # accepted for each column; see the private column readers below.
  class FourDLoader
    # Reads the CSV at +filepath+ (first line is the header row) and creates or
    # updates one AdminDataValue per row with a valid score.
    #
    # Rows whose score is outside 1..5 are reported on stdout and skipped.
    # Rows whose school cannot be found are skipped silently.
    #
    # @param filepath [String, Pathname] location of the CSV file
    def self.load_data(filepath:)
      # Stream the file row by row instead of reading it fully into memory first.
      CSV.foreach(filepath, headers: true) do |row|
        score = likert_score(row:)
        unless valid_likert_score(likert_score: score)
          puts invalid_score_message(row:, score:)
          next
        end

        create_admin_data_value(row:, score:)
      end
    end

    # @param likert_score [Numeric]
    # @return [Boolean] true when the score lies in the inclusive range 1..5
    def self.valid_likert_score(likert_score:)
      likert_score.between?(1, 5)
    end

    # Reads the score column (any accepted spelling) as a float and rounds
    # positive values below 1 up to 1. A missing or non-numeric cell yields 0.0,
    # which is later rejected by valid_likert_score.
    #
    # @param row [CSV::Row, Hash]
    # @return [Numeric]
    def self.likert_score(row:)
      likert_score = (row['Likert Score'] || row['LikertScore'] || row['Likert_Score']).to_f
      round_up_to_one(likert_score:)
    end

    # @param likert_score [Numeric]
    # @return [Numeric] 1 when the score is strictly between 0 and 1, otherwise the score unchanged
    def self.round_up_to_one(likert_score:)
      return 1 if likert_score.positive? && likert_score < 1

      likert_score
    end

    # @param row [CSV::Row, Hash]
    # @return [String, nil] the academic year range cell
    def self.ay(row:)
      row['Academic Year'] || row['AcademicYear']
    end

    # @param row [CSV::Row, Hash]
    # @return [String, nil] the DESE id cell
    def self.dese_id(row:)
      row['DESE ID'] || row['Dese ID'] || row['Dese Id']
    end

    # @param row [CSV::Row, Hash]
    # @return [String, nil] the admin data item id cell
    def self.admin_data_item(row:)
      row['Admin Data Item'] || row['Item ID'] || row['Item Id']
    end

    # Builds the message printed for a row with an out-of-range score.
    #
    # The school is looked up through the same column reader and integer
    # conversion that create_admin_data_value uses, and an unknown school is
    # reported by id instead of raising NoMethodError on nil.
    #
    # @param row [CSV::Row, Hash]
    # @param score [Numeric]
    # @return [String]
    def self.invalid_score_message(row:, score:)
      id = dese_id(row:)
      school_name = School.find_by_dese_id(id.to_i)&.name || "unknown (DESE ID #{id})"

      "Invalid score: #{score}\n" \
        "for school: #{school_name}\n" \
        "admin data item #{admin_data_item(row:)} "
    end

    # Creates the AdminDataValue for the row's school / academic year / item,
    # or updates its likert score when one already exists. Does nothing when
    # the school is unknown.
    #
    # Both paths persist with save!, so a validation failure raises on update
    # just as it already did on creation.
    #
    # @param row [CSV::Row, Hash]
    # @param score [Numeric]
    def self.create_admin_data_value(row:, score:)
      school = School.find_by_dese_id(dese_id(row:).to_i)
      return if school.nil?

      admin_data_value = AdminDataValue.find_or_initialize_by(
        academic_year: AcademicYear.find_by_range(ay(row:)),
        school:,
        admin_data_item: AdminDataItem.find_by_admin_data_item_id(admin_data_item(row:))
      )
      admin_data_value.likert_score = score
      admin_data_value.save!
    end

    # A bare `private` does not apply to `def self.` methods, so the helpers
    # are hidden explicitly.
    private_class_method :valid_likert_score,
                         :likert_score,
                         :round_up_to_one,
                         :ay,
                         :dese_id,
                         :admin_data_item,
                         :invalid_score_message,
                         :create_admin_data_value
  end
end
|
||||
65
app/services/dese/four_d_scraper.rb
Normal file
65
app/services/dese/four_d_scraper.rb
Normal file
|
|
@ -0,0 +1,65 @@
|
|||
require 'watir'
|
||||
require 'csv'
|
||||
|
||||
module Dese
  # Scrapes the state report at REPORT_URL for every AcademicYear and writes
  # one CSV row per school, extended with a computed likert score.
  #
  # All of the work happens in the constructor: building an instance drives a
  # browser through the report pages and (re)writes the CSV at +filepath+.
  class FourDScraper
    REPORT_URL = 'https://profiles.doe.mass.edu/statereport/plansofhsgrads.aspx'

    # Admin data item id stamped on every row this scraper writes.
    ADMIN_DATA_ITEM_ID = 'a-cgpr-i1'

    HEADERS = [
      'School Name', 'DESE ID', '4 Year Private College', '4 Year Public College',
      '2 Year Private College', '2 Year Public College', 'Other Post Secondary',
      'Apprenticeship', 'Work', 'Military', 'Other', 'Unknown', 'Total',
      'Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year'
    ].freeze

    # Truncates the CSV at +filepath+ to a header row, then appends the scraped
    # rows for each academic year whose report is available.
    #
    # The browser is closed even when header writing or scraping raises, so a
    # failed run does not leave a browser process behind.
    #
    # @param filepath [String, Pathname] destination CSV file
    def initialize(filepath: Rails.root.join('data', 'admin_data', 'dese', 'four_d.csv'))
      browser = Watir::Browser.new
      begin
        write_headers(filepath:)
        AcademicYear.all.each do |academic_year|
          range = academic_year.range
          table = scrape(browser:, url: REPORT_URL, range:)
          write_csv(table:, filepath:, range:, id: ADMIN_DATA_ITEM_ID) unless table.nil?
        end
      ensure
        browser.close
      end
    end

    # Loads the school-level report for one academic year.
    #
    # @param browser [Watir::Browser]
    # @param url [String] report page address
    # @param range [String] text of the year option to select
    # @return [Nokogiri::XML::NodeSet, nil] every <tr> of the resulting page,
    #   or nil when the page offers no option whose text equals +range+
    def scrape(browser:, url:, range:)
      browser.goto(url)

      return unless browser.option(text: range).present?

      browser.select(id: 'ctl00_ContentPlaceHolder1_ddReportType').select(/School/)
      browser.select(id: 'ctl00_ContentPlaceHolder1_ddYear').select(text: range)
      browser.button(id: 'btnViewReport').click
      sleep 5 # Sleep to prevent hitting mass.edu with too many requests
      Nokogiri::HTML(browser.html).css('tr')
    end

    # Creates (or truncates) the CSV at +filepath+ and writes the header row.
    #
    # @param filepath [String, Pathname]
    def write_headers(filepath:)
      CSV.open(filepath, 'w') do |csv|
        csv << HEADERS
      end
    end

    # Appends one CSV line per table row that carries a non-zero DESE id.
    #
    # Each written line holds the row's cell texts followed by the raw likert
    # calculation, that value clamped to 1..5 and rounded to two decimals, the
    # admin data item id and the academic year range.
    #
    # @param table [Enumerable] rows responding to css('td'), whose cells respond to text
    # @param filepath [String, Pathname]
    # @param range [String] academic year range recorded on each line
    # @param id [String] admin data item id recorded on each line
    def write_csv(table:, filepath:, range:, id:)
      CSV.open(filepath, 'a') do |csv|
        table.each do |row|
          items = row.css('td').map(&:text)
          # A missing or non-numeric second cell converts to 0; such rows
          # (for example rows without <td> cells) are skipped.
          next if items[1].to_i.zero?

          raw_likert_score = calculate(cells: items)
          likert_score = raw_likert_score.clamp(1, 5).round(2)
          csv << [*items, raw_likert_score, likert_score, id, range]
        end
      end
    end

    # Adds the numeric values of cells 2 through 8 and scales the total by 4 / 75.
    # Missing or non-numeric cells count as 0.0.
    #
    # NOTE(review): if the scraped columns line up with HEADERS, cells 2..8 are
    # the college, other post secondary, apprenticeship and work columns - confirm.
    #
    # @param cells [Array<String, nil>]
    # @return [Float]
    def calculate(cells:)
      # A sequential fold (rather than Array#sum) keeps float results
      # identical to a left-to-right chain of additions.
      total = (2..8).reduce(0.0) { |running, index| running + cells[index].to_f }
      total * 4 / 75
    end
  end
end
|
||||
Loading…
Add table
Add a link
Reference in a new issue