mirror of
https://github.com/edcommonwealth/Dashboard.git
synced 2026-03-07 21:38:14 -08:00
chore: scrape enrollment and staffing data
This commit is contained in:
parent
725348bf95
commit
1810ee0074
7 changed files with 15026 additions and 13173 deletions
|
|
@ -1,38 +1,40 @@
|
||||||
require 'watir'
|
require "watir"
|
||||||
require 'csv'
|
require "csv"
|
||||||
|
|
||||||
module Dese
|
module Dashboard
|
||||||
module Enrollments
|
module Dese
|
||||||
include Dese::Scraper
|
module Enrollments
|
||||||
attr_reader :filepaths
|
include Dashboard::Dese::Scraper
|
||||||
|
attr_reader :filepaths
|
||||||
|
|
||||||
def scrape_enrollments(filepath:)
|
def scrape_enrollments(filepath:)
|
||||||
headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
|
headers = ["Raw likert calculation", "Likert Score", "Admin Data Item", "Academic Year", "School Name", "DESE ID",
|
||||||
'PK', 'K', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', 'SP', 'Total']
|
"PK", "K", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "SP", "Total"]
|
||||||
write_headers(filepath:, headers:)
|
write_headers(filepath:, headers:)
|
||||||
run do |academic_year|
|
run do |academic_year|
|
||||||
admin_data_item_id = ''
|
admin_data_item_id = ""
|
||||||
url = 'https://profiles.doe.mass.edu/statereport/enrollmentbygrade.aspx'
|
url = "https://profiles.doe.mass.edu/statereport/enrollmentbygrade.aspx"
|
||||||
range = academic_year.range
|
range = academic_year.range
|
||||||
selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
|
selectors = { "ctl00_ContentPlaceHolder1_ddReportType" => "School",
|
||||||
'ctl00_ContentPlaceHolder1_ddYear' => range }
|
"ctl00_ContentPlaceHolder1_ddYear" => range }
|
||||||
submit_id = 'btnViewReport'
|
submit_id = "btnViewReport"
|
||||||
calculation = ->(_headers, _items) { 'NA' }
|
calculation = ->(_headers, _items) { "NA" }
|
||||||
Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
|
Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
def student_count(filepath:, dese_id:, year:)
|
|
||||||
@students ||= {}
|
|
||||||
if @students.count == 0
|
|
||||||
CSV.parse(File.read(filepath), headers: true).map do |row|
|
|
||||||
academic_year = row['Academic Year']
|
|
||||||
school_id = row['DESE ID'].to_i
|
|
||||||
total = row['Total'].gsub(',', '').to_i
|
|
||||||
@students[[school_id, academic_year]] = total
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
@students[[dese_id, year]]
|
|
||||||
|
# Looks up the total enrollment for one school in one academic year.
#
# On the first call the enrollment CSV at +filepath+ is loaded into an
# in-memory cache keyed by [DESE ID (Integer), academic year (String)];
# later calls are served from that cache. The cache is only (re)loaded
# while it is empty, so +filepath+ is ignored once any row has been cached.
#
# @param filepath [String, Pathname] CSV with "Academic Year", "DESE ID" and "Total" columns
# @param dese_id [Integer] school identifier to look up
# @param year [String] academic year value as it appears in the "Academic Year" column
# @return [Integer, nil] the cached total, or nil when no row matches
def student_count(filepath:, dese_id:, year:)
  @students ||= {}

  if @students.empty?
    # Stream the file row by row instead of reading it all into memory.
    CSV.foreach(filepath, headers: true) do |row|
      school_id = row["DESE ID"].to_i
      academic_year = row["Academic Year"]
      # Totals may carry thousands separators ("1,234"). An empty cell is
      # parsed as nil, so go through to_s to treat it as 0 instead of raising.
      @students[[school_id, academic_year]] = row["Total"].to_s.delete(",").to_i
    end
  end

  @students[[dese_id, year]]
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
||||||
|
|
@ -1,76 +1,78 @@
|
||||||
module Dese
|
module Dashboard
|
||||||
module Scraper
|
module Dese
|
||||||
DELAY = 20
|
module Scraper
|
||||||
|
DELAY = 20
|
||||||
|
|
||||||
Prerequisites = Struct.new('Prerequisites', :filepath, :url, :selectors, :submit_id, :admin_data_item_id,
|
Prerequisites = Struct.new("Prerequisites", :filepath, :url, :selectors, :submit_id, :admin_data_item_id,
|
||||||
:calculation)
|
:calculation)
|
||||||
def run
|
def run
|
||||||
academic_years = AcademicYear.all.order(range: :DESC)
|
academic_years = AcademicYear.all.order(range: :DESC)
|
||||||
academic_years.each do |academic_year|
|
academic_years.each do |academic_year|
|
||||||
prerequisites = yield academic_year
|
prerequisites = yield academic_year
|
||||||
|
|
||||||
document = get_html(url: prerequisites.url,
|
document = get_html(url: prerequisites.url,
|
||||||
selectors: prerequisites.selectors,
|
selectors: prerequisites.selectors,
|
||||||
submit_id: prerequisites.submit_id)
|
submit_id: prerequisites.submit_id)
|
||||||
unless document.nil?
|
unless document.nil?
|
||||||
write_csv(document:, filepath: prerequisites.filepath, range: academic_year.range, id: prerequisites.admin_data_item_id,
|
write_csv(document:, filepath: prerequisites.filepath, range: academic_year.range, id: prerequisites.admin_data_item_id,
|
||||||
calculation: prerequisites.calculation)
|
calculation: prerequisites.calculation)
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
|
||||||
|
|
||||||
def browser
|
def browser
|
||||||
@browser ||= Watir::Browser.new
|
@browser ||= Watir::Browser.new
|
||||||
end
|
|
||||||
|
|
||||||
def get_html(url:, selectors:, submit_id:)
|
|
||||||
browser.goto(url)
|
|
||||||
|
|
||||||
selectors.each do |key, value|
|
|
||||||
return unless browser.option(text: value).present?
|
|
||||||
|
|
||||||
browser.select(id: key).select(text: value)
|
|
||||||
end
|
end
|
||||||
|
|
||||||
browser.button(id: submit_id).click
|
def get_html(url:, selectors:, submit_id:)
|
||||||
sleep DELAY # Sleep to prevent hitting mass.edu with too many requests
|
browser.goto(url)
|
||||||
Nokogiri::HTML(browser.html)
|
|
||||||
end
|
|
||||||
|
|
||||||
def write_headers(filepath:, headers:)
|
selectors.each do |key, value|
|
||||||
CSV.open(filepath, 'w') do |csv|
|
return unless browser.option(text: value).present?
|
||||||
csv << headers
|
|
||||||
|
browser.select(id: key).select(text: value)
|
||||||
|
end
|
||||||
|
|
||||||
|
browser.button(id: submit_id).click
|
||||||
|
sleep DELAY # Sleep to prevent hitting mass.edu with too many requests
|
||||||
|
Nokogiri::HTML(browser.html)
|
||||||
end
|
end
|
||||||
end
|
|
||||||
|
|
||||||
def write_csv(document:, filepath:, range:, id:, calculation:)
|
def write_headers(filepath:, headers:)
|
||||||
table = document.css('tr')
|
CSV.open(filepath, "w") do |csv|
|
||||||
headers = document.css('.sorting')
|
csv << headers
|
||||||
header_hash = headers.each_with_index.map { |header, index| [header.text, index] }.to_h
|
end
|
||||||
|
end
|
||||||
|
|
||||||
CSV.open(filepath, 'a') do |csv|
|
def write_csv(document:, filepath:, range:, id:, calculation:)
|
||||||
table.each do |row|
|
table = document.css("tr")
|
||||||
items = row.css('td').map(&:text)
|
headers = document.css(".sorting")
|
||||||
dese_id = items[1].to_i
|
header_hash = headers.each_with_index.map { |header, index| [header.text, index] }.to_h
|
||||||
next if dese_id.nil? || dese_id.zero?
|
|
||||||
|
|
||||||
raw_likert_score = calculation.call(header_hash, items)
|
CSV.open(filepath, "a") do |csv|
|
||||||
raw_likert_score ||= 'NA'
|
table.each do |row|
|
||||||
likert_score = raw_likert_score
|
items = row.css("td").map(&:text)
|
||||||
if likert_score != 'NA'
|
dese_id = items[1].to_i
|
||||||
likert_score = 5 if likert_score > 5
|
next if dese_id.nil? || dese_id.zero?
|
||||||
likert_score = 1 if likert_score < 1
|
|
||||||
likert_score = likert_score.round(2)
|
raw_likert_score = calculation.call(header_hash, items)
|
||||||
|
raw_likert_score ||= "NA"
|
||||||
|
likert_score = raw_likert_score
|
||||||
|
if likert_score != "NA"
|
||||||
|
likert_score = 5 if likert_score > 5
|
||||||
|
likert_score = 1 if likert_score < 1
|
||||||
|
likert_score = likert_score.round(2)
|
||||||
|
end
|
||||||
|
|
||||||
|
output = []
|
||||||
|
output << raw_likert_score
|
||||||
|
output << likert_score
|
||||||
|
output << id
|
||||||
|
output << range
|
||||||
|
output << items
|
||||||
|
output = output.flatten
|
||||||
|
csv << output
|
||||||
end
|
end
|
||||||
|
|
||||||
output = []
|
|
||||||
output << raw_likert_score
|
|
||||||
output << likert_score
|
|
||||||
output << id
|
|
||||||
output << range
|
|
||||||
output << items
|
|
||||||
output = output.flatten
|
|
||||||
csv << output
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
||||||
|
|
@ -1,34 +1,36 @@
|
||||||
require 'watir'
|
require "watir"
|
||||||
|
|
||||||
module Dese
|
module Dashboard
|
||||||
class Staffing
|
module Dese
|
||||||
include Dese::Scraper
|
class Staffing
|
||||||
attr_reader :filepath
|
include Dashboard::Dese::Scraper
|
||||||
|
attr_reader :filepath
|
||||||
|
|
||||||
def initialize(filepath: Rails.root.join('data', 'staffing', 'staffing.csv'))
|
def initialize(filepath: Dashboard::Engine.root.join("data", "dashboard", "staffing", "staffing.csv"))
|
||||||
@filepath = filepath
|
@filepath = filepath
|
||||||
end
|
end
|
||||||
|
|
||||||
def run_all
|
def run_all
|
||||||
scrape_staffing(filepath:)
|
scrape_staffing(filepath:)
|
||||||
end
|
end
|
||||||
|
|
||||||
def scrape_staffing(filepath:)
|
def scrape_staffing(filepath:)
|
||||||
headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year',
|
headers = ["Raw likert calculation", "Likert Score", "Admin Data Item", "Academic Year",
|
||||||
'School Name', 'DESE ID',
|
"School Name", "DESE ID",
|
||||||
'PK-2 (#)', '3-5 (#)', '6-8 (#)', '9-12 (#)', 'Multiple Grades (#)',
|
"PK-2 (#)", "3-5 (#)", "6-8 (#)", "9-12 (#)", "Multiple Grades (#)",
|
||||||
'All Grades (#)', 'FTE Count']
|
"All Grades (#)", "FTE Count"]
|
||||||
write_headers(filepath:, headers:)
|
write_headers(filepath:, headers:)
|
||||||
run do |academic_year|
|
run do |academic_year|
|
||||||
admin_data_item_id = 'NA'
|
admin_data_item_id = "NA"
|
||||||
url = 'https://profiles.doe.mass.edu/statereport/gradesubjectstaffing.aspx'
|
url = "https://profiles.doe.mass.edu/statereport/gradesubjectstaffing.aspx"
|
||||||
range = academic_year.range
|
range = academic_year.range
|
||||||
selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
|
selectors = { "ctl00_ContentPlaceHolder1_ddReportType" => "School",
|
||||||
'ctl00_ContentPlaceHolder1_ddYear' => range,
|
"ctl00_ContentPlaceHolder1_ddYear" => range,
|
||||||
'ctl00_ContentPlaceHolder1_ddDisplay' => 'Full-time Equivalents' }
|
"ctl00_ContentPlaceHolder1_ddDisplay" => "Full-time Equivalents" }
|
||||||
submit_id = 'btnViewReport'
|
submit_id = "btnViewReport"
|
||||||
calculation = ->(_headers, _items) { 'NA' }
|
calculation = ->(_headers, _items) { "NA" }
|
||||||
Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
|
Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
||||||
|
|
@ -1,162 +1,167 @@
|
||||||
require 'watir'
|
require "watir"
|
||||||
require 'csv'
|
require "csv"
|
||||||
|
|
||||||
module Dese
|
module Dashboard
|
||||||
class ThreeATwo
|
module Dese
|
||||||
include Dese::Scraper
|
class ThreeATwo
|
||||||
include Dese::Enrollments
|
include Dashboard::Dese::Scraper
|
||||||
attr_reader :filepaths
|
include Dashboard::Dese::Enrollments
|
||||||
|
attr_reader :filepaths
|
||||||
|
|
||||||
def initialize(filepaths: [Rails.root.join('data', 'admin_data', 'dese', 'enrollments.csv'),
|
def initialize(filepaths:
|
||||||
Rails.root.join('data', 'admin_data', 'dese', '3A_2_age_staffing.csv'),
|
[Dashboard::Engine.root.join("data", "dashboard", "admin_data", "dese", "enrollments.csv"),
|
||||||
Rails.root.join('data', 'admin_data', 'dese', '3A_2_grade_subject_staffing.csv')])
|
Dashboard::Engine.root.join("data", "dashboard", "admin_data", "dese",
|
||||||
|
"3A_2_age_staffing.csv"),
|
||||||
|
Dashboard::Engine.root.join("data", "dashboard", "admin_data", "dese",
|
||||||
|
"3A_2_grade_subject_staffing.csv")])
|
||||||
|
|
||||||
@filepaths = filepaths
|
@filepaths = filepaths
|
||||||
end
|
|
||||||
|
|
||||||
def run_all
|
|
||||||
filepath = filepaths[0]
|
|
||||||
scrape_enrollments(filepath:)
|
|
||||||
|
|
||||||
filepath = filepaths[1]
|
|
||||||
write_a_sust_i1_headers(filepath:)
|
|
||||||
run_a_sust_i1(filepath:)
|
|
||||||
run_a_sust_i2(filepath:)
|
|
||||||
run_a_sust_i3(filepath:)
|
|
||||||
|
|
||||||
filepath = filepaths[2]
|
|
||||||
write_a_sust_i4_headers(filepath:)
|
|
||||||
run_a_sust_i4(filepath:)
|
|
||||||
|
|
||||||
browser.close
|
|
||||||
end
|
|
||||||
|
|
||||||
def write_a_sust_i1_headers(filepath:)
|
|
||||||
headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
|
|
||||||
'<26 yrs (# )', '26-32 yrs (#)', '33-40 yrs (#)', '41-48 yrs (#)',
|
|
||||||
'49-56 yrs (#)', '57-64 yrs (#)', 'Over 64 yrs (#)', 'FTE Count',
|
|
||||||
'Student Count', 'Student to Guidance Counselor ratio']
|
|
||||||
|
|
||||||
write_headers(filepath:, headers:)
|
|
||||||
end
|
|
||||||
|
|
||||||
def write_a_sust_i4_headers(filepath:)
|
|
||||||
headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
|
|
||||||
'PK-2 (# )', '3-5 (# )', '6-8 (# )', '9-12 (# )', 'Multiple Grades (# )', 'All Grades (# )', 'FTE Count',
|
|
||||||
'Student Count', 'Student to Art Teacher ratio']
|
|
||||||
|
|
||||||
write_headers(filepath:, headers:)
|
|
||||||
end
|
|
||||||
|
|
||||||
def run_a_sust_i1(filepath:)
|
|
||||||
run do |academic_year|
|
|
||||||
admin_data_item_id = 'a-sust-i1'
|
|
||||||
url = 'https://profiles.doe.mass.edu/statereport/agestaffing.aspx'
|
|
||||||
range = academic_year.range
|
|
||||||
selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
|
|
||||||
'ctl00_ContentPlaceHolder1_ddYear' => range,
|
|
||||||
'ctl00_ContentPlaceHolder1_ddJobClassification' => 'Guidance Counselor' }
|
|
||||||
submit_id = 'btnViewReport'
|
|
||||||
calculation = lambda { |headers, items|
|
|
||||||
fte_index = headers['FTE Count']
|
|
||||||
num_of_guidance_counselors = items[fte_index].to_f
|
|
||||||
dese_id = items[headers['School Code']].to_i
|
|
||||||
school = School.find_by_dese_id(dese_id)
|
|
||||||
|
|
||||||
return 'NA' unless school.present? && school.is_hs?
|
|
||||||
|
|
||||||
num_of_students = student_count(filepath: filepaths[0], dese_id:, year: academic_year.range) || 0
|
|
||||||
items << num_of_students
|
|
||||||
benchmark = 250
|
|
||||||
if fte_index.present? && !items[fte_index] != ''
|
|
||||||
result = ((benchmark - (num_of_students / num_of_guidance_counselors)) + benchmark) * 4 / benchmark
|
|
||||||
end
|
|
||||||
items << (num_of_students / num_of_guidance_counselors)
|
|
||||||
result
|
|
||||||
}
|
|
||||||
Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
|
|
||||||
end
|
end
|
||||||
end
|
|
||||||
|
|
||||||
def run_a_sust_i2(filepath:)
|
def run_all
|
||||||
run do |academic_year|
|
filepath = filepaths[0]
|
||||||
admin_data_item_id = 'a-sust-i2'
|
scrape_enrollments(filepath:)
|
||||||
url = 'https://profiles.doe.mass.edu/statereport/agestaffing.aspx'
|
|
||||||
range = academic_year.range
|
|
||||||
selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
|
|
||||||
'ctl00_ContentPlaceHolder1_ddYear' => range,
|
|
||||||
'ctl00_ContentPlaceHolder1_ddJobClassification' => 'School Psychologist -- Non-Special Education' }
|
|
||||||
submit_id = 'btnViewReport'
|
|
||||||
calculation = lambda { |headers, items|
|
|
||||||
fte_index = headers['FTE Count']
|
|
||||||
num_of_psychologists = items[fte_index].to_f
|
|
||||||
dese_id = items[headers['School Code']].to_i
|
|
||||||
num_of_students = student_count(filepath: filepaths[0], dese_id:, year: academic_year.range) || 0
|
|
||||||
items << num_of_students
|
|
||||||
benchmark = 250
|
|
||||||
if fte_index.present? && !items[fte_index] != ''
|
|
||||||
result = ((benchmark - (num_of_students / num_of_psychologists)) + benchmark) * 4 / benchmark
|
|
||||||
end
|
|
||||||
|
|
||||||
items << (num_of_students / num_of_psychologists)
|
filepath = filepaths[1]
|
||||||
result
|
write_a_sust_i1_headers(filepath:)
|
||||||
}
|
run_a_sust_i1(filepath:)
|
||||||
Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
|
run_a_sust_i2(filepath:)
|
||||||
|
run_a_sust_i3(filepath:)
|
||||||
|
|
||||||
|
filepath = filepaths[2]
|
||||||
|
write_a_sust_i4_headers(filepath:)
|
||||||
|
run_a_sust_i4(filepath:)
|
||||||
|
|
||||||
|
browser.close
|
||||||
end
|
end
|
||||||
end
|
|
||||||
|
|
||||||
def run_a_sust_i3(filepath:)
|
def write_a_sust_i1_headers(filepath:)
|
||||||
run do |academic_year|
|
headers = ["Raw likert calculation", "Likert Score", "Admin Data Item", "Academic Year", "School Name", "DESE ID",
|
||||||
admin_data_item_id = 'a-sust-i3'
|
"<26 yrs (# )", "26-32 yrs (#)", "33-40 yrs (#)", "41-48 yrs (#)",
|
||||||
url = 'https://profiles.doe.mass.edu/statereport/agestaffing.aspx'
|
"49-56 yrs (#)", "57-64 yrs (#)", "Over 64 yrs (#)", "FTE Count",
|
||||||
range = academic_year.range
|
"Student Count", "Student to Guidance Counselor ratio"]
|
||||||
selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
|
|
||||||
'ctl00_ContentPlaceHolder1_ddYear' => range,
|
|
||||||
'ctl00_ContentPlaceHolder1_ddJobClassification' => 'Paraprofessional' }
|
|
||||||
submit_id = 'btnViewReport'
|
|
||||||
calculation = lambda { |headers, items|
|
|
||||||
fte_index = headers['FTE Count']
|
|
||||||
num_of_paraprofessionals = items[fte_index].to_f
|
|
||||||
dese_id = items[headers['School Code']].to_i
|
|
||||||
num_of_students = student_count(filepath: filepaths[0], dese_id:, year: academic_year.range) || 0
|
|
||||||
items << num_of_students
|
|
||||||
benchmark = 43.4
|
|
||||||
if fte_index.present? && !items[fte_index] != ''
|
|
||||||
result = ((benchmark - (num_of_students / num_of_paraprofessionals)) + benchmark) * 4 / benchmark
|
|
||||||
end
|
|
||||||
|
|
||||||
items << (num_of_students / num_of_paraprofessionals)
|
write_headers(filepath:, headers:)
|
||||||
result
|
|
||||||
}
|
|
||||||
Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
|
|
||||||
end
|
end
|
||||||
end
|
|
||||||
|
|
||||||
def run_a_sust_i4(filepath:)
|
def write_a_sust_i4_headers(filepath:)
|
||||||
run do |academic_year|
|
headers = ["Raw likert calculation", "Likert Score", "Admin Data Item", "Academic Year", "School Name", "DESE ID",
|
||||||
admin_data_item_id = 'a-sust-i4'
|
"PK-2 (# )", "3-5 (# )", "6-8 (# )", "9-12 (# )", "Multiple Grades (# )", "All Grades (# )", "FTE Count",
|
||||||
url = 'https://profiles.doe.mass.edu/statereport/gradesubjectstaffing.aspx'
|
"Student Count", "Student to Art Teacher ratio"]
|
||||||
range = academic_year.range
|
|
||||||
|
|
||||||
selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
|
write_headers(filepath:, headers:)
|
||||||
'ctl00_ContentPlaceHolder1_ddYear' => range,
|
end
|
||||||
'ctl00_ContentPlaceHolder1_ddDisplay' => 'Full-time Equivalents',
|
|
||||||
'ctl00_ContentPlaceHolder1_ddSubject' => 'Arts' }
|
|
||||||
submit_id = 'btnViewReport'
|
|
||||||
calculation = lambda { |_headers, items|
|
|
||||||
num_of_art_teachers = items.last.to_f
|
|
||||||
dese_id = items[1].to_i
|
|
||||||
num_of_students = student_count(filepath: filepaths[0], dese_id:, year: academic_year.range) || 0
|
|
||||||
items << num_of_students
|
|
||||||
benchmark = 500
|
|
||||||
if num_of_art_teachers.present?
|
|
||||||
result = ((benchmark - (num_of_students / num_of_art_teachers)) + benchmark) * 4 / benchmark
|
|
||||||
end
|
|
||||||
|
|
||||||
items << (num_of_students / num_of_art_teachers)
|
def run_a_sust_i1(filepath:)
|
||||||
result
|
run do |academic_year|
|
||||||
}
|
admin_data_item_id = "a-sust-i1"
|
||||||
Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
|
url = "https://profiles.doe.mass.edu/statereport/agestaffing.aspx"
|
||||||
|
range = academic_year.range
|
||||||
|
selectors = { "ctl00_ContentPlaceHolder1_ddReportType" => "School",
|
||||||
|
"ctl00_ContentPlaceHolder1_ddYear" => range,
|
||||||
|
"ctl00_ContentPlaceHolder1_ddJobClassification" => "Guidance Counselor" }
|
||||||
|
submit_id = "btnViewReport"
|
||||||
|
calculation = lambda { |headers, items|
|
||||||
|
fte_index = headers["FTE Count"]
|
||||||
|
num_of_guidance_counselors = items[fte_index].to_f
|
||||||
|
dese_id = items[headers["School Code"]].to_i
|
||||||
|
school = School.find_by_dese_id(dese_id)
|
||||||
|
|
||||||
|
return "NA" unless school.present? && school.is_hs?
|
||||||
|
|
||||||
|
num_of_students = student_count(filepath: filepaths[0], dese_id:, year: academic_year.range) || 0
|
||||||
|
items << num_of_students
|
||||||
|
benchmark = 250
|
||||||
|
if fte_index.present? && !items[fte_index] != ""
|
||||||
|
result = ((benchmark - (num_of_students / num_of_guidance_counselors)) + benchmark) * 4 / benchmark
|
||||||
|
end
|
||||||
|
items << (num_of_students / num_of_guidance_counselors)
|
||||||
|
result
|
||||||
|
}
|
||||||
|
Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
# Scrapes the DESE age-staffing report filtered to
# "School Psychologist -- Non-Special Education" for every academic year and
# appends the rows for admin data item "a-sust-i2" to +filepath+.
#
# For each table row the calculation appends two extra columns to +items+
# (student count, then student-to-psychologist ratio) and returns the raw
# score ((benchmark - ratio) + benchmark) * 4 / benchmark, with a benchmark
# of 250 students per psychologist.
#
# @param filepath [String, Pathname] CSV file the scraped rows are appended to
def run_a_sust_i2(filepath:)
  run do |academic_year|
    admin_data_item_id = "a-sust-i2"
    url = "https://profiles.doe.mass.edu/statereport/agestaffing.aspx"
    range = academic_year.range
    selectors = { "ctl00_ContentPlaceHolder1_ddReportType" => "School",
                  "ctl00_ContentPlaceHolder1_ddYear" => range,
                  "ctl00_ContentPlaceHolder1_ddJobClassification" => "School Psychologist -- Non-Special Education" }
    submit_id = "btnViewReport"
    calculation = lambda { |headers, items|
      fte_index = headers["FTE Count"]
      # Read the FTE cell before appending to items, and only when the
      # column exists: items[nil] would raise TypeError.
      fte_cell = fte_index.nil? ? nil : items[fte_index]
      dese_id = items[headers["School Code"]].to_i
      num_of_students = student_count(filepath: filepaths[0], dese_id:, year: academic_year.range) || 0
      items << num_of_students
      benchmark = 250

      if fte_cell.to_s.strip.empty?
        # No FTE value: the ratio is undefined. Keep the column count stable
        # and return nil so the caller can substitute its "NA" default.
        items << "NA"
        nil
      else
        students_per_psychologist = num_of_students / fte_cell.to_f
        items << students_per_psychologist
        ((benchmark - students_per_psychologist) + benchmark) * 4 / benchmark
      end
    }
    Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
  end
end
|
||||||
|
|
||||||
|
# Scrapes the DESE age-staffing report filtered to "Paraprofessional" for
# every academic year and appends the rows for admin data item "a-sust-i3"
# to +filepath+.
#
# For each table row the calculation appends two extra columns to +items+
# (student count, then student-to-paraprofessional ratio) and returns the raw
# score ((benchmark - ratio) + benchmark) * 4 / benchmark, with a benchmark
# of 43.4 students per paraprofessional.
#
# @param filepath [String, Pathname] CSV file the scraped rows are appended to
def run_a_sust_i3(filepath:)
  run do |academic_year|
    admin_data_item_id = "a-sust-i3"
    url = "https://profiles.doe.mass.edu/statereport/agestaffing.aspx"
    range = academic_year.range
    selectors = { "ctl00_ContentPlaceHolder1_ddReportType" => "School",
                  "ctl00_ContentPlaceHolder1_ddYear" => range,
                  "ctl00_ContentPlaceHolder1_ddJobClassification" => "Paraprofessional" }
    submit_id = "btnViewReport"
    calculation = lambda { |headers, items|
      fte_index = headers["FTE Count"]
      # Read the FTE cell before appending to items, and only when the
      # column exists: items[nil] would raise TypeError.
      fte_cell = fte_index.nil? ? nil : items[fte_index]
      dese_id = items[headers["School Code"]].to_i
      num_of_students = student_count(filepath: filepaths[0], dese_id:, year: academic_year.range) || 0
      items << num_of_students
      benchmark = 43.4

      if fte_cell.to_s.strip.empty?
        # No FTE value: the ratio is undefined. Keep the column count stable
        # and return nil so the caller can substitute its "NA" default.
        items << "NA"
        nil
      else
        students_per_paraprofessional = num_of_students / fte_cell.to_f
        items << students_per_paraprofessional
        ((benchmark - students_per_paraprofessional) + benchmark) * 4 / benchmark
      end
    }
    Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
  end
end
|
||||||
|
|
||||||
|
def run_a_sust_i4(filepath:)
|
||||||
|
run do |academic_year|
|
||||||
|
admin_data_item_id = "a-sust-i4"
|
||||||
|
url = "https://profiles.doe.mass.edu/statereport/gradesubjectstaffing.aspx"
|
||||||
|
range = academic_year.range
|
||||||
|
|
||||||
|
selectors = { "ctl00_ContentPlaceHolder1_ddReportType" => "School",
|
||||||
|
"ctl00_ContentPlaceHolder1_ddYear" => range,
|
||||||
|
"ctl00_ContentPlaceHolder1_ddDisplay" => "Full-time Equivalents",
|
||||||
|
"ctl00_ContentPlaceHolder1_ddSubject" => "Arts" }
|
||||||
|
submit_id = "btnViewReport"
|
||||||
|
calculation = lambda { |_headers, items|
|
||||||
|
num_of_art_teachers = items.last.to_f
|
||||||
|
dese_id = items[1].to_i
|
||||||
|
num_of_students = student_count(filepath: filepaths[0], dese_id:, year: academic_year.range) || 0
|
||||||
|
items << num_of_students
|
||||||
|
benchmark = 500
|
||||||
|
if num_of_art_teachers.present?
|
||||||
|
result = ((benchmark - (num_of_students / num_of_art_teachers)) + benchmark) * 4 / benchmark
|
||||||
|
end
|
||||||
|
|
||||||
|
items << (num_of_students / num_of_art_teachers)
|
||||||
|
result
|
||||||
|
}
|
||||||
|
Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
24
lib/tasks/scrape.rake
Normal file
24
lib/tasks/scrape.rake
Normal file
|
|
@ -0,0 +1,24 @@
|
||||||
|
# Rake tasks that scrape data from the DESE site (profiles.doe.mass.edu).
namespace :dashboard do
  namespace :scrape do
    desc "scrape dese site for admin data"
    task admin: :environment do
      puts "scraping data from dese"
      # ThreeATwo is referenced by its full name because it is defined as
      # Dashboard::Dese::ThreeATwo; `namespace :dashboard` is a rake scope,
      # not a Ruby module, so a bare `Dese::` is looked up at the top level.
      # NOTE(review): the other scrapers are not visible here — if they also
      # live under Dashboard::Dese they need the same qualification.
      scrapers = [Dese::OneAOne, Dese::OneAThree, Dese::TwoAOne, Dese::TwoCOne, Dese::ThreeAOne,
                  Dashboard::Dese::ThreeATwo, Dese::ThreeBOne, Dese::ThreeBTwo, Dese::FourAOne,
                  Dese::FourBTwo, Dese::FourDOne, Dese::FiveCOne, Dese::FiveDTwo]
      scrapers.each do |scraper|
        scraper.new.run_all
      end
    end

    desc "scrape dese site for student enrollment information"
    task enrollment: :environment do
      filepath = Dashboard::Engine.root.join("data", "dashboard", "enrollment", "enrollment.csv")
      Dashboard::Dese::ThreeATwo.new.scrape_enrollments(filepath:)
    end

    desc "scrape dese site for teacher staffing information"
    task staffing: :environment do
      Dashboard::Dese::Staffing.new.run_all
    end
  end
end
|
||||||
Loading…
Add table
Add a link
Reference in a new issue