chore: get admin data scraper running

This commit is contained in:
Nelson Jovel 2024-02-07 11:47:25 -08:00
parent 1810ee0074
commit a142afe022
102 changed files with 41933 additions and 8756 deletions

View file

@ -1,47 +1,49 @@
require 'watir'
require 'csv'
require "watir"
require "csv"
module Dese
class FiveCOne
include Dese::Scraper
attr_reader :filepaths
module Dashboard
module Dese
class FiveCOne
include Dese::Scraper
attr_reader :filepaths
def initialize(filepaths: [Rails.root.join('data', 'admin_data', 'dese', '5C_1_art_course.csv')])
@filepaths = filepaths
end
# Records where the 5C-1 art-course CSV output is written.
#
# @param filepaths [Array] CSV destination paths; defaults to the single file
#   data/admin_data/dese/5C_1_art_course.csv under Dashboard::Engine.root.
def initialize(
  filepaths: [
    Dashboard::Engine.root.join("data", "admin_data", "dese", "5C_1_art_course.csv")
  ]
)
  @filepaths = filepaths
end
def run_all
filepath = filepaths[0]
headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
'K', '01', '02', '03', '04', '05', '06', '07', '08', '09', '10',
'11', '12', 'All Grades', 'Total Students']
write_headers(filepath:, headers:)
def run_all
filepath = filepaths[0]
headers = ["Raw likert calculation", "Likert Score", "Admin Data Item", "Academic Year", "School Name", "DESE ID",
"K", "01", "02", "03", "04", "05", "06", "07", "08", "09", "10",
"11", "12", "All Grades", "Total Students"]
write_headers(filepath:, headers:)
run_a_picp_i1(filepath:)
run_a_picp_i1(filepath:)
browser.close
end
browser.close
end
def run_a_picp_i1(filepath:)
run do |academic_year|
admin_data_item_id = 'a-picp-i1'
url = 'https://profiles.doe.mass.edu/statereport/artcourse.aspx'
range = "#{academic_year.range.split('-')[1].to_i + 2000}"
selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
'ctl00_ContentPlaceHolder1_ddYear' => range,
'ctl00_ContentPlaceHolder1_ddView' => 'Percent' }
submit_id = 'btnViewReport'
calculation = lambda { |headers, items|
percent_graduated_index = headers['All Grades'] - 1
if items[percent_graduated_index].nil? || items[percent_graduated_index] == '' || items[percent_graduated_index].strip == '.0'
return 'NA'
end
def run_a_picp_i1(filepath:)
run do |academic_year|
admin_data_item_id = "a-picp-i1"
url = "https://profiles.doe.mass.edu/statereport/artcourse.aspx"
range = "#{academic_year.range.split('-')[1].to_i + 2000}"
selectors = { "ctl00_ContentPlaceHolder1_ddReportType" => "School",
"ctl00_ContentPlaceHolder1_ddYear" => range,
"ctl00_ContentPlaceHolder1_ddView" => "Percent" }
submit_id = "btnViewReport"
calculation = lambda { |headers, items|
percent_graduated_index = headers["All Grades"] - 1
if items[percent_graduated_index].nil? || items[percent_graduated_index] == "" || items[percent_graduated_index].strip == ".0"
return "NA"
end
percent_passing = items[percent_graduated_index].to_f
benchmark = 77.5
percent_passing * 4 / benchmark if percent_graduated_index.present?
}
Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
percent_passing = items[percent_graduated_index].to_f
benchmark = 77.5
percent_passing * 4 / benchmark if percent_graduated_index.present?
}
Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
end
end
end
end

View file

@ -1,55 +1,57 @@
require 'watir'
require 'csv'
require "watir"
require "csv"
module Dese
class FiveDTwo
include Dese::Scraper
include Dese::Enrollments
attr_reader :filepaths
module Dashboard
module Dese
class FiveDTwo
include Dese::Scraper
include Dese::Enrollments
attr_reader :filepaths
def initialize(filepaths: [Rails.root.join('data', 'admin_data', 'dese', 'enrollments.csv'),
Rails.root.join('data', 'admin_data', 'dese', '5D_2_age_staffing.csv')])
@filepaths = filepaths
end
# Records the CSV locations used by the 5D-2 scrape.
#
# @param filepaths [Array] two paths, in order: the enrollments CSV and the
#   age-staffing CSV. Both default to files under
#   data/admin_data/dese beneath Dashboard::Engine.root.
def initialize(
  filepaths: [
    Dashboard::Engine.root.join("data", "admin_data", "dese", "enrollments.csv"),
    Dashboard::Engine.root.join("data", "admin_data", "dese", "5D_2_age_staffing.csv")
  ]
)
  @filepaths = filepaths
end
def run_all
filepath = filepaths[0]
scrape_enrollments(filepath:)
def run_all
filepath = filepaths[0]
scrape_enrollments(filepath:)
filepath = filepaths[1]
headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
'<26 yrs (# )', '26-32 yrs (#)', '33-40 yrs (#)', '41-48 yrs (#)', '49-56 yrs (#)', '57-64 yrs (#)', 'Over 64 yrs (#)', 'FTE Count']
write_headers(filepath:, headers:)
filepath = filepaths[1]
headers = ["Raw likert calculation", "Likert Score", "Admin Data Item", "Academic Year", "School Name", "DESE ID",
"<26 yrs (# )", "26-32 yrs (#)", "33-40 yrs (#)", "41-48 yrs (#)", "49-56 yrs (#)", "57-64 yrs (#)", "Over 64 yrs (#)", "FTE Count"]
write_headers(filepath:, headers:)
run_a_phya_i1(filepath:)
run_a_phya_i1(filepath:)
browser.close
end
browser.close
end
def run_a_phya_i1(filepath:)
run do |academic_year|
admin_data_item_id = 'a-phya-i1'
url = 'https://profiles.doe.mass.edu/statereport/agestaffing.aspx'
range = academic_year.range
selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
'ctl00_ContentPlaceHolder1_ddYear' => range,
'ctl00_ContentPlaceHolder1_ddJobClassification' => 'School Nurse -- Non-Special Education' }
submit_id = 'btnViewReport'
calculation = lambda { |headers, items|
nurse_index = headers['FTE Count']
return 'NA' if items[nurse_index] == '' || items[nurse_index].strip == '.0'
def run_a_phya_i1(filepath:)
run do |academic_year|
admin_data_item_id = "a-phya-i1"
url = "https://profiles.doe.mass.edu/statereport/agestaffing.aspx"
range = academic_year.range
selectors = { "ctl00_ContentPlaceHolder1_ddReportType" => "School",
"ctl00_ContentPlaceHolder1_ddYear" => range,
"ctl00_ContentPlaceHolder1_ddJobClassification" => "School Nurse -- Non-Special Education" }
submit_id = "btnViewReport"
calculation = lambda { |headers, items|
nurse_index = headers["FTE Count"]
return "NA" if items[nurse_index] == "" || items[nurse_index].strip == ".0"
nurse_count = items[nurse_index].to_f
benchmark = 750
nurse_count * 4 / benchmark if nurse_index.present?
nurse_count = items[nurse_index].to_f
benchmark = 750
nurse_count * 4 / benchmark if nurse_index.present?
dese_id = items[headers['School Code']].to_i
num_of_students = student_count(filepath: filepaths[0], dese_id:, year: academic_year.range) || 0
items << num_of_students
items << (num_of_students / nurse_count)
((benchmark - (num_of_students / nurse_count)) + benchmark) * 4 / benchmark
}
Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
dese_id = items[headers["School Code"]].to_i
num_of_students = student_count(filepath: filepaths[0], dese_id:, year: academic_year.range) || 0
items << num_of_students
items << (num_of_students / nurse_count)
((benchmark - (num_of_students / nurse_count)) + benchmark) * 4 / benchmark
}
Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
end
end
end
end

View file

@ -1,43 +1,46 @@
require 'watir'
require 'csv'
require "watir"
require "csv"
module Dese
class FourAOne
include Dese::Scraper
attr_reader :filepaths
module Dashboard
module Dese
class FourAOne
include Dese::Scraper
attr_reader :filepaths
def initialize(filepaths: [Rails.root.join('data', 'admin_data', 'dese', '4A_1_grade_nine_course_pass.csv')])
@filepaths = filepaths
end
# Records where the 4A-1 grade-nine course-pass CSV output is written.
#
# @param filepaths [Array] CSV destination paths; defaults to the single file
#   data/admin_data/dese/4A_1_grade_nine_course_pass.csv under
#   Dashboard::Engine.root.
def initialize(
  filepaths: [
    Dashboard::Engine.root.join("data", "admin_data", "dese", "4A_1_grade_nine_course_pass.csv")
  ]
)
  @filepaths = filepaths
end
def run_all
filepath = filepaths[0]
headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
'# Grade Nine Students', '# Passing All Courses', '% Passing All Courses']
write_headers(filepath:, headers:)
def run_all
filepath = filepaths[0]
headers = ["Raw likert calculation", "Likert Score", "Admin Data Item", "Academic Year", "School Name", "DESE ID",
"# Grade Nine Students", "# Passing All Courses", "% Passing All Courses"]
write_headers(filepath:, headers:)
run_a_ovpe_i1(filepath:)
run_a_ovpe_i1(filepath:)
browser.close
end
browser.close
end
def run_a_ovpe_i1(filepath:)
run do |academic_year|
admin_data_item_id = 'a-ovpe-i1'
url = 'https://profiles.doe.mass.edu/statereport/gradeninecoursepass.aspx'
range = "#{academic_year.range.split('-')[1].to_i + 2000}"
selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
'ctl00_ContentPlaceHolder1_ddYear' => range }
submit_id = 'btnViewReport'
calculation = lambda { |headers, items|
passing_index = headers['% Passing All Courses']
return 'NA' if items[passing_index] == '' || items[passing_index].strip == '.0'
def run_a_ovpe_i1(filepath:)
run do |academic_year|
admin_data_item_id = "a-ovpe-i1"
url = "https://profiles.doe.mass.edu/statereport/gradeninecoursepass.aspx"
range = "#{academic_year.range.split('-')[1].to_i + 2000}"
selectors = { "ctl00_ContentPlaceHolder1_ddReportType" => "School",
"ctl00_ContentPlaceHolder1_ddYear" => range }
submit_id = "btnViewReport"
calculation = lambda { |headers, items|
passing_index = headers["% Passing All Courses"]
return "NA" if items[passing_index] == "" || items[passing_index].strip == ".0"
percent_passing = items[passing_index].to_f
benchmark = 95
percent_passing * 4 / benchmark if passing_index.present?
}
Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
percent_passing = items[passing_index].to_f
benchmark = 95
percent_passing * 4 / benchmark if passing_index.present?
}
Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
end
end
end
end

View file

@ -1,104 +1,106 @@
require 'watir'
require 'csv'
require "watir"
require "csv"
module Dese
class FourBTwo
include Dese::Scraper
attr_reader :filepaths
module Dashboard
module Dese
class FourBTwo
include Dese::Scraper
attr_reader :filepaths
def initialize(filepaths: [Rails.root.join('data', 'admin_data', 'dese', '4B_2_four_year_grad.csv'),
Rails.root.join('data', 'admin_data', 'dese', '4B_2_retention.csv'),
Rails.root.join('data', 'admin_data', 'dese', '4B_2_five_year_grad.csv')])
@filepaths = filepaths
end
def run_all
filepath = filepaths[0]
headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
'# in Cohort', '% Graduated', '% Still in School', '% Non-Grad Completers', '% H.S. Equiv.',
'% Dropped Out', '% Permanently Excluded']
write_headers(filepath:, headers:)
run_a_degr_i1(filepath:)
filepath = filepaths[1]
headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
'# Enrolled', '# Retained', '% Retained', '01', '02', '03', '04', '05', '06', '07', '08', '09', '10',
'11', '12']
write_headers(filepath:, headers:)
run_a_degr_i2(filepath:)
filepath = filepaths[2]
headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
'# in Cohort', '% Graduated', '% Still in School', '% Non-Grad Completers', '% H.S. Equiv.',
'% Dropped Out', '% Permanently Excluded']
write_headers(filepath:, headers:)
run_a_degr_i3(filepath:)
browser.close
end
def run_a_degr_i1(filepath:)
run do |academic_year|
admin_data_item_id = 'a-degr-i1'
url = 'https://profiles.doe.mass.edu/statereport/gradrates.aspx'
range = "#{academic_year.range.split('-')[1].to_i + 2000}"
selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
'ctl00_ContentPlaceHolder1_ddYear' => range,
'ctl00_ContentPlaceHolder1_ddRateType' => '4yr Grad' }
submit_id = 'btnViewReport'
calculation = lambda { |headers, items|
percent_graduated_index = headers['% Graduated']
return 'NA' if items[percent_graduated_index] == '' || items[percent_graduated_index].strip == '.0'
percent_passing = items[percent_graduated_index].to_f
benchmark = 80
percent_passing * 4 / benchmark if percent_graduated_index.present?
}
Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
# Records the CSV locations used by the 4B-2 scrapes.
#
# @param filepaths [Array] three paths, in order: four-year graduation,
#   retention, and five-year graduation. All default to files under
#   data/admin_data/dese beneath Dashboard::Engine.root.
def initialize(
  filepaths: [
    Dashboard::Engine.root.join("data", "admin_data", "dese", "4B_2_four_year_grad.csv"),
    Dashboard::Engine.root.join("data", "admin_data", "dese", "4B_2_retention.csv"),
    Dashboard::Engine.root.join("data", "admin_data", "dese", "4B_2_five_year_grad.csv")
  ]
)
  @filepaths = filepaths
end
end
def run_a_degr_i2(filepath:)
run do |academic_year|
admin_data_item_id = 'a-degr-i2'
url = 'https://profiles.doe.mass.edu/statereport/retention.aspx'
range = "#{academic_year.range.split('-')[1].to_i + 2000}"
selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
'ctl00_ContentPlaceHolder1_ddYear' => range,
'ctl00_ContentPlaceHolder1_ddView' => 'Percent' }
submit_id = 'btnViewReport'
calculation = lambda { |headers, items|
retained_index = headers['% Retained']
return 'NA' if items[retained_index] == '' || items[retained_index].strip == '.0'
def run_all
filepath = filepaths[0]
headers = ["Raw likert calculation", "Likert Score", "Admin Data Item", "Academic Year", "School Name", "DESE ID",
"# in Cohort", "% Graduated", "% Still in School", "% Non-Grad Completers", "% H.S. Equiv.",
"% Dropped Out", "% Permanently Excluded"]
write_headers(filepath:, headers:)
percent_retained = items[retained_index].to_f
benchmark = 2
((benchmark - percent_retained) + benchmark) * 4 / benchmark if retained_index.present?
}
Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
run_a_degr_i1(filepath:)
filepath = filepaths[1]
headers = ["Raw likert calculation", "Likert Score", "Admin Data Item", "Academic Year", "School Name", "DESE ID",
"# Enrolled", "# Retained", "% Retained", "01", "02", "03", "04", "05", "06", "07", "08", "09", "10",
"11", "12"]
write_headers(filepath:, headers:)
run_a_degr_i2(filepath:)
filepath = filepaths[2]
headers = ["Raw likert calculation", "Likert Score", "Admin Data Item", "Academic Year", "School Name", "DESE ID",
"# in Cohort", "% Graduated", "% Still in School", "% Non-Grad Completers", "% H.S. Equiv.",
"% Dropped Out", "% Permanently Excluded"]
write_headers(filepath:, headers:)
run_a_degr_i3(filepath:)
browser.close
end
end
def run_a_degr_i3(filepath:)
run do |academic_year|
admin_data_item_id = 'a-degr-i3'
url = 'https://profiles.doe.mass.edu/statereport/gradrates.aspx'
range = "#{academic_year.range.split('-')[1].to_i + 2000}"
selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
'ctl00_ContentPlaceHolder1_ddYear' => range,
'ctl00_ContentPlaceHolder1_ddRateType' => '5yr Grad' }
submit_id = 'btnViewReport'
calculation = lambda { |headers, items|
percent_graduated_index = headers['% Graduated']
return 'NA' if items[percent_graduated_index] == '' || items[percent_graduated_index].strip == '.0'
def run_a_degr_i1(filepath:)
run do |academic_year|
admin_data_item_id = "a-degr-i1"
url = "https://profiles.doe.mass.edu/statereport/gradrates.aspx"
range = "#{academic_year.range.split('-')[1].to_i + 2000}"
selectors = { "ctl00_ContentPlaceHolder1_ddReportType" => "School",
"ctl00_ContentPlaceHolder1_ddYear" => range,
"ctl00_ContentPlaceHolder1_ddRateType" => "4yr Grad" }
submit_id = "btnViewReport"
calculation = lambda { |headers, items|
percent_graduated_index = headers["% Graduated"]
return "NA" if items[percent_graduated_index] == "" || items[percent_graduated_index].strip == ".0"
percent_passing = items[percent_graduated_index].to_f
benchmark = 85
percent_passing * 4 / benchmark if percent_graduated_index.present?
}
Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
percent_passing = items[percent_graduated_index].to_f
benchmark = 80
percent_passing * 4 / benchmark if percent_graduated_index.present?
}
Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
end
end
# Builds the scrape definition for admin data item "a-degr-i2" (retention
# report) once per academic year yielded by `run`.
#
# The scoring lambda receives a header-name => column-index hash and the
# row's cell texts. It returns:
# * nil  when the "% Retained" column is not present in the headers,
# * "NA" when the cell is missing, empty, or ".0",
# * otherwise ((2 - pct) + 2) * 4 / 2, so a lower retained percentage
#   produces a higher score.
#
# @param filepath [Object] CSV destination handed through to Prerequisites
def run_a_degr_i2(filepath:)
  run do |academic_year|
    admin_data_item_id = "a-degr-i2"
    url = "https://profiles.doe.mass.edu/statereport/retention.aspx"
    # The year dropdown wants the four-digit ending year; assumes ranges look
    # like "2022-23" -- TODO confirm against AcademicYear.
    range = (academic_year.range.split("-")[1].to_i + 2000).to_s
    selectors = { "ctl00_ContentPlaceHolder1_ddReportType" => "School",
                  "ctl00_ContentPlaceHolder1_ddYear" => range,
                  "ctl00_ContentPlaceHolder1_ddView" => "Percent" }
    submit_id = "btnViewReport"
    calculation = lambda { |headers, items|
      retained_index = headers["% Retained"]
      # Check the index before using it: items[nil] raises TypeError.
      return if retained_index.nil?

      cell = items[retained_index]
      return "NA" if cell.nil? || cell == "" || cell.strip == ".0"

      benchmark = 2
      ((benchmark - cell.to_f) + benchmark) * 4 / benchmark
    }

    Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
  end
end
# Builds the scrape definition for admin data item "a-degr-i3" (graduation
# rates report with the "5yr Grad" rate type) once per academic year yielded
# by `run`.
#
# The scoring lambda receives a header-name => column-index hash and the
# row's cell texts. It returns:
# * nil  when the "% Graduated" column is not present in the headers,
# * "NA" when the cell is missing, empty, or ".0",
# * otherwise pct * 4 / 85.
#
# @param filepath [Object] CSV destination handed through to Prerequisites
def run_a_degr_i3(filepath:)
  run do |academic_year|
    admin_data_item_id = "a-degr-i3"
    url = "https://profiles.doe.mass.edu/statereport/gradrates.aspx"
    # The year dropdown wants the four-digit ending year; assumes ranges look
    # like "2022-23" -- TODO confirm against AcademicYear.
    range = (academic_year.range.split("-")[1].to_i + 2000).to_s
    selectors = { "ctl00_ContentPlaceHolder1_ddReportType" => "School",
                  "ctl00_ContentPlaceHolder1_ddYear" => range,
                  "ctl00_ContentPlaceHolder1_ddRateType" => "5yr Grad" }
    submit_id = "btnViewReport"
    calculation = lambda { |headers, items|
      percent_graduated_index = headers["% Graduated"]
      # Check the index before using it: items[nil] raises TypeError.
      return if percent_graduated_index.nil?

      cell = items[percent_graduated_index]
      return "NA" if cell.nil? || cell == "" || cell.strip == ".0"

      benchmark = 85
      cell.to_f * 4 / benchmark
    }

    Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
  end
end
end
end

View file

@ -1,74 +1,76 @@
require 'watir'
require 'csv'
require "watir"
require "csv"
# TODO: convert this to simpler format and add a run_all method
module Dese
class FourDOne
attr_reader :filepath
module Dashboard
module Dese
class FourDOne
attr_reader :filepath
def initialize(filepath: Rails.root.join('data', 'admin_data', 'dese', '4D_1_plans_of_grads.csv'))
@filepath = filepath
end
def run_all
url = 'https://profiles.doe.mass.edu/statereport/plansofhsgrads.aspx'
browser = Watir::Browser.new
write_headers(filepath:)
academic_years = AcademicYear.all
academic_years.each do |academic_year|
table = scrape(browser:, url:, range: academic_year.range)
id = 'a-cgpr-i1'
write_csv(table:, filepath:, range: academic_year.range, id:) unless table.nil?
# Records where the 4D-1 plans-of-graduates CSV output is written.
#
# @param filepath [Object] CSV destination; defaults to
#   data/admin_data/dese/4D_1_plans_of_grads.csv under Dashboard::Engine.root.
def initialize(
  filepath: Dashboard::Engine.root.join("data", "admin_data", "dese", "4D_1_plans_of_grads.csv")
)
  @filepath = filepath
end
browser.close
end
def scrape(browser:, url:, range:)
browser.goto(url)
return unless browser.option(text: range).present?
browser.select(id: 'ctl00_ContentPlaceHolder1_ddReportType').select(/School/)
browser.select(id: 'ctl00_ContentPlaceHolder1_ddYear').select(text: range)
browser.button(id: 'btnViewReport').click
sleep Dese::Scraper::DELAY # Sleep to prevent hitting mass.edu with too many requests
document = Nokogiri::HTML(browser.html)
document.css('tr')
end
def write_headers(filepath:)
CSV.open(filepath, 'w') do |csv|
headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID', '4 Year Private College', '4 Year Public College', '2 Year Private College', '2 Year Public College',
'Other Post Secondary', 'Apprenticeship', 'Work', 'Military', 'Other', 'Unknown', 'Total']
csv << headers
# Scrapes the plans-of-high-school-graduates report for every AcademicYear
# and appends the rows to the CSV at `filepath`.
#
# The header row is written first (which truncates the file), then each year
# is scraped in turn; years for which `scrape` returns nil are skipped.
# The browser is closed in an `ensure` so it is released even when a scrape
# or CSV write raises. The return value is not meaningful.
def run_all
  url = "https://profiles.doe.mass.edu/statereport/plansofhsgrads.aspx"
  browser = Watir::Browser.new

  begin
    write_headers(filepath:)
    AcademicYear.all.each do |academic_year|
      table = scrape(browser:, url:, range: academic_year.range)
      next if table.nil?

      write_csv(table:, filepath:, range: academic_year.range, id: "a-cgpr-i1")
    end
  ensure
    browser.close
  end
end
end
def write_csv(table:, filepath:, range:, id:)
CSV.open(filepath, 'a') do |csv|
table.each do |row|
items = row.css('td').map(&:text)
dese_id = items[1].to_i
next if dese_id.nil? || dese_id.zero?
def scrape(browser:, url:, range:)
browser.goto(url)
raw_likert_score = calculate(cells: items)
likert_score = raw_likert_score
likert_score = 5 if raw_likert_score > 5
likert_score = 1 if raw_likert_score < 1
likert_score = likert_score.round(2)
output = []
output << raw_likert_score
output << likert_score
output << id
output << range
output << items
csv << output.flatten
return unless browser.option(text: range).present?
browser.select(id: "ctl00_ContentPlaceHolder1_ddReportType").select(/School/)
browser.select(id: "ctl00_ContentPlaceHolder1_ddYear").select(text: range)
browser.button(id: "btnViewReport").click
sleep Dese::Scraper::DELAY # Sleep to prevent hitting mass.edu with too many requests
document = Nokogiri::HTML(browser.html)
document.css("tr")
end
# Creates (or truncates) the CSV at +filepath+ and writes the single header
# row for the plans-of-graduates output.
#
# @param filepath [Object] path accepted by CSV.open
def write_headers(filepath:)
  score_columns = ["Raw likert calculation", "Likert Score", "Admin Data Item", "Academic Year"]
  report_columns = ["School Name", "DESE ID", "4 Year Private College", "4 Year Public College",
                    "2 Year Private College", "2 Year Public College", "Other Post Secondary",
                    "Apprenticeship", "Work", "Military", "Other", "Unknown", "Total"]

  CSV.open(filepath, "w") { |out| out << (score_columns + report_columns) }
end
end
def calculate(cells:)
(cells[2].to_f + cells[3].to_f + cells[4].to_f + cells[5].to_f + cells[6].to_f + cells[7].to_f + cells[8].to_f) * 4 / 75
# Appends one CSV line per usable row of +table+ to +filepath+.
#
# Each line is: raw score, score limited to 1..5 and rounded to two places,
# +id+, +range+, followed by every cell text of the row.
#
# @param table [Enumerable] rows responding to css("td"), whose results
#   respond to #text
# @param filepath [Object] path accepted by CSV.open (opened in append mode)
# @param range [String] academic-year label written to each line
# @param id [String] admin data item id written to each line
def write_csv(table:, filepath:, range:, id:)
  CSV.open(filepath, "a") do |csv|
    table.each do |row|
      cells = row.css("td").map(&:text)
      # Skip rows whose second cell is not a non-zero number; rows with no
      # <td> cells end up here too because nil.to_i is 0.
      next if cells[1].to_i.zero?

      raw = calculate(cells: cells)
      bounded = raw.clamp(1, 5).round(2)
      csv << [raw, bounded, id, range, *cells]
    end
  end
end
# Adds the numeric values of cells 2 through 8 and scales the total by 4 / 75.
# Missing or non-numeric cells count as 0.0 (String#to_f / nil.to_f).
#
# @param cells [Array<String>] cell texts of one table row
# @return [Float]
def calculate(cells:)
  total = (2..8).map { |column| cells[column].to_f }.reduce(:+)
  total * 4 / 75
end
end
end
end

View file

@ -1,115 +1,117 @@
require 'watir'
require 'csv'
# TODO: convert this to simpler format and add a run_all method
module Dese
class OneAOne
attr_reader :filepath
require "watir"
require "csv"
def initialize(filepath: Rails.root.join('data', 'admin_data', 'dese', '1A_1_teacher_data.csv'))
@filepath = filepath
end
module Dashboard
module Dese
class OneAOne
attr_reader :filepath
def run_all
url = 'https://profiles.doe.mass.edu/statereport/teacherdata.aspx'
browser = Watir::Browser.new
write_headers(filepath:)
academic_years = AcademicYear.all
academic_years.each do |academic_year|
document = scrape(browser:, url:, range: academic_year.range)
id = 'a-exp-i1'
write_csv(document:, filepath:, range: academic_year.range, id:) unless document.nil?
# Records where the 1A-1 teacher-data CSV output is written.
#
# @param filepath [Object] CSV destination; defaults to
#   data/admin_data/dese/1A_1_teacher_data.csv under Dashboard::Engine.root.
def initialize(
  filepath: Dashboard::Engine.root.join("data", "admin_data", "dese", "1A_1_teacher_data.csv")
)
  @filepath = filepath
end
browser.close
end
def scrape(browser:, url:, range:)
browser.goto(url)
return unless browser.option(text: 'School').present?
return unless browser.option(text: range).present?
browser.select(id: 'ctl00_ContentPlaceHolder1_ddReportType').select(text: 'School')
browser.select(id: 'ctl00_ContentPlaceHolder1_ddYear').select(text: range)
browser.button(id: 'ctl00_ContentPlaceHolder1_btnViewReport').click
sleep Dese::Scraper::DELAY # Sleep to prevent hitting mass.edu with too many requests
Nokogiri::HTML(browser.html)
end
def write_headers(filepath:)
CSV.open(filepath, 'w') do |csv|
headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID', 'Total # of Teachers(FTE)', 'Percent of Teachers Licensed',
'Student/Teacher Ratio', 'Percent of Experienced Teachers', 'Percent of Teachers without Waiver or Provisional License', 'Percent Teaching in-field']
csv << headers
# Scrapes the teacher-data report for every AcademicYear and appends the
# rows to the CSV at `filepath`.
#
# The header row is written first (which truncates the file), then each year
# is scraped in turn; years for which `scrape` returns nil are skipped.
# The browser is closed in an `ensure` so it is released even when a scrape
# or CSV write raises. The return value is not meaningful.
def run_all
  url = "https://profiles.doe.mass.edu/statereport/teacherdata.aspx"
  browser = Watir::Browser.new

  begin
    write_headers(filepath:)
    AcademicYear.all.each do |academic_year|
      document = scrape(browser:, url:, range: academic_year.range)
      next if document.nil?

      write_csv(document:, filepath:, range: academic_year.range, id: "a-exp-i1")
    end
  ensure
    browser.close
  end
end
end
def write_csv(document:, filepath:, range:, id:)
table = document.css('tr')
headers = document.css('.sorting')
header_hash = headers.each_with_index.map { |header, index| [header.text, index] }.to_h
experienced_teacher_index = header_hash['Percent of Experienced Teachers']
dese_id_index = header_hash['School Code']
def scrape(browser:, url:, range:)
browser.goto(url)
CSV.open(filepath, 'a') do |csv|
table.each do |row|
items = row.css('td').map(&:text)
dese_id = items[1].to_i
next if dese_id.nil? || dese_id.zero?
return unless browser.option(text: "School").present?
return unless browser.option(text: range).present?
raw_likert_score = items[experienced_teacher_index].to_f * 4 / 80 if experienced_teacher_index.present?
raw_likert_score ||= 'NA'
likert_score = raw_likert_score
if likert_score != 'NA'
likert_score = 5 if likert_score > 5
likert_score = 1 if likert_score < 1
likert_score = likert_score.round(2)
end
browser.select(id: "ctl00_ContentPlaceHolder1_ddReportType").select(text: "School")
browser.select(id: "ctl00_ContentPlaceHolder1_ddYear").select(text: range)
browser.button(id: "ctl00_ContentPlaceHolder1_btnViewReport").click
sleep Dese::Scraper::DELAY # Sleep to prevent hitting mass.edu with too many requests
Nokogiri::HTML(browser.html)
end
output = []
output << raw_likert_score
output << likert_score
output << 'a-exp-i1'
output << range
output << items
output = output.flatten
csv << output
# Creates (or truncates) the CSV at +filepath+ and writes the single header
# row for the teacher-data output.
#
# @param filepath [Object] path accepted by CSV.open
def write_headers(filepath:)
  score_columns = ["Raw likert calculation", "Likert Score", "Admin Data Item", "Academic Year"]
  report_columns = ["School Name", "DESE ID", "Total # of Teachers(FTE)", "Percent of Teachers Licensed",
                    "Student/Teacher Ratio", "Percent of Experienced Teachers",
                    "Percent of Teachers without Waiver or Provisional License", "Percent Teaching in-field"]

  CSV.open(filepath, "w") { |out| out << (score_columns + report_columns) }
end
in_field_index = header_hash['Percent Teaching In-Field']
def write_csv(document:, filepath:, range:, id:)
table = document.css("tr")
headers = document.css(".sorting")
header_hash = headers.each_with_index.map { |header, index| [header.text, index] }.to_h
experienced_teacher_index = header_hash["Percent of Experienced Teachers"]
dese_id_index = header_hash["School Code"]
CSV.open(filepath, 'a') do |csv|
table.each do |row|
items = row.css('td').map(&:text)
dese_id = items[dese_id_index].to_i
next if dese_id.nil? || dese_id.zero?
CSV.open(filepath, "a") do |csv|
table.each do |row|
items = row.css("td").map(&:text)
dese_id = items[1].to_i
next if dese_id.nil? || dese_id.zero?
percent_in_field = items[in_field_index].to_f if in_field_index.present?
if in_field_index.present? && percent_in_field.present? && !percent_in_field.zero?
raw_likert_score = percent_in_field * 4 / 95
end
raw_likert_score ||= 'NA'
likert_score = raw_likert_score
if likert_score != 'NA'
likert_score = 5 if likert_score > 5
likert_score = 1 if likert_score < 1
likert_score = likert_score.round(2)
raw_likert_score = items[experienced_teacher_index].to_f * 4 / 80 if experienced_teacher_index.present?
raw_likert_score ||= "NA"
likert_score = raw_likert_score
if likert_score != "NA"
likert_score = 5 if likert_score > 5
likert_score = 1 if likert_score < 1
likert_score = likert_score.round(2)
end
output = []
output << raw_likert_score
output << likert_score
output << "a-exp-i1"
output << range
output << items
output = output.flatten
csv << output
end
end
output = []
output << raw_likert_score
output << likert_score
output << 'a-exp-i3'
output << range
output << items
output = output.flatten
csv << output
in_field_index = header_hash["Percent Teaching In-Field"]
CSV.open(filepath, "a") do |csv|
table.each do |row|
items = row.css("td").map(&:text)
dese_id = items[dese_id_index].to_i
next if dese_id.nil? || dese_id.zero?
percent_in_field = items[in_field_index].to_f if in_field_index.present?
if in_field_index.present? && percent_in_field.present? && !percent_in_field.zero?
raw_likert_score = percent_in_field * 4 / 95
end
raw_likert_score ||= "NA"
likert_score = raw_likert_score
if likert_score != "NA"
likert_score = 5 if likert_score > 5
likert_score = 1 if likert_score < 1
likert_score = likert_score.round(2)
end
output = []
output << raw_likert_score
output << likert_score
output << "a-exp-i3"
output << range
output << items
output = output.flatten
csv << output
end
end
end
end
def calculate(cells:)
cells[5].to_f * 4 / 95
# Scales the numeric value of the sixth cell (index 5) by 4 / 95.
# A missing or non-numeric cell counts as 0.0.
# NOTE(review): no call to this method is visible in this class's write_csv,
# which computes its scores inline -- confirm whether it is still used.
#
# @param cells [Array<String>] cell texts of one table row
# @return [Float]
def calculate(cells:)
  value = cells[5].to_f
  value * 4 / 95
end
end
end
end

View file

@ -1,67 +1,70 @@
require 'watir'
require 'csv'
require "watir"
require "csv"
module Dese
class OneAThree
include Dese::Scraper
attr_reader :filepaths
module Dashboard
module Dese
class OneAThree
include Dese::Scraper
attr_reader :filepaths
def initialize(filepaths: [Rails.root.join('data', 'admin_data', 'dese', '1A_3_staffing_retention.csv'),
Rails.root.join('data', 'admin_data', 'dese', '1A_3_teachers_of_color.csv')])
@filepaths = filepaths
end
def run_all
run_a_pcom_i1
run_a_pcom_i3
browser.close
end
def run_a_pcom_i1
filepath = filepaths[0]
headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
'Principal Total', 'Principal # Retained', 'Principal % Retained',
'Teacher Total', 'Teacher # Retained', 'Teacher % Retained']
write_headers(filepath:, headers:)
run do |academic_year|
url = 'https://profiles.doe.mass.edu/statereport/staffingRetentionRates.aspx'
range = "#{academic_year.range.split('-').last.to_i + 2000}"
selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
'ctl00_ContentPlaceHolder1_ddYear' => range }
submit_id = 'btnViewReport'
calculation = lambda { |headers, items|
retained_teachers = headers['% Retained']
items[retained_teachers].to_f * 4 / 85 if retained_teachers.present?
}
admin_data_item_id = 'a-pcom-i1'
Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
# Records the CSV locations used by the 1A-3 scrapes.
#
# @param filepaths [Array] two paths, in order: staffing retention and
#   teachers of color. Both default to files under data/admin_data/dese
#   beneath Dashboard::Engine.root.
def initialize(
  filepaths: [
    Dashboard::Engine.root.join("data", "admin_data", "dese", "1A_3_staffing_retention.csv"),
    Dashboard::Engine.root.join("data", "admin_data", "dese", "1A_3_teachers_of_color.csv")
  ]
)
  @filepaths = filepaths
end
end
def run_a_pcom_i3
filepath = filepaths[1]
headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
'African American (%)', 'Asian (%)', 'Hispanic (%)', 'White (%)', 'Native Hawaiian, Pacific Islander (%)',
'Multi-Race,Non-Hispanic (%)', 'Females (%)', 'Males (%)', 'FTE Count']
write_headers(filepath:, headers:)
def run_all
run_a_pcom_i1
run_a_pcom_i3
run do |academic_year|
url = 'https://profiles.doe.mass.edu/statereport/teacherbyracegender.aspx'
range = academic_year.range
selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
'ctl00_ContentPlaceHolder1_ddYear' => range,
'ctl00_ContentPlaceHolder1_ddDisplay' => 'Percentages' }
submit_id = 'ctl00_ContentPlaceHolder1_btnViewReport'
calculation = lambda { |headers, items|
white = headers['White (%)']
result = ((100 - items[white].to_f) * 4) / 12.8 if white.present?
browser.close
end
result = 1 if result < 1
result
}
admin_data_item_id = 'a-pcom-i3'
Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
# Writes the header row for the staffing-retention CSV (filepaths[0]) and
# builds the scrape definition for admin data item "a-pcom-i1" once per
# academic year yielded by `run`.
#
# The scoring lambda looks up the "% Retained" column index and returns
# cell.to_f * 4 / 85, or nil when that column index is not present.
def run_a_pcom_i1
  filepath = filepaths[0]
  write_headers(
    filepath:,
    headers: ["Raw likert calculation", "Likert Score", "Admin Data Item", "Academic Year", "School Name", "DESE ID",
              "Principal Total", "Principal # Retained", "Principal % Retained",
              "Teacher Total", "Teacher # Retained", "Teacher % Retained"]
  )

  run do |academic_year|
    # Four-digit ending year for the year dropdown.
    ending_year = academic_year.range.split("-").last.to_i + 2000
    score = lambda do |column_positions, cells|
      position = column_positions["% Retained"]
      return unless position.present?

      cells[position].to_f * 4 / 85
    end

    Prerequisites.new(
      filepath,
      "https://profiles.doe.mass.edu/statereport/staffingRetentionRates.aspx",
      { "ctl00_ContentPlaceHolder1_ddReportType" => "School",
        "ctl00_ContentPlaceHolder1_ddYear" => ending_year.to_s },
      "btnViewReport",
      "a-pcom-i1",
      score
    )
  end
end
# Writes the header row of the second configured CSV, then hands `run` (from the
# scraper mixin, not shown here) one Prerequisites bundle per academic year for
# the teacher race/gender report in "Percentages" display, tagged "a-pcom-i3".
def run_a_pcom_i3
  filepath = filepaths[1]
  headers = ["Raw likert calculation", "Likert Score", "Admin Data Item", "Academic Year", "School Name", "DESE ID",
             "African American (%)", "Asian (%)", "Hispanic (%)", "White (%)", "Native Hawaiian, Pacific Islander (%)",
             "Multi-Race,Non-Hispanic (%)", "Females (%)", "Males (%)", "FTE Count"]
  write_headers(filepath:, headers:)

  run do |academic_year|
    url = "https://profiles.doe.mass.edu/statereport/teacherbyracegender.aspx"
    range = academic_year.range
    selectors = { "ctl00_ContentPlaceHolder1_ddReportType" => "School",
                  "ctl00_ContentPlaceHolder1_ddYear" => range,
                  "ctl00_ContentPlaceHolder1_ddDisplay" => "Percentages" }
    submit_id = "ctl00_ContentPlaceHolder1_btnViewReport"

    # Scores the non-white teacher share (100 - white %) so that 12.8 maps to 4,
    # with a floor of 1. Returns nil when the report has no "White (%)" column.
    calculation = lambda { |headers, items|
      white = headers["White (%)"]
      # Previously a missing column left the result nil and the floor check
      # (`nil < 1`) raised NoMethodError; bail out with nil instead.
      next if white.nil?

      result = ((100 - items[white].to_f) * 4) / 12.8
      result < 1 ? 1 : result
    }
    admin_data_item_id = "a-pcom-i3"
    Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
  end
end
end
end

View file

@ -6,7 +6,7 @@ module Dashboard
include Dashboard::Dese::Scraper
attr_reader :filepath
def initialize(filepath: Dashboard::Engine.root.join("data", "dashboard", "staffing", "staffing.csv"))
def initialize(filepath: Dashboard::Engine.root.join("data", "staffing", "staffing.csv"))
@filepath = filepath
end

View file

@ -1,44 +1,47 @@
require 'watir'
require 'csv'
require "watir"
require "csv"
module Dese
class ThreeAOne
include Dese::Scraper
attr_reader :filepaths
module Dashboard
module Dese
class ThreeAOne
include Dese::Scraper
attr_reader :filepaths
def initialize(filepaths: [Rails.root.join('data', 'admin_data', 'dese', '3A_1_average_class_size.csv')])
@filepaths = filepaths
end
# Remembers where the scraped CSV goes. By default that is a single file,
# 3A_1_average_class_size.csv, under the engine's data/admin_data/dese directory.
def initialize(
  filepaths: [
    Dashboard::Engine.root.join("data", "admin_data", "dese", "3A_1_average_class_size.csv")
  ]
)
  @filepaths = filepaths
end
def run_all
filepath = filepaths[0]
headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
'Total # of Classes', 'Average Class Size', 'Number of Students', 'Female %', 'Male %',
'English Language Learner %', 'Students with Disabilities %', 'Economically Disadvantaged %']
write_headers(filepath:, headers:)
def run_all
filepath = filepaths[0]
headers = ["Raw likert calculation", "Likert Score", "Admin Data Item", "Academic Year", "School Name", "DESE ID",
"Total # of Classes", "Average Class Size", "Number of Students", "Female %", "Male %",
"English Language Learner %", "Students with Disabilities %", "Economically Disadvantaged %"]
write_headers(filepath:, headers:)
run_a_reso_i1
run_a_reso_i1
browser.close
end
browser.close
end
def run_a_reso_i1
run do |academic_year|
url = 'https://profiles.doe.mass.edu/statereport/classsizebygenderpopulation.aspx'
range = academic_year.range
selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
'ctl00_ContentPlaceHolder1_ddYear' => range }
submit_id = 'btnViewReport'
calculation = lambda { |headers, items|
class_size_index = headers['Average Class Size']
average_class_size = items[class_size_index].to_f
benchmark = 20
if class_size_index.present? && !items[class_size_index] != ''
((benchmark - average_class_size) + benchmark) * 4 / benchmark
end
}
admin_data_item_id = 'a-reso-i1'
Prerequisites.new(filepaths[0], url, selectors, submit_id, admin_data_item_id, calculation)
# Hands `run` (from the scraper mixin, not shown here) one Prerequisites bundle
# per academic year for the class-size-by-gender/population report, tagged as
# admin data item "a-reso-i1" and written to the first configured CSV.
def run_a_reso_i1
  run do |academic_year|
    url = "https://profiles.doe.mass.edu/statereport/classsizebygenderpopulation.aspx"
    range = academic_year.range
    selectors = { "ctl00_ContentPlaceHolder1_ddReportType" => "School",
                  "ctl00_ContentPlaceHolder1_ddYear" => range }
    submit_id = "btnViewReport"

    # Scores average class size against a benchmark of 20 (size 20 -> 4.0,
    # smaller classes score higher). Returns nil when there is nothing to score.
    calculation = lambda { |headers, items|
      class_size_index = headers["Average Class Size"]
      # Check the column first: items[nil] raises TypeError, so the old guard
      # placed after the lookup could never protect it.
      next if class_size_index.nil?

      raw_size = items[class_size_index]
      # The old test `!items[i] != ""` compared a boolean with a string and was
      # always true, so blank cells were scored as a class size of 0.
      next if raw_size.to_s.strip.empty?

      benchmark = 20
      ((benchmark - raw_size.to_f) + benchmark) * 4 / benchmark
    }
    admin_data_item_id = "a-reso-i1"
    Prerequisites.new(filepaths[0], url, selectors, submit_id, admin_data_item_id, calculation)
  end
end
end
end

View file

@ -9,11 +9,9 @@ module Dashboard
attr_reader :filepaths
def initialize(filepaths:
[Dashboard::Engine.root.join("data", "dashboard", "admin_data", "dese", "enrollments.csv"),
Dashboard::Engine.root.join("data", "dashboard", "admin_data", "dese",
"3A_2_age_staffing.csv"),
Dashboard::Engine.root.join("data", "dashboard", "admin_data", "dese",
"3A_2_grade_subject_staffing.csv")])
[Dashboard::Engine.root.join("data", "admin_data", "dese", "enrollments.csv"),
Dashboard::Engine.root.join("data", "admin_data", "dese", "3A_2_age_staffing.csv"),
Dashboard::Engine.root.join("data", "admin_data", "dese", "3A_2_grade_subject_staffing.csv")])
@filepaths = filepaths
end

View file

@ -1,129 +1,133 @@
require 'watir'
require 'csv'
require "watir"
require "csv"
module Dese
class ThreeBOne
include Dese::Scraper
include Dese::Enrollments
attr_reader :filepaths
module Dashboard
module Dese
class ThreeBOne
include Dese::Scraper
include Dese::Enrollments
attr_reader :filepaths
def initialize(filepaths: [Rails.root.join('data', 'admin_data', 'dese', '3B_1_masscore.csv'),
Rails.root.join('data', 'admin_data', 'dese', '3B_1_advcoursecomprate.csv'),
Rails.root.join('data', 'admin_data', 'dese', '3B_1_ap.csv'),
Rails.root.join('data', 'admin_data', 'dese', '3B_1_student_courses_ratio.csv')])
# Remembers the four CSV output locations, in the order run_all indexes them.
# Defaults to the 3B_1 files under the engine's data/admin_data/dese directory.
def initialize(
  filepaths: ["3B_1_masscore.csv", "3B_1_advcoursecomprate.csv", "3B_1_ap.csv",
              "3B_1_student_courses_ratio.csv"].map do |csv_name|
    Dashboard::Engine.root.join("data", "admin_data", "dese", csv_name)
  end
)
  @filepaths = filepaths
end
def run_all
filepath = filepaths[0]
headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
'# Graduated', '# Completed MassCore', '% Completed MassCore']
write_headers(filepath:, headers:)
run_a_curv_i1(filepath:)
filepath = filepaths[1]
headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
'# Grade 11 and 12 Students', '# Students Completing Advanced', '% Students Completing Advanced',
'% ELA', '% Math', '% Science and Technology', '% Computer and Information Science',
'% History and Social Sciences', '% Arts', '% All Other Subjects', '% All Other Subjects']
write_headers(filepath:, headers:)
run_a_curv_i2(filepath:)
filepath = filepaths[2]
headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
'Tests Taken', 'Score=1', 'Score=2', 'Score=3', 'Score=4', 'Score=5', '% Score 1-2', '% Score 3-5']
write_headers(filepath:, headers:)
run_a_curv_i3(filepath:)
filepath = filepaths[3]
headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
'Total # of Classes', 'Average Class Size', 'Number of Students', 'Female %', 'Male %', 'English Language Learner %', 'Students with Disabilities %', 'Low Income %', 'Number of Students']
write_headers(filepath:, headers:)
run_a_curv_i5(filepath:)
browser.close
end
def run_a_curv_i1(filepath:)
run do |academic_year|
url = 'https://profiles.doe.mass.edu/statereport/masscore.aspx'
range = academic_year.range
selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
'ctl00_ContentPlaceHolder1_ddYear' => range }
submit_id = 'btnViewReport'
calculation = lambda { |headers, items|
completed_index = headers['% Completed MassCore']
percent_completed = items[completed_index].to_f
benchmark = 90
percent_completed * 4 / benchmark if completed_index.present? && !items[completed_index] != ''
}
admin_data_item_id = 'a-curv-i1'
Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
@filepaths = filepaths
end
end
def run_a_curv_i2(filepath:)
run do |academic_year|
url = 'https://profiles.doe.mass.edu/statereport/advcoursecomprate.aspx'
range = "#{academic_year.range.split('-')[1].to_i + 2000}"
selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
'ctl00_ContentPlaceHolder1_ddYear' => range }
submit_id = 'btnViewReport'
calculation = lambda { |headers, items|
completed_index = headers['% Students Completing Advanced']
percent_completed = items[completed_index].to_f
benchmark = 30
percent_completed * 4 / benchmark if completed_index.present? && !items[completed_index] != ''
}
admin_data_item_id = 'a-curv-i2'
Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
# For each of the four reports, in order: write the CSV header row to the
# matching entry of `filepaths`, then invoke that report's runner with the same
# path. Closes the browser once all four have run.
def run_all
  shared_columns = ["Raw likert calculation", "Likert Score", "Admin Data Item", "Academic Year", "School Name",
                    "DESE ID"]
  # [runner method, report-specific columns], positioned to line up with `filepaths`.
  reports = [
    [:run_a_curv_i1, ["# Graduated", "# Completed MassCore", "% Completed MassCore"]],
    [:run_a_curv_i2, ["# Grade 11 and 12 Students", "# Students Completing Advanced",
                      "% Students Completing Advanced", "% ELA", "% Math", "% Science and Technology",
                      "% Computer and Information Science", "% History and Social Sciences", "% Arts",
                      "% All Other Subjects", "% All Other Subjects"]],
    [:run_a_curv_i3, ["Tests Taken", "Score=1", "Score=2", "Score=3", "Score=4", "Score=5", "% Score 1-2",
                      "% Score 3-5"]],
    [:run_a_curv_i5, ["Total # of Classes", "Average Class Size", "Number of Students", "Female %", "Male %",
                      "English Language Learner %", "Students with Disabilities %", "Low Income %",
                      "Number of Students"]]
  ]

  reports.each_with_index do |(runner, report_columns), position|
    filepath = filepaths[position]
    write_headers(filepath:, headers: shared_columns + report_columns)
    send(runner, filepath:)
  end

  browser.close
end
end
def run_a_curv_i3(filepath:)
run do |academic_year|
url = 'https://profiles.doe.mass.edu/statereport/ap.aspx'
range = academic_year.range
selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
'ctl00_ContentPlaceHolder1_ddYear' => range }
submit_id = 'ctl00_ContentPlaceHolder1_btnViewReport'
calculation = lambda { |headers, items|
completed_index = headers['% Score 3-5']
percent_score = items[completed_index].to_f
benchmark = 20
percent_score * 4 / benchmark if completed_index.present? && !items[completed_index] != ''
}
admin_data_item_id = 'a-curv-i3'
Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
# Hands `run` (from the scraper mixin, not shown here) one Prerequisites bundle
# per academic year for the MassCore completion report, tagged "a-curv-i1".
#
# filepath - CSV path passed through to Prerequisites.
def run_a_curv_i1(filepath:)
  run do |academic_year|
    url = "https://profiles.doe.mass.edu/statereport/masscore.aspx"
    range = academic_year.range
    selectors = { "ctl00_ContentPlaceHolder1_ddReportType" => "School",
                  "ctl00_ContentPlaceHolder1_ddYear" => range }
    submit_id = "btnViewReport"

    # Scores "% Completed MassCore" so that 90 maps to 4.0. Returns nil when
    # the column is absent or the cell is blank.
    calculation = lambda { |headers, items|
      completed_index = headers["% Completed MassCore"]
      # Check the column first: items[nil] raises TypeError before any later guard runs.
      next if completed_index.nil?

      raw_percent = items[completed_index]
      # The old test `!items[i] != ""` was always true (boolean vs string), so
      # blank cells were scored as 0%.
      next if raw_percent.to_s.strip.empty?

      benchmark = 90
      raw_percent.to_f * 4 / benchmark
    }
    admin_data_item_id = "a-curv-i1"
    Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
  end
end
end
def run_a_curv_i5(filepath:)
run do |academic_year|
url = 'https://profiles.doe.mass.edu/statereport/classsizebygenderpopulation.aspx'
range = academic_year.range
selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
'ctl00_ContentPlaceHolder1_ddYear' => range }
submit_id = 'btnViewReport'
calculation = lambda { |headers, items|
row = headers.keys.zip(items).to_h
dese_id = row['School Code'].to_i
is_hs = (row['School Name'] in /High School/i)
school = School.find_by(dese_id:)
is_hs = school.is_hs if school.present?
next 'NA' unless is_hs
# Hands `run` (from the scraper mixin, not shown here) one Prerequisites bundle
# per academic year for the advanced course completion report, tagged "a-curv-i2".
#
# filepath - CSV path passed through to Prerequisites.
def run_a_curv_i2(filepath:)
  run do |academic_year|
    url = "https://profiles.doe.mass.edu/statereport/advcoursecomprate.aspx"
    # The year dropdown is fed the four-digit ending year, e.g. "2022-23" -> "2023".
    range = (academic_year.range.split("-")[1].to_i + 2000).to_s
    selectors = { "ctl00_ContentPlaceHolder1_ddReportType" => "School",
                  "ctl00_ContentPlaceHolder1_ddYear" => range }
    submit_id = "btnViewReport"

    # Scores "% Students Completing Advanced" so that 30 maps to 4.0. Returns
    # nil when the column is absent or the cell is blank.
    calculation = lambda { |headers, items|
      completed_index = headers["% Students Completing Advanced"]
      # Check the column first: items[nil] raises TypeError before any later guard runs.
      next if completed_index.nil?

      raw_percent = items[completed_index]
      # The old test `!items[i] != ""` was always true (boolean vs string), so
      # blank cells were scored as 0%.
      next if raw_percent.to_s.strip.empty?

      benchmark = 30
      raw_percent.to_f * 4 / benchmark
    }
    admin_data_item_id = "a-curv-i2"
    Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
  end
end
num_of_classes = row['Total # of Classes'].delete(',').to_f
num_of_students = student_count(filepath: Rails.root.join('data', 'admin_data', 'dese', 'enrollments.csv'),
dese_id:, year: academic_year.range) || 0
items << num_of_students
actual = num_of_students / num_of_classes
benchmark = 5
((benchmark - actual) + benchmark) * 4 / benchmark if num_of_classes.present? && num_of_students.present?
}
admin_data_item_id = 'a-curv-i5'
Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
# Hands `run` (from the scraper mixin, not shown here) one Prerequisites bundle
# per academic year for the AP results report, tagged "a-curv-i3".
#
# filepath - CSV path passed through to Prerequisites.
def run_a_curv_i3(filepath:)
  run do |academic_year|
    url = "https://profiles.doe.mass.edu/statereport/ap.aspx"
    range = academic_year.range
    selectors = { "ctl00_ContentPlaceHolder1_ddReportType" => "School",
                  "ctl00_ContentPlaceHolder1_ddYear" => range }
    submit_id = "ctl00_ContentPlaceHolder1_btnViewReport"

    # Scores "% Score 3-5" so that 20 maps to 4.0. Returns nil when the column
    # is absent or the cell is blank.
    calculation = lambda { |headers, items|
      completed_index = headers["% Score 3-5"]
      # Check the column first: items[nil] raises TypeError before any later guard runs.
      next if completed_index.nil?

      raw_percent = items[completed_index]
      # The old test `!items[i] != ""` was always true (boolean vs string), so
      # blank cells were scored as 0%.
      next if raw_percent.to_s.strip.empty?

      benchmark = 20
      raw_percent.to_f * 4 / benchmark
    }
    admin_data_item_id = "a-curv-i3"
    Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
  end
end
# Hands `run` (from the scraper mixin, not shown here) one Prerequisites bundle
# per academic year for the class-size report, tagged "a-curv-i5". The score is
# a students-per-class ratio and is only produced for high schools.
#
# filepath - CSV path passed through to Prerequisites.
def run_a_curv_i5(filepath:)
  run do |academic_year|
    url = "https://profiles.doe.mass.edu/statereport/classsizebygenderpopulation.aspx"
    range = academic_year.range
    selectors = { "ctl00_ContentPlaceHolder1_ddReportType" => "School",
                  "ctl00_ContentPlaceHolder1_ddYear" => range }
    submit_id = "btnViewReport"

    # Returns "NA" for non-high-schools, nil when no class count is available,
    # otherwise the ratio scored against a benchmark of 5 (ratio 5 -> 4.0).
    # Side effect: appends the enrollment count to `items` as an extra cell.
    calculation = lambda { |headers, items|
      # Pairs header names with cells positionally (relies on hash key order).
      row = headers.keys.zip(items).to_h
      dese_id = row["School Code"].to_i

      # The name match is only a fallback; a School record, when found, decides.
      is_hs = (row["School Name"] in /High School/i)
      school = School.find_by(dese_id:)
      is_hs = school.is_hs if school
      next "NA" unless is_hs

      # to_s guards a missing cell; previously nil.delete raised NoMethodError.
      num_of_classes = row["Total # of Classes"].to_s.delete(",").to_f
      num_of_students = student_count(filepath: Dashboard::Engine.root.join("data", "admin_data", "dese", "enrollments.csv"),
                                      dese_id:, year: academic_year.range) || 0
      items << num_of_students

      # A zero class count used to divide through to NaN / -Infinity because
      # Float#present? is always true; report "no score" instead.
      next if num_of_classes.zero?

      actual = num_of_students / num_of_classes
      benchmark = 5
      ((benchmark - actual) + benchmark) * 4 / benchmark
    }
    admin_data_item_id = "a-curv-i5"
    Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
  end
end
end
end

View file

@ -1,128 +1,131 @@
require 'watir'
require 'csv'
require "watir"
require "csv"
module Dese
class ThreeBTwo
include Dese::Scraper
include Dese::Enrollments
attr_reader :filepaths
module Dashboard
module Dese
class ThreeBTwo
include Dese::Scraper
include Dese::Enrollments
attr_reader :filepaths
def initialize(filepaths: [Rails.root.join('data', 'admin_data', 'dese', '3B_2_teacher_by_race_and_gender.csv'),
Rails.root.join('data', 'admin_data', 'dese', '3B_2_student_by_race_and_gender.csv')])
@filepaths = filepaths
end
def run_all
filepath = filepaths[0]
headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'Teachers of color (%)', 'School Name', 'DESE ID',
'African American (%)', 'Asian (%)', 'Hispanic (%)', 'White (%)', 'Native American (%)',
'Native Hawaiian Pacific Islander (%)', 'Multi-Race Non-Hispanic (%)', 'Females (%)',
'Males (%)', 'FTE Count']
write_headers(filepath:, headers:)
run_teacher_demographics(filepath:)
filepath = filepaths[1]
headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'Non-White Teachers %', 'Non-White Students %', 'School Name', 'DESE ID',
'African American', 'Asian', 'Hispanic', 'White', 'Native American',
'Native Hawaiian or Pacific Islander', 'Multi-Race or Non-Hispanic', 'Males',
'Females', 'Non-Binary', 'Students of color (%)']
write_headers(filepath:, headers:)
run_student_demographics(filepath:)
browser.close
end
def run_teacher_demographics(filepath:)
run do |academic_year|
admin_data_item_id = ''
url = 'https://profiles.doe.mass.edu/statereport/teacherbyracegender.aspx'
range = academic_year.range
selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
'ctl00_ContentPlaceHolder1_ddYear' => range,
'ctl00_ContentPlaceHolder1_ddDisplay' => 'Percentages',
'ctl00_ContentPlaceHolder1_ddClassification' => 'Teacher' }
submit_id = 'ctl00_ContentPlaceHolder1_btnViewReport'
calculation = lambda { |headers, items|
african_american_index = headers['African American (%)']
african_american_number = items[african_american_index].to_f
asian_index = headers['Asian (%)']
asian_number = items[asian_index].to_f
hispanic_index = headers['Hispanic (%)']
hispanic_number = items[hispanic_index].to_f
native_american_index = headers['Native American (%)']
native_american_number = items[native_american_index].to_f
native_hawaiian_index = headers['Native Hawaiian, Pacific Islander (%)']
native_hawaiian_number = items[native_hawaiian_index].to_f
multi_race_index = headers['Multi-Race,Non-Hispanic (%)']
multi_race_number = items[multi_race_index].to_f
non_white_teachers = african_american_number + asian_number + hispanic_number + native_american_number + native_hawaiian_number + multi_race_number
items.unshift(non_white_teachers)
non_white_teachers
}
Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
# Remembers the two CSV output locations: teacher demographics first, student
# demographics second. Defaults to the 3B_2 files under the engine's
# data/admin_data/dese directory.
def initialize(
  filepaths: ["3B_2_teacher_by_race_and_gender.csv", "3B_2_student_by_race_and_gender.csv"].map do |csv_name|
    Dashboard::Engine.root.join("data", "admin_data", "dese", csv_name)
  end
)
  @filepaths = filepaths
end
end
def teacher_count(filepath:, dese_id:, year:)
@teachers ||= {}
@years_with_data ||= Set.new
if @teachers.count == 0
CSV.parse(File.read(filepath), headers: true).map do |row|
academic_year = row['Academic Year']
@years_with_data << academic_year
school_id = row['DESE ID'].to_i
total = row['Teachers of color (%)'].delete(',')
total = 'NA' if total == '' || total.nil?
@teachers[[school_id, academic_year]] = total
# Runs the teacher demographics scrape into the first CSV, then the student
# demographics scrape into the second, writing each file's header row first.
# Closes the browser afterwards.
def run_all
  teacher_csv, student_csv = filepaths

  write_headers(
    filepath: teacher_csv,
    headers: ["Raw likert calculation", "Likert Score", "Admin Data Item", "Academic Year",
              "Teachers of color (%)", "School Name", "DESE ID",
              "African American (%)", "Asian (%)", "Hispanic (%)", "White (%)", "Native American (%)",
              "Native Hawaiian Pacific Islander (%)", "Multi-Race Non-Hispanic (%)", "Females (%)",
              "Males (%)", "FTE Count"]
  )
  run_teacher_demographics(filepath: teacher_csv)

  write_headers(
    filepath: student_csv,
    headers: ["Raw likert calculation", "Likert Score", "Admin Data Item", "Academic Year",
              "Non-White Teachers %", "Non-White Students %", "School Name", "DESE ID",
              "African American", "Asian", "Hispanic", "White", "Native American",
              "Native Hawaiian or Pacific Islander", "Multi-Race or Non-Hispanic", "Males",
              "Females", "Non-Binary", "Students of color (%)"]
  )
  run_student_demographics(filepath: student_csv)

  browser.close
end
# Hands `run` (from the scraper mixin, not shown here) one Prerequisites bundle
# per academic year for the teacher race/gender report ("Percentages" display,
# "Teacher" classification). The admin data item id is left blank.
#
# filepath - CSV path passed through to Prerequisites.
def run_teacher_demographics(filepath:)
  run do |academic_year|
    # Sums the six non-white percentage columns, prepends that total to the
    # row's cells, and returns it.
    teachers_of_color = lambda do |column_positions, cells|
      shares = [
        "African American (%)",
        "Asian (%)",
        "Hispanic (%)",
        "Native American (%)",
        "Native Hawaiian, Pacific Islander (%)",
        "Multi-Race,Non-Hispanic (%)"
      ].map { |column| cells[column_positions[column]].to_f }

      # Plain left-to-right addition, matching the order of the columns above.
      total = shares.reduce(:+)
      cells.unshift(total)
      total
    end

    Prerequisites.new(
      filepath,
      "https://profiles.doe.mass.edu/statereport/teacherbyracegender.aspx",
      { "ctl00_ContentPlaceHolder1_ddReportType" => "School",
        "ctl00_ContentPlaceHolder1_ddYear" => academic_year.range,
        "ctl00_ContentPlaceHolder1_ddDisplay" => "Percentages",
        "ctl00_ContentPlaceHolder1_ddClassification" => "Teacher" },
      "ctl00_ContentPlaceHolder1_btnViewReport",
      "",
      teachers_of_color
    )
  end
end
return 'NA' unless @years_with_data.include? year
@teachers[[dese_id, year]]
end
def run_student_demographics(filepath:)
run do |academic_year|
admin_data_item_id = 'a-cure-i1'
url = 'https://profiles.doe.mass.edu/statereport/enrollmentbyracegender.aspx'
range = academic_year.range
selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
'ctl00_ContentPlaceHolder1_ddYear' => range }
submit_id = 'btnViewReport'
calculation = lambda { |headers, items|
white_index = headers['White']
white_number = items[white_index].to_f
dese_id = items[headers['School Code']].to_i
non_white_student_percentage = (100 - white_number).to_f
items.unshift(non_white_student_percentage)
count_of_teachers = teacher_count(filepath: filepaths[0], dese_id:, year: academic_year.range)
return 'NA' if count_of_teachers == 'NA'
non_white_teacher_percentage = count_of_teachers.to_f
items.unshift(non_white_teacher_percentage)
floor = 5
benchmark = 0.25
return 1 if non_white_student_percentage.zero? && non_white_teacher_percentage < floor
if non_white_teacher_percentage >= floor
parity_index = non_white_teacher_percentage / non_white_student_percentage
likert_score = parity_index * 4 / benchmark
else
likert_score = 1
def teacher_count(filepath:, dese_id:, year:)
@teachers ||= {}
@years_with_data ||= Set.new
if @teachers.count == 0
CSV.parse(File.read(filepath), headers: true).map do |row|
academic_year = row["Academic Year"]
@years_with_data << academic_year
school_id = row["DESE ID"].to_i
total = row["Teachers of color (%)"].delete(",")
total = "NA" if total == "" || total.nil?
@teachers[[school_id, academic_year]] = total
end
likert_score
}
Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
end
return "NA" unless @years_with_data.include? year
@teachers[[dese_id, year]]
end
# Hands `run` (from the scraper mixin, not shown here) one Prerequisites bundle
# per academic year for the enrollment race/gender report, tagged "a-cure-i1".
# The score compares the non-white teacher share (read back through
# teacher_count from the first configured CSV) with the non-white student share.
#
# filepath - CSV path passed through to Prerequisites.
def run_student_demographics(filepath:)
  run do |academic_year|
    parity_score = lambda do |column_positions, cells|
      # Both lookups happen before anything is prepended, so the positions
      # still line up with the scraped row.
      student_share = (100 - cells[column_positions["White"]].to_f).to_f
      dese_id = cells[column_positions["School Code"]].to_i
      cells.unshift(student_share)

      teachers = teacher_count(filepath: filepaths[0], dese_id:, year: academic_year.range)
      next "NA" if teachers == "NA"

      teacher_share = teachers.to_f
      cells.unshift(teacher_share)

      # Below a 5% teacher share the score is pinned to 1 (this also covers the
      # zero-student-share case the original special-cased); otherwise the
      # teacher/student ratio is scaled so that 0.25 maps to 4.
      teacher_share >= 5 ? teacher_share / student_share * 4 / 0.25 : 1
    end

    Prerequisites.new(
      filepath,
      "https://profiles.doe.mass.edu/statereport/enrollmentbyracegender.aspx",
      { "ctl00_ContentPlaceHolder1_ddReportType" => "School",
        "ctl00_ContentPlaceHolder1_ddYear" => academic_year.range },
      "btnViewReport",
      "a-cure-i1",
      parity_score
    )
  end
end
end
end

View file

@ -1,69 +1,72 @@
require 'watir'
require 'csv'
require "watir"
require "csv"
module Dese
class TwoAOne
include Dese::Scraper
attr_reader :filepaths
module Dashboard
module Dese
class TwoAOne
include Dese::Scraper
attr_reader :filepaths
def initialize(filepaths: [Rails.root.join('data', 'admin_data', 'dese', '2A_1_students_suspended.csv'),
Rails.root.join('data', 'admin_data', 'dese', '2A_1_students_disciplined.csv')])
@filepaths = filepaths
end
def run_all
run_a_phys_i1
run_a_phys_i3
browser.close
end
def run_a_phys_i1
filepath = filepaths[0]
headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
'Students', 'Students Disciplined', '% In-School Suspension', '% Out-of-School Suspension', '% Expulsion', '% Removed to Alternate Setting',
'% Emergency Removal', '% Students with a School-Based Arrest', '% Students with a Law Enforcement Referral']
write_headers(filepath:, headers:)
run do |academic_year|
url = 'https://profiles.doe.mass.edu/statereport/ssdr.aspx'
range = academic_year.range
selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
'ctl00_ContentPlaceHolder1_ddYear' => range }
submit_id = 'ctl00_ContentPlaceHolder1_btnViewReport'
calculation = lambda { |headers, items|
suspensions_index = headers['% Out-of-School Suspension']
benchmark = 5.27
suspension_rate = items[suspensions_index].to_f
if suspensions_index.present? && items[suspensions_index] != ''
((benchmark - suspension_rate) + benchmark) * 4 / 5.27
end
}
admin_data_item_id = 'a-phys-i1'
Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
# Remembers the two CSV output locations: suspensions first, days-missed
# discipline data second. Defaults to the 2A_1 files under the engine's
# data/admin_data/dese directory.
def initialize(
  filepaths: ["2A_1_students_suspended.csv", "2A_1_students_disciplined.csv"].map do |csv_name|
    Dashboard::Engine.root.join("data", "admin_data", "dese", csv_name)
  end
)
  @filepaths = filepaths
end
end
def run_a_phys_i3
filepath = filepaths[1]
headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
'Students', 'Students Disciplined', '% 1 Day', '% 2 to 3 Days', '% 4 to 7 Days', '% 8 to 10 Days', '% > 10 Days']
write_headers(filepath:, headers:)
run do |academic_year|
url = 'https://profiles.doe.mass.edu/statereport/ssdr_days_missed.aspx'
range = academic_year.range
selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
'ctl00_ContentPlaceHolder1_ddYear' => range }
submit_id = 'ctl00_ContentPlaceHolder1_btnViewReport'
calculation = lambda { |headers, items|
days_missed_index = headers['% > 10 Days']
benchmark = 1
missed_days = items[days_missed_index].to_f
if days_missed_index.present? && items[days_missed_index] != ''
((benchmark - missed_days) + benchmark) * 4 / benchmark
end
}
admin_data_item_id = 'a-phys-i3'
Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
# Runs both discipline scrapes in order, then closes the browser.
def run_all
  %i[run_a_phys_i1 run_a_phys_i3].each { |step| send(step) }
  browser.close
end
# Writes the header row of the first configured CSV, then hands `run` (from the
# scraper mixin, not shown here) one Prerequisites bundle per academic year for
# the student discipline report, tagged "a-phys-i1".
def run_a_phys_i1
  filepath = filepaths[0]
  headers = ["Raw likert calculation", "Likert Score", "Admin Data Item", "Academic Year", "School Name", "DESE ID",
             "Students", "Students Disciplined", "% In-School Suspension", "% Out-of-School Suspension", "% Expulsion", "% Removed to Alternate Setting",
             "% Emergency Removal", "% Students with a School-Based Arrest", "% Students with a Law Enforcement Referral"]
  write_headers(filepath:, headers:)

  run do |academic_year|
    url = "https://profiles.doe.mass.edu/statereport/ssdr.aspx"
    range = academic_year.range
    selectors = { "ctl00_ContentPlaceHolder1_ddReportType" => "School",
                  "ctl00_ContentPlaceHolder1_ddYear" => range }
    submit_id = "ctl00_ContentPlaceHolder1_btnViewReport"

    # Scores "% Out-of-School Suspension" against a benchmark of 5.27 (rate at
    # the benchmark -> 4.0, lower rates score higher). Returns nil when the
    # column is absent or the cell is blank.
    calculation = lambda { |headers, items|
      suspensions_index = headers["% Out-of-School Suspension"]
      # Check the column before indexing: items[nil] raises TypeError, so the
      # old guard placed after the lookup could never protect it.
      next if suspensions_index.nil?

      raw_rate = items[suspensions_index]
      next if raw_rate.to_s.strip.empty?

      benchmark = 5.27
      ((benchmark - raw_rate.to_f) + benchmark) * 4 / benchmark
    }
    admin_data_item_id = "a-phys-i1"
    Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
  end
end
# Writes the header row of the second configured CSV, then hands `run` (from the
# scraper mixin, not shown here) one Prerequisites bundle per academic year for
# the discipline days-missed report, tagged "a-phys-i3".
def run_a_phys_i3
  filepath = filepaths[1]
  headers = ["Raw likert calculation", "Likert Score", "Admin Data Item", "Academic Year", "School Name", "DESE ID",
             "Students", "Students Disciplined", "% 1 Day", "% 2 to 3 Days", "% 4 to 7 Days", "% 8 to 10 Days", "% > 10 Days"]
  write_headers(filepath:, headers:)

  run do |academic_year|
    url = "https://profiles.doe.mass.edu/statereport/ssdr_days_missed.aspx"
    range = academic_year.range
    selectors = { "ctl00_ContentPlaceHolder1_ddReportType" => "School",
                  "ctl00_ContentPlaceHolder1_ddYear" => range }
    submit_id = "ctl00_ContentPlaceHolder1_btnViewReport"

    # Scores "% > 10 Days" against a benchmark of 1 (rate at the benchmark ->
    # 4.0, lower rates score higher). Returns nil when the column is absent or
    # the cell is blank.
    calculation = lambda { |headers, items|
      days_missed_index = headers["% > 10 Days"]
      # Check the column before indexing: items[nil] raises TypeError, so the
      # old guard placed after the lookup could never protect it.
      next if days_missed_index.nil?

      raw_rate = items[days_missed_index]
      next if raw_rate.to_s.strip.empty?

      benchmark = 1
      ((benchmark - raw_rate.to_f) + benchmark) * 4 / benchmark
    }
    admin_data_item_id = "a-phys-i3"
    Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
  end
end
end
end

View file

@ -1,75 +1,77 @@
require 'watir'
require 'csv'
require "watir"
require "csv"
module Dese
class TwoCOne
include Dese::Scraper
attr_reader :filepaths
module Dashboard
module Dese
class TwoCOne
include Dese::Scraper
attr_reader :filepaths
def initialize(filepaths: [Rails.root.join('data', 'admin_data', 'dese', '2C_1_attendance.csv')])
@filepaths = filepaths
end
def run_all
write_a_vale_i1_headers
run_a_vale_i1
run_a_vale_i2
browser.close
end
def write_a_vale_i1_headers
filepath = filepaths[0]
headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
'Attendance Rate', 'Average # of Absences', 'Absent 10 or more days', 'Chronically Absent (10% or more)',
'Chronically Absent (20% or more)', 'Unexcused > 9 days']
write_headers(filepath:, headers:)
end
def run_a_vale_i1
run do |academic_year|
admin_data_item_id = 'a-vale-i1'
url = 'https://profiles.doe.mass.edu/statereport/attendance.aspx'
range = case academic_year.range
when '2021-22', '2020-21'
"#{academic_year.range} (End of year)"
else
academic_year.range
end
selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
'ctl00_ContentPlaceHolder1_ddYear' => range }
submit_id = 'btnViewReport'
calculation = lambda { |headers, items|
absence_index = headers['Chronically Absent (10% or more)']
benchmark = 10
absence_rate = items[absence_index].to_f
if absence_index.present? && !items[absence_index].blank?
((benchmark - absence_rate) + benchmark) * 4 / benchmark
end
}
Prerequisites.new(filepaths[0], url, selectors, submit_id, admin_data_item_id, calculation)
# Remembers where the scraped CSV goes. By default that is a single file,
# 2C_1_attendance.csv, under the engine's data/admin_data/dese directory.
def initialize(
  filepaths: [
    Dashboard::Engine.root.join("data", "admin_data", "dese", "2C_1_attendance.csv")
  ]
)
  @filepaths = filepaths
end
end
def run_a_vale_i2
run do |academic_year|
admin_data_item_id = 'a-vale-i2'
url = 'https://profiles.doe.mass.edu/statereport/attendance.aspx'
range = case academic_year.range
when '2021-22', '2020-21'
"#{academic_year.range} (End of year)"
else
academic_year.range
end
selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
'ctl00_ContentPlaceHolder1_ddYear' => range }
submit_id = 'btnViewReport'
calculation = lambda { |headers, items|
attendance = headers[' Attendance Rate ']
benchmark = 90
items[attendance].to_f * 4 / benchmark if attendance.present?
}
Prerequisites.new(filepaths[0], url, selectors, submit_id, admin_data_item_id, calculation)
# Writes the attendance CSV header once, runs both attendance scrapes in order,
# then closes the browser.
def run_all
  %i[write_a_vale_i1_headers run_a_vale_i1 run_a_vale_i2].each { |step| send(step) }
  browser.close
end
# Writes the attendance report's header row to the first configured CSV.
def write_a_vale_i1_headers
  write_headers(
    filepath: filepaths.first,
    headers: [
      "Raw likert calculation", "Likert Score", "Admin Data Item", "Academic Year", "School Name", "DESE ID",
      "Attendance Rate", "Average # of Absences", "Absent 10 or more days",
      "Chronically Absent (10% or more)", "Chronically Absent (20% or more)", "Unexcused > 9 days"
    ]
  )
end
# Hands `run` (from the scraper mixin, not shown here) one Prerequisites bundle
# per academic year for the attendance report, tagged "a-vale-i1" and written
# to the first configured CSV.
def run_a_vale_i1
  run do |academic_year|
    admin_data_item_id = "a-vale-i1"
    url = "https://profiles.doe.mass.edu/statereport/attendance.aspx"
    # These two years are selected with an "(End of year)" suffix on the label.
    range = case academic_year.range
            when "2021-22", "2020-21"
              "#{academic_year.range} (End of year)"
            else
              academic_year.range
            end
    selectors = { "ctl00_ContentPlaceHolder1_ddReportType" => "School",
                  "ctl00_ContentPlaceHolder1_ddYear" => range }
    submit_id = "btnViewReport"

    # Scores "Chronically Absent (10% or more)" against a benchmark of 10 (rate
    # at the benchmark -> 4.0, lower rates score higher). Returns nil when the
    # column is absent or the cell is blank.
    calculation = lambda { |headers, items|
      absence_index = headers["Chronically Absent (10% or more)"]
      # Check the column before indexing: items[nil] raises TypeError, so the
      # old guard placed after the lookup could never protect it.
      next if absence_index.nil?

      raw_rate = items[absence_index]
      next if raw_rate.to_s.strip.empty?

      benchmark = 10
      ((benchmark - raw_rate.to_f) + benchmark) * 4 / benchmark
    }
    Prerequisites.new(filepaths[0], url, selectors, submit_id, admin_data_item_id, calculation)
  end
end
# Hands `run` (from the scraper mixin, not shown here) one Prerequisites bundle
# per academic year for the attendance report, tagged "a-vale-i2" and written
# to the first configured CSV.
def run_a_vale_i2
  run do |academic_year|
    # These two years are selected with an "(End of year)" suffix on the label.
    year_label = academic_year.range
    year_label = "#{year_label} (End of year)" if ["2021-22", "2020-21"].include?(year_label)

    # Attendance rate scaled so that 90 maps to 4.0; nil when the column is absent.
    # NOTE(review): the lookup key has a leading and trailing space, unlike the
    # "Attendance Rate" CSV header - confirm it matches the scraped table header.
    attendance_score = lambda do |column_positions, cells|
      position = column_positions[" Attendance Rate "]
      next unless position.present?

      cells[position].to_f * 4 / 90
    end

    Prerequisites.new(
      filepaths[0],
      "https://profiles.doe.mass.edu/statereport/attendance.aspx",
      { "ctl00_ContentPlaceHolder1_ddReportType" => "School",
        "ctl00_ContentPlaceHolder1_ddYear" => year_label },
      "btnViewReport",
      "a-vale-i2",
      attendance_score
    )
  end
end
end
end