You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
118 lines
4.2 KiB
118 lines
4.2 KiB
require "watir"
|
|
require "csv"
|
|
|
|
module Dashboard
  module Dese
    # Scrapes the DESE teacher-data state report (school level) with a Watir
    # browser and writes one CSV containing, for every scraped row, a scaled
    # score for the admin data items "a-exp-i1" and "a-exp-i3".
    #
    # Scores are computed as `percentage * 4 / divisor`; the "Likert Score"
    # column is that value clamped to 1..5 and rounded to two decimals.
    class OneAOne
      REPORT_URL = "https://profiles.doe.mass.edu/statereport/teacherdata.aspx"

      CSV_HEADERS = [
        "Raw likert calculation", "Likert Score", "Admin Data Item", "Academic Year", "School Name", "DESE ID",
        "Total # of Teachers(FTE)", "Percent of Teachers Licensed", "Student/Teacher Ratio",
        "Percent of Experienced Teachers", "Percent of Teachers without Waiver or Provisional License",
        "Percent Teaching in-field"
      ].freeze

      EXPERIENCED_ITEM = "a-exp-i1"
      IN_FIELD_ITEM = "a-exp-i3"
      # Divisors used when scaling each percentage (presumably the benchmark
      # percentage for the item -- confirm with the data owners).
      EXPERIENCED_DIVISOR = 80
      IN_FIELD_DIVISOR = 95
      NOT_AVAILABLE = "NA"
      # Column used for the school code when no "School Code" header is found.
      DEFAULT_DESE_ID_COLUMN = 1

      attr_reader :filepath

      # @param filepath [String, Pathname] destination CSV; defaults to the
      #   engine's data/admin_data/dese/1A_1_teacher_data.csv
      def initialize(filepath: Dashboard::Engine.root.join("data", "admin_data", "dese", "1A_1_teacher_data.csv"))
        @filepath = filepath
      end

      # Rewrites the CSV from scratch: writes the header row, then scrapes and
      # appends the report for every AcademicYear. Years for which #scrape
      # returns nil are skipped. The browser is closed even when a step raises.
      def run_all
        browser = Watir::Browser.new
        write_headers(filepath:)
        AcademicYear.all.each do |academic_year|
          document = scrape(browser:, url: REPORT_URL, range: academic_year.range)
          next if document.nil?

          write_csv(document:, filepath:, range: academic_year.range, id: EXPERIENCED_ITEM)
        end
      ensure
        browser&.close
      end

      # Loads the report page, selects the "School" report type and the given
      # year, clicks "view report" and returns the resulting page parsed by
      # Nokogiri.
      #
      # @param browser [Watir::Browser]
      # @param url [String]
      # @param range [String] text of the year option to select
      # @return [Nokogiri::HTML::Document, nil] nil when either the "School"
      #   option or the requested year option is not present on the page
      def scrape(browser:, url:, range:)
        browser.goto(url)

        return unless browser.option(text: "School").present?
        return unless browser.option(text: range).present?

        browser.select(id: "ctl00_ContentPlaceHolder1_ddReportType").select(text: "School")
        browser.select(id: "ctl00_ContentPlaceHolder1_ddYear").select(text: range)
        browser.button(id: "ctl00_ContentPlaceHolder1_btnViewReport").click
        sleep Dese::Scraper::DELAY # Sleep to prevent hitting mass.edu with too many requests
        Nokogiri::HTML(browser.html)
      end

      # Truncates the file at +filepath+ and writes the CSV header row.
      def write_headers(filepath:)
        CSV.open(filepath, "w") { |csv| csv << CSV_HEADERS }
      end

      # Appends the scored rows of one scraped report to +filepath+: first all
      # "a-exp-i1" rows, then all "a-exp-i3" rows. Each output row is
      # [raw score, likert score, item code, range, *original cells].
      #
      # Columns are located through the text of the ".sorting" header
      # elements. Rows whose school-code cell converts to 0 (missing, blank or
      # non-numeric) are skipped. A score is "NA" when its column header is
      # missing; the in-field score is also "NA" when the percentage is 0.
      #
      # @param document [#css] parsed report page
      # @param filepath [String, Pathname] CSV to append to
      # @param range [String] academic year label written to every row
      # @param id [String] accepted for interface compatibility; not used,
      #   the item codes written are fixed
      def write_csv(document:, filepath:, range:, id:)
        rows = document.css("tr")
        column_index = document.css(".sorting").each_with_index.to_h { |header, index| [header.text, index] }
        # Fall back to the second column so a missing header cannot turn into
        # a TypeError from indexing the cells with nil.
        dese_id_index = column_index["School Code"] || DEFAULT_DESE_ID_COLUMN
        experienced_index = column_index["Percent of Experienced Teachers"]
        in_field_index = column_index["Percent Teaching In-Field"]

        CSV.open(filepath, "a") do |csv|
          append_item_rows(csv:, rows:, range:, item: EXPERIENCED_ITEM, dese_id_index:) do |cells|
            cells[experienced_index].to_f * 4 / EXPERIENCED_DIVISOR unless experienced_index.nil?
          end

          append_item_rows(csv:, rows:, range:, item: IN_FIELD_ITEM, dese_id_index:) do |cells|
            next if in_field_index.nil?

            percent_in_field = cells[in_field_index].to_f
            percent_in_field * 4 / IN_FIELD_DIVISOR unless percent_in_field.zero?
          end
        end
      end

      # Scales the sixth cell (index 5) as `value * 4 / 95`.
      # NOTE(review): not called anywhere in this class -- confirm it is still needed.
      def calculate(cells:)
        cells[5].to_f * 4 / 95
      end

      private

      # Writes one CSV row per table row that has a non-zero school code.
      # The block receives the row's cell texts and returns the raw score, or
      # nil when no score is available.
      def append_item_rows(csv:, rows:, range:, item:, dese_id_index:)
        rows.each do |row|
          cells = row.css("td").map(&:text)
          next if cells[dese_id_index].to_i.zero?

          raw_score = yield(cells) || NOT_AVAILABLE
          csv << [raw_score, likert_score_for(raw_score), item, range, *cells]
        end
      end

      # Clamps a numeric raw score to 1..5 and rounds it to two decimals;
      # passes "NA" through unchanged.
      def likert_score_for(raw_score)
        return raw_score if raw_score == NOT_AVAILABLE

        raw_score.clamp(1, 5).round(2)
      end
    end
  end
end
|