mirror of
https://github.com/edcommonwealth/sqm-dashboards.git
synced 2026-03-09 07:28:41 -07:00
initialized with a season, e.g. "2024-25 Fall". Update scrapers for admin data, enrollment and staffing to use the new range standard correctly. Update the loaders for admin data, enrollment and staffing so that they populate all seasons in a given year, so admin data for 2024-25 gets loaded into both "2024-25 Fall" and "2024-25 Spring". Add tests for the new range format. Set the default cutoff for the start of the Spring season to the last Sunday in February.
89 lines
2.8 KiB
Ruby
89 lines
2.8 KiB
Ruby
module Dese
  # Shared scraping helpers: drive a browser through a report form, then append
  # the resulting HTML table to a CSV file together with a computed likert score.
  module Scraper
    DELAY = 20 # The dese site will block you if you hit it too many times in a short period of time

    # Everything `run` needs for one academic year: where to write, which page to
    # open, which dropdown values to pick, which button submits the form, the
    # admin data item id written on every row, and a callable that turns a table
    # row into a likert score.
    # Passing a string as the first argument also registers the struct as
    # Struct::Prerequisites (standard Struct.new behaviour).
    Prerequisites = Struct.new("Prerequisites", :filepath, :url, :selectors, :submit_id, :admin_data_item_id,
                               :calculation)

    # Mirrors a likert score around the middle of the 1..5 scale (1 <-> 5, 2 <-> 4).
    # Scores outside 1..5 are pulled to the nearest bound first.
    #
    # @param likert_score [Numeric, nil]
    # @return [Numeric, nil] nil when the score is not present
    def reverse_score(likert_score:)
      return nil unless likert_score.present?

      likert_score = 1 if likert_score < 1
      likert_score = 5 if likert_score > 5
      (likert_score - 6).abs
    end

    # Scrapes one report per distinct academic year range.
    #
    # Seasons are stripped from the stored ranges and duplicates dropped, so the
    # block is invoked once per range (in descending range order). The block
    # receives an AcademicYear built from that range and must return an object
    # responding to url, selectors, submit_id, filepath, admin_data_item_id and
    # calculation (e.g. a Prerequisites).
    #
    # @yieldparam academic_year [AcademicYear]
    # @yieldreturn [Prerequisites]
    def run
      academic_years = AcademicYear.all.order(range: :DESC)
                                   .map(&:range_without_season)
                                   .uniq
                                   .map { |range| AcademicYear.new(range:) }

      academic_years.each do |academic_year|
        prerequisites = yield academic_year

        document = get_html(url: prerequisites.url,
                            selectors: prerequisites.selectors,
                            submit_id: prerequisites.submit_id)
        # get_html returns nil when a requested dropdown value is not offered.
        next if document.nil?

        write_csv(document:,
                  filepath: prerequisites.filepath,
                  range: academic_year.range_without_season,
                  id: prerequisites.admin_data_item_id,
                  calculation: prerequisites.calculation)
      end
    end

    # Memoized Watir browser shared by every request made through this object.
    def browser
      @browser ||= Watir::Browser.new
    end

    # Opens +url+, picks the requested dropdown values, submits the form and
    # returns the resulting page parsed with Nokogiri.
    #
    # @param url [String] page to open
    # @param selectors [Hash] select-list element id => option text to choose
    # @param submit_id [String] id of the button that submits the form
    # @return [Nokogiri::HTML::Document, nil] nil as soon as one of the requested
    #   option texts is not present on the page (the form is then not submitted)
    def get_html(url:, selectors:, submit_id:)
      browser.goto(url)

      selectors.each do |key, value|
        return nil unless browser.option(text: value).present?

        browser.select(id: key).select(text: value)
      end

      browser.button(id: submit_id).click
      sleep DELAY # Sleep to prevent hitting mass.edu with too many requests
      Nokogiri::HTML(browser.html)
    end

    # Creates (or truncates) the CSV at +filepath+ and writes a single header row.
    #
    # @param filepath [String]
    # @param headers [Array]
    def write_headers(filepath:, headers:)
      CSV.open(filepath, "w") do |csv|
        csv << headers
      end
    end

    # Appends one CSV line per data row of the scraped table.
    #
    # Each line is: raw score, score limited to 1..5 and rounded to two decimals,
    # +id+, +range+, followed by the text of every cell in the row. A nil/false
    # result from +calculation+ is written as "NA" in both score columns.
    # Rows whose second cell does not parse to a non-zero integer are skipped;
    # this includes rows without any <td> cells.
    #
    # @param document [#css] parsed page; "tr" elements are the rows and
    #   ".sorting" elements are the column headers
    # @param filepath [String] CSV file to append to
    # @param range [String] academic year range written on every line
    # @param id [Object] admin data item id written on every line
    # @param calculation [#call] called with (header text => column index hash,
    #   array of cell texts); returns a Numeric score or nil
    def write_csv(document:, filepath:, range:, id:, calculation:)
      header_hash = document.css(".sorting").each_with_index.to_h { |header, index| [header.text, index] }

      CSV.open(filepath, "a") do |csv|
        document.css("tr").each do |row|
          items = row.css("td").map(&:text)
          # nil.to_i and "".to_i are both 0, so this single check covers missing
          # and non-numeric ids alike.
          next if items[1].to_i.zero?

          raw_likert_score = calculation.call(header_hash, items) || "NA"
          likert_score = raw_likert_score
          unless likert_score == "NA"
            likert_score = 5 if likert_score > 5
            likert_score = 1 if likert_score < 1
            likert_score = likert_score.round(2)
          end

          csv << [raw_likert_score, likert_score, id, range, items].flatten
        end
      end
    end
  end
end
|