mirror of
https://github.com/edcommonwealth/sqm-dashboards.git
synced 2026-03-07 21:48:16 -08:00
to survey table. We no longer check or keep track of the survey type. Instead, we look in the database to see if a survey item has at least 10 responses. If it does, that survey item was presented to the respondent and we count it, and all responses, when calculating the response rate. Remove response rate timestamp from caching logic because we no longer add the response rate to the database. All response rates are calculated on the fly. Update three_b_two scraper to use teacher-only numbers. Swap over to using https://profiles.doe.mass.edu/statereport/gradesubjectstaffing.aspx as the source of staffing information.
87 lines
2.7 KiB
Ruby
87 lines
2.7 KiB
Ruby
module Dese
  # Helpers for scraping report tables from the DESE site and appending the
  # results to a CSV file.
  #
  # All methods are instance methods and `run` yields to its caller, so this
  # module appears to be meant for mixing into concrete scraper classes
  # (NOTE(review): the including classes are not visible here -- confirm).
  module Scraper
    DELAY = 20 # The dese site will block you if you hit it too many times in a short period of time

    # Everything `run` needs to scrape one report for one academic year.
    # `calculation` must respond to `call(header_hash, items)`.
    # Passing the string name also registers the struct as Struct::Prerequisites.
    Prerequisites = Struct.new('Prerequisites', :filepath, :url, :selectors, :submit_id, :admin_data_item_id,
                               :calculation)

    # Flips a likert score around the midpoint of the 1..5 scale (1 <-> 5, 2 <-> 4).
    #
    # @param likert_score [Numeric, nil] score to reverse; clamped to 1..5 first
    # @return [Numeric, nil] the reversed score, or nil when the score is blank
    def reverse_score(likert_score:)
      return nil unless likert_score.present?

      (clamp_to_likert_range(likert_score) - 6).abs
    end

    # Scrapes one report per academic year, in descending `range` order.
    #
    # Yields each academic year; the block must return a Prerequisites-like
    # object describing what to scrape and where to write it. Years for which
    # `get_html` returns nil are skipped.
    #
    # @yieldparam academic_year [AcademicYear]
    # @yieldreturn [Prerequisites]
    def run
      AcademicYear.all.order(range: :DESC).each do |academic_year|
        prerequisites = yield academic_year

        document = get_html(url: prerequisites.url,
                            selectors: prerequisites.selectors,
                            submit_id: prerequisites.submit_id)
        next if document.nil?

        write_csv(document:,
                  filepath: prerequisites.filepath,
                  range: academic_year.range,
                  id: prerequisites.admin_data_item_id,
                  calculation: prerequisites.calculation)
      end
    end

    # Lazily created browser, reused for every request made through this object.
    # Nothing in this module closes it.
    def browser
      @browser ||= Watir::Browser.new
    end

    # Loads `url`, applies the dropdown selections, submits the form and
    # returns the resulting page parsed with Nokogiri.
    #
    # @param url [String] page to open
    # @param selectors [Hash] select-list element id => option text to choose
    # @param submit_id [String] id of the button that submits the form
    # @return [Nokogiri::HTML::Document]
    def get_html(url:, selectors:, submit_id:)
      browser.goto(url)

      selectors.each do |key, value|
        # Skip selections whose option text is not present on the page.
        next unless browser.option(text: value).present?

        browser.select(id: key).select(text: value)
      end

      browser.button(id: submit_id).click
      sleep DELAY # Sleep to prevent hitting mass.edu with too many requests
      Nokogiri::HTML(browser.html)
    end

    # Creates (or truncates) the CSV at `filepath` and writes the header row.
    #
    # @param filepath [String]
    # @param headers [Array]
    def write_headers(filepath:, headers:)
      CSV.open(filepath, 'w') do |csv|
        csv << headers
      end
    end

    # Appends one CSV row per data row of the scraped table.
    #
    # Each output row is: raw score, score clamped to 1..5 and rounded to two
    # decimals, `id`, `range`, followed by the text of every cell in the source
    # row. When `calculation` returns nil (or false) both score columns hold 'NA'.
    #
    # @param document [#css] parsed page
    # @param filepath [String] CSV file opened in append mode
    # @param range [Object] academic year range written to every row
    # @param id [Object] admin data item id written to every row
    # @param calculation [#call] called as `call(header_hash, items)`, where
    #   header_hash maps the text of each `.sorting` element to its position
    def write_csv(document:, filepath:, range:, id:, calculation:)
      rows = document.css('tr')
      header_hash = document.css('.sorting').each_with_index.to_h { |header, index| [header.text, index] }

      CSV.open(filepath, 'a') do |csv|
        rows.each do |row|
          items = row.css('td').map(&:text)
          # The second cell is read as a numeric id; `to_i` gives 0 for a missing
          # or non-numeric cell, which filters out rows without one.
          next if items[1].to_i.zero?

          raw_likert_score = calculation.call(header_hash, items) || 'NA'
          likert_score = if raw_likert_score == 'NA'
                           raw_likert_score
                         else
                           clamp_to_likert_range(raw_likert_score).round(2)
                         end

          csv << [raw_likert_score, likert_score, id, range, *items]
        end
      end
    end

    private

    # Limits a score to the 1..5 likert range. Written with explicit
    # comparisons instead of Comparable#clamp so that NaN passes through
    # unchanged rather than raising.
    def clamp_to_likert_range(score)
      return 5 if score > 5
      return 1 if score < 1

      score
    end
  end
end
|