sqm-dashboards/app/services/dese/scraper.rb
rebuilt 128748addd Update logic for calculating student response rate. Remove references
to survey table.  We no longer check or keep track of the survey type.
Instead we look in the database to see if a survey item has at least 10
responses.  If it does, that survey item was presented to the respondent
and we count it, and all responses when calculating the response rate.

Remove response rate timestamp from caching logic because we no longer
add the response rate to the database. All response rates are calculated
on the fly.

Update three_b_two scraper to use teacher only numbers

Swap over to using https://profiles.doe.mass.edu/statereport/gradesubjectstaffing.aspx as the source of staffing information.
2023-04-18 13:59:29 -07:00

87 lines
2.7 KiB
Ruby

module Dese
  # Mixin with the shared plumbing for scraping report tables from the DESE
  # site with a Watir-driven browser and appending the rows to CSV files.
  #
  # The host application is expected to have Watir, Nokogiri, CSV,
  # ActiveSupport (`present?`) and the AcademicYear model loaded.
  module Scraper
    DELAY = 20 # The dese site will block you if you hit it too many times in a short period of time

    # Per-academic-year settings handed back to #run by its block.
    #
    # Built as an anonymous Struct assigned to the constant: passing a
    # class-name string to Struct.new would register a global
    # Struct::Prerequisites, which Ruby warns about whenever this file is
    # evaluated a second time.
    Prerequisites = Struct.new(:filepath, :url, :selectors, :submit_id, :admin_data_item_id, :calculation)

    # Flips a likert score around the 1..5 scale (1 <-> 5, 2 <-> 4, ...).
    # Scores outside 1..5 are pulled to the nearest bound before flipping.
    #
    # @param likert_score [Numeric, nil]
    # @return [Numeric, nil] nil when no score was supplied
    def reverse_score(likert_score:)
      return nil unless likert_score.present?

      (clamp_to_likert_range(likert_score) - 6).abs
    end

    # Walks every AcademicYear in descending `range` order, asks the block for
    # that year's Prerequisites, fetches the page and appends its rows to the
    # CSV file named in the prerequisites.
    #
    # A block is required; it receives the academic year and must return an
    # object responding to the Prerequisites fields.
    #
    # @yieldparam academic_year [AcademicYear]
    # @yieldreturn [Prerequisites]
    def run
      AcademicYear.all.order(range: :DESC).each do |academic_year|
        prerequisites = yield academic_year
        document = get_html(url: prerequisites.url,
                            selectors: prerequisites.selectors,
                            submit_id: prerequisites.submit_id)
        next if document.nil?

        write_csv(document:,
                  filepath: prerequisites.filepath,
                  range: academic_year.range,
                  id: prerequisites.admin_data_item_id,
                  calculation: prerequisites.calculation)
      end
    end

    # Lazily created Watir browser, reused for every page fetched through
    # this object. It is not closed here.
    def browser
      @browser ||= Watir::Browser.new
    end

    # Loads `url`, applies the requested dropdown selections, clicks the
    # submit button, waits DELAY seconds and returns the parsed page.
    #
    # @param url [String] page to open
    # @param selectors [Hash] select-element id => option text to choose
    # @param submit_id [String] id of the button that submits the form
    # @return [Nokogiri::HTML document] the browser's HTML after the wait
    def get_html(url:, selectors:, submit_id:)
      browser.goto(url)
      selectors.each do |key, value|
        # Leave the dropdown untouched when the wanted option is not present on the page.
        next unless browser.option(text: value).present?

        browser.select(id: key).select(text: value)
      end
      browser.button(id: submit_id).click
      sleep DELAY # Sleep to prevent hitting mass.edu with too many requests
      Nokogiri::HTML(browser.html)
    end

    # Creates (or truncates) the CSV at `filepath` and writes the header row.
    #
    # @param filepath [String]
    # @param headers [Array]
    def write_headers(filepath:, headers:)
      CSV.open(filepath, 'w') do |csv|
        csv << headers
      end
    end

    # Appends one CSV line per usable table row in `document`.
    #
    # Each line is: the raw score returned by `calculation` (or 'NA'), that
    # score clamped to 1..5 and rounded to two decimals (or 'NA'), `id`,
    # `range`, followed by the text of every cell in the row.
    #
    # @param document [#css] parsed page; `tr` elements are the rows and
    #   `.sorting` elements are the column headers
    # @param filepath [String] CSV file to append to
    # @param range [Object] academic year range written on every line
    # @param id [Object] admin data item id written on every line
    # @param calculation [#call] called with (header text => column index
    #   hash, array of cell texts); returns a numeric score, or nil/false
    #   when no score can be computed
    def write_csv(document:, filepath:, range:, id:, calculation:)
      rows = document.css('tr')
      header_hash = document.css('.sorting').each_with_index.to_h { |header, index| [header.text, index] }

      CSV.open(filepath, 'a') do |csv|
        rows.each do |row|
          items = row.css('td').map(&:text)
          # The second cell holds the DESE id. A missing cell or non-numeric
          # text converts to 0, which skips rows without a usable id.
          next if items[1].to_i.zero?

          raw_likert_score = calculation.call(header_hash, items) || 'NA'
          likert_score = raw_likert_score
          likert_score = clamp_to_likert_range(likert_score).round(2) unless likert_score == 'NA'

          csv << [raw_likert_score, likert_score, id, range, items].flatten
        end
      end
    end

    private

    # Pulls a numeric score into the 1..5 likert range.
    def clamp_to_likert_range(score)
      return 1 if score < 1
      return 5 if score > 5

      score
    end
  end
end