mirror of
https://github.com/edcommonwealth/Dashboard.git
synced 2026-03-17 01:45:53 -07:00
chore: scrape enrollment and staffing data
This commit is contained in:
parent
725348bf95
commit
1810ee0074
7 changed files with 15026 additions and 13173 deletions
|
|
@ -1,76 +1,78 @@
|
|||
module Dese
|
||||
module Scraper
|
||||
DELAY = 20
|
||||
module Dashboard
|
||||
module Dese
|
||||
module Scraper
|
||||
DELAY = 20
|
||||
|
||||
Prerequisites = Struct.new('Prerequisites', :filepath, :url, :selectors, :submit_id, :admin_data_item_id,
|
||||
:calculation)
|
||||
def run
|
||||
academic_years = AcademicYear.all.order(range: :DESC)
|
||||
academic_years.each do |academic_year|
|
||||
prerequisites = yield academic_year
|
||||
Prerequisites = Struct.new("Prerequisites", :filepath, :url, :selectors, :submit_id, :admin_data_item_id,
|
||||
:calculation)
|
||||
def run
|
||||
academic_years = AcademicYear.all.order(range: :DESC)
|
||||
academic_years.each do |academic_year|
|
||||
prerequisites = yield academic_year
|
||||
|
||||
document = get_html(url: prerequisites.url,
|
||||
selectors: prerequisites.selectors,
|
||||
submit_id: prerequisites.submit_id)
|
||||
unless document.nil?
|
||||
write_csv(document:, filepath: prerequisites.filepath, range: academic_year.range, id: prerequisites.admin_data_item_id,
|
||||
calculation: prerequisites.calculation)
|
||||
document = get_html(url: prerequisites.url,
|
||||
selectors: prerequisites.selectors,
|
||||
submit_id: prerequisites.submit_id)
|
||||
unless document.nil?
|
||||
write_csv(document:, filepath: prerequisites.filepath, range: academic_year.range, id: prerequisites.admin_data_item_id,
|
||||
calculation: prerequisites.calculation)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def browser
|
||||
@browser ||= Watir::Browser.new
|
||||
end
|
||||
|
||||
def get_html(url:, selectors:, submit_id:)
|
||||
browser.goto(url)
|
||||
|
||||
selectors.each do |key, value|
|
||||
return unless browser.option(text: value).present?
|
||||
|
||||
browser.select(id: key).select(text: value)
|
||||
def browser
|
||||
@browser ||= Watir::Browser.new
|
||||
end
|
||||
|
||||
browser.button(id: submit_id).click
|
||||
sleep DELAY # Sleep to prevent hitting mass.edu with too many requests
|
||||
Nokogiri::HTML(browser.html)
|
||||
end
|
||||
def get_html(url:, selectors:, submit_id:)
|
||||
browser.goto(url)
|
||||
|
||||
def write_headers(filepath:, headers:)
|
||||
CSV.open(filepath, 'w') do |csv|
|
||||
csv << headers
|
||||
selectors.each do |key, value|
|
||||
return unless browser.option(text: value).present?
|
||||
|
||||
browser.select(id: key).select(text: value)
|
||||
end
|
||||
|
||||
browser.button(id: submit_id).click
|
||||
sleep DELAY # Sleep to prevent hitting mass.edu with too many requests
|
||||
Nokogiri::HTML(browser.html)
|
||||
end
|
||||
end
|
||||
|
||||
def write_csv(document:, filepath:, range:, id:, calculation:)
|
||||
table = document.css('tr')
|
||||
headers = document.css('.sorting')
|
||||
header_hash = headers.each_with_index.map { |header, index| [header.text, index] }.to_h
|
||||
def write_headers(filepath:, headers:)
|
||||
CSV.open(filepath, "w") do |csv|
|
||||
csv << headers
|
||||
end
|
||||
end
|
||||
|
||||
CSV.open(filepath, 'a') do |csv|
|
||||
table.each do |row|
|
||||
items = row.css('td').map(&:text)
|
||||
dese_id = items[1].to_i
|
||||
next if dese_id.nil? || dese_id.zero?
|
||||
def write_csv(document:, filepath:, range:, id:, calculation:)
|
||||
table = document.css("tr")
|
||||
headers = document.css(".sorting")
|
||||
header_hash = headers.each_with_index.map { |header, index| [header.text, index] }.to_h
|
||||
|
||||
raw_likert_score = calculation.call(header_hash, items)
|
||||
raw_likert_score ||= 'NA'
|
||||
likert_score = raw_likert_score
|
||||
if likert_score != 'NA'
|
||||
likert_score = 5 if likert_score > 5
|
||||
likert_score = 1 if likert_score < 1
|
||||
likert_score = likert_score.round(2)
|
||||
CSV.open(filepath, "a") do |csv|
|
||||
table.each do |row|
|
||||
items = row.css("td").map(&:text)
|
||||
dese_id = items[1].to_i
|
||||
next if dese_id.nil? || dese_id.zero?
|
||||
|
||||
raw_likert_score = calculation.call(header_hash, items)
|
||||
raw_likert_score ||= "NA"
|
||||
likert_score = raw_likert_score
|
||||
if likert_score != "NA"
|
||||
likert_score = 5 if likert_score > 5
|
||||
likert_score = 1 if likert_score < 1
|
||||
likert_score = likert_score.round(2)
|
||||
end
|
||||
|
||||
output = []
|
||||
output << raw_likert_score
|
||||
output << likert_score
|
||||
output << id
|
||||
output << range
|
||||
output << items
|
||||
output = output.flatten
|
||||
csv << output
|
||||
end
|
||||
|
||||
output = []
|
||||
output << raw_likert_score
|
||||
output << likert_score
|
||||
output << id
|
||||
output << range
|
||||
output << items
|
||||
output = output.flatten
|
||||
csv << output
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue