mirror of
https://github.com/edcommonwealth/sqm-dashboards.git
synced 2026-03-07 21:48:16 -08:00
Turn off specs for scrapers. Generate csvs for scraped data
This commit is contained in:
parent
5002e4eb63
commit
9e18bf2151
22 changed files with 78034 additions and 17121 deletions
38
app/services/dese/enrollments.rb
Normal file
38
app/services/dese/enrollments.rb
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
require 'watir'
|
||||
require 'csv'
|
||||
|
||||
module Dese
|
||||
module Enrollments
|
||||
include Dese::Scraper
|
||||
attr_reader :filepaths
|
||||
|
||||
# Writes the enrollment CSV header row to +filepath+ and then hands +run+ a
# block that builds one Prerequisites bundle per academic year it yields,
# pointing at the DESE "enrollment by grade" report.
#
# No admin data item is attached to these rows (the id is an empty string)
# and the likert calculation always answers 'NA'.
# NOTE(review): what +run+ does with each Prerequisites bundle is defined
# outside this method (presumably Dese::Scraper) -- confirm there.
#
# @param filepath [String, Pathname] destination CSV file
# @return whatever +run+ returns
def scrape_enrollments(filepath:)
  grade_columns = %w[PK K] + (1..12).map(&:to_s) + %w[SP Total]
  headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name',
             'DESE ID'] + grade_columns
  write_headers(filepath:, headers:)

  # Enrollment rows carry no score, so the calculation is a constant.
  not_applicable = ->(_headers, _items) { 'NA' }

  run do |academic_year|
    Prerequisites.new(
      filepath,
      'https://profiles.doe.mass.edu/statereport/enrollmentbygrade.aspx',
      { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
        'ctl00_ContentPlaceHolder1_ddYear' => academic_year.range },
      'btnViewReport',
      '',
      not_applicable
    )
  end
end
|
||||
|
||||
# Looks up the total enrollment for one school in one academic year.
#
# The CSV is read on the first call (and again only while the lookup is still
# empty) into a hash keyed by [DESE ID as Integer, academic year String].
# Once populated, the lookup is reused as-is, whatever +filepath+ is passed.
#
# @param filepath [String, Pathname] CSV with 'Academic Year', 'DESE ID' and 'Total' columns
# @param dese_id [Integer] school DESE ID
# @param year [String] academic year exactly as written in the CSV
# @return [Integer, nil] total students, or nil when no usable row exists
def student_count(filepath:, dese_id:, year:)
  @students ||= {}
  if @students.empty?
    CSV.foreach(filepath, headers: true) do |row|
      total = row['Total']
      # A blank Total cell parses as nil; skip the row instead of raising NoMethodError.
      next if total.nil?

      # Totals may be written with thousands separators, e.g. "1,234".
      @students[[row['DESE ID'].to_i, row['Academic Year']]] = total.delete(',').to_i
    end
  end
  @students[[dese_id, year]]
end
|
||||
end
|
||||
end
|
||||
|
|
@ -1,6 +1,7 @@
|
|||
require 'watir'
|
||||
require 'csv'
|
||||
|
||||
# TODO: convert this to simpler format and add a run_all method
|
||||
module Dese
|
||||
class FourDOne
|
||||
def initialize(filepath: Rails.root.join('data', 'admin_data', 'dese', '4D_1_plans_of_grads.csv'))
|
||||
|
|
|
|||
|
|
@ -47,11 +47,14 @@ module Dese
|
|||
|
||||
def self.create_admin_data_value(row:, score:)
|
||||
school = School.find_by_dese_id(dese_id(row:).to_i)
|
||||
admin_data_item_id = admin_data_item(row:)
|
||||
|
||||
return if school.nil?
|
||||
return if admin_data_item_id.nil? || admin_data_item_id.blank?
|
||||
|
||||
admin_data_value = AdminDataValue.find_by(academic_year: AcademicYear.find_by_range(ay(row:)),
|
||||
school:,
|
||||
admin_data_item: AdminDataItem.find_by_admin_data_item_id(admin_data_item(row:)))
|
||||
admin_data_item: AdminDataItem.find_by_admin_data_item_id(admin_data_item_id))
|
||||
if admin_data_value.present?
|
||||
admin_data_value.likert_score = score
|
||||
admin_data_value.save
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
require 'watir'
|
||||
require 'csv'
|
||||
|
||||
# TODO convert this to simpler format and add a run_all method
|
||||
module Dese
|
||||
class OneAOne
|
||||
def initialize(filepath: Rails.root.join('data', 'admin_data', 'dese', '1A_1_teacher_data.csv'))
|
||||
|
|
|
|||
|
|
@ -4,9 +4,10 @@ require 'csv'
|
|||
module Dese
|
||||
class ThreeATwo
|
||||
include Dese::Scraper
|
||||
include Dese::Enrollments
|
||||
attr_reader :filepaths
|
||||
|
||||
def initialize(filepaths: [Rails.root.join('data', 'admin_data', 'dese', '3A_2_enrollment.csv'),
|
||||
def initialize(filepaths: [Rails.root.join('data', 'admin_data', 'dese', 'enrollments.csv'),
|
||||
Rails.root.join('data', 'admin_data', 'dese', '3A_2_age_staffing.csv'),
|
||||
Rails.root.join('data', 'admin_data', 'dese', '3A_2_grade_subject_staffing.csv')])
|
||||
|
||||
|
|
@ -47,35 +48,6 @@ module Dese
|
|||
write_headers(filepath:, headers:)
|
||||
end
|
||||
|
||||
# Writes the enrollment CSV header row to +filepath+ and then hands +run+ a
# block that builds one Prerequisites bundle per academic year it yields,
# pointing at the DESE "enrollment by grade" report.
#
# No admin data item is attached to these rows (the id is an empty string)
# and the likert calculation always answers 'NA'.
# NOTE(review): what +run+ does with each Prerequisites bundle is defined
# outside this method (presumably Dese::Scraper) -- confirm there.
#
# @param filepath [String, Pathname] destination CSV file
# @return whatever +run+ returns
def scrape_enrollments(filepath:)
  grade_columns = %w[PK K] + (1..12).map(&:to_s) + %w[SP Total]
  headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name',
             'DESE ID'] + grade_columns
  write_headers(filepath:, headers:)

  # Enrollment rows carry no score, so the calculation is a constant.
  not_applicable = ->(_headers, _items) { 'NA' }

  run do |academic_year|
    Prerequisites.new(
      filepath,
      'https://profiles.doe.mass.edu/statereport/enrollmentbygrade.aspx',
      { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
        'ctl00_ContentPlaceHolder1_ddYear' => academic_year.range },
      'btnViewReport',
      '',
      not_applicable
    )
  end
end
|
||||
|
||||
# Looks up the total enrollment for one school in one academic year.
#
# The CSV is read on the first call (and again only while the lookup is still
# empty) into a hash keyed by [DESE ID as Integer, academic year String].
# Once populated, the lookup is reused as-is, whatever +filepath+ is passed.
#
# @param filepath [String, Pathname] CSV with 'Academic Year', 'DESE ID' and 'Total' columns
# @param dese_id [Integer] school DESE ID
# @param year [String] academic year exactly as written in the CSV
# @return [Integer, nil] total students, or nil when no usable row exists
def student_count(filepath:, dese_id:, year:)
  @students ||= {}
  if @students.empty?
    CSV.foreach(filepath, headers: true) do |row|
      total = row['Total']
      # A blank Total cell parses as nil; skip the row instead of raising NoMethodError.
      next if total.nil?

      # Totals may be written with thousands separators, e.g. "1,234".
      @students[[row['DESE ID'].to_i, row['Academic Year']]] = total.delete(',').to_i
    end
  end
  @students[[dese_id, year]]
end
|
||||
|
||||
def run_a_sust_i1(filepath:)
|
||||
run do |academic_year|
|
||||
admin_data_item_id = 'a-sust-i1'
|
||||
|
|
|
|||
115
app/services/dese/three_b_two.rb
Normal file
115
app/services/dese/three_b_two.rb
Normal file
|
|
@ -0,0 +1,115 @@
|
|||
require 'watir'
|
||||
require 'csv'
|
||||
|
||||
module Dese
|
||||
class ThreeBTwo
|
||||
include Dese::Scraper
|
||||
include Dese::Enrollments
|
||||
attr_reader :filepaths
|
||||
|
||||
# Stores the CSV paths this scraper works with.
#
# By default the paths live under data/admin_data/dese in the Rails root, in
# this order: enrollments, teachers by race and gender, students by race and
# gender.
#
# @param filepaths [Array<String, Pathname>] the three CSV paths, in the order above
def initialize(filepaths: %w[enrollments.csv
                             3B_2_teacher_by_race_and_gender.csv
                             3B_2_student_by_race_and_gender.csv].map { |name| Rails.root.join('data', 'admin_data', 'dese', name) })
  @filepaths = filepaths
end
|
||||
|
||||
# Runs the three scrapes in sequence, one per entry of +filepaths+:
# enrollments, teacher demographics, then student demographics. The two
# demographics CSVs get their header row written here before scraping.
#
# The browser is closed in an +ensure+ so that a scrape that raises does not
# leave it open.
# NOTE(review): +browser+ is not defined in this class; presumably it comes
# from Dese::Scraper -- confirm.
def run_all
  scrape_enrollments(filepath: filepaths[0])

  filepath = filepaths[1]
  headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'Teachers of color (#)', 'School Name', 'DESE ID',
             'African American (#)', 'Asian (#)', 'Hispanic (#)', 'White (#)', 'Native American (#)',
             'Native Hawaiian Pacific Islander (#)', 'Multi-Race Non-Hispanic (#)', 'Females (#)',
             'Males (#)', 'FTE Count']
  write_headers(filepath:, headers:)
  run_teacher_demographics(filepath:)

  filepath = filepaths[2]
  headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'Non-White Teachers', 'Non-White Students', 'School Name', 'DESE ID',
             'African American', 'Asian', 'Hispanic', 'White', 'Native American',
             'Native Hawaiian or Pacific Islander', 'Multi-Race or Non-Hispanic', 'Males',
             'Females', 'Non-Binary', 'Students of color (%)']
  write_headers(filepath:, headers:)
  run_student_demographics(filepath:)
ensure
  browser.close
end
|
||||
|
||||
# Hands +run+ a block that builds one Prerequisites bundle per academic year
# it yields, pointing at the DESE "teacher by race/gender" report.
#
# The calculation adds up the non-white teacher columns of a scraped row,
# prepends that total to +items+ (mutating the row in place) and returns it.
# No admin data item is attached (the id is an empty string).
#
# @param filepath [String, Pathname] destination CSV file
# @return whatever +run+ returns
def run_teacher_demographics(filepath:)
  non_white_columns = ['African American (#)', 'Asian (#)', 'Hispanic (#)', 'Native American (#)',
                       'Native Hawaiian, Pacific Islander (#)', 'Multi-Race,Non-Hispanic (#)']

  teachers_of_color = lambda do |headers, items|
    # headers maps a column name to its position in items.
    counts = non_white_columns.map { |column| items[headers[column]].to_f }
    # Plain left-to-right addition, in column order.
    total_non_white_teachers = counts.reduce(:+)
    items.unshift(total_non_white_teachers)
    total_non_white_teachers
  end

  run do |academic_year|
    Prerequisites.new(
      filepath,
      'https://profiles.doe.mass.edu/statereport/teacherbyracegender.aspx',
      { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
        'ctl00_ContentPlaceHolder1_ddYear' => academic_year.range },
      'ctl00_ContentPlaceHolder1_btnViewReport',
      '',
      teachers_of_color
    )
  end
end
|
||||
|
||||
# Looks up the number of teachers of color for one school in one academic year.
#
# The CSV is read on the first call (and again only while the lookup is still
# empty) into a hash keyed by [DESE ID as Integer, academic year String].
# Once populated, the lookup is reused as-is, whatever +filepath+ is passed.
#
# @param filepath [String, Pathname] CSV with 'Academic Year', 'DESE ID' and
#   'Teachers of color (#)' columns
# @param dese_id [Integer] school DESE ID
# @param year [String] academic year exactly as written in the CSV
# @return [Float, nil] teacher count, or nil when no usable row exists
def teacher_count(filepath:, dese_id:, year:)
  @teachers ||= {}
  if @teachers.empty?
    CSV.foreach(filepath, headers: true) do |row|
      total = row['Teachers of color (#)']
      # A blank cell parses as nil; skip the row instead of raising NoMethodError.
      next if total.nil?

      # Counts may be written with thousands separators, e.g. "1,234.5".
      @teachers[[row['DESE ID'].to_i, row['Academic Year']]] = total.delete(',').to_f
    end
  end
  @teachers[[dese_id, year]]
end
|
||||
|
||||
# Hands +run+ a block that builds one Prerequisites bundle per academic year
# it yields, pointing at the DESE "enrollment by race/gender" report and
# tagged with admin data item 'a-cure-i1'.
#
# For each scraped row the calculation:
#   1. derives the non-white student percentage as 100 minus the 'White' column;
#   2. converts it to a head count using the enrollment total from filepaths[0]
#      (0 when the school/year is not found);
#   3. looks up the non-white teacher count from filepaths[1];
#   4. prepends the student count and then the teacher count to +items+
#      (mutating the row, so the teacher count ends up first);
#   5. returns (teachers / students) * 4 / 0.26.
#
# When the non-white student count is zero the ratio is undefined (it would be
# NaN or Infinity), so 'NA' is returned instead.
#
# @param filepath [String, Pathname] destination CSV file
# @return whatever +run+ returns
def run_student_demographics(filepath:)
  run do |academic_year|
    admin_data_item_id = 'a-cure-i1'
    url = 'https://profiles.doe.mass.edu/statereport/enrollmentbyracegender.aspx'
    range = academic_year.range
    selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
                  'ctl00_ContentPlaceHolder1_ddYear' => range }
    submit_id = 'btnViewReport'
    calculation = lambda { |headers, items|
      # Read everything from the row before unshifting, while header positions are still valid.
      non_white_student_percentage = 100 - items[headers['White']].to_f
      dese_id = items[headers['School Code']].to_i

      num_of_students = student_count(filepath: filepaths[0], dese_id:, year: range) || 0
      num_of_non_white_students = num_of_students * non_white_student_percentage / 100
      items.unshift(num_of_non_white_students)

      num_of_non_white_teachers = teacher_count(filepath: filepaths[1], dese_id:, year: range)
      items.unshift(num_of_non_white_teachers)

      # Dividing by zero students would produce NaN/Infinity as the score.
      next 'NA' if num_of_non_white_students.zero?

      parity_index = num_of_non_white_teachers.to_f / num_of_non_white_students
      parity_index * 4 / 0.26
    }
    Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
  end
end
|
||||
end
|
||||
end
|
||||
Loading…
Add table
Add a link
Reference in a new issue