mirror of
https://github.com/edcommonwealth/sqm-dashboards.git
synced 2026-03-08 15:08:15 -07:00
Turn off specs for scrapers. Generate csvs for scraped data
This commit is contained in:
parent
5002e4eb63
commit
9e18bf2151
22 changed files with 78034 additions and 17121 deletions
38
app/services/dese/enrollments.rb
Normal file
38
app/services/dese/enrollments.rb
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
require 'watir'
|
||||
require 'csv'
|
||||
|
||||
module Dese
|
||||
module Enrollments
|
||||
include Dese::Scraper
|
||||
attr_reader :filepaths
|
||||
|
||||
# Writes the enrollment CSV header row to +filepath+ and then, for every
# academic year yielded by `run`, describes the DESE "enrollment by grade"
# report to scrape.
#
# The block handed to `run` returns a Prerequisites built from the target
# file, report URL, form selectors, submit button id, an empty admin data
# item id and a calculation that always yields 'NA'.
#
# @param filepath [String, Pathname] CSV file the header row is written to
def scrape_enrollments(filepath:)
  grade_columns = %w[PK K 1 2 3 4 5 6 7 8 9 10 11 12 SP Total]
  leading_columns = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year',
                     'School Name', 'DESE ID']
  write_headers(filepath: filepath, headers: leading_columns + grade_columns)

  run do |academic_year|
    report_selectors = {
      'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
      'ctl00_ContentPlaceHolder1_ddYear' => academic_year.range
    }
    # No likert score is derived from this report, so the calculation is a constant.
    not_applicable = ->(_headers, _items) { 'NA' }

    Prerequisites.new(filepath,
                      'https://profiles.doe.mass.edu/statereport/enrollmentbygrade.aspx',
                      report_selectors,
                      'btnViewReport',
                      '',
                      not_applicable)
  end
end
|
||||
|
||||
# Looks up the total enrollment of a school for one academic year.
#
# The first call reads the CSV at +filepath+ and caches every row in
# @students, keyed by [DESE ID as Integer, 'Academic Year' value]. Once the
# cache holds any rows, later calls are answered from it and +filepath+ is
# not read again.
#
# @param filepath [String, Pathname] CSV with 'Academic Year', 'DESE ID' and 'Total' columns
# @param dese_id [Integer] school id, compared against the integer form of 'DESE ID'
# @param year [String] academic year exactly as it appears in the 'Academic Year' column
# @return [Integer, nil] the cached total, or nil when no row matches
def student_count(filepath:, dese_id:, year:)
  @students ||= {}
  if @students.empty?
    CSV.foreach(filepath, headers: true) do |row|
      key = [row['DESE ID'].to_i, row['Academic Year']]
      # An empty 'Total' cell is parsed as nil; to_s keeps that from raising
      # and records the row as 0. Thousands separators are stripped first.
      @students[key] = row['Total'].to_s.delete(',').to_i
    end
  end
  @students[[dese_id, year]]
end
|
||||
end
|
||||
end
|
||||
|
|
@ -1,6 +1,7 @@
|
|||
require 'watir'
|
||||
require 'csv'
|
||||
|
||||
# TODO: convert this to simpler format and add a run_all method
|
||||
module Dese
|
||||
class FourDOne
|
||||
def initialize(filepath: Rails.root.join('data', 'admin_data', 'dese', '4D_1_plans_of_grads.csv'))
|
||||
|
|
|
|||
|
|
@ -47,11 +47,14 @@ module Dese
|
|||
|
||||
def self.create_admin_data_value(row:, score:)
|
||||
school = School.find_by_dese_id(dese_id(row:).to_i)
|
||||
admin_data_item_id = admin_data_item(row:)
|
||||
|
||||
return if school.nil?
|
||||
return if admin_data_item_id.nil? || admin_data_item_id.blank?
|
||||
|
||||
admin_data_value = AdminDataValue.find_by(academic_year: AcademicYear.find_by_range(ay(row:)),
|
||||
school:,
|
||||
admin_data_item: AdminDataItem.find_by_admin_data_item_id(admin_data_item(row:)))
|
||||
admin_data_item: AdminDataItem.find_by_admin_data_item_id(admin_data_item_id))
|
||||
if admin_data_value.present?
|
||||
admin_data_value.likert_score = score
|
||||
admin_data_value.save
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
require 'watir'
|
||||
require 'csv'
|
||||
|
||||
# TODO convert this to simpler format and add a run_all method
|
||||
module Dese
|
||||
class OneAOne
|
||||
def initialize(filepath: Rails.root.join('data', 'admin_data', 'dese', '1A_1_teacher_data.csv'))
|
||||
|
|
|
|||
|
|
@ -4,9 +4,10 @@ require 'csv'
|
|||
module Dese
|
||||
class ThreeATwo
|
||||
include Dese::Scraper
|
||||
include Dese::Enrollments
|
||||
attr_reader :filepaths
|
||||
|
||||
def initialize(filepaths: [Rails.root.join('data', 'admin_data', 'dese', '3A_2_enrollment.csv'),
|
||||
def initialize(filepaths: [Rails.root.join('data', 'admin_data', 'dese', 'enrollments.csv'),
|
||||
Rails.root.join('data', 'admin_data', 'dese', '3A_2_age_staffing.csv'),
|
||||
Rails.root.join('data', 'admin_data', 'dese', '3A_2_grade_subject_staffing.csv')])
|
||||
|
||||
|
|
@ -47,35 +48,6 @@ module Dese
|
|||
write_headers(filepath:, headers:)
|
||||
end
|
||||
|
||||
def scrape_enrollments(filepath:)
|
||||
headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
|
||||
'PK', 'K', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', 'SP', 'Total']
|
||||
write_headers(filepath:, headers:)
|
||||
run do |academic_year|
|
||||
admin_data_item_id = ''
|
||||
url = 'https://profiles.doe.mass.edu/statereport/enrollmentbygrade.aspx'
|
||||
range = academic_year.range
|
||||
selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
|
||||
'ctl00_ContentPlaceHolder1_ddYear' => range }
|
||||
submit_id = 'btnViewReport'
|
||||
calculation = ->(_headers, _items) { 'NA' }
|
||||
Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
|
||||
end
|
||||
end
|
||||
|
||||
def student_count(filepath:, dese_id:, year:)
|
||||
@students ||= {}
|
||||
if @students.count == 0
|
||||
CSV.parse(File.read(filepath), headers: true).map do |row|
|
||||
academic_year = row['Academic Year']
|
||||
school_id = row['DESE ID'].to_i
|
||||
total = row['Total'].gsub(',', '').to_i
|
||||
@students[[school_id, academic_year]] = total
|
||||
end
|
||||
end
|
||||
@students[[dese_id, year]]
|
||||
end
|
||||
|
||||
def run_a_sust_i1(filepath:)
|
||||
run do |academic_year|
|
||||
admin_data_item_id = 'a-sust-i1'
|
||||
|
|
|
|||
115
app/services/dese/three_b_two.rb
Normal file
115
app/services/dese/three_b_two.rb
Normal file
|
|
@ -0,0 +1,115 @@
|
|||
require 'watir'
|
||||
require 'csv'
|
||||
|
||||
module Dese
|
||||
class ThreeBTwo
|
||||
include Dese::Scraper
|
||||
include Dese::Enrollments
|
||||
attr_reader :filepaths
|
||||
|
||||
# Stores the CSV locations this scraper works with.
#
# @param filepaths [Array<String, Pathname>] three paths, in order:
#   enrollments, teachers by race and gender, students by race and gender.
#   Defaults to the files under data/admin_data/dese in the Rails root.
def initialize(filepaths: %w[enrollments.csv
                             3B_2_teacher_by_race_and_gender.csv
                             3B_2_student_by_race_and_gender.csv].map { |csv_name|
                            Rails.root.join('data', 'admin_data', 'dese', csv_name)
                          })
  @filepaths = filepaths
end
|
||||
|
||||
# Runs every scrape for this measure in order: enrollments, teacher
# demographics, then student demographics. Each step writes to the matching
# entry of `filepaths` (index 0, 1 and 2 respectively); the header rows for
# the teacher and student files are written here before their scrape starts.
#
# The browser is closed in an `ensure` so that it is released even when one
# of the scrape steps raises.
def run_all
  enrollment_path, teacher_path, student_path = filepaths

  scrape_enrollments(filepath: enrollment_path)

  teacher_headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year',
                     'Teachers of color (#)', 'School Name', 'DESE ID',
                     'African American (#)', 'Asian (#)', 'Hispanic (#)', 'White (#)', 'Native American (#)',
                     'Native Hawaiian Pacific Islander (#)', 'Multi-Race Non-Hispanic (#)', 'Females (#)',
                     'Males (#)', 'FTE Count']
  write_headers(filepath: teacher_path, headers: teacher_headers)
  run_teacher_demographics(filepath: teacher_path)

  student_headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year',
                     'Non-White Teachers', 'Non-White Students', 'School Name', 'DESE ID',
                     'African American', 'Asian', 'Hispanic', 'White', 'Native American',
                     'Native Hawaiian or Pacific Islander', 'Multi-Race or Non-Hispanic', 'Males',
                     'Females', 'Non-Binary', 'Students of color (%)']
  write_headers(filepath: student_path, headers: student_headers)
  run_student_demographics(filepath: student_path)
ensure
  browser.close
end
|
||||
|
||||
# For every academic year yielded by `run`, describes the DESE "teacher by
# race/gender" report to scrape into +filepath+.
#
# The calculation passed along sums the six non-white teacher counts of a
# row, prepends that sum to the row's items (mutating +items+) and returns it.
#
# @param filepath [String, Pathname] CSV file the scraped rows belong to
def run_teacher_demographics(filepath:)
  run do |academic_year|
    report_selectors = {
      'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
      'ctl00_ContentPlaceHolder1_ddYear' => academic_year.range
    }

    # NOTE(review): the two keys containing commas differ from the comma-free
    # column names that run_all writes to the CSV; presumably they mirror the
    # header text of the scraped page - confirm against the live report.
    non_white_columns = ['African American (#)', 'Asian (#)', 'Hispanic (#)', 'Native American (#)',
                         'Native Hawaiian, Pacific Islander (#)', 'Multi-Race,Non-Hispanic (#)']

    teachers_of_color = lambda { |headers, items|
      # headers maps a column name to the position of its value in items.
      counts = non_white_columns.map { |column| items[headers[column]].to_f }
      total_non_white_teachers = counts.reduce(:+)
      items.unshift(total_non_white_teachers)
      total_non_white_teachers
    }

    Prerequisites.new(filepath,
                      'https://profiles.doe.mass.edu/statereport/teacherbyracegender.aspx',
                      report_selectors,
                      'ctl00_ContentPlaceHolder1_btnViewReport',
                      '',
                      teachers_of_color)
  end
end
|
||||
|
||||
# Looks up the number of teachers of color at a school for one academic year.
#
# The first call reads the CSV at +filepath+ and caches every row in
# @teachers, keyed by [DESE ID as Integer, 'Academic Year' value]. Once the
# cache holds any rows, later calls are answered from it and +filepath+ is
# not read again.
#
# @param filepath [String, Pathname] CSV with 'Academic Year', 'DESE ID' and
#   'Teachers of color (#)' columns
# @param dese_id [Integer] school id, compared against the integer form of 'DESE ID'
# @param year [String] academic year exactly as it appears in the 'Academic Year' column
# @return [Float, nil] the cached count, or nil when no row matches
def teacher_count(filepath:, dese_id:, year:)
  @teachers ||= {}
  if @teachers.empty?
    CSV.foreach(filepath, headers: true) do |row|
      key = [row['DESE ID'].to_i, row['Academic Year']]
      # An empty cell is parsed as nil; to_s keeps that from raising and
      # records the row as 0.0. Thousands separators are stripped first.
      @teachers[key] = row['Teachers of color (#)'].to_s.delete(',').to_f
    end
  end
  @teachers[[dese_id, year]]
end
|
||||
|
||||
# For every academic year yielded by `run`, describes the DESE "enrollment by
# race/gender" report to scrape into +filepath+ for admin data item 'a-cure-i1'.
#
# The calculation passed along derives, per row:
#   * the non-white student count, from the row's 'White' value (used as a
#     percentage) and the enrollment cached from filepaths[0];
#   * the non-white teacher count cached from filepaths[1].
# Both are prepended to the row's items (teachers end up first), and the
# teacher/student ratio scaled by 4 / 0.26 is returned.
#
# @param filepath [String, Pathname] CSV file the scraped rows belong to
def run_student_demographics(filepath:)
  run do |academic_year|
    report_selectors = {
      'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
      'ctl00_ContentPlaceHolder1_ddYear' => academic_year.range
    }

    parity_score = lambda { |headers, items|
      percent_non_white = 100 - items[headers['White']].to_f
      # Read the school code before anything is prepended to items, while the
      # header positions still line up.
      school_code = items[headers['School Code']].to_i

      enrolled = student_count(filepath: filepaths[0], dese_id: school_code, year: academic_year.range) || 0
      non_white_students = enrolled * percent_non_white / 100
      items.unshift(non_white_students)

      non_white_teachers = teacher_count(filepath: filepaths[1], dese_id: school_code, year: academic_year.range)
      items.unshift(non_white_teachers)

      # NOTE(review): when non_white_students is zero this float division
      # yields NaN or Infinity; confirm how the consumer of this value copes.
      # The origin of the 4 and 0.26 scaling constants is not shown here.
      ratio = non_white_teachers.to_f / non_white_students.to_f
      ratio * 4 / 0.26
    }

    Prerequisites.new(filepath,
                      'https://profiles.doe.mass.edu/statereport/enrollmentbyracegender.aspx',
                      report_selectors,
                      'btnViewReport',
                      'a-cure-i1',
                      parity_score)
  end
end
|
||||
end
|
||||
end
|
||||
|
Can't render this file because it is too large.
|
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
Can't render this file because it is too large.
|
|
Can't render this file because it is too large.
|
|
Can't render this file because it is too large.
|
9226
data/admin_data/dese/3A_1_average_class_size.csv
Normal file
9226
data/admin_data/dese/3A_1_average_class_size.csv
Normal file
File diff suppressed because it is too large
Load diff
33171
data/admin_data/dese/3A_2_age_staffing.csv
Normal file
33171
data/admin_data/dese/3A_2_age_staffing.csv
Normal file
File diff suppressed because it is too large
Load diff
11067
data/admin_data/dese/3A_2_enrollment.csv
Normal file
11067
data/admin_data/dese/3A_2_enrollment.csv
Normal file
File diff suppressed because it is too large
Load diff
11067
data/admin_data/dese/3A_2_grade_subject_staffing.csv
Normal file
11067
data/admin_data/dese/3A_2_grade_subject_staffing.csv
Normal file
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
73
spec/services/dese/three_b_two_spec.rb
Normal file
73
spec/services/dese/three_b_two_spec.rb
Normal file
|
|
@ -0,0 +1,73 @@
|
|||
require 'rails_helper'
|
||||
require 'fileutils'
|
||||
require 'csv'
|
||||
|
||||
# Specs for the 3B-2 scraper. The '#run_all' context is disabled (xcontext)
# because run_all scrapes profiles.doe.mass.edu through a browser.
RSpec.describe Dese::ThreeBTwo do
  # Academic years the scraper is expected to iterate over; created eagerly
  # so they exist before any example runs.
  let!(:academic_years) do
    [
      create(:academic_year, range: '2021-22'),
      create(:academic_year, range: '2020-21')
      # create(:academic_year, range: '2019-20'),
      # create(:academic_year, range: '2018-19'),
      # create(:academic_year, range: '2017-18'),
      # create(:academic_year, range: '2016-17')
    ]
  end

  let(:output_dir) { Rails.root.join('tmp', 'spec', 'dese') }
  let(:enrollment_filepath) { output_dir.join('enrollments.csv') }
  let(:teacher_race_filepath) { output_dir.join('3B_2_teacher_by_race_and_gender.csv') }
  let(:student_race_filepath) { output_dir.join('3B_2_student_by_race_and_gender.csv') }
  let(:filepaths) { [enrollment_filepath, teacher_race_filepath, student_race_filepath] }

  # Create the directory the CSV paths above point into, independent of the
  # process's current working directory.
  before { FileUtils.mkdir_p(output_dir) }

  # Returns the column names from the first line of the CSV at +path+,
  # without the trailing newline.
  def header_row(path)
    File.foreach(path).first.chomp.split(',')
  end

  # NOTE(review): only the first example calls run_all; the later examples
  # read the files it leaves behind, so they depend on it having run first.
  xcontext '#run_all' do
    it 'creates a csv file with the scraped data' do
      Dese::ThreeBTwo.new(filepaths:).run_all
      expect(teacher_race_filepath).to exist
      expect(student_race_filepath).to exist
    end

    it 'has the correct headers for teacher demographic information' do
      expect(header_row(teacher_race_filepath)).to eq [
        'Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'Teachers of color (#)',
        'School Name', 'DESE ID',
        'African American (#)', 'Asian (#)', 'Hispanic (#)', 'White (#)', 'Native American (#)',
        'Native Hawaiian Pacific Islander (#)', 'Multi-Race Non-Hispanic (#)', 'Females (#)',
        'Males (#)', 'FTE Count'
      ]
    end

    it 'has the correct headers for student demographic information' do
      pending 'need feedback from peter'

      expect(header_row(student_race_filepath)).to eq [
        'Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'Non-White Teachers',
        'Non-White Students', 'School Name', 'DESE ID',
        'African American', 'Asian', 'Hispanic', 'White', 'Native American',
        'Native Hawaiian or Pacific Islander', 'Multi-Race or Non-Hispanic', 'Males',
        'Females', 'Non-Binary', 'Students of color (%)'
      ]
    end

    it 'has the right likert score results for a-cure-i1' do
      pending 'not yet implemented'

      # Likert scores of the 2020-21 rows for this item, in file order.
      results = CSV.parse(File.read(student_race_filepath), headers: true).filter_map do |row|
        next unless row['Admin Data Item'] == 'a-cure-i1' && row['Academic Year'] == '2020-21'

        row['Likert Score'].to_f
      end

      expect(results.take(20)).to eq [4.44, 4.44, 3.33, 3.83, 4.44, 3.6, 4.44, 4.44, 1, 4.44, 4.44, 4.44, 4.44, 3.89,
                                      4.44, 4.44, 4.44, 4.44, 4.01, 3.92]
    end
  end
end
|
||||
Loading…
Add table
Add a link
Reference in a new issue