Turn off specs for scrapers. Generate csvs for scraped data

pull/1/head
rebuilt 3 years ago
parent 5002e4eb63
commit 9e18bf2151

@ -0,0 +1,38 @@
require 'watir'
require 'csv'
module Dese
module Enrollments
include Dese::Scraper
attr_reader :filepaths
# Sets up the "enrollment by grade" scrape whose rows end up in the CSV at
# +filepath+.
#
# The header row is written first via write_headers. Then, for every object
# that +run+ yields (each must respond to #range), a Prerequisites object is
# built describing the report page, the drop-down selections, the submit
# button and the score calculation. How +run+ consumes that object is decided
# outside this method.
#
# No Likert score applies to raw enrollment rows, so the admin data item id is
# blank and the calculation always answers 'NA'.
#
# @param filepath [String, Pathname] destination CSV
# @return whatever +run+ returns
def scrape_enrollments(filepath:)
  grade_columns = %w[PK K 1 2 3 4 5 6 7 8 9 10 11 12 SP Total]
  headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name',
             'DESE ID'] + grade_columns
  write_headers(filepath:, headers:)

  run do |academic_year|
    Prerequisites.new(
      filepath,
      'https://profiles.doe.mass.edu/statereport/enrollmentbygrade.aspx',
      { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
        'ctl00_ContentPlaceHolder1_ddYear' => academic_year.range },
      'btnViewReport',
      '',
      ->(_headers, _items) { 'NA' }
    )
  end
end
# Looks up the total enrollment of one school in one academic year.
#
# While the @students cache is empty, the CSV at +filepath+ is parsed and each
# row is stored under [DESE ID as Integer, 'Academic Year' string]. Once the
# cache holds data, calls are answered from it and +filepath+ is not read
# again, so every caller on the same object is expected to pass the same file.
#
# @param filepath [String, Pathname] CSV with 'Academic Year', 'DESE ID' and
#   'Total' columns
# @param dese_id [Integer] school id, matched against 'DESE ID' after to_i
# @param year [String] academic year exactly as written in the CSV
# @return [Integer, nil] enrollment total, or nil when no usable row exists
def student_count(filepath:, dese_id:, year:)
  @students ||= {}
  if @students.empty?
    CSV.foreach(filepath, headers: true) do |row|
      total = row['Total']
      # An empty Total cell parses as nil; skip the row instead of raising
      # NoMethodError so one incomplete line cannot abort the whole lookup.
      next if total.nil?

      # Totals may carry thousands separators ("1,234").
      @students[[row['DESE ID'].to_i, row['Academic Year']]] = total.delete(',').to_i
    end
  end
  @students[[dese_id, year]]
end
end
end

@ -1,6 +1,7 @@
require 'watir'
require 'csv'
# TODO: convert this to simpler format and add a run_all method
module Dese
class FourDOne
def initialize(filepath: Rails.root.join('data', 'admin_data', 'dese', '4D_1_plans_of_grads.csv'))

@ -47,11 +47,14 @@ module Dese
def self.create_admin_data_value(row:, score:)
school = School.find_by_dese_id(dese_id(row:).to_i)
admin_data_item_id = admin_data_item(row:)
return if school.nil?
return if admin_data_item_id.nil? || admin_data_item_id.blank?
admin_data_value = AdminDataValue.find_by(academic_year: AcademicYear.find_by_range(ay(row:)),
school:,
admin_data_item: AdminDataItem.find_by_admin_data_item_id(admin_data_item(row:)))
admin_data_item: AdminDataItem.find_by_admin_data_item_id(admin_data_item_id))
if admin_data_value.present?
admin_data_value.likert_score = score
admin_data_value.save

@ -1,6 +1,6 @@
require 'watir'
require 'csv'
# TODO: convert this to simpler format and add a run_all method
module Dese
class OneAOne
def initialize(filepath: Rails.root.join('data', 'admin_data', 'dese', '1A_1_teacher_data.csv'))

@ -4,9 +4,10 @@ require 'csv'
module Dese
class ThreeATwo
include Dese::Scraper
include Dese::Enrollments
attr_reader :filepaths
def initialize(filepaths: [Rails.root.join('data', 'admin_data', 'dese', '3A_2_enrollment.csv'),
def initialize(filepaths: [Rails.root.join('data', 'admin_data', 'dese', 'enrollments.csv'),
Rails.root.join('data', 'admin_data', 'dese', '3A_2_age_staffing.csv'),
Rails.root.join('data', 'admin_data', 'dese', '3A_2_grade_subject_staffing.csv')])
@ -47,35 +48,6 @@ module Dese
write_headers(filepath:, headers:)
end
# Sets up the "enrollment by grade" scrape whose rows end up in the CSV at
# +filepath+.
#
# The header row is written first via write_headers. Then, for every object
# that +run+ yields (each must respond to #range), a Prerequisites object is
# built describing the report page, the drop-down selections, the submit
# button and the score calculation. How +run+ consumes that object is decided
# outside this method.
#
# No Likert score applies to raw enrollment rows, so the admin data item id is
# blank and the calculation always answers 'NA'.
#
# @param filepath [String, Pathname] destination CSV
# @return whatever +run+ returns
def scrape_enrollments(filepath:)
  grade_columns = %w[PK K 1 2 3 4 5 6 7 8 9 10 11 12 SP Total]
  headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name',
             'DESE ID'] + grade_columns
  write_headers(filepath:, headers:)

  run do |academic_year|
    Prerequisites.new(
      filepath,
      'https://profiles.doe.mass.edu/statereport/enrollmentbygrade.aspx',
      { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
        'ctl00_ContentPlaceHolder1_ddYear' => academic_year.range },
      'btnViewReport',
      '',
      ->(_headers, _items) { 'NA' }
    )
  end
end
# Looks up the total enrollment of one school in one academic year.
#
# While the @students cache is empty, the CSV at +filepath+ is parsed and each
# row is stored under [DESE ID as Integer, 'Academic Year' string]. Once the
# cache holds data, calls are answered from it and +filepath+ is not read
# again, so every caller on the same object is expected to pass the same file.
#
# @param filepath [String, Pathname] CSV with 'Academic Year', 'DESE ID' and
#   'Total' columns
# @param dese_id [Integer] school id, matched against 'DESE ID' after to_i
# @param year [String] academic year exactly as written in the CSV
# @return [Integer, nil] enrollment total, or nil when no usable row exists
def student_count(filepath:, dese_id:, year:)
  @students ||= {}
  if @students.empty?
    CSV.foreach(filepath, headers: true) do |row|
      total = row['Total']
      # An empty Total cell parses as nil; skip the row instead of raising
      # NoMethodError so one incomplete line cannot abort the whole lookup.
      next if total.nil?

      # Totals may carry thousands separators ("1,234").
      @students[[row['DESE ID'].to_i, row['Academic Year']]] = total.delete(',').to_i
    end
  end
  @students[[dese_id, year]]
end
def run_a_sust_i1(filepath:)
run do |academic_year|
admin_data_item_id = 'a-sust-i1'

@ -0,0 +1,115 @@
require 'watir'
require 'csv'
module Dese
class ThreeBTwo
include Dese::Scraper
include Dese::Enrollments
attr_reader :filepaths
# Stores the CSV locations this scraper works with.
#
# @param filepaths [Array<String, Pathname>] three paths, used by position:
#   [0] enrollments, [1] teachers by race and gender, [2] students by race and
#   gender. Defaults to files under data/admin_data/dese inside Rails.root.
def initialize(filepaths: %w[enrollments.csv
                             3B_2_teacher_by_race_and_gender.csv
                             3B_2_student_by_race_and_gender.csv].map do |csv_name|
                            Rails.root.join('data', 'admin_data', 'dese', csv_name)
                          end)
  @filepaths = filepaths
end
# Runs the three scrape steps in order and always closes the browser.
#
# Order matters: the enrollment CSV (filepaths[0]) and the teacher CSV
# (filepaths[1]) are written first because the student demographics
# calculation reads both of them back through student_count / teacher_count.
#
# The browser is closed in an +ensure+ so that a failure in any step does not
# leave it running; the original exception still propagates to the caller.
#
# @return the result of the last scrape step (not meaningful to callers)
def run_all
  enrollment_path, teacher_path, student_path = filepaths

  scrape_enrollments(filepath: enrollment_path)

  write_headers(
    filepath: teacher_path,
    headers: ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'Teachers of color (#)',
              'School Name', 'DESE ID',
              'African American (#)', 'Asian (#)', 'Hispanic (#)', 'White (#)', 'Native American (#)',
              'Native Hawaiian Pacific Islander (#)', 'Multi-Race Non-Hispanic (#)', 'Females (#)',
              'Males (#)', 'FTE Count']
  )
  run_teacher_demographics(filepath: teacher_path)

  write_headers(
    filepath: student_path,
    headers: ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'Non-White Teachers',
              'Non-White Students', 'School Name', 'DESE ID',
              'African American', 'Asian', 'Hispanic', 'White', 'Native American',
              'Native Hawaiian or Pacific Islander', 'Multi-Race or Non-Hispanic', 'Males',
              'Females', 'Non-Binary', 'Students of color (%)']
  )
  run_student_demographics(filepath: student_path)
ensure
  browser.close
end
# Sets up the "teachers by race and gender" scrape for the CSV at +filepath+.
#
# For every object that +run+ yields (each must respond to #range) a
# Prerequisites object is built for the report page. Its calculation adds up
# the teacher counts of every listed non-White column, prepends that total to
# the row's +items+ (mutating the array) and returns it.
#
# +headers+ is indexed by column title and must yield a position usable as an
# index into +items+. The titles with commas are the ones this code looks up
# on the report; presumably they match the page's own column names - confirm.
#
# @param filepath [String, Pathname] destination CSV
# @return whatever +run+ returns
def run_teacher_demographics(filepath:)
  run do |academic_year|
    teachers_of_color = lambda { |headers, items|
      counts = [
        'African American (#)',
        'Asian (#)',
        'Hispanic (#)',
        'Native American (#)',
        'Native Hawaiian, Pacific Islander (#)',
        'Multi-Race,Non-Hispanic (#)'
      ].map { |column| items[headers[column]].to_f }

      # Plain left-to-right addition, one column after another.
      total = counts.reduce(:+)
      items.unshift(total)
      total
    }

    Prerequisites.new(
      filepath,
      'https://profiles.doe.mass.edu/statereport/teacherbyracegender.aspx',
      { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
        'ctl00_ContentPlaceHolder1_ddYear' => academic_year.range },
      'ctl00_ContentPlaceHolder1_btnViewReport',
      '',
      teachers_of_color
    )
  end
end
# Looks up the number of teachers of color for one school in one academic year.
#
# While the @teachers cache is empty, the CSV at +filepath+ is parsed and each
# row is stored under [DESE ID as Integer, 'Academic Year' string]. Once the
# cache holds data, calls are answered from it and +filepath+ is not read
# again.
#
# @param filepath [String, Pathname] CSV with 'Academic Year', 'DESE ID' and
#   'Teachers of color (#)' columns
# @param dese_id [Integer] school id, matched against 'DESE ID' after to_i
# @param year [String] academic year exactly as written in the CSV
# @return [Float, nil] the count, or nil when no usable row exists
def teacher_count(filepath:, dese_id:, year:)
  @teachers ||= {}
  if @teachers.empty?
    CSV.foreach(filepath, headers: true) do |row|
      count = row['Teachers of color (#)']
      # An empty cell parses as nil; skip the row instead of raising
      # NoMethodError so one incomplete line cannot abort the whole lookup.
      next if count.nil?

      # Counts may carry thousands separators ("1,200.0").
      @teachers[[row['DESE ID'].to_i, row['Academic Year']]] = count.delete(',').to_f
    end
  end
  @teachers[[dese_id, year]]
end
# Sets up the "enrollment by race and gender" scrape (admin data item
# 'a-cure-i1') for the CSV at +filepath+.
#
# For every object that +run+ yields (each must respond to #range) a
# Prerequisites object is built. Its calculation, per scraped row:
#   1. takes the 'White' value as a percentage and derives the non-White share,
#   2. turns that share into a head count using student_count on filepaths[0]
#      (a school missing from that file counts as 0 students),
#   3. fetches the teachers-of-color count from filepaths[1] via teacher_count,
#   4. prepends teachers, then students, to +items+ (mutating the array) so the
#      row starts with [non-White teachers, non-White students, ...],
#   5. returns teachers / students * 4 / 0.26.
#
# When the non-White student count is zero the ratio is undefined; 'NA' is
# returned instead of NaN/Infinity, the same sentinel the enrollment scrape's
# calculation uses.
# NOTE(review): 4 and 0.26 look like the scale maximum and a benchmark ratio -
# confirm with the data owner.
#
# @param filepath [String, Pathname] destination CSV
# @return whatever +run+ returns
def run_student_demographics(filepath:)
  run do |academic_year|
    year = academic_year.range

    calculation = lambda { |headers, items|
      non_white_percentage = 100 - items[headers['White']].to_f
      dese_id = items[headers['School Code']].to_i

      enrolled = student_count(filepath: filepaths[0], dese_id:, year:) || 0
      non_white_students = enrolled * non_white_percentage / 100
      items.unshift(non_white_students)

      non_white_teachers = teacher_count(filepath: filepaths[1], dese_id:, year:)
      items.unshift(non_white_teachers)

      # Both columns are already prepended, so the row stays aligned with the
      # headers even when no score can be computed.
      next 'NA' if non_white_students.zero?

      non_white_teachers.to_f / non_white_students.to_f * 4 / 0.26
    }

    Prerequisites.new(
      filepath,
      'https://profiles.doe.mass.edu/statereport/enrollmentbyracegender.aspx',
      { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
        'ctl00_ContentPlaceHolder1_ddYear' => year },
      'btnViewReport',
      'a-cure-i1',
      calculation
    )
  end
end
end
end

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -0,0 +1,73 @@
require 'rails_helper'
require 'fileutils'
require 'csv'
RSpec.describe Dese::ThreeBTwo do
  # Academic years created before every example.
  # Older ranges are currently switched off:
  #   2019-20, 2018-19, 2017-18, 2016-17
  let(:academic_years) do
    %w[2021-22 2020-21].map { |range| create(:academic_year, range:) }
  end

  # Scraper output is written below tmp/spec/dese.
  let(:enrollment_filepath) { Rails.root.join('tmp', 'spec', 'dese', 'enrollments.csv') }
  let(:teacher_race_filepath) { Rails.root.join('tmp', 'spec', 'dese', '3B_2_teacher_by_race_and_gender.csv') }
  let(:student_race_filepath) { Rails.root.join('tmp', 'spec', 'dese', '3B_2_student_by_race_and_gender.csv') }
  let(:filepaths) { [enrollment_filepath, teacher_race_filepath, student_race_filepath] }

  # Splits the first line of the file at +path+ on commas. The last field
  # keeps its trailing newline, which the expectations below account for.
  def header_fields(path)
    File.open(path, &:first).split(',')
  end

  before do
    FileUtils.mkdir_p 'tmp/spec/dese'
    academic_years
  end

  # Disabled with xcontext; the examples after the first read the files that
  # the first example's run_all call writes.
  xcontext '#run_all' do
    it 'creates a csv file with the scraped data' do
      Dese::ThreeBTwo.new(filepaths:).run_all

      expect(teacher_race_filepath).to exist
      expect(student_race_filepath).to exist
    end

    it 'has the correct headers for teacher demographic information' do
      expected = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year',
                  'Teachers of color (#)', 'School Name', 'DESE ID',
                  'African American (#)', 'Asian (#)', 'Hispanic (#)', 'White (#)', 'Native American (#)',
                  'Native Hawaiian Pacific Islander (#)', 'Multi-Race Non-Hispanic (#)', 'Females (#)',
                  'Males (#)', "FTE Count\n"]

      expect(header_fields(teacher_race_filepath)).to eq expected
    end

    it 'has the correct headers for student demographic information' do
      pending 'need feedback from peter'

      expected = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year',
                  'Non-White Teachers', 'Non-White Students', 'School Name', 'DESE ID',
                  'African American', 'Asian', 'Hispanic', 'White', 'Native American',
                  'Native Hawaiian or Pacific Islander', 'Multi-Race or Non-Hispanic', 'Males',
                  'Females', 'Non-Binary', "Students of color (%)\n"]

      expect(header_fields(student_race_filepath)).to eq expected
    end

    it 'has the right likert score results for a-cure-i1' do
      pending 'not yet implemented'

      rows = CSV.parse(File.read(student_race_filepath), headers: true)
      scores = rows.filter_map do |row|
        row['Likert Score'].to_f if row['Admin Data Item'] == 'a-cure-i1' && row['Academic Year'] == '2020-21'
      end

      expect(scores.take(20)).to eq [4.44, 4.44, 3.33, 3.83, 4.44, 3.6, 4.44, 4.44, 1, 4.44, 4.44, 4.44, 4.44, 3.89,
                                     4.44, 4.44, 4.44, 4.44, 4.01, 3.92]
    end
  end
end
Loading…
Cancel
Save