Remove 'scraper' from file/class names; add 3B-1

pull/1/head
rebuilt 3 years ago
parent 83ef9310a4
commit 3e28be2d0d

@ -2,7 +2,7 @@ require 'watir'
require 'csv' require 'csv'
module Dese module Dese
class FourDScraper class FourDOne
def initialize(filepath: Rails.root.join('data', 'admin_data', 'dese', 'four_d.csv')) def initialize(filepath: Rails.root.join('data', 'admin_data', 'dese', 'four_d.csv'))
url = 'https://profiles.doe.mass.edu/statereport/plansofhsgrads.aspx' url = 'https://profiles.doe.mass.edu/statereport/plansofhsgrads.aspx'
browser = Watir::Browser.new browser = Watir::Browser.new
@ -24,7 +24,7 @@ module Dese
browser.select(id: 'ctl00_ContentPlaceHolder1_ddReportType').select(/School/) browser.select(id: 'ctl00_ContentPlaceHolder1_ddReportType').select(/School/)
browser.select(id: 'ctl00_ContentPlaceHolder1_ddYear').select(text: range) browser.select(id: 'ctl00_ContentPlaceHolder1_ddYear').select(text: range)
browser.button(id: 'btnViewReport').click browser.button(id: 'btnViewReport').click
sleep 2 # Sleep to prevent hitting mass.edu with too many requests sleep Dese::Scraper::DELAY # Sleep to prevent hitting mass.edu with too many requests
document = Nokogiri::HTML(browser.html) document = Nokogiri::HTML(browser.html)
document.css('tr') document.css('tr')
end end

@ -1,7 +1,7 @@
require 'csv' require 'csv'
module Dese module Dese
class FourDLoader class Loader
def self.load_data(filepath:) def self.load_data(filepath:)
CSV.parse(File.read(filepath), headers: true) do |row| CSV.parse(File.read(filepath), headers: true) do |row|
score = likert_score(row:) score = likert_score(row:)

@ -2,8 +2,8 @@ require 'watir'
require 'csv' require 'csv'
module Dese module Dese
class OneAScraper class OneAOne
def initialize(filepath: Rails.root.join('data', 'admin_data', 'dese', 'one_a.csv')) def initialize(filepath: Rails.root.join('data', 'admin_data', 'dese', 'one_a_one_teacher_data.csv'))
url = 'https://profiles.doe.mass.edu/statereport/teacherdata.aspx' url = 'https://profiles.doe.mass.edu/statereport/teacherdata.aspx'
browser = Watir::Browser.new browser = Watir::Browser.new
write_headers(filepath:) write_headers(filepath:)
@ -25,7 +25,7 @@ module Dese
browser.select(id: 'ctl00_ContentPlaceHolder1_ddReportType').select(text: 'School') browser.select(id: 'ctl00_ContentPlaceHolder1_ddReportType').select(text: 'School')
browser.select(id: 'ctl00_ContentPlaceHolder1_ddYear').select(text: range) browser.select(id: 'ctl00_ContentPlaceHolder1_ddYear').select(text: range)
browser.button(id: 'ctl00_ContentPlaceHolder1_btnViewReport').click browser.button(id: 'ctl00_ContentPlaceHolder1_btnViewReport').click
sleep 3 # Sleep to prevent hitting mass.edu with too many requests sleep Dese::Scraper::DELAY # Sleep to prevent hitting mass.edu with too many requests
Nokogiri::HTML(browser.html) Nokogiri::HTML(browser.html)
end end

@ -2,16 +2,16 @@ require 'watir'
require 'csv' require 'csv'
module Dese module Dese
class OneAThreeScraper class OneAThree
include Dese::Scraper
attr_reader :filepaths attr_reader :filepaths
Prerequisites = Struct.new('Prerequisites', :filepath, :url, :selectors, :submit_id, :admin_data_item_id, def initialize(filepaths: [Rails.root.join('data', 'admin_data', 'dese', 'one_a_three_staffing_retention.csv'),
:calculation)
def initialize(filepaths: [Rails.root.join('data', 'admin_data', 'dese', 'one_a_three.csv'),
Rails.root.join('data', 'admin_data', 'dese', 'one_a_three_teachers_of_color.csv')]) Rails.root.join('data', 'admin_data', 'dese', 'one_a_three_teachers_of_color.csv')])
@filepaths = filepaths @filepaths = filepaths
end
def run_all
run_a_pcom_i1 run_a_pcom_i1
run_a_pcom_i3 run_a_pcom_i3
@ -64,76 +64,5 @@ module Dese
Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation) Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
end end
end end
def run
academic_years = AcademicYear.all
academic_years.each do |academic_year|
prerequisites = yield academic_year
document = get_html(url: prerequisites.url,
selectors: prerequisites.selectors,
submit_id: prerequisites.submit_id)
unless document.nil?
write_csv(document:, filepath: prerequisites.filepath, range: academic_year.range, id: prerequisites.admin_data_item_id,
calculation: prerequisites.calculation)
end
end
end
def browser
@browser ||= Watir::Browser.new
end
def get_html(url:, selectors:, submit_id:)
browser.goto(url)
selectors.each do |key, value|
return unless browser.option(text: value).present?
browser.select(id: key).select(text: value)
end
browser.button(id: submit_id).click
sleep 2 # Sleep to prevent hitting mass.edu with too many requests
Nokogiri::HTML(browser.html)
end
def write_headers(filepath:, headers:)
CSV.open(filepath, 'w') do |csv|
csv << headers
end
end
def write_csv(document:, filepath:, range:, id:, calculation:)
table = document.css('tr')
headers = document.css('.sorting')
header_hash = headers.each_with_index.map { |header, index| [header.text, index] }.to_h
CSV.open(filepath, 'a') do |csv|
table.each do |row|
items = row.css('td').map(&:text)
dese_id = items[1].to_i
next if dese_id.nil? || dese_id.zero?
raw_likert_score = calculation.call(header_hash, items)
raw_likert_score ||= 'NA'
likert_score = raw_likert_score
if likert_score != 'NA'
likert_score = 5 if likert_score > 5
likert_score = 1 if likert_score < 1
likert_score = likert_score.round(2)
end
output = []
output << raw_likert_score
output << likert_score
output << id
output << range
output << items
output = output.flatten
csv << output
end
end
end
end end
end end

@ -1,6 +1,9 @@
module Dese module Dese
module Scraper module Scraper
DELAY = 3 DELAY = 20
Prerequisites = Struct.new('Prerequisites', :filepath, :url, :selectors, :submit_id, :admin_data_item_id,
:calculation)
def reverse_score(likert_score:) def reverse_score(likert_score:)
return nil unless likert_score.present? return nil unless likert_score.present?

@ -6,12 +6,11 @@ module Dese
include Dese::Scraper include Dese::Scraper
attr_reader :filepaths attr_reader :filepaths
Prerequisites = Struct.new('Prerequisites', :filepath, :url, :selectors, :submit_id, :admin_data_item_id, def initialize(filepaths: [Rails.root.join('data', 'admin_data', 'dese', 'three_a_one_gender_population.csv')])
:calculation)
def initialize(filepaths: [Rails.root.join('data', 'admin_data', 'dese', 'two_c_one_attendance.csv')])
@filepaths = filepaths @filepaths = filepaths
end
def run_all
filepath = filepaths[0] filepath = filepaths[0]
headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID', headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
'Total # of Classes', 'Average Class Size', 'Number of Students', 'Female %', 'Male %', 'Total # of Classes', 'Average Class Size', 'Number of Students', 'Female %', 'Male %',

@ -0,0 +1,187 @@
require 'watir'
require 'csv'
module Dese
  # Scrapes the DESE enrollment, age-staffing and grade/subject-staffing report
  # pages and writes one CSV per report. The staffing rows are scored from the
  # student-to-staff ratio (admin data items a-sust-i1 .. a-sust-i4); the
  # enrollment CSV supplies the student counts used for those ratios.
  class ThreeATwo
    include Dese::Scraper

    # Leading columns shared by every CSV this class writes.
    COMMON_HEADERS = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name',
                      'DESE ID'].freeze
    AGE_STAFFING_URL = 'https://profiles.doe.mass.edu/statereport/agestaffing.aspx'.freeze

    attr_reader :filepaths

    # @param filepaths [Array] output paths, in order: enrollment CSV,
    #   age-staffing CSV (a-sust-i1..i3), grade/subject-staffing CSV (a-sust-i4)
    def initialize(filepaths: [Rails.root.join('data', 'admin_data', 'dese', 'enrollment.csv'),
                               Rails.root.join('data', 'admin_data', 'dese', 'three_a_two_age_staffing.csv'),
                               Rails.root.join('data', 'admin_data', 'dese', 'three_a_two_grade_subject_staffing.csv')])
      @filepaths = filepaths
    end

    # Runs every scrape in dependency order: enrollments first, because the
    # staffing calculations read student counts back from the enrollment CSV.
    # The browser is closed even when a scrape raises.
    def run_all
      enrollment_path, age_staffing_path, grade_subject_path = filepaths

      scrape_enrollments(filepath: enrollment_path)

      write_a_sust_i1_headers(filepath: age_staffing_path)
      run_a_sust_i1(filepath: age_staffing_path)
      run_a_sust_i2(filepath: age_staffing_path)
      run_a_sust_i3(filepath: age_staffing_path)

      write_a_sust_i4_headers(filepath: grade_subject_path)
      run_a_sust_i4(filepath: grade_subject_path)
    ensure
      browser.close
    end

    # Writes the header row of the age-staffing CSV.
    # NOTE(review): run_all appends a-sust-i2 and a-sust-i3 rows to this same
    # file, so the 'Student to Guidance Counselor ratio' column also holds the
    # psychologist and paraprofessional ratios.
    def write_a_sust_i1_headers(filepath:)
      headers = COMMON_HEADERS + ['<26 yrs (# )', '26-32 yrs (#)', '33-40 yrs (#)', '41-48 yrs (#)',
                                  '49-56 yrs (#)', '57-64 yrs (#)', 'Over 64 yrs (#)', 'FTE Count',
                                  'Student Count', 'Student to Guidance Counselor ratio']
      write_headers(filepath:, headers:)
    end

    # Writes the header row of the grade/subject-staffing CSV.
    def write_a_sust_i4_headers(filepath:)
      headers = COMMON_HEADERS + ['PK-2 (# )', '3-5 (# )', '6-8 (# )', '9-12 (# )', 'Multiple Grades (# )',
                                  'All Grades (# )', 'FTE Count', 'Student Count', 'Student to Art Teacher ratio']
      write_headers(filepath:, headers:)
    end

    # Scrapes enrollment-by-grade for every academic year into +filepath+.
    # No score is computed for these rows: the calculation always yields 'NA'
    # and the admin data item id is blank.
    def scrape_enrollments(filepath:)
      headers = COMMON_HEADERS + %w[PK K 1 2 3 4 5 6 7 8 9 10 11 12 SP Total]
      write_headers(filepath:, headers:)

      run do |academic_year|
        selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
                      'ctl00_ContentPlaceHolder1_ddYear' => academic_year.range }
        calculation = ->(_headers, _items) { 'NA' }
        Prerequisites.new(filepath, 'https://profiles.doe.mass.edu/statereport/enrollmentbygrade.aspx',
                          selectors, 'btnViewReport', '', calculation)
      end
    end

    # Looks up the total enrollment for a school and academic year.
    #
    # The enrollment CSV is read on the first call and memoized; later calls
    # reuse that cache regardless of the +filepath+ they pass.
    #
    # @param filepath [String, Pathname] enrollment CSV written by scrape_enrollments
    # @param dese_id [Integer] value of the 'DESE ID' column
    # @param year [String] value of the 'Academic Year' column, e.g. '2021-22'
    # @return [Integer, nil] the 'Total' column, or nil when no row matches
    def student_count(filepath:, dese_id:, year:)
      @students ||= {}
      load_student_counts(filepath) if @students.empty?
      @students[[dese_id, year]]
    end

    # Guidance counselors, benchmark of 250 students per FTE.
    def run_a_sust_i1(filepath:)
      run_age_staffing(filepath:, admin_data_item_id: 'a-sust-i1',
                       job_classification: 'Guidance Counselor', benchmark: 250)
    end

    # Non-special-education school psychologists, benchmark of 250 students per FTE.
    def run_a_sust_i2(filepath:)
      run_age_staffing(filepath:, admin_data_item_id: 'a-sust-i2',
                       job_classification: 'School Psychologist -- Non-Special Education', benchmark: 250)
    end

    # Paraprofessionals, benchmark of 43.4 students per FTE.
    def run_a_sust_i3(filepath:)
      run_age_staffing(filepath:, admin_data_item_id: 'a-sust-i3',
                       job_classification: 'Paraprofessional', benchmark: 43.4)
    end

    # Arts teachers from the grade/subject-staffing report, benchmark of 500
    # students per FTE. This report is addressed by position: the school code
    # is column 1 and the FTE count is the last scraped cell.
    def run_a_sust_i4(filepath:)
      run do |academic_year|
        selectors = {
          'ctl00_ContentPlaceHolder1_reportType' => 'School',
          'ctl00_ContentPlaceHolder1_fyCode' => academic_year.range,
          'ctl00_ContentPlaceHolder1_subjectCode' => 'Arts'
        }
        calculation = lambda { |_headers, items|
          # Arguments are evaluated before the helper appends to items, so
          # items.last is still the scraped FTE cell here.
          student_staff_ratio_score(items:, staff_cell: items.last, dese_id: items[1].to_i,
                                    year: academic_year.range, benchmark: 500)
        }
        Prerequisites.new(filepath, 'https://profiles.doe.mass.edu/state_report/gradesubjectstaffing.aspx',
                          selectors, 'ctl00_ContentPlaceHolder1_Continue', 'a-sust-i4', calculation)
      end
    end

    private

    # Fills the memoized {[dese_id, academic_year] => total} lookup from the
    # enrollment CSV. Rows without a 'Total' value are skipped.
    def load_student_counts(filepath)
      CSV.foreach(filepath, headers: true) do |row|
        total = row['Total']
        next if total.nil?

        @students[[row['DESE ID'].to_i, row['Academic Year']]] = total.delete(',').to_i
      end
    end

    # Shared driver for the three age-staffing items, which differ only in the
    # job classification selected on the page and the benchmark ratio.
    def run_age_staffing(filepath:, admin_data_item_id:, job_classification:, benchmark:)
      run do |academic_year|
        selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
                      'ctl00_ContentPlaceHolder1_ddYear' => academic_year.range,
                      'ctl00_ContentPlaceHolder1_ddJobClassification' => job_classification }
        calculation = lambda { |headers, items|
          fte_index = headers['FTE Count']
          # Only index into the row once we know the column exists.
          fte_cell = fte_index && items[fte_index]
          dese_id = items[headers['School Code']].to_i
          student_staff_ratio_score(items:, staff_cell: fte_cell, dese_id:,
                                    year: academic_year.range, benchmark:)
        }
        Prerequisites.new(filepath, AGE_STAFFING_URL, selectors, 'btnViewReport', admin_data_item_id, calculation)
      end
    end

    # Appends the student count and the student-to-staff ratio to +items+ and
    # returns the raw score for the row.
    #
    # @param staff_cell [String, nil] scraped FTE cell text
    # @return [Float, nil] nil when the FTE cell is missing or blank
    def student_staff_ratio_score(items:, staff_cell:, dese_id:, year:, benchmark:)
      num_of_students = student_count(filepath: filepaths[0], dese_id:, year:) || 0
      # Float division: a zero FTE count yields Infinity (or NaN with zero
      # students) rather than raising.
      ratio = num_of_students / staff_cell.to_f
      items << num_of_students
      items << ratio
      return nil unless staff_cell.present?

      ((benchmark - ratio) + benchmark) * 4 / benchmark
    end
  end
end

@ -0,0 +1,94 @@
require 'watir'
require 'csv'
module Dese
  # Scrapes the DESE MassCore, advanced-course-completion and AP report pages
  # and writes one CSV per report, scoring each row from a percentage column
  # against a benchmark (admin data items a-curv-i1 .. a-curv-i3).
  class ThreeBOne
    include Dese::Scraper

    # Leading columns shared by every CSV this class writes.
    COMMON_HEADERS = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name',
                      'DESE ID'].freeze

    attr_reader :filepaths

    # @param filepaths [Array] output paths, in order: MassCore CSV,
    #   advanced-course-completion CSV, AP CSV
    def initialize(filepaths: [Rails.root.join('data', 'admin_data', 'dese', 'three_b_one_masscore.csv'),
                               Rails.root.join('data', 'admin_data', 'dese', 'three_b_one_advcoursecomprate.csv'),
                               Rails.root.join('data', 'admin_data', 'dese', 'three_b_one_ap.csv')])
      @filepaths = filepaths
    end

    # Writes each CSV's header row and then scrapes its report. The browser is
    # closed even when a scrape raises.
    def run_all
      masscore_path, advanced_course_path, ap_path = filepaths

      write_headers(filepath: masscore_path,
                    headers: COMMON_HEADERS + ['# Graduated', '# Completed MassCore', '% Completed MassCore'])
      run_a_curv_i1(filepath: masscore_path)

      # NOTE(review): '% All Other Subjects' is listed twice, as in the
      # original header row; confirm against the report's actual columns.
      write_headers(filepath: advanced_course_path,
                    headers: COMMON_HEADERS + ['# Grade 11 and 12 Students', '# Students Completing Advanced',
                                               '% Students Completing Advanced', '% ELA', '% Math',
                                               '% Science and Technology', '% Computer and Information Science',
                                               '% History and Social Sciences', '% Arts', '% All Other Subjects',
                                               '% All Other Subjects'])
      run_a_curv_i2(filepath: advanced_course_path)

      write_headers(filepath: ap_path,
                    headers: COMMON_HEADERS + ['Tests Taken', 'Score=1', 'Score=2', 'Score=3', 'Score=4', 'Score=5',
                                               '% Score 1-2', '% Score 3-5'])
      run_a_curv_i3(filepath: ap_path)
    ensure
      browser.close
    end

    # MassCore completion, benchmark of 90 percent.
    def run_a_curv_i1(filepath:)
      run_percentage_report(filepath:, admin_data_item_id: 'a-curv-i1',
                            url: 'https://profiles.doe.mass.edu/statereport/masscore.aspx',
                            submit_id: 'btnViewReport',
                            column: '% Completed MassCore', benchmark: 90)
    end

    # Advanced course completion, benchmark of 30 percent. This page's year
    # selector is given a four-digit year built from the second half of the
    # academic year range ('2020-21' -> '2021').
    def run_a_curv_i2(filepath:)
      run_percentage_report(filepath:, admin_data_item_id: 'a-curv-i2',
                            url: 'https://profiles.doe.mass.edu/statereport/advcoursecomprate.aspx',
                            submit_id: 'btnViewReport',
                            column: '% Students Completing Advanced', benchmark: 30,
                            year_label: ->(academic_year) { (academic_year.range.split('-')[1].to_i + 2000).to_s })
    end

    # AP scores of 3-5, benchmark of 20 percent.
    def run_a_curv_i3(filepath:)
      run_percentage_report(filepath:, admin_data_item_id: 'a-curv-i3',
                            url: 'https://profiles.doe.mass.edu/statereport/ap.aspx',
                            submit_id: 'ctl00_ContentPlaceHolder1_btnViewReport',
                            column: '% Score 3-5', benchmark: 20)
    end

    private

    # Shared driver for the three reports, which differ only in URL, submit
    # button, scored column, benchmark and how the year option is labelled.
    #
    # @param year_label [#call] maps an academic year to the text selected in
    #   the page's year dropdown; defaults to the academic year range
    def run_percentage_report(filepath:, admin_data_item_id:, url:, submit_id:, column:, benchmark:,
                              year_label: ->(academic_year) { academic_year.range })
      run do |academic_year|
        selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
                      'ctl00_ContentPlaceHolder1_ddYear' => year_label.call(academic_year) }
        calculation = ->(headers, items) { percentage_score(headers:, items:, column:, benchmark:) }
        Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
      end
    end

    # Scales the percentage in +column+ so that meeting the benchmark scores 4.
    #
    # @return [Float, nil] nil when the column is absent or the cell is blank
    def percentage_score(headers:, items:, column:, benchmark:)
      index = headers[column]
      # Only index into the row once we know the column exists.
      cell = index && items[index]
      return nil unless cell.present?

      cell.to_f * 4 / benchmark
    end
  end
end

@ -2,17 +2,16 @@ require 'watir'
require 'csv' require 'csv'
module Dese module Dese
class TwoAOneScraper class TwoAOne
include Dese::Scraper include Dese::Scraper
attr_reader :filepaths attr_reader :filepaths
Prerequisites = Struct.new('Prerequisites', :filepath, :url, :selectors, :submit_id, :admin_data_item_id,
:calculation)
def initialize(filepaths: [Rails.root.join('data', 'admin_data', 'dese', 'two_a_one_students_suspended.csv'), def initialize(filepaths: [Rails.root.join('data', 'admin_data', 'dese', 'two_a_one_students_suspended.csv'),
Rails.root.join('data', 'admin_data', 'dese', 'two_a_one_students_disciplined.csv')]) Rails.root.join('data', 'admin_data', 'dese', 'two_a_one_students_disciplined.csv')])
@filepaths = filepaths @filepaths = filepaths
end
def run_all
run_a_phys_i1 run_a_phys_i1
run_a_phys_i3 run_a_phys_i3

@ -2,26 +2,28 @@ require 'watir'
require 'csv' require 'csv'
module Dese module Dese
class TwoCOneScraper class TwoCOne
include Dese::Scraper include Dese::Scraper
attr_reader :filepaths attr_reader :filepaths
Prerequisites = Struct.new('Prerequisites', :filepath, :url, :selectors, :submit_id, :admin_data_item_id,
:calculation)
def initialize(filepaths: [Rails.root.join('data', 'admin_data', 'dese', 'two_c_one_attendance.csv')]) def initialize(filepaths: [Rails.root.join('data', 'admin_data', 'dese', 'two_c_one_attendance.csv')])
@filepaths = filepaths @filepaths = filepaths
end
def run_all
write_a_vale_i1_headers
run_a_vale_i1
run_a_vale_i2
browser.close
end
def write_a_vale_i1_headers
filepath = filepaths[0] filepath = filepaths[0]
headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID', headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
'Attendance Rate', 'Average # of Absences', 'Absent 10 or more days', 'Chronically Absent (10% or more)', 'Attendance Rate', 'Average # of Absences', 'Absent 10 or more days', 'Chronically Absent (10% or more)',
'Chronically Absent (20% or more)', 'Unexcused > 9 days'] 'Chronically Absent (20% or more)', 'Unexcused > 9 days']
write_headers(filepath:, headers:) write_headers(filepath:, headers:)
run_a_vale_i1
run_a_vale_i2
browser.close
end end
def run_a_vale_i1 def run_a_vale_i1

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -70,7 +70,7 @@ namespace :data do
AdminDataValue.delete_all AdminDataValue.delete_all
Dir.glob(Rails.root.join('data', 'admin_data', 'dese', '*.csv')).each do |filepath| Dir.glob(Rails.root.join('data', 'admin_data', 'dese', '*.csv')).each do |filepath|
puts "=====================> Loading data from csv at path: #{filepath}" puts "=====================> Loading data from csv at path: #{filepath}"
Dese::FourDLoader.load_data filepath: Dese::Loader.load_data filepath:
end end
puts "=====================> Completed loading #{AdminDataValue.count} survey responses" puts "=====================> Completed loading #{AdminDataValue.count} survey responses"
end end

@ -1,6 +1,6 @@
require 'rails_helper' require 'rails_helper'
require 'fileutils' require 'fileutils'
RSpec.describe Dese::FourDScraper do RSpec.describe Dese::FourDOne do
let(:academic_years) do let(:academic_years) do
[ [
create(:academic_year, range: '2020-21'), create(:academic_year, range: '2020-21'),
@ -14,11 +14,11 @@ RSpec.describe Dese::FourDScraper do
academic_years academic_years
end end
xcontext 'Creating a new FourDScraper' do xcontext 'Creating a new FourDOne' do
it 'creates a csv file with the scraped data' do it 'creates a csv file with the scraped data' do
FileUtils.mkdir_p 'tmp/spec/dese' FileUtils.mkdir_p 'tmp/spec/dese'
file = Rails.root.join('tmp', 'spec', 'dese', 'four_d.csv') file = Rails.root.join('tmp', 'spec', 'dese', 'four_d.csv')
Dese::FourDScraper.new(filepath: file) Dese::FourDOne.new(filepath: file)
expect(file).to exist expect(file).to exist
end end
end end

@ -1,5 +1,5 @@
require 'rails_helper' require 'rails_helper'
RSpec.describe Dese::FourDLoader do RSpec.describe Dese::Loader do
let(:path_to_admin_data) { Rails.root.join('spec', 'fixtures', 'sample_four_d_data.csv') } let(:path_to_admin_data) { Rails.root.join('spec', 'fixtures', 'sample_four_d_data.csv') }
let(:ay_2020_21) { AcademicYear.find_by_range '2020-21' } let(:ay_2020_21) { AcademicYear.find_by_range '2020-21' }
let(:ay_2018_19) { AcademicYear.find_by_range '2018-19' } let(:ay_2018_19) { AcademicYear.find_by_range '2018-19' }
@ -21,7 +21,7 @@ RSpec.describe Dese::FourDLoader do
end end
context 'when running the loader' do context 'when running the loader' do
before :each do before :each do
Dese::FourDLoader.load_data filepath: path_to_admin_data Dese::Loader.load_data filepath: path_to_admin_data
end end
it 'load the correct admin data values' do it 'load the correct admin data values' do
@ -42,7 +42,7 @@ RSpec.describe Dese::FourDLoader do
end end
it 'is idempotent' do it 'is idempotent' do
Dese::FourDLoader.load_data filepath: path_to_admin_data Dese::Loader.load_data filepath: path_to_admin_data
expect(AdminDataValue.count).to eq 230 expect(AdminDataValue.count).to eq 230
end end

@ -2,7 +2,7 @@ require 'rails_helper'
require 'fileutils' require 'fileutils'
require 'csv' require 'csv'
RSpec.describe Dese::OneAScraper do RSpec.describe Dese::OneAOne do
let(:academic_years) do let(:academic_years) do
[ [
create(:academic_year, range: '2021-22'), create(:academic_year, range: '2021-22'),
@ -13,7 +13,7 @@ RSpec.describe Dese::OneAScraper do
# create(:academic_year, range: '2016-17') # create(:academic_year, range: '2016-17')
] ]
end end
let(:filepath) { Rails.root.join('tmp', 'spec', 'dese', 'one_a.csv') } let(:filepath) { Rails.root.join('tmp', 'spec', 'dese', 'one_a_one_teacher_data.csv') }
before do before do
FileUtils.mkdir_p 'tmp/spec/dese' FileUtils.mkdir_p 'tmp/spec/dese'
end end
@ -24,7 +24,7 @@ RSpec.describe Dese::OneAScraper do
xcontext 'Creating a new Scraper' do xcontext 'Creating a new Scraper' do
it 'creates a csv file with the scraped data' do it 'creates a csv file with the scraped data' do
Dese::OneAScraper.new(filepath:) Dese::OneAOne.new(filepath:)
expect(filepath).to exist expect(filepath).to exist
end end

@ -2,18 +2,18 @@ require 'rails_helper'
require 'fileutils' require 'fileutils'
require 'csv' require 'csv'
RSpec.describe Dese::OneAThreeScraper do RSpec.describe Dese::OneAThree do
let(:academic_years) do let(:academic_years) do
[ [
create(:academic_year, range: '2021-22'), create(:academic_year, range: '2021-22'),
create(:academic_year, range: '2020-21'), create(:academic_year, range: '2020-21')
create(:academic_year, range: '2019-20'), # create(:academic_year, range: '2019-20'),
create(:academic_year, range: '2018-19'), # create(:academic_year, range: '2018-19'),
create(:academic_year, range: '2017-18'), # create(:academic_year, range: '2017-18'),
create(:academic_year, range: '2016-17') # create(:academic_year, range: '2016-17')
] ]
end end
let(:i1_filepath) { Rails.root.join('tmp', 'spec', 'dese', 'one_a_three.csv') } let(:i1_filepath) { Rails.root.join('tmp', 'spec', 'dese', 'one_a_three_staffing_retention.csv') }
let(:i3_filepath) { Rails.root.join('tmp', 'spec', 'dese', 'one_a_three_teachers_of_color.csv') } let(:i3_filepath) { Rails.root.join('tmp', 'spec', 'dese', 'one_a_three_teachers_of_color.csv') }
let(:filepaths) do let(:filepaths) do
@ -29,7 +29,7 @@ RSpec.describe Dese::OneAThreeScraper do
context 'Creating a new Scraper' do context 'Creating a new Scraper' do
it 'creates a csv file with the scraped data' do it 'creates a csv file with the scraped data' do
Dese::OneAThreeScraper.new(filepaths:) Dese::OneAThree.new(filepaths:).run_all
expect(i1_filepath).to exist expect(i1_filepath).to exist
end end

@ -6,11 +6,11 @@ RSpec.describe Dese::ThreeAOne do
let(:academic_years) do let(:academic_years) do
[ [
create(:academic_year, range: '2021-22'), create(:academic_year, range: '2021-22'),
create(:academic_year, range: '2020-21'), create(:academic_year, range: '2020-21')
create(:academic_year, range: '2019-20'), # create(:academic_year, range: '2019-20'),
create(:academic_year, range: '2018-19'), # create(:academic_year, range: '2018-19'),
create(:academic_year, range: '2017-18'), # create(:academic_year, range: '2017-18'),
create(:academic_year, range: '2016-17') # create(:academic_year, range: '2016-17')
] ]
end end
@ -29,7 +29,7 @@ RSpec.describe Dese::ThreeAOne do
context 'Creating a new Scraper' do context 'Creating a new Scraper' do
it 'creates a csv file with the scraped data' do it 'creates a csv file with the scraped data' do
Dese::ThreeAOne.new(filepaths:) Dese::ThreeAOne.new(filepaths:).run_all
expect(i1_filepath).to exist expect(i1_filepath).to exist
end end

@ -0,0 +1,131 @@
require 'rails_helper'
require 'fileutils'
require 'csv'
# NOTE(review): these examples drive the real scraper (run_all /
# scrape_enrollments), and every example after the first reads the CSV files
# the first example wrote, so they depend on running in the defined order.
RSpec.describe Dese::ThreeATwo do
  let(:academic_years) do
    [
      create(:academic_year, range: '2021-22'),
      create(:academic_year, range: '2020-21')
      # create(:academic_year, range: '2019-20'),
      # create(:academic_year, range: '2018-19'),
      # create(:academic_year, range: '2017-18'),
      # create(:academic_year, range: '2016-17')
    ]
  end

  let(:common_headers) do
    ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID']
  end

  let(:enrollment_filepath) { Rails.root.join('tmp', 'spec', 'dese', 'enrollment.csv') }
  let(:i1_filepath) { Rails.root.join('tmp', 'spec', 'dese', 'three_a_two_age_staffing.csv') }
  let(:i4_filepath) { Rails.root.join('tmp', 'spec', 'dese', 'three_a_two_grade_subject_staffing.csv') }
  let(:filepaths) { [enrollment_filepath, i1_filepath, i4_filepath] }

  before do
    FileUtils.mkdir_p 'tmp/spec/dese'
    academic_years
  end

  # First row of the CSV, parsed as CSV so fields carry no trailing newline.
  def header_row(path)
    CSV.foreach(path).first
  end

  # Likert scores, in file order, of the rows for one admin data item and year.
  def likert_scores(path, item:, year:)
    CSV.foreach(path, headers: true).filter_map do |row|
      row['Likert Score'].to_f if row['Admin Data Item'] == item && row['Academic Year'] == year
    end
  end

  context 'Creating a new Scraper' do
    it 'creates a csv file with the scraped data' do
      Dese::ThreeATwo.new(filepaths:).run_all

      expect(enrollment_filepath).to exist
      expect(i1_filepath).to exist
      expect(i4_filepath).to exist
    end

    it 'has the correct headers for enrollments' do
      expect(header_row(enrollment_filepath)).to eq(
        common_headers + %w[PK K 1 2 3 4 5 6 7 8 9 10 11 12 SP Total]
      )
    end

    it 'has the correct headers for a-sust-i1' do
      expect(header_row(i1_filepath)).to eq(
        common_headers + ['<26 yrs (# )', '26-32 yrs (#)', '33-40 yrs (#)', '41-48 yrs (#)',
                          '49-56 yrs (#)', '57-64 yrs (#)', 'Over 64 yrs (#)', 'FTE Count',
                          'Student Count', 'Student to Guidance Counselor ratio']
      )
    end

    it 'has the correct headers for a-sust-i4' do
      expect(header_row(i4_filepath)).to eq(
        common_headers + ['PK-2 (# )', '3-5 (# )', '6-8 (# )', '9-12 (# )', 'Multiple Grades (# )',
                          'All Grades (# )', 'FTE Count', 'Student Count', 'Student to Art Teacher ratio']
      )
    end

    it 'has the right likert score results for a-sust-i1' do
      results = likert_scores(i1_filepath, item: 'a-sust-i1', year: '2021-22')

      expect(results.take(20)).to eq [4.0, 1.0, 4.87, 1.0, 1.0, 1.0, 4.6, 5.0, 1.0, 1.89, 1.0, 1.55, 1.0, 1.0, 2.11,
                                      4.7, 1.0, 1.0, 5.0, 1.0]
    end

    it 'has the right likert score results for a-sust-i2' do
      results = likert_scores(i1_filepath, item: 'a-sust-i2', year: '2021-22')

      expect(results.take(20)).to eq [1.0, 1.0, 1.0, 2.82, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
                                      1.0, 1.0, 1.0, 1.0]
    end

    it 'has the right likert score results for a-sust-i3' do
      results = likert_scores(i1_filepath, item: 'a-sust-i3', year: '2021-22')

      expect(results.take(20)).to eq [2.03, 5.0, 1.0, 3.74, 5.0, 4.38, 1.0, 1.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0,
                                      4.74, 3.5, 2.76, 1.0, 5.0]
    end

    it 'has the right likert score results for a-sust-i4' do
      results = likert_scores(i4_filepath, item: 'a-sust-i4', year: '2021-22')

      expect(results.take(20)).to eq [5.0, 1.0, 5.0, 5.0, 4.14, 5.0, 5.0, 5.0, 5.0, 5.0, 1.0, 5.0, 5.0, 5.0, 5.0, 5.0,
                                      5.0, 4.82, 5.0, 1.0]
    end
  end

  context 'student_count' do
    it 'returns the right enrollment count for a school and year' do
      to_check = [[4_450_105, '2021-22', 1426],
                  [3_500_003, '2020-21', 489],
                  [2_430_315, '2020-21', 616],
                  [3_260_055, '2020-21', 290]]
      three_a_two = Dese::ThreeATwo.new

      begin
        three_a_two.scrape_enrollments(filepath: enrollment_filepath)

        to_check.each do |dese_id, year, expected_total|
          # Value equality, not object identity, is what matters here.
          expect(three_a_two.student_count(filepath: enrollment_filepath, dese_id:, year:)).to eq expected_total
        end
      ensure
        # Close the browser even when an expectation above fails.
        three_a_two.browser.close
      end
    end
  end
end

@ -0,0 +1,100 @@
require 'rails_helper'
require 'fileutils'
require 'csv'
# NOTE(review): the first example drives the real scraper (run_all), and every
# later example reads the CSV files it wrote, so they depend on running in the
# defined order.
RSpec.describe Dese::ThreeBOne do
  let(:academic_years) do
    [
      create(:academic_year, range: '2021-22'),
      create(:academic_year, range: '2020-21')
      # create(:academic_year, range: '2019-20'),
      # create(:academic_year, range: '2018-19'),
      # create(:academic_year, range: '2017-18'),
      # create(:academic_year, range: '2016-17')
    ]
  end

  let(:common_headers) do
    ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID']
  end

  let(:i1_filepath) { Rails.root.join('tmp', 'spec', 'dese', 'three_b_one_masscore.csv') }
  let(:i2_filepath) { Rails.root.join('tmp', 'spec', 'dese', 'three_b_one_advcoursecomprate.csv') }
  let(:i3_filepath) { Rails.root.join('tmp', 'spec', 'dese', 'three_b_one_ap.csv') }
  let(:filepaths) { [i1_filepath, i2_filepath, i3_filepath] }

  before do
    FileUtils.mkdir_p 'tmp/spec/dese'
    academic_years
  end

  # First row of the CSV, parsed as CSV so fields carry no trailing newline.
  def header_row(path)
    CSV.foreach(path).first
  end

  # Likert scores, in file order, of the rows for one admin data item and year.
  def likert_scores(path, item:, year:)
    CSV.foreach(path, headers: true).filter_map do |row|
      row['Likert Score'].to_f if row['Admin Data Item'] == item && row['Academic Year'] == year
    end
  end

  context 'Creating a new Scraper' do
    it 'creates a csv file with the scraped data' do
      Dese::ThreeBOne.new(filepaths:).run_all

      expect(i1_filepath).to exist
      expect(i2_filepath).to exist
      expect(i3_filepath).to exist
    end

    it 'has the correct headers for a-curv-i1' do
      expect(header_row(i1_filepath)).to eq(
        common_headers + ['# Graduated', '# Completed MassCore', '% Completed MassCore']
      )
    end

    it 'has the correct headers for a-curv-i2' do
      expect(header_row(i2_filepath)).to eq(
        common_headers + ['# Grade 11 and 12 Students', '# Students Completing Advanced',
                          '% Students Completing Advanced', '% ELA', '% Math', '% Science and Technology',
                          '% Computer and Information Science', '% History and Social Sciences', '% Arts',
                          '% All Other Subjects', '% All Other Subjects']
      )
    end

    it 'has the correct headers for a-curv-i3' do
      expect(header_row(i3_filepath)).to eq(
        common_headers + ['Tests Taken', 'Score=1', 'Score=2', 'Score=3', 'Score=4', 'Score=5', '% Score 1-2',
                          '% Score 3-5']
      )
    end

    it 'has the right likert score results for a-curv-i1' do
      results = likert_scores(i1_filepath, item: 'a-curv-i1', year: '2020-21')

      expect(results.take(20)).to eq [4.44, 4.44, 3.33, 3.83, 4.44, 3.6, 4.44, 4.44, 1.0, 4.44, 4.44, 4.44, 4.44,
                                      3.89, 4.44, 4.44, 4.44, 4.44, 4.01, 3.92]
    end

    it 'has the right likert score results for a-curv-i2' do
      results = likert_scores(i2_filepath, item: 'a-curv-i2', year: '2020-21')

      expect(results.take(20)).to eq [5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 1.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0,
                                      5.0, 2.53, 5.0, 5.0]
    end

    it 'has the right likert score results for a-curv-i3' do
      results = likert_scores(i3_filepath, item: 'a-curv-i3', year: '2020-21')

      expect(results.take(20)).to eq [5.0, 1.46, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 2.06, 3.54, 5.0,
                                      5.0, 5.0, 5.0, 5.0, 1.0]
    end
  end
end

@ -2,15 +2,15 @@ require 'rails_helper'
require 'fileutils' require 'fileutils'
require 'csv' require 'csv'
RSpec.describe Dese::TwoAOneScraper do RSpec.describe Dese::TwoAOne do
let(:academic_years) do let(:academic_years) do
[ [
create(:academic_year, range: '2021-22'), create(:academic_year, range: '2021-22'),
create(:academic_year, range: '2020-21'), create(:academic_year, range: '2020-21')
create(:academic_year, range: '2019-20'), # create(:academic_year, range: '2019-20'),
create(:academic_year, range: '2018-19'), # create(:academic_year, range: '2018-19'),
create(:academic_year, range: '2017-18'), # create(:academic_year, range: '2017-18'),
create(:academic_year, range: '2016-17') # create(:academic_year, range: '2016-17')
] ]
end end
@ -30,7 +30,7 @@ RSpec.describe Dese::TwoAOneScraper do
context 'Creating a new Scraper' do context 'Creating a new Scraper' do
it 'creates a csv file with the scraped data' do it 'creates a csv file with the scraped data' do
Dese::TwoAOneScraper.new(filepaths:) Dese::TwoAOne.new(filepaths:).run_all
expect(i1_filepath).to exist expect(i1_filepath).to exist
end end

@ -2,15 +2,15 @@ require 'rails_helper'
require 'fileutils' require 'fileutils'
require 'csv' require 'csv'
RSpec.describe Dese::TwoCOneScraper do RSpec.describe Dese::TwoCOne do
let(:academic_years) do let(:academic_years) do
[ [
create(:academic_year, range: '2021-22'), create(:academic_year, range: '2021-22'),
create(:academic_year, range: '2020-21'), create(:academic_year, range: '2020-21')
create(:academic_year, range: '2019-20'), # create(:academic_year, range: '2019-20'),
create(:academic_year, range: '2018-19'), # create(:academic_year, range: '2018-19'),
create(:academic_year, range: '2017-18'), # create(:academic_year, range: '2017-18'),
create(:academic_year, range: '2016-17') # create(:academic_year, range: '2016-17')
] ]
end end
@ -29,7 +29,7 @@ RSpec.describe Dese::TwoCOneScraper do
context 'Creating a new Scraper' do context 'Creating a new Scraper' do
it 'creates a csv file with the scraped data' do it 'creates a csv file with the scraped data' do
Dese::TwoCOneScraper.new(filepaths:) Dese::TwoCOne.new(filepaths:).run_all
expect(i1_filepath).to exist expect(i1_filepath).to exist
end end
Loading…
Cancel
Save