Add scraper for 3B-i student/#courses ratio

mciea-main
rebuilt 3 years ago
parent bea9be2d76
commit ce76c979a4

@ -4,14 +4,6 @@ module Dese
Prerequisites = Struct.new('Prerequisites', :filepath, :url, :selectors, :submit_id, :admin_data_item_id, Prerequisites = Struct.new('Prerequisites', :filepath, :url, :selectors, :submit_id, :admin_data_item_id,
:calculation) :calculation)
# Flips a likert score around the midpoint of the 1..5 scale (1 <-> 5, 2 <-> 4).
#
# Scores below 1 are treated as 1 and scores above 5 are treated as 5 before
# flipping. A blank score yields nil.
#
# @param likert_score [Numeric, nil] the score to reverse
# @return [Numeric, nil] the reversed score, or nil when no score was given
def reverse_score(likert_score:)
  return unless likert_score.present?

  bounded = if likert_score < 1
              1
            elsif likert_score > 5
              5
            else
              likert_score
            end

  (bounded - 6).abs
end
def run def run
academic_years = AcademicYear.all.order(range: :DESC) academic_years = AcademicYear.all.order(range: :DESC)
academic_years.each do |academic_year| academic_years.each do |academic_year|

@ -4,11 +4,14 @@ require 'csv'
module Dese module Dese
class ThreeBOne class ThreeBOne
include Dese::Scraper include Dese::Scraper
include Dese::Enrollments
attr_reader :filepaths attr_reader :filepaths
def initialize(filepaths: [Rails.root.join('data', 'admin_data', 'dese', '3B_1_masscore.csv'), def initialize(filepaths: [Rails.root.join('data', 'admin_data', 'dese', '3B_1_masscore.csv'),
Rails.root.join('data', 'admin_data', 'dese', '3B_1_advcoursecomprate.csv'), Rails.root.join('data', 'admin_data', 'dese', '3B_1_advcoursecomprate.csv'),
Rails.root.join('data', 'admin_data', 'dese', '3B_1_ap.csv')]) Rails.root.join('data', 'admin_data', 'dese', '3B_1_ap.csv'),
Rails.root.join('data', 'admin_data', 'dese', '3B_1_student_courses_ratio.csv')])
@filepaths = filepaths @filepaths = filepaths
end end
@ -34,6 +37,11 @@ module Dese
write_headers(filepath:, headers:) write_headers(filepath:, headers:)
run_a_curv_i3(filepath:) run_a_curv_i3(filepath:)
filepath = filepaths[3]
headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
'Total # of Classes', 'Average Class Size', 'Number of Students', 'Female %', 'Male %', 'English Language Learner %', 'Students with Disabilities %', 'Low Income %', 'Number of Students']
write_headers(filepath:, headers:)
run_a_curv_i5(filepath:)
browser.close browser.close
end end
@ -90,5 +98,33 @@ module Dese
Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation) Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
end end
end end
# Builds the scrape job for admin data item 'a-curv-i5' (student-to-course ratio).
#
# For each academic year handed to the block by `run`, this selects the
# school-level report for that year on the DESE class-size page and supplies a
# calculation lambda that converts one scraped row into a likert value.
#
# The lambda returns 'NA' when the school is not a high school, or when either
# the class count or the student count is missing or zero (no meaningful ratio
# can be computed in that case).
#
# @param filepath [Pathname, String] forwarded unchanged to Prerequisites
def run_a_curv_i5(filepath:)
  run do |academic_year|
    url = 'https://profiles.doe.mass.edu/statereport/classsizebygenderpopulation.aspx'
    range = academic_year.range
    selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
                  'ctl00_ContentPlaceHolder1_ddYear' => range }
    submit_id = 'btnViewReport'
    calculation = lambda { |headers, items|
      row = headers.keys.zip(items).to_h
      dese_id = row['School Code'].to_i

      # The name-based guess is only a fallback; a matching School record wins.
      is_hs = (row['School Name'] in /High School/i)
      school = School.find_by(dese_id:)
      is_hs = school.is_hs if school.present?
      next 'NA' unless is_hs

      # to_s keeps a missing cell from raising; it then parses as 0.0 and is
      # rejected by the positivity check below.
      num_of_classes = row['Total # of Classes'].to_s.delete(',').to_f
      # NOTE(review): student_count is defined outside this block; it is assumed
      # to return a Numeric or nil — confirm.
      num_of_students = student_count(filepath: Rails.root.join('data', 'admin_data', 'dese', 'enrollments.csv'),
                                      dese_id:, year: academic_year.range) || 0
      # Appended to the scraped row so the enrollment figure is carried along with it.
      items << num_of_students

      # Numeric#present? is always true, so the counts must be checked by value,
      # and before dividing: zero classes would otherwise yield Infinity/NaN and
      # zero students an out-of-scale score.
      next 'NA' unless num_of_classes.positive? && num_of_students.positive?

      benchmark = 5
      actual = num_of_students / num_of_classes
      ((benchmark - actual) + benchmark) * 4 / benchmark
    }
    admin_data_item_id = 'a-curv-i5'
    Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
  end
end
end end
end end

File diff suppressed because it is too large Load Diff

@ -17,9 +17,10 @@ RSpec.describe Dese::ThreeBOne do
let(:i1_filepath) { Rails.root.join('tmp', 'spec', 'dese', '3B_1_masscore.csv') } let(:i1_filepath) { Rails.root.join('tmp', 'spec', 'dese', '3B_1_masscore.csv') }
let(:i2_filepath) { Rails.root.join('tmp', 'spec', 'dese', '3B_1_advcoursecomprate.csv') } let(:i2_filepath) { Rails.root.join('tmp', 'spec', 'dese', '3B_1_advcoursecomprate.csv') }
let(:i3_filepath) { Rails.root.join('tmp', 'spec', 'dese', '3B_1_ap.csv') } let(:i3_filepath) { Rails.root.join('tmp', 'spec', 'dese', '3B_1_ap.csv') }
let(:i4_filepath) { Rails.root.join('tmp', 'spec', 'dese', '3B_1_student_courses_ratio.csv') }
let(:filepaths) do let(:filepaths) do
[i1_filepath, i2_filepath, i3_filepath] [i1_filepath, i2_filepath, i3_filepath, i4_filepath]
end end
before do before do
FileUtils.mkdir_p 'tmp/spec/dese' FileUtils.mkdir_p 'tmp/spec/dese'
@ -29,6 +30,13 @@ RSpec.describe Dese::ThreeBOne do
academic_years academic_years
end end
# Covers the a-curv-i5 scraper by running the whole run_all pipeline with all
# four output paths (i4_filepath is the student/course ratio CSV).
# NOTE(review): unlike the xcontext below, this context is enabled; run_all
# appears to drive a browser (it ends with browser.close) — confirm this spec is
# meant to run without stubbing.
context 'a-curv-i5' do
it 'creates a csv file with the scraped data' do
Dese::ThreeBOne.new(filepaths:).run_all
# Only the existence of the output file is asserted; its contents are not checked.
expect(i4_filepath).to exist
end
end
xcontext 'Creating a new Scraper' do xcontext 'Creating a new Scraper' do
it 'creates a csv file with the scraped data' do it 'creates a csv file with the scraped data' do
Dese::ThreeBOne.new(filepaths:).run_all Dese::ThreeBOne.new(filepaths:).run_all

Loading…
Cancel
Save