feat: add admin data item a-curv-i5 with temporary benchmark of 2

This commit is contained in:
Nelson Jovel 2025-01-14 13:29:40 -08:00
parent e2fe434bb5
commit 2333e9c8c7
8 changed files with 29962 additions and 87 deletions

View file

@ -4,14 +4,6 @@ module Dese
# Value bundle describing one report scrape: the CSV to write, the report URL,
# the dropdown selections, the submit button id, the admin data item id and
# the lambda that turns a scraped row into a raw score.
Prerequisites = Struct.new(
  "Prerequisites",
  *%i[filepath url selectors submit_id admin_data_item_id calculation]
)
# Mirrors a Likert score around the middle of the 1..5 scale (1 <-> 5, 2 <-> 4).
#
# Values below 1 are treated as 1 and values above 5 as 5 before mirroring.
# Returns nil when no score is given.
def reverse_score(likert_score:)
  return nil unless likert_score.present?

  bounded =
    if likert_score < 1
      1
    elsif likert_score > 5
      5
    else
      likert_score
    end

  (bounded - 6).abs
end
def run
academic_years = AcademicYear.all.order(range: :DESC)
.map(&:range_without_season)
@ -65,6 +57,8 @@ module Dese
dese_id = items[1].to_i
next if dese_id.nil? || dese_id.zero?
# row = header_hash.keys.zip(items).to_h
raw_likert_score = calculation.call(header_hash, items)
raw_likert_score ||= "NA"
likert_score = raw_likert_score
@ -74,12 +68,17 @@ module Dese
likert_score = likert_score.round(2)
end
# school_level = row["School Code"][-3]
# ratio = row["Number of Students"].gsub(",", "").to_f / row["Total # of Classes"].gsub(",", "").to_f
output = []
output << raw_likert_score
output << likert_score
output << id
output << range
output << items
# output << school_level
# output << ratio
output = output.flatten
csv << output
end

View file

@ -1,4 +1,4 @@
require 'watir'
require "watir"
module Dese
class ThreeATwo
@ -6,9 +6,9 @@ module Dese
include Dese::Enrollments
attr_reader :filepaths
# Stores the CSV paths this class works with in @filepaths (read back through
# the `filepaths` reader). The defaults point at files under
# data/admin_data/dese: the enrollments CSV first, then the two 3A_2 staffing CSVs.
# NOTE(review): the signature appears twice below (single- and double-quoted
# spellings of the same paths) with a single `end`; only one should remain.
def initialize(filepaths: [Rails.root.join('data', 'admin_data', 'dese', 'enrollments.csv'),
Rails.root.join('data', 'admin_data', 'dese', '3A_2_age_staffing.csv'),
Rails.root.join('data', 'admin_data', 'dese', '3A_2_grade_subject_staffing.csv')])
def initialize(filepaths: [Rails.root.join("data", "admin_data", "dese", "enrollments.csv"),
Rails.root.join("data", "admin_data", "dese", "3A_2_age_staffing.csv"),
Rails.root.join("data", "admin_data", "dese", "3A_2_grade_subject_staffing.csv")])
@filepaths = filepaths
end
@ -31,43 +31,43 @@ module Dese
end
# Writes the header row of the a-sust-i1 CSV (guidance counselor staffing by
# age band) to `filepath` via `write_headers`.
def write_a_sust_i1_headers(filepath:)
  score_columns = ["Raw likert calculation", "Likert Score", "Admin Data Item",
                   "Academic Year", "School Name", "DESE ID"]
  age_band_columns = ["<26 yrs (# )", "26-32 yrs (#)", "33-40 yrs (#)", "41-48 yrs (#)",
                      "49-56 yrs (#)", "57-64 yrs (#)", "Over 64 yrs (#)"]
  ratio_columns = ["FTE Count", "Student Count", "Student to Guidance Counselor ratio"]

  write_headers(filepath:, headers: score_columns + age_band_columns + ratio_columns)
end
# Writes the header row of the a-sust-i4 CSV (art teacher staffing by grade
# span) to `filepath` via `write_headers`.
def write_a_sust_i4_headers(filepath:)
  score_columns = ["Raw likert calculation", "Likert Score", "Admin Data Item",
                   "Academic Year", "School Name", "DESE ID"]
  grade_span_columns = ["PK-2 (# )", "3-5 (# )", "6-8 (# )", "9-12 (# )",
                        "Multiple Grades (# )", "All Grades (# )"]
  ratio_columns = ["FTE Count", "Student Count", "Student to Art Teacher ratio"]

  write_headers(filepath:, headers: score_columns + grade_span_columns + ratio_columns)
end
def run_a_sust_i1(filepath:)
run do |academic_year|
admin_data_item_id = 'a-sust-i1'
url = 'https://profiles.doe.mass.edu/statereport/agestaffing.aspx'
admin_data_item_id = "a-sust-i1"
url = "https://profiles.doe.mass.edu/statereport/agestaffing.aspx"
range = academic_year.range
selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
'ctl00_ContentPlaceHolder1_ddYear' => range,
'ctl00_ContentPlaceHolder1_ddJobClassification' => 'Guidance Counselor' }
submit_id = 'btnViewReport'
selectors = { "ctl00_ContentPlaceHolder1_ddReportType" => "School",
"ctl00_ContentPlaceHolder1_ddYear" => range,
"ctl00_ContentPlaceHolder1_ddJobClassification" => "Guidance Counselor" }
submit_id = "btnViewReport"
calculation = lambda { |headers, items|
fte_index = headers['FTE Count']
fte_index = headers["FTE Count"]
num_of_guidance_counselors = items[fte_index].to_f
dese_id = items[headers['School Code']].to_i
dese_id = items[headers["School Code"]].to_i
school = School.find_by_dese_id(dese_id)
return 'NA' unless school.present? && school.is_hs?
return "NA" unless school.present? && school.is_hs?
num_of_students = student_count(filepath: filepaths[0], dese_id:, year: academic_year.range) || 0
items << num_of_students
benchmark = 250
if fte_index.present? && !items[fte_index] != ''
if fte_index.present? && !items[fte_index] != ""
result = ((benchmark - (num_of_students / num_of_guidance_counselors)) + benchmark) * 4 / benchmark
end
items << (num_of_students / num_of_guidance_counselors)
@ -79,21 +79,21 @@ module Dese
def run_a_sust_i2(filepath:)
run do |academic_year|
admin_data_item_id = 'a-sust-i2'
url = 'https://profiles.doe.mass.edu/statereport/agestaffing.aspx'
admin_data_item_id = "a-sust-i2"
url = "https://profiles.doe.mass.edu/statereport/agestaffing.aspx"
range = academic_year.range
selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
'ctl00_ContentPlaceHolder1_ddYear' => range,
'ctl00_ContentPlaceHolder1_ddJobClassification' => 'School Psychologist -- Non-Special Education' }
submit_id = 'btnViewReport'
selectors = { "ctl00_ContentPlaceHolder1_ddReportType" => "School",
"ctl00_ContentPlaceHolder1_ddYear" => range,
"ctl00_ContentPlaceHolder1_ddJobClassification" => "School Psychologist -- Non-Special Education" }
submit_id = "btnViewReport"
calculation = lambda { |headers, items|
fte_index = headers['FTE Count']
fte_index = headers["FTE Count"]
num_of_psychologists = items[fte_index].to_f
dese_id = items[headers['School Code']].to_i
dese_id = items[headers["School Code"]].to_i
num_of_students = student_count(filepath: filepaths[0], dese_id:, year: academic_year.range) || 0
items << num_of_students
benchmark = 250
if fte_index.present? && !items[fte_index] != ''
if fte_index.present? && !items[fte_index] != ""
result = ((benchmark - (num_of_students / num_of_psychologists)) + benchmark) * 4 / benchmark
end
@ -106,21 +106,21 @@ module Dese
def run_a_sust_i3(filepath:)
run do |academic_year|
admin_data_item_id = 'a-sust-i3'
url = 'https://profiles.doe.mass.edu/statereport/agestaffing.aspx'
admin_data_item_id = "a-sust-i3"
url = "https://profiles.doe.mass.edu/statereport/agestaffing.aspx"
range = academic_year.range
selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
'ctl00_ContentPlaceHolder1_ddYear' => range,
'ctl00_ContentPlaceHolder1_ddJobClassification' => 'Paraprofessional' }
submit_id = 'btnViewReport'
selectors = { "ctl00_ContentPlaceHolder1_ddReportType" => "School",
"ctl00_ContentPlaceHolder1_ddYear" => range,
"ctl00_ContentPlaceHolder1_ddJobClassification" => "Paraprofessional" }
submit_id = "btnViewReport"
calculation = lambda { |headers, items|
fte_index = headers['FTE Count']
fte_index = headers["FTE Count"]
num_of_paraprofessionals = items[fte_index].to_f
dese_id = items[headers['School Code']].to_i
dese_id = items[headers["School Code"]].to_i
num_of_students = student_count(filepath: filepaths[0], dese_id:, year: academic_year.range) || 0
items << num_of_students
benchmark = 43.4
if fte_index.present? && !items[fte_index] != ''
if fte_index.present? && !items[fte_index] != ""
result = ((benchmark - (num_of_students / num_of_paraprofessionals)) + benchmark) * 4 / benchmark
end
@ -133,15 +133,15 @@ module Dese
def run_a_sust_i4(filepath:)
run do |academic_year|
admin_data_item_id = 'a-sust-i4'
url = 'https://profiles.doe.mass.edu/statereport/gradesubjectstaffing.aspx'
admin_data_item_id = "a-sust-i4"
url = "https://profiles.doe.mass.edu/statereport/gradesubjectstaffing.aspx"
range = academic_year.range
selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
'ctl00_ContentPlaceHolder1_ddYear' => range,
'ctl00_ContentPlaceHolder1_ddDisplay' => 'Full-time Equivalents',
'ctl00_ContentPlaceHolder1_ddSubject' => 'Arts' }
submit_id = 'btnViewReport'
selectors = { "ctl00_ContentPlaceHolder1_ddReportType" => "School",
"ctl00_ContentPlaceHolder1_ddYear" => range,
"ctl00_ContentPlaceHolder1_ddDisplay" => "Full-time Equivalents",
"ctl00_ContentPlaceHolder1_ddSubject" => "Arts" }
submit_id = "btnViewReport"
calculation = lambda { |_headers, items|
num_of_art_teachers = items.last.to_f
dese_id = items[1].to_i

View file

@ -1,91 +1,124 @@
require 'watir'
require "watir"
module Dese
class ThreeBOne
include Dese::Scraper
attr_reader :filepaths
# Stores the CSV paths this class works with in @filepaths (read back through
# the `filepaths` reader). `run_all` uses them by position: index 0 for
# a-curv-i1, 1 for a-curv-i2, 2 for a-curv-i3 and 3 for a-curv-i5.
# NOTE(review): the signature appears twice below (the older three-path,
# single-quoted form and the newer four-path form) with a single `end`; only
# one should remain.
def initialize(filepaths: [Rails.root.join('data', 'admin_data', 'dese', '3B_1_masscore.csv'),
Rails.root.join('data', 'admin_data', 'dese', '3B_1_advcoursecomprate.csv'),
Rails.root.join('data', 'admin_data', 'dese', '3B_1_ap.csv')])
def initialize(filepaths: [Rails.root.join("data", "admin_data", "dese", "3B_1_masscore.csv"),
Rails.root.join("data", "admin_data", "dese", "3B_1_advcoursecomprate.csv"),
Rails.root.join("data", "admin_data", "dese", "3B_1_ap.csv"),
Rails.root.join("data", "admin_data", "dese", "3B_1_course_ratio.csv")])
@filepaths = filepaths
end
# Runs every 3B-1 report in order. For each one it takes the matching entry of
# `filepaths`, writes that CSV's header row, then invokes the report runner
# with the same path. Closes the browser after the last report.
def run_all
  shared_columns = ["Raw likert calculation", "Likert Score", "Admin Data Item",
                    "Academic Year", "School Name", "DESE ID"]

  # One entry per report, listed in the same order as `filepaths`.
  reports = [
    [:run_a_curv_i1, ["# Graduated", "# Completed MassCore", "% Completed MassCore"]],
    [:run_a_curv_i2, ["# Grade 11 and 12 Students", "# Students Completing Advanced",
                      "% Students Completing Advanced", "% ELA", "% Math",
                      "% Science and Technology", "% Computer and Information Science",
                      "% History and Social Sciences", "% Arts",
                      "% All Other Subjects", "% All Other Subjects"]],
    [:run_a_curv_i3, ["Tests Taken", "Score=1", "Score=2", "Score=3", "Score=4", "Score=5",
                      "% Score 1-2", "% Score 3-5"]],
    [:run_a_curv_i5, ["Total # of Classes", "Average Class Size", "Number of Students",
                      "Female %", "Male %", "English Language Learner %",
                      "Students with Disabilities %", "Low Income %"]]
  ]

  reports.each_with_index do |(runner, report_columns), position|
    filepath = filepaths[position]
    write_headers(filepath:, headers: shared_columns + report_columns)
    send(runner, filepath:)
  end

  browser.close
end
# Builds the scrape settings for admin data item a-curv-i1 (MassCore
# completion) and hands them to `run`, once per academic year it yields.
#
# filepath - CSV path stored in the returned Prerequisites.
#
# The calculation lambda receives a header-name => column-index hash and the
# row's cells. It returns the "% Completed MassCore" value scaled so the
# benchmark (90) maps to 4, or nil when the column or the cell is missing.
def run_a_curv_i1(filepath:)
  run do |academic_year|
    url = "https://profiles.doe.mass.edu/statereport/masscore.aspx"
    range = academic_year.range
    selectors = { "ctl00_ContentPlaceHolder1_ddReportType" => "School",
                  "ctl00_ContentPlaceHolder1_ddYear" => range }
    submit_id = "btnViewReport"
    calculation = lambda { |headers, items|
      completed_index = headers["% Completed MassCore"]
      # Check the index before using it: indexing the row with nil raises.
      next if completed_index.nil?

      cell = items[completed_index]
      # The previous guard (`!cell != ""`) was always true, so blank cells
      # were scored as 0.0 instead of being reported as missing.
      next if cell.to_s.strip.empty?

      benchmark = 90
      cell.to_f * 4 / benchmark
    }
    admin_data_item_id = "a-curv-i1"
    Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
  end
end
# Builds the scrape settings for admin data item a-curv-i2 (advanced course
# completion) and hands them to `run`, once per academic year it yields.
#
# filepath - CSV path stored in the returned Prerequisites.
#
# The calculation lambda receives a header-name => column-index hash and the
# row's cells. It returns the "% Students Completing Advanced" value scaled so
# the benchmark (30) maps to 4, or nil when the column or the cell is missing.
def run_a_curv_i2(filepath:)
  run do |academic_year|
    url = "https://profiles.doe.mass.edu/statereport/advcoursecomprate.aspx"
    # This report's year dropdown takes the four-digit ending year: the part
    # of the range after the dash plus 2000 (e.g. "2022-23" -> "2023").
    range = (academic_year.range.split("-")[1].to_i + 2000).to_s
    selectors = { "ctl00_ContentPlaceHolder1_ddReportType" => "School",
                  "ctl00_ContentPlaceHolder1_ddYear" => range }
    submit_id = "btnViewReport"
    calculation = lambda { |headers, items|
      completed_index = headers["% Students Completing Advanced"]
      # Check the index before using it: indexing the row with nil raises.
      next if completed_index.nil?

      cell = items[completed_index]
      # The previous guard (`!cell != ""`) was always true, so blank cells
      # were scored as 0.0 instead of being reported as missing.
      next if cell.to_s.strip.empty?

      benchmark = 30
      cell.to_f * 4 / benchmark
    }
    admin_data_item_id = "a-curv-i2"
    Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
  end
end
# Builds the scrape settings for admin data item a-curv-i3 (AP scores) and
# hands them to `run`, once per academic year it yields.
#
# filepath - CSV path stored in the returned Prerequisites.
#
# The calculation lambda receives a header-name => column-index hash and the
# row's cells. It returns the "% Score 3-5" value scaled so the benchmark (20)
# maps to 4, or nil when the column or the cell is missing.
def run_a_curv_i3(filepath:)
  run do |academic_year|
    url = "https://profiles.doe.mass.edu/statereport/ap.aspx"
    range = academic_year.range
    selectors = { "ctl00_ContentPlaceHolder1_ddReportType" => "School",
                  "ctl00_ContentPlaceHolder1_ddYear" => range }
    submit_id = "ctl00_ContentPlaceHolder1_btnViewReport"
    calculation = lambda { |headers, items|
      completed_index = headers["% Score 3-5"]
      # Check the index before using it: indexing the row with nil raises.
      next if completed_index.nil?

      cell = items[completed_index]
      # The previous guard (`!cell != ""`) was always true, so blank cells
      # were scored as 0.0 instead of being reported as missing.
      next if cell.to_s.strip.empty?

      benchmark = 20
      cell.to_f * 4 / benchmark
    }
    admin_data_item_id = "a-curv-i3"
    Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
  end
end
# Builds the scrape settings for admin data item a-curv-i5 (class size report)
# and hands them to `run`, once per academic year it yields.
#
# filepath - CSV path stored in the returned Prerequisites.
#
# The calculation lambda receives a header-name => column-index hash and the
# row's cells. It returns "NA" when the school is unknown or not a high
# school, or when the class/student counts are missing or the class count is
# zero. Otherwise it scores the students-per-class ratio against the
# benchmark, so a ratio equal to the benchmark yields 4.
def run_a_curv_i5(filepath:)
  run do |academic_year|
    url = "https://profiles.doe.mass.edu/statereport/classsizebygenderpopulation.aspx"
    range = academic_year.range
    selectors = { "ctl00_ContentPlaceHolder1_ddReportType" => "School",
                  "ctl00_ContentPlaceHolder1_ddYear" => range }
    submit_id = "btnViewReport"
    calculation = lambda { |headers, items|
      code_index = headers["School Code"]
      next "NA" if code_index.nil?

      school = School.find_by_dese_id(items[code_index].to_i)
      next "NA" unless school&.is_hs?

      classes_index = headers["Total # of Classes"]
      students_index = headers["Number of Students"]
      # Indexing the row with a nil column index would raise.
      next "NA" if classes_index.nil? || students_index.nil?

      # Counts are scraped as text with thousands separators, e.g. "1,234".
      classes_cell = items[classes_index].to_s.delete(",").strip
      students_cell = items[students_index].to_s.delete(",").strip
      next "NA" if classes_cell.empty? || students_cell.empty?

      num_classes = classes_cell.to_f
      num_students = students_cell.to_f
      # A zero class count would turn the ratio into Infinity or NaN.
      next "NA" if num_classes.zero?

      # NOTE(review): the commit message calls this a temporary benchmark of 2,
      # while the value used here is 2.04 - confirm which is intended.
      benchmark = 2.04
      ((benchmark - (num_students / num_classes)) + benchmark) * 4 / benchmark
    }
    admin_data_item_id = "a-curv-i5"
    Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
  end
end