parent
27550e0b30
commit
bd8dfe45d3
@ -0,0 +1,146 @@
|
||||
require "fileutils"
|
||||
module Dashboard
  # Cleans raw survey exports. Every CSV found in the input directory is split
  # into a CSV of valid responses (written to the output directory) and a
  # "removed." CSV of rejected responses (written to the log directory).
  class Cleaner
    attr_reader :input_filepath, :output_filepath, :log_filepath

    # Stores the three locations and creates the output and log directories.
    #
    # @param input_filepath directory (joined onto Rails.root) holding the raw CSVs
    # @param output_filepath directory for cleaned CSVs; must respond to #join
    # @param log_filepath directory for "removed." CSVs; must respond to #join
    def initialize(input_filepath:, output_filepath:, log_filepath:)
      @input_filepath = input_filepath
      @output_filepath = output_filepath
      @log_filepath = log_filepath
      initialize_directories
    end

    # Processes every "*.csv" file in the input directory and writes one clean
    # file and one log file per input that contains usable rows.
    def clean
      Dir.glob(Rails.root.join(input_filepath, "*.csv")).each do |filepath|
        puts filepath
        File.open(filepath) do |file|
          headers, clean_csv, log_csv, data = process_raw_file(file:)
          # `next` only leaves this file's block. A `return` here would abort
          # the whole run and silently skip every remaining input file.
          next if data.empty?

          output_name = filename(headers:, data:, filepath:)
          write_csv(data: clean_csv, output_filepath:, filename: output_name)
          write_csv(data: log_csv, output_filepath: log_filepath, prefix: "removed.", filename: output_name)
        end
      end
    end

    # Builds the output file name from the districts, the school (only when all
    # rows belong to a single school), the survey type, an optional part/form
    # label found in the input path, and the academic year range.
    #
    # @param headers [Array<String, nil>] header names of the processed file
    # @param data [Array] non-empty list of processed rows
    # @param filepath [String, nil] path of the input file
    # @return [String] dot-separated file name ending in "csv"
    def filename(headers:, data:, filepath:)
      survey_item_ids = headers.filter(&:present?)
                               .filter { |header| header.start_with?("s-", "t-") }
                               .reject { |header| header.end_with?("-1") }
      survey_type = SurveyItem.survey_type(survey_item_ids:)
      range = data.first.academic_year.range
      districts = data.map { |row| row.district.short_name }.uniq
      schools = data.map { |row| row.school.name }.uniq
      part = filepath&.match(/[\b\s_.]+(part|form)[\W*_](?<label>[\w\d])/i)&.[](:label)&.upcase

      output = [districts.join(".")]
      output << schools.first.parameterize if schools.length == 1
      output << survey_type.to_s
      output << "Part-#{part}" unless part.nil?
      output << range
      output << "csv"
      output.join(".")
    end

    # Reads a raw survey file and sorts its rows into clean and rejected sets.
    #
    # @param file [IO] open handle positioned at the start of the CSV
    # @return [Array] [headers, clean_csv, log_csv, data]; clean_csv and log_csv
    #   are arrays of rows starting with a header row, and data holds every row
    #   that belongs to a known school. All four are empty for an empty file.
    def process_raw_file(file:)
      clean_csv = []
      log_csv = []
      data = []

      headers = CSV.parse(file.first.to_s).first
      # An empty file has no header row; report "no data" instead of crashing.
      return [[], clean_csv, log_csv, data] if headers.nil?

      duplicate_header = headers.tally.find { |_header, count| count > 1 }&.first
      unless duplicate_header.nil?
        puts "\n>>>>>>>>>>>>>>>>>> Duplicate header found. This will misalign column headings. Please delete or rename the duplicate column: #{duplicate_header} \n>>>>>>>>>>>>>> \n"
      end

      headers = (headers + ["Raw Income", "Income", "Raw ELL", "ELL", "Raw SpEd", "SpEd", "Progress Count",
                            "Race", "Gender"]).uniq
      # include_all_headers appends to `headers` in place, so the row parsing
      # below also sees the columns it adds.
      filtered_headers = remove_unwanted_headers(headers: include_all_headers(headers:))
      clean_csv << filtered_headers
      log_csv << (filtered_headers + ["Valid Duration?", "Valid Progress?", "Valid Grade?",
                                      "Valid Standard Deviation?"])

      all_survey_items = survey_items(headers:)

      file.lazy.each_slice(1000) do |lines|
        CSV.parse(lines.join, headers:).each do |row|
          values = SurveyItemValues.new(row:, headers:, survey_items: all_survey_items, schools:)
          next unless values.valid_school?

          data << values
          if values.valid?
            clean_csv << values.to_a
          else
            log_csv << (values.to_a + [values.valid_duration?.to_s, values.valid_progress?.to_s,
                                       values.valid_grade?.to_s, values.valid_sd?.to_s])
          end
        end
      end

      [headers, clean_csv, log_csv, data]
    end

    private

    # For every alternate column (an s-/t- header ending in "-1"), makes sure
    # the matching main header exists. Mutates and returns +headers+.
    def include_all_headers(headers:)
      alternates = headers.filter(&:present?)
                          .filter { |header| header.match?(/^[st]-\w*-\w*-1$/i) }
      alternates.each do |header|
        main = header.sub(/-1\z/, "")
        headers.push(main) unless headers.include?(main)
      end
      headers
    end

    def initialize_directories
      create_ouput_directory
      create_log_directory
    end

    # Returns the unique, non-nil headers without "Q"-prefixed columns and
    # without the "-1" alternate columns.
    def remove_unwanted_headers(headers:)
      headers.compact.uniq.reject do |header|
        header.start_with?("Q") || header.match?(/^[st]-\w*-\w*-1$/i)
      end
    end

    # Serialises +data+ (an array of rows) as CSV into
    # output_filepath/<prefix><filename>.
    def write_csv(data:, output_filepath:, filename:, prefix: "")
      contents = CSV.generate do |csv|
        data.each { |row| csv << row }
      end
      File.write(output_filepath.join(prefix + filename), contents)
    end

    def schools
      @schools ||= School.school_hash
    end

    def genders
      @genders ||= Gender.by_qualtrics_code
    end

    # Looks up the survey items named in +headers+. Results are cached per set
    # of ids so that a later file with different columns gets its own lookup
    # instead of reusing the first file's result.
    def survey_items(headers:)
      survey_item_ids = headers
                        .filter(&:present?)
                        .filter { |header| header.start_with?("t-", "s-") }
      @survey_items ||= {}
      @survey_items[survey_item_ids] ||= SurveyItem.where(survey_item_id: survey_item_ids)
    end

    def create_ouput_directory
      FileUtils.mkdir_p output_filepath
    end

    def create_log_directory
      FileUtils.mkdir_p log_filepath
    end
  end
end
|
||||
@ -0,0 +1,63 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
require "csv"
|
||||
|
||||
module Dashboard
  # Seeds the demographic lookup tables (race, gender, income, ELL, special
  # education status) from a CSV file.
  class DemographicLoader
    # Reads +filepath+ as a CSV with a header row and processes every row.
    def self.load_data(filepath:)
      CSV.foreach(filepath, headers: true) do |row|
        process_race(row:)
        process_gender(row:)
        create_from_column(column: "Income", row:, model: Income)
        create_from_column(column: "ELL", row:, model: Ell)
        create_from_column(column: "Special Ed Status", row:, model: Sped)
      end
    end

    # Creates or updates the race described by +row+. Rows without a race code
    # or designation are skipped. Codes 6 and 7 are stored as the "unknown" race.
    def self.process_race(row:)
      raw_code = row["Race Qualtrics Code"]
      designation = row["Race/Ethnicity"]
      # Check presence before converting: nil.to_i is 0, which is truthy, so a
      # guard placed after the conversion can never skip a row.
      return if raw_code.to_s.strip.empty? || designation.nil?

      qualtrics_code = raw_code.to_i
      if qualtrics_code.between?(6, 7)
        UnknownRace.new(qualtrics_code:, designation:)
      else
        KnownRace.new(qualtrics_code:, designation:)
      end
    end

    # Ensures a Gender exists for the code/designation pair in +row+. Rows
    # without a gender code or designation are skipped.
    def self.process_gender(row:)
      raw_code = row["Gender Qualtrics Code"]
      designation = row["Sex/Gender"]
      return if raw_code.to_s.strip.empty? || designation.nil?

      # find_or_create_by! already persists the record; no extra save needed.
      Gender.find_or_create_by!(qualtrics_code: raw_code.to_i, designation:)
    end

    # Ensures +model+ has a record whose designation is the value of +column+
    # in +row+. Does nothing when the cell is missing.
    def self.create_from_column(column:, row:, model:)
      designation = row[column]
      return unless designation

      model.find_or_create_by!(designation:)
    end
  end

  # Persists a race whose qualtrics code maps directly onto a designation.
  class KnownRace
    def initialize(qualtrics_code:, designation:)
      known = Race.find_or_create_by!(qualtrics_code:)
      known.designation = designation
      known.slug = designation.parameterize
      known.save
    end
  end

  # Persists the catch-all race (stored under qualtrics code 99). The given
  # qualtrics_code is not used; only the designation feeds the slug.
  class UnknownRace
    def initialize(qualtrics_code:, designation:)
      unknown = Race.find_or_create_by!(qualtrics_code: 99)
      unknown.designation = "Race/Ethnicity Not Listed"
      unknown.slug = designation.parameterize
      unknown.save
    end
  end
end
|
||||
@ -0,0 +1,38 @@
|
||||
require 'watir'
|
||||
require 'csv'
|
||||
|
||||
module Dese
  # Mixin that scrapes DESE enrollment-by-grade reports and answers student
  # count lookups from the resulting CSV.
  module Enrollments
    include Dese::Scraper
    attr_reader :filepaths

    # Writes the enrollment header row to +filepath+ and appends one row per
    # school for every academic year the report offers.
    def scrape_enrollments(filepath:)
      headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
                 'PK', 'K', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', 'SP', 'Total']
      write_headers(filepath:, headers:)
      run do |academic_year|
        selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
                      'ctl00_ContentPlaceHolder1_ddYear' => academic_year.range }
        # Enrollment rows carry no score, so the calculation always yields 'NA'.
        calculation = ->(_headers, _items) { 'NA' }
        Prerequisites.new(filepath, 'https://profiles.doe.mass.edu/statereport/enrollmentbygrade.aspx',
                          selectors, 'btnViewReport', '', calculation)
      end
    end

    # Returns the total enrollment recorded for a school and academic year, or
    # nil when the pair is not in the file. The file is read once and cached on
    # the instance; later calls reuse the cache even if +filepath+ differs.
    #
    # @param filepath path of the enrollment CSV produced by scrape_enrollments
    # @param dese_id [Integer] school DESE id
    # @param year [String] academic year range as stored in the CSV
    def student_count(filepath:, dese_id:, year:)
      @students ||= {}
      if @students.empty?
        CSV.foreach(filepath, headers: true) do |row|
          # to_s keeps a missing Total cell from raising; it then counts as 0.
          total = row['Total'].to_s.delete(',').to_i
          @students[[row['DESE ID'].to_i, row['Academic Year']]] = total
        end
      end
      @students[[dese_id, year]]
    end
  end
end
|
||||
@ -0,0 +1,48 @@
|
||||
require 'watir'
|
||||
require 'csv'
|
||||
|
||||
module Dese
  # Scrapes the DESE arts-course report and scores each school on the share of
  # students taking an arts course (admin data item a-picp-i1).
  class FiveCOne
    include Dese::Scraper
    attr_reader :filepaths

    def initialize(filepaths: [Rails.root.join('data', 'admin_data', 'dese', '5C_1_art_course.csv')])
      @filepaths = filepaths
    end

    # Rewrites the output file with a header row, scrapes every academic year,
    # then closes the browser.
    def run_all
      filepath = filepaths[0]
      headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
                 'K', '01', '02', '03', '04', '05', '06', '07', '08', '09', '10',
                 '11', '12', 'All Grades', 'Total Students']
      write_headers(filepath:, headers:)

      run_a_picp_i1(filepath:)

      browser.close
    end

    # Appends scored rows for a-picp-i1 to +filepath+.
    def run_a_picp_i1(filepath:)
      run do |academic_year|
        # The report lists years by the four-digit ending year.
        range = (academic_year.range.split('-')[1].to_i + 2000).to_s
        selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
                      'ctl00_ContentPlaceHolder1_ddYear' => range,
                      'ctl00_ContentPlaceHolder1_ddView' => 'Percent' }
        calculation = lambda { |headers, items|
          all_grades_index = headers['All Grades']
          # Without the column there is nothing to score. Guard first, because
          # subtracting from nil would raise.
          next 'NA' if all_grades_index.nil?

          # NOTE(review): the cell is read one position before the header's
          # index, as in the original code; confirm against the live table.
          percent = items[all_grades_index - 1]
          next 'NA' if percent.nil? || percent == '' || percent.strip == '.0'

          benchmark = 77.5
          percent.to_f * 4 / benchmark
        }
        Prerequisites.new(filepath, 'https://profiles.doe.mass.edu/statereport/artcourse.aspx',
                          selectors, 'btnViewReport', 'a-picp-i1', calculation)
      end
    end
  end
end
|
||||
@ -0,0 +1,56 @@
|
||||
require 'watir'
|
||||
require 'csv'
|
||||
|
||||
module Dese
  # Scrapes DESE enrollment and school-nurse staffing reports and scores each
  # school on its students-per-nurse ratio (admin data item a-phya-i1).
  class FiveDTwo
    include Dese::Scraper
    include Dese::Enrollments
    attr_reader :filepaths

    # @param filepaths [Array] [enrollment CSV path, nurse staffing CSV path]
    def initialize(filepaths: [Rails.root.join('data', 'admin_data', 'dese', 'enrollments.csv'),
                               Rails.root.join('data', 'admin_data', 'dese', '5D_2_age_staffing.csv')])
      @filepaths = filepaths
    end

    # Scrapes enrollments first (the nurse score needs student counts), then
    # the staffing report, then closes the browser.
    def run_all
      scrape_enrollments(filepath: filepaths[0])

      filepath = filepaths[1]
      headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
                 '<26 yrs (# )', '26-32 yrs (#)', '33-40 yrs (#)', '41-48 yrs (#)', '49-56 yrs (#)', '57-64 yrs (#)', 'Over 64 yrs (#)', 'FTE Count']
      write_headers(filepath:, headers:)

      run_a_phya_i1(filepath:)

      browser.close
    end

    # Appends scored rows for a-phya-i1 to +filepath+. For scored rows the
    # student count and the students-per-nurse ratio are appended to the row.
    def run_a_phya_i1(filepath:)
      run do |academic_year|
        selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
                      'ctl00_ContentPlaceHolder1_ddYear' => academic_year.range,
                      'ctl00_ContentPlaceHolder1_ddJobClassification' => 'School Nurse -- Non-Special Education' }
        calculation = lambda { |headers, items|
          nurse_index = headers['FTE Count']
          next 'NA' if nurse_index.nil?

          fte = items[nurse_index]
          next 'NA' if fte.nil? || fte == '' || fte.strip == '.0'

          nurse_count = fte.to_f
          # A zero count would make the ratio below infinite or NaN.
          next 'NA' if nurse_count.zero?

          benchmark = 750
          # Fall back to column 1, where Dese::Scraper#write_csv also reads the id.
          dese_id = items[headers.fetch('School Code', 1)].to_i
          num_of_students = student_count(filepath: filepaths[0], dese_id:, year: academic_year.range) || 0
          students_per_nurse = num_of_students / nurse_count
          items << num_of_students
          items << students_per_nurse
          # Fewer students per nurse is better, so the score is mirrored around the benchmark.
          ((benchmark - students_per_nurse) + benchmark) * 4 / benchmark
        }
        Prerequisites.new(filepath, 'https://profiles.doe.mass.edu/statereport/agestaffing.aspx',
                          selectors, 'btnViewReport', 'a-phya-i1', calculation)
      end
    end
  end
end
|
||||
@ -0,0 +1,44 @@
|
||||
require 'watir'
|
||||
require 'csv'
|
||||
|
||||
module Dese
  # Scrapes the DESE grade-nine course-passing report and scores each school on
  # the share of ninth graders passing all courses (admin data item a-ovpe-i1).
  class FourAOne
    include Dese::Scraper
    attr_reader :filepaths

    def initialize(filepaths: [Rails.root.join('data', 'admin_data', 'dese', '4A_1_grade_nine_course_pass.csv')])
      @filepaths = filepaths
    end

    # Rewrites the output file with a header row, scrapes every academic year,
    # then closes the browser.
    def run_all
      filepath = filepaths[0]
      headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
                 '# Grade Nine Students', '# Passing All Courses', '% Passing All Courses']
      write_headers(filepath:, headers:)

      run_a_ovpe_i1(filepath:)

      browser.close
    end

    # Appends scored rows for a-ovpe-i1 to +filepath+.
    def run_a_ovpe_i1(filepath:)
      run do |academic_year|
        # The report lists years by the four-digit ending year.
        range = (academic_year.range.split('-')[1].to_i + 2000).to_s
        selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
                      'ctl00_ContentPlaceHolder1_ddYear' => range }
        calculation = lambda { |headers, items|
          passing_index = headers['% Passing All Courses']
          # Check the index before using it: indexing with nil raises.
          next 'NA' if passing_index.nil?

          percent = items[passing_index]
          next 'NA' if percent.nil? || percent == '' || percent.strip == '.0'

          benchmark = 95
          percent.to_f * 4 / benchmark
        }
        Prerequisites.new(filepath, 'https://profiles.doe.mass.edu/statereport/gradeninecoursepass.aspx',
                          selectors, 'btnViewReport', 'a-ovpe-i1', calculation)
      end
    end
  end
end
|
||||
@ -0,0 +1,105 @@
|
||||
require 'watir'
|
||||
require 'csv'
|
||||
|
||||
module Dese
  # Scrapes DESE graduation-rate and retention reports and scores each school
  # on four-year graduation (a-degr-i1), retention (a-degr-i2) and five-year
  # graduation (a-degr-i3).
  class FourBTwo
    include Dese::Scraper
    attr_reader :filepaths

    # @param filepaths [Array] [four-year grad CSV, retention CSV, five-year grad CSV]
    def initialize(filepaths: [Rails.root.join('data', 'admin_data', 'dese', '4B_2_four_year_grad.csv'),
                               Rails.root.join('data', 'admin_data', 'dese', '4B_2_retention.csv'),
                               Rails.root.join('data', 'admin_data', 'dese', '4B_2_five_year_grad.csv')])
      @filepaths = filepaths
    end

    # Rewrites each output file with its header row and scrapes the matching
    # report, one after the other, then closes the browser.
    def run_all
      graduation_headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
                            '# in Cohort', '% Graduated', '% Still in School', '% Non-Grad Completers', '% H.S. Equiv.',
                            '% Dropped Out', '% Permanently Excluded']
      retention_headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
                           '# Enrolled', '# Retained', '% Retained', '01', '02', '03', '04', '05', '06', '07', '08', '09', '10',
                           '11', '12']

      write_headers(filepath: filepaths[0], headers: graduation_headers)
      run_a_degr_i1(filepath: filepaths[0])

      write_headers(filepath: filepaths[1], headers: retention_headers)
      run_a_degr_i2(filepath: filepaths[1])

      write_headers(filepath: filepaths[2], headers: graduation_headers)
      run_a_degr_i3(filepath: filepaths[2])

      browser.close
    end

    # Appends four-year graduation scores (benchmark 80%) to +filepath+.
    def run_a_degr_i1(filepath:)
      run do |academic_year|
        selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
                      'ctl00_ContentPlaceHolder1_ddYear' => report_year(academic_year),
                      'ctl00_ContentPlaceHolder1_ddRateType' => '4yr Grad' }
        calculation = lambda { |headers, items|
          percent = percent_cell(headers, items, '% Graduated')
          percent.nil? ? 'NA' : percent * 4 / 80
        }
        Prerequisites.new(filepath, 'https://profiles.doe.mass.edu/statereport/gradrates.aspx',
                          selectors, 'btnViewReport', 'a-degr-i1', calculation)
      end
    end

    # Appends retention scores to +filepath+. Lower retention is better, so the
    # score is mirrored around the 2% benchmark.
    def run_a_degr_i2(filepath:)
      run do |academic_year|
        selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
                      'ctl00_ContentPlaceHolder1_ddYear' => report_year(academic_year),
                      'ctl00_ContentPlaceHolder1_ddView' => 'Percent' }
        calculation = lambda { |headers, items|
          percent = percent_cell(headers, items, '% Retained')
          benchmark = 2
          percent.nil? ? 'NA' : ((benchmark - percent) + benchmark) * 4 / benchmark
        }
        Prerequisites.new(filepath, 'https://profiles.doe.mass.edu/statereport/retention.aspx',
                          selectors, 'btnViewReport', 'a-degr-i2', calculation)
      end
    end

    # Appends five-year graduation scores (benchmark 85%) to +filepath+.
    def run_a_degr_i3(filepath:)
      run do |academic_year|
        selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
                      'ctl00_ContentPlaceHolder1_ddYear' => report_year(academic_year),
                      'ctl00_ContentPlaceHolder1_ddRateType' => '5yr Grad' }
        calculation = lambda { |headers, items|
          percent = percent_cell(headers, items, '% Graduated')
          percent.nil? ? 'NA' : percent * 4 / 85
        }
        Prerequisites.new(filepath, 'https://profiles.doe.mass.edu/statereport/gradrates.aspx',
                          selectors, 'btnViewReport', 'a-degr-i3', calculation)
      end
    end

    private

    # The reports list years by the four-digit ending year ("2022-23" -> "2023").
    def report_year(academic_year)
      (academic_year.range.split('-')[1].to_i + 2000).to_s
    end

    # Returns the value of +column+ in this row as a Float, or nil when the
    # column is not in the table or the cell is missing, empty or '.0'.
    def percent_cell(headers, items, column)
      index = headers[column]
      return if index.nil?

      cell = items[index]
      return if cell.nil? || cell == '' || cell.strip == '.0'

      cell.to_f
    end
  end
end
|
||||
@ -0,0 +1,74 @@
|
||||
require 'watir'
|
||||
require 'csv'
|
||||
|
||||
# TODO: convert this class to the simpler Dese::Scraper-based format used by the other scrapers (run_all already exists)
|
||||
module Dese
  # Scrapes the DESE "plans of high school graduates" report and scores each
  # school (admin data item a-cgpr-i1).
  class FourDOne
    attr_reader :filepath

    def initialize(filepath: Rails.root.join('data', 'admin_data', 'dese', '4D_1_plans_of_grads.csv'))
      @filepath = filepath
    end

    # Rewrites the output file with a header row and appends one scored row per
    # school for every academic year the report offers.
    def run_all
      url = 'https://profiles.doe.mass.edu/statereport/plansofhsgrads.aspx'
      browser = Watir::Browser.new
      write_headers(filepath:)
      AcademicYear.all.each do |academic_year|
        table = scrape(browser:, url:, range: academic_year.range)
        write_csv(table:, filepath:, range: academic_year.range, id: 'a-cgpr-i1') unless table.nil?
      end
    ensure
      # Close the browser even when scraping fails part-way through.
      browser&.close
    end

    # Loads the school-level report for +range+.
    #
    # @return the table rows of the result page, or nil when the report does
    #   not offer that year
    def scrape(browser:, url:, range:)
      browser.goto(url)

      return unless browser.option(text: range).present?

      browser.select(id: 'ctl00_ContentPlaceHolder1_ddReportType').select(/School/)
      browser.select(id: 'ctl00_ContentPlaceHolder1_ddYear').select(text: range)
      browser.button(id: 'btnViewReport').click
      sleep Dese::Scraper::DELAY # Sleep to prevent hitting mass.edu with too many requests
      Nokogiri::HTML(browser.html).css('tr')
    end

    # Truncates +filepath+ and writes the header row.
    def write_headers(filepath:)
      headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID', '4 Year Private College', '4 Year Public College', '2 Year Private College', '2 Year Public College',
                 'Other Post Secondary', 'Apprenticeship', 'Work', 'Military', 'Other', 'Unknown', 'Total']
      CSV.open(filepath, 'w') { |csv| csv << headers }
    end

    # Appends one row per school in +table+: raw score, score bounded to 1..5
    # and rounded to two decimals, item id, year range, then the scraped cells.
    # Rows whose second cell is not a non-zero number (e.g. header rows) are
    # skipped.
    def write_csv(table:, filepath:, range:, id:)
      CSV.open(filepath, 'a') do |csv|
        table.each do |row|
          items = row.css('td').map(&:text)
          next if items[1].to_i.zero?

          raw_likert_score = calculate(cells: items)
          likert_score = raw_likert_score.clamp(1, 5).round(2)
          csv << [raw_likert_score, likert_score, id, range, *items]
        end
      end
    end

    # Sums cells 2 through 8 of a row and scales the total so that 75 maps to
    # a score of 4.
    def calculate(cells:)
      cells[2..8].sum(&:to_f) * 4 / 75
    end
  end
end
|
||||
@ -0,0 +1,71 @@
|
||||
require "csv"
|
||||
|
||||
module Dese
  # Imports likert scores from an admin-data CSV into AdminDataValue records.
  class Loader
    # Reads +filepath+ (CSV with a header row), updates the values that already
    # exist and bulk-imports the new ones. Rows whose score falls outside 1..5
    # are ignored.
    def self.load_data(filepath:)
      admin_data_values = CSV.foreach(filepath, headers: true).filter_map do |row|
        score = likert_score(row:)
        next unless valid_likert_score(likert_score: score)

        create_admin_data_value(row:, score:)
      end

      AdminDataValue.import(admin_data_values, batch_size: 1_000, on_duplicate_key_update: :all)
    end

    # True when the score lies within the likert range 1..5.
    def self.valid_likert_score(likert_score:)
      likert_score.between?(1, 5)
    end

    # Reads the score from whichever header spelling the file uses and applies
    # the project's round_up_to_one / round_down_to_five adjustments.
    def self.likert_score(row:)
      likert_score = (row["Likert Score"] || row["LikertScore"] || row["Likert_Score"]).to_f
      likert_score.round_up_to_one.round_down_to_five
    end

    # Academic year range of the row, under either header spelling.
    def self.ay(row:)
      row["Academic Year"] || row["AcademicYear"]
    end

    # School DESE id of the row, under any of the accepted header spellings.
    def self.dese_id(row:)
      row["DESE ID"] || row["Dese ID"] || row["Dese Id"] || row["School ID"]
    end

    # Admin data item id of the row, under any of the accepted header spellings.
    def self.admin_data_item(row:)
      row["Admin Data Item"] || row["Item ID"] || row["Item Id"]
    end

    # Updates the matching AdminDataValue in place when one exists (returning
    # nil), otherwise returns a new, unsaved AdminDataValue for bulk import.
    # Returns nil when the school is unknown or the row has no item id.
    def self.create_admin_data_value(row:, score:)
      school = School.find_by_dese_id(dese_id(row:).to_i)
      admin_data_item_id = admin_data_item(row:)

      return if school.nil?
      return if admin_data_item_id.blank?

      # Look each association up once and reuse it for both the search and the
      # new record.
      academic_year = AcademicYear.find_by_range(ay(row:))
      admin_data_item = AdminDataItem.find_by_admin_data_item_id(admin_data_item_id)

      existing = AdminDataValue.find_by(academic_year:, school:, admin_data_item:)
      if existing
        existing.likert_score = score
        existing.save
        nil
      else
        AdminDataValue.new(likert_score: score, academic_year:, school:, admin_data_item:)
      end
    end

    # A bare `private` does not affect `def self.` methods, so the helpers are
    # hidden explicitly.
    private_class_method :valid_likert_score
    private_class_method :likert_score
    private_class_method :ay
    private_class_method :dese_id
    private_class_method :admin_data_item
    private_class_method :create_admin_data_value
  end
end
|
||||
@ -0,0 +1,115 @@
|
||||
require 'watir'
|
||||
require 'csv'
|
||||
# TODO: convert this class to the simpler Dese::Scraper-based format used by the other scrapers (run_all already exists)
|
||||
module Dese
  # Scrapes the DESE teacher-data report and scores each school on teacher
  # experience (a-exp-i1) and in-field teaching (a-exp-i3).
  class OneAOne
    attr_reader :filepath

    def initialize(filepath: Rails.root.join('data', 'admin_data', 'dese', '1A_1_teacher_data.csv'))
      @filepath = filepath
    end

    # Rewrites the output file with a header row and appends the scored rows of
    # every academic year the report offers.
    def run_all
      url = 'https://profiles.doe.mass.edu/statereport/teacherdata.aspx'
      browser = Watir::Browser.new
      write_headers(filepath:)
      AcademicYear.all.each do |academic_year|
        document = scrape(browser:, url:, range: academic_year.range)
        write_csv(document:, filepath:, range: academic_year.range, id: 'a-exp-i1') unless document.nil?
      end
    ensure
      # Close the browser even when scraping fails part-way through.
      browser&.close
    end

    # Loads the school-level report for +range+.
    #
    # @return the parsed result page, or nil when the page does not offer the
    #   'School' report type or the requested year
    def scrape(browser:, url:, range:)
      browser.goto(url)

      return unless browser.option(text: 'School').present?
      return unless browser.option(text: range).present?

      browser.select(id: 'ctl00_ContentPlaceHolder1_ddReportType').select(text: 'School')
      browser.select(id: 'ctl00_ContentPlaceHolder1_ddYear').select(text: range)
      browser.button(id: 'ctl00_ContentPlaceHolder1_btnViewReport').click
      sleep Dese::Scraper::DELAY # Sleep to prevent hitting mass.edu with too many requests
      Nokogiri::HTML(browser.html)
    end

    # Truncates +filepath+ and writes the header row.
    def write_headers(filepath:)
      headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID', 'Total # of Teachers(FTE)', 'Percent of Teachers Licensed',
                 'Student/Teacher Ratio', 'Percent of Experienced Teachers', 'Percent of Teachers without Waiver or Provisional License', 'Percent Teaching in-field']
      CSV.open(filepath, 'w') { |csv| csv << headers }
    end

    # Appends two groups of rows to +filepath+: first one row per school scored
    # on experienced teachers (benchmark 80%, tagged with +id+), then one row
    # per school scored on in-field teaching (benchmark 95%, tagged 'a-exp-i3').
    # Rows without a non-zero school code (e.g. header rows) are skipped.
    def write_csv(document:, filepath:, range:, id:)
      header_hash = document.css('.sorting').each_with_index.map { |header, index| [header.text, index] }.to_h
      experienced_index = header_hash['Percent of Experienced Teachers']
      in_field_index = header_hash['Percent Teaching In-Field']
      # Column 1 is where the school code sits when the header is not found;
      # indexing with a nil position would raise.
      dese_id_index = header_hash.fetch('School Code', 1)

      school_rows = document.css('tr')
                            .map { |row| row.css('td').map(&:text) }
                            .reject { |items| items[dese_id_index].to_i.zero? }

      CSV.open(filepath, 'a') do |csv|
        school_rows.each do |items|
          raw = items[experienced_index].to_f * 4 / 80 unless experienced_index.nil?
          csv << scored_row(raw, id, range, items)
        end

        school_rows.each do |items|
          percent = in_field_index.nil? ? 0.0 : items[in_field_index].to_f
          # A zero (or missing) in-field percentage is treated as "no data".
          raw = percent * 4 / 95 unless percent.zero?
          csv << scored_row(raw, 'a-exp-i3', range, items)
        end
      end
    end

    # Scales the sixth cell of a row so that 95 maps to a score of 4.
    def calculate(cells:)
      cells[5].to_f * 4 / 95
    end

    private

    # Builds one output row: raw score, score bounded to 1..5 and rounded to
    # two decimals ('NA' for both when there is no score), item id, year range
    # and the scraped cells.
    def scored_row(raw, item_id, range, items)
      return ['NA', 'NA', item_id, range, *items] if raw.nil?

      [raw, raw.clamp(1, 5).round(2), item_id, range, *items]
    end
  end
end
|
||||
@ -0,0 +1,68 @@
|
||||
require 'watir'
|
||||
require 'csv'
|
||||
|
||||
module Dese
  # Scrapes DESE staffing-retention and teacher race/gender reports and scores
  # each school on teacher retention (a-pcom-i1) and teacher diversity
  # (a-pcom-i3).
  class OneAThree
    include Dese::Scraper
    attr_reader :filepaths

    # @param filepaths [Array] [staffing retention CSV, teachers of color CSV]
    def initialize(filepaths: [Rails.root.join('data', 'admin_data', 'dese', '1A_3_staffing_retention.csv'),
                               Rails.root.join('data', 'admin_data', 'dese', '1A_3_teachers_of_color.csv')])
      @filepaths = filepaths
    end

    # Scrapes both reports, then closes the browser.
    def run_all
      run_a_pcom_i1
      run_a_pcom_i3

      browser.close
    end

    # Rewrites the first output file and appends retention scores (benchmark 85%).
    def run_a_pcom_i1
      filepath = filepaths[0]
      headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
                 'Principal Total', 'Principal # Retained', 'Principal % Retained',
                 'Teacher Total', 'Teacher # Retained', 'Teacher % Retained']
      write_headers(filepath:, headers:)
      run do |academic_year|
        # The report lists years by the four-digit ending year.
        range = (academic_year.range.split('-').last.to_i + 2000).to_s
        selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
                      'ctl00_ContentPlaceHolder1_ddYear' => range }
        calculation = lambda { |headers, items|
          retained_index = headers['% Retained']
          # nil lets the writer record the row as 'NA'.
          next if retained_index.nil?

          items[retained_index].to_f * 4 / 85
        }
        Prerequisites.new(filepath, 'https://profiles.doe.mass.edu/statereport/staffingRetentionRates.aspx',
                          selectors, 'btnViewReport', 'a-pcom-i1', calculation)
      end
    end

    # Rewrites the second output file and appends diversity scores: the share
    # of non-white teachers scaled against a 12.8% benchmark, never below 1.
    def run_a_pcom_i3
      filepath = filepaths[1]
      headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
                 'African American (%)', 'Asian (%)', 'Hispanic (%)', 'White (%)', 'Native Hawaiian, Pacific Islander (%)',
                 'Multi-Race,Non-Hispanic (%)', 'Females (%)', 'Males (%)', 'FTE Count']
      write_headers(filepath:, headers:)

      run do |academic_year|
        selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
                      'ctl00_ContentPlaceHolder1_ddYear' => academic_year.range,
                      'ctl00_ContentPlaceHolder1_ddDisplay' => 'Percentages' }
        calculation = lambda { |headers, items|
          white_index = headers['White (%)']
          # Without the column there is no score; comparing nil with 1 would raise.
          next if white_index.nil?

          result = ((100 - items[white_index].to_f) * 4) / 12.8
          result < 1 ? 1 : result
        }
        Prerequisites.new(filepath, 'https://profiles.doe.mass.edu/statereport/teacherbyracegender.aspx',
                          selectors, 'ctl00_ContentPlaceHolder1_btnViewReport', 'a-pcom-i3', calculation)
      end
    end
  end
end
|
||||
@ -0,0 +1,78 @@
|
||||
module Dese
  # Shared scraping workflow for DESE state reports: select report options in
  # a browser, read the result table and append scored rows to a CSV file.
  module Scraper
    # Seconds to wait after submitting a report form.
    DELAY = 20

    # Everything `run` needs to scrape and score one report for one year.
    Prerequisites = Struct.new('Prerequisites', :filepath, :url, :selectors, :submit_id, :admin_data_item_id,
                               :calculation)

    # Yields every academic year (newest range first). The block must return a
    # Prerequisites; the described report is scraped and appended to its CSV.
    # Years the report does not offer are skipped.
    def run
      AcademicYear.all.order(range: :DESC).each do |academic_year|
        prerequisites = yield academic_year

        document = get_html(url: prerequisites.url,
                            selectors: prerequisites.selectors,
                            submit_id: prerequisites.submit_id)
        next if document.nil?

        write_csv(document:, filepath: prerequisites.filepath, range: academic_year.range,
                  id: prerequisites.admin_data_item_id, calculation: prerequisites.calculation)
      end
    end

    # The browser shared by all requests of the including object; created on
    # first use.
    def browser
      @browser ||= Watir::Browser.new
    end

    # Opens +url+, chooses each option in +selectors+ (select element id =>
    # option text) and submits the form.
    #
    # @return the parsed result page, or nil as soon as one requested option
    #   text is not present on the page
    def get_html(url:, selectors:, submit_id:)
      browser.goto(url)

      selectors.each do |key, value|
        return unless browser.option(text: value).present?

        browser.select(id: key).select(text: value)
      end

      browser.button(id: submit_id).click
      sleep DELAY # Sleep to prevent hitting mass.edu with too many requests
      Nokogiri::HTML(browser.html)
    end

    # Truncates +filepath+ and writes +headers+ as its only row.
    def write_headers(filepath:, headers:)
      CSV.open(filepath, 'w') { |csv| csv << headers }
    end

    # Appends one row per school in +document+ to +filepath+: raw score, score
    # bounded to 1..5 and rounded to two decimals, +id+, +range+, then the
    # scraped cells. +calculation+ is called with (header text => column index,
    # cells) and may return nil or 'NA' for "no score". Rows whose second cell
    # is not a non-zero number (e.g. header rows) are skipped.
    def write_csv(document:, filepath:, range:, id:, calculation:)
      table = document.css('tr')
      header_hash = document.css('.sorting').each_with_index.map { |header, index| [header.text, index] }.to_h

      CSV.open(filepath, 'a') do |csv|
        table.each do |row|
          items = row.css('td').map(&:text)
          next if items[1].to_i.zero?

          # Run the calculation before building the row: it may append cells.
          raw_likert_score = calculation.call(header_hash, items) || 'NA'
          csv << [raw_likert_score, bounded_likert_score(raw_likert_score), id, range, *items]
        end
      end
    end

    private

    # Limits a raw score to the 1..5 likert range and rounds it to two
    # decimals; the 'NA' marker is passed through unchanged.
    def bounded_likert_score(raw_likert_score)
      return raw_likert_score if raw_likert_score == 'NA'

      likert_score = raw_likert_score
      likert_score = 5 if likert_score > 5
      likert_score = 1 if likert_score < 1
      likert_score.round(2)
    end
  end
end
|
||||
@ -0,0 +1,35 @@
|
||||
require 'watir'
|
||||
|
||||
module Dese
  # Scrapes the DESE grade/subject staffing report (full-time equivalents) for
  # every academic year into a single CSV file.
  class Staffing
    include Dese::Scraper
    attr_reader :filepath

    # @param filepath [Pathname, String] CSV file that is (re)written by the scrape
    def initialize(filepath: Rails.root.join('data', 'staffing', 'staffing.csv'))
      @filepath = filepath
    end

    # Runs the scrape and then closes the browser, as the other Dese scrapers
    # in this code base do at the end of their run_all.
    def run_all
      scrape_staffing(filepath:)

      browser.close
    end

    # Writes the header row to +filepath+ and appends the staffing rows for
    # each academic year. No likert score is computed for this report, so the
    # calculation always yields 'NA'.
    def scrape_staffing(filepath:)
      headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year',
                 'School Name', 'DESE ID',
                 'PK-2 (#)', '3-5 (#)', '6-8 (#)', '9-12 (#)', 'Multiple Grades (#)',
                 'All Grades (#)', 'FTE Count']
      write_headers(filepath:, headers:)

      run do |academic_year|
        admin_data_item_id = 'NA'
        url = 'https://profiles.doe.mass.edu/statereport/gradesubjectstaffing.aspx'
        selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
                      'ctl00_ContentPlaceHolder1_ddYear' => academic_year.range,
                      'ctl00_ContentPlaceHolder1_ddDisplay' => 'Full-time Equivalents' }
        submit_id = 'btnViewReport'
        calculation = ->(_headers, _items) { 'NA' }
        Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
      end
    end
  end
end
|
||||
@ -0,0 +1,45 @@
|
||||
require 'watir'
|
||||
require 'csv'
|
||||
|
||||
module Dese
  # Scrapes the DESE class-size report and scores average class size
  # (admin data item a-reso-i1) against a benchmark of 20.
  class ThreeAOne
    include Dese::Scraper
    attr_reader :filepaths

    # @param filepaths [Array<Pathname, String>] only the first entry is used
    def initialize(filepaths: [Rails.root.join('data', 'admin_data', 'dese', '3A_1_average_class_size.csv')])
      @filepaths = filepaths
    end

    # Rewrites the CSV header, scrapes every academic year, then closes the browser.
    def run_all
      filepath = filepaths[0]
      headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
                 'Total # of Classes', 'Average Class Size', 'Number of Students', 'Female %', 'Male %',
                 'English Language Learner %', 'Students with Disabilities %', 'Economically Disadvantaged %']
      write_headers(filepath:, headers:)

      run_a_reso_i1

      browser.close
    end

    # Appends a-reso-i1 rows to filepaths[0].
    #
    # The calculation returns nil (recorded as 'NA' by the scraper) when the
    # 'Average Class Size' column is missing or the cell is blank.
    def run_a_reso_i1
      run do |academic_year|
        url = 'https://profiles.doe.mass.edu/statereport/classsizebygenderpopulation.aspx'
        selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
                      'ctl00_ContentPlaceHolder1_ddYear' => academic_year.range }
        submit_id = 'btnViewReport'
        calculation = lambda { |headers, items|
          class_size_index = headers['Average Class Size']
          # Guard before indexing: items[nil] raises TypeError, and the former
          # `!items[i] != ''` test was always true.
          next if class_size_index.nil? || items[class_size_index].blank?

          average_class_size = items[class_size_index].to_f
          benchmark = 20
          ((benchmark - average_class_size) + benchmark) * 4 / benchmark
        }
        admin_data_item_id = 'a-reso-i1'
        Prerequisites.new(filepaths[0], url, selectors, submit_id, admin_data_item_id, calculation)
      end
    end
  end
end
|
||||
@ -0,0 +1,163 @@
|
||||
require 'watir'
|
||||
require 'csv'
|
||||
|
||||
module Dese
  # Scrapes enrollments plus the age-staffing and grade/subject-staffing
  # reports and scores student-to-staff ratios (a-sust-i1 .. a-sust-i4).
  class ThreeATwo
    include Dese::Scraper
    include Dese::Enrollments
    attr_reader :filepaths

    AGE_STAFFING_URL = 'https://profiles.doe.mass.edu/statereport/agestaffing.aspx'
    GRADE_SUBJECT_STAFFING_URL = 'https://profiles.doe.mass.edu/statereport/gradesubjectstaffing.aspx'
    SUBMIT_ID = 'btnViewReport'

    # @param filepaths [Array<Pathname, String>] [enrollments, age staffing, grade/subject staffing]
    def initialize(filepaths: [Rails.root.join('data', 'admin_data', 'dese', 'enrollments.csv'),
                               Rails.root.join('data', 'admin_data', 'dese', '3A_2_age_staffing.csv'),
                               Rails.root.join('data', 'admin_data', 'dese', '3A_2_grade_subject_staffing.csv')])
      @filepaths = filepaths
    end

    # Scrapes enrollments first (the ratio calculations read student counts
    # from filepaths[0]), then each staffing item, then closes the browser.
    def run_all
      filepath = filepaths[0]
      scrape_enrollments(filepath:)

      filepath = filepaths[1]
      write_a_sust_i1_headers(filepath:)
      run_a_sust_i1(filepath:)
      run_a_sust_i2(filepath:)
      run_a_sust_i3(filepath:)

      filepath = filepaths[2]
      write_a_sust_i4_headers(filepath:)
      run_a_sust_i4(filepath:)

      browser.close
    end

    def write_a_sust_i1_headers(filepath:)
      headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
                 '<26 yrs (# )', '26-32 yrs (#)', '33-40 yrs (#)', '41-48 yrs (#)',
                 '49-56 yrs (#)', '57-64 yrs (#)', 'Over 64 yrs (#)', 'FTE Count',
                 'Student Count', 'Student to Guidance Counselor ratio']

      write_headers(filepath:, headers:)
    end

    def write_a_sust_i4_headers(filepath:)
      headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
                 'PK-2 (# )', '3-5 (# )', '6-8 (# )', '9-12 (# )', 'Multiple Grades (# )', 'All Grades (# )', 'FTE Count',
                 'Student Count', 'Student to Art Teacher ratio']

      write_headers(filepath:, headers:)
    end

    # Guidance counselors, benchmark 250 students per FTE. Schools that are
    # not found or are not high schools score 'NA'.
    def run_a_sust_i1(filepath:)
      run do |academic_year|
        age_staffing_prerequisites(filepath:, academic_year:, admin_data_item_id: 'a-sust-i1',
                                   job_classification: 'Guidance Counselor', benchmark: 250,
                                   high_schools_only: true)
      end
    end

    # School psychologists, benchmark 250 students per FTE.
    def run_a_sust_i2(filepath:)
      run do |academic_year|
        age_staffing_prerequisites(filepath:, academic_year:, admin_data_item_id: 'a-sust-i2',
                                   job_classification: 'School Psychologist -- Non-Special Education',
                                   benchmark: 250)
      end
    end

    # Paraprofessionals, benchmark 43.4 students per FTE.
    def run_a_sust_i3(filepath:)
      run do |academic_year|
        age_staffing_prerequisites(filepath:, academic_year:, admin_data_item_id: 'a-sust-i3',
                                   job_classification: 'Paraprofessional', benchmark: 43.4)
      end
    end

    # Art teachers, benchmark 500 students per FTE. The FTE count is read from
    # the last cell of the row and the DESE id from the second cell.
    def run_a_sust_i4(filepath:)
      run do |academic_year|
        selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
                      'ctl00_ContentPlaceHolder1_ddYear' => academic_year.range,
                      'ctl00_ContentPlaceHolder1_ddDisplay' => 'Full-time Equivalents',
                      'ctl00_ContentPlaceHolder1_ddSubject' => 'Arts' }
        calculation = lambda { |_headers, items|
          # Capture the FTE cell before the row is extended with extra columns.
          staff_cell = items.last
          dese_id = items[1].to_i
          staff_ratio_score(items:, staff_cell:, dese_id:, year: academic_year.range, benchmark: 500)
        }
        Prerequisites.new(filepath, GRADE_SUBJECT_STAFFING_URL, selectors, SUBMIT_ID, 'a-sust-i4', calculation)
      end
    end

    private

    # Builds the Prerequisites for one age-staffing job classification.
    def age_staffing_prerequisites(filepath:, academic_year:, admin_data_item_id:, job_classification:, benchmark:,
                                   high_schools_only: false)
      selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
                    'ctl00_ContentPlaceHolder1_ddYear' => academic_year.range,
                    'ctl00_ContentPlaceHolder1_ddJobClassification' => job_classification }
      calculation = lambda { |headers, items|
        dese_id = items[headers['School Code']].to_i
        if high_schools_only
          school = School.find_by_dese_id(dese_id)
          next 'NA' unless school.present? && school.is_hs?
        end

        fte_index = headers['FTE Count']
        # Look the cell up only when the column exists; items[nil] raises TypeError.
        staff_cell = fte_index.nil? ? nil : items[fte_index]
        staff_ratio_score(items:, staff_cell:, dese_id:, year: academic_year.range, benchmark:)
      }
      Prerequisites.new(filepath, AGE_STAFFING_URL, selectors, SUBMIT_ID, admin_data_item_id, calculation)
    end

    # Appends the student count and the students-per-staff ratio to +items+
    # (keeping the CSV columns aligned) and returns the raw likert score, or
    # nil when the staff FTE cell is missing or blank.
    def staff_ratio_score(items:, staff_cell:, dese_id:, year:, benchmark:)
      staff_count = staff_cell.to_f
      num_of_students = student_count(filepath: filepaths[0], dese_id:, year:) || 0
      ratio = num_of_students / staff_count
      items << num_of_students << ratio
      return if staff_cell.blank?

      ((benchmark - ratio) + benchmark) * 4 / benchmark
    end
  end
end
|
||||
@ -0,0 +1,130 @@
|
||||
require 'watir'
|
||||
require 'csv'
|
||||
|
||||
module Dese
  # Scrapes MassCore, advanced-course completion, AP and class-size reports
  # and scores them (a-curv-i1, a-curv-i2, a-curv-i3, a-curv-i5).
  class ThreeBOne
    include Dese::Scraper
    include Dese::Enrollments
    attr_reader :filepaths

    # @param filepaths [Array<Pathname, String>] [masscore, advanced courses, AP, student/courses ratio]
    def initialize(filepaths: [Rails.root.join('data', 'admin_data', 'dese', '3B_1_masscore.csv'),
                               Rails.root.join('data', 'admin_data', 'dese', '3B_1_advcoursecomprate.csv'),
                               Rails.root.join('data', 'admin_data', 'dese', '3B_1_ap.csv'),
                               Rails.root.join('data', 'admin_data', 'dese', '3B_1_student_courses_ratio.csv')])
      @filepaths = filepaths
    end

    # Rewrites each CSV's header row, scrapes the matching report for every
    # academic year, and finally closes the browser.
    def run_all
      filepath = filepaths[0]
      headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
                 '# Graduated', '# Completed MassCore', '% Completed MassCore']
      write_headers(filepath:, headers:)

      run_a_curv_i1(filepath:)

      filepath = filepaths[1]
      headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
                 '# Grade 11 and 12 Students', '# Students Completing Advanced', '% Students Completing Advanced',
                 '% ELA', '% Math', '% Science and Technology', '% Computer and Information Science',
                 '% History and Social Sciences', '% Arts', '% All Other Subjects', '% All Other Subjects']
      write_headers(filepath:, headers:)
      run_a_curv_i2(filepath:)

      filepath = filepaths[2]
      headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
                 'Tests Taken', 'Score=1', 'Score=2', 'Score=3', 'Score=4', 'Score=5', '% Score 1-2', '% Score 3-5']
      write_headers(filepath:, headers:)
      run_a_curv_i3(filepath:)

      filepath = filepaths[3]
      headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
                 'Total # of Classes', 'Average Class Size', 'Number of Students', 'Female %', 'Male %',
                 'English Language Learner %', 'Students with Disabilities %', 'Low Income %', 'Number of Students']
      write_headers(filepath:, headers:)
      run_a_curv_i5(filepath:)
      browser.close
    end

    # '% Completed MassCore' against a benchmark of 90.
    def run_a_curv_i1(filepath:)
      run do |academic_year|
        url = 'https://profiles.doe.mass.edu/statereport/masscore.aspx'
        selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
                      'ctl00_ContentPlaceHolder1_ddYear' => academic_year.range }
        calculation = percentage_calculation(header: '% Completed MassCore', benchmark: 90)
        Prerequisites.new(filepath, url, selectors, 'btnViewReport', 'a-curv-i1', calculation)
      end
    end

    # '% Students Completing Advanced' against a benchmark of 30. The year
    # drop-down is selected by 2000 plus the number after the dash in the range.
    def run_a_curv_i2(filepath:)
      run do |academic_year|
        url = 'https://profiles.doe.mass.edu/statereport/advcoursecomprate.aspx'
        range = (academic_year.range.split('-')[1].to_i + 2000).to_s
        selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
                      'ctl00_ContentPlaceHolder1_ddYear' => range }
        calculation = percentage_calculation(header: '% Students Completing Advanced', benchmark: 30)
        Prerequisites.new(filepath, url, selectors, 'btnViewReport', 'a-curv-i2', calculation)
      end
    end

    # '% Score 3-5' against a benchmark of 20.
    def run_a_curv_i3(filepath:)
      run do |academic_year|
        url = 'https://profiles.doe.mass.edu/statereport/ap.aspx'
        selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
                      'ctl00_ContentPlaceHolder1_ddYear' => academic_year.range }
        calculation = percentage_calculation(header: '% Score 3-5', benchmark: 20)
        Prerequisites.new(filepath, url, selectors, 'ctl00_ContentPlaceHolder1_btnViewReport', 'a-curv-i3',
                          calculation)
      end
    end

    # Students per class against a benchmark of 5, for high schools only.
    #
    # A row counts as a high school when its School record says so, or, if no
    # School is found, when the scraped school name matches /High School/i.
    # The student count is appended to the row; nil is returned when the
    # 'Total # of Classes' cell is missing or blank.
    def run_a_curv_i5(filepath:)
      run do |academic_year|
        url = 'https://profiles.doe.mass.edu/statereport/classsizebygenderpopulation.aspx'
        selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
                      'ctl00_ContentPlaceHolder1_ddYear' => academic_year.range }
        calculation = lambda { |headers, items|
          row = headers.keys.zip(items).to_h
          dese_id = row['School Code'].to_i
          is_hs = (row['School Name'] in /High School/i)
          school = School.find_by(dese_id:)
          is_hs = school.is_hs if school.present?
          next 'NA' unless is_hs

          classes_cell = row['Total # of Classes']
          num_of_students = student_count(filepath: Rails.root.join('data', 'admin_data', 'dese', 'enrollments.csv'),
                                          dese_id:, year: academic_year.range) || 0
          items << num_of_students
          # Check the raw cell text: a Float is never blank, so the former
          # `num_of_classes.present?` test could not detect missing data.
          next if classes_cell.blank?

          num_of_classes = classes_cell.delete(',').to_f
          actual = num_of_students / num_of_classes
          benchmark = 5
          ((benchmark - actual) + benchmark) * 4 / benchmark
        }
        Prerequisites.new(filepath, url, selectors, 'btnViewReport', 'a-curv-i5', calculation)
      end
    end

    private

    # Returns a lambda scoring the percentage found under +header+ as
    # percentage * 4 / benchmark, or nil when the column is missing or the
    # cell is blank.
    def percentage_calculation(header:, benchmark:)
      lambda { |headers, items|
        index = headers[header]
        next if index.nil? || items[index].blank?

        items[index].to_f * 4 / benchmark
      }
    end
  end
end
|
||||
@ -0,0 +1,129 @@
|
||||
require 'watir'
|
||||
require 'csv'
|
||||
|
||||
module Dese
  # Scrapes teacher and student race/gender reports and scores the parity
  # between non-white teacher and non-white student percentages (a-cure-i1).
  class ThreeBTwo
    include Dese::Scraper
    include Dese::Enrollments
    attr_reader :filepaths

    # Scraped column headers whose values are added up as teachers of color.
    NON_WHITE_TEACHER_HEADERS = ['African American (%)', 'Asian (%)', 'Hispanic (%)', 'Native American (%)',
                                 'Native Hawaiian, Pacific Islander (%)', 'Multi-Race,Non-Hispanic (%)'].freeze

    # @param filepaths [Array<Pathname, String>] [teacher demographics, student demographics]
    def initialize(filepaths: [Rails.root.join('data', 'admin_data', 'dese', '3B_2_teacher_by_race_and_gender.csv'),
                               Rails.root.join('data', 'admin_data', 'dese', '3B_2_student_by_race_and_gender.csv')])
      @filepaths = filepaths
    end

    # Teacher demographics are scraped first because the student calculation
    # reads the teacher CSV (filepaths[0]) back through #teacher_count.
    def run_all
      filepath = filepaths[0]
      headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year',
                 'Teachers of color (%)', 'School Name', 'DESE ID',
                 'African American (%)', 'Asian (%)', 'Hispanic (%)', 'White (%)', 'Native American (%)',
                 'Native Hawaiian Pacific Islander (%)', 'Multi-Race Non-Hispanic (%)', 'Females (%)',
                 'Males (%)', 'FTE Count']
      write_headers(filepath:, headers:)
      run_teacher_demographics(filepath:)

      filepath = filepaths[1]
      headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year',
                 'Non-White Teachers %', 'Non-White Students %', 'School Name', 'DESE ID',
                 'African American', 'Asian', 'Hispanic', 'White', 'Native American',
                 'Native Hawaiian or Pacific Islander', 'Multi-Race or Non-Hispanic', 'Males',
                 'Females', 'Non-Binary', 'Students of color (%)']
      write_headers(filepath:, headers:)
      run_student_demographics(filepath:)

      browser.close
    end

    # Sums the non-white percentage columns per school, prepends the sum to
    # the row and also returns it as the raw score.
    def run_teacher_demographics(filepath:)
      run do |academic_year|
        admin_data_item_id = ''
        url = 'https://profiles.doe.mass.edu/statereport/teacherbyracegender.aspx'
        selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
                      'ctl00_ContentPlaceHolder1_ddYear' => academic_year.range,
                      'ctl00_ContentPlaceHolder1_ddDisplay' => 'Percentages',
                      'ctl00_ContentPlaceHolder1_ddClassification' => 'Teacher' }
        submit_id = 'ctl00_ContentPlaceHolder1_btnViewReport'
        calculation = lambda { |headers, items|
          # reduce(:+) keeps plain left-to-right float addition.
          non_white_teachers = NON_WHITE_TEACHER_HEADERS.map { |header| items[headers[header]].to_f }.reduce(:+)
          items.unshift(non_white_teachers)

          non_white_teachers
        }
        Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
      end
    end

    # Looks up the 'Teachers of color (%)' value for a school and year in the
    # teacher CSV, which is parsed once and cached on the instance.
    #
    # @return [String, nil] the cell text with commas removed; 'NA' when the
    #   cell is blank or the file holds no rows for +year+; nil when the year
    #   has data but the school does not
    def teacher_count(filepath:, dese_id:, year:)
      @teachers ||= {}
      @years_with_data ||= Set.new
      if @teachers.empty?
        CSV.parse(File.read(filepath), headers: true).each do |row|
          academic_year = row['Academic Year']
          @years_with_data << academic_year
          # to_s first so a missing cell becomes 'NA' instead of raising.
          total = row['Teachers of color (%)'].to_s.delete(',')
          @teachers[[row['DESE ID'].to_i, academic_year]] = total.empty? ? 'NA' : total
        end
      end
      return 'NA' unless @years_with_data.include? year

      @teachers[[dese_id, year]]
    end

    # Scores teacher/student parity. Two columns are always prepended to the
    # row, teacher percentage then student percentage, so every row lines up
    # with the header; the teacher column holds 'NA' when no teacher data
    # exists for the year.
    def run_student_demographics(filepath:)
      run do |academic_year|
        admin_data_item_id = 'a-cure-i1'
        url = 'https://profiles.doe.mass.edu/statereport/enrollmentbyracegender.aspx'
        selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
                      'ctl00_ContentPlaceHolder1_ddYear' => academic_year.range }
        submit_id = 'btnViewReport'
        calculation = lambda { |headers, items|
          # Read every cell before prepending, since that shifts the indexes.
          white_number = items[headers['White']].to_f
          dese_id = items[headers['School Code']].to_i
          non_white_student_percentage = (100 - white_number).to_f
          count_of_teachers = teacher_count(filepath: filepaths[0], dese_id:, year: academic_year.range)

          if count_of_teachers == 'NA'
            items.unshift('NA', non_white_student_percentage)
            next 'NA'
          end

          non_white_teacher_percentage = count_of_teachers.to_f
          items.unshift(non_white_teacher_percentage, non_white_student_percentage)

          floor = 5
          benchmark = 0.25

          next 1 if non_white_teacher_percentage < floor

          parity_index = non_white_teacher_percentage / non_white_student_percentage
          parity_index * 4 / benchmark
        }
        Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
      end
    end
  end
end
|
||||
@ -0,0 +1,70 @@
|
||||
require 'watir'
|
||||
require 'csv'
|
||||
|
||||
module Dese
  # Scrapes the student discipline reports and scores out-of-school
  # suspensions (a-phys-i1) and long suspensions (a-phys-i3).
  class TwoAOne
    include Dese::Scraper
    attr_reader :filepaths

    # @param filepaths [Array<Pathname, String>] [students suspended, students disciplined]
    def initialize(filepaths: [Rails.root.join('data', 'admin_data', 'dese', '2A_1_students_suspended.csv'),
                               Rails.root.join('data', 'admin_data', 'dese', '2A_1_students_disciplined.csv')])
      @filepaths = filepaths
    end

    # Runs both scrapes and closes the browser.
    def run_all
      run_a_phys_i1
      run_a_phys_i3

      browser.close
    end

    # '% Out-of-School Suspension' against a benchmark of 5.27; written to filepaths[0].
    def run_a_phys_i1
      filepath = filepaths[0]
      headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
                 'Students', 'Students Disciplined', '% In-School Suspension', '% Out-of-School Suspension',
                 '% Expulsion', '% Removed to Alternate Setting',
                 '% Emergency Removal', '% Students with a School-Based Arrest',
                 '% Students with a Law Enforcement Referral']
      write_headers(filepath:, headers:)
      run do |academic_year|
        url = 'https://profiles.doe.mass.edu/statereport/ssdr.aspx'
        selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
                      'ctl00_ContentPlaceHolder1_ddYear' => academic_year.range }
        submit_id = 'ctl00_ContentPlaceHolder1_btnViewReport'
        calculation = below_benchmark_calculation(header: '% Out-of-School Suspension', benchmark: 5.27)
        admin_data_item_id = 'a-phys-i1'
        Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
      end
    end

    # '% > 10 Days' against a benchmark of 1; written to filepaths[1].
    def run_a_phys_i3
      filepath = filepaths[1]
      headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
                 'Students', 'Students Disciplined', '% 1 Day', '% 2 to 3 Days', '% 4 to 7 Days', '% 8 to 10 Days',
                 '% > 10 Days']
      write_headers(filepath:, headers:)
      run do |academic_year|
        url = 'https://profiles.doe.mass.edu/statereport/ssdr_days_missed.aspx'
        selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
                      'ctl00_ContentPlaceHolder1_ddYear' => academic_year.range }
        submit_id = 'ctl00_ContentPlaceHolder1_btnViewReport'
        calculation = below_benchmark_calculation(header: '% > 10 Days', benchmark: 1)
        admin_data_item_id = 'a-phys-i3'
        Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
      end
    end

    private

    # Returns a lambda scoring the rate found under +header+ as
    # ((benchmark - rate) + benchmark) * 4 / benchmark. It yields nil when the
    # column is missing or the cell is an empty string; the column check runs
    # before the cell is read because indexing an Array with nil raises.
    def below_benchmark_calculation(header:, benchmark:)
      lambda { |headers, items|
        index = headers[header]
        next if index.nil? || items[index] == ''

        rate = items[index].to_f
        ((benchmark - rate) + benchmark) * 4 / benchmark
      }
    end
  end
end
|
||||
@ -0,0 +1,76 @@
|
||||
require 'watir'
|
||||
require 'csv'
|
||||
|
||||
module Dese
  # Scrapes the attendance report and scores chronic absence (a-vale-i1) and
  # attendance rate (a-vale-i2). Both items are appended to the same CSV.
  class TwoCOne
    include Dese::Scraper
    attr_reader :filepaths

    ATTENDANCE_URL = 'https://profiles.doe.mass.edu/statereport/attendance.aspx'

    # @param filepaths [Array<Pathname, String>] only the first entry is used
    def initialize(filepaths: [Rails.root.join('data', 'admin_data', 'dese', '2C_1_attendance.csv')])
      @filepaths = filepaths
    end

    # Rewrites the header row, runs both scrapes and closes the browser.
    def run_all
      write_a_vale_i1_headers
      run_a_vale_i1
      run_a_vale_i2

      browser.close
    end

    def write_a_vale_i1_headers
      filepath = filepaths[0]
      headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
                 'Attendance Rate', 'Average # of Absences', 'Absent 10 or more days', 'Chronically Absent (10% or more)',
                 'Chronically Absent (20% or more)', 'Unexcused > 9 days']
      write_headers(filepath:, headers:)
    end

    # 'Chronically Absent (10% or more)' against a benchmark of 10. Yields nil
    # when the column is missing or the cell is blank.
    def run_a_vale_i1
      run do |academic_year|
        calculation = lambda { |headers, items|
          absence_index = headers['Chronically Absent (10% or more)']
          # Guard before indexing: items[nil] raises TypeError.
          next if absence_index.nil? || items[absence_index].blank?

          benchmark = 10
          absence_rate = items[absence_index].to_f
          ((benchmark - absence_rate) + benchmark) * 4 / benchmark
        }
        attendance_prerequisites(academic_year:, admin_data_item_id: 'a-vale-i1', calculation:)
      end
    end

    # ' Attendance Rate ' (header text including its surrounding spaces)
    # against a benchmark of 90. Yields nil when the column is missing.
    def run_a_vale_i2
      run do |academic_year|
        calculation = lambda { |headers, items|
          attendance = headers[' Attendance Rate ']
          benchmark = 90
          items[attendance].to_f * 4 / benchmark if attendance.present?
        }
        attendance_prerequisites(academic_year:, admin_data_item_id: 'a-vale-i2', calculation:)
      end
    end

    private

    # Builds the Prerequisites shared by both attendance items.
    def attendance_prerequisites(academic_year:, admin_data_item_id:, calculation:)
      selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
                    'ctl00_ContentPlaceHolder1_ddYear' => attendance_year_label(academic_year.range) }
      Prerequisites.new(filepaths[0], ATTENDANCE_URL, selectors, 'btnViewReport', admin_data_item_id, calculation)
    end

    # Text of the year option to select: '2021-22' and '2020-21' carry an
    # ' (End of year)' suffix, every other range is used as is.
    def attendance_year_label(range)
      case range
      when '2021-22', '2020-21'
        "#{range} (End of year)"
      else
        range
      end
    end
  end
end
|
||||
@ -0,0 +1,32 @@
|
||||
module Dashboard
  # Reads every "*.csv" file in a directory (relative to Rails.root) into a
  # hash of DisaggregationRow objects.
  class DisaggregationLoader
    attr_reader :path

    # @param path [String, Pathname] directory holding the CSV files; it is
    #   created if it does not exist
    def initialize(path:)
      @path = path
      initialize_directory
    end

    # Parses each file in slices of 1000 lines, using the file's first line
    # as the header row. Files that are empty or whose first line holds no
    # headers are skipped.
    #
    # @return [Hash] [lasid, district, academic_year] => DisaggregationRow;
    #   a later row with the same key replaces an earlier one
    def load
      data = {}
      Dir.glob(Rails.root.join(path, "*.csv")).each do |filepath|
        puts filepath
        File.open(filepath) do |file|
          header_line = file.first
          next if header_line.nil? # empty file

          headers = CSV.parse(header_line).first
          next if headers.nil? || headers.empty?

          file.lazy.each_slice(1000) do |lines|
            CSV.parse(lines.join, headers:).each do |row|
              values = DisaggregationRow.new(row:, headers:)
              data[[values.lasid, values.district, values.academic_year]] = values
            end
          end
        end
      end
      data
    end

    # Ensures the source directory exists.
    def initialize_directory
      FileUtils.mkdir_p(path)
    end
  end
end
|
||||
@ -0,0 +1,56 @@
|
||||
module Dashboard
  # Read-only view over one parsed CSV row that finds columns by matching the
  # header names against patterns instead of by exact header text.
  class DisaggregationRow
    attr_reader :row, :headers

    # @param row [#[]] row values addressable by header name
    # @param headers [Array<String>] header names of the file
    def initialize(row:, headers:)
      @row = row
      @headers = headers
    end

    def district
      @district ||= value_from(pattern: /District/i)
    end

    def academic_year
      @academic_year ||= value_from(pattern: /Academic\s*Year/i)
    end

    def raw_income
      @income ||= value_from(pattern: /Low\s*Income/i)
    end

    def lasid
      @lasid ||= value_from(pattern: /LASID/i)
    end

    def raw_ell
      @raw_ell ||= value_from(pattern: /EL Student First Year/i)
    end

    # Normalized English-language-learner status.
    #
    # @return [String] "ELL", "Not ELL", or "Unknown" (also when the column
    #   is missing or the cell is empty)
    def ell
      @ell ||= begin
        # to_s so a missing column yields "Unknown" instead of raising on nil.
        value = value_from(pattern: /EL Student First Year/i).to_s.downcase

        case value
        when /lep student 1st year|LEP student not 1st year/i
          "ELL"
        when /Does not apply/i
          "Not ELL"
        else
          "Unknown"
        end
      end
    end

    # Returns the first non-nil row value among the headers matching
    # +pattern+, or nil when none matches. Newlines are removed from a
    # matching header before it is used to look up the row.
    def value_from(pattern:)
      keys = headers.select { |header| pattern.match(header) }
                    .map { |header| header.delete("\n") }
      keys.each do |key|
        value = row[key]
        return value if value
      end
      nil
    end
  end
end
|
||||
@ -0,0 +1,133 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
require "csv"
|
||||
|
||||
module Dashboard
  # Imports per-grade enrollment counts from a CSV file into Respondent records.
  class EnrollmentLoader
    # Respondent columns that are filled from the enrollment file.
    STUDENT_COLUMNS = %i[pk k one two three four five six seven eight nine ten eleven twelve total_students].freeze

    # Upserts one Respondent per (school, academic year) row of the file and
    # then destroys every Respondent whose school did not appear in the file.
    # Rows whose school or academic year cannot be resolved are ignored.
    def self.load_data(filepath:)
      schools = []
      enrollments = []
      CSV.parse(File.read(filepath), headers: true) do |csv_row|
        values = EnrollmentRowValues.new(row: csv_row)
        next unless values.school.present? && values.academic_year.present?

        schools << values.school
        enrollments << create_enrollment_entry(row: values)
      end

      # It's possible that instead of updating all columns on duplicate key, we could just update the student columns and leave total_teachers alone. Right now enrollment data loads before staffing data so it works correctly.
      Respondent.import enrollments, batch_size: 1000, on_duplicate_key_update: STUDENT_COLUMNS.dup

      Respondent.where.not(school: schools).destroy_all
    end

    # Finds or builds the Respondent for the row's school and year and copies
    # every student column onto it, without saving.
    def self.create_enrollment_entry(row:)
      respondent = Respondent.find_or_initialize_by(school: row.school, academic_year: row.academic_year)
      STUDENT_COLUMNS.each do |column|
        respondent.public_send("#{column}=", row.public_send(column))
      end
      respondent
    end

    private_class_method :create_enrollment_entry
  end

  # Named accessors over one row of the enrollment CSV.
  class EnrollmentRowValues
    # Reader name => CSV header for the numbered grades.
    GRADE_HEADERS = {
      one: "1", two: "2", three: "3", four: "4", five: "5", six: "6",
      seven: "7", eight: "8", nine: "9", ten: "10", eleven: "11", twelve: "12"
    }.freeze

    attr_reader :row

    def initialize(row:)
      @row = row
    end

    # School looked up by the integer value of the "DESE ID" column (memoized).
    def school
      @school ||= School.find_by_dese_id(row["DESE ID"].try(:strip).to_i)
    end

    # AcademicYear looked up by the "Academic Year" column (memoized).
    def academic_year
      @academic_year ||= AcademicYear.find_by_range(row["Academic Year"])
    end

    # Pre-kindergarten count; accepts an upper- or lower-case header.
    def pk
      row["PK"] || row["pk"]
    end

    # Kindergarten count; accepts an upper- or lower-case header.
    def k
      row["K"] || row["k"]
    end

    # Defines #one .. #twelve, each returning the raw cell of its grade column.
    GRADE_HEADERS.each do |reader, header|
      define_method(reader) { row[header] }
    end

    # "Total" column as an integer, with thousands separators removed.
    def total_students
      row["Total"].delete(",").to_i
    end
  end
end
|
||||
@ -0,0 +1,55 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
module Dashboard
|
||||
# Recomputes and stores student/teacher response rates for every combination
# of subcategory, school and academic year.
class ResponseRateLoader
  class << self
    # Recalculates response rates for the given scopes.
    #
    # When RAILS_ENV is "test" only the "milford-high-school" school and the
    # "2020-21" academic year are processed; everything else is skipped.
    #
    # @param schools [Enumerable] schools to process
    # @param academic_years [Enumerable] academic years to process
    # @param subcategories [Enumerable] subcategories to process
    def reset(schools: School.all, academic_years: AcademicYear.all, subcategories: Subcategory.all)
      subcategories.each do |subcategory|
        schools.each do |school|
          next if test_env? && school != milford

          academic_years.each do |academic_year|
            next if test_env? && academic_year != test_year

            process_response_rate(subcategory:, school:, academic_year:)
          end
        end
      end
    end

    private

    # The only school processed in the test environment.
    def milford
      School.find_by_slug "milford-high-school"
    end

    # The only academic year processed in the test environment.
    def test_year
      AcademicYear.find_by_range "2020-21"
    end

    # RAILS_ENV as read on first use.
    def rails_env
      @rails_env ||= ENV["RAILS_ENV"]
    end

    def test_env?
      rails_env == "test"
    end

    # Computes both rates and writes them onto the ResponseRate record for
    # the given combination, creating that record when needed.
    def process_response_rate(subcategory:, school:, academic_year:)
      scope = { subcategory:, school:, academic_year: }
      student = StudentResponseRateCalculator.new(**scope)
      teacher = TeacherResponseRateCalculator.new(**scope)

      ResponseRate.find_or_create_by!(**scope).update!(
        student_response_rate: student.rate,
        teacher_response_rate: teacher.rate,
        meets_student_threshold: student.meets_student_threshold?,
        meets_teacher_threshold: teacher.meets_teacher_threshold?
      )
    end
  end
end
|
||||
end
|
||||
@ -0,0 +1,29 @@
|
||||
require 'net/sftp'
|
||||
require 'uri'
|
||||
require 'csv'
|
||||
|
||||
module Sftp
|
||||
# Iterates over the files of a remote SFTP directory.
class Directory
  class << self
    # Connects to the server described by the MCIEA_SFTPTOGO_URL environment
    # variable, prints the name of every regular file found in +path+ and
    # opens each one for reading, handing the open handle to +block+.
    #
    # @param path [String] remote directory to list
    # @yield the block is forwarded to the remote file open call
    def open(path: '/data/survey_responses/clean', &block)
      uri = URI.parse(ENV['MCIEA_SFTPTOGO_URL'])

      Net::SFTP.start(uri.host, uri.user, password: uri.password) do |sftp|
        sftp.dir.foreach(path) do |entry|
          # Directories and other non-file entries are skipped.
          next unless entry.file?

          puts entry.name

          sftp.file.open(filepath(path:, filename: entry.name), 'r', &block)
        end
      end
    end

    private

    # Joins +path+ and +filename+ with exactly one slash between them when
    # +path+ does not already end in one.
    def filepath(path:, filename:)
      separator = path.end_with?('/') ? '' : '/'
      "#{path}#{separator}#{filename}"
    end
  end
end
|
||||
end
|
||||
@ -0,0 +1,33 @@
|
||||
require 'net/sftp'
|
||||
require 'uri'
|
||||
require 'csv'
|
||||
|
||||
module Sftp
|
||||
# Rebuilds student and race records from the survey response files stored on
# the SFTP server.
class RaceLoader
  # Clears the student link on all survey item responses, deletes all
  # StudentRace and Student rows, then feeds every regular file found in
  # +path+ to StudentLoader.
  #
  # Non-file entries (sub-directories and the like) are skipped, matching
  # Sftp::Directory.open; previously they were opened as if they were files.
  #
  # NOTE(review): this reads SFTPTOGO_URL while Sftp::Directory reads
  # MCIEA_SFTPTOGO_URL — confirm the two are meant to differ.
  #
  # @param path [String] remote directory to read files from
  def self.load_data(path: '/data/survey_responses/')
    SurveyItemResponse.update_all(student_id: nil)
    StudentRace.delete_all
    Student.delete_all

    sftptogo_url = ENV['SFTPTOGO_URL']
    uri = URI.parse(sftptogo_url)
    Net::SFTP.start(uri.host, uri.user, password: uri.password) do |sftp|
      sftp.dir.foreach(path) do |entry|
        next unless entry.file?

        filename = entry.name
        puts filename

        sftp.file.open(filepath(path:, filename:), 'r') do |f|
          StudentLoader.from_file(file: f, rules: [Rule::SkipNonLowellSchools])
        end
      end
    end
  end

  # Joins +path+ and +filename+, inserting a slash when +path+ lacks a
  # trailing one.
  def self.filepath(path:, filename:)
    path += '/' unless path.end_with?('/')
    "#{path}#{filename}"
  end

  private_class_method :filepath
end
|
||||
end
|
||||
@ -0,0 +1,74 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
require "csv"
|
||||
|
||||
module Dashboard
|
||||
# Loads teacher staffing counts from a CSV file into Respondent records and
# can carry the previous year's counts forward into the latest year.
class StaffingLoader
  # Reads the CSV at +filepath+, upserts total_teachers for every row whose
  # school and academic year can be resolved, then destroys Respondents that
  # belong to schools not present in the file.
  #
  # @param filepath [String] path to a staffing CSV with a header row
  def self.load_data(filepath:)
    schools = []
    respondents = []
    CSV.parse(File.read(filepath), headers: true) do |row|
      row = StaffingRowValues.new(row:)
      next unless row.school.present? && row.academic_year.present?

      schools << row.school

      respondents << create_staffing_entry(row:)
    end

    Respondent.import respondents, batch_size: 1000, on_duplicate_key_update: [:total_teachers]
    # NOTE(review): when no row resolves to a school this presumably matches every Respondent — confirm
    # that wiping all records on an empty/unmatched file is intended.
    Respondent.where.not(school: schools).destroy_all
  end

  # Copies total_teachers from the second-latest academic year (ordered by
  # range) onto the latest one, for every school.
  #
  # Does nothing when fewer than two academic years exist: there is no
  # previous year to copy from, and without this guard the single year would
  # be treated as both "previous" and "current".
  def self.clone_previous_year_data
    years = AcademicYear.order(:range).last(2)
    return if years.size < 2

    previous_year, current_year = years
    respondents = []
    School.all.each do |school|
      Respondent.where(school:, academic_year: previous_year).each do |respondent|
        current_respondent = Respondent.find_or_initialize_by(school:, academic_year: current_year)
        current_respondent.total_teachers = respondent.total_teachers
        respondents << current_respondent
      end
    end
    Respondent.import respondents, batch_size: 1000, on_duplicate_key_update: [:total_teachers]
  end

  # Finds or builds the Respondent for the row's school/year and sets its
  # teacher count from the row. The record is returned unsaved.
  def self.create_staffing_entry(row:)
    respondent = Respondent.find_or_initialize_by(school: row.school, academic_year: row.academic_year)
    respondent.total_teachers = row.fte_count
    respondent
  end

  private_class_method :create_staffing_entry
end
|
||||
|
||||
# Read-only view over one row of a staffing CSV.
class StaffingRowValues
  attr_reader :row

  # @param row [#[]] a CSV row (or any object indexable by header name)
  def initialize(row:)
    @row = row
  end

  # School looked up by the numeric "DESE ID" cell. A missing cell is treated
  # as ID 0 instead of raising, matching EnrollmentRowValues.
  def school
    @school ||= begin
      dese_id = row["DESE ID"].to_s.strip.to_i
      School.find_by_dese_id(dese_id)
    end
  end

  # Academic year looked up from the "Academic Year" cell.
  def academic_year
    @academic_year ||= begin
      year = row["Academic Year"]
      AcademicYear.find_by_range(year)
    end
  end

  # Raw "FTE Count" cell.
  def fte_count
    row["FTE Count"]
  end
end
|
||||
end
|
||||
@ -0,0 +1,305 @@
|
||||
module Dashboard
|
||||
# Wraps one row of a raw survey-response CSV and derives the values the
# cleaner and loaders need from it: school, academic year, grade,
# demographics, likert scores and the validity checks.
#
# Constructing an instance writes several derived columns back into +row+
# (see #initialize).
class SurveyItemValues
  attr_reader :row, :headers, :survey_items, :schools

  # @param row [CSV::Row, Hash] cells indexed by header name; mutated in place
  # @param headers [Array<String, nil>] header names of the file
  # @param survey_items [Enumerable] survey items present in the file
  # @param schools [#[]] lookup from numeric DESE ID to school
  def initialize(row:, headers:, survey_items:, schools:)
    @row = row
    # Remove any newlines in headers
    headers = headers.map { |item| item.delete("\n") if item.present? }
    @headers = include_all_headers(headers:)
    @survey_items = survey_items
    @schools = schools

    copy_likert_scores_from_variant_survey_items
    row["Income"] = income
    row["Raw Income"] = raw_income
    row["Raw ELL"] = raw_ell
    row["ELL"] = ell
    row["Raw SpEd"] = raw_sped
    row["SpEd"] = sped
    row["Progress Count"] = progress
    row["Race"] ||= races.join(",")
    row["Gender"] ||= gender

    copy_data_to_main_column(main: /Race/i, secondary: /Race Secondary|Race-1/i)
    copy_data_to_main_column(main: /Gender/i, secondary: /Gender Secondary|Gender-1/i)
  end

  # Fills the first header matching +main+ with the value found under a
  # header matching +secondary+, but only when the main cell is nil. Does
  # nothing when no header matches +main+ (previously this wrote to a nil key).
  def copy_data_to_main_column(main:, secondary:)
    main_column = headers.find { |header| main.match(header) }
    return if main_column.nil?

    row[main_column] = value_from(pattern: secondary) if row[main_column].nil?
  end

  # Some survey items have variants, i.e. a survey item with an id of s-tint-q1 might have a variant that
  # looks like s-tint-q1-1. We must ensure that all variants in the form of s-tint-q1-1 have a matching pair.
  # We don't ensure that ids in the form of s-tint-q1 have a matching pair because not all questions have
  # variants.
  #
  # Appends the missing main ids to +headers+ (in place) and returns it.
  def include_all_headers(headers:)
    alternates = headers.filter(&:present?)
                        .filter { |header| header.match?(/^[st]-\w*-\w*-1$/i) }
    alternates.each do |header|
      main = header.sub(/-1\z/, "")
      headers.push(main) unless headers.include?(main)
    end
    headers
  end

  # NOTE(review): #dese_id always returns an Integer (0 when no ID column has a
  # value), so this presumably is true for every row — confirm whether a
  # missing ID is meant to be detected here.
  def dese_id?
    dese_id.present?
  end

  # Date parsed from the "Recorded Date" cell.
  def recorded_date
    @recorded_date ||= begin
      recorded_date = value_from(pattern: /Recorded\s*Date/i)
      Date.parse(recorded_date)
    end
  end

  # Academic year looked up from the recorded date.
  def academic_year
    @academic_year ||= AcademicYear.find_by_date recorded_date
  end

  # Stored response for this row's response id and +survey_item+, or nil.
  # Lookups are cached per survey item.
  def survey_item_response(survey_item:)
    @survey_item_response ||= Hash.new do |memo, item|
      memo[item] = survey_item_responses[[response_id, item.id]]
    end

    @survey_item_response[survey_item]
  end

  # Stored responses for this row's school, academic year and response id,
  # keyed by [response_id, survey item id]. Queried once per instance.
  def survey_item_responses
    @survey_item_responses ||= Hash.new do |memo|
      responses_hash = {}
      SurveyItemResponse.where(school:, academic_year:, response_id:).each do |response|
        responses_hash[[response.response_id, response.survey_item.id]] = response
      end
      memo[[school, academic_year]] = responses_hash
    end

    @survey_item_responses[[school, academic_year]]
  end

  def response_id
    @response_id ||= value_from(pattern: /Response\s*ID/i)
  end

  # Numeric DESE ID taken from the first of the "Dese ID", "School" or
  # "School-…" columns that has a value; 0 when none does.
  def dese_id
    @dese_id ||= begin
      dese_id = value_from(pattern: /Dese\s*ID/i)
      dese_id ||= value_from(pattern: /^School$/i)
      dese_id ||= value_from(pattern: /School-\s*\w/i)

      dese_id.to_i
    end
  end

  # Raw likert cell for +survey_item_id+, falling back to its "-1" variant.
  def likert_score(survey_item_id:)
    row[survey_item_id] || row["#{survey_item_id}-1"]
  end

  def school
    @school ||= schools[dese_id]
  end

  def district
    @district ||= school&.district
  end

  # Grade as an Integer, or nil when the row has no grade value.
  def grade
    @grade ||= value_from(pattern: /Grade|What grade are you in?/i)&.to_i
  end

  # Gender designation derived from the first gender column that has a
  # value, tried from most to least specific header.
  def gender
    @gender ||= begin
      gender_code = value_from(pattern: /Gender self report/i)
      gender_code ||= value_from(pattern: /^Gender$/i)
      gender_code ||= value_from(pattern: /What is your gender?|What is your gender? - Selected Choice/i)
      gender_code ||= value_from(pattern: /Gender-\s*SIS/i)
      gender_code ||= value_from(pattern: /Gender-\s*Qcode/i)
      gender_code ||= value_from(pattern: /Gender - do not use/i)
      gender_code ||= value_from(pattern: /Gender/i)
      Gender.qualtrics_code_from(gender_code)
    end
  end

  # Normalized list of race codes for the row. Sources are tried in order:
  # self report, a column named exactly "Race", the Qualtrics "Selected
  # Choice" question, "Race Secondary", SIS / Qcodes columns, and finally any
  # header containing "race".
  def races
    @races ||= begin
      self_report = value_from(pattern: /Race\s*self\s*report/i)
      race_codes = self_report
      race_codes ||= value_from(pattern: /^RACE$/i)
      # The "?" and parentheses are literal header text and must be escaped;
      # unescaped, the pattern could never match the real column.
      race_codes ||= value_from(
        pattern: %r{What is your race/ethnicity\?\s*\(Please select all that apply\) - Selected Choice}i
      )
      race_codes ||= value_from(pattern: /Race Secondary/i)

      # sis is only populated when nothing above produced a value.
      sis = nil
      if race_codes.nil?
        sis = value_from(pattern: /Race-\s*SIS/i) || value_from(pattern: /Race\s*-\s*Qcodes/i)
        race_codes = sis
      end
      race_codes ||= value_from(pattern: /RACE/i) || ""

      # Values may be separated by commas and/or the word "and".
      race_codes = race_codes.split(",")
                             .flat_map { |word| word.split(/\s+and\s+/i) }
                             .reject(&:blank?)
                             .map { |race| Race.qualtrics_code_from(race) }.map(&:to_i)

      # Only check the secondary hispanic column if we don't have self reported data and are relying on SIS data
      if self_report.nil? && sis.present?
        hispanic = value_from(pattern: /Hispanic\s*Latino/i)&.downcase
        race_codes = race_codes.reject { |code| code == 5 } if hispanic == "true" && race_codes.count == 1
        race_codes = race_codes.push(4) if hispanic == "true"
      end

      Race.normalize_race_list(race_codes)
    end
  end

  def lasid
    @lasid ||= value_from(pattern: /LASID/i)
  end

  def raw_income
    @raw_income ||= value_from(pattern: /Low\s*Income|Raw\s*Income|SES-\s*SIS/i)
  end

  def income
    @income ||= Income.to_designation(raw_income)
  end

  def raw_ell
    @raw_ell ||= value_from(pattern: /EL Student First Year|Raw\s*ELL|ELL-\s*SIS/i)
  end

  def ell
    @ell ||= Ell.to_designation(raw_ell)
  end

  def raw_sped
    @raw_sped ||= value_from(pattern: /Special\s*Ed\s*Status|Raw\s*SpEd|SpEd-\s*SIS/i)
  end

  def sped
    @sped ||= Sped.to_designation(raw_sped)
  end

  # Stripped value of the first header matching +pattern+ whose cell is not
  # nil. Returns nil when there is no such cell, or when the value is blank
  # or an N/A marker (e.g. "NA", "N/A", "#N/A").
  def value_from(pattern:)
    matches = headers.select { |header| pattern.match(header) }
                     .map { |item| item.delete("\n") }

    output = matches.lazy.filter_map { |header| row[header]&.strip }.first

    return nil if output&.match?(%r{^#*N/*A$}i) || output.blank?

    output
  end

  # Headers kept in cleaned output: present, not starting with "Q", and not
  # a "-1" survey item variant.
  def sanitized_headers
    @sanitized_headers ||= headers.select(&:present?)
                                  .reject { |header| header.start_with? "Q" }
                                  .reject { |header| header.match?(/^[st]-\w*-\w*-1$/i) }
  end

  # Row cells in #sanitized_headers order.
  def to_a
    sanitized_headers.map { |header| row[header] }
  end

  def duration
    @duration ||= value_from(pattern: /Duration|Duration \(in seconds\)|Duration\.\.\(in\.seconds\)/i)
  end

  def valid?
    valid_duration? && valid_progress? && valid_grade? && valid_sd?
  end

  # :teacher when any header starts with "t-", otherwise :student.
  def respondent_type
    return :teacher if headers.filter(&:present?).any? { |header| header.start_with? "t-" }

    :student
  end

  def survey_type
    @survey_type ||= SurveyItem.survey_type(survey_item_ids:)
  end

  def survey_item_ids
    @survey_item_ids ||= sanitized_headers.filter { |header| header.start_with?("t-", "s-") }
  end

  # True when the duration is missing, or meets the minimum number of
  # seconds for the survey type.
  def valid_duration?
    return true if duration.nil? || duration == "" || duration.downcase == "n/a" || duration.downcase == "na"

    span_in_seconds = duration.to_i
    return span_in_seconds >= 300 if survey_type == :teacher
    return span_in_seconds >= 240 if survey_type == :standard
    return span_in_seconds >= 100 if survey_type == :short_form

    true
  end

  # Number of survey item columns that have a non-nil cell.
  def progress
    survey_item_ids.reject { |header| row[header].nil? }.count
  end

  # True when enough survey items were answered for the survey type; false
  # for an unrecognized survey type.
  def valid_progress?
    return false if progress.nil?

    return progress >= 12 if survey_type == :teacher
    return progress >= 11 if survey_type == :standard
    return progress >= 5 if survey_type == :short_form
    return progress >= 5 if survey_type == :early_education

    false
  end

  # True when no grade is given, for teacher surveys, or when the school has
  # a positive enrollment recorded for the grade in this academic year.
  def valid_grade?
    return true if grade.nil?

    return true if respondent_type == :teacher

    respondents = Respondent.where(school:, academic_year:).first
    if respondents.present? && respondents.enrollment_by_grade[grade].present?
      enrollment = respondents.enrollment_by_grade[grade]
    end
    return false if enrollment.nil?

    valid = enrollment > 0
    puts "Invalid grade #{grade} for #{school.name} #{academic_year.formatted_range}" unless valid
    valid
  end

  # False when fewer than two non-zero likert scores exist or when all of
  # them are identical (standard deviation of zero). Early education
  # surveys are exempt.
  def valid_sd?
    return true if survey_type == :early_education

    survey_item_headers = headers.filter(&:present?).filter { |header| header.start_with?("s-", "t-") }
    likert_scores = survey_item_headers
                    .map { |header| likert_score(survey_item_id: header).to_i }
                    .reject(&:zero?)
    return false if likert_scores.count < 2

    !likert_scores.stdev.zero?
  end

  def valid_school?
    school.present?
  end

  private

  # Copies the score of every "-1" variant column onto its main column when
  # the main column is blank. Only the trailing "-1" is removed to find the
  # main id, consistent with #include_all_headers.
  def copy_likert_scores_from_variant_survey_items
    headers.filter(&:present?).filter { |header| header.end_with? "-1" }.each do |header|
      likert_score = row[header]
      main_item = header.delete_suffix("-1")
      row[main_item] = likert_score if likert_score.present? && row[main_item].blank?
    end
  end
end
|
||||
end
|
||||
@ -0,0 +1,141 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
module Dashboard
|
||||
# Imports survey item responses from a survey CSV, either from a local path
# (#load_data) or from an already opened file-like object (#from_file).
class SurveyResponsesDataLoader
  # Reads the CSV at +filepath+ and upserts its responses in slices of 500
  # lines.
  #
  # @param filepath [String] path to a survey CSV whose first line is the header
  def load_data(filepath:)
    File.open(filepath) do |file|
      headers = file.first
      headers_array = CSV.parse(headers).first
      all_survey_items = survey_items(headers:)

      file.lazy.each_slice(500) do |lines|
        survey_item_responses = CSV.parse(lines.join, headers:).map do |row|
          process_row(row: SurveyItemValues.new(row:, headers: headers_array, survey_items: all_survey_items,
                                                schools:))
        end
        import_responses(survey_item_responses)
      end
    end
  end

  # Same as #load_data but reads line by line from +file+, which only needs
  # to respond to +gets+ and +eof?+.
  #
  # @param file [#gets, #eof?] open survey CSV positioned at the header line
  def from_file(file:)
    headers = file.gets
    headers_array = CSV.parse(headers).first
    all_survey_items = survey_items(headers:)

    survey_item_responses = []
    row_count = 0
    until file.eof?
      line = file.gets
      next unless line.present?

      CSV.parse(line, headers:).each do |row|
        survey_item_responses << process_row(row: SurveyItemValues.new(row:, headers: headers_array,
                                                                       survey_items: all_survey_items, schools:))
      end

      row_count += 1
      next unless row_count == 500

      # Flush every 500 lines so the pending batch stays bounded.
      import_responses(survey_item_responses)
      survey_item_responses = []
      row_count = 0
    end

    import_responses(survey_item_responses)
  end

  private

  # Upserts a batch of (possibly nested, possibly nil) responses.
  def import_responses(responses)
    SurveyItemResponse.import responses.compact.flatten, batch_size: 500, on_duplicate_key_update: :all
  end

  # The lookup tables below are built once per loader instance. schools,
  # genders and races were previously rebuilt on every call.
  def schools
    @schools ||= School.school_hash
  end

  def genders
    @genders ||= Gender.by_qualtrics_code
  end

  def races
    @races ||= Race.by_qualtrics_code
  end

  def incomes
    @incomes ||= Income.by_slug
  end

  def ells
    @ells ||= Ell.by_designation
  end

  def speds
    @speds ||= Sped.by_designation
  end

  # Returns the responses built for +row+, or nil when the row cannot be
  # tied to a known school.
  def process_row(row:)
    return unless row.dese_id?
    return unless row.school.present?

    process_survey_items(row:)
  end

  # Replaces the student's races with those of the row, then builds or
  # updates one response per survey item that has a valid likert score.
  def process_survey_items(row:)
    student = Student.find_or_create_by(response_id: row.response_id, lasid: row.lasid)
    student.races.delete_all
    tmp_races = row.races.map { |race| races[race] }
    student.races += tmp_races

    row.survey_items.map do |survey_item|
      likert_score = row.likert_score(survey_item_id: survey_item.survey_item_id) || next

      unless likert_score.valid_likert_score?
        puts "Response ID: #{row.response_id}, Likert score: #{likert_score} rejected" unless likert_score == "NA"
        next
      end
      response = row.survey_item_response(survey_item:)
      create_or_update_response(survey_item_response: response, likert_score:, row:, survey_item:, student:)
    end.compact
  end

  # Updates +survey_item_response+ in memory when it exists, otherwise
  # builds a new one. Nothing is saved here; the caller imports the result.
  def create_or_update_response(survey_item_response:, likert_score:, row:, survey_item:, student:)
    gender = genders[row.gender]
    grade = row.grade
    income = incomes[row.income.parameterize]
    ell = ells[row.ell]
    sped = speds[row.sped]

    if survey_item_response.present?
      survey_item_response.likert_score = likert_score
      survey_item_response.grade = grade
      survey_item_response.gender = gender
      survey_item_response.recorded_date = row.recorded_date
      survey_item_response.income = income
      survey_item_response.ell = ell
      survey_item_response.sped = sped
      survey_item_response.student = student
      survey_item_response
    else
      SurveyItemResponse.new(response_id: row.response_id, academic_year: row.academic_year, school: row.school,
                             survey_item:, likert_score:, grade:, gender:, recorded_date: row.recorded_date,
                             income:, ell:, sped:, student:)
    end
  end

  # Survey items whose ids appear in the header line.
  def survey_items(headers:)
    SurveyItem.where(survey_item_id: get_survey_item_ids_from_headers(headers:))
  end

  # Header names that start with "t-" or "s-".
  #
  # @param headers [String] the raw CSV header line
  def get_survey_item_ids_from_headers(headers:)
    CSV.parse(headers).first
       .filter(&:present?)
       .filter { |header| header.start_with? "t-", "s-" }
  end
end
|
||||
|
||||
# String extension used by the loader to vet raw likert cells.
module StringMonkeyPatches
  # True when the string's integer value lies on the 1-5 likert scale.
  # Non-numeric text converts to 0 and is therefore rejected.
  def valid_likert_score?
    (1..5).cover?(to_i)
  end
end

String.include(StringMonkeyPatches)
|
||||
end
|
||||
Loading…
Reference in new issue