chore:seed demographics

main
Nelson Jovel 2 years ago
parent 27550e0b30
commit bd8dfe45d3

@ -6,7 +6,7 @@ module Dashboard
academic_years << { range: }
end
AcademicYear.upsert_all(academic_years)
AcademicYear.upsert_all(academic_years, unique_by: :range)
end
def seed_districts_and_schools(csv_file)
@ -30,7 +30,7 @@ module Dashboard
is_hs: marked?(hs), slug: school_name.parameterize }
end
School.insert_all(schools)
School.insert_all(schools, unique_by: :dese_id)
Respondent.joins(:school).where.not("school.dese_id": dese_ids).destroy_all
School.where.not(dese_id: dese_ids).destroy_all
@ -97,9 +97,9 @@ module Dashboard
AdminDataItem.where.not(id: admin_data_item_ids).delete_all
end
# def seed_demographics(csv_file)
# DemographicLoader.load_data(filepath: csv_file)
# end
def seed_demographics(csv_file)
DemographicLoader.load_data(filepath: csv_file)
end
# def seed_enrollment(csv_file)
# EnrollmentLoader.load_data(filepath: csv_file)

@ -0,0 +1,146 @@
require "fileutils"
module Dashboard
class Cleaner
attr_reader :input_filepath, :output_filepath, :log_filepath
def initialize(input_filepath:, output_filepath:, log_filepath:)
@input_filepath = input_filepath
@output_filepath = output_filepath
@log_filepath = log_filepath
initialize_directories
end
def clean
Dir.glob(Rails.root.join(input_filepath, "*.csv")).each do |filepath|
puts filepath
File.open(filepath) do |file|
processed_data = process_raw_file(file:)
processed_data in [headers, clean_csv, log_csv, data]
return if data.empty?
filename = filename(headers:, data:, filepath:)
write_csv(data: clean_csv, output_filepath:, filename:)
write_csv(data: log_csv, output_filepath: log_filepath, prefix: "removed.", filename:)
end
end
end
def filename(headers:, data:, filepath:)
output = []
survey_item_ids = headers.filter(&:present?).filter do |header|
header.start_with?("s-", "t-")
end.reject { |item| item.end_with? "-1" }
survey_type = SurveyItem.survey_type(survey_item_ids:)
range = data.first.academic_year.range
districts = data.map do |row|
row.district.short_name
end.to_set.to_a
schools = data.map do |row|
row.school.name
end.to_set
part = filepath&.match(/[\b\s_.]+(part|form)[\W*_](?<label>[\w\d])/i)&.named_captures&.[]("label")&.upcase
school_name = schools.first.parameterize
output << districts.join(".")
output << school_name if schools.length == 1
output << survey_type.to_s
output << "Part-" + part unless part.nil?
output << range
output << "csv"
output.join(".")
end
def process_raw_file(file:)
clean_csv = []
log_csv = []
data = []
headers = CSV.parse(file.first).first
duplicate_header = headers.detect { |header| headers.count(header) > 1 }
unless duplicate_header.nil?
puts "\n>>>>>>>>>>>>>>>>>> Duplicate header found. This will misalign column headings. Please delete or rename the duplicate column: #{duplicate_header} \n>>>>>>>>>>>>>> \n"
end
headers = headers.to_set
headers = headers.merge(Set.new(["Raw Income", "Income", "Raw ELL", "ELL", "Raw SpEd", "SpEd", "Progress Count",
"Race", "Gender"])).to_a
filtered_headers = include_all_headers(headers:)
filtered_headers = remove_unwanted_headers(headers: filtered_headers)
log_headers = (filtered_headers + ["Valid Duration?", "Valid Progress?", "Valid Grade?",
"Valid Standard Deviation?"]).flatten
clean_csv << filtered_headers
log_csv << log_headers
all_survey_items = survey_items(headers:)
file.lazy.each_slice(1000) do |lines|
CSV.parse(lines.join, headers:).map do |row|
values = SurveyItemValues.new(row:, headers:,
survey_items: all_survey_items, schools:)
next unless values.valid_school?
data << values
values.valid? ? clean_csv << values.to_a : log_csv << (values.to_a << values.valid_duration?.to_s << values.valid_progress?.to_s << values.valid_grade?.to_s << values.valid_sd?.to_s)
end
end
[headers, clean_csv, log_csv, data]
end
private
def include_all_headers(headers:)
alternates = headers.filter(&:present?)
.filter { |header| header.match?(/^[st]-\w*-\w*-1$/i) }
alternates.each do |header|
main = header.sub(/-1\z/, "")
headers.push(main) unless headers.include?(main)
end
headers
end
def initialize_directories
create_ouput_directory
create_log_directory
end
def remove_unwanted_headers(headers:)
headers.to_set.to_a.compact.reject do |item|
item.start_with? "Q"
end.reject { |header| header.match?(/^[st]-\w*-\w*-1$/i) }
end
def write_csv(data:, output_filepath:, filename:, prefix: "")
csv = CSV.generate do |csv|
data.each do |row|
csv << row
end
end
File.write(output_filepath.join(prefix + filename), csv)
end
def schools
@schools ||= School.school_hash
end
def genders
@genders ||= Gender.by_qualtrics_code
end
def survey_items(headers:)
survey_item_ids = headers
.filter(&:present?)
.filter { |header| header.start_with? "t-", "s-" }
@survey_items ||= SurveyItem.where(survey_item_id: survey_item_ids)
end
def create_ouput_directory
FileUtils.mkdir_p output_filepath
end
def create_log_directory
FileUtils.mkdir_p log_filepath
end
end
end

@ -0,0 +1,63 @@
# frozen_string_literal: true
require "csv"
module Dashboard
class DemographicLoader
def self.load_data(filepath:)
CSV.parse(File.read(filepath), headers: true) do |row|
process_race(row:)
process_gender(row:)
create_from_column(column: "Income", row:, model: Income)
create_from_column(column: "ELL", row:, model: Ell)
create_from_column(column: "Special Ed Status", row:, model: Sped)
end
end
def self.process_race(row:)
qualtrics_code = row["Race Qualtrics Code"].to_i
designation = row["Race/Ethnicity"]
return unless qualtrics_code && designation
if qualtrics_code.between?(6, 7)
UnknownRace.new(qualtrics_code:, designation:)
else
KnownRace.new(qualtrics_code:, designation:)
end
end
def self.process_gender(row:)
qualtrics_code = row["Gender Qualtrics Code"].to_i
designation = row["Sex/Gender"]
return unless qualtrics_code && designation
gender = Gender.find_or_create_by!(qualtrics_code:, designation:)
gender.save
end
def self.create_from_column(column:, row:, model:)
designation = row[column]
return unless designation
model.find_or_create_by!(designation:)
end
end
class KnownRace
def initialize(qualtrics_code:, designation:)
known = Race.find_or_create_by!(qualtrics_code:)
known.designation = designation
known.slug = designation.parameterize
known.save
end
end
class UnknownRace
def initialize(qualtrics_code:, designation:)
unknown = Race.find_or_create_by!(qualtrics_code: 99)
unknown.designation = "Race/Ethnicity Not Listed"
unknown.slug = designation.parameterize
unknown.save
end
end
end

@ -0,0 +1,38 @@
require 'watir'
require 'csv'
module Dese
module Enrollments
include Dese::Scraper
attr_reader :filepaths
def scrape_enrollments(filepath:)
headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
'PK', 'K', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', 'SP', 'Total']
write_headers(filepath:, headers:)
run do |academic_year|
admin_data_item_id = ''
url = 'https://profiles.doe.mass.edu/statereport/enrollmentbygrade.aspx'
range = academic_year.range
selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
'ctl00_ContentPlaceHolder1_ddYear' => range }
submit_id = 'btnViewReport'
calculation = ->(_headers, _items) { 'NA' }
Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
end
end
def student_count(filepath:, dese_id:, year:)
@students ||= {}
if @students.count == 0
CSV.parse(File.read(filepath), headers: true).map do |row|
academic_year = row['Academic Year']
school_id = row['DESE ID'].to_i
total = row['Total'].gsub(',', '').to_i
@students[[school_id, academic_year]] = total
end
end
@students[[dese_id, year]]
end
end
end

@ -0,0 +1,48 @@
require 'watir'
require 'csv'
module Dese
class FiveCOne
include Dese::Scraper
attr_reader :filepaths
def initialize(filepaths: [Rails.root.join('data', 'admin_data', 'dese', '5C_1_art_course.csv')])
@filepaths = filepaths
end
def run_all
filepath = filepaths[0]
headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
'K', '01', '02', '03', '04', '05', '06', '07', '08', '09', '10',
'11', '12', 'All Grades', 'Total Students']
write_headers(filepath:, headers:)
run_a_picp_i1(filepath:)
browser.close
end
def run_a_picp_i1(filepath:)
run do |academic_year|
admin_data_item_id = 'a-picp-i1'
url = 'https://profiles.doe.mass.edu/statereport/artcourse.aspx'
range = "#{academic_year.range.split('-')[1].to_i + 2000}"
selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
'ctl00_ContentPlaceHolder1_ddYear' => range,
'ctl00_ContentPlaceHolder1_ddView' => 'Percent' }
submit_id = 'btnViewReport'
calculation = lambda { |headers, items|
percent_graduated_index = headers['All Grades'] - 1
if items[percent_graduated_index].nil? || items[percent_graduated_index] == '' || items[percent_graduated_index].strip == '.0'
return 'NA'
end
percent_passing = items[percent_graduated_index].to_f
benchmark = 77.5
percent_passing * 4 / benchmark if percent_graduated_index.present?
}
Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
end
end
end
end

@ -0,0 +1,56 @@
require 'watir'
require 'csv'
module Dese
class FiveDTwo
include Dese::Scraper
include Dese::Enrollments
attr_reader :filepaths
def initialize(filepaths: [Rails.root.join('data', 'admin_data', 'dese', 'enrollments.csv'),
Rails.root.join('data', 'admin_data', 'dese', '5D_2_age_staffing.csv')])
@filepaths = filepaths
end
def run_all
filepath = filepaths[0]
scrape_enrollments(filepath:)
filepath = filepaths[1]
headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
'<26 yrs (# )', '26-32 yrs (#)', '33-40 yrs (#)', '41-48 yrs (#)', '49-56 yrs (#)', '57-64 yrs (#)', 'Over 64 yrs (#)', 'FTE Count']
write_headers(filepath:, headers:)
run_a_phya_i1(filepath:)
browser.close
end
def run_a_phya_i1(filepath:)
run do |academic_year|
admin_data_item_id = 'a-phya-i1'
url = 'https://profiles.doe.mass.edu/statereport/agestaffing.aspx'
range = academic_year.range
selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
'ctl00_ContentPlaceHolder1_ddYear' => range,
'ctl00_ContentPlaceHolder1_ddJobClassification' => 'School Nurse -- Non-Special Education' }
submit_id = 'btnViewReport'
calculation = lambda { |headers, items|
nurse_index = headers['FTE Count']
return 'NA' if items[nurse_index] == '' || items[nurse_index].strip == '.0'
nurse_count = items[nurse_index].to_f
benchmark = 750
nurse_count * 4 / benchmark if nurse_index.present?
dese_id = items[headers['School Code']].to_i
num_of_students = student_count(filepath: filepaths[0], dese_id:, year: academic_year.range) || 0
items << num_of_students
items << (num_of_students / nurse_count)
((benchmark - (num_of_students / nurse_count)) + benchmark) * 4 / benchmark
}
Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
end
end
end
end

@ -0,0 +1,44 @@
require 'watir'
require 'csv'
module Dese
class FourAOne
include Dese::Scraper
attr_reader :filepaths
def initialize(filepaths: [Rails.root.join('data', 'admin_data', 'dese', '4A_1_grade_nine_course_pass.csv')])
@filepaths = filepaths
end
def run_all
filepath = filepaths[0]
headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
'# Grade Nine Students', '# Passing All Courses', '% Passing All Courses']
write_headers(filepath:, headers:)
run_a_ovpe_i1(filepath:)
browser.close
end
def run_a_ovpe_i1(filepath:)
run do |academic_year|
admin_data_item_id = 'a-ovpe-i1'
url = 'https://profiles.doe.mass.edu/statereport/gradeninecoursepass.aspx'
range = "#{academic_year.range.split('-')[1].to_i + 2000}"
selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
'ctl00_ContentPlaceHolder1_ddYear' => range }
submit_id = 'btnViewReport'
calculation = lambda { |headers, items|
passing_index = headers['% Passing All Courses']
return 'NA' if items[passing_index] == '' || items[passing_index].strip == '.0'
percent_passing = items[passing_index].to_f
benchmark = 95
percent_passing * 4 / benchmark if passing_index.present?
}
Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
end
end
end
end

@ -0,0 +1,105 @@
require 'watir'
require 'csv'
module Dese
class FourBTwo
include Dese::Scraper
attr_reader :filepaths
def initialize(filepaths: [Rails.root.join('data', 'admin_data', 'dese', '4B_2_four_year_grad.csv'),
Rails.root.join('data', 'admin_data', 'dese', '4B_2_retention.csv'),
Rails.root.join('data', 'admin_data', 'dese', '4B_2_five_year_grad.csv')])
@filepaths = filepaths
end
def run_all
filepath = filepaths[0]
headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
'# in Cohort', '% Graduated', '% Still in School', '% Non-Grad Completers', '% H.S. Equiv.',
'% Dropped Out', '% Permanently Excluded']
write_headers(filepath:, headers:)
run_a_degr_i1(filepath:)
filepath = filepaths[1]
headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
'# Enrolled', '# Retained', '% Retained', '01', '02', '03', '04', '05', '06', '07', '08', '09', '10',
'11', '12']
write_headers(filepath:, headers:)
run_a_degr_i2(filepath:)
filepath = filepaths[2]
headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
'# in Cohort', '% Graduated', '% Still in School', '% Non-Grad Completers', '% H.S. Equiv.',
'% Dropped Out', '% Permanently Excluded']
write_headers(filepath:, headers:)
run_a_degr_i3(filepath:)
browser.close
end
def run_a_degr_i1(filepath:)
run do |academic_year|
admin_data_item_id = 'a-degr-i1'
url = 'https://profiles.doe.mass.edu/statereport/gradrates.aspx'
range = "#{academic_year.range.split('-')[1].to_i + 2000}"
selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
'ctl00_ContentPlaceHolder1_ddYear' => range,
'ctl00_ContentPlaceHolder1_ddRateType' => '4yr Grad' }
submit_id = 'btnViewReport'
calculation = lambda { |headers, items|
percent_graduated_index = headers['% Graduated']
return 'NA' if items[percent_graduated_index] == '' || items[percent_graduated_index].strip == '.0'
percent_passing = items[percent_graduated_index].to_f
benchmark = 80
percent_passing * 4 / benchmark if percent_graduated_index.present?
}
Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
end
end
def run_a_degr_i2(filepath:)
run do |academic_year|
admin_data_item_id = 'a-degr-i2'
url = 'https://profiles.doe.mass.edu/statereport/retention.aspx'
range = "#{academic_year.range.split('-')[1].to_i + 2000}"
selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
'ctl00_ContentPlaceHolder1_ddYear' => range,
'ctl00_ContentPlaceHolder1_ddView' => 'Percent' }
submit_id = 'btnViewReport'
calculation = lambda { |headers, items|
retained_index = headers['% Retained']
return 'NA' if items[retained_index] == '' || items[retained_index].strip == '.0'
percent_retained = items[retained_index].to_f
benchmark = 2
((benchmark - percent_retained) + benchmark) * 4 / benchmark if retained_index.present?
}
Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
end
end
def run_a_degr_i3(filepath:)
run do |academic_year|
admin_data_item_id = 'a-degr-i3'
url = 'https://profiles.doe.mass.edu/statereport/gradrates.aspx'
range = "#{academic_year.range.split('-')[1].to_i + 2000}"
selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
'ctl00_ContentPlaceHolder1_ddYear' => range,
'ctl00_ContentPlaceHolder1_ddRateType' => '5yr Grad' }
submit_id = 'btnViewReport'
calculation = lambda { |headers, items|
percent_graduated_index = headers['% Graduated']
return 'NA' if items[percent_graduated_index] == '' || items[percent_graduated_index].strip == '.0'
percent_passing = items[percent_graduated_index].to_f
benchmark = 85
percent_passing * 4 / benchmark if percent_graduated_index.present?
}
Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
end
end
end
end

@ -0,0 +1,74 @@
require 'watir'
require 'csv'
# TODO: convert this to simpler format and add a run_all method
module Dese
class FourDOne
attr_reader :filepath
def initialize(filepath: Rails.root.join('data', 'admin_data', 'dese', '4D_1_plans_of_grads.csv'))
@filepath = filepath
end
def run_all
url = 'https://profiles.doe.mass.edu/statereport/plansofhsgrads.aspx'
browser = Watir::Browser.new
write_headers(filepath:)
academic_years = AcademicYear.all
academic_years.each do |academic_year|
table = scrape(browser:, url:, range: academic_year.range)
id = 'a-cgpr-i1'
write_csv(table:, filepath:, range: academic_year.range, id:) unless table.nil?
end
browser.close
end
def scrape(browser:, url:, range:)
browser.goto(url)
return unless browser.option(text: range).present?
browser.select(id: 'ctl00_ContentPlaceHolder1_ddReportType').select(/School/)
browser.select(id: 'ctl00_ContentPlaceHolder1_ddYear').select(text: range)
browser.button(id: 'btnViewReport').click
sleep Dese::Scraper::DELAY # Sleep to prevent hitting mass.edu with too many requests
document = Nokogiri::HTML(browser.html)
document.css('tr')
end
def write_headers(filepath:)
CSV.open(filepath, 'w') do |csv|
headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID', '4 Year Private College', '4 Year Public College', '2 Year Private College', '2 Year Public College',
'Other Post Secondary', 'Apprenticeship', 'Work', 'Military', 'Other', 'Unknown', 'Total']
csv << headers
end
end
def write_csv(table:, filepath:, range:, id:)
CSV.open(filepath, 'a') do |csv|
table.each do |row|
items = row.css('td').map(&:text)
dese_id = items[1].to_i
next if dese_id.nil? || dese_id.zero?
raw_likert_score = calculate(cells: items)
likert_score = raw_likert_score
likert_score = 5 if raw_likert_score > 5
likert_score = 1 if raw_likert_score < 1
likert_score = likert_score.round(2)
output = []
output << raw_likert_score
output << likert_score
output << id
output << range
output << items
csv << output.flatten
end
end
end
def calculate(cells:)
(cells[2].to_f + cells[3].to_f + cells[4].to_f + cells[5].to_f + cells[6].to_f + cells[7].to_f + cells[8].to_f) * 4 / 75
end
end
end

@ -0,0 +1,71 @@
require "csv"
module Dese
class Loader
def self.load_data(filepath:)
admin_data_values = []
CSV.parse(File.read(filepath), headers: true) do |row|
score = likert_score(row:)
next unless valid_likert_score(likert_score: score)
admin_data_values << create_admin_data_value(row:, score:)
end
AdminDataValue.import(admin_data_values.flatten.compact, batch_size: 1_000, on_duplicate_key_update: :all)
end
private
def self.valid_likert_score(likert_score:)
likert_score >= 1 && likert_score <= 5
end
def self.likert_score(row:)
likert_score = (row["Likert Score"] || row["LikertScore"] || row["Likert_Score"]).to_f
likert_score.round_up_to_one.round_down_to_five
end
def self.ay(row:)
row["Academic Year"] || row["AcademicYear"]
end
def self.dese_id(row:)
row["DESE ID"] || row["Dese ID"] || row["Dese Id"] || row["School ID"]
end
def self.admin_data_item(row:)
row["Admin Data Item"] || row["Item ID"] || row["Item Id"] || row["Item ID"]
end
def self.create_admin_data_value(row:, score:)
school = School.find_by_dese_id(dese_id(row:).to_i)
admin_data_item_id = admin_data_item(row:)
return if school.nil?
return if admin_data_item_id.nil? || admin_data_item_id.blank?
admin_data_value = AdminDataValue.find_by(academic_year: AcademicYear.find_by_range(ay(row:)),
school:,
admin_data_item: AdminDataItem.find_by_admin_data_item_id(admin_data_item_id))
if admin_data_value.present?
admin_data_value.likert_score = score
admin_data_value.save
nil
else
AdminDataValue.new(
likert_score: score,
academic_year: AcademicYear.find_by_range(ay(row:)),
school:,
admin_data_item: AdminDataItem.find_by_admin_data_item_id(admin_data_item(row:))
)
end
end
private_class_method :valid_likert_score
private_class_method :likert_score
private_class_method :ay
private_class_method :dese_id
private_class_method :admin_data_item
private_class_method :create_admin_data_value
end
end

@ -0,0 +1,115 @@
require 'watir'
require 'csv'
# TODO: convert this to simpler format and add a run_all method
module Dese
class OneAOne
attr_reader :filepath
def initialize(filepath: Rails.root.join('data', 'admin_data', 'dese', '1A_1_teacher_data.csv'))
@filepath = filepath
end
def run_all
url = 'https://profiles.doe.mass.edu/statereport/teacherdata.aspx'
browser = Watir::Browser.new
write_headers(filepath:)
academic_years = AcademicYear.all
academic_years.each do |academic_year|
document = scrape(browser:, url:, range: academic_year.range)
id = 'a-exp-i1'
write_csv(document:, filepath:, range: academic_year.range, id:) unless document.nil?
end
browser.close
end
def scrape(browser:, url:, range:)
browser.goto(url)
return unless browser.option(text: 'School').present?
return unless browser.option(text: range).present?
browser.select(id: 'ctl00_ContentPlaceHolder1_ddReportType').select(text: 'School')
browser.select(id: 'ctl00_ContentPlaceHolder1_ddYear').select(text: range)
browser.button(id: 'ctl00_ContentPlaceHolder1_btnViewReport').click
sleep Dese::Scraper::DELAY # Sleep to prevent hitting mass.edu with too many requests
Nokogiri::HTML(browser.html)
end
def write_headers(filepath:)
CSV.open(filepath, 'w') do |csv|
headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID', 'Total # of Teachers(FTE)', 'Percent of Teachers Licensed',
'Student/Teacher Ratio', 'Percent of Experienced Teachers', 'Percent of Teachers without Waiver or Provisional License', 'Percent Teaching in-field']
csv << headers
end
end
def write_csv(document:, filepath:, range:, id:)
table = document.css('tr')
headers = document.css('.sorting')
header_hash = headers.each_with_index.map { |header, index| [header.text, index] }.to_h
experienced_teacher_index = header_hash['Percent of Experienced Teachers']
dese_id_index = header_hash['School Code']
CSV.open(filepath, 'a') do |csv|
table.each do |row|
items = row.css('td').map(&:text)
dese_id = items[1].to_i
next if dese_id.nil? || dese_id.zero?
raw_likert_score = items[experienced_teacher_index].to_f * 4 / 80 if experienced_teacher_index.present?
raw_likert_score ||= 'NA'
likert_score = raw_likert_score
if likert_score != 'NA'
likert_score = 5 if likert_score > 5
likert_score = 1 if likert_score < 1
likert_score = likert_score.round(2)
end
output = []
output << raw_likert_score
output << likert_score
output << 'a-exp-i1'
output << range
output << items
output = output.flatten
csv << output
end
end
in_field_index = header_hash['Percent Teaching In-Field']
CSV.open(filepath, 'a') do |csv|
table.each do |row|
items = row.css('td').map(&:text)
dese_id = items[dese_id_index].to_i
next if dese_id.nil? || dese_id.zero?
percent_in_field = items[in_field_index].to_f if in_field_index.present?
if in_field_index.present? && percent_in_field.present? && !percent_in_field.zero?
raw_likert_score = percent_in_field * 4 / 95
end
raw_likert_score ||= 'NA'
likert_score = raw_likert_score
if likert_score != 'NA'
likert_score = 5 if likert_score > 5
likert_score = 1 if likert_score < 1
likert_score = likert_score.round(2)
end
output = []
output << raw_likert_score
output << likert_score
output << 'a-exp-i3'
output << range
output << items
output = output.flatten
csv << output
end
end
end
def calculate(cells:)
cells[5].to_f * 4 / 95
end
end
end

@ -0,0 +1,68 @@
require 'watir'
require 'csv'
module Dese
class OneAThree
include Dese::Scraper
attr_reader :filepaths
def initialize(filepaths: [Rails.root.join('data', 'admin_data', 'dese', '1A_3_staffing_retention.csv'),
Rails.root.join('data', 'admin_data', 'dese', '1A_3_teachers_of_color.csv')])
@filepaths = filepaths
end
def run_all
run_a_pcom_i1
run_a_pcom_i3
browser.close
end
def run_a_pcom_i1
filepath = filepaths[0]
headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
'Principal Total', 'Principal # Retained', 'Principal % Retained',
'Teacher Total', 'Teacher # Retained', 'Teacher % Retained']
write_headers(filepath:, headers:)
run do |academic_year|
url = 'https://profiles.doe.mass.edu/statereport/staffingRetentionRates.aspx'
range = "#{academic_year.range.split('-').last.to_i + 2000}"
selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
'ctl00_ContentPlaceHolder1_ddYear' => range }
submit_id = 'btnViewReport'
calculation = lambda { |headers, items|
retained_teachers = headers['% Retained']
items[retained_teachers].to_f * 4 / 85 if retained_teachers.present?
}
admin_data_item_id = 'a-pcom-i1'
Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
end
end
def run_a_pcom_i3
filepath = filepaths[1]
headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
'African American (%)', 'Asian (%)', 'Hispanic (%)', 'White (%)', 'Native Hawaiian, Pacific Islander (%)',
'Multi-Race,Non-Hispanic (%)', 'Females (%)', 'Males (%)', 'FTE Count']
write_headers(filepath:, headers:)
run do |academic_year|
url = 'https://profiles.doe.mass.edu/statereport/teacherbyracegender.aspx'
range = academic_year.range
selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
'ctl00_ContentPlaceHolder1_ddYear' => range,
'ctl00_ContentPlaceHolder1_ddDisplay' => 'Percentages' }
submit_id = 'ctl00_ContentPlaceHolder1_btnViewReport'
calculation = lambda { |headers, items|
white = headers['White (%)']
result = ((100 - items[white].to_f) * 4) / 12.8 if white.present?
result = 1 if result < 1
result
}
admin_data_item_id = 'a-pcom-i3'
Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
end
end
end
end

@ -0,0 +1,78 @@
module Dese
module Scraper
DELAY = 20
Prerequisites = Struct.new('Prerequisites', :filepath, :url, :selectors, :submit_id, :admin_data_item_id,
:calculation)
def run
academic_years = AcademicYear.all.order(range: :DESC)
academic_years.each do |academic_year|
prerequisites = yield academic_year
document = get_html(url: prerequisites.url,
selectors: prerequisites.selectors,
submit_id: prerequisites.submit_id)
unless document.nil?
write_csv(document:, filepath: prerequisites.filepath, range: academic_year.range, id: prerequisites.admin_data_item_id,
calculation: prerequisites.calculation)
end
end
end
def browser
@browser ||= Watir::Browser.new
end
def get_html(url:, selectors:, submit_id:)
browser.goto(url)
selectors.each do |key, value|
return unless browser.option(text: value).present?
browser.select(id: key).select(text: value)
end
browser.button(id: submit_id).click
sleep DELAY # Sleep to prevent hitting mass.edu with too many requests
Nokogiri::HTML(browser.html)
end
def write_headers(filepath:, headers:)
CSV.open(filepath, 'w') do |csv|
csv << headers
end
end
def write_csv(document:, filepath:, range:, id:, calculation:)
table = document.css('tr')
headers = document.css('.sorting')
header_hash = headers.each_with_index.map { |header, index| [header.text, index] }.to_h
CSV.open(filepath, 'a') do |csv|
table.each do |row|
items = row.css('td').map(&:text)
dese_id = items[1].to_i
next if dese_id.nil? || dese_id.zero?
raw_likert_score = calculation.call(header_hash, items)
raw_likert_score ||= 'NA'
likert_score = raw_likert_score
if likert_score != 'NA'
likert_score = 5 if likert_score > 5
likert_score = 1 if likert_score < 1
likert_score = likert_score.round(2)
end
output = []
output << raw_likert_score
output << likert_score
output << id
output << range
output << items
output = output.flatten
csv << output
end
end
end
end
end

@ -0,0 +1,35 @@
require 'watir'
module Dese
class Staffing
include Dese::Scraper
attr_reader :filepath
def initialize(filepath: Rails.root.join('data', 'staffing', 'staffing.csv'))
@filepath = filepath
end
def run_all
scrape_staffing(filepath:)
end
def scrape_staffing(filepath:)
headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year',
'School Name', 'DESE ID',
'PK-2 (#)', '3-5 (#)', '6-8 (#)', '9-12 (#)', 'Multiple Grades (#)',
'All Grades (#)', 'FTE Count']
write_headers(filepath:, headers:)
run do |academic_year|
admin_data_item_id = 'NA'
url = 'https://profiles.doe.mass.edu/statereport/gradesubjectstaffing.aspx'
range = academic_year.range
selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
'ctl00_ContentPlaceHolder1_ddYear' => range,
'ctl00_ContentPlaceHolder1_ddDisplay' => 'Full-time Equivalents' }
submit_id = 'btnViewReport'
calculation = ->(_headers, _items) { 'NA' }
Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
end
end
end
end

@ -0,0 +1,45 @@
require 'watir'
require 'csv'
module Dese
class ThreeAOne
include Dese::Scraper
attr_reader :filepaths
def initialize(filepaths: [Rails.root.join('data', 'admin_data', 'dese', '3A_1_average_class_size.csv')])
@filepaths = filepaths
end
def run_all
filepath = filepaths[0]
headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
'Total # of Classes', 'Average Class Size', 'Number of Students', 'Female %', 'Male %',
'English Language Learner %', 'Students with Disabilities %', 'Economically Disadvantaged %']
write_headers(filepath:, headers:)
run_a_reso_i1
browser.close
end
def run_a_reso_i1
run do |academic_year|
url = 'https://profiles.doe.mass.edu/statereport/classsizebygenderpopulation.aspx'
range = academic_year.range
selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
'ctl00_ContentPlaceHolder1_ddYear' => range }
submit_id = 'btnViewReport'
calculation = lambda { |headers, items|
class_size_index = headers['Average Class Size']
average_class_size = items[class_size_index].to_f
benchmark = 20
if class_size_index.present? && !items[class_size_index] != ''
((benchmark - average_class_size) + benchmark) * 4 / benchmark
end
}
admin_data_item_id = 'a-reso-i1'
Prerequisites.new(filepaths[0], url, selectors, submit_id, admin_data_item_id, calculation)
end
end
end
end

@ -0,0 +1,163 @@
require 'watir'
require 'csv'
module Dese
class ThreeATwo
include Dese::Scraper
include Dese::Enrollments
attr_reader :filepaths
def initialize(filepaths: [Rails.root.join('data', 'admin_data', 'dese', 'enrollments.csv'),
Rails.root.join('data', 'admin_data', 'dese', '3A_2_age_staffing.csv'),
Rails.root.join('data', 'admin_data', 'dese', '3A_2_grade_subject_staffing.csv')])
@filepaths = filepaths
end
def run_all
filepath = filepaths[0]
scrape_enrollments(filepath:)
filepath = filepaths[1]
write_a_sust_i1_headers(filepath:)
run_a_sust_i1(filepath:)
run_a_sust_i2(filepath:)
run_a_sust_i3(filepath:)
filepath = filepaths[2]
write_a_sust_i4_headers(filepath:)
run_a_sust_i4(filepath:)
browser.close
end
def write_a_sust_i1_headers(filepath:)
headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
'<26 yrs (# )', '26-32 yrs (#)', '33-40 yrs (#)', '41-48 yrs (#)',
'49-56 yrs (#)', '57-64 yrs (#)', 'Over 64 yrs (#)', 'FTE Count',
'Student Count', 'Student to Guidance Counselor ratio']
write_headers(filepath:, headers:)
end
def write_a_sust_i4_headers(filepath:)
headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
'PK-2 (# )', '3-5 (# )', '6-8 (# )', '9-12 (# )', 'Multiple Grades (# )', 'All Grades (# )', 'FTE Count',
'Student Count', 'Student to Art Teacher ratio']
write_headers(filepath:, headers:)
end
def run_a_sust_i1(filepath:)
run do |academic_year|
admin_data_item_id = 'a-sust-i1'
url = 'https://profiles.doe.mass.edu/statereport/agestaffing.aspx'
range = academic_year.range
selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
'ctl00_ContentPlaceHolder1_ddYear' => range,
'ctl00_ContentPlaceHolder1_ddJobClassification' => 'Guidance Counselor' }
submit_id = 'btnViewReport'
calculation = lambda { |headers, items|
fte_index = headers['FTE Count']
num_of_guidance_counselors = items[fte_index].to_f
dese_id = items[headers['School Code']].to_i
school = School.find_by_dese_id(dese_id)
return 'NA' unless school.present? && school.is_hs?
num_of_students = student_count(filepath: filepaths[0], dese_id:, year: academic_year.range) || 0
items << num_of_students
benchmark = 250
if fte_index.present? && !items[fte_index] != ''
result = ((benchmark - (num_of_students / num_of_guidance_counselors)) + benchmark) * 4 / benchmark
end
items << (num_of_students / num_of_guidance_counselors)
result
}
Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
end
end
def run_a_sust_i2(filepath:)
run do |academic_year|
admin_data_item_id = 'a-sust-i2'
url = 'https://profiles.doe.mass.edu/statereport/agestaffing.aspx'
range = academic_year.range
selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
'ctl00_ContentPlaceHolder1_ddYear' => range,
'ctl00_ContentPlaceHolder1_ddJobClassification' => 'School Psychologist -- Non-Special Education' }
submit_id = 'btnViewReport'
calculation = lambda { |headers, items|
fte_index = headers['FTE Count']
num_of_psychologists = items[fte_index].to_f
dese_id = items[headers['School Code']].to_i
num_of_students = student_count(filepath: filepaths[0], dese_id:, year: academic_year.range) || 0
items << num_of_students
benchmark = 250
if fte_index.present? && !items[fte_index] != ''
result = ((benchmark - (num_of_students / num_of_psychologists)) + benchmark) * 4 / benchmark
end
items << (num_of_students / num_of_psychologists)
result
}
Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
end
end
def run_a_sust_i3(filepath:)
run do |academic_year|
admin_data_item_id = 'a-sust-i3'
url = 'https://profiles.doe.mass.edu/statereport/agestaffing.aspx'
range = academic_year.range
selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
'ctl00_ContentPlaceHolder1_ddYear' => range,
'ctl00_ContentPlaceHolder1_ddJobClassification' => 'Paraprofessional' }
submit_id = 'btnViewReport'
calculation = lambda { |headers, items|
fte_index = headers['FTE Count']
num_of_paraprofessionals = items[fte_index].to_f
dese_id = items[headers['School Code']].to_i
num_of_students = student_count(filepath: filepaths[0], dese_id:, year: academic_year.range) || 0
items << num_of_students
benchmark = 43.4
if fte_index.present? && !items[fte_index] != ''
result = ((benchmark - (num_of_students / num_of_paraprofessionals)) + benchmark) * 4 / benchmark
end
items << (num_of_students / num_of_paraprofessionals)
result
}
Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
end
end
def run_a_sust_i4(filepath:)
run do |academic_year|
admin_data_item_id = 'a-sust-i4'
url = 'https://profiles.doe.mass.edu/statereport/gradesubjectstaffing.aspx'
range = academic_year.range
selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
'ctl00_ContentPlaceHolder1_ddYear' => range,
'ctl00_ContentPlaceHolder1_ddDisplay' => 'Full-time Equivalents',
'ctl00_ContentPlaceHolder1_ddSubject' => 'Arts' }
submit_id = 'btnViewReport'
calculation = lambda { |_headers, items|
num_of_art_teachers = items.last.to_f
dese_id = items[1].to_i
num_of_students = student_count(filepath: filepaths[0], dese_id:, year: academic_year.range) || 0
items << num_of_students
benchmark = 500
if num_of_art_teachers.present?
result = ((benchmark - (num_of_students / num_of_art_teachers)) + benchmark) * 4 / benchmark
end
items << (num_of_students / num_of_art_teachers)
result
}
Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
end
end
end
end

@ -0,0 +1,130 @@
require 'watir'
require 'csv'
module Dese
class ThreeBOne
include Dese::Scraper
include Dese::Enrollments
attr_reader :filepaths
def initialize(filepaths: [Rails.root.join('data', 'admin_data', 'dese', '3B_1_masscore.csv'),
Rails.root.join('data', 'admin_data', 'dese', '3B_1_advcoursecomprate.csv'),
Rails.root.join('data', 'admin_data', 'dese', '3B_1_ap.csv'),
Rails.root.join('data', 'admin_data', 'dese', '3B_1_student_courses_ratio.csv')])
@filepaths = filepaths
end
def run_all
filepath = filepaths[0]
headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
'# Graduated', '# Completed MassCore', '% Completed MassCore']
write_headers(filepath:, headers:)
run_a_curv_i1(filepath:)
filepath = filepaths[1]
headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
'# Grade 11 and 12 Students', '# Students Completing Advanced', '% Students Completing Advanced',
'% ELA', '% Math', '% Science and Technology', '% Computer and Information Science',
'% History and Social Sciences', '% Arts', '% All Other Subjects', '% All Other Subjects']
write_headers(filepath:, headers:)
run_a_curv_i2(filepath:)
filepath = filepaths[2]
headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
'Tests Taken', 'Score=1', 'Score=2', 'Score=3', 'Score=4', 'Score=5', '% Score 1-2', '% Score 3-5']
write_headers(filepath:, headers:)
run_a_curv_i3(filepath:)
filepath = filepaths[3]
headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
'Total # of Classes', 'Average Class Size', 'Number of Students', 'Female %', 'Male %', 'English Language Learner %', 'Students with Disabilities %', 'Low Income %', 'Number of Students']
write_headers(filepath:, headers:)
run_a_curv_i5(filepath:)
browser.close
end
def run_a_curv_i1(filepath:)
run do |academic_year|
url = 'https://profiles.doe.mass.edu/statereport/masscore.aspx'
range = academic_year.range
selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
'ctl00_ContentPlaceHolder1_ddYear' => range }
submit_id = 'btnViewReport'
calculation = lambda { |headers, items|
completed_index = headers['% Completed MassCore']
percent_completed = items[completed_index].to_f
benchmark = 90
percent_completed * 4 / benchmark if completed_index.present? && !items[completed_index] != ''
}
admin_data_item_id = 'a-curv-i1'
Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
end
end
def run_a_curv_i2(filepath:)
run do |academic_year|
url = 'https://profiles.doe.mass.edu/statereport/advcoursecomprate.aspx'
range = "#{academic_year.range.split('-')[1].to_i + 2000}"
selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
'ctl00_ContentPlaceHolder1_ddYear' => range }
submit_id = 'btnViewReport'
calculation = lambda { |headers, items|
completed_index = headers['% Students Completing Advanced']
percent_completed = items[completed_index].to_f
benchmark = 30
percent_completed * 4 / benchmark if completed_index.present? && !items[completed_index] != ''
}
admin_data_item_id = 'a-curv-i2'
Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
end
end
def run_a_curv_i3(filepath:)
run do |academic_year|
url = 'https://profiles.doe.mass.edu/statereport/ap.aspx'
range = academic_year.range
selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
'ctl00_ContentPlaceHolder1_ddYear' => range }
submit_id = 'ctl00_ContentPlaceHolder1_btnViewReport'
calculation = lambda { |headers, items|
completed_index = headers['% Score 3-5']
percent_score = items[completed_index].to_f
benchmark = 20
percent_score * 4 / benchmark if completed_index.present? && !items[completed_index] != ''
}
admin_data_item_id = 'a-curv-i3'
Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
end
end
def run_a_curv_i5(filepath:)
run do |academic_year|
url = 'https://profiles.doe.mass.edu/statereport/classsizebygenderpopulation.aspx'
range = academic_year.range
selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
'ctl00_ContentPlaceHolder1_ddYear' => range }
submit_id = 'btnViewReport'
calculation = lambda { |headers, items|
row = headers.keys.zip(items).to_h
dese_id = row['School Code'].to_i
is_hs = (row['School Name'] in /High School/i)
school = School.find_by(dese_id:)
is_hs = school.is_hs if school.present?
next 'NA' unless is_hs
num_of_classes = row['Total # of Classes'].delete(',').to_f
num_of_students = student_count(filepath: Rails.root.join('data', 'admin_data', 'dese', 'enrollments.csv'),
dese_id:, year: academic_year.range) || 0
items << num_of_students
actual = num_of_students / num_of_classes
benchmark = 5
((benchmark - actual) + benchmark) * 4 / benchmark if num_of_classes.present? && num_of_students.present?
}
admin_data_item_id = 'a-curv-i5'
Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
end
end
end
end

@ -0,0 +1,129 @@
require 'watir'
require 'csv'
module Dese
class ThreeBTwo
include Dese::Scraper
include Dese::Enrollments
attr_reader :filepaths
def initialize(filepaths: [Rails.root.join('data', 'admin_data', 'dese', '3B_2_teacher_by_race_and_gender.csv'),
Rails.root.join('data', 'admin_data', 'dese', '3B_2_student_by_race_and_gender.csv')])
@filepaths = filepaths
end
def run_all
filepath = filepaths[0]
headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'Teachers of color (%)', 'School Name', 'DESE ID',
'African American (%)', 'Asian (%)', 'Hispanic (%)', 'White (%)', 'Native American (%)',
'Native Hawaiian Pacific Islander (%)', 'Multi-Race Non-Hispanic (%)', 'Females (%)',
'Males (%)', 'FTE Count']
write_headers(filepath:, headers:)
run_teacher_demographics(filepath:)
filepath = filepaths[1]
headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'Non-White Teachers %', 'Non-White Students %', 'School Name', 'DESE ID',
'African American', 'Asian', 'Hispanic', 'White', 'Native American',
'Native Hawaiian or Pacific Islander', 'Multi-Race or Non-Hispanic', 'Males',
'Females', 'Non-Binary', 'Students of color (%)']
write_headers(filepath:, headers:)
run_student_demographics(filepath:)
browser.close
end
def run_teacher_demographics(filepath:)
run do |academic_year|
admin_data_item_id = ''
url = 'https://profiles.doe.mass.edu/statereport/teacherbyracegender.aspx'
range = academic_year.range
selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
'ctl00_ContentPlaceHolder1_ddYear' => range,
'ctl00_ContentPlaceHolder1_ddDisplay' => 'Percentages',
'ctl00_ContentPlaceHolder1_ddClassification' => 'Teacher' }
submit_id = 'ctl00_ContentPlaceHolder1_btnViewReport'
calculation = lambda { |headers, items|
african_american_index = headers['African American (%)']
african_american_number = items[african_american_index].to_f
asian_index = headers['Asian (%)']
asian_number = items[asian_index].to_f
hispanic_index = headers['Hispanic (%)']
hispanic_number = items[hispanic_index].to_f
native_american_index = headers['Native American (%)']
native_american_number = items[native_american_index].to_f
native_hawaiian_index = headers['Native Hawaiian, Pacific Islander (%)']
native_hawaiian_number = items[native_hawaiian_index].to_f
multi_race_index = headers['Multi-Race,Non-Hispanic (%)']
multi_race_number = items[multi_race_index].to_f
non_white_teachers = african_american_number + asian_number + hispanic_number + native_american_number + native_hawaiian_number + multi_race_number
items.unshift(non_white_teachers)
non_white_teachers
}
Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
end
end
def teacher_count(filepath:, dese_id:, year:)
@teachers ||= {}
@years_with_data ||= Set.new
if @teachers.count == 0
CSV.parse(File.read(filepath), headers: true).map do |row|
academic_year = row['Academic Year']
@years_with_data << academic_year
school_id = row['DESE ID'].to_i
total = row['Teachers of color (%)'].delete(',')
total = 'NA' if total == '' || total.nil?
@teachers[[school_id, academic_year]] = total
end
end
return 'NA' unless @years_with_data.include? year
@teachers[[dese_id, year]]
end
def run_student_demographics(filepath:)
run do |academic_year|
admin_data_item_id = 'a-cure-i1'
url = 'https://profiles.doe.mass.edu/statereport/enrollmentbyracegender.aspx'
range = academic_year.range
selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
'ctl00_ContentPlaceHolder1_ddYear' => range }
submit_id = 'btnViewReport'
calculation = lambda { |headers, items|
white_index = headers['White']
white_number = items[white_index].to_f
dese_id = items[headers['School Code']].to_i
non_white_student_percentage = (100 - white_number).to_f
items.unshift(non_white_student_percentage)
count_of_teachers = teacher_count(filepath: filepaths[0], dese_id:, year: academic_year.range)
return 'NA' if count_of_teachers == 'NA'
non_white_teacher_percentage = count_of_teachers.to_f
items.unshift(non_white_teacher_percentage)
floor = 5
benchmark = 0.25
return 1 if non_white_student_percentage.zero? && non_white_teacher_percentage < floor
if non_white_teacher_percentage >= floor
parity_index = non_white_teacher_percentage / non_white_student_percentage
likert_score = parity_index * 4 / benchmark
else
likert_score = 1
end
likert_score
}
Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
end
end
end
end

@ -0,0 +1,70 @@
require 'watir'
require 'csv'
module Dese
class TwoAOne
include Dese::Scraper
attr_reader :filepaths
def initialize(filepaths: [Rails.root.join('data', 'admin_data', 'dese', '2A_1_students_suspended.csv'),
Rails.root.join('data', 'admin_data', 'dese', '2A_1_students_disciplined.csv')])
@filepaths = filepaths
end
def run_all
run_a_phys_i1
run_a_phys_i3
browser.close
end
def run_a_phys_i1
filepath = filepaths[0]
headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
'Students', 'Students Disciplined', '% In-School Suspension', '% Out-of-School Suspension', '% Expulsion', '% Removed to Alternate Setting',
'% Emergency Removal', '% Students with a School-Based Arrest', '% Students with a Law Enforcement Referral']
write_headers(filepath:, headers:)
run do |academic_year|
url = 'https://profiles.doe.mass.edu/statereport/ssdr.aspx'
range = academic_year.range
selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
'ctl00_ContentPlaceHolder1_ddYear' => range }
submit_id = 'ctl00_ContentPlaceHolder1_btnViewReport'
calculation = lambda { |headers, items|
suspensions_index = headers['% Out-of-School Suspension']
benchmark = 5.27
suspension_rate = items[suspensions_index].to_f
if suspensions_index.present? && items[suspensions_index] != ''
((benchmark - suspension_rate) + benchmark) * 4 / 5.27
end
}
admin_data_item_id = 'a-phys-i1'
Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
end
end
def run_a_phys_i3
filepath = filepaths[1]
headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
'Students', 'Students Disciplined', '% 1 Day', '% 2 to 3 Days', '% 4 to 7 Days', '% 8 to 10 Days', '% > 10 Days']
write_headers(filepath:, headers:)
run do |academic_year|
url = 'https://profiles.doe.mass.edu/statereport/ssdr_days_missed.aspx'
range = academic_year.range
selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
'ctl00_ContentPlaceHolder1_ddYear' => range }
submit_id = 'ctl00_ContentPlaceHolder1_btnViewReport'
calculation = lambda { |headers, items|
days_missed_index = headers['% > 10 Days']
benchmark = 1
missed_days = items[days_missed_index].to_f
if days_missed_index.present? && items[days_missed_index] != ''
((benchmark - missed_days) + benchmark) * 4 / benchmark
end
}
admin_data_item_id = 'a-phys-i3'
Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
end
end
end
end

@ -0,0 +1,76 @@
require 'watir'
require 'csv'
module Dese
class TwoCOne
include Dese::Scraper
attr_reader :filepaths
def initialize(filepaths: [Rails.root.join('data', 'admin_data', 'dese', '2C_1_attendance.csv')])
@filepaths = filepaths
end
def run_all
write_a_vale_i1_headers
run_a_vale_i1
run_a_vale_i2
browser.close
end
def write_a_vale_i1_headers
filepath = filepaths[0]
headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
'Attendance Rate', 'Average # of Absences', 'Absent 10 or more days', 'Chronically Absent (10% or more)',
'Chronically Absent (20% or more)', 'Unexcused > 9 days']
write_headers(filepath:, headers:)
end
def run_a_vale_i1
run do |academic_year|
admin_data_item_id = 'a-vale-i1'
url = 'https://profiles.doe.mass.edu/statereport/attendance.aspx'
range = case academic_year.range
when '2021-22', '2020-21'
"#{academic_year.range} (End of year)"
else
academic_year.range
end
selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
'ctl00_ContentPlaceHolder1_ddYear' => range }
submit_id = 'btnViewReport'
calculation = lambda { |headers, items|
absence_index = headers['Chronically Absent (10% or more)']
benchmark = 10
absence_rate = items[absence_index].to_f
if absence_index.present? && !items[absence_index].blank?
((benchmark - absence_rate) + benchmark) * 4 / benchmark
end
}
Prerequisites.new(filepaths[0], url, selectors, submit_id, admin_data_item_id, calculation)
end
end
def run_a_vale_i2
run do |academic_year|
admin_data_item_id = 'a-vale-i2'
url = 'https://profiles.doe.mass.edu/statereport/attendance.aspx'
range = case academic_year.range
when '2021-22', '2020-21'
"#{academic_year.range} (End of year)"
else
academic_year.range
end
selectors = { 'ctl00_ContentPlaceHolder1_ddReportType' => 'School',
'ctl00_ContentPlaceHolder1_ddYear' => range }
submit_id = 'btnViewReport'
calculation = lambda { |headers, items|
attendance = headers[' Attendance Rate ']
benchmark = 90
items[attendance].to_f * 4 / benchmark if attendance.present?
}
Prerequisites.new(filepaths[0], url, selectors, submit_id, admin_data_item_id, calculation)
end
end
end
end

@ -0,0 +1,32 @@
module Dashboard
class DisaggregationLoader
attr_reader :path
def initialize(path:)
@path = path
initialize_directory
end
def load
data = {}
Dir.glob(Rails.root.join(path, "*.csv")).each do |filepath|
puts filepath
File.open(filepath) do |file|
headers = CSV.parse(file.first).first
file.lazy.each_slice(1000) do |lines|
CSV.parse(lines.join, headers:).map do |row|
values = DisaggregationRow.new(row:, headers:)
data[[values.lasid, values.district, values.academic_year]] = values
end
end
end
end
data
end
def initialize_directory
FileUtils.mkdir_p(path)
end
end
end

@ -0,0 +1,56 @@
module Dashboard
class DisaggregationRow
attr_reader :row, :headers
def initialize(row:, headers:)
@row = row
@headers = headers
end
def district
@district ||= value_from(pattern: /District/i)
end
def academic_year
@academic_year ||= value_from(pattern: /Academic\s*Year/i)
end
def raw_income
@income ||= value_from(pattern: /Low\s*Income/i)
end
def lasid
@lasid ||= value_from(pattern: /LASID/i)
end
def raw_ell
@raw_ell ||= value_from(pattern: /EL Student First Year/i)
end
def ell
@ell ||= begin
value = value_from(pattern: /EL Student First Year/i).downcase
case value
when /lep student 1st year|LEP student not 1st year/i
"ELL"
when /Does not apply/i
"Not ELL"
else
"Unknown"
end
end
end
def value_from(pattern:)
output = nil
matches = headers.select do |header|
pattern.match(header)
end.map { |item| item.delete("\n") }
matches.each do |match|
output ||= row[match]
end
output
end
end
end

@ -0,0 +1,133 @@
# frozen_string_literal: true
require "csv"
module Dashboard
class EnrollmentLoader
def self.load_data(filepath:)
schools = []
enrollments = []
CSV.parse(File.read(filepath), headers: true) do |row|
row = EnrollmentRowValues.new(row:)
next unless row.school.present? && row.academic_year.present?
schools << row.school
enrollments << create_enrollment_entry(row:)
end
# It's possible that instead of updating all columns on duplicate key, we could just update the student columns and leave total_teachers alone. Right now enrollment data loads before staffing data so it works correctly.
Respondent.import enrollments, batch_size: 1000,
on_duplicate_key_update: %i[pk k one two three four five six seven eight nine ten eleven twelve total_students]
Respondent.where.not(school: schools).destroy_all
end
private
def self.create_enrollment_entry(row:)
respondent = Respondent.find_or_initialize_by(school: row.school, academic_year: row.academic_year)
respondent.pk = row.pk
respondent.k = row.k
respondent.one = row.one
respondent.two = row.two
respondent.three = row.three
respondent.four = row.four
respondent.five = row.five
respondent.six = row.six
respondent.seven = row.seven
respondent.eight = row.eight
respondent.nine = row.nine
respondent.ten = row.ten
respondent.eleven = row.eleven
respondent.twelve = row.twelve
respondent.total_students = row.total_students
respondent
end
private_class_method :create_enrollment_entry
end
class EnrollmentRowValues
attr_reader :row
def initialize(row:)
@row = row
end
def school
@school ||= begin
dese_id = row["DESE ID"].try(:strip).to_i
School.find_by_dese_id(dese_id)
end
end
def academic_year
@academic_year ||= begin
year = row["Academic Year"]
AcademicYear.find_by_range(year)
end
end
def pk
row["PK"] || row["pk"]
end
def k
row["K"] || row["k"]
end
def one
row["1"]
end
def two
row["2"]
end
def three
row["3"]
end
def four
row["4"]
end
def five
row["5"]
end
def six
row["6"]
end
def seven
row["7"]
end
def eight
row["8"]
end
def nine
row["9"]
end
def ten
row["10"]
end
def eleven
row["11"]
end
def twelve
row["12"]
end
def total_students
row["Total"].delete(",").to_i
end
end
end

@ -0,0 +1,55 @@
# frozen_string_literal: true
module Dashboard
class ResponseRateLoader
def self.reset(schools: School.all, academic_years: AcademicYear.all, subcategories: Subcategory.all)
subcategories.each do |subcategory|
schools.each do |school|
next if test_env? && (school != milford)
academic_years.each do |academic_year|
next if test_env? && (academic_year != test_year)
process_response_rate(subcategory:, school:, academic_year:)
end
end
end
end
private
def self.milford
School.find_by_slug "milford-high-school"
end
def self.test_year
AcademicYear.find_by_range "2020-21"
end
def self.rails_env
@rails_env ||= ENV["RAILS_ENV"]
end
def self.process_response_rate(subcategory:, school:, academic_year:)
student = StudentResponseRateCalculator.new(subcategory:, school:, academic_year:)
teacher = TeacherResponseRateCalculator.new(subcategory:, school:, academic_year:)
response_rate = ResponseRate.find_or_create_by!(subcategory:, school:, academic_year:)
response_rate.update!(student_response_rate: student.rate,
teacher_response_rate: teacher.rate,
meets_student_threshold: student.meets_student_threshold?,
meets_teacher_threshold: teacher.meets_teacher_threshold?)
end
def self.test_env?
rails_env == "test"
end
private_class_method :milford
private_class_method :test_year
private_class_method :rails_env
private_class_method :process_response_rate
private_class_method :test_env?
end
end

@ -0,0 +1,29 @@
require 'net/sftp'
require 'uri'
require 'csv'
module Sftp
class Directory
def self.open(path: '/data/survey_responses/clean', &block)
sftptogo_url = ENV['MCIEA_SFTPTOGO_URL']
uri = URI.parse(sftptogo_url)
Net::SFTP.start(uri.host, uri.user, password: uri.password) do |sftp|
sftp.dir.foreach(path) do |entry|
next unless entry.file?
filename = entry.name
puts filename
sftp.file.open(filepath(path:, filename:), 'r', &block)
end
end
end
def self.filepath(path:, filename:)
path += '/' unless path.end_with?('/')
"#{path}#{filename}"
end
private_class_method :filepath
end
end

@ -0,0 +1,33 @@
require 'net/sftp'
require 'uri'
require 'csv'
module Sftp
class RaceLoader
def self.load_data(path: '/data/survey_responses/')
SurveyItemResponse.update_all(student_id: nil)
StudentRace.delete_all
Student.delete_all
sftptogo_url = ENV['SFTPTOGO_URL']
uri = URI.parse(sftptogo_url)
Net::SFTP.start(uri.host, uri.user, password: uri.password) do |sftp|
sftp.dir.foreach(path) do |entry|
filename = entry.name
puts filename
sftp.file.open(filepath(path:, filename:), 'r') do |f|
StudentLoader.from_file(file: f, rules: [Rule::SkipNonLowellSchools])
end
end
end
end
def self.filepath(path:, filename:)
path += '/' unless path.end_with?('/')
"#{path}#{filename}"
end
private_class_method :filepath
end
end

@ -0,0 +1,74 @@
# frozen_string_literal: true
require "csv"
module Dashboard
class StaffingLoader
def self.load_data(filepath:)
schools = []
respondents = []
CSV.parse(File.read(filepath), headers: true) do |row|
row = StaffingRowValues.new(row:)
next unless row.school.present? && row.academic_year.present?
schools << row.school
respondents << create_staffing_entry(row:)
end
Respondent.import respondents, batch_size: 1000, on_duplicate_key_update: [:total_teachers]
Respondent.where.not(school: schools).destroy_all
end
def self.clone_previous_year_data
years = AcademicYear.order(:range).last(2)
previous_year = years.first
current_year = years.last
respondents = []
School.all.each do |school|
Respondent.where(school:, academic_year: previous_year).each do |respondent|
current_respondent = Respondent.find_or_initialize_by(school:, academic_year: current_year)
current_respondent.total_teachers = respondent.total_teachers
respondents << current_respondent
end
end
Respondent.import respondents, batch_size: 1000, on_duplicate_key_update: [:total_teachers]
end
private
def self.create_staffing_entry(row:)
respondent = Respondent.find_or_initialize_by(school: row.school, academic_year: row.academic_year)
respondent.total_teachers = row.fte_count
respondent
end
private_class_method :create_staffing_entry
end
class StaffingRowValues
attr_reader :row
def initialize(row:)
@row = row
end
def school
@school ||= begin
dese_id = row["DESE ID"].strip.to_i
School.find_by_dese_id(dese_id)
end
end
def academic_year
@academic_year ||= begin
year = row["Academic Year"]
AcademicYear.find_by_range(year)
end
end
def fte_count
row["FTE Count"]
end
end
end

@ -0,0 +1,305 @@
module Dashboard
class SurveyItemValues
attr_reader :row, :headers, :survey_items, :schools
def initialize(row:, headers:, survey_items:, schools:)
@row = row
# Remove any newlines in headers
headers = headers.map { |item| item.delete("\n") if item.present? }
@headers = include_all_headers(headers:)
@survey_items = survey_items
@schools = schools
copy_likert_scores_from_variant_survey_items
row["Income"] = income
row["Raw Income"] = raw_income
row["Raw ELL"] = raw_ell
row["ELL"] = ell
row["Raw SpEd"] = raw_sped
row["SpEd"] = sped
row["Progress Count"] = progress
row["Race"] ||= races.join(",")
row["Gender"] ||= gender
copy_data_to_main_column(main: /Race/i, secondary: /Race Secondary|Race-1/i)
copy_data_to_main_column(main: /Gender/i, secondary: /Gender Secondary|Gender-1/i)
end
def copy_data_to_main_column(main:, secondary:)
main_column = headers.find { |header| main.match(header) }
row[main_column] = value_from(pattern: secondary) if row[main_column].nil?
end
# Some survey items have variants, i.e. a survey item with an id of s-tint-q1 might have a variant that looks like s-tint-q1-1. We must ensure that all variants in the form of s-tint-q1-1 have a matching pair.
# We don't ensure that ids in the form of s-tint-q1 have a matching pair because not all questions have variants
def include_all_headers(headers:)
alternates = headers.filter(&:present?)
.filter { |header| header.match?(/^[st]-\w*-\w*-1$/i) }
alternates.each do |header|
main = header.sub(/-1\z/, "")
headers.push(main) unless headers.include?(main)
end
headers
end
def dese_id?
dese_id.present?
end
def recorded_date
@recorded_date ||= begin
recorded_date = value_from(pattern: /Recorded\s*Date/i)
Date.parse(recorded_date)
end
end
def academic_year
@academic_year ||= AcademicYear.find_by_date recorded_date
end
def survey_item_response(survey_item:)
@survey_item_response ||= Hash.new do |memo, survey_item|
memo[survey_item] = survey_item_responses[[response_id, survey_item.id]]
end
@survey_item_response[survey_item]
end
def survey_item_responses
@survey_item_responses ||= Hash.new do |memo|
responses_hash = {}
SurveyItemResponse.where(school:, academic_year:, response_id:).each do |response|
responses_hash[[response.response_id, response.survey_item.id]] = response
end
memo[[school, academic_year]] = responses_hash
end
@survey_item_responses[[school, academic_year]]
end
def response_id
@response_id ||= value_from(pattern: /Response\s*ID/i)
end
def dese_id
@dese_id ||= begin
dese_id = value_from(pattern: /Dese\s*ID/i)
dese_id ||= value_from(pattern: /^School$/i)
dese_id ||= value_from(pattern: /School-\s*\w/i)
dese_id.to_i
end
end
def likert_score(survey_item_id:)
row[survey_item_id] || row["#{survey_item_id}-1"]
end
def school
@school ||= schools[dese_id]
end
def district
@district ||= school&.district
end
def grade
@grade ||= begin
raw_grade = value_from(pattern: /Grade|What grade are you in?/i)
return nil if raw_grade.blank?
raw_grade.to_i
end
end
def gender
@gender ||= begin
gender_code ||= value_from(pattern: /Gender self report/i)
gender_code ||= value_from(pattern: /^Gender$/i)
gender_code ||= value_from(pattern: /What is your gender?|What is your gender? - Selected Choice/i)
gender_code ||= value_from(pattern: /Gender-\s*SIS/i)
gender_code ||= value_from(pattern: /Gender-\s*Qcode/i)
gender_code ||= value_from(pattern: /Gender - do not use/i)
gender_code ||= value_from(pattern: /Gender/i)
Gender.qualtrics_code_from(gender_code)
end
end
def races
@races ||= begin
race_codes ||= self_report = value_from(pattern: /Race\s*self\s*report/i)
race_codes ||= value_from(pattern: /^RACE$/i)
race_codes ||= value_from(pattern: %r{What is your race/ethnicity?(Please select all that apply) - Selected Choice}i)
race_codes ||= value_from(pattern: /Race Secondary/i)
race_codes ||= sis ||= value_from(pattern: /Race-\s*SIS/i)
race_codes ||= sis ||= value_from(pattern: /Race\s*-\s*Qcodes/i)
race_codes ||= value_from(pattern: /RACE/i) || ""
race_codes ||= []
race_codes = race_codes.split(",")
.map do |word|
word.split(/\s+and\s+/i)
end.flatten
.reject(&:blank?)
.map { |race| Race.qualtrics_code_from(race) }.map(&:to_i)
# Only check the secondary hispanic column if we don't have self reported data and are relying on SIS data
if self_report.nil? && sis.present?
hispanic = value_from(pattern: /Hispanic\s*Latino/i)&.downcase
race_codes = race_codes.reject { |code| code == 5 } if hispanic == "true" && race_codes.count == 1
race_codes = race_codes.push(4) if hispanic == "true"
end
Race.normalize_race_list(race_codes)
end
end
def lasid
@lasid ||= value_from(pattern: /LASID/i)
end
def raw_income
@raw_income ||= value_from(pattern: /Low\s*Income|Raw\s*Income|SES-\s*SIS/i)
end
def income
@income ||= Income.to_designation(raw_income)
end
def raw_ell
@raw_ell ||= value_from(pattern: /EL Student First Year|Raw\s*ELL|ELL-\s*SIS/i)
end
def ell
@ell ||= Ell.to_designation(raw_ell)
end
def raw_sped
@raw_sped ||= value_from(pattern: /Special\s*Ed\s*Status|Raw\s*SpEd|SpEd-\s*SIS/i)
end
def sped
@sped ||= Sped.to_designation(raw_sped)
end
def value_from(pattern:)
output = nil
matches = headers.select do |header|
pattern.match(header)
end.map { |item| item.delete("\n") }
matches.each do |match|
output ||= row[match]&.strip
end
return nil if output&.match?(%r{^#*N/*A$}i) || output.blank?
output
end
def sanitized_headers
@sanitized_headers ||= headers.select(&:present?)
.reject { |key, _value| key.start_with? "Q" }
.reject { |key, _value| key.match?(/^[st]-\w*-\w*-1$/i) }
end
def to_a
sanitized_headers.map { |header| row[header] }
end
def duration
@duration ||= value_from(pattern: /Duration|Duration \(in seconds\)|Duration\.\.\(in\.seconds\)/i)
end
def valid?
valid_duration? && valid_progress? && valid_grade? && valid_sd?
end
def respondent_type
return :teacher if headers
.filter(&:present?)
.filter { |header| header.start_with? "t-" }.count > 0
:student
end
def survey_type
@survey_type ||= SurveyItem.survey_type(survey_item_ids:)
end
def survey_item_ids
@survey_item_ids ||= sanitized_headers.filter { |header| header.start_with?("t-", "s-") }
end
def valid_duration?
return true if duration.nil? || duration == "" || duration.downcase == "n/a" || duration.downcase == "na"
span_in_seconds = duration.to_i
return span_in_seconds >= 300 if survey_type == :teacher
return span_in_seconds >= 240 if survey_type == :standard
return span_in_seconds >= 100 if survey_type == :short_form
true
end
def progress
survey_item_ids.reject { |header| row[header].nil? }.count
end
def valid_progress?
return false if progress.nil?
return progress >= 12 if survey_type == :teacher
return progress >= 11 if survey_type == :standard
return progress >= 5 if survey_type == :short_form
return progress >= 5 if survey_type == :early_education
false
end
def valid_grade?
return true if grade.nil?
return true if respondent_type == :teacher
respondents = Respondent.where(school:, academic_year:).first
if respondents.present? && respondents.enrollment_by_grade[grade].present?
enrollment = respondents.enrollment_by_grade[grade]
end
return false if enrollment.nil?
valid = enrollment > 0
puts "Invalid grade #{grade} for #{school.name} #{academic_year.formatted_range}" unless valid
valid
end
def valid_sd?
return true if survey_type == :early_education
survey_item_headers = headers.filter(&:present?).filter { |header| header.start_with?("s-", "t-") }
likert_scores = []
survey_item_headers.each do |header|
likert_scores << likert_score(survey_item_id: header).to_i
end
likert_scores = likert_scores.compact.reject(&:zero?)
return false if likert_scores.count < 2
!likert_scores.stdev.zero?
end
def valid_school?
school.present?
end
private
def copy_likert_scores_from_variant_survey_items
headers.filter(&:present?).filter { |header| header.end_with? "-1" }.each do |header|
likert_score = row[header]
main_item = header.gsub("-1", "")
row[main_item] = likert_score if likert_score.present? && row[main_item].blank?
end
end
end
end

@ -0,0 +1,141 @@
# frozen_string_literal: true
module Dashboard
class SurveyResponsesDataLoader
def load_data(filepath:)
File.open(filepath) do |file|
headers = file.first
headers_array = CSV.parse(headers).first
all_survey_items = survey_items(headers:)
file.lazy.each_slice(500) do |lines|
survey_item_responses = CSV.parse(lines.join, headers:).map do |row|
process_row(row: SurveyItemValues.new(row:, headers: headers_array, survey_items: all_survey_items,
schools:))
end
SurveyItemResponse.import survey_item_responses.compact.flatten, batch_size: 500,
on_duplicate_key_update: :all
end
end
end
def from_file(file:)
headers = file.gets
headers_array = CSV.parse(headers).first
all_survey_items = survey_items(headers:)
survey_item_responses = []
row_count = 0
until file.eof?
line = file.gets
next unless line.present?
CSV.parse(line, headers:).map do |row|
survey_item_responses << process_row(row: SurveyItemValues.new(row:, headers: headers_array,
survey_items: all_survey_items, schools:))
end
row_count += 1
next unless row_count == 500
SurveyItemResponse.import survey_item_responses.compact.flatten, batch_size: 500, on_duplicate_key_update: :all
survey_item_responses = []
row_count = 0
end
SurveyItemResponse.import survey_item_responses.compact.flatten, batch_size: 500, on_duplicate_key_update: :all
end
private
def schools
@schools = School.school_hash
end
def genders
@genders = Gender.by_qualtrics_code
end
def races
@races = Race.by_qualtrics_code
end
def incomes
@incomes ||= Income.by_slug
end
def ells
@ells ||= Ell.by_designation
end
def speds
@speds ||= Sped.by_designation
end
def process_row(row:)
return unless row.dese_id?
return unless row.school.present?
process_survey_items(row:)
end
def process_survey_items(row:)
student = Student.find_or_create_by(response_id: row.response_id, lasid: row.lasid)
student.races.delete_all
tmp_races = row.races.map { |race| races[race] }
student.races += tmp_races
row.survey_items.map do |survey_item|
likert_score = row.likert_score(survey_item_id: survey_item.survey_item_id) || next
unless likert_score.valid_likert_score?
puts "Response ID: #{row.response_id}, Likert score: #{likert_score} rejected" unless likert_score == "NA"
next
end
response = row.survey_item_response(survey_item:)
create_or_update_response(survey_item_response: response, likert_score:, row:, survey_item:, student:)
end.compact
end
def create_or_update_response(survey_item_response:, likert_score:, row:, survey_item:, student:)
gender = genders[row.gender]
grade = row.grade
income = incomes[row.income.parameterize]
ell = ells[row.ell]
sped = speds[row.sped]
if survey_item_response.present?
survey_item_response.likert_score = likert_score
survey_item_response.grade = grade
survey_item_response.gender = gender
survey_item_response.recorded_date = row.recorded_date
survey_item_response.income = income
survey_item_response.ell = ell
survey_item_response.sped = sped
survey_item_response.student = student
survey_item_response
else
SurveyItemResponse.new(response_id: row.response_id, academic_year: row.academic_year, school: row.school, survey_item:,
likert_score:, grade:, gender:, recorded_date: row.recorded_date, income:, ell:, sped:, student:)
end
end
def survey_items(headers:)
SurveyItem.where(survey_item_id: get_survey_item_ids_from_headers(headers:))
end
def get_survey_item_ids_from_headers(headers:)
CSV.parse(headers).first
.filter(&:present?)
.filter { |header| header.start_with? "t-", "s-" }
end
end
module StringMonkeyPatches
def valid_likert_score?
to_i.between? 1, 5
end
end
String.include StringMonkeyPatches
end

@ -6,6 +6,6 @@ class CreateDashboardAcademicYears < ActiveRecord::Migration[7.1]
t.timestamps
end
add_index :dashboard_academic_years, :range
add_index :dashboard_academic_years, :range, unique: true
end
end

@ -10,5 +10,7 @@ class CreateDashboardSchools < ActiveRecord::Migration[7.1]
t.timestamps
end
add_index :dashboard_schools, :dese_id, unique: true
end
end

@ -9,7 +9,7 @@ namespace :dashboard do
seeder.seed_districts_and_schools Dashboard::Engine.root.join("data", "dashboard",
"master_list_of_schools_and_districts.csv")
seeder.seed_sqm_framework Dashboard::Engine.root.join("data", "dashboard", "sqm_framework.csv")
# seeder.seed_demographics Rails.root.join("data", "demographics.csv")
seeder.seed_demographics Dashboard::Engine.root.join("data", "dashboard", "demographics.csv")
# seeder.seed_enrollment Rails.root.join("data", "enrollment", "enrollment.csv")
# seeder.seed_enrollment Rails.root.join("data", "enrollment", "nj_enrollment.csv")
# seeder.seed_enrollment Rails.root.join("data", "enrollment", "wi_enrollment.csv")

@ -18,7 +18,7 @@ ActiveRecord::Schema[7.1].define(version: 2024_01_04_192128) do
t.string "range"
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.index ["range"], name: "index_dashboard_academic_years_on_range"
t.index ["range"], name: "index_dashboard_academic_years_on_range", unique: true
end
create_table "dashboard_admin_data_items", force: :cascade do |t|
@ -160,6 +160,7 @@ ActiveRecord::Schema[7.1].define(version: 2024_01_04_192128) do
t.datetime "created_at", null: false
t.datetime "updated_at", null: false
t.index ["dashboard_district_id"], name: "index_dashboard_schools_on_dashboard_district_id"
t.index ["dese_id"], name: "index_dashboard_schools_on_dese_id", unique: true
end
create_table "dashboard_scores", force: :cascade do |t|

Loading…
Cancel
Save