mirror of
https://github.com/edcommonwealth/sqm-dashboards.git
synced 2026-03-07 21:48:16 -08:00
Split an academic year into seasons if the academic year's range is
initialized with a season, e.g. "2024-25 Fall". Update the scrapers for admin data, enrollment and staffing to use the new range standard correctly. Update the loaders for admin data, enrollment and staffing so that they populate all seasons in a given year; for example, admin data for 2024-25 gets loaded into both "2024-25 Fall" and "2024-25 Spring". Add tests for the new range format. Set the default cutoff for the start of the Spring season to the last Sunday in February.
This commit is contained in:
parent
996bb01d0b
commit
33da0859b9
43 changed files with 62404 additions and 28698 deletions
|
|
@ -48,7 +48,7 @@ class Cleaner
|
|||
output << school_name if schools.length == 1
|
||||
output << survey_type.to_s
|
||||
output << "Part-" + part unless part.nil?
|
||||
output << range
|
||||
output << range.parameterize
|
||||
output << "csv"
|
||||
output.join(".")
|
||||
end
|
||||
|
|
@ -77,7 +77,7 @@ class Cleaner
|
|||
file.lazy.each_slice(1000) do |lines|
|
||||
CSV.parse(lines.join, headers:).map do |row|
|
||||
values = SurveyItemValues.new(row:, headers:,
|
||||
survey_items: all_survey_items, schools:)
|
||||
survey_items: all_survey_items, schools:, academic_years:)
|
||||
next unless values.valid_school?
|
||||
|
||||
data << values
|
||||
|
|
@ -89,6 +89,10 @@ class Cleaner
|
|||
|
||||
private
|
||||
|
||||
def academic_years
|
||||
@academic_years ||= AcademicYear.all
|
||||
end
|
||||
|
||||
def include_all_headers(headers:)
|
||||
alternates = headers.filter(&:present?)
|
||||
.filter { |header| header.match?(/^[st]-\w*-\w*-1$/i) }
|
||||
|
|
@ -120,7 +124,7 @@ class Cleaner
|
|||
end
|
||||
|
||||
def schools
|
||||
@schools ||= School.school_by_dese_id
|
||||
@schools ||= School.by_dese_id
|
||||
end
|
||||
|
||||
def genders
|
||||
|
|
|
|||
|
|
@ -1,15 +1,16 @@
|
|||
module Dese
|
||||
class Loader
|
||||
@memo = Hash.new
|
||||
@memo = {}
|
||||
def self.load_data(filepath:)
|
||||
admin_data_values = []
|
||||
@memo = Hash.new
|
||||
schools = School.school_by_dese_id
|
||||
@memo = {}
|
||||
schools = School.by_dese_id
|
||||
CSV.parse(File.read(filepath), headers: true) do |row|
|
||||
score = likert_score(row:)
|
||||
next unless valid_likert_score(likert_score: score)
|
||||
|
||||
admin_data_values << create_admin_data_value(row:, score:, schools:)
|
||||
values = create_admin_data_value(row:, score:, schools:)
|
||||
admin_data_values.concat(values) if values
|
||||
end
|
||||
|
||||
AdminDataValue.import(admin_data_values.flatten.compact, batch_size: 1_000, on_duplicate_key_update: :all)
|
||||
|
|
@ -38,40 +39,47 @@ module Dese
|
|||
row["Admin Data Item"] || row["Item ID"] || row["Item Id"] || row["Item ID"]
|
||||
end
|
||||
|
||||
# these three methods do the memoization
|
||||
# these two methods do the memoization
|
||||
def self.find_admin_data_item(admin_data_item_id:)
|
||||
return @memo["admin" + admin_data_item_id] if @memo.key?("admin" + admin_data_item_id)
|
||||
|
||||
@memo["admin" + admin_data_item_id] ||= AdminDataItem.find_by_admin_data_item_id(admin_data_item_id)
|
||||
end
|
||||
|
||||
def self.find_ay(ay:)
|
||||
def self.find_ays(ay:)
|
||||
return @memo["year" + ay] if @memo.key?("year" + ay)
|
||||
@memo["year" + ay] ||= AcademicYear.find_by_range(ay)
|
||||
|
||||
@memo["year" + ay] ||= AcademicYear.of_year(ay)
|
||||
end
|
||||
|
||||
def self.create_admin_data_value(row:, score:, schools:)
|
||||
school = schools[dese_id(row:).to_i]
|
||||
admin_data_item_id = admin_data_item(row:)
|
||||
admin_data_item = find_admin_data_item(admin_data_item_id:)
|
||||
academic_year = find_ay(ay: ay(row:))
|
||||
academic_years = find_ays(ay: ay(row:))
|
||||
|
||||
return if school.nil?
|
||||
return if admin_data_item_id.nil? || admin_data_item_id.blank?
|
||||
return unless academic_years.size.positive?
|
||||
|
||||
admin_data_value = AdminDataValue.find_by(academic_year:, school:, admin_data_item:)
|
||||
out = []
|
||||
academic_years.each do |academic_year|
|
||||
admin_data_value = AdminDataValue.find_by(academic_year:, school:, admin_data_item:)
|
||||
|
||||
if admin_data_value.present?
|
||||
admin_data_value.likert_score = score
|
||||
admin_data_value.save
|
||||
nil
|
||||
else
|
||||
AdminDataValue.new(
|
||||
likert_score: score,
|
||||
academic_year:,
|
||||
school:,
|
||||
admin_data_item:
|
||||
)
|
||||
if admin_data_value.present?
|
||||
admin_data_value.likert_score = score
|
||||
admin_data_value.save
|
||||
[]
|
||||
else
|
||||
out << AdminDataValue.new(
|
||||
likert_score: score,
|
||||
academic_year:,
|
||||
school:,
|
||||
admin_data_item:
|
||||
)
|
||||
end
|
||||
end
|
||||
out
|
||||
end
|
||||
|
||||
private_class_method :valid_likert_score
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@ module Dese
|
|||
module Scraper
|
||||
DELAY = 20 # The dese site will block you if you hit it too many times in a short period of time
|
||||
|
||||
Prerequisites = Struct.new('Prerequisites', :filepath, :url, :selectors, :submit_id, :admin_data_item_id,
|
||||
Prerequisites = Struct.new("Prerequisites", :filepath, :url, :selectors, :submit_id, :admin_data_item_id,
|
||||
:calculation)
|
||||
def reverse_score(likert_score:)
|
||||
return nil unless likert_score.present?
|
||||
|
|
@ -14,6 +14,9 @@ module Dese
|
|||
|
||||
def run
|
||||
academic_years = AcademicYear.all.order(range: :DESC)
|
||||
.map(&:range_without_season)
|
||||
.uniq
|
||||
.map { |range| AcademicYear.new(range:) }
|
||||
academic_years.each do |academic_year|
|
||||
prerequisites = yield academic_year
|
||||
|
||||
|
|
@ -21,7 +24,7 @@ module Dese
|
|||
selectors: prerequisites.selectors,
|
||||
submit_id: prerequisites.submit_id)
|
||||
unless document.nil?
|
||||
write_csv(document:, filepath: prerequisites.filepath, range: academic_year.range, id: prerequisites.admin_data_item_id,
|
||||
write_csv(document:, filepath: prerequisites.filepath, range: academic_year.range_without_season, id: prerequisites.admin_data_item_id,
|
||||
calculation: prerequisites.calculation)
|
||||
end
|
||||
end
|
||||
|
|
@ -46,26 +49,26 @@ module Dese
|
|||
end
|
||||
|
||||
def write_headers(filepath:, headers:)
|
||||
CSV.open(filepath, 'w') do |csv|
|
||||
CSV.open(filepath, "w") do |csv|
|
||||
csv << headers
|
||||
end
|
||||
end
|
||||
|
||||
def write_csv(document:, filepath:, range:, id:, calculation:)
|
||||
table = document.css('tr')
|
||||
headers = document.css('.sorting')
|
||||
table = document.css("tr")
|
||||
headers = document.css(".sorting")
|
||||
header_hash = headers.each_with_index.map { |header, index| [header.text, index] }.to_h
|
||||
|
||||
CSV.open(filepath, 'a') do |csv|
|
||||
CSV.open(filepath, "a") do |csv|
|
||||
table.each do |row|
|
||||
items = row.css('td').map(&:text)
|
||||
items = row.css("td").map(&:text)
|
||||
dese_id = items[1].to_i
|
||||
next if dese_id.nil? || dese_id.zero?
|
||||
|
||||
raw_likert_score = calculation.call(header_hash, items)
|
||||
raw_likert_score ||= 'NA'
|
||||
raw_likert_score ||= "NA"
|
||||
likert_score = raw_likert_score
|
||||
if likert_score != 'NA'
|
||||
if likert_score != "NA"
|
||||
likert_score = 5 if likert_score > 5
|
||||
likert_score = 1 if likert_score < 1
|
||||
likert_score = likert_score.round(2)
|
||||
|
|
|
|||
|
|
@ -7,46 +7,23 @@ class EnrollmentLoader
|
|||
enrollments = []
|
||||
CSV.parse(File.read(filepath), headers: true) do |row|
|
||||
row = EnrollmentRowValues.new(row:)
|
||||
next unless row.school.present? && row.academic_year.present?
|
||||
next unless row.school.present? && row.academic_years.size.positive?
|
||||
|
||||
enrollments << create_enrollment_entry(row:)
|
||||
end
|
||||
|
||||
# It's possible that instead of updating all columns on duplicate key, we could just update the student columns and leave total_teachers alone. Right now enrollment data loads before staffing data so it works correctly.
|
||||
Respondent.import enrollments, batch_size: 1000,
|
||||
on_duplicate_key_update: %i[pk k one two three four five six seven eight nine ten eleven twelve total_students]
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def self.create_enrollment_entry(row:)
|
||||
respondent = Respondent.find_or_initialize_by(school: row.school, academic_year: row.academic_year)
|
||||
respondent.pk = row.pk
|
||||
respondent.k = row.k
|
||||
respondent.one = row.one
|
||||
respondent.two = row.two
|
||||
respondent.three = row.three
|
||||
respondent.four = row.four
|
||||
respondent.five = row.five
|
||||
respondent.six = row.six
|
||||
respondent.seven = row.seven
|
||||
respondent.eight = row.eight
|
||||
respondent.nine = row.nine
|
||||
respondent.ten = row.ten
|
||||
respondent.eleven = row.eleven
|
||||
respondent.twelve = row.twelve
|
||||
respondent.total_students = row.total_students
|
||||
respondent
|
||||
Respondent.import enrollments.flatten, batch_size: 1000,
|
||||
on_duplicate_key_update: %i[pk k one two three four five six seven eight nine ten eleven twelve total_students]
|
||||
end
|
||||
|
||||
def self.clone_previous_year_data
|
||||
years = AcademicYear.order(:range).last(2)
|
||||
previous_year = years.first
|
||||
current_year = years.last
|
||||
respondents = []
|
||||
School.all.each do |school|
|
||||
Respondent.where(school:, academic_year: previous_year).each do |respondent|
|
||||
current_respondent = Respondent.find_or_initialize_by(school:, academic_year: current_year)
|
||||
academic_years_without_data(school:).each do |academic_year|
|
||||
respondent = Respondent.where(school:, academic_year: last_academic_year_with_data(school:)).first
|
||||
next if respondent.nil?
|
||||
|
||||
current_respondent = Respondent.find_or_initialize_by(school:, academic_year:)
|
||||
current_respondent.pk = respondent.pk
|
||||
current_respondent.k = respondent.k
|
||||
current_respondent.one = respondent.one
|
||||
|
|
@ -65,10 +42,54 @@ class EnrollmentLoader
|
|||
respondents << current_respondent
|
||||
end
|
||||
end
|
||||
Respondent.import respondents, batch_size: 1000, on_duplicate_key_update: [:total_teachers]
|
||||
|
||||
Respondent.import respondents,
|
||||
batch_size: 1000, on_duplicate_key_ignore: true
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def self.create_enrollment_entry(row:)
|
||||
row.academic_years.map do |academic_year|
|
||||
respondent = Respondent.find_or_initialize_by(school: row.school, academic_year:)
|
||||
respondent.pk = row.pk
|
||||
respondent.k = row.k
|
||||
respondent.one = row.one
|
||||
respondent.two = row.two
|
||||
respondent.three = row.three
|
||||
respondent.four = row.four
|
||||
respondent.five = row.five
|
||||
respondent.six = row.six
|
||||
respondent.seven = row.seven
|
||||
respondent.eight = row.eight
|
||||
respondent.nine = row.nine
|
||||
respondent.ten = row.ten
|
||||
respondent.eleven = row.eleven
|
||||
respondent.twelve = row.twelve
|
||||
respondent.total_students = row.total_students
|
||||
respondent
|
||||
end
|
||||
end
|
||||
|
||||
def self.last_academic_year_with_data(school:)
|
||||
AcademicYear.all.order(range: :DESC).find do |academic_year|
|
||||
Respondent.where(school:, academic_year:).any? do |respondent|
|
||||
respondent.total_students.positive?
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def self.academic_years_without_data(school:)
|
||||
AcademicYear.all.order(range: :DESC).reject do |academic_year|
|
||||
Respondent.where(school:, academic_year:).any? do |respondent|
|
||||
respondent.total_students.positive?
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
private_class_method :create_enrollment_entry
|
||||
private_class_method :last_academic_year_with_data
|
||||
private_class_method :academic_years_without_data
|
||||
end
|
||||
|
||||
class EnrollmentRowValues
|
||||
|
|
@ -85,10 +106,10 @@ class EnrollmentRowValues
|
|||
end
|
||||
end
|
||||
|
||||
def academic_year
|
||||
@academic_year ||= begin
|
||||
def academic_years
|
||||
@academic_years ||= begin
|
||||
year = row["Academic Year"]
|
||||
AcademicYear.find_by_range(year)
|
||||
AcademicYear.of_year(year)
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
|||
|
|
@ -7,24 +7,26 @@ class StaffingLoader
|
|||
respondents = []
|
||||
CSV.parse(File.read(filepath), headers: true) do |row|
|
||||
row = StaffingRowValues.new(row:)
|
||||
next unless row.school.present? && row.academic_year.present?
|
||||
next unless row.school.present? && row.academic_years.size.positive?
|
||||
|
||||
respondents << create_staffing_entry(row:)
|
||||
respondents.concat(create_staffing_entry(row:))
|
||||
end
|
||||
|
||||
Respondent.import respondents, batch_size: 1000, on_duplicate_key_update: [:total_teachers]
|
||||
end
|
||||
|
||||
# Clones staffing and enrollment data from previous year
|
||||
def self.clone_previous_year_data
|
||||
years = AcademicYear.order(:range).last(2)
|
||||
previous_year = years.first
|
||||
current_year = years.last
|
||||
respondents = []
|
||||
School.all.each do |school|
|
||||
Respondent.where(school:, academic_year: previous_year).each do |respondent|
|
||||
current_respondent = Respondent.find_or_initialize_by(school:, academic_year: current_year)
|
||||
current_respondent.total_teachers = respondent.total_teachers
|
||||
academic_years_without_data(school:).each do |academic_year|
|
||||
year_with_data = last_academic_year_with_data(school:)
|
||||
respondent = Respondent.where(school:, academic_year: year_with_data).first
|
||||
next if respondent.nil?
|
||||
|
||||
current_respondent = Respondent.find_or_initialize_by(school:, academic_year:)
|
||||
if current_respondent.total_teachers.nil? || current_respondent.total_teachers.zero?
|
||||
current_respondent.total_teachers = respondent.total_teachers
|
||||
end
|
||||
respondents << current_respondent
|
||||
end
|
||||
end
|
||||
|
|
@ -34,11 +36,29 @@ class StaffingLoader
|
|||
private
|
||||
|
||||
def self.create_staffing_entry(row:)
|
||||
respondent = Respondent.find_or_initialize_by(school: row.school, academic_year: row.academic_year)
|
||||
respondent.total_teachers = row.fte_count
|
||||
respondent
|
||||
row.academic_years.map do |academic_year|
|
||||
respondent = Respondent.find_or_initialize_by(school: row.school, academic_year:)
|
||||
respondent.total_teachers = row.fte_count
|
||||
respondent
|
||||
end
|
||||
end
|
||||
|
||||
def self.last_academic_year_with_data(school:)
|
||||
AcademicYear.all.order(range: :DESC).find do |academic_year|
|
||||
respondents = Respondent.find_by(school:, academic_year:)
|
||||
respondents&.total_teachers&.positive?
|
||||
end
|
||||
end
|
||||
|
||||
def self.academic_years_without_data(school:)
|
||||
AcademicYear.all.order(range: :DESC).select do |academic_year|
|
||||
respondents = Respondent.find_by(school:, academic_year:)
|
||||
respondents.nil? || respondents.total_teachers.nil? || respondents.total_teachers.zero?
|
||||
end
|
||||
end
|
||||
|
||||
private_class_method :last_academic_year_with_data
|
||||
private_class_method :academic_years_without_data
|
||||
private_class_method :create_staffing_entry
|
||||
end
|
||||
|
||||
|
|
@ -56,10 +76,10 @@ class StaffingRowValues
|
|||
end
|
||||
end
|
||||
|
||||
def academic_year
|
||||
def academic_years
|
||||
@academic_year ||= begin
|
||||
year = row["Academic Year"]
|
||||
AcademicYear.find_by_range(year)
|
||||
AcademicYear.of_year(year)
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
|||
|
|
@ -1,13 +1,14 @@
|
|||
class SurveyItemValues
|
||||
attr_reader :row, :headers, :survey_items, :schools
|
||||
attr_reader :row, :headers, :survey_items, :schools, :academic_years
|
||||
|
||||
def initialize(row:, headers:, survey_items:, schools:)
|
||||
def initialize(row:, headers:, survey_items:, schools:, academic_years: AcademicYear.all)
|
||||
@row = row
|
||||
# Remove any newlines in headers
|
||||
headers = headers.map { |item| item.delete("\n") if item.present? }
|
||||
@headers = include_all_headers(headers:)
|
||||
@survey_items = survey_items
|
||||
@schools = schools
|
||||
@academic_years = academic_years
|
||||
|
||||
copy_likert_scores_from_variant_survey_items
|
||||
row["Income"] = income
|
||||
|
|
@ -59,7 +60,10 @@ class SurveyItemValues
|
|||
end
|
||||
|
||||
def academic_year
|
||||
@academic_year ||= AcademicYear.find_by_date recorded_date
|
||||
@academic_year ||= begin
|
||||
range = AcademicYear.range_from_date(recorded_date, academic_years.map(&:range))
|
||||
academic_years.find { |item| item.range == range }
|
||||
end
|
||||
end
|
||||
|
||||
def survey_item_response(survey_item:)
|
||||
|
|
|
|||
|
|
@ -9,7 +9,8 @@ class SurveyResponsesDataLoader
|
|||
|
||||
file.lazy.each_slice(500) do |lines|
|
||||
survey_item_responses = CSV.parse(lines.join, headers:).map do |row|
|
||||
process_row(row: SurveyItemValues.new(row:, headers: headers_array, survey_items: all_survey_items, schools:))
|
||||
process_row(row: SurveyItemValues.new(row:, headers: headers_array, survey_items: all_survey_items, schools:,
|
||||
academic_years:))
|
||||
end
|
||||
|
||||
SurveyItemResponse.import(
|
||||
|
|
@ -33,8 +34,8 @@ class SurveyResponsesDataLoader
|
|||
next unless line.present?
|
||||
|
||||
CSV.parse(line, headers:).map do |row|
|
||||
survey_item_responses <<
|
||||
process_row(row: SurveyItemValues.new(row:, headers: headers_array, survey_items: all_survey_items, schools:))
|
||||
survey_item_responses << process_row(row: SurveyItemValues.new(row:, headers: headers_array,
|
||||
survey_items: all_survey_items, schools:, academic_years:))
|
||||
end
|
||||
|
||||
row_count += 1
|
||||
|
|
@ -51,7 +52,7 @@ class SurveyResponsesDataLoader
|
|||
private
|
||||
|
||||
def schools
|
||||
@schools = School.school_by_dese_id
|
||||
@schools = School.by_dese_id
|
||||
end
|
||||
|
||||
def genders
|
||||
|
|
@ -74,6 +75,10 @@ class SurveyResponsesDataLoader
|
|||
@speds ||= Sped.by_designation
|
||||
end
|
||||
|
||||
def academic_years
|
||||
@academic_years ||= AcademicYear.all
|
||||
end
|
||||
|
||||
def process_row(row:)
|
||||
return unless row.dese_id?
|
||||
return unless row.school.present?
|
||||
|
|
@ -84,10 +89,7 @@ class SurveyResponsesDataLoader
|
|||
def process_survey_items(row:)
|
||||
student = Student.find_or_create_by(response_id: row.response_id, lasid: row.lasid)
|
||||
student.races.delete_all
|
||||
tmp_races = row.races.map do |race|
|
||||
races[race]
|
||||
end
|
||||
|
||||
tmp_races = row.races.map { |race| races[race] }
|
||||
student.races += tmp_races
|
||||
|
||||
row
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue