Split academic year into seasons if the academic year's range is

initialized with a season, e.g. "2024-25 Fall".  Update scrapers for
admin data, enrollment and staffing to use the new range standard
correctly.  Update the loaders for admin data, enrollment and staffing
so that they populate all seasons in a given year.  So admin data for
2024-25 gets loaded into "2024-25 Fall" and "2024-25 Spring".  Add tests
for the new range format.  Set the default cutoff for the start of the Spring season to the last Sunday in February.
This commit is contained in:
Nelson Jovel 2024-04-18 11:35:56 -07:00
parent 996bb01d0b
commit 33da0859b9
43 changed files with 62404 additions and 28698 deletions

View file

@ -48,7 +48,7 @@ class Cleaner
output << school_name if schools.length == 1
output << survey_type.to_s
output << "Part-" + part unless part.nil?
output << range
output << range.parameterize
output << "csv"
output.join(".")
end
@ -77,7 +77,7 @@ class Cleaner
file.lazy.each_slice(1000) do |lines|
CSV.parse(lines.join, headers:).map do |row|
values = SurveyItemValues.new(row:, headers:,
survey_items: all_survey_items, schools:)
survey_items: all_survey_items, schools:, academic_years:)
next unless values.valid_school?
data << values
@ -89,6 +89,10 @@ class Cleaner
private
# Memoizes the result of AcademicYear.all on this instance so the same
# collection can be handed to every SurveyItemValues built while cleaning.
def academic_years
@academic_years ||= AcademicYear.all
end
def include_all_headers(headers:)
alternates = headers.filter(&:present?)
.filter { |header| header.match?(/^[st]-\w*-\w*-1$/i) }
@ -120,7 +124,7 @@ class Cleaner
end
def schools
@schools ||= School.school_by_dese_id
@schools ||= School.by_dese_id
end
def genders

View file

@ -1,15 +1,16 @@
module Dese
class Loader
@memo = Hash.new
@memo = {}
def self.load_data(filepath:)
admin_data_values = []
@memo = Hash.new
schools = School.school_by_dese_id
@memo = {}
schools = School.by_dese_id
CSV.parse(File.read(filepath), headers: true) do |row|
score = likert_score(row:)
next unless valid_likert_score(likert_score: score)
admin_data_values << create_admin_data_value(row:, score:, schools:)
values = create_admin_data_value(row:, score:, schools:)
admin_data_values.concat(values) if values
end
AdminDataValue.import(admin_data_values.flatten.compact, batch_size: 1_000, on_duplicate_key_update: :all)
@ -38,40 +39,47 @@ module Dese
row["Admin Data Item"] || row["Item ID"] || row["Item Id"] || row["Item ID"]
end
# these three methods do the memoization
# these two methods do the memoization
def self.find_admin_data_item(admin_data_item_id:)
return @memo["admin" + admin_data_item_id] if @memo.key?("admin" + admin_data_item_id)
@memo["admin" + admin_data_item_id] ||= AdminDataItem.find_by_admin_data_item_id(admin_data_item_id)
end
def self.find_ay(ay:)
def self.find_ays(ay:)
return @memo["year" + ay] if @memo.key?("year" + ay)
@memo["year" + ay] ||= AcademicYear.find_by_range(ay)
@memo["year" + ay] ||= AcademicYear.of_year(ay)
end
# Builds the AdminDataValue records for one CSV row, one per academic year
# (season) that matches the row's year.
#
# For each matching academic year: if a value already exists for the
# school/item/year it is updated with +score+ and saved immediately;
# otherwise a new, unsaved AdminDataValue is built for the caller to import.
#
# @param row the parsed CSV row
# @param score the likert score to store
# @param schools lookup of schools, indexed by the integer DESE id
# @return [Array<AdminDataValue>, nil] the new unsaved records, or nil when
#   the row is skipped (unknown school, blank item id, blank or unknown year)
def self.create_admin_data_value(row:, score:, schools:)
  school = schools[dese_id(row:).to_i]
  return if school.nil?

  # Guard BEFORE the memoized lookups: both build their cache key with
  # String#+, which raises TypeError when handed nil.
  admin_data_item_id = admin_data_item(row:)
  return if admin_data_item_id.blank?

  year = ay(row:)
  return if year.blank?

  academic_years = find_ays(ay: year)
  return unless academic_years.size.positive?

  admin_data_item = find_admin_data_item(admin_data_item_id:)

  academic_years.filter_map do |academic_year|
    existing = AdminDataValue.find_by(academic_year:, school:, admin_data_item:)

    if existing.present?
      existing.likert_score = score
      existing.save
      nil # already persisted; nothing to hand back for bulk import
    else
      AdminDataValue.new(
        likert_score: score,
        academic_year:,
        school:,
        admin_data_item:
      )
    end
  end
end
private_class_method :valid_likert_score

View file

@ -2,7 +2,7 @@ module Dese
module Scraper
DELAY = 20 # The dese site will block you if you hit it too many times in a short period of time
Prerequisites = Struct.new('Prerequisites', :filepath, :url, :selectors, :submit_id, :admin_data_item_id,
Prerequisites = Struct.new("Prerequisites", :filepath, :url, :selectors, :submit_id, :admin_data_item_id,
:calculation)
def reverse_score(likert_score:)
return nil unless likert_score.present?
@ -14,6 +14,9 @@ module Dese
def run
academic_years = AcademicYear.all.order(range: :DESC)
.map(&:range_without_season)
.uniq
.map { |range| AcademicYear.new(range:) }
academic_years.each do |academic_year|
prerequisites = yield academic_year
@ -21,7 +24,7 @@ module Dese
selectors: prerequisites.selectors,
submit_id: prerequisites.submit_id)
unless document.nil?
write_csv(document:, filepath: prerequisites.filepath, range: academic_year.range, id: prerequisites.admin_data_item_id,
write_csv(document:, filepath: prerequisites.filepath, range: academic_year.range_without_season, id: prerequisites.admin_data_item_id,
calculation: prerequisites.calculation)
end
end
@ -46,26 +49,26 @@ module Dese
end
def write_headers(filepath:, headers:)
CSV.open(filepath, 'w') do |csv|
CSV.open(filepath, "w") do |csv|
csv << headers
end
end
def write_csv(document:, filepath:, range:, id:, calculation:)
table = document.css('tr')
headers = document.css('.sorting')
table = document.css("tr")
headers = document.css(".sorting")
header_hash = headers.each_with_index.map { |header, index| [header.text, index] }.to_h
CSV.open(filepath, 'a') do |csv|
CSV.open(filepath, "a") do |csv|
table.each do |row|
items = row.css('td').map(&:text)
items = row.css("td").map(&:text)
dese_id = items[1].to_i
next if dese_id.nil? || dese_id.zero?
raw_likert_score = calculation.call(header_hash, items)
raw_likert_score ||= 'NA'
raw_likert_score ||= "NA"
likert_score = raw_likert_score
if likert_score != 'NA'
if likert_score != "NA"
likert_score = 5 if likert_score > 5
likert_score = 1 if likert_score < 1
likert_score = likert_score.round(2)

View file

@ -7,46 +7,23 @@ class EnrollmentLoader
enrollments = []
CSV.parse(File.read(filepath), headers: true) do |row|
row = EnrollmentRowValues.new(row:)
next unless row.school.present? && row.academic_year.present?
next unless row.school.present? && row.academic_years.size.positive?
enrollments << create_enrollment_entry(row:)
end
# It's possible that instead of updating all columns on duplicate key, we could just update the student columns and leave total_teachers alone. Right now enrollment data loads before staffing data so it works correctly.
Respondent.import enrollments, batch_size: 1000,
on_duplicate_key_update: %i[pk k one two three four five six seven eight nine ten eleven twelve total_students]
end
private
def self.create_enrollment_entry(row:)
respondent = Respondent.find_or_initialize_by(school: row.school, academic_year: row.academic_year)
respondent.pk = row.pk
respondent.k = row.k
respondent.one = row.one
respondent.two = row.two
respondent.three = row.three
respondent.four = row.four
respondent.five = row.five
respondent.six = row.six
respondent.seven = row.seven
respondent.eight = row.eight
respondent.nine = row.nine
respondent.ten = row.ten
respondent.eleven = row.eleven
respondent.twelve = row.twelve
respondent.total_students = row.total_students
respondent
Respondent.import enrollments.flatten, batch_size: 1000,
on_duplicate_key_update: %i[pk k one two three four five six seven eight nine ten eleven twelve total_students]
end
def self.clone_previous_year_data
years = AcademicYear.order(:range).last(2)
previous_year = years.first
current_year = years.last
respondents = []
School.all.each do |school|
Respondent.where(school:, academic_year: previous_year).each do |respondent|
current_respondent = Respondent.find_or_initialize_by(school:, academic_year: current_year)
academic_years_without_data(school:).each do |academic_year|
respondent = Respondent.where(school:, academic_year: last_academic_year_with_data(school:)).first
next if respondent.nil?
current_respondent = Respondent.find_or_initialize_by(school:, academic_year:)
current_respondent.pk = respondent.pk
current_respondent.k = respondent.k
current_respondent.one = respondent.one
@ -65,10 +42,54 @@ class EnrollmentLoader
respondents << current_respondent
end
end
Respondent.import respondents, batch_size: 1000, on_duplicate_key_update: [:total_teachers]
Respondent.import respondents,
batch_size: 1000, on_duplicate_key_ignore: true
end
private
# Produces one Respondent per academic year of the row, each carrying the
# row's per-grade enrollment counts and student total. Existing respondents
# for the school/year are reused via find_or_initialize_by; nothing is saved
# here.
#
# @param row an enrollment row exposing school, academic_years and the
#   enrollment columns listed below
# @return [Array<Respondent>] one (possibly unsaved) respondent per year
def self.create_enrollment_entry(row:)
  enrollment_columns = %i[pk k one two three four five six seven eight nine ten eleven twelve total_students]

  row.academic_years.map do |academic_year|
    Respondent.find_or_initialize_by(school: row.school, academic_year:).tap do |respondent|
      enrollment_columns.each do |column|
        respondent.public_send("#{column}=", row.public_send(column))
      end
    end
  end
end
# Returns the first academic year, walking years in descending `range`
# order, for which the school has at least one respondent with a positive
# total_students. Returns nil when no year qualifies.
#
# @param school the school whose respondents are inspected
def self.last_academic_year_with_data(school:)
  AcademicYear.all.order(range: :DESC).find do |academic_year|
    Respondent.where(school:, academic_year:).any? do |respondent|
      # Safe navigation: a respondent with no total_students recorded (nil)
      # counts as "no data" instead of raising NoMethodError.
      respondent.total_students&.positive?
    end
  end
end
# Returns every academic year (in descending `range` order) for which the
# school has no respondent with a positive total_students.
#
# @param school the school whose respondents are inspected
# @return the academic years still lacking enrollment data
def self.academic_years_without_data(school:)
  AcademicYear.all.order(range: :DESC).reject do |academic_year|
    Respondent.where(school:, academic_year:).any? do |respondent|
      # Safe navigation: a nil total_students means "no data" rather than a
      # NoMethodError.
      respondent.total_students&.positive?
    end
  end
end
private_class_method :create_enrollment_entry
private_class_method :last_academic_year_with_data
private_class_method :academic_years_without_data
end
class EnrollmentRowValues
@ -85,10 +106,10 @@ class EnrollmentRowValues
end
end
def academic_year
@academic_year ||= begin
def academic_years
@academic_years ||= begin
year = row["Academic Year"]
AcademicYear.find_by_range(year)
AcademicYear.of_year(year)
end
end

View file

@ -7,24 +7,26 @@ class StaffingLoader
respondents = []
CSV.parse(File.read(filepath), headers: true) do |row|
row = StaffingRowValues.new(row:)
next unless row.school.present? && row.academic_year.present?
next unless row.school.present? && row.academic_years.size.positive?
respondents << create_staffing_entry(row:)
respondents.concat(create_staffing_entry(row:))
end
Respondent.import respondents, batch_size: 1000, on_duplicate_key_update: [:total_teachers]
end
# Clones staffing and enrollment data from previous year
def self.clone_previous_year_data
years = AcademicYear.order(:range).last(2)
previous_year = years.first
current_year = years.last
respondents = []
School.all.each do |school|
Respondent.where(school:, academic_year: previous_year).each do |respondent|
current_respondent = Respondent.find_or_initialize_by(school:, academic_year: current_year)
current_respondent.total_teachers = respondent.total_teachers
academic_years_without_data(school:).each do |academic_year|
year_with_data = last_academic_year_with_data(school:)
respondent = Respondent.where(school:, academic_year: year_with_data).first
next if respondent.nil?
current_respondent = Respondent.find_or_initialize_by(school:, academic_year:)
if current_respondent.total_teachers.nil? || current_respondent.total_teachers.zero?
current_respondent.total_teachers = respondent.total_teachers
end
respondents << current_respondent
end
end
@ -34,11 +36,29 @@ class StaffingLoader
private
def self.create_staffing_entry(row:)
respondent = Respondent.find_or_initialize_by(school: row.school, academic_year: row.academic_year)
respondent.total_teachers = row.fte_count
respondent
row.academic_years.map do |academic_year|
respondent = Respondent.find_or_initialize_by(school: row.school, academic_year:)
respondent.total_teachers = row.fte_count
respondent
end
end
# Returns the first academic year, walking years in descending `range`
# order, whose respondent for the school has a positive total_teachers.
# Returns nil when no year qualifies.
#
# @param school the school whose respondent is inspected
def self.last_academic_year_with_data(school:)
  years_descending = AcademicYear.all.order(range: :DESC)

  years_descending.find do |academic_year|
    Respondent.find_by(school:, academic_year:)&.total_teachers&.positive?
  end
end
# Returns every academic year (in descending `range` order) for which the
# school has no respondent, or a respondent whose total_teachers is nil or
# zero.
#
# @param school the school whose respondent is inspected
# @return the academic years still lacking staffing data
def self.academic_years_without_data(school:)
  AcademicYear.all.order(range: :DESC).select do |academic_year|
    teacher_count = Respondent.find_by(school:, academic_year:)&.total_teachers
    teacher_count.nil? || teacher_count.zero?
  end
end
private_class_method :last_academic_year_with_data
private_class_method :academic_years_without_data
private_class_method :create_staffing_entry
end
@ -56,10 +76,10 @@ class StaffingRowValues
end
end
# Academic years (seasons) matching this row's "Academic Year" column, as
# returned by AcademicYear.of_year. The result is memoized on the instance
# under the plural name so it matches the method name.
def academic_years
  @academic_years ||= AcademicYear.of_year(row["Academic Year"])
end

View file

@ -1,13 +1,14 @@
class SurveyItemValues
attr_reader :row, :headers, :survey_items, :schools
attr_reader :row, :headers, :survey_items, :schools, :academic_years
def initialize(row:, headers:, survey_items:, schools:)
def initialize(row:, headers:, survey_items:, schools:, academic_years: AcademicYear.all)
@row = row
# Remove any newlines in headers
headers = headers.map { |item| item.delete("\n") if item.present? }
@headers = include_all_headers(headers:)
@survey_items = survey_items
@schools = schools
@academic_years = academic_years
copy_likert_scores_from_variant_survey_items
row["Income"] = income
@ -59,7 +60,10 @@ class SurveyItemValues
end
def academic_year
@academic_year ||= AcademicYear.find_by_date recorded_date
@academic_year ||= begin
range = AcademicYear.range_from_date(recorded_date, academic_years.map(&:range))
academic_years.find { |item| item.range == range }
end
end
def survey_item_response(survey_item:)

View file

@ -9,7 +9,8 @@ class SurveyResponsesDataLoader
file.lazy.each_slice(500) do |lines|
survey_item_responses = CSV.parse(lines.join, headers:).map do |row|
process_row(row: SurveyItemValues.new(row:, headers: headers_array, survey_items: all_survey_items, schools:))
process_row(row: SurveyItemValues.new(row:, headers: headers_array, survey_items: all_survey_items, schools:,
academic_years:))
end
SurveyItemResponse.import(
@ -33,8 +34,8 @@ class SurveyResponsesDataLoader
next unless line.present?
CSV.parse(line, headers:).map do |row|
survey_item_responses <<
process_row(row: SurveyItemValues.new(row:, headers: headers_array, survey_items: all_survey_items, schools:))
survey_item_responses << process_row(row: SurveyItemValues.new(row:, headers: headers_array,
survey_items: all_survey_items, schools:, academic_years:))
end
row_count += 1
@ -51,7 +52,7 @@ class SurveyResponsesDataLoader
private
# Schools lookup from School.by_dese_id, memoized on the instance so the
# lookup is built once instead of on every call (it is passed to
# SurveyItemValues for every row).
def schools
  @schools ||= School.by_dese_id
end
def genders
@ -74,6 +75,10 @@ class SurveyResponsesDataLoader
@speds ||= Sped.by_designation
end
# Memoizes the result of AcademicYear.all on this instance; the same
# collection is passed to each SurveyItemValues built by the loader.
def academic_years
@academic_years ||= AcademicYear.all
end
def process_row(row:)
return unless row.dese_id?
return unless row.school.present?
@ -84,10 +89,7 @@ class SurveyResponsesDataLoader
def process_survey_items(row:)
student = Student.find_or_create_by(response_id: row.response_id, lasid: row.lasid)
student.races.delete_all
tmp_races = row.races.map do |race|
races[race]
end
tmp_races = row.races.map { |race| races[race] }
student.races += tmp_races
row