Split academic year into seasons if the academic year's range is

initialized with a season, e.g. "2024-25 Fall". Update the scrapers for admin data, enrollment and staffing to use the new range standard correctly. Update the loaders for admin data, enrollment and staffing so that they populate all seasons in a given year. So admin data for 2024-25 gets loaded into both "2024-25 Fall" and "2024-25 Spring". Add tests for the new range format. Set the default cutoff for the start of the Spring season to the last Sunday in February.
2026-03-07 21:48:16 -08:00 · 2024-04-18 11:35:56 -07:00 · 2024-04-18 11:35:56 -07:00 · 33da0859b9
commit 33da0859b9
parent 996bb01d0b
43 changed files with 62404 additions and 28698 deletions
--- a/app/services/cleaner.rb
+++ b/app/services/cleaner.rb
@ -48,7 +48,7 @@ class Cleaner
    output << school_name if schools.length == 1
    output << survey_type.to_s
    output << "Part-" + part unless part.nil?
-    output << range
+    output << range.parameterize
    output << "csv"
    output.join(".")
  end
@ -77,7 +77,7 @@ class Cleaner
    file.lazy.each_slice(1000) do |lines|
      CSV.parse(lines.join, headers:).map do |row|
        values = SurveyItemValues.new(row:, headers:,
-                                      survey_items: all_survey_items, schools:)
+                                      survey_items: all_survey_items, schools:, academic_years:)
        next unless values.valid_school?

        data << values
@ -89,6 +89,10 @@ class Cleaner

  private

+  def academic_years
+    @academic_years ||= AcademicYear.all
+  end
+
  def include_all_headers(headers:)
    alternates = headers.filter(&:present?)
                        .filter { |header| header.match?(/^[st]-\w*-\w*-1$/i) }
@ -120,7 +124,7 @@ class Cleaner
  end

  def schools
-    @schools ||= School.school_by_dese_id
+    @schools ||= School.by_dese_id
  end

  def genders
--- a/app/services/dese/loader.rb
+++ b/app/services/dese/loader.rb
@ -1,15 +1,16 @@
 module Dese
  class Loader
-    @memo = Hash.new
+    @memo = {}
    def self.load_data(filepath:)
      admin_data_values = []
-      @memo = Hash.new
-      schools = School.school_by_dese_id
+      @memo = {}
+      schools = School.by_dese_id
      CSV.parse(File.read(filepath), headers: true) do |row|
        score = likert_score(row:)
        next unless valid_likert_score(likert_score: score)

-        admin_data_values << create_admin_data_value(row:, score:, schools:)
+        values = create_admin_data_value(row:, score:, schools:)
+        admin_data_values.concat(values) if values
      end

      AdminDataValue.import(admin_data_values.flatten.compact, batch_size: 1_000, on_duplicate_key_update: :all)
@ -38,40 +39,47 @@ module Dese
      row["Admin Data Item"] || row["Item ID"] || row["Item Id"] || row["Item  ID"]
    end

-    # these three methods do the memoization
+    # these two methods do the memoization
    def self.find_admin_data_item(admin_data_item_id:)
      return @memo["admin" + admin_data_item_id] if @memo.key?("admin" + admin_data_item_id)
+
      @memo["admin" + admin_data_item_id] ||= AdminDataItem.find_by_admin_data_item_id(admin_data_item_id)
    end

-    def self.find_ay(ay:)
+    def self.find_ays(ay:)
      return @memo["year" + ay] if @memo.key?("year" + ay)
-      @memo["year" + ay] ||= AcademicYear.find_by_range(ay)
+
+      @memo["year" + ay] ||= AcademicYear.of_year(ay)
    end

    def self.create_admin_data_value(row:, score:, schools:)
      school = schools[dese_id(row:).to_i]
      admin_data_item_id = admin_data_item(row:)
      admin_data_item = find_admin_data_item(admin_data_item_id:)
-      academic_year = find_ay(ay: ay(row:))
+      academic_years = find_ays(ay: ay(row:))

      return if school.nil?
      return if admin_data_item_id.nil? || admin_data_item_id.blank?
+      return unless academic_years.size.positive?

-      admin_data_value = AdminDataValue.find_by(academic_year:, school:, admin_data_item:)
+      out = []
+      academic_years.each do |academic_year|
+        admin_data_value = AdminDataValue.find_by(academic_year:, school:, admin_data_item:)

-      if admin_data_value.present?
-        admin_data_value.likert_score = score
-        admin_data_value.save
-        nil
-      else
-        AdminDataValue.new(
-          likert_score: score,
-          academic_year:,
-          school:,
-          admin_data_item:
-        )
+        if admin_data_value.present?
+          admin_data_value.likert_score = score
+          admin_data_value.save
+          []
+        else
+          out << AdminDataValue.new(
+            likert_score: score,
+            academic_year:,
+            school:,
+            admin_data_item:
+          )
+        end
      end
+      out
    end

    private_class_method :valid_likert_score
--- a/app/services/dese/scraper.rb
+++ b/app/services/dese/scraper.rb
@ -2,7 +2,7 @@ module Dese
  module Scraper
    DELAY = 20 # The dese site will block you if you hit it too many times in a short period of time

-    Prerequisites = Struct.new('Prerequisites', :filepath, :url, :selectors, :submit_id, :admin_data_item_id,
+    Prerequisites = Struct.new("Prerequisites", :filepath, :url, :selectors, :submit_id, :admin_data_item_id,
                               :calculation)
    def reverse_score(likert_score:)
      return nil unless likert_score.present?
@ -14,6 +14,9 @@ module Dese

    def run
      academic_years = AcademicYear.all.order(range: :DESC)
+                                   .map(&:range_without_season)
+                                   .uniq
+                                   .map { |range| AcademicYear.new(range:) }
      academic_years.each do |academic_year|
        prerequisites = yield academic_year

@ -21,7 +24,7 @@ module Dese
                            selectors: prerequisites.selectors,
                            submit_id: prerequisites.submit_id)
        unless document.nil?
-          write_csv(document:, filepath: prerequisites.filepath, range: academic_year.range, id: prerequisites.admin_data_item_id,
+          write_csv(document:, filepath: prerequisites.filepath, range: academic_year.range_without_season, id: prerequisites.admin_data_item_id,
                    calculation: prerequisites.calculation)
        end
      end
@ -46,26 +49,26 @@ module Dese
    end

    def write_headers(filepath:, headers:)
-      CSV.open(filepath, 'w') do |csv|
+      CSV.open(filepath, "w") do |csv|
        csv << headers
      end
    end

    def write_csv(document:, filepath:, range:, id:, calculation:)
-      table = document.css('tr')
-      headers = document.css('.sorting')
+      table = document.css("tr")
+      headers = document.css(".sorting")
      header_hash = headers.each_with_index.map { |header, index| [header.text, index] }.to_h

-      CSV.open(filepath, 'a') do |csv|
+      CSV.open(filepath, "a") do |csv|
        table.each do |row|
-          items = row.css('td').map(&:text)
+          items = row.css("td").map(&:text)
          dese_id = items[1].to_i
          next if dese_id.nil? || dese_id.zero?

          raw_likert_score = calculation.call(header_hash, items)
-          raw_likert_score ||= 'NA'
+          raw_likert_score ||= "NA"
          likert_score = raw_likert_score
-          if likert_score != 'NA'
+          if likert_score != "NA"
            likert_score = 5 if likert_score > 5
            likert_score = 1 if likert_score < 1
            likert_score = likert_score.round(2)
--- a/app/services/enrollment_loader.rb
+++ b/app/services/enrollment_loader.rb
@ -7,46 +7,23 @@ class EnrollmentLoader
    enrollments = []
    CSV.parse(File.read(filepath), headers: true) do |row|
      row = EnrollmentRowValues.new(row:)
-      next unless row.school.present? && row.academic_year.present?
+      next unless row.school.present? && row.academic_years.size.positive?

      enrollments << create_enrollment_entry(row:)
    end

-    # It's possible that instead of updating all columns on duplicate key, we could just update the student columns and leave total_teachers alone. Right now enrollment data loads before staffing data so it works correctly.
-    Respondent.import enrollments, batch_size: 1000,
-                                   on_duplicate_key_update: %i[pk k one two three four five six seven eight nine ten eleven twelve total_students]
-  end
-
-  private
-
-  def self.create_enrollment_entry(row:)
-    respondent = Respondent.find_or_initialize_by(school: row.school, academic_year: row.academic_year)
-    respondent.pk = row.pk
-    respondent.k = row.k
-    respondent.one = row.one
-    respondent.two = row.two
-    respondent.three = row.three
-    respondent.four = row.four
-    respondent.five = row.five
-    respondent.six = row.six
-    respondent.seven = row.seven
-    respondent.eight = row.eight
-    respondent.nine = row.nine
-    respondent.ten = row.ten
-    respondent.eleven = row.eleven
-    respondent.twelve = row.twelve
-    respondent.total_students = row.total_students
-    respondent
+    Respondent.import enrollments.flatten, batch_size: 1000,
+                                           on_duplicate_key_update: %i[pk k one two three four five six seven eight nine ten eleven twelve total_students]
  end

  def self.clone_previous_year_data
-    years = AcademicYear.order(:range).last(2)
-    previous_year = years.first
-    current_year = years.last
    respondents = []
    School.all.each do |school|
-      Respondent.where(school:, academic_year: previous_year).each do |respondent|
-        current_respondent = Respondent.find_or_initialize_by(school:, academic_year: current_year)
+      academic_years_without_data(school:).each do |academic_year|
+        respondent = Respondent.where(school:, academic_year: last_academic_year_with_data(school:)).first
+        next if respondent.nil?
+
+        current_respondent = Respondent.find_or_initialize_by(school:, academic_year:)
        current_respondent.pk = respondent.pk
        current_respondent.k = respondent.k
        current_respondent.one = respondent.one
@ -65,10 +42,54 @@ class EnrollmentLoader
        respondents << current_respondent
      end
    end
-    Respondent.import respondents, batch_size: 1000, on_duplicate_key_update: [:total_teachers]
+
+    Respondent.import respondents,
+                      batch_size: 1000, on_duplicate_key_ignore: true
+  end
+
+  private
+
+  def self.create_enrollment_entry(row:)
+    row.academic_years.map do |academic_year|
+      respondent = Respondent.find_or_initialize_by(school: row.school, academic_year:)
+      respondent.pk = row.pk
+      respondent.k = row.k
+      respondent.one = row.one
+      respondent.two = row.two
+      respondent.three = row.three
+      respondent.four = row.four
+      respondent.five = row.five
+      respondent.six = row.six
+      respondent.seven = row.seven
+      respondent.eight = row.eight
+      respondent.nine = row.nine
+      respondent.ten = row.ten
+      respondent.eleven = row.eleven
+      respondent.twelve = row.twelve
+      respondent.total_students = row.total_students
+      respondent
+    end
+  end
+
+  def self.last_academic_year_with_data(school:)
+    AcademicYear.all.order(range: :DESC).find do |academic_year|
+      Respondent.where(school:, academic_year:).any? do |respondent|
+        respondent.total_students.positive?
+      end
+    end
+  end
+
+  def self.academic_years_without_data(school:)
+    AcademicYear.all.order(range: :DESC).reject do |academic_year|
+      Respondent.where(school:, academic_year:).any? do |respondent|
+        respondent.total_students.positive?
+      end
+    end
  end

  private_class_method :create_enrollment_entry
+  private_class_method :last_academic_year_with_data
+  private_class_method :academic_years_without_data
 end

 class EnrollmentRowValues
@ -85,10 +106,10 @@ class EnrollmentRowValues
    end
  end

-  def academic_year
-    @academic_year ||= begin
+  def academic_years
+    @academic_years ||= begin
      year = row["Academic Year"]
-      AcademicYear.find_by_range(year)
+      AcademicYear.of_year(year)
    end
  end

--- a/app/services/staffing_loader.rb
+++ b/app/services/staffing_loader.rb
@ -7,24 +7,26 @@ class StaffingLoader
    respondents = []
    CSV.parse(File.read(filepath), headers: true) do |row|
      row = StaffingRowValues.new(row:)
-      next unless row.school.present? && row.academic_year.present?
+      next unless row.school.present? && row.academic_years.size.positive?

-      respondents << create_staffing_entry(row:)
+      respondents.concat(create_staffing_entry(row:))
    end

    Respondent.import respondents, batch_size: 1000, on_duplicate_key_update: [:total_teachers]
  end

-  # Clones staffing and enrollment data from previous year
  def self.clone_previous_year_data
-    years = AcademicYear.order(:range).last(2)
-    previous_year = years.first
-    current_year = years.last
    respondents = []
    School.all.each do |school|
-      Respondent.where(school:, academic_year: previous_year).each do |respondent|
-        current_respondent = Respondent.find_or_initialize_by(school:, academic_year: current_year)
-        current_respondent.total_teachers = respondent.total_teachers
+      academic_years_without_data(school:).each do |academic_year|
+        year_with_data = last_academic_year_with_data(school:)
+        respondent = Respondent.where(school:, academic_year: year_with_data).first
+        next if respondent.nil?
+
+        current_respondent = Respondent.find_or_initialize_by(school:, academic_year:)
+        if current_respondent.total_teachers.nil? || current_respondent.total_teachers.zero?
+          current_respondent.total_teachers = respondent.total_teachers
+        end
        respondents << current_respondent
      end
    end
@ -34,11 +36,29 @@ class StaffingLoader
  private

  def self.create_staffing_entry(row:)
-    respondent = Respondent.find_or_initialize_by(school: row.school, academic_year: row.academic_year)
-    respondent.total_teachers = row.fte_count
-    respondent
+    row.academic_years.map do |academic_year|
+      respondent = Respondent.find_or_initialize_by(school: row.school, academic_year:)
+      respondent.total_teachers = row.fte_count
+      respondent
+    end
  end

+  def self.last_academic_year_with_data(school:)
+    AcademicYear.all.order(range: :DESC).find do |academic_year|
+      respondents = Respondent.find_by(school:, academic_year:)
+      respondents&.total_teachers&.positive?
+    end
+  end
+
+  def self.academic_years_without_data(school:)
+    AcademicYear.all.order(range: :DESC).select do |academic_year|
+      respondents = Respondent.find_by(school:, academic_year:)
+      respondents.nil? || respondents.total_teachers.nil? || respondents.total_teachers.zero?
+    end
+  end
+
+  private_class_method :last_academic_year_with_data
+  private_class_method :academic_years_without_data
  private_class_method :create_staffing_entry
 end

@ -56,10 +76,10 @@ class StaffingRowValues
    end
  end

-  def academic_year
+  def academic_years
    @academic_year ||= begin
      year = row["Academic Year"]
-      AcademicYear.find_by_range(year)
+      AcademicYear.of_year(year)
    end
  end

--- a/app/services/survey_item_values.rb
+++ b/app/services/survey_item_values.rb
@ -1,13 +1,14 @@
 class SurveyItemValues
-  attr_reader :row, :headers, :survey_items, :schools
+  attr_reader :row, :headers, :survey_items, :schools, :academic_years

-  def initialize(row:, headers:, survey_items:, schools:)
+  def initialize(row:, headers:, survey_items:, schools:, academic_years: AcademicYear.all)
    @row = row
    # Remove any newlines in headers
    headers = headers.map { |item| item.delete("\n") if item.present? }
    @headers = include_all_headers(headers:)
    @survey_items = survey_items
    @schools = schools
+    @academic_years = academic_years

    copy_likert_scores_from_variant_survey_items
    row["Income"] = income
@ -59,7 +60,10 @@ class SurveyItemValues
  end

  def academic_year
-    @academic_year ||= AcademicYear.find_by_date recorded_date
+    @academic_year ||= begin
+      range = AcademicYear.range_from_date(recorded_date, academic_years.map(&:range))
+      academic_years.find { |item| item.range == range }
+    end
  end

  def survey_item_response(survey_item:)
--- a/app/services/survey_responses_data_loader.rb
+++ b/app/services/survey_responses_data_loader.rb
@ -9,7 +9,8 @@ class SurveyResponsesDataLoader

      file.lazy.each_slice(500) do |lines|
        survey_item_responses = CSV.parse(lines.join, headers:).map do |row|
-          process_row(row: SurveyItemValues.new(row:, headers: headers_array, survey_items: all_survey_items, schools:))
+          process_row(row: SurveyItemValues.new(row:, headers: headers_array, survey_items: all_survey_items, schools:,
+                                                academic_years:))
        end

        SurveyItemResponse.import(
@ -33,8 +34,8 @@ class SurveyResponsesDataLoader
      next unless line.present?

      CSV.parse(line, headers:).map do |row|
-        survey_item_responses <<
-          process_row(row: SurveyItemValues.new(row:, headers: headers_array, survey_items: all_survey_items, schools:))
+        survey_item_responses << process_row(row: SurveyItemValues.new(row:, headers: headers_array,
+                                                                       survey_items: all_survey_items, schools:, academic_years:))
      end

      row_count += 1
@ -51,7 +52,7 @@ class SurveyResponsesDataLoader
  private

  def schools
-    @schools = School.school_by_dese_id
+    @schools = School.by_dese_id
  end

  def genders
@ -74,6 +75,10 @@ class SurveyResponsesDataLoader
    @speds ||= Sped.by_designation
  end

+  def academic_years
+    @academic_years ||= AcademicYear.all
+  end
+
  def process_row(row:)
    return unless row.dese_id?
    return unless row.school.present?
@ -84,10 +89,7 @@ class SurveyResponsesDataLoader
  def process_survey_items(row:)
    student = Student.find_or_create_by(response_id: row.response_id, lasid: row.lasid)
    student.races.delete_all
-    tmp_races = row.races.map do |race|
-      races[race]
-    end
-
+    tmp_races = row.races.map { |race| races[race] }
    student.races += tmp_races

    row