Faster admin data loader + rename School.school_hash

This commit is contained in:
Gabe Farrell 2024-04-22 14:46:37 -04:00
parent 0965841566
commit b3e6efdb2e
7 changed files with 128 additions and 77 deletions

View file

@ -8,7 +8,7 @@ class School < ApplicationRecord
# Every school record must have a name.
validates :name, presence: true
# Schools sorted by name, ascending.
scope :alphabetic, -> { order(name: :asc) }
# Builds a Hash of every school keyed by its dese_id ({ dese_id => school }).
# This is the name used before the rename to school_by_dese_id.
scope :school_hash, -> { all.map { |school| [school.dese_id, school] }.to_h }
# Same lookup table as above under its descriptive name: dese_id => School.
# NOTE(review): the lambda maps over all rows and returns a Hash rather than a
# relation, so the result cannot be chained like the other scopes - confirm a
# plain class method would not be a better fit.
scope :school_by_dese_id, -> { all.map { |school| [school.dese_id, school] }.to_h }
# FriendlyId configuration; presumably derives the slug from the school name - TODO confirm.
include FriendlyId
friendly_id :name, use: [:slugged]
@ -16,7 +16,7 @@ class School < ApplicationRecord
# Looks up a school by its own qualtrics code, restricted to the district
# identified by the given district qualtrics code.
#
# district_code - value matched against districts.qualtrics_code
# school_code   - value passed to find_by_qualtrics_code on the joined query
#
# Returns whatever find_by_qualtrics_code yields for the district-scoped query.
def self.find_by_district_code_and_school_code(district_code, school_code)
  # The district filter is applied exactly once; chaining the identical
  # where clause a second time only duplicated the predicate.
  School
    .joins(:district)
    .where(districts: {qualtrics_code: district_code})
    .find_by_qualtrics_code(school_code)
end

View file

@ -120,7 +120,7 @@ class Cleaner
end
# Memoized lookup table of schools keyed by dese_id.
# The table is built by School.school_by_dese_id on first use and reused on
# every later call; the pre-rename School.school_hash is no longer referenced.
def schools
  @schools ||= School.school_by_dese_id
end
def genders

View file

@ -6,11 +6,12 @@ module Dese
def self.load_data(filepath:)
admin_data_values = []
@memo = Hash.new
schools = School.school_by_dese_id
CSV.parse(File.read(filepath), headers: true) do |row|
score = likert_score(row:)
next unless valid_likert_score(likert_score: score)
admin_data_values << create_admin_data_value(row:, score:)
admin_data_values << create_admin_data_value(row:, score:, schools:)
end
AdminDataValue.import(admin_data_values.flatten.compact, batch_size: 1_000, on_duplicate_key_update: :all)
@ -40,21 +41,18 @@ module Dese
end
# these three methods do the memoization
# Returns the school for the given dese_id, querying at most once per id.
# The result of School.find_by_dese_id (including a nil miss) is stored in
# @memo under the key "school<dese_id>" and served from there afterwards.
def self.find_school(dese_id:)
  cache_key = "school" + dese_id
  @memo.fetch(cache_key) do
    @memo[cache_key] = School.find_by_dese_id(dese_id.to_i)
  end
end
def self.find_admin_data_item(admin_data_item_id:)
return @memo["admin"+admin_data_item_id] if @memo.key? "admin"+admin_data_item_id
@memo["admin"+admin_data_item_id] ||= AdminDataItem.find_by_admin_data_item_id(admin_data_item_id)
end
def self.find_ay(ay:)
return @memo["year"+ay] if @memo.key? "year"+ay
@memo["year"+ay] ||= AcademicYear.find_by_range(ay)
return @memo["admin" + admin_data_item_id] if @memo.key?("admin" + admin_data_item_id)
@memo["admin" + admin_data_item_id] ||= AdminDataItem.find_by_admin_data_item_id(admin_data_item_id)
end
def self.create_admin_data_value(row:, score:)
school = find_school(dese_id: dese_id(row:))
def self.find_ay(ay:)
return @memo["year" + ay] if @memo.key?("year" + ay)
@memo["year" + ay] ||= AcademicYear.find_by_range(ay)
end
def self.create_admin_data_value(row:, score:, schools:)
school = schools[dese_id(row:).to_i]
admin_data_item_id = admin_data_item(row:)
admin_data_item = find_admin_data_item(admin_data_item_id:)
academic_year = find_ay(ay: ay(row:))
@ -73,7 +71,7 @@ module Dese
likert_score: score,
academic_year:,
school:,
admin_data_item:,
admin_data_item:
)
end
end

View file

@ -12,8 +12,12 @@ class SurveyResponsesDataLoader
survey_item_responses = CSV.parse(lines.join, headers:).map do |row|
process_row(row: SurveyItemValues.new(row:, headers: headers_array, survey_items: all_survey_items, schools:))
end
SurveyItemResponse.import survey_item_responses.compact.flatten, batch_size: BATCH_SIZE,
on_duplicate_key_update: :all
SurveyItemResponse.import(
survey_item_responses.compact.flatten,
batch_size: BATCH_SIZE,
on_duplicate_key_update: :all
)
end
end
end
@ -30,27 +34,33 @@ class SurveyResponsesDataLoader
next unless line.present?
CSV.parse(line, headers:).map do |row|
survey_item_responses << process_row(row: SurveyItemValues.new(row:, headers: headers_array,
survey_items: all_survey_items, schools:))
survey_item_responses <<
process_row(row: SurveyItemValues.new(row:, headers: headers_array, survey_items: all_survey_items, schools:))
end
row_count += 1
next unless row_count == BATCH_SIZE
SurveyItemResponse.import survey_item_responses.compact.flatten, batch_size: BATCH_SIZE,
on_duplicate_key_update: :all
SurveyItemResponse.import(
survey_item_responses.compact.flatten,
batch_size: BATCH_SIZE,
on_duplicate_key_update: :all
)
survey_item_responses = []
row_count = 0
end
SurveyItemResponse.import survey_item_responses.compact.flatten, batch_size: BATCH_SIZE,
on_duplicate_key_update: :all
SurveyItemResponse.import(
survey_item_responses.compact.flatten,
batch_size: BATCH_SIZE,
on_duplicate_key_update: :all
)
end
private
# Memoized lookup table of schools keyed by dese_id.
# This reader is evaluated for every parsed row (it is passed as `schools:`
# when building SurveyItemValues), so the table is built once via
# School.school_by_dese_id and then reused instead of reloading on each call.
def schools
  @schools ||= School.school_by_dese_id
end
def genders
@ -83,19 +93,27 @@ class SurveyResponsesDataLoader
def process_survey_items(row:)
student = Student.find_or_create_by(response_id: row.response_id, lasid: row.lasid)
student.races.delete_all
tmp_races = row.races.map { |race| races[race] }
tmp_races = row.races.map do |race|
races[race]
end
student.races += tmp_races
row.survey_items.map do |survey_item|
likert_score = row.likert_score(survey_item_id: survey_item.survey_item_id) || next
row
.survey_items
.map do |survey_item|
likert_score = row.likert_score(survey_item_id: survey_item.survey_item_id) || next
unless likert_score.valid_likert_score?
puts "Response ID: #{row.response_id}, Likert score: #{likert_score} rejected" unless likert_score == "NA"
next
unless likert_score.valid_likert_score?
puts("Response ID: #{row.response_id}, Likert score: #{likert_score} rejected") unless likert_score == "NA"
next
end
response = row.survey_item_response(survey_item:)
create_or_update_response(survey_item_response: response, likert_score:, row:, survey_item:, student:)
end
response = row.survey_item_response(survey_item:)
create_or_update_response(survey_item_response: response, likert_score:, row:, survey_item:, student:)
end.compact
.compact
end
def create_or_update_response(survey_item_response:, likert_score:, row:, survey_item:, student:)
@ -116,8 +134,20 @@ class SurveyResponsesDataLoader
survey_item_response.student = student
survey_item_response
else
SurveyItemResponse.new(response_id: row.response_id, academic_year: row.academic_year, school: row.school, survey_item:,
likert_score:, grade:, gender:, recorded_date: row.recorded_date, income:, ell:, sped:, student:)
SurveyItemResponse.new(
response_id: row.response_id,
academic_year: row.academic_year,
school: row.school,
survey_item:,
likert_score:,
grade:,
gender:,
recorded_date: row.recorded_date,
income:,
ell:,
sped:,
student:
)
end
end
@ -126,16 +156,18 @@ class SurveyResponsesDataLoader
end
# Extracts the survey item ids from a raw CSV header line.
#
# headers - String containing the header row of the CSV.
#
# Returns an Array with the header cells that start with "t-" or "s-",
# in their original order. Empty cells are ignored, and an empty header
# string yields an empty Array.
def get_survey_item_ids_from_headers(headers:)
  # CSV.parse returns one array per row; only the first row is a header.
  # An empty input has no rows, so fall back to an empty list.
  header_cells = CSV.parse(headers).first || []
  # Empty cells come back as nil; the safe-navigation call skips them, and
  # blank strings can never start with either prefix.
  header_cells.select { |header| header&.start_with?("t-", "s-") }
end
end
# Extension mixed into String so raw CSV cells can be checked as Likert scores.
module StringMonkeyPatches
  # True when the string's integer value (String#to_i) lies between 1 and 5
  # inclusive. Non-numeric text such as "NA" converts to 0 and is rejected.
  def valid_likert_score?
    to_i.between?(1, 5)
  end
end

String.include(StringMonkeyPatches)