From 019b954ffab35377407ebb03ccff401f087eb1f6 Mon Sep 17 00:00:00 2001 From: rebuilt Date: Thu, 2 Nov 2023 09:45:30 -0700 Subject: [PATCH] feat: load student responses in the same pass as loading the survey responses chore: remove student loader since loading students is now done with the survey response loader --- app/services/student_loader.rb | 64 ------- app/services/survey_item_values.rb | 26 +-- app/services/survey_responses_data_loader.rb | 38 +++-- lib/tasks/data.rake | 32 ---- lib/tasks/one_off.rake | 24 +-- spec/services/student_loader_spec.rb | 160 ------------------ .../survey_responses_data_loader_spec.rb | 23 +++ 7 files changed, 64 insertions(+), 303 deletions(-) delete mode 100644 app/services/student_loader.rb delete mode 100644 spec/services/student_loader_spec.rb diff --git a/app/services/student_loader.rb b/app/services/student_loader.rb deleted file mode 100644 index 97163a78..00000000 --- a/app/services/student_loader.rb +++ /dev/null @@ -1,64 +0,0 @@ -# frozen_string_literal: true - -class StudentLoader - def self.load_data(filepath:, rules: []) - File.open(filepath) do |file| - headers = file.first - headers_array = headers.split(",") - - file.lazy.each_slice(1_000) do |lines| - CSV.parse(lines.join, headers:).map do |row| - row = SurveyItemValues.new(row:, headers: headers_array, genders: nil, survey_items: nil, schools:) - next if rules.any? do |rule| - rule.new(row:).skip_row? - end - - process_row(row:) - end - end - end - end - - def self.from_file(file:, rules: []) - headers = file.gets - headers_array = headers.split(",") - - until file.eof? - line = file.gets - next unless line.present? - - CSV.parse(line, headers:).map do |row| - row = SurveyItemValues.new(row:, headers: headers_array, genders: nil, survey_items: nil, schools:) - next if rules.any? do |rule| - rule.new(row:).skip_row? - end - - process_row(row:) - end - end - end - - def self.process_row(row:) - student = Student.find_or_create_by(response_id: row.response_id, lasid: row.lasid) - student.races.delete_all - races = row.races - races.map do |race| - student.races << race - end - assign_student_to_responses(student:, response_id: row.response_id) - end - - def self.schools - @schools ||= School.all.map { |school| [school.dese_id, school] }.to_h - end - - def self.assign_student_to_responses(student:, response_id:) - responses = SurveyItemResponse.where(response_id:) - loadable_responses = responses.map do |response| - response.student = student - response - end - - SurveyItemResponse.import(loadable_responses.flatten.compact, batch_size: 1_000, on_duplicate_key_update: :all) - end -end diff --git a/app/services/survey_item_values.rb b/app/services/survey_item_values.rb index 4969fe85..f7eb29b9 100644 --- a/app/services/survey_item_values.rb +++ b/app/services/survey_item_values.rb @@ -116,20 +116,24 @@ class SurveyItemValues end def gender - gender_code = value_from(pattern: /Gender|What is your gender?|What is your gender? - Selected Choice/i) - gender_code ||= 99 - gender_code = gender_code.to_i - gender_code = 4 if gender_code == 3 - gender_code = 99 if gender_code.zero? - genders[gender_code] + @gender ||= begin + gender_code = value_from(pattern: /Gender|What is your gender?|What is your gender? - Selected Choice/i) + gender_code ||= 99 + gender_code = gender_code.to_i + gender_code = 4 if gender_code == 3 + gender_code = 99 if gender_code.zero? + genders[gender_code] + end end def races - race_codes = value_from(pattern: /RACE/i) - race_codes ||= value_from(pattern: %r{What is your race/ethnicity?(Please select all that apply) - Selected Choice}i) - race_codes ||= value_from(pattern: /Race Secondary/i) || "" - race_codes = race_codes.split(",").map(&:to_i) || [] - process_races(codes: race_codes) + @races ||= begin + race_codes = value_from(pattern: /RACE/i) + race_codes ||= value_from(pattern: %r{What is your race/ethnicity?(Please select all that apply) - Selected Choice}i) + race_codes ||= value_from(pattern: /Race Secondary/i) || "" + race_codes = race_codes.split(",").map(&:to_i) || [] + process_races(codes: race_codes) + end end def lasid diff --git a/app/services/survey_responses_data_loader.rb b/app/services/survey_responses_data_loader.rb index f7abce63..0917d73c 100644 --- a/app/services/survey_responses_data_loader.rb +++ b/app/services/survey_responses_data_loader.rb @@ -12,7 +12,7 @@ class SurveyResponsesDataLoader process_row(row: SurveyItemValues.new(row:, headers: headers_array, genders:, survey_items: all_survey_items, schools:), rules:) end - SurveyItemResponse.import survey_item_responses.compact.flatten, batch_size: 500 + SurveyItemResponse.import survey_item_responses.compact.flatten, batch_size: 500, on_duplicate_key_update: :all end end end @@ -23,7 +23,6 @@ class SurveyResponsesDataLoader all_survey_items = survey_items(headers:) survey_item_responses = [] - row_count = 0 until file.eof? line = file.gets next unless line.present? @@ -32,16 +31,9 @@ class SurveyResponsesDataLoader survey_item_responses << process_row(row: SurveyItemValues.new(row:, headers: headers_array, genders:, survey_items: all_survey_items, schools:), rules:) end - - row_count += 1 - next unless row_count == 1000 - - SurveyItemResponse.import survey_item_responses.compact.flatten, batch_size: 1000 - survey_item_responses = [] - row_count = 0 end - SurveyItemResponse.import survey_item_responses.compact.flatten, batch_size: 1000 + SurveyItemResponse.import survey_item_responses.compact.flatten, batch_size: 1000, on_duplicate_key_update: :all end private @@ -78,6 +70,13 @@ class SurveyResponsesDataLoader end def process_survey_items(row:) + student = Student.find_or_create_by(response_id: row.response_id, lasid: row.lasid) + student.races.delete_all + races = row.races + races.map do |race| + student.races << race + end + row.survey_items.map do |survey_item| likert_score = row.likert_score(survey_item_id: survey_item.survey_item_id) || next @@ -86,23 +85,30 @@ class SurveyResponsesDataLoader next end response = row.survey_item_response(survey_item:) - create_or_update_response(survey_item_response: response, likert_score:, row:, survey_item:) + create_or_update_response(survey_item_response: response, likert_score:, row:, survey_item:, student:) end.compact end - def create_or_update_response(survey_item_response:, likert_score:, row:, survey_item:) + def create_or_update_response(survey_item_response:, likert_score:, row:, survey_item:, student:) gender = row.gender grade = row.grade income = incomes[row.income.parameterize] ell = ells[row.ell] sped = speds[row.sped] + if survey_item_response.present? - survey_item_response.update!(likert_score:, grade:, gender:, recorded_date: row.recorded_date, income:, ell:, - sped:) - [] + survey_item_response.likert_score = likert_score + survey_item_response.grade = grade + survey_item_response.gender = gender + survey_item_response.recorded_date = row.recorded_date + survey_item_response.income = income + survey_item_response.ell = ell + survey_item_response.sped = sped + survey_item_response.student = student + survey_item_response else SurveyItemResponse.new(response_id: row.response_id, academic_year: row.academic_year, school: row.school, survey_item:, - likert_score:, grade:, gender:, recorded_date: row.recorded_date, income:, ell:, sped:) + likert_score:, grade:, gender:, recorded_date: row.recorded_date, income:, ell:, sped:, student:) end end diff --git a/lib/tasks/data.rake b/lib/tasks/data.rake index dd233f62..6874628e 100644 --- a/lib/tasks/data.rake +++ b/lib/tasks/data.rake @@ -11,15 +11,6 @@ namespace :data do end puts "=====================> Completed loading #{SurveyItemResponse.count} survey responses" - Sftp::Directory.open(path:) do |file| - StudentLoader.from_file(file:, rules: [Rule::SkipNonLowellSchools]) - end - puts "=====================> Completed loading #{Student.count - student_count} students. #{Student.count} total students" - - puts "Resetting race scores" - RaceScoreLoader.reset(fast_processing: false) - puts "=====================> Completed loading #{RaceScore.count} race scores" - Rails.cache.clear end @@ -33,11 +24,6 @@ namespace :data do end puts "=====================> Completed loading #{SurveyItemResponse.count - survey_item_response_count} survey responses. #{SurveyItemResponse.count} total responses in the database" - Sftp::Directory.open(path:) do |file| - StudentLoader.from_file(file:, rules: []) - end - puts "=====================> Completed loading #{Student.count - student_count} students. #{Student.count} total students" - Rails.cache.clear end @@ -67,24 +53,6 @@ namespace :data do puts "=====================> Completed loading #{AdminDataValue.count - original_count} admin data values" end - desc "load students" - task load_students: :environment do - SurveyItemResponse.update_all(student_id: nil) - StudentRace.delete_all - Student.delete_all - Dir.glob(Rails.root.join("data", "survey_responses", "*student*.csv")).each do |file| - puts "=====================> Loading student data from csv at path: #{file}" - StudentLoader.load_data filepath: file - end - puts "=====================> Completed loading #{Student.count} students" - - puts "Resetting race scores" - RaceScoreLoader.reset(fast_processing: false) - puts "=====================> Completed loading #{RaceScore.count} survey responses" - - Rails.cache.clear - end - desc "reset all cache counters" task reset_cache_counters: :environment do puts "=====================> Resetting Category counters" diff --git a/lib/tasks/one_off.rake b/lib/tasks/one_off.rake index 8441efc7..d02ab053 100644 --- a/lib/tasks/one_off.rake +++ b/lib/tasks/one_off.rake @@ -91,34 +91,18 @@ namespace :one_off do # should be somewhere near 295738 end - desc "delete 2023-24 survey responses" - task delete_survey_responses_2023_24: :environment do - response_count = SurveyItemResponse.all.count - SurveyItemResponse.where(academic_year: AcademicYear.find_by_range("2023-24")).delete_all - - puts "=====================> Deleted #{response_count - SurveyItemResponse.all.count} survey responses" - end - - desc "load survey responses for lowell schools 2022-23" - task load_survey_responses_for_lowell_2022_23: :environment do + desc "load survey responses" + task load_survey_responses: :environment do survey_item_response_count = SurveyItemResponse.count student_count = Student.count - path = "/data/survey_responses/2022-23/" + path = "/data/survey_responses/clean/" + schools = District.find_by_slug("maynard-public-schools").schools Sftp::Directory.open(path:) do |file| SurveyResponsesDataLoader.new.from_file(file:) end puts "=====================> Completed loading #{SurveyItemResponse.count - survey_item_response_count} survey responses. #{SurveyItemResponse.count} total responses in the database" - Sftp::Directory.open(path:) do |file| - StudentLoader.from_file(file:, rules: [Rule::SkipNonLowellSchools]) - end - puts "=====================> Completed loading #{Student.count - student_count} students. #{Student.count} total students" - - puts "Resetting race scores" - RaceScoreLoader.reset(fast_processing: false, academic_years: [AcademicYear.find_by_range("2022-23")]) - puts "=====================> Completed loading #{RaceScore.count} race scores" - Rails.cache.clear end end diff --git a/spec/services/student_loader_spec.rb b/spec/services/student_loader_spec.rb deleted file mode 100644 index e998ea7f..00000000 --- a/spec/services/student_loader_spec.rb +++ /dev/null @@ -1,160 +0,0 @@ -require "rails_helper" - -describe StudentLoader do - let(:path_to_student_responses) { Rails.root.join("spec", "fixtures", "test_2020-21_student_survey_responses.csv") } - let(:american_indian) { create(:race, qualtrics_code: 1) } - let(:asian) { create(:race, qualtrics_code: 2) } - let(:black) { create(:race, qualtrics_code: 3) } - let(:latinx) { create(:race, qualtrics_code: 4) } - let(:white) { create(:race, qualtrics_code: 5) } - let(:middle_eastern) { create(:race, qualtrics_code: 8) } - let(:unknown_race) { create(:race, qualtrics_code: 99) } - let(:multiracial) { create(:race, qualtrics_code: 100) } - let(:female) { create(:gender, qualtrics_code: 1) } - let(:male) { create(:gender, qualtrics_code: 2) } - let(:another_gender) { create(:gender, qualtrics_code: 3) } - let(:non_binary) { create(:gender, qualtrics_code: 4) } - let(:unknown_gender) { create(:gender, qualtrics_code: 99) } - - before :each do - american_indian - asian - black - latinx - white - middle_eastern - unknown_race - multiracial - female - male - another_gender - non_binary - unknown_gender - end - - after :each do - DatabaseCleaner.clean - end - xdescribe "#process_races" do - context "as a standalone function" do - it "race codes of 6 or 7 get classified as an unknown race" do - codes = ["NA"] - expect(StudentLoader.process_races(codes:)).to eq [unknown_race] - codes = [] - expect(StudentLoader.process_races(codes:)).to eq [unknown_race] - codes = [1] - expect(StudentLoader.process_races(codes:)).to eq [american_indian] - codes = [2] - expect(StudentLoader.process_races(codes:)).to eq [asian] - codes = [3] - expect(StudentLoader.process_races(codes:)).to eq [black] - codes = [4] - expect(StudentLoader.process_races(codes:)).to eq [latinx] - codes = [5] - expect(StudentLoader.process_races(codes:)).to eq [white] - codes = [8] - expect(StudentLoader.process_races(codes:)).to eq [middle_eastern] - codes = [6] - expect(StudentLoader.process_races(codes:)).to eq [unknown_race] - codes = [7] - expect(StudentLoader.process_races(codes:)).to eq [unknown_race] - codes = [6, 7] - expect(StudentLoader.process_races(codes:)).to eq [unknown_race] - codes = [1, 6, 7] - expect(StudentLoader.process_races(codes:)).to eq [american_indian] - codes = [1, 6, 7, 2] - expect(StudentLoader.process_races(codes:)).to eq [american_indian, asian, multiracial] - codes = [3, 6, 7, 6, 6, 7, 7, 6, 2] - expect(StudentLoader.process_races(codes:)).to eq [black, asian, multiracial] - codes = [8, 2] - expect(StudentLoader.process_races(codes:)).to eq [middle_eastern, asian, multiracial] - end - end - end - - xdescribe "#add_multiracial_designation" do - it "adds the multiracial race code to the list of races" do - races = [unknown_race] - expect(StudentLoader.add_multiracial_designation(races:)).to eq [unknown_race] - races = [american_indian, asian] - expect(StudentLoader.add_multiracial_designation(races:)).to eq [american_indian, asian, multiracial] - races = [white] - expect(StudentLoader.add_multiracial_designation(races:)).to eq [white] - end - end - - # This fails in CI because github does not know what the key derivation salt is. - # I'm not sure how to securely set the key derivation salt as an environment variable in CI - describe "self.load_data" do - context "load student data for all schools" do - before :each do - SurveyResponsesDataLoader.new.load_data filepath: path_to_student_responses - StudentLoader.load_data filepath: path_to_student_responses - end - - it "ensures student responses load correctly" do - assigns_student_to_the_survey_item_responses - assigns_races_to_students - is_idempotent_for_students - end - end - - # TODO: get this test to run correctly. Since we are no longer seeding, we need to define schools, and districts; some Lowell, some not - xcontext "When using the rule to skip non Lowell schools" do - before :each do - SurveyResponsesDataLoader.new.load_data filepath: path_to_student_responses - StudentLoader.load_data filepath: path_to_student_responses, rules: [Rule::SkipNonLowellSchools] - end - - it "only loads student data for lowell" do - expect(Student.find_by_response_id("student_survey_response_1")).to eq nil - expect(Student.find_by_response_id("student_survey_response_3").races).to eq [unknown_race] - expect(Student.find_by_response_id("student_survey_response_4").races).to eq [unknown_race] - expect(Student.find_by_response_id("student_survey_response_5").races).to eq [american_indian, asian, black, latinx, white, - middle_eastern, multiracial] - expect(Student.find_by_response_id("student_survey_response_6").races).to eq [american_indian, asian, black, latinx, white, - middle_eastern, multiracial] - expect(Student.find_by_response_id("student_survey_response_7").races).to eq [unknown_race] - end - end - end -end - -def assigns_student_to_the_survey_item_responses - # The csv file has no responses for `student_survey_response_2` so we can't assign a student to nil responses - expect(SurveyItemResponse.find_by_response_id("student_survey_response_2")).to eq nil - - response_ids = %w[student_survey_response_1 student_survey_response_3 - student_survey_response_4 - student_survey_response_5 - student_survey_response_6 - student_survey_response_7] - - response_ids.each do |response_id| - responses = SurveyItemResponse.where(response_id:) - responses.each do |response| - expect(response.student).not_to eq nil - expect(response.student).to eq Student.find_by_response_id(response_id) - end - end -end - -def assigns_races_to_students - expect(Student.find_by_response_id("student_survey_response_1").races).to eq [american_indian] - expect(Student.find_by_response_id("student_survey_response_2").races).to eq [asian, black, latinx, multiracial] - expect(Student.find_by_response_id("student_survey_response_3").races).to eq [unknown_race] - expect(Student.find_by_response_id("student_survey_response_4").races).to eq [unknown_race] - expect(Student.find_by_response_id("student_survey_response_5").races).to eq [american_indian, asian, black, latinx, white, - middle_eastern, multiracial] - expect(Student.find_by_response_id("student_survey_response_6").races).to eq [american_indian, asian, black, latinx, white, - middle_eastern, multiracial] - expect(Student.find_by_response_id("student_survey_response_7").races).to eq [unknown_race] -end - -def is_idempotent_for_students - number_of_students = Student.count - number_of_responses = SurveyItemResponse.count - StudentLoader.load_data filepath: path_to_student_responses - expect(Student.count).to eq number_of_students - expect(SurveyItemResponse.count).to eq number_of_responses -end diff --git a/spec/services/survey_responses_data_loader_spec.rb b/spec/services/survey_responses_data_loader_spec.rb index d0d70fae..9e2931b6 100644 --- a/spec/services/survey_responses_data_loader_spec.rb +++ b/spec/services/survey_responses_data_loader_spec.rb @@ -51,13 +51,24 @@ describe SurveyResponsesDataLoader do let(:another_gender) { create(:gender, qualtrics_code: 3) } let(:non_binary) { create(:gender, qualtrics_code: 4) } let(:unknown_gender) { create(:gender, qualtrics_code: 99) } + let(:low_income) { create(:income, designation: "Economically Disadvantaged – Y") } let(:high_income) { create(:income, designation: "Economically Disadvantaged – N") } let(:unknown_income) { create(:income, designation: "Unknown") } + let(:yes_ell) { create(:ell, designation: "ELL") } let(:not_ell) { create(:ell, designation: "Not ELL") } let(:unknown_ell) { create(:ell, designation: "Unknown") } + let(:american_indian) { create(:race, qualtrics_code: 1) } + let(:asian) { create(:race, qualtrics_code: 2) } + let(:black) { create(:race, qualtrics_code: 3) } + let(:latinx) { create(:race, qualtrics_code: 4) } + let(:white) { create(:race, qualtrics_code: 5) } + let(:middle_eastern) { create(:race, qualtrics_code: 8) } + let(:unknown_race) { create(:race, qualtrics_code: 99) } + let(:multiracial) { create(:race, qualtrics_code: 100) } + let(:setup) do ay_2020_21 ay_2022_23 @@ -87,17 +98,29 @@ describe SurveyResponsesDataLoader do s_emsa_q1 s_emsa_q2 s_emsa_q3 + female male another_gender non_binary unknown_gender + low_income high_income unknown_income + yes_ell not_ell unknown_ell + + american_indian + asian + black + latinx + white + middle_eastern + unknown_race + multiracial end before :each do