From a1b7db9b2e566881ca3f7ac3c1c61c73ed066eb1 Mon Sep 17 00:00:00 2001 From: rebuilt Date: Sat, 3 Dec 2022 11:30:01 -0800 Subject: [PATCH] Add ability to load student demographic information for only lowell --- README.md | 8 ++ app/services/rule/skip_non_lowell_schools.rb | 3 + app/services/student_loader.rb | 6 +- app/services/survey_item_values.rb | 73 ++++++++++++++++++ app/services/survey_responses_data_loader.rb | 76 +------------------ lib/tasks/data.rake | 18 +++++ .../test_2020-21_student_survey_responses.csv | 1 + spec/services/student_loader_spec.rb | 30 ++++++-- 8 files changed, 133 insertions(+), 82 deletions(-) create mode 100644 app/services/survey_item_values.rb diff --git a/README.md b/README.md index 87dd98c1..f7befc77 100644 --- a/README.md +++ b/README.md @@ -205,6 +205,14 @@ $ heroku run:detached -a mciea-beta bundle exec rake data:load_students $ heroku run:detached -a mciea-dashboard bundle exec rake data:load_students ``` + +Or if you only want to load students for Lowell schools + +```bash +# locally +$ bundle exec rake data:load_students_for_lowell +``` + ### Load scores for each race For performance, the scores for an individual race must be precalculated. Make sure to load response rates, and student demographic information before recaculating scores by race diff --git a/app/services/rule/skip_non_lowell_schools.rb b/app/services/rule/skip_non_lowell_schools.rb index c791fd63..230f0b03 100644 --- a/app/services/rule/skip_non_lowell_schools.rb +++ b/app/services/rule/skip_non_lowell_schools.rb @@ -7,6 +7,9 @@ module Rule end def skip_row? + return true if row.school.nil? + return true if row.school.district.nil? 
+ row.school.district.name != 'Lowell' end end diff --git a/app/services/student_loader.rb b/app/services/student_loader.rb index 30c9e6a6..4916406b 100644 --- a/app/services/student_loader.rb +++ b/app/services/student_loader.rb @@ -3,12 +3,16 @@ require 'csv' class StudentLoader - def self.load_data(filepath:) + def self.load_data(filepath:, rules: []) File.open(filepath) do |file| headers = file.first file.lazy.each_slice(1_000) do |lines| CSV.parse(lines.join, headers:).map do |row| + next if rules.any? do |rule| + rule.new(row: SurveyItemValues.new(row:, headers:, genders: nil, survey_items: nil)).skip_row? + end + process_row(row:) end end diff --git a/app/services/survey_item_values.rb b/app/services/survey_item_values.rb new file mode 100644 index 00000000..b1929f41 --- /dev/null +++ b/app/services/survey_item_values.rb @@ -0,0 +1,73 @@ +class SurveyItemValues + attr_reader :row, :headers, :genders, :survey_items + + def initialize(row:, headers:, genders:, survey_items:) + @row = row + @headers = headers + @genders = genders + @survey_items = survey_items + end + + def dese_id? + dese_id.present? 
+ end + + def response_date + @response_date ||= Date.parse(row['Recorded Date'] || row['RecordedDate']) + end + + def academic_year + @academic_year ||= AcademicYear.find_by_date response_date + end + + def survey_item_response(survey_item:) + @survey_item_response ||= Hash.new do |memo, survey_item| + memo[survey_item] = survey_item_responses[[response_id, survey_item.id]] + end + + @survey_item_response[survey_item] + end + + def survey_item_responses + @survey_item_responses ||= Hash.new do |memo| + responses_hash = {} + SurveyItemResponse.where(school:, academic_year:, response_id:).each do |response| + responses_hash[[response.response_id, response.survey_item.id]] = response + end + memo[[school, academic_year]] = responses_hash + end + + @survey_item_responses[[school, academic_year]] + end + + def response_id + @response_id ||= row['Response ID'] || row['ResponseId'] || row['ResponseID'] + end + + def dese_id + @dese_id ||= (row['DESE ID'] || row['Dese ID'] || row['DeseId'] || row['DeseID'] || row['School'] || row['school']).to_i + end + + def likert_score(survey_item_id:) + row[survey_item_id] + end + + def school + @school ||= School.includes(:district).find_by_dese_id(dese_id) + end + + def grade + @grade ||= begin + raw_grade = (row['grade'] || row['Grade'] || row['What grade are you in?']).to_i + raw_grade == 0 ? nil : raw_grade + end + end + + def gender + gender_code = row['gender'] || row['Gender'] || 99 + gender_code = gender_code.to_i + gender_code = 4 if gender_code == 3 + gender_code = 99 if gender_code.zero? 
+ genders[gender_code] + end +end diff --git a/app/services/survey_responses_data_loader.rb b/app/services/survey_responses_data_loader.rb index 7c84bdfb..4635104c 100644 --- a/app/services/survey_responses_data_loader.rb +++ b/app/services/survey_responses_data_loader.rb @@ -11,7 +11,7 @@ class SurveyResponsesDataLoader file.lazy.each_slice(500) do |lines| survey_item_responses = CSV.parse(lines.join, headers:).map do |row| - process_row(row: Values.new(row:, headers:, genders: genders_hash, survey_items: all_survey_items), + process_row(row: SurveyItemValues.new(row:, headers:, genders: genders_hash, survey_items: all_survey_items), rules:) end @@ -85,80 +85,6 @@ class SurveyResponsesDataLoader private_class_method :get_survey_item_ids_from_headers end -class Values - attr_reader :row, :headers, :genders, :survey_items - - def initialize(row:, headers:, genders:, survey_items:) - @row = row - @headers = headers - @genders = genders - @survey_items = survey_items - end - - def dese_id? - dese_id.present? 
- end - - def response_date - @response_date ||= Date.parse(row['Recorded Date'] || row['RecordedDate']) - end - - def academic_year - @academic_year ||= AcademicYear.find_by_date response_date - end - - def survey_item_response(survey_item:) - @survey_item_response ||= Hash.new do |memo, survey_item| - memo[survey_item] = survey_item_responses[[response_id, survey_item.id]] - end - - @survey_item_response[survey_item] - end - - def survey_item_responses - @survey_item_responses ||= Hash.new do |memo| - responses_hash = {} - SurveyItemResponse.where(school:, academic_year:, response_id:).each do |response| - responses_hash[[response.response_id, response.survey_item.id]] = response - end - memo[[school, academic_year]] = responses_hash - end - - @survey_item_responses[[school, academic_year]] - end - - def response_id - @response_id ||= row['Response ID'] || row['ResponseId'] || row['ResponseID'] - end - - def dese_id - @dese_id ||= (row['DESE ID' || 'Dese ID'] || row['DeseId'] || row['DeseID'] || row['School'] || row['school']).to_i - end - - def likert_score(survey_item_id:) - row[survey_item_id] - end - - def school - @school ||= School.find_by_dese_id(dese_id) - end - - def grade - @grade ||= begin - raw_grade = (row['grade'] || row['Grade'] || row['What grade are you in?']).to_i - raw_grade == 0 ? nil : raw_grade - end - end - - def gender - gender_code = row['gender'] || row['Gender'] || 99 - gender_code = gender_code.to_i - gender_code = 4 if gender_code == 3 - gender_code = 99 if gender_code.zero? - genders[gender_code] - end -end - module StringMonkeyPatches def valid_likert_score? to_i.between? 
1, 5 diff --git a/lib/tasks/data.rake b/lib/tasks/data.rake index 52591ea1..c603afdc 100644 --- a/lib/tasks/data.rake +++ b/lib/tasks/data.rake @@ -51,6 +51,24 @@ namespace :data do Rails.cache.clear end + desc 'load students for lowell' + task load_students_for_lowell: :environment do + SurveyItemResponse.update_all(student_id: nil) + StudentRace.delete_all + Student.delete_all + Dir.glob(Rails.root.join('data', 'survey_responses', '*student*.csv')).each do |file| + puts "=====================> Loading student data from csv at path: #{file}" + StudentLoader.load_data filepath: file, rules: [Rule::SkipNonLowellSchools] + end + puts "=====================> Completed loading #{Student.count} students" + + puts 'Resetting race scores' + RaceScoreLoader.reset(fast_processing: false) + puts "=====================> Completed loading #{RaceScore.count} survey responses" + + Rails.cache.clear + end + task load_survey_responses_21_22: :environment do Dir.glob(Rails.root.join('data', 'survey_responses', '*2021-22*.csv')).each do |filepath| puts "=====================> Loading data from csv at path: #{filepath}" diff --git a/spec/fixtures/test_2020-21_student_survey_responses.csv b/spec/fixtures/test_2020-21_student_survey_responses.csv index b1ae390b..c67d62ef 100644 --- a/spec/fixtures/test_2020-21_student_survey_responses.csv +++ b/spec/fixtures/test_2020-21_student_survey_responses.csv @@ -6,3 +6,4 @@ Start Date,End Date,Response Type,IP Address,Progress,Duration (in seconds),Fini 2021-03-31 9:51:39,2021-03-31 10:01:36,0,73.47.153.77,100,596,1,2021-03-31T10:01:36,student_survey_response_5,567890,,,,,42.65820313,-71.30580139,anonymous,EN,3,2,1600310,6,15,109,3710,7,1,,2,2,2,,,,,,,,,,3,3,4,3,3,3,3,4,3,4,3,4,4,5,4,3,4,3,5,2,2,3,,,,,,,,,,,,1,2,5,1,3,3,2,4,3,5,4,,,,,,,,,,,,5,4,3,4,4,4,4,4,4,,,,,,,2,,2,,EN,,,Social Studies teacher,,"1,2,3,4,5,8,6,7",888,7,4 2021-03-31 9:51:39,2021-03-31 
10:01:36,0,73.47.153.77,100,596,1,2021-03-31T10:01:36,student_survey_response_6,,,,,,42.65820313,-71.30580139,anonymous,EN,3,2,1600310,6,15,109,3710,7,1,,2,2,2,,,,,,,,,,3,3,4,3,3,3,3,4,3,4,3,4,4,5,4,3,4,3,5,2,2,3,,,,,,,,,,,,1,2,5,1,3,3,2,4,3,5,4,,,,,,,,,,,,5,4,3,4,4,4,4,4,4,,,,,,,2,,2,,EN,,,Social Studies teacher,,"1,2,3,4,5,8",888,3,NA 2021-03-31 9:51:39,2021-03-31 10:01:36,0,73.47.153.77,100,596,1,2021-03-31T10:01:36,student_survey_response_7,,,,,,42.65820313,-71.30580139,anonymous,EN,3,2,1600310,6,15,109,3710,7,1,,2,2,2,,,,,,,,,,3,3,4,3,3,3,3,4,3,4,3,4,4,5,4,3,4,3,5,2,2,3,,,,,,,,,,,,1,2,5,1,3,3,2,4,3,5,4,,,,,,,,,,,,5,4,3,4,4,4,4,4,4,,,,,,,2,,2,,EN,,,Social Studies teacher,,,,4, +,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,"1,2,3,4,5,8",,, diff --git a/spec/services/student_loader_spec.rb b/spec/services/student_loader_spec.rb index 2faf93c2..131eed18 100644 --- a/spec/services/student_loader_spec.rb +++ b/spec/services/student_loader_spec.rb @@ -10,11 +10,11 @@ describe StudentLoader do let(:middle_eastern) { Race.find_by_qualtrics_code(8) } let(:unknown_race) { Race.find_by_qualtrics_code(99) } let(:multiracial) { Race.find_by_qualtrics_code(100) } - let(:female) {Gender.find_by_qualtrics_code(1)} - let(:male) {Gender.find_by_qualtrics_code(2)} - let(:another_gender) {Gender.find_by_qualtrics_code(3)} - let(:non_binary) {Gender.find_by_qualtrics_code(4)} - let(:unknown_gender) {Gender.find_by_qualtrics_code(99)} + let(:female) { Gender.find_by_qualtrics_code(1) } + let(:male) { Gender.find_by_qualtrics_code(2) } + let(:another_gender) { Gender.find_by_qualtrics_code(3) } + let(:non_binary) { Gender.find_by_qualtrics_code(4) } + let(:unknown_gender) { Gender.find_by_qualtrics_code(99) } before :each do Rails.application.load_seed @@ -74,7 +74,7 @@ describe StudentLoader do # This fails in CI because github does not know what the key derivation salt is. 
# I'm not sure how to securely set the key derivation salt as an environment variable in CI describe 'self.load_data' do - context 'load student data' do + context 'load student data for all schools' do before :each do SurveyResponsesDataLoader.load_data filepath: path_to_student_responses StudentLoader.load_data filepath: path_to_student_responses @@ -86,6 +86,24 @@ describe StudentLoader do is_idempotent_for_students end end + + context 'When using the rule to skip non Lowell schools' do + before :each do + SurveyResponsesDataLoader.load_data filepath: path_to_student_responses + StudentLoader.load_data filepath: path_to_student_responses, rules: [Rule::SkipNonLowellSchools] + end + + it 'only loads student data for lowell' do + expect(Student.find_by_response_id('student_survey_response_1')).to eq nil + expect(Student.find_by_response_id('student_survey_response_3').races).to eq [unknown_race] + expect(Student.find_by_response_id('student_survey_response_4').races).to eq [unknown_race] + expect(Student.find_by_response_id('student_survey_response_5').races).to eq [american_indian, asian, black, latinx, white, + middle_eastern, multiracial] + expect(Student.find_by_response_id('student_survey_response_6').races).to eq [american_indian, asian, black, latinx, white, + middle_eastern, multiracial] + expect(Student.find_by_response_id('student_survey_response_7').races).to eq [unknown_race] + end + end end end