Add ability to load student demographic information for only lowell

pull/1/head
rebuilt 3 years ago
parent d49cf918ac
commit a1b7db9b2e

@ -205,6 +205,14 @@ $ heroku run:detached -a mciea-beta bundle exec rake data:load_students
$ heroku run:detached -a mciea-dashboard bundle exec rake data:load_students
```
Or, if you only want to load students for Lowell schools:
```bash
# locally
$ bundle exec rake data:load_students_for_lowell
```
### Load scores for each race
For performance, the scores for an individual race must be precalculated. Make sure to load response rates and student demographic information before recalculating scores by race.

@ -7,6 +7,9 @@ module Rule
end
# Tells the loader whether this row should be ignored.
#
# A row is skipped when it has no school, when its school has no district,
# or when the district's name is anything other than 'Lowell'.
def skip_row?
  district = row.school&.district
  return true if district.nil?

  district.name != 'Lowell'
end
end

@ -3,12 +3,16 @@
require 'csv'
class StudentLoader
def self.load_data(filepath:)
def self.load_data(filepath:, rules: [])
File.open(filepath) do |file|
headers = file.first
file.lazy.each_slice(1_000) do |lines|
CSV.parse(lines.join, headers:).map do |row|
next if rules.any? do |rule|
rule.new(row: SurveyItemValues.new(row:, headers:, genders: nil, survey_items: nil)).skip_row?
end
process_row(row:)
end
end

@ -0,0 +1,73 @@
# Wraps one parsed row of a survey export and exposes the values the loaders
# need under stable method names, trying each header spelling the exports are
# known to use.
class SurveyItemValues
  attr_reader :row, :headers, :genders, :survey_items

  # row          - the parsed row; it only has to answer #[] with a header name
  # headers      - stored for callers; not read by this class
  # genders      - lookup indexed by integer gender code (see #gender)
  # survey_items - stored for callers; not read by this class
  def initialize(row:, headers:, genders:, survey_items:)
    @row = row
    @headers = headers
    @genders = genders
    @survey_items = survey_items
  end

  # NOTE(review): #dese_id always returns an Integer (it ends in #to_i), and
  # ActiveSupport's #present? is true for every Integer, so this looks like it
  # can never be false — confirm whether `dese_id.positive?` was intended.
  def dese_id?
    dese_id.present?
  end

  # Date parsed from the 'Recorded Date' / 'RecordedDate' column.
  def response_date
    @response_date ||= Date.parse(row['Recorded Date'] || row['RecordedDate'])
  end

  # Academic year looked up from the response date.
  def academic_year
    @academic_year ||= AcademicYear.find_by_date response_date
  end

  # Existing SurveyItemResponse for this row's response id and the given survey
  # item, or nil when none was found. Results are cached per survey item.
  def survey_item_response(survey_item:)
    # The block parameter is deliberately not called `survey_item`, so it does
    # not shadow the keyword argument.
    @survey_item_response ||= Hash.new do |memo, item|
      memo[item] = survey_item_responses[[response_id, item.id]]
    end

    @survey_item_response[survey_item]
  end

  # Responses already stored for this row's school, academic year and response
  # id, keyed by [response_id, survey_item_id].
  def survey_item_responses
    @survey_item_responses ||= Hash.new do |memo|
      responses_hash = {}
      SurveyItemResponse.where(school:, academic_year:, response_id:).each do |response|
        responses_hash[[response.response_id, response.survey_item.id]] = response
      end
      memo[[school, academic_year]] = responses_hash
    end

    @survey_item_responses[[school, academic_year]]
  end

  # Response identifier, whichever of the three header spellings is present.
  def response_id
    @response_id ||= row['Response ID'] || row['ResponseId'] || row['ResponseID']
  end

  # DESE id of the row's school as an Integer; 0 when no id column has a value.
  #
  # Each header spelling is looked up separately. The previous
  # `row['DESE ID' || 'Dese ID']` always evaluated to `row['DESE ID']`, because
  # a non-empty string literal is truthy, so 'Dese ID' was never consulted.
  def dese_id
    @dese_id ||= (
      row['DESE ID'] || row['Dese ID'] || row['DeseId'] || row['DeseID'] || row['School'] || row['school']
    ).to_i
  end

  # Raw cell value stored under the given survey item id column.
  def likert_score(survey_item_id:)
    row[survey_item_id]
  end

  # School matching #dese_id, loaded together with its district.
  def school
    @school ||= School.includes(:district).find_by_dese_id(dese_id)
  end

  # Grade as an Integer, or nil when the cell is missing or parses to 0.
  def grade
    @grade ||= begin
      raw_grade = (row['grade'] || row['Grade'] || row['What grade are you in?']).to_i
      raw_grade unless raw_grade.zero?
    end
  end

  # Entry of `genders` for this row's gender code. Code 3 is remapped to 4, and
  # a missing or non-numeric value (which #to_i turns into 0) falls back to 99.
  def gender
    gender_code = (row['gender'] || row['Gender'] || 99).to_i
    gender_code = 4 if gender_code == 3
    gender_code = 99 if gender_code.zero?
    genders[gender_code]
  end
end

@ -11,7 +11,7 @@ class SurveyResponsesDataLoader
file.lazy.each_slice(500) do |lines|
survey_item_responses = CSV.parse(lines.join, headers:).map do |row|
process_row(row: Values.new(row:, headers:, genders: genders_hash, survey_items: all_survey_items),
process_row(row: SurveyItemValues.new(row:, headers:, genders: genders_hash, survey_items: all_survey_items),
rules:)
end
@ -85,80 +85,6 @@ class SurveyResponsesDataLoader
private_class_method :get_survey_item_ids_from_headers
end
# Adapter around one parsed survey CSV row. It gives the loader a single set of
# accessor names while tolerating the different column header spellings found
# in the exports.
class Values
  attr_reader :row, :headers, :genders, :survey_items

  # row          - parsed row; must answer #[] with a header name
  # headers      - kept for callers; this class does not read it
  # genders      - lookup indexed by integer gender code (used by #gender)
  # survey_items - kept for callers; this class does not read it
  def initialize(row:, headers:, genders:, survey_items:)
    @row = row
    @headers = headers
    @genders = genders
    @survey_items = survey_items
  end

  # NOTE(review): #dese_id is always an Integer because of #to_i, and
  # ActiveSupport's #present? is true for any Integer, so this appears to be
  # always true — confirm the intended check.
  def dese_id?
    dese_id.present?
  end

  # Date parsed from the 'Recorded Date' / 'RecordedDate' column.
  def response_date
    @response_date ||= Date.parse(row['Recorded Date'] || row['RecordedDate'])
  end

  # Academic year found from #response_date.
  def academic_year
    @academic_year ||= AcademicYear.find_by_date response_date
  end

  # Stored SurveyItemResponse for this row and the given survey item (nil when
  # there is none), cached per survey item.
  def survey_item_response(survey_item:)
    # Block parameter renamed so it no longer shadows the keyword argument.
    @survey_item_response ||= Hash.new do |memo, item|
      memo[item] = survey_item_responses[[response_id, item.id]]
    end

    @survey_item_response[survey_item]
  end

  # Stored responses for this row's school, academic year and response id,
  # keyed by [response_id, survey_item_id].
  def survey_item_responses
    @survey_item_responses ||= Hash.new do |memo|
      responses_hash = {}
      SurveyItemResponse.where(school:, academic_year:, response_id:).each do |response|
        responses_hash[[response.response_id, response.survey_item.id]] = response
      end
      memo[[school, academic_year]] = responses_hash
    end

    @survey_item_responses[[school, academic_year]]
  end

  # Response identifier from whichever of the three header spellings is set.
  def response_id
    @response_id ||= row['Response ID'] || row['ResponseId'] || row['ResponseID']
  end

  # School DESE id as an Integer (0 when no id column has a value).
  #
  # 'DESE ID' and 'Dese ID' are separate lookups: the earlier
  # `row['DESE ID' || 'Dese ID']` collapsed to `row['DESE ID']` and silently
  # ignored the second spelling.
  def dese_id
    @dese_id ||= (
      row['DESE ID'] || row['Dese ID'] || row['DeseId'] || row['DeseID'] || row['School'] || row['school']
    ).to_i
  end

  # Raw cell value for the given survey item id column.
  def likert_score(survey_item_id:)
    row[survey_item_id]
  end

  # School whose DESE id matches this row.
  def school
    @school ||= School.find_by_dese_id(dese_id)
  end

  # Grade as an Integer, or nil when absent or parsed as 0.
  def grade
    @grade ||= begin
      raw_grade = (row['grade'] || row['Grade'] || row['What grade are you in?']).to_i
      raw_grade unless raw_grade.zero?
    end
  end

  # Entry of `genders` for the row's gender code; 3 is remapped to 4, and a
  # missing or non-numeric value falls back to 99.
  def gender
    gender_code = (row['gender'] || row['Gender'] || 99).to_i
    gender_code = 4 if gender_code == 3
    gender_code = 99 if gender_code.zero?
    genders[gender_code]
  end
end
module StringMonkeyPatches
def valid_likert_score?
to_i.between? 1, 5

@ -51,6 +51,24 @@ namespace :data do
Rails.cache.clear
end
# Rebuilds student demographic data from the student CSV exports, keeping only
# rows that pass Rule::SkipNonLowellSchools, then recomputes race scores.
desc 'load students for lowell'
task load_students_for_lowell: :environment do
  # Start from a clean slate: detach responses from students, then remove the
  # existing student/race rows before reloading.
  SurveyItemResponse.update_all(student_id: nil)
  StudentRace.delete_all
  Student.delete_all

  Dir.glob(Rails.root.join('data', 'survey_responses', '*student*.csv')).each do |file|
    puts "=====================> Loading student data from csv at path: #{file}"
    StudentLoader.load_data filepath: file, rules: [Rule::SkipNonLowellSchools]
  end
  puts "=====================> Completed loading #{Student.count} students"

  puts 'Resetting race scores'
  RaceScoreLoader.reset(fast_processing: false)
  # The figure printed here is RaceScore.count, so the label says race scores
  # rather than survey responses.
  puts "=====================> Completed loading #{RaceScore.count} race scores"

  Rails.cache.clear
end
task load_survey_responses_21_22: :environment do
Dir.glob(Rails.root.join('data', 'survey_responses', '*2021-22*.csv')).each do |filepath|
puts "=====================> Loading data from csv at path: #{filepath}"

@ -6,3 +6,4 @@ Start Date,End Date,Response Type,IP Address,Progress,Duration (in seconds),Fini
2021-03-31 9:51:39,2021-03-31 10:01:36,0,73.47.153.77,100,596,1,2021-03-31T10:01:36,student_survey_response_5,567890,,,,,42.65820313,-71.30580139,anonymous,EN,3,2,1600310,6,15,109,3710,7,1,,2,2,2,,,,,,,,,,3,3,4,3,3,3,3,4,3,4,3,4,4,5,4,3,4,3,5,2,2,3,,,,,,,,,,,,1,2,5,1,3,3,2,4,3,5,4,,,,,,,,,,,,5,4,3,4,4,4,4,4,4,,,,,,,2,,2,,EN,,,Social Studies teacher,,"1,2,3,4,5,8,6,7",888,7,4
2021-03-31 9:51:39,2021-03-31 10:01:36,0,73.47.153.77,100,596,1,2021-03-31T10:01:36,student_survey_response_6,,,,,,42.65820313,-71.30580139,anonymous,EN,3,2,1600310,6,15,109,3710,7,1,,2,2,2,,,,,,,,,,3,3,4,3,3,3,3,4,3,4,3,4,4,5,4,3,4,3,5,2,2,3,,,,,,,,,,,,1,2,5,1,3,3,2,4,3,5,4,,,,,,,,,,,,5,4,3,4,4,4,4,4,4,,,,,,,2,,2,,EN,,,Social Studies teacher,,"1,2,3,4,5,8",888,3,NA
2021-03-31 9:51:39,2021-03-31 10:01:36,0,73.47.153.77,100,596,1,2021-03-31T10:01:36,student_survey_response_7,,,,,,42.65820313,-71.30580139,anonymous,EN,3,2,1600310,6,15,109,3710,7,1,,2,2,2,,,,,,,,,,3,3,4,3,3,3,3,4,3,4,3,4,4,5,4,3,4,3,5,2,2,3,,,,,,,,,,,,1,2,5,1,3,3,2,4,3,5,4,,,,,,,,,,,,5,4,3,4,4,4,4,4,4,,,,,,,2,,2,,EN,,,Social Studies teacher,,,,4,
,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,"1,2,3,4,5,8",,,

1 Start Date End Date Response Type IP Address Progress Duration (in seconds) Finished RecordedDate ResponseId LASID Recipient Last Name Recipient First Name Recipient Email External Data Reference Location Latitude Location Longitude Distribution Channel User Language district school DESE ID #N/A #N/A #N/A #N/A #N/A #N/A #N/A s-emsa-q1 s-emsa-q2 s-emsa-q3 s-tint-q1 s-tint-q2 #N/A s-tint-q4 s-tint-q5 s-acpr-q1 s-acpr-q2 s-acpr-q3 s-acpr-q4 #N/A #N/A s-cure-q3 s-cure-q4 #N/A s-sten-q2 s-sten-q3 s-sper-q1 s-sper-q2 s-sper-q3 s-sper-q4 s-civp-q1 s-civp-q2 s-civp-q3 s-civp-q4 s-grmi-q1 #N/A #N/A s-grmi-q4 s-appa-q1 s-appa-q2 #N/A s-peff-q1 s-peff-q2 s-peff-q3 s-peff-q4 s-peff-q5 s-peff-q6 s-sbel-q1 s-sbel-q2 s-sbel-q3 s-sbel-q4 s-sbel-q5 s-phys-q1 s-phys-q2 s-phys-q3 s-phys-q4 s-vale-q1 #N/A #N/A s-vale-q4 #N/A s-acst-q2 s-acst-q3 #N/A #N/A s-grit-q1 s-grit-q2 s-grit-q3 s-grit-q4 #N/A #N/A #N/A #N/A #N/A #N/A #N/A #N/A #N/A #N/A #N/A #N/A #N/A #N/A #N/A #N/A #N/A #N/A #N/A #N/A #N/A #N/A #N/A #N/A #N/A #N/A #N/A #N/A #N/A race What is your race/ethnicity?(Please select all that apply) - Selected Choice grade gender
6 2021-03-31 9:51:39 2021-03-31 10:01:36 0 73.47.153.77 100 596 1 2021-03-31T10:01:36 student_survey_response_5 567890 42.65820313 -71.30580139 anonymous EN 3 2 1600310 6 15 109 3710 7 1 2 2 2 3 3 4 3 3 3 3 4 3 4 3 4 4 5 4 3 4 3 5 2 2 3 1 2 5 1 3 3 2 4 3 5 4 5 4 3 4 4 4 4 4 4 2 2 EN Social Studies teacher 1,2,3,4,5,8,6,7 888 7 4
7 2021-03-31 9:51:39 2021-03-31 10:01:36 0 73.47.153.77 100 596 1 2021-03-31T10:01:36 student_survey_response_6 42.65820313 -71.30580139 anonymous EN 3 2 1600310 6 15 109 3710 7 1 2 2 2 3 3 4 3 3 3 3 4 3 4 3 4 4 5 4 3 4 3 5 2 2 3 1 2 5 1 3 3 2 4 3 5 4 5 4 3 4 4 4 4 4 4 2 2 EN Social Studies teacher 1,2,3,4,5,8 888 3 NA
8 2021-03-31 9:51:39 2021-03-31 10:01:36 0 73.47.153.77 100 596 1 2021-03-31T10:01:36 student_survey_response_7 42.65820313 -71.30580139 anonymous EN 3 2 1600310 6 15 109 3710 7 1 2 2 2 3 3 4 3 3 3 3 4 3 4 3 4 4 5 4 3 4 3 5 2 2 3 1 2 5 1 3 3 2 4 3 5 4 5 4 3 4 4 4 4 4 4 2 2 EN Social Studies teacher 4
9 1,2,3,4,5,8

@ -10,11 +10,11 @@ describe StudentLoader do
let(:middle_eastern) { Race.find_by_qualtrics_code(8) }
let(:unknown_race) { Race.find_by_qualtrics_code(99) }
let(:multiracial) { Race.find_by_qualtrics_code(100) }
let(:female) {Gender.find_by_qualtrics_code(1)}
let(:male) {Gender.find_by_qualtrics_code(2)}
let(:another_gender) {Gender.find_by_qualtrics_code(3)}
let(:non_binary) {Gender.find_by_qualtrics_code(4)}
let(:unknown_gender) {Gender.find_by_qualtrics_code(99)}
let(:female) { Gender.find_by_qualtrics_code(1) }
let(:male) { Gender.find_by_qualtrics_code(2) }
let(:another_gender) { Gender.find_by_qualtrics_code(3) }
let(:non_binary) { Gender.find_by_qualtrics_code(4) }
let(:unknown_gender) { Gender.find_by_qualtrics_code(99) }
before :each do
Rails.application.load_seed
@ -74,7 +74,7 @@ describe StudentLoader do
# This fails in CI because github does not know what the key derivation salt is.
# I'm not sure how to securely set the key derivation salt as an environment variable in CI
describe 'self.load_data' do
context 'load student data' do
context 'load student data for all schools' do
before :each do
SurveyResponsesDataLoader.load_data filepath: path_to_student_responses
StudentLoader.load_data filepath: path_to_student_responses
@ -86,6 +86,24 @@ describe StudentLoader do
is_idempotent_for_students
end
end
context 'When using the rule to skip non Lowell schools' do
  before :each do
    SurveyResponsesDataLoader.load_data filepath: path_to_student_responses
    StudentLoader.load_data filepath: path_to_student_responses, rules: [Rule::SkipNonLowellSchools]
  end

  it 'only loads student data for lowell' do
    races_of = ->(response_id) { Student.find_by_response_id(response_id).races }

    # With the rule applied, no Student is expected for the first response.
    expect(Student.find_by_response_id('student_survey_response_1')).to be_nil

    expect(races_of.call('student_survey_response_3')).to eq [unknown_race]
    expect(races_of.call('student_survey_response_4')).to eq [unknown_race]

    # Responses 5 and 6 are expected to carry the same list of races.
    every_listed_race = [american_indian, asian, black, latinx, white, middle_eastern, multiracial]
    expect(races_of.call('student_survey_response_5')).to eq every_listed_race
    expect(races_of.call('student_survey_response_6')).to eq every_listed_race

    expect(races_of.call('student_survey_response_7')).to eq [unknown_race]
  end
end
end
end

Loading…
Cancel
Save