Load student data for all years and calculate race scores for all

pull/1/head
rebuilt 3 years ago
parent fc4fbb4644
commit d89a207c31

@ -160,9 +160,53 @@ $ heroku run:detached -a mciea-beta bundle exec rake data:load_admin_data
$ heroku run:detached -a mciea-dashboard bundle exec rake data:load_admin_data
```
### Load Response Rates
Many parts of the site rely on the response rate table being populated. The response rate is taken into account when determining whether there is sufficient data to show.
```bash
# locally
$ bundle exec rake data:reset_response_rates
# on heroku staging environment
$ heroku run:detached -a mciea-beta bundle exec rake data:reset_response_rates
# on heroku production environment
$ heroku run:detached -a mciea-dashboard bundle exec rake data:reset_response_rates
```
### Load Student demographic information
Student demographic information is needed for the Analyze page disaggregation.
```bash
# locally
$ bundle exec rake data:load_students
# on heroku staging environment
$ heroku run:detached -a mciea-beta bundle exec rake data:load_students
# on heroku production environment
$ heroku run:detached -a mciea-dashboard bundle exec rake data:load_students
```
### Load scores for each race
For performance, the scores for an individual race must be precalculated. Make sure to load response rates and student demographic information before recalculating scores by race.
```bash
# locally
$ bundle exec rake data:reset_race_scores
# on heroku staging environment
$ heroku run:detached -a mciea-beta bundle exec rake data:reset_race_scores
# on heroku production environment
$ heroku run:detached -a mciea-dashboard bundle exec rake data:reset_race_scores
```
## Running tests
### Concurrent test execution
### Single threaded test execution
Prepare the test database.

@ -1,17 +0,0 @@
# frozen_string_literal: true
# Value object that holds one measure/school/academic-year/race combination
# and asks the RacialScore mixin for the matching score.
class RaceScoreCalculator
  include Analyze::Graph::Column::RacialScore

  attr_reader :measure, :school, :academic_year, :race

  # Remembers the four dimensions that `score` later forwards to `race_score`.
  def initialize(measure:, school:, academic_year:, race:)
    @measure, @school, @academic_year, @race = measure, school, academic_year, race
  end

  # Returns whatever `race_score` (from the mixin) produces for the stored dimensions.
  def score = race_score(measure:, school:, academic_year:, race:)
end

@ -1,8 +1,8 @@
class Sample
attr_reader :school, :academic_year, :category, :measure, :race
def initialize(slug: 'milford-high-school', range: '2021-22', category_id: '1', subcategory_id: '1A',
measure_id: '1A-ii', race_code: 1)
def initialize(slug: 'a-irvin-studley-elementary-school', range: '2020-21', category_id: '1', subcategory_id: '1A',
measure_id: '1A-ii', race_code: 5)
@school = School.find_by_slug slug
@academic_year = AcademicYear.find_by_range range
@category = Category.find_by_category_id category_id
@ -10,4 +10,14 @@ class Sample
@measure = Measure.find_by_measure_id measure_id
@race = Race.find_by_qualtrics_code race_code
end
# Counts the distinct students of `race` that have at least one survey item
# response for `school` in `academic_year`.
#
# All three keywords default to the records this sample was built with.
# Returns an Integer.
def count_students(school: @school, academic_year: @academic_year, race: @race)
  student_ids = StudentRace.where(race:).pluck(:student_id).uniq
  # Let the database run COUNT(DISTINCT student_id) instead of instantiating
  # every response and loading its student one query at a time.
  SurveyItemResponse.where(school:, academic_year:, student: student_ids)
                    .distinct
                    .count(:student_id)
end
# Counts the distinct students that have at least one survey item response
# for `school` in `academic_year`.
#
# Both keywords default to the records this sample was built with.
# Returns an Integer.
def count_all_students(school: @school, academic_year: @academic_year)
  # COUNT(DISTINCT student_id) skips NULLs, so responses that are not linked to
  # any student no longer show up as one extra "student", and no response or
  # student record has to be loaded just to be counted.
  SurveyItemResponse.where(school:, academic_year:).distinct.count(:student_id)
end
end

@ -1,71 +0,0 @@
# frozen_string_literal: true
module Analyze
  module Graph
    module Column
      # Mixin that scores the student survey responses of one racial group for
      # a measure at a school in an academic year.
      #
      # `scorify` and `bubble_up_averages` fall back to the including object's
      # `measure` reader only when no measure is passed explicitly.
      module RacialScore
        # Builds the Score for the students of `race`.
        #
        # Returns Score.new(0, false, false, false) when there is no
        # ResponseRate row for the measure's subcategory or when that row does
        # not meet the student threshold; otherwise returns the result of
        # `scorify` for the per-item averages.
        def race_score(measure:, school:, academic_year:, race:)
          rate = response_rate(school:, academic_year:, measure:)
          # `find_by` yields nil when no rate has been loaded; treat that as
          # "threshold not met" instead of raising NoMethodError on nil.
          return Score.new(0, false, false, false) unless rate&.meets_student_threshold

          survey_items = measure.student_survey_items
          students = StudentRace.where(race:).pluck(:student_id).uniq
          averages = grouped_responses(school:, academic_year:, survey_items:, students:)
          meets_student_threshold = sufficient_responses(school:, academic_year:, students:)
          scorify(responses: averages, meets_student_threshold:, measure:)
        end

        private

        # Average likert score per survey item id for the given students.
        def grouped_responses(school:, academic_year:, survey_items:, students:)
          SurveyItemResponse.where(school:,
                                   academic_year:,
                                   student: students,
                                   survey_item: survey_items)
                            .group(:survey_item_id)
                            .average(:likert_score)
        end

        # Number of matching responses, memoized per argument combination
        # (a single shared memo would return the first count for every call).
        def total_responses(school:, academic_year:, students:, survey_items:)
          @total_responses ||= {}
          @total_responses[[school, academic_year, students, survey_items]] ||=
            SurveyItemResponse.where(school:,
                                     academic_year:,
                                     student: students,
                                     survey_item: survey_items).count
        end

        # ResponseRate row for the measure's subcategory, memoized (nil included).
        # The subcategory is part of the cache key so that measures from
        # different subcategories never share a cached rate.
        def response_rate(school:, academic_year:, measure:)
          subcategory = measure.subcategory
          cache_key = [school, academic_year, subcategory]
          @response_rate ||= {}
          return @response_rate[cache_key] if @response_rate.key?(cache_key)

          @response_rate[cache_key] = ResponseRate.find_by(subcategory:, school:, academic_year:)
        end

        # Wraps the averaged responses in a Score; the average is forced to 0
        # when the student threshold is not met.
        def scorify(responses:, meets_student_threshold:, measure: self.measure)
          averages = bubble_up_averages(responses:, measure:)
          average = averages.average
          average = 0 unless meets_student_threshold
          Score.new(average, false, meets_student_threshold, false)
        end

        # True when at least 10 distinct students of the group responded.
        def sufficient_responses(school:, academic_year:, students:)
          # Counted by the database (COUNT DISTINCT) rather than by loading
          # every response and its student.
          number_of_students_for_a_racial_group =
            SurveyItemResponse.where(school:, academic_year:, student: students)
                              .distinct
                              .count(:student_id)
          number_of_students_for_a_racial_group >= 10
        end

        # Averages item scores per scale, dropping blank entries at both levels.
        def bubble_up_averages(responses:, measure: self.measure)
          measure.student_scales.map do |scale|
            scale.survey_items.map do |survey_item|
              responses[survey_item.id]
            end.remove_blanks.average
          end.remove_blanks
        end
      end
    end
  end
end

@ -1,25 +1,103 @@
class RaceScoreLoader
def self.reset(schools: School.all, academic_years: AcademicYear.all, measures: Measure.all, races: Race.all)
RaceScore.where(school: schools, academic_year: academic_years, measure: measures, race: races).delete_all
measures.each do |measure|
schools.each do |school|
academic_years.each do |academic_year|
races.each do |race|
loadable_race_scores = schools.map do |school|
academic_years.map do |academic_year|
races.map do |race|
process_score(measure:, school:, academic_year:, race:)
end
end
end
RaceScore.import(loadable_race_scores.flatten.compact, batch_size: 1_000, on_duplicate_key_update: :all)
end
end
private
def self.process_score(measure:, school:, academic_year:, race:)
score = RaceScoreCalculator.new(measure:, school:, academic_year:, race:).score
rs = RaceScore.find_or_create_by(measure:, school:, academic_year:, race:)
rs.average = score.average
rs.meets_student_threshold = score.meets_student_threshold?
rs.save
score = race_score(measure:, school:, academic_year:, race:)
{ measure_id: measure.id, school_id: school.id, academic_year_id: academic_year.id, race_id: race.id, average: score.average,
meets_student_threshold: score.meets_student_threshold? }
end
# Computes the Score for the students of `race` on `measure` at `school` in
# `academic_year`.
#
# Returns Score.new(0, false, false, false) when no ResponseRate row exists for
# the measure's subcategory or when that row does not meet the student
# threshold; otherwise returns the result of `scorify`.
def self.race_score(measure:, school:, academic_year:, race:)
  rate = response_rate(school:, academic_year:, measure:)
  # `response_rate` is nil when no rate has been loaded for this combination;
  # treat that as "threshold not met" instead of raising NoMethodError.
  return Score.new(0, false, false, false) unless rate&.meets_student_threshold

  survey_items = measure.student_survey_items
  students = StudentRace.where(race:).pluck(:student_id).uniq
  averages = grouped_responses(school:, academic_year:, survey_items:, students:)
  meets_student_threshold = sufficient_responses(school:, academic_year:, students:)
  scorify(responses: averages, meets_student_threshold:, measure:)
end
# Average likert score per survey item id for the given students at `school`
# in `academic_year`, memoized per argument combination.
# NOTE(review): the memo lives in a class-level instance variable and is never
# cleared in this method, so it outlives a single call - confirm that is intended.
def self.grouped_responses(school:, academic_year:, survey_items:, students:)
  @grouped_responses ||= {}
  cache_key = [school, academic_year, survey_items, students]
  @grouped_responses.fetch(cache_key) do
    matching = SurveyItemResponse.where(school:,
                                        academic_year:,
                                        student: students,
                                        survey_item: survey_items)
    @grouped_responses[cache_key] = matching.group(:survey_item_id).average(:likert_score)
  end
end
# Number of survey item responses for the given students and survey items at
# `school` in `academic_year`, memoized per argument combination.
def self.total_responses(school:, academic_year:, students:, survey_items:)
  # The default block must declare the hash and key parameters; without them
  # `memo` is undefined and the first lookup raises NameError.
  @total_responses ||= Hash.new do |memo, key|
    key_school, key_year, key_students, key_items = key
    memo[key] = SurveyItemResponse.where(school: key_school,
                                         academic_year: key_year,
                                         student: key_students,
                                         survey_item: key_items).count
  end
  @total_responses[[school, academic_year, students, survey_items]]
end
# ResponseRate row for the measure's subcategory at `school` in
# `academic_year`. The lookup result (nil included) is memoized per
# school / academic year / subcategory.
def self.response_rate(school:, academic_year:, measure:)
  @response_rate ||= {}
  cache_key = [school, academic_year, measure.subcategory]
  return @response_rate[cache_key] if @response_rate.key?(cache_key)

  @response_rate[cache_key] = ResponseRate.find_by(subcategory: cache_key.last, school:, academic_year:)
end
# Wraps the bubbled-up average of `responses` in a Score. The average is always
# computed, but 0 is reported when the student threshold is not met.
def self.scorify(responses:, meets_student_threshold:, measure:)
  computed_average = bubble_up_averages(responses:, measure:).average
  reported_average = meets_student_threshold ? computed_average : 0
  Score.new(reported_average, false, meets_student_threshold, false)
end
# True when at least 10 distinct students out of `students` have a survey item
# response at `school` in `academic_year`. Memoized per argument combination.
def self.sufficient_responses(school:, academic_year:, students:)
  @sufficient_responses ||= Hash.new do |memo, key|
    key_school, key_year, key_students = key
    # COUNT(DISTINCT student_id) runs in the database, so no response or
    # student record is instantiated just to be counted.
    number_of_students_for_a_racial_group =
      SurveyItemResponse.where(school: key_school, academic_year: key_year, student: key_students)
                        .distinct
                        .count(:student_id)
    memo[key] = number_of_students_for_a_racial_group >= 10
  end
  @sufficient_responses[[school, academic_year, students]]
end
# For every student scale of `measure`, averages the non-blank per-item values
# found in `responses` (keyed by survey item id), then drops blank scale
# averages from the resulting list.
def self.bubble_up_averages(responses:, measure:)
  scale_averages = measure.student_scales.map do |scale|
    item_values = scale.survey_items.map { |survey_item| responses[survey_item.id] }
    item_values.remove_blanks.average
  end
  scale_averages.remove_blanks
end
# Hide the scoring helpers from callers outside the class.
private_class_method :process_score, :race_score, :grouped_responses, :total_responses,
                     :response_rate, :scorify, :sufficient_responses, :bubble_up_averages
end

@ -6,68 +6,69 @@
require 'csv'
class StudentLoader
def self.load_data(filepath:)
def self.load_data(filepath:, reinitialize: false)
destroy_students if reinitialize
File.open(filepath) do |file|
headers = file.first
students = []
file.lazy.each_slice(1000) do |lines|
file.lazy.each_slice(1_000) do |lines|
CSV.parse(lines.join, headers:).map do |row|
# students << process_row(row:)
process_row(row:)
end
end
# Student.import students.compact.flatten.to_set.to_a, batch_size: 1000,
# on_duplicate_key_update: { conflict_target: [:id] }
end
end
# Wipes all student data so a load can start from an empty state.
# Invoked by load_data when it is called with `reinitialize: true`.
def self.destroy_students
# Unlink every survey item response from its student before the students go away.
# NOTE(review): presumably required so no response keeps a dangling student_id - confirm.
SurveyItemResponse.update_all(student_id: nil)
# Remove the student/race join rows, then the students themselves.
StudentRace.delete_all
Student.delete_all
end
def self.process_row(row:)
race_codes = row['RACE'] || row['race'] || row['What is your race/ethnicity?(Please select all that apply) - Selected Choice'] || '99'
race_codes = race_codes.split(',').map(&:to_i) || []
races = process_races(codes: race_codes)
races = process_races(codes: race_codes(row:))
response_id = row['ResponseId'] || row['Responseid'] || row['ResponseID'] ||
row['Response ID'] || row['Response id'] || row['Response Id']
lasid = row['LASID'] || row['lasid']
# return nil if student_exists?(response_id:)
student = find_or_create_student(response_id:, lasid:, races:)
assign_student_to_responses(response_id:, student:)
student
find_or_create_student(response_id:, lasid:, races:)
end
def self.student_exists?(response_id:)
Student.find_by_response_id(response_id).present?
# Extracts the race codes of one CSV row as an Array of Integers.
#
# The first of the known race column headers that holds a value wins; when
# none does, the row is given the single code 99. The value is split on commas
# and each piece is converted with `to_i`.
def self.race_codes(row:)
  race_headers = ['RACE', 'Race', 'race',
                  'What is your race/ethnicity?(Please select all that apply) - Selected Choice']
  raw_codes = race_headers.map { |header| row[header] }.find(&:itself) || '99'
  raw_codes.split(',').map { |code| code.to_i }
end
def self.assign_student_to_responses(response_id:, student:)
survey_responses = SurveyItemResponse.where(response_id:)
survey_responses.each do |response|
def self.assign_student_to_responses(student:, response_id:)
responses = SurveyItemResponse.where(response_id:)
loadable_responses = responses.map do |response|
response.student = student
response.save
response
end
# SurveyItemResponse.import survey_responses, on_duplicate_key_update: { conflict_target: [:id], columns: [:student] }
SurveyItemResponse.import(loadable_responses.flatten.compact, batch_size: 1_000, on_duplicate_key_update: :all)
end
def self.find_or_create_student(response_id:, lasid:, races:)
student = Student.find_or_create_by(response_id:)
student.races = []
races.each do |race|
student = Student.find_by(response_id:, lasid:)
return unless student.nil?
student = Student.create(response_id:, lasid:)
races.map do |race|
student.races << race
end
student.lasid = lasid
student.save
student
assign_student_to_responses(student:, response_id:)
end
def self.process_races(codes:)
codes = codes.map do |code|
code = 99 if [6, 7].include?(code)
races = codes.map do |code|
code = code.to_i
code = 99 if [6, 7].include?(code) || code.nil? || code.zero?
Race.find_by_qualtrics_code(code)
end
races = remove_unknown_race_if_other_races_present(races: codes.uniq)
end.uniq
races = add_unknown_race_if_other_races_missing(races:)
races = remove_unknown_race_if_other_races_present(races:)
add_multiracial_designation(races:)
end
@ -80,4 +81,9 @@ class StudentLoader
races << Race.find_by_qualtrics_code(100) if races.length > 1
races
end
# Appends the race with qualtrics code 99 to `races` when the list is empty.
# The list is modified in place and returned.
def self.add_unknown_race_if_other_races_missing(races:)
  return races unless races.empty?

  races << Race.find_by_qualtrics_code(99)
end
end

@ -9,7 +9,7 @@
<%= selected_academic_years.include?(year) ? "checked" : "" %>
data-action="click->analyze#refresh"
<%= empty_dataset?(measures: measures, school: school, academic_year: year) ? "disabled" : "" %>
<%= @graph.value == 'students-by-group' && year.range != @available_academic_years.last.range ? "disabled" : "" %>>
<%# <%= @graph.value == 'students-by-group' && year.range != @available_academic_years.last.range ? "disabled" : "" %1> %>>
<label class="px-3" for="<%= year.range %>"><%= year.range %></label><br>
<div class="bg-color-blue px-3" style="width:20px;height:20px;background-color:<%= colors[index] %>;"></div>

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -68,6 +68,16 @@ namespace :one_off do
ResponseRateLoader.reset
puts "=====================> Completed recalculating #{ResponseRate.count} response rates"
end
# Loads students from every CSV under data/survey_responses whose name matches
# 2019-20_*student*.csv, then reports how many Student rows exist.
desc 'load students'
task load_students: :environment do
  Dir.glob(Rails.root.join('data', 'survey_responses', '2019-20_*student*.csv')).each do |file|
    puts "=====================> Loading student data from csv at path: #{file}"
    StudentLoader.load_data filepath: file
  end
  # The figure printed is Student.count, so the message names students rather
  # than survey responses.
  puts "=====================> Completed loading #{Student.count} students"
end
desc 'load revere somerville warehame results for 2021-22'
task load_revere: :environment do
['2021-22_revere_somerville_wareham_student_survey_responses.csv',
@ -109,9 +119,9 @@ namespace :one_off do
desc 'reset race score calculations'
task reset_race_scores: :environment do
puts 'Resetting race scores'
RaceScoreLoader.reset(academic_years: [AcademicYear.find_by_range('2021-22')])
RaceScoreLoader.reset(schools: [School.find_by_slug('a-irvin-studley-elementary-school')])
Rails.cache.clear
puts "=====================> Completed loading #{RaceScore.count} survey responses"
puts "=====================> Completed loading #{RaceScore.count} race scores"
end
desc 'list scales that have no survey responses'

@ -1,42 +0,0 @@
require 'rails_helper'
# Spec for RaceScoreCalculator#score. Its only example is declared with `xit`.
describe RaceScoreCalculator do
let(:measure) { create(:measure, :with_student_survey_items) }
let(:school) { create(:school) }
let(:academic_year) { create(:academic_year) }
let(:race) { create(:race) }
# Ten students, each tagged with `race`.
let(:students) do
[].tap do |arr|
10.times do
s = create(:student)
s.races << race
s.save
arr << s
end
end
end
let(:survey_item_1) { measure.survey_items[0] }
let(:survey_item_2) { measure.survey_items[1] }
# Not referenced by the examples in this file.
let(:survey_item_3) { measure.survey_items[2] }
# Response rate for the measure's subcategory that meets the student threshold.
let(:response_rate) do
create(:response_rate, school:, academic_year:, subcategory: measure.subcategory, meets_student_threshold: true)
end
context 'when survey item responses exist' do
before :each do
# Reference the lazy `let` so the response rate row is created.
response_rate
# Every student answers item 1 with 2 and item 2 with 3.
students.each do |student|
create(:survey_item_response, school:, academic_year:, likert_score: 2, survey_item: survey_item_1, student:)
end
students.each do |student|
create(:survey_item_response, school:, academic_year:, likert_score: 3, survey_item: survey_item_2, student:)
end
end
xit 'returns a list of averages' do
expect(measure.student_survey_items.count).to eq 2
american_indian_score = RaceScoreCalculator.new(measure:, school:, academic_year:, race:).score
expect(american_indian_score).to eq Score.new(2.5, false, true, false)
end
end
end

@ -0,0 +1,67 @@
require 'rails_helper'
# Spec for Sample#count_all_students and Sample#count_students.
RSpec.describe Sample, type: :model do
# One Race record per qualtrics code used below.
let(:american_indian) { Race.create(qualtrics_code: 1) }
let(:asian) { Race.create(qualtrics_code: 2) }
let(:black) { Race.create(qualtrics_code: 3) }
let(:latinx) { Race.create(qualtrics_code: 4) }
let(:white) { Race.create(qualtrics_code: 5) }
let(:middle_eastern) { Race.create(qualtrics_code: 8) }
let(:unknown) { Race.create(qualtrics_code: 99) }
let(:multiracial) { Race.create(qualtrics_code: 100) }
let(:races) { [american_indian, asian, black, latinx, white, middle_eastern, unknown, multiracial] }
let(:school) { create(:school) }
let(:academic_year) { create(:academic_year) }
# A student tagged with two races plus the multiracial designation.
let(:multiracial_student) do
student = create(:student)
student.races << american_indian
student.races << asian
student.races << multiracial
student
end
# Seven single-race students (the first seven entries of `races`, so no
# multiracial one), each with one survey item response.
before :each do
7.times do |index|
student = create(:student)
student.races << races[index]
create(:survey_item_response, response_id: student.response_id, student:, school:, academic_year:)
end
end
describe '#count_all_students' do
context 'When called without params' do
it 'returns a count of all students' do
sample = Sample.new(slug: school.slug, range: academic_year.range)
expect(sample.count_all_students).to eq 7
end
end
end
describe '#count_students' do
context 'When called with a race param' do
context 'and there are no multirace students' do
it 'returns a count of the race passed in' do
sample = Sample.new(slug: school.slug, range: academic_year.range)
expect(sample.count_students(race: american_indian)).to eq 1
expect(sample.count_students(race: asian)).to eq 1
expect(sample.count_students(race: black)).to eq 1
expect(sample.count_students(race: latinx)).to eq 1
expect(sample.count_students(race: white)).to eq 1
expect(sample.count_students(race: middle_eastern)).to eq 1
expect(sample.count_students(race: unknown)).to eq 1
end
end
context 'when there are multirace students' do
# Adds one response for the multiracial student on top of the seven above.
before do
create(:survey_item_response, response_id: multiracial_student.response_id, student: multiracial_student,
school:, academic_year:)
end
it 'counts the student for all categories' do
sample = Sample.new(slug: school.slug, range: academic_year.range)
expect(sample.count_students(race: american_indian)).to eq 2
expect(sample.count_students(race: asian)).to eq 2
expect(sample.count_students(race: multiracial)).to eq 1
end
end
end
end
end

@ -1,73 +0,0 @@
require 'rails_helper'
include Analyze::Graph::Column
# RacialScore is a module used in the RaceScoreCalculator class
# Exercises the RacialScore mixin through RaceScoreCalculator#score.
# Both examples are declared with `xit`.
describe RacialScore do
let(:measure) { create(:measure, :with_student_survey_items) }
let(:school) { create(:school) }
let(:academic_year) { create(:academic_year) }
let(:race) { create(:race) }
# Ten students, each tagged with `race`.
let(:students) do
[].tap do |arr|
10.times do
s = create(:student)
s.races << race
s.save
arr << s
end
end
end
let(:survey_item_1) { measure.survey_items[0] }
let(:survey_item_2) { measure.survey_items[1] }
# Not referenced by the examples in this file.
let(:survey_item_3) { measure.survey_items[2] }
# Response rate for the measure's subcategory that meets the student threshold.
let(:response_rate) do
create(:response_rate, school:, academic_year:, subcategory: measure.subcategory, meets_student_threshold: true)
end
context 'when sufficient survey item responses exist' do
before :each do
# Reference the lazy `let` so the response rate row is created.
response_rate
# All ten students answer item 1 with 2 and item 2 with 3.
students.each do |student|
create(:survey_item_response, school:, academic_year:, likert_score: 2, survey_item: survey_item_1, student:)
end
students.each do |student|
create(:survey_item_response, school:, academic_year:, likert_score: 3, survey_item: survey_item_2, student:)
end
end
xit 'returns a list of averages' do
expect(measure.student_survey_items.count).to eq 2
expect(students.count).to eq 10
expect(SurveyItemResponse.count).to eq 20
american_indian_score = RaceScoreCalculator.new(measure:, school:, academic_year:, race:).score
expect(american_indian_score).to eq Score.new(2.5, false, true, false)
end
end
context 'when there NOT sufficient survey item responses' do
before :each do
response_rate
# Only nine of the ten students respond to each item.
9.times do |index|
create(:survey_item_response, school:, academic_year:, likert_score: 2, survey_item: survey_item_1,
student: students[index])
end
9.times do |index|
create(:survey_item_response, school:, academic_year:, likert_score: 3, survey_item: survey_item_2,
student: students[index])
end
end
xit 'returns a list of averages' do
expect(measure.student_survey_items.count).to eq 2
expect(SurveyItemResponse.count).to eq 18
american_indian_score = RaceScoreCalculator.new(measure:, school:, academic_year:, race:).score
expect(american_indian_score).to eq Score.new(0, false, false, false)
end
end
end

@ -35,18 +35,43 @@ describe RaceScoreLoader do
RaceScoreLoader.reset
end
xit 'returns a list of averages' do
it 'returns a list of averages' do
expect(measure.student_survey_items.count).to eq 2
american_indian_score = RaceScore.find_by(measure:, school:, academic_year:, race:)
expect(american_indian_score.average).to eq 2.5
expect(american_indian_score.meets_student_threshold).to eq true
end
xit 'is idempotent' do
it 'is idempotent' do
original_count = RaceScore.count
RaceScoreLoader.reset
new_count = RaceScore.count
expect(original_count).to eq new_count
end
end
context 'when there NOT sufficient survey item responses' do
before :each do
response_rate
9.times do |index|
create(:survey_item_response, school:, academic_year:, likert_score: 2, survey_item: survey_item_1,
student: students[index])
end
9.times do |index|
create(:survey_item_response, school:, academic_year:, likert_score: 3, survey_item: survey_item_2,
student: students[index])
end
RaceScoreLoader.reset
end
it 'returns a list of averages' do
expect(measure.student_survey_items.count).to eq 2
expect(SurveyItemResponse.count).to eq 18
rs = RaceScore.find_by(measure:, school:, academic_year:, race:)
expect(rs.average).to eq 0
expect(rs.meets_student_threshold).to eq false
end
end
end

@ -21,6 +21,10 @@ describe StudentLoader do
describe '#process_races' do
context 'as a standalone function' do
it 'race codes of 6 or 7 get classified as an unknown race' do
codes = ['NA']
expect(StudentLoader.process_races(codes:)).to eq [unknown]
codes = []
expect(StudentLoader.process_races(codes:)).to eq [unknown]
codes = [1]
expect(StudentLoader.process_races(codes:)).to eq [american_indian]
codes = [2]
@ -64,7 +68,7 @@ describe StudentLoader do
# This fails in CI because github does not know what the key derivation salt is.
# I'm not sure how to securely set the key derivation salt as an environment variable in CI
xdescribe 'self.load_data' do
describe 'self.load_data' do
context 'load student data' do
before :each do
SurveyResponsesDataLoader.load_data filepath: path_to_student_responses
@ -81,10 +85,22 @@ describe StudentLoader do
end
def assigns_student_to_the_survey_item_responses
expect(SurveyItemResponse.find_by_response_id('student_survey_response_1').student).not_to eq nil
expect(SurveyItemResponse.find_by_response_id('student_survey_response_1').student).to eq Student.find_by_lasid('123456')
expect(SurveyItemResponse.find_by_response_id('student_survey_response_6').student).not_to eq nil
expect(SurveyItemResponse.find_by_response_id('student_survey_response_6').student).to eq Student.find_by_response_id('student_survey_response_6')
# The csv file has no responses for `student_survey_response_2` so we can't assign a student to nil responses
expect(SurveyItemResponse.find_by_response_id('student_survey_response_2')).to eq nil
response_ids = %w[student_survey_response_1 student_survey_response_3
student_survey_response_4
student_survey_response_5
student_survey_response_6
student_survey_response_7]
response_ids.each do |response_id|
responses = SurveyItemResponse.where(response_id:)
responses.each do |response|
expect(response.student).not_to eq nil
expect(response.student).to eq Student.find_by_response_id(response_id)
end
end
end
def assigns_races_to_students
@ -101,6 +117,8 @@ end
def is_idempotent_for_students
number_of_students = Student.count
number_of_responses = SurveyItemResponse.count
StudentLoader.load_data filepath: path_to_student_responses
expect(Student.count).to eq number_of_students
expect(SurveyItemResponse.count).to eq number_of_responses
end

Loading…
Cancel
Save