Add benchmarks to survey and admin data items. Remove them from measures. Modify seeder

Calculate benchmarks for measures based on a weighted average of survey
and admin data items

Added architectural records
This commit is contained in:
Nelson Jovel 2021-12-15 16:55:06 +01:00
parent 1a6c81e240
commit ad03606d66
157 changed files with 2443 additions and 1932 deletions

View file

@ -1,6 +1,5 @@
# PSQL: /Applications/Postgres.app/Contents/Versions/9.6/bin/psql -h localhost
# aws s3 presign s3://irrationaldesign/beta-data-080719a.dump
# sudo heroku pg:backups:restore 'https://irrationaldesign.s3.amazonaws.com/beta-data-080719a.dump?AWSAccessKeyId=AKIAIDGE3EMQEWUQZUJA&Signature=KrabUOeggEd5wrjLQ4bvgd9eZGU%3D&Expires=1565267251' DATABASE_URL -a mciea-beta
@ -36,7 +35,7 @@ require 'csv'
namespace :data do
@year = 2019
desc "load survey responses"
desc 'load survey responses'
task load_survey_responses: :environment do
Dir.glob(Rails.root.join('data', 'survey_responses', '*.csv')).each do |filepath|
puts "=====================> Loading data from csv at path: #{filepath}"
@ -45,30 +44,28 @@ namespace :data do
puts "=====================> Completed loading #{SurveyItemResponse.count} survey responses"
end
desc "Load in all data"
desc 'Load in all data'
task load: :environment do
# return if School.count > 0
Rake::Task["data:load_categories"].invoke
Rake::Task["data:load_questions"].invoke
Rake::Task["db:seed"].invoke
Rake::Task["data:load_responses"].invoke
Rake::Task["data:load_nonlikert_values"].invoke
Rake::Task['data:load_categories'].invoke
Rake::Task['data:load_questions'].invoke
Rake::Task['db:seed'].invoke
Rake::Task['data:load_responses'].invoke
Rake::Task['data:load_nonlikert_values'].invoke
end
desc 'Check question / category data against existing data'
task check_questions: :environment do
csv_string = File.read(File.expand_path("../../../data/MeasureKey#{@year}.csv", __FILE__))
csv = CSV.parse(csv_string, :headers => true)
csv = CSV.parse(csv_string, headers: true)
t = Time.new
csv.each_with_index do |question, index|
csv.each_with_index do |question, _index|
existing_question = Question.created_in(@year).find_by_external_id(question['qid'])
if existing_question.blank?
puts "NOT FOUND: #{question['qid']} -> #{question["Question Text"]}"
puts "NOT FOUND: #{question['qid']} -> #{question['Question Text']}"
else
if Question.where(external_id: question['qid']).count > 1
puts "MULTIPLE FOUND: #{question['qid']}"
end
puts "MULTIPLE FOUND: #{question['qid']}" if Question.where(external_id: question['qid']).count > 1
question_text = question['Question Text'].gsub(/[[:space:]]/, ' ').strip
if existing_question.text != question_text
@ -92,10 +89,10 @@ namespace :data do
desc 'Sync questions / category data against existing data'
task sync_questions: :environment do
csv_string = File.read(File.expand_path("../../../data/MeasureKey#{@year}.csv", __FILE__))
csv = CSV.parse(csv_string, :headers => true)
csv = CSV.parse(csv_string, headers: true)
t = Time.new
csv.each_with_index do |question, index|
csv.each_with_index do |question, _index|
existing_question = Question.created_in(@year).find_by_external_id(question['qid'])
if existing_question.blank?
categories = Category.where(name: question['Category Name'].titleize)
@ -114,7 +111,7 @@ namespace :data do
option4: question['R4'],
option5: question['R5'],
target_group: question['qid'].starts_with?('s') ? 'for_students' : 'for_teachers',
reverse: question['Reverse'] == "1"
reverse: question['Reverse'] == '1'
)
end
else
@ -145,7 +142,7 @@ namespace :data do
desc 'Load in category data'
task load_categories: :environment do
measures = JSON.parse(File.read(File.expand_path('../../../data/measures.json', __FILE__)))
measures = JSON.parse(File.read(File.expand_path('../../data/measures.json', __dir__)))
measures.each_with_index do |measure, index|
category = Category.create_with(
blurb: measure['blurb'],
@ -169,14 +166,14 @@ namespace :data do
external_id: subinfo_key
).find_or_create_by(name: subsubinfo['title'])
if subsubinfo['nonlikert'].present?
subsubinfo['nonlikert'].each do |nonlikert_info|
puts("NONLIKERT FOUND: #{nonlikert_info['title']}")
nonlikert = subsubcategory.child_categories.create_with(
benchmark_description: nonlikert_info['benchmark_explanation'],
benchmark: nonlikert_info['benchmark']
).find_or_create_by(name: nonlikert_info['title'])
end
next unless subsubinfo['nonlikert'].present?
subsubinfo['nonlikert'].each do |nonlikert_info|
puts("NONLIKERT FOUND: #{nonlikert_info['title']}")
nonlikert = subsubcategory.child_categories.create_with(
benchmark_description: nonlikert_info['benchmark_explanation'],
benchmark: nonlikert_info['benchmark']
).find_or_create_by(name: nonlikert_info['title'])
end
end
end
@ -190,18 +187,18 @@ namespace :data do
'teacher'
]
questions = JSON.parse(File.read(File.expand_path('../../../data/questions.json', __FILE__)))
questions = JSON.parse(File.read(File.expand_path('../../data/questions.json', __dir__)))
questions.each do |question|
category = nil
question['category'].split('-').each do |external_id|
categories = category.present? ? category.child_categories : Category
category = categories.where(external_id: external_id).first
if category.nil?
puts 'NOTHING'
puts external_id
puts categories.inspect
category = categories.create(name: question['Category Name'], external_id: external_id)
end
next unless category.nil?
puts 'NOTHING'
puts external_id
puts categories.inspect
category = categories.create(name: question['Category Name'], external_id: external_id)
end
question_text = question['text'].gsub(/[[:space:]]/, ' ').strip
if question_text.index('.* teacher').nil?
@ -213,7 +210,7 @@ namespace :data do
option4: question['answers'][3],
option5: question['answers'][4],
for_recipient_students: question['child'].present?,
reverse: question['Reverse'] == "1"
reverse: question['Reverse'] == '1'
)
else
variations.each do |variation|
@ -225,7 +222,7 @@ namespace :data do
option4: question['answers'][3],
option5: question['answers'][4],
for_recipient_students: question['child'].present?,
reverse: question['Reverse'] == "1"
reverse: question['Reverse'] == '1'
)
end
end
@ -240,21 +237,23 @@ namespace :data do
]
csv_string = File.read(File.expand_path("../../../data/MeasureKey#{@year}.csv", __FILE__))
csv = CSV.parse(csv_string, :headers => true)
csv = CSV.parse(csv_string, headers: true)
t = Time.new
csv.each_with_index do |question, index|
csv.each_with_index do |question, _index|
category = nil
question['Category19'].split('-').each do |external_id_raw|
external_id = external_id_raw.gsub(/[[:space:]]/, ' ').strip
categories = category.present? ? category.child_categories : Category
category = categories.where(external_id: external_id).first
if category.nil?
puts 'NOTHING'
puts "#{question['Category']} -- #{external_id}"
puts categories.map { |c| "#{c.name} - |#{c.external_id}| == |#{external_id}|: - #{external_id == c.external_id}"}.join(" ---- ")
category = categories.create(name: question['Category Name'], external_id: external_id)
end
next unless category.nil?
puts 'NOTHING'
puts "#{question['Category']} -- #{external_id}"
puts categories.map { |c|
"#{c.name} - |#{c.external_id}| == |#{external_id}|: - #{external_id == c.external_id}"
}.join(' ---- ')
category = categories.create(name: question['Category Name'], external_id: external_id)
end
question_text = question['Question Text'].gsub(/[[:space:]]/, ' ').strip
if question_text.index('.* teacher').nil?
@ -265,9 +264,9 @@ namespace :data do
option3: question['R3'],
option4: question['R4'],
option5: question['R5'],
for_recipient_students: question['Level'] == "Students",
for_recipient_students: question['Level'] == 'Students',
external_id: question['qid'],
reverse: question['Reverse'] == "1"
reverse: question['Reverse'] == '1'
)
else
variations.each do |variation|
@ -278,9 +277,9 @@ namespace :data do
option3: question['R3'],
option4: question['R4'],
option5: question['R5'],
for_recipient_students: question['Level'] == "Students",
for_recipient_students: question['Level'] == 'Students',
external_id: question['qid'],
reverse: question['Reverse'] == "1"
reverse: question['Reverse'] == '1'
)
end
end
@ -310,17 +309,17 @@ namespace :data do
missing_questions = {}
bad_answers = {}
timeToRun = 120000 * 60
timeToRun = 120_000 * 60
startIndex = 0
stopIndex = 1000000
stopIndex = 1_000_000
startTime = Time.new
# ['teacher_responses'].each do |file|
['student_responses', 'teacher_responses'].each do |file|
%w[student_responses teacher_responses].each do |file|
recipients = file.split('_')[0]
target_group = Question.target_groups["for_#{recipients}s"]
csv_string = File.read(File.expand_path("../../../data/#{file}_#{@year}.csv", __FILE__))
csv = CSV.parse(csv_string, :headers => true)
csv = CSV.parse(csv_string, headers: true)
puts("LOADING CSV: #{csv.length} ROWS")
t = Time.new
@ -338,16 +337,16 @@ namespace :data do
end
district_name = row['Q111'].strip
if district_name.blank? || district_name == "NA"
if district_name.blank? || district_name == 'NA'
puts "DISTRICT NOT FOUND: #{district_name}"
next
end
# district_name = row['To begin, please select your district.'] if district_name.nil?
district = District.find_or_create_by(name: district_name, state_id: 1)
school_name = row["SchoolName"].strip
school_name = row['SchoolName'].strip
if school_name.blank? || school_name == "NA"
if school_name.blank? || school_name == 'NA'
puts "BLANK SCHOOL NAME: #{district.name} - #{index}"
next
end
@ -356,38 +355,36 @@ namespace :data do
if school.nil?
next if unknown_schools[school_name]
puts "DATAERROR: Unable to find school: #{school_name} - #{index}"
unknown_schools[school_name] = true
next
end
respondent_id = "#{recipients}-#{index}-#{row["ResponseId"].strip}"
respondent_id = "#{recipients}-#{index}-#{row['ResponseId'].strip}"
recipient_id = respondent_map["#{school.id}-#{@year}-#{respondent_id}"]
if recipient_id.present?
recipient = school.recipients.where(id: recipient_id).first
end
recipient = school.recipients.where(id: recipient_id).first if recipient_id.present?
if recipient.nil?
begin
recipient = school.recipients.create(
name: "Survey Respondent Id: #{respondent_id}"
)
rescue
rescue StandardError
puts "DATAERROR: INDEX: #{index} ERROR AT #{index} - #{district.name} - #{school_name} #{school}: #{respondent_id}"
end
respondent_map["#{school.id}-#{respondent_id}"] = recipient.id
end
recipient_list = school.recipient_lists.find_by_name("#{recipients.titleize} List")
if recipient_list.nil?
recipient_list = school.recipient_lists.create(name: "#{recipients.titleize} List")
end
recipient_list = school.recipient_lists.create(name: "#{recipients.titleize} List") if recipient_list.nil?
recipient_list.recipient_id_array << recipient.id
recipient_list.save!
row.each do |key, value|
t1 = Time.new
next if value.nil? or key.nil? or value.to_s == "-99"
next if value.nil? or key.nil? or value.to_s == '-99'
key = key.gsub(/[[:space:]]/, ' ').gsub(/\./, '-').strip.gsub(/\s+/, ' ')
key = key.gsub(/-4-5/, '').gsub(/-6-12/, '')
value = value.gsub(/[[:space:]]/, ' ').strip.downcase
@ -400,23 +397,26 @@ namespace :data do
if question.nil?
next if missing_questions[key]
puts "DATAERROR: Unable to find question: #{key}"
missing_questions[key] = true
next
else
question.update_attributes(target_group: target_group) if question.unknown?
elsif question.unknown?
question.update_attributes(target_group: target_group)
end
if (value.to_i.blank?)
if value.to_i.blank?
answer_index = question.option_index(value)
answer_dictionary.each do |k, v|
break if answer_index.present?
answer_index = question.option_index(value.gsub(k.to_s, v.to_s))
answer_index = question.option_index(value.gsub(v.to_s, k.to_s)) if answer_index.nil?
end
if answer_index.nil?
next if bad_answers[key]
puts "DATAERROR: Unable to find answer: #{key} = #{value.downcase.strip} - #{question.options.inspect}"
bad_answers[key] = true
next
@ -429,7 +429,11 @@ namespace :data do
# answer_index = 6 - answer_index if question.reverse?
responded_at = Date.strptime(row['recordedDate'], '%Y-%m-%d %H:%M:%S') rescue Date.today
responded_at = begin
Date.strptime(row['recordedDate'], '%Y-%m-%d %H:%M:%S')
rescue StandardError
Date.today
end
begin
recipient.attempts.create(question: question, answer_index: answer_index, responded_at: responded_at)
rescue Exception => e
@ -448,65 +452,64 @@ namespace :data do
desc 'Load in nonlikert values for each school'
task load_nonlikert_values: :environment do
csv_string = File.read(File.expand_path("../../../data/MCIEA_18-19AdminData_Final.csv", __FILE__))
csv_string = File.read(File.expand_path('../../data/MCIEA_18-19AdminData_Final.csv', __dir__))
# csv_string = File.read(File.expand_path("../../../data/MCIEA_16-17_SGP.csv", __FILE__))
csv = CSV.parse(csv_string, :headers => true)
csv = CSV.parse(csv_string, headers: true)
puts("LOADING NONLIKERT CSV: #{csv.length} ROWS")
errors = []
csv.each_with_index do |row, index|
next if row["Likert_Value"].blank?
csv.each_with_index do |row, _index|
next if row['Likert_Value'].blank?
base = Category
category_ids = row["Category"].split("-")
category_ids = row['Category'].split('-')
category_ids.each do |category_id|
category_id = category_id.downcase if category_id.downcase =~ /i/
base = base.find_by_external_id(category_id)
if base.nil?
row["reason"] = "Unable to find category_id #{category_id} for category #{row["Category"]}"
row['reason'] = "Unable to find category_id #{category_id} for category #{row['Category']}"
errors << row
next
end
base = base.child_categories
end
nonlikert_category = base.where(name: row["NonLikert Title"]).first
nonlikert_category = base.where(name: row['NonLikert Title']).first
if nonlikert_category.nil?
row["reason"] = "Unable to find nonlikert category: #{row["NonLikert Title"]} in #{}"
row['reason'] = "Unable to find nonlikert category: #{row['NonLikert Title']} in "
errors << row
next
else
if (benchmark = row["Benchmark"]).present?
nonlikert_category.update(benchmark: benchmark)
end
elsif (benchmark = row['Benchmark']).present?
nonlikert_category.update(benchmark: benchmark)
end
district = District.where(name: row["District"], state_id: 1).first
district = District.where(name: row['District'], state_id: 1).first
if district.blank?
row["reason"] = "DISTRICT NOT FOUND: #{row["District"]}"
row['reason'] = "DISTRICT NOT FOUND: #{row['District']}"
errors << row
next
end
school = district.schools.where(name: row["School"]).first
school = district.schools.where(name: row['School']).first
if school.blank?
row["reason"] = "SCHOOL NOT FOUND: #{row["School"]} (#{district.name})"
row['reason'] = "SCHOOL NOT FOUND: #{row['School']} (#{district.name})"
errors << row
next
end
school_category = school.school_categories.find_or_create_by(category: nonlikert_category, year: "#{@year}")
school_category = school.school_categories.find_or_create_by(category: nonlikert_category, year: @year.to_s)
if school_category.blank?
row["reason"] = "SCHOOL CATEGORY NOT FOUND: #{school.name} (#{district.name}) #{nonlikert_category.name}"
row['reason'] = "SCHOOL CATEGORY NOT FOUND: #{school.name} (#{district.name}) #{nonlikert_category.name}"
errors << row
next
end
zscore = (([-2,[row["Likert_Value"].to_f-3,2].min].max * 10).to_i).to_f / 10.0
zscore = ([-2, [row['Likert_Value'].to_f - 3, 2].min].max * 10).to_i.to_f / 10.0
school_category.update(
nonlikert: row["NL_Value"],
nonlikert: row['NL_Value'],
zscore: zscore.to_f,
year: "#{@year}",
year: @year.to_s,
valid_child_count: 1
)
@ -514,40 +517,41 @@ namespace :data do
end
errors.each do |error|
puts "#{error["reason"]}: #{error["NonLikert Title"]} -> #{error["Likert_Value"]}"
puts "#{error['reason']}: #{error['NonLikert Title']} -> #{error['Likert_Value']}"
end
puts "COUNT: #{SchoolCategory.where(attempt_count: 0, answer_index_total: 0).where("nonlikert is not null and zscore is null").count}"
puts "COUNT: #{SchoolCategory.where(attempt_count: 0,
answer_index_total: 0).where('nonlikert is not null and zscore is null').count}"
end
desc 'Load in custom zones for each category'
task load_custom_zones: :environment do
ENV['BULK_PROCESS'] = 'true'
csv_string = File.read(File.expand_path("../../../data/Benchmarks2016-2017.csv", __FILE__))
csv = CSV.parse(csv_string, :headers => true)
csv_string = File.read(File.expand_path('../../data/Benchmarks2016-2017.csv', __dir__))
csv = CSV.parse(csv_string, headers: true)
csv.each_with_index do |row, index|
next if row["Warning High"].blank?
csv.each_with_index do |row, _index|
next if row['Warning High'].blank?
category = Category.find_by_name(row["Subcategory"])
category = Category.find_by_name(row['Subcategory'])
if category.nil?
puts "Unable to find category #{row["Subcategory"]}"
puts "Unable to find category #{row['Subcategory']}"
next
end
custom_zones = [
row["Warning High"],
row["Watch High"],
row["Growth High"],
row["Approval High"],
row['Warning High'],
row['Watch High'],
row['Growth High'],
row['Approval High'],
5
]
puts "#{category.name} -> #{custom_zones.join(",")}"
puts "#{category.name} -> #{custom_zones.join(',')}"
category.update(zones: custom_zones.join(","))
category.update(zones: custom_zones.join(','))
end
ENV.delete('BULK_PROCESS')
@ -559,7 +563,8 @@ namespace :data do
task move_likert_to_submeasures: :environment do
Question.all.each do |q|
category = q.category
next unless category.name.index("Scale").nil?
next unless category.name.index('Scale').nil?
new_category_name = "#{category.name} Scale"
new_category = category.child_categories.where(name: new_category_name).first
if new_category.nil?
@ -592,19 +597,19 @@ namespace :data do
new_school_questions = []
category.questions.created_in(@year).each do |question|
school = school_category.school
next if school.district.name != "Boston"
next if school.district.name != 'Boston'
school_question = school_category.school_questions.for(school, question).first
if school_question.present?
school_questions << school_question
else
attempt_data = Attempt.
joins(:question).
created_in(school_category.year).
for_question(question).
for_school(school).
select('count(attempts.answer_index) as response_count').
select('sum(case when questions.reverse then 6 - attempts.answer_index else attempts.answer_index end) as answer_index_total')[0]
attempt_data = Attempt
.joins(:question)
.created_in(school_category.year)
.for_question(question)
.for_school(school)
.select('count(attempts.answer_index) as response_count')
.select('sum(case when questions.reverse then 6 - attempts.answer_index else attempts.answer_index end) as answer_index_total')[0]
available_responders = school.available_responders_for(question)
@ -625,7 +630,6 @@ namespace :data do
SchoolQuestion.import new_school_questions
end
end
end
end
@ -634,17 +638,14 @@ namespace :data do
School.all.each do |school|
Category.all.each do |category|
school_category = SchoolCategory.for(school, category).in(@year).first
if school_category.nil?
school_category = school.school_categories.create(category: category, year: @year)
end
school_category = school.school_categories.create(category: category, year: @year) if school_category.nil?
school_category.sync_aggregated_responses
end
end
end
end
#<SchoolCategory id: 1, school_id: 1, category_id: 1, attempt_count: 277, response_count: 277, answer_index_total: 1073, created_at: "2017-10-17 00:21:52", updated_at: "2018-03-03 17:24:53", nonlikert: nil, zscore: 0.674396962759463, year: "2017">
# <SchoolCategory id: 1, school_id: 1, category_id: 1, attempt_count: 277, response_count: 277, answer_index_total: 1073, created_at: "2017-10-17 00:21:52", updated_at: "2018-03-03 17:24:53", nonlikert: nil, zscore: 0.674396962759463, year: "2017">
# require 'csv'
# student_counts_string = File.read(File.expand_path("data/bps_student_counts.csv"))
@ -691,8 +692,6 @@ end
#
#
# min_response_rate = 0.3
# level = 1
# # categories = Category.joins(:questions).uniq.all
@ -796,7 +795,6 @@ end
#
# puts "TOTAL: #{total}"
# [
# "next-wave-full-circle"
# ].each do |slug|