You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
sqm-dashboards/lib/tasks/data.rake

595 lines
22 KiB

# PSQL: /Applications/Postgres.app/Contents/Versions/9.6/bin/psql -h localhost
# LOAD DATA
# RAILS_ENV=development rails db:environment:set db:drop db:create db:migrate; /Applications/Postgres.app/Contents/Versions/9.6/bin/pg_restore --verbose --clean --no-acl --no-owner -h localhost -d mciea_development beta-data-080818.dump; rake db:migrate;
# rails c -> SchoolCategory.update_all(year: '2017')
# rake data:load_questions_csv; rake data:load_responses
# sudo heroku pg:reset DATABASE -a mciea-beta
# sudo heroku pg:backups:restore 'https://s3.amazonaws.com/irrationaldesign/beta-data-080818.dump' DATABASE_URL -a mciea-beta
# sudo heroku run rake db:migrate -a mciea-beta
# sudo heroku run console -a mciea-beta -> SchoolCategory.update_all(year: '2017') --
# RENAME SCHOOLS = s = SCHOOLS; s.each { |correct, incorrect| District.find_by_name("Boston").schools.find_by_name(incorrect[0]).update(name: correct) }
# s.map { |correct, incorrect| District.find_by_name("Boston").schools.find_by_name(incorrect.to_s).merge_into(correct) }
# sudo heroku run rake data:load_questions_csv -a mciea-beta
# sudo heroku run:detached rake data:load_responses -a mciea-beta --size performance-l
# sudo heroku run rake data:move_likert_to_submeasures -a mciea-beta
# sudo heroku run:detached rake data:sync -a mciea-beta --size performance-l
# sudo heroku run:detached rake data:create_school_questions -a mciea-dashboard --size performance-l
# Add:
#
# Category: unique_external_id (string)
# School Category: year (string)
#
# Update:
# Add year to existing school categories
require 'csv'
namespace :data do
# Survey year shared by the tasks in this namespace: it selects the response
# CSV files ("<file>_<year>.csv"), the recipients whose counts are refreshed,
# and the school-category year used when syncing aggregates.
@year = 2018
desc "Load in all data"
task load: :environment do
  # return if School.count > 0

  # The loaders run strictly in this order.
  %w[
    data:load_categories
    data:load_questions
    db:seed
    data:load_responses
    data:load_nonlikert_values
  ].each { |task_name| Rake::Task[task_name].invoke }
end
desc 'Load in category data'
task load_categories: :environment do
  # Attributes that are only applied when a category has to be created;
  # existing categories (matched by name) are left untouched.
  creation_attrs = lambda do |info, external_id|
    { blurb: info['blurb'], description: info['text'], external_id: external_id }
  end

  measures_path = File.expand_path('../../../data/measures.json', __FILE__)

  # Walk the three nested levels of measures.json:
  # top-level entry -> 'sub' -> 'measures' (-> optional 'nonlikert' entries).
  JSON.parse(File.read(measures_path)).each.with_index(1) do |measure, position|
    # Entries without an explicit 'id' fall back to their 1-based position.
    category = Category
      .create_with(creation_attrs.call(measure, measure['id'] || position))
      .find_or_create_by(name: measure['title'])

    measure['sub'].sort_by { |key, _| key }.each do |key, subinfo|
      subcategory = category.child_categories
        .create_with(creation_attrs.call(subinfo, key))
        .find_or_create_by(name: subinfo['title'])

      subinfo['measures'].sort_by { |subinfo_key, _| subinfo_key }.each do |subinfo_key, subsubinfo|
        subsubcategory = subcategory.child_categories
          .create_with(creation_attrs.call(subsubinfo, subinfo_key))
          .find_or_create_by(name: subsubinfo['title'])

        next unless subsubinfo['nonlikert'].present?

        # Non-likert entries become children carrying benchmark data
        # instead of blurb/description.
        subsubinfo['nonlikert'].each do |nonlikert_info|
          puts("NONLIKERT FOUND: #{nonlikert_info['title']}")
          subsubcategory.child_categories.create_with(
            benchmark_description: nonlikert_info['benchmark_explanation'],
            benchmark: nonlikert_info['benchmark']
          ).find_or_create_by(name: nonlikert_info['title'])
        end
      end
    end
  end
end
desc 'Load in question data from json'
task load_questions: :environment do
  # Literal marker inside a question text; such questions are created once
  # per entry in `variations`, with the marker replaced by that entry.
  placeholder = '.* teacher'
  variations = [
    '[Field-MathTeacher][Field-ScienceTeacher][Field-EnglishTeacher][Field-SocialTeacher]',
    'teacher'
  ]

  questions_path = File.expand_path('../../../data/questions.json', __FILE__)
  JSON.parse(File.read(questions_path)).each do |question|
    # Resolve the dash-separated external-id path one level at a time,
    # creating any category that does not exist yet.
    category = nil
    question['category'].split('-').each do |external_id|
      scope = category.present? ? category.child_categories : Category
      category = scope.where(external_id: external_id).first
      next unless category.nil?

      puts 'NOTHING'
      puts external_id
      puts scope.inspect
      category = scope.create(name: question['Category Name'], external_id: external_id)
    end

    # Collapse every kind of whitespace to plain spaces before storing.
    question_text = question['text'].gsub(/[[:space:]]/, ' ').strip
    texts =
      if question_text.index(placeholder).nil?
        [question_text]
      else
        variations.map { |variation| question_text.gsub(placeholder, variation) }
      end

    answers = question['answers']
    shared_attributes = {
      option1: answers[0],
      option2: answers[1],
      option3: answers[2],
      option4: answers[3],
      option5: answers[4],
      for_recipient_students: question['child'].present?
    }

    texts.each do |text|
      category.questions.create({ text: text }.merge(shared_attributes))
    end
  end
end
desc 'Load in question data from csv'
task load_questions_csv: :environment do
  # Literal marker inside a question text; such questions are created once
  # per entry in `variations`, with the marker replaced by that entry.
  placeholder = '.* teacher'
  variations = [
    '[Field-MathTeacher][Field-ScienceTeacher][Field-EnglishTeacher][Field-SocialTeacher]',
    'teacher'
  ]

  csv_string = File.read(File.expand_path('../../../data/MeasureKey2018.csv', __FILE__))
  csv = CSV.parse(csv_string, headers: true)

  csv.each do |question|
    # Resolve the dash-separated external-id path in the "Category" column
    # one level at a time, creating any category that does not exist yet.
    category = nil
    question['Category'].split('-').each do |external_id_raw|
      external_id = external_id_raw.gsub(/[[:space:]]/, ' ').strip
      categories = category.present? ? category.child_categories : Category
      category = categories.where(external_id: external_id).first
      next unless category.nil?

      puts 'NOTHING'
      puts "#{question['Category']} -- #{external_id}"
      # `categories` is the Category class itself for the first path segment,
      # and model classes do not respond to `map`; go through `all` so the
      # diagnostic works at every level instead of raising.
      puts categories.all.map { |c| "#{c.name} - |#{c.external_id}| == |#{external_id}|: - #{external_id == c.external_id}"}.join(" ---- ")
      category = categories.create(name: question['Category Name'], external_id: external_id)
    end

    # Collapse every kind of whitespace to plain spaces before storing.
    question_text = question['Question Text'].gsub(/[[:space:]]/, ' ').strip
    texts =
      if question_text.index(placeholder).nil?
        [question_text]
      else
        variations.map { |variation| question_text.gsub(placeholder, variation) }
      end

    shared_attributes = {
      option1: question['R1'],
      option2: question['R2'],
      option3: question['R3'],
      option4: question['R4'],
      option5: question['R5'],
      for_recipient_students: question['Level'] == "Students",
      external_id: question['qid'],
      reverse: question['Reverse'] == "1"
    }

    texts.each do |text|
      category.questions.create({ text: text }.merge(shared_attributes))
    end
  end
end
desc 'Load in student and teacher responses'
task load_responses: :environment do
  # Reads data/student_responses_<year>.csv and data/teacher_responses_<year>.csv.
  # For every row it finds or creates the district and school, creates a
  # recipient, adds it to the school's "<Students|Teachers> List", and records
  # one attempt per answered question column.
  # BULK_PROCESS is set for the duration of the task and always cleared again.
  ENV['BULK_PROCESS'] = 'true'
  begin
    # Alternative wordings tried (in both directions) when a textual answer
    # does not match any option of the question.
    answer_dictionary = {
      'Slightly': 'Somewhat',
      'an incredible': 'a tremendous',
      'a little': 'a little bit',
      'slightly': 'somewhat',
      'a little well': 'slightly well',
      'quite': 'very',
      'a tremendous': 'a very great',
      'somewhat clearly': 'somewhat',
      'almost never': 'once in a while',
      'always': 'all the time',
      'not at all strong': 'not strong at all',
      'each': 'every'
    }
    respondent_map = {}
    unknown_schools = {}
    missing_questions = {}
    bad_answers = {}
    # Processing window: stop after this many seconds or past stop_index rows.
    time_to_run = 120 * 60
    start_index = 0
    stop_index = 100000
    start_time = Time.new

    # ['student_responses'].each do |file|
    ['student_responses', 'teacher_responses'].each do |file|
      recipients = file.split('_')[0]
      target_group = Question.target_groups["for_#{recipients}s"]
      csv_string = File.read(File.expand_path("../../../data/#{file}_#{@year}.csv", __FILE__))
      csv = CSV.parse(csv_string, headers: true)
      puts("LOADING CSV: #{csv.length} ROWS")
      t = Time.new

      csv.each_with_index do |row, index|
        next if index < start_index

        if Time.new - start_time >= time_to_run || index > stop_index
          puts("ENDING #{time_to_run} SECONDS: #{Time.new - start_time} = #{start_index} -> #{index} = #{index - start_index} or #{(Time.new - t) / (index - start_index)} per second")
          break
        end

        if index % 10 == 0
          puts("DATAMSG: PROCESSING ROW: #{index} OUT OF #{csv.length} ROWS: #{Time.new - t} - Total: #{Time.new - start_time} - #{time_to_run - (Time.new - start_time)} TO GO / #{stop_index - start_index} ROWS TO GO")
          t = Time.new
        end

        district_name = row['District']
        next if district_name.blank? || district_name == "NA"

        # district_name = row['To begin, please select your district.'] if district_name.nil?
        district = District.find_or_create_by(name: district_name, state_id: 1)

        # The school column is named after the district ("School.<district>").
        school_name = row["School.#{district_name}"]
        if school_name.blank? || school_name == "NA"
          puts "BLANK SCHOOL NAME: #{district.name} - #{index}"
          next
        end

        school = district.schools.find_or_create_by(name: school_name)
        if school.nil?
          next if unknown_schools[school_name]
          puts "DATAERROR: Unable to find school: #{school_name} - #{index}"
          unknown_schools[school_name] = true
          next
        end

        respondent_id = "#{recipients}-#{index}-#{row["X_recordId"]}"
        # One key for both lookup and store, so a remembered recipient can
        # actually be found again.
        respondent_key = "#{school.id}-#{@year}-#{respondent_id}"
        recipient_id = respondent_map[respondent_key]
        recipient = school.recipients.where(id: recipient_id).first if recipient_id.present?

        if recipient.nil?
          begin
            recipient = school.recipients.create(
              name: "Survey Respondent Id: #{respondent_id}"
            )
          rescue StandardError
            puts "DATAERROR: INDEX: #{index} ERROR AT #{index} - #{district.name} - #{school_name} #{school}: #{respondent_id}"
            # Nothing can be recorded for this row without a recipient.
            next
          end
          respondent_map[respondent_key] = recipient.id
        end

        list_name = "#{recipients.titleize} List"
        recipient_list = school.recipient_lists.find_by_name(list_name)
        recipient_list = school.recipient_lists.create(name: list_name) if recipient_list.nil?
        recipient_list.recipient_id_array << recipient.id
        recipient_list.save!

        # The timestamp belongs to the row, so parse it once rather than per
        # cell; fall back to today when it is missing or malformed.
        responded_at =
          begin
            Date.strptime(row['recordedDate'], '%Y-%m-%d %H:%M:%S')
          rescue ArgumentError, TypeError
            Date.today
          end

        row.each do |key, value|
          # "-99" cells are skipped like empty ones.
          next if value.nil? || key.nil? || value.to_s == "-99"

          # Normalise the header into a question external id: whitespace
          # collapsed, dots turned into dashes, grade-band suffixes removed.
          key = key.gsub(/[[:space:]]/, ' ').gsub(/\./, '-').strip.gsub(/\s+/, ' ')
          key = key.gsub(/-4-5/, '').gsub(/-6-12/, '')
          value = value.gsub(/[[:space:]]/, ' ').strip.downcase

          begin
            question = Question.find_by_external_id(key)
          rescue StandardError => e
            puts "DATAERROR: INDEX: #{index} Failed finding question: #{key} -> #{e}"
          end

          if question.nil?
            # Report each unknown column only once.
            next if missing_questions[key]
            puts "DATAERROR: Unable to find question: #{key}"
            missing_questions[key] = true
            next
          end
          question.update(target_group: target_group) if question.unknown?

          if value =~ /\A[-+]?\d/
            # Numeric cell: the number is the answer index.
            answer_index = value.to_i
          else
            # Textual cell: match against the question's options, retrying
            # with the known alternative wordings. (An Integer is never
            # blank, so testing `value.to_i.blank?` can not detect this case.)
            # NOTE(review): assumes option_index returns the same 1-based
            # index that numeric cells carry - confirm against Question.
            answer_index = question.option_index(value)
            answer_dictionary.each do |k, v|
              break if answer_index.present?
              answer_index = question.option_index(value.gsub(k.to_s, v.to_s))
              answer_index = question.option_index(value.gsub(v.to_s, k.to_s)) if answer_index.nil?
            end

            if answer_index.nil?
              next if bad_answers[key]
              puts "DATAERROR: Unable to find answer: #{key} = #{value.downcase.strip} - #{question.options.inspect}"
              bad_answers[key] = true
              next
            end
          end

          next if answer_index == 0
          answer_index = 6 - answer_index if question.reverse?

          begin
            recipient.attempts.create(question: question, answer_index: answer_index, responded_at: responded_at)
          rescue StandardError => e
            puts "DATAERROR: INDEX: #{index} Attempt failed for #{recipient.inspect} -> QUESTION: #{question.inspect}, ANSWER_INDEX: #{answer_index}, RESPONDED_AT: #{responded_at}, ERROR: #{e}"
            next
          end
        end
      end
    end
  ensure
    ENV.delete('BULK_PROCESS')
  end
  # sync_school_category_aggregates
  #
  # Recipient.created_in(@year).each { |r| r.update_counts }
end
desc 'Load in nonlikert values for each school'
task load_nonlikert_values: :environment do
  # Reads data/MCIEA_17-18AdminData.csv. Each row names a category path
  # ("Category", dash-separated external ids), a non-likert category title,
  # an optional benchmark ("B_MCIEA"), a district, a school, a raw value
  # ("NL_Value") and a "Z-Score".
  # BULK_PROCESS is set for the duration of the task and always cleared again.
  ENV['BULK_PROCESS'] = 'true'
  begin
    csv_string = File.read(File.expand_path("../../../data/MCIEA_17-18AdminData.csv", __FILE__))
    # csv_string = File.read(File.expand_path("../../../data/MCIEA_16-17_SGP.csv", __FILE__))
    csv = CSV.parse(csv_string, headers: true)
    puts("LOADING NONLIKERT CSV: #{csv.length} ROWS")

    csv.each do |row|
      category_ids = row["Category"].to_s.split("-")
      if category_ids.empty?
        # Without a path the non-likert category would be created at the top level.
        puts("Unable to find nonlikert category: #{row["NonLikert Title"]} (blank category path)")
        next
      end

      # Descend the category tree one external id at a time; `base` ends up
      # as the child collection of the last category in the path.
      base = Category
      missing_id = nil
      category_ids.each do |category_id|
        parent = base.find_by_external_id(category_id)
        if parent.nil?
          missing_id = category_id
          break
        end
        base = parent.child_categories
      end

      if missing_id
        # Skip the row instead of failing on a nil parent.
        puts("Unable to find nonlikert category: #{row["NonLikert Title"]} (no category #{missing_id} in #{row["Category"]})")
        next
      end

      nonlikert_category = base.find_or_create_by(name: row["NonLikert Title"])
      benchmark = row["B_MCIEA"]
      nonlikert_category.update(benchmark: benchmark) if benchmark.present?

      district = District.find_or_create_by(name: row["District"], state_id: 1)
      school = district.schools.find_or_create_by(name: row["School"])
      school_category = school.school_categories.find_or_create_by(category: nonlikert_category)

      if row["Z-Score"].blank?
        # No score for this school: remove the school category again.
        school_category.destroy
      else
        school_category.update(
          nonlikert: row["NL_Value"],
          # Limit the z-score to the range -2..2.
          zscore: [-2, [row["Z-Score"].to_f, 2].min].max
        )
      end
    end
  ensure
    ENV.delete('BULK_PROCESS')
  end
  # sync_school_category_aggregates
end
desc 'Load in custom zones for each category'
task load_custom_zones: :environment do
  ENV['BULK_PROCESS'] = 'true'

  # Columns holding the upper bound of each zone, in the order they are stored.
  zone_columns = ["Warning High", "Watch High", "Growth High", "Approval High"]
  benchmarks_path = File.expand_path("../../../data/Benchmarks2016-2017.csv", __FILE__)

  CSV.parse(File.read(benchmarks_path), headers: true).each do |row|
    # Rows without a first boundary carry no zone data.
    next if row["Warning High"].blank?

    category = Category.find_by_name(row["Subcategory"])
    unless category
      puts "Unable to find category #{row["Subcategory"]}"
      next
    end

    # Zones are stored as one comma-separated string, closed by a fixed 5.
    zones = (zone_columns.map { |column| row[column] } + [5]).join(",")
    puts "#{category.name} -> #{zones}"
    category.update(zones: zones)
  end

  ENV.delete('BULK_PROCESS')
  Category.all.each(&:sync_child_zones)
end
desc 'Move all likert survey results to a new submeasure of current measure'
task move_likert_to_submeasures: :environment do
  Question.all.each do |question|
    parent = question.category
    # Questions whose category name already contains "Scale" stay where they are.
    next if parent.name.include?("Scale")

    scale_name = "#{parent.name} Scale"
    # Reuse the "<name> Scale" child when it exists, otherwise create it
    # with the parent's zones.
    scale_category = parent.child_categories.where(name: scale_name).first
    scale_category ||= parent.child_categories.create(
      name: scale_name,
      blurb: "This measure contains all survey responses for #{parent.name}.",
      description: "The following survey questions concern perceptions of #{parent.name}.",
      zones: parent.zones
    )

    question.update(category: scale_category)
  end
  # sync_school_category_aggregates
end
desc 'Sync all school category aggregates'
task sync: :environment do
  # First refresh the school/category aggregates, then the counts of every
  # recipient created in the configured year.
  sync_school_category_aggregates
  Recipient.created_in(@year).each(&:update_counts)
end
desc 'Create School Questions'
task create_school_questions: :environment do
  Category.joins(:questions).uniq.all.each do |category|
    category.school_categories.includes(school: [:district]).find_in_batches(batch_size: 100) do |batch|
      batch.each do |school_category|
        all_school_questions = []   # existing and freshly built records
        unsaved_school_questions = [] # only the ones that still need importing

        category.questions.created_in(school_category.year).each do |question|
          school = school_category.school
          # Only schools in the "Boston" district get per-question records.
          next unless school.district.name == "Boston"

          # Single aggregate row: number of answers and their (reverse-aware) sum.
          attempt_data = Attempt
            .created_in(school_category.year)
            .for_question(question)
            .for_school(school)
            .select('count(attempts.answer_index) as response_count')
            .select('sum(case when questions.reverse then 6 - attempts.answer_index else attempts.answer_index end) as answer_index_total')[0]

          available_responders = school.available_responders_for(question)

          school_question = school_category.school_questions.for(school, question)
          unless school_question.present?
            school_question = school_category.school_questions.new(
              school: school,
              question: question,
              school_category: school_category,
              year: school_category.year,
              attempt_count: available_responders,
              response_count: attempt_data.response_count,
              response_rate: attempt_data.response_count.to_f / available_responders.to_f,
              response_total: attempt_data.answer_index_total
            )
            unsaved_school_questions << school_question
          end
          all_school_questions << school_question
        end

        SchoolQuestion.import unsaved_school_questions

        # A question counts as valid once its response rate exceeds 0.3.
        valid_questions = all_school_questions.select { |school_question| school_question.response_rate > 0.3 }
        school_category.update(valid_child_count: valid_questions.length)
      end
    end
  end
end
# Makes sure every school/category pair has a SchoolCategory for @year
# (creating the missing ones) and re-syncs its aggregated responses.
def sync_school_category_aggregates
  School.all.each do |school|
    Category.all.each do |category|
      existing = SchoolCategory.for(school, category).in(@year).first
      school_category = existing || school.school_categories.create(category: category, year: @year)
      school_category.sync_aggregated_responses
    end
  end
end
end
#<SchoolCategory id: 1, school_id: 1, category_id: 1, attempt_count: 277, response_count: 277, answer_index_total: 1073, created_at: "2017-10-17 00:21:52", updated_at: "2018-03-03 17:24:53", nonlikert: nil, zscore: 0.674396962759463, year: "2017">
# require 'csv'
# student_counts_string = File.read(File.expand_path("data/bps_student_counts.csv"))
# student_counts = CSV.parse(student_counts_string, :headers => true)
# missing_schools = []
# student_counts.each_with_index do |count, index|
# school = School.find_by_name(count["SCHOOL NAME"])
#
# if school.nil?
# puts("Unable to find school: #{count["SCHOOL NAME"]}")
# missing_schools << count["SCHOOL NAME"]
# next
# end
#
# school.update(student_count: count["Student Enrollment (Grades 4-11)"])
# end
# puts ""
# puts "MISSING SCHOOLS: #{missing_schools.length}"
# missing_schools.each { |s| puts(s) }
#
#
# require 'csv'
# teacher_counts_string = File.read(File.expand_path("data/bps_teacher_counts.csv"))
# teacher_counts = CSV.parse(teacher_counts_string, :headers => true)
# missing_schools = []
# teacher_counts.each_with_index do |count, index|
# school = School.find_by_name(count["SCHOOL NAME"])
#
# if school.nil?
# puts("Unable to find school: #{count["SCHOOL NAME"]}")
# missing_schools << count["SCHOOL NAME"]
# next
# end
#
# school.update(teacher_count: count["Teacher Count"])
# end
# puts ""
# puts "MISSING SCHOOLS: #{missing_schools.length}"
# missing_schools.each { |s| puts(s) }
#
#
# level = 1
# categories = Category.joins(:questions).uniq.all
# loop do
# parent_categories = []
# categories.each_with_index do |category, i|
# parent_category = category.parent_category
# next if parent_category.nil? || parent_categories.include?(parent_category)
# parent_categories << parent_category
#
# school_categories = parent_category.school_categories.joins(school: :district).where("districts.name = 'Boston'")
# school_categories.each_with_index do |school_category, index|
# school = school_category.school
#
# children = SchoolCategory.for_parent_category(school, parent_category).in(school_category.year)
# valid_child_count = children.where("valid_child_count > 0").count
# school_category.update(
# valid_child_count: valid_child_count
# )
# puts ""
# puts ""
# puts("#{level} (#{i}/#{categories.length}) UPDATED (#{index}/#{school_categories.length}): #{school.slug} -> #{parent_category.slug} -> #{school_category.year} -> #{valid_child_count} --- PARENT: #{parent_categories.length}")
# puts ""
# puts ""
# end
# end
#
# puts ""
# puts ""
# puts "PARENT CATEGORIES: #{parent_categories.uniq.length}"
# puts ""
# puts ""
#
# level += 1
# categories = parent_categories.uniq
# break if categories.blank?
# end