# Operational notes (local + Heroku workflows, kept from original author):
# PSQL: /Applications/Postgres.app/Contents/Versions/9.6/bin/psql -h localhost
# LOAD DATA
# RAILS_ENV=development rails db:environment:set db:drop db:create db:migrate; /Applications/Postgres.app/Contents/Versions/9.6/bin/pg_restore --verbose --clean --no-acl --no-owner -h localhost -d mciea_development latest.dump; rake db:migrate;
# rails c -> SchoolCategory.update_all(year: '2017')
# rake data:load_questions_csv; rake data:load_responses
# sudo heroku pg:reset DATABASE -a mciea-beta
# sudo heroku pg:backups:restore 'https://s3.amazonaws.com/irrationaldesign/latest.dump' DATABASE_URL -a mciea-beta
# sudo heroku run rake db:migrate -a mciea-beta
# sudo heroku run console -a mciea-beta -> SchoolCategory.update_all(year: '2017') -- RENAME SCHOOLS
# sudo heroku run rake data:load_questions_csv -a mciea-beta
# sudo heroku run:detached rake data:load_responses -a mciea-beta --size performance-l
# sudo heroku run rake data:move_likert_to_submeasures -a mciea-beta
# sudo heroku run:detached rake data:sync -a mciea-beta --size performance-l
#
# Schema additions this file assumes:
#   Category:        unique_external_id (string)
#   SchoolCategory:  year (string)
# Update: add year to existing school categories.

require 'csv'

namespace :data do
  # Survey year every loader below tags its records with.
  @year = 2018

  desc "Load in all data"
  task load: :environment do
    # Full bootstrap: categories -> questions -> seeds -> responses -> nonlikert.
    # return if School.count > 0
    Rake::Task["data:load_categories"].invoke
    Rake::Task["data:load_questions"].invoke
    Rake::Task["db:seed"].invoke
    Rake::Task["data:load_responses"].invoke
    Rake::Task["data:load_nonlikert_values"].invoke
  end

  desc 'Load in category data'
  # Builds the three-level Category tree (measure -> sub -> sub-sub) from
  # data/measures.json, plus optional non-likert leaf categories. Idempotent:
  # uses create_with(...).find_or_create_by(name: ...) throughout.
  task load_categories: :environment do
    measures = JSON.parse(File.read(File.expand_path('../../../data/measures.json', __FILE__)))
    measures.each_with_index do |measure, index|
      category = Category.create_with(
        blurb: measure['blurb'],
        description: measure['text'],
        external_id: measure['id'] || index + 1
      ).find_or_create_by(name: measure['title'])

      measure['sub'].keys.sort.each do |key|
        subinfo = measure['sub'][key]
        subcategory = category.child_categories.create_with(
          blurb: subinfo['blurb'],
          description: subinfo['text'],
          external_id: key
        ).find_or_create_by(name: subinfo['title'])

        subinfo['measures'].keys.sort.each do |subinfo_key|
          subsubinfo = subinfo['measures'][subinfo_key]
          subsubcategory = subcategory.child_categories.create_with(
            blurb: subsubinfo['blurb'],
            description: subsubinfo['text'],
            external_id: subinfo_key
          ).find_or_create_by(name: subsubinfo['title'])

          next unless subsubinfo['nonlikert'].present?

          subsubinfo['nonlikert'].each do |nonlikert_info|
            puts("NONLIKERT FOUND: #{nonlikert_info['title']}")
            subsubcategory.child_categories.create_with(
              benchmark_description: nonlikert_info['benchmark_explanation'],
              benchmark: nonlikert_info['benchmark']
            ).find_or_create_by(name: nonlikert_info['title'])
          end
        end
      end
    end
  end

  desc 'Load in question data from json'
  # Loads questions from data/questions.json. Questions whose text contains the
  # literal token '.* teacher' are expanded into one copy per entry in
  # `variations` (subject-specific field piping vs. the plain word "teacher").
  task load_questions: :environment do
    variations = [
      '[Field-MathTeacher][Field-ScienceTeacher][Field-EnglishTeacher][Field-SocialTeacher]',
      'teacher'
    ]
    questions = JSON.parse(File.read(File.expand_path('../../../data/questions.json', __FILE__)))
    questions.each do |question|
      # Walk the dash-separated external-id path (e.g. "1-2-3") down the
      # category tree, creating any missing node along the way.
      category = nil
      question['category'].split('-').each do |external_id|
        categories = category.present? ? category.child_categories : Category
        category = categories.where(external_id: external_id).first
        if category.nil?
          puts 'NOTHING'
          puts external_id
          puts categories.inspect
          category = categories.create(name: question['Category Name'], external_id: external_id)
        end
      end

      # Collapse all unicode whitespace to single plain spaces.
      question_text = question['text'].gsub(/[[:space:]]/, ' ').strip
      if question_text.index('.* teacher').nil?
        category.questions.create(
          text: question_text,
          option1: question['answers'][0],
          option2: question['answers'][1],
          option3: question['answers'][2],
          option4: question['answers'][3],
          option5: question['answers'][4],
          for_recipient_students: question['child'].present?
        )
      else
        variations.each do |variation|
          category.questions.create(
            text: question_text.gsub('.* teacher', variation),
            option1: question['answers'][0],
            option2: question['answers'][1],
            option3: question['answers'][2],
            option4: question['answers'][3],
            option5: question['answers'][4],
            for_recipient_students: question['child'].present?
          )
        end
      end
    end
  end

  desc 'Load in question data from csv'
  # Same shape as data:load_questions but sourced from the 2018 measure-key
  # CSV, which additionally carries an external question id and a reverse flag.
  task load_questions_csv: :environment do
    variations = [
      '[Field-MathTeacher][Field-ScienceTeacher][Field-EnglishTeacher][Field-SocialTeacher]',
      'teacher'
    ]
    csv_string = File.read(File.expand_path('../../../data/MeasureKey2018.csv', __FILE__))
    csv = CSV.parse(csv_string, headers: true)
    csv.each do |question|
      category = nil
      question['Category'].split('-').each do |external_id_raw|
        external_id = external_id_raw.gsub(/[[:space:]]/, ' ').strip
        categories = category.present? ? category.child_categories : Category
        category = categories.where(external_id: external_id).first
        if category.nil?
          puts 'NOTHING'
          puts "#{question['Category']} -- #{external_id}"
          puts categories.map { |c| "#{c.name} - |#{c.external_id}| == |#{external_id}|: - #{external_id == c.external_id}" }.join(" ---- ")
          category = categories.create(name: question['Category Name'], external_id: external_id)
        end
      end

      question_text = question['Question Text'].gsub(/[[:space:]]/, ' ').strip
      if question_text.index('.* teacher').nil?
        category.questions.create(
          text: question_text,
          option1: question['R1'],
          option2: question['R2'],
          option3: question['R3'],
          option4: question['R4'],
          option5: question['R5'],
          for_recipient_students: question['Level'] == "Students",
          external_id: question['qid'],
          reverse: question['Reverse'] == "1"
        )
      else
        variations.each do |variation|
          category.questions.create(
            text: question_text.gsub('.* teacher', variation),
            option1: question['R1'],
            option2: question['R2'],
            option3: question['R3'],
            option4: question['R4'],
            option5: question['R5'],
            for_recipient_students: question['Level'] == "Students",
            external_id: question['qid'],
            reverse: question['Reverse'] == "1"
          )
        end
      end
    end
  end

  desc 'Load in student and teacher responses'
  # Streams survey-response CSV rows into Attempt records, creating
  # District/School/Recipient rows on demand. Bounded by a wall-clock budget
  # and a row window so it can be resumed in chunks on Heroku dynos.
  task load_responses: :environment do
    ENV['BULK_PROCESS'] = 'true'

    # Known wording drift between CSV answer text and stored question options;
    # tried in both substitution directions when an exact option match fails.
    answer_dictionary = {
      'Slightly': 'Somewhat',
      'an incredible': 'a tremendous',
      'a little': 'a little bit',
      'slightly': 'somewhat',
      'a little well': 'slightly well',
      'quite': 'very',
      'a tremendous': 'a very great',
      'somewhat clearly': 'somewhat',
      'almost never': 'once in a while',
      'always': 'all the time',
      'not at all strong': 'not strong at all',
      'each': 'every'
    }

    respondent_map = {}    # "school-year-respondent" -> Recipient id cache
    unknown_schools = {}   # de-dupe flags so each error logs only once
    missing_questions = {}
    bad_answers = {}

    time_to_run = 120 * 60 # processing budget in seconds
    start_index = 0        # resume window: first row to process
    stop_index = 100_000   # resume window: last row to process
    start_time = Time.new

    ['teacher_responses'].each do |file| # ['student_responses', 'teacher_responses'].each do |file|
      recipients = file.split('_')[0] # "teacher" / "student"
      target_group = Question.target_groups["for_#{recipients}s"]
      csv_string = File.read(File.expand_path("../../../data/MCIEA2018_teachersLowell_dashboard080518.csv", __FILE__))
      # csv_string = File.read(File.expand_path("../../../data/#{file}_#{@year}.csv", __FILE__))
      csv = CSV.parse(csv_string, headers: true)
      puts("LOADING CSV: #{csv.length} ROWS")

      t = Time.new
      csv.each_with_index do |row, index|
        next if index < start_index
        if Time.new - start_time >= time_to_run || index > stop_index
          puts("ENDING #{time_to_run} SECONDS: #{Time.new - start_time} = #{start_index} -> #{index} = #{index - start_index} or #{(Time.new - t) / (index - start_index)} per second")
          break
        end
        if index % 10 == 0
          puts("DATAMSG: PROCESSING ROW: #{index} OUT OF #{csv.length} ROWS: #{Time.new - t} - Total: #{Time.new - start_time} - #{time_to_run - (Time.new - start_time)} TO GO / #{stop_index - start_index} ROWS TO GO")
          t = Time.new
        end

        district_name = row['District']
        next if district_name.blank? || district_name == "NA"
        # district_name = row['To begin, please select your district.'] if district_name.nil?
        district = District.find_or_create_by(name: district_name, state_id: 1)

        school_name = row["School.#{district_name}"]
        if school_name.blank? || school_name == "NA"
          puts "BLANK SCHOOL NAME: #{district.name} - #{index}"
          next
        end
        school = district.schools.find_or_create_by(name: school_name)
        if school.nil?
          next if unknown_schools[school_name]
          puts "DATAERROR: Unable to find school: #{school_name} - #{index}"
          unknown_schools[school_name] = true
          next
        end

        # One Recipient per survey respondent; cache ids so repeated rows for
        # the same respondent reuse the same record.
        # BUG FIX: the cache was written under "#{school.id}-#{respondent_id}"
        # but read under "#{school.id}-#{@year}-#{respondent_id}", so lookups
        # never hit and every row created a fresh Recipient. One key for both.
        respondent_id = "#{recipients}-#{index}-#{row["X_recordId"]}"
        respondent_key = "#{school.id}-#{@year}-#{respondent_id}"
        recipient = nil
        recipient_id = respondent_map[respondent_key]
        recipient = school.recipients.where(id: recipient_id).first if recipient_id.present?
        if recipient.nil?
          begin
            recipient = school.recipients.create(
              name: "Survey Respondent Id: #{respondent_id}"
            )
          rescue StandardError # was `rescue` of everything incl. Exception-alikes; keep best-effort, log and skip
            puts "DATAERROR: INDEX: #{index} ERROR AT #{index} - #{district.name} - #{school_name} #{school}: #{respondent_id}"
          end
          # BUG FIX: on a rescued create failure `recipient` stayed nil and the
          # next line raised NoMethodError on nil.id; skip the row instead.
          next if recipient.nil?
          respondent_map[respondent_key] = recipient.id
        end

        recipient_list = school.recipient_lists.find_by_name("#{recipients.titleize} List")
        recipient_list ||= school.recipient_lists.create(name: "#{recipients.titleize} List")
        # Avoid duplicate ids now that cached recipients can recur across rows.
        unless recipient_list.recipient_id_array.include?(recipient.id)
          recipient_list.recipient_id_array << recipient.id
          recipient_list.save!
        end

        row.each do |key, value|
          next if value.nil? || key.nil? || value.to_s == "-99" # -99 = no answer

          # Normalize the column header into a question external_id
          # ("." -> "-", unicode spaces collapsed, grade-band suffixes dropped).
          key = key.gsub(/[[:space:]]/, ' ').gsub(/\./, '-').strip.gsub(/\s+/, ' ')
          key = key.gsub(/-4-5/, '').gsub(/-6-12/, '')
          value = value.gsub(/[[:space:]]/, ' ').strip.downcase

          begin
            question = Question.find_by_external_id(key)
          rescue StandardError => e
            puts "DATAERROR: INDEX: #{index} Failed finding question: #{key} -> #{e}"
          end
          if question.nil?
            next if missing_questions[key]
            puts "DATAERROR: Unable to find question: #{key}"
            missing_questions[key] = true
            next
          end
          question.update(target_group: target_group) if question.unknown?

          # BUG FIX: this branch used `value.to_i.blank?`, which is always
          # false for an Integer (ActiveSupport), so textual answers were
          # silently dropped and the answer_dictionary was dead code. The
          # intended test is "value is not numeric" -> to_i of non-numeric
          # text is 0.
          if value.to_i.zero?
            answer_index = question.option_index(value)
            answer_dictionary.each do |k, v|
              break if answer_index.present?
              answer_index = question.option_index(value.gsub(k.to_s, v.to_s))
              answer_index = question.option_index(value.gsub(v.to_s, k.to_s)) if answer_index.nil?
            end
            if answer_index.nil?
              next if bad_answers[key]
              puts "DATAERROR: Unable to find answer: #{key} = #{value.downcase.strip} - #{question.options.inspect}"
              bad_answers[key] = true
              next
            end
          else
            answer_index = value.to_i
          end
          next if answer_index == 0
          answer_index = 6 - answer_index if question.reverse? # reverse-coded 1..5 Likert scale

          responded_at =
            begin
              Date.strptime(row['recordedDate'], '%Y-%m-%d %H:%M:%S')
            rescue StandardError
              Date.today # missing/malformed timestamp falls back to today
            end
          begin
            recipient.attempts.create(question: question, answer_index: answer_index, responded_at: responded_at)
          rescue StandardError => e
            puts "DATAERROR: INDEX: #{index} Attempt failed for #{recipient.inspect} -> QUESTION: #{question.inspect}, ANSWER_INDEX: #{answer_index}, RESPONDED_AT: #{responded_at}, ERROR: #{e}"
            next
          end
        end
      end
    end

    ENV.delete('BULK_PROCESS')
    sync_school_category_aggregates
    Recipient.created_in(@year).each { |r| r.update_counts }
  end

  desc 'Load in nonlikert values for each school'
  # Loads administrative (non-likert) metric values and z-scores per school.
  # BUG FIX: both csv_string assignments were commented out, so this task
  # raised NameError on the undefined local `csv_string`. Default to the most
  # recent data file; override with NONLIKERT_CSV=/path/to/file.csv.
  task load_nonlikert_values: :environment do
    ENV['BULK_PROCESS'] = 'true'
    csv_path = ENV['NONLIKERT_CSV'] || File.expand_path("../../../data/MCIEA_17-18AdminData.csv", __FILE__)
    # csv_path = File.expand_path("../../../data/MCIEA_16-17_SGP.csv", __FILE__)
    csv_string = File.read(csv_path)
    csv = CSV.parse(csv_string, headers: true)
    puts("LOADING NONLIKERT CSV: #{csv.length} ROWS")

    csv.each do |row|
      # Resolve the dash-separated category path to the scope holding the
      # non-likert leaf category.
      base = Category
      row["Category"].split("-").each do |category_id|
        base = base.find_by_external_id(category_id).child_categories
      end
      nonlikert_category = base.find_or_create_by(name: row["NonLikert Title"])
      if nonlikert_category.nil?
        puts("Unable to find nonlikert category: #{row["NonLikert Title"]}")
        next
      end
      if (benchmark = row["B_MCIEA"]).present?
        nonlikert_category.update(benchmark: benchmark)
      end

      district = District.find_or_create_by(name: row["District"], state_id: 1)
      school = district.schools.find_or_create_by(name: row["School"])
      school_category = school.school_categories.find_or_create_by(category: nonlikert_category)
      if row["Z-Score"].blank?
        school_category.destroy
      else
        school_category.update(
          nonlikert: row["NL_Value"],
          zscore: [-2, [row["Z-Score"].to_f, 2].min].max # clamp z-score to [-2, 2]
        )
      end
    end
    ENV.delete('BULK_PROCESS')
    # sync_school_category_aggregates
  end

  desc 'Load in custom zones for each category'
  # Loads per-category benchmark zone boundaries from CSV and propagates them
  # down the category tree via sync_child_zones.
  task load_custom_zones: :environment do
    ENV['BULK_PROCESS'] = 'true'
    csv_string = File.read(File.expand_path("../../../data/Benchmarks2016-2017.csv", __FILE__))
    csv = CSV.parse(csv_string, headers: true)
    csv.each do |row|
      next if row["Warning High"].blank?
      category = Category.find_by_name(row["Subcategory"])
      if category.nil?
        puts "Unable to find category #{row["Subcategory"]}"
        next
      end
      # Zone upper bounds, lowest to highest; 5 caps the final (ideal) zone.
      custom_zones = [
        row["Warning High"],
        row["Watch High"],
        row["Growth High"],
        row["Approval High"],
        5
      ]
      puts "#{category.name} -> #{custom_zones.join(",")}"
      category.update(zones: custom_zones.join(","))
    end
    ENV.delete('BULK_PROCESS')
    Category.all.each { |category| category.sync_child_zones }
  end

  desc 'Move all likert survey results to a new submeasure of current measure'
  # Reparents every question into a "<Category> Scale" child category so the
  # likert results live under their own submeasure.
  task move_likert_to_submeasures: :environment do
    Question.all.each do |q|
      category = q.category
      next unless category.name.index("Scale").nil? # already lives under a "... Scale" submeasure
      new_category_name = "#{category.name} Scale"
      new_category = category.child_categories.where(name: new_category_name).first
      new_category ||= category.child_categories.create(
        name: new_category_name,
        blurb: "This measure contains all survey responses for #{category.name}.",
        description: "The following survey questions concern perceptions of #{category.name}.",
        zones: category.zones
      )
      q.update(category: new_category)
    end
    # sync_school_category_aggregates
  end

  desc 'Sync all school category aggregates'
  task sync: :environment do
    sync_school_category_aggregates
    Recipient.created_in(@year).each { |r| r.update_counts }
  end

  # Recomputes aggregated responses for every (school, category) pair for the
  # current @year, creating missing SchoolCategory rows first.
  def sync_school_category_aggregates
    School.all.each do |school|
      Category.all.each do |category|
        school_category = SchoolCategory.for(school, category).in(@year).first
        school_category ||= school.school_categories.create(category: category, year: @year)
        school_category.sync_aggregated_responses
      end
    end
  end
end