@ -1,6 +1,5 @@
# PSQL: /Applications/Postgres.app/Contents/Versions/9.6/bin/psql -h localhost
# aws s3 presign s3://irrationaldesign/beta-data-080719a.dump
# sudo heroku pg:backups:restore 'https://irrationaldesign.s3.amazonaws.com/beta-data-080719a.dump?AWSAccessKeyId=AKIAIDGE3EMQEWUQZUJA&Signature=KrabUOeggEd5wrjLQ4bvgd9eZGU%3D&Expires=1565267251' DATABASE_URL -a mciea-beta
@ -36,7 +35,7 @@ require 'csv'
namespace :data do
@year = 2019
desc " load survey responses "
desc 'load survey responses'
task load_survey_responses : :environment do
Dir . glob ( Rails . root . join ( 'data' , 'survey_responses' , '*.csv' ) ) . each do | filepath |
puts " =====================> Loading data from csv at path: #{ filepath } "
@ -45,30 +44,28 @@ namespace :data do
puts " =====================> Completed loading #{ SurveyItemResponse . count } survey responses "
end
desc " Load in all data "
desc 'Load in all data'
task load : :environment do
# return if School.count > 0
Rake :: Task [ " data:load_categories " ] . invoke
Rake :: Task [ " data:load_questions " ] . invoke
Rake :: Task [ " db:seed " ] . invoke
Rake :: Task [ " data:load_responses " ] . invoke
Rake :: Task [ " data:load_nonlikert_values " ] . invoke
Rake :: Task [ 'data:load_categories' ] . invoke
Rake :: Task [ 'data:load_questions' ] . invoke
Rake :: Task [ 'db:seed' ] . invoke
Rake :: Task [ 'data:load_responses' ] . invoke
Rake :: Task [ 'data:load_nonlikert_values' ] . invoke
end
desc 'Check question / category data against existing data'
task check_questions : :environment do
csv_string = File . read ( File . expand_path ( " ../../../data/MeasureKey #{ @year } .csv " , __FILE__ ) )
csv = CSV . parse ( csv_string , :headers = > true )
csv = CSV . parse ( csv_string , headers: true )
t = Time . new
csv . each_with_index do | question , index|
csv . each_with_index do | question , _ index|
existing_question = Question . created_in ( @year ) . find_by_external_id ( question [ 'qid' ] )
if existing_question . blank?
puts " NOT FOUND: #{ question [ 'qid' ] } -> #{ question [ " Question Text " ] } "
puts " NOT FOUND: #{ question [ 'qid' ] } -> #{ question [ 'Question Text' ] } "
else
if Question . where ( external_id : question [ 'qid' ] ) . count > 1
puts " MULTIPLE FOUND: #{ question [ 'qid' ] } "
end
puts " MULTIPLE FOUND: #{ question [ 'qid' ] } " if Question . where ( external_id : question [ 'qid' ] ) . count > 1
question_text = question [ 'Question Text' ] . gsub ( / [[:space:]] / , ' ' ) . strip
if existing_question . text != question_text
@ -92,10 +89,10 @@ namespace :data do
desc 'Sync questions / category data against existing data'
task sync_questions : :environment do
csv_string = File . read ( File . expand_path ( " ../../../data/MeasureKey #{ @year } .csv " , __FILE__ ) )
csv = CSV . parse ( csv_string , :headers = > true )
csv = CSV . parse ( csv_string , headers: true )
t = Time . new
csv . each_with_index do | question , index|
csv . each_with_index do | question , _ index|
existing_question = Question . created_in ( @year ) . find_by_external_id ( question [ 'qid' ] )
if existing_question . blank?
categories = Category . where ( name : question [ 'Category Name' ] . titleize )
@ -114,7 +111,7 @@ namespace :data do
option4 : question [ 'R4' ] ,
option5 : question [ 'R5' ] ,
target_group : question [ 'qid' ] . starts_with? ( 's' ) ? 'for_students' : 'for_teachers' ,
reverse : question [ 'Reverse' ] == " 1 "
reverse : question [ 'Reverse' ] == '1'
)
end
else
@ -145,7 +142,7 @@ namespace :data do
desc 'Load in category data'
task load_categories : :environment do
measures = JSON . parse ( File . read ( File . expand_path ( '../../ ../data/measures.json', __FILE __) ) )
measures = JSON . parse ( File . read ( File . expand_path ( '../../ data/measures.json', __dir __) ) )
measures . each_with_index do | measure , index |
category = Category . create_with (
blurb : measure [ 'blurb' ] ,
@ -169,14 +166,14 @@ namespace :data do
external_id : subinfo_key
) . find_or_create_by ( name : subsubinfo [ 'title' ] )
if subsubinfo [ 'nonlikert' ] . present?
subsubinfo [ 'nonlikert' ] . each do | nonlikert_info |
puts ( " NONLIKERT FOUND: #{ nonlikert_info [ 'title' ] } " )
nonlikert = subsubcategory . child_categories . create_with (
benchmark_description : nonlikert_info [ 'benchmark_explanation' ] ,
benchmark : nonlikert_info [ 'benchmark ']
) . find_or_create_by ( name : nonlikert_info [ 'title' ] )
end
next unless subsubinfo [ 'nonlikert' ] . present?
subsubinfo [ 'nonlikert' ] . each do | nonlikert_info |
puts ( " NONLIKERT FOUND: #{ nonlikert_info [ 'title' ] } " )
nonlikert = subsubcategory . child_categories . create_with (
benchmark _description : nonlikert_info [ 'benchmark _explanation '] ,
benchmark : nonlikert_info [ 'benchmark' ]
) . find_or_create_by ( name : nonlikert_info [ 'title' ] )
end
end
end
@ -190,18 +187,18 @@ namespace :data do
'teacher'
]
questions = JSON . parse ( File . read ( File . expand_path ( '../../ ../data/questions.json', __FILE __) ) )
questions = JSON . parse ( File . read ( File . expand_path ( '../../ data/questions.json', __dir __) ) )
questions . each do | question |
category = nil
question [ 'category' ] . split ( '-' ) . each do | external_id |
categories = category . present? ? category . child_categories : Category
category = categories . where ( external_id : external_id ) . first
if category . nil?
puts 'NOTHING'
puts external_id
puts categories . inspect
category = categories . create ( name : question [ 'Category Name' ] , external_id : external_id )
end
next unless category . nil?
puts 'NOTHING'
puts external_id
puts categories . inspect
category = categories . create ( name : question [ 'Category Name' ] , external_id : external_id )
end
question_text = question [ 'text' ] . gsub ( / [[:space:]] / , ' ' ) . strip
if question_text . index ( '.* teacher' ) . nil?
@ -213,7 +210,7 @@ namespace :data do
option4 : question [ 'answers' ] [ 3 ] ,
option5 : question [ 'answers' ] [ 4 ] ,
for_recipient_students : question [ 'child' ] . present? ,
reverse : question [ 'Reverse' ] == " 1 "
reverse : question [ 'Reverse' ] == '1'
)
else
variations . each do | variation |
@ -225,7 +222,7 @@ namespace :data do
option4 : question [ 'answers' ] [ 3 ] ,
option5 : question [ 'answers' ] [ 4 ] ,
for_recipient_students : question [ 'child' ] . present? ,
reverse : question [ 'Reverse' ] == " 1 "
reverse : question [ 'Reverse' ] == '1'
)
end
end
@ -240,21 +237,23 @@ namespace :data do
]
csv_string = File . read ( File . expand_path ( " ../../../data/MeasureKey #{ @year } .csv " , __FILE__ ) )
csv = CSV . parse ( csv_string , :headers = > true )
csv = CSV . parse ( csv_string , headers: true )
t = Time . new
csv . each_with_index do | question , index|
csv . each_with_index do | question , _ index|
category = nil
question [ 'Category19' ] . split ( '-' ) . each do | external_id_raw |
external_id = external_id_raw . gsub ( / [[:space:]] / , ' ' ) . strip
categories = category . present? ? category . child_categories : Category
category = categories . where ( external_id : external_id ) . first
if category . nil?
puts 'NOTHING'
puts " #{ question [ 'Category' ] } -- #{ external_id } "
puts categories . map { | c | " #{ c . name } - | #{ c . external_id } | == | #{ external_id } |: - #{ external_id == c . external_id } " } . join ( " ---- " )
category = categories . create ( name : question [ 'Category Name' ] , external_id : external_id )
end
next unless category . nil?
puts 'NOTHING'
puts " #{ question [ 'Category' ] } -- #{ external_id } "
puts categories . map { | c |
" #{ c . name } - | #{ c . external_id } | == | #{ external_id } |: - #{ external_id == c . external_id } "
} . join ( ' ---- ' )
category = categories . create ( name : question [ 'Category Name' ] , external_id : external_id )
end
question_text = question [ 'Question Text' ] . gsub ( / [[:space:]] / , ' ' ) . strip
if question_text . index ( '.* teacher' ) . nil?
@ -265,9 +264,9 @@ namespace :data do
option3 : question [ 'R3' ] ,
option4 : question [ 'R4' ] ,
option5 : question [ 'R5' ] ,
for_recipient_students : question [ 'Level' ] == " Students " ,
for_recipient_students : question [ 'Level' ] == 'Students' ,
external_id : question [ 'qid' ] ,
reverse : question [ 'Reverse' ] == " 1 "
reverse : question [ 'Reverse' ] == '1'
)
else
variations . each do | variation |
@ -278,9 +277,9 @@ namespace :data do
option3 : question [ 'R3' ] ,
option4 : question [ 'R4' ] ,
option5 : question [ 'R5' ] ,
for_recipient_students : question [ 'Level' ] == " Students " ,
for_recipient_students : question [ 'Level' ] == 'Students' ,
external_id : question [ 'qid' ] ,
reverse : question [ 'Reverse' ] == " 1 "
reverse : question [ 'Reverse' ] == '1'
)
end
end
@ -310,17 +309,17 @@ namespace :data do
missing_questions = { }
bad_answers = { }
timeToRun = 120 000 * 60
timeToRun = 120 _ 000 * 60
startIndex = 0
stopIndex = 1 000000
stopIndex = 1 _ 000_ 000
startTime = Time . new
# ['teacher_responses'].each do |file|
[ 'student_responses' , 'teacher_responses' ]. each do | file |
%w[ student_responses teacher_responses ]. each do | file |
recipients = file . split ( '_' ) [ 0 ]
target_group = Question . target_groups [ " for_ #{ recipients } s " ]
csv_string = File . read ( File . expand_path ( " ../../../data/ #{ file } _ #{ @year } .csv " , __FILE__ ) )
csv = CSV . parse ( csv_string , :headers = > true )
csv = CSV . parse ( csv_string , headers: true )
puts ( " LOADING CSV: #{ csv . length } ROWS " )
t = Time . new
@ -338,16 +337,16 @@ namespace :data do
end
district_name = row [ 'Q111' ] . strip
if district_name . blank? || district_name == " NA "
if district_name . blank? || district_name == 'NA'
puts " DISTRICT NOT FOUND: #{ district_name } "
next
end
# district_name = row['To begin, please select your district.'] if district_name.nil?
district = District . find_or_create_by ( name : district_name , state_id : 1 )
school_name = row [ " SchoolName " ] . strip
school_name = row [ 'SchoolName' ] . strip
if school_name . blank? || school_name == " NA "
if school_name . blank? || school_name == 'NA'
puts " BLANK SCHOOL NAME: #{ district . name } - #{ index } "
next
end
@ -356,38 +355,36 @@ namespace :data do
if school . nil?
next if unknown_schools [ school_name ]
puts " DATAERROR: Unable to find school: #{ school_name } - #{ index } "
unknown_schools [ school_name ] = true
next
end
respondent_id = " #{ recipients } - #{ index } - #{ row [ " ResponseId " ] . strip } "
respondent_id = " #{ recipients } - #{ index } - #{ row [ 'ResponseId' ] . strip } "
recipient_id = respondent_map [ " #{ school . id } - #{ @year } - #{ respondent_id } " ]
if recipient_id . present?
recipient = school . recipients . where ( id : recipient_id ) . first
end
recipient = school . recipients . where ( id : recipient_id ) . first if recipient_id . present?
if recipient . nil?
begin
recipient = school . recipients . create (
name : " Survey Respondent Id: #{ respondent_id } "
)
rescue
rescue StandardError
puts " DATAERROR: INDEX: #{ index } ERROR AT #{ index } - #{ district . name } - #{ school_name } #{ school } : #{ respondent_id } "
end
respondent_map [ " #{ school . id } - #{ respondent_id } " ] = recipient . id
end
recipient_list = school . recipient_lists . find_by_name ( " #{ recipients . titleize } List " )
if recipient_list . nil?
recipient_list = school . recipient_lists . create ( name : " #{ recipients . titleize } List " )
end
recipient_list = school . recipient_lists . create ( name : " #{ recipients . titleize } List " ) if recipient_list . nil?
recipient_list . recipient_id_array << recipient . id
recipient_list . save!
row . each do | key , value |
t1 = Time . new
next if value . nil? or key . nil? or value . to_s == " -99 "
next if value . nil? or key . nil? or value . to_s == '-99'
key = key . gsub ( / [[:space:]] / , ' ' ) . gsub ( / \ . / , '-' ) . strip . gsub ( / \ s+ / , ' ' )
key = key . gsub ( / -4-5 / , '' ) . gsub ( / -6-12 / , '' )
value = value . gsub ( / [[:space:]] / , ' ' ) . strip . downcase
@ -400,23 +397,26 @@ namespace :data do
if question . nil?
next if missing_questions [ key ]
puts " DATAERROR: Unable to find question: #{ key } "
missing_questions [ key ] = true
next
els e
question . update_attributes ( target_group : target_group ) if question . unknown?
els if qu estion. unknown?
question . update_attributes ( target_group : target_group )
end
if ( value . to_i . blank? )
if value . to_i . blank?
answer_index = question . option_index ( value )
answer_dictionary . each do | k , v |
break if answer_index . present?
answer_index = question . option_index ( value . gsub ( k . to_s , v . to_s ) )
answer_index = question . option_index ( value . gsub ( v . to_s , k . to_s ) ) if answer_index . nil?
end
if answer_index . nil?
next if bad_answers [ key ]
puts " DATAERROR: Unable to find answer: #{ key } = #{ value . downcase . strip } - #{ question . options . inspect } "
bad_answers [ key ] = true
next
@ -429,7 +429,11 @@ namespace :data do
# answer_index = 6 - answer_index if question.reverse?
responded_at = Date . strptime ( row [ 'recordedDate' ] , '%Y-%m-%d %H:%M:%S' ) rescue Date . today
responded_at = begin
Date . strptime ( row [ 'recordedDate' ] , '%Y-%m-%d %H:%M:%S' )
rescue StandardError
Date . today
end
begin
recipient . attempts . create ( question : question , answer_index : answer_index , responded_at : responded_at )
rescue Exception = > e
@ -448,65 +452,64 @@ namespace :data do
desc 'Load in nonlikert values for each school'
task load_nonlikert_values : :environment do
csv_string = File . read ( File . expand_path ( " ../../../data/MCIEA_18-19AdminData_Final.csv " , __FILE __) )
csv_string = File . read ( File . expand_path ( '../../data/MCIEA_18-19AdminData_Final.csv' , __dir __) )
# csv_string = File.read(File.expand_path("../../../data/MCIEA_16-17_SGP.csv", __FILE__))
csv = CSV . parse ( csv_string , :headers = > true )
csv = CSV . parse ( csv_string , headers: true )
puts ( " LOADING NONLIKERT CSV: #{ csv . length } ROWS " )
errors = [ ]
csv . each_with_index do | row , index |
next if row [ " Likert_Value " ] . blank?
csv . each_with_index do | row , _index |
next if row [ 'Likert_Value' ] . blank?
base = Category
category_ids = row [ " Category " ] . split ( " - " )
category_ids = row [ 'Category' ] . split ( '-' )
category_ids . each do | category_id |
category_id = category_id . downcase if category_id . downcase =~ / i /
base = base . find_by_external_id ( category_id )
if base . nil?
row [ " reason " ] = " Unable to find category_id #{ category_id } for category #{ row [ " Category " ] } "
row [ 'reason' ] = " Unable to find category_id #{ category_id } for category #{ row [ 'Category' ] } "
errors << row
next
end
base = base . child_categories
end
nonlikert_category = base . where ( name : row [ " NonLikert Title " ] ) . first
nonlikert_category = base . where ( name : row [ 'NonLikert Title' ] ) . first
if nonlikert_category . nil?
row [ " reason " ] = " Unable to find nonlikert category: #{ row [ " NonLikert Title " ] } in #{ } "
row [ 'reason' ] = " Unable to find nonlikert category: #{ row [ 'NonLikert Title' ] } in "
errors << row
next
else
if ( benchmark = row [ " Benchmark " ] ) . present?
nonlikert_category . update ( benchmark : benchmark )
end
elsif ( benchmark = row [ 'Benchmark' ] ) . present?
nonlikert_category . update ( benchmark : benchmark )
end
district = District . where ( name : row [ " District " ] , state_id : 1 ) . first
district = District . where ( name : row [ 'District' ] , state_id : 1 ) . first
if district . blank?
row [ " reason " ] = " DISTRICT NOT FOUND: #{ row [ " District " ] } "
row [ 'reason' ] = " DISTRICT NOT FOUND: #{ row [ 'District' ] } "
errors << row
next
end
school = district . schools . where ( name : row [ " School " ] ) . first
school = district . schools . where ( name : row [ 'School' ] ) . first
if school . blank?
row [ " reason " ] = " SCHOOL NOT FOUND: #{ row [ " School " ] } ( #{ district . name } ) "
row [ 'reason' ] = " SCHOOL NOT FOUND: #{ row [ 'School' ] } ( #{ district . name } ) "
errors << row
next
end
school_category = school . school_categories . find_or_create_by ( category : nonlikert_category , year : " #{ @year } " )
school_category = school . school_categories . find_or_create_by ( category : nonlikert_category , year : @year . to_s )
if school_category . blank?
row [ " reason " ] = " SCHOOL CATEGORY NOT FOUND: #{ school . name } ( #{ district . name } ) #{ nonlikert_category . name } "
row [ 'reason' ] = " SCHOOL CATEGORY NOT FOUND: #{ school . name } ( #{ district . name } ) #{ nonlikert_category . name } "
errors << row
next
end
zscore = ( ( [ - 2 , [ row [ " Likert_Value " ] . to_f - 3 , 2 ] . min ] . max * 10 ) . to_i ) . to_f / 10 . 0
zscore = ( [ - 2 , [ row [ 'Likert_Value' ] . to_f - 3 , 2 ] . min ] . max * 10 ) . to_i . to_f / 10 . 0
school_category . update (
nonlikert : row [ " NL_Value " ] ,
nonlikert : row [ 'NL_Value' ] ,
zscore : zscore . to_f ,
year : " #{ @year } " ,
year : @year . to_s ,
valid_child_count : 1
)
@ -514,40 +517,41 @@ namespace :data do
end
errors . each do | error |
puts " #{ error [ " reason " ] } : #{ error [ " NonLikert Title " ] } -> #{ error [ " Likert_Value " ] } "
puts " #{ error [ 'reason' ] } : #{ error [ 'NonLikert Title' ] } -> #{ error [ 'Likert_Value' ] } "
end
puts " COUNT: #{ SchoolCategory . where ( attempt_count : 0 , answer_index_total : 0 ) . where ( " nonlikert is not null and zscore is null " ) . count } "
puts " COUNT: #{ SchoolCategory . where ( attempt_count : 0 ,
answer_index_total : 0 ) . where ( 'nonlikert is not null and zscore is null' ) . count } "
end
desc 'Load in custom zones for each category'
task load_custom_zones : :environment do
ENV [ 'BULK_PROCESS' ] = 'true'
csv_string = File . read ( File . expand_path ( " ../../../data/Benchmarks2016-2017.csv " , __FILE __) )
csv = CSV . parse ( csv_string , :headers = > true )
csv_string = File . read ( File . expand_path ( '../../data/Benchmarks2016-2017.csv' , __dir __) )
csv = CSV . parse ( csv_string , headers: true )
csv . each_with_index do | row , index|
next if row [ " Warning High " ] . blank?
csv . each_with_index do | row , _ index|
next if row [ 'Warning High' ] . blank?
category = Category . find_by_name ( row [ " Subcategory " ] )
category = Category . find_by_name ( row [ 'Subcategory' ] )
if category . nil?
puts " Unable to find category #{ row [ " Subcategory " ] } "
puts " Unable to find category #{ row [ 'Subcategory' ] } "
next
end
custom_zones = [
row [ " Warning High " ] ,
row [ " Watch High " ] ,
row [ " Growth High " ] ,
row [ " Approval High " ] ,
row [ 'Warning High' ] ,
row [ 'Watch High' ] ,
row [ 'Growth High' ] ,
row [ 'Approval High' ] ,
5
]
puts " #{ category . name } -> #{ custom_zones . join ( " , " ) } "
puts " #{ category . name } -> #{ custom_zones . join ( ',' ) } "
category . update ( zones : custom_zones . join ( " , " ) )
category . update ( zones : custom_zones . join ( ',' ) )
end
ENV . delete ( 'BULK_PROCESS' )
@ -559,7 +563,8 @@ namespace :data do
task move_likert_to_submeasures : :environment do
Question . all . each do | q |
category = q . category
next unless category . name . index ( " Scale " ) . nil?
next unless category . name . index ( 'Scale' ) . nil?
new_category_name = " #{ category . name } Scale "
new_category = category . child_categories . where ( name : new_category_name ) . first
if new_category . nil?
@ -592,19 +597,19 @@ namespace :data do
new_school_questions = [ ]
category . questions . created_in ( @year ) . each do | question |
school = school_category . school
next if school . district . name != " Boston "
next if school . district . name != 'Boston'
school_question = school_category . school_questions . for ( school , question ) . first
if school_question . present?
school_questions << school_question
else
attempt_data = Attempt .
joins ( :question ) .
created_in ( school_category . year ) .
for_question ( question ) .
for_school ( school ) .
select( 'count(attempts.answer_index) as response_count' ) .
select( 'sum(case when questions.reverse then 6 - attempts.answer_index else attempts.answer_index end) as answer_index_total' ) [ 0 ]
attempt_data = Attempt
. joins ( :question )
. created_in ( school_category . year )
. for_question ( question )
. for_school ( school )
. select( 'count(attempts.answer_index) as response_count' )
. select( 'sum(case when questions.reverse then 6 - attempts.answer_index else attempts.answer_index end) as answer_index_total' ) [ 0 ]
available_responders = school . available_responders_for ( question )
@ -625,7 +630,6 @@ namespace :data do
SchoolQuestion . import new_school_questions
end
end
end
end
@ -634,17 +638,14 @@ namespace :data do
School . all . each do | school |
Category . all . each do | category |
school_category = SchoolCategory . for ( school , category ) . in ( @year ) . first
if school_category . nil?
school_category = school . school_categories . create ( category : category , year : @year )
end
school_category = school . school_categories . create ( category : category , year : @year ) if school_category . nil?
school_category . sync_aggregated_responses
end
end
end
end
#<SchoolCategory id: 1, school_id: 1, category_id: 1, attempt_count: 277, response_count: 277, answer_index_total: 1073, created_at: "2017-10-17 00:21:52", updated_at: "2018-03-03 17:24:53", nonlikert: nil, zscore: 0.674396962759463, year: "2017">
# <SchoolCategory id: 1, school_id: 1, category_id: 1, attempt_count: 277, response_count: 277, answer_index_total: 1073, created_at: "2017-10-17 00:21:52", updated_at: "2018-03-03 17:24:53", nonlikert: nil, zscore: 0.674396962759463, year: "2017">
# require 'csv'
# student_counts_string = File.read(File.expand_path("data/bps_student_counts.csv"))
@ -691,8 +692,6 @@ end
#
#
# min_response_rate = 0.3
# level = 1
# # categories = Category.joins(:questions).uniq.all
@ -796,7 +795,6 @@ end
#
# puts "TOTAL: #{total}"
# [
# "next-wave-full-circle"
# ].each do |slug|