From 76bd79e8a2c0aed917a0eedaed42619c2faa9797 Mon Sep 17 00:00:00 2001 From: Nelson Jovel Date: Wed, 20 Dec 2023 13:47:38 -0800 Subject: [PATCH] chore: clean up rake tasks --- lib/tasks/data.rake | 47 ++------------ lib/tasks/one_off.rake | 141 ----------------------------------------- 2 files changed, 4 insertions(+), 184 deletions(-) diff --git a/lib/tasks/data.rake b/lib/tasks/data.rake index ddfdaa07..495b4a2d 100644 --- a/lib/tasks/data.rake +++ b/lib/tasks/data.rake @@ -1,5 +1,3 @@ -require "csv" - namespace :data do desc "load survey responses" task load_survey_responses: :environment do @@ -11,11 +9,6 @@ namespace :data do end puts "=====================> Completed loading #{SurveyItemResponse.count - survey_item_response_count} survey responses. #{SurveyItemResponse.count} total responses in the database" - Sftp::Directory.open(path:) do |file| - StudentLoader.from_file(file:, rules: []) - end - puts "=====================> Completed loading #{Student.count - student_count} students. #{Student.count} total students" - Rails.cache.clear end @@ -29,20 +22,7 @@ namespace :data do end puts "=====================> Completed loading #{SurveyItemResponse.count - survey_item_response_count} survey responses. #{SurveyItemResponse.count} total responses in the database" - Sftp::Directory.open(path:) do |file| - StudentLoader.from_file(file:, rules: []) - end - puts "=====================> Completed loading #{Student.count - student_count} students. #{Student.count} total students" - - Rails.cache.clear - end - - desc "reset response rate values" - task reset_response_rates: :environment do - puts "Resetting response rates" - ResponseRateLoader.reset Rails.cache.clear - puts "=====================> Completed loading #{ResponseRate.count} survey responses" end desc "load admin_data" @@ -52,21 +32,12 @@ namespace :data do puts "=====================> Loading data from csv at path: #{filepath}" Dese::Loader.load_data filepath: end - puts "=====================> Completed loading #{AdminDataValue.count - original_count} admin data values" - end - desc "load students" - task load_students: :environment do - SurveyItemResponse.update_all(student_id: nil) - StudentRace.delete_all - Student.delete_all - Dir.glob(Rails.root.join("data", "survey_responses", "*student*.csv")).each do |file| - puts "=====================> Loading student data from csv at path: #{file}" - StudentLoader.load_data filepath: file + Dir.glob(Rails.root.join("data", "admin_data", "out_of_state", "*.csv")).each do |filepath| + puts "=====================> Loading data from csv at path: #{filepath}" + Dese::Loader.load_data filepath: end - puts "=====================> Completed loading #{Student.count} students" - - Rails.cache.clear + puts "=====================> Completed loading #{AdminDataValue.count - original_count} admin data values" end desc "reset all cache counters" @@ -92,14 +63,4 @@ namespace :data do SurveyItem.reset_counters(survey_item.id, :survey_item_responses) end end - - desc "scrape dese site for admin data" - task scrape_all: :environment do - puts "scraping data from dese" - scrapers = [Dese::OneAOne, Dese::OneAThree, Dese::TwoAOne, Dese::TwoCOne, Dese::ThreeAOne, Dese::ThreeATwo, - Dese::ThreeBOne, Dese::ThreeBTwo, Dese::FourAOne, Dese::FourBTwo, Dese::FourDOne, Dese::FiveCOne, Dese::FiveDTwo] - scrapers.each do |scraper| - scraper.new.run_all - end - end end diff --git a/lib/tasks/one_off.rake b/lib/tasks/one_off.rake index b448e357..ef172acc 100644 --- a/lib/tasks/one_off.rake +++ b/lib/tasks/one_off.rake @@ -1,104 +1,4 @@ namespace :one_off do - task add_dese_ids: :environment do - all_schools = School.all.includes(:district) - updated_schools = [] - - qualtrics_schools = {} - - csv_file = Rails.root.join("data", "master_list_of_schools_and_districts.csv") - CSV.parse(File.read(csv_file), headers: true) do |row| - district_id = row["District Code"].to_i - school_id = row["School Code"].to_i - - if qualtrics_schools[[district_id, school_id]].present? - puts "Duplicate entry row #{row}" - next - end - - qualtrics_schools[[district_id, school_id]] = row - end - - qualtrics_schools.each do |(district_id, school_id), csv_row| - school = all_schools.find do |school| - school.qualtrics_code == school_id && school.district.qualtrics_code == district_id - end - - if school.nil? - school_name = csv_row["School Name"].strip - puts "Could not find school '#{school_name}' with district id: #{district_id}, school id: #{school_id}" - potential_school_ids = School.where("name like ?", "%#{school_name}%").map(&:id) - puts "Potential ID matches: #{potential_school_ids}" if potential_school_ids.present? - next - end - - school.update!(dese_id: csv_row["DESE School ID"]) - updated_schools << school.id - end - - School.where.not(id: updated_schools).each do |school| - puts "School with unchanged DESE id: #{school.name}, id: #{school.id}" - end - end - - desc "load a single file" - task load_single_file: :environment do - filepath = Rails.root.join("data", "survey_responses", - "2021-22_revere_somerville_wareham_student_survey_responses.csv") - puts "=====================> Loading data from csv at path: #{filepath}" - SurveyResponsesDataLoader.load_data(filepath:) - puts "=====================> Completed loading #{SurveyItemResponse.count} survey responses" - puts "Resetting response rates" - ResponseRateLoader.reset - puts "=====================> Completed recalculating #{ResponseRate.count} response rates" - end - - desc "load butler results for 2021-22" - task load_butler: :environment do - ["2022-23_butler_student_survey_responses.csv", - "2022-23_butler_teacher_survey_responses.csv"].each do |filepath| - filepath = Rails.root.join("data", "survey_responses", filepath) - puts "=====================> Loading data from csv at path: #{filepath}" - SurveyResponsesDataLoader.load_data filepath: - end - puts "Resetting response rates" - ResponseRateLoader.reset - puts "=====================> Completed recalculating #{ResponseRate.count} response rates" - end - - desc "load winchester results for 2021-22" - task load_winchester: :environment do - ["2021-22_winchester_student_survey_responses.csv", - "2021-22_winchester_teacher_survey_responses.csv"].each do |filepath| - filepath = Rails.root.join("data", "survey_responses", filepath) - puts "=====================> Loading data from csv at path: #{filepath}" - SurveyResponsesDataLoader.load_data filepath: - end - puts "Resetting response rates" - ResponseRateLoader.reset - puts "=====================> Completed recalculating #{ResponseRate.count} response rates" - end - - desc "load students" - task load_students: :environment do - Dir.glob(Rails.root.join("data", "survey_responses", "2021-22_*student*.csv")).each do |file| - puts "=====================> Loading student data from csv at path: #{file}" - StudentLoader.load_data filepath: file - end - puts "=====================> Completed loading #{Student.count} survey responses" - end - - desc "list scales that have no survey responses" - task list_scales_that_lack_survey_responses: :environment do - output = AcademicYear.all.map do |academic_year| - Scale.all.map do |scale| - [academic_year.range, scale.scale_id, scale.survey_item_responses.where(academic_year:).count] - end - end - - output = output.map { |year| year.reject { |scale| scale[2] > 0 || scale[1].starts_with?("a-") } } - pp output - end - desc "list survey_items that have no survey responses by district" task list_survey_items_that_lack_responses: :environment do output = AcademicYear.all.map do |academic_year| @@ -129,47 +29,6 @@ namespace :one_off do puts values end - desc "load survey responses for 2022-23" - task load_survey_responses_2022_23: :environment do - survey_item_response_count = SurveyItemResponse.count - academic_year = AcademicYear.find_by_range "2022-23" - student_count = Student.count - path = "/data/survey_responses/2022_23" - Sftp::Directory.open(path:) do |file| - SurveyResponsesDataLoader.new.from_file(file:) - end - puts "=====================> Completed loading #{SurveyItemResponse.count - survey_item_response_count} survey responses. #{SurveyItemResponse.count} total responses in the database" - - Sftp::Directory.open(path:) do |file| - StudentLoader.from_file(file:, rules: []) - end - puts "=====================> Completed loading #{Student.count - student_count} students. #{Student.count} total students" - - District.all.each do |district| - num_of_respondents = SurveyItemResponse.joins(school: :district).where(academic_year:, - "schools.district": district).pluck(:response_id).uniq.count - teacher_respondents = SurveyItemResponse.joins(school: :district).where(academic_year:, - survey_item: SurveyItem.where("survey_item_id like ? ", "t-%"), "schools.district": district).pluck(:response_id).uniq.count - student_respondents = SurveyItemResponse.joins(school: :district).where(academic_year:, - survey_item: SurveyItem.where("survey_item_id like ? ", "s-%"), "schools.district": district).pluck(:response_id).uniq.count - - response_count = SurveyItemResponse.joins(school: :district).where(academic_year:, - "schools.district": district).count - student_response_count = SurveyItemResponse.joins(school: :district).joins(:survey_item).where(academic_year:, - survey_item: SurveyItem.where("survey_item_id like ? ", "s-%"), "schools.district": district).count - teacher_response_count = SurveyItemResponse.joins(school: :district).joins(:survey_item).where(academic_year:, - survey_item: SurveyItem.where("survey_item_id like ? ", "t-%"), "schools.district": district).count - puts "#{district.name} has #{num_of_respondents} respondents" - puts "#{district.name} has #{teacher_respondents} teacher respondents" - puts "#{district.name} has #{student_respondents} student respondents" - puts "#{district.name} has #{response_count} responses" - puts "#{district.name} has #{student_response_count} teacher responses" - puts "#{district.name} has #{teacher_response_count} student responses" - puts "\n" - end - Rails.cache.clear - end - desc "change dese id of Minot Forest Elementary School" task change_dese_id: :environment do school = School.find_by_name "Minot Forest Elementary School"