chore: seed academic years, districts and schools

This commit is contained in:
Nelson Jovel 2024-01-11 14:34:17 -08:00
parent 30f9f05a63
commit cd7b05df73
94 changed files with 697970 additions and 14 deletions

View file

@ -1,3 +1,3 @@
# Top-level namespace for the dashboard code; VERSION is its version string.
# NOTE(review): this listing is a rendered diff without +/- markers (hunk
# "-1,3 +1,3"), so "0.1.9" looks like the removed value and "0.1.10" the added
# one. Confirm the real file keeps only a single VERSION assignment.
module Dashboard
VERSION = "0.1.9"
VERSION = "0.1.10"
end

View file

@ -1,8 +1,68 @@
namespace :dashboard do
desc "Explaining what the task does"
task :example do
# Task goes here
puts "compiling css"
`yarn build:css`
namespace :data do
# Hands every entry yielded by Sftp::Directory.open for the fixed directory
# "/data/survey_responses/clean/" to SurveyResponsesDataLoader, prints how many
# SurveyItemResponse rows were added, then clears the Rails cache.
desc "load survey responses"
task load_survey_responses: :environment do
  count_before = SurveyItemResponse.count
  path = "/data/survey_responses/clean/"

  Sftp::Directory.open(path:) do |file|
    SurveyResponsesDataLoader.new.from_file(file:)
  end

  # Count once after loading so the delta and the total come from the same query.
  count_after = SurveyItemResponse.count
  puts "=====================> Completed loading #{count_after - count_before} survey responses. #{count_after} total responses in the database"
  Rails.cache.clear
end
# Same load as the fixed-path task, but the SFTP directory comes from the
# SFTP_PATH environment variable. Prints how many SurveyItemResponse rows were
# added, then clears the Rails cache.
desc "load survey responses from a specific directory"
task load_survey_responses_from_path: :environment do
  count_before = SurveyItemResponse.count
  # An unset SFTP_PATH yields an empty path rather than raising.
  path = ENV.fetch("SFTP_PATH", "")

  Sftp::Directory.open(path:) do |file|
    SurveyResponsesDataLoader.new.from_file(file:)
  end

  # Count once after loading so the delta and the total come from the same query.
  count_after = SurveyItemResponse.count
  puts "=====================> Completed loading #{count_after - count_before} survey responses. #{count_after} total responses in the database"
  Rails.cache.clear
end
# Feeds every CSV under data/admin_data/dese and then data/admin_data/out_of_state
# (relative to Rails.root) to Dese::Loader.load_data, and prints how many
# AdminDataValue rows were added overall.
desc "load admin_data"
task load_admin_data: :environment do
  count_before = AdminDataValue.count

  %w[dese out_of_state].each do |source_dir|
    csv_pattern = Rails.root.join("data", "admin_data", source_dir, "*.csv")
    Dir.glob(csv_pattern).each do |csv_path|
      puts "=====================> Loading data from csv at path: #{csv_path}"
      Dese::Loader.load_data(filepath: csv_path)
    end
  end

  added = AdminDataValue.count - count_before
  puts "=====================> Completed loading #{added} admin data values"
end
# Calls reset_counters for one association on every record of Category,
# Subcategory, Measure, Scale and SurveyItem, in that order, announcing each
# model before it is processed.
desc "reset all cache counters"
task reset_cache_counters: :environment do
  # Resets the given association counter for each record of the model.
  reset_for = lambda do |model, association|
    model.all.each { |record| model.reset_counters(record.id, association) }
  end

  puts "=====================> Resetting Category counters"
  reset_for.call(Category, :subcategories)

  puts "=====================> Resetting Subcategory counters"
  reset_for.call(Subcategory, :measures)

  puts "=====================> Resetting Measure counters"
  reset_for.call(Measure, :scales)

  puts "=====================> Resetting Scale counters"
  reset_for.call(Scale, :survey_items)

  puts "=====================> Resetting SurveyItem counters"
  reset_for.call(SurveyItem, :survey_item_responses)
end
end
end

21
lib/tasks/db.rake Normal file
View file

@ -0,0 +1,21 @@
namespace :dashboard do
namespace :db do
# Seeds academic years 2016-17 through 2023-24 and then districts and schools
# from the engine's master CSV, using Dashboard::Seeder.
desc "seed db"
task seed: :environment do
  seeder = Dashboard::Seeder.new
  seeder.seed_academic_years(*%w[2016-17 2017-18 2018-19 2019-20 2020-21 2021-22 2022-23 2023-24])

  schools_csv = Dashboard::Engine.root.join("data", "dashboard", "master_list_of_schools_and_districts.csv")
  seeder.seed_districts_and_schools(schools_csv)

  # Seeding steps below are currently disabled:
  # seeder.seed_sqm_framework Dashboard::Engine.root.join("data", "dashboard", "sqm_framework.csv")
  # seeder.seed_demographics Rails.root.join("data", "demographics.csv")
  # seeder.seed_enrollment Rails.root.join("data", "enrollment", "enrollment.csv")
  # seeder.seed_enrollment Rails.root.join("data", "enrollment", "nj_enrollment.csv")
  # seeder.seed_enrollment Rails.root.join("data", "enrollment", "wi_enrollment.csv")
  # seeder.seed_staffing Rails.root.join("data", "staffing", "staffing.csv")
  # seeder.seed_staffing Rails.root.join("data", "staffing", "nj_staffing.csv")
  # seeder.seed_staffing Rails.root.join("data", "staffing", "wi_staffing.csv")
end
end
end

110
lib/tasks/one_off.rake Normal file
View file

@ -0,0 +1,110 @@
namespace :dashboard do
namespace :one_off do
# Copies "DESE School ID" values from data/master_list_of_schools_and_districts.csv
# (under Rails.root) onto School#dese_id.
#
# CSV rows are keyed by [District Code, School Code] converted with to_i; the
# first row for a key wins and later duplicates are reported. A school matches
# when its qualtrics_code and its district's qualtrics_code equal that key.
# Unmatched rows and schools that were not updated are printed.
desc "add DESE ids to schools from the master list of schools and districts"
task add_dese_ids: :environment do
  all_schools = School.all.includes(:district)
  updated_school_ids = []
  rows_by_codes = {}
  csv_file = Rails.root.join("data", "master_list_of_schools_and_districts.csv")

  CSV.foreach(csv_file, headers: true) do |row|
    codes = [row["District Code"].to_i, row["School Code"].to_i]
    if rows_by_codes.key?(codes)
      puts "Duplicate entry row #{row}"
      next
    end
    rows_by_codes[codes] = row
  end

  rows_by_codes.each do |(district_id, school_id), csv_row|
    school = all_schools.find do |candidate|
      candidate.qualtrics_code == school_id && candidate.district.qualtrics_code == district_id
    end

    if school.nil?
      # to_s keeps a blank "School Name" cell from raising on nil.
      school_name = csv_row["School Name"].to_s.strip
      puts "Could not find school '#{school_name}' with district id: #{district_id}, school id: #{school_id}"
      # An empty name would turn the LIKE pattern into "%%" and match every school.
      unless school_name.empty?
        potential_school_ids = School.where("name like ?", "%#{school_name}%").pluck(:id)
        puts "Potential ID matches: #{potential_school_ids}" unless potential_school_ids.empty?
      end
      next
    end

    school.update!(dese_id: csv_row["DESE School ID"])
    updated_school_ids << school.id
  end

  School.where.not(id: updated_school_ids).each do |unchanged|
    puts "School with unchanged DESE id: #{unchanged.name}, id: #{unchanged.id}"
  end
end
# For each academic year, pretty-prints [year range, scale_id, 0] for every
# scale with no survey item responses that year. Scales whose scale_id starts
# with "a-" are left out.
desc "list scales that have no survey responses"
task list_scales_that_lack_survey_responses: :environment do
  report = AcademicYear.all.map do |academic_year|
    rows = Scale.all.map do |scale|
      [academic_year.range, scale.scale_id, scale.survey_item_responses.where(academic_year:).count]
    end
    # Keep only rows with a zero count whose id is not in the "a-" family.
    rows.reject { |_range, scale_id, response_count| response_count.positive? || scale_id.start_with?("a-") }
  end
  pp report
end
# For each academic year and district, pretty-prints
# [year range, survey_item_id, 0, district name] for every survey item with no
# responses in that district and year. Items whose survey_item_id starts with
# "a-" are left out.
desc "list survey_items that have no survey responses by district"
task list_survey_items_that_lack_responses: :environment do
  report = AcademicYear.all.map do |academic_year|
    District.all.map do |district|
      rows = SurveyItem.all.map do |survey_item|
        [
          academic_year.range,
          survey_item.survey_item_id,
          survey_item.survey_item_responses.joins(:school).where("school.district": district, academic_year:).count,
          district.name
        ]
      end
      # Keep only rows with a zero count whose id is not in the "a-" family.
      rows.reject do |_range, item_id, response_count, _district_name|
        response_count.positive? || item_id.start_with?("a-")
      end
    end
  end
  pp report
end
# Prints, for each admin data item that has values updated between four weeks
# ago and one second ago, its admin_data_item_id followed by the measure_id of
# its scale's measure.
desc "list the most recent admin data values"
task list_recent_admin_data_values: :environment do
  recent_window = 4.weeks.ago..1.second.ago
  counts_by_item = AdminDataValue.where(updated_at: recent_window).group(:admin_data_item).count

  # Only the grouped keys are used; the per-item counts are discarded.
  values = counts_by_item.map do |admin_data_item, _count|
    [admin_data_item.admin_data_item_id, admin_data_item.scale.measure.measure_id]
  end
  puts values
end
# Deletes every SurveyItemResponse belonging to the "2022-23" academic year and
# prints how many rows were removed. Aborts without deleting anything when that
# academic year cannot be found.
desc "delete 2022-23 survey responses"
task delete_survey_responses_2022_23: :environment do
  academic_year = AcademicYear.find_by_range("2022-23")
  # A nil year would make the query below match rows whose academic year is NULL.
  abort "=====================> Academic year 2022-23 not found; no survey responses deleted" if academic_year.nil?

  # delete_all reports the number of rows it removed, so no before/after counts are needed.
  deleted_count = SurveyItemResponse.where(academic_year:).delete_all
  puts "=====================> Deleted #{deleted_count} survey responses"
  # should be somewhere near 295738
end
# One-off variant of the survey response load: hands every entry yielded by
# Sftp::Directory.open for "/data/survey_responses/clean/" to
# SurveyResponsesDataLoader, prints how many SurveyItemResponse rows were
# added, then clears the Rails cache.
desc "load survey responses"
task load_survey_responses: :environment do
  count_before = SurveyItemResponse.count
  path = "/data/survey_responses/clean/"

  Sftp::Directory.open(path:) do |file|
    SurveyResponsesDataLoader.new.from_file(file:)
  end

  # Count once after loading so the delta and the total come from the same query.
  count_after = SurveyItemResponse.count
  puts "=====================> Completed loading #{count_after - count_before} survey responses. #{count_after} total responses in the database"
  Rails.cache.clear
end
end
end