mirror of
https://github.com/edcommonwealth/sqm-dashboards.git
synced 2026-03-07 21:48:16 -08:00
feat: Add scraper for a-curv-i7. Scrape new admin data for that item
This commit is contained in:
parent
cdd2a50b9e
commit
ef9e5b7ebb
2 changed files with 2894 additions and 2 deletions
|
|
@ -12,7 +12,10 @@ module Dese
|
||||||
Rails.root.join("data", "admin_data", "dese", "3B_1_course_ratio.csv"),
|
Rails.root.join("data", "admin_data", "dese", "3B_1_course_ratio.csv"),
|
||||||
Rails.root.join("data" , "admin_data", "dese", "3B_1_enrollments_by_race.csv") ,
|
Rails.root.join("data" , "admin_data", "dese", "3B_1_enrollments_by_race.csv") ,
|
||||||
Rails.root.join("data" , "admin_data", "dese", "3B_1_enrollments_by_grade.csv") ,
|
Rails.root.join("data" , "admin_data", "dese", "3B_1_enrollments_by_grade.csv") ,
|
||||||
Rails.root.join("data" , "admin_data", "dese", "3B_1_adv_courses_white_students.csv") ])
|
Rails.root.join("data" , "admin_data", "dese", "3B_1_adv_courses_white_students.csv"),
|
||||||
|
Rails.root.join("data" , "admin_data", "dese", "3B_1_students_of_color_completion_rate.csv")
|
||||||
|
|
||||||
|
])
|
||||||
@filepaths = filepaths
|
@filepaths = filepaths
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
@ -50,6 +53,12 @@ module Dese
|
||||||
write_headers(filepath:, headers:)
|
write_headers(filepath:, headers:)
|
||||||
run_a_curv_i5(filepath:)
|
run_a_curv_i5(filepath:)
|
||||||
|
|
||||||
|
filepath = filepaths[8]
|
||||||
|
headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
|
||||||
|
'# Grade 11 and 12 Students', '# Students Completing Advanced', '% Students Completing Advanced', '% ELA', '% Math', '% Science and Technology', '% Computer and Information Science', '% History and Social Sciences', '% Arts', '% All Other Subjects', 'Ch 74 Secondary Cooperative Program']
|
||||||
|
write_headers(filepath:, headers:)
|
||||||
|
run_a_curv_i7(filepath:)
|
||||||
|
|
||||||
browser.close
|
browser.close
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
@ -171,6 +180,7 @@ module Dese
|
||||||
@eleventh_and_twelfth_grade_student_count
|
@eleventh_and_twelfth_grade_student_count
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
def scrape_advanced_courses_for_white_students(filepath:)
|
def scrape_advanced_courses_for_white_students(filepath:)
|
||||||
headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
|
headers = ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
|
||||||
'# Grade 11 and 12 Students', '# Students Completing Advanced', '% Students Completing Advanced', '% ELA', '% Math', '% Science and Technology', '% Computer and Information Science', '% History and Social Sciences', '% Arts', '% All Other Subjects', 'Ch 74 Secondary Cooperative Program']
|
'# Grade 11 and 12 Students', '# Students Completing Advanced', '% Students Completing Advanced', '% ELA', '% Math', '% Science and Technology', '% Computer and Information Science', '% History and Social Sciences', '% Arts', '% All Other Subjects', 'Ch 74 Secondary Cooperative Program']
|
||||||
|
|
@ -196,7 +206,6 @@ module Dese
|
||||||
academic_year = row['Academic Year']
|
academic_year = row['Academic Year']
|
||||||
school_id = row['DESE ID'].to_i
|
school_id = row['DESE ID'].to_i
|
||||||
total_num_students_in_adv_courses = row["# Grade 11 and 12 Students"].to_f
|
total_num_students_in_adv_courses = row["# Grade 11 and 12 Students"].to_f
|
||||||
num_completing_adv_courses = row["# Students Completing Advanced"].to_f
|
|
||||||
|
|
||||||
@white_students_in_advanced_courses[[school_id, academic_year]] = total_num_students_in_adv_courses
|
@white_students_in_advanced_courses[[school_id, academic_year]] = total_num_students_in_adv_courses
|
||||||
end
|
end
|
||||||
|
|
@ -204,6 +213,20 @@ module Dese
|
||||||
@white_students_in_advanced_courses
|
@white_students_in_advanced_courses
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Number of white students completing advanced courses, keyed by
# [DESE school id, academic year].
#
# The counts come from the "# Students Completing Advanced" column of the CSV
# at filepaths[7] (the same file run_a_curv_i7 hands to
# scrape_advanced_courses_for_white_students). The hash is cached in an
# instance variable; the CSV is re-read on each call only while the cache is
# still empty.
#
# @return [Hash{Array(Integer, String) => Float}] counts by [school id, year]
# @raise [SystemCallError] if the CSV at filepaths[7] cannot be opened
def white_students_completing_advanced_courses
  @white_students_completing_advanced_courses ||= {}
  return @white_students_completing_advanced_courses unless @white_students_completing_advanced_courses.empty?

  # Stream the file row by row rather than slurping and mapping it.
  CSV.foreach(filepaths[7], headers: true) do |row|
    school_id = row["DESE ID"].to_i
    academic_year = row["Academic Year"]
    # NOTE(review): to_f turns blank or non-numeric cells into 0.0 - confirm that is intended.
    completed = row["# Students Completing Advanced"].to_f

    @white_students_completing_advanced_courses[[school_id, academic_year]] = completed
  end

  @white_students_completing_advanced_courses
end
|
||||||
|
|
||||||
# We don't need to check to see if this is a high school because the link only lists relevant schools
|
# We don't need to check to see if this is a high school because the link only lists relevant schools
|
||||||
def run_a_curv_i4(filepath:)
|
def run_a_curv_i4(filepath:)
|
||||||
scrape_enrollments_by_race(filepath: filepaths[5])
|
scrape_enrollments_by_race(filepath: filepaths[5])
|
||||||
|
|
@ -266,5 +289,42 @@ module Dese
|
||||||
Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
|
Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Builds the a-curv-i7 scrape: the advanced-course completion rate for
# non-white students, scaled so that the 67.2 benchmark maps to a score of 4.
#
# We don't need to check to see if this is a high school because the link only lists relevant schools
#
# @param filepath passed through unchanged as the first argument of Prerequisites.new
# @return whatever `run` returns for the given block
def run_a_curv_i7(filepath:)
  # Scrape the white-student figures first; the calculation below looks them up
  # through white_students_in_advanced_courses and
  # white_students_completing_advanced_courses.
  scrape_advanced_courses_for_white_students(filepath: filepaths[7])

  run do |academic_year|
    url = "https://profiles.doe.mass.edu/statereport/advcoursecomprate.aspx"
    # Second half of the academic year range plus 2000
    # (presumably "2021-22" -> "2022"; confirm the range format).
    range = (academic_year.range.split("-")[1].to_i + 2000).to_s
    selectors = { "ctl00_ContentPlaceHolder1_ddReportType" => "School",
                  "ctl00_ContentPlaceHolder1_ddYear" => range }
    submit_id = "btnViewReport"

    # Called with a header-name => column-index lookup and one row of cells.
    # Returns the scaled score, or "NA" when it cannot be computed.
    calculation = lambda { |headers, items|
      school_id = items[headers["School Code"]].to_i
      year = academic_year.range

      total_students = items[headers["# Grade 11 and 12 Students"]].to_f
      # Use the head-count column: white student *counts* are subtracted from
      # this value, so reading the "% ..." column here would mix units.
      total_completing = items[headers["# Students Completing Advanced"]].to_f

      white_students = white_students_in_advanced_courses[[school_id, year]]
      return "NA" unless white_students

      white_completing = white_students_completing_advanced_courses[[school_id, year]]
      return "NA" unless white_completing

      non_white_students = total_students - white_students
      # Without a positive denominator the division yields NaN/Infinity or a
      # meaningless negative rate.
      return "NA" unless non_white_students.positive?

      non_white_completing = total_completing - white_completing
      percentage_non_white_completing = non_white_completing / non_white_students * 100

      benchmark = 67.2
      percentage_non_white_completing * 4 / benchmark
    }
    admin_data_item_id = "a-curv-i7"
    Prerequisites.new(filepath, url, selectors, submit_id, admin_data_item_id, calculation)
  end
end
|
||||||
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
||||||
2832
data/admin_data/dese/3B_1_students_of_color_completion_rate.csv
Normal file
2832
data/admin_data/dese/3B_1_students_of_color_completion_rate.csv
Normal file
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue