Add scrapers for admin data and their corresponding CSV data files.

Make sure the specs are disabled so they don't run in GitHub CI.
This commit is contained in:
rebuilt 2022-09-16 17:21:52 -07:00
parent 94056a80c6
commit 1be6e79fd8
21 changed files with 67803 additions and 38 deletions

View file

@ -0,0 +1,55 @@
require 'rails_helper'
# Exercises Dese::FiveCOne#run_all and inspects the CSV file it is asked to
# write for the a-picp-i1 admin data item.
#
# The '#run_all' group is declared with `xcontext`, so RSpec skips it.
# NOTE(review): only the first example calls `run_all`; the header and score
# examples read the file that example leaves behind, so they rely on example
# order — confirm before re-enabling the group.
RSpec.describe Dese::FiveCOne do
  let(:academic_years) do
    [
      create(:academic_year, range: '2020-21'),
      create(:academic_year, range: '2019-20')
      # create(:academic_year, range: '2018-19'),
      # create(:academic_year, range: '2017-18'),
      # create(:academic_year, range: '2016-17')
    ]
  end
  # Anchored to Rails.root so the directory created in the `before` hook is
  # the same one the file paths point into, whatever the working directory is.
  let(:output_dir) { Rails.root.join('tmp', 'spec', 'dese') }
  let(:i1_filepath) { output_dir.join('5C_1_art_course.csv') }
  let(:filepaths) { [i1_filepath] }

  before do
    FileUtils.mkdir_p(output_dir)
    academic_years # evaluate the lazy `let` so the records exist for every example
  end

  # Returns the comma-separated fields of the first line of +path+ with the
  # line terminator removed (an empty array when the file is empty).
  def header_fields(path)
    File.open(path, &:gets).to_s.chomp.split(',')
  end

  # Returns the 'Likert Score' values of the rows in +path+ whose
  # 'Admin Data Item' and 'Academic Year' columns equal +item+ and +year+.
  # 'NA' is kept as a string; every other value is converted with `to_f`.
  def likert_scores(path, item:, year:)
    CSV.foreach(path, headers: true).filter_map do |row|
      next unless row['Admin Data Item'] == item && row['Academic Year'] == year

      score = row['Likert Score']
      score == 'NA' ? score : score.to_f
    end
  end

  xcontext '#run_all' do
    it 'creates a csv file with the scraped data' do
      Dese::FiveCOne.new(filepaths:).run_all
      expect(i1_filepath).to exist
    end

    it 'has the correct headers for i1' do
      expect(header_fields(i1_filepath)).to eq [
        'Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
        'K', '01', '02', '03', '04', '05', '06', '07', '08', '09', '10',
        '11', '12', 'All Grades', 'Total Students'
      ]
    end

    it 'has the right likert score results for a-picp-i1' do
      results = likert_scores(i1_filepath, item: 'a-picp-i1', year: '2020-21')
      expect(results.take(20)).to eq [4.95, 2.39, 4.81, 4.89, 4.63, 4.95, 2.25, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 3.75,
                                      4.82, 1.0, 3.88, 3.14, 4.84, 5.0]
    end
  end
end

View file

@ -0,0 +1,66 @@
require 'rails_helper'
# Exercises Dese::FiveDTwo#run_all and inspects the two CSV files it is asked
# to write (enrollments and age staffing) for the a-phya-i1 admin data item.
#
# The '#run_all' group is declared with `xcontext`, so RSpec skips it.
# NOTE(review): only the first example calls `run_all`; the header and score
# examples read the files that example leaves behind, so they rely on example
# order — confirm before re-enabling the group.
RSpec.describe Dese::FiveDTwo do
  let(:academic_years) do
    [
      create(:academic_year, range: '2021-22'),
      create(:academic_year, range: '2020-21')
      # create(:academic_year, range: '2019-20')
      # create(:academic_year, range: '2018-19'),
      # create(:academic_year, range: '2017-18'),
      # create(:academic_year, range: '2016-17')
    ]
  end
  # Anchored to Rails.root so the directory created in the `before` hook is
  # the same one the file paths point into, whatever the working directory is.
  let(:output_dir) { Rails.root.join('tmp', 'spec', 'dese') }
  let(:enrollments_filepath) { output_dir.join('5D_2_enrollments.csv') }
  let(:i1_filepath) { output_dir.join('5D_2_age_staffing.csv') }
  let(:filepaths) { [enrollments_filepath, i1_filepath] }

  before do
    FileUtils.mkdir_p(output_dir)
    academic_years # evaluate the lazy `let` so the records exist for every example
  end

  # Returns the comma-separated fields of the first line of +path+ with the
  # line terminator removed (an empty array when the file is empty).
  def header_fields(path)
    File.open(path, &:gets).to_s.chomp.split(',')
  end

  # Returns the 'Likert Score' values of the rows in +path+ whose
  # 'Admin Data Item' and 'Academic Year' columns equal +item+ and +year+.
  # 'NA' is kept as a string; every other value is converted with `to_f`.
  def likert_scores(path, item:, year:)
    CSV.foreach(path, headers: true).filter_map do |row|
      next unless row['Admin Data Item'] == item && row['Academic Year'] == year

      score = row['Likert Score']
      score == 'NA' ? score : score.to_f
    end
  end

  xcontext '#run_all' do
    it 'creates a csv file with the scraped data' do
      Dese::FiveDTwo.new(filepaths:).run_all
      expect(enrollments_filepath).to exist
      expect(i1_filepath).to exist
    end

    it 'has the correct headers for enrollments' do
      expect(header_fields(enrollments_filepath)).to eq [
        'Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
        'PK', 'K', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', 'SP', 'Total'
      ]
    end

    it 'has the correct headers for i1' do
      expect(header_fields(i1_filepath)).to eq [
        'Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
        '<26 yrs (# )', '26-32 yrs (#)', '33-40 yrs (#)', '41-48 yrs (#)', '49-56 yrs (#)', '57-64 yrs (#)',
        'Over 64 yrs (#)', 'FTE Count'
      ]
    end

    it 'has the right likert score results for a-phya-i1' do
      results = likert_scores(i1_filepath, item: 'a-phya-i1', year: '2020-21')
      expect(results.take(20)).to eq [5.0, 1.0, 4.7, 4.59, 5.0, 5.0, 1.0, 3.33, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0,
                                      5.0, 5.0, 4.78, 5.0]
    end
  end
end

View file

@ -0,0 +1,56 @@
require 'rails_helper'
require 'fileutils'
require 'csv'
# Exercises Dese::FourAOne#run_all and inspects the CSV file it is asked to
# write for the a-ovpe-i1 admin data item.
#
# The '#run_all' group is declared with `xcontext`, so RSpec skips it.
# NOTE(review): only the first example calls `run_all`; the header and score
# examples read the file that example leaves behind, so they rely on example
# order — confirm before re-enabling the group.
RSpec.describe Dese::FourAOne do
  let(:academic_years) do
    [
      create(:academic_year, range: '2021-22'),
      create(:academic_year, range: '2020-21')
      # create(:academic_year, range: '2019-20'),
      # create(:academic_year, range: '2018-19'),
      # create(:academic_year, range: '2017-18'),
      # create(:academic_year, range: '2016-17')
    ]
  end
  # Anchored to Rails.root so the directory created in the `before` hook is
  # the same one the file paths point into, whatever the working directory is.
  let(:output_dir) { Rails.root.join('tmp', 'spec', 'dese') }
  let(:i1_filepath) { output_dir.join('4A_1_grade_nine_course_pass.csv') }
  let(:filepaths) { [i1_filepath] }

  before do
    FileUtils.mkdir_p(output_dir)
    academic_years # evaluate the lazy `let` so the records exist for every example
  end

  # Returns the comma-separated fields of the first line of +path+ with the
  # line terminator removed (an empty array when the file is empty).
  def header_fields(path)
    File.open(path, &:gets).to_s.chomp.split(',')
  end

  # Returns the 'Likert Score' values of the rows in +path+ whose
  # 'Admin Data Item' and 'Academic Year' columns equal +item+ and +year+.
  # 'NA' is kept as a string; every other value is converted with `to_f`.
  def likert_scores(path, item:, year:)
    CSV.foreach(path, headers: true).filter_map do |row|
      next unless row['Admin Data Item'] == item && row['Academic Year'] == year

      score = row['Likert Score']
      score == 'NA' ? score : score.to_f
    end
  end

  xcontext '#run_all' do
    it 'creates a csv file with the scraped data' do
      Dese::FourAOne.new(filepaths:).run_all
      expect(i1_filepath).to exist
    end

    it 'has the correct headers' do
      expect(header_fields(i1_filepath)).to eq [
        'Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
        '# Grade Nine Students', '# Passing All Courses', '% Passing All Courses'
      ]
    end

    it 'has the right likert score results for a-ovpe-i1' do
      results = likert_scores(i1_filepath, item: 'a-ovpe-i1', year: '2020-21')
      expect(results.take(20)).to eq [3.73, 3.37, 3.03, 4.03, 3.78, 3.17, 2.93, 'NA', 3.5, 4.0, 2.98, 3.84, 3.76, 3.93,
                                      4.05, 3.13, 3.92, 3.62, 3.49, 2.5]
    end
  end
end

View file

@ -0,0 +1,105 @@
require 'rails_helper'
require 'fileutils'
require 'csv'
# Exercises Dese::FourBTwo#run_all and inspects the three CSV files it is
# asked to write (four-year graduation, retention, five-year graduation) for
# the a-degr-i1, a-degr-i2 and a-degr-i3 admin data items.
#
# The '#run_all' group is declared with `xcontext`, so RSpec skips it.
# NOTE(review): only the first example calls `run_all`; the header and score
# examples read the files that example leaves behind, so they rely on example
# order — confirm before re-enabling the group.
RSpec.describe Dese::FourBTwo do
  let(:academic_years) do
    [
      create(:academic_year, range: '2021-22'),
      create(:academic_year, range: '2020-21'),
      create(:academic_year, range: '2019-20')
      # create(:academic_year, range: '2018-19'),
      # create(:academic_year, range: '2017-18'),
      # create(:academic_year, range: '2016-17')
    ]
  end
  # Anchored to Rails.root so the directory created in the `before` hook is
  # the same one the file paths point into, whatever the working directory is.
  let(:output_dir) { Rails.root.join('tmp', 'spec', 'dese') }
  let(:i1_filepath) { output_dir.join('4B_2_four_year_grad.csv') }
  let(:i2_filepath) { output_dir.join('4B_2_retention.csv') }
  let(:i3_filepath) { output_dir.join('4B_2_five_year_grad.csv') }
  let(:filepaths) { [i1_filepath, i2_filepath, i3_filepath] }

  before do
    FileUtils.mkdir_p(output_dir)
    academic_years # evaluate the lazy `let` so the records exist for every example
  end

  # Returns the comma-separated fields of the first line of +path+ with the
  # line terminator removed (an empty array when the file is empty).
  def header_fields(path)
    File.open(path, &:gets).to_s.chomp.split(',')
  end

  # Returns the 'Likert Score' values of the rows in +path+ whose
  # 'Admin Data Item' and 'Academic Year' columns equal +item+ and +year+.
  # 'NA' is kept as a string; every other value is converted with `to_f`.
  def likert_scores(path, item:, year:)
    CSV.foreach(path, headers: true).filter_map do |row|
      next unless row['Admin Data Item'] == item && row['Academic Year'] == year

      score = row['Likert Score']
      score == 'NA' ? score : score.to_f
    end
  end

  xcontext '#run_all' do
    it 'creates a csv file with the scraped data' do
      Dese::FourBTwo.new(filepaths:).run_all
      expect(i1_filepath).to exist
      expect(i2_filepath).to exist
      expect(i3_filepath).to exist
    end

    it 'has the correct headers for i1' do
      expect(header_fields(i1_filepath)).to eq [
        'Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
        '# in Cohort', '% Graduated', '% Still in School', '% Non-Grad Completers', '% H.S. Equiv.',
        '% Dropped Out', '% Permanently Excluded'
      ]
    end

    it 'has the correct headers for i2' do
      expect(header_fields(i2_filepath)).to eq [
        'Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
        '# Enrolled', '# Retained', '% Retained', '01', '02', '03', '04', '05', '06', '07', '08', '09',
        '10', '11', '12'
      ]
    end

    it 'has the correct headers for i3' do
      expect(header_fields(i3_filepath)).to eq [
        'Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'School Name', 'DESE ID',
        '# in Cohort', '% Graduated', '% Still in School', '% Non-Grad Completers', '% H.S. Equiv.',
        '% Dropped Out', '% Permanently Excluded'
      ]
    end

    it 'has the right likert score results for a-degr-i1' do
      results = likert_scores(i1_filepath, item: 'a-degr-i1', year: '2020-21')
      expect(results.take(20)).to eq [4.94, 4.69, 4.66, 4.94, 4.93, 4.63, 4.68, 4.29, 4.6, 4.9, 3.43, 4.84, 4.8, 4.86,
                                      4.93, 3.62, 4.83, 3.4, 4.7, 4.62]
    end

    it 'has the right likert score results for a-degr-i2' do
      results = likert_scores(i2_filepath, item: 'a-degr-i2', year: '2020-21')
      expect(results.take(20)).to eq [5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0,
                                      5.0, 5.0, 5.0, 5.0]
    end

    it 'has the right likert score results for a-degr-i3' do
      results = likert_scores(i3_filepath, item: 'a-degr-i3', year: '2019-20')
      expect(results.take(20)).to eq [4.55, 4.47, 4.5, 4.65, 4.71, 4.38, 4.51, 3.22, 4.44, 4.55, 4.57, 4.59, 4.58,
                                      4.67, 4.04, 4.33, 4.07, 4.48, 4.5, 4.52]
    end
  end
end

View file

@ -14,7 +14,7 @@ RSpec.describe Dese::ThreeBTwo do
]
end
let(:enrollment_filepath) { Rails.root.join('tmp', 'spec', 'dese', 'enrollments.csv') }
let(:enrollment_filepath) { Rails.root.join('tmp', 'spec', 'dese', '3B_2_enrollments.csv') }
let(:teacher_race_filepath) { Rails.root.join('tmp', 'spec', 'dese', '3B_2_teacher_by_race_and_gender.csv') }
let(:student_race_filepath) { Rails.root.join('tmp', 'spec', 'dese', '3B_2_student_by_race_and_gender.csv') }
@ -41,33 +41,31 @@ RSpec.describe Dese::ThreeBTwo do
headers = file.first
end.split(',')
expect(headers).to eq ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'Teachers of color (#)', 'School Name', 'DESE ID',
'African American (#)', 'Asian (#)', 'Hispanic (#)', 'White (#)', 'Native American (#)',
'Native Hawaiian Pacific Islander (#)', 'Multi-Race Non-Hispanic (#)', 'Females (#)',
'Males (#)', "FTE Count\n"]
expect(headers).to eq ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'Teachers of color (%)', 'School Name', 'DESE ID',
'African American (%)', 'Asian (%)', 'Hispanic (%)', 'White (%)', 'Native American (%)',
'Native Hawaiian Pacific Islander (%)', 'Multi-Race Non-Hispanic (%)', 'Females (%)',
'Males (%)', "FTE Count\n"]
end
it 'has the correct headers for student demographic information' do
pending 'need feedback from peter'
headers = File.open(student_race_filepath) do |file|
headers = file.first
end.split(',')
expect(headers).to eq ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'Non-White Teachers', 'Non-White Students', 'School Name', 'DESE ID',
expect(headers).to eq ['Raw likert calculation', 'Likert Score', 'Admin Data Item', 'Academic Year', 'Non-White Teachers %', 'Non-White Students %', 'School Name', 'DESE ID',
'African American', 'Asian', 'Hispanic', 'White', 'Native American',
'Native Hawaiian or Pacific Islander', 'Multi-Race or Non-Hispanic', 'Males',
'Females', 'Non-Binary', "Students of color (%)\n"]
end
it 'has the right likert score results for a-cure-i1' do
pending 'not yet implemented'
results = CSV.parse(File.read(student_race_filepath), headers: true).map do |row|
next unless row['Admin Data Item'] == 'a-cure-i1' && row['Academic Year'] == '2020-21'
row['Likert Score'].to_f
end.flatten.compact
expect(results.take(20)).to eq [4.44, 4.44, 3.33, 3.83, 4.44, 3.6, 4.44, 4.44, 1, 4.44, 4.44, 4.44, 4.44, 3.89,
4.44, 4.44, 4.44, 4.44, 4.01, 3.92]
expect(results.take(20)).to eq [1.78, 1.0, 5.0, 5.0, 1.0, 1.0, 5.0, 1.25, 1.68, 2.22, 2.48, 2.84, 1.27, 2.15,
3.0, 1.83, 2.23, 1.0, 3.28, 2.74]
end
end
end