You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

253 lines
10 KiB

import merge
import unittest
import pandas as pd
# Module-level student fixture: four rows with one unrelated column and three
# differently spelled gender columns, each row carrying exactly one gender
# value. Not referenced by the tests visible in this file.
tdata_student = pd.DataFrame(
    data={
        'BadColumn': ['1', '2', '3', '4'],
        'Gender': ['1', '', '', ''],
        'gender:': ['', '', '2', '1'],
        'Gender - SIS': ['', '2', '', ''],
    },
)
class TestMergeCSV(unittest.TestCase):
    """Checks the DataFrame helpers of the ``merge`` module on small hand-built frames.

    Every test builds an input frame of string cells, runs one ``merge``
    function on it and compares the result with ``DataFrame.equals``, so both
    the cell values and the column order have to match.
    """

    def test_combine_variants(self):
        # Expects every '-1' variant column to be folded into its base
        # question column, leaving one fully populated column per question.
        head = ['1', '2', '3', '', '']
        tail = ['', '', '', '4', '5']
        full = ['1', '2', '3', '4', '5']

        source = pd.DataFrame({
            's-peff-q1': head,
            's-peff-q10': head,
            's-peff-q1-1': tail,
            's-peff-q10-1': tail,
        })
        wanted = pd.DataFrame(dict.fromkeys(['s-peff-q1', 's-peff-q10'], full))
        unwanted = pd.DataFrame(dict.fromkeys(['s-peff-q1-1', 's-peff-q10-1'], full))

        result = merge.combine_variants(source)

        self.assertTrue(result.equals(wanted))
        self.assertFalse(result.equals(unwanted))

    def test_clean_cols_student(self):
        # Expects 'BadColumn', 'Gender - SIS' and the duplicated 's-peff-q1.1'
        # column to be removed; all other columns stay, in their input order.
        full = ['1', '2', '3', '4', '5']
        junk = ['x', 'x', 'x', 'x', 'x']
        sis_gender = ['1', '2', '3', '', '']
        gender = ['1', '2', '1', '2', '2']
        leading = [
            'Start Date', 'End Date', 'Status', 'Ip Address', 'Progress',
            'Duration', 'District', 'LASID', 'Grade', 'Race',
            'Recorded Date', 'Dese Id',
        ]

        source = pd.DataFrame({
            **dict.fromkeys(leading, full),
            'BadColumn': junk,
            'Gender - SIS': sis_gender,
            'Gender': gender,
            'Response Id': full,
            's-peff-q1': full,
            's-peff-q10': full,
            's-peff-q1.1': full,
        })
        wanted = pd.DataFrame({
            **dict.fromkeys(leading, full),
            'Gender': gender,
            'Response Id': full,
            's-peff-q1': full,
            's-peff-q10': full,
        })
        unwanted = pd.DataFrame({
            'BadColumn': junk,
            'Gender - SIS': sis_gender,
            's-peff-q1.1': full,
        })

        result = merge.clean_cols_student(source)

        self.assertTrue(result.equals(wanted), result)
        self.assertFalse(result.equals(unwanted), result)

    def test_clean_cols_teacher(self):
        # Expects the four free-text junk columns and the '.1' duplicate
        # question columns to be removed; the remaining columns keep their order.
        full = ['1', '2', '3', '4', '5']
        junk = ['x', 'x', 'x', 'x', 'x']
        leading = [
            'Start Date', 'End Date', 'Status', 'Ip Address', 'Progress',
            'Duration', 'District', 'Recorded Date',
        ]
        junk_names = ['BadColumn', 'Blah Blah Blah', 'Abbey Road', 'Please List Your Cats']
        kept_tail = ['Response Id', 'Dese Id', 't-peff-q1', 't-peff-q10']
        duplicate_names = ['t-peff-q1.1', 't-peff-q10.1']

        source = pd.DataFrame({
            **dict.fromkeys(leading, full),
            **dict.fromkeys(junk_names, junk),
            **dict.fromkeys(kept_tail, full),
            **dict.fromkeys(duplicate_names, full),
        })
        wanted = pd.DataFrame(dict.fromkeys(leading + kept_tail, full))
        unwanted = pd.DataFrame({
            **dict.fromkeys(junk_names, junk),
            **dict.fromkeys(duplicate_names, full),
        })

        result = merge.clean_cols_teacher(source)

        self.assertTrue(result.equals(wanted), result)
        self.assertFalse(result.equals(unwanted), result)

    def test_combine_cols(self):
        # Expects the listed columns (passed here in lower case) to be merged
        # into 'My Column' and to be absent from the result afterwards.
        full = ['1', '2', '3', '4', '5']

        source = pd.DataFrame({
            'My Column': ['1', '', '', '', ''],
            'My Other Column': ['', '2', '3', '', ''],
            'Not My Column': full,
            'My Last Column': ['', '', '', '4', '5'],
        })
        wanted = pd.DataFrame(dict.fromkeys(['My Column', 'Not My Column'], full))

        result = merge.combine_cols(
            source,
            'My Column',
            ['my other column', 'my last column'],
        )

        self.assertTrue(result.equals(wanted), f'\n{result}')

    def test_repair_cols_student(self):
        # Expects spelling variants ('StartDate', ...) and the long-form survey
        # questions to end up in the canonical columns, fully populated.
        # 'School' is expected to be gone while 'Dese Id' is complete.
        early = ['1', '2', '3', '', '']
        late = ['', '', '', '4', '5']
        full = ['1', '2', '3', '4', '5']

        source = pd.DataFrame({
            **dict.fromkeys(['Start Date', 'End Date', 'Ip Address'], late),
            **dict.fromkeys(['StartDate', 'EndDate', 'IpAddress'], early),
            **dict.fromkeys(['Status', 'Progress', 'Duration', 'District'], full),
            'Recorded Date': late,
            'RecordedDate': early,
            'Response Id': full,
            'Dese Id': late,
            'School': early,
            'LASID': early,
            'Please enter your Locally Assigned Student ID Number (LASID, or student lunch number).': late,
            'Grade': early,
            'What grade are you in?': late,
            'Gender': early,
            'What is your gender?': late,
            'Race': full,
        })
        canonical = [
            'Start Date', 'End Date', 'Ip Address', 'Status', 'Progress',
            'Duration', 'District', 'Recorded Date', 'Response Id', 'Dese Id',
            'LASID', 'Grade', 'Gender', 'Race',
        ]
        wanted = pd.DataFrame(dict.fromkeys(canonical, full))

        result = merge.repair_student_columns(source)

        self.assertTrue(result.equals(wanted), f'\n{result}')

    def test_repair_cols_teacher(self):
        # Teacher counterpart of the student repair test: the same metadata
        # columns, without any of the student demographic questions.
        early = ['1', '2', '3', '', '']
        late = ['', '', '', '4', '5']
        full = ['1', '2', '3', '4', '5']

        source = pd.DataFrame({
            **dict.fromkeys(['Start Date', 'End Date', 'Ip Address'], late),
            **dict.fromkeys(['StartDate', 'EndDate', 'IpAddress'], early),
            **dict.fromkeys(['Status', 'Progress', 'Duration', 'District'], full),
            'Recorded Date': late,
            'RecordedDate': early,
            'Response Id': full,
            'Dese Id': late,
            'School': early,
        })
        canonical = [
            'Start Date', 'End Date', 'Ip Address', 'Status', 'Progress',
            'Duration', 'District', 'Recorded Date', 'Response Id', 'Dese Id',
        ]
        wanted = pd.DataFrame(dict.fromkeys(canonical, full))

        result = merge.repair_teacher_columns(source)

        self.assertTrue(result.equals(wanted), result)

    def test_date_str_to_academic_year(self):
        # One timestamp per semester; the cases run in order and the first
        # mismatch fails the test.
        cases = (
            ("3/16/2023 13:23", '2022-23'),  # spring semester
            ("9/16/2021 13:23", '2021-22'),  # fall semester
        )
        for stamp, wanted in cases:
            self.assertEqual(merge.date_str_to_academic_year(stamp), wanted)

    def test_add_academic_year(self):
        # Expects an 'Academic Year' column, derived from 'Recorded Date', to
        # be appended after the existing column.
        stamps = ['9/16/2021 13:23', '3/16/2023 13:23']

        source = pd.DataFrame({'Recorded Date': stamps})
        wanted = pd.DataFrame({
            'Recorded Date': stamps,
            'Academic Year': ['2021-22', '2022-23'],
        })

        result = merge.add_academic_year(source)

        self.assertTrue(result.equals(wanted), result)
# Run the whole suite when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()