Add Academic Year

main
Gabe Farrell 3 years ago
parent 25898233c3
commit 0558d02d29

@ -7,6 +7,8 @@ import itertools as it
import argparse import argparse
import re import re
from urllib.parse import urlparse from urllib.parse import urlparse
from dateutil.parser import parse
from dateutil.relativedelta import relativedelta
import pysftp import pysftp
#NOTE for now, each of the arrays should be all lowercase #NOTE for now, each of the arrays should be all lowercase
@ -228,6 +230,9 @@ def do_merge_student(cwd, mwd):
# clean out unnecessary columns # clean out unnecessary columns
if not argQuiet: print('Cleaning out columns...') if not argQuiet: print('Cleaning out columns...')
df = clean_cols_student(df) df = clean_cols_student(df)
# add academic year column
if not argQuiet: print('Adding \'Academic Year\' column...')
df = add_academic_year(df)
# ensure line count matches what is expected # ensure line count matches what is expected
if df.shape[0] != lines: if df.shape[0] != lines:
print(f'Warning: Line count mismatch: {lines} expected, but got {df.shape[0]}') print(f'Warning: Line count mismatch: {lines} expected, but got {df.shape[0]}')
@ -266,6 +271,9 @@ def do_merge_teacher(cwd, mwd):
# clean out unnecessary columns # clean out unnecessary columns
if not argQuiet: print('Cleaning out columns...') if not argQuiet: print('Cleaning out columns...')
df = clean_cols_teacher(df) df = clean_cols_teacher(df)
# add academic year column
if not argQuiet: print('Adding \'Academic Year\' column...')
df = add_academic_year(df)
# ensure line count matches what is expected # ensure line count matches what is expected
if df.shape[0] != lines: if df.shape[0] != lines:
print(f'Warning: Line count mismatch: {lines} expected, but got {df.shape[0]}') print(f'Warning: Line count mismatch: {lines} expected, but got {df.shape[0]}')
@ -313,6 +321,35 @@ def combine_variants(df):
df = df.drop(columns=drops) df = df.drop(columns=drops)
return df return df
def add_academic_year(df):
    """Add an 'Academic Year' column derived from the 'Recorded Date' column.

    Note: must be called after the columns are merged, because it only
    looks at the column named 'Recorded Date'.

    The frame is modified in place and also returned, matching the
    convention used by the other helpers in this file.
    """
    df['Academic Year'] = [
        date_str_to_academic_year(recorded)
        for recorded in df['Recorded Date'].tolist()
    ]
    return df
def date_str_to_academic_year(str):
    """Convert a date string into an academic-year label such as '2022-23'.

    Dates before July count as the spring semester of the academic year
    that started in the previous calendar year; dates from July onward
    count as the fall semester of the year starting in that calendar year.

    Args:
        str: the value to interpret as a date (e.g. '3/16/2023 13:23').
            The name shadows the builtin but is kept for compatibility
            with existing callers.

    Returns:
        'YYYY-YY' (the two-digit part is always zero-padded), or the
        literal 'Undefined' when the value cannot be parsed as a date.
    """
    # get date from string
    try:
        date = parse(str).date()
    except (TypeError, ValueError, OverflowError):
        # TypeError: non-string input (None, NaN, ...).
        # ValueError: dateutil's ParserError for text that is not a date.
        # OverflowError: numeric fields too large to represent.
        # I would like this to only print once if the merged csv will have Undefined, but whatever
        print('WARN: Found non-date value in \'Recorded Date\' column, \'Academic Year\' will contain \'Undefined\' for some rows')
        return 'Undefined'
    # Work with plain year integers rather than date arithmetic so leap
    # years (Feb 29) cannot cause trouble.
    start_year = date.year - 1 if date.month < 7 else date.year
    # Zero-pad the two-digit end year so e.g. fall 2008 yields '2008-09'.
    end_suffix = (start_year + 1) % 100
    return f'{start_year}-{end_suffix:02d}'
if __name__ == '__main__': if __name__ == '__main__':

@ -219,5 +219,35 @@ class TestMergeCSV(unittest.TestCase):
td = merge.repair_teacher_columns(td) td = merge.repair_teacher_columns(td)
self.assertTrue(td.equals(expected), td) self.assertTrue(td.equals(expected), td)
def test_date_str_to_academic_year(self):
    """Check the academic-year label for one spring and one fall date."""
    cases = (
        ("3/16/2023 13:23", '2022-23'),  # spring semester
        ("9/16/2021 13:23", '2021-22'),  # fall semester
    )
    for datestr, expected_year in cases:
        self.assertEqual(merge.date_str_to_academic_year(datestr), expected_year)
def test_add_academic_year(self):
    """Check that add_academic_year appends the expected 'Academic Year' column."""
    recorded_dates = ['9/16/2021 13:23', '3/16/2023 13:23']
    expected = pd.DataFrame({
        'Recorded Date': list(recorded_dates),
        'Academic Year': ['2021-22', '2022-23'],
    })
    td = pd.DataFrame({'Recorded Date': list(recorded_dates)})
    td = merge.add_academic_year(td)
    self.assertTrue(td.equals(expected), td)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
Loading…
Cancel
Save