mirror of
https://github.com/edcommonwealth/merge-csv.git
synced 2026-03-07 13:38:13 -08:00
Add Academic Year
This commit is contained in:
parent
25898233c3
commit
0558d02d29
2 changed files with 67 additions and 0 deletions
37
merge.py
37
merge.py
|
|
@ -7,6 +7,8 @@ import itertools as it
|
|||
import argparse
|
||||
import re
|
||||
from urllib.parse import urlparse
|
||||
from dateutil.parser import parse
|
||||
from dateutil.relativedelta import relativedelta
|
||||
import pysftp
|
||||
|
||||
#NOTE for now, each of the arrays should be all lowercase
|
||||
|
|
@ -228,6 +230,9 @@ def do_merge_student(cwd, mwd):
|
|||
# clean out unnecessary columns
|
||||
if not argQuiet: print('Cleaning out columns...')
|
||||
df = clean_cols_student(df)
|
||||
# add academic year column
|
||||
if not argQuiet: print('Adding \'Academic Year\' column...')
|
||||
df = add_academic_year(df)
|
||||
# ensure line count matches what is expected
|
||||
if df.shape[0] != lines:
|
||||
print(f'Warning: Line count mismatch: {lines} expected, but got {df.shape[0]}')
|
||||
|
|
@ -266,6 +271,9 @@ def do_merge_teacher(cwd, mwd):
|
|||
# clean out unnecessary columns
|
||||
if not argQuiet: print('Cleaning out columns...')
|
||||
df = clean_cols_teacher(df)
|
||||
# add academic year column
|
||||
if not argQuiet: print('Adding \'Academic Year\' column...')
|
||||
df = add_academic_year(df)
|
||||
# ensure line count matches what is expected
|
||||
if df.shape[0] != lines:
|
||||
print(f'Warning: Line count mismatch: {lines} expected, but got {df.shape[0]}')
|
||||
|
|
@ -313,6 +321,35 @@ def combine_variants(df):
|
|||
df = df.drop(columns=drops)
|
||||
return df
|
||||
|
||||
# take the dates in 'Recorded Date' and use them to add
|
||||
# a column for academic year
|
||||
# note: must be used after the columns are merged because this only
|
||||
# looks for the column 'Recorded Date'
|
||||
def add_academic_year(df):
    """Add an 'Academic Year' column derived from 'Recorded Date'.

    Every value in ``df['Recorded Date']`` is converted with
    ``date_str_to_academic_year`` and the results are stored, in row
    order, in a new 'Academic Year' column.

    The frame must already contain a 'Recorded Date' column, so call this
    only after the column variants have been merged.

    Args:
        df: DataFrame holding a 'Recorded Date' column.

    Returns:
        The same DataFrame object, now carrying the extra column. The
        frame is modified in place; returning it just follows the calling
        convention used by the other helpers in this file.
    """
    df['Academic Year'] = [
        date_str_to_academic_year(recorded)
        for recorded in df['Recorded Date'].tolist()
    ]
    return df
|
||||
|
||||
|
||||
def date_str_to_academic_year(str):
    """Convert a date string into an academic-year label such as '2022-23'.

    Dates in January through June count as the spring semester of the
    academic year that began in the previous calendar year; dates from
    July onward count as the fall semester of the academic year beginning
    in the same calendar year.

    Args:
        str: Value to interpret as a date, handed to dateutil's ``parse``.
            (The name shadows the builtin; it is kept so existing callers
            that pass it by keyword continue to work.)

    Returns:
        A label of the form '<start year>-<two-digit end year>', or the
        string 'Undefined' when the value cannot be parsed as a date.
    """
    # get date from string
    try:
        date = parse(str).date()
    except (TypeError, ValueError, OverflowError):
        # TypeError: value is not a string (e.g. NaN from an empty cell).
        # ValueError: dateutil's ParserError for text that is not a date.
        # OverflowError: parsed components exceed what the platform allows.
        # I would like this to only print once if the merged csv will have
        # Undefined, but whatever
        print('WARN: Found non-date value in \'Recorded Date\' column, \'Academic Year\' will contain \'Undefined\' for some rows')
        return 'Undefined'

    # Work on plain year numbers rather than date arithmetic so leap days
    # cannot cause trouble.
    start_year = date.year - 1 if date.month < 7 else date.year
    # Always show the end year as two zero-padded digits so both semesters
    # produce the same label format (e.g. '2008-09', '1999-00').
    end_suffix = (start_year + 1) % 100
    return f'{start_year}-{end_suffix:02d}'
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
|
|||
30
test.py
30
test.py
|
|
@ -218,6 +218,36 @@ class TestMergeCSV(unittest.TestCase):
|
|||
})
|
||||
td = merge.repair_teacher_columns(td)
|
||||
self.assertTrue(td.equals(expected), td)
|
||||
|
||||
def test_date_str_to_academic_year(self):
    """Check the academic-year label for one spring and one fall date."""
    cases = (
        ("3/16/2023 13:23", '2022-23'),  # spring semester
        ("9/16/2021 13:23", '2021-22'),  # fall semester
    )
    for recorded, label in cases:
        self.assertEqual(merge.date_str_to_academic_year(recorded), label)
|
||||
|
||||
def test_add_academic_year(self):
    """Check that add_academic_year appends the expected column."""
    recorded_dates = ['9/16/2021 13:23', '3/16/2023 13:23']
    frame = pd.DataFrame({'Recorded Date': list(recorded_dates)})
    expected = pd.DataFrame({
        'Recorded Date': list(recorded_dates),
        'Academic Year': ['2021-22', '2022-23'],
    })
    frame = merge.add_academic_year(frame)
    self.assertTrue(frame.equals(expected), frame)
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue