Add Academic Year

3 years ago · 0558d02d29
parent 25898233c3
commit 0558d02d29
2 changed files with 67 additions and 0 deletions
--- a/merge.py
+++ b/merge.py
@ -7,6 +7,8 @@ import itertools as it
 import argparse
 import re
 from urllib.parse import urlparse
+from dateutil.parser import parse
+from dateutil.relativedelta import relativedelta
 import pysftp

 #NOTE for now, each of the arrays should be all lowercase
@ -228,6 +230,9 @@ def do_merge_student(cwd, mwd):
    # clean out unnecessary columns
    if not argQuiet: print('Cleaning out columns...')
    df = clean_cols_student(df)
+    # add academic year column
+    if not argQuiet: print('Adding \'Academic Year\' column...')
+    df = add_academic_year(df)
    # ensure line count matches what is expected
    if df.shape[0] != lines:
        print(f'Warning: Line count mismatch: {lines} expected, but got {df.shape[0]}')
@ -266,6 +271,9 @@ def do_merge_teacher(cwd, mwd):
    # clean out unnecessary columns
    if not argQuiet: print('Cleaning out columns...')
    df = clean_cols_teacher(df)
+    # add academic year column
+    if not argQuiet: print('Adding \'Academic Year\' column...')
+    df = add_academic_year(df)
    # ensure line count matches what is expected
    if df.shape[0] != lines:
        print(f'Warning: Line count mismatch: {lines} expected, but got {df.shape[0]}')
@ -313,6 +321,35 @@ def combine_variants(df):
    df = df.drop(columns=drops)
    return df

+# derive an 'Academic Year' column from the dates in 'Recorded Date'
+# note: call this only after the columns have been merged, because the
+# only column it looks at is 'Recorded Date'
+def add_academic_year(df):
+    # one academic-year label per row, in row order
+    df['Academic Year'] = [
+        date_str_to_academic_year(recorded)
+        for recorded in df['Recorded Date'].tolist()
+    ]
+    # df is modified in place; returning it is probably unnecessary,
+    # but that is the convention so far
+    return df
+
+
+def date_str_to_academic_year(str):
+    """Return the academic-year label (e.g. '2021-22') for a date string.
+
+    Dates in January-June are assigned to the academic year that started in
+    the previous calendar year; dates in July-December are assigned to the
+    one starting in the same calendar year. The label is the start year, a
+    hyphen, and the zero-padded two-digit end year.
+
+    If the value cannot be parsed as a date, a warning is printed and the
+    string 'Undefined' is returned instead.
+
+    note: the parameter name shadows the builtin 'str'; it is kept as-is so
+    that callers passing it by keyword keep working.
+    """
+    # get date from string
+    try:
+        date = parse(str).date()
+    except (TypeError, ValueError, OverflowError):
+        # TypeError: the value is not a string
+        # ValueError: the string is not a recognisable date (dateutil's
+        #             ParserError is a ValueError subclass)
+        # OverflowError: the parsed value is out of range
+        # I would like this to only print once if the merged csv will have Undefined, but whatever
+        print('WARN: Found non-date value in \'Recorded Date\' column, \'Academic Year\' will contain \'Undefined\' for some rows')
+        return 'Undefined'
+    # the years are computed with plain integer arithmetic instead of
+    # shifting the date by a year, which sidesteps leap-year problems
+    if date.month < 7: # spring semester
+        start_year = date.year - 1
+    else: # fall semester
+        start_year = date.year
+    # always zero-pad the two-digit end year so both semesters of one
+    # academic year get the same label ('2008-09', never '2008-9')
+    return f'{start_year}-{(start_year + 1) % 100:02d}'


 if __name__ == '__main__':
--- a/test.py
+++ b/test.py
@ -218,6 +218,36 @@ class TestMergeCSV(unittest.TestCase):
        })
        td = merge.repair_teacher_columns(td)
        self.assertTrue(td.equals(expected), td)
+
+    def test_date_str_to_academic_year(self):
+        # (input date string, expected academic year), checked in order
+        cases = [
+            ("3/16/2023 13:23", '2022-23'),  # spring semester
+            ("9/16/2021 13:23", '2021-22'),  # fall semester
+        ]
+        for datestr, expected in cases:
+            self.assertEqual(merge.date_str_to_academic_year(datestr), expected)
+
+    def test_add_academic_year(self):
+        # one fall-semester and one spring-semester timestamp
+        recorded = ['9/16/2021 13:23', '3/16/2023 13:23']
+        expected = pd.DataFrame({
+            'Recorded Date': list(recorded),
+            'Academic Year': ['2021-22', '2022-23'],
+        })
+        actual = merge.add_academic_year(pd.DataFrame({'Recorded Date': recorded}))
+        # on failure the resulting frame is shown as the assertion message
+        self.assertTrue(actual.equals(expected), actual)
    
 if __name__ == '__main__':
    unittest.main()