Add Academic Year

This commit is contained in:
Gabe Farrell 2023-05-23 03:57:06 +00:00
parent 25898233c3
commit 0558d02d29
2 changed files with 67 additions and 0 deletions

View file

@ -7,6 +7,8 @@ import itertools as it
import argparse
import re
from urllib.parse import urlparse
from dateutil.parser import parse
from dateutil.relativedelta import relativedelta
import pysftp
#NOTE for now, each of the arrays should be all lowercase
@ -228,6 +230,9 @@ def do_merge_student(cwd, mwd):
# clean out unnecessary columns
if not argQuiet: print('Cleaning out columns...')
df = clean_cols_student(df)
# add academic year column
if not argQuiet: print('Adding \'Academic Year\' column...')
df = add_academic_year(df)
# ensure line count matches what is expected
if df.shape[0] != lines:
print(f'Warning: Line count mismatch: {lines} expected, but got {df.shape[0]}')
@ -266,6 +271,9 @@ def do_merge_teacher(cwd, mwd):
# clean out unnecessary columns
if not argQuiet: print('Cleaning out columns...')
df = clean_cols_teacher(df)
# add academic year column
if not argQuiet: print('Adding \'Academic Year\' column...')
df = add_academic_year(df)
# ensure line count matches what is expected
if df.shape[0] != lines:
print(f'Warning: Line count mismatch: {lines} expected, but got {df.shape[0]}')
@ -313,6 +321,35 @@ def combine_variants(df):
df = df.drop(columns=drops)
return df
def add_academic_year(df):
    """Add an 'Academic Year' column derived from 'Recorded Date'.

    Every value in the 'Recorded Date' column is passed through
    date_str_to_academic_year and the results are stored, in row order,
    in a new 'Academic Year' column.

    Note: this must run after the column variants have been merged,
    because 'Recorded Date' is the only column name it looks for.

    The frame is modified in place; it is also returned to match the
    convention of the other cleaning helpers in this file.
    """
    df['Academic Year'] = [
        date_str_to_academic_year(recorded)
        for recorded in df['Recorded Date'].tolist()
    ]
    return df
def date_str_to_academic_year(str):
    """Return the academic-year label (e.g. '2022-23') for a date string.

    The academic year is treated as starting in July: dates in
    January-June belong to the year that began the previous calendar
    year, dates in July-December to the year beginning that calendar year.
    The label is the four-digit start year, a hyphen, and the two-digit
    (zero-padded) end year, so September 2008 yields '2008-09'.

    Args:
        str: The value from the 'Recorded Date' column. The name shadows
            the builtin but is kept so the signature stays unchanged.

    Returns:
        The academic-year label, or 'Undefined' (after printing a warning)
        when the value cannot be interpreted as a date.
    """
    try:
        date = parse(str).date()
    except (TypeError, ValueError, OverflowError):
        # TypeError: non-string cell (e.g. NaN for an empty field).
        # ValueError: the parser's error for unparsable or empty strings.
        # OverflowError: a parsed component is too large to represent.
        # I would like this to only print once if the merged csv will have Undefined, but whatever
        print('WARN: Found non-date value in \'Recorded Date\' column, \'Academic Year\' will contain \'Undefined\' for some rows')
        return 'Undefined'
    # Plain integer arithmetic on the year avoids date offsets, which
    # would have to cope with Feb 29 in leap years.
    start_year = date.year - 1 if date.month < 7 else date.year
    # Modulo + zero padding keeps the suffix two digits wide for every
    # century (2008 -> '09', 1999 -> '00') in both semesters.
    return f'{start_year}-{(start_year + 1) % 100:02d}'
if __name__ == '__main__':