From e8269febe1dd937fb09348fcdd2b6ce6f9fb73ed Mon Sep 17 00:00:00 2001 From: Gabe Farrell Date: Thu, 27 Apr 2023 23:31:49 -0400 Subject: [PATCH] Clean Out Unnecessary Columns --- merge-csv.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/merge-csv.py b/merge-csv.py index 7630972..f2c82dc 100644 --- a/merge-csv.py +++ b/merge-csv.py @@ -143,6 +143,26 @@ def combine_cols(df, col, possibilities): return df +def clean_cols(df): + remove = [ + 'Please select the month, day and year of your birthday. - Month'.lower(), + 'Please select the month, day and year of your birthday. - Day'.lower(), + 'Please select the month, day and year of your birthday. - Year'.lower(), + 'Gender - SIS'.lower(), + 'Race - SIS'.lower(), + 'Finished'.lower(), + 'STDev'.lower(), + ] + drops = [] + for col in df.columns: + if col.lower() in remove: + drops.append(col) + df = df.drop(columns=drops) + if args.verbose: print(f'Dropped columns: {drops}') + return df + + + def do_merge_student(cwd, mwd): # identify and merge student files if not args.quiet: print('---Merging Student Data---') @@ -159,6 +179,8 @@ def do_merge_student(cwd, mwd): df = pd.concat(files, axis=0) if not args.quiet: print('Repairing rows...') df = repair_student_rows(df) + if not args.quiet: print('Cleaning out columns...') + df = clean_cols(df) if df.shape[0] != lines: print(f'Warning: Line count mismatch: {lines} expected, but got {df.shape[0]}') date = get_date() @@ -188,6 +210,8 @@ def do_merge_teacher(cwd, mwd): df = pd.concat(files, axis=0) if not args.quiet: print('Repairing rows...') df = repair_teacher_rows(df) + if not args.quiet: print('Cleaning out columns...') + df = clean_cols(df) if df.shape[0] != lines: print(f'Warning: Line count mismatch: {lines} expected, but got {df.shape[0]}') date = get_date()