From 4d025ba681e33189042705dbd9c51156c8d646e1 Mon Sep 17 00:00:00 2001 From: Gabe Farrell Date: Fri, 28 Apr 2023 21:41:03 -0400 Subject: [PATCH] Fix Column Cleaning --- merge-csv.py | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/merge-csv.py b/merge-csv.py index f2c82dc..4a8abb9 100644 --- a/merge-csv.py +++ b/merge-csv.py @@ -144,18 +144,27 @@ def combine_cols(df, col, possibilities): def clean_cols(df): - remove = [ - 'Please select the month, day and year of your birthday. - Month'.lower(), - 'Please select the month, day and year of your birthday. - Day'.lower(), - 'Please select the month, day and year of your birthday. - Year'.lower(), - 'Gender - SIS'.lower(), - 'Race - SIS'.lower(), - 'Finished'.lower(), - 'STDev'.lower(), + keep = [ + 'StartDate', + 'EndDate', + 'Status', + 'IPAddress', + 'Progress', + 'Finished', + 'District', + 'LASID', + 'Grade', + 'Gender', + 'Race', + 'Response ID', + 'Response Id', + 'DeseId', + 'Dese Id', ] + keep = list(map(str.lower, keep)) drops = [] for col in df.columns: - if col.lower() in remove: + if col.lower() not in keep and not col.startswith('s-') and not col.startswith('t-'): drops.append(col) df = df.drop(columns=drops) if args.verbose: print(f'Dropped columns: {drops}')