Fix Column Cleaning

main
Gabe Farrell 3 years ago
parent e8269febe1
commit 4d025ba681

@ -144,18 +144,27 @@ def combine_cols(df, col, possibilities):
def clean_cols(df): def clean_cols(df):
remove = [ keep = [
'Please select the month, day and year of your birthday. - Month'.lower(), 'StartDate',
'Please select the month, day and year of your birthday. - Day'.lower(), 'EndDate',
'Please select the month, day and year of your birthday. - Year'.lower(), 'Status',
'Gender - SIS'.lower(), 'IPAddress',
'Race - SIS'.lower(), 'Progress',
'Finished'.lower(), 'Finished',
'STDev'.lower(), 'District',
'LASID',
'Grade',
'Gender',
'Race',
'Response ID',
'Response Id',
'DeseId',
'Dese Id',
] ]
keep = list(map(str.lower, keep))
drops = [] drops = []
for col in df.columns: for col in df.columns:
if col.lower() in remove: if col.lower() not in keep and not col.startswith('s-') and not col.startswith('t-'):
drops.append(col) drops.append(col)
df = df.drop(columns=drops) df = df.drop(columns=drops)
if args.verbose: print(f'Dropped columns: {drops}') if args.verbose: print(f'Dropped columns: {drops}')

Loading…
Cancel
Save