|
|
|
@ -144,18 +144,27 @@ def combine_cols(df, col, possibilities):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def clean_cols(df):
|
|
|
|
def clean_cols(df):
|
|
|
|
remove = [
|
|
|
|
keep = [
|
|
|
|
'Please select the month, day and year of your birthday. - Month'.lower(),
|
|
|
|
'StartDate',
|
|
|
|
'Please select the month, day and year of your birthday. - Day'.lower(),
|
|
|
|
'EndDate',
|
|
|
|
'Please select the month, day and year of your birthday. - Year'.lower(),
|
|
|
|
'Status',
|
|
|
|
'Gender - SIS'.lower(),
|
|
|
|
'IPAddress',
|
|
|
|
'Race - SIS'.lower(),
|
|
|
|
'Progress',
|
|
|
|
'Finished'.lower(),
|
|
|
|
'Finished',
|
|
|
|
'STDev'.lower(),
|
|
|
|
'District',
|
|
|
|
|
|
|
|
'LASID',
|
|
|
|
|
|
|
|
'Grade',
|
|
|
|
|
|
|
|
'Gender',
|
|
|
|
|
|
|
|
'Race',
|
|
|
|
|
|
|
|
'Response ID',
|
|
|
|
|
|
|
|
'Response Id',
|
|
|
|
|
|
|
|
'DeseId',
|
|
|
|
|
|
|
|
'Dese Id',
|
|
|
|
]
|
|
|
|
]
|
|
|
|
|
|
|
|
keep = list(map(str.lower, keep))
|
|
|
|
drops = []
|
|
|
|
drops = []
|
|
|
|
for col in df.columns:
|
|
|
|
for col in df.columns:
|
|
|
|
if col.lower() in remove:
|
|
|
|
if col.lower() not in keep and not col.startswith('s-') and not col.startswith('t-'):
|
|
|
|
drops.append(col)
|
|
|
|
drops.append(col)
|
|
|
|
df = df.drop(columns=drops)
|
|
|
|
df = df.drop(columns=drops)
|
|
|
|
if args.verbose: print(f'Dropped columns: {drops}')
|
|
|
|
if args.verbose: print(f'Dropped columns: {drops}')
|
|
|
|
|