mirror of
https://github.com/edcommonwealth/merge-csv.git
synced 2026-03-07 13:38:13 -08:00
Fix Column Cleaning
This commit is contained in:
parent
e8269febe1
commit
4d025ba681
1 changed file with 18 additions and 9 deletions
27
merge-csv.py
27
merge-csv.py
|
|
@ -144,18 +144,27 @@ def combine_cols(df, col, possibilities):
|
|||
|
||||
|
||||
def clean_cols(df):
|
||||
remove = [
|
||||
'Please select the month, day and year of your birthday. - Month'.lower(),
|
||||
'Please select the month, day and year of your birthday. - Day'.lower(),
|
||||
'Please select the month, day and year of your birthday. - Year'.lower(),
|
||||
'Gender - SIS'.lower(),
|
||||
'Race - SIS'.lower(),
|
||||
'Finished'.lower(),
|
||||
'STDev'.lower(),
|
||||
keep = [
|
||||
'StartDate',
|
||||
'EndDate',
|
||||
'Status',
|
||||
'IPAddress',
|
||||
'Progress',
|
||||
'Finished',
|
||||
'District',
|
||||
'LASID',
|
||||
'Grade',
|
||||
'Gender',
|
||||
'Race',
|
||||
'Response ID',
|
||||
'Response Id',
|
||||
'DeseId',
|
||||
'Dese Id',
|
||||
]
|
||||
keep = list(map(str.lower, keep))
|
||||
drops = []
|
||||
for col in df.columns:
|
||||
if col.lower() in remove:
|
||||
if col.lower() not in keep and not col.startswith('s-') and not col.startswith('t-'):
|
||||
drops.append(col)
|
||||
df = df.drop(columns=drops)
|
||||
if args.verbose: print(f'Dropped columns: {drops}')
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue