mirror of
https://github.com/edcommonwealth/merge-csv.git
synced 2026-03-07 21:48:13 -08:00
Fix Column Cleaning
This commit is contained in:
parent
e8269febe1
commit
4d025ba681
1 changed file with 18 additions and 9 deletions
27
merge-csv.py
27
merge-csv.py
|
|
@@ -144,18 +144,27 @@ def combine_cols(df, col, possibilities):
|
||||||
|
|
||||||
|
|
||||||
def clean_cols(df):
|
def clean_cols(df):
|
||||||
remove = [
|
keep = [
|
||||||
'Please select the month, day and year of your birthday. - Month'.lower(),
|
'StartDate',
|
||||||
'Please select the month, day and year of your birthday. - Day'.lower(),
|
'EndDate',
|
||||||
'Please select the month, day and year of your birthday. - Year'.lower(),
|
'Status',
|
||||||
'Gender - SIS'.lower(),
|
'IPAddress',
|
||||||
'Race - SIS'.lower(),
|
'Progress',
|
||||||
'Finished'.lower(),
|
'Finished',
|
||||||
'STDev'.lower(),
|
'District',
|
||||||
|
'LASID',
|
||||||
|
'Grade',
|
||||||
|
'Gender',
|
||||||
|
'Race',
|
||||||
|
'Response ID',
|
||||||
|
'Response Id',
|
||||||
|
'DeseId',
|
||||||
|
'Dese Id',
|
||||||
]
|
]
|
||||||
|
keep = list(map(str.lower, keep))
|
||||||
drops = []
|
drops = []
|
||||||
for col in df.columns:
|
for col in df.columns:
|
||||||
if col.lower() in remove:
|
if col.lower() not in keep and not col.startswith('s-') and not col.startswith('t-'):
|
||||||
drops.append(col)
|
drops.append(col)
|
||||||
df = df.drop(columns=drops)
|
df = df.drop(columns=drops)
|
||||||
if args.verbose: print(f'Dropped columns: {drops}')
|
if args.verbose: print(f'Dropped columns: {drops}')
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue