diff --git a/.gitignore b/.gitignore
index 055afd6..a04fcdf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,7 +1,7 @@
-.env
-.idea
-test-csv
-ecp-csv
-test-csv-large
-merged
+.env
+.idea
+test-*
+ecp-csv
+test-csv-large
+merged
 bfg.jar
\ No newline at end of file
diff --git a/merge-csv.py b/merge-csv.py
index f1b10c7..ce968c7 100644
--- a/merge-csv.py
+++ b/merge-csv.py
@@ -9,6 +9,38 @@ import re
 from urllib.parse import urlparse
 import pysftp
 
+# maps each final student column to the csv header aliases merged into it,
+# excluding the question ids (they are found using regex)
+# TODO confirm matching is case-insensitive; aliases are kept lowercase for that
+final_columns_student = {
+    'Start Date': ['startdate', 'start date'],
+    'End Date': ['enddate', 'end date'],
+    'Status': ['status'],
+    'Ip Address': ['ip address', 'ipaddress'],
+    'Progress': ['progress'],
+    'Duration': ['duration', 'duration..in.seconds', 'duration (in seconds)'],
+    'District': ['district', 'please select your school district.'],
+    'LASID': ['lasid', 'please enter your locally assigned student id number (lasid, or student lunch number).'],
+    'Grade': ['grade', 'what grade are you in?'],
+    'Gender': ['gender', 'what is your gender?', 'what is your gender? - selected choice'],
+    'Race': ['race'],
+    'Recorded Date': ['recorded date', 'recordeddate'],
+    'Response Id': ['responseid', 'response id'],
+    'Dese Id': ['deseid', 'dese id', 'school'],
+}
+
+final_columns_teacher = {  # final teacher column -> csv header aliases merged into it
+    'Start Date': ['startdate', 'start date'],
+    'End Date': ['enddate', 'end date'],
+    'Status': ['status'],
+    'Ip Address': ['ip address', 'ipaddress'],
+    'Progress': ['progress'],
+    'Duration': ['duration', 'duration..in.seconds', 'duration (in seconds)'],
+    'District': ['district', 'please select your school district.'],
+    'Recorded Date': ['recorded date', 'recordeddate'],
+    'Response Id': ['responseid', 'response id'],
+    'Dese Id': ['deseid', 'dese id', 'school'],  # NOTE(review): 'school' is treated as a Dese Id alias - confirm
+}
 
 class Sftp:
     def __init__(self, hostname, username, password, cnopts, port=22):
@@ -97,28 +129,25 @@ class Sftp:
             raise Exception(err)
 
 
+# builds the source dir (cwd/folder) and its 'merged' subdir, creating the latter if missing; returns (cwd, mwd)
 def prep_dir(folder=''):
     # prepare directories
     cwd = os.path.join(os.getcwd(), folder)
     mwd = os.path.join(cwd, 'merged')
     if not os.path.exists(mwd):
+        if args.verbose: print(f'Creating directory {mwd}')
         os.mkdir(mwd)
     if args.verbose: print('Source data directory: ' + cwd)
     if args.verbose: print('Merged data directory: ' + mwd)
     return cwd, mwd
 
 
+# get today's date as <full month name>-<DD>-<YYYY>, e.g. January-05-2024 in an English locale
 def get_date():
     return datetime.date.today().strftime("%B-%d-%Y")
 
-# UNUSED
-# def cap_permutations(s):
-#     if len(s) > 15:
-#         return [s]
-#     lu_sequence = ((c.lower(), c.upper()) for c in s)
-#     return [''.join(x) for x in it.product(*lu_sequence)]
-
 
+# in dataframe df, merges any column in possibilities into the final column col
 def combine_cols(df, col, possibilities):
     # if final column doesn't exist, create it
     if col not in df.columns:
@@ -143,38 +172,12 @@ def combine_cols(df, col, possibilities):
     return df
 
 
-def clean_cols(df):
-    keep = [
-        'StartDate',
-        'EndDate',
-        'Start Date',
-        'End Date',
-        'Status',
-        'Response Type',
-        'IpAddress',
-        'Ip Address'
-        'Progress',
-        'Duration',
-        'Please enter your Locally Assigned Student ID Number (LASID, or student lunch number).',
-        'Finished',
-        'District',
-        'LASID',
-        'Recorded Date',
-        'RecordedDate',
-        'Grade',
-        'Gender',
-        'Race',
-        'Response Id',
-        'ResponseId',
-        'DeseId',
-        'Dese Id',
-        'School',
-        'District',
-        'Please select your school district.',
-    ]
+# removes unused columns from student data
+def clean_cols_student(df):
+    keep = list(final_columns_student.keys())
     keep = list(map(str.lower, keep))
     drops = []
-    question_pattern = re.compile("^[s,t]-[a-zA-Z]{4}-q[0-9][0-9]?$")
+    question_pattern = re.compile("^s-[a-zA-Z]{4}-q[0-9][0-9]?$")
     for col in df.columns:
         if col.lower() not in keep and not bool(question_pattern.match(col)):
             drops.append(col)
@@ -183,7 +186,21 @@ def clean_cols(df):
     return df
 
 
+# drops every teacher column that is neither a final column nor a teacher question id
+def clean_cols_teacher(df):
+    # lowercase names of the final teacher columns; headers are compared lowercased
+    wanted = {name.lower() for name in final_columns_teacher}
+    question_id = re.compile("^t-[a-zA-Z]{4}-q[0-9][0-9]?$")
+    unwanted = [
+        col for col in df.columns
+        if col.lower() not in wanted and question_id.match(col) is None
+    ]
+    df = df.drop(columns=unwanted)
+    if args.verbose: print(f'Dropped columns: {unwanted}')
+    return df
 
+
+# performs all merging operations for student data
 def do_merge_student(cwd, mwd):
     # identify and merge student files
     if not args.quiet: print('---Merging Student Data---')
@@ -194,16 +211,22 @@ def do_merge_student(cwd, mwd):
         return
     if not args.quiet: print('Merging...')
     files = [pd.read_csv(f, low_memory=False) for f in all_files]
+    # count lines in read csv files
     lines = 0
     for fi in files:
         lines += fi.shape[0]
+    # combine csv files
     df = pd.concat(files, axis=0)
+    # combine related columns
     if not args.quiet: print('Repairing rows...')
-    df = repair_student_rows(df)
+    df = repair_student_columns(df)
+    # clean out unnecessary columns
     if not args.quiet: print('Cleaning out columns...')
-    df = clean_cols(df)
+    df = clean_cols_student(df)
+    # ensure line count matches what is expected
     if df.shape[0] != lines:
         print(f'Warning: Line count mismatch: {lines} expected, but got {df.shape[0]}')
+    # save merged file
     date = get_date()
     if args.project:
         proj = '-' + args.project
@@ -215,6 +238,7 @@ def do_merge_student(cwd, mwd):
     return fn
 
 
+# performs all merging operations for teacher data
 def do_merge_teacher(cwd, mwd):
     # identify and merge teacher files
     if not args.quiet: print('---Merging Teacher Data---')
@@ -225,16 +249,22 @@ def do_merge_teacher(cwd, mwd):
         return
     if not args.quiet: print('Merging...')
     files = [pd.read_csv(f, low_memory=False) for f in all_files]
+    # count lines in read csv files
     lines = 0
     for f in files:
         lines += f.shape[0]
+    # combine csv files
     df = pd.concat(files, axis=0)
-    if not args.quiet: print('Repairing rows...')
-    df = repair_teacher_rows(df)
+    # combine related columns
+    if not args.quiet: print('Repairing columns...')
+    df = repair_teacher_columns(df)
+    # clean out unnecessary columns
     if not args.quiet: print('Cleaning out columns...')
-    df = clean_cols(df)
+    df = clean_cols_teacher(df)
+    # ensure line count matches what is expected
     if df.shape[0] != lines:
         print(f'Warning: Line count mismatch: {lines} expected, but got {df.shape[0]}')
+    # save merged file
     date = get_date()
     if args.project:
         proj = '-' + args.project
@@ -246,29 +276,28 @@ def do_merge_teacher(cwd, mwd):
     return fn
 
 
-def repair_teacher_rows(df):
-    df = combine_cols(df, 'Recorded Date', ['recorded date', 'recordeddate'])
-    df = combine_cols(df, 'Response ID', ['responseid', 'response id'])
-    df = combine_cols(df, 'DeseId', ['deseid', 'dese id', 'school'])
+# folds every known alias of a teacher column into its final column name
+def repair_teacher_columns(df):
+    for final_name, aliases in final_columns_teacher.items():
+        df = combine_cols(df, final_name, aliases)
     return df
 
 
-def repair_student_rows(df):
-    df = combine_cols(df, 'Recorded Date', ['recorded date', 'recordeddate'])
-    df = combine_cols(df, 'Response ID', ['responseid', 'response id'])
-    df = combine_cols(df, 'DeseId', ['deseid', 'dese id', 'school'])
-    df = combine_cols(df, 'Grade', ['grade', 'what grade are you in?'])
-    df = combine_cols(df, 'Gender', ['gender', 'what is your gender?', 'what is your gender? - selected choice'])
-    df = combine_cols(df, 'Race', ['race'])
+# folds every known alias of a student column into its final column name,
+# then collapses question variants into their non-variant columns
+def repair_student_columns(df):
+    for final_name, aliases in final_columns_student.items():
+        df = combine_cols(df, final_name, aliases)
     if not args.quiet: print('Combining Question Variants...')
     df = combine_variants(df)
     return df
 
 
+# combines question variants into non-variant columns
 def combine_variants(df):
     drops = []
     for col in df:
-        x = re.search(r's-[a-z]{4}-q[0-9][0-9]?-1', col)
+        x = re.search(r'^s-[a-z]{4}-q[0-9][0-9]?-1$', col)
         if x is not None:
             # get non variant version
             nonvar = col[:-2]