diff --git a/.gitignore b/.gitignore
index a04fcdf..052ba9c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,4 +4,6 @@ test-*
 ecp-csv
 test-csv-large
 merged
-bfg.jar
\ No newline at end of file
+bfg.jar
+*.sh
+__pycache__
\ No newline at end of file
diff --git a/README.md b/README.md
index 72411cc..d604cf9 100644
Binary files a/README.md and b/README.md differ
diff --git a/merge-csv.py b/merge.py
similarity index 83%
rename from merge-csv.py
rename to merge.py
index ce968c7..7f70390 100644
--- a/merge-csv.py
+++ b/merge.py
@@ -9,7 +9,8 @@ import re
 from urllib.parse import urlparse
 import pysftp
 
-# TODO make sure these are not case sensitive
+# NOTE: for now, every entry in each of the lists below must be all lowercase
+# TODO: eventually make the matching case-insensitive
 # all of the columns we want to extract from the csv file
 # excluding the question ids (they are found using regex)
 final_columns_student = {
@@ -20,7 +21,7 @@ final_columns_student = {
     'Progress': ['progress'],
     'Duration': ['duration', 'duration..in.seconds', 'duration (in seconds)'],
     'District': ['district', 'please select your school district.'],
-    'LASID': ['lasid', 'Please enter your Locally Assigned Student ID Number (LASID, or student lunch number).'],
+    'LASID': ['lasid', 'please enter your locally assigned student id number (lasid, or student lunch number).'],
     'Grade': ['grade', 'what grade are you in?'],
     'Gender': ['gender', 'what is your gender?', 'what is your gender? - selected choice'],
     'Race': ['race'],
@@ -42,6 +43,10 @@ final_columns_teacher = {
     'Dese Id': ['deseid', 'dese id', 'school'],
 }
 
+argVerbose = False
+argQuiet = True
+
+
 class Sftp:
     def __init__(self, hostname, username, password, cnopts, port=22):
         """Constructor Method"""
@@ -68,12 +73,12 @@ class Sftp:
         except Exception as err:
             raise Exception(err)
         finally:
-            if not args.quiet: print(f"Connected to {self.hostname} as {self.username}.")
+            if not argQuiet: print(f"Connected to {self.hostname} as {self.username}.")
 
     def disconnect(self):
         """Closes the sftp connection"""
         self.connection.close()
-        if not args.quiet: print(f"Disconnected from host {self.hostname}")
+        if not argQuiet: print(f"Disconnected from host {self.hostname}")
 
     def listdir(self, remote_path):
         """lists all the files and directories in the specified path and returns them"""
@@ -92,7 +97,7 @@ class Sftp:
         """
 
         try:
-            if not args.quiet: print(
+            if not argQuiet: print(
                 f"downloading from {self.hostname} as {self.username} [(remote path : {remote_path});(local path: {target_local_path})]"
             )
 
@@ -106,7 +111,7 @@ class Sftp:
 
             # Download from remote sftp server to local
             self.connection.get(remote_path, target_local_path)
-            if not args.quiet: print("download completed")
+            if not argQuiet: print("download completed")
 
         except Exception as err:
             raise Exception(err)
@@ -117,13 +122,13 @@ class Sftp:
         """
 
         try:
-            if not args.quiet: print(
+            if not argQuiet: print(
                 f"uploading to {self.hostname} as {self.username} [(remote path: {remote_path});(source local path: {source_local_path})]"
             )
 
             # Download file from SFTP
             self.connection.put(source_local_path, remote_path)
-            if not args.quiet: print("upload completed")
+            if not argQuiet: print("upload completed")
 
         except Exception as err:
             raise Exception(err)
@@ -135,10 +140,10 @@ def prep_dir(folder=''):
     cwd = os.path.join(os.getcwd(), folder)
     mwd = os.path.join(cwd, 'merged')
     if not os.path.exists(mwd):
-        if args.verbose: print(f'Creating directory {mwd}')
+        if argVerbose: print(f'Creating directory {mwd}')
         os.mkdir(mwd)
-    if args.verbose: print('Source data directory: ' + cwd)
-    if args.verbose: print('Merged data directory: ' + mwd)
+    if argVerbose: print('Source data directory: ' + cwd)
+    if argVerbose: print('Merged data directory: ' + mwd)
     return cwd, mwd
 
 
@@ -162,13 +167,13 @@ def combine_cols(df, col, possibilities):
             if cl == col:
                 continue
             # replace the column...
-            if args.verbose: print(f'Replacing column {cl}')
+            if argVerbose: print(f'Replacing column {cl}')
             df[col] = df[col].replace(r'^\s*$', np.nan, regex=True).fillna(df[cl])
             # and add it to the drop list
             drops.append(cl)
     # drop spent columns
     df = df.drop(columns=drops)
-    if args.verbose: print(f'Dropped columns: {drops}')
+    if argVerbose: print(f'Dropped columns: {drops}')
     return df
 
 
@@ -182,7 +187,7 @@ def clean_cols_student(df):
         if col.lower() not in keep and not bool(question_pattern.match(col)):
             drops.append(col)
     df = df.drop(columns=drops)
-    if args.verbose: print(f'Dropped columns: {drops}')
+    if argVerbose: print(f'Dropped columns: {drops}')
     return df
 
 
@@ -196,20 +201,20 @@ def clean_cols_teacher(df):
         if col.lower() not in keep and not bool(question_pattern.match(col)):
             drops.append(col)
     df = df.drop(columns=drops)
-    if args.verbose: print(f'Dropped columns: {drops}')
+    if argVerbose: print(f'Dropped columns: {drops}')
     return df
 
 
 # performs all merging operations for student data
 def do_merge_student(cwd, mwd):
     # identify and merge student files
-    if not args.quiet: print('---Merging Student Data---')
+    if not argQuiet: print('---Merging Student Data---')
     all_files = glob.glob(os.path.join(cwd, "*student*.csv"))
-    if not args.quiet: print(f'Found {len(all_files)} Student CSV files')
+    if not argQuiet: print(f'Found {len(all_files)} Student CSV files')
     if len(all_files) < 1:
-        if not args.quiet: print('No files found. Skipping merge...')
+        if not argQuiet: print('No files found. Skipping merge...')
         return
-    if not args.quiet: print('Merging...')
+    if not argQuiet: print('Merging...')
     files = [pd.read_csv(f, low_memory=False) for f in all_files]
     # count lines in read csv files
     lines = 0
@@ -218,10 +223,10 @@ def do_merge_student(cwd, mwd):
     # combine csv files
     df = pd.concat(files, axis=0)
     # combine related columns
-    if not args.quiet: print('Repairing rows...')
+    if not argQuiet: print('Repairing rows...')
     df = repair_student_columns(df)
     # clean out unnecessary columns
-    if not args.quiet: print('Cleaning out columns...')
+    if not argQuiet: print('Cleaning out columns...')
     df = clean_cols_student(df)
     # ensure line count matches what is expected
     if df.shape[0] != lines:
@@ -234,20 +239,20 @@ def do_merge_student(cwd, mwd):
         proj = ''
     fn = f'{date}{proj}-student-data-merged.csv'
     df.to_csv(os.path.join(mwd, fn), index=False)
-    if not args.quiet: print('Student data merged successfully!')
+    if not argQuiet: print('Student data merged successfully!')
     return fn
 
 
 # performs all merging operations for teacher data
 def do_merge_teacher(cwd, mwd):
     # identify and merge teacher files
-    if not args.quiet: print('---Merging Teacher Data---')
+    if not argQuiet: print('---Merging Teacher Data---')
     all_files = glob.glob(os.path.join(cwd, "*teacher*.csv"))
-    if not args.quiet: print(f'Found {len(all_files)} Teacher CSV files')
+    if not argQuiet: print(f'Found {len(all_files)} Teacher CSV files')
     if len(all_files) < 1:
-        if not args.quiet: print('No files found. Skipping merge...')
+        if not argQuiet: print('No files found. Skipping merge...')
         return
-    if not args.quiet: print('Merging...')
+    if not argQuiet: print('Merging...')
     files = [pd.read_csv(f, low_memory=False) for f in all_files]
     # count lines in read csv files
     lines = 0
@@ -256,10 +261,10 @@ def do_merge_teacher(cwd, mwd):
     # combine csv files
     df = pd.concat(files, axis=0)
     # combine related columns
-    if not args.quiet: print('Repairing columns...')
+    if not argQuiet: print('Repairing columns...')
     df = repair_teacher_columns(df)
     # clean out unnecessary columns
-    if not args.quiet: print('Cleaning out columns...')
+    if not argQuiet: print('Cleaning out columns...')
     df = clean_cols_teacher(df)
     # ensure line count matches what is expected
     if df.shape[0] != lines:
@@ -272,7 +277,7 @@ def do_merge_teacher(cwd, mwd):
         proj = ''
     fn = f'{date}{proj}-teacher-data-merged.csv'
     df.to_csv(os.path.join(mwd, fn), index=False)
-    if not args.quiet: print('Teacher data merged successfully!')
+    if not argQuiet: print('Teacher data merged successfully!')
     return fn
 
 
@@ -288,7 +293,7 @@ def repair_teacher_columns(df):
 def repair_student_columns(df):
     for col in final_columns_student:
         df = combine_cols(df, col, final_columns_student[col])
-    if not args.quiet: print('Combining Question Variants...')
+    if not argQuiet: print('Combining Question Variants...')
     df = combine_variants(df)
     return df
 
@@ -309,6 +314,7 @@ def combine_variants(df):
     return df
 
 
+
 if __name__ == '__main__':
 
     # parse flags
@@ -344,23 +350,26 @@ if __name__ == '__main__':
                         help='sftp url for remote merging')
     args = parser.parse_args()
 
+    argVerbose = args.verbose
+    argQuiet = args.quiet
+
     #quiet takes precedence over verbose
-    if args.quiet:
-        args.verbose = False
+    if argQuiet:
+        argVerbose = False
 
     # make sure -s or -t is set
     if not (args.student or args.teacher):
-        if not args.quiet: print('Notice: Neither -s nor -t are specified. No merge will be performed.')
+        if not argQuiet: print('Notice: Neither -s nor -t are specified. No merge will be performed.')
 
     if args.directory and not args.remote_url:
         c, m = prep_dir(args.directory)
     elif not args.directory:
-        if not args.quiet: print('Notice: No directory specified. Defaulting to current directory.')
+        if not argQuiet: print('Notice: No directory specified. Defaulting to current directory.')
         c, m = prep_dir()
 
     # prepare sftp if flagged
     if args.remote_url:
-        if not args.quiet: print(f'Remote destination set, fetching files...')
+        if not argQuiet: print(f'Remote destination set, fetching files...')
         parsed_url = urlparse(args.remote_url)
         cnopts = pysftp.CnOpts()
         cnopts.hostkeys = None
@@ -386,7 +395,7 @@ if __name__ == '__main__':
         for file in sftp.listdir_attr(path):
             if file.filename.endswith(".csv"):
                 filelist.append(file.filename)
-                if not args.quiet: print(f'Fetching file {file.filename}...')
+                if not argQuiet: print(f'Fetching file {file.filename}...')
                 sftp.download(path + file.filename, c + file.filename)
 
     # perform merges
@@ -397,11 +406,11 @@ if __name__ == '__main__':
 
     if args.remote_url:
         # upload tmd and smd to remote
-        if not args.quiet: print('Uploading merged data...')
+        if not argQuiet: print('Uploading merged data...')
         sftp.upload(m + '/' + tmd, path + 'merged/' + tmd)
         sftp.upload(m + '/' + smd, path + 'merged/' + smd)
         # remove merged directory
-        if not args.quiet: print('Cleaning up...')
+        if not argQuiet: print('Cleaning up...')
         os.remove(m + '/' + tmd)
         os.remove(m + '/' + smd)
         os.rmdir(m)
@@ -410,4 +419,4 @@ if __name__ == '__main__':
             if os.path.exists(f):
                 os.remove(f)
         sftp.disconnect()
-    if not args.quiet: print('Done!')
+    if not argQuiet: print('Done!')
diff --git a/test.py b/test.py
new file mode 100644
index 0000000..5feefcc
--- /dev/null
+++ b/test.py
@@ -0,0 +1,223 @@
+import merge
+import unittest
+import pandas as pd
+
+
+tdata_student = pd.DataFrame({
+    'BadColumn': ['1', '2', '3', '4'],
+    'Gender': ['1', '', '', ''],
+    'gender:': ['', '', '2', '1'],
+    'Gender - SIS': ['', '2', '', ''],
+
+    })
+
+class TestMergeCSV(unittest.TestCase):
+
+    def test_combine_variants(self):
+        td = pd.DataFrame({
+            's-peff-q1': ['1', '2', '3', '', ''],
+            's-peff-q10': ['1', '2', '3', '', ''],
+            's-peff-q1-1': ['', '', '', '4', '5'],
+            's-peff-q10-1': ['', '', '', '4', '5'],
+        })
+        td = merge.combine_variants(td)
+        expected = pd.DataFrame({
+            's-peff-q1': ['1', '2', '3', '4', '5'],
+            's-peff-q10': ['1', '2', '3', '4', '5'],
+        })
+        notexpected = pd.DataFrame({
+            's-peff-q1-1': ['1', '2', '3', '4', '5'],
+            's-peff-q10-1': ['1', '2', '3', '4', '5'],
+        })
+        self.assertTrue(td.equals(expected))
+        self.assertFalse(td.equals(notexpected))
+    
+    def test_clean_cols_student(self):
+        td = pd.DataFrame({
+            'Start Date': ['1', '2', '3', '4', '5'],
+            'End Date': ['1', '2', '3', '4', '5'],
+            'Status': ['1', '2', '3', '4', '5'],
+            'Ip Address': ['1', '2', '3', '4', '5'],
+            'Progress': ['1', '2', '3', '4', '5'],
+            'Duration': ['1', '2', '3', '4', '5'],
+            'District': ['1', '2', '3', '4', '5'],
+            'LASID': ['1', '2', '3', '4', '5'],
+            'Grade': ['1', '2', '3', '4', '5'],
+            'Race': ['1', '2', '3', '4', '5'],
+            'Recorded Date': ['1', '2', '3', '4', '5'],
+            'Dese Id': ['1', '2', '3', '4', '5'],
+            'BadColumn': ['x', 'x', 'x', 'x', 'x'],
+            'Gender - SIS': ['1', '2', '3', '', ''],
+            'Gender': ['1', '2', '1', '2', '2'],
+            'Response Id': ['1', '2', '3', '4', '5'],
+            's-peff-q1': ['1', '2', '3', '4', '5'],
+            's-peff-q10': ['1', '2', '3', '4', '5'],
+            's-peff-q1.1': ['1', '2', '3', '4', '5'],
+        })
+        td = merge.clean_cols_student(td)
+        expected = pd.DataFrame({
+            'Start Date': ['1', '2', '3', '4', '5'],
+            'End Date': ['1', '2', '3', '4', '5'],
+            'Status': ['1', '2', '3', '4', '5'],
+            'Ip Address': ['1', '2', '3', '4', '5'],
+            'Progress': ['1', '2', '3', '4', '5'],
+            'Duration': ['1', '2', '3', '4', '5'],
+            'District': ['1', '2', '3', '4', '5'],
+            'LASID': ['1', '2', '3', '4', '5'],
+            'Grade': ['1', '2', '3', '4', '5'],
+            'Race': ['1', '2', '3', '4', '5'],
+            'Recorded Date': ['1', '2', '3', '4', '5'],
+            'Dese Id': ['1', '2', '3', '4', '5'],
+            'Gender': ['1', '2', '1', '2', '2'],
+            'Response Id': ['1', '2', '3', '4', '5'],
+            's-peff-q1': ['1', '2', '3', '4', '5'],
+            's-peff-q10': ['1', '2', '3', '4', '5'],
+        })
+        notexpected = pd.DataFrame({
+            'BadColumn': ['x', 'x', 'x', 'x', 'x'],
+            'Gender - SIS': ['1', '2', '3', '', ''],
+            's-peff-q1.1': ['1', '2', '3', '4', '5'],
+        })
+        self.assertTrue(td.equals(expected), td)
+        self.assertFalse(td.equals(notexpected), td)
+    
+    def test_clean_cols_teacher(self):
+        td = pd.DataFrame({
+            'Start Date': ['1', '2', '3', '4', '5'],
+            'End Date': ['1', '2', '3', '4', '5'],
+            'Status': ['1', '2', '3', '4', '5'],
+            'Ip Address': ['1', '2', '3', '4', '5'],
+            'Progress': ['1', '2', '3', '4', '5'],
+            'Duration': ['1', '2', '3', '4', '5'],
+            'District': ['1', '2', '3', '4', '5'],
+            'Recorded Date': ['1', '2', '3', '4', '5'],
+            'BadColumn': ['x', 'x', 'x', 'x', 'x'],
+            'Blah Blah Blah': ['x', 'x', 'x', 'x', 'x'],
+            'Abbey Road': ['x', 'x', 'x', 'x', 'x'],
+            'Please List Your Cats': ['x', 'x', 'x', 'x', 'x'],
+            'Response Id': ['1', '2', '3', '4', '5'],
+            'Dese Id': ['1', '2', '3', '4', '5'],
+            't-peff-q1': ['1', '2', '3', '4', '5'],
+            't-peff-q10': ['1', '2', '3', '4', '5'],
+            't-peff-q1.1': ['1', '2', '3', '4', '5'],
+            't-peff-q10.1': ['1', '2', '3', '4', '5'],
+        })
+        td = merge.clean_cols_teacher(td)
+        expected = pd.DataFrame({
+            'Start Date': ['1', '2', '3', '4', '5'],
+            'End Date': ['1', '2', '3', '4', '5'],
+            'Status': ['1', '2', '3', '4', '5'],
+            'Ip Address': ['1', '2', '3', '4', '5'],
+            'Progress': ['1', '2', '3', '4', '5'],
+            'Duration': ['1', '2', '3', '4', '5'],
+            'District': ['1', '2', '3', '4', '5'],
+            'Recorded Date': ['1', '2', '3', '4', '5'],
+            'Response Id': ['1', '2', '3', '4', '5'],
+            'Dese Id': ['1', '2', '3', '4', '5'],
+            't-peff-q1': ['1', '2', '3', '4', '5'],
+            't-peff-q10': ['1', '2', '3', '4', '5'],
+        })
+        notexpected = pd.DataFrame({
+            'BadColumn': ['x', 'x', 'x', 'x', 'x'],
+            'Blah Blah Blah': ['x', 'x', 'x', 'x', 'x'],
+            'Abbey Road': ['x', 'x', 'x', 'x', 'x'],
+            'Please List Your Cats': ['x', 'x', 'x', 'x', 'x'],
+            't-peff-q1.1': ['1', '2', '3', '4', '5'],
+            't-peff-q10.1': ['1', '2', '3', '4', '5'],
+        })
+        self.assertTrue(td.equals(expected), td)
+        self.assertFalse(td.equals(notexpected), td)
+    
+    def test_combine_cols(self):
+        td = pd.DataFrame({
+            'My Column': ['1', '', '', '', ''],
+            'My Other Column': ['', '2', '3', '', ''],
+            'Not My Column': ['1', '2', '3', '4', '5'],
+            'My Last Column': ['', '', '', '4', '5'],
+        })
+        expected = pd.DataFrame({
+            'My Column': ['1', '2', '3', '4', '5'],
+            'Not My Column': ['1', '2', '3', '4', '5'],
+        })
+        td = merge.combine_cols(td, 'My Column', ['my other column', 'my last column'])
+        self.assertTrue(td.equals(expected), f'\n{td}')
+    
+    def test_repair_cols_student(self):
+        td = pd.DataFrame({
+            'Start Date': ['', '', '', '4', '5'],
+            'End Date': ['', '', '', '4', '5'],
+            'Ip Address': ['', '', '', '4', '5'],
+            'StartDate': ['1', '2', '3', '', ''],
+            'EndDate': ['1', '2', '3', '', ''],
+            'IpAddress': ['1', '2', '3', '', ''],
+            'Status': ['1', '2', '3', '4', '5'],
+            'Progress': ['1', '2', '3', '4', '5'],
+            'Duration': ['1', '2', '3', '4', '5'],
+            'District': ['1', '2', '3', '4', '5'],
+            'Recorded Date': ['', '', '', '4', '5'],
+            'RecordedDate': ['1', '2', '3', '', ''],
+            'Response Id': ['1', '2', '3', '4', '5'],
+            'Dese Id': ['', '', '', '4', '5'],
+            'School': ['1', '2', '3', '', ''],
+            'LASID': ['1', '2', '3', '', ''],
+            'Please enter your Locally Assigned Student ID Number (LASID, or student lunch number).': ['', '', '', '4', '5'],
+            'Grade': ['1', '2', '3', '', ''],
+            'What grade are you in?': ['', '', '', '4', '5'],
+            'Gender': ['1', '2', '3', '', ''],
+            'What is your gender?': ['', '', '', '4', '5'],
+            'Race': ['1', '2', '3', '4', '5'],
+        })
+        expected = pd.DataFrame({
+            'Start Date': ['1', '2', '3', '4', '5'],
+            'End Date': ['1', '2', '3', '4', '5'],
+            'Ip Address': ['1', '2', '3', '4', '5'],
+            'Status': ['1', '2', '3', '4', '5'],
+            'Progress': ['1', '2', '3', '4', '5'],
+            'Duration': ['1', '2', '3', '4', '5'],
+            'District': ['1', '2', '3', '4', '5'],
+            'Recorded Date': ['1', '2', '3', '4', '5'],
+            'Response Id': ['1', '2', '3', '4', '5'],
+            'Dese Id': ['1', '2', '3', '4', '5'],
+            'LASID': ['1', '2', '3', '4', '5'],
+            'Grade': ['1', '2', '3', '4', '5'],
+            'Gender': ['1', '2', '3', '4', '5'],
+            'Race': ['1', '2', '3', '4', '5'],
+        })
+        td = merge.repair_student_columns(td)
+        self.assertTrue(td.equals(expected), f'\n{td}')
+    
+    def test_repair_cols_teacher(self):
+        td = pd.DataFrame({
+            'Start Date': ['', '', '', '4', '5'],
+            'End Date': ['', '', '', '4', '5'],
+            'Ip Address': ['', '', '', '4', '5'],
+            'StartDate': ['1', '2', '3', '', ''],
+            'EndDate': ['1', '2', '3', '', ''],
+            'IpAddress': ['1', '2', '3', '', ''],
+            'Status': ['1', '2', '3', '4', '5'],
+            'Progress': ['1', '2', '3', '4', '5'],
+            'Duration': ['1', '2', '3', '4', '5'],
+            'District': ['1', '2', '3', '4', '5'],
+            'Recorded Date': ['', '', '', '4', '5'],
+            'RecordedDate': ['1', '2', '3', '', ''],
+            'Response Id': ['1', '2', '3', '4', '5'],
+            'Dese Id': ['', '', '', '4', '5'],
+            'School': ['1', '2', '3', '', ''],
+        })
+        expected = pd.DataFrame({
+            'Start Date': ['1', '2', '3', '4', '5'],
+            'End Date': ['1', '2', '3', '4', '5'],
+            'Ip Address': ['1', '2', '3', '4', '5'],
+            'Status': ['1', '2', '3', '4', '5'],
+            'Progress': ['1', '2', '3', '4', '5'],
+            'Duration': ['1', '2', '3', '4', '5'],
+            'District': ['1', '2', '3', '4', '5'],
+            'Recorded Date': ['1', '2', '3', '4', '5'],
+            'Response Id': ['1', '2', '3', '4', '5'],
+            'Dese Id': ['1', '2', '3', '4', '5'],
+        })
+        td = merge.repair_teacher_columns(td)
+        self.assertTrue(td.equals(expected), td)
+    
+if __name__ == '__main__':
+    unittest.main()
\ No newline at end of file