s-good
diff --git a/AutoQC.py b/AutoQC.py
+13-14
diff --git a/analyse-results.py b/analyse-results.py
+17-17
diff --git a/build-db.py b/build-db.py
+7-7
diff --git a/catchall.py b/catchall.py
+10-10
diff --git a/cotede_qc/cotede_test.py b/cotede_qc/cotede_test.py
+6-6
diff --git a/filter-db.py b/filter-db.py
+1-1
diff --git a/plot-roc.py b/plot-roc.py
+1-1
diff --git a/qctests/AOML_climatology_test.py b/qctests/AOML_climatology_test.py
+2-3
diff --git a/qctests/AOML_gross.py b/qctests/AOML_gross.py
+2-2
diff --git a/qctests/CoTeDe_Argo_density_inversion.py b/qctests/CoTeDe_Argo_density_inversion.py
+1-1
@@ -12,21 +12,20 @@ def run(test, profiles, parameters):
   verbose = []
   exec('from qctests import ' + test)
   for profile in profiles:
-    exec('result = ' + test + '.test(profile, parameters)')
-    verbose.append(result)
+    exec('verbose.append(' + test + '.test(profile, parameters))')
 
   return verbose
 
 def process_row(uid, logdir):
   '''run all tests on the indicated database row'''
-  
+
   # reroute stdout, stderr to separate files for each profile to preserve logs
   sys.stdout = open(logdir + "/" + str(uid) + ".stdout", "w")
   sys.stderr = open(logdir + "/" + str(uid) + ".stderr", "w")
 
   # extract profile
   profile = main.get_profile_from_db(uid)
-  
+
   # mask out error codes in temperature data
   main.catchFlags(profile)
 
@@ -35,14 +34,14 @@ def process_row(uid, logdir):
     try:
       result = run(test, [profile], parameterStore)[0]
     except:
-      print test, 'exception', sys.exc_info()
+      print(test, 'exception', sys.exc_info())
       result = np.zeros(1, dtype=bool)
 
     try:
       query = "UPDATE " + sys.argv[1] + " SET " + test + "=? WHERE uid=" + str(profile.uid()) + ";"
       main.dbinteract(query, [main.pack_array(result)])
     except:
-      print 'db exception', sys.exc_info()
+      print('db exception', sys.exc_info())
 
 
 ########################################
@@ -76,12 +75,12 @@ def process_row(uid, logdir):
     try:
       exec(test + '.loadParameters(parameterStore)')
     except:
-      print 'No parameters to load for', test
-      
+      print('No parameters to load for', test)
+
   # connect to database & fetch list of all uids
-  query = 'SELECT uid FROM ' + sys.argv[1] + ' ORDER BY uid;' 
+  query = 'SELECT uid FROM ' + sys.argv[1] + ' WHERE subset=1 ORDER BY uid LIMIT 10;'
   uids = main.dbinteract(query)
-  
+
   # launch async processes
   if len(sys.argv) > 4:
     batchnumber = int(sys.argv[3])
@@ -96,9 +95,9 @@ def process_row(uid, logdir):
     pool.apply_async(process_row, (uids[i][0], logdir))
   pool.close()
   pool.join()
-    
+
 else:
-  print 'Please add command line arguments to name your output file and set parallelization:'
-  print 'python AutoQC <database results table> <number of processes> <batch> <number of processes per batch> [<batchnumber> <number per batch>]'
-  print 'will use <database results table> to log QC results in the database, and run the calculation parallelized over <number of processes>. By default all profiles will be processed, but optionally the processing can be done in batches of size <number per batch>.'
+  print('Please add command line arguments to name your output file and set parallelization:')
+  print('python AutoQC <database results table> <number of processes> <batch> <number of processes per batch> [<batchnumber> <number per batch>]')
+  print('will use <database results table> to log QC results in the database, and run the calculation parallelized over <number of processes>. By default all profiles will be processed, but optionally the processing can be done in batches of size <number per batch>.')
 
@@ -118,15 +118,15 @@ def find_roc(table,
     for c in cols:
         if len(pandas.unique(df[c])) == 1:
             nondiscrim.append(c)
-            if verbose: print c + ' is nondiscriminating and will be removed'
+            if verbose: print(c + ' is nondiscriminating and will be removed')
     cols = [t for t in cols if t not in nondiscrim]
     df = df[cols]
-    print list(df)
+    print(list(df))
     testNames = df.columns[2:].values.tolist()
 
     if verbose:
-        print 'Number of profiles is: ', len(df.index)
-        print 'Number of quality checks to process is: ', len(testNames)
+        print('Number of profiles is: ', len(df.index))
+        print('Number of quality checks to process is: ', len(testNames))
 
     # mark chosen profiles as part of the training set 
     all_uids = main.dbinteract('SELECT uid from ' + sys.argv[1] + ';')
@@ -161,7 +161,7 @@ def find_roc(table,
                 tprs.append(tpr)
                 fprs.append(fpr)
     del df # No further need for the data frame.
-    if verbose: print 'Number of quality checks after adding reverses and removing zero TPR was: ', len(names)
+    if verbose: print('Number of quality checks after adding reverses and removing zero TPR was: ', len(names))
 
     # Create storage to hold the roc curve.
     cumulative = truth.copy()
@@ -175,7 +175,7 @@ def find_roc(table,
 
     # Pre-select some tests if required.
     if enforce_types_of_check:
-        if verbose: print 'Enforcing types of checks'
+        if verbose: print('Enforcing types of checks')
         while len(groupdefinition['At least one from group']) > 0:
             bestchoice = ''
             bestgroup  = ''
@@ -191,22 +191,22 @@ def find_roc(table,
                                 cumulativenew = np.logical_or(cumulative, tests[itest])
                                 tpr, fpr, fnr, tnr = main.calcRates(cumulativenew, truth)
                                 newdist = return_cost(costratio, tpr, fpr)
-                                print '    ', tpr, fpr, newdist, bestdist, testname
+                                print('    ', tpr, fpr, newdist, bestdist, testname)
                                 if newdist == bestdist:
                                     if verbose:
-                                        print '  ' + bestchoice + ' and ' + testname + ' have the same results and the first is kept'
+                                        print('  ' + bestchoice + ' and ' + testname + ' have the same results and the first is kept')
                                 elif newdist < bestdist:
                                     bestchoice = testname
                                     bestdist   = newdist
                                     besti      = itest
                                     bestgroup  = key
                     else:
-                        if verbose: print '    ' + testname + ' not found and so was skipped'
+                        if verbose: print('    ' + testname + ' not found and so was skipped')
             #assert bestchoice != '', '    Error, did not make a choice in group ' + key
-            if verbose: print '  ' + bestchoice + ' was selected from group ' + bestgroup
+            if verbose: print('  ' + bestchoice + ' was selected from group ' + bestgroup)
             if fprs[besti] > 0:
                 if tprs[besti] / fprs[besti] < effectiveness_ratio:
-                    print 'WARNING - ' + bestchoice + ' TPR / FPR is below the effectiveness ratio limit: ', tprs[besti] / fprs[besti], effectiveness_ratio
+                    print('WARNING - ' + bestchoice + ' TPR / FPR is below the effectiveness ratio limit: ', tprs[besti] / fprs[besti], effectiveness_ratio)
             cumulative = np.logical_or(cumulative, tests[besti])
             currenttpr, currentfpr, fnr, tnr = main.calcRates(cumulative, truth)
             testcomb.append(names[besti])
@@ -219,15 +219,15 @@ def find_roc(table,
             del fprs[besti]
             del tprs[besti]
             del groupdefinition['At least one from group'][bestgroup]
-            print 'ROC point from enforced group: ', currenttpr, currentfpr, testcomb[-1], bestgroup
+            print('ROC point from enforced group: ', currenttpr, currentfpr, testcomb[-1], bestgroup)
 
     # Make combinations of the single checks and store.
     assert n_combination_iterations <= 2, 'Setting n_combination_iterations > 2 results in a very large number of combinations'
-    if verbose: print 'Starting construction of combinations with number of iterations: ', n_combination_iterations
+    if verbose: print('Starting construction of combinations with number of iterations: ', n_combination_iterations)
     for its in range(n_combination_iterations):
         ntests = len(names)
         for i in range(ntests - 1):
-            if verbose: print 'Processing iteration ', its + 1, ' out of ', n_combination_iterations, ' step ', i + 1, ' out of ', ntests - 1, ' with number of tests now ', len(names)
+            if verbose: print('Processing iteration ', its + 1, ' out of ', n_combination_iterations, ' step ', i + 1, ' out of ', ntests - 1, ' with number of tests now ', len(names))
             for j in range(i + 1, ntests):
                 # Create the name for this combination.
                 newname = ('&').join(sorted((names[i] + '&' + names[j]).split('&')))
@@ -240,7 +240,7 @@ def find_roc(table,
                     tprs.append(tpr)
                     fprs.append(fpr)
                     names.append(newname)
-    if verbose: print 'Completed generation of tests, now constructing roc from number of tests: ', len(names)         
+    if verbose: print('Completed generation of tests, now constructing roc from number of tests: ', len(names))
 
     # Create roc.
     used      = np.zeros(len(names), dtype=bool)
@@ -287,7 +287,7 @@ def find_roc(table,
             r_fprs.append(currentfpr)
             r_tprs.append(currenttpr)
             groupsel.append(False)
-            print 'ROC point: ', currenttpr, currentfpr, names[besti], overallbest
+            print('ROC point: ', currenttpr, currentfpr, names[besti], overallbest)
 
     if plot_roc:
         plt.plot(r_fprs, r_tprs, 'k')
@@ -323,5 +323,5 @@ def find_roc(table,
     elif len(sys.argv) == 4:
         find_roc(sys.argv[1], n_profiles_to_analyse=sys.argv[2], costratio=[10.0, 10.0])
     else:
-        print 'Usage - python analyse_results.py tablename <number of profiles to train ROC curve on> <optional character or number to indicate that we want a conservative set of QC tests i.e. with very low false positive rate>'
+        print('Usage - python analyse_results.py tablename <number of profiles to train ROC curve on> <optional character or number to indicate that we want a conservative set of QC tests i.e. with very low false positive rate>')
 
@@ -31,7 +31,7 @@ def assessProfile(p, check_originator_flag_type, months_to_use):
     if check_originator_flag_type:
         if int(p.originator_flag_type()) not in range(1,15):
             return False
-            
+
     # check month
     if p.month() not in months_to_use:
         return False
@@ -168,22 +168,22 @@ def builddb(check_originator_flag_type = True,
             break
 
     conn.commit()
-    print 'number of clean profiles written:', good
-    print 'number of flagged profiles written:', bad
-    print 'total number of profiles written:', good+bad
+    print('number of clean profiles written:', good)
+    print('number of flagged profiles written:', bad)
+    print('total number of profiles written:', good+bad)
 
 if len(sys.argv) == 3:
 
     builddb()
-    
+
 elif len(sys.argv) == 5:
 
     builddb(ast.literal_eval(sys.argv[3]), ast.literal_eval(sys.argv[4]))  
 
 else:
 
-    print 'Usage: python build-db.py <inputdatafile> <databasetable> <demand originator flags> <list of months to include (with no spaces or enclose in quotes)>' 
-    print 'Example: python build-db.py data.wod mytable False [1,2,3,10]'
+    print('Usage: python build-db.py <inputdatafile> <databasetable> <demand originator flags> <list of months to include (with no spaces or enclose in quotes)>')
+    print('Example: python build-db.py data.wod mytable False [1,2,3,10]')
 
 
 
@@ -38,9 +38,9 @@ def amend(combo, df):
     name = '&'.join(combo)
     return df.assign(xx=decision).rename(index=str, columns={'xx': name})
 
-print '=============='
-print sys.argv[1]
-print '=============='
+print('==============')
+print(sys.argv[1])
+print('==============')
 
 # Read QC test specifications if required.
 groupdefinition = ar.read_qc_groups()
@@ -74,7 +74,7 @@ def amend(combo, df):
 for test in testNames:
     tpr, fpr, fnr, tnr = main.calcRates(df[test].tolist(), df['Truth'].tolist())
     if fpr > 0 and tpr / fpr < perf_thresh:
-        print 'dropping', test, '; tpr/fpr = ', tpr/fpr
+        print('dropping', test, '; tpr/fpr = ', tpr/fpr)
         df.drop([test], axis=1)
         bad.drop([test], axis=1)
         drop_tests.append(test)
@@ -95,16 +95,16 @@ def amend(combo, df):
     fprs.append([x, fpr, tpr])
 
 # accept tests that flag bad profiles with no false positives
-print 'number of bad profiles to consider:', len(bad)
+print('number of bad profiles to consider:', len(bad))
 for test in fprs:
     if test[1] == 0 and test[2] > 0:
         accepted.append(test[0])
-        print 'accepted', test[0], 0
+        print('accepted', test[0], 0)
         bad = bad[bad[test[0]]==False]
         bad = bad.drop([test[0]], axis=1)
         testNames.remove(test[0])
 fprs = [elt for elt in fprs if elt[0] not in accepted]
-print 'number of bad profiles remaining:', len(bad)
+print('number of bad profiles remaining:', len(bad))
 
 # algo. step 3
 # add a column to df for each combo, summarizing its decision for each profile
@@ -129,7 +129,7 @@ def amend(combo, df):
             winner = x[x].keys()[0]
             accepted.append(winner)		# accept the combo as the only one flagging this bad profile
             ff = [x for x in fprs if x[0] == winner][0][1]
-            print 'accepted', winner, ff
+            print('accepted', winner, ff)
             bad = bad[bad[winner]==False]	# drop all bad profiles flagged by this combo
             bad = bad.drop([winner], axis=1)	# remove the combo from consideration
             testNames = [elt for elt in testNames if elt is not winner]
@@ -145,8 +145,8 @@ def amend(combo, df):
         combonames = [x for x in combonames if x is not maxfpr]
         del fprs[-1]
 
-print 'profiles not caught by any test:'
-print unflagged
+print('profiles not caught by any test:')
+print(unflagged)
 
 f = open('catchall.json', 'w')
 r = {'tests': accepted}
 
@@ -21,15 +21,15 @@ def get_qc(p, config, test):
     global cotede_results
 
     # Disable logging messages from CoTeDe unless they are more
-    # severe than a warning. 
-    logging.disable('warn')
-    
+    # severe than a warning.
+    logging.disable(logging.WARNING)
+
     # Create a dummy results variable if this is the first call.
-    try: 
+    try:
         cotede_results
     except NameError:
         cotede_results = [-1, '', {}, None]
-    
+
     var = 'TEMP'
 
     # Check if we need to perform the quality control.
@@ -50,7 +50,7 @@ def get_qc(p, config, test):
 
                 if test == config:
                     # AutoQC runs only on TEMP, so clean the rest.
-                    for v in cfg.keys():
+                    for v in list(cfg):
                         if v not in ['main', var]:
                             del(cfg[v])
                 # If is a specific test,
 
@@ -22,4 +22,4 @@
     conn.commit()
 else:
 
-    print "usage: python filter-db.py <full table name> <filtered table name> <number of good / bad profiles to pick>"
+    print("usage: python filter-db.py <full table name> <filtered table name> <number of good / bad profiles to pick>")
@@ -119,4 +119,4 @@ def plot_roc():
     if len(sys.argv) == 2:
         plot_roc()
     else:
-        print 'Usage - python plot-roc.py tablename'
+        print('Usage - python plot-roc.py tablename')
@@ -6,7 +6,6 @@
 import util.AOMLnetcdf as read_netcdf
 
 def test(p, parameters):
-
     qc = numpy.zeros(p.n_levels(), dtype=bool)
 
     # check for gaps in data
@@ -26,15 +25,15 @@ def test(p, parameters):
         interpTemp = interp_helper.temperature_interpolation_process(p.longitude(), p.latitude(), p.z()[i], depthColumns1, latLonsList1, lonlatWithTempsList1, False, "climaInterpTemperature")
         if interpTemp == 99999.99:
             continue
-    
+
         interpTempSD = interp_helper.temperature_interpolation_process(p.longitude(), p.latitude(), p.z()[i], depthColumns2, latLonsList2, lonlatWithTempsList2, False, "climaInterpStandardDev")
         if interpTempSD == 99999.99:
             continue
 
         # check if temperature at this depth is sufficiently close to the climatological expectation
         qc[i] = climatology_check(p.t()[i], interpTemp, interpTempSD) >= 4
 
-    return qc 
+    return qc
 
 def climatology_check(temperature, interpMNTemp, interpSDTemp, sigmaFactor=5.0):
   """
 
@@ -12,9 +12,9 @@ def test(p, parameters):
     isDepth = (z.mask==False)
 
     for i in range(p.n_levels()):
-    	if isTemperature[i] and not (-2.5 <= t[i] <= 40):
+        if isTemperature[i] and not (-2.5 <= t[i] <= 40):
             qc[i] = True
         if isDepth[i] and not (0 <= z[i] <= 2000):
             qc[i] = True
 
-    return qc
+    return qc
@@ -6,7 +6,7 @@ def test(p, parameters):
 
     config   = 'argo'
     testname = 'density_inversion'
-    
+
     qc = get_qc(p, config, testname)
 
     return qc