Skip to content

Commit 603ee1b

Browse files
committed
beginning work on python2->3 port
1 parent 448ec30 commit 603ee1b

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

54 files changed

+196
-201
lines changed

AutoQC.py

+13-14
Original file line number | Diff line number | Diff line change
@@ -12,21 +12,20 @@ def run(test, profiles, parameters):
1212
verbose = []
1313
exec('from qctests import ' + test)
1414
for profile in profiles:
15-
exec('result = ' + test + '.test(profile, parameters)')
16-
verbose.append(result)
15+
exec('verbose.append(' + test + '.test(profile, parameters))')
1716

1817
return verbose
1918

2019
def process_row(uid, logdir):
2120
'''run all tests on the indicated database row'''
22-
21+
2322
# reroute stdout, stderr to separate files for each profile to preserve logs
2423
sys.stdout = open(logdir + "/" + str(uid) + ".stdout", "w")
2524
sys.stderr = open(logdir + "/" + str(uid) + ".stderr", "w")
2625

2726
# extract profile
2827
profile = main.get_profile_from_db(uid)
29-
28+
3029
# mask out error codes in temperature data
3130
main.catchFlags(profile)
3231

@@ -35,14 +34,14 @@ def process_row(uid, logdir):
3534
try:
3635
result = run(test, [profile], parameterStore)[0]
3736
except:
38-
print test, 'exception', sys.exc_info()
37+
print(test, 'exception', sys.exc_info())
3938
result = np.zeros(1, dtype=bool)
4039

4140
try:
4241
query = "UPDATE " + sys.argv[1] + " SET " + test + "=? WHERE uid=" + str(profile.uid()) + ";"
4342
main.dbinteract(query, [main.pack_array(result)])
4443
except:
45-
print 'db exception', sys.exc_info()
44+
print('db exception', sys.exc_info())
4645

4746

4847
########################################
@@ -76,12 +75,12 @@ def process_row(uid, logdir):
7675
try:
7776
exec(test + '.loadParameters(parameterStore)')
7877
except:
79-
print 'No parameters to load for', test
80-
78+
print('No parameters to load for', test)
79+
8180
# connect to database & fetch list of all uids
82-
query = 'SELECT uid FROM ' + sys.argv[1] + ' ORDER BY uid;'
81+
query = 'SELECT uid FROM ' + sys.argv[1] + ' WHERE subset=1 ORDER BY uid LIMIT 10;'
8382
uids = main.dbinteract(query)
84-
83+
8584
# launch async processes
8685
if len(sys.argv) > 4:
8786
batchnumber = int(sys.argv[3])
@@ -96,9 +95,9 @@ def process_row(uid, logdir):
9695
pool.apply_async(process_row, (uids[i][0], logdir))
9796
pool.close()
9897
pool.join()
99-
98+
10099
else:
101-
print 'Please add command line arguments to name your output file and set parallelization:'
102-
print 'python AutoQC <database results table> <number of processes> <batch> <number of processes per batch> [<batchnumber> <number per batch>]'
103-
print 'will use <database results table> to log QC results in the database, and run the calculation parallelized over <number of processes>. By default all profiles will be processed, but optionally the processing can be done in batches of size <number per batch>.'
100+
print('Please add command line arguments to name your output file and set parallelization:')
101+
print('python AutoQC <database results table> <number of processes> <batch> <number of processes per batch> [<batchnumber> <number per batch>]')
102+
print('will use <database results table> to log QC results in the database, and run the calculation parallelized over <number of processes>. By default all profiles will be processed, but optionally the processing can be done in batches of size <number per batch>.')
104103

analyse-results.py

+17-17
Original file line number | Diff line number | Diff line change
@@ -118,15 +118,15 @@ def find_roc(table,
118118
for c in cols:
119119
if len(pandas.unique(df[c])) == 1:
120120
nondiscrim.append(c)
121-
if verbose: print c + ' is nondiscriminating and will be removed'
121+
if verbose: print(c + ' is nondiscriminating and will be removed')
122122
cols = [t for t in cols if t not in nondiscrim]
123123
df = df[cols]
124-
print list(df)
124+
print(list(df))
125125
testNames = df.columns[2:].values.tolist()
126126

127127
if verbose:
128-
print 'Number of profiles is: ', len(df.index)
129-
print 'Number of quality checks to process is: ', len(testNames)
128+
print('Number of profiles is: ', len(df.index))
129+
print('Number of quality checks to process is: ', len(testNames))
130130

131131
# mark chosen profiles as part of the training set
132132
all_uids = main.dbinteract('SELECT uid from ' + sys.argv[1] + ';')
@@ -161,7 +161,7 @@ def find_roc(table,
161161
tprs.append(tpr)
162162
fprs.append(fpr)
163163
del df # No further need for the data frame.
164-
if verbose: print 'Number of quality checks after adding reverses and removing zero TPR was: ', len(names)
164+
if verbose: print('Number of quality checks after adding reverses and removing zero TPR was: ', len(names))
165165

166166
# Create storage to hold the roc curve.
167167
cumulative = truth.copy()
@@ -175,7 +175,7 @@ def find_roc(table,
175175

176176
# Pre-select some tests if required.
177177
if enforce_types_of_check:
178-
if verbose: print 'Enforcing types of checks'
178+
if verbose: print('Enforcing types of checks')
179179
while len(groupdefinition['At least one from group']) > 0:
180180
bestchoice = ''
181181
bestgroup = ''
@@ -191,22 +191,22 @@ def find_roc(table,
191191
cumulativenew = np.logical_or(cumulative, tests[itest])
192192
tpr, fpr, fnr, tnr = main.calcRates(cumulativenew, truth)
193193
newdist = return_cost(costratio, tpr, fpr)
194-
print ' ', tpr, fpr, newdist, bestdist, testname
194+
print(' ', tpr, fpr, newdist, bestdist, testname)
195195
if newdist == bestdist:
196196
if verbose:
197-
print ' ' + bestchoice + ' and ' + testname + ' have the same results and the first is kept'
197+
print(' ' + bestchoice + ' and ' + testname + ' have the same results and the first is kept')
198198
elif newdist < bestdist:
199199
bestchoice = testname
200200
bestdist = newdist
201201
besti = itest
202202
bestgroup = key
203203
else:
204-
if verbose: print ' ' + testname + ' not found and so was skipped'
204+
if verbose: print(' ' + testname + ' not found and so was skipped')
205205
#assert bestchoice != '', ' Error, did not make a choice in group ' + key
206-
if verbose: print ' ' + bestchoice + ' was selected from group ' + bestgroup
206+
if verbose: print(' ' + bestchoice + ' was selected from group ' + bestgroup)
207207
if fprs[besti] > 0:
208208
if tprs[besti] / fprs[besti] < effectiveness_ratio:
209-
print 'WARNING - ' + bestchoice + ' TPR / FPR is below the effectiveness ratio limit: ', tprs[besti] / fprs[besti], effectiveness_ratio
209+
print('WARNING - ' + bestchoice + ' TPR / FPR is below the effectiveness ratio limit: ', tprs[besti] / fprs[besti], effectiveness_ratio)
210210
cumulative = np.logical_or(cumulative, tests[besti])
211211
currenttpr, currentfpr, fnr, tnr = main.calcRates(cumulative, truth)
212212
testcomb.append(names[besti])
@@ -219,15 +219,15 @@ def find_roc(table,
219219
del fprs[besti]
220220
del tprs[besti]
221221
del groupdefinition['At least one from group'][bestgroup]
222-
print 'ROC point from enforced group: ', currenttpr, currentfpr, testcomb[-1], bestgroup
222+
print('ROC point from enforced group: ', currenttpr, currentfpr, testcomb[-1], bestgroup)
223223

224224
# Make combinations of the single checks and store.
225225
assert n_combination_iterations <= 2, 'Setting n_combination_iterations > 2 results in a very large number of combinations'
226-
if verbose: print 'Starting construction of combinations with number of iterations: ', n_combination_iterations
226+
if verbose: print('Starting construction of combinations with number of iterations: ', n_combination_iterations)
227227
for its in range(n_combination_iterations):
228228
ntests = len(names)
229229
for i in range(ntests - 1):
230-
if verbose: print 'Processing iteration ', its + 1, ' out of ', n_combination_iterations, ' step ', i + 1, ' out of ', ntests - 1, ' with number of tests now ', len(names)
230+
if verbose: print('Processing iteration ', its + 1, ' out of ', n_combination_iterations, ' step ', i + 1, ' out of ', ntests - 1, ' with number of tests now ', len(names))
231231
for j in range(i + 1, ntests):
232232
# Create the name for this combination.
233233
newname = ('&').join(sorted((names[i] + '&' + names[j]).split('&')))
@@ -240,7 +240,7 @@ def find_roc(table,
240240
tprs.append(tpr)
241241
fprs.append(fpr)
242242
names.append(newname)
243-
if verbose: print 'Completed generation of tests, now constructing roc from number of tests: ', len(names)
243+
if verbose: print('Completed generation of tests, now constructing roc from number of tests: ', len(names))
244244

245245
# Create roc.
246246
used = np.zeros(len(names), dtype=bool)
@@ -287,7 +287,7 @@ def find_roc(table,
287287
r_fprs.append(currentfpr)
288288
r_tprs.append(currenttpr)
289289
groupsel.append(False)
290-
print 'ROC point: ', currenttpr, currentfpr, names[besti], overallbest
290+
print('ROC point: ', currenttpr, currentfpr, names[besti], overallbest)
291291

292292
if plot_roc:
293293
plt.plot(r_fprs, r_tprs, 'k')
@@ -323,5 +323,5 @@ def find_roc(table,
323323
elif len(sys.argv) == 4:
324324
find_roc(sys.argv[1], n_profiles_to_analyse=sys.argv[2], costratio=[10.0, 10.0])
325325
else:
326-
print 'Usage - python analyse_results.py tablename <number of profiles to train ROC curve on> <optional character or number to indicate that we want a conservative set of QC tests i.e. with very low false positive rate>'
326+
print('Usage - python analyse_results.py tablename <number of profiles to train ROC curve on> <optional character or number to indicate that we want a conservative set of QC tests i.e. with very low false positive rate>')
327327

build-db.py

+7-7
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@ def assessProfile(p, check_originator_flag_type, months_to_use):
3131
if check_originator_flag_type:
3232
if int(p.originator_flag_type()) not in range(1,15):
3333
return False
34-
34+
3535
# check month
3636
if p.month() not in months_to_use:
3737
return False
@@ -168,22 +168,22 @@ def builddb(check_originator_flag_type = True,
168168
break
169169

170170
conn.commit()
171-
print 'number of clean profiles written:', good
172-
print 'number of flagged profiles written:', bad
173-
print 'total number of profiles written:', good+bad
171+
print('number of clean profiles written:', good)
172+
print('number of flagged profiles written:', bad)
173+
print('total number of profiles written:', good+bad)
174174

175175
if len(sys.argv) == 3:
176176

177177
builddb()
178-
178+
179179
elif len(sys.argv) == 5:
180180

181181
builddb(ast.literal_eval(sys.argv[3]), ast.literal_eval(sys.argv[4]))
182182

183183
else:
184184

185-
print 'Usage: python build-db.py <inputdatafile> <databasetable> <demand originator flags> <list of months to include (with no spaces or enclose in quotes)>'
186-
print 'Example: python build-db.py data.wod mytable False [1,2,3,10]'
185+
print('Usage: python build-db.py <inputdatafile> <databasetable> <demand originator flags> <list of months to include (with no spaces or enclose in quotes)>')
186+
print('Example: python build-db.py data.wod mytable False [1,2,3,10]')
187187

188188

189189

catchall.py

+10-10
Original file line number | Diff line number | Diff line change
@@ -38,9 +38,9 @@ def amend(combo, df):
3838
name = '&'.join(combo)
3939
return df.assign(xx=decision).rename(index=str, columns={'xx': name})
4040

41-
print '=============='
42-
print sys.argv[1]
43-
print '=============='
41+
print('==============')
42+
print(sys.argv[1])
43+
print('==============')
4444

4545
# Read QC test specifications if required.
4646
groupdefinition = ar.read_qc_groups()
@@ -74,7 +74,7 @@ def amend(combo, df):
7474
for test in testNames:
7575
tpr, fpr, fnr, tnr = main.calcRates(df[test].tolist(), df['Truth'].tolist())
7676
if fpr > 0 and tpr / fpr < perf_thresh:
77-
print 'dropping', test, '; tpr/fpr = ', tpr/fpr
77+
print('dropping', test, '; tpr/fpr = ', tpr/fpr)
7878
df.drop([test], axis=1)
7979
bad.drop([test], axis=1)
8080
drop_tests.append(test)
@@ -95,16 +95,16 @@ def amend(combo, df):
9595
fprs.append([x, fpr, tpr])
9696

9797
# accept tests that flag bad profiles with no false positives
98-
print 'number of bad profiles to consider:', len(bad)
98+
print('number of bad profiles to consider:', len(bad))
9999
for test in fprs:
100100
if test[1] == 0 and test[2] > 0:
101101
accepted.append(test[0])
102-
print 'accepted', test[0], 0
102+
print('accepted', test[0], 0)
103103
bad = bad[bad[test[0]]==False]
104104
bad = bad.drop([test[0]], axis=1)
105105
testNames.remove(test[0])
106106
fprs = [elt for elt in fprs if elt[0] not in accepted]
107-
print 'number of bad profiles remaining:', len(bad)
107+
print('number of bad profiles remaining:', len(bad))
108108

109109
# algo. step 3
110110
# add a column to df for each combo, summarizing its decision for each profile
@@ -129,7 +129,7 @@ def amend(combo, df):
129129
winner = x[x].keys()[0]
130130
accepted.append(winner) # accept the combo as the only one flagging this bad profile
131131
ff = [x for x in fprs if x[0] == winner][0][1]
132-
print 'accepted', winner, ff
132+
print('accepted', winner, ff)
133133
bad = bad[bad[winner]==False] # drop all bad profiles flagged by this combo
134134
bad = bad.drop([winner], axis=1) # remove the combo from consideration
135135
testNames = [elt for elt in testNames if elt is not winner]
@@ -145,8 +145,8 @@ def amend(combo, df):
145145
combonames = [x for x in combonames if x is not maxfpr]
146146
del fprs[-1]
147147

148-
print 'profiles not caught by any test:'
149-
print unflagged
148+
print('profiles not caught by any test:')
149+
print(unflagged)
150150

151151
f = open('catchall.json', 'w')
152152
r = {'tests': accepted}

cotede_qc/cotede_test.py

+6-6
Original file line number | Diff line number | Diff line change
@@ -21,15 +21,15 @@ def get_qc(p, config, test):
2121
global cotede_results
2222

2323
# Disable logging messages from CoTeDe unless they are more
24-
# severe than a warning.
25-
logging.disable('warn')
26-
24+
# severe than a warning.
25+
logging.disable(logging.WARNING)
26+
2727
# Create a dummy results variable if this is the first call.
28-
try:
28+
try:
2929
cotede_results
3030
except NameError:
3131
cotede_results = [-1, '', {}, None]
32-
32+
3333
var = 'TEMP'
3434

3535
# Check if we need to perform the quality control.
@@ -50,7 +50,7 @@ def get_qc(p, config, test):
5050

5151
if test == config:
5252
# AutoQC runs only on TEMP, so clean the rest.
53-
for v in cfg.keys():
53+
for v in list(cfg):
5454
if v not in ['main', var]:
5555
del(cfg[v])
5656
# If is a specific test,

filter-db.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -22,4 +22,4 @@
2222
conn.commit()
2323
else:
2424

25-
print "usage: python filter-db.py <full table name> <filtered table name> <number of good / bad profiles to pick>"
25+
print("usage: python filter-db.py <full table name> <filtered table name> <number of good / bad profiles to pick>")

plot-roc.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -119,4 +119,4 @@ def plot_roc():
119119
if len(sys.argv) == 2:
120120
plot_roc()
121121
else:
122-
print 'Usage - python plot-roc.py tablename'
122+
print('Usage - python plot-roc.py tablename')

qctests/AOML_climatology_test.py

+2-3
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,6 @@
66
import util.AOMLnetcdf as read_netcdf
77

88
def test(p, parameters):
9-
109
qc = numpy.zeros(p.n_levels(), dtype=bool)
1110

1211
# check for gaps in data
@@ -26,15 +25,15 @@ def test(p, parameters):
2625
interpTemp = interp_helper.temperature_interpolation_process(p.longitude(), p.latitude(), p.z()[i], depthColumns1, latLonsList1, lonlatWithTempsList1, False, "climaInterpTemperature")
2726
if interpTemp == 99999.99:
2827
continue
29-
28+
3029
interpTempSD = interp_helper.temperature_interpolation_process(p.longitude(), p.latitude(), p.z()[i], depthColumns2, latLonsList2, lonlatWithTempsList2, False, "climaInterpStandardDev")
3130
if interpTempSD == 99999.99:
3231
continue
3332

3433
# check if temperature at this depth is sufficiently close to the climatological expectation
3534
qc[i] = climatology_check(p.t()[i], interpTemp, interpTempSD) >= 4
3635

37-
return qc
36+
return qc
3837

3938
def climatology_check(temperature, interpMNTemp, interpSDTemp, sigmaFactor=5.0):
4039
"""

qctests/AOML_gross.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -12,9 +12,9 @@ def test(p, parameters):
1212
isDepth = (z.mask==False)
1313

1414
for i in range(p.n_levels()):
15-
if isTemperature[i] and not (-2.5 <= t[i] <= 40):
15+
if isTemperature[i] and not (-2.5 <= t[i] <= 40):
1616
qc[i] = True
1717
if isDepth[i] and not (0 <= z[i] <= 2000):
1818
qc[i] = True
1919

20-
return qc
20+
return qc

qctests/CoTeDe_Argo_density_inversion.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@ def test(p, parameters):
66

77
config = 'argo'
88
testname = 'density_inversion'
9-
9+
1010
qc = get_qc(p, config, testname)
1111

1212
return qc

0 commit comments

Comments
 (0)