forked from h2oai/h2o-3
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgrabGLRMJenkinRunResults.py
executable file
·259 lines (195 loc) · 9.78 KB
/
grabGLRMJenkinRunResults.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
#!/usr/bin/python
import sys
import os
import json
"""
This script analyzes the Jenkins run logs that we have saved from Jenkins to our local computer.
For me, this is how I will call this script and the input arguments I will use:
python grabGLRMJenkinRunResults.py /Users/wendycwong/Documents/PUBDEV_3454_GLRM/experimentdata/glrm_memory_10_25_16
"""
# --------------------------------------------------------------------
# Module-level configuration
# --------------------------------------------------------------------
# Directory containing this script (symbolic links resolved by realpath).
g_test_root_dir = os.path.dirname(os.path.realpath(__file__))
# Base directory holding one sub-directory of logs per build, e.g. glrm_memory_10_25_16.
# Empty here; main() overwrites it with the first command-line argument.
g_log_base_dir = ""
g_airline_java = "java_0_0.out_airline.txt" # file name of the Java log for the airline runs, looked up inside each build directory
g_milsongs_java = "java_1_0.out_milsongs.txt" # file name of the Java log for the milsongs runs, looked up inside each build directory
# File name of the Python run results for the airline runs, looked up inside each build directory.
g_airline_python = "pyunit_airlines_performance_profile.py.out.txt"
# File name of the Python run results for the milsongs runs, looked up inside each build directory.
g_milsongs_python = "pyunit_milsong_performance_profile.py.out.txt"
# Only directory names containing this text are scanned for log files.
g_direct_name_start = "Build"
# Marker texts searched for (as substrings) in every line of a Java log. The last
# whitespace-separated token of a matching line is parsed as the time in ms.
g_initialXY = "Time taken (ms) to initializeXY with" # a matching line is also treated as the start of a new run
# NOTE: the identifier below is misspelled ("reguarlize") but other functions refer to it by this name.
g_reguarlize_Y = "Time taken (ms) to calculate regularize_y"
g_regularize_X_objective = "Time taken (ms) to calculate regularize_x and calculate"
g_updateX = "Time taken (ms) to updateX"
g_updateY = "Time taken (ms) to updateY"
g_objective = "Time taken (ms) to calculate new objective function value"
g_stepsize = "Time taken (ms) to set the step size"
g_history = "Time taken (ms) to history of run"
# Marker texts searched for (as substrings) in every line of a Python run-result file.
# The text after the last ':' of a matching line is evaluated as a Python sequence.
g_py_runtime = "Run time in ms: "
g_py_iteration = "number of iterations:"
def generate_octave_java_ascii(java_dict, fname):
    """
    Write the Java update X / update Y / objective timings as a plain-text table.

    Every output row holds three space-separated values taken from the same index of
    the three timing lists. Judging by the function name the table is meant to be
    loaded into Octave.

    @param java_dict: dict with the list-valued keys "update X (ms)", "update Y (ms)"
                      and "objective (ms)"
    @param fname: name of the output file, created (or overwritten) inside g_log_base_dir
    @return: none
    """
    x_times = java_dict["update X (ms)"]
    y_times = java_dict["update Y (ms)"]
    objective_times = java_dict["objective (ms)"]
    out_path = os.path.join(g_log_base_dir, fname)

    with open(out_path, 'w') as octave_file:
        # the update X list drives the number of rows; the other lists are indexed in step
        for row, x_time in enumerate(x_times):
            fields = [str(x_time), str(y_times[row]), str(objective_times[row])]
            octave_file.write(" ".join(fields) + "\n")
def generate_octave_py_ascii(py_dict, fname):
    """
    Write the Python run timings as a plain-text table.

    Every output row holds three space-separated values taken from the same index of
    the three lists: total run time, number of iterations and time per iteration.
    Judging by the function name the table is meant to be loaded into Octave.

    @param py_dict: dict with the list-valued keys "total time (ms)", "iteration number"
                    and "time (ms) per iteration"
    @param fname: name of the output file, created (or overwritten) inside g_log_base_dir
    @return: none
    """
    run_time = py_dict["total time (ms)"]
    iter_number = py_dict["iteration number"]
    time_per_iter = py_dict["time (ms) per iteration"]

    # Text mode ('w'), like generate_octave_java_ascii: the rows are str objects, and
    # writing str to a handle opened with 'wb' raises TypeError on Python 3.
    with open(os.path.join(g_log_base_dir, fname), 'w') as test_file:
        # the run time list drives the number of rows; the other lists are indexed in step
        for ind in range(len(run_time)):
            temp_str = str(run_time[ind])+" "+str(iter_number[ind])+" "+str(time_per_iter[ind])+"\n"
            test_file.write(temp_str)
def init_java_dict():
    """
    Build an empty result dictionary for the timings read from a Java log.

    @return: dict mapping each Java timing category name to its own new empty list
    """
    categories = (
        "total time (ms)",
        "initialXY (ms)",
        "regularize Y (ms)",
        "regularize X and objective (ms)",
        "update X (ms)",
        "update Y (ms)",
        "objective (ms)",
        "step size (ms)",
        "update history (ms)",
    )
    # a separate list per category, so appending to one never affects another
    return dict((category, []) for category in categories)
def init_python_dict():
    """
    Build an empty result dictionary for the values read from a Python run-result file.

    @return: dict mapping each Python result category name to its own new empty list
    """
    categories = ("total time (ms)", "iteration number", "time (ms) per iteration")
    # a separate list per category, so appending to one never affects another
    return dict((category, []) for category in categories)
def grab_java_results(dirName, java_file, run_result):
    """
    Collect the timing values from the Java log of one build directory.

    The log g_log_base_dir/dirName/java_file is scanned line by line. Whenever a line
    contains one of the timing marker texts, its last whitespace-separated token (with
    backslashes removed) is parsed as a float and appended to the matching list of
    run_result. A line containing g_initialXY starts a new run; the sum of all timings
    of each run is appended to run_result["total time (ms)"].

    Nothing is read when dirName does not contain g_direct_name_start.

    @param dirName: name of the build directory, relative to g_log_base_dir
    @param java_file: name of the Java log file inside that directory
    @param run_result: dict of lists as returned by init_java_dict(); extended in place
    @return: run_result
    """
    if g_direct_name_start not in dirName:
        print("Cannot find your java log file. Nothing is done.\n")
        return run_result

    # (marker text, result key) pairs; the first pair is the one that opens a new run
    timing_markers = (
        (g_initialXY, "initialXY (ms)"),
        (g_reguarlize_Y, "regularize Y (ms)"),
        (g_regularize_X_objective, "regularize X and objective (ms)"),
        (g_updateX, "update X (ms)"),
        (g_updateY, "update Y (ms)"),
        (g_objective, "objective (ms)"),
        (g_stepsize, "step size (ms)"),
        (g_history, "update history (ms)"),
    )
    log_path = os.path.join(g_log_base_dir, dirName, java_file)

    # None means "no timing line seen yet". A numeric sentinel such as -1 would end up
    # inside the recorded totals and cannot be told apart from a run that took 0 ms.
    total_run_time = None

    with open(log_path, 'r') as thefile:    # go into the log and grab the run timings
        for each_line in thefile:
            matched_keys = [key for marker, key in timing_markers if marker in each_line]
            if not matched_keys:
                continue

            # a matching line is never empty, so the last token always exists
            elapsed = float(each_line.split()[-1].replace('\\', ''))

            if g_initialXY in each_line:    # start of a new run
                if total_run_time is not None:  # close the previous run first
                    run_result["total time (ms)"].append(total_run_time)
                total_run_time = 0.0
            elif total_run_time is None:    # timings that precede any initializeXY line
                total_run_time = 0.0

            for key in matched_keys:
                run_result[key].append(elapsed)
                total_run_time += elapsed

    if total_run_time is not None:  # save the last run, if the log contained any timing
        run_result["total time (ms)"].append(total_run_time)
    return run_result
def grab_py_results(dirName, python_file, run_result):
    """
    Collect run times and iteration counts from the Python run results of one build directory.

    The file g_log_base_dir/dirName/python_file is scanned line by line. For a line
    containing g_py_runtime or g_py_iteration, the text after its last ':' is evaluated
    as a Python expression and the resulting sequence is added to the matching list of
    run_result.

    Nothing is read when dirName does not contain g_direct_name_start.

    @param dirName: name of the build directory, relative to g_log_base_dir
    @param python_file: name of the Python run-result file inside that directory
    @param run_result: dict of lists as returned by init_python_dict(); extended in place
    @return: run_result
    """
    if g_direct_name_start not in dirName:
        print("Cannot find your python log file. Nothing is done.\n")
        return run_result

    build_dir = os.path.join(g_log_base_dir, dirName)
    with open(os.path.join(build_dir, python_file), 'r') as log_file:
        for log_line in log_file:
            # everything after the last ':' (the whole line when there is none)
            payload = log_line.rpartition(':')[2].replace('\n', '')
            for marker, key in ((g_py_runtime, "total time (ms)"),
                                (g_py_iteration, "iteration number")):
                if marker in log_line:
                    # NOTE(review): eval() executes whatever text the log contains; only
                    # run this script on log files that come from a trusted source.
                    run_result[key].extend(eval(payload))
    return run_result
def transform_time_python(run_result):
    """
    Derive the time taken per iteration for every recorded Python run.

    For each index of run_result["total time (ms)"], the run time divided by the
    iteration count at the same index is appended to
    run_result["time (ms) per iteration"].

    @param run_result: dict with the list-valued keys "total time (ms)",
                       "iteration number" and "time (ms) per iteration"; extended in place
    @return: run_result
    """
    iteration_counts = run_result["iteration number"]
    per_iteration = run_result["time (ms) per iteration"]

    for run_index, elapsed_ms in enumerate(run_result["total time (ms)"]):
        # multiply by 1.0 first so the division is a float division on Python 2 as well
        per_iteration.append(elapsed_ms * 1.0 / iteration_counts[run_index])
    return run_result
def main(argv):
    """
    Main program.

    Takes the log base directory from argv[1], collects the Java and Python results of
    the airline and milsongs runs from every entry of that directory, prints them, and
    writes them back into the base directory as four JSON files and four plain-text
    tables. Prints a usage line and exits with status 1 when no directory is given.

    @param argv: command-line argument list; argv[1] is the log base directory
    @return: none
    """
    global g_log_base_dir

    if len(argv) < 2:
        # call form of print, like every other print in this file: it runs on both
        # Python 2 and Python 3, whereas the print statement is a SyntaxError on Python 3
        print("python grabGLRMrunLogs logsBaseDirectory\n")
        sys.exit(1)

    # we may be in business
    g_log_base_dir = argv[1]
    if not os.path.isdir(g_log_base_dir):
        # say why nothing happens instead of returning silently
        print("{0} is not a directory. Nothing is done.\n".format(g_log_base_dir))
        return

    airline_java_dict = init_java_dict()
    milsongs_java_dict = init_java_dict()
    airline_py_dict = init_python_dict()
    milsongs_py_dict = init_python_dict()

    # open directory and start to process the logs in each entry
    for dirName in os.listdir(g_log_base_dir):
        airline_java_dict = grab_java_results(dirName, g_airline_java, airline_java_dict)
        milsongs_java_dict = grab_java_results(dirName, g_milsongs_java, milsongs_java_dict)
        airline_py_dict = grab_py_results(dirName, g_airline_python, airline_py_dict)
        milsongs_py_dict = grab_py_results(dirName, g_milsongs_python, milsongs_py_dict)

    airline_py_dict = transform_time_python(airline_py_dict)    # calculate time taken per iteration
    milsongs_py_dict = transform_time_python(milsongs_py_dict)

    print("Airline Java log results: \n {0}".format(airline_java_dict))
    print("Airline Python log results: \n {0}".format(airline_py_dict))
    print("Milsongs Java log results: \n {0}".format(milsongs_java_dict))
    print("Milsongs Python log results: \n {0}".format(milsongs_py_dict))

    # dump the dictionaries into json files for later analysis; json.dump writes str,
    # so the files are opened in text mode ('wb' raises TypeError on Python 3)
    json_dumps = (
        ("airline_java_dict", airline_java_dict),
        ("airline_py_dict", airline_py_dict),
        ("milsongs_java_dict", milsongs_java_dict),
        ("milsongs_py_dict", milsongs_py_dict),
    )
    for dump_name, dump_dict in json_dumps:
        with open(os.path.join(g_log_base_dir, dump_name), 'w') as test_file:
            json.dump(dump_dict, test_file)

    # dump the analysis results as plain-text tables
    generate_octave_java_ascii(airline_java_dict, "airline_java_octave")
    generate_octave_java_ascii(milsongs_java_dict, "milsongs_java_octave")
    generate_octave_py_ascii(airline_py_dict, "airline_py_octave")
    generate_octave_py_ascii(milsongs_py_dict, "milsongs_py_octave")
# Run main() with the command-line arguments only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main(sys.argv)