forked from h2oai/h2o-3
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
PUBDEV-3435: GLRM stalling. Added scripts to collect and analyze Jenk…
…ins GLRM runs.
- Loading branch information
1 parent
d15064b
commit 1ab270f
Showing
2 changed files
with
380 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,259 @@ | ||
#!/usr/bin/python | ||
|
||
import sys | ||
import os | ||
import json | ||
|
||
""" | ||
This script is written to analysis the Jenkins run logs that we have saved from jenkins to our local computer. | ||
For me, this is how I will call this script and the input arguments I will use: | ||
python grabGLRMJenkinRunResults.py /Users/wendycwong/Documents/PUBDEV_3454_GLRM/experimentdata/glrm_memory_10_25_16 | ||
""" | ||
|
||
# -------------------------------------------------------------------- | ||
# Main program | ||
# -------------------------------------------------------------------- | ||
# directory where we are running out code from h2o-3/scripts | ||
g_test_root_dir = os.path.dirname(os.path.realpath(__file__)) | ||
|
||
|
||
# base directory where different logs will be stored under different build directory, e.g. glrm_memory_10_25_16 | ||
g_log_base_dir = "" | ||
g_airline_java = "java_0_0.out_airline.txt" # name of java log file for airline runs you want to store as, e.g. java_0_0.out_airline.txt | ||
g_milsongs_java = "java_1_0.out_milsongs.txt" # name of java log file for milsongs runs you want to store as, e.g. java_1_0.out_milsongs.txt | ||
# name of python run results for airline runs to store as, e.g. pyunit_airlines_performance_profile.py.out.txt | ||
g_airline_python = "pyunit_airlines_performance_profile.py.out.txt" | ||
# name of python run results for airline runs to store as, e.g. pyunit_milsong_performance_profile.py.out.txt | ||
g_milsongs_python = "pyunit_milsong_performance_profile.py.out.txt" | ||
g_direct_name_start = "Build" | ||
g_initialXY = "Time taken (ms) to initializeXY with" # text of interest | ||
g_reguarlize_Y = "Time taken (ms) to calculate regularize_y" | ||
g_regularize_X_objective = "Time taken (ms) to calculate regularize_x and calculate" | ||
g_updateX = "Time taken (ms) to updateX" | ||
g_updateY = "Time taken (ms) to updateY" | ||
g_objective = "Time taken (ms) to calculate new objective function value" | ||
g_stepsize = "Time taken (ms) to set the step size" | ||
g_history = "Time taken (ms) to history of run" | ||
g_py_runtime = "Run time in ms: " | ||
g_py_iteration = "number of iterations:" | ||
|
||
|
||
def generate_octave_java_ascii(java_dict, fname): | ||
global g_log_base_dir | ||
|
||
updateX = java_dict["update X (ms)"] | ||
updateY = java_dict["update Y (ms)"] | ||
obj = java_dict["objective (ms)"] | ||
|
||
|
||
with open(os.path.join(g_log_base_dir, fname),'w') as test_file: | ||
for ind in range(0, len(updateX)): | ||
temp_str = str(updateX[ind])+" "+str(updateY[ind])+" "+str(obj[ind])+"\n" | ||
test_file.write(temp_str) | ||
|
||
|
||
def generate_octave_py_ascii(py_dict, fname): | ||
global g_log_base_dir | ||
|
||
run_time = py_dict["total time (ms)"] | ||
iter_number = py_dict["iteration number"] | ||
time_per_iter = py_dict["time (ms) per iteration"] | ||
|
||
with open(os.path.join(g_log_base_dir, fname),'wb') as test_file: | ||
for ind in range(0,len(run_time)): | ||
temp_str = str(run_time[ind])+" "+str(iter_number[ind])+" "+str(time_per_iter[ind])+"\n" | ||
test_file.write(temp_str) | ||
|
||
|
||
def init_java_dict(): | ||
dict_name = dict() | ||
dict_name["total time (ms)"] = [] | ||
dict_name["initialXY (ms)"] = [] | ||
dict_name["regularize Y (ms)"] = [] | ||
dict_name["regularize X and objective (ms)"] = [] | ||
dict_name["update X (ms)"] = [] | ||
dict_name["update Y (ms)"] = [] | ||
dict_name["objective (ms)"] = [] | ||
dict_name["step size (ms)"] = [] | ||
dict_name["update history (ms)"] = [] | ||
|
||
return dict_name | ||
|
||
|
||
def init_python_dict(): | ||
dict_name = dict() | ||
dict_name["total time (ms)"] = [] | ||
dict_name["iteration number"] = [] | ||
dict_name["time (ms) per iteration"] = [] | ||
|
||
return dict_name | ||
|
||
|
||
def grab_java_results(dirName, java_file, run_result): | ||
global g_direct_name_start | ||
global g_log_base_dir | ||
global g_initialXY | ||
global g_reguarlize_Y | ||
global g_regularize_X_objective | ||
global g_updateX | ||
global g_updateY | ||
global g_objective | ||
global g_stepsize | ||
global g_history | ||
|
||
if not (g_direct_name_start in dirName): | ||
print("Cannot find your java log file. Nothing is done.\n") | ||
return run_result | ||
|
||
logText = os.path.join(os.path.join(g_log_base_dir, dirName), java_file) | ||
total_run_time = -1 | ||
val = 0.0 | ||
|
||
with open(logText, 'r') as thefile: # go into tempfile and grab test run info | ||
for each_line in thefile: | ||
temp_string = each_line.split() | ||
|
||
if len(temp_string) > 0: | ||
val = temp_string[-1].replace('\\','') | ||
|
||
if g_initialXY in each_line: # start of a new file | ||
if total_run_time > 0: # update total run time | ||
run_result["total time (ms)"].append(total_run_time) | ||
total_run_time = 0.0 | ||
else: | ||
total_run_time = 0.0 | ||
|
||
run_result["initialXY (ms)"].append(float(val)) | ||
total_run_time = total_run_time+float(val) | ||
|
||
if g_reguarlize_Y in each_line: | ||
run_result["regularize Y (ms)"].append(float(val)) | ||
total_run_time = total_run_time+float(val) | ||
|
||
if g_regularize_X_objective in each_line: | ||
run_result["regularize X and objective (ms)"].append(float(val)) | ||
total_run_time = total_run_time+float(val) | ||
|
||
if g_updateX in each_line: | ||
run_result["update X (ms)"].append(float(val)) | ||
total_run_time = total_run_time+float(val) | ||
|
||
if g_updateY in each_line: | ||
run_result["update Y (ms)"].append(float(val)) | ||
total_run_time = total_run_time+float(val) | ||
|
||
if g_objective in each_line: | ||
run_result["objective (ms)"].append(float(val)) | ||
total_run_time = total_run_time+float(val) | ||
|
||
if g_stepsize in each_line: | ||
run_result["step size (ms)"].append(float(val)) | ||
total_run_time = total_run_time+float(val) | ||
|
||
if g_history in each_line: | ||
run_result["update history (ms)"].append(float(val)) | ||
total_run_time = total_run_time+float(val) | ||
|
||
run_result["total time (ms)"].append(total_run_time) # save the last one | ||
|
||
return run_result | ||
|
||
|
||
def grab_py_results(dirName, python_file, run_result): | ||
global g_direct_name_start | ||
global g_log_base_dir | ||
global g_py_runtime | ||
global g_py_iteration | ||
|
||
if not (g_direct_name_start in dirName): | ||
print("Cannot find your python log file. Nothing is done.\n") | ||
return run_result | ||
|
||
logText = os.path.join(os.path.join(g_log_base_dir, dirName), python_file) | ||
with open(logText, 'r') as thefile: # go into tempfile and grab test run info | ||
for each_line in thefile: | ||
temp_string = each_line.split(':') | ||
|
||
if len(temp_string) > 0: | ||
val = temp_string[-1].replace('\n','') | ||
|
||
if g_py_runtime in each_line: # found run time sequence | ||
run_result["total time (ms)"].extend(eval(val)) | ||
|
||
if g_py_iteration in each_line: | ||
run_result["iteration number"].extend(eval(val)) | ||
return run_result | ||
|
||
def transform_time_python(run_result): | ||
run_times = run_result["total time (ms)"] | ||
total_iterations = run_result["iteration number"] | ||
|
||
for ind in range(0, len(run_times)): | ||
run_result["time (ms) per iteration"].append(run_times[ind]*1.0/total_iterations[ind]) | ||
|
||
return run_result | ||
|
||
|
||
def main(argv): | ||
""" | ||
Main program. | ||
@return: none | ||
""" | ||
global g_log_base_dir | ||
global g_airline_java | ||
global g_milsongs_java | ||
global g_airline_python | ||
global g_milsongs_python | ||
|
||
if len(argv) < 2: | ||
print "python grabGLRMrunLogs logsBaseDirectory\n" | ||
sys.exit(1) | ||
else: # we may be in business | ||
g_log_base_dir = argv[1] | ||
|
||
if (os.path.isdir(g_log_base_dir)): # open directory and start to process logs in each one | ||
airline_java_dict = init_java_dict() | ||
milsongs_java_dict = init_java_dict() | ||
airline_py_dict = init_python_dict() | ||
milsongs_py_dict = init_python_dict() | ||
|
||
allBuilds = os.listdir(g_log_base_dir) | ||
for dirName in allBuilds: | ||
airline_java_dict = grab_java_results(dirName, g_airline_java, airline_java_dict) | ||
milsongs_java_dict = grab_java_results(dirName, g_milsongs_java, milsongs_java_dict) | ||
airline_py_dict = grab_py_results(dirName, g_airline_python, airline_py_dict) | ||
milsongs_py_dict = grab_py_results(dirName, g_milsongs_python, milsongs_py_dict) | ||
|
||
airline_py_dict = transform_time_python(airline_py_dict) # calculate time taken per iteration | ||
milsongs_py_dict = transform_time_python(milsongs_py_dict) | ||
|
||
print("Airline Java log results: \n {0}".format(airline_java_dict)) | ||
print("Airline Python log results: \n {0}".format(airline_py_dict)) | ||
print("Milsongs Java log results: \n {0}".format(milsongs_java_dict)) | ||
print("Milsongs Python log results: \n {0}".format(milsongs_py_dict)) | ||
|
||
# dump dictionary into json files for later analysis | ||
with open(os.path.join(g_log_base_dir, "airline_java_dict"),'wb') as test_file: | ||
json.dump(airline_java_dict, test_file) | ||
|
||
with open(os.path.join(g_log_base_dir, "airline_py_dict"),'wb') as test_file: | ||
json.dump(airline_py_dict, test_file) | ||
|
||
with open(os.path.join(g_log_base_dir, "milsongs_java_dict"),'wb') as test_file: | ||
json.dump(milsongs_java_dict, test_file) | ||
|
||
with open(os.path.join(g_log_base_dir, "milsongs_py_dict"),'wb') as test_file: | ||
json.dump(milsongs_py_dict, test_file) | ||
|
||
# dump analysis results into json format that octave can understand and process | ||
generate_octave_java_ascii(airline_java_dict, "airline_java_octave") | ||
generate_octave_java_ascii(milsongs_java_dict, "milsongs_java_octave") | ||
generate_octave_py_ascii(airline_py_dict, "airline_py_octave") | ||
generate_octave_py_ascii(milsongs_py_dict, "milsongs_py_octave") | ||
|
||
|
||
if __name__ == "__main__": | ||
main(sys.argv) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,121 @@ | ||
#!/usr/bin/python | ||
|
||
import sys | ||
import os | ||
import json | ||
import pickle | ||
|
||
import copy | ||
import subprocess | ||
|
||
|
||
""" | ||
This script is written to grab the logs from our GLRM runs and saved them onto our local machines for | ||
later analysis. For me, this is how I will call this script and the input arguments I will use: | ||
python grabGLRMrunLogs.py /Users/wendycwong/Documents/PUBDEV_3454_GLRM/experimentdata/glrm_memory_10_25_16 | ||
http://mr-0xa1:8080/view/wendy/job/glrm_memory_performance/ | ||
/Users/wendycwong/Documents/PUBDEV_3454_GLRM/experimentdata/glrm_memory_10_25_16 | ||
java_0_0.out_airline.txt java_1_0.out_milsongs.txt pyunit_airlines_performance_profile.py.out.txt | ||
pyunit_milsongs_performance_profile.py.out.txt 8 26 | ||
""" | ||
|
||
# -------------------------------------------------------------------- | ||
# Main program | ||
# -------------------------------------------------------------------- | ||
# directory where we are running out code from h2o-3/scripts | ||
g_test_root_dir = os.path.dirname(os.path.realpath(__file__)) | ||
|
||
g_airline_py_tail = "/artifact/h2o-py/GLRM_performance_tests/results/pyunit_airlines_performance_profile.py.out.txt" | ||
g_milsongs_py_tail = "/artifact/h2o-py/GLRM_performance_tests/results/pyunit_milsong_performance_profile.py.out.txt" | ||
g_airline_java_tail = "/artifact/h2o-py/GLRM_performance_tests/results/java_0_0.out.txt" | ||
g_milsongs_java_tail = "/artifact/h2o-py/GLRM_performance_tests/results/java_1_0.out.txt" | ||
|
||
|
||
# base directory where different logs will be stored under different build directory, e.g. glrm_memory_10_25_16 | ||
g_log_base_dir = "" | ||
g_airline_java = "" # name of java log file for airline runs you want to store as, e.g. java_0_0.out_airline.txt | ||
g_milsongs_java = "" # name of java log file for milsongs runs you want to store as, e.g. java_1_0.out_milsongs.txt | ||
# name of python run results for airline runs to store as, e.g. pyunit_airlines_performance_profile.py.out.txt | ||
g_airline_python = "" | ||
# name of python run results for airline runs to store as, e.g. pyunit_milsong_performance_profile.py.out.txt | ||
g_milsongs_python = "" | ||
g_jenkins_url = "" # url to your jenkins job, e.g.http://mr-0xa1:8080/view/wendy/job/glrm_original_performance/ | ||
g_start_build_number = 0 # starting build number to collect your data | ||
g_end_build_number = 1 # ending build number to collect your data | ||
|
||
|
||
def get_file_out(build_index, python_name, jenkin_name): | ||
""" | ||
This function will grab one log file from Jenkins and save it to local user directory | ||
:param g_jenkins_url: | ||
:param build_index: | ||
:param airline_java: | ||
:param airline_java_tail: | ||
:return: | ||
""" | ||
global g_log_base_dir | ||
global g_jenkins_url | ||
global g_log_base_dir | ||
|
||
directoryB = g_log_base_dir+'/Build'+str(build_index) | ||
|
||
if not(os.path.isdir(directoryB)): # make directory if it does not exist | ||
os.mkdir(directoryB) | ||
|
||
url_string_full = g_jenkins_url+'/'+str(build_index)+jenkin_name | ||
filename = os.path.join(directoryB, python_name) | ||
|
||
full_command = 'curl ' + url_string_full + ' > ' + filename | ||
subprocess.call(full_command,shell=True) | ||
|
||
|
||
def main(argv): | ||
""" | ||
Main program. | ||
@return: none | ||
""" | ||
global g_log_base_dir | ||
global g_airline_java | ||
global g_milsongs_java | ||
global g_airline_python | ||
global g_milsongs_python | ||
global g_jenkins_url | ||
global g_airline_py_tail | ||
global g_milsongs_py_tail | ||
global g_airline_java_tail | ||
global g_milsongs_java_tail | ||
|
||
if len(argv) < 9: | ||
print "python grabGLRMrunLogs logsBaseDirectory airlineJavaFileNameWithPath milsongJavaFileNameWithPath " \ | ||
"airlinePyunitWithPath airlinePyunitWithPath jenkinsJobURL startBuild# endBuild#.\n" | ||
sys.exit(1) | ||
else: # we may be in business | ||
# g_script_name = os.path.basename(argv[0]) # get name of script being run. | ||
# base directory where all logs will be collected according to build # | ||
g_log_base_dir = argv[1] | ||
g_jenkins_url = argv[2] | ||
g_airline_java = argv[3] | ||
g_milsongs_java = argv[4] | ||
g_airline_python = argv[5] | ||
g_milsongs_python = argv[6] | ||
start_number = int(argv[7]) | ||
end_number = int(argv[8]) | ||
|
||
if (start_number > end_number): | ||
print "startBuild# must be <= end_number" | ||
sys.exit(1) | ||
else: | ||
for build_index in range(start_number, end_number+1): # grab log info for all builds | ||
# copy the java jobs | ||
get_file_out(build_index, g_airline_java, g_airline_java_tail) | ||
get_file_out(build_index, g_milsongs_java, g_milsongs_java_tail) | ||
|
||
# copy the pyunit jobs | ||
get_file_out(build_index, g_airline_python, g_airline_py_tail) | ||
get_file_out(build_index, g_milsongs_python, g_milsongs_py_tail) | ||
|
||
|
||
if __name__ == "__main__": | ||
main(sys.argv) |