Skip to content

Commit

Permalink
PUBDEV-3435: GLRM stalling. Added scripts to collect and analyze Jenk…
Browse files Browse the repository at this point in the history
…ins GLRM runs.
  • Loading branch information
wendycwong committed Nov 7, 2016
1 parent d15064b commit 1ab270f
Show file tree
Hide file tree
Showing 2 changed files with 380 additions and 0 deletions.
259 changes: 259 additions & 0 deletions scripts/grabGLRMJenkinRunResults.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,259 @@
#!/usr/bin/python

import sys
import os
import json

"""
This script analyzes the Jenkins run logs that we have saved from Jenkins to our local computer.
For me, this is how I will call this script and the input arguments I will use:
python grabGLRMJenkinRunResults.py /Users/wendycwong/Documents/PUBDEV_3454_GLRM/experimentdata/glrm_memory_10_25_16
"""

# --------------------------------------------------------------------
# Global settings
# --------------------------------------------------------------------
# directory that contains this script (h2o-3/scripts when run from the repository)
g_test_root_dir = os.path.dirname(os.path.realpath(__file__))


# base directory under which the logs of each build are stored in their own build directory,
# e.g. glrm_memory_10_25_16.  It is overwritten by main() with the first command-line argument.
g_log_base_dir = ""
g_airline_java = "java_0_0.out_airline.txt" # name the java log file for airline runs was saved as
g_milsongs_java = "java_1_0.out_milsongs.txt" # name the java log file for milsongs runs was saved as
# name the python run results for airline runs were saved as
g_airline_python = "pyunit_airlines_performance_profile.py.out.txt"
# name the python run results for milsongs runs were saved as
g_milsongs_python = "pyunit_milsong_performance_profile.py.out.txt"
# only entries of g_log_base_dir whose name contains this text are treated as build directories
g_direct_name_start = "Build"
# Marker texts searched for in each line of a java log.  When a line contains a marker, the last
# whitespace-separated token of that line is read as the elapsed time in ms.
g_initialXY = "Time taken (ms) to initializeXY with" # also marks the start of a new run in the log
g_reguarlize_Y = "Time taken (ms) to calculate regularize_y" # NOTE: variable name is misspelled ("reguarlize")
g_regularize_X_objective = "Time taken (ms) to calculate regularize_x and calculate"
g_updateX = "Time taken (ms) to updateX"
g_updateY = "Time taken (ms) to updateY"
g_objective = "Time taken (ms) to calculate new objective function value"
g_stepsize = "Time taken (ms) to set the step size"
g_history = "Time taken (ms) to history of run"
# Marker texts searched for in each line of a python result file.  The text after the last ':' of a
# matching line is evaluated as a Python expression and its items are added to the results.
g_py_runtime = "Run time in ms: "
g_py_iteration = "number of iterations:"


def generate_octave_java_ascii(java_dict, fname):
    """
    Save the updateX, updateY and objective timings as a plain ASCII table.

    One line is written per entry of java_dict["update X (ms)"]; each line holds the
    updateX, updateY and objective values separated by single spaces.

    :param java_dict: dict holding lists under the keys "update X (ms)", "update Y (ms)"
        and "objective (ms)".
    :param fname: name of the file to write inside g_log_base_dir.
    :return: none
    """
    x_times = java_dict["update X (ms)"]
    y_times = java_dict["update Y (ms)"]
    objective_times = java_dict["objective (ms)"]

    out_path = os.path.join(g_log_base_dir, fname)
    with open(out_path, 'w') as out_file:
        for row, x_val in enumerate(x_times):
            columns = [str(x_val), str(y_times[row]), str(objective_times[row])]
            out_file.write(" ".join(columns) + "\n")


def generate_octave_py_ascii(py_dict, fname):
    """
    Save the python run timings as a plain ASCII table.

    One line is written per entry of py_dict["total time (ms)"]; each line holds the
    total run time, the iteration count and the time per iteration separated by
    single spaces.

    :param py_dict: dict holding lists under the keys "total time (ms)",
        "iteration number" and "time (ms) per iteration".
    :param fname: name of the file to write inside g_log_base_dir.
    :return: none
    """
    run_time = py_dict["total time (ms)"]
    iter_number = py_dict["iteration number"]
    time_per_iter = py_dict["time (ms) per iteration"]

    # Text mode ('w'), as in generate_octave_java_ascii: the rows are str, and writing
    # str to a file opened with 'wb' raises TypeError on Python 3.
    with open(os.path.join(g_log_base_dir, fname), 'w') as test_file:
        for ind, total_ms in enumerate(run_time):
            columns = [str(total_ms), str(iter_number[ind]), str(time_per_iter[ind])]
            test_file.write(" ".join(columns) + "\n")


def init_java_dict():
    """
    Build the empty result container used for the java log timings.

    :return: dict mapping every java timing category to its own new empty list.
    """
    categories = (
        "total time (ms)",
        "initialXY (ms)",
        "regularize Y (ms)",
        "regularize X and objective (ms)",
        "update X (ms)",
        "update Y (ms)",
        "objective (ms)",
        "step size (ms)",
        "update history (ms)",
    )
    # a separate list per key so that appending to one category never touches another
    return dict((category, []) for category in categories)


def init_python_dict():
    """
    Build the empty result container used for the python run results.

    :return: dict with a new empty list under each of "total time (ms)",
        "iteration number" and "time (ms) per iteration".
    """
    return {
        "total time (ms)": [],
        "iteration number": [],
        "time (ms) per iteration": [],
    }


def grab_java_results(dirName, java_file, run_result):
    """
    Read the timings recorded in one build's java log and append them to run_result.

    Every line of the log is checked against the marker texts held in the g_* globals.
    When a line contains a marker, its last whitespace-separated token (with backslashes
    removed) is converted to float and appended to the matching list in run_result.  A line
    containing g_initialXY starts a new run; the sum of all timings seen for a run is
    appended to run_result["total time (ms)"] when the next run starts or the file ends.

    Nothing is appended to "total time (ms)" for a log that contains no timing line at all
    (the original code appended a bogus -1 in that case), and a run whose timings sum to 0
    is still recorded so that the lists stay aligned.

    :param dirName: name of the build directory inside g_log_base_dir; it is skipped unless
        the name contains g_direct_name_start.
    :param java_file: name of the java log file inside that build directory.
    :param run_result: dict of lists as created by init_java_dict(); updated in place.
    :return: run_result
    :raises ValueError: if the last token of a matching line is not a number.
    """
    log_path = os.path.join(g_log_base_dir, dirName, java_file)

    # skip entries that are not build directories as well as builds that lack this log file
    if g_direct_name_start not in dirName or not os.path.isfile(log_path):
        print("Cannot find your java log file. Nothing is done.\n")
        return run_result

    # (marker text, key in run_result) for every timing line we collect
    timed_steps = (
        (g_initialXY, "initialXY (ms)"),
        (g_reguarlize_Y, "regularize Y (ms)"),
        (g_regularize_X_objective, "regularize X and objective (ms)"),
        (g_updateX, "update X (ms)"),
        (g_updateY, "update Y (ms)"),
        (g_objective, "objective (ms)"),
        (g_stepsize, "step size (ms)"),
        (g_history, "update history (ms)"),
    )
    total_run_time = None   # None until the first timing line of the file has been seen

    with open(log_path, 'r') as thefile:
        for each_line in thefile:
            tokens = each_line.split()
            if not tokens:      # blank line, cannot contain a marker
                continue
            raw_val = tokens[-1].replace('\\', '')

            if g_initialXY in each_line:    # start of a new run: close the previous one
                if total_run_time is not None:
                    run_result["total time (ms)"].append(total_run_time)
                total_run_time = 0.0

            for marker, key in timed_steps:
                if marker in each_line:
                    elapsed = float(raw_val)
                    run_result[key].append(elapsed)
                    if total_run_time is None:  # timing line seen before any initializeXY line
                        total_run_time = 0.0
                    total_run_time += elapsed

    if total_run_time is not None:
        run_result["total time (ms)"].append(total_run_time)    # save the last one

    return run_result


def grab_py_results(dirName, python_file, run_result):
    """
    Read the run times and iteration counts stored in one build's python result file.

    For every line containing g_py_runtime or g_py_iteration, the text after the last ':'
    is evaluated as a Python expression and its items are added to
    run_result["total time (ms)"] or run_result["iteration number"] respectively.

    :param dirName: name of the build directory inside g_log_base_dir; it is skipped unless
        the name contains g_direct_name_start.
    :param python_file: name of the python result file inside that build directory.
    :param run_result: dict of lists as created by init_python_dict(); updated in place.
    :return: run_result
    """
    if g_direct_name_start not in dirName:
        print("Cannot find your python log file. Nothing is done.\n")
        return run_result

    # (marker text, key in run_result) for the two kinds of line we collect
    wanted = (
        (g_py_runtime, "total time (ms)"),
        (g_py_iteration, "iteration number"),
    )

    build_dir = os.path.join(g_log_base_dir, dirName)
    with open(os.path.join(build_dir, python_file), 'r') as result_file:
        for each_line in result_file:
            # everything after the last ':' of the line, with newline characters removed
            payload = each_line.rsplit(':', 1)[-1].replace('\n', '')

            for marker, key in wanted:
                if marker in each_line:
                    # NOTE(review): eval() executes whatever text the log file contains;
                    # only run this on logs you trust.
                    run_result[key].extend(eval(payload))

    return run_result

def transform_time_python(run_result):
    """
    Derive the time spent per iteration for every python run.

    For each entry of run_result["total time (ms)"], the value divided by the entry at the
    same position of run_result["iteration number"] is appended to
    run_result["time (ms) per iteration"].

    :param run_result: dict of lists as created by init_python_dict(); updated in place.
    :return: run_result
    """
    elapsed_ms = run_result["total time (ms)"]
    iteration_counts = run_result["iteration number"]

    for position, total_ms in enumerate(elapsed_ms):
        # multiply by 1.0 first so that integer inputs still give a float quotient
        run_result["time (ms) per iteration"].append(total_ms * 1.0 / iteration_counts[position])

    return run_result


def main(argv):
    """
    Main program.  Collect the java and python timings from every build directory found in
    the log base directory, print them, and save them in that directory both as json files
    and as ASCII tables.

    :param argv: command-line arguments; argv[1] is the log base directory.
    @return: none
    """
    global g_log_base_dir

    if len(argv) < 2:
        print("python grabGLRMJenkinRunResults.py logsBaseDirectory\n")
        sys.exit(1)

    g_log_base_dir = argv[1]

    if not os.path.isdir(g_log_base_dir):
        # say why nothing happens instead of returning silently
        print("{0} is not a directory. Nothing is done.\n".format(g_log_base_dir))
        return

    airline_java_dict = init_java_dict()
    milsongs_java_dict = init_java_dict()
    airline_py_dict = init_python_dict()
    milsongs_py_dict = init_python_dict()

    # open directory and process the logs in each entry
    for dirName in os.listdir(g_log_base_dir):
        airline_java_dict = grab_java_results(dirName, g_airline_java, airline_java_dict)
        milsongs_java_dict = grab_java_results(dirName, g_milsongs_java, milsongs_java_dict)
        airline_py_dict = grab_py_results(dirName, g_airline_python, airline_py_dict)
        milsongs_py_dict = grab_py_results(dirName, g_milsongs_python, milsongs_py_dict)

    airline_py_dict = transform_time_python(airline_py_dict)    # calculate time taken per iteration
    milsongs_py_dict = transform_time_python(milsongs_py_dict)

    print("Airline Java log results: \n {0}".format(airline_java_dict))
    print("Airline Python log results: \n {0}".format(airline_py_dict))
    print("Milsongs Java log results: \n {0}".format(milsongs_java_dict))
    print("Milsongs Python log results: \n {0}".format(milsongs_py_dict))

    # dump dictionaries into json files for later analysis; the files are opened in text
    # mode because json.dump writes str, which a 'wb' file rejects on Python 3
    json_dumps = (
        ("airline_java_dict", airline_java_dict),
        ("airline_py_dict", airline_py_dict),
        ("milsongs_java_dict", milsongs_java_dict),
        ("milsongs_py_dict", milsongs_py_dict),
    )
    for dump_name, dump_data in json_dumps:
        with open(os.path.join(g_log_base_dir, dump_name), 'w') as test_file:
            json.dump(dump_data, test_file)

    # dump analysis results as ASCII tables for processing in octave
    generate_octave_java_ascii(airline_java_dict, "airline_java_octave")
    generate_octave_java_ascii(milsongs_java_dict, "milsongs_java_octave")
    generate_octave_py_ascii(airline_py_dict, "airline_py_octave")
    generate_octave_py_ascii(milsongs_py_dict, "milsongs_py_octave")


if __name__ == "__main__":
    main(sys.argv)
121 changes: 121 additions & 0 deletions scripts/grabGLRMrunLogs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
#!/usr/bin/python

import sys
import os
import json
import pickle

import copy
import subprocess


"""
This script is written to grab the logs from our GLRM runs and save them onto our local machines for
later analysis. For me, this is how I will call this script and the input arguments I will use:
python grabGLRMrunLogs.py /Users/wendycwong/Documents/PUBDEV_3454_GLRM/experimentdata/glrm_memory_10_25_16
http://mr-0xa1:8080/view/wendy/job/glrm_memory_performance/
java_0_0.out_airline.txt java_1_0.out_milsongs.txt pyunit_airlines_performance_profile.py.out.txt
pyunit_milsong_performance_profile.py.out.txt 8 26
"""

# --------------------------------------------------------------------
# Global settings
# --------------------------------------------------------------------
# directory that contains this script (h2o-3/scripts when run from the repository)
g_test_root_dir = os.path.dirname(os.path.realpath(__file__))

# Paths of the result files inside one Jenkins build.  get_file_out() appends one of them to
# "<jenkins job url>/<build number>" to form the download URL.
g_airline_py_tail = "/artifact/h2o-py/GLRM_performance_tests/results/pyunit_airlines_performance_profile.py.out.txt"
g_milsongs_py_tail = "/artifact/h2o-py/GLRM_performance_tests/results/pyunit_milsong_performance_profile.py.out.txt"
g_airline_java_tail = "/artifact/h2o-py/GLRM_performance_tests/results/java_0_0.out.txt"
g_milsongs_java_tail = "/artifact/h2o-py/GLRM_performance_tests/results/java_1_0.out.txt"


# The settings below are placeholders; main() overwrites all but the two build numbers with
# the command-line arguments.
# base directory where different logs will be stored under different build directory, e.g. glrm_memory_10_25_16
g_log_base_dir = ""
g_airline_java = "" # name of java log file for airline runs you want to store as, e.g. java_0_0.out_airline.txt
g_milsongs_java = "" # name of java log file for milsongs runs you want to store as, e.g. java_1_0.out_milsongs.txt
# name of python run results for airline runs to store as, e.g. pyunit_airlines_performance_profile.py.out.txt
g_airline_python = ""
# name of python run results for milsongs runs to store as, e.g. pyunit_milsong_performance_profile.py.out.txt
g_milsongs_python = ""
g_jenkins_url = "" # url to your jenkins job, e.g. http://mr-0xa1:8080/view/wendy/job/glrm_original_performance/
# NOTE(review): the two build numbers below are not read anywhere in the code shown here;
# main() keeps the build range in local variables instead.
g_start_build_number = 0 # starting build number to collect your data
g_end_build_number = 1 # ending build number to collect your data


def get_file_out(build_index, python_name, jenkin_name):
    """
    Grab one log file of one Jenkins build and save it under the local directory
    <g_log_base_dir>/Build<build_index>, which is created when it does not exist yet.

    The file is fetched with curl from <g_jenkins_url>/<build_index><jenkin_name>.

    :param build_index: number of the Jenkins build to fetch the file from.
    :param python_name: name to store the downloaded file as inside the build directory.
    :param jenkin_name: path of the file inside the Jenkins build, starting with '/',
        e.g. g_airline_java_tail.
    :return: none
    """
    directoryB = os.path.join(g_log_base_dir, 'Build' + str(build_index))

    if not os.path.isdir(directoryB):   # make directory (and missing parents) if it does not exist
        os.makedirs(directoryB)

    # strip trailing slashes so that a job url ending in '/' does not yield '//' in the address
    url_string_full = g_jenkins_url.rstrip('/') + '/' + str(build_index) + jenkin_name
    filename = os.path.join(directoryB, python_name)

    # Pass curl an argument list instead of a shell command line: paths or urls containing
    # spaces or shell metacharacters are then handed over unchanged and are never
    # interpreted by a shell.  "-o" replaces the former "> filename" redirection.
    try:
        status = subprocess.call(['curl', '-o', filename, url_string_full])
    except OSError as err:      # curl is not installed or cannot be started
        print("Unable to run curl: {0}".format(err))
        return

    if status != 0:
        print("curl exited with status {0} while fetching {1}".format(status, url_string_full))


def main(argv):
    """
    Main program.  Download the airline and milsongs java logs and python run results of
    every Jenkins build in the requested range into the log base directory.

    :param argv: command-line arguments: argv[1] log base directory, argv[2] Jenkins job url,
        argv[3]/argv[4] names to store the airline/milsongs java logs as, argv[5]/argv[6]
        names to store the airline/milsongs python results as, argv[7]/argv[8] first and
        last build number (inclusive).
    @return: none
    """
    global g_log_base_dir
    global g_airline_java
    global g_milsongs_java
    global g_airline_python
    global g_milsongs_python
    global g_jenkins_url

    if len(argv) < 9:
        # the order shown here is the order in which the arguments are read below
        print("python grabGLRMrunLogs.py logsBaseDirectory jenkinsJobURL airlineJavaFileName "
              "milsongsJavaFileName airlinePyunitFileName milsongsPyunitFileName startBuild# endBuild#.\n")
        sys.exit(1)

    # base directory where all logs will be collected according to build #
    g_log_base_dir = argv[1]
    g_jenkins_url = argv[2]
    g_airline_java = argv[3]
    g_milsongs_java = argv[4]
    g_airline_python = argv[5]
    g_milsongs_python = argv[6]

    try:
        start_number = int(argv[7])
        end_number = int(argv[8])
    except ValueError:
        print("startBuild# and endBuild# must be integers.")
        sys.exit(1)

    if start_number > end_number:
        print("startBuild# must be <= endBuild#")
        sys.exit(1)

    # (local file name, path inside the Jenkins build): java logs first, then pyunit results
    downloads = (
        (g_airline_java, g_airline_java_tail),
        (g_milsongs_java, g_milsongs_java_tail),
        (g_airline_python, g_airline_py_tail),
        (g_milsongs_python, g_milsongs_py_tail),
    )

    for build_index in range(start_number, end_number + 1):    # grab log info for all builds
        for local_name, jenkins_tail in downloads:
            get_file_out(build_index, local_name, jenkins_tail)


if __name__ == "__main__":
    main(sys.argv)

0 comments on commit 1ab270f

Please sign in to comment.