diff --git a/scripts/grabGLRMJenkinRunResults.py b/scripts/grabGLRMJenkinRunResults.py new file mode 100755 index 000000000000..585013b98163 --- /dev/null +++ b/scripts/grabGLRMJenkinRunResults.py @@ -0,0 +1,259 @@ +#!/usr/bin/python + +import sys +import os +import json + +""" +This script is written to analysis the Jenkins run logs that we have saved from jenkins to our local computer. + For me, this is how I will call this script and the input arguments I will use: + +python grabGLRMJenkinRunResults.py /Users/wendycwong/Documents/PUBDEV_3454_GLRM/experimentdata/glrm_memory_10_25_16 + +""" + +# -------------------------------------------------------------------- +# Main program +# -------------------------------------------------------------------- + # directory where we are running out code from h2o-3/scripts +g_test_root_dir = os.path.dirname(os.path.realpath(__file__)) + + + # base directory where different logs will be stored under different build directory, e.g. glrm_memory_10_25_16 +g_log_base_dir = "" +g_airline_java = "java_0_0.out_airline.txt" # name of java log file for airline runs you want to store as, e.g. java_0_0.out_airline.txt +g_milsongs_java = "java_1_0.out_milsongs.txt" # name of java log file for milsongs runs you want to store as, e.g. java_1_0.out_milsongs.txt + # name of python run results for airline runs to store as, e.g. pyunit_airlines_performance_profile.py.out.txt +g_airline_python = "pyunit_airlines_performance_profile.py.out.txt" + # name of python run results for airline runs to store as, e.g. pyunit_milsong_performance_profile.py.out.txt +g_milsongs_python = "pyunit_milsong_performance_profile.py.out.txt" +g_direct_name_start = "Build" +g_initialXY = "Time taken (ms) to initializeXY with" # text of interest +g_reguarlize_Y = "Time taken (ms) to calculate regularize_y" +g_regularize_X_objective = "Time taken (ms) to calculate regularize_x and calculate" +g_updateX = "Time taken (ms) to updateX" +g_updateY = "Time taken (ms) to updateY" +g_objective = "Time taken (ms) to calculate new objective function value" +g_stepsize = "Time taken (ms) to set the step size" +g_history = "Time taken (ms) to history of run" +g_py_runtime = "Run time in ms: " +g_py_iteration = "number of iterations:" + + +def generate_octave_java_ascii(java_dict, fname): + global g_log_base_dir + + updateX = java_dict["update X (ms)"] + updateY = java_dict["update Y (ms)"] + obj = java_dict["objective (ms)"] + + + with open(os.path.join(g_log_base_dir, fname),'w') as test_file: + for ind in range(0, len(updateX)): + temp_str = str(updateX[ind])+" "+str(updateY[ind])+" "+str(obj[ind])+"\n" + test_file.write(temp_str) + + +def generate_octave_py_ascii(py_dict, fname): + global g_log_base_dir + + run_time = py_dict["total time (ms)"] + iter_number = py_dict["iteration number"] + time_per_iter = py_dict["time (ms) per iteration"] + + with open(os.path.join(g_log_base_dir, fname),'wb') as test_file: + for ind in range(0,len(run_time)): + temp_str = str(run_time[ind])+" "+str(iter_number[ind])+" "+str(time_per_iter[ind])+"\n" + test_file.write(temp_str) + + +def init_java_dict(): + dict_name = dict() + dict_name["total time (ms)"] = [] + dict_name["initialXY (ms)"] = [] + dict_name["regularize Y (ms)"] = [] + dict_name["regularize X and objective (ms)"] = [] + dict_name["update X (ms)"] = [] + dict_name["update Y (ms)"] = [] + dict_name["objective (ms)"] = [] + dict_name["step size (ms)"] = [] + dict_name["update history (ms)"] = [] + + return dict_name + + +def init_python_dict(): + dict_name = dict() + dict_name["total time (ms)"] = [] + dict_name["iteration number"] = [] + dict_name["time (ms) per iteration"] = [] + + return dict_name + + +def grab_java_results(dirName, java_file, run_result): + global g_direct_name_start + global g_log_base_dir + global g_initialXY + global g_reguarlize_Y + global g_regularize_X_objective + global g_updateX + global g_updateY + global g_objective + global g_stepsize + global g_history + + if not (g_direct_name_start in dirName): + print("Cannot find your java log file. Nothing is done.\n") + return run_result + + logText = os.path.join(os.path.join(g_log_base_dir, dirName), java_file) + total_run_time = -1 + val = 0.0 + + with open(logText, 'r') as thefile: # go into tempfile and grab test run info + for each_line in thefile: + temp_string = each_line.split() + + if len(temp_string) > 0: + val = temp_string[-1].replace('\\','') + + if g_initialXY in each_line: # start of a new file + if total_run_time > 0: # update total run time + run_result["total time (ms)"].append(total_run_time) + total_run_time = 0.0 + else: + total_run_time = 0.0 + + run_result["initialXY (ms)"].append(float(val)) + total_run_time = total_run_time+float(val) + + if g_reguarlize_Y in each_line: + run_result["regularize Y (ms)"].append(float(val)) + total_run_time = total_run_time+float(val) + + if g_regularize_X_objective in each_line: + run_result["regularize X and objective (ms)"].append(float(val)) + total_run_time = total_run_time+float(val) + + if g_updateX in each_line: + run_result["update X (ms)"].append(float(val)) + total_run_time = total_run_time+float(val) + + if g_updateY in each_line: + run_result["update Y (ms)"].append(float(val)) + total_run_time = total_run_time+float(val) + + if g_objective in each_line: + run_result["objective (ms)"].append(float(val)) + total_run_time = total_run_time+float(val) + + if g_stepsize in each_line: + run_result["step size (ms)"].append(float(val)) + total_run_time = total_run_time+float(val) + + if g_history in each_line: + run_result["update history (ms)"].append(float(val)) + total_run_time = total_run_time+float(val) + + run_result["total time (ms)"].append(total_run_time) # save the last one + + return run_result + + +def grab_py_results(dirName, python_file, run_result): + global g_direct_name_start + global g_log_base_dir + global g_py_runtime + global g_py_iteration + + if not (g_direct_name_start in dirName): + print("Cannot find your python log file. Nothing is done.\n") + return run_result + + logText = os.path.join(os.path.join(g_log_base_dir, dirName), python_file) + with open(logText, 'r') as thefile: # go into tempfile and grab test run info + for each_line in thefile: + temp_string = each_line.split(':') + + if len(temp_string) > 0: + val = temp_string[-1].replace('\n','') + + if g_py_runtime in each_line: # found run time sequence + run_result["total time (ms)"].extend(eval(val)) + + if g_py_iteration in each_line: + run_result["iteration number"].extend(eval(val)) + return run_result + +def transform_time_python(run_result): + run_times = run_result["total time (ms)"] + total_iterations = run_result["iteration number"] + + for ind in range(0, len(run_times)): + run_result["time (ms) per iteration"].append(run_times[ind]*1.0/total_iterations[ind]) + + return run_result + + +def main(argv): + """ + Main program. + + @return: none + """ + global g_log_base_dir + global g_airline_java + global g_milsongs_java + global g_airline_python + global g_milsongs_python + + if len(argv) < 2: + print "python grabGLRMrunLogs logsBaseDirectory\n" + sys.exit(1) + else: # we may be in business + g_log_base_dir = argv[1] + + if (os.path.isdir(g_log_base_dir)): # open directory and start to process logs in each one + airline_java_dict = init_java_dict() + milsongs_java_dict = init_java_dict() + airline_py_dict = init_python_dict() + milsongs_py_dict = init_python_dict() + + allBuilds = os.listdir(g_log_base_dir) + for dirName in allBuilds: + airline_java_dict = grab_java_results(dirName, g_airline_java, airline_java_dict) + milsongs_java_dict = grab_java_results(dirName, g_milsongs_java, milsongs_java_dict) + airline_py_dict = grab_py_results(dirName, g_airline_python, airline_py_dict) + milsongs_py_dict = grab_py_results(dirName, g_milsongs_python, milsongs_py_dict) + + airline_py_dict = transform_time_python(airline_py_dict) # calculate time taken per iteration + milsongs_py_dict = transform_time_python(milsongs_py_dict) + + print("Airline Java log results: \n {0}".format(airline_java_dict)) + print("Airline Python log results: \n {0}".format(airline_py_dict)) + print("Milsongs Java log results: \n {0}".format(milsongs_java_dict)) + print("Milsongs Python log results: \n {0}".format(milsongs_py_dict)) + + # dump dictionary into json files for later analysis + with open(os.path.join(g_log_base_dir, "airline_java_dict"),'wb') as test_file: + json.dump(airline_java_dict, test_file) + + with open(os.path.join(g_log_base_dir, "airline_py_dict"),'wb') as test_file: + json.dump(airline_py_dict, test_file) + + with open(os.path.join(g_log_base_dir, "milsongs_java_dict"),'wb') as test_file: + json.dump(milsongs_java_dict, test_file) + + with open(os.path.join(g_log_base_dir, "milsongs_py_dict"),'wb') as test_file: + json.dump(milsongs_py_dict, test_file) + + # dump analysis results into json format that octave can understand and process + generate_octave_java_ascii(airline_java_dict, "airline_java_octave") + generate_octave_java_ascii(milsongs_java_dict, "milsongs_java_octave") + generate_octave_py_ascii(airline_py_dict, "airline_py_octave") + generate_octave_py_ascii(milsongs_py_dict, "milsongs_py_octave") + + +if __name__ == "__main__": + main(sys.argv) diff --git a/scripts/grabGLRMrunLogs.py b/scripts/grabGLRMrunLogs.py new file mode 100755 index 000000000000..40a389099f32 --- /dev/null +++ b/scripts/grabGLRMrunLogs.py @@ -0,0 +1,121 @@ +#!/usr/bin/python + +import sys +import os +import json +import pickle + +import copy +import subprocess + + +""" +This script is written to grab the logs from our GLRM runs and saved them onto our local machines for +later analysis. For me, this is how I will call this script and the input arguments I will use: + +python grabGLRMrunLogs.py /Users/wendycwong/Documents/PUBDEV_3454_GLRM/experimentdata/glrm_memory_10_25_16 + http://mr-0xa1:8080/view/wendy/job/glrm_memory_performance/ + /Users/wendycwong/Documents/PUBDEV_3454_GLRM/experimentdata/glrm_memory_10_25_16 + java_0_0.out_airline.txt java_1_0.out_milsongs.txt pyunit_airlines_performance_profile.py.out.txt + pyunit_milsongs_performance_profile.py.out.txt 8 26 +""" + +# -------------------------------------------------------------------- +# Main program +# -------------------------------------------------------------------- + # directory where we are running out code from h2o-3/scripts +g_test_root_dir = os.path.dirname(os.path.realpath(__file__)) + +g_airline_py_tail = "/artifact/h2o-py/GLRM_performance_tests/results/pyunit_airlines_performance_profile.py.out.txt" +g_milsongs_py_tail = "/artifact/h2o-py/GLRM_performance_tests/results/pyunit_milsong_performance_profile.py.out.txt" +g_airline_java_tail = "/artifact/h2o-py/GLRM_performance_tests/results/java_0_0.out.txt" +g_milsongs_java_tail = "/artifact/h2o-py/GLRM_performance_tests/results/java_1_0.out.txt" + + + # base directory where different logs will be stored under different build directory, e.g. glrm_memory_10_25_16 +g_log_base_dir = "" +g_airline_java = "" # name of java log file for airline runs you want to store as, e.g. java_0_0.out_airline.txt +g_milsongs_java = "" # name of java log file for milsongs runs you want to store as, e.g. java_1_0.out_milsongs.txt + # name of python run results for airline runs to store as, e.g. pyunit_airlines_performance_profile.py.out.txt +g_airline_python = "" + # name of python run results for airline runs to store as, e.g. pyunit_milsong_performance_profile.py.out.txt +g_milsongs_python = "" +g_jenkins_url = "" # url to your jenkins job, e.g.http://mr-0xa1:8080/view/wendy/job/glrm_original_performance/ +g_start_build_number = 0 # starting build number to collect your data +g_end_build_number = 1 # ending build number to collect your data + + +def get_file_out(build_index, python_name, jenkin_name): + """ + This function will grab one log file from Jenkins and save it to local user directory + :param g_jenkins_url: + :param build_index: + :param airline_java: + :param airline_java_tail: + :return: + """ + global g_log_base_dir + global g_jenkins_url + global g_log_base_dir + + directoryB = g_log_base_dir+'/Build'+str(build_index) + + if not(os.path.isdir(directoryB)): # make directory if it does not exist + os.mkdir(directoryB) + + url_string_full = g_jenkins_url+'/'+str(build_index)+jenkin_name + filename = os.path.join(directoryB, python_name) + + full_command = 'curl ' + url_string_full + ' > ' + filename + subprocess.call(full_command,shell=True) + + +def main(argv): + """ + Main program. + + @return: none + """ + global g_log_base_dir + global g_airline_java + global g_milsongs_java + global g_airline_python + global g_milsongs_python + global g_jenkins_url + global g_airline_py_tail + global g_milsongs_py_tail + global g_airline_java_tail + global g_milsongs_java_tail + + if len(argv) < 9: + print "python grabGLRMrunLogs logsBaseDirectory airlineJavaFileNameWithPath milsongJavaFileNameWithPath " \ + "airlinePyunitWithPath airlinePyunitWithPath jenkinsJobURL startBuild# endBuild#.\n" + sys.exit(1) + else: # we may be in business +# g_script_name = os.path.basename(argv[0]) # get name of script being run. + # base directory where all logs will be collected according to build # + g_log_base_dir = argv[1] + g_jenkins_url = argv[2] + g_airline_java = argv[3] + g_milsongs_java = argv[4] + g_airline_python = argv[5] + g_milsongs_python = argv[6] + start_number = int(argv[7]) + end_number = int(argv[8]) + + if (start_number > end_number): + print "startBuild# must be <= end_number" + sys.exit(1) + else: + for build_index in range(start_number, end_number+1): # grab log info for all builds + # copy the java jobs + get_file_out(build_index, g_airline_java, g_airline_java_tail) + get_file_out(build_index, g_milsongs_java, g_milsongs_java_tail) + + # copy the pyunit jobs + get_file_out(build_index, g_airline_python, g_airline_py_tail) + get_file_out(build_index, g_milsongs_python, g_milsongs_py_tail) + + +if __name__ == "__main__": + main(sys.argv)