#!/usr/bin/python
"""
Add java messages to, or remove them from, the set of java messages that log scraping may ignore.

This script is written for a user
1. to add new java messages that we can ignore during a log scraping session;
2. to remove old java messages that were okay to ignore in the past but cannot be ignored anymore.

To see how to call this script correctly, see usage().

To exclude java messages, the user edits a text file made of KeyName/IgnoredMessage pairs (both
keywords are matched case-insensitively), for example:

KeyName = general
IgnoredMessage = nfolds: nfolds cannot be larger than the number of rows (406).
KeyName = pyunit_cv_cars_gbm.py
IgnoredMessage = Caught exception: Illegal argument(s) for GBM model: GBM_model_python_1452503348770_2586.  Details: ERRR on field: _nfolds: nfolds must be either 0 or >1.
KeyName = pyunit_cv_cars_gbm.py
IgnoredMessage = Stacktrace: [water.exceptions.H2OModelBuilderIllegalArgumentException.makeFromBuilder(H2OModelBuilderIllegalArgumentException.java:19), water.api.ModelBuilderHandler.handle(ModelBuilderHandler.java:45), ...]; Values: {"messages":[{"_log_level":1,"_field_name":"_nfolds","_message":"nfolds must be either 0 or >1."}, ...], "algo":"GBM", "parameters":{...}, "error_count":1}

(The real stack trace and the JSON dump of the model parameters are much longer; they are
abbreviated with "..." here.)

Given the above text file, this script builds a dict structure (g_ok_java_messages) that contains
the following key/value pairs:

g_ok_java_messages["general"] = ["nfolds: nfolds cannot be larger than the number of rows (406)."]
g_ok_java_messages["pyunit_cv_cars_gbm.py"] = ["Caught exception: Illegal argument(s) for GBM model: ...",
                                               "Stacktrace: [water.exceptions...]; Values: {...}"]

The key "general" implies that the java messages stored in g_ok_java_messages["general"] will be
ignored for all unit tests.  The java messages stored under a specific unit test name are only
ignored for that particular test.

For each key in g_ok_java_messages, the values are stored as a list of message strings.
"""

import sys
import os
import pickle

import copy
import subprocess

# --------------------------------------------------------------------
# Global settings shared by the functions of this script
# --------------------------------------------------------------------

g_test_root_dir = os.path.dirname(os.path.realpath(__file__))   # directory where we are running our code from
g_load_java_message_filename = "bad_java_messages_to_exclude.pickle"  # default pickle filename that stores previous java messages that we wanted to exclude
g_save_java_message_filename = "bad_java_messages_to_exclude.pickle"  # pickle filename that we are going to store our added java messages to
g_new_messages_to_exclude = ""  # user file that stores the new java messages to ignore
g_old_messages_to_remove = ""   # user file that stores java messages that are to be removed from the ignore list.
g_dict_changed = False          # True if dictionary has changed and False otherwise
g_java_messages_to_ignore_text_filename = "java_messages_to_ignore.txt"     # store all rules for humans to read
g_print_java_messages = False   # True if the ignored messages should be printed to console and text file

# store java bad messages that we can ignore.  The keys are "general" and the test names that we
# want to add exclude messages for.  The values will all be a list of java messages that we want to ignore.
g_ok_java_messages = {}


def load_dict():
    """
    Load the ignored java messages stored in a pickle file into the dict g_ok_java_messages.

    When the file named by g_load_java_message_filename does not exist, no messages are loaded
    and an empty "general" entry is created instead.

    :return: none
    """
    global g_load_java_message_filename
    global g_ok_java_messages

    if os.path.isfile(g_load_java_message_filename):
        # NOTE(review): pickle.load can execute arbitrary code contained in the file.  Only
        # load pickle files that were written by save_dict() of this script.
        with open(g_load_java_message_filename, 'rb') as ofile:
            g_ok_java_messages = pickle.load(ofile)
    else:   # no previous java messages to be excluded are found
        g_ok_java_messages["general"] = []


def add_new_message():
    """
    Add the java messages listed in the user text file g_new_messages_to_exclude to
    g_ok_java_messages.  The file is parsed by extract_message_to_dict and merged by
    update_message_dict.  g_dict_changed is set when the file yielded at least one message.

    :return: none
    """
    global g_new_messages_to_exclude    # filename of the user text file containing new java ignored messages
    global g_dict_changed               # True if new ignored java messages are added.

    new_message_dict = extract_message_to_dict(g_new_messages_to_exclude)

    if new_message_dict:
        g_dict_changed = True
        update_message_dict(new_message_dict, 1)    # 1 to add, 2 to remove


def remove_old_message():
    """
    Remove the java messages listed in the user text file g_old_messages_to_remove from
    g_ok_java_messages.  The file is parsed by extract_message_to_dict and applied by
    update_message_dict.  g_dict_changed is set when the file yielded at least one message.

    :return: none
    """
    global g_old_messages_to_remove
    global g_dict_changed

    old_message_dict = extract_message_to_dict(g_old_messages_to_remove)

    if old_message_dict:
        g_dict_changed = True
        update_message_dict(old_message_dict, 2)    # 1 to add, 2 to remove


def update_message_dict(message_dict, action):
    """
    Update the g_ok_java_messages dict structure by
    1. adding the java ignored messages stored in message_dict if action == 1;
    2. removing the java ignored messages stored in message_dict if action == 2.

    Parameters
    ----------

    message_dict :  Python dict
      key: unit test name or "general"
      value: list of java messages that are to be ignored if they are found when running the test
      stored as the key.  If the key is "general", the java messages are ignored for all tests.
    action : int
      if 1: add java ignored messages stored in message_dict to g_ok_java_messages dict;
      if 2: remove java ignored messages stored in message_dict from g_ok_java_messages dict.

    :return: none
    """
    global g_ok_java_messages

    for key in message_dict:
        if key in g_ok_java_messages:   # key already exists, update its list in place
            known_messages = g_ok_java_messages[key]

            for message in message_dict[key]:
                if action == 1:
                    if message not in known_messages:
                        known_messages.append(message)
                elif action == 2:
                    if message in known_messages:
                        known_messages.remove(message)
        elif action == 1:   # new key here.  Can only add and cannot remove
            # store a copy so that later updates never modify the caller's list
            g_ok_java_messages[key] = list(message_dict[key])


def extract_message_to_dict(filename):
    """
    Read a text file that lists java messages and generate a dictionary out of it.  The keys are
    test names (or "general") and the values are lists of java message strings associated with
    that key.  The caller either adds them to or removes them from g_ok_java_messages.

    Parameters
    ----------

    filename :  Str
       filename that contains ignored java messages.  The text file shall contain something
       like this (keywords are matched case-insensitively):
        KeyName = general
        IgnoredMessage = nfolds: nfolds cannot be larger than the number of rows (406).
        KeyName = pyunit_cv_cars_gbm.py
        IgnoredMessage = Caught exception: Illegal argument(s) for GBM model: ...
        ...
       Non-empty lines that follow an IgnoredMessage line are appended to that message until the
       next KeyName or IgnoredMessage line.

    :return:
    message_dict : dict
        contains java messages to be ignored with key as unit test name or "general" and values
        as lists of ignored java messages.  Empty if the file does not exist.
    """
    message_dict = {}

    if not os.path.isfile(filename):
        return message_dict

    key = ""
    val = ""
    in_message = False      # True while the lines being read belong to a java message

    with open(filename, 'r') as rfile:
        for each_line in rfile:
            lowered = each_line.lower()

            # found a test name or "general" with messages to follow
            if "keyname" in lowered:
                _, sep, rest = each_line.strip().partition('=')

                if sep:     # make sure the line is formatted sort of correctly
                    if in_message:  # a new key ends the message collected so far
                        add_to_dict(val.strip(), key, message_dict)
                        val = ""

                    key = rest.strip()
                    in_message = False

            if "ignoredmessage" in lowered:
                if in_message:
                    # a second message for the same key: keep the previous one instead of
                    # silently overwriting it
                    add_to_dict(val.strip(), key, message_dict)

                # split on the first '=' only, the message itself may contain '='
                _, sep, rest = each_line.partition('=')
                val = rest if sep else ""
                in_message = True   # start of a java message
            elif in_message and len(each_line) > 1:
                val += each_line    # continuation of a multi-line message, blank lines skipped

    if in_message:  # reached EOF with a message still pending
        add_to_dict(val.strip(), key, message_dict)

    return message_dict


def add_to_dict(val, key, message_dict):
    """
    Add the ignored java message val to the list stored under key in message_dict.  Empty
    messages and messages already stored under key are not added; messages already stored
    under key are never discarded.

    Parameters
    ----------

    val :  Str
       contains an ignored java message.
    key :  Str
        key for the ignored java message.  It can be "general" or any R or Python unit
        test name
    message_dict :  dict
        stores ignored java messages per key ("general" or any R or Python unit test name)

    :return: none
    """
    if len(val) == 0:   # not a valid message
        return

    messages = message_dict.setdefault(key, [])
    if val not in messages:     # only include this message if it has not been added before
        messages.append(val)


def save_dict():
    """
    Save the ignored java message dict stored in g_ok_java_messages into the pickle file
    g_save_java_message_filename.  Nothing is written unless g_dict_changed is True.

    :return: none
    """
    global g_ok_java_messages
    global g_save_java_message_filename
    global g_dict_changed

    if g_dict_changed:
        with open(g_save_java_message_filename, 'wb') as ofile:
            pickle.dump(g_ok_java_messages, ofile)


def print_dict():
    """
    Write the java ignored messages in g_ok_java_messages into the text file
    g_java_messages_to_ignore_text_filename for humans to read and print them to the console.

    :return: none
    """
    global g_ok_java_messages
    global g_java_messages_to_ignore_text_filename

    with open(g_java_messages_to_ignore_text_filename, 'w') as ofile:
        for key in sorted(g_ok_java_messages.keys()):

            for mess in g_ok_java_messages[key]:
                ofile.write('KeyName: '+key+'\n')
                ofile.write('IgnoredMessage: '+mess+'\n')

            print('KeyName: ',key)
            print('IgnoredMessage: ',g_ok_java_messages[key])
            print('\n')


def parse_args(argv):
    """
    Parse user inputs and set the corresponding global variables to perform the
    necessary tasks.  Prints the help menu and exits (via usage()) when no argument is given,
    when a flag is unknown or when a flag is missing its filename.

    Parameters
    ----------

    argv : string array
        contains flags and input options from users

    :return: none
    """
    global g_new_messages_to_exclude
    global g_old_messages_to_remove
    global g_load_java_message_filename
    global g_save_java_message_filename
    global g_print_java_messages

    if len(argv) < 2:   # print out help menu if user did not enter any arguments.
        usage()

    flags_with_filename = ("--inputfileadd", "--inputfilerm", "--loadjavamessage",
                           "--savejavamessage", "--printjavamessage")

    i = 1
    while i < len(argv):
        s = argv[i]

        if s in flags_with_filename:
            i += 1
            if i >= len(argv):  # flag was the last argument, its filename is missing
                usage()
            filename = argv[i]

            if s == "--inputfileadd":       # input text file where new java messages are stored
                g_new_messages_to_exclude = filename
            elif s == "--inputfilerm":      # input text file containing java messages to be removed from the ignored list
                g_old_messages_to_remove = filename
            elif s == "--loadjavamessage":  # load previously saved java message pickle file instead of the default one
                g_load_java_message_filename = filename
            elif s == "--savejavamessage":  # save updated java messages in this file instead of default file
                g_save_java_message_filename = filename
            else:                           # --printjavamessage: print messages of this pickle file to console and text file
                g_print_java_messages = True
                g_load_java_message_filename = filename
        elif s == '--help':     # print help menu and exit
            usage()
        else:
            unknown_arg(s)

        i += 1


def usage():
    """
    Illustrate what the various input flags are and what the options should be, then exit the
    script with status 1.

    :return: none
    """
    global g_script_name    # name of the script being run, set by main().

    # fall back on the command line when main() has not set the script name yet
    script_name = globals().get("g_script_name") or os.path.basename(sys.argv[0])

    print("")
    print("Usage:  " + script_name + " [...options...]")
    print("")
    print("     --help print out this help menu and show all the valid flags and inputs.")
    print("")
    print("    --inputfileadd filename where the new java messages to ignore are stored in.")
    print("")
    print("    --inputfilerm filename where the java messages are removed from the ignored list.")
    print("")
    print("    --loadjavamessage filename pickle file that stores the dict structure containing java messages to include.")
    print("")
    print("    --savejavamessage filename pickle file that saves the final dict structure after update.")
    print("")
    print("    --printjavamessage filename print java ignored java messages stored in pickle file filenam onto console and save into a text file.")
    print("")
    sys.exit(1)


def unknown_arg(s):
    """
    Report an unknown command line argument, then print the help menu and exit via usage().

    Parameters
    ----------

    s : Str
        the argument that was not recognized

    :return: none
    """
    print("")
    print("ERROR: Unknown argument: " + s)
    print("")
    usage()


def main(argv):
    """
    Main program.  Parses the command line, loads the stored ignored java messages, adds and/or
    removes messages as requested, saves the result and optionally prints it.  All filenames are
    taken relative to the directory of this script (g_test_root_dir).

    Parameters
    ----------

    argv : string array
        command line arguments, argv[0] is the script name

    :return: none
    """
    global g_script_name
    global g_test_root_dir
    global g_new_messages_to_exclude
    global g_old_messages_to_remove
    global g_load_java_message_filename
    global g_save_java_message_filename
    global g_print_java_messages
    global g_java_messages_to_ignore_text_filename

    g_script_name = os.path.basename(argv[0])   # get name of script being run.

    # Override any defaults with the user's choices.
    parse_args(argv)

    g_load_java_message_filename = os.path.join(g_test_root_dir, g_load_java_message_filename)
    load_dict()     # load previously stored java messages to g_ok_java_messages

    if len(g_new_messages_to_exclude) > 0:
        g_new_messages_to_exclude = os.path.join(g_test_root_dir, g_new_messages_to_exclude)
        add_new_message()       # add new java messages to exclude to dictionary

    if len(g_old_messages_to_remove) > 0:
        g_old_messages_to_remove = os.path.join(g_test_root_dir, g_old_messages_to_remove)
        remove_old_message()    # remove java messages from ignored list if users desired it

    g_save_java_message_filename = os.path.join(g_test_root_dir, g_save_java_message_filename)
    save_dict()     # save the updated dict g_ok_java_messages to pickle file

    if g_print_java_messages:   # print java ignored messages to console and text file
        g_java_messages_to_ignore_text_filename = os.path.join(g_test_root_dir, g_java_messages_to_ignore_text_filename)
        print_dict()


if __name__ == "__main__":
    main(sys.argv)
#!/usr/bin/python
"""
In summary, this script given a jenkins job full console url and a summary log filename will
1. scrape the console output log, all unit tests outputs and all java_*_0.out.txt of the
   latest build.
2. From all the logs, it will generate potentially two log files: jenkins_job_name_build_number_failed_tests.log
   and jenkins_job_name_build_number_passed_tests.log.  Inside each log file, it contains the job name, build number,
   timestamp, git hash, git branch, node name, build failure and build timeout information.  In addition, it will list
   unit tests that failed/passed with the corresponding java WARN/ERRR/FATAL/STACKTRACE messages associated with the unit tests.
3. Users can choose to ignore certain java messages that are deemed okay.  These ignored java messages are stored in a pickle
   file with a default name and location.  However, if the user wants to use their own ignored java messages, they can do
   so by specifying a third optional argument to this script as the name of their own personal pickle file.
4. If there are okay ignored java messages stored in a pickle file, this script will not grab them and store them in
   any log files.
5. For details on how to generate ignored java messages and save them to a pickle file, please see addjavamessage2ignore.py.
"""

import sys
import os
import json
import pickle

import copy
import subprocess

# --------------------------------------------------------------------
# Global settings shared by the functions of this script
# --------------------------------------------------------------------

g_test_root_dir = os.path.dirname(os.path.realpath(__file__))   # directory where we are running our code from
g_script_name = ''  # store script name.

g_node_name = "Building remotely on"    # the very next string is the name of the computer node that ran the test
g_git_hash_branch = "Checking out Revision"    # next string is git hash, and the next one is (origin/branch)
g_build_timeout = "Build timed out"     # phrase when tests run too long
g_build_success = ["Finished: SUCCESS", 'BUILD SUCCESSFUL']    # sentence at the end that guarantee build success

g_build_success_tests = ['generate_rest_api_docs.py', 'generate_java_bindings.py']  # two functions that are usually performed after build success
g_build_id_text = 'Build id is'
g_view_name = ''

g_temp_filename = os.path.join(g_test_root_dir, 'tempText')    # temp file to store data curled from Jenkins

# generate file names to store the final logs.
g_output_filename_failed_tests = os.path.join(g_test_root_dir, 'failedMessage_failed_tests.log')
g_output_filename_passed_tests = os.path.join(g_test_root_dir, 'failedMessage_passed_tests.log')
g_output_pickle_filename = os.path.join(g_test_root_dir, 'failedMessage.pickle.log')

g_failed_test_info_dict = {}
g_failed_test_info_dict["7.build_failure"] = "No"   # initialize build_failure with no by default

# info used to recognize the timestamp line (comma separated names, see find_time)
g_weekdays = 'Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, Sunday'
g_months = 'January, February, March, April, May, June, July, August, September, October, November, December'

g_failure_occurred = False  # denote when failure actually occurred

g_failed_jobs = []                      # record job names of failed jobs
g_failed_job_java_message_types = []    # java bad message types (can be WARN:, ERRR:, FATAL:, TRACE:)
g_failed_job_java_messages = []         # record failed job java message

g_success_jobs = []                     # record job names of passed jobs
g_success_job_java_message_types = []
g_success_job_java_messages = []        # record of successful jobs bad java messages

# text you will find before you can find your java_*_*.out.txt
g_before_java_file = ["H2O Cloud", "Node", "started with output file"]

g_java_filenames = []           # contains all java filenames for us to mine
g_java_message_type = ["WARN:", ":WARN:", "ERRR:", "FATAL:", "TRACE:"]     # bad java message types
g_all_java_message_type = ["WARN:", ":WARN:", "ERRR:", "FATAL:", "TRACE:", "DEBUG:", "INFO:"]    # all java message types

g_java_general_bad_message_types = []
g_java_general_bad_messages = []    # store java messages that are not associated with any tests

g_jenkins_url = ''
g_toContinue = False

g_current_testname = ''     # denote when we are in a test during java text scanning

g_java_start_text = 'STARTING TEST:'    # test being started in java

g_ok_java_messages = {}     # dict that stores java bad messages that we can ignore
g_java_message_pickle_filename = "bad_java_messages_to_exclude.pickle"  # pickle file that stores the dictionary structure that includes Java error messages to exclude
g_build_failed_message = ["Finished: FAILURE".lower(), 'BUILD FAILED'.lower()]   # something has gone wrong.  No tests are performed.
g_summary_text_filename = ""    # filename to store the summary file (contains all logs) sent to user via email.


def perform(function_name, *arguments):
    """
    Call the function given as first argument with the remaining arguments.  Its sole purpose
    is to enable us to call any function handle with an argument list.

    Parameters
    ----------

    function_name :  python function handle
        function we want to call and run
    *arguments :  Python list
        list of arguments to be passed to function_name

    :return: whatever function_name returns (a bool for the find_* functions of this script)
    """
    return function_name(*arguments)


def extract_true_string(string_content):
    """
    Remove the extra characters in front of the actual string we are looking for.  The Jenkins
    console output is encoded using utf-8 while the redirected text is ASCII, which leaves junk
    in front of some words.  Everything up to and including the first '[0m' is dropped.

    Parameters
    ----------

    string_content :  str
        contains a line (or word) read in from jenkins console

    :return: str: the content after the string '[0m', or string_content itself if '[0m' is not
        found

    """
    _, found, remainder = string_content.partition('[0m')

    if found:
        return remainder

    return string_content


def _matches_name(token, names_csv):
    """
    Return True if token (ignoring surrounding ',' and '.') is one of the comma separated names
    in names_csv or an abbreviation of at least three letters of one of them (e.g. "Sun", "Feb").
    """
    word = token.strip(',.')

    if len(word) < 3:   # too short to identify a weekday or a month
        return False

    return any(name.strip().startswith(word) for name in names_csv.split(','))


def find_time(each_line, temp_func_list):
    """
    Recognize the timestamp line that tells when the job was built and save the whole line in
    g_failed_test_info_dict["3.timestamp"].  A line is taken as timestamp when one of its first
    two words is a weekday and its second or third word is a month (see g_weekdays, g_months).
    Once found, this function handle is deleted off temp_func_list as we do not need to perform
    this action again.

    Parameters
    ----------

    each_line :  str
        contains a line read in from jenkins console
    temp_func_list :  list of Python function handles
        contains a list of functions that we want to invoke to extract information from
        the Jenkins console text.

    :return: bool to determine if text mining should continue on the jenkins console text
        (always True)
    """
    global g_weekdays
    global g_months
    global g_failed_test_info_dict

    words = each_line.strip().split()

    if len(words) > 2:
        has_weekday = _matches_name(words[0], g_weekdays) or _matches_name(words[1], g_weekdays)
        has_month = _matches_name(words[1], g_months) or _matches_name(words[2], g_months)

        if has_weekday and has_month:
            g_failed_test_info_dict["3.timestamp"] = each_line.strip()

            if find_time in temp_func_list:
                temp_func_list.remove(find_time)    # found timestamp, don't need to look again for it

    return True


def find_node_name(each_line, temp_func_list):
    """
    Find the slave machine where a Jenkins job was executed on.  It will save this
    information in g_failed_test_info_dict["6.node_name"].  In addition, it will
    delete this particular function handle off the temp_func_list as we do not need
    to perform this action again.

    Parameters
    ----------

    each_line :  str
        contains a line read in from jenkins console
    temp_func_list :  list of Python function handles
        contains a list of functions that we want to invoke to extract information from
        the Jenkins console text.

    :return: bool to determine if text mining should continue on the jenkins console text
        (always True)
    """
    global g_node_name
    global g_failed_test_info_dict

    if g_node_name in each_line:
        _, _, endstr = each_line.partition(g_node_name)
        words = endstr.split()

        # NOTE(review): the node name is read from the second word after g_node_name, the first
        # one presumably being console markup -- confirm against a real console log.
        if len(words) > 1:
            g_failed_test_info_dict["6.node_name"] = extract_true_string(words[1])
            temp_func_list.remove(find_node_name)

    return True


def find_git_hash_branch(each_line, temp_func_list):
    """
    Find the git hash and branch info that a Jenkins job was taken from.  It will save this
    information in g_failed_test_info_dict ("4.git_hash" and "5.git_branch").  In addition, it
    will delete this particular function handle off the temp_func_list as we do not need to
    perform this action again.

    Parameters
    ----------

    each_line :  str
        contains a line read in from jenkins console
    temp_func_list :  list of Python function handles
        contains a list of functions that we want to invoke to extract information from
        the Jenkins console text.

    :return: bool to determine if text mining should continue on the jenkins console text
        (always True)
    """
    global g_git_hash_branch
    global g_failed_test_info_dict

    if g_git_hash_branch in each_line:
        _, _, endstr = each_line.partition(g_git_hash_branch)
        words = endstr.strip().split()

        if len(words) > 1:
            g_failed_test_info_dict["4.git_hash"] = words[0]
            g_failed_test_info_dict["5.git_branch"] = words[1]

        # the revision line was seen, stop looking even if it could not be split
        temp_func_list.remove(find_git_hash_branch)

    return True


def find_build_timeout(each_line, temp_func_list):
    """
    Find if a Jenkins job has taken too long to finish and was killed.  It will save this
    information in g_failed_test_info_dict["8.build_timeout"] and set g_failure_occurred.

    Parameters
    ----------

    each_line :  str
        contains a line read in from jenkins console
    temp_func_list :  list of Python function handles
        contains a list of functions that we want to invoke to extract information from
        the Jenkins console text.

    :return: bool to determine if text mining should continue on the jenkins console text
        (False once the build timeout was found)
    """
    global g_build_timeout
    global g_failed_test_info_dict
    global g_failure_occurred

    if g_build_timeout not in each_line:
        return True

    g_failed_test_info_dict["8.build_timeout"] = 'Yes'
    g_failure_occurred = True
    return False    # build timeout was found, no need to continue mining the console text


def find_build_failure(each_line, temp_func_list):
    """
    Find if a Jenkins job has failed to build.  It will save this information in
    g_failed_test_info_dict["7.build_failure"] and set g_failure_occurred.  In addition, it will
    delete this particular function handle off the temp_func_list as we do not need to perform
    this action again.

    Parameters
    ----------

    each_line :  str
        contains a line read in from jenkins console
    temp_func_list :  list of Python function handles
        contains a list of functions that we want to invoke to extract information from
        the Jenkins console text.

    :return: bool to determine if text mining should continue on the jenkins console text
        (False once a build failure was found)
    """
    global g_failed_jobs
    global g_failed_test_info_dict
    global g_failure_occurred
    global g_build_failed_message

    lowered_line = each_line.lower()

    for ind, failed_message in enumerate(g_build_failed_message):
        if failed_message not in lowered_line:
            continue

        # the first phrase ("finished: failure") is not counted as a build failure once
        # failed tests have been recorded
        if ind == 0 and len(g_failed_jobs) > 0:
            continue

        g_failure_occurred = True
        g_failed_test_info_dict["7.build_failure"] = 'Yes'
        temp_func_list.remove(find_build_failure)
        return False

    return True


def find_java_filename(each_line, temp_func_list):
    """
    Find all the java_*_0.out.txt files that were mentioned in the console output.  A line that
    contains every phrase of g_before_java_file ends with such a filename; the filename is
    appended to the list g_java_filenames.

    Parameters
    ----------

    each_line :  str
        contains a line read in from jenkins console
    temp_func_list :  list of Python function handles
        contains a list of functions that we want to invoke to extract information from
        the Jenkins console text.

    :return: bool to determine if text mining should continue on the jenkins console text
        (always True)
    """
    global g_before_java_file
    global g_java_filenames

    if all(each_word in each_line for each_word in g_before_java_file):
        # line contains the name and location of java txt output filename as its last word
        g_java_filenames.append(each_line.split()[-1])

    return True


def find_build_id(each_line, temp_func_list):
    """
    Find the build id of a jenkins job.  It will save this information in
    g_failed_test_info_dict["2.build_id"] and turn g_jenkins_url into the url of the artifacts
    of that build (which requires g_failed_test_info_dict["1.jobName"] to be set already).  In
    addition, it will delete this particular function handle off the temp_func_list as we do
    not need to perform this action again.

    Parameters
    ----------

    each_line :  str
        contains a line read in from jenkins console
    temp_func_list :  list of Python function handles
        contains a list of functions that we want to invoke to extract information from
        the Jenkins console text.

    :return: bool to determine if text mining should continue on the jenkins console text
        (always True)
    """
    import posixpath    # urls always use '/', whatever the platform running this script

    global g_build_id_text
    global g_jenkins_url
    global g_view_name
    global g_failed_test_info_dict

    if g_build_id_text in each_line:
        _, _, endstr = each_line.partition(g_build_id_text)
        g_failed_test_info_dict["2.build_id"] = endstr.strip()

        temp_func_list.remove(find_build_id)
        g_jenkins_url = posixpath.join('http://', g_jenkins_url, 'view', g_view_name, 'job',
                                       g_failed_test_info_dict["1.jobName"],
                                       g_failed_test_info_dict["2.build_id"], 'artifact')

    return True

# global list of all functions that are performed extracting new build information.
g_build_func_list = [find_time,find_node_name,find_build_id,find_git_hash_branch,find_build_timeout,find_build_failure,find_java_filename]


def update_test_dict(each_line):
    """
    Extract unit tests information from the jenkins job console output.  A failed test is
    recognised as a line with at least five whitespace-separated tokens that contains "FAIL"
    but not "FAILURE"; its second-to-last token is taken as the test name.  The name is saved
    in g_failed_jobs and empty place holders are added to g_failed_job_java_messages and
    g_failed_job_java_message_types for the bad java messages/message types found later.

    Parameters
    ----------

    each_line :  str
        contains a line read in from jenkins console

    :return: bool to determine if text mining should continue on the jenkins console text
        (always True).
    """
    global g_failure_occurred

    temp_strings = each_line.split()

    if (len(temp_strings) >= 5) and ("FAIL" in each_line) and ("FAILURE" not in each_line):   # found failed test
        g_failed_jobs.append(temp_strings[-2])
        g_failed_job_java_messages.append([])       # insert empty java messages for now
        g_failed_job_java_message_types.append([])
        g_failure_occurred = True

    return True


def extract_test_results():
    """
    Extract error messages from jenkins console output and from java_*_0.out.txt if they exist to
    warn users of potentially bad tests.  In addition, it will grab the following info about the jenkins
    job from the console output and saved it into g_failed_test_info_dict:
    1.jobName
    2.build_id
    3.timestamp
    4.git_hash
    5.git_branch
    6.node_name
    7.build_failure
    8.build_timeout
    9.general_bad_java_messages
    failed_tests_info *********: list of failed tests and their associated bad java messages
    passed_tests_info *********: list of passed tests and their associated bad java messages

    This is achieved by running every function in a private copy of g_build_func_list on each
    line of the console text stored in g_temp_filename (through perform()), followed by
    update_test_dict().  Scanning stops as soon as one of the functions returns False.
    The script exits with status 1 if the console text file does not exist.

    :return: none
    """
    if not os.path.isfile(g_temp_filename):
        print("Error: console output file "+g_temp_filename + " does not exist.")
        sys.exit(1)

    temp_func_list = copy.copy(g_build_func_list)

    with open(g_temp_filename,'r') as console_file:     # temp file that stored jenkins job console output
        for each_line in console_file:  # go through each line of console output to extract build ID, data/time ...
            to_continue = True  # stays True when no extraction function is left to run

            # Iterate over a snapshot: the functions remove themselves from temp_func_list once
            # they are done, and removing from a list while looping over it would silently skip
            # the function that follows the one removed.
            for each_function in list(temp_func_list):
                to_continue = perform(each_function,each_line,temp_func_list)

                if not to_continue:
                    break

            if not to_continue:    # something like build failure or built time out has occurred.  Stop
                break

            update_test_dict(each_line)  # update the test_dict with new tests if found


def get_console_out(url_string):
    """
    Grab the console output from Jenkins with curl and save the content into a temp file
    (g_temp_filename).  The text is stored locally so that it can be read in and processed.

    Parameters
    ----------
    url_string :  str
        contains information on the jenkins job whose console output we are interested in.

    :return: none
    """
    # Pass the URL as a single argument instead of building a shell command line, so that
    # characters such as '&', ';' or spaces in the URL are never interpreted by a shell.
    with open(g_temp_filename,'wb') as temp_file:
        subprocess.call(['curl',url_string],stdout=temp_file)


def extract_job_build_url(url_string):
    """
    From user input, grab the jenkins job name and saved it in g_failed_test_info_dict.
    In addition, it will grab the jenkins url and the view name into g_jenkins_url, and
    g_view_name.  The URL is expected to look like
    http://<host>/view/<view name>/job/<job name>/..., i.e. host, view name and job name are
    the 3rd, 5th and 7th '/'-separated fields.  The script exits with status 1 otherwise.

    Parameters
    ----------
    url_string :  str
        contains information on the jenkins job whose console output we are interested in.

    :return: none
    """
    global g_jenkins_url
    global g_view_name

    tempString = url_string.strip('/').split('/')

    if len(tempString) < 7:     # field 6 (the job name) is read below, so 7 fields are required
        print("Illegal URL resource address.\n")
        sys.exit(1)

    g_failed_test_info_dict["1.jobName"] = tempString[6]

    g_jenkins_url = tempString[2]
    g_view_name = tempString[4]


def grab_java_message():
    """
    Scan through the java output text stored in g_temp_filename and extract the bad java messages
    that may or may not happened when unit tests are run.  It will not record any bad java messages
    that are stored in g_ok_java_messages.

    A line containing g_java_start_text marks the start of a new unit test; the messages gathered
    for the previous test are then handed to associate_test_with_java().  A line whose 6th token is
    one of g_java_message_type starts a bad message, lines that do not look like a standard log
    line are appended to it, and the message is stored through addJavaMessages() when the next
    standard log line (6th token in g_all_java_message_type) is met.

    :return: none
    """
    global g_current_testname
    global g_toContinue

    java_messages = []      # store all bad java messages associated with running a unit test
    java_message_types = [] # store all bad java message types associated with running a unit test

    if not os.path.isfile(g_temp_filename):
        return

    g_toContinue = False    # denote if a multi-line message starts

    tempMessage = ""
    messageType = ""

    # NOTE(review): a message still pending when the file ends, and the messages gathered for the
    # last unit test of the file, are not stored anywhere -- confirm whether this is intended.
    with open(g_temp_filename,'r') as java_file:    # temp file containing content of some java_*_0.out.txt
        for each_line in java_file:

            if (g_java_start_text in each_line):
                startStr,found,endStr = each_line.partition(g_java_start_text)

                if len(found) > 0:
                    if len(g_current_testname) > 0: # a new unit test is being started.  Save old info and move on
                        associate_test_with_java(g_current_testname,java_messages,java_message_types)

                    g_current_testname = endStr.strip() # record the test name

                    java_messages = []
                    java_message_types = []

            temp_strings = each_line.strip().split()

            if (len(temp_strings) >= 6) and (temp_strings[5] in g_all_java_message_type):
                if g_toContinue:    # at the end of last message fragment
                    addJavaMessages(tempMessage,messageType,java_messages,java_message_types)
                    tempMessage = ""
                    messageType = ""

                # start of new message fragment
                g_toContinue = False
            elif g_toContinue:  # non standard output.  Continuation of last java message
                tempMessage += each_line

            if ((len(temp_strings) > 5) and (temp_strings[5] in g_java_message_type)):  # find a bad java message
                startStr,found,endStr = each_line.partition(temp_strings[5])    # can be WARN,ERRR,FATAL,TRACE

                if found and (len(endStr.strip()) > 0):
                    tempMessage += endStr
                    messageType = temp_strings[5]
                    g_toContinue = True


def addJavaMessages(tempMessage,messageType,java_messages,java_message_types):
    """
    Insert Java messages into java_messages and java_message_types if they are associated
    with a unit test or into g_java_general_bad_messages/g_java_general_bad_message_types
    otherwise.  Messages listed in g_ok_java_messages, either under "general" or under the
    name of the current unit test, are ignored.

    Parameters
    ----------
    tempMessage :  str
        contains the bad java messages
    messageType :  str
        contains the bad java message type
    java_messages : list of str
        contains the bad java message list associated with a unit test
    java_message_types :  list of str
        contains the bad java message type list associated with a unit test.

    :return: none
    """
    global g_failure_occurred

    tempMess = tempMessage.strip()

    if tempMess in g_ok_java_messages.get("general",[]):
        return  # message can be ignored for every test

    if len(g_current_testname) == 0:    # java message not associated with any test name
        g_java_general_bad_messages.append(tempMess)
        g_java_general_bad_message_types.append(messageType)
        g_failure_occurred = True
    elif tempMess not in g_ok_java_messages.get(g_current_testname,[]):
        # java message found during a test and not ignored for that test
        java_messages.append(tempMess)
        java_message_types.append(messageType)
        g_failure_occurred = True


def associate_test_with_java(testname,java_message,java_message_type):
    """
    When a new unit test is started as indicated in the java_*_0.out.txt file,
    update the data structures that are keeping track of unit tests being run and
    bad java messages/messages types associated with each unit test.  Since a new
    unit test is being started, save all the bad java messages associated with
    the previous unit test and start a new set for the new unit test.
    Nothing is recorded when java_message is empty.

    Parameters
    ----------
    testname :  str
        previous unit test testname
    java_message :  list of str
        bad java messages associated with testname
    java_message_type :  list of str
        bad java message types associated with testname

    :return :  none
    """
    if len(java_message) == 0:
        return

    if testname in g_failed_jobs:
        message_index = g_failed_jobs.index(testname)
        g_failed_job_java_messages[message_index] = java_message
        g_failed_job_java_message_types[message_index] = java_message_type
    else:   # job has been successfully executed but something still has gone wrong
        g_success_jobs.append(testname)
        g_success_job_java_messages.append(java_message)
        g_success_job_java_message_types.append(java_message_type)


def extract_java_messages():
    """
    Loop through java_*_0.out.txt and extract potentially dangerous WARN/ERRR/FATAL
    messages associated with a test.  The test may even pass but something terrible
    has actually happened.  The java files are only fetched when there are failed tests.
    Afterwards the collected failed/passed test lists and the general bad java messages are
    stored in g_failed_test_info_dict.

    :return: none
    """
    if (len(g_failed_jobs) > 0):  # artifacts available only during failure of some sort
        for fname in g_java_filenames:  # grab java message from each java_*_*_.out file
            temp_strings = fname.split('/')

            start_url = g_jenkins_url

            # path components from index 6 onwards are appended to the artifact url
            for windex in range(6,len(temp_strings)):
                start_url = os.path.join(start_url,temp_strings[windex])

            try:    # first java file path is different.  Can ignore it.
                get_console_out(start_url)  # get java text and save it in local directory for processing
                grab_java_message()         # actually process the java text output and see if we found offensive stuff
            except Exception:
                # deliberate best effort; Ctrl-C and sys.exit() are still allowed to propagate
                pass

    # build up the dict structure that we are storing our data in
    if len(g_failed_jobs) > 0:
        g_failed_test_info_dict["failed_tests_info *********"] = [g_failed_jobs,g_failed_job_java_messages,g_failed_job_java_message_types]
    if len(g_success_jobs) > 0:
        g_failed_test_info_dict["passed_tests_info *********"] = [g_success_jobs,g_success_job_java_messages,g_success_job_java_message_types]

    if len(g_java_general_bad_messages) > 0:
        g_failed_test_info_dict["9.general_bad_java_messages"] = [g_java_general_bad_messages,g_java_general_bad_message_types]


def save_dict():
    """
    Save the log scraping results into logs denoted by g_output_filename_failed_tests and
    g_output_filename_passed_tests, and pickle g_failed_test_info_dict into
    g_output_pickle_filename.  The build id is appended to all three file names.  The passed
    tests log is only written when "passed_tests_info *********" is present in the dict.

    :return: none
    """
    global g_output_filename_failed_tests
    global g_output_filename_passed_tests
    global g_output_pickle_filename

    # some build can fail really early that no build id info is stored in the console text.
    build_id = g_failed_test_info_dict.setdefault("2.build_id","unknown")

    g_output_filename_failed_tests = g_output_filename_failed_tests+'_build_'+build_id+'_failed_tests.log'
    g_output_filename_passed_tests = g_output_filename_passed_tests+'_build_'+build_id+'_passed_tests.log'
    g_output_pickle_filename = g_output_pickle_filename+'_build_'+build_id+'.pickle'

    # write out the jenkins job info into log files.
    with open(g_output_pickle_filename,'wb') as test_file:
        pickle.dump(g_failed_test_info_dict,test_file)

    allKeys = sorted(g_failed_test_info_dict.keys())
    write_passed_tests = "passed_tests_info *********" in allKeys

    # write out the failure report as text into a text file
    with open(g_output_filename_failed_tests,'w') as text_file_failed_tests:
        text_file_passed_tests = open(g_output_filename_passed_tests,'w') if write_passed_tests else None

        try:
            for keyName in allKeys:
                val = g_failed_test_info_dict[keyName]

                if not isinstance(val,list):    # plain build information
                    write_general_build_message(keyName,val,text_file_failed_tests)
                    if write_passed_tests:
                        write_general_build_message(keyName,val,text_file_passed_tests)
                elif len(val) == 3:     # it is a message for a test
                    if keyName == "failed_tests_info *********":
                        write_test_java_message(keyName,val,text_file_failed_tests)

                    if keyName == "passed_tests_info *********":
                        write_test_java_message(keyName,val,text_file_passed_tests)
                elif len(val) == 2:     # it is a general bad java message
                    write_java_message(keyName,val,text_file_failed_tests)
                    if write_passed_tests:
                        write_java_message(keyName,val,text_file_passed_tests)
        finally:
            if text_file_passed_tests is not None:
                text_file_passed_tests.close()


def write_general_build_message(key,val,text_file):
    """
    Write key/value into log file when the value is a string and not a list.

    Parameters
    ----------
    key :  str
        key value in g_failed_test_info_dict
    val :  str
        corresponding value associated with the key in key
    text_file : file handle
        file handle of log file to write the info to.

    :return: none
    """
    text_file.write(key+": ")
    text_file.write(val)
    text_file.write('\n\n')


def write_test_java_message(key,val,text_file):
    """
    Write key/value into log file when the value is a list of strings
    or even a list of list of string.  These lists are associated with
    unit tests that are executed in the jenkins job.

    Parameters
    ----------
    key :  str
        key value in g_failed_test_info_dict
    val :  list of three lists
        val[0] holds the test names, val[1] the list of bad java messages of each test and
        val[2] the list of bad java message types of each test.
    text_file : file handle
        file handle of log file to write the info to.

    :return: none
    """
    text_file.write(key)
    text_file.write('\n')

    for index in range(len(val[0])):
        test_name = val[0][index]
        messages = val[1][index]

        # a test is listed when it failed, or when it passed but produced bad java messages
        if (test_name in g_failed_jobs) or (len(messages) > 0):
            text_file.write("\nTest Name: ")
            text_file.write(test_name)
            text_file.write('\n')

        if (len(messages) > 0) and (len(val) >= 3):
            text_file.write("Java Message Type and Message: \n")
            for eleIndex in range(len(messages)):
                text_file.write(val[2][index][eleIndex]+" ")
                text_file.write(messages[eleIndex])
                text_file.write('\n\n')

    text_file.write('\n')
    text_file.write('\n')


def update_summary_file():
    """
    Concatenate all log files into a summary text file to be sent to users
    at the end of a daily log scraping.

    :return: none
    """
    with open(g_summary_text_filename,'a') as tempfile:
        write_file_content(tempfile,g_output_filename_failed_tests)
        write_file_content(tempfile,g_output_filename_passed_tests)


def write_file_content(fhandle,file2read):
    """
    Write one log file into the summary text file.  Nothing is written when the log file
    does not exist.

    Parameters
    ----------
    fhandle :  Python file handle
        file handle to the summary text file
    file2read : str
        name of the log file whose content we want to add to the summary text file.

    :return: none
    """
    if os.path.isfile(file2read):

        # write summary of failed tests logs
        with open(file2read,'r') as tfile:
            fhandle.write('============ Content of '+ file2read)
            fhandle.write('\n')
            fhandle.write(tfile.read())
            fhandle.write('\n\n')


def write_java_message(key,val,text_file):
    """
    Loop through all java messages that are not associated with a unit test and
    write them into a log file.

    Parameters
    ----------
    key :  str
        9.general_bad_java_messages
    val :  list of list of str
        val[0] contains the bad java messages and val[1] the matching message types, which is
        the layout stored under "9.general_bad_java_messages" in g_failed_test_info_dict.
    text_file : file handle
        file handle of log file to write the info to.

    :return: none
    """
    text_file.write(key)
    text_file.write('\n')

    if (len(val) >= 2) and (len(val[0]) > 0):
        for index in range(len(val[0])):
            text_file.write("Java Message Type: ")
            text_file.write(val[1][index])
            text_file.write('\n')

            text_file.write("Java Message: ")
            text_file.write(val[0][index])
            text_file.write('\n')

        text_file.write('\n \n')


def load_java_messages_to_ignore():
    """
    Load in pickle file that contains dict structure with bad java messages to ignore per unit test
    or for all cases.  The ignored bad java info is stored in g_ok_java_messages dict.  The
    "general" entry is created as an empty list when it is missing.

    :return: none
    """
    global g_ok_java_messages

    if os.path.isfile(g_java_message_pickle_filename):
        # SECURITY: unpickling can execute arbitrary code; only load pickle files that were
        # produced by a trusted source.
        with open(g_java_message_pickle_filename,'rb') as tfile:
            g_ok_java_messages = pickle.load(tfile)

    g_ok_java_messages.setdefault("general",[])


def main(argv):
    """
    Main program.  Fetch the console output of one jenkins job, mine it for failed tests and
    bad java messages and, when a failure was found, save the result and add it to the summary
    text file.

    Parameters
    ----------
    argv :  list of str
        script name, resource url of the jenkins console output, filename of the summary text
        and, optionally, filename of the pickle file with the java messages to ignore.

    @return: none
    """
    global g_script_name
    global g_temp_filename
    global g_output_filename_failed_tests
    global g_output_filename_passed_tests
    global g_output_pickle_filename
    global g_failure_occurred
    global g_java_message_pickle_filename
    global g_summary_text_filename

    if len(argv) < 3:
        print("Must resource url like http://mr-0xa1:8080/view/wendy_jenkins/job/h2o_regression_pyunit_medium_large/lastBuild/consoleFull, filename of summary text, filename (optional ending in .pickle) to retrieve Java error messages to exclude.\n")
        sys.exit(1)

    # we may be in business
    g_script_name = os.path.basename(argv[0])   # get name of script being run.
    resource_url = argv[1]

    g_temp_filename = os.path.join(g_test_root_dir,'tempText')
    g_summary_text_filename = os.path.join(g_test_root_dir,argv[2])

    if len(argv) >= 4:
        g_java_message_pickle_filename = argv[3]

    get_console_out(resource_url)       # save remote console output in local directory
    extract_job_build_url(resource_url) # extract the job name of build id for identification purposes

    log_filename = g_failed_test_info_dict["1.jobName"]
    log_pickle_filename = g_failed_test_info_dict["1.jobName"]

    # pickle file that store bad Java messages that we can ignore.
    g_java_message_pickle_filename = os.path.join(g_test_root_dir,g_java_message_pickle_filename)
    g_output_filename_failed_tests = os.path.join(g_test_root_dir,log_filename)
    g_output_filename_passed_tests = os.path.join(g_test_root_dir,log_filename)
    g_output_pickle_filename = os.path.join(g_test_root_dir,log_pickle_filename)

    load_java_messages_to_ignore()  # load in bad java messages to ignore and store in g_ok_java_messages
    extract_test_results()          # grab the console text and stored the failed tests.
    extract_java_messages()         # grab dangerous java messages that we found for the various unit tests

    # .get() is used as the build failure entry may not have been set by the console scan
    if (len(g_failed_jobs) > 0) or (g_failed_test_info_dict.get("7.build_failure") == 'Yes'):
        g_failure_occurred = True

    if g_failure_occurred:
        save_dict() # save the dict structure in a pickle file and a text file when failure is detected
        update_summary_file()   # join together all log files into one giant summary text.

        # output this info to console to form the list of failed jenkins jobs.
        print(g_failed_test_info_dict["1.jobName"]+' build '+g_failed_test_info_dict["2.build_id"]+',')
    else:
        print("")


if __name__ == "__main__":
    main(sys.argv)