diff --git a/OneFlow/LanguageModeling/GPT/extract_gpt_result.py b/OneFlow/LanguageModeling/GPT/extract_gpt_result.py
new file mode 100644
index 00000000..348bf499
--- /dev/null
+++ b/OneFlow/LanguageModeling/GPT/extract_gpt_result.py
@@ -0,0 +1,95 @@
+import os
+import argparse
+from extract_util import extract_result
+
+
+parser = argparse.ArgumentParser(description="flags for GPT benchmark")
+parser.add_argument(
+    "--benchmark_log_dir", type=str, default="./logs/oneflow", required=False
+)
+parser.add_argument("--start_iter", type=int, default=300)
+parser.add_argument("--end_iter", type=int, default=400)
+parser.add_argument("--print_mode", type=str, default="markdown")
+args = parser.parse_args()
+
+
+def extract_info_from_file(log_file):
+    """Parse one GPT benchmark log file. Expected content:
+    num_nodes ....................................... 1
+    num_gpus_per_node ............................... 8
+    data_parallel_size .............................. 1
+    tensor_model_parallel_size ...................... 8
+    pipeline_model_parallel_size .................... 1
+    global_batch_size ............................... 32
+    micro_batch_size ................................ 32
+    num_accumulation_steps .......................... 1
+    num_layers ...................................... 16
+    hidden_size ..................................... 2304
+    num_attention_heads ............................. 16
+    seq_length ...................................... 2048
+    log_interval .................................... 1
+    Training...
+    | step | micro_batches | samples | throughput | latency | loss |
+    | -------- | --------------- | --------------- | ---------- | ---------- | ---------- |
+    | 1 | 1 | 32 | 3.65895 | 8.74569 | 11.27187 |
+    | 2 | 2 | 64 | 5.92391 | 5.40183 | 22.54614 |
+    | 3 | 3 | 96 | 33.08657 | 0.96716 | 33.82825 |
+    | 4 | 4 | 128 | 32.91274 | 0.97227 | 45.10602 |
+    | 5 | 5 | 160 | 33.05942 | 0.96795 | 56.36795 |
+    | 6 | 6 | 192 | 32.97452 | 0.97045 | 67.64371 |
+    | 7 | 7 | 224 | 32.75634 | 0.97691 | 78.92993 |
+    | 8 | 8 | 256 | 33.13264 | 0.96581 | 90.20315 |
+    | 9 | 9 | 288 | 33.01570 | 0.96924 | 101.47802 |
+    utilization.gpu [%], memory.used [MiB]
+    100 %, 13858 MiB
+    100 %, 13994 MiB
+    100 %, 13994 MiB
+    100 %, 13994 MiB
+    100 %, 13994 MiB
+    93 %, 13994 MiB
+    100 %, 14102 MiB
+    100 %, 13850 MiB
+    """
+    # extract config values, per-step throughput/latency and peak GPU memory from the log content
+    # print('extract file:',log_file)
+    result_dict = {}
+    with open(log_file, "r") as f:
+        for line in f.readlines():
+            ss = line.split(" ")
+            if len(ss) == 5 and ss[2] in [
+                "num_nodes",
+                "num_gpus_per_node",
+                "data_parallel_size",
+                "tensor_model_parallel_size",
+                "pipeline_model_parallel_size",
+                "micro_batch_size",
+                "global_batch_size",
+                "num_accumulation_steps",
+                "num_layers",
+                "hidden_size",
+                "num_attention_heads",
+                "seq_length",
+                "log_interval",
+            ]:
+                result_dict[ss[2]] = ss[-1].strip()
+            elif len(ss) == 4 and "MiB" in line and "utilization" not in line:
+                memory_used = int(ss[-2])
+                if (
+                    "memory" not in result_dict.keys()
+                    or result_dict["memory"] < memory_used
+                ):
+                    result_dict["memory"] = memory_used
+
+            ss = line.split("|")
+            if len(ss) == 8 and "loss" not in line and "-" not in line:
+                tmp_line = "".join(line.split(" ")).split("|")
+                result_dict["throughput_{}".format(tmp_line[1])] = float(tmp_line[4])
+                result_dict["latency_{}".format(tmp_line[1])] = (
+                    float(tmp_line[5]) * 1000
+                )
+
+    return result_dict
+
+
+if __name__ == "__main__":
+    extract_result(args, extract_info_from_file)
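For reference, a minimal sketch (not part of the patch) of how extract_info_from_file can be exercised on a hand-written log. The file name sample.log and the sample lines are assumptions that simply mirror the docstring above; the two leading spaces on the config lines are deliberate, since the parser expects line.split(" ")[2] to be the config key. Importing extract_gpt_result also runs its argparse setup, so run the sketch without extra command-line arguments.

from extract_gpt_result import extract_info_from_file

# A tiny log whose lines copy the format shown in the docstring.
SAMPLE_LOG = (
    "  num_nodes ....................................... 1\n"
    "  num_gpus_per_node ............................... 8\n"
    "  log_interval .................................... 1\n"
    "| step | micro_batches | samples | throughput | latency | loss |\n"
    "| 3 | 3 | 96 | 33.08657 | 0.96716 | 33.82825 |\n"
    "| 4 | 4 | 128 | 32.91274 | 0.97227 | 45.10602 |\n"
    "100 %, 13994 MiB\n"
)

with open("sample.log", "w") as f:
    f.write(SAMPLE_LOG)

info = extract_info_from_file("sample.log")
print(info["num_gpus_per_node"])  # '8'        (config values stay strings)
print(info["memory"])             # 13994      (largest memory.used seen, in MiB)
print(info["throughput_4"])       # 32.91274   (samples/sec at step 4)
print(info["latency_4"])          # ~972.27    (seconds * 1000 -> milliseconds)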
diff --git a/OneFlow/LanguageModeling/GPT/extract_util.py b/OneFlow/LanguageModeling/GPT/extract_util.py
new file mode 100644
index 00000000..fa55834c
--- /dev/null
+++ b/OneFlow/LanguageModeling/GPT/extract_util.py
@@ -0,0 +1,98 @@
+import os
+import glob
+from statistics import median
+
+
+def compute_throughput(result_dict, args):
+    throughput = 0
+    latency = 0
+    log_interval = int(result_dict["log_interval"])
+    for i in range(args.start_iter, args.end_iter + log_interval, log_interval):
+        throughput += result_dict["throughput_{}".format(i)]
+        latency += result_dict["latency_{}".format(i)]
+
+    return (
+        latency / (args.end_iter - args.start_iter),
+        throughput / (args.end_iter - args.start_iter),
+    )
+
+
+def get_mode_print(mode):
+    def mode_print(lst):
+        if mode == "markdown":
+            print(
+                "|",
+                " | ".join(
+                    ("{:.2f}" if type(v) is float else "{}").format(v) for v in lst
+                ),
+                "|",
+            )
+        else:
+            print(
+                ",".join(
+                    ("{:.2f}" if type(v) is float else "{}").format(v) for v in lst
+                )
+            )
+
+    return mode_print
+
+
+def extract_result(args, extract_func):
+    mode_print = get_mode_print(args.print_mode)
+    logs_list = glob.glob(os.path.join(args.benchmark_log_dir, "*/*.log"))
+    logs_list = sorted(logs_list)
+
+    throughput_final_result_dict = {}
+    memory_final_result_dict = {}
+    latency_final_result_dict = {}
+    print("## All Results")
+    header_list = ["case", "memory (MiB)", "latency (ms)", "throughput (samples/sec)"]
+    mode_print(header_list)
+    if args.print_mode == "markdown":
+        mode_print(["--------" for _ in range(4)])
+    for log_file in logs_list:
+        result_dict = extract_func(log_file)
+        latency, throughput = compute_throughput(result_dict, args)
+        case = "{num_nodes}n{num_gpus_per_node}g_dp{data_parallel_size}_mp{tensor_model_parallel_size}_pp{pipeline_model_parallel_size}_mbs{micro_batch_size}_gbs{global_batch_size}_na{num_accumulation_steps}_l{num_layers}_hs{hidden_size}_nah{num_attention_heads}_sl{seq_length}".format(
+            **result_dict
+        )
+        mode_print(
+            [
+                case,
+                "{} (MiB)".format(result_dict["memory"]),
+                "{} (ms)".format(round(latency, 2)),
+                throughput,
+            ]
+        )
+
+        if case in throughput_final_result_dict:
+            throughput_final_result_dict[case].append(throughput)
+            memory_final_result_dict[case].append(result_dict["memory"])
+            latency_final_result_dict[case].append(latency)
+        else:
+            throughput_final_result_dict[case] = [throughput]
+            memory_final_result_dict[case] = [result_dict["memory"]]
+            latency_final_result_dict[case] = [latency]
+
+    # per case: keep peak memory, median latency and median throughput across repeated runs
+    final_result_list = []
+    for k, v in throughput_final_result_dict.items():
+        final_result_list.append(
+            [
+                k,
+                max(memory_final_result_dict[k]),
+                median(latency_final_result_dict[k]),
+                median(v),
+            ]
+        )
+
+    # sort final_result_list
+    # final_result_list = sorted(final_result_list, key=lambda x: (-x[2], x[0], x[1]))
+
+    # print results
+    print("## Filtered Result `median value`")
+    mode_print(["case", "memory (MiB)", "latency (ms)", "throughput (samples/sec)"])
+    if args.print_mode == "markdown":
+        mode_print(["--------" for _ in range(4)])
+    for res in final_result_list:
+        mode_print(res)
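A companion sketch (again not part of the patch; the result_dict values and the iteration window are illustrative) showing how compute_throughput and get_mode_print combine the per-step entries into one table row, the way extract_result does for every log found under --benchmark_log_dir. The per-step numbers are copied from the docstring sample, with the window shrunk from the default 300-400 to the steps actually present. Note that compute_throughput divides the summed values by end_iter - start_iter, i.e. by the span of the window rather than by the number of sampled steps.

from argparse import Namespace

from extract_util import compute_throughput, get_mode_print

# Per-step values as extract_info_from_file would produce them (latency already in ms).
result_dict = {
    "log_interval": "1",
    "memory": 13994,
    "throughput_3": 33.08657, "latency_3": 967.16,
    "throughput_4": 32.91274, "latency_4": 972.27,
    "throughput_5": 33.05942, "latency_5": 967.95,
}

# Average over steps 3..5 instead of the script's default --start_iter/--end_iter window.
args = Namespace(start_iter=3, end_iter=5)
latency, throughput = compute_throughput(result_dict, args)

# Emit one markdown row, as the "All Results" table does for each log file.
# The case name below is what the format string in extract_result yields for the
# docstring's config values.
mode_print = get_mode_print("markdown")
mode_print(
    [
        "1n8g_dp1_mp8_pp1_mbs32_gbs32_na1_l16_hs2304_nah16_sl2048",
        "{} (MiB)".format(result_dict["memory"]),
        "{} (ms)".format(round(latency, 2)),
        throughput,
    ]
)
# -> | 1n8g_dp1_mp8_pp1_mbs32_gbs32_na1_l16_hs2304_nah16_sl2048 | 13994 (MiB) | 1453.69 (ms) | 49.53 |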