intel · Devjiu · Jan 22, 2024 · Jan 23, 2024 · Jan 26, 2024 · Jan 26, 2024
diff --git a/dl_bench/cli/launcher.py b/dl_bench/cli/launcher.py
@@ -6,6 +6,7 @@
 from dl_bench.mlp import MlpBenchmark
 from dl_bench.cnn import CnnBenchmark
 from dl_bench.llm import LlmBenchmark
+from dl_bench.ops import OpsBenchmark
 from dl_bench.mlp_basic import MlpBasicBenchmark
 from dl_bench.report.report import BenchmarkDb
 from dl_bench.utils import Backend
@@ -16,6 +17,7 @@
     "mlp": MlpBenchmark,
     "cnn": CnnBenchmark,
     "llm": LlmBenchmark,
+    "ops": OpsBenchmark,
 }
 
 

diff --git a/dl_bench/ops.py b/dl_bench/ops.py
@@ -0,0 +1,204 @@
+from dl_bench.utils import Benchmark, RandomInfDataset
+
+import torch
+
+from collections import OrderedDict
+import numpy as np
+
+import torch.nn as nn
+
+
+
+def summary(model, input_size, batch_size=-1, device=torch.device('cuda:0'), dtypes=None):
+    result, params_info, summary = summary_string(
+        model, input_size, batch_size, device, dtypes)
+    print(result)
+
+    return params_info, summary
+
+
+def summary_string(model, input_size, batch_size=-1, device=torch.device('cuda:0'), dtypes=None):
+    if dtypes == None:
+        dtypes = [torch.FloatTensor]*len(input_size)
+
+    summary_str = ''
+
+    def register_hook(module):
+        def hook(module, input, output):
+            class_name = str(module.__class__).split(".")[-1].split("'")[0]
+            module_idx = len(summary)
+
+            m_key = "%s-%i" % (class_name, module_idx + 1)
+            summary[m_key] = OrderedDict()
+            summary[m_key]["module"] = module
+            summary[m_key]["input_shape"] = list(input[0].size())
+            summary[m_key]["input_shape"][0] = batch_size
+            if isinstance(output, (list, tuple)):
+                summary[m_key]["output_shape"] = [
+                    [-1] + list(o.size())[1:] for o in output
+                ]
+            else:
+                summary[m_key]["output_shape"] = list(output.size())
+                summary[m_key]["output_shape"][0] = batch_size
+
+            params = 0
+            if hasattr(module, "weight") and hasattr(module.weight, "size"):
+                params += torch.prod(torch.LongTensor(list(module.weight.size())))
+                summary[m_key]["trainable"] = module.weight.requires_grad
+            if hasattr(module, "bias") and hasattr(module.bias, "size"):
+                params += torch.prod(torch.LongTensor(list(module.bias.size())))
+            summary[m_key]["nb_params"] = params
+
+        if (
+            not isinstance(module, nn.Sequential)
+            and not isinstance(module, nn.ModuleList)
+        ):
+            hooks.append(module.register_forward_hook(hook))
+
+    # multiple inputs to the network
+    if isinstance(input_size, tuple):
+        input_size = [input_size]
+
+    # batch_size of 2 for batchnorm
+    x = [torch.rand(2, *in_size).type(dtype).to(device=device)
+         for in_size, dtype in zip(input_size, dtypes)]
+
+    # create properties
+    summary = OrderedDict()
+    hooks = []
+
+    # register hook
+    model.apply(register_hook)
+
+    # make a forward pass
+    # print(x.shape)
+    model(*x)
+
+    # remove these hooks
+    for h in hooks:
+        h.remove()
+
+    summary_str += "----------------------------------------------------------------" + "\n"
+    line_new = "{:>20}  {:>25}  {:>25} {:>15}".format(
+        "Layer (type)", "Input Shape", "Output Shape", "Param #")
+    summary_str += line_new + "\n"
+    summary_str += "================================================================" + "\n"
+    total_params = 0
+    total_output = 0
+    trainable_params = 0
+    for layer in summary:
+        # input_shape, output_shape, trainable, nb_params
+        line_new = "{:>20}  {:>25}  {:>25} {:>15}".format(
+            layer,
+            str(summary[layer]["input_shape"]),
+            str(summary[layer]["output_shape"]),
+            "{0:,}".format(summary[layer]["nb_params"]),
+        )
+        total_params += summary[layer]["nb_params"]
+
+        total_output += np.prod(summary[layer]["output_shape"])
+        if "trainable" in summary[layer]:
+            if summary[layer]["trainable"] == True:
+                trainable_params += summary[layer]["nb_params"]
+        summary_str += line_new + "\n"
+
+    # assume 4 bytes/number (float on cuda).
+    total_input_size = abs(np.prod(sum(input_size, ()))
+                           * batch_size * 4. / (1024 ** 2.))
+    total_output_size = abs(2. * total_output * 4. /
+                            (1024 ** 2.))  # x2 for gradients
+    total_params_size = abs(total_params * 4. / (1024 ** 2.))
+    total_size = total_params_size + total_output_size + total_input_size
+
+    summary_str += "================================================================" + "\n"
+    summary_str += "Total params: {0:,}".format(total_params) + "\n"
+    summary_str += "Trainable params: {0:,}".format(trainable_params) + "\n"
+    summary_str += "Non-trainable params: {0:,}".format(total_params -
+                                                        trainable_params) + "\n"
+    summary_str += "----------------------------------------------------------------" + "\n"
+    summary_str += "Input size (MB): %0.2f" % total_input_size + "\n"
+    summary_str += "Forward/backward pass size (MB): %0.2f" % total_output_size + "\n"
+    summary_str += "Params size (MB): %0.2f" % total_params_size + "\n"
+    summary_str += "Estimated Total Size (MB): %0.2f" % total_size + "\n"
+    summary_str += "----------------------------------------------------------------" + "\n"
+    # return summary
+    return summary_str, (total_params, trainable_params), summary
+
+class Layers:
+    def __init__(self) -> None:
+        pass
+
+    def get_convs_from_resnet(self):
+        from torchvision.models import resnet50, resnet18, ResNet
+        from torchvision.models.resnet import Bottleneck, BasicBlock
+
+        resnet = resnet50()
+        resnet.eval()
+        _, summ = summary(resnet, ( 3, 224, 224), batch_size=-1, device=torch.device('cpu'))
+
+        convs = []
+        in_shs =[]
+        import copy
+        for layer in summ:
+            module = summ[layer]["module"]
+            if isinstance(module, nn.Conv2d):
+                in_sh = summ[layer]["input_shape"]
+                in_sh = in_sh[1:4]
+                convs.append(module)
+                in_shs.append(in_sh)
+        self.convs = convs
+        self.in_shs = in_shs
+
+
+
+class Conv2dNoPaddingModule(torch.nn.Module):
+
+    def __init__(self):
+        super().__init__()
+        torch.manual_seed(0)
+        self.conv = torch.nn.Conv2d(2, 10, 3, bias=False)
+        self.train(False)
+
+    def forward(self, x):
+        return self.conv(x)
+
+layers = Layers()
+layers.get_convs_from_resnet()
+
+def get_op(name):
+    name2model = {
+        "conv210": Conv2dNoPaddingModule,
+    }
+
+    for i in range(len(layers.in_shs)):
+        def factory(ind):
+            return lambda: layers.convs[ind]
+        name2model["conv_" + str(i)] = factory(i)
+
+    if name in name2model:
+        return name2model[name]()
+    else:
+        raise ValueError(f"Unknown name {name}")
+
+
+class OpsBenchmark(Benchmark):
+    def __init__(self, params) -> None:
+        batch_size = int(params.get("batch_size", 1024))
+
+        name = params.get("name", "conv210")
+        if name.split("_")[0] == "conv":
+            in_shs = layers.in_shs
+            in_shape = tuple(in_shs[int(name.split("_")[1])])
+            # print(tuple(in_shape), " type: ", type(in_shape))
+        else:
+            in_shape = (2, 10, 20)
+        min_batches = 10
+        DATASET_SIZE = max(10_240, batch_size * min_batches)
+        dataset = RandomInfDataset(DATASET_SIZE, in_shape)
+        # import sys
+        # sys.exit(0)
+        net = get_op(name=name)
+
+        super().__init__(
+            net=net, in_shape=in_shape, dataset=dataset, batch_size=batch_size
+        )
diff --git a/ops.sh b/ops.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+
+set -x
+
+export ONEDNN_VERBOSE=all
+export ONEDNN_VERBOSE_TIMESTAMP=1
+
+if [[ -z "${DL_BENCH_ARGS}" ]]; then
+  echo "Please, provide DL_BENCH_ARGS environment variable"
+  exit 1
+fi
+
+CNNS=(conv_0 conv_1 conv_2 conv_3 conv_4 conv_5 conv_6 conv_7 conv_8 conv_9 conv_10 conv_11 conv_12 conv_13 conv_14 conv_15 conv_16 conv_17 conv_18 conv_19 conv_20 conv_21 conv_22 conv_23 conv_24 conv_25 conv_26 conv_27 conv_28 conv_29 conv_30 conv_31 conv_32 conv_33 conv_34 conv_35 conv_36 conv_37 conv_38 conv_39 conv_40 conv_41 conv_42 conv_43 conv_44 conv_45 conv_46 conv_47 conv_48 conv_49 conv_50 conv_51 conv_52 conv_53)
+for BS in 0001 0032 0128
+do
+    for name in "${CNNS[@]}"
+    do
+        echo "Benchmark $name"
+        benchmark-run -b ops -p "name='${name}',batch_size='$BS'" --benchmark_desc "${name}_bs$BS" ${DL_BENCH_ARGS} || echo Failed
+    done
+done