diff --git a/README.md b/README.md index 10e796bc5b5..12468b4c44b 100644 --- a/README.md +++ b/README.md @@ -54,10 +54,10 @@ | Model | Batch | Hardware | fps | Target fps | Release | |-----------------------------------------------------------------------------|-------|----------------------------------------------------------|---------|------------|-------------| | [ResNet-50 (224x224)](./models/demos/grayskull/resnet50) | 20 | [e150](https://tenstorrent.com/hardware/grayskull) | 5,100 | 10,000 | | -| [ResNet-50 (224x224)](./models/demos/wormhole/resnet50) | 16 | [n150](https://tenstorrent.com/hardware/wormhole) | 4,670 | 7,000 | | -| [ResNet-50 (224x224) (DP=2)](./models/demos/wormhole/resnet50) | 32 | [n300](https://tenstorrent.com/hardware/wormhole) | 8,200 | 14,000 | | -| [ResNet-50 (224x224) (DP=8)](./models/demos/t3000/resnet50) | 128 | [QuietBox](https://tenstorrent.com/hardware/tt-quietbox) | 32,250 | 56,000 | | -| [ResNet-50 (224x224) (DP=32)](./models/demos/tg/resnet50) | 512 | [Galaxy](https://tenstorrent.com/hardware/galaxy) | 95,900 | 224,000 | | +| [ResNet-50 (224x224)](./models/demos/wormhole/resnet50) | 16 | [n150](https://tenstorrent.com/hardware/wormhole) | 4,700 | 7,000 | | +| [ResNet-50 (224x224) (DP=2)](./models/demos/wormhole/resnet50) | 32 | [n300](https://tenstorrent.com/hardware/wormhole) | 9,200 | 14,000 | | +| [ResNet-50 (224x224) (DP=8)](./models/demos/t3000/resnet50) | 128 | [QuietBox](https://tenstorrent.com/hardware/tt-quietbox) | 35,800 | 56,000 | | +| [ResNet-50 (224x224) (DP=32)](./models/demos/tg/resnet50) | 512 | [Galaxy](https://tenstorrent.com/hardware/galaxy) | 96,800 | 224,000 | | | [ResNet-50 (224x224) (DP=64)](./models/demos/tgg/resnet50) | 1024 | [Two Galaxies](https://tenstorrent.com/hardware/galaxy) | 145,000 | 448,000 | | | [ViT (224x224)](./models/demos/grayskull/vit) | 9 | [e150](https://tenstorrent.com/hardware/grayskull) | 1,360 | 2,000 | | | [ViT (224x224)](./models/demos/wormhole/vit) | 8 | 
[n150](https://tenstorrent.com/hardware/wormhole) | 912 | 1,600 | | diff --git a/models/demos/t3000/resnet50/README.md b/models/demos/t3000/resnet50/README.md index 58d52504532..5a045b1ec6b 100644 --- a/models/demos/t3000/resnet50/README.md +++ b/models/demos/t3000/resnet50/README.md @@ -20,4 +20,4 @@ ResNet50 is a deep convolutional neural network architecture with 50 layers, des WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/t3000/resnet50/tests/test_perf_e2e_resnet50.py::test_perf_trace_2cqs ``` + This will generate a CSV with the timings and throughputs. -+ **Expected end-to-end perf**: For batch = 16 per device, or batch 128 in total, it is about `32,250 fps` currently. This may vary machine to machine. ++ **Expected end-to-end perf**: For batch = 16 per device, or batch 128 in total, it is about `35,800 fps` currently. This may vary from machine to machine. diff --git a/models/demos/tg/resnet50/README.md b/models/demos/tg/resnet50/README.md index caa3335fb6c..c48b7c38d55 100644 --- a/models/demos/tg/resnet50/README.md +++ b/models/demos/tg/resnet50/README.md @@ -20,4 +20,4 @@ ResNet50 is a deep convolutional neural network architecture with 50 layers, des pytest models/demos/tg/resnet50/tests/test_perf_e2e_resnet.py::test_perf_trace` ``` + This will generate a CSV with the timings and throughputs. -+ **Expected end-to-end perf**: For batch = 16 per device, or batch 512 in total, it is about `95,900 fps` currently. This may vary machine to machine. ++ **Expected end-to-end perf**: For batch = 16 per device, or batch 512 in total, it is about `96,800 fps` currently. This may vary from machine to machine. 
diff --git a/models/demos/ttnn_resnet/tests/resnet50_test_infra.py b/models/demos/ttnn_resnet/tests/resnet50_test_infra.py index 7c6e113da91..0d7795b9c5e 100644 --- a/models/demos/ttnn_resnet/tests/resnet50_test_infra.py +++ b/models/demos/ttnn_resnet/tests/resnet50_test_infra.py @@ -7,6 +7,7 @@ import pytest import torch import torchvision +import copy import ttnn from ttnn.model_preprocessing import ( @@ -39,7 +40,7 @@ def load_resnet50_model(model_location_generator): ## copied from ttlib version test: # golden pcc is ordered fidelity, weight dtype, activation dtype -golden_pcc = { +golden_pcc_obj = { 8: { ( ttnn.MathFidelity.HiFi4, @@ -142,8 +143,8 @@ def load_resnet50_model(model_location_generator): } golden_pcc = { - ttnn.device.Arch.WORMHOLE_B0: golden_pcc, - ttnn.device.Arch.GRAYSKULL: golden_pcc, + ttnn.device.Arch.WORMHOLE_B0: copy.deepcopy(golden_pcc_obj), + ttnn.device.Arch.GRAYSKULL: copy.deepcopy(golden_pcc_obj), } golden_pcc[ttnn.device.Arch.GRAYSKULL][16][ diff --git a/models/demos/ttnn_resnet/tt/ttnn_functional_resnet50_new_conv_api.py b/models/demos/ttnn_resnet/tt/ttnn_functional_resnet50_new_conv_api.py index 688f25a9957..790a9eb5c5b 100644 --- a/models/demos/ttnn_resnet/tt/ttnn_functional_resnet50_new_conv_api.py +++ b/models/demos/ttnn_resnet/tt/ttnn_functional_resnet50_new_conv_api.py @@ -610,7 +610,9 @@ def __init__( if type(device) == ttnn.MeshDevice and device.get_num_devices() > 8: self.conv1_config.act_block_h_override = 64 else: - self.conv1_config.act_block_h_override = 49 * 32 + # TODO: restore after issue #16895 is fixed + # self.conv1_config.act_block_h_override = 49 * 32 + self.conv1_config.act_block_h_override = 2 * 32 self.conv1_kernel_size = (4, 4) self.conv1_stride = (1, 1) diff --git a/models/demos/wormhole/resnet50/README.md b/models/demos/wormhole/resnet50/README.md index e79a3e5675e..4d0fed882ce 100644 --- a/models/demos/wormhole/resnet50/README.md +++ b/models/demos/wormhole/resnet50/README.md @@ -49,4 +49,4 @@ 
WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/wormhole/ WH_ARCH_YAML=wormhole_b0_80_arch_eth_dispatch.yaml pytest models/demos/wormhole/resnet50/tests/test_perf_e2e_resnet50.py::test_perf_trace_2cqs ``` + This will generate a CSV with the timings and throughputs. -+ **Expected end-to-end perf**: For batch = 16, it is about `4,100 fps` currently. This may vary machine to machine. ++ **Expected end-to-end perf**: For batch = 16, it is about `4,700 fps` currently. This may vary from machine to machine.