forked from kata-containers/kata-containers
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtensorflow_resnet50_fp32.sh
executable file
·161 lines (131 loc) · 4.41 KB
/
tensorflow_resnet50_fp32.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
#!/bin/bash
#
# Copyright (c) 2023 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
# Make a pipeline fail when any of its stages fails, not only the last one.
set -o pipefail
# General env
# Directory holding this script, with symlinks resolved.
SCRIPT_PATH=$(dirname "$(readlink -f "$0")")
# Shared helper library. Helpers and variables used below but not defined in
# this file (info, random_name, CTR_EXE, metrics_json_*, ...) presumably come
# from here -- verify against lib/common.bash.
source "${SCRIPT_PATH}/../lib/common.bash"
# Container image that runs the benchmark and the Dockerfile handed to
# check_ctr_images together with it.
IMAGE="docker.io/library/resnet50_fp32:latest"
DOCKERFILE="${SCRIPT_PATH}/resnet50_fp32_dockerfile/Dockerfile"
# Scratch file that accumulates the throughput values of all containers.
# The template has no directory part, so it is created in the current
# working directory; it is removed by the EXIT trap.
tensorflow_file=$(mktemp tensorflowresults.XXXXXXXXXX)
# Positional arguments: number of containers and launch timeout.
NUM_CONTAINERS="$1"
TIMEOUT="$2"
# NOTE(review): TEST_NAME, TIMEOUT and TESTDIR are not referenced again in
# this script; presumably they are consumed by the sourced helpers -- confirm.
TEST_NAME="resnet50_fp32"
# Command that keeps each container alive until work is exec'ed into it.
PAYLOAD_ARGS="tail -f /dev/null"
TESTDIR="${TESTDIR:-/testdir}"
# Options to control the start of the workload using a trigger-file
# src_dir (host, freshly created temp directory) is bind-mounted read-only at
# dst_dir inside every container.
dst_dir="/host"
src_dir=$(mktemp --tmpdir -d tensorflowresnet.XXXXXXXXXX)
MOUNT_OPTIONS="type=bind,src=$src_dir,dst=$dst_dir,options=rbind:ro"
start_script="resnet50_fp32_start.sh"
# CMD points to the script that starts the workload
CMD="$dst_dir/$start_script"
# NOTE(review): trigger_file is not assigned anywhere in this script before
# this point. Unless the sourced library defines it, both paths below end in
# "/" and refer to the directories themselves -- confirm.
guest_trigger_file="$dst_dir/$trigger_file"
host_trigger_file="$src_dir/$trigger_file"
# Default value; main() overwrites it with the measured pid count plus one.
INITIAL_NUM_PIDS=1
# Command passed to collect_results: counts the 'Throughput' lines in the
# container-side "results" file.
CMD_FILE="cat results | grep 'Throughput' | wc -l"
# Command run inside each container: for every 'Throughput' line take the text
# after the first ':' and keep its second space-separated field, then join the
# values with commas (a trailing comma is left at the end).
CMD_RESULTS="cat results | grep 'Throughput' | cut -d':' -f2 | cut -d' ' -f2 | tr '\n' ','"
# Remove the scratch artifacts created when the script is loaded: the results
# file (tensorflow_file) and the host directory that is bind-mounted into the
# containers (src_dir).
#
# Registered as the EXIT handler so both are cleaned up on every exit path,
# including the early "wrong arguments" exit that happens before main() gets
# a chance to delete src_dir itself.
#
# Globals read: tensorflow_file, src_dir
function remove_tmp_file() {
  local path
  for path in "${tensorflow_file:-}" "${src_dir:-}"; do
    # Skip names that are unset or empty so rm never gets an empty operand.
    [ -n "${path}" ] || continue
    # Best effort and without sudo: on the normal path main() has already
    # removed src_dir, which makes this a no-op.
    rm -rf -- "${path}"
  done
}
trap remove_tmp_file EXIT
# Print the usage summary of this script on stdout.
# $0 is expanded so the text shows the name the script was invoked with.
function help() {
  printf '%s\n' \
    "Usage: $0 <count> <timeout>" \
    "Description:" \
    "This script launches n number of containers" \
    "to run the ResNet50 fp32 model using a Tensorflow" \
    "container." \
    "Options:" \
    "<count> : Number of containers to run." \
    "<timeout> : Timeout to launch the containers."
}
# Generate the executable launcher that each container runs to start the
# ResNet50 FP32 inference benchmark. The launcher is written into src_dir
# under the name start_script and appends the benchmark output to a file
# called "results" in the working directory of the process that runs it.
#
# Globals read: src_dir, start_script
function create_start_script() {
  local -r launcher="${src_dir}/${start_script}"
  # Drop any previous copy first; the content below is appended.
  rm -rf "${launcher}"
  printf '%s\n' \
    '#!/bin/bash' \
    'python3.10 models/benchmarks/launch_benchmark.py --benchmark-only --framework tensorflow --model-name resnet50 --precision fp32 --mode inference --in-graph /resnet50_fp32_pretrained_model.pb --batch-size 1 --num-intra-threads 16 >> results' \
    >>"${launcher}"
  chmod +x "${launcher}"
}
# Run the ResNet50 FP32 workload in every container, gather the reported
# throughput values and add them, with their average, to the metrics JSON.
#
# Globals read:
#   containers, CTR_EXE, CMD, CMD_FILE, CMD_RESULTS, NUM_CONTAINERS,
#   tensorflow_file, host_trigger_file
# Helpers called (defined outside this function):
#   info, random_name, collect_results, metrics_json_add_array_element,
#   metrics_json_end_array
function resnet50_fp32_test() {
  # The OpenMP settings have to be exported by the same shell that starts the
  # workload: variables exported from a separate exec'ed process disappear
  # when that process exits and never reach the benchmark.
  local CMD_EXPORT_VAR="export KMP_AFFINITY=granularity=fine,verbose,compact && export OMP_NUM_THREADS=16"
  local container
  local pid
  local pids=()

  info "Running ResNet50 FP32 Tensorflow test"
  for container in "${containers[@]}"; do
    # Launched directly in the background. Wrapping the call in $( ) would
    # make the shell try to execute whatever the workload prints on stdout.
    sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${container}" sh -c "${CMD_EXPORT_VAR} && ${CMD}" &
    pids+=("$!")
  done

  # wait for all pids
  for pid in "${pids[@]}"; do
    wait "${pid}"
  done

  touch "${host_trigger_file}"
  info "All containers are running the workload..."
  collect_results "${CMD_FILE}"

  for container in "${containers[@]}"; do
    sudo -E "${CTR_EXE}" t exec --exec-id "$(random_name)" "${container}" sh -c "${CMD_RESULTS}" >> "${tensorflow_file}"
  done

  # Every container contributes "value,": strip only the final comma. The
  # list must not be trimmed a second time, or the last digit of the last
  # value is lost before averaging.
  local resnet50_fp32_results
  resnet50_fp32_results=$(sed 's/,$//' "${tensorflow_file}")

  local average_resnet50_fp32=""
  if [ -n "${resnet50_fp32_results}" ]; then
    # "a,b,c" becomes "(a+b+c)/NUM_CONTAINERS", evaluated by bc.
    average_resnet50_fp32=$(printf '(%s)/%s\n' "${resnet50_fp32_results//,/+}" "${NUM_CONTAINERS}" | bc -l)
  else
    echo >&2 "warning: no throughput values were collected"
  fi

  local json
  json="$(cat << EOF
{
"ResNet50_fp32": {
"Result": "${resnet50_fp32_results}",
"Average": "${average_resnet50_fp32}",
"Units": "images/s"
}
}
EOF
)"
  metrics_json_add_array_element "$json"
  metrics_json_end_array "Results"
}
# Entry point: validate the arguments, start NUM_CONTAINERS containers, run
# the ResNet50 FP32 test in them, save the metrics and clean up.
#
# Arguments:
#   $1 - number of containers to run
#   $2 - timeout to launch the containers
# Globals read:
#   NUM_CONTAINERS, IMAGE, DOCKERFILE, CTR_EXE, CTR_RUNTIME, MOUNT_OPTIONS,
#   PAYLOAD_ARGS, src_dir
# Globals written:
#   INITIAL_NUM_PIDS
# Exits with status 1 when the arguments are invalid.
function main() {
  # Verify enough arguments
  if [ $# != 2 ]; then
    echo >&2 "error: Not enough arguments [$*]"
    help
    exit 1
  fi
  # A non-numeric or zero count would create no containers at all and the
  # "${containers[-1]}" lookups below would fail, so reject it up front.
  if ! [[ "${NUM_CONTAINERS}" =~ ^[1-9][0-9]*$ ]]; then
    echo >&2 "error: <count> must be a positive integer, got '${NUM_CONTAINERS}'"
    help
    exit 1
  fi

  local i=0
  # Deliberately local: resnet50_fp32_test, called from here, still sees the
  # array through bash's dynamic scoping.
  local containers=()
  local not_started_count="${NUM_CONTAINERS}"

  # Check tools/commands dependencies
  local cmds=("awk" "docker" "bc")
  check_cmds "${cmds[@]}"
  check_ctr_images "${IMAGE}" "${DOCKERFILE}"

  init_env
  create_start_script

  info "Creating ${NUM_CONTAINERS} containers"
  for ((i = 1; i <= NUM_CONTAINERS; i++)); do
    containers+=("$(random_name)")
    sudo -E "${CTR_EXE}" run -d --runtime "${CTR_RUNTIME}" --mount="${MOUNT_OPTIONS}" "${IMAGE}" "${containers[-1]}" sh -c "${PAYLOAD_ARGS}"
    ((not_started_count--))
    info "${not_started_count} remaining containers"
  done

  metrics_json_init
  metrics_json_start_array

  # Check that the requested number of containers are up
  check_containers_are_up "${NUM_CONTAINERS}"

  # Check that the requested number of containers are running
  check_containers_are_running "${NUM_CONTAINERS}"

  # Get the initial number of pids in a single container before the workload starts
  INITIAL_NUM_PIDS=$(sudo -E "${CTR_EXE}" t metrics "${containers[-1]}" | grep pids.current | xargs | cut -d ' ' -f 2)
  ((INITIAL_NUM_PIDS++))

  resnet50_fp32_test

  metrics_json_save

  sudo rm -rf "${src_dir}"

  clean_env_ctr
}
# Run the test with the arguments given on the command line.
main "$@"