Skip to content

Commit 9674576

Browse files
author
Himani Anil Deshpande
committed
Adding it as condition
1 parent 90ec5ea commit 9674576

File tree

2 files changed

+16
-8
lines changed

2 files changed

+16
-8
lines changed
Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,16 @@
11
#!/bin/bash
22

3-
sinfo_output=$(<%= node['cluster']['slurm']['install_dir'] %>/bin/sinfo -h -o '%N %t' | grep -v -E '(idle|alloc|mix|maint)$')
4-
while IFS= read -r line; do
5-
nodelist=$(echo "$line" | awk '{print $1}')
6-
<%= node['cluster']['slurm']['install_dir'] %>/bin/scontrol show hostnames "$nodelist" | { grep -E '^[a-z0-9\-]+\-st\-[a-z0-9\-]+\-[0-9]+.*' || true; }
7-
done <<< "$sinfo_output"
3+
4+
5+
cluster_static_node_count=$1
6+
if [[ -z "$cluster_static_node_count" ]]; then
7+
cluster_static_node_count=1
8+
fi
9+
10+
if [[ "$cluster_static_node_count" -ge "1" ]]; then
11+
sinfo_output=$(<%= node['cluster']['slurm']['install_dir'] %>/bin/sinfo -h -o '%N %t' | grep -v -E '(idle|alloc|mix|maint)$')
12+
while IFS= read -r line; do
13+
nodelist=$(echo "$line" | awk '{print $1}')
14+
<%= node['cluster']['slurm']['install_dir'] %>/bin/scontrol show hostnames "$nodelist" | { grep -E '^[a-z0-9\-]+\-st\-[a-z0-9\-]+\-[0-9]+.*' || true; }
15+
done <<< "$sinfo_output"
16+
fi

cookbooks/aws-parallelcluster-slurm/libraries/helpers.rb

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -213,15 +213,14 @@ def check_for_protected_mode(fleet_status_command) # rubocop:disable Lint/Nested
213213
# sinfo -h -o '%N %t'
214214
# queue-0-dy-compute-resource-g4dn-0-[1-10],queue-1-dy-compute-resource-g4dn-1-[1-10] idle~
215215
# queue-2-dy-compute-resource-g4dn-2-[1-10],queue-3-dy-compute-resource-g4dn-3-[1-10] idle
216-
until shell_out!("/bin/bash -c /usr/local/bin/is_fleet_ready.sh").stdout.strip.empty?
217-
check_for_protected_mode(fleet_status_command)
216+
until shell_out!("/bin/bash -c /usr/local/bin/is_fleet_ready.sh #{get_static_node_count.to_i}").stdout.strip.empty?
217+
check_for_protected_mode(fleet_status_command) # TODO: Separate check for dynamic nodes during dfsmv2
218218

219219
Chef::Log.info("Waiting for static fleet capacity provisioning")
220220
sleep(15)
221221
end
222222
Chef::Log.info("Static fleet capacity is ready")
223223
end
224-
only_if { get_static_node_count.to_i > 0 }
225224
end
226225
end
227226

0 commit comments

Comments
 (0)