# provision_headnode.yml
---
# Play 1: runs on the control node only (no fact gathering) and imports the
# task file tasks/add_headnode_inventory.yml.
# NOTE(review): judging by the file name, those tasks presumably register the
# head node in the inventory so the `headnode` plays can target it — confirm.
- name: Run head node inventory tasks on the control node
  hosts: localhost
  gather_facts: false
  vars_files:
    - ./vars/main.yml
  tasks:
    - name: Import head node inventory tasks
      ansible.builtin.import_tasks: tasks/add_headnode_inventory.yml
# Play 2: provisions the head node as root. Applies the NFS and NTP roles,
# then configures sshd, the OpenStack client, MariaDB, Slurm (slurmdbd and
# slurmctld), munge and rsyslog. Variables come from ./vars/main.yml.
- name: Provision the head node
  hosts: headnode
  become: true
  vars_files:
    - ./vars/main.yml

  roles:
    # Read-only export of /opt/intel to the cluster network; only applied
    # when install_intel_oneapi is truthy.
    - role: geerlingguy.nfs
      vars:
        nfs_exports:
          - "/opt/intel {{ cluster_network_cidr }}(ro,no_root_squash)"
      when: install_intel_oneapi

    # Time synchronisation through chronyd against the Italian NTP pool.
    - role: geerlingguy.ntp
      vars:
        ntp_daemon: chronyd
        ntp_timezone: "Europe/Rome"
        ntp_enabled: true
        ntp_config_file: /etc/chrony.conf
        ntp_manage_config: true
        ntp_servers:
          - "it.pool.ntp.org iburst"
          - "0.it.pool.ntp.org iburst"
          - "1.it.pool.ntp.org iburst"
          - "2.it.pool.ntp.org iburst"
          - "3.it.pool.ntp.org iburst"
        ntp_cron_handler_enabled: true

  # Every handler restarts one service and makes sure it is enabled at boot.
  handlers:
    - name: restart slurmdbd
      ansible.builtin.service:
        name: slurmdbd
        state: restarted
        enabled: true

    - name: restart slurmctld
      ansible.builtin.service:
        name: slurmctld
        state: restarted
        enabled: true

    - name: restart rsyslog
      ansible.builtin.service:
        name: rsyslog
        state: restarted
        enabled: true

    - name: restart mariadb
      ansible.builtin.service:
        name: mariadb
        state: restarted
        enabled: true

    - name: restart sshd
      ansible.builtin.service:
        name: sshd
        state: restarted
        enabled: true

    - name: restart rpcbind
      ansible.builtin.service:
        name: rpcbind
        state: restarted
        enabled: true

    - name: restart nfs-server
      ansible.builtin.service:
        name: nfs-server
        state: restarted
        enabled: true

  tasks:
    # NOTE(review): the value below contains literal "[email protected]"
    # placeholders. They look like e-mail-obfuscation artifacts standing in
    # for "<algorithm>@<domain>" tokens (and possibly the leading option
    # name). The string is reproduced unchanged here; restore the real
    # algorithm names from a trusted copy before running this task, because
    # sshd is restarted right afterwards with whatever this line contains.
    - name: Setup CRYPTO_POLICY
      ansible.builtin.lineinfile:
        path: /etc/sysconfig/sshd
        regexp: 'CRYPTO_POLICY='
        line: "CRYPTO_POLICY='[email protected],[email protected],aes256-ctr [email protected],[email protected],[email protected],[email protected],hmac-sha2-256,hmac-sha2-512 -oGSSAPIKeyExchange=no -oKexAlgorithms=curve25519-sha256,[email protected],ecdh-sha2-nistp256,ecdh-sha2-nistp384,ecdh-sha2-nistp521,diffie-hellman-group-exchange-sha256,diffie-hellman-group16-sha512,diffie-hellman-group18-sha512 -oHostKeyAlgorithms=rsa-sha2-256,[email protected],ecdsa-sha2-nistp256,[email protected],ecdsa-sha2-nistp384,[email protected],rsa-sha2-512,[email protected],ecdsa-sha2-nistp521,[email protected],ssh-ed25519,[email protected] -oPubkeyAcceptedKeyTypes=rsa-sha2-256,[email protected],ecdsa-sha2-nistp256,[email protected],ecdsa-sha2-nistp384,[email protected],rsa-sha2-512,[email protected],ecdsa-sha2-nistp521,[email protected],ssh-ed25519,[email protected] -oCASignatureAlgorithms=rsa-sha2-256,ecdsa-sha2-nistp256,ecdsa-sha2-nistp384,rsa-sha2-512,ecdsa-sha2-nistp521,ssh-ed25519'"
        state: present
      notify: restart sshd

    # Run the sshd restart now instead of at the end of the play.
    - name: Flush handlers after the sshd change
      ansible.builtin.meta: flush_handlers

    - name: Ensures ~/.config/openstack dir exists
      ansible.builtin.file:
        path: /root/.config/openstack
        mode: "0755"
        state: directory

    # clouds.yaml is an OpenStack client configuration file that normally
    # carries credentials, so it is restricted to root instead of relying on
    # the default file mode.
    - name: Copy clouds.yaml
      ansible.builtin.copy:
        src: clouds.yaml
        dest: /root/.config/openstack/clouds.yaml
        owner: root
        group: root
        mode: "0600"

    - name: Import OpenStack client installation tasks
      ansible.builtin.import_tasks: tasks/install_openstack_client.yml

    - name: Import head node package installation tasks
      ansible.builtin.import_tasks: tasks/install_headnode_packages.yml

    - name: Import Intel oneAPI installation tasks
      ansible.builtin.import_tasks: tasks/install_oneapi.yml
      when: install_intel_oneapi

    # The managed block is placed in front of the "# End of file" line of
    # limits.conf.
    - name: Increase memlock
      ansible.builtin.blockinfile:
        path: /etc/security/limits.conf
        insertbefore: '# End of file'
        block: |
          * soft memlock unlimited
          * hard memlock unlimited

    # Adds the cluster network as a source of the "trusted" firewalld zone,
    # both in the running and in the permanent configuration.
    - name: Allow incoming traffic from cluster network
      ansible.posix.firewalld:
        source: "{{ cluster_network_cidr }}"
        zone: trusted
        state: enabled
        permanent: true
        immediate: true

    - name: Upload cluster-env script and profiles
      ansible.builtin.copy:
        src: "{{ item.src }}"
        dest: "{{ item.dest }}"
        owner: root
        group: root
        mode: "0755"
      loop:
        - src: cluster-env.sh
          dest: /etc/profile.d/cluster-env.sh
        - src: cluster-env.csh
          dest: /etc/profile.d/cluster-env.csh
        - src: cluster-env
          dest: /usr/bin/cluster-env

    - name: Create /var/log/slurm
      ansible.builtin.file:
        path: /var/log/slurm
        owner: slurm
        group: slurm
        mode: "0700"
        state: directory

    # The result is registered as openstack_config.
    # NOTE(review): presumably consumed by the openrc.sh.j2 template — confirm.
    - name: Read clouds.yaml configuration
      openstack.cloud.config:
      register: openstack_config

    - name: Copy openrc file
      ansible.builtin.template:
        src: openrc.sh.j2
        dest: /etc/slurm/openrc.sh
        owner: slurm
        group: slurm
        mode: "0600"

    - name: Install MariaDB
      ansible.builtin.dnf:
        name:
          - mariadb-server
        state: present

    - name: Start MariaDB
      ansible.builtin.service:
        name: mariadb
        state: started
        enabled: true

    # Installed ahead of the community.mysql tasks below.
    # NOTE(review): presumably the Python driver those modules need — confirm.
    - name: Install PyMySQL
      ansible.builtin.package:
        name: python3-PyMySQL
        state: present

    - name: Create slurm_acct_db MySQL database
      community.mysql.mysql_db:
        name: slurm_acct_db
        state: present

    - name: Create slurm user in MySQL database
      community.mysql.mysql_user:
        name: "{{ mysql_user }}"
        host: localhost
        password: "{{ mysql_password }}"
        priv:
          'slurm_acct_db.*': 'ALL,GRANT'
        state: present

    - name: Configure slurmdbd
      ansible.builtin.template:
        src: slurmdbd.conf.j2
        dest: /etc/slurm/slurmdbd.conf
        owner: slurm
        group: slurm
        mode: "0600"
      notify: restart slurmdbd

    - name: Copy slurm.conf
      ansible.builtin.template:
        src: slurm.conf.j2
        dest: /etc/slurm/slurm.conf
        owner: root
        group: root
        mode: "0644"
      notify: restart slurmctld

    - name: Copy cgroup.conf
      ansible.builtin.template:
        src: cgroup.conf.j2
        dest: /etc/slurm/cgroup.conf
        owner: root
        group: root
        mode: "0644"
      notify: restart slurmctld

    - name: Copy slurm_resume.sh
      ansible.builtin.template:
        src: slurm_resume.sh.j2
        dest: /usr/local/sbin/slurm_resume.sh
        owner: root
        group: root
        mode: "0755"

    - name: Copy slurm_suspend.sh
      ansible.builtin.template:
        src: slurm_suspend.sh.j2
        dest: /usr/local/sbin/slurm_suspend.sh
        owner: root
        group: root
        mode: "0755"

    # Writes /etc/sysconfig/munge so munged is started with --num-threads 10.
    - name: Increase the number of munge daemons to 10
      ansible.builtin.copy:
        dest: /etc/sysconfig/munge
        owner: root
        group: root
        mode: "0644"
        content: |
          DAEMON_ARGS="--key-file /etc/munge/munge.key --num-threads 10"

    # Start order is munge, then slurmdbd, then slurmctld: on a first start
    # slurmctld has no cached accounting data and needs slurmdbd to be
    # answering already.
    - name: Start slurmctld, slurmdbd, and munge
      ansible.builtin.service:
        name: "{{ item }}"
        state: started
        enabled: true
      loop:
        - munge
        - slurmdbd
        - slurmctld

    # Copies the head node's munge key to ./files/munge.key on the control
    # node; flat keeps the destination path exactly as written.
    - name: Gather munge.key
      ansible.builtin.fetch:
        src: /etc/munge/munge.key
        dest: ./files/munge.key
        flat: true

    # Loads the UDP syslog input on port 514. The block is appended at the
    # end of the file, which is created when missing.
    - name: Configure rsyslog to accept syslog from compute nodes
      ansible.builtin.blockinfile:
        path: /etc/rsyslog.d/ohpc.conf
        insertafter: EOF
        create: true
        state: present
        block: |
          module(load="imudp")
          input(type="imudp" port="514")
      notify: restart rsyslog
# Play 3: applies the NFS role with the shared-filesystem exports and then
# detaches the floating IP from the head node. Variables come from
# ./vars/main.yml.
- name: Export shared filesystems and detach the floating IP
  hosts: headnode
  become: true
  vars_files:
    - ./vars/main.yml

  vars:
    # Exports that are always published to the cluster network.
    headnode_nfs_exports_base:
      - "/home {{ cluster_network_cidr }}(rw,sync,no_root_squash)"
      - "/opt/ohpc/pub {{ cluster_network_cidr }}(rw,sync,no_root_squash)"
    # Read-only Intel oneAPI export, the same entry the provisioning play
    # passes to the role. It is repeated here because this role run is given
    # its own nfs_exports list; without it the /opt/intel entry would not be
    # part of the final export list.
    # NOTE(review): assumes the role renders /etc/exports from nfs_exports
    # alone on every run — confirm against the role version in use.
    headnode_nfs_exports_intel:
      - "/opt/intel {{ cluster_network_cidr }}(ro,no_root_squash)"

  roles:
    - role: geerlingguy.nfs
      vars:
        nfs_exports: >-
          {{
            headnode_nfs_exports_base
            + (headnode_nfs_exports_intel
               if (install_intel_oneapi | default(false) | bool)
               else [])
          }}

  tasks:
    # Pending handlers are run while the host is still reachable through the
    # floating IP.
    - name: Flush handlers before detaching floating IP
      ansible.builtin.meta: flush_handlers

    # inventory_hostname is passed as the floating IP address to remove.
    # NOTE(review): this assumes the head node is inventoried by its floating
    # IP — confirm against tasks/add_headnode_inventory.yml.
    - name: Detach floating IP from headnode
      openstack.cloud.floating_ip:
        state: absent
        floating_ip_address: "{{ inventory_hostname }}"
        network: "{{ cluster_network_floating_ip_pool }}"
        server: "{{ head_node_name }}"