From 6058f1b9078ce974ea23c6f4d67540689016f9b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Jel=C3=ADnek?= Date: Tue, 7 Feb 2023 15:53:51 +0100 Subject: [PATCH] Fix stonith watchdog timeout (#105) * fix setting stonith-watchdog-timeout Fix a corner case bug occurring with pcs 0.10.15 and newer and pcs 0.11.4 and newer: When setting stonith-watchdog-timeout in a stopped cluster, cib.xml.sig is not modified by pcs. This leads to pacemaker ignoring new cib.xml content and reading configuration from a previous version of cib, effectively reverting stonith-watchdog-timeout update done by the role. Pacemaker then exits with an error and the role fails, unable to proceed and configure the cluster. With old pcs versions, removing cib.xml.sig has no adverse effects. There is no need to check for pcs version when removing the file. * fix purging nodes from pacemaker --- tasks/cluster-start-and-reload.yml | 1 + tasks/sbd.yml | 59 ++++++++++++++++++------------ 2 files changed, 36 insertions(+), 24 deletions(-) diff --git a/tasks/cluster-start-and-reload.yml b/tasks/cluster-start-and-reload.yml index df3935c7..a4be5fe5 100644 --- a/tasks/cluster-start-and-reload.yml +++ b/tasks/cluster-start-and-reload.yml @@ -116,4 +116,5 @@ # started.
when: - not ansible_check_mode + - (item | length) > 0 - item not in __ha_cluster_all_node_names diff --git a/tasks/sbd.yml b/tasks/sbd.yml index 3cc070de..7710880a 100644 --- a/tasks/sbd.yml +++ b/tasks/sbd.yml @@ -63,28 +63,39 @@ service_facts: - name: Set stonith-watchdog-timeout cluster property - command: - cmd: > - pcs --force - {{ - ( - ansible_facts.services['pacemaker.service']['state'] | d() - == 'running' - ) | ternary('', '-f /var/lib/pacemaker/cib/cib.xml') - }} - -- property set - stonith-watchdog-timeout={{ ha_cluster_sbd_enabled | ternary('', '0') }} - changed_when: true + vars: + pacemaker_running: "{{ + ansible_facts.services['pacemaker.service']['state'] | d('') + == 'running' }}" + block: + - name: Set stonith-watchdog-timeout cluster property in CIB + command: + cmd: > + pcs --force + {{ + pacemaker_running | ternary('', '-f /var/lib/pacemaker/cib/cib.xml') + }} + -- property set + stonith-watchdog-timeout={{ + ha_cluster_sbd_enabled | ternary('', '0') }} + changed_when: true + + # In case cib.xml just got created by the task above, fix its permissions. + - name: Correct cib.xml ownership + file: + path: /var/lib/pacemaker/cib/cib.xml + state: file + owner: hacluster + group: haclient + mode: 0600 + when: + - not pacemaker_running -# In case cib.xml just got created by the task above, fix its permissions. If -# the permissions are not correct, pacemaker refuses to start. -# At this stage, we are sure the cib.xml file exists: either it just got -# created by the task above, or it was created by running pacemaker in the -# past. -- name: Correct cib.xml ownership - file: - path: /var/lib/pacemaker/cib/cib.xml - state: file - owner: hacluster - group: haclient - mode: 0600 + # In case of modifying cib.xml file, delete its (now unmatching) signature + # so that pacemaker accepts the new cib.xml content. + - name: Clean cib.xml.sig + file: + path: /var/lib/pacemaker/cib/cib.xml.sig + state: absent + when: + - not pacemaker_running