From 14a835d53797452905956037ed80e6489e3d2522 Mon Sep 17 00:00:00 2001 From: Jehan-Guillaume de Rorthais Date: Thu, 28 Nov 2019 00:32:18 +0100 Subject: [PATCH] Force synch read of lsn_location from other nodes The loop in _set_priv_attr() is just enough to make sure the private attribute is available locally. But it doesn't mean the attribute has been propagated to other nodes. This should fix gh issue #131 where lsn_location from a remote node might not be available yet during the promote action. Note that I haven't been able to reproduce the same behavior myself despite multiple creative ways of making attrd lag... --- script/pgsqlms | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/script/pgsqlms b/script/pgsqlms index 0f49b1c..1e88bd6 100755 --- a/script/pgsqlms +++ b/script/pgsqlms @@ -1894,16 +1894,20 @@ sub pgsql_promote { # Get the "lsn_location" attribute value for the node, as set during # the "pre-promote" action. - $ans = _get_priv_attr( 'lsn_location', $node ); - - if ( $ans eq '' ) { - # This should not happen as the "lsn_location" attribute should - # have been updated during the "pre-promote" action. - ocf_exit_reason( 'Can not get LSN location for "%s"', $node ); - return $OCF_ERR_GENERIC; + # This is implemented as a loop as private attributes are asynchronously + # available from other nodes. + # see: https://github.com/ClusterLabs/PAF/issues/131 + # NOTE: if a node did not set its lsn_location for some reason, this will end + # with a timeout and the whole promotion will start again. + WAIT_FOR_LSN: { + $ans = _get_priv_attr( 'lsn_location', $node ); + if ( $ans eq '' ) { + ocf_log( 'info', 'pgsql_promote: waiting for LSN from %s', $node ); + select( undef, undef, undef, 0.1 ); + redo WAIT_FOR_LSN; + } } - # convert location to decimal chomp $ans; ( $node_tl, $node_lsn ) = split /#/, $ans;