diff --git a/.jenkins/aws-galera-4-ee-fullbuild.groovy b/.jenkins/aws-galera-4-ee-fullbuild.groovy new file mode 100644 index 000000000..69856e4d8 --- /dev/null +++ b/.jenkins/aws-galera-4-ee-fullbuild.groovy @@ -0,0 +1,93 @@ + +pipeline { + + agent none + + stages { + + stage ('Build sourcetar') { + steps { + script { + + def sourceJob = build job: 'aws-galera-4-ee-sourcetar', wait: true, + parameters: [ + string( name: 'GIT_TARGET', value: env.GIT_TARGET ), + booleanParam( name: 'HOTFIX_BUILD', value: env.HOTFIX_BUILD) + ] + env.SRCTAR_JOB = sourceJob.getNumber().toString() + } + } + } + + stage ('Build binary packages') { + + parallel { + stage ('Build bintar') { + steps { + script { + def bintarJob = build job: 'aws-galera-4-ee-bintar', wait: true, + parameters: [string(name: 'BUILD_SELECTOR', value: env.SRCTAR_JOB )] + env.BINTAR_JOB = bintarJob.getNumber().toString() + } + } + } + stage ('Build rpm packages') { + steps { + script { + def rpmJob = build job: 'aws-galera-4-ee-rpm-packages', wait: true, + parameters: [string(name: 'BUILD_SELECTOR', value: env.SRCTAR_JOB )] + env.RPM_JOB = rpmJob.getNumber().toString() + } + } + } + stage ('Build deb packages') { + steps { + script { + def debJob = build job: 'aws-galera-4-ee-deb-packages', wait: true, + parameters: [string(name: 'BUILD_SELECTOR', value: env.SRCTAR_JOB )] + env.DEB_JOB = debJob.getNumber().toString() + } + } + } + } // parallel + + } // Build binary packages + + stage ('Run tests') { + parallel { + stage('Run bintar test') { + steps { + build job: 'run-galera-4-ee-release-test', wait: true, + parameters: [string(name: 'BUILD_SELECTOR', value: env.BINTAR_JOB )] + } + } + stage ('Run RPM test') { + steps { + build job: 'run-galera-4-ee-rpm-test', wait: true, + parameters: [string(name: 'BUILD_SELECTOR', value: env.RPM_JOB )] + } + } + stage ('Run DEB test') { + steps { + build job: 'run-galera-4-ee-deb-test', wait: true, + parameters: [string(name: 'BUILD_SELECTOR', value: env.DEB_JOB )] + } + } + stage ('Run SST RPM test') { + steps { + build job: 'run-galera-4-ee-systemd-sst-rpm-test', wait: true, + parameters: [string(name: 'BUILD_SELECTOR', value: env.RPM_JOB )] + } + } + stage ('Run SST DEB test') { + steps { + build job: 'run-galera-4-ee-systemd-sst-deb-test', wait: true, + parameters: [string(name: 'BUILD_SELECTOR', value: env.DEB_JOB )] + } + } + } // parallel + } + + } // stages + +} diff --git a/GALERA_VERSION b/GALERA_VERSION index 3574312dd..134899b3c 100644 --- a/GALERA_VERSION +++ b/GALERA_VERSION @@ -1,4 +1,4 @@ GALERA_VERSION_WSREP_API=26 GALERA_VERSION_MAJOR=4 -GALERA_VERSION_MINOR=19 +GALERA_VERSION_MINOR=20 GALERA_VERSION_EXTRA= diff --git a/SConstruct b/SConstruct index e62ac5807..8a7379fec 100644 --- a/SConstruct +++ b/SConstruct @@ -170,7 +170,7 @@ static_ssl = ARGUMENTS.get('static_ssl', None) install = ARGUMENTS.get('install', None) version_script = int(ARGUMENTS.get('version_script', 1)) -GALERA_VER = ARGUMENTS.get('version', '4.19') +GALERA_VER = ARGUMENTS.get('version', '4.20') GALERA_REV = ARGUMENTS.get('revno', 'XXXX') # Attempt to read from file if not given diff --git a/debian/changelog b/debian/changelog index 9ad6df1c1..aa6b8ac43 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,5 +1,5 @@ -galera-4 (26.4.19) UNRELEASED; urgency=medium +galera-4 (26.4.20) UNRELEASED; urgency=medium * Galera 4 release - -- Codership Oy Mon, 24 Jun 2024 20:10:30 +0300 + -- Codership Oy Tue, 30 Jul 2024 05:58:45 +0300 diff --git a/galera/src/galera_gcs.hpp b/galera/src/galera_gcs.hpp index 36f544cca..521b3948f 100644 --- a/galera/src/galera_gcs.hpp +++ b/galera/src/galera_gcs.hpp @@ -96,7 +96,7 @@ namespace galera const char* node_name = 0, const char* node_incoming = 0) : - conn_(gcs_create(reinterpret_cast(&config), + conn_(gcs_create(config, reinterpret_cast(&cache), cb, node_name, node_incoming, diff --git a/galera/src/ist.cpp b/galera/src/ist.cpp index b91c18899..d93e106a4 100644 --- a/galera/src/ist.cpp +++ b/galera/src/ist.cpp @@ -355,7 +355,6 @@ galera::ist::Receiver::prepare(wsrep_seqno_t const first_seqno, return recv_addr_; } - void galera::ist::Receiver::run() { auto socket(acceptor_->accept()); @@ -365,6 +364,7 @@ void galera::ist::Receiver::run() gu::Progress* progress(NULL); int ec(0); + std::ostringstream error_os; try { @@ -410,9 +410,8 @@ void galera::ist::Receiver::run() assert(!progress); if (act.seqno_g > first_seqno_) { - log_error - << "IST started with wrong seqno: " << act.seqno_g - << ", expected <= " << first_seqno_; + error_os << "IST started with wrong seqno: " << act.seqno_g + << ", expected <= " << first_seqno_; ec = EINVAL; goto err; } @@ -438,8 +437,8 @@ void galera::ist::Receiver::run() if (act.seqno_g != current_seqno_) { - log_error << "Unexpected action seqno: " << act.seqno_g - << " expected: " << current_seqno_; + error_os << "Unexpected action seqno: " << act.seqno_g + << " expected: " << current_seqno_; ec = EINVAL; goto err; } @@ -508,7 +507,7 @@ void galera::ist::Receiver::run() ec = e.get_errno(); if (ec != EINTR) { - log_error << "got exception while reading IST stream: " << e.what(); + error_os << "got exception while reading IST stream: " << e.what(); } } @@ -518,17 +517,18 @@ void galera::ist::Receiver::run() socket->close(); running_ = false; - if (last_seqno_ > 0 && ec != EINTR && current_seqno_ < last_seqno_) + if (last_seqno_ > 0 && ec != EINTR && current_seqno_ < last_seqno_ && + error_os.tellp() == 0) { - log_error << "IST didn't contain all write sets, expected last: " - << last_seqno_ << " last received: " << current_seqno_; + error_os << "IST didn't contain all write sets, expected last: " + << last_seqno_ << " last received: " << current_seqno_; ec = EPROTO; } if (ec != EINTR) { error_code_ = ec; } - handler_.ist_end(ec); + handler_.ist_end(Result{ec, error_os.str()}); } @@ -803,7 +803,7 @@ void galera::ist::AsyncSenderMap::run(const gu::Config& conf, if (err != 0) { delete as; - gu_throw_error(err) << "failed to start sender thread"; + gu_throw_system_error(err) << "failed to start sender thread"; } senders_.insert(as); } diff --git a/galera/src/ist.hpp b/galera/src/ist.hpp index c572af70b..a9a4e7598 100644 --- a/galera/src/ist.hpp +++ b/galera/src/ist.hpp @@ -29,6 +29,15 @@ namespace galera void register_params(gu::Config& conf); + struct Result + { + int error; + std::string error_str; + Result(int error_arg, const std::string& error_str_arg) + : error{error_arg} + , error_str{error_str_arg} + { } + }; // IST event handler interface class EventHandler { @@ -40,7 +49,7 @@ namespace galera virtual void ist_cc(const gcs_act_cchange&,const gcs_action&, bool must_apply, bool preload) = 0; // Report IST end - virtual void ist_end(int error) = 0; + virtual void ist_end(const Result&) = 0; protected: virtual ~EventHandler() {} }; diff --git a/galera/src/ist_proto.hpp b/galera/src/ist_proto.hpp index 006240a79..ec0d6e8b8 100644 --- a/galera/src/ist_proto.hpp +++ b/galera/src/ist_proto.hpp @@ -744,7 +744,8 @@ namespace galera } else { - gu_throw_error(-msg.ctrl()) <<"peer reported error"; + gu_throw_error(-msg.ctrl()) + << "peer reported error: " << -msg.ctrl(); } } default: diff --git a/galera/src/mapped_buffer.cpp b/galera/src/mapped_buffer.cpp index eaadee471..0e6788147 100644 --- a/galera/src/mapped_buffer.cpp +++ b/galera/src/mapped_buffer.cpp @@ -85,21 +85,23 @@ void galera::MappedBuffer::reserve(size_t sz) fd_ = mkstemp(&file_[0]); if (fd_ == -1) { - gu_throw_error(errno) << "mkstemp(" << file_ << ") failed"; + gu_throw_system_error(errno) + << "mkstemp(" << file_ << ") failed"; } if (ftruncate(fd_, sz) == -1) { - gu_throw_error(errno) << "ftruncate() failed"; + gu_throw_system_error(errno) << "ftruncate() failed"; } byte_t* tmp(reinterpret_cast( mmap(NULL, sz, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd_, 0))); if (tmp == MAP_FAILED) { + const int error = errno; free(buf_); buf_ = 0; clear(); - gu_throw_error(ENOMEM) << "mmap() failed"; + gu_throw_system_error(error) << "mmap() failed"; } copy(buf_, buf_ + buf_size_, tmp); free(buf_); @@ -109,19 +111,20 @@ void galera::MappedBuffer::reserve(size_t sz) { if (munmap(buf_, real_buf_size_) != 0) { - gu_throw_error(errno) << "munmap() failed"; + gu_throw_system_error(errno) << "munmap() failed"; } if (ftruncate(fd_, sz) == -1) { - gu_throw_error(errno) << "fruncate() failed"; + gu_throw_system_error(errno) << "fruncate() failed"; } byte_t* tmp(reinterpret_cast( mmap(NULL, sz, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd_, 0))); if (tmp == MAP_FAILED) { + const int error = errno; buf_ = 0; clear(); - gu_throw_error(ENOMEM) << "mmap() failed"; + gu_throw_system_error(error) << "mmap() failed"; } buf_ = tmp; } @@ -132,7 +135,7 @@ void galera::MappedBuffer::reserve(size_t sz) byte_t* tmp(reinterpret_cast(realloc(buf_, sz))); if (tmp == 0) { - gu_throw_error(ENOMEM) << "realloc failed"; + gu_throw_system_error(ENOMEM) << "realloc failed"; } buf_ = tmp; } diff --git a/galera/src/replicator_smm.cpp b/galera/src/replicator_smm.cpp index cf43cbe4c..f4d8517af 100644 --- a/galera/src/replicator_smm.cpp +++ b/galera/src/replicator_smm.cpp @@ -5,6 +5,7 @@ #include "galera_common.hpp" #include "replicator_smm.hpp" #include "gcs_action_source.hpp" +#include "gcs_error.hpp" #include "galera_exception.hpp" #include "galera_info.hpp" @@ -315,7 +316,6 @@ galera::ReplicatorSMM::~ReplicatorSMM() delete as_; } - wsrep_status_t galera::ReplicatorSMM::connect(const std::string& cluster_name, const std::string& cluster_url, const std::string& state_donor, @@ -348,14 +348,14 @@ wsrep_status_t galera::ReplicatorSMM::connect(const std::string& cluster_name, if (ret == WSREP_OK && (err = gcs_.set_initial_position(inpos)) != 0) { - log_error << "gcs init failed:" << strerror(-err); + log_error << "gcs init failed:" << gcs_error_str(-err); ret = WSREP_NODE_FAIL; } if (ret == WSREP_OK && (err = gcs_.connect(cluster_name, cluster_url, bootstrap)) != 0) { - log_error << "gcs connect failed: " << strerror(-err); + log_error << "gcs connect failed: " << gcs_error_str(-err); ret = WSREP_NODE_FAIL; } @@ -1590,8 +1590,8 @@ wsrep_status_t galera::ReplicatorSMM::sync_wait(wsrep_gtid_t* upto, } catch (gu::Exception& e) { - log_warn << "gcs_caused() returned " << -e.get_errno() - << " (" << strerror(e.get_errno()) << ")"; + log_debug << "gcs_caused() returned " << -e.get_errno() + << " (" << strerror(e.get_errno()) << ")"; return WSREP_TRX_FAIL; } } @@ -1689,7 +1689,7 @@ wsrep_status_t galera::ReplicatorSMM::wait_nbo_end(TrxHandleMaster* trx, else if (err < 0) { log_error << "Failed to send NBO-end: " << err << ": " - << ::strerror(-err); + << gcs_error_str(-err); return WSREP_NODE_FAIL; } @@ -1960,7 +1960,8 @@ galera::ReplicatorSMM::preordered_commit(wsrep_po_handle_t& handle, if (rcode < 0) gu_throw_error(-rcode) - << "Replication of preordered writeset failed."; + << "Replication of preordered writeset failed: " + << gcs_error_str(-rcode); } delete ws; // cleanup regardless of commit flag @@ -2281,7 +2282,7 @@ void galera::ReplicatorSMM::process_vote(wsrep_seqno_t const seqno_g, default: /* general error */ assert(ret < 0); msg << "Failed to vote on request for " << gtid << ": " - << -ret << " (" << ::strerror(-ret) << "). " + << -ret << " (" << gcs_error_str(-ret) << "). " "Assuming inconsistency"; goto fail; } @@ -3075,8 +3076,9 @@ void galera::ReplicatorSMM::process_join(wsrep_seqno_t seqno_j, if (seqno_j < 0 && S_JOINING == state_()) { // #595, @todo: find a way to re-request state transfer - log_fatal << "Failed to receive state transfer: " << seqno_j - << " (" << strerror (-seqno_j) << "), need to restart."; + log_fatal << "Failed to receive state transfer: " << seqno_j << " (" + << gcs_state_transfer_error_str(-seqno_j) + << "), need to restart."; abort(); } else @@ -3191,7 +3193,7 @@ void galera::ReplicatorSMM::desync() if (ret) { - gu_throw_error (-ret) << "Node desync failed."; + gu_throw_error(-ret) << gcs_error_str(-ret); } } diff --git a/galera/src/replicator_smm.hpp b/galera/src/replicator_smm.hpp index 785fedf92..1a218c322 100644 --- a/galera/src/replicator_smm.hpp +++ b/galera/src/replicator_smm.hpp @@ -201,10 +201,10 @@ namespace galera // IST Action handler interface void ist_trx(const TrxHandleSlavePtr& ts, bool must_apply, - bool preload); + bool preload) override; void ist_cc(const gcs_act_cchange&, const gcs_action&, bool must_apply, - bool preload); - void ist_end(int error); + bool preload) override; + void ist_end(const ist::Result&) override; // Cancel local and enter apply monitors for TrxHandle void cancel_monitors_for_local(const TrxHandleSlave& ts) @@ -282,15 +282,15 @@ namespace galera mutex_(gu::get_mutex_key(gu::GU_MUTEX_KEY_IST_EVENT_QUEUE)), cond_(gu::get_cond_key(gu::GU_COND_KEY_IST_EVENT_QUEUE)), eof_(false), - error_(0), + result_(0, ""), queue_() { } - void reset() { eof_ = false; error_ = 0; } - void eof(int error) + void reset() { eof_ = false; result_ = ist::Result{0, ""}; } + void eof(const ist::Result& result) { gu::Lock lock(mutex_); eof_ = true; - error_ = error; + result_ = result; cond_.broadcast(); } @@ -331,12 +331,14 @@ namespace galera } else { - if (error_) + if (result_.error) { - int err(error_); - error_ = 0; // Make just one thread to detect the failure + int err(result_.error); + // Make just one thread to detect the failure + result_.error = 0; gu_throw_error(err) - << "IST receiver reported failure"; + << "IST receiver reported failure: '" + << result_.error_str << "' (" << err << ")"; } } @@ -347,7 +349,7 @@ namespace galera gu::Mutex mutex_; gu::Cond cond_; bool eof_; - int error_; + ist::Result result_; std::queue queue_; }; @@ -919,7 +921,7 @@ namespace galera ssize_t sst_req_len); /* resume reception of GCS events */ - void resume_recv() { gcs_.resume_recv(); ist_end(0); } + void resume_recv() { gcs_.resume_recv(); ist_end(ist::Result{0, ""}); } /* These methods facilitate closing procedure. * They must be called under closing_mutex_ lock */ diff --git a/galera/src/replicator_smm_params.cpp b/galera/src/replicator_smm_params.cpp index 48dee7622..0e20cd05e 100644 --- a/galera/src/replicator_smm_params.cpp +++ b/galera/src/replicator_smm_params.cpp @@ -116,10 +116,7 @@ galera::ReplicatorSMM::InitConfig::InitConfig(gu::Config& conf, /* register variables and defaults from other modules */ gcache::GCache::register_params(conf); - if (gcs_register_params(reinterpret_cast(&conf))) - { - gu_throw_fatal << "Error initializing GCS parameters"; - } + gcs_register_params(conf); Certification::register_params(conf); ist::register_params(conf); } diff --git a/galera/src/replicator_str.cpp b/galera/src/replicator_str.cpp index 0db34c443..7aaee4143 100644 --- a/galera/src/replicator_str.cpp +++ b/galera/src/replicator_str.cpp @@ -4,6 +4,7 @@ #include "replicator_smm.hpp" #include "galera_info.hpp" +#include "gcs_error.hpp" #include #include @@ -804,12 +805,12 @@ ReplicatorSMM::send_state_request (const StateRequest* const req, if (!retry_str(ret)) { log_error << "Requesting state transfer failed: " - << ret << "(" << strerror(-ret) << ")"; + << gcs_state_transfer_error_str(-ret); } else if (1 == tries) { log_info << "Requesting state transfer failed: " - << ret << "(" << strerror(-ret) << "). " + << gcs_state_transfer_error_str(-ret) << ". " << "Will keep retrying every " << sst_retry_sec_ << " second(s)"; } @@ -861,7 +862,8 @@ ReplicatorSMM::send_state_request (const StateRequest* const req, if (!closing_ && state_() > S_CLOSED) { log_fatal << "State transfer request failed unrecoverably: " - << -ret << " (" << strerror(-ret) << "). Most likely " + << gcs_state_transfer_error_str(-ret) + << ". Most likely " << "it is due to inability to communicate with the " << "cluster primary component. Restart required."; abort(); @@ -1399,9 +1401,9 @@ void ReplicatorSMM::ist_trx(const TrxHandleSlavePtr& ts, bool must_apply, } } -void ReplicatorSMM::ist_end(int error) +void ReplicatorSMM::ist_end(const ist::Result& result) { - ist_event_queue_.eof(error); + ist_event_queue_.eof(result); } void galera::ReplicatorSMM::process_ist_conf_change(const gcs_act_cchange& conf) diff --git a/galera/src/saved_state.cpp b/galera/src/saved_state.cpp index 07987bf74..aa01d00c5 100644 --- a/galera/src/saved_state.cpp +++ b/galera/src/saved_state.cpp @@ -49,7 +49,7 @@ SavedState::SavedState (const std::string& file) : if (!fs_) { - gu_throw_error(errno) + gu_throw_system_error(errno) << "Could not open state file for writing: '" << file << "'. Check permissions and/or disk space."; } diff --git a/galera/src/wsrep_provider.cpp b/galera/src/wsrep_provider.cpp index 2dc485fc1..1d1c325c9 100644 --- a/galera/src/wsrep_provider.cpp +++ b/galera/src/wsrep_provider.cpp @@ -1498,7 +1498,7 @@ wsrep_seqno_t galera_pause (wsrep_t* gh) } catch (gu::Exception& e) { - log_error << e.what(); + log_warn << "Node pause failed: " << e.what(); return -e.get_errno(); } } @@ -1519,7 +1519,7 @@ wsrep_status_t galera_resume (wsrep_t* gh) } catch (gu::Exception& e) { - log_error << e.what(); + log_error << "Node resume failed: " << e.what(); return WSREP_NODE_FAIL; } } @@ -1540,7 +1540,7 @@ wsrep_status_t galera_desync (wsrep_t* gh) } catch (gu::Exception& e) { - log_error << e.what(); + log_warn << "Node desync failed: " << e.what(); return WSREP_TRX_FAIL; } } @@ -1561,7 +1561,7 @@ wsrep_status_t galera_resync (wsrep_t* gh) } catch (gu::Exception& e) { - log_error << e.what(); + log_error << "Node resync failed: " << e.what(); return WSREP_NODE_FAIL; } } diff --git a/galera/tests/ist_check.cpp b/galera/tests/ist_check.cpp index 36367fa43..3aa88a7e1 100644 --- a/galera/tests/ist_check.cpp +++ b/galera/tests/ist_check.cpp @@ -314,7 +314,8 @@ namespace ~ISTHandler() {} - void ist_trx(const TrxHandleSlavePtr& ts, bool must_apply, bool preload) + void ist_trx(const TrxHandleSlavePtr& ts, bool must_apply, + bool preload) override { assert(ts != 0); ts->verify_checksum(); @@ -341,7 +342,7 @@ namespace } void ist_cc(const gcs_act_cchange& cc, const gcs_action& act, - bool must_apply, bool preload) + bool must_apply, bool preload) override { assert(act.seqno_g == cc.seqno); @@ -357,11 +358,11 @@ namespace seqno_ = cc.seqno; } - void ist_end(int error) + void ist_end(const ist::Result& result) override { - log_info << "IST ended with status: " << error; + log_info << "IST ended with status: " << result.error_str; gu::Lock lock(mutex_); - error_ = error; + error_ = result.error; eof_ = true; cond_.signal(); } diff --git a/galerautils/src/gu_asio.cpp b/galerautils/src/gu_asio.cpp index d20a59b2e..d40ecddbe 100644 --- a/galerautils/src/gu_asio.cpp +++ b/galerautils/src/gu_asio.cpp @@ -333,7 +333,7 @@ namespace if (ifs.good() == false) { - gu_throw_error(errno) << + gu_throw_system_error(errno) << "could not open password file '" << file << "'"; } diff --git a/galerautils/src/gu_asio_datagram.cpp b/galerautils/src/gu_asio_datagram.cpp index e0d7c3925..9cfe86da8 100644 --- a/galerautils/src/gu_asio_datagram.cpp +++ b/galerautils/src/gu_asio_datagram.cpp @@ -104,7 +104,7 @@ gu::AsioUdpSocket::resolve_and_open(const gu::URI& uri) } catch (const asio::system_error& e) { - gu_throw_error(e.code().value()) + gu_throw_system_error(e.code().value()) << "error opening datagram socket" << uri; } } @@ -117,7 +117,7 @@ void gu::AsioUdpSocket::open(const gu::URI& uri) } catch (const asio::system_error& e) { - gu_throw_error(e.code().value()) + gu_throw_system_error(e.code().value()) << "error opening datagram socket" << uri; } } @@ -185,7 +185,7 @@ void gu::AsioUdpSocket::connect(const gu::URI& uri) } catch (const asio::system_error& e) { - gu_throw_error(e.code().value()) + gu_throw_system_error(e.code().value()) << "Failed to connect UDP socket: " << e.what(); } } @@ -201,7 +201,7 @@ void gu::AsioUdpSocket::write( } catch (const asio::system_error& e) { - gu_throw_error(e.code().value()) + gu_throw_system_error(e.code().value()) << "Failed to write UDP socket: " << e.what(); } @@ -221,7 +221,7 @@ void gu::AsioUdpSocket::send_to( } catch (const asio::system_error& e) { - gu_throw_error(e.code().value()) + gu_throw_system_error(e.code().value()) << "Failed to send datagram to " << target_endpoint << ": " << e.what(); } diff --git a/galerautils/src/gu_asio_socket_util.hpp b/galerautils/src/gu_asio_socket_util.hpp index 57b78ba59..52d6feb3f 100644 --- a/galerautils/src/gu_asio_socket_util.hpp +++ b/galerautils/src/gu_asio_socket_util.hpp @@ -38,7 +38,7 @@ static void set_fd_options(S& socket) long flags(FD_CLOEXEC); if (fcntl(native_socket_handle(socket), F_SETFD, flags) == -1) { - gu_throw_error(errno) << "failed to set FD_CLOEXEC"; + gu_throw_system_error(errno) << "failed to set FD_CLOEXEC"; } } @@ -58,7 +58,7 @@ static void set_receive_buffer_size(Socket& socket, size_t size) } catch (const asio::system_error& e) { - gu_throw_error(e.code().value()) + gu_throw_system_error(e.code().value()) << "Failed to set receive buffer size: " << e.what(); } @@ -75,7 +75,7 @@ static size_t get_receive_buffer_size(Socket& socket) } catch (const asio::system_error& e) { - gu_throw_error(e.code().value()) + gu_throw_system_error(e.code().value()) << "Failed to get receive buffer size: " << e.what(); } @@ -90,7 +90,7 @@ static void set_send_buffer_size(Socket& socket, size_t size) } catch (const asio::system_error& e) { - gu_throw_error(e.code().value()) + gu_throw_system_error(e.code().value()) << "Failed to set send buffer size: " << e.what(); } @@ -107,7 +107,7 @@ static size_t get_send_buffer_size(Socket& socket) } catch (const asio::system_error& e) { - gu_throw_error(e.code().value()) + gu_throw_system_error(e.code().value()) << "Failed to get send buffer size: " << e.what(); } @@ -137,7 +137,7 @@ static void bind(Socket& socket, const gu::AsioIpAddress& addr) } catch (const asio::system_error& e) { - gu_throw_error(e.code().value()) + gu_throw_system_error(e.code().value()) << "Failed bind socket to address: " << e.what(); } @@ -159,8 +159,8 @@ static struct tcp_info get_tcp_info(Socket& socket) if (getsockopt(native_fd, level, TCP_INFO, &tcpi, &tcpi_len)) { int err(errno); - gu_throw_error(err) << "Failed to read TCP info from socket: " - << strerror(err); + gu_throw_system_error(err) << "Failed to read TCP info from socket: " + << strerror(err); } #endif /* __linux__ || __FreeBSD__ */ return tcpi; diff --git a/galerautils/src/gu_asio_stream_react.cpp b/galerautils/src/gu_asio_stream_react.cpp index 2713bde6c..df09c28ab 100644 --- a/galerautils/src/gu_asio_stream_react.cpp +++ b/galerautils/src/gu_asio_stream_react.cpp @@ -68,7 +68,7 @@ void gu::AsioStreamReact::open(const gu::URI& uri) try } catch (const asio::system_error& e) { - gu_throw_error(e.code().value()) + gu_throw_system_error(e.code().value()) << "error opening stream socket " << uri; } @@ -78,7 +78,7 @@ bool gu::AsioStreamReact::is_open() const try } catch (const asio::system_error& e) { - gu_throw_error(e.code().value()) + gu_throw_system_error(e.code().value()) << "error checking if socket is open "; return false; } @@ -105,7 +105,7 @@ void gu::AsioStreamReact::bind(const gu::AsioIpAddress& addr) try } catch (const asio::system_error& e) { - gu_throw_error(e.code().value()) << "error in binding"; + gu_throw_system_error(e.code().value()) << "error in binding"; } void gu::AsioStreamReact::async_connect( @@ -127,7 +127,7 @@ void gu::AsioStreamReact::async_connect( } catch (const asio::system_error& e) { - gu_throw_error(e.code().value()) << "error connecting "; + gu_throw_system_error(e.code().value()) << "error connecting "; } @@ -149,7 +149,7 @@ void gu::AsioStreamReact::async_write( } catch (const asio::system_error& e) { - gu_throw_error(e.code().value()) << "Async write failed '" + gu_throw_system_error(e.code().value()) << "Async write failed '" << e.what(); } @@ -169,7 +169,7 @@ void gu::AsioStreamReact::async_read( } catch (const asio::system_error& e) { - gu_throw_error(e.code().value()) << "Async read failed '" + gu_throw_system_error(e.code().value()) << "Async read failed '" << e.what(); } @@ -178,7 +178,7 @@ static void throw_sync_op_error(const gu::AsioStreamEngine& engine, { auto last_error(engine.last_error()); if (last_error.is_system()) - gu_throw_error(last_error.value()) << prefix + gu_throw_system_error(last_error.value()) << prefix << ": " << last_error.message(); else gu_throw_error(EPROTO) << prefix @@ -215,7 +215,7 @@ void gu::AsioStreamReact::connect(const gu::URI& uri) try } catch (asio::system_error& e) { - gu_throw_error(e.code().value()) << "Failed to connect '" + gu_throw_system_error(e.code().value()) << "Failed to connect '" << uri << "': " << e.what(); } @@ -242,7 +242,7 @@ size_t gu::AsioStreamReact::write(const AsioConstBuffer& buf) try } catch (const asio::system_error& e) { - gu_throw_error(e.code().value()) << "Failed to write: " << e.what(); + gu_throw_system_error(e.code().value()) << "Failed to write: " << e.what(); } size_t gu::AsioStreamReact::read(const AsioMutableBuffer& buf) try @@ -277,7 +277,7 @@ size_t gu::AsioStreamReact::read(const AsioMutableBuffer& buf) try } catch (const asio::system_error& e) { - gu_throw_error(e.code().value()) << "Failed to read: " << e.what(); + gu_throw_system_error(e.code().value()) << "Failed to read: " << e.what(); } std::string gu::AsioStreamReact::local_addr() const @@ -297,7 +297,7 @@ void gu::AsioStreamReact::set_receive_buffer_size(size_t size) try } catch (const asio::system_error& e) { - gu_throw_error(e.code().value()) << "error setting receive buffer size"; + gu_throw_system_error(e.code().value()) << "error setting receive buffer size"; } size_t gu::AsioStreamReact::get_receive_buffer_size() try @@ -306,7 +306,7 @@ size_t gu::AsioStreamReact::get_receive_buffer_size() try } catch (const asio::system_error& e) { - gu_throw_error(e.code().value()) << "error getting receive buffer size "; + gu_throw_system_error(e.code().value()) << "error getting receive buffer size "; } void gu::AsioStreamReact::set_send_buffer_size(size_t size) try @@ -316,7 +316,7 @@ void gu::AsioStreamReact::set_send_buffer_size(size_t size) try } catch (const asio::system_error& e) { - gu_throw_error(e.code().value()) << "error setting send buffer size"; + gu_throw_system_error(e.code().value()) << "error setting send buffer size"; } size_t gu::AsioStreamReact::get_send_buffer_size() try @@ -325,7 +325,7 @@ size_t gu::AsioStreamReact::get_send_buffer_size() try } catch (const asio::system_error& e) { - gu_throw_error(e.code().value()) << "error getting send buffer size"; + gu_throw_system_error(e.code().value()) << "error getting send buffer size"; } struct tcp_info gu::AsioStreamReact::get_tcp_info() try @@ -334,7 +334,7 @@ struct tcp_info gu::AsioStreamReact::get_tcp_info() try } catch (const asio::system_error& e) { - gu_throw_error(e.code().value()) << "error getting TCP info"; + gu_throw_system_error(e.code().value()) << "error getting TCP info"; } @@ -871,7 +871,7 @@ void gu::AsioAcceptorReact::open(const gu::URI& uri) try } catch (const asio::system_error& e) { - gu_throw_error(e.code().value()) << "Failed to open acceptor: " << e.what(); + gu_throw_system_error(e.code().value()) << "Failed to open acceptor: " << e.what(); } @@ -891,7 +891,7 @@ void gu::AsioAcceptorReact::listen(const gu::URI& uri) try } catch (const asio::system_error& e) { - gu_throw_error(e.code().value()) << "Failed to listen: " << e.what(); + gu_throw_system_error(e.code().value()) << "Failed to listen: " << e.what(); } void gu::AsioAcceptorReact::close() try @@ -904,7 +904,7 @@ void gu::AsioAcceptorReact::close() try } catch (const asio::system_error& e) { - gu_throw_error(e.code().value()) << "Failed to close acceptor: " + gu_throw_system_error(e.code().value()) << "Failed to close acceptor: " << e.what(); } @@ -925,7 +925,7 @@ void gu::AsioAcceptorReact::async_accept( } catch (const asio::system_error& e) { - gu_throw_error(e.code().value()) << "Failed to accept: " << e.what(); + gu_throw_system_error(e.code().value()) << "Failed to accept: " << e.what(); } @@ -965,7 +965,7 @@ std::shared_ptr gu::AsioAcceptorReact::accept() try } catch (const asio::system_error& e) { - gu_throw_error(e.code().value()) << "Failed to accept: " << e.what(); + gu_throw_system_error(e.code().value()) << "Failed to accept: " << e.what(); } std::string gu::AsioAcceptorReact::listen_addr() const try @@ -977,7 +977,7 @@ std::string gu::AsioAcceptorReact::listen_addr() const try } catch (const asio::system_error& e) { - gu_throw_error(e.code().value()) + gu_throw_system_error(e.code().value()) << "failed to read listen addr " << "', asio error '" << e.what() << "'"; } @@ -988,7 +988,7 @@ unsigned short gu::AsioAcceptorReact::listen_port() const try } catch (const asio::system_error& e) { - gu_throw_error(e.code().value()) + gu_throw_system_error(e.code().value()) << "failed to read listen port " << "', asio error '" << e.what() << "'"; } @@ -1000,7 +1000,7 @@ void gu::AsioAcceptorReact::set_receive_buffer_size(size_t size) try } catch (const asio::system_error& e) { - gu_throw_error(e.code().value()) << "error setting receive buffer size"; + gu_throw_system_error(e.code().value()) << "error setting receive buffer size"; } @@ -1010,7 +1010,7 @@ size_t gu::AsioAcceptorReact::get_receive_buffer_size() try } catch (const asio::system_error& e) { - gu_throw_error(e.code().value()) << "error getting receive buffer size"; + gu_throw_system_error(e.code().value()) << "error getting receive buffer size"; return 0; } @@ -1021,7 +1021,7 @@ void gu::AsioAcceptorReact::set_send_buffer_size(size_t size) try } catch (const asio::system_error& e) { - gu_throw_error(e.code().value()) << "error setting send buffer size"; + gu_throw_system_error(e.code().value()) << "error setting send buffer size"; } size_t gu::AsioAcceptorReact::get_send_buffer_size() try @@ -1030,7 +1030,7 @@ size_t gu::AsioAcceptorReact::get_send_buffer_size() try } catch (const asio::system_error& e) { - gu_throw_error(e.code().value()) << "error getting send buffer size"; + gu_throw_system_error(e.code().value()) << "error getting send buffer size"; return 0; } diff --git a/galerautils/src/gu_config.hpp b/galerautils/src/gu_config.hpp index f6e95b1c5..645e0a105 100644 --- a/galerautils/src/gu_config.hpp +++ b/galerautils/src/gu_config.hpp @@ -133,7 +133,7 @@ class gu::Config } } - /* Parse a string of semicolumn separated key=value pairs into a vector. + /* Parse a string of semicolon separated key=value pairs into a vector. * Throws Exception in case of parsing error. */ static void parse (std::vector >& params_vector, diff --git a/galerautils/src/gu_fdesc.cpp b/galerautils/src/gu_fdesc.cpp index 35bee8067..4fc6a8656 100644 --- a/galerautils/src/gu_fdesc.cpp +++ b/galerautils/src/gu_fdesc.cpp @@ -124,7 +124,7 @@ namespace gu if (ftruncate(fd_, size_)) { - gu_throw_error(errno) << "Failed to truncate '" << name_ + gu_throw_system_error(errno) << "Failed to truncate '" << name_ << "' to " << size_ << " bytes."; } } @@ -138,7 +138,8 @@ namespace gu FileDescriptor::constructor_common() { if (fd_ < 0) { - gu_throw_error(errno) << "Failed to open file '" + name_ + '\''; + gu_throw_system_error(errno) + << "Failed to open file '" + name_ + '\''; } #if !defined(__APPLE__) /* Darwin does not have posix_fadvise */ /* benefits are questionable @@ -180,7 +181,7 @@ namespace gu log_debug << "Flushing file '" << name_ << "'"; if (fsync (fd_) < 0) { - gu_throw_error(errno) << "fsync() failed on '" + name_ + '\''; + gu_throw_system_error(errno) << "fsync() failed on '" + name_ + '\''; } log_debug << "Flushed file '" << name_ << "'"; @@ -192,10 +193,12 @@ namespace gu byte_t const byte (0); if (lseek (fd_, offset, SEEK_SET) != offset) - gu_throw_error(errno) << "lseek() failed on '" << name_ << '\''; + gu_throw_system_error(errno) + << "lseek() failed on '" << name_ << '\''; if (write (fd_, &byte, sizeof(byte)) != sizeof(byte)) - gu_throw_error(errno) << "write() failed on '" << name_ << '\''; + gu_throw_system_error(errno) + << "write() failed on '" << name_ << '\''; return true; } @@ -221,7 +224,7 @@ namespace gu return; } - gu_throw_error (errno) << "File preallocation failed"; + gu_throw_system_error (errno) << "File preallocation failed"; } void @@ -248,7 +251,7 @@ namespace gu } else { - gu_throw_error (errno) << "File preallocation failed"; + gu_throw_system_error (errno) << "File preallocation failed"; } } } diff --git a/galerautils/src/gu_fifo.c b/galerautils/src/gu_fifo.c index 3b109d1cc..e9d068c80 100644 --- a/galerautils/src/gu_fifo.c +++ b/galerautils/src/gu_fifo.c @@ -111,7 +111,7 @@ gu_fifo_t *gu_fifo_create (size_t length, size_t item_size) if (max_size > gu_avphys_bytes()) { gu_error ("Maximum FIFO size %llu exceeds available memory " - "limit %llu", max_size, gu_avphys_bytes()); + "limit %zu", max_size, gu_avphys_bytes()); return NULL; } @@ -122,8 +122,8 @@ gu_fifo_t *gu_fifo_create (size_t length, size_t item_size) } - gu_debug ("Creating FIFO buffer of %llu elements of size %llu, " - "memory min used: %zu, max used: %zu", + gu_debug ("Creating FIFO buffer of %llu elements of size %zu, " + "memory min used: %llu, max used: %llu", array_len * row_len, item_size, alloc_size, alloc_size + array_len*row_size); @@ -143,7 +143,7 @@ gu_fifo_t *gu_fifo_create (size_t length, size_t item_size) gu_cond_init (NULL, &ret->put_cond); } else { - gu_error ("Failed to allocate %zu bytes for FIFO", alloc_size); + gu_error ("Failed to allocate %llu bytes for FIFO", alloc_size); } } @@ -204,7 +204,7 @@ static int fifo_flush (gu_fifo_t* q) /* if there are items in the queue, wait until they are all fetched */ while (q->used > 0 && 0 == ret) { /* will make getters to signal every time item is removed */ - gu_warn ("Waiting for %lu items to be fetched.", q->used); + gu_warn ("Waiting for %u items to be fetched.", q->used); q->put_wait++; ret = gu_cond_wait (&q->put_cond, &q->lock); } diff --git a/galerautils/src/gu_init.c b/galerautils/src/gu_init.c index 75481b4ee..93d2cd40f 100644 --- a/galerautils/src/gu_init.c +++ b/galerautils/src/gu_init.c @@ -18,7 +18,7 @@ gu_init (gu_log_cb_t log_cb) size_t const page_size = GU_PAGE_SIZE; if (page_size & (page_size - 1)) { - gu_fatal("GU_PAGE_SIZE(%z) is not a power of 2", GU_PAGE_SIZE); + gu_fatal("GU_PAGE_SIZE(%zu) is not a power of 2", GU_PAGE_SIZE); gu_abort(); } diff --git a/galerautils/src/gu_lock.hpp b/galerautils/src/gu_lock.hpp index d27376356..e713bf0a3 100644 --- a/galerautils/src/gu_lock.hpp +++ b/galerautils/src/gu_lock.hpp @@ -68,7 +68,7 @@ namespace gu mtx_.owned_ = gu_thread_self(); #endif /* GU_MUTEX_DEBUG */ - if (gu_unlikely(ret)) gu_throw_error(ret); + if (gu_unlikely(ret)) gu_throw_system_error(ret); } #if defined(GU_DEBUG_MUTEX) || defined(GU_MUTEX_DEBUG) bool owns_lock() const diff --git a/galerautils/src/gu_log.c b/galerautils/src/gu_log.c index 7ab718e7d..d18edd075 100644 --- a/galerautils/src/gu_log.c +++ b/galerautils/src/gu_log.c @@ -133,6 +133,7 @@ gu_log (gu_log_severity_t severity, const char* file, const char* function, const int line, + const char* fmt, ...) { va_list ap; @@ -163,12 +164,10 @@ gu_log (gu_log_severity_t severity, str += len; max_string -= len; - va_start (ap, line); + va_start (ap, fmt); { - const char* format = va_arg (ap, const char*); - - if (gu_likely(max_string > 0 && NULL != format)) { - vsnprintf (str, max_string, format, ap); + if (gu_likely(max_string > 0 && NULL != fmt)) { + vsnprintf (str, max_string, fmt, ap); } } va_end (ap); diff --git a/galerautils/src/gu_log.h b/galerautils/src/gu_log.h index c9cc034fb..7d3335bf2 100644 --- a/galerautils/src/gu_log.h +++ b/galerautils/src/gu_log.h @@ -54,7 +54,8 @@ gu_log (gu_log_severity_t severity, const char* file, const char* function, const int line, - ...); + const char* fmt, + ...) __attribute__((format(printf, 5, 6))); /** This variable is made global only for the purpose of using it in * gu_debug() macro and avoid calling gu_log() when debug is off. @@ -68,23 +69,25 @@ extern gu_log_severity_t gu_log_max_level; #endif #if !defined(__cplusplus) || defined(GALERA_LOG_H_ENABLE_CXX) -// NOTE: don't add "\n" here even if you really want to do it -#define GU_LOG_C(level, ...)\ - gu_log(level, __FILE__, __func__, __LINE__,\ - __VA_ARGS__, NULL) - /** * @name Logging macros. * Must be implemented as macros to report the location of the code where * they are called. */ /*@{*/ -#define gu_fatal(...) GU_LOG_C(GU_LOG_FATAL, __VA_ARGS__, NULL) -#define gu_error(...) GU_LOG_C(GU_LOG_ERROR, __VA_ARGS__, NULL) -#define gu_warn(...) GU_LOG_C(GU_LOG_WARN, __VA_ARGS__, NULL) -#define gu_info(...) GU_LOG_C(GU_LOG_INFO, __VA_ARGS__, NULL) -#define gu_debug(...) if (gu_unlikely(gu_log_debug)) \ - { GU_LOG_C(GU_LOG_DEBUG, __VA_ARGS__, NULL); } +#define gu_fatal(...) \ + gu_log(GU_LOG_FATAL, __FILE__, __func__, __LINE__, __VA_ARGS__); +#define gu_error(...) \ + gu_log(GU_LOG_ERROR, __FILE__, __func__, __LINE__, __VA_ARGS__); +#define gu_warn(...) \ + gu_log(GU_LOG_WARN, __FILE__, __func__, __LINE__, __VA_ARGS__); +#define gu_info(...) \ + gu_log(GU_LOG_INFO, __FILE__, __func__, __LINE__, __VA_ARGS__) +#define gu_debug(...) \ + if (gu_unlikely(gu_log_debug)) \ + { \ + gu_log(GU_LOG_DEBUG, __FILE__, __func__, __LINE__, __VA_ARGS__); \ + } /*@}*/ #endif /* __cplusplus */ diff --git a/galerautils/src/gu_mmap.cpp b/galerautils/src/gu_mmap.cpp index b20aaaa14..20ea419ad 100644 --- a/galerautils/src/gu_mmap.cpp +++ b/galerautils/src/gu_mmap.cpp @@ -38,8 +38,8 @@ namespace gu { if (!mapped) { - gu_throw_error(errno) << "mmap() on '" << fd.name() - << "' failed"; + gu_throw_system_error(errno) + << "mmap() on '" << fd.name() << "' failed"; } #if defined(MADV_DONTFORK) @@ -93,8 +93,8 @@ namespace gu if (::msync(sync_addr, sync_length, MS_SYNC) < 0) { - gu_throw_error(errno) << "msync(" << sync_addr << ", " - << sync_length << ") failed"; + gu_throw_system_error(errno) + << "msync(" << sync_addr << ", " << sync_length << ") failed"; } } @@ -110,8 +110,8 @@ namespace gu { if (munmap (ptr, size) < 0) { - gu_throw_error(errno) << "munmap(" << ptr << ", " << size - << ") failed"; + gu_throw_system_error(errno) + << "munmap(" << ptr << ", " << size << ") failed"; } mapped = false; diff --git a/galerautils/src/gu_mutex.hpp b/galerautils/src/gu_mutex.hpp index a17a6fbb3..468554976 100644 --- a/galerautils/src/gu_mutex.hpp +++ b/galerautils/src/gu_mutex.hpp @@ -42,7 +42,7 @@ namespace gu if (gu_unlikely(err != 0)) { assert(0); - gu_throw_error(err) << "gu_mutex_destroy()"; + gu_throw_system_error(err) << "gu_mutex_destroy()"; } } @@ -59,7 +59,7 @@ namespace gu else { assert(0); - gu_throw_error(err) << "Mutex lock failed"; + gu_throw_system_error(err) << "Mutex lock failed"; } } diff --git a/galerautils/src/gu_resolver.cpp b/galerautils/src/gu_resolver.cpp index bc9bca916..62218124f 100644 --- a/galerautils/src/gu_resolver.cpp +++ b/galerautils/src/gu_resolver.cpp @@ -246,7 +246,7 @@ static unsigned int get_ifindex_by_addr(const gu::net::Sockaddr& addr) if (fd == -1) { err = errno; - gu_throw_error(err) << "could not create socket"; + gu_throw_system_error(err) << "could not create socket"; } if ((err = ioctl(fd, GU_SIOCGIFCONF, &ifc)) == -1) { @@ -290,7 +290,7 @@ static unsigned int get_ifindex_by_addr(const gu::net::Sockaddr& addr) #endif /* !__APPLE__ && !__FreeBSD__ */ if (err != 0) { - gu_throw_error(err) << "failed to get interface index"; + gu_throw_system_error(err) << "failed to get interface index"; } else { @@ -465,7 +465,7 @@ std::string gu::net::Addrinfo::to_string() const if (inet_ntop(get_family(), addr.get_addr(), dst, sizeof(dst)) == 0) { - gu_throw_error(errno) << "inet ntop failed"; + gu_throw_system_error(errno) << "inet ntop failed"; } switch (get_family()) diff --git a/galerautils/src/gu_thread.cpp b/galerautils/src/gu_thread.cpp index e89f518f1..afe69bb57 100644 --- a/galerautils/src/gu_thread.cpp +++ b/galerautils/src/gu_thread.cpp @@ -74,7 +74,7 @@ gu::ThreadSchedparam gu::thread_get_schedparam(gu_thread_t thd) int err; if ((err = gu_thread_getschedparam(thd, &policy, &sp)) != 0) { - gu_throw_error(err) << "Failed to read thread schedparams"; + gu_throw_system_error(err) << "Failed to read thread schedparams"; } return ThreadSchedparam(policy, sp.sched_priority); } @@ -102,7 +102,8 @@ void gu::thread_set_schedparam(gu_thread_t thd, const gu::ThreadSchedparam& sp) } else { - gu_throw_error(err) << "Failed to set thread schedparams " << sp; + gu_throw_system_error(err) + << "Failed to set thread schedparams " << sp; } } } diff --git a/galerautils/src/gu_throw.hpp b/galerautils/src/gu_throw.hpp index 4ed378c29..af34b2b8b 100644 --- a/galerautils/src/gu_throw.hpp +++ b/galerautils/src/gu_throw.hpp @@ -46,6 +46,7 @@ namespace gu ThrowBase& operator= (const ThrowBase&); friend class ThrowError; + friend class ThrowSystemError; friend class ThrowFatal; }; @@ -64,7 +65,38 @@ namespace gu ~ThrowError() GU_NOEXCEPT(false) GU_NORETURN { - base.os << ": " << err << " (" << ::strerror(err) << ')'; + Exception e(base.os.str(), err); + + e.trace (base.file, base.func, base.line); + // cppcheck-suppress exceptThrowInDestructor + throw e; + } + + std::ostringstream& msg () { return base.os; } + + private: + + ThrowBase base; + int const err; + }; + + /* final */ class ThrowSystemError + { + public: + + ThrowSystemError (const char* file_, + const char* func_, + int line_, + int err_) + : + base (file_, func_, line_), + err (err_) + {} + + ~ThrowSystemError() GU_NOEXCEPT(false) GU_NORETURN + { + base.os << ": System error: " << err << " (" << ::strerror(err) + << ')'; Exception e(base.os.str(), err); @@ -114,6 +146,9 @@ namespace gu #define gu_throw_error(err_) \ gu::ThrowError(__FILE__, __FUNCTION__, __LINE__, err_).msg() +#define gu_throw_system_error(err_) \ + gu::ThrowSystemError(__FILE__, __FUNCTION__, __LINE__, err_).msg() + #define gu_throw_fatal \ gu::ThrowFatal(__FILE__, __FUNCTION__, __LINE__).msg() diff --git a/galerautils/src/gu_to.c b/galerautils/src/gu_to.c index 99a94d825..1a2da7d13 100644 --- a/galerautils/src/gu_to.c +++ b/galerautils/src/gu_to.c @@ -12,6 +12,8 @@ * section is required, these functions can be used to do this. */ + +#include #include #include #include @@ -130,12 +132,12 @@ long gu_to_destroy (gu_to_t** to) #ifdef TO_USE_SIGNAL if (gu_cond_destroy (&w->cond)) { // @todo: what if someone is waiting? - gu_warn ("Failed to destroy condition %d. Should not happen", i); + gu_warn ("Failed to destroy condition %zd. Should not happen", i); } #else if (pthread_mutex_destroy (&w->mtx)) { // @todo: what if someone is waiting? - gu_warn ("Failed to destroy mutex %d. Should not happen", i); + gu_warn ("Failed to destroy mutex %zd. Should not happen", i); } #endif } @@ -160,7 +162,7 @@ long gu_to_grab (gu_to_t* to, gu_seqno_t seqno) assert (seqno >= 0); if ((err = gu_mutex_lock(&to->lock))) { - gu_fatal("Mutex lock failed (%d): %s", err, strerror(err)); + gu_fatal("Mutex lock failed (%ld): %s", err, strerror(err)); abort(); } @@ -220,7 +222,8 @@ long gu_to_grab (gu_to_t* to, gu_seqno_t seqno) err = -ECANCELED; break; default: - gu_fatal("Invalid cond wait exit state %d, seqno %llu(%llu)", + gu_fatal("Invalid cond wait exit state %d, seqno %" PRId64 + "(%" PRId64 ")", w->state, seqno, to->seqno); abort(); } @@ -247,7 +250,7 @@ to_wake_waiter (to_waiter_t* w) err = pthread_mutex_unlock (&w->mtx); #endif if (err) { - gu_fatal ("gu_cond_signal failed: %d", err); + gu_fatal ("gu_cond_signal failed: %ld", err); } } return err; @@ -277,7 +280,7 @@ long gu_to_release (gu_to_t *to, gu_seqno_t seqno) assert (seqno >= 0); if ((err = gu_mutex_lock(&to->lock))) { - gu_fatal("Mutex lock failed (%d): %s", err, strerror(err)); + gu_fatal("Mutex lock failed (%ld): %s", err, strerror(err)); abort(); } @@ -321,7 +324,7 @@ long gu_to_cancel (gu_to_t *to, gu_seqno_t seqno) assert (seqno >= 0); if ((err = gu_mutex_lock (&to->lock))) { - gu_fatal("Mutex lock failed (%d): %s", err, strerror(err)); + gu_fatal("Mutex lock failed (%ld): %s", err, strerror(err)); abort(); } @@ -337,15 +340,19 @@ long gu_to_cancel (gu_to_t *to, gu_seqno_t seqno) err = to_wake_waiter (w); w->state = CANCELED; } else if (seqno == to->seqno && w->state == HOLDER) { - gu_warn("tried to cancel current TO holder, state %d seqno %llu", - w->state, seqno); + gu_warn("tried to cancel current TO holder, state %d seqno %" PRId64, + w->state, seqno); err = -ECANCELED; - } else { - gu_warn("trying to cancel used seqno: state %d cancel seqno = %llu, " - "TO seqno = %llu", w->state, seqno, to->seqno); - err = -ECANCELED; } - + else + { + gu_warn("trying to cancel used seqno: state %d cancel seqno = %" PRId64 + ", " + "TO seqno = %" PRId64, + w->state, seqno, to->seqno); + err = -ECANCELED; + } + gu_mutex_unlock (&to->lock); return err; } @@ -358,7 +365,7 @@ long gu_to_self_cancel(gu_to_t *to, gu_seqno_t seqno) assert (seqno >= 0); if ((err = gu_mutex_lock (&to->lock))) { - gu_fatal("Mutex lock failed (%d): %s", err, strerror(err)); + gu_fatal("Mutex lock failed (%ld): %s", err, strerror(err)); abort(); } @@ -394,7 +401,7 @@ long gu_to_interrupt (gu_to_t *to, gu_seqno_t seqno) assert (seqno >= 0); if ((err = gu_mutex_lock (&to->lock))) { - gu_fatal("Mutex lock failed (%d): %s", err, strerror(err)); + gu_fatal("Mutex lock failed (%ld): %s", err, strerror(err)); abort(); } if (seqno >= to->seqno) { @@ -406,33 +413,36 @@ long gu_to_interrupt (gu_to_t *to, gu_seqno_t seqno) switch (w->state) { case HOLDER: - gu_debug ("trying to interrupt in use seqno: seqno = %llu, " - "TO seqno = %llu", seqno, to->seqno); + gu_debug("trying to interrupt in use seqno: seqno = %" PRId64 ", " + "TO seqno = %" PRId64, + seqno, to->seqno); /* gu_mutex_unlock (&to->lock); */ rcode = -ERANGE; break; case CANCELED: - gu_debug ("trying to interrupt canceled seqno: seqno = %llu, " - "TO seqno = %llu", seqno, to->seqno); + gu_debug("trying to interrupt canceled seqno: seqno = %" PRId64 ", " + "TO seqno = %" PRId64, + seqno, to->seqno); /* gu_mutex_unlock (&to->lock); */ rcode = -ERANGE; break; case WAIT: - gu_debug ("signaling to interrupt wait seqno: seqno = %llu, " - "TO seqno = %llu", seqno, to->seqno); - rcode = to_wake_waiter (w); + gu_debug("signaling to interrupt wait seqno: seqno = %" PRId64 ", " + "TO seqno = %" PRId64, + seqno, to->seqno); + rcode = to_wake_waiter(w); /* fall through */ - case RELEASED: - w->state = INTERRUPTED; - break; + case RELEASED: w->state = INTERRUPTED; break; case INTERRUPTED: - gu_debug ("TO waiter interrupt already seqno: seqno = %llu, " - "TO seqno = %llu", seqno, to->seqno); + gu_debug("TO waiter interrupt already seqno: seqno = %" PRId64 ", " + "TO seqno = %" PRId64, + seqno, to->seqno); break; } } else { - gu_debug ("trying to interrupt used seqno: cancel seqno = %llu, " - "TO seqno = %llu", seqno, to->seqno); + gu_debug("trying to interrupt used seqno: cancel seqno = %" PRId64 ", " + "TO seqno = %" PRId64, + seqno, to->seqno); /* gu_mutex_unlock (&to->lock); */ rcode = -ERANGE; } diff --git a/garb/garb_gcs.cpp b/garb/garb_gcs.cpp index 6f1bd68e3..0caef6688 100644 --- a/garb/garb_gcs.cpp +++ b/garb/garb_gcs.cpp @@ -16,7 +16,7 @@ Gcs::Gcs (gu::Config& gconf, const std::string& group) : closed_ (true), - gcs_ (gcs_create (reinterpret_cast(&gconf), + gcs_ (gcs_create (gconf, NULL, NULL, name.c_str(), "", diff --git a/garb/garb_main.cpp b/garb/garb_main.cpp index 8d9ecc799..081cf277d 100644 --- a/garb/garb_main.cpp +++ b/garb/garb_main.cpp @@ -20,12 +20,12 @@ become_daemon (const std::string& workdir) { if (chdir("/")) // detach from potentially removable block devices { - gu_throw_error(errno) << "chdir(" << workdir << ") failed"; + gu_throw_system_error(errno) << "chdir(" << workdir << ") failed"; } if (!workdir.empty() && chdir(workdir.c_str())) { - gu_throw_error(errno) << "chdir(" << workdir << ") failed"; + gu_throw_system_error(errno) << "chdir(" << workdir << ") failed"; } if (pid_t pid = fork()) @@ -39,7 +39,7 @@ become_daemon (const std::string& workdir) // I guess we want this to go to stderr as well; std::cerr << "Failed to fork daemon process: " << errno << " (" << strerror(errno) << ")"; - gu_throw_error(errno) << "Failed to fork daemon process"; + gu_throw_system_error(errno) << "Failed to fork daemon process"; } } @@ -47,7 +47,7 @@ become_daemon (const std::string& workdir) if (setsid()<0) // become a new process leader, detach from terminal { - gu_throw_error(errno) << "setsid() failed"; + gu_throw_system_error(errno) << "setsid() failed"; } // umask(0); @@ -62,7 +62,7 @@ become_daemon (const std::string& workdir) } else { - gu_throw_error(errno) << "Second fork failed"; + gu_throw_system_error(errno) << "Second fork failed"; } } @@ -77,7 +77,8 @@ become_daemon (const std::string& workdir) { if (open("/dev/null", O_RDONLY) < 0) { - gu_throw_error(errno) << "Unable to open /dev/null for fd " << fd; + gu_throw_system_error(errno) + << "Unable to open /dev/null for fd " << fd; } } @@ -109,8 +110,9 @@ main (int argc, char* argv[]) if (sigaction (SIGPIPE, &isa, NULL)) { - gu_throw_error(errno) << "Falied to install signal handler for signal " - << "SIGPIPE"; + gu_throw_system_error(errno) + << "Falied to install signal handler for signal " + << "SIGPIPE"; } RecvLoop loop (config); diff --git a/garb/garb_recv_loop.cpp b/garb/garb_recv_loop.cpp index 50d7903bc..6333361bb 100644 --- a/garb/garb_recv_loop.cpp +++ b/garb/garb_recv_loop.cpp @@ -49,14 +49,16 @@ RecvLoop::RecvLoop (const Config& config) if (sigaction (SIGTERM, &sa, NULL)) { - gu_throw_error(errno) << "Falied to install signal handler for signal " - << "SIGTERM"; + gu_throw_system_error(errno) + << "Falied to install signal handler for signal " + << "SIGTERM"; } if (sigaction (SIGINT, &sa, NULL)) { - gu_throw_error(errno) << "Falied to install signal handler for signal " - << "SIGINT"; + gu_throw_system_error(errno) + << "Failed to install signal handler for signal " + << "SIGINT"; } loop(); diff --git a/garb/garb_recv_loop.hpp b/garb/garb_recv_loop.hpp index bb37e5a9a..e5a8fac87 100644 --- a/garb/garb_recv_loop.hpp +++ b/garb/garb_recv_loop.hpp @@ -37,10 +37,7 @@ class RecvLoop RegisterParams(gu::Config& cnf) { gu::ssl_register_params(cnf); - if (gcs_register_params(reinterpret_cast(&cnf))) - { - gu_throw_fatal << "Error initializing GCS parameters"; - } + gcs_register_params(cnf); cnf.add(COMMON_BASE_DIR_KEY); } } diff --git a/gcache/src/gcache_page_store.cpp b/gcache/src/gcache_page_store.cpp index 773e0a913..ead7dc403 100644 --- a/gcache/src/gcache_page_store.cpp +++ b/gcache/src/gcache_page_store.cpp @@ -104,7 +104,8 @@ gcache::PageStore::delete_page () if (0 != err) { delete_thr_ = pthread_t(-1); - gu_throw_error(err) << "Failed to create page file deletion thread"; + gu_throw_system_error(err) + << "Failed to create page file deletion thread"; } return true; @@ -229,8 +230,8 @@ gcache::PageStore::PageStore (const std::string& dir_name, if (0 != err) { - gu_throw_error(err) << "Failed to initialize page file deletion " - << "thread attributes"; + gu_throw_system_error(err) << "Failed to initialize page file deletion " + << "thread attributes"; } #ifdef GCACHE_DETACH_THREAD @@ -239,8 +240,8 @@ gcache::PageStore::PageStore (const std::string& dir_name, if (0 != err) { pthread_attr_destroy (&delete_page_attr_); - gu_throw_error(err) << "Failed to set DETACHED attribute to " - << "page file deletion thread"; + gu_throw_system_error(err) << "Failed to set DETACHED attribute to " + << "page file deletion thread"; } #endif /* GCACHE_DETACH_THREAD */ } diff --git a/gcomm/src/evs_proto.cpp b/gcomm/src/evs_proto.cpp index a2d87524b..0f0b25ced 100644 --- a/gcomm/src/evs_proto.cpp +++ b/gcomm/src/evs_proto.cpp @@ -674,7 +674,7 @@ void gcomm::evs::Proto::isolate(gu::datetime::Period period) void gcomm::evs::Proto::handle_install_timer() { gcomm_assert(state() == S_GATHER || state() == S_INSTALL); - log_warn << self_string() << " install timer expired"; + log_info << self_string() << " install timer expired"; bool is_cons(consensus_.is_consensus()); bool is_repr(is_representative(uuid())); @@ -2621,7 +2621,6 @@ int gcomm::evs::Proto::handle_down(Datagram& wb, const ProtoDownMeta& dm) else if (state() != S_OPERATIONAL) { - log_warn << "user message in state " << to_string(state()); return ENOTCONN; } diff --git a/gcomm/src/gcomm/protolay.hpp b/gcomm/src/gcomm/protolay.hpp index 611979ea9..9306bc389 100644 --- a/gcomm/src/gcomm/protolay.hpp +++ b/gcomm/src/gcomm/protolay.hpp @@ -287,7 +287,6 @@ class gcomm::Protolay { if (down_context_.empty() == true) { - log_warn << this << " down context(s) not set"; return ENOTCONN; } diff --git a/gcomm/src/gmcast.cpp b/gcomm/src/gmcast.cpp index 0ca541a83..dda401503 100644 --- a/gcomm/src/gmcast.cpp +++ b/gcomm/src/gmcast.cpp @@ -718,9 +718,9 @@ void gcomm::GMCast::handle_established(Proto* est) if (AddrList::value(i).retry_cnt() > AddrList::value(i).max_retries()) { - log_warn << "discarding established (time wait) " - << est->remote_uuid() - << " (" << est->remote_addr() << ") "; + log_info << "discarding connection " << est->remote_uuid() << " (" + << est->remote_addr() << ") " + << "after " << AddrList::value(i).retry_cnt() << " retries"; erase_proto(proto_map_->find(est->socket()->id())); update_addresses(); return; diff --git a/gcomm/src/gmcast_proto.cpp b/gcomm/src/gmcast_proto.cpp index e0310b8ec..75163bfc1 100644 --- a/gcomm/src/gmcast_proto.cpp +++ b/gcomm/src/gmcast_proto.cpp @@ -282,9 +282,9 @@ void gcomm::gmcast::Proto::handle_ok(const Message& hs) void gcomm::gmcast::Proto::handle_failed(const Message& hs) { - log_warn << "handshake with " << remote_uuid_ << " " - << remote_addr_ << " failed: '" - << hs.error() << "'"; + log_debug << "handshake with " << remote_uuid_ << " " + << remote_addr_ << " failed: '" + << hs.error() << "'"; set_state(S_FAILED); if (hs.error() == gmcast_proto_err_evicted) { @@ -300,7 +300,7 @@ void gcomm::gmcast::Proto::handle_failed(const Message& hs) { if (gmcast_.prim_view_reached()) { - log_warn << "Received duplicate UUID error from other node " + log_info << "Received duplicate UUID error from other node " << "while in primary component. This may mean that " << "this node's IP address has changed. Will close " << "connection and keep on retrying"; diff --git a/gcomm/src/pc_proto.cpp b/gcomm/src/pc_proto.cpp index 6a1af13c8..0603adcaa 100644 --- a/gcomm/src/pc_proto.cpp +++ b/gcomm/src/pc_proto.cpp @@ -183,6 +183,21 @@ void gcomm::pc::Proto::send_state() } } +static std::string send_error_str(int const err) +{ + std::ostringstream os; + switch (err) + { + case 0: os << "Success"; break; + case EAGAIN: + os << "Cluster configuration change in progress or flow control active"; + break; + case ENOTCONN: os << "Not connected to the cluster"; break; + default: os << "Unknown error: " << err; break; + } + return os.str(); +} + int gcomm::pc::Proto::send_install(bool bootstrap, int weight) { gcomm_assert(bootstrap == false || weight == -1); @@ -227,10 +242,10 @@ int gcomm::pc::Proto::send_install(bool bootstrap, int weight) serialize(pci, buf); Datagram dg(buf); int ret = send_down(dg, ProtoDownMeta()); - if (ret != 0) + if (ret) { - log_warn << self_id() << " sending install message failed: " - << strerror(ret); + log_info << "sending install message for new primary component failed: " + << send_error_str(ret) << ", will retry in next configuration"; } return ret; } @@ -579,14 +594,14 @@ void gcomm::pc::Proto::handle_trans(const View& view) if (closing_ == false && ignore_sb_ == true && have_split_brain(view)) { // configured to ignore split brain - log_warn << "Ignoring possible split-brain " + log_info << "Ignoring possible split-brain " << "(allowed by configuration) from view:\n" << current_view_ << "\nto view:\n" << view; } else if (closing_ == false && ignore_quorum_ == true) { // configured to ignore lack of quorum - log_warn << "Ignoring lack of quorum " + log_info << "Ignoring lack of quorum " << "(allowed by configuration) from view:\n" << current_view_ << "\nto view:\n" << view; } @@ -964,7 +979,8 @@ bool gcomm::pc::Proto::is_prim() const if (last_prim_uuids.empty() == true) { - log_warn << "no nodes coming from prim view, prim not possible"; + log_info << "No nodes coming from primary view, " + << "primary view is not possible"; return false; } @@ -1641,7 +1657,9 @@ int gcomm::pc::Proto::handle_down(Datagram& dg, const ProtoDownMeta& dm) } else if (ret != EAGAIN) { - log_warn << "Proto::handle_down: " << strerror(ret); + log_warn << "Got unexpected error code from send in " + "pc::Proto::handle_down(): " + << ret; } pop_header(um, dg); diff --git a/gcs/src/CMakeLists.txt b/gcs/src/CMakeLists.txt index 5bbe028cb..404eeaf32 100644 --- a/gcs/src/CMakeLists.txt +++ b/gcs/src/CMakeLists.txt @@ -22,6 +22,7 @@ set(GCS_SOURCES gcs_fc.cpp gcs.cpp gcs_gcomm.cpp + gcs_error.cpp ) # diff --git a/gcs/src/SConscript b/gcs/src/SConscript index 8c8d37272..ca2cabd26 100644 --- a/gcs/src/SConscript +++ b/gcs/src/SConscript @@ -51,6 +51,7 @@ libgcs_sources = Split(''' gcs_fc.cpp gcs.cpp gcs_gcomm.cpp + gcs_error.cpp ''') #libgcs_env.VariantDir('.gcs', '.', duplicate=0) libgcs_env.StaticLibrary('gcs', libgcs_sources) diff --git a/gcs/src/gcs.cpp b/gcs/src/gcs.cpp index a8511b51d..9e6d97777 100644 --- a/gcs/src/gcs.cpp +++ b/gcs/src/gcs.cpp @@ -16,6 +16,7 @@ #include "gcs_fifo_lite.hpp" #include "gcs_sm.hpp" #include "gcs_gcache.hpp" +#include "gcs_error.hpp" #include #include @@ -30,6 +31,8 @@ #include #include +#include + const char* gcs_node_state_to_str (gcs_node_state_t state) { static const char* str[GCS_NODE_STATE_MAX + 1] = @@ -116,6 +119,15 @@ __attribute__((__packed__)); struct gcs_conn { + gcs_conn(gu::Config& conf, + gcache_t* gcache, + gu::Progress::Callback* progress_cb, + const char* node_name, + const char* inc_addr, + int repl_proto_ver, + int appl_proto_ver); + ~gcs_conn(); + gu::UUID group_uuid; char* my_name; char* channel; @@ -126,7 +138,6 @@ struct gcs_conn gcs_conn_state_t state; gu_config_t* config; - bool config_is_local; struct gcs_params params; gcache_t* gcache; @@ -245,65 +256,63 @@ struct gcs_repl_act { } }; -/*! Releases resources associated with parameters */ -static void -_cleanup_params (gcs_conn_t* conn) -{ - if (conn->config_is_local) gu_config_destroy(conn->config); -} - -/*! Creates local configuration object if no external is submitted */ -static long -_init_params (gcs_conn_t* conn, gu_config_t* conf) -{ - long rc; - - conn->config = conf; - conn->config_is_local = false; - - if (!conn->config) { - conn->config = gu_config_create(); - - if (conn->config) { - conn->config_is_local = true; - } - else { - rc = -ENOMEM; - goto enomem; - } - } - - rc = gcs_params_init (&conn->params, conn->config); - - if (!rc) return 0; - - _cleanup_params (conn); - -enomem: - - gu_error ("Parameter initialization failed: %s", strerror (-rc)); - - return rc; -} - -/* Creates a group connection handle */ -gcs_conn_t* -gcs_create (gu_config_t* const conf, gcache_t* const gcache, - gu::Progress::Callback* const progress_cb, - const char* const node_name, const char* const inc_addr, - int const repl_proto_ver, int const appl_proto_ver) -{ - gcs_conn_t* conn = GU_CALLOC (1, gcs_conn_t); - - if (!conn) { - gu_error ("Could not allocate GCS connection handle: %s", - strerror (ENOMEM)); - return NULL; - } - - if (_init_params (conn, conf)) { - goto init_params_failed; - } +gcs_conn::gcs_conn(gu::Config& conf, + gcache_t* cache, + gu::Progress::Callback* const progress_cb, + const char* const node_name, + const char* const inc_addr, + int const repl_proto_ver, + int const appl_proto_ver) + : + group_uuid(), + my_name(), + channel(), + socket(), + my_idx(), + memb_num(), + state(GCS_CONN_DESTROYED), + config(reinterpret_cast(&conf)), + params(conf), + gcache(cache), + sm(), + local_act_id(), + global_seqno(), + repl_q(), + send_thread(), + recv_q(), + recv_q_size(), + recv_thread(), + timeout(), + fc_lock(), + stfc(), + stop_sent_(), + stop_count(), + queue_len(), + upper_limit(), + lower_limit(), + fc_offset(), + max_fc_state(), + stats_fc_stop_sent(), + stats_fc_cont_sent(), + stats_fc_received(), + conf_id(), + need_to_join(), + join_gtid(), + join_code(), + sync_sent_(), + core(), + vote_lock_(), + vote_cond_(), + vote_gtid_(), + vote_res_(), + vote_wait_(), + vote_err_(), + inner_close_count(), + outer_close_count(), + progress_cb_(progress_cb), + progress_() +{ + auto conn(this); // to minimize diff if (gcs_fc_init (&conn->stfc, conn->params.recv_q_hard_limit, @@ -314,7 +323,8 @@ gcs_create (gu_config_t* const conf, gcache_t* const gcache, } conn->state = GCS_CONN_DESTROYED; - conn->core = gcs_core_create (conf, gcache, node_name, inc_addr, + conn->core = gcs_core_create (conf, conn->gcache, + node_name, inc_addr, repl_proto_ver, appl_proto_ver); if (!conn->core) { gu_error ("Failed to create core."); @@ -354,7 +364,6 @@ gcs_create (gu_config_t* const conf, gcache_t* const gcache, conn->global_seqno = 0; conn->fc_offset = 0; conn->timeout = GU_TIME_ETERNITY; - conn->gcache = gcache; conn->max_fc_state = conn->params.sync_donor ? GCS_CONN_DONOR : GCS_CONN_JOINED; @@ -366,7 +375,7 @@ gcs_create (gu_config_t* const conf, gcache_t* const gcache, conn->progress_cb_ = progress_cb; conn->progress_ = NULL; - return conn; // success + return; // success sm_create_failed: @@ -383,14 +392,26 @@ gcs_create (gu_config_t* const conf, gcache_t* const gcache, core_create_failed: fc_init_failed: - _cleanup_params (conn); - -init_params_failed: - - gu_free (conn); + gu_throw_fatal << "Failed to create GCS connection handle."; +} - gu_error ("Failed to create GCS connection handle."); - return NULL; // failure +/* Creates a group connection handle */ +gcs_conn_t* +gcs_create (gu::Config& conf, gcache_t* gcache, + gu::Progress::Callback* const progress_cb, + const char* const node_name, const char* const inc_addr, + int const repl_proto_ver, int const appl_proto_ver) +{ + try + { + return new gcs_conn(conf, gcache, + progress_cb, node_name, inc_addr, + repl_proto_ver, appl_proto_ver); + } + catch (...) + { + return nullptr; + } } long @@ -426,7 +447,7 @@ gcs_check_error (int err, const char* warning) case -ENOTCONN: case -ECONNABORTED: if (NULL != warning) { - gu_warn ("%s: %d (%s)", warning, err, strerror(-err)); + gu_info ("%s: %d (%s)", warning, err, gcs_error_str(-err)); } err = 0; break; @@ -456,7 +477,7 @@ gcs_fc_stop_begin (gcs_conn_t* conn) !(err = gu_mutex_lock (&conn->fc_lock))); if (gu_unlikely(err)) { - gu_fatal ("Mutex lock failed: %d (%s)", err, strerror(err)); + gu_fatal ("Mutex lock failed: %ld (%s)", err, strerror(err)); abort(); } @@ -491,7 +512,8 @@ gcs_fc_stop_end (gcs_conn_t* conn) conn->stop_sent_dec(1); } - gu_debug ("SENDING FC_STOP (local seqno: %lld, fc_offset: %ld): %d", + gu_debug("SENDING FC_STOP (local seqno: %" PRId64 + ", fc_offset: %ld): %d", conn->local_act_id, conn->fc_offset, ret); } else @@ -521,7 +543,7 @@ gcs_fc_cont_begin (gcs_conn_t* conn) !(err = gu_mutex_lock (&conn->fc_lock))); if (gu_unlikely(err)) { - gu_fatal ("Mutex lock failed: %d (%s)", err, strerror(err)); + gu_fatal ("Mutex lock failed: %ld (%s)", err, strerror(err)); abort(); } @@ -553,7 +575,8 @@ gcs_fc_cont_end (gcs_conn_t* conn) conn->stop_sent_inc(1); } - gu_debug ("SENDING FC_CONT (local seqno: %lld, fc_offset: %ld): %d", + gu_debug("SENDING FC_CONT (local seqno: %" PRId64 + ", fc_offset: %ld): %d", conn->local_act_id, conn->fc_offset, ret); } else @@ -661,16 +684,17 @@ gcs_shift_state (gcs_conn_t* const conn, if (!allowed[new_state][old_state]) { if (old_state != new_state) { - gu_warn ("GCS: Shifting %s -> %s is not allowed (TO: %lld)", - gcs_conn_state_str[old_state], - gcs_conn_state_str[new_state], conn->global_seqno); + gu_warn("GCS: Shifting %s -> %s is not allowed (TO: %" PRId64 ")", + gcs_conn_state_str[old_state], + gcs_conn_state_str[new_state], conn->global_seqno); } return false; } if (old_state != new_state) { - gu_info ("Shifting %s -> %s (TO: %lld)", gcs_conn_state_str[old_state], - gcs_conn_state_str[new_state], conn->global_seqno); + gu_info("Shifting %s -> %s (TO: %" PRId64 ")", + gcs_conn_state_str[old_state], gcs_conn_state_str[new_state], + conn->global_seqno); conn->state = new_state; } @@ -740,8 +764,8 @@ gcs_become_primary (gcs_conn_t* conn) int ret; if ((ret = _release_flow_control (conn))) { - gu_fatal ("Failed to release flow control: %ld (%s)", - ret, strerror(ret)); + gu_fatal ("Failed to release flow control: %d (%s)", + ret, gcs_error_str(ret)); gcs_close (conn); abort(); } @@ -794,7 +818,7 @@ gcs_become_donor (gcs_conn_t* conn) -EPROTO); if (err < 0 && !(err == -ENOTCONN || err == -EBADFD)) { gu_fatal ("Failed to send State Transfer Request rejection: " - "%zd (%s)", err, (strerror (-err))); + "%zd (%s)", err, (gcs_error_str (-err))); assert (0); return -ENOTRECOVERABLE; // failed to clear donor status, } @@ -849,8 +873,8 @@ gcs_become_joined (gcs_conn_t* conn) if (GCS_CONN_JOINER == conn->state) { ret = _release_sst_flow_control (conn); if (ret < 0) { - gu_fatal ("Releasing SST flow control failed: %ld (%s)", - ret, strerror (-ret)); + gu_fatal ("Releasing SST flow control failed: %d (%s)", + ret, gcs_error_str (-ret)); abort(); } conn->timeout = GU_TIME_ETERNITY; @@ -865,7 +889,7 @@ gcs_become_joined (gcs_conn_t* conn) gu_debug("Become joined, FC offset %ld", conn->fc_offset); /* One of the cases when the node can become SYNCED */ if ((ret = gcs_send_sync (conn))) { - gu_warn ("Sending SYNC failed: %ld (%s)", ret, strerror (-ret)); + gu_warn ("Sending SYNC failed: %d (%s)", ret, gcs_error_str(-ret)); } } else { @@ -958,11 +982,12 @@ s_join (gcs_conn_t* conn) switch (err) { case -ENOTCONN: - gu_warn ("Sending JOIN failed: %d (%s). " - "Will retry in new primary component.", err,strerror(-err)); + gu_info("Sending JOIN failed: %s. " + "Will retry in new primary component.", + gcs_error_str(-err)); return 0; default: - gu_error ("Sending JOIN failed: %d (%s).", err, strerror(-err)); + gu_error("Sending JOIN failed: %d (%s).", err, gcs_error_str(-err)); return err; } } @@ -1107,7 +1132,7 @@ gcs_handle_act_conf (gcs_conn_t* conn, gcs_act_rcvd& rcvd) } if (old_state != conn->state) { - gu_info ("Restored state %s -> %s (%lld)", + gu_info ("Restored state %s -> %s (%" PRId64 ")", gcs_conn_state_str[old_state], gcs_conn_state_str[conn->state], conn->global_seqno); } @@ -1116,7 +1141,7 @@ gcs_handle_act_conf (gcs_conn_t* conn, gcs_act_rcvd& rcvd) case GCS_CONN_JOINED: /* One of the cases when the node can become SYNCED */ if ((ret = gcs_send_sync(conn)) < 0) { - gu_warn ("CC: sending SYNC failed: %ld (%s)", ret, strerror (-ret)); + gu_warn ("CC: sending SYNC failed: %ld (%s)", ret, gcs_error_str (-ret)); } break; case GCS_CONN_JOINER: @@ -1138,7 +1163,7 @@ gcs_handle_act_state_req (gcs_conn_t* conn, { if ((gcs_seqno_t)conn->my_idx == rcvd.id) { int const donor_idx = (int)rcvd.id; // to pacify valgrind - gu_debug("Got GCS_ACT_STATE_REQ to %i, my idx: %ld", + gu_debug("Got GCS_ACT_STATE_REQ to %i, my idx: %d", donor_idx, conn->my_idx); // rewrite to pass global seqno for application rcvd.id = conn->global_seqno; @@ -1157,7 +1182,7 @@ static long gcs_handle_state_change (gcs_conn_t* conn, const struct gcs_act* act) { - gu_debug ("Got '%s' dated %lld", gcs_act_type_to_str (act->type), + gu_debug ("Got '%s' dated %" PRId64, gcs_act_type_to_str (act->type), gcs_seqno_gtoh(*(gcs_seqno_t*)act->buf)); void* buf = malloc (act->buf_len); @@ -1389,7 +1414,7 @@ _close(gcs_conn_t* conn, bool join_recv_thread) /* if called from gcs_close(), we need to synchronize with gcs_recv_thread at this point */ if ((ret = gu_thread_join (conn->recv_thread, NULL))) { - gu_error ("Failed to join recv_thread(): %d (%s)", + gu_error ("Failed to join recv_thread(): %ld (%s)", -ret, strerror(-ret)); } else { @@ -1455,7 +1480,8 @@ static void *gcs_recv_thread (void *arg) if (gu_unlikely(ret <= 0)) { - gu_debug ("gcs_core_recv returned %d: %s", ret, strerror(-ret)); + gu_debug("gcs_core_recv returned %zd: %s", ret, + gcs_error_str(-ret)); if (-ETIMEDOUT == ret && _handle_timeout(conn)) continue; @@ -1502,7 +1528,7 @@ static void *gcs_recv_thread (void *arg) } if (gu_unlikely(ret < 0)) { // error - gu_debug ("gcs_handle_actions returned %d: %s", + gu_debug ("gcs_handle_actions returned %zd: %s", ret, strerror(-ret)); break; } @@ -1576,8 +1602,8 @@ static void *gcs_recv_thread (void *arg) } if (gu_unlikely(send_stop) && (ret = gcs_fc_stop_end(conn))) { - gu_error ("gcs_fc_stop() returned %d: %s", - ret, strerror(-ret)); + gu_error ("gcs_fc_stop() returned %zd: %s", + ret, gcs_error_str(-ret)); break; } } @@ -1605,7 +1631,7 @@ static void *gcs_recv_thread (void *arg) else if (conn->my_idx == rcvd.sender_idx) { gu_debug("Discarding: unordered local action not in repl_q: " - "{ {%p, %zd, %s}, %d, %lld }.", + "{ {%p, %zd, %s}, %d, %" PRId64 " }.", rcvd.act.buf, rcvd.act.buf_len, gcs_act_type_to_str(rcvd.act.type), rcvd.sender_idx, rcvd.id); @@ -1613,7 +1639,7 @@ static void *gcs_recv_thread (void *arg) else { gu_fatal ("Protocol violation: unordered remote action: " - "{ {%p, %zd, %s}, %d, %lld }", + "{ {%p, %zd, %s}, %d, % " PRId64 " }", rcvd.act.buf, rcvd.act.buf_len, gcs_act_type_to_str(rcvd.act.type), rcvd.sender_idx, rcvd.id); @@ -1632,7 +1658,7 @@ static void *gcs_recv_thread (void *arg) (void)_close(conn, false); gcs_shift_state (conn, GCS_CONN_CLOSED); } - gu_info ("RECV thread exiting %d: %s", ret, strerror(-ret)); + gu_info ("RECV thread exiting %zd: %s", ret, strerror(-ret)); return NULL; } @@ -1649,7 +1675,7 @@ long gcs_open (gcs_conn_t* conn, const char* channel, const char* url, if ((ret = gcs_sm_enter (conn->sm, &tmp_cond, false, true))) { - gu_error("Failed to enter send monitor: %d (%s)", ret, strerror(-ret)); + gu_error("Failed to enter send monitor: %ld (%s)", ret, strerror(-ret)); return ret; } @@ -1678,7 +1704,7 @@ long gcs_open (gcs_conn_t* conn, const char* channel, const char* url, gcs_core_close (conn->core); } else { - gu_error ("Failed to open channel '%s' at '%s': %d (%s)", + gu_error ("Failed to open channel '%s' at '%s': %ld (%s)", channel, url, ret, strerror(-ret)); } } @@ -1712,7 +1738,7 @@ long gcs_close (gcs_conn_t *conn) /* _close() has already been called by gcs_recv_thread() and it is taking care of cleanup, just join the thread */ if ((ret = gu_thread_join (conn->recv_thread, NULL))) { - gu_error ("Failed to join recv_thread(): %d (%s)", + gu_error ("Failed to join recv_thread(): %ld (%s)", -ret, strerror(-ret)); } else { @@ -1729,11 +1755,10 @@ long gcs_close (gcs_conn_t *conn) return ret; } -/* Frees resources associated with GCS connection handle */ -long gcs_destroy (gcs_conn_t *conn) +gcs_conn::~gcs_conn() { - long err; - + auto conn(this); // to minimize diff + int err; gu_cond_t tmp_cond; gu_cond_init (gu::get_cond_key(gu::GU_COND_KEY_GCS_DESTROY), &tmp_cond); @@ -1747,7 +1772,7 @@ long gcs_destroy (gcs_conn_t *conn) gu_cond_destroy (&tmp_cond); - return -EBADFD; + gu_throw_error(EBADFD); } gcs_sm_leave (conn->sm); @@ -1768,24 +1793,30 @@ long gcs_destroy (gcs_conn_t *conn) if ((err = gcs_fifo_lite_destroy (conn->repl_q))) { gu_debug ("Error destroying repl FIFO: %d (%s)", err, strerror(-err)); - return err; + gu_throw_error(-err); } if ((err = gcs_core_destroy (conn->core))) { gu_debug ("Error destroying core: %d (%s)", err, strerror(-err)); - return err; + gu_throw_error(-err); } gu_cond_destroy(&conn->vote_cond_); gu_mutex_destroy(&conn->vote_lock_); /* This must not last for long */ while (gu_mutex_destroy (&conn->fc_lock)); +} - _cleanup_params (conn); - - gu_free (conn); - - return 0; +/* Frees resources associated with GCS connection handle */ +long gcs_destroy (gcs_conn_t *conn) +{ + try { + delete conn; + return 0; + } + catch (...) { + return -1; + } } /* Puts action in the send queue and returns */ @@ -1906,9 +1937,10 @@ long gcs_replv (gcs_conn_t* const conn, //!buf, act->size,gcs_act_type_to_str(act->type), - ret, strerror(-ret)); + gu_debug( + "Send action {%p, %" PRId32 ", %s} returned %ld (%s)", + act->buf, act->size, gcs_act_type_to_str(act->type), + ret, gcs_error_str(-ret)); if (!gcs_fifo_lite_remove (conn->repl_q)) { gu_fatal ("Failed to remove unsent item from repl_q"); @@ -1958,7 +1990,7 @@ long gcs_replv (gcs_conn_t* const conn, //!buf) // action was allocated in gcache { - gu_debug("Freeing gcache buffer %p after receiving %d", + gu_debug("Freeing gcache buffer %p after receiving %ld", act->buf, ret); gcs_gcache_free (conn->gcache, act->buf); act->buf = orig_buf; @@ -2144,19 +2176,19 @@ long gcs_recv (gcs_conn_t* conn, if (conn->queue_len > 0) { gu_warn ("Failed to send CONT message: %d (%s). " "Attempts left: %ld", - err, strerror(-err), conn->queue_len); + err, gcs_error_str(-err), conn->queue_len); } else { gu_fatal ("Last opportunity to send CONT message failed: " "%d (%s). Aborting to avoid cluster lock-up...", - err, strerror(-err)); + err, gcs_error_str(-err)); gcs_close(conn); gu_abort(); } } else if (gu_unlikely(send_sync) && (err = gcs_send_sync_end (conn))) { gu_warn ("Failed to send SYNC message: %d (%s). Will try later.", - err, strerror(-err)); + err, gcs_error_str(-err)); } return action->size; @@ -2701,9 +2733,10 @@ _set_max_throttle (gcs_conn_t* conn, const char* value) } } -bool gcs_register_params (gu_config_t* const conf) +void gcs_register_params (gu::Config& conf) { - return (gcs_params_register (conf) || gcs_core_register (conf)); + gcs_params::register_params(conf); + gcs_core_register(conf); } long gcs_param_set (gcs_conn_t* conn, const char* key, const char *value) diff --git a/gcs/src/gcs.hpp b/gcs/src/gcs.hpp index b7696c589..ee3e2e5a8 100644 --- a/gcs/src/gcs.hpp +++ b/gcs/src/gcs.hpp @@ -16,7 +16,7 @@ #include "gcs_gcache.hpp" #include // pfs declarations -#include +#include #include #include #include @@ -61,7 +61,7 @@ typedef struct gcs_conn gcs_conn_t; * @return pointer to GCS connection handle, NULL in case of failure. */ extern gcs_conn_t* -gcs_create (gu_config_t* conf, gcache_t* cache, +gcs_create (gu::Config& conf, gcache_t* cache, gu::Progress::Callback* progress_cb, const char* node_name, const char* inc_addr, int repl_proto_ver, int appl_proto_ver); @@ -395,9 +395,9 @@ gcs_vote (gcs_conn_t* conn, const gu::GTID& gtid, uint64_t code, /* GCS Configuration */ /*! Registers configurable parameters with conf object - * @return false if success, true if error happened */ -extern bool -gcs_register_params (gu_config_t* conf); + * throws exception if error happened */ +extern void +gcs_register_params (gu::Config& conf); /*! sets the key to a given value * diff --git a/gcs/src/gcs_act_proto.cpp b/gcs/src/gcs_act_proto.cpp index addca77d2..ff13945ce 100644 --- a/gcs/src/gcs_act_proto.cpp +++ b/gcs/src/gcs_act_proto.cpp @@ -85,7 +85,7 @@ gcs_act_proto_read (gcs_act_frag_t* frag, const void* buf, size_t buf_len) frag->proto_ver = ((uint8_t*)buf)[PROTO_PV_OFFSET]; if (gu_unlikely(buf_len < PROTO_DATA_OFFSET)) { - gu_error ("Action message too short: %zu, expected at least %d", + gu_error ("Action message too short: %zu, expected at least %zu", buf_len, PROTO_DATA_OFFSET); return -EBADMSG; } diff --git a/gcs/src/gcs_core.cpp b/gcs/src/gcs_core.cpp index f2a532f28..9ccaa5f15 100644 --- a/gcs/src/gcs_core.cpp +++ b/gcs/src/gcs_core.cpp @@ -15,6 +15,7 @@ #include "gcs_backend.hpp" #include "gcs_comp_msg.hpp" #include "gcs_code_msg.hpp" +#include "gcs_error.hpp" #include "gcs_fifo_lite.hpp" #include "gcs_group.hpp" #include "gcs_gcache.hpp" @@ -28,13 +29,18 @@ #include // for mempcpy #include +#include + using namespace gcs::core; -bool -gcs_core_register (gu_config_t* conf) +void +gcs_core_register(gu::Config& conf) { - gcs_group_register(reinterpret_cast(conf)); - return (gcs_backend_register(conf)); + gcs_group::register_params(conf); + if (gcs_backend_register(reinterpret_cast(&conf))) + { + gu_throw_fatal << "Could not register backend parmeters"; + } } const size_t CORE_FIFO_LEN = (1 << 10); // 1024 elements (no need to have more) @@ -52,9 +58,21 @@ core_state_t; struct gcs_core { + gcs_core(gu::Config& conf, + gcache_t* cache, + const char* node_name, + const char* inc_addr, + int repl_proto_ver, + int appl_proto_ver, + int gcs_proto_ver = GCS_PROTO_MAX); + ~gcs_core(); + gu_config_t* config; gcache_t* cache; + /* group context */ + gcs_group_t group; + /* connection per se */ long prim_comp_no; core_state_t state; @@ -79,9 +97,6 @@ struct gcs_core /* local action FIFO */ gcs_fifo_lite_t* fifo; - /* group context */ - gcs_group_t group; - /* backend part */ size_t msg_size; gcs_backend_t backend; // message IO context @@ -110,24 +125,37 @@ typedef struct causal_act gu_cond_t* cond; } causal_act_t; -gcs_core_t* -gcs_core_create (gu_config_t* const conf, - gcache_t* const cache, - const char* const node_name, - const char* const inc_addr, - int const repl_proto_ver, - int const appl_proto_ver, - int const gcs_proto_ver) +gcs_core::gcs_core(gu::Config& conf, + gcache_t* cache, + const char* node_name, + const char* inc_addr, + int repl_proto_ver, + int appl_proto_ver, + int gcs_proto_ver) + : + config(reinterpret_cast(&conf)), + cache(cache), + group(conf, cache, node_name, inc_addr, + gcs_proto_ver, repl_proto_ver,appl_proto_ver), + prim_comp_no(), + state(), + proto_ver(), + send_lock(), + send_buf(), + send_buf_len(), + send_act_no(), + recv_msg(), + code_msg_buf(), + fifo(), + msg_size(), + backend() +#ifdef GCS_CORE_TESTING + ,ls() // to lock-step in unit tests + ,state_uuid() +#endif { - assert (conf); - - gcs_core_t* core = GU_CALLOC (1, gcs_core_t); - - if (NULL != core) { - - core->config = conf; - core->cache = cache; - + auto core(this); // to minimize diff + { // Need to allocate something, otherwise Spread 3.17.3 freaks out. core->recv_msg.buf = gu_malloc(CORE_INIT_BUF_SIZE); if (core->recv_msg.buf) { @@ -147,19 +175,13 @@ gcs_core_create (gu_config_t* const conf, &core->send_lock); core->proto_ver = -1; // ^^^ shall be bumped in gcs_group_act_conf() - - gcs_group_init(&core->group, - reinterpret_cast(conf), cache, - node_name, inc_addr, gcs_proto_ver, - repl_proto_ver, appl_proto_ver); - core->state = CORE_CLOSED; core->send_act_no = 1; // 0 == no actions sent #ifdef GCS_CORE_TESTING gu_lock_step_init (&core->ls); core->state_uuid = GU_UUID_NIL; #endif - return core; // success + return; // success } gu_free (core->send_buf); @@ -167,11 +189,27 @@ gcs_core_create (gu_config_t* const conf, gu_free (core->recv_msg.buf); } - - gu_free (core); } - return NULL; // failure + gu_throw_fatal << "Failed to initialize GCS core"; +} + +gcs_core_t* +gcs_core_create (gu::Config& conf, + gcache_t* const cache, + const char* const node_name, + const char* const inc_addr, + int const repl_proto_ver, + int const appl_proto_ver, + int const gcs_proto_ver) +{ + try { + return new gcs_core(conf, cache, node_name, inc_addr, + repl_proto_ver, appl_proto_ver, gcs_proto_ver); + } + catch (...) { + return nullptr; + } } long @@ -218,14 +256,14 @@ gcs_core_open (gcs_core_t* core, core->state = CORE_NON_PRIMARY; } else { - gu_error ("Failed to open backend connection: %d (%s)", + gu_error ("Failed to open backend connection: %ld (%s)", ret, strerror(-ret)); core->backend.destroy (&core->backend); } } else { - gu_error ("Failed to initialize backend using '%s': %d (%s)", + gu_error ("Failed to initialize backend using '%s': %ld (%s)", url, ret, strerror(-ret)); } @@ -355,7 +393,7 @@ gcs_core_send (gcs_core_t* const conn, } else { ret = core_error (conn->state); - gu_error ("Failed to access core FIFO: %d (%s)", ret, strerror (-ret)); + gu_error ("Failed to access core FIFO: %zd (%s)", ret, strerror (-ret)); return ret; } @@ -490,7 +528,7 @@ core_msg_recv (gcs_backend_t* backend, gcs_recv_msg_t* recv_msg, /* sometimes - like in case of component message, we may need to * do reallocation 2 times. This should be fixed in backend */ void* msg = gu_realloc (recv_msg->buf, ret); - gu_debug ("Reallocating buffer from %d to %d bytes", + gu_debug ("Reallocating buffer from %d to %ld bytes", recv_msg->buf_len, ret); if (msg) { /* try again */ @@ -504,7 +542,7 @@ core_msg_recv (gcs_backend_t* backend, gcs_recv_msg_t* recv_msg, } else { /* realloc unsuccessfull, old recv_buf remains */ - gu_error ("Failed to reallocate buffer to %d bytes", ret); + gu_error ("Failed to reallocate buffer to %ld bytes", ret); ret = -ENOMEM; break; } @@ -513,7 +551,7 @@ core_msg_recv (gcs_backend_t* backend, gcs_recv_msg_t* recv_msg, assert(recv_msg->buf); if (gu_unlikely(ret < 0)) { - gu_debug ("returning %d: %s\n", ret, strerror(-ret)); + gu_debug ("returning %ld: %s\n", ret, strerror(-ret)); } return ret; @@ -597,8 +635,10 @@ core_handle_act_msg (gcs_core_t* core, /* NOTE! local_act cannot be used after this point */ /* sanity check */ if (gu_unlikely(sent_act_id != frg.act_id)) { - gu_fatal ("FIFO violation: expected sent_act_id %lld " - "found %lld", sent_act_id, frg.act_id); + gu_fatal("FIFO violation: expected sent_act_id %" PRId64 + " " + "found %" PRId64, + sent_act_id, frg.act_id); ret = -ENOTRECOVERABLE; } if (gu_unlikely(act->act.buf_len != ret)) { @@ -641,7 +681,7 @@ core_handle_act_msg (gcs_core_t* core, ret = gcs_group_handle_state_request (group, act); assert (ret <= 0 || ret == act->act.buf_len); #ifdef GCS_FOR_GARB - if (ret < 0) gu_fatal ("Handling state request failed: %d",ret); + if (ret < 0) gu_fatal ("Handling state request failed: %ld",ret); act->act.buf = NULL; } else { @@ -665,7 +705,7 @@ core_handle_act_msg (gcs_core_t* core, } else { /* Non-primary conf, foreign message - ignore */ - gu_warn ("Action message in non-primary configuration from " + gu_info ("Action message in non-primary configuration from " "member %d", msg->sender_idx); ret = 0; } @@ -796,7 +836,7 @@ core_handle_comp_msg (gcs_core_t* const core, assert (GCS_MSG_COMPONENT == msg->type); if (msg->size < (ssize_t)sizeof(gcs_comp_msg_t)) { - gu_error ("Malformed component message (size %zd < %zd). Ignoring", + gu_error ("Malformed component message (size %d < %zu). Ignoring", msg->size, sizeof(gcs_comp_msg_t)); return 0; } @@ -815,7 +855,7 @@ core_handle_comp_msg (gcs_core_t* const core, ret = gcs_group_act_conf (group, rcvd, &core->proto_ver); if (ret < 0) { - gu_fatal ("Failed create PRIM CONF action: %d (%s)", + gu_fatal ("Failed create PRIM CONF action: %zd (%s)", ret, strerror (-ret)); assert (0); ret = -ENOTRECOVERABLE; @@ -838,11 +878,25 @@ core_handle_comp_msg (gcs_core_t* const core, &uuid, sizeof(uuid), GCS_MSG_STATE_UUID); - if (ret < 0) { + if (ret < 0) + { // if send() failed, it means new configuration change // is on the way. Probably should ignore. - gu_warn ("Failed to send state UUID: %d (%s)", - ret, strerror (-ret)); + switch (-ret) + { + case EAGAIN: + gu_info("Temporary failure in sending state UUID, " + "will try again in next primary component"); + break; + case ENOTCONN: + gu_info("Failed to send state UUID: Connection to " + "cluster was closed"); + break; + default: + gu_warn("Failed to send state UUID: %zd (%s)", ret, + gcs_error_str(-ret)); + break; + } } else { gu_info ("STATE_EXCHANGE: sent state UUID: " @@ -867,7 +921,7 @@ core_handle_comp_msg (gcs_core_t* const core, assert(act->buf == NULL); assert(act->buf_len == 0); act->type = GCS_ACT_ERROR; - gu_debug("comp msg error in core %d", -ret); + gu_debug("comp msg error in core %ld", -ret); } } else { // regular non-prim @@ -877,7 +931,7 @@ core_handle_comp_msg (gcs_core_t* const core, if (GCS_GROUP_NON_PRIMARY == ret) { // no error in comp msg ret = gcs_group_act_conf (group, rcvd, &core->proto_ver); if (ret < 0) { - gu_fatal ("Failed create NON-PRIM CONF action: %d (%s)", + gu_fatal ("Failed create NON-PRIM CONF action: %ld (%s)", ret, strerror (-ret)); assert (0); ret = -ENOTRECOVERABLE; @@ -899,7 +953,7 @@ core_handle_comp_msg (gcs_core_t* const core, assert(0); // fall through default: - gu_fatal ("Failed to handle component message: %d (%s)!", + gu_fatal ("Failed to handle component message: %ld (%s)!", ret, strerror (-ret)); assert(0); } @@ -950,7 +1004,7 @@ core_handle_uuid_msg (gcs_core_t* core, // This may happen if new configuraiton chage goes on. // What shall we do in this case? Is it unrecoverable? gu_error ("STATE EXCHANGE: failed for: " GU_UUID_FORMAT - ": %d (%s)", + ": %zd (%s)", GU_UUID_ARGS(state_uuid), ret, strerror(-ret)); } gcs_state_msg_destroy (state); @@ -966,7 +1020,7 @@ core_handle_uuid_msg (gcs_core_t* core, break; default: assert(ret < 0); - gu_error ("Failed to handle state UUID: %d (%s)", + gu_error ("Failed to handle state UUID: %zd (%s)", ret, strerror (-ret)); } } @@ -1015,7 +1069,7 @@ core_handle_state_msg (gcs_core_t* core, ret = gcs_group_act_conf (group, rcvd, &core->proto_ver); if (ret < 0) { - gu_fatal ("Failed create CONF action: %d (%s)", + gu_fatal ("Failed create CONF action: %zd (%s)", ret, strerror (-ret)); assert (0); ret = -ENOTRECOVERABLE; @@ -1031,7 +1085,7 @@ core_handle_state_msg (gcs_core_t* core, break; default: assert (ret < 0); - gu_error ("Failed to handle state message: %d (%s)", + gu_error ("Failed to handle state message: %zd (%s)", ret, strerror (-ret)); } gu_mutex_unlock (&core->send_lock); @@ -1119,7 +1173,7 @@ core_msg_to_action (gcs_core_t* core, } break; default: - gu_error ("Iternal error. Unexpected message type %s from %ld", + gu_error ("Iternal error. Unexpected message type %s from %d", gcs_msg_type_string[msg->type], msg->sender_idx); assert (0); ret = -EPROTO; @@ -1133,7 +1187,10 @@ core_msg_to_action (gcs_core_t* core, } } else { - gu_warn ("%s message from member %ld in non-primary configuration. " + /* Messages which were sent just before cluster partitioning may + * be delivered in the following non-primary configuration. This + * is expected behavior, so info log level is enough. */ + gu_info ("%s message from member %d in non-primary configuration. " "Ignored.", gcs_msg_type_string[msg->type], msg->sender_idx); } @@ -1145,7 +1202,7 @@ static long core_msg_causal(gcs_core_t* conn, { if (gu_unlikely(msg->size != sizeof(causal_act_t))) { - gu_error("invalid causal act len %ld, expected %ld", + gu_error("invalid causal act len %d, expected %zu", msg->size, sizeof(causal_act_t)); return -EPROTO; } @@ -1315,12 +1372,11 @@ long gcs_core_close (gcs_core_t* core) return ret; } -long gcs_core_destroy (gcs_core_t* core) +static int +core_destroy(gcs_core_t* core) { core_act_t* tmp; - if (!core) return -EBADFD; - if (gu_mutex_lock (&core->send_lock)) return -EBADFD; { if (CORE_CLOSED != core->state) { @@ -1348,7 +1404,6 @@ long gcs_core_destroy (gcs_core_t* core) gcs_fifo_lite_pop_head (core->fifo); } gcs_fifo_lite_destroy (core->fifo); - gcs_group_free (&core->group); /* free buffers */ gu_free (core->recv_msg.buf); @@ -1358,11 +1413,28 @@ long gcs_core_destroy (gcs_core_t* core) gu_lock_step_destroy (&core->ls); #endif - gu_free (core); - return 0; } +gcs_core::~gcs_core() +{ + int const ret(core_destroy(this)); + if (ret) { + gu_throw_error(ret) << "GCS core destructor failed"; + } +} + +long gcs_core_destroy (gcs_core_t* core) +{ + try { + delete core; + return 0; + } + catch (...) { + return -1; + } +} + int gcs_core_proto_ver (const gcs_core_t* conn) { diff --git a/gcs/src/gcs_core.hpp b/gcs/src/gcs_core.hpp index d3a77e3c7..d2624143c 100644 --- a/gcs/src/gcs_core.hpp +++ b/gcs/src/gcs_core.hpp @@ -27,14 +27,15 @@ #include "gcs_act.hpp" #include "gcs_act_proto.hpp" +#include #include #include #include /* 'static' method to register configuration variables */ -extern bool -gcs_core_register (gu_config_t* conf); +extern void +gcs_core_register (gu::Config& conf); struct gcs_core; typedef struct gcs_core gcs_core_t; @@ -45,7 +46,7 @@ typedef struct gcs_core gcs_core_t; * @param gcs_proto_ver only for unit tests */ extern gcs_core_t* -gcs_core_create (gu_config_t* conf, +gcs_core_create (gu::Config& conf, gcache_t* cache, const char* node_name, const char* inc_addr, diff --git a/gcs/src/gcs_defrag.cpp b/gcs/src/gcs_defrag.cpp index 186cba050..7735624ad 100644 --- a/gcs/src/gcs_defrag.cpp +++ b/gcs/src/gcs_defrag.cpp @@ -7,6 +7,7 @@ #include "gcs_defrag.hpp" #include +#include #include #include @@ -58,9 +59,9 @@ gcs_defrag_handle_frag (gcs_defrag_t* df, /* df->sent_id was aborted halfway and is being taken care of * by the sender thread. Forget about it. * Reinit counters and continue with the new action. */ - gu_debug ("Local action %lld, size %ld reset.", - frg->act_id, frg->act_size); - df->frag_no = 0; + gu_debug("Local action %" PRId64 ", size %ld reset.", + frg->act_id, frg->act_size); + df->frag_no = 0; df->received = 0; df->tail = static_cast(df->plain); df->reset = false; @@ -83,9 +84,10 @@ gcs_defrag_handle_frag (gcs_defrag_t* df, } else if (frg->act_id == df->sent_id && frg->frag_no < df->frag_no) { /* gh172: tolerate duplicate fragments in production. */ - gu_warn ("Duplicate fragment %lld:%ld, expected %lld:%ld. " - "Skipping.", - frg->act_id, frg->frag_no, df->sent_id, df->frag_no); + gu_warn("Duplicate fragment %" PRId64 ":%ld, expected %" PRId64 + ":%ld. " + "Skipping.", + frg->act_id, frg->frag_no, df->sent_id, df->frag_no); df->frag_no--; // revert counter in hope that we get good frag #ifndef GCS_CORE_TESTING // allow unit tests to pass in debug mode assert(0); @@ -94,9 +96,10 @@ gcs_defrag_handle_frag (gcs_defrag_t* df, } else { gu_error ("Unordered fragment received. Protocol error."); - gu_error ("Expected: %llu:%ld, received: %llu:%ld", - df->sent_id, df->frag_no, frg->act_id, frg->frag_no); - gu_error ("Contents: '%.*s'", frg->frag_len, (char*)frg->frag); + gu_error("Expected: %" PRId64 ":%ld, received: %" PRId64 ":%ld", + df->sent_id, df->frag_no, frg->act_id, frg->frag_no); + gu_error("Contents: '%.*s'", static_cast(frg->frag_len), + (char*)frg->frag); df->frag_no--; // revert counter in hope that we get good frag #ifndef GCS_CORE_TESTING // allow unit tests to pass in debug mode assert(0); @@ -125,14 +128,15 @@ gcs_defrag_handle_frag (gcs_defrag_t* df, if (!local && df->reset) { /* can happen after configuration change, just ignore this message calmly */ - gu_debug ("Ignoring fragment %lld:%ld (size %d) after reset", - frg->act_id, frg->frag_no, frg->act_size); + gu_debug("Ignoring fragment %" PRId64 + ":%ld (size %zu) after reset", + frg->act_id, frg->frag_no, frg->act_size); return 0; } else { ((char*)frg->frag)[frg->frag_len - 1] = '\0'; gu_error ("Unordered fragment received. Protocol error."); - gu_error ("Expected: any:0(first), received: %lld:%ld", + gu_error ("Expected: any:0(first), received: %" PRId64 ":%ld", frg->act_id, frg->frag_no); gu_error ("Contents: '%s', local: %s, reset: %s", (char*)frg->frag, local ? "yes" : "no", diff --git a/gcs/src/gcs_dummy.cpp b/gcs/src/gcs_dummy.cpp index 4a8f2b2e3..6a20f2bd0 100644 --- a/gcs/src/gcs_dummy.cpp +++ b/gcs/src/gcs_dummy.cpp @@ -167,7 +167,7 @@ GCS_BACKEND_RECV_FN(dummy_recv) } else { ret = -EBADFD; // closing - gu_debug ("Returning %d: %s", ret, strerror(-ret)); + gu_debug ("Returning %ld: %s", ret, strerror(-ret)); } } else { @@ -189,7 +189,7 @@ GCS_BACKEND_MSG_SIZE_FN(dummy_msg_size) const long max_pkt_size = backend->conn->max_pkt_size; if (pkt_size > max_pkt_size) { - gu_warn ("Requested packet size: %d, maximum possible packet size: %d", + gu_warn ("Requested packet size: %ld, maximum possible packet size: %ld", pkt_size, max_pkt_size); return (max_pkt_size - backend->conn->hdr_size); } @@ -230,7 +230,7 @@ GCS_BACKEND_OPEN_FN(dummy_open) } gcs_comp_msg_delete (comp); } - gu_debug ("Opened backend connection: %d (%s)", ret, strerror(-ret)); + gu_debug ("Opened backend connection: %ld (%s)", ret, strerror(-ret)); return ret; } diff --git a/gcs/src/gcs_error.cpp b/gcs/src/gcs_error.cpp new file mode 100644 index 000000000..fdf694aaf --- /dev/null +++ b/gcs/src/gcs_error.cpp @@ -0,0 +1,35 @@ +/* + * Copyright (C) 2024 Codership Oy + */ + +#include "gcs_error.hpp" + +#include +#include + +const char* gcs_error_str(int err) +{ + switch (err) + { + case EINTR: return "Operation interrupted"; + case EAGAIN: return "Operation failed temporarily"; + case EPERM: + case ENOTCONN: return "Not in primary component"; + case ECONNABORTED: return "Connection was closed"; + case EBADF: return "Connection not initialized"; + case ETIMEDOUT: return "Operation timed out"; + default: return strerror(err); + } +} + +const char* gcs_state_transfer_error_str(int err) +{ + switch (err) + { + case EAGAIN: + return "No donor candidates temporarily available in suitable state"; + case EHOSTUNREACH: return "Requested donor is not available"; + case EHOSTDOWN: return "Joiner and donor can't be the same node"; + default: return gcs_error_str(err); + } +} diff --git a/gcs/src/gcs_error.hpp b/gcs/src/gcs_error.hpp new file mode 100644 index 000000000..423fe20ea --- /dev/null +++ b/gcs/src/gcs_error.hpp @@ -0,0 +1,53 @@ +/* + * Copyright (C) 2024 Codership Oy + */ + +/*! @file gcs_error.hpp + * + * Error code to error string translation according to GCS conventions. + */ + +#ifndef GCS_ERROR_HPP +#define GCS_ERROR_HPP + +/*! + * Return an error string associated with a system error code for gcs calls + * where the error code does not come from system call. As a fallback, + * error string for unhandled error codes are obtained by strerror() + * system call. + * + * This function follows the following conventions for system error + * codes for group communication errors: + * + * EAGAIN - Operation failed temporarily due to group configuration + * change or flow control. + * ENOTCONN, EPERM - Not in primary component. + * ECONNABORTED - Connection was closed while the operation was in progress. + * ETIMEDOUT - Operation timed out. + * EBADF - Connection was not initialized. + * + * @param err System error code. + * @return Error string describing the error condition. + */ +const char* gcs_error_str(int err); + +/*! + * Return and errorstring associated with a system error code for + * state transfer requests. As a fallback, error string for unhandled + * error codes are obtained by strerror() system call. + * + * The function follows the following conventions for system error codes + * for state transfer request errors (for details, see donor selection in + * gcs_group.cpp): + * + * EAGAIN - No donors available in suitable state. + * EHOSTUNREACH - Requested donor is not avaialble. + * EHOSTDOWN - Joiner and donor can't be the same node. + * + * @param err System error code. + * @return Error string describing state transfer error condition. + */ +const char* gcs_state_transfer_error_str(int err); + + +#endif /* GCS_ERROR_HPP */ diff --git a/gcs/src/gcs_gcomm.cpp b/gcs/src/gcs_gcomm.cpp index a554d9041..59297d3df 100644 --- a/gcs/src/gcs_gcomm.cpp +++ b/gcs/src/gcs_gcomm.cpp @@ -373,7 +373,7 @@ void GCommConn::connect(string channel, bool const bootstrap) &thd_, run_fn, this)) != 0) { - gu_throw_error(err) << "Failed to create thread"; + gu_throw_system_error(err) << "Failed to create thread"; } thread_set_schedparam(thd_, schedparam_); @@ -588,7 +588,7 @@ static void fill_cmp_msg(const View& view, const gcomm::UUID& my_uuid, i->second.segment()); if (ret < 0) { gu_throw_error(-ret) << "Failed to add member '" << uuid - << "' to component message."; + << "' to component message: " << -ret; } if (uuid == my_uuid) @@ -871,7 +871,7 @@ GCS_BACKEND_STATUS_GET_FN(gcomm_status_get) GCommConn::Ref ref(backend); if (ref.get() == 0) { - gu_throw_error(-EBADFD); + gu_throw_error(-EBADFD) << "Could not get status from gcomm backend"; } GCommConn& conn(*ref.get()); diff --git a/gcs/src/gcs_group.cpp b/gcs/src/gcs_group.cpp index a46b8ca57..3863a0f85 100644 --- a/gcs/src/gcs_group.cpp +++ b/gcs/src/gcs_group.cpp @@ -8,6 +8,7 @@ #include "gcs_gcache.hpp" #include "gcs_priv.hpp" #include "gcs_code_msg.hpp" +#include "gcs_error.hpp" #include #include @@ -19,16 +20,17 @@ #include +#include #include std::string const GCS_VOTE_POLICY_KEY("gcs.vote_policy"); uint8_t const GCS_VOTE_POLICY_DEFAULT(0); -void gcs_group_register(gu::Config* cnf) +void gcs_group::register_params(gu::Config& cnf) { - cnf->add(GCS_VOTE_POLICY_KEY, - gu::Config::Flag::read_only | - gu::Config::Flag::type_integer); + cnf.add(GCS_VOTE_POLICY_KEY, + gu::Config::Flag::read_only | + gu::Config::Flag::type_integer); } const char* gcs_group_state_str[GCS_GROUP_STATE_MAX] = @@ -54,49 +56,50 @@ uint8_t gcs_group_conf_to_vote_policy(gu::Config& cnf) return i; } -int -gcs_group_init (gcs_group_t* group, gu::Config* const cnf, gcache_t* const cache, - const char* node_name, const char* inc_addr, - gcs_proto_t const gcs_proto_ver, int const repl_proto_ver, - int const appl_proto_ver) -{ - // here we also create default node instance. - group->cache = cache; - group->act_id_ = GCS_SEQNO_ILL; - group->conf_id = GCS_SEQNO_ILL; - group->state_uuid = GU_UUID_NIL; - group->group_uuid = GU_UUID_NIL; - group->num = 0; - group->my_idx = -1; - group->my_name = strdup(node_name ? node_name : NODE_NO_NAME); - group->my_address = strdup(inc_addr ? inc_addr : NODE_NO_ADDR); - group->state = GCS_GROUP_NON_PRIMARY; - group->last_applied = group->act_id_; - group->last_node = -1; - group->vote_request_seqno = GCS_NO_VOTE_SEQNO; - group->vote_result = (VoteResult){ GCS_NO_VOTE_SEQNO, 0 }; - group->vote_history = new VoteHistory; - group->vote_policy = gcs_group_conf_to_vote_policy(*cnf); - group->frag_reset = true; // just in case - group->nodes = NULL; - group->prim_uuid = GU_UUID_NIL; - group->prim_seqno = GCS_SEQNO_ILL; - group->prim_num = 0; - group->prim_state = GCS_NODE_STATE_NON_PRIM; - group->prim_gcs_ver = 0; - group->prim_repl_ver = 0; - group->prim_appl_ver = 0; - - *(gcs_proto_t*)&group->gcs_proto_ver = gcs_proto_ver; - *(int*)&group->repl_proto_ver = repl_proto_ver; - *(int*)&group->appl_proto_ver = appl_proto_ver; - - group->quorum = GCS_QUORUM_NON_PRIMARY; - - group->last_applied_proto_ver = -1; - - return 0; -} +gcs_group::gcs_group(gu::Config& cnf, + gcache_t* cache, + const char* node_name, ///< can be null + const char* inc_addr, ///< can be null + gcs_proto_t gcs_proto_ver, + int repl_proto_ver, + int appl_proto_ver) + : + memb_mtx_ (gu::get_mutex_key(gu::GU_MUTEX_KEY_GCS_MEMBERSHIP)), + memb_epoch_ (GCS_SEQNO_ILL), + cache (cache), + cnf (cnf), + act_id_ (GCS_SEQNO_ILL), + conf_id (GCS_SEQNO_ILL), + state_uuid (GU_UUID_NIL), + group_uuid (GU_UUID_NIL), + num (0), + my_idx (-1), + my_name (strdup(node_name ? node_name : NODE_NO_NAME)), + my_address (strdup(inc_addr ? inc_addr : NODE_NO_ADDR)), + state (GCS_GROUP_NON_PRIMARY), + last_applied (act_id_), + last_node (-1), + vote_request_seqno (GCS_NO_VOTE_SEQNO), + vote_result ((VoteResult){ GCS_NO_VOTE_SEQNO, 0 }), + vote_history (), + vote_policy (gcs_group_conf_to_vote_policy(cnf)), + frag_reset (true), // just in case + nodes (NULL), + prim_uuid (GU_UUID_NIL), + prim_seqno (GCS_SEQNO_ILL), + prim_num (0), + prim_state (GCS_NODE_STATE_NON_PRIM), + prim_gcs_ver (0), + prim_repl_ver (0), + prim_appl_ver (0), + + gcs_proto_ver (gcs_proto_ver), + repl_proto_ver(repl_proto_ver), + appl_proto_ver(appl_proto_ver), + + quorum (GCS_QUORUM_NON_PRIMARY), + last_applied_proto_ver(-1) +{} int gcs_group_init_history (gcs_group_t* group, @@ -150,7 +153,7 @@ group_nodes_init (const gcs_group_t* group, const gcs_comp_msg_t* comp) } } else { - gu_error ("Could not allocate %ld x %z bytes", nodes_num, + gu_error ("Could not allocate %ld x %zu bytes", nodes_num, sizeof(gcs_node_t)); } return ret; @@ -183,12 +186,16 @@ gcs_group_free (gcs_group_t* group) { if (group->my_name) free ((char*)group->my_name); if (group->my_address) free ((char*)group->my_address); - delete group->vote_history; gu::Lock lock(group->memb_mtx_); group_nodes_free (group); } +gcs_group::~gcs_group() +{ + gcs_group_free(this); +} + /* Reset nodes array without breaking the statistics */ static inline void group_nodes_reset (gcs_group_t* group) @@ -492,10 +499,10 @@ group_post_state_exchange (gcs_group_t* group) gu_info ("Quorum results:" "\n\tversion = %u," "\n\tcomponent = %s," - "\n\tconf_id = %lld," - "\n\tmembers = %d/%d (joined/total)," - "\n\tact_id = %lld," - "\n\tlast_appl. = %lld," + "\n\tconf_id = %" PRId64 "," + "\n\tmembers = %ld/%ld (joined/total)," + "\n\tact_id = %" PRId64 "," + "\n\tlast_appl. = %" PRId64 "," "\n\tprotocols = %d/%d/%d (gcs/repl/appl)," "\n\tvote policy= %d," "\n\tgroup UUID = " GU_UUID_FORMAT, @@ -555,7 +562,7 @@ gcs_group_handle_comp_msg (gcs_group_t* group, const gcs_comp_msg_t* comp) new_nodes = group_nodes_init (group, comp); if (!new_nodes) { - gu_fatal ("Could not allocate memory for %ld-node component.", + gu_fatal ("Could not allocate memory for %d-node component.", gcs_comp_msg_num (comp)); assert(0); return (gcs_group_state_t)-ENOMEM; @@ -711,7 +718,7 @@ gcs_group_handle_uuid_msg (gcs_group_t* group, const gcs_recv_msg_t* msg) } else { gu_warn ("Stray state UUID msg: " GU_UUID_FORMAT - " from node %ld (%s), current group state %s", + " from node %d (%s), current group state %s", GU_UUID_ARGS((gu_uuid_t*)msg->buf), msg->sender_idx, group->nodes[msg->sender_idx].name, gcs_group_state_str[group->state]); @@ -749,7 +756,7 @@ gcs_group_handle_state_msg (gcs_group_t* group, const gcs_recv_msg_t* msg) } else { gu_debug ("STATE EXCHANGE: stray state msg: " GU_UUID_FORMAT - " from node %ld (%s), current state UUID: " + " from node %d (%s), current state UUID: " GU_UUID_FORMAT, GU_UUID_ARGS(state_uuid), msg->sender_idx, gcs_state_msg_name(state), @@ -760,7 +767,7 @@ gcs_group_handle_state_msg (gcs_group_t* group, const gcs_recv_msg_t* msg) } } else { - gu_warn ("Could not parse state message from node %d", + gu_warn ("Could not parse state message from node %d, %s", msg->sender_idx, group->nodes[msg->sender_idx].name); } } @@ -854,7 +861,7 @@ gcs_group_handle_last_msg (gcs_group_t* group, const gcs_recv_msg_t* msg) group_redo_last_applied (group); if (old_val < group->last_applied) { - gu_debug ("New COMMIT CUT %lld on %d after %lld from %d", + gu_debug ("New COMMIT CUT %lld on %ld after %lld from %d", (long long)group->last_applied, group->my_idx, (long long)gtid.seqno(), msg->sender_idx); return group->last_applied; @@ -1003,7 +1010,7 @@ group_recount_votes (gcs_group_t& group) // record voting result in the history for later std::pair const val(vote_gtid, win_vote); std::pair const res - (group.vote_history->insert(val)); + (group.vote_history.insert(val)); if (false == res.second) { assert(0); @@ -1053,8 +1060,12 @@ gcs_group_handle_vote_msg (gcs_group_t* group, const gcs_recv_msg_t* msg) << gtid << ',' << gu::PrintBase<>(code) << ": " << (code ? (data ? data : "(null)") : "Success"); - gcs_node_set_vote (&sender, gtid.seqno(), code, - group->quorum.gcs_proto_ver); + { + gu::Lock lock(group->memb_mtx_); + group->memb_epoch_ = group->act_id_; + gcs_node_set_vote (&sender, gtid.seqno(), code, + group->quorum.gcs_proto_ver); + } if (group_recount_votes(*group)) { @@ -1086,11 +1097,11 @@ gcs_group_handle_vote_msg (gcs_group_t* group, const gcs_recv_msg_t* msg) msg << "Recovering vote result from history: " << gtid; int64_t result(0); - VoteHistory::iterator it(group->vote_history->find(gtid)); - if (group->vote_history->end() != it) + VoteHistory::iterator it(group->vote_history.find(gtid)); + if (group->vote_history.end() != it) { result = it->second; - group->vote_history->erase(it); + group->vote_history.erase(it); msg << ',' << gu::PrintBase<>(result); } else @@ -1200,15 +1211,16 @@ gcs_group_handle_join_msg (gcs_group_t* group, const gcs_recv_msg_t* msg) } } - if (j == group->num) { - gu_warn ("Could not find peer: %s", peer_id); + if (j == group->num && strlen(peer_id)) { + /* This can happen if the 'peer' is no longer in group. */ + gu_info ("Could not find peer: %s", peer_id); } if (code < 0) { - gu_warn ("%d.%d (%s): State transfer %s %d.%d (%s) failed: %d (%s)", + gu_warn ("%d.%d (%s): State transfer %s %d.%d (%s) failed: %s", sender_idx, sender->segment, sender->name, st_dir, peer_idx, peer ? peer->segment : -1, peer_name, - (int)code, strerror((int)-code)); + gcs_state_transfer_error_str((int)-code)); if (from_donor && peer_idx == group->my_idx && GCS_NODE_STATE_JOINER == group->nodes[peer_idx].status) { @@ -1250,8 +1262,14 @@ gcs_group_handle_join_msg (gcs_group_t* group, const gcs_recv_msg_t* msg) gu_warn("Rejecting JOIN message from %d.%d (%s): new State Transfer" " required.", sender_idx, sender->segment, sender->name); } - else { - // should we freak out and throw an error? + else if (GCS_NODE_STATE_SYNCED != sender->status && + GCS_NODE_STATE_JOINED != sender->status) { + /* According to comments in gcs_join(), sending of JOIN messages + * is always allowed when not in JOINER state. This may lead to + * duplicate joins of which some can be received in JOINED or + * SYNCED state. This is expected, so the warning is not printed if + * the state is JOINED or SYNCED, but we'll keep it for other + * states to catch possible errors in sender logic. */ gu_warn("Protocol violation. JOIN message sender %d.%d (%s) is not " "in state transfer (%s). Message ignored.", sender_idx, sender->segment, sender->name, @@ -1358,7 +1376,7 @@ group_find_node_by_state (const gcs_group_t* const group, /* Have not found suitable donor in the same segment. */ if (!hnss && donor >= 0) { if (joiner_idx == group->my_idx) { - gu_warn ("There are no nodes in the same segment that will ever " + gu_info ("There are no nodes in the same segment that will ever " "be able to become donors, yet there is a suitable donor " "outside. Will use that one."); } @@ -1700,7 +1718,6 @@ gcs_group_find_donor(const gcs_group_t* group, return donor_idx; } - /*! * Selects and returns the index of state transfer donor, if available. * Updates donor and joiner status if state transfer is possible @@ -1780,12 +1797,24 @@ group_select_donor (gcs_group_t* group, assert(true == desync); } } - else { - gu_warn ("Member %d.%d (%s) requested state transfer from '%s', " - "but it is impossible to select State Transfer donor: %s", - joiner_idx, group->nodes[joiner_idx].segment, - group->nodes[joiner_idx].name, - required_donor ? donor_string : "*any*", strerror (-donor_idx)); + else if (-donor_idx == EAGAIN) { + /* In case of EAGAIN the failure of selecting the donor is + * transient, and donor selection may succeed when the request is + * retried by the Joiner. Therefore print info level message + * instead of warning. */ + gu_info("Member %d.%d (%s) requested state transfer from '%s', " + "but it is impossible to select State Transfer donor: %s", + joiner_idx, group->nodes[joiner_idx].segment, + group->nodes[joiner_idx].name, + required_donor ? donor_string : "*any*", + gcs_state_transfer_error_str(-donor_idx)); + } else { + gu_warn("Member %d.%d (%s) requested state transfer from '%s', " + "but it is impossible to select State Transfer donor: %s", + joiner_idx, group->nodes[joiner_idx].segment, + group->nodes[joiner_idx].name, + required_donor ? donor_string : "*any*", + gcs_state_transfer_error_str(-donor_idx)); } return donor_idx; @@ -1796,7 +1825,7 @@ void gcs_group_ignore_action (gcs_group_t* group, struct gcs_act_rcvd* act) { gu_debug("Ignoring action: buf: %p, len: %zd, type: %d, sender: %d, " - "seqno: %lld", act->act.buf, act->act.buf_len, act->act.type, + "seqno: %" PRId64, act->act.buf, act->act.buf_len, act->act.type, act->sender_idx, act->id); if (gcs_act_in_cache(act->act.type)) { diff --git a/gcs/src/gcs_group.hpp b/gcs/src/gcs_group.hpp index bddedcb2f..8e1ffd728 100644 --- a/gcs/src/gcs_group.hpp +++ b/gcs/src/gcs_group.hpp @@ -24,7 +24,6 @@ #include "gu_config.hpp" extern std::string const GCS_VOTE_POLICY_KEY; -extern void gcs_group_register(gu::Config* cnf); // register parameters extern uint8_t gcs_group_conf_to_vote_policy(gu::Config& cnf); #include "gu_status.hpp" @@ -58,7 +57,7 @@ typedef struct gcs_group gcs_seqno_t memb_epoch_; gcache_t* cache; - gu::Config* cnf; + gu::Config& cnf; gcs_seqno_t act_id_; // current(last) action seqno gcs_seqno_t conf_id; // current configuration seqno gu_uuid_t state_uuid; // state exchange id @@ -72,7 +71,7 @@ typedef struct gcs_group long last_node; // node that last reported commit_cut gcs_seqno_t vote_request_seqno; // last vote request was passed for it VoteResult vote_result; // last vote result - VoteHistory* vote_history; // history of group votes + VoteHistory vote_history; // history of group votes uint8_t vote_policy; bool frag_reset; // indicate that fragmentation was reset gcs_node_t* nodes; // array of node contexts @@ -94,28 +93,20 @@ typedef struct gcs_group gcs_state_quorum_t quorum; int last_applied_proto_ver; - gcs_group() : - memb_mtx_(gu::get_mutex_key(gu::GU_MUTEX_KEY_GCS_MEMBERSHIP)), - gcs_proto_ver(0), - repl_proto_ver(0), - appl_proto_ver(0) - { } + gcs_group(gu::Config& cnf, + gcache_t* cache, + const char* node_name, ///< can be null + const char* inc_addr, ///< can be null + gcs_proto_t gcs_proto_ver, + int repl_proto_ver, + int appl_proto_ver); + ~gcs_group(); + + static void + register_params(gu::Config& cnf); } gcs_group_t; -/*! - * Initialize group at startup - */ -extern int -gcs_group_init (gcs_group_t* group, - gu::Config* cnf, - gcache_t* cache, - const char* node_name, ///< can be null - const char* inc_addr, ///< can be null - gcs_proto_t gcs_proto_ver, - int repl_proto_ver, - int appl_proto_ver); - /*! * Initialize group action history parameters. See gcs.h */ @@ -132,12 +123,6 @@ extern void group_nodes_free (gcs_group_t* group); #endif // GCS_CORE_TESTING -/*! - * Free group resources - */ -extern void -gcs_group_free (gcs_group_t* group); - /*! Forget the action if it is not to be delivered */ extern void gcs_group_ignore_action (gcs_group_t* group, struct gcs_act_rcvd* rcvd); diff --git a/gcs/src/gcs_node.cpp b/gcs/src/gcs_node.cpp index b62a5387d..6a5cff029 100644 --- a/gcs/src/gcs_node.cpp +++ b/gcs/src/gcs_node.cpp @@ -7,6 +7,7 @@ #include "gcs_node.hpp" #include "gcs_state_msg.hpp" #include +#include #include // gu::PrintBase @@ -192,10 +193,10 @@ gcs_node_update_status (gcs_node_t* node, const gcs_state_quorum_t* quorum) else { // gap in sequence numbers, needs a snapshot, demote status if (node->status > GCS_NODE_STATE_PRIM) { - gu_info ("'%s' demoted %s->PRIMARY due to gap in history: " - "%lld - %lld", - node->name, gcs_node_state_to_str(node->status), - node_act_id, quorum->act_id); + gu_info("'%s' demoted %s->PRIMARY due to gap in history: " + "%" PRId64 " - %" PRId64, + node->name, gcs_node_state_to_str(node->status), + node_act_id, quorum->act_id); } node->status = GCS_NODE_STATE_PRIM; } diff --git a/gcs/src/gcs_params.cpp b/gcs/src/gcs_params.cpp index 3f1756a2a..27608746a 100644 --- a/gcs/src/gcs_params.cpp +++ b/gcs/src/gcs_params.cpp @@ -37,8 +37,8 @@ static ssize_t const GCS_PARAMS_RECV_Q_HARD_LIMIT_DEFAULT = SSIZE_MAX; static const char* const GCS_PARAMS_RECV_Q_SOFT_LIMIT_DEFAULT = "0.25"; static const char* const GCS_PARAMS_MAX_THROTTLE_DEFAULT = "0.25"; -bool -gcs_params_register(gu_config_t* conf) +static bool +gcs_params_register(gu_config_t* const conf) { bool ret = 0; @@ -84,6 +84,15 @@ gcs_params_register(gu_config_t* conf) return ret; } +void +gcs_params::register_params(gu::Config& conf) +{ + if (gcs_params_register(reinterpret_cast(&conf))) + { + gu_throw_fatal << "Failed to register GCS parameters"; + } +} + static long params_init_bool (gu_config_t* conf, const char* const name, bool* const var) { @@ -207,10 +216,10 @@ static void deprecation_warning(gu_config_t* config, } } -long -gcs_params_init (struct gcs_params* params, gu_config_t* config) +static int +gcs_params_init (struct gcs_params* const params, gu_config_t* const config) { - long ret; + int ret; if ((ret = params_init_long (config, GCS_PARAMS_FC_LIMIT, 0, LONG_MAX, ¶ms->fc_base_limit))) return ret; @@ -218,7 +227,7 @@ gcs_params_init (struct gcs_params* params, gu_config_t* config) if ((ret = params_init_long (config, GCS_PARAMS_FC_DEBUG, 0, LONG_MAX, ¶ms->fc_debug))) return ret; - if ((ret = params_init_long (config, GCS_PARAMS_MAX_PKT_SIZE, 0,LONG_MAX, + if ((ret = params_init_long (config, GCS_PARAMS_MAX_PKT_SIZE, 0, LONG_MAX, ¶ms->max_packet_size))) return ret; if ((ret = params_init_double (config, GCS_PARAMS_FC_FACTOR, 0.0, 1.0, @@ -257,3 +266,22 @@ gcs_params_init (struct gcs_params* params, gu_config_t* config) ¶ms->sync_donor))) return ret; return 0; } + +gcs_params::gcs_params(gu::Config& conf) + : + fc_resume_factor(), + recv_q_soft_limit(), + max_throttle(), + recv_q_hard_limit(), + fc_base_limit(), + max_packet_size(), + fc_debug(), + fc_single_primary(), + sync_donor() +{ + int const ret(gcs_params_init(this, reinterpret_cast(&conf))); + if (0 != ret) + { + gu_throw_error(-ret); + } +} diff --git a/gcs/src/gcs_params.hpp b/gcs/src/gcs_params.hpp index e279fa473..a675b8109 100644 --- a/gcs/src/gcs_params.hpp +++ b/gcs/src/gcs_params.hpp @@ -6,10 +6,16 @@ #ifndef _gcs_params_h_ #define _gcs_params_h_ +#include "gu_config.hpp" #include "galerautils.h" struct gcs_params { + gcs_params(gu::Config& config); + + static void + register_params(gu::Config& config); + double fc_resume_factor; double recv_q_soft_limit; double max_throttle; @@ -34,16 +40,5 @@ extern const char* const GCS_PARAMS_MAX_THROTTLE; extern const char* const GCS_PARAMS_SM_DUMP; #endif /* GCS_SM_DEBUG */ -/*! Register configuration parameters */ -extern bool -gcs_params_register(gu_config_t* config); - -/*! Initializes parameters from config - * - * @return 0 in case of success, - * -EINVAL if some values were set incorrectly in config */ -extern long -gcs_params_init (struct gcs_params* params, gu_config_t* config); - #endif /* _gcs_params_h_ */ diff --git a/gcs/src/gcs_sm.cpp b/gcs/src/gcs_sm.cpp index 3f4823e07..19076d312 100644 --- a/gcs/src/gcs_sm.cpp +++ b/gcs/src/gcs_sm.cpp @@ -127,7 +127,7 @@ gcs_sm_open (gcs_sm_t* sm) gu_mutex_unlock (&sm->lock); - if (ret) { gu_error ("Can't open send monitor: wrong state %d", ret); } + if (ret) { gu_error ("Can't open send monitor: wrong state %ld", ret); } return ret; } diff --git a/gcs/src/gcs_state_msg.cpp b/gcs/src/gcs_state_msg.cpp index faf5c0f8e..b538b2656 100644 --- a/gcs/src/gcs_state_msg.cpp +++ b/gcs/src/gcs_state_msg.cpp @@ -568,11 +568,11 @@ state_quorum_inherit (const gcs_state_msg_t* states[], state_report_uuids (buf, buf_len, states, states_num, GCS_NODE_STATE_NON_PRIM); #ifdef GCS_CORE_TESTING - gu_warn ("Quorum: No node with complete state:\n%s", buf); + gu_info ("Quorum: No node with complete state:\n%s", buf); #else /* Print buf into stderr in order to message truncation * of application logger. */ - gu_warn ("Quorum: No node with complete state:"); + gu_info ("Quorum: No node with complete state:"); fprintf(stderr, "%s\n", buf); #endif /* GCS_CORE_TESTING */ gu_free (buf); @@ -592,7 +592,8 @@ state_quorum_inherit (const gcs_state_msg_t* states[], if (buf) { state_report_uuids (buf, buf_len, states, states_num, GCS_NODE_STATE_DONOR); - gu_fatal("Quorum impossible: conflicting group UUIDs:\n%s"); + gu_fatal("Quorum impossible: conflicting group UUIDs:\n%s", + buf); gu_free (buf); } else { diff --git a/gcs/src/gcs_test.cpp b/gcs/src/gcs_test.cpp index 2ed4cec89..d7788f535 100644 --- a/gcs/src/gcs_test.cpp +++ b/gcs/src/gcs_test.cpp @@ -729,10 +729,11 @@ int main (int argc, char *argv[]) gu_config_set_string(gconf, "gcache.size", "0"); gu_config_set_string(gconf, "gcache.page_size", "1M"); - gcs_register_params(gconf); + gcs_register_params(*reinterpret_cast(gconf)); if (!(cache = gcache_create (gconf, ""))) goto out; - if (!(gcs = gcs_create (gconf, cache, NULL, NULL, NULL, 0, 0))) goto out; + if (!(gcs = gcs_create (*reinterpret_cast(gconf), + cache, NULL, NULL, NULL, 0, 0))) goto out; puts ("debug"); fflush(stdout); /* the following hack won't work if there is 0.0.0.0 in URL options */ bstrap = (NULL != strstr(conf.backend, "0.0.0.0")); diff --git a/gcs/src/unit_tests/CMakeLists.txt b/gcs/src/unit_tests/CMakeLists.txt index 75905f848..5153451a4 100644 --- a/gcs/src/unit_tests/CMakeLists.txt +++ b/gcs/src/unit_tests/CMakeLists.txt @@ -35,6 +35,7 @@ add_executable(gcs_tests ../gcs_params.cpp gcs_fc_test.cpp ../gcs_fc.cpp + ../gcs_error.cpp ) target_compile_definitions(gcs_tests diff --git a/gcs/src/unit_tests/SConscript b/gcs/src/unit_tests/SConscript index be61ca714..2aab85607 100644 --- a/gcs/src/unit_tests/SConscript +++ b/gcs/src/unit_tests/SConscript @@ -58,6 +58,7 @@ gcs_tests_sources = Split(''' ../gcs_params.cpp gcs_fc_test.cpp ../gcs_fc.cpp + ../gcs_error.cpp ''') diff --git a/gcs/src/unit_tests/gcs_core_test.cpp b/gcs/src/unit_tests/gcs_core_test.cpp index 2fd7954c5..37270bdba 100644 --- a/gcs/src/unit_tests/gcs_core_test.cpp +++ b/gcs/src/unit_tests/gcs_core_test.cpp @@ -399,7 +399,7 @@ core_test_init (gu::Config* config, Cache = new gcache::GCache(NULL, *config, "."); } - Core = gcs_core_create (reinterpret_cast(config), + Core = gcs_core_create (*config, reinterpret_cast(Cache), "core_test", "aaa.bbb.ccc.ddd:xxxx", 0, 0, gcs_proto_ver); @@ -543,7 +543,7 @@ test_api(bool const enc) while (i--) { long frags = (act_size - 1)/FRAG_SIZE + 1; - gu_info ("Iteration %ld: act: %s, size: %zu, frags: %ld", + gu_info ("Iteration %ld: act: %p, size: %zu, frags: %ld", i, act, act_size, frags); ck_assert(!CORE_SEND_START (&act_s)); diff --git a/gcs/src/unit_tests/gcs_group_test.cpp b/gcs/src/unit_tests/gcs_group_test.cpp index 056231140..63c52b33d 100644 --- a/gcs/src/unit_tests/gcs_group_test.cpp +++ b/gcs/src/unit_tests/gcs_group_test.cpp @@ -62,7 +62,6 @@ new_component (gcs_group_t* group, const gcs_comp_msg_t* comp) START_TEST (gcs_group_configuration) { ssize_t ret; - gcs_group_t group; gcs_seqno_t seqno = 11; // The Action @@ -131,8 +130,8 @@ START_TEST (gcs_group_configuration) // ready gu::Config cnf; - gcs_group_register(&cnf); - gcs_group_init (&group, &cnf, NULL, "my node", "my addr", 0, 0, 0); + gcs_group::register_params(cnf); + gcs_group_t group(cnf, NULL, "my node", "my addr", 0, 0, 0); ck_assert(!gcs_group_is_primary(&group)); ck_assert(group.num == 0); @@ -397,7 +396,6 @@ START_TEST (gcs_group_configuration) ret = new_component (&group, comp); ck_assert(ret >= 0); gcs_comp_msg_delete (comp); - gcs_group_free(&group); } END_TEST @@ -491,9 +489,8 @@ END_TEST START_TEST(test_gcs_group_find_donor) { gu::Config cnf; - gcs_group_register(&cnf); - gcs_group_t group; - gcs_group_init(&group, &cnf, NULL, "", "", 0, 0, 0); + gcs_group::register_params(cnf); + gcs_group_t group(cnf, NULL, "", "", 0, 0, 0); const char* s_group_uuid = "0d0d0d0d-0d0d-0d0d-0d0d-0d0d0d0d0d0d"; gu_uuid_scan(s_group_uuid, strlen(s_group_uuid), &group.group_uuid); @@ -592,8 +589,6 @@ START_TEST(test_gcs_group_find_donor) nodes[1].status = GCS_NODE_STATE_SYNCED; nodes[2].status = GCS_NODE_STATE_SYNCED; #undef SARGS - - gcs_group_free(&group); } END_TEST diff --git a/gcs/src/unit_tests/gcs_memb_test.cpp b/gcs/src/unit_tests/gcs_memb_test.cpp index 218a11270..ce7f554b8 100644 --- a/gcs/src/unit_tests/gcs_memb_test.cpp +++ b/gcs/src/unit_tests/gcs_memb_test.cpp @@ -11,7 +11,6 @@ #include "gu_uuid.h" -#include "gcs_test_utils.hpp" #include "gcs_memb_test.hpp" // must be included last using namespace gcs_test; diff --git a/gcs/src/unit_tests/gcs_test_utils.cpp b/gcs/src/unit_tests/gcs_test_utils.cpp index 364290f8a..ce9cbdc0c 100644 --- a/gcs/src/unit_tests/gcs_test_utils.cpp +++ b/gcs/src/unit_tests/gcs_test_utils.cpp @@ -16,7 +16,7 @@ void InitConfig::common_ctor(gu::Config& cfg) { gcache::GCache::register_params(cfg); - gcs_register_params(reinterpret_cast(&cfg)); + gcs_register_params(cfg); } InitConfig::InitConfig(gu::Config& cfg) @@ -40,7 +40,7 @@ GcsGroup::GcsGroup() : conf_ (), init_ (conf_, "group"), gcache_ (NULL), - group_ (), + group_ (NULL), initialized_(false) {} @@ -89,39 +89,19 @@ GcsGroup::common_ctor(const std::string& node_name, gcache_ = new gcache::GCache(NULL, conf_, path_); } - int const err(gcs_group_init(&group_, &conf_, - reinterpret_cast(gcache_), - node_name.c_str(), inc_addr.c_str(), - gver, rver, aver)); - if (err) - { - gu_throw_error(-err) << "GcsGroup init failed"; - } - + group_ = new gcs_group(conf_, reinterpret_cast(gcache_), + node_name.c_str(), inc_addr.c_str(), gver, rver, aver); initialized_ = true; } -GcsGroup::GcsGroup(const std::string& node_id, - const std::string& inc_addr, - bool enc, - gcs_proto_t gver, int rver, int aver) : - path_ ("./"), - conf_ (), - init_ (conf_, node_id), - gcache_ (NULL), - group_ (), - initialized_(false) -{ - common_ctor(node_id, inc_addr, enc, gver, rver, aver); -} - void GcsGroup::common_dtor() { if (initialized_) { assert(NULL != gcache_); - gcs_group_free(&group_); + assert(NULL != group_); + delete group_; delete gcache_; boost::filesystem::path path(path_); @@ -133,6 +113,7 @@ GcsGroup::common_dtor() else { assert(NULL == gcache_); + assert(NULL == group_); } } @@ -509,7 +490,7 @@ gt_group::sst_start (int const joiner_idx,const char* donor_name) int ret = gcs_group_handle_state_request(nodes[i]->group(), &req); if (ret < 0) { // don't fail here, we may want to test negatives - gu_error (ret < 0, "Handling state request to '%s' failed: %d (%s)", + gu_error ("Handling state request to '%s' failed: %d (%s)", donor_name, ret, strerror (-ret)); return ret; } diff --git a/gcs/src/unit_tests/gcs_test_utils.hpp b/gcs/src/unit_tests/gcs_test_utils.hpp index a00844f9d..016102fef 100644 --- a/gcs/src/unit_tests/gcs_test_utils.hpp +++ b/gcs/src/unit_tests/gcs_test_utils.hpp @@ -24,11 +24,6 @@ namespace gcs_test public: GcsGroup(); - GcsGroup(const std::string& node_id, - const std::string& inc_addr, - bool enc, - gcs_proto_t gver = 1, int pver = 2, int aver = 3); - ~GcsGroup(); void init(const std::string& node_name, @@ -38,17 +33,17 @@ namespace gcs_test int repl_proto_ver, int appl_proto_ver); - struct gcs_group* group() { return &group_; } + struct gcs_group* group() { return group_; } struct gcs_group* operator()(){ return group(); } - struct gcs_group* operator->(){ return &group_; } + struct gcs_group* operator->(){ return group_; } gu::Config& config() { return conf_; } gcache::GCache* gcache() { return gcache_; } - gcs_group_state_t state() const { return group_.state; } + gcs_group_state_t state() const { return group_->state; } gcs_node_state_t node_state() const - { return group_.nodes[group_.my_idx].status; } + { return group_->nodes[group_->my_idx].status; } private: @@ -62,7 +57,7 @@ namespace gcs_test gu::Config conf_; InitConfig init_; gcache::GCache* gcache_; - struct gcs_group group_; + gcs_group* group_; bool initialized_; }; } /* namespace gcs_test */ diff --git a/scripts/build.sh b/scripts/build.sh index 44e18f7f6..4b9c5ea75 100755 --- a/scripts/build.sh +++ b/scripts/build.sh @@ -5,7 +5,7 @@ set -eux # $Id$ # Galera library version -VERSION="26.4.19" +VERSION="26.4.20" get_cores() {