summaryrefslogtreecommitdiffstats
path: root/wsrep-lib/src/server_state.cpp
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--wsrep-lib/src/server_state.cpp1654
1 files changed, 1654 insertions, 0 deletions
diff --git a/wsrep-lib/src/server_state.cpp b/wsrep-lib/src/server_state.cpp
new file mode 100644
index 00000000..2fc9b199
--- /dev/null
+++ b/wsrep-lib/src/server_state.cpp
@@ -0,0 +1,1654 @@
+/*
+ * Copyright (C) 2018-2023 Codership Oy <info@codership.com>
+ *
+ * This file is part of wsrep-lib.
+ *
+ * Wsrep-lib is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Wsrep-lib is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with wsrep-lib. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include "wsrep/server_state.hpp"
+#include "wsrep/client_state.hpp"
+#include "wsrep/server_service.hpp"
+#include "wsrep/client_service.hpp"
+#include "wsrep/high_priority_service.hpp"
+#include "wsrep/transaction.hpp"
+#include "wsrep/view.hpp"
+#include "wsrep/logger.hpp"
+#include "wsrep/compiler.hpp"
+#include "wsrep/id.hpp"
+
+#include <cassert>
+#include <sstream>
+#include <algorithm>
+
+
+//////////////////////////////////////////////////////////////////////////////
+// Helpers //
+//////////////////////////////////////////////////////////////////////////////
+
+
+//
+// This method is used to deal with historical burden of several
+// ways to bootstrap the cluster. Bootstrap happens if
+//
+// * bootstrap option is given
+// * cluster_address is "gcomm://" (Galera provider)
+//
+static bool is_bootstrap(const std::string& cluster_address, bool bootstrap)
+{
+ return (bootstrap || cluster_address == "gcomm://");
+}
+
+// Helper method to provide detailed error message if transaction
+// adopt for fragment removal fails.
+static void log_adopt_error(const wsrep::transaction& transaction)
+{
+ wsrep::log_warning() << "Adopting a transaction ("
+ << transaction.server_id() << "," << transaction.id()
+ << ") for rollback failed, "
+ << "this may leave stale entries to streaming log "
+ << "which may need to be removed manually.";
+}
+
+// resolve which of the two errors return to caller
+static inline int resolve_return_error(bool const vote,
+ int const vote_err,
+ int const apply_err)
+{
+ if (vote) return vote_err;
+ return vote_err != 0 ? vote_err : apply_err;
+}
+
+static void
+discard_streaming_applier(wsrep::server_state& server_state,
+ wsrep::high_priority_service& high_priority_service,
+ wsrep::high_priority_service* streaming_applier,
+ const wsrep::ws_meta& ws_meta)
+{
+ server_state.stop_streaming_applier(
+ ws_meta.server_id(), ws_meta.transaction_id());
+ server_state.server_service().release_high_priority_service(
+ streaming_applier);
+ high_priority_service.store_globals();
+}
+
+static int apply_fragment(wsrep::server_state& server_state,
+ wsrep::high_priority_service& high_priority_service,
+ wsrep::high_priority_service* streaming_applier,
+ const wsrep::ws_handle& ws_handle,
+ const wsrep::ws_meta& ws_meta,
+ const wsrep::const_buffer& data)
+{
+ int ret(0);
+ int apply_err;
+ wsrep::mutable_buffer err;
+ {
+ wsrep::high_priority_switch sw(high_priority_service,
+ *streaming_applier);
+ apply_err = streaming_applier->apply_write_set(ws_meta, data, err);
+ if (!apply_err)
+ {
+ assert(err.size() == 0);
+ streaming_applier->after_apply();
+ }
+ else
+ {
+ bool const remove_fragments(streaming_applier->transaction(
+ ).streaming_context().fragments().size() > 0);
+ ret = streaming_applier->rollback(ws_handle, ws_meta);
+ ret = ret || (streaming_applier->after_apply(), 0);
+
+ if (remove_fragments)
+ {
+ ret = ret || streaming_applier->start_transaction(ws_handle,
+ ws_meta);
+ ret = ret || (streaming_applier->adopt_apply_error(err), 0);
+ ret = ret || streaming_applier->remove_fragments(ws_meta);
+ ret = ret || streaming_applier->commit(ws_handle, ws_meta);
+ ret = ret || (streaming_applier->after_apply(), 0);
+ }
+ else
+ {
+ ret = streaming_applier->log_dummy_write_set(ws_handle,
+ ws_meta, err);
+ }
+ }
+ }
+
+ if (!ret)
+ {
+ if (!apply_err)
+ {
+ high_priority_service.debug_crash("crash_apply_cb_before_append_frag");
+ const wsrep::xid xid(streaming_applier->transaction().xid());
+ ret = high_priority_service.append_fragment_and_commit(
+ ws_handle, ws_meta, data, xid);
+ high_priority_service.debug_crash("crash_apply_cb_after_append_frag");
+ ret = ret || (high_priority_service.after_apply(), 0);
+ }
+ else
+ {
+ discard_streaming_applier(server_state,
+ high_priority_service,
+ streaming_applier,
+ ws_meta);
+ ret = resolve_return_error(err.size() > 0, ret, apply_err);
+ }
+ }
+
+ return ret;
+}
+
+static int commit_fragment(wsrep::server_state& server_state,
+ wsrep::high_priority_service& high_priority_service,
+ wsrep::high_priority_service* streaming_applier,
+ const wsrep::ws_handle& ws_handle,
+ const wsrep::ws_meta& ws_meta,
+ const wsrep::const_buffer& data)
+{
+ int ret(0);
+ {
+ wsrep::high_priority_switch sw(
+ high_priority_service, *streaming_applier);
+ wsrep::mutable_buffer err;
+ int const apply_err(
+ streaming_applier->apply_write_set(ws_meta, data, err));
+ if (apply_err)
+ {
+ assert(streaming_applier->transaction(
+ ).streaming_context().fragments().size() > 0);
+ ret = streaming_applier->rollback(ws_handle, ws_meta);
+ ret = ret || (streaming_applier->after_apply(), 0);
+ ret = ret || streaming_applier->start_transaction(
+ ws_handle, ws_meta);
+ ret = ret || (streaming_applier->adopt_apply_error(err),0);
+ }
+ else
+ {
+ assert(err.size() == 0);
+ }
+
+ const wsrep::transaction& trx(streaming_applier->transaction());
+ // Fragment removal for XA is going to happen in after_commit
+ if (trx.state() != wsrep::transaction::s_prepared)
+ {
+ streaming_applier->debug_crash(
+ "crash_apply_cb_before_fragment_removal");
+
+ ret = ret || streaming_applier->remove_fragments(ws_meta);
+
+ streaming_applier->debug_crash(
+ "crash_apply_cb_after_fragment_removal");
+ }
+
+ streaming_applier->debug_crash(
+ "crash_commit_cb_before_last_fragment_commit");
+ ret = ret || streaming_applier->commit(ws_handle, ws_meta);
+ streaming_applier->debug_crash(
+ "crash_commit_cb_last_fragment_commit_success");
+ ret = ret || (streaming_applier->after_apply(), 0);
+ ret = resolve_return_error(err.size() > 0, ret, apply_err);
+ }
+
+ if (!ret)
+ {
+ discard_streaming_applier(server_state, high_priority_service,
+ streaming_applier, ws_meta);
+ }
+
+ return ret;
+}
+
+static int rollback_fragment(wsrep::server_state& server_state,
+ wsrep::high_priority_service& high_priority_service,
+ wsrep::high_priority_service* streaming_applier,
+ const wsrep::ws_handle& ws_handle,
+ const wsrep::ws_meta& ws_meta)
+{
+ int ret(0);
+ int adopt_error(0);
+ bool const remove_fragments(streaming_applier->transaction().
+ streaming_context().fragments().size() > 0);
+ // If fragment removal is needed, adopt transaction state
+ // and start a transaction for it.
+ if (remove_fragments &&
+ (adopt_error = high_priority_service.adopt_transaction(
+ streaming_applier->transaction())))
+ {
+ log_adopt_error(streaming_applier->transaction());
+ }
+ // Even if the adopt above fails we roll back the streaming transaction.
+ // Adopt failure will leave stale entries in streaming log which can
+ // be removed manually.
+ wsrep::const_buffer no_error;
+ {
+ wsrep::high_priority_switch ws(
+ high_priority_service, *streaming_applier);
+ // Streaming applier rolls back out of order. Fragment
+ // removal grabs commit order below.
+ ret = streaming_applier->rollback(wsrep::ws_handle(), wsrep::ws_meta());
+ ret = ret || (streaming_applier->after_apply(), 0);
+ }
+
+ if (!ret)
+ {
+ discard_streaming_applier(server_state, high_priority_service,
+ streaming_applier, ws_meta);
+
+ if (adopt_error == 0)
+ {
+ if (remove_fragments)
+ {
+ ret = high_priority_service.remove_fragments(ws_meta);
+ ret = ret || high_priority_service.commit(ws_handle, ws_meta);
+ if (ret)
+ {
+ high_priority_service.rollback(ws_handle, ws_meta);
+ }
+ high_priority_service.after_apply();
+ }
+ else
+ {
+ if (ws_meta.ordered())
+ {
+ wsrep::mutable_buffer no_error;
+ ret = high_priority_service.log_dummy_write_set(
+ ws_handle, ws_meta, no_error);
+ }
+ }
+ }
+ }
+ return ret;
+}
+
+static int apply_write_set(wsrep::server_state& server_state,
+ wsrep::high_priority_service& high_priority_service,
+ const wsrep::ws_handle& ws_handle,
+ const wsrep::ws_meta& ws_meta,
+ const wsrep::const_buffer& data)
+{
+ int ret(0);
+ if (wsrep::rolls_back_transaction(ws_meta.flags()))
+ {
+ wsrep::mutable_buffer no_error;
+ if (wsrep::starts_transaction(ws_meta.flags()))
+ {
+ // No transaction existed before, log a dummy write set
+ ret = high_priority_service.log_dummy_write_set(
+ ws_handle, ws_meta, no_error);
+ }
+ else
+ {
+ wsrep::high_priority_service* sa(
+ server_state.find_streaming_applier(
+ ws_meta.server_id(), ws_meta.transaction_id()));
+ if (sa == 0)
+ {
+ // It is a known limitation that galera provider
+ // cannot always determine if certification test
+ // for interrupted transaction will pass or fail
+ // (see comments in transaction::certify_fragment()).
+ // As a consequence, unnecessary rollback fragments
+ // may be delivered here. The message below has
+ // been intentionally turned into a debug message,
+ // rather than warning.
+ WSREP_LOG_DEBUG(wsrep::log::debug_log_level(),
+ wsrep::log::debug_level_server_state,
+ "Could not find applier context for "
+ << ws_meta.server_id()
+ << ": " << ws_meta.transaction_id());
+ ret = high_priority_service.log_dummy_write_set(
+ ws_handle, ws_meta, no_error);
+ }
+ else
+ {
+ // rollback_fragment() consumes sa
+ ret = rollback_fragment(server_state,
+ high_priority_service,
+ sa,
+ ws_handle,
+ ws_meta);
+ }
+ }
+ }
+ else if (wsrep::starts_transaction(ws_meta.flags()) &&
+ wsrep::commits_transaction(ws_meta.flags()))
+ {
+ ret = high_priority_service.start_transaction(ws_handle, ws_meta);
+ if (!ret)
+ {
+ wsrep::mutable_buffer err;
+ int const apply_err(high_priority_service.apply_write_set(
+ ws_meta, data, err));
+ if (!apply_err)
+ {
+ assert(err.size() == 0);
+ ret = high_priority_service.commit(ws_handle, ws_meta);
+ ret = ret || (high_priority_service.after_apply(), 0);
+ }
+ else
+ {
+ ret = high_priority_service.rollback(ws_handle, ws_meta);
+ ret = ret || (high_priority_service.after_apply(), 0);
+ ret = ret || high_priority_service.log_dummy_write_set(
+ ws_handle, ws_meta, err);
+ ret = resolve_return_error(err.size() > 0, ret, apply_err);
+ }
+ }
+ }
+ else if (wsrep::starts_transaction(ws_meta.flags()))
+ {
+ assert(server_state.find_streaming_applier(
+ ws_meta.server_id(), ws_meta.transaction_id()) == 0);
+ wsrep::high_priority_service* sa(
+ server_state.server_service().streaming_applier_service(
+ high_priority_service));
+ server_state.start_streaming_applier(
+ ws_meta.server_id(), ws_meta.transaction_id(), sa);
+ sa->start_transaction(ws_handle, ws_meta);
+ ret = apply_fragment(server_state,
+ high_priority_service,
+ sa,
+ ws_handle,
+ ws_meta,
+ data);
+ }
+ else if (ws_meta.flags() == 0 || ws_meta.flags() == wsrep::provider::flag::pa_unsafe ||
+ wsrep::prepares_transaction(ws_meta.flags()))
+ {
+ wsrep::high_priority_service* sa(
+ server_state.find_streaming_applier(
+ ws_meta.server_id(), ws_meta.transaction_id()));
+ if (sa == 0)
+ {
+ // It is possible that rapid group membership changes
+ // may cause streaming transaction be rolled back before
+ // commit fragment comes in. Although this is a valid
+ // situation, log a warning if a sac cannot be found as
+ // it may be an indication of a bug too.
+ wsrep::log_warning() << "Could not find applier context for "
+ << ws_meta.server_id()
+ << ": " << ws_meta.transaction_id();
+ wsrep::mutable_buffer no_error;
+ ret = high_priority_service.log_dummy_write_set(
+ ws_handle, ws_meta, no_error);
+ }
+ else
+ {
+ sa->next_fragment(ws_meta);
+ ret = apply_fragment(server_state,
+ high_priority_service,
+ sa,
+ ws_handle,
+ ws_meta,
+ data);
+ }
+ }
+ else if (wsrep::commits_transaction(ws_meta.flags()))
+ {
+ if (high_priority_service.is_replaying())
+ {
+ wsrep::mutable_buffer unused;
+ ret = high_priority_service.start_transaction(
+ ws_handle, ws_meta) ||
+ high_priority_service.apply_write_set(ws_meta, data, unused) ||
+ high_priority_service.commit(ws_handle, ws_meta);
+ }
+ else
+ {
+ wsrep::high_priority_service* sa(
+ server_state.find_streaming_applier(
+ ws_meta.server_id(), ws_meta.transaction_id()));
+ if (sa == 0)
+ {
+ // It is possible that rapid group membership changes
+ // may cause streaming transaction be rolled back before
+ // commit fragment comes in. Although this is a valid
+ // situation, log a warning if a sac cannot be found as
+ // it may be an indication of a bug too.
+ wsrep::log_warning()
+ << "Could not find applier context for "
+ << ws_meta.server_id()
+ << ": " << ws_meta.transaction_id();
+ wsrep::mutable_buffer no_error;
+ ret = high_priority_service.log_dummy_write_set(
+ ws_handle, ws_meta, no_error);
+ }
+ else
+ {
+ // Commit fragment consumes sa
+ sa->next_fragment(ws_meta);
+ ret = commit_fragment(server_state,
+ high_priority_service,
+ sa,
+ ws_handle,
+ ws_meta,
+ data);
+ }
+ }
+ }
+ else
+ {
+ assert(0);
+ }
+ if (ret)
+ {
+ wsrep::log_error() << "Failed to apply write set: " << ws_meta;
+ }
+ return ret;
+}
+
+static int apply_toi(wsrep::provider& provider,
+ wsrep::high_priority_service& high_priority_service,
+ const wsrep::ws_handle& ws_handle,
+ const wsrep::ws_meta& ws_meta,
+ const wsrep::const_buffer& data)
+{
+ if (wsrep::starts_transaction(ws_meta.flags()) &&
+ wsrep::commits_transaction(ws_meta.flags()))
+ {
+ //
+ // Regular TOI.
+ //
+ provider.commit_order_enter(ws_handle, ws_meta);
+ wsrep::mutable_buffer err;
+ int const apply_err(high_priority_service.apply_toi(ws_meta,data,err));
+ int const vote_err(provider.commit_order_leave(ws_handle, ws_meta,err));
+ return resolve_return_error(err.size() > 0, vote_err, apply_err);
+ }
+ else if (wsrep::starts_transaction(ws_meta.flags()))
+ {
+ provider.commit_order_enter(ws_handle, ws_meta);
+ wsrep::mutable_buffer err;
+ int const apply_err(high_priority_service.apply_nbo_begin(ws_meta, data, err));
+ int const vote_err(provider.commit_order_leave(ws_handle, ws_meta, err));
+ return resolve_return_error(err.size() > 0, vote_err, apply_err);
+ }
+ else if (wsrep::commits_transaction(ws_meta.flags()))
+ {
+ // NBO end event is ignored here, both local and applied
+ // have NBO end handled via local TOI calls.
+ provider.commit_order_enter(ws_handle, ws_meta);
+ wsrep::mutable_buffer err;
+ provider.commit_order_leave(ws_handle, ws_meta, err);
+ return 0;
+ }
+ else
+ {
+ assert(0);
+ return 0;
+ }
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// Server State //
+//////////////////////////////////////////////////////////////////////////////
+
+int wsrep::server_state::load_provider(
+ const std::string& provider_spec, const std::string& provider_options,
+ const wsrep::provider::services& services)
+{
+ wsrep::log_info() << "Loading provider " << provider_spec
+ << " initial position: " << initial_position_;
+
+ provider_ = wsrep::provider::make_provider(*this,
+ provider_spec,
+ provider_options,
+ services);
+ return (provider_ ? 0 : 1);
+}
+
+void wsrep::server_state::unload_provider()
+{
+ delete provider_;
+ provider_ = 0;
+}
+
+int wsrep::server_state::connect(const std::string& cluster_name,
+ const std::string& cluster_address,
+ const std::string& state_donor,
+ bool bootstrap)
+{
+ bootstrap_ = is_bootstrap(cluster_address, bootstrap);
+ wsrep::log_info() << "Connecting with bootstrap option: " << bootstrap_;
+ return provider().connect(cluster_name, cluster_address, state_donor,
+ bootstrap_);
+}
+
+int wsrep::server_state::disconnect()
+{
+ {
+ wsrep::unique_lock<wsrep::mutex> lock(mutex_);
+ // In case of failure situations which are caused by provider
+ // being shut down some failing operation may also try to shut
+ // down the replication. Check the state here and
+ // return success if the provider disconnect is already in progress
+ // or has completed.
+ if (state(lock) == s_disconnecting || state(lock) == s_disconnected)
+ {
+ return 0;
+ }
+ state(lock, s_disconnecting);
+ interrupt_state_waiters(lock);
+ }
+ return provider().disconnect();
+}
+
+wsrep::server_state::~server_state()
+{
+ delete provider_;
+}
+
+std::vector<wsrep::provider::status_variable>
+wsrep::server_state::status() const
+{
+ return provider().status();
+}
+
+
+wsrep::seqno wsrep::server_state::pause()
+{
+ wsrep::unique_lock<wsrep::mutex> lock(mutex_);
+ // Disallow concurrent calls to pause to in order to have non-concurrent
+ // access to desynced_on_pause_ which is checked in resume() call.
+ wsrep::log_info() << "pause";
+ while (pause_count_ > 0)
+ {
+ cond_.wait(lock);
+ }
+ ++pause_count_;
+ assert(pause_seqno_.is_undefined());
+ lock.unlock();
+ pause_seqno_ = provider().pause();
+ lock.lock();
+ if (pause_seqno_.is_undefined())
+ {
+ --pause_count_;
+ }
+ return pause_seqno_;
+}
+
+void wsrep::server_state::resume()
+{
+ wsrep::unique_lock<wsrep::mutex> lock(mutex_);
+ wsrep::log_info() << "resume";
+ assert(pause_seqno_.is_undefined() == false);
+ assert(pause_count_ == 1);
+ if (provider().resume())
+ {
+ throw wsrep::runtime_error("Failed to resume provider");
+ }
+ pause_seqno_ = wsrep::seqno::undefined();
+ --pause_count_;
+ cond_.notify_all();
+}
+
+wsrep::seqno wsrep::server_state::desync_and_pause()
+{
+ wsrep::log_info() << "Desyncing and pausing the provider";
+ // Temporary variable to store desync() return status. This will be
+ // assigned to desynced_on_pause_ after pause() call to prevent
+ // concurrent access to member variable desynced_on_pause_.
+ bool desync_successful;
+ if (desync())
+ {
+ // Desync may give transient error if the provider cannot
+ // communicate with the rest of the cluster. However, this
+ // error can be tolerated because if the provider can be
+ // paused successfully below.
+ WSREP_LOG_DEBUG(wsrep::log::debug_log_level(),
+ wsrep::log::debug_level_server_state,
+ "Failed to desync server before pause");
+ desync_successful = false;
+ }
+ else
+ {
+ desync_successful = true;
+ }
+ wsrep::seqno ret(pause());
+ if (ret.is_undefined())
+ {
+ wsrep::log_warning() << "Failed to pause provider";
+ resync();
+ return wsrep::seqno::undefined();
+ }
+ else
+ {
+ desynced_on_pause_ = desync_successful;
+ }
+ wsrep::log_info() << "Provider paused at: " << ret;
+ return ret;
+}
+
+void wsrep::server_state::resume_and_resync()
+{
+ wsrep::log_info() << "Resuming and resyncing the provider";
+ try
+ {
+ // Assign desynced_on_pause_ to local variable before resuming
+ // in order to avoid concurrent access to desynced_on_pause_ member
+ // variable.
+ bool do_resync = desynced_on_pause_;
+ desynced_on_pause_ = false;
+ resume();
+ if (do_resync)
+ {
+ resync();
+ }
+ }
+ catch (const wsrep::runtime_error& e)
+ {
+ wsrep::log_warning()
+ << "Resume and resync failed, server may have to be restarted";
+ }
+}
+
+std::string wsrep::server_state::prepare_for_sst()
+{
+ wsrep::unique_lock<wsrep::mutex> lock(mutex_);
+ state(lock, s_joiner);
+ lock.unlock();
+ return server_service_.sst_request();
+}
+
+int wsrep::server_state::start_sst(const std::string& sst_request,
+ const wsrep::gtid& gtid,
+ bool bypass)
+{
+ wsrep::unique_lock<wsrep::mutex> lock(mutex_);
+ state(lock, s_donor);
+ int ret(0);
+ lock.unlock();
+ if (server_service_.start_sst(sst_request, gtid, bypass))
+ {
+ lock.lock();
+ wsrep::log_warning() << "SST preparation failed";
+ return_from_donor_state(lock);
+ ret = 1;
+ }
+ return ret;
+}
+
+void wsrep::server_state::sst_sent(const wsrep::gtid& gtid, int error)
+{
+ if (0 == error)
+ wsrep::log_info() << "SST sent: " << gtid;
+ else
+ wsrep::log_info() << "SST sending failed: " << error;
+
+ wsrep::unique_lock<wsrep::mutex> lock(mutex_);
+
+ return_from_donor_state(lock);
+
+ lock.unlock();
+ enum provider::status const retval(provider().sst_sent(gtid, error));
+ if (retval != provider::success)
+ {
+ std::string msg("wsrep::sst_sent() returned an error: ");
+ msg += wsrep::provider::to_string(retval);
+ server_service_.log_message(wsrep::log::warning, msg.c_str());
+ }
+}
+
+int wsrep::server_state::sst_received(wsrep::client_service& cs,
+ int const error)
+try
+{
+ wsrep::log_info() << "SST received";
+ wsrep::gtid gtid(wsrep::gtid::undefined());
+ wsrep::unique_lock<wsrep::mutex> lock(mutex_);
+ assert(state_ == s_joiner || state_ == s_initialized);
+
+ // Run initialization only if the SST was successful.
+ // In case of SST failure the system is in undefined state
+ // may not be recoverable.
+ if (error == 0)
+ {
+ if (server_service_.sst_before_init())
+ {
+ if (init_initialized_ == false)
+ {
+ state(lock, s_initializing);
+ lock.unlock();
+ server_service_.debug_sync("on_view_wait_initialized");
+ lock.lock();
+ wait_until_state(lock, s_initialized);
+ assert(init_initialized_);
+ }
+ }
+ lock.unlock();
+
+ if (id_.is_undefined())
+ {
+ assert(0);
+ throw wsrep::runtime_error(
+ "wsrep::sst_received() called before connection to cluster");
+ }
+
+ gtid = server_service_.get_position(cs);
+ wsrep::log_info() << "Recovered position from storage: " << gtid;
+
+ lock.lock();
+ if (gtid.seqno() >= connected_gtid().seqno())
+ {
+ /* Now the node has all the data the cluster has: part in
+ * storage, part in replication event queue. */
+ state(lock, s_joined);
+ }
+ lock.unlock();
+
+ wsrep::view const v(server_service_.get_view(cs, id_));
+ wsrep::log_info() << "Recovered view from SST:\n" << v;
+
+ /*
+ * If the state id from recovered view has undefined ID, we may
+ * be upgrading from earlier version which does not provide
+ * view stored in stable storage. In this case we skip
+ * sanity checks and assigning the current view and wait
+ * until the first view delivery.
+ */
+ if (v.state_id().id().is_undefined() == false)
+ {
+ if (v.state_id().id() != gtid.id() ||
+ v.state_id().seqno() > gtid.seqno())
+ {
+ /* Since IN GENERAL we may not be able to recover SST GTID from
+ * the state data, we have to rely on SST script passing the
+ * GTID value explicitly.
+ * Here we check if the passed GTID makes any sense: it should
+ * have the same UUID and greater or equal seqno than the last
+ * logged view. */
+ std::ostringstream msg;
+ msg << "SST script passed bogus GTID: " << gtid
+ << ". Preceding view GTID: " << v.state_id();
+ throw wsrep::runtime_error(msg.str());
+ }
+
+ if (current_view_.status() == wsrep::view::primary)
+ {
+ previous_primary_view_ = current_view_;
+ }
+ current_view_ = v;
+ server_service_.log_view(NULL /* this view is stored already */, v);
+ }
+ else
+ {
+ wsrep::log_warning()
+ << "View recovered from stable storage was empty. If the "
+ << "server is doing rolling upgrade from previous version "
+ << "which does not support storing view info into stable "
+ << "storage, this is ok. Otherwise this may be a sign of "
+ << "malfunction.";
+ }
+ lock.lock();
+ recover_streaming_appliers_if_not_recovered(lock, cs);
+ lock.unlock();
+ }
+
+ enum provider::status const retval(provider().sst_received(gtid, error));
+ if (retval != provider::success)
+ {
+ wsrep::log_error() << "provider.sst_received() failed: "
+ << wsrep::provider::to_string(retval);
+ return 1;
+ }
+ return 0;
+}
+catch (const wsrep::runtime_error& e)
+{
+ wsrep::log_error() << "sst_received failed: " << e.what();
+ if (provider_)
+ {
+ provider_->sst_received(wsrep::gtid::undefined(), -EINTR);
+ }
+ return 1;
+}
+
+void wsrep::server_state::initialized()
+{
+ wsrep::unique_lock<wsrep::mutex> lock(mutex_);
+ wsrep::log_info() << "Server initialized";
+ init_initialized_ = true;
+ if (server_service_.sst_before_init())
+ {
+ state(lock, s_initialized);
+ }
+ else
+ {
+ state(lock, s_initializing);
+ state(lock, s_initialized);
+ }
+}
+
+enum wsrep::provider::status
+wsrep::server_state::wait_for_gtid(const wsrep::gtid& gtid, int timeout)
+ const
+{
+ return provider().wait_for_gtid(gtid, timeout);
+}
+
+int
+wsrep::server_state::set_encryption_key(std::vector<unsigned char>& key)
+{
+ encryption_key_ = key;
+ if (provider_)
+ {
+ wsrep::const_buffer const key(encryption_key_.data(),
+ encryption_key_.size());
+ enum provider::status const retval(provider_->enc_set_key(key));
+ if (retval != provider::success)
+ {
+ wsrep::log_error() << "Failed to set encryption key: "
+ << provider::to_string(retval);
+ return 1;
+ }
+ }
+ return 0;
+}
+
+std::pair<wsrep::gtid, enum wsrep::provider::status>
+wsrep::server_state::causal_read(int timeout) const
+{
+ return provider().causal_read(timeout);
+}
+
+void wsrep::server_state::on_connect(const wsrep::view& view)
+{
+ // Sanity checks
+ if (view.own_index() < 0 ||
+ size_t(view.own_index()) >= view.members().size())
+ {
+ std::ostringstream os;
+ os << "Invalid view on connect: own index out of range: " << view;
+#ifndef NDEBUG
+ wsrep::log_error() << os.str();
+ assert(0);
+#endif
+ throw wsrep::runtime_error(os.str());
+ }
+
+ const size_t own_index(static_cast<size_t>(view.own_index()));
+ if (id_.is_undefined() == false && id_ != view.members()[own_index].id())
+ {
+ std::ostringstream os;
+ os << "Connection in connected state.\n"
+ << "Connected view:\n" << view
+ << "Previous view:\n" << current_view_
+ << "Current own ID: " << id_;
+#ifndef NDEBUG
+ wsrep::log_error() << os.str();
+ assert(0);
+#endif
+ throw wsrep::runtime_error(os.str());
+ }
+ else
+ {
+ id_ = view.members()[own_index].id();
+ }
+
+ wsrep::log_info() << "Server "
+ << name_
+ << " connected to cluster at position "
+ << view.state_id()
+ << " with ID "
+ << id_;
+
+ wsrep::unique_lock<wsrep::mutex> lock(mutex_);
+ connected_gtid_ = view.state_id();
+ state(lock, s_connected);
+}
+
+void wsrep::server_state::on_primary_view(
+ const wsrep::view& view,
+ wsrep::high_priority_service* high_priority_service)
+{
+ wsrep::unique_lock<wsrep::mutex> lock(mutex_);
+ assert(view.final() == false);
+ //
+ // Reached primary from connected state. This may mean the following
+ //
+ // 1) Server was joined to the cluster and got SST transfer
+ // 2) Server was partitioned from the cluster and got back
+ // 3) A new cluster was bootstrapped from non-prim cluster
+ //
+ // There is no enough information here what was the cause
+ // of the primary component, so we need to walk through
+ // all states leading to joined to notify possible state
+ // waiters in other threads.
+ //
+ if (server_service_.sst_before_init())
+ {
+ if (state_ == s_connected)
+ {
+ state(lock, s_joiner);
+ // We need to assign init_initialized_ here to local
+ // variable. If the value here was false, we need to skip
+ // the initializing -> initialized -> joined state cycle
+ // below. However, if we don't assign the value to
+ // local, it is possible that the main thread gets control
+ // between changing the state to initializing and checking
+ // initialized flag, which may cause the initialzing -> initialized
+ // state change to be executed even if it should not be.
+ const bool was_initialized(init_initialized_);
+ state(lock, s_initializing);
+ if (was_initialized)
+ {
+ // If server side has already been initialized,
+ // skip directly to s_joined.
+ state(lock, s_initialized);
+ }
+ }
+ }
+ else
+ {
+ if (state_ == s_connected)
+ {
+ state(lock, s_joiner);
+ }
+ }
+ if (init_initialized_ == false)
+ {
+ lock.unlock();
+ server_service_.debug_sync("on_view_wait_initialized");
+ lock.lock();
+ wait_until_state(lock, s_initialized);
+ }
+ assert(init_initialized_);
+
+ if (bootstrap_)
+ {
+ server_service_.bootstrap();
+ bootstrap_ = false;
+ }
+
+ assert(high_priority_service);
+
+ if (high_priority_service)
+ {
+ recover_streaming_appliers_if_not_recovered(lock,
+ *high_priority_service);
+ close_orphaned_sr_transactions(lock, *high_priority_service);
+ }
+
+ if (state(lock) < s_joined &&
+ view.state_id().seqno() >= connected_gtid().seqno())
+ {
+ // If we progressed beyond connected seqno, it means we have full state
+ state(lock, s_joined);
+ }
+}
+
+void wsrep::server_state::on_non_primary_view(
+ const wsrep::view& view,
+ wsrep::high_priority_service* high_priority_service)
+{
+ wsrep::unique_lock<wsrep::mutex> lock(mutex_);
+ wsrep::log_info() << "Non-primary view";
+ if (view.final())
+ {
+ go_final(lock, view, high_priority_service);
+ }
+ else if (state_ != s_disconnecting)
+ {
+ state(lock, s_connected);
+ }
+}
+
+void wsrep::server_state::go_final(wsrep::unique_lock<wsrep::mutex>& lock,
+ const wsrep::view& view,
+ wsrep::high_priority_service* hps)
+{
+ (void)view; // avoid compiler warning "unused parameter 'view'"
+ assert(view.final());
+ assert(hps);
+ if (hps)
+ {
+ close_transactions_at_disconnect(*hps);
+ }
+ state(lock, s_disconnected);
+ id_ = wsrep::id::undefined();
+}
+
+void wsrep::server_state::on_view(const wsrep::view& view,
+ wsrep::high_priority_service* high_priority_service)
+{
+ wsrep::log_info()
+ << "================================================\nView:\n"
+ << view
+ << "=================================================";
+ if (current_view_.status() == wsrep::view::primary)
+ {
+ previous_primary_view_ = current_view_;
+ }
+ current_view_ = view;
+ switch (view.status())
+ {
+ case wsrep::view::primary:
+ on_primary_view(view, high_priority_service);
+ break;
+ case wsrep::view::non_primary:
+ on_non_primary_view(view, high_priority_service);
+ break;
+ case wsrep::view::disconnected:
+ {
+ wsrep::unique_lock<wsrep::mutex> lock(mutex_);
+ go_final(lock, view, high_priority_service);
+ break;
+ }
+ default:
+ wsrep::log_warning() << "Unrecognized view status: " << view.status();
+ assert(0);
+ }
+
+ server_service_.log_view(high_priority_service, view);
+}
+
+void wsrep::server_state::on_sync()
+{
+ wsrep::log_info() << "Server " << name_ << " synced with group";
+ wsrep::unique_lock<wsrep::mutex> lock(mutex_);
+
+ // Initial sync
+ if (server_service_.sst_before_init() && init_synced_ == false)
+ {
+ switch (state_)
+ {
+ case s_synced:
+ break;
+ case s_connected: // Seed node path: provider becomes
+ state(lock, s_joiner); // synced with itself before anything
+ WSREP_FALLTHROUGH; // else. Then goes DB initialization.
+ case s_joiner: // |
+ state(lock, s_initializing); // V
+ break;
+ case s_donor:
+ assert(false); // this should never happen
+ state(lock, s_joined);
+ state(lock, s_synced);
+ break;
+ case s_initialized:
+ state(lock, s_joined);
+ WSREP_FALLTHROUGH;
+ default:
+ /* State */
+ state(lock, s_synced);
+ };
+ }
+ else
+ {
+ // Calls to on_sync() in synced state are possible if
+ // server desyncs itself from the group. Provider does not
+ // inform about this through callbacks.
+ if (state_ != s_synced)
+ {
+ state(lock, s_synced);
+ }
+ }
+ init_synced_ = true;
+
+ enum wsrep::provider::status status(send_pending_rollback_events(lock));
+ if (status)
+ {
+ // TODO should be retried?
+ wsrep::log_warning()
+ << "Failed to flush rollback event cache: " << status;
+ }
+}
+
+int wsrep::server_state::on_apply(
+ wsrep::high_priority_service& high_priority_service,
+ const wsrep::ws_handle& ws_handle,
+ const wsrep::ws_meta& ws_meta,
+ const wsrep::const_buffer& data)
+{
+ if (is_toi(ws_meta.flags()))
+ {
+ return apply_toi(provider(), high_priority_service,
+ ws_handle, ws_meta, data);
+ }
+ else if (is_commutative(ws_meta.flags()) || is_native(ws_meta.flags()))
+ {
+ // Not implemented yet.
+ assert(0);
+ return 0;
+ }
+ else
+ {
+ return apply_write_set(*this, high_priority_service,
+ ws_handle, ws_meta, data);
+ }
+}
+
+enum wsrep::server_state::state wsrep::server_state::state(
+ wsrep::unique_lock<wsrep::mutex>& lock WSREP_UNUSED) const
+{
+ assert(lock.owns_lock());
+ return state_;
+}
+
+void wsrep::server_state::start_streaming_client(
+ wsrep::client_state* client_state)
+{
+ wsrep::unique_lock<wsrep::mutex> lock(mutex_);
+ WSREP_LOG_DEBUG(wsrep::log::debug_log_level(),
+ wsrep::log::debug_level_server_state,
+ "Start streaming client: " << client_state->id());
+ if (streaming_clients_.insert(
+ std::make_pair(client_state->id(), client_state)).second == false)
+ {
+ wsrep::log_warning() << "Failed to insert streaming client "
+ << client_state->id();
+ assert(0);
+ }
+}
+
+void wsrep::server_state::convert_streaming_client_to_applier(
+ wsrep::client_state* client_state)
+{
+ // create streaming_applier beforehand as server_state lock should
+ // not be held when calling server_service methods
+ wsrep::high_priority_service* streaming_applier(
+ server_service_.streaming_applier_service(
+ client_state->client_service()));
+
+ wsrep::unique_lock<wsrep::mutex> lock(mutex_);
+ WSREP_LOG_DEBUG(wsrep::log::debug_log_level(),
+ wsrep::log::debug_level_server_state,
+ "Convert streaming client to applier "
+ << client_state->id());
+ streaming_clients_map::iterator i(
+ streaming_clients_.find(client_state->id()));
+ assert(i != streaming_clients_.end());
+ if (i == streaming_clients_.end())
+ {
+ wsrep::log_warning() << "Unable to find streaming client "
+ << client_state->id();
+ assert(0);
+ }
+ else
+ {
+ streaming_clients_.erase(i);
+ }
+
+ // Convert to applier only if the state is not disconnected. In
+ // disconnected state the applier map is supposed to be empty
+ // and it will be reconstructed from fragment storage when
+ // joining back to cluster.
+ if (state(lock) != s_disconnected)
+ {
+ if (streaming_applier->adopt_transaction(client_state->transaction()))
+ {
+ log_adopt_error(client_state->transaction());
+ streaming_applier->after_apply();
+ server_service_.release_high_priority_service(streaming_applier);
+ return;
+ }
+ if (streaming_appliers_.insert(
+ std::make_pair(
+ std::make_pair(client_state->transaction().server_id(),
+ client_state->transaction().id()),
+ streaming_applier)).second == false)
+ {
+ wsrep::log_warning() << "Could not insert streaming applier "
+ << id_
+ << ", "
+ << client_state->transaction().id();
+ assert(0);
+ }
+ }
+ else
+ {
+ server_service_.release_high_priority_service(streaming_applier);
+ client_state->client_service().store_globals();
+ }
+}
+
+
+void wsrep::server_state::stop_streaming_client(
+ wsrep::client_state* client_state)
+{
+ wsrep::unique_lock<wsrep::mutex> lock(mutex_);
+ WSREP_LOG_DEBUG(wsrep::log::debug_log_level(),
+ wsrep::log::debug_level_server_state,
+ "Stop streaming client: " << client_state->id());
+ streaming_clients_map::iterator i(
+ streaming_clients_.find(client_state->id()));
+ assert(i != streaming_clients_.end());
+ if (i == streaming_clients_.end())
+ {
+ wsrep::log_warning() << "Unable to find streaming client "
+ << client_state->id();
+ assert(0);
+ return;
+ }
+ else
+ {
+ streaming_clients_.erase(i);
+ cond_.notify_all();
+ }
+}
+
+void wsrep::server_state::start_streaming_applier(
+ const wsrep::id& server_id,
+ const wsrep::transaction_id& transaction_id,
+ wsrep::high_priority_service* sa)
+{
+ wsrep::unique_lock<wsrep::mutex> lock(mutex_);
+ if (streaming_appliers_.insert(
+ std::make_pair(std::make_pair(server_id, transaction_id),
+ sa)).second == false)
+ {
+ wsrep::log_error() << "Could not insert streaming applier";
+ throw wsrep::fatal_error();
+ }
+}
+
+void wsrep::server_state::stop_streaming_applier(
+ const wsrep::id& server_id,
+ const wsrep::transaction_id& transaction_id)
+{
+ wsrep::unique_lock<wsrep::mutex> lock(mutex_);
+ streaming_appliers_map::iterator i(
+ streaming_appliers_.find(std::make_pair(server_id, transaction_id)));
+ assert(i != streaming_appliers_.end());
+ if (i == streaming_appliers_.end())
+ {
+ wsrep::log_warning() << "Could not find streaming applier for "
+ << server_id << ":" << transaction_id;
+ }
+ else
+ {
+ streaming_appliers_.erase(i);
+ cond_.notify_all();
+ }
+}
+
+wsrep::high_priority_service* wsrep::server_state::find_streaming_applier(
+ const wsrep::id& server_id,
+ const wsrep::transaction_id& transaction_id) const
+{
+ wsrep::unique_lock<wsrep::mutex> lock(mutex_);
+ streaming_appliers_map::const_iterator i(
+ streaming_appliers_.find(std::make_pair(server_id, transaction_id)));
+ return (i == streaming_appliers_.end() ? 0 : i->second);
+}
+
+wsrep::high_priority_service* wsrep::server_state::find_streaming_applier(
+ const wsrep::xid& xid) const
+{
+ wsrep::unique_lock<wsrep::mutex> lock(mutex_);
+ streaming_appliers_map::const_iterator i(streaming_appliers_.begin());
+ while (i != streaming_appliers_.end())
+ {
+ wsrep::high_priority_service* sa(i->second);
+ if (sa->transaction().xid() == xid)
+ {
+ return sa;
+ }
+ i++;
+ }
+ return NULL;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+// Private //
+//////////////////////////////////////////////////////////////////////////////
+
+int wsrep::server_state::desync(wsrep::unique_lock<wsrep::mutex>& lock)
+{
+ assert(lock.owns_lock());
+ ++desync_count_;
+ lock.unlock();
+ int ret(provider().desync());
+ lock.lock();
+ if (ret)
+ {
+ --desync_count_;
+ }
+ return ret;
+}
+
+void wsrep::server_state::resync(wsrep::unique_lock<wsrep::mutex>&
+ lock WSREP_UNUSED)
+{
+ assert(lock.owns_lock());
+ assert(desync_count_ > 0);
+ if (desync_count_ > 0)
+ {
+ --desync_count_;
+ if (provider().resync())
+ {
+ throw wsrep::runtime_error("Failed to resync");
+ }
+ }
+ else
+ {
+ wsrep::log_warning() << "desync_count " << desync_count_
+ << " on resync";
+ }
+}
+
+
+void wsrep::server_state::state(
+ wsrep::unique_lock<wsrep::mutex>& lock WSREP_UNUSED,
+ enum wsrep::server_state::state state)
+{
+ assert(lock.owns_lock());
+ static const char allowed[n_states_][n_states_] =
+ {
+ /* dis, ing, ized, cted, jer, jed, dor, sed, ding to/from */
+ { 0, 1, 0, 1, 0, 0, 0, 0, 0}, /* dis */
+ { 1, 0, 1, 0, 0, 0, 0, 0, 1}, /* ing */
+ { 1, 0, 0, 1, 0, 1, 0, 0, 1}, /* ized */
+ { 1, 0, 0, 1, 1, 0, 0, 1, 1}, /* cted */
+ { 1, 1, 0, 0, 0, 1, 0, 0, 1}, /* jer */
+ { 1, 0, 0, 1, 0, 0, 1, 1, 1}, /* jed */
+ { 1, 0, 0, 1, 0, 1, 0, 0, 1}, /* dor */
+ { 1, 0, 0, 1, 0, 1, 1, 0, 1}, /* sed */
+ { 1, 0, 0, 0, 0, 0, 0, 0, 0} /* ding */
+ };
+
+ if (allowed[state_][state] == false)
+ {
+ std::ostringstream os;
+ os << "server: " << name_ << " unallowed state transition: "
+ << wsrep::to_string(state_) << " -> " << wsrep::to_string(state);
+ wsrep::log_warning() << os.str() << "\n";
+ assert(0);
+ }
+
+ WSREP_LOG_DEBUG(wsrep::log::debug_log_level(),
+ wsrep::log::debug_level_server_state,
+ "server " << name_ << " state change: "
+ << to_c_string(state_) << " -> "
+ << to_c_string(state));
+ state_hist_.push_back(state_);
+ server_service_.log_state_change(state_, state);
+ state_ = state;
+ cond_.notify_all();
+ while (state_waiters_[state_])
+ {
+ cond_.wait(lock);
+ }
+}
+
+void wsrep::server_state::wait_until_state(
+ wsrep::unique_lock<wsrep::mutex>& lock,
+ enum wsrep::server_state::state state) const
+{
+ ++state_waiters_[state];
+ while (state_ != state)
+ {
+ cond_.wait(lock);
+ // If the waiter waits for any other state than disconnecting
+ // or disconnected and the state has been changed to disconnecting,
+ // this usually means that some error was encountered
+ if (state != s_disconnecting && state != s_disconnected
+ && state_ == s_disconnecting)
+ {
+ throw wsrep::runtime_error("State wait was interrupted");
+ }
+ }
+ --state_waiters_[state];
+ cond_.notify_all();
+}
+
+int wsrep::server_state::wait_until_state(enum state state) const
+try
+{
+ wsrep::unique_lock<wsrep::mutex> lock(mutex_);
+ wait_until_state(lock, state);
+ return 0;
+}
+catch (...)
+{
+ return 1;
+}
+
+void wsrep::server_state::interrupt_state_waiters(
+ wsrep::unique_lock<wsrep::mutex>& lock WSREP_UNUSED)
+{
+ assert(lock.owns_lock());
+ cond_.notify_all();
+}
+
+template <class C>
+void wsrep::server_state::recover_streaming_appliers_if_not_recovered(
+ wsrep::unique_lock<wsrep::mutex>& lock, C& c)
+{
+ assert(lock.owns_lock());
+ if (streaming_appliers_recovered_ == false)
+ {
+ lock.unlock();
+ server_service_.recover_streaming_appliers(c);
+ lock.lock();
+ }
+ streaming_appliers_recovered_ = true;
+}
+
+class transaction_state_cmp
+{
+public:
+ transaction_state_cmp(const enum wsrep::transaction::state s)
+ : state_(s) { }
+ bool operator()(const std::pair<wsrep::client_id,
+ wsrep::client_state*>& vt) const
+ {
+ return vt.second->transaction().state() == state_;
+ }
+private:
+ enum wsrep::transaction::state state_;
+};
+
+void wsrep::server_state::close_orphaned_sr_transactions(
+ wsrep::unique_lock<wsrep::mutex>& lock,
+ wsrep::high_priority_service& high_priority_service)
+{
+ assert(lock.owns_lock());
+
+ // When the originator of an SR transaction leaves the primary
+ // component of the cluster, that SR must be rolled back. When two
+ // consecutive primary views have the same membership, the system
+ // may have been in a state with no primary components.
+ // Example with 2 node cluster:
+ // - (1,2 primary)
+ // - (1 non-primary) and (2 non-primary)
+ // - (1,2 primary)
+ // We need to rollback SRs owned by both 1 and 2.
+ // Notice that since the introduction of rollback_event_queue_,
+ // checking for equal consecutive views is no longer needed.
+ // However, we must keep it here for the time being, for backwards
+ // compatibility.
+ const bool equal_consecutive_views =
+ current_view_.equal_membership(previous_primary_view_);
+
+ if (current_view_.own_index() == -1 || equal_consecutive_views)
+ {
+ streaming_clients_map::iterator i;
+ transaction_state_cmp prepared_state_cmp(wsrep::transaction::s_prepared);
+ while ((i = std::find_if_not(streaming_clients_.begin(),
+ streaming_clients_.end(),
+ prepared_state_cmp))
+ != streaming_clients_.end())
+ {
+ wsrep::client_id client_id(i->first);
+ wsrep::transaction_id transaction_id(i->second->transaction().id());
+ // It is safe to unlock the server state temporarily here.
+ // The processing happens inside view handler which is
+ // protected by the provider commit ordering critical
+ // section. The lock must be unlocked temporarily to
+ // allow converting the current client to streaming
+ // applier in transaction::streaming_rollback().
+ // The iterator i may be invalidated when the server state
+ // remains unlocked, so it should not be accessed after
+ // the bf abort call.
+ lock.unlock();
+ i->second->total_order_bf_abort(current_view_.view_seqno());
+ lock.lock();
+ streaming_clients_map::const_iterator found_i;
+ while ((found_i = streaming_clients_.find(client_id)) !=
+ streaming_clients_.end() &&
+ found_i->second->transaction().id() == transaction_id)
+ {
+ cond_.wait(lock);
+ }
+ }
+ }
+
+ streaming_appliers_map::iterator i(streaming_appliers_.begin());
+ while (i != streaming_appliers_.end())
+ {
+ wsrep::high_priority_service* streaming_applier(i->second);
+
+ // Rollback SR on equal consecutive primary views or if its
+ // originator is not in the current view.
+ // Transactions in prepared state must be committed or
+ // rolled back explicitly, those are never rolled back here.
+ if ((streaming_applier->transaction().state() !=
+ wsrep::transaction::s_prepared) &&
+ (equal_consecutive_views ||
+ not current_view_.is_member(
+ streaming_applier->transaction().server_id())))
+ {
+ WSREP_LOG_DEBUG(wsrep::log::debug_log_level(),
+ wsrep::log::debug_level_server_state,
+ "Removing SR fragments for "
+ << i->first.first
+ << ", " << i->first.second);
+ wsrep::id server_id(i->first.first);
+ wsrep::transaction_id transaction_id(i->first.second);
+ int adopt_error;
+ if ((adopt_error = high_priority_service.adopt_transaction(
+ streaming_applier->transaction())))
+ {
+ log_adopt_error(streaming_applier->transaction());
+ }
+ // Even if the transaction adopt above fails, we roll back
+ // the transaction. Adopt error will leave stale entries
+ // in the streaming log which can be removed manually.
+ {
+ wsrep::high_priority_switch sw(high_priority_service,
+ *streaming_applier);
+ streaming_applier->rollback(
+ wsrep::ws_handle(), wsrep::ws_meta());
+ streaming_applier->after_apply();
+ }
+
+ streaming_appliers_.erase(i++);
+ server_service_.release_high_priority_service(streaming_applier);
+ high_priority_service.store_globals();
+ wsrep::ws_meta ws_meta(
+ wsrep::gtid(),
+ wsrep::stid(server_id, transaction_id, wsrep::client_id()),
+ wsrep::seqno::undefined(), 0);
+ lock.unlock();
+ if (adopt_error == 0)
+ {
+ high_priority_service.remove_fragments(ws_meta);
+ high_priority_service.commit(wsrep::ws_handle(transaction_id, 0),
+ ws_meta);
+ }
+ high_priority_service.after_apply();
+ lock.lock();
+ }
+ else
+ {
+ ++i;
+ }
+ }
+}
+
+void wsrep::server_state::close_transactions_at_disconnect(
+ wsrep::high_priority_service& high_priority_service)
+{
+ // Close streaming applier without removing fragments
+ // from fragment storage. When the server is started again,
+ // it must be able to recover ongoing streaming transactions.
+ streaming_appliers_map::iterator i(streaming_appliers_.begin());
+ while (i != streaming_appliers_.end())
+ {
+ wsrep::high_priority_service* streaming_applier(i->second);
+ {
+ wsrep::high_priority_switch sw(high_priority_service,
+ *streaming_applier);
+ streaming_applier->rollback(
+ wsrep::ws_handle(), wsrep::ws_meta());
+ streaming_applier->after_apply();
+ }
+ streaming_appliers_.erase(i++);
+ server_service_.release_high_priority_service(streaming_applier);
+ high_priority_service.store_globals();
+ }
+ streaming_appliers_recovered_ = false;
+}
+
+//
+// Rollback event queue
+//
+
+void wsrep::server_state::queue_rollback_event(
+ const wsrep::transaction_id& id)
+{
+ wsrep::unique_lock<wsrep::mutex> lock(mutex_);
+#ifndef NDEBUG
+ // Make sure we don't have duplicate
+ // transaction ids in rollback event queue.
+ // There is no need to do this in release
+ // build given that caller (streaming_rollback())
+ // should avoid duplicates.
+ for (auto i : rollback_event_queue_)
+ {
+ assert(id != i);
+ }
+#endif
+ rollback_event_queue_.push_back(id);
+}
+
+enum wsrep::provider::status
+wsrep::server_state::send_pending_rollback_events(
+ wsrep::unique_lock<wsrep::mutex>& lock WSREP_UNUSED)
+{
+ assert(lock.owns_lock());
+ while (not rollback_event_queue_.empty())
+ {
+ const wsrep::transaction_id& id(rollback_event_queue_.front());
+ const enum wsrep::provider::status status(provider().rollback(id));
+ if (status)
+ {
+ return status;
+ }
+ rollback_event_queue_.pop_front();
+ }
+ return wsrep::provider::success;
+}
+
+enum wsrep::provider::status
+wsrep::server_state::send_pending_rollback_events()
+{
+ wsrep::unique_lock<wsrep::mutex> lock(mutex_);
+ return send_pending_rollback_events(lock);
+}
+
+void wsrep::server_state::return_from_donor_state(
+ wsrep::unique_lock<wsrep::mutex>& lock)
+{
+ assert(lock.owns_lock());
+ // v26 API does not have JOINED event, so in anticipation of SYNCED
+ // we must do it here. Do not modify the state if donor lost the
+ // donor state e.g. due to cluster partitioning.
+ if (state(lock) == s_donor)
+ {
+ state(lock, s_joined);
+ }
+}