From 06eaf7232e9a920468c0f8d74dcf2fe8b555501c Mon Sep 17 00:00:00 2001
From: Daniel Baumann
Date: Sat, 13 Apr 2024 14:24:36 +0200
Subject: Adding upstream version 1:10.11.6.

Signed-off-by: Daniel Baumann
---
 wsrep-lib/src/transaction.cpp | 2171 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 2171 insertions(+)
 create mode 100644 wsrep-lib/src/transaction.cpp

diff --git a/wsrep-lib/src/transaction.cpp b/wsrep-lib/src/transaction.cpp
new file mode 100644
index 00000000..451e94dd
--- /dev/null
+++ b/wsrep-lib/src/transaction.cpp
@@ -0,0 +1,2171 @@
+/*
+ * Copyright (C) 2018 Codership Oy
+ *
+ * This file is part of wsrep-lib.
+ *
+ * Wsrep-lib is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Wsrep-lib is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with wsrep-lib. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "wsrep/transaction.hpp"
+#include "wsrep/client_state.hpp"
+#include "wsrep/server_state.hpp"
+#include "wsrep/storage_service.hpp"
+#include "wsrep/high_priority_service.hpp"
+#include "wsrep/key.hpp"
+#include "wsrep/logger.hpp"
+#include "wsrep/compiler.hpp"
+#include "wsrep/server_service.hpp"
+#include "wsrep/client_service.hpp"
+
+#include <cassert>
+#include <memory>
+#include <sstream>
+
+namespace
+{
+    class storage_service_deleter
+    {
+    public:
+        storage_service_deleter(wsrep::server_service& server_service)
+            : server_service_(server_service)
+        { }
+        void operator()(wsrep::storage_service* storage_service)
+        {
+            server_service_.release_storage_service(storage_service);
+        }
+    private:
+        wsrep::server_service& server_service_;
+    };
+
+    template <class D>
+    class scoped_storage_service
+    {
+    public:
+        scoped_storage_service(wsrep::client_service& client_service,
+                               wsrep::storage_service* storage_service,
+                               D deleter)
+            : client_service_(client_service)
+            , storage_service_(storage_service)
+            , deleter_(deleter)
+        {
+            if (storage_service_ == 0)
+            {
+                throw wsrep::runtime_error("Null storage_service provided");
+            }
+            client_service_.reset_globals();
+            storage_service_->store_globals();
+        }
+
+        wsrep::storage_service& storage_service()
+        {
+            return *storage_service_;
+        }
+
+        ~scoped_storage_service()
+        {
+            deleter_(storage_service_);
+            client_service_.store_globals();
+        }
+    private:
+        scoped_storage_service(const scoped_storage_service&);
+        scoped_storage_service& operator=(const scoped_storage_service&);
+        wsrep::client_service& client_service_;
+        wsrep::storage_service* storage_service_;
+        D deleter_;
+    };
+}
+
+// Public
+
+wsrep::transaction::transaction(
+    wsrep::client_state& client_state)
+    : server_service_(client_state.server_state().server_service())
+    , client_service_(client_state.client_service())
+    , client_state_(client_state)
+    , server_id_()
+    , id_(transaction_id::undefined())
+    , state_(s_executing)
+    , state_hist_()
+    , bf_abort_state_(s_executing)
+    , bf_abort_provider_status_()
+    , bf_abort_client_state_()
+    , bf_aborted_in_total_order_()
+    , ws_handle_()
+    , ws_meta_()
+    , flags_()
+    , implicit_deps_(false)
+    , certified_(false)
+    , fragments_certified_for_statement_()
+    , streaming_context_()
+    ,
sr_keys_() + , apply_error_buf_() + , xid_() + , streaming_rollback_in_progress_(false) + , is_bf_immutable_(false) +{ } + + +wsrep::transaction::~transaction() +{ +} + +int wsrep::transaction::start_transaction( + const wsrep::transaction_id& id) +{ + debug_log_state("start_transaction enter"); + assert(active() == false); + assert(is_xa() == false); + assert(flags() == 0); + server_id_ = client_state_.server_state().id(); + id_ = id; + state_ = s_executing; + state_hist_.clear(); + ws_handle_ = wsrep::ws_handle(id); + flags(wsrep::provider::flag::start_transaction); + switch (client_state_.mode()) + { + case wsrep::client_state::m_high_priority: + debug_log_state("start_transaction success"); + return 0; + case wsrep::client_state::m_local: + debug_log_state("start_transaction success"); + return provider().start_transaction(ws_handle_); + default: + debug_log_state("start_transaction error"); + assert(0); + return 1; + } +} + +int wsrep::transaction::start_transaction( + const wsrep::ws_handle& ws_handle, + const wsrep::ws_meta& ws_meta) +{ + debug_log_state("start_transaction enter"); + if (state() != s_replaying) + { + // assert(ws_meta.flags()); + assert(active() == false); + assert(flags() == 0); + server_id_ = ws_meta.server_id(); + id_ = ws_meta.transaction_id(); + assert(client_state_.mode() == wsrep::client_state::m_high_priority); + state_ = s_executing; + state_hist_.clear(); + ws_handle_ = ws_handle; + ws_meta_ = ws_meta; + flags(wsrep::provider::flag::start_transaction); + certified_ = true; + } + else + { + ws_meta_ = ws_meta; + assert(ws_meta_.flags() & wsrep::provider::flag::commit); + assert(active()); + assert(client_state_.mode() == wsrep::client_state::m_high_priority); + assert(state() == s_replaying); + assert(ws_meta_.seqno().is_undefined() == false); + certified_ = true; + } + debug_log_state("start_transaction leave"); + return 0; +} + +int wsrep::transaction::next_fragment( + const wsrep::ws_meta& ws_meta) +{ + debug_log_state("next_fragment enter"); + ws_meta_ = ws_meta; + debug_log_state("next_fragment leave"); + return 0; +} + +void wsrep::transaction::adopt(const wsrep::transaction& transaction) +{ + debug_log_state("adopt enter"); + assert(transaction.is_streaming()); + start_transaction(transaction.id()); + server_id_ = transaction.server_id_; + flags_ = transaction.flags(); + streaming_context_ = transaction.streaming_context(); + debug_log_state("adopt leave"); +} + +void wsrep::transaction::fragment_applied(wsrep::seqno seqno) +{ + assert(active()); + streaming_context_.applied(seqno); +} + +int wsrep::transaction::prepare_for_ordering( + const wsrep::ws_handle& ws_handle, + const wsrep::ws_meta& ws_meta, + bool is_commit) +{ + assert(active()); + + if (state_ != s_replaying) + { + ws_handle_ = ws_handle; + ws_meta_ = ws_meta; + certified_ = is_commit; + } + return 0; +} + +int wsrep::transaction::assign_read_view(const wsrep::gtid* const gtid) +{ + try + { + return provider().assign_read_view(ws_handle_, gtid); + } + catch (...) + { + wsrep::log_error() << "Failed to assign read view"; + return 1; + } +} + +int wsrep::transaction::append_key(const wsrep::key& key) +{ + assert(active()); + try + { + debug_log_key_append(key); + sr_keys_.insert(key); + return provider().append_key(ws_handle_, key); + } + catch (...) 
+ { + wsrep::log_error() << "Failed to append key"; + return 1; + } +} + +int wsrep::transaction::append_data(const wsrep::const_buffer& data) +{ + assert(active()); + return provider().append_data(ws_handle_, data); +} + +int wsrep::transaction::after_row() +{ + wsrep::unique_lock lock(client_state_.mutex()); + debug_log_state("after_row_enter"); + int ret(0); + if (streaming_context_.fragment_size() && + streaming_context_.fragment_unit() != streaming_context::statement) + { + ret = streaming_step(lock); + } + debug_log_state("after_row_leave"); + return ret; +} + +int wsrep::transaction::before_prepare( + wsrep::unique_lock& lock) +{ + assert(lock.owns_lock()); + int ret(0); + debug_log_state("before_prepare_enter"); + assert(state() == s_executing || state() == s_must_abort || + state() == s_replaying); + + if (state() == s_must_abort) + { + assert(client_state_.mode() == wsrep::client_state::m_local); + client_state_.override_error(wsrep::e_deadlock_error); + return 1; + } + + switch (client_state_.mode()) + { + case wsrep::client_state::m_local: + if (is_streaming()) + { + client_service_.debug_crash( + "crash_last_fragment_commit_before_fragment_removal"); + lock.unlock(); + if (client_service_.statement_allowed_for_streaming() == false) + { + client_state_.override_error( + wsrep::e_error_during_commit, + wsrep::provider::error_not_allowed); + ret = 1; + } + else if (!is_xa()) + { + // Note: we can't remove fragments here for XA, + // the transaction has already issued XA END and + // is in IDLE state, no more changes allowed! + ret = client_service_.remove_fragments(); + if (ret) + { + client_state_.override_error(wsrep::e_deadlock_error); + } + } + lock.lock(); + client_service_.debug_crash( + "crash_last_fragment_commit_after_fragment_removal"); + if (state() == s_must_abort) + { + client_state_.override_error(wsrep::e_deadlock_error); + ret = 1; + } + } + + if (ret == 0) + { + if (is_xa()) + { + // Force fragment replication on XA prepare + flags(flags() | wsrep::provider::flag::prepare); + pa_unsafe(true); + append_sr_keys_for_commit(); + const bool force_streaming_step = true; + ret = streaming_step(lock, force_streaming_step); + if (ret == 0) + { + assert(state() == s_executing); + state(lock, s_preparing); + } + } + else + { + ret = certify_commit(lock); + } + + assert((ret == 0 && state() == s_preparing) || + (state() == s_must_abort || + state() == s_must_replay || + state() == s_cert_failed)); + + if (ret) + { + assert(state() == s_must_replay || + client_state_.current_error()); + ret = 1; + } + } + + break; + case wsrep::client_state::m_high_priority: + // Note: fragment removal is done from applying + // context for high priority mode. 
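+        // Hence only the state transition is performed below.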
+ if (is_xa()) + { + assert(state() == s_executing || + state() == s_replaying); + if (state() == s_replaying) + { + break; + } + } + state(lock, s_preparing); + break; + default: + assert(0); + break; + } + + assert(state() == s_preparing || + (is_xa() && state() == s_replaying) || + (ret && (state() == s_must_abort || + state() == s_must_replay || + state() == s_cert_failed || + state() == s_aborted))); + debug_log_state("before_prepare_leave"); + return ret; +} + +int wsrep::transaction::after_prepare( + wsrep::unique_lock& lock) +{ + assert(lock.owns_lock()); + + int ret = 0; + debug_log_state("after_prepare_enter"); + if (is_xa()) + { + switch (state()) + { + case s_preparing: + assert(client_state_.mode() == wsrep::client_state::m_local || + (certified() && ordered())); + state(lock, s_prepared); + break; + case s_must_abort: + // prepared locally, but client has not received + // a result yet. We can still abort. + assert(client_state_.mode() == wsrep::client_state::m_local); + client_state_.override_error(wsrep::e_deadlock_error); + ret = 1; + break; + case s_replaying: + assert(client_state_.mode() == wsrep::client_state::m_high_priority); + break; + default: + assert(0); + } + } + else + { + assert(certified() && ordered()); + assert(state() == s_preparing || state() == s_must_abort); + + if (state() == s_must_abort) + { + assert(client_state_.mode() == wsrep::client_state::m_local); + state(lock, s_must_replay); + ret = 1; + } + else + { + state(lock, s_committing); + } + } + debug_log_state("after_prepare_leave"); + return ret; +} + +int wsrep::transaction::before_commit() +{ + int ret(1); + + wsrep::unique_lock lock(client_state_.mutex()); + debug_log_state("before_commit_enter"); + assert(client_state_.mode() != wsrep::client_state::m_toi); + assert(state() == s_executing || + state() == s_prepared || + state() == s_committing || + state() == s_must_abort || + state() == s_replaying); + assert((state() != s_committing && state() != s_replaying) || + certified()); + + switch (client_state_.mode()) + { + case wsrep::client_state::m_local: + if (state() == s_executing) + { + ret = before_prepare(lock) || after_prepare(lock); + assert((ret == 0 && + (state() == s_committing || state() == s_prepared)) + || + (state() == s_must_abort || + state() == s_must_replay || + state() == s_cert_failed || + state() == s_aborted)); + } + else if (state() != s_committing && state() != s_prepared) + { + assert(state() == s_must_abort); + if (certified() || + (is_xa() && is_streaming())) + { + state(lock, s_must_replay); + } + else + { + client_state_.override_error(wsrep::e_deadlock_error); + } + } + else + { + // 2PC commit, prepare was done before + ret = 0; + } + + if (ret == 0 && state() == s_prepared) + { + ret = certify_commit(lock); + assert((ret == 0 && state() == s_committing) || + (state() == s_must_abort || + state() == s_must_replay || + state() == s_cert_failed || + state() == s_prepared)); + } + + if (ret == 0) + { + assert(certified()); + assert(ordered()); + lock.unlock(); + client_service_.debug_sync("wsrep_before_commit_order_enter"); + enum wsrep::provider::status + status(provider().commit_order_enter(ws_handle_, ws_meta_)); + lock.lock(); + switch (status) + { + case wsrep::provider::success: + break; + case wsrep::provider::error_bf_abort: + if (state() != s_must_abort) + { + state(lock, s_must_abort); + } + state(lock, s_must_replay); + ret = 1; + break; + default: + ret = 1; + assert(0); + break; + } + } + break; + case wsrep::client_state::m_high_priority: + 
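+        // In high priority mode the write set has already passed
+        // certification on the originating node, so it is certified
+        // and ordered by the time execution reaches this point.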
assert(certified()); + assert(ordered()); + if (is_xa()) + { + assert(state() == s_prepared || + state() == s_replaying); + state(lock, s_committing); + ret = 0; + } + else if (state() == s_executing || state() == s_replaying) + { + ret = before_prepare(lock) || after_prepare(lock); + } + else + { + ret = 0; + } + lock.unlock(); + ret = ret || provider().commit_order_enter(ws_handle_, ws_meta_); + lock.lock(); + if (ret) + { + state(lock, s_must_abort); + state(lock, s_aborting); + } + break; + default: + assert(0); + break; + } + + if (ret == 0 && state() == s_committing) + { + is_bf_immutable_ = true; + } + + debug_log_state("before_commit_leave"); + return ret; +} + +int wsrep::transaction::ordered_commit() +{ + wsrep::unique_lock lock(client_state_.mutex()); + debug_log_state("ordered_commit_enter"); + assert(state() == s_committing); + assert(is_bf_immutable_); + assert(ordered()); + client_service_.debug_sync("wsrep_before_commit_order_leave"); + int ret(provider().commit_order_leave(ws_handle_, ws_meta_, + apply_error_buf_)); + client_service_.debug_sync("wsrep_after_commit_order_leave"); + // Should always succeed: + // 1) If before commit before succeeds, the transaction handle + // in the provider is guaranteed to exist and the commit + // has been ordered + // 2) The transaction which has been ordered for commit cannot be BF + // aborted anymore + // 3) The provider should always guarantee that the transactions which + // have been ordered for commit can finish committing. + // + // The exception here is a storage service transaction which is running + // in high priority mode. The fragment storage commit may get BF + // aborted in the provider after commit ordering has been + // established since the transaction is operating in streaming + // mode. 
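+    // In that case commit_order_leave() returns an error and the
+    // transaction is diverted to the rollback path below.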
+ if (ret) + { + assert(client_state_.mode() == wsrep::client_state::m_high_priority); + state(lock, s_must_abort); + state(lock, s_aborting); + } + else + { + state(lock, s_ordered_commit); + } + debug_log_state("ordered_commit_leave"); + return ret; +} + +int wsrep::transaction::after_commit() +{ + int ret(0); + + wsrep::unique_lock lock(client_state_.mutex()); + assert(is_bf_immutable_); + debug_log_state("after_commit_enter"); + assert(state() == s_ordered_commit); + + if (is_streaming()) + { + assert(client_state_.mode() == wsrep::client_state::m_local || + client_state_.mode() == wsrep::client_state::m_high_priority); + + if (is_xa()) + { + // XA fragment removal happens here, + // see comment in before_prepare + remove_fragments_in_storage_service_scope(lock); + } + + if (client_state_.mode() == wsrep::client_state::m_local) + { + lock.unlock(); + client_state_.server_state_.stop_streaming_client(&client_state_); + lock.lock(); + } + streaming_context_.cleanup(); + } + + switch (client_state_.mode()) + { + case wsrep::client_state::m_local: + ret = provider().release(ws_handle_); + break; + case wsrep::client_state::m_high_priority: + break; + default: + assert(0); + break; + } + assert(ret == 0); + state(lock, s_committed); + debug_log_state("after_commit_leave"); + return ret; +} + +int wsrep::transaction::before_rollback() +{ + wsrep::unique_lock lock(client_state_.mutex()); + debug_log_state("before_rollback_enter"); + assert(state() == s_executing || + state() == s_preparing || + state() == s_prepared || + state() == s_must_abort || + // Background rollbacker or rollback initiated from SE + state() == s_aborting || + state() == s_cert_failed || + state() == s_must_replay); + + switch (client_state_.mode()) + { + case wsrep::client_state::m_local: + if (is_streaming()) + { + client_service_.debug_sync("wsrep_before_SR_rollback"); + } + switch (state()) + { + case s_preparing: + // Error detected during prepare phase + state(lock, s_must_abort); + WSREP_FALLTHROUGH; + case s_prepared: + WSREP_FALLTHROUGH; + case s_executing: + // Voluntary rollback + if (is_streaming()) + { + streaming_rollback(lock); + } + state(lock, s_aborting); + break; + case s_must_abort: + if (certified()) + { + state(lock, s_must_replay); + } + else + { + if (is_streaming()) + { + streaming_rollback(lock); + } + state(lock, s_aborting); + } + break; + case s_cert_failed: + if (is_streaming()) + { + streaming_rollback(lock); + } + state(lock, s_aborting); + break; + case s_aborting: + if (is_streaming()) + { + streaming_rollback(lock); + } + break; + case s_must_replay: + break; + default: + assert(0); + break; + } + break; + case wsrep::client_state::m_high_priority: + // Rollback by rollback write set or BF abort + assert(state_ == s_executing || state_ == s_prepared || state_ == s_aborting); + if (state_ != s_aborting) + { + state(lock, s_aborting); + } + break; + default: + assert(0); + break; + } + + debug_log_state("before_rollback_leave"); + return 0; +} + +int wsrep::transaction::after_rollback() +{ + wsrep::unique_lock lock(client_state_.mutex()); + debug_log_state("after_rollback_enter"); + assert(state() == s_aborting || state() == s_must_replay); + + // Note that it would be technically more correct to + // remove fragments after TOI BF abort in before_rollback(), + // it seems to cause deadlocks and is done here instead. + // We assume that the application does not let the TOI + // to proceed until this method returns, e.g. by holding + // MDL locks. It is not clear how to enforce that though. 
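+    // The bf_aborted_in_total_order_ flag below is set in
+    // total_order_bf_abort() before it delegates to bf_abort().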
+ if (is_streaming() && bf_aborted_in_total_order_) + { + remove_fragments_in_storage_service_scope(lock); + } + + if (state() == s_aborting) + { + if (is_streaming()) + { + // We skip streaming context cleanup for replay because + // we want to remember if the transaction was streaming. + // See transaction::replay() + streaming_context_.cleanup(); + } + + state(lock, s_aborted); + } + + // Releasing the transaction from provider is postponed into + // after_statement() hook. Depending on DBMS system all the + // resources acquired by transaction may or may not be released + // during actual rollback. If the transaction has been ordered, + // releasing the commit ordering critical section should be + // also postponed until all resources have been released. + debug_log_state("after_rollback_leave"); + return 0; +} + +int wsrep::transaction::release_commit_order( + wsrep::unique_lock& lock) +{ + lock.unlock(); + int ret(provider().commit_order_enter(ws_handle_, ws_meta_)); + if (!ret) + { + server_service_.set_position(client_service_, ws_meta_.gtid()); + ret = provider().commit_order_leave(ws_handle_, ws_meta_, + apply_error_buf_); + } + // grabbing lock here, as set_position may call for sync wait in galera side + lock.lock(); + return ret; +} + +void wsrep::transaction::remove_fragments_in_storage_service_scope( + wsrep::unique_lock& lock) +{ + assert(lock.owns_lock()); + lock.unlock(); + { + scoped_storage_service + sr_scope( + client_service_, + server_service_.storage_service(client_service_), + storage_service_deleter(server_service_)); + wsrep::storage_service& storage_service( + sr_scope.storage_service()); + storage_service.adopt_transaction(*this); + storage_service.remove_fragments(); + storage_service.commit(wsrep::ws_handle(), wsrep::ws_meta()); + } + lock.lock(); +} + +int wsrep::transaction::after_statement() +{ + wsrep::unique_lock lock(client_state_.mutex()); + return after_statement(lock); +} + +int wsrep::transaction::after_statement(wsrep::unique_lock& lock) +{ + int ret(0); + debug_log_state("after_statement_enter"); + assert(lock.owns_lock()); + assert(client_state_.mode() == wsrep::client_state::m_local); + assert(state() == s_executing || + state() == s_prepared || + state() == s_committed || + state() == s_aborted || + state() == s_must_abort || + state() == s_cert_failed || + state() == s_must_replay); + + if (state() == s_executing && + streaming_context_.fragment_size() && + streaming_context_.fragment_unit() == streaming_context::statement) + { + ret = streaming_step(lock); + } + + switch (state()) + { + case s_executing: + // ? + break; + case s_prepared: + assert(is_xa()); + break; + case s_committed: + assert(is_streaming() == false); + break; + case s_must_abort: + case s_cert_failed: + // Error may be set already. 
For example, if fragment size + // exceeded the maximum size in certify_fragment(), then + // we already have wsrep::e_error_during_commit + if (client_state_.current_error() == wsrep::e_success) + { + client_state_.override_error(wsrep::e_deadlock_error); + } + lock.unlock(); + ret = client_service_.bf_rollback(); + lock.lock(); + if (state() != s_must_replay) + { + break; + } + // Continue to replay if rollback() changed the state to s_must_replay + WSREP_FALLTHROUGH; + case s_must_replay: + { + if (is_xa() && !ordered()) + { + ret = xa_replay_commit(lock); + } + else + { + ret = replay(lock); + } + break; + } + case s_aborted: + // Raise a deadlock error if the transaction was BF aborted and + // rolled back by client outside of transaction hooks. + if (bf_aborted() && client_state_.current_error() == wsrep::e_success && + !client_service_.is_xa_rollback()) + { + client_state_.override_error(wsrep::e_deadlock_error); + } + break; + default: + assert(0); + break; + } + + assert(state() == s_executing || + state() == s_prepared || + state() == s_committed || + state() == s_aborted || + state() == s_must_replay); + + if (state() == s_aborted) + { + if (ordered()) + { + ret = release_commit_order(lock); + } + lock.unlock(); + provider().release(ws_handle_); + lock.lock(); + } + + if (state() != s_executing && + (!client_service_.is_explicit_xa() || + client_state_.state() == wsrep::client_state::s_quitting)) + { + cleanup(); + } + fragments_certified_for_statement_ = 0; + debug_log_state("after_statement_leave"); + assert(ret == 0 || state() == s_aborted); + return ret; +} + +void wsrep::transaction::after_command_must_abort( + wsrep::unique_lock& lock) +{ + debug_log_state("after_command_must_abort enter"); + assert(active()); + assert(state_ == s_must_abort); + + if (is_xa() && is_streaming()) + { + state(lock, s_must_replay); + } + + lock.unlock(); + client_service_.bf_rollback(); + lock.lock(); + + if (is_xa() && is_streaming()) + { + xa_replay(lock); + } + else + { + client_state_.override_error(wsrep::e_deadlock_error); + } + + debug_log_state("after_command_must_abort leave"); +} + +void wsrep::transaction::after_applying() +{ + wsrep::unique_lock lock(client_state_.mutex_); + debug_log_state("after_applying enter"); + assert(state_ == s_executing || + state_ == s_prepared || + state_ == s_committed || + state_ == s_aborted || + state_ == s_replaying); + + if (state_ != s_executing && state_ != s_prepared) + { + if (state_ == s_replaying) state(lock, s_aborted); + cleanup(); + } + else + { + // State remains executing or prepared, so this is a streaming applier. + // Reset the meta data to avoid releasing commit order + // critical section above if the next fragment is rollback + // fragment. Rollback fragment ordering will be handled by + // another instance while removing the fragments from + // storage. 
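+        // After this reset ordered() returns false until the next
+        // fragment assigns new write set meta data.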
+ ws_meta_ = wsrep::ws_meta(); + } + debug_log_state("after_applying leave"); +} + +bool wsrep::transaction::bf_abort( + wsrep::unique_lock& lock, + wsrep::seqno bf_seqno) +{ + bool ret(false); + const enum wsrep::transaction::state state_at_enter(state()); + assert(lock.owns_lock()); + + if (active() == false) + { + WSREP_LOG_DEBUG(client_state_.debug_log_level(), + wsrep::log::debug_level_transaction, + "Transaction not active, skipping bf abort"); + } + else if (is_bf_immutable_) + { + WSREP_LOG_DEBUG(client_state_.debug_log_level(), + wsrep::log::debug_level_transaction, + "Transaction has become immutable for BF abort"); + } + else + { + switch (state_at_enter) + { + case s_executing: + case s_preparing: + case s_prepared: + case s_certifying: + case s_committing: + { + wsrep::seqno victim_seqno; + enum wsrep::provider::status + status(client_state_.provider().bf_abort( + bf_seqno, id_, victim_seqno)); + switch (status) + { + case wsrep::provider::success: + WSREP_LOG_DEBUG(client_state_.debug_log_level(), + wsrep::log::debug_level_transaction, + "Seqno " << bf_seqno + << " successfully BF aborted " << id_ + << " victim_seqno " << victim_seqno); + bf_abort_state_ = state_at_enter; + state(lock, s_must_abort); + ret = true; + break; + default: + WSREP_LOG_DEBUG(client_state_.debug_log_level(), + wsrep::log::debug_level_transaction, + "Seqno " << bf_seqno + << " failed to BF abort " << id_ + << " with status " << status + << " victim_seqno " << victim_seqno); + break; + } + break; + } + default: + WSREP_LOG_DEBUG(client_state_.debug_log_level(), + wsrep::log::debug_level_transaction, + "BF abort not allowed in state " + << wsrep::to_string(state_at_enter)); + break; + } + } + + if (ret) + { + bf_abort_client_state_ = client_state_.state(); + // If the transaction is in executing state, we must initiate + // streaming rollback to ensure that the rollback fragment gets + // replicated before the victim starts to roll back and release locks. + // In other states the BF abort will be detected outside of + // storage engine operations and streaming rollback will be + // handled from before_rollback() call. + if (client_state_.mode() == wsrep::client_state::m_local && + is_streaming() && state_at_enter == s_executing) + { + streaming_rollback(lock); + } + + if ((client_state_.state() == wsrep::client_state::s_idle && + client_state_.server_state().rollback_mode() == + wsrep::server_state::rm_sync) // locally processing idle + || + // high priority streaming + (client_state_.mode() == wsrep::client_state::m_high_priority && + is_streaming())) + { + // We need to change the state to aborting under the + // lock protection to avoid a race between client thread, + // otherwise it could happen that the client gains control + // between releasing the lock and before background + // rollbacker gets control. 
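+            // A BF aborted XA transaction in prepared state must
+            // replay instead of rolling back, hence the branch below.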
+            if (is_xa() && state_at_enter == s_prepared)
+            {
+                state(lock, s_must_replay);
+                client_state_.set_rollbacker_active(true);
+            }
+            else
+            {
+                state(lock, s_aborting);
+                client_state_.set_rollbacker_active(true);
+                if (client_state_.mode() == wsrep::client_state::m_high_priority)
+                {
+                    lock.unlock();
+                    client_state_.server_state().stop_streaming_applier(
+                        server_id_, id_);
+                    lock.lock();
+                }
+            }
+
+            server_service_.background_rollback(lock, client_state_);
+        }
+    }
+    return ret;
+}
+
+bool wsrep::transaction::total_order_bf_abort(
+    wsrep::unique_lock<wsrep::mutex>& lock WSREP_UNUSED,
+    wsrep::seqno bf_seqno)
+{
+    /* We must set this flag before entering bf_abort() in order
+     * for streaming_rollback() to work correctly. The flag will be
+     * unset if the BF abort was not allowed. Note that we rely on
+     * bf_abort() not to release the lock if the BF abort is not
+     * allowed. */
+    bf_aborted_in_total_order_ = true;
+    bool ret(bf_abort(lock, bf_seqno));
+    if (not ret)
+    {
+        bf_aborted_in_total_order_ = false;
+    }
+    return ret;
+}
+
+void wsrep::transaction::clone_for_replay(const wsrep::transaction& other)
+{
+    assert(other.state() == s_replaying);
+    id_ = other.id_;
+    xid_ = other.xid_;
+    server_id_ = other.server_id_;
+    ws_handle_ = other.ws_handle_;
+    ws_meta_ = other.ws_meta_;
+    streaming_context_ = other.streaming_context_;
+    state_ = s_replaying;
+}
+
+void wsrep::transaction::assign_xid(const wsrep::xid& xid)
+{
+    assert(active());
+    assert(!is_xa());
+    xid_ = xid;
+}
+
+int wsrep::transaction::restore_to_prepared_state(const wsrep::xid& xid)
+{
+    wsrep::unique_lock<wsrep::mutex> lock(client_state_.mutex_);
+    assert(active());
+    assert(is_empty());
+    assert(state() == s_executing || state() == s_replaying);
+    flags(flags() & ~wsrep::provider::flag::start_transaction);
+    if (state() == s_executing)
+    {
+        state(lock, s_certifying);
+    }
+    else
+    {
+        state(lock, s_preparing);
+    }
+    state(lock, s_prepared);
+    xid_ = xid;
+    return 0;
+}
+
+int wsrep::transaction::commit_or_rollback_by_xid(const wsrep::xid& xid,
+                                                  bool commit)
+{
+    debug_log_state("commit_or_rollback_by_xid enter");
+    wsrep::unique_lock<wsrep::mutex> lock(client_state_.mutex_);
+    wsrep::server_state& server_state(client_state_.server_state());
+    wsrep::high_priority_service* sa(server_state.find_streaming_applier(xid));
+
+    if (!sa)
+    {
+        assert(sa);
+        client_state_.override_error(wsrep::e_error_during_commit);
+        return 1;
+    }
+
+    if (commit)
+    {
+        flags(wsrep::provider::flag::commit);
+    }
+    else
+    {
+        flags(wsrep::provider::flag::rollback);
+    }
+    pa_unsafe(true);
+    wsrep::stid stid(sa->transaction().server_id(),
+                     sa->transaction().id(),
+                     client_state_.id());
+    wsrep::ws_meta meta(stid);
+
+    const enum wsrep::provider::status cert_ret(
+        provider().certify(client_state_.id(),
+                           ws_handle_,
+                           flags(),
+                           meta));
+
+    int ret;
+    if (cert_ret == wsrep::provider::success)
+    {
+        if (commit)
+        {
+            state(lock, s_certifying);
+            state(lock, s_committing);
+            state(lock, s_committed);
+        }
+        else
+        {
+            state(lock, s_aborting);
+            state(lock, s_aborted);
+        }
+        ret = 0;
+    }
+    else
+    {
+        client_state_.override_error(wsrep::e_error_during_commit,
+                                     cert_ret);
+        wsrep::log_error() << "Failed to commit_or_rollback_by_xid,"
+                           << " xid: " << xid
+                           << " error: " << cert_ret;
+        ret = 1;
+    }
+    debug_log_state("commit_or_rollback_by_xid leave");
+    return ret;
+}
+
+void wsrep::transaction::xa_detach()
+{
+    debug_log_state("xa_detach enter");
+    assert(state() == s_prepared ||
+           client_state_.state() == wsrep::client_state::s_quitting);
+    if (state() == s_prepared)
+    {
+        wsrep::server_state&
server_state(client_state_.server_state()); + server_state.convert_streaming_client_to_applier(&client_state_); + client_service_.store_globals(); + client_service_.cleanup_transaction(); + } + wsrep::unique_lock lock(client_state_.mutex_); + streaming_context_.cleanup(); + state(lock, s_aborting); + state(lock, s_aborted); + provider().release(ws_handle_); + cleanup(); + debug_log_state("xa_detach leave"); +} + +void wsrep::transaction::xa_replay_common(wsrep::unique_lock& lock) +{ + assert(lock.owns_lock()); + assert(is_xa()); + assert(is_streaming()); + assert(state() == s_must_replay); + assert(bf_aborted()); + + state(lock, s_replaying); + + enum wsrep::provider::status status; + wsrep::server_state& server_state(client_state_.server_state()); + + lock.unlock(); + server_state.convert_streaming_client_to_applier(&client_state_); + status = client_service_.replay_unordered(); + client_service_.store_globals(); + lock.lock(); + + if (status != wsrep::provider::success) + { + client_service_.emergency_shutdown(); + } +} + +int wsrep::transaction::xa_replay(wsrep::unique_lock& lock) +{ + debug_log_state("xa_replay enter"); + xa_replay_common(lock); + state(lock, s_aborted); + streaming_context_.cleanup(); + provider().release(ws_handle_); + cleanup(); + client_service_.signal_replayed(); + debug_log_state("xa_replay leave"); + return 0; +} + +int wsrep::transaction::xa_replay_commit(wsrep::unique_lock& lock) +{ + debug_log_state("xa_replay_commit enter"); + xa_replay_common(lock); + lock.unlock(); + enum wsrep::provider::status status(client_service_.commit_by_xid()); + lock.lock(); + int ret(1); + switch (status) + { + case wsrep::provider::success: + state(lock, s_committed); + streaming_context_.cleanup(); + provider().release(ws_handle_); + cleanup(); + ret = 0; + break; + default: + log_warning() << "Failed to commit by xid during replay"; + // Commit by xid failed, return a commit + // error and let the client retry + state(lock, s_preparing); + state(lock, s_prepared); + client_state_.override_error(wsrep::e_error_during_commit, status); + } + + client_service_.signal_replayed(); + debug_log_state("xa_replay_commit leave"); + return ret; +} + +//////////////////////////////////////////////////////////////////////////////// +// Private // +//////////////////////////////////////////////////////////////////////////////// + +inline wsrep::provider& wsrep::transaction::provider() +{ + return client_state_.server_state().provider(); +} + +void wsrep::transaction::state( + wsrep::unique_lock& lock __attribute__((unused)), + enum wsrep::transaction::state next_state) +{ + WSREP_LOG_DEBUG(client_state_.debug_log_level(), + wsrep::log::debug_level_transaction, + "client: " << client_state_.id().get() + << " txc: " << id().get() + << " state: " << to_string(state_) + << " -> " << to_string(next_state)); + + assert(lock.owns_lock()); + // BF aborter is allowed to change the state without gaining control + // to the state if the next state is s_must_abort, s_aborting or + // s_must_replay (for xa idle replay). 
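+    //
+    // Abbreviations in the transition table below: ex=executing,
+    // pg=preparing, pd=prepared, ce=certifying, co=committing,
+    // oc=ordered_commit, ct=committed, cf=cert_failed, ma=must_abort,
+    // ab=aborting, ad=aborted, mr=must_replay, re=replaying.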
+ assert(client_state_.owning_thread_id_ == wsrep::this_thread::get_id() || + next_state == s_must_abort || + next_state == s_must_replay || + next_state == s_aborting); + + static const char allowed[n_states][n_states] = + { /* ex pg pd ce co oc ct cf ma ab ad mr re */ + { 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0}, /* ex */ + { 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0}, /* pg */ + { 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0}, /* pd */ + { 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0}, /* ce */ + { 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0}, /* co */ + { 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}, /* oc */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, /* ct */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0}, /* cf */ + { 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0}, /* ma */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0}, /* ab */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, /* ad */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}, /* mr */ + { 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0} /* re */ + }; + + if (!allowed[state_][next_state]) + { + wsrep::log_debug() << "unallowed state transition for transaction " + << id_ << ": " << wsrep::to_string(state_) + << " -> " << wsrep::to_string(next_state); + assert(0); + } + + state_hist_.push_back(state_); + if (state_hist_.size() == 12) + { + state_hist_.erase(state_hist_.begin()); + } + state_ = next_state; + + if (state_ == s_must_replay) + { + client_service_.will_replay(); + } +} + +bool wsrep::transaction::abort_or_interrupt( + wsrep::unique_lock& lock) +{ + assert(lock.owns_lock()); + if (state() == s_must_abort) + { + client_state_.override_error(wsrep::e_deadlock_error); + return true; + } + else if (state() == s_aborting || state() == s_aborted) + { + // Somehow we got there after BF abort without setting error + // status. This may happen if the DBMS side aborts the transaction + // but still tries to continue processing rows. Raise the error here + // and assert so that the error will be caught in debug builds. + if (bf_abort_client_state_ && + client_state_.current_error() == wsrep::e_success) + { + client_state_.override_error(wsrep::e_deadlock_error); + assert(0); + } + return true; + } + else if (client_service_.interrupted(lock)) + { + client_state_.override_error(wsrep::e_interrupted_error); + if (state() != s_must_abort) + { + state(lock, s_must_abort); + } + return true; + } + return false; +} + +int wsrep::transaction::streaming_step(wsrep::unique_lock& lock, + bool force) +{ + assert(lock.owns_lock()); + assert(streaming_context_.fragment_size() || is_xa()); + + if (client_service_.bytes_generated() < + streaming_context_.log_position()) + { + /* Something went wrong on DBMS side in keeping track of + generated bytes. Return an error to abort the transaction. */ + wsrep::log_warning() << "Bytes generated " + << client_service_.bytes_generated() + << " less than bytes certified " + << streaming_context_.log_position() + << ", aborting streaming transaction"; + return 1; + } + int ret(0); + + const size_t bytes_to_replicate(client_service_.bytes_generated() - + streaming_context_.log_position()); + + switch (streaming_context_.fragment_unit()) + { + case streaming_context::row: + WSREP_FALLTHROUGH; + case streaming_context::statement: + streaming_context_.increment_unit_counter(1); + break; + case streaming_context::bytes: + streaming_context_.set_unit_counter(bytes_to_replicate); + break; + } + + // Some statements have no effect. Do not atttempt to + // replicate a fragment if no data has been generated since + // last fragment replication. 
+    // We use `force = true` on XA prepare: a fragment will be
+    // generated even if no data is pending replication.
+    if (bytes_to_replicate <= 0 && !force)
+    {
+        assert(bytes_to_replicate == 0);
+        return ret;
+    }
+
+    if (streaming_context_.fragment_size_exceeded() || force)
+    {
+        streaming_context_.reset_unit_counter();
+        ret = certify_fragment(lock);
+    }
+
+    return ret;
+}
+
+int wsrep::transaction::certify_fragment(
+    wsrep::unique_lock<wsrep::mutex>& lock)
+{
+    assert(lock.owns_lock());
+
+    assert(client_state_.mode() == wsrep::client_state::m_local);
+    assert(streaming_context_.rolled_back() == false ||
+           state() == s_must_abort);
+
+    client_service_.wait_for_replayers(lock);
+    if (abort_or_interrupt(lock))
+    {
+        return 1;
+    }
+
+    state(lock, s_certifying);
+    lock.unlock();
+    client_service_.debug_sync("wsrep_before_fragment_certification");
+
+    enum wsrep::provider::status status(
+        client_state_.server_state_.send_pending_rollback_events());
+    if (status)
+    {
+        wsrep::log_warning()
+            << "Failed to replicate pending rollback events: "
+            << status << " ("
+            << wsrep::provider::to_string(status) << ")";
+        lock.lock();
+        state(lock, s_must_abort);
+        return 1;
+    }
+
+    wsrep::mutable_buffer data;
+    size_t log_position(0);
+    if (client_service_.prepare_fragment_for_replication(data, log_position))
+    {
+        lock.lock();
+        state(lock, s_must_abort);
+        client_state_.override_error(wsrep::e_error_during_commit);
+        return 1;
+    }
+    streaming_context_.set_log_position(log_position);
+
+    if (data.size() == 0)
+    {
+        wsrep::log_warning() << "Attempt to replicate empty data buffer";
+        lock.lock();
+        state(lock, s_executing);
+        return 0;
+    }
+
+    if (provider().append_data(ws_handle_,
+                               wsrep::const_buffer(data.data(), data.size())))
+    {
+        lock.lock();
+        state(lock, s_must_abort);
+        client_state_.override_error(wsrep::e_error_during_commit);
+        return 1;
+    }
+
+    if (is_xa())
+    {
+        // One more check to see if the transaction
+        // has been aborted. This is necessary because
+        // only the append_data() call above makes sure
+        // that the transaction exists in the provider.
+        lock.lock();
+        if (abort_or_interrupt(lock))
+        {
+            return 1;
+        }
+        lock.unlock();
+    }
+
+    if (is_streaming() == false)
+    {
+        client_state_.server_state_.start_streaming_client(&client_state_);
+    }
+
+    if (implicit_deps())
+    {
+        flags(flags() | wsrep::provider::flag::implicit_deps);
+    }
+
+    int ret(0);
+    enum wsrep::client_error error(wsrep::e_success);
+    enum wsrep::provider::status cert_ret(wsrep::provider::success);
+    // Storage service scope
+    {
+        scoped_storage_service<storage_service_deleter>
+            sr_scope(
+                client_service_,
+                server_service_.storage_service(client_service_),
+                storage_service_deleter(server_service_));
+        wsrep::storage_service& storage_service(
+            sr_scope.storage_service());
+
+        // First the fragment is appended to the stable storage.
+        // This is done to ensure that there is enough capacity
+        // available to store the fragment. The fragment meta data
+        // is updated after certification.
+        wsrep::id server_id(client_state_.server_state().id());
+
+        if (server_id.is_undefined()) {
+            // Server disconnected from cluster, do not
+            // append a fragment with undefined server_id.
+            ret = 1;
+            error = wsrep::e_append_fragment_error;
+        }
+
+        if (ret == 0)
+        {
+            ret = storage_service.start_transaction(ws_handle_);
+            if (ret)
+            {
+                error = wsrep::e_append_fragment_error;
+            }
+        }
+
+        if (ret == 0)
+        {
+            ret = storage_service.append_fragment(
+                server_id, id(), flags(),
+                wsrep::const_buffer(data.data(), data.size()), xid());
+            if (ret)
+            {
+                error = wsrep::e_append_fragment_error;
+                storage_service.rollback(wsrep::ws_handle(), wsrep::ws_meta());
+            }
+        }
+
+        if (ret == 0)
+        {
+            client_service_.debug_crash(
+                "crash_replicate_fragment_before_certify");
+
+            wsrep::ws_meta sr_ws_meta;
+            cert_ret = provider().certify(client_state_.id(),
+                                          ws_handle_,
+                                          flags(),
+                                          sr_ws_meta);
+            client_service_.debug_crash(
+                "crash_replicate_fragment_after_certify");
+
+            switch (cert_ret)
+            {
+            case wsrep::provider::success:
+                ++fragments_certified_for_statement_;
+                assert(sr_ws_meta.seqno().is_undefined() == false);
+                streaming_context_.certified();
+                if (storage_service.update_fragment_meta(sr_ws_meta))
+                {
+                    storage_service.rollback(wsrep::ws_handle(),
+                                             wsrep::ws_meta());
+                    ret = 1;
+                    error = wsrep::e_deadlock_error;
+                    break;
+                }
+                if (storage_service.commit(ws_handle_, sr_ws_meta))
+                {
+                    ret = 1;
+                    error = wsrep::e_deadlock_error;
+                }
+                else
+                {
+                    streaming_context_.stored(sr_ws_meta.seqno());
+                }
+                client_service_.debug_crash(
+                    "crash_replicate_fragment_success");
+                break;
+            case wsrep::provider::error_bf_abort:
+            case wsrep::provider::error_certification_failed:
+                // The streaming transaction got BF aborted, so it must roll
+                // back. Roll back the fragment storage operation out of
+                // order, as the commit order will be grabbed later on
+                // during the rollback process. Mark the fragment as certified
+                // though in streaming context in order to enter the streaming
+                // rollback codepath.
+                //
+                // Note that even though we handle error_certification_failed
+                // here, we mark the transaction as streaming. Apparently
+                // the provider may return a status corresponding to
+                // certification failure even if the fragment has passed
+                // certification. This may be a bug in the provider
+                // implementation or a limitation of the error codes defined
+                // in wsrep-API. In order to make sure that the transaction
+                // will be cleaned up on other servers, we take the risk of
+                // sending one rollback fragment for nothing.
+                storage_service.rollback(wsrep::ws_handle(),
+                                         wsrep::ws_meta());
+                streaming_context_.certified();
+                ret = 1;
+                error = wsrep::e_deadlock_error;
+                break;
+            default:
+                // Storage service rollback must be done out of order,
+                // otherwise there may be a deadlock between the BF aborter
+                // and the rollback process.
+                storage_service.rollback(wsrep::ws_handle(), wsrep::ws_meta());
+                ret = 1;
+                error = (cert_ret == wsrep::provider::error_size_exceeded ?
+                         wsrep::e_size_exceeded_error :
+                         wsrep::e_deadlock_error);
+                break;
+            }
+        }
+    }
+
+    // Note: This does not release the handle in the provider
+    // since streaming is still on. However it is needed to
+    // make the provider's internal state transition for the
+    // next fragment. If any of the operations above failed,
+    // the handle needs to be left unreleased for the following
+    // rollback process.
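+    // Hence provider().release() below is called only on the
+    // success path.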
+ if (ret == 0) + { + assert(error == wsrep::e_success); + ret = provider().release(ws_handle_); + if (ret) + { + error = wsrep::e_deadlock_error; + } + } + lock.lock(); + if (ret) + { + assert(error != wsrep::e_success); + if (is_streaming() == false) + { + lock.unlock(); + client_state_.server_state_.stop_streaming_client(&client_state_); + lock.lock(); + } + else + { + streaming_rollback(lock); + } + if (state_ != s_must_abort) + { + state(lock, s_must_abort); + } + client_state_.override_error(error, cert_ret); + } + else if (state_ == s_must_abort) + { + if (is_streaming()) + { + streaming_rollback(lock); + } + client_state_.override_error(wsrep::e_deadlock_error, cert_ret); + ret = 1; + } + else + { + assert(state_ == s_certifying); + state(lock, s_executing); + flags(flags() & ~wsrep::provider::flag::start_transaction); + flags(flags() & ~wsrep::provider::flag::pa_unsafe); + } + return ret; +} + +int wsrep::transaction::certify_commit( + wsrep::unique_lock& lock) +{ + assert(lock.owns_lock()); + assert(active()); + client_service_.wait_for_replayers(lock); + + assert(lock.owns_lock()); + + if (abort_or_interrupt(lock)) + { + if (is_xa() && state() == s_must_abort) + { + state(lock, s_must_replay); + } + return 1; + } + + state(lock, s_certifying); + lock.unlock(); + + enum wsrep::provider::status status( + client_state_.server_state_.send_pending_rollback_events()); + if (status) + { + wsrep::log_warning() + << "Failed to replicate pending rollback events: " + << status << " (" + << wsrep::provider::to_string(status) << ")"; + + // We failed to replicate some pending rollback fragment. + // Meaning that some transaction that was rolled back + // locally might still be active out there in the cluster. + // To avoid a potential BF-BF conflict, we need to abort + // and give up on this one. + // Notice that we can't abort a prepared XA that wants to + // commit. Fortunately, there is no need to in this case: + // the commit fragment for XA does not cause any changes and + // can't possibly conflict with other transactions out there. + if (!is_xa()) + { + lock.lock(); + state(lock, s_must_abort); + return 1; + } + } + + if (is_streaming()) + { + if (!is_xa()) + { + append_sr_keys_for_commit(); + } + pa_unsafe(true); + } + + if (implicit_deps()) + { + flags(flags() | wsrep::provider::flag::implicit_deps); + } + + flags(flags() | wsrep::provider::flag::commit); + flags(flags() & ~wsrep::provider::flag::prepare); + + if (client_service_.prepare_data_for_replication()) + { + lock.lock(); + // Here we fake that the size exceeded error came from provider, + // even though it came from the client service. This requires + // some consideration how to get meaningful error codes from + // the client service. 
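+        // The provider status recorded here only affects error
+        // reporting; the transaction is aborted either way.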
+        client_state_.override_error(wsrep::e_size_exceeded_error,
+                                     wsrep::provider::error_size_exceeded);
+        if (state_ != s_must_abort)
+        {
+            state(lock, s_must_abort);
+        }
+        return 1;
+    }
+
+    client_service_.debug_sync("wsrep_before_certification");
+    enum wsrep::provider::status
+        cert_ret(provider().certify(client_state_.id(),
+                                    ws_handle_,
+                                    flags(),
+                                    ws_meta_));
+    client_service_.debug_sync("wsrep_after_certification");
+
+    lock.lock();
+
+    assert(state() == s_certifying || state() == s_must_abort);
+
+    int ret(1);
+    switch (cert_ret)
+    {
+    case wsrep::provider::success:
+        assert(ordered());
+        certified_ = true;
+        ++fragments_certified_for_statement_;
+        switch (state())
+        {
+        case s_certifying:
+            if (is_xa())
+            {
+                state(lock, s_committing);
+            }
+            else
+            {
+                state(lock, s_preparing);
+            }
+            ret = 0;
+            break;
+        case s_must_abort:
+            // We got BF aborted after successful certification
+            // and before acquiring the client state lock. The
+            // transaction must be replayed.
+            state(lock, s_must_replay);
+            break;
+        default:
+            assert(0);
+            break;
+        }
+        break;
+    case wsrep::provider::error_warning:
+        assert(ordered() == false);
+        state(lock, s_must_abort);
+        client_state_.override_error(wsrep::e_error_during_commit, cert_ret);
+        break;
+    case wsrep::provider::error_transaction_missing:
+        state(lock, s_must_abort);
+        // The execution should never reach this point if the
+        // transaction has not generated any keys or data.
+        wsrep::log_warning() << "Transaction was missing in provider";
+        client_state_.override_error(wsrep::e_error_during_commit, cert_ret);
+        break;
+    case wsrep::provider::error_bf_abort:
+        // The transaction was replicated successfully and it was either
+        // certified successfully or the result of certification is not
+        // yet known. Therefore the transaction must roll back
+        // and go through replay, either to replay and commit the whole
+        // transaction or to determine the failed certification status.
+        if (state() != s_must_abort)
+        {
+            state(lock, s_must_abort);
+        }
+        state(lock, s_must_replay);
+        break;
+    case wsrep::provider::error_certification_failed:
+        state(lock, s_cert_failed);
+        client_state_.override_error(wsrep::e_deadlock_error);
+        break;
+    case wsrep::provider::error_size_exceeded:
+        state(lock, s_must_abort);
+        client_state_.override_error(wsrep::e_error_during_commit, cert_ret);
+        break;
+    case wsrep::provider::error_connection_failed:
+        // The Galera provider may return CONN_FAIL if the trx is
+        // BF aborted O_o. If we see here that the trx was BF aborted,
+        // return a deadlock error instead of an error during commit
+        // to reduce the number of error state combinations elsewhere.
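+        // If the transaction was BF aborted, the failure is handled
+        // like a regular BF abort below; otherwise it is reported as
+        // an error during commit.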
+ if (state() == s_must_abort) + { + if (is_xa()) + { + state(lock, s_must_replay); + } + client_state_.override_error(wsrep::e_deadlock_error); + } + else + { + client_state_.override_error(wsrep::e_error_during_commit, + cert_ret); + if (is_xa()) + { + state(lock, s_prepared); + } + else + { + state(lock, s_must_abort); + } + } + break; + case wsrep::provider::error_provider_failed: + if (state() != s_must_abort) + { + state(lock, s_must_abort); + } + client_state_.override_error(wsrep::e_error_during_commit, cert_ret); + break; + case wsrep::provider::error_fatal: + client_state_.override_error(wsrep::e_error_during_commit, cert_ret); + state(lock, s_must_abort); + client_service_.emergency_shutdown(); + break; + case wsrep::provider::error_not_implemented: + case wsrep::provider::error_not_allowed: + client_state_.override_error(wsrep::e_error_during_commit, cert_ret); + state(lock, s_must_abort); + wsrep::log_warning() << "Certification operation was not allowed: " + << "id: " << id().get() + << " flags: " << std::hex << flags() << std::dec; + break; + default: + state(lock, s_must_abort); + client_state_.override_error(wsrep::e_error_during_commit, cert_ret); + break; + } + + return ret; +} + +int wsrep::transaction::append_sr_keys_for_commit() +{ + int ret(0); + assert(client_state_.mode() == wsrep::client_state::m_local); + for (wsrep::sr_key_set::branch_type::const_iterator + i(sr_keys_.root().begin()); + ret == 0 && i != sr_keys_.root().end(); ++i) + { + for (wsrep::sr_key_set::leaf_type::const_iterator + j(i->second.begin()); + ret == 0 && j != i->second.end(); ++j) + { + wsrep::key key(wsrep::key::shared); + key.append_key_part(i->first.data(), i->first.size()); + key.append_key_part(j->data(), j->size()); + ret = provider().append_key(ws_handle_, key); + } + } + return ret; +} + +void wsrep::transaction::streaming_rollback( + wsrep::unique_lock& lock) +{ + debug_log_state("streaming_rollback enter"); + assert(state_ != s_must_replay); + assert(is_streaming()); + assert(lock.owns_lock()); + + // Prevent streaming_rollback() to be executed simultaneously. + // Notice that lock is unlocked when calling into server_state + // methods, to avoid violating lock order. + // The condition variable below prevents a thread to go + // through streaming_rollback() while another thread is busy + // stopping or converting the streaming_client(). + // This would be problematic if a thread is performing BF abort, + // while the original client manages to complete its rollback + // and therefore change the state of the transaction, causing + // assertions to fire. + while (streaming_rollback_in_progress_) + client_state_.cond_.wait(lock); + streaming_rollback_in_progress_ = true; + + if (streaming_context_.rolled_back() == false) + { + // Note that streaming_context_ must not be cleaned up in this + // method. This is because the owning thread may still be executing + // fragment removal on commit, which will access fragment + // vector in streaming context. Clearing streaming context + // here may cause owning thread to access memory which was + // already freed. Cleanup for streaming_context_ will happen + // in after_rollback(). + + if (bf_aborted_in_total_order_) + { + lock.unlock(); + server_service_.debug_sync("wsrep_streaming_rollback"); + client_state_.server_state_.stop_streaming_client(&client_state_); + lock.lock(); + } + else + { + // Create a high priority applier which will handle the + // rollback fragment or clean up on configuration change. 
+ // Adopt transaction will copy fragment set and appropriate + // meta data. + lock.unlock(); + server_service_.debug_sync("wsrep_streaming_rollback"); + client_state_.server_state_.convert_streaming_client_to_applier( + &client_state_); + lock.lock(); + + enum wsrep::provider::status status(provider().rollback(id_)); + if (status) + { + lock.unlock(); + client_state_.server_state_.queue_rollback_event(id_); + lock.lock(); + wsrep::log_debug() + << "Failed to replicate rollback fragment for " << id_ + << ": " << status << " ( " + << wsrep::provider::to_string(status) << ")"; + } + } + + // Mark the streaming context as rolled back, + // so that this block is executed once. + streaming_context_.rolled_back(id_); + } + + debug_log_state("streaming_rollback leave"); + streaming_rollback_in_progress_ = false; + client_state_.cond_.notify_all(); +} + +int wsrep::transaction::replay(wsrep::unique_lock& lock) +{ + int ret(0); + state(lock, s_replaying); + // Need to remember streaming state before replay, entering + // after_commit() after succesful replay will clear + // fragments. + const bool was_streaming(is_streaming()); + lock.unlock(); + client_service_.debug_sync("wsrep_before_replay"); + enum wsrep::provider::status replay_ret(client_service_.replay()); + client_service_.signal_replayed(); + if (was_streaming) + { + client_state_.server_state_.stop_streaming_client(&client_state_); + } + lock.lock(); + switch (replay_ret) + { + case wsrep::provider::success: + if (state() == s_replaying) + { + // Replay was done by using different client state, adjust state + // to committed. + state(lock, s_committed); + } + if (is_streaming()) + { + streaming_context_.cleanup(); + } + provider().release(ws_handle_); + break; + case wsrep::provider::error_certification_failed: + client_state_.override_error( + wsrep::e_deadlock_error); + if (is_streaming()) + { + lock.unlock(); + client_service_.remove_fragments(); + lock.lock(); + streaming_context_.cleanup(); + } + state(lock, s_aborted); + ret = 1; + break; + default: + client_service_.emergency_shutdown(); + break; + } + + WSREP_LOG_DEBUG( + client_state_.debug_log_level(), wsrep::log::debug_level_transaction, + "replay returned: " << replay_ret << " (" + << wsrep::provider::to_string(replay_ret) << ")"); + return ret; +} + +void wsrep::transaction::cleanup() +{ + debug_log_state("cleanup_enter"); + assert(state() == s_committed || state() == s_aborted); + id_ = wsrep::transaction_id::undefined(); + ws_handle_ = wsrep::ws_handle(); + // Keep the state history for troubleshooting. Reset + // at start_transaction(). 
+ // state_hist_.clear(); + if (ordered()) + { + client_state_.update_last_written_gtid(ws_meta_.gtid()); + } + bf_abort_state_ = s_executing; + bf_abort_provider_status_ = wsrep::provider::success; + bf_abort_client_state_ = 0; + bf_aborted_in_total_order_ = false; + ws_meta_ = wsrep::ws_meta(); + flags_ = 0; + certified_ = false; + implicit_deps_ = false; + sr_keys_.clear(); + streaming_context_.cleanup(); + client_service_.cleanup_transaction(); + apply_error_buf_.clear(); + xid_.clear(); + is_bf_immutable_ = false; + debug_log_state("cleanup_leave"); +} + +void wsrep::transaction::debug_log_state( + const char* context) const +{ + WSREP_LOG_DEBUG( + client_state_.debug_log_level(), + wsrep::log::debug_level_transaction, + context + << "\n server: " << server_id_ + << ", client: " << int64_t(client_state_.id().get()) + << ", state: " << wsrep::to_c_string(client_state_.state()) + << ", mode: " << wsrep::to_c_string(client_state_.mode()) + << "\n trx_id: " << int64_t(id_.get()) + << ", seqno: " << ws_meta_.seqno().get() + << ", flags: " << flags() + << "\n" + << " state: " << wsrep::to_c_string(state_) + << ", bfa_state: " << wsrep::to_c_string(bf_abort_state_) + << ", error: " << wsrep::to_c_string(client_state_.current_error()) + << ", status: " << client_state_.current_error_status() + << "\n" + << " is_sr: " << is_streaming() + << ", frags: " << streaming_context_.fragments_certified() + << ", frags size: " << streaming_context_.fragments().size() + << ", unit: " << streaming_context_.fragment_unit() + << ", size: " << streaming_context_.fragment_size() + << ", counter: " << streaming_context_.unit_counter() + << ", log_pos: " << streaming_context_.log_position() + << ", sr_rb: " << streaming_context_.rolled_back() + << "\n own: " << (client_state_.owning_thread_id_ == wsrep::this_thread::get_id()) + << " thread_id: " << client_state_.owning_thread_id_ + << ""); +} + +void wsrep::transaction::debug_log_key_append(const wsrep::key& key) const +{ + WSREP_LOG_DEBUG(client_state_.debug_log_level(), + wsrep::log::debug_level_transaction, + "key_append: " + << "trx_id: " + << int64_t(id().get()) + << " append key:\n" << key); +} + +std::ostream& wsrep::operator<<(std::ostream& os, + enum wsrep::transaction::state state) +{ + return (os << to_c_string(state)); +} -- cgit v1.2.3