1 files changed, 2870 insertions, 0 deletions
diff --git a/src/msg/async/ProtocolV2.cc b/src/msg/async/ProtocolV2.cc
new file mode 100644
index 00000000..381d42c3
--- /dev/null
+++ b/src/msg/async/ProtocolV2.cc
@@ -0,0 +1,2870 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <type_traits>
+
+#include "ProtocolV2.h"
+#include "AsyncMessenger.h"
+
+#include "common/EventTrace.h"
+#include "common/ceph_crypto.h"
+#include "common/errno.h"
+#include "include/random.h"
+#include "auth/AuthClient.h"
+#include "auth/AuthServer.h"
+
+#define dout_subsys ceph_subsys_ms
+#undef dout_prefix
+#define dout_prefix _conn_prefix(_dout)
+ostream &ProtocolV2::_conn_prefix(std::ostream *_dout) {
+  return *_dout << "--2- " << messenger->get_myaddrs() << " >> "
+                << *connection->peer_addrs << " conn(" << connection << " "
+                << this
+		<< " " << ceph_con_mode_name(auth_meta->con_mode)
+		<< " :" << connection->port
+                << " s=" << get_state_name(state) << " pgs=" << peer_global_seq
+                << " cs=" << connect_seq << " l=" << connection->policy.lossy
+                << " rev1=" << HAVE_MSGR2_FEATURE(peer_supported_features,
+                                                  REVISION_1)
+                << " rx=" << session_stream_handlers.rx.get()
+                << " tx=" << session_stream_handlers.tx.get()
+                << ").";
+}
+
+using namespace ceph::msgr::v2;
+
+using CtPtr = Ct<ProtocolV2> *;
+using CtRef = Ct<ProtocolV2> &;
+
+void ProtocolV2::run_continuation(CtPtr pcontinuation) {
+  if (pcontinuation) {
+    run_continuation(*pcontinuation);
+  }
+}
+
+void ProtocolV2::run_continuation(CtRef continuation) {
+  try {
+    CONTINUATION_RUN(continuation)
+  } catch (const buffer::error &e) {
+    lderr(cct) << __func__ << " failed decoding of frame header: " << e
+               << dendl;
+    _fault();
+  } catch (const ceph::crypto::onwire::MsgAuthError &e) {
+    lderr(cct) << __func__ << " " << e.what() << dendl;
+    _fault();
+  } catch (const DecryptionError &) {
+    lderr(cct) << __func__ << " failed to decrypt frame payload" << dendl;
+  }
+}
+
+#define WRITE(B, D, C) write(D, CONTINUATION(C), B)
+
+#define READ(L, C) read(CONTINUATION(C), buffer::ptr_node::create(buffer::create(L)))
+
+#define READ_RXBUF(B, C) read(CONTINUATION(C), B)
+
+#ifdef UNIT_TESTS_BUILT
+
+#define INTERCEPT(S) { \
+if(connection->interceptor) { \
+  auto a = connection->interceptor->intercept(connection, (S)); \
+  if (a == Interceptor::ACTION::FAIL) { \
+    return _fault(); \
+  } else if (a == Interceptor::ACTION::STOP) { \
+    stop(); \
+    connection->dispatch_queue->queue_reset(connection); \
+    return nullptr; \
+  }}}
+  
+#else
+#define INTERCEPT(S)
+#endif
+
+ProtocolV2::ProtocolV2(AsyncConnection *connection)
+    : Protocol(2, connection),
+      state(NONE),
+      peer_supported_features(0),
+      client_cookie(0),
+      server_cookie(0),
+      global_seq(0),
+      connect_seq(0),
+      peer_global_seq(0),
+      message_seq(0),
+      reconnecting(false),
+      replacing(false),
+      can_write(false),
+      bannerExchangeCallback(nullptr),
+      tx_frame_asm(&session_stream_handlers, false),
+      rx_frame_asm(&session_stream_handlers, false),
+      next_tag(static_cast<Tag>(0)),
+      keepalive(false) {
+}
+
+ProtocolV2::~ProtocolV2() {
+}
+
+void ProtocolV2::connect() {
+  ldout(cct, 1) << __func__ << dendl;
+  state = START_CONNECT;
+  pre_auth.enabled = true;
+}
+
+void ProtocolV2::accept() {
+  ldout(cct, 1) << __func__ << dendl;
+  state = START_ACCEPT;
+}
+
+bool ProtocolV2::is_connected() { return can_write; }
+
+/*
+ * Tears down the message queues, and removes them from the
+ * DispatchQueue Must hold write_lock prior to calling.
+ */
+void ProtocolV2::discard_out_queue() {
+  ldout(cct, 10) << __func__ << " started" << dendl;
+
+  for (list<Message *>::iterator p = sent.begin(); p != sent.end(); ++p) {
+    ldout(cct, 20) << __func__ << " discard " << *p << dendl;
+    (*p)->put();
+  }
+  sent.clear();
+  for (auto& [ prio, entries ] : out_queue) {
+    static_cast<void>(prio);
+    for (auto& entry : entries) {
+      ldout(cct, 20) << __func__ << " discard " << *entry.m << dendl;
+      entry.m->put();
+    }
+  }
+  out_queue.clear();
+  write_in_progress = false;
+}
+
+void ProtocolV2::reset_session() {
+  ldout(cct, 1) << __func__ << dendl;
+
+  std::lock_guard<std::mutex> l(connection->write_lock);
+  if (connection->delay_state) {
+    connection->delay_state->discard();
+  }
+
+  connection->dispatch_queue->discard_queue(connection->conn_id);
+  discard_out_queue();
+  connection->outgoing_bl.clear();
+
+  connection->dispatch_queue->queue_remote_reset(connection);
+
+  out_seq = 0;
+  in_seq = 0;
+  client_cookie = 0;
+  server_cookie = 0;
+  connect_seq = 0;
+  peer_global_seq = 0;
+  message_seq = 0;
+  ack_left = 0;
+  can_write = false;
+}
+
+void ProtocolV2::stop() {
+  ldout(cct, 1) << __func__ << dendl;
+  if (state == CLOSED) {
+    return;
+  }
+
+  if (connection->delay_state) connection->delay_state->flush();
+
+  std::lock_guard<std::mutex> l(connection->write_lock);
+
+  reset_recv_state();
+  discard_out_queue();
+
+  connection->_stop();
+
+  can_write = false;
+  state = CLOSED;
+}
+
+void ProtocolV2::fault() { _fault(); }
+
+void ProtocolV2::requeue_sent() {
+  write_in_progress = false;
+  if (sent.empty()) {
+    return;
+  }
+
+  auto& rq = out_queue[CEPH_MSG_PRIO_HIGHEST];
+  out_seq -= sent.size();
+  while (!sent.empty()) {
+    Message *m = sent.back();
+    sent.pop_back();
+    ldout(cct, 5) << __func__ << " requeueing message m=" << m
+                  << " seq=" << m->get_seq() << " type=" << m->get_type() << " "
+                  << *m << dendl;
+    rq.emplace_front(out_queue_entry_t{false, m});
+  }
+}
+
+uint64_t ProtocolV2::discard_requeued_up_to(uint64_t out_seq, uint64_t seq) {
+  ldout(cct, 10) << __func__ << " " << seq << dendl;
+  std::lock_guard<std::mutex> l(connection->write_lock);
+  if (out_queue.count(CEPH_MSG_PRIO_HIGHEST) == 0) {
+    return seq;
+  }
+  auto& rq = out_queue[CEPH_MSG_PRIO_HIGHEST];
+  uint64_t count = out_seq;
+  while (!rq.empty()) {
+    Message* const m = rq.front().m;
+    if (m->get_seq() == 0 || m->get_seq() > seq) break;
+    ldout(cct, 5) << __func__ << " discarding message m=" << m
+                  << " seq=" << m->get_seq() << " ack_seq=" << seq << " "
+                  << *m << dendl;
+    m->put();
+    rq.pop_front();
+    count++;
+  }
+  if (rq.empty()) out_queue.erase(CEPH_MSG_PRIO_HIGHEST);
+  return count;
+}
+
+void ProtocolV2::reset_security() {
+  ldout(cct, 5) << __func__ << dendl;
+
+  auth_meta.reset(new AuthConnectionMeta);
+  session_stream_handlers.rx.reset(nullptr);
+  session_stream_handlers.tx.reset(nullptr);
+  pre_auth.rxbuf.clear();
+  pre_auth.txbuf.clear();
+}
+
+// it's expected the `write_lock` is held while calling this method.
+void ProtocolV2::reset_recv_state() {
+  ldout(cct, 5) << __func__ << dendl;
+
+  if (!connection->center->in_thread()) {
+    // execute in the same thread that uses the rx/tx handlers. We need
+    // to do the warp because holding `write_lock` is not enough as
+    // `write_event()` unlocks it just before calling `write_message()`.
+    // `submit_to()` here is NOT blocking.
+    connection->center->submit_to(connection->center->get_id(), [this] {
+      ldout(cct, 5) << "reset_recv_state (warped) reseting crypto handlers"
+                    << dendl;
+      // Possibly unnecessary. See the comment in `deactivate_existing`.
+      std::lock_guard<std::mutex> l(connection->lock);
+      std::lock_guard<std::mutex> wl(connection->write_lock);
+      reset_security();
+    }, /* nowait = */true);
+  } else {
+    reset_security();
+  }
+
+  // clean read and write callbacks
+  connection->pendingReadLen.reset();
+  connection->writeCallback.reset();
+
+  next_tag = static_cast<Tag>(0);
+
+  reset_throttle();
+}
+
+size_t ProtocolV2::get_current_msg_size() const {
+  ceph_assert(rx_frame_asm.get_num_segments() > 0);
+  size_t sum = 0;
+  // we don't include SegmentIndex::Msg::HEADER.
+  for (size_t i = 1; i < rx_frame_asm.get_num_segments(); i++) {
+    sum += rx_frame_asm.get_segment_logical_len(i);
+  }
+  return sum;
+}
+
+void ProtocolV2::reset_throttle() {
+  if (state > THROTTLE_MESSAGE && state <= THROTTLE_DONE &&
+      connection->policy.throttler_messages) {
+    ldout(cct, 10) << __func__ << " releasing " << 1
+                   << " message to policy throttler "
+                   << connection->policy.throttler_messages->get_current()
+                   << "/" << connection->policy.throttler_messages->get_max()
+                   << dendl;
+    connection->policy.throttler_messages->put();
+  }
+  if (state > THROTTLE_BYTES && state <= THROTTLE_DONE) {
+    if (connection->policy.throttler_bytes) {
+      const size_t cur_msg_size = get_current_msg_size();
+      ldout(cct, 10) << __func__ << " releasing " << cur_msg_size
+                     << " bytes to policy throttler "
+                     << connection->policy.throttler_bytes->get_current() << "/"
+                     << connection->policy.throttler_bytes->get_max() << dendl;
+      connection->policy.throttler_bytes->put(cur_msg_size);
+    }
+  }
+  if (state > THROTTLE_DISPATCH_QUEUE && state <= THROTTLE_DONE) {
+    const size_t cur_msg_size = get_current_msg_size();
+    ldout(cct, 10)
+        << __func__ << " releasing " << cur_msg_size
+        << " bytes to dispatch_queue throttler "
+        << connection->dispatch_queue->dispatch_throttler.get_current() << "/"
+        << connection->dispatch_queue->dispatch_throttler.get_max() << dendl;
+    connection->dispatch_queue->dispatch_throttle_release(cur_msg_size);
+  }
+}
+
+CtPtr ProtocolV2::_fault() {
+  ldout(cct, 10) << __func__ << dendl;
+
+  if (state == CLOSED || state == NONE) {
+    ldout(cct, 10) << __func__ << " connection is already closed" << dendl;
+    return nullptr;
+  }
+
+  if (connection->policy.lossy &&
+      !(state >= START_CONNECT && state <= SESSION_RECONNECTING)) {
+    ldout(cct, 2) << __func__ << " on lossy channel, failing" << dendl;
+    stop();
+    connection->dispatch_queue->queue_reset(connection);
+    return nullptr;
+  }
+
+  connection->write_lock.lock();
+
+  can_write = false;
+  // requeue sent items
+  requeue_sent();
+
+  if (out_queue.empty() && state >= START_ACCEPT &&
+      state <= SESSION_ACCEPTING && !replacing) {
+    ldout(cct, 2) << __func__ << " with nothing to send and in the half "
+                   << " accept state just closed" << dendl;
+    connection->write_lock.unlock();
+    stop();
+    connection->dispatch_queue->queue_reset(connection);
+    return nullptr;
+  }
+
+  replacing = false;
+  connection->fault();
+  reset_recv_state();
+
+  reconnecting = false;
+
+  if (connection->policy.standby && out_queue.empty() && !keepalive &&
+      state != WAIT) {
+    ldout(cct, 1) << __func__ << " with nothing to send, going to standby"
+                  << dendl;
+    state = STANDBY;
+    connection->write_lock.unlock();
+    return nullptr;
+  }
+  if (connection->policy.server) {
+    ldout(cct, 1) << __func__ << " server, going to standby, even though i have stuff queued" << dendl;
+    state = STANDBY;
+    connection->write_lock.unlock();
+    return nullptr;
+  }
+
+  connection->write_lock.unlock();
+
+  if (!(state >= START_CONNECT && state <= SESSION_RECONNECTING) &&
+      state != WAIT &&
+      state != SESSION_ACCEPTING /* due to connection race */) {
+    // policy maybe empty when state is in accept
+    if (connection->policy.server) {
+      ldout(cct, 1) << __func__ << " server, going to standby" << dendl;
+      state = STANDBY;
+    } else {
+      ldout(cct, 1) << __func__ << " initiating reconnect" << dendl;
+      connect_seq++;
+      global_seq = messenger->get_global_seq();
+      state = START_CONNECT;
+      pre_auth.enabled = true;
+      connection->state = AsyncConnection::STATE_CONNECTING;
+    }
+    backoff = utime_t();
+    connection->center->dispatch_event_external(connection->read_handler);
+  } else {
+    if (state == WAIT) {
+      backoff.set_from_double(cct->_conf->ms_max_backoff);
+    } else if (backoff == utime_t()) {
+      backoff.set_from_double(cct->_conf->ms_initial_backoff);
+    } else {
+      backoff += backoff;
+      if (backoff > cct->_conf->ms_max_backoff)
+        backoff.set_from_double(cct->_conf->ms_max_backoff);
+    }
+
+    if (server_cookie) {
+      connect_seq++;
+    }
+
+    global_seq = messenger->get_global_seq();
+    state = START_CONNECT;
+    pre_auth.enabled = true;
+    connection->state = AsyncConnection::STATE_CONNECTING;
+    ldout(cct, 1) << __func__ << " waiting " << backoff << dendl;
+    // woke up again;
+    connection->register_time_events.insert(
+        connection->center->create_time_event(backoff.to_nsec() / 1000,
+                                              connection->wakeup_handler));
+  }
+  return nullptr;
+}
+
+void ProtocolV2::prepare_send_message(uint64_t features,
+				      Message *m) {
+  ldout(cct, 20) << __func__ << " m=" << *m << dendl;
+
+  // associate message with Connection (for benefit of encode_payload)
+  if (m->empty_payload()) {
+    ldout(cct, 20) << __func__ << " encoding features " << features << " " << m
+                   << " " << *m << dendl;
+  } else {
+    ldout(cct, 20) << __func__ << " half-reencoding features " << features
+                   << " " << m << " " << *m << dendl;
+  }
+
+  // encode and copy out of *m
+  m->encode(features, 0);
+}
+
+void ProtocolV2::send_message(Message *m) {
+  uint64_t f = connection->get_features();
+
+  // TODO: Currently not all messages supports reencode like MOSDMap, so here
+  // only let fast dispatch support messages prepare message
+  const bool can_fast_prepare = messenger->ms_can_fast_dispatch(m);
+  if (can_fast_prepare) {
+    prepare_send_message(f, m);
+  }
+
+  std::lock_guard<std::mutex> l(connection->write_lock);
+  bool is_prepared = can_fast_prepare;
+  // "features" changes will change the payload encoding
+  if (can_fast_prepare && (!can_write || connection->get_features() != f)) {
+    // ensure the correctness of message encoding
+    m->clear_payload();
+    is_prepared = false;
+    ldout(cct, 10) << __func__ << " clear encoded buffer previous " << f
+                   << " != " << connection->get_features() << dendl;
+  }
+  if (state == CLOSED) {
+    ldout(cct, 10) << __func__ << " connection closed."
+                   << " Drop message " << m << dendl;
+    m->put();
+  } else {
+    ldout(cct, 5) << __func__ << " enqueueing message m=" << m
+                  << " type=" << m->get_type() << " " << *m << dendl;
+    m->trace.event("async enqueueing message");
+    out_queue[m->get_priority()].emplace_back(
+      out_queue_entry_t{is_prepared, m});
+    ldout(cct, 15) << __func__ << " inline write is denied, reschedule m=" << m
+                   << dendl;
+    if (((!replacing && can_write) || state == STANDBY) && !write_in_progress) {
+      write_in_progress = true;
+      connection->center->dispatch_event_external(connection->write_handler);
+    }
+  }
+}
+
+void ProtocolV2::send_keepalive() {
+  ldout(cct, 10) << __func__ << dendl;
+  std::lock_guard<std::mutex> l(connection->write_lock);
+  if (state != CLOSED) {
+    keepalive = true;
+    connection->center->dispatch_event_external(connection->write_handler);
+  }
+}
+
+void ProtocolV2::read_event() {
+  ldout(cct, 20) << __func__ << dendl;
+
+  switch (state) {
+    case START_CONNECT:
+      run_continuation(CONTINUATION(start_client_banner_exchange));
+      break;
+    case START_ACCEPT:
+      run_continuation(CONTINUATION(start_server_banner_exchange));
+      break;
+    case READY:
+      run_continuation(CONTINUATION(read_frame));
+      break;
+    case THROTTLE_MESSAGE:
+      run_continuation(CONTINUATION(throttle_message));
+      break;
+    case THROTTLE_BYTES:
+      run_continuation(CONTINUATION(throttle_bytes));
+      break;
+    case THROTTLE_DISPATCH_QUEUE:
+      run_continuation(CONTINUATION(throttle_dispatch_queue));
+      break;
+    default:
+      break;
+  }
+}
+
+ProtocolV2::out_queue_entry_t ProtocolV2::_get_next_outgoing() {
+  out_queue_entry_t out_entry;
+
+  if (!out_queue.empty()) {
+    auto it = out_queue.rbegin();
+    auto& entries = it->second;
+    ceph_assert(!entries.empty());
+    out_entry = entries.front();
+    entries.pop_front();
+    if (entries.empty()) {
+      out_queue.erase(it->first);
+    }
+  }
+  return out_entry;
+}
+
+ssize_t ProtocolV2::write_message(Message *m, bool more) {
+  FUNCTRACE(cct);
+  ceph_assert(connection->center->in_thread());
+  m->set_seq(++out_seq);
+
+  connection->lock.lock();
+  uint64_t ack_seq = in_seq;
+  ack_left = 0;
+  connection->lock.unlock();
+
+  ceph_msg_header &header = m->get_header();
+  ceph_msg_footer &footer = m->get_footer();
+
+  ceph_msg_header2 header2{header.seq,        header.tid,
+                           header.type,       header.priority,
+                           header.version,
+                           init_le32(0),      header.data_off,
+                           init_le64(ack_seq),
+                           footer.flags,      header.compat_version,
+                           header.reserved};
+
+  auto message = MessageFrame::Encode(
+			     header2,
+			     m->get_payload(),
+			     m->get_middle(),
+			     m->get_data());
+  if (!append_frame(message)) {
+    m->put();
+    return -EILSEQ;
+  }
+
+  ldout(cct, 5) << __func__ << " sending message m=" << m
+                << " seq=" << m->get_seq() << " " << *m << dendl;
+
+  m->trace.event("async writing message");
+  ldout(cct, 20) << __func__ << " sending m=" << m << " seq=" << m->get_seq()
+                 << " src=" << entity_name_t(messenger->get_myname())
+                 << " off=" << header2.data_off
+                 << dendl;
+  ssize_t total_send_size = connection->outgoing_bl.length();
+  ssize_t rc = connection->_try_send(more);
+  if (rc < 0) {
+    ldout(cct, 1) << __func__ << " error sending " << m << ", "
+                  << cpp_strerror(rc) << dendl;
+  } else {
+    connection->logger->inc(
+        l_msgr_send_bytes, total_send_size - connection->outgoing_bl.length());
+    ldout(cct, 10) << __func__ << " sending " << m
+                   << (rc ? " continuely." : " done.") << dendl;
+  }
+  if (m->get_type() == CEPH_MSG_OSD_OP)
+    OID_EVENT_TRACE_WITH_MSG(m, "SEND_MSG_OSD_OP_END", false);
+  else if (m->get_type() == CEPH_MSG_OSD_OPREPLY)
+    OID_EVENT_TRACE_WITH_MSG(m, "SEND_MSG_OSD_OPREPLY_END", false);
+  m->put();
+
+  return rc;
+}
+
+template <class F>
+bool ProtocolV2::append_frame(F& frame) {
+  ceph::bufferlist bl;
+  try {
+    bl = frame.get_buffer(tx_frame_asm);
+  } catch (ceph::crypto::onwire::TxHandlerError &e) {
+    ldout(cct, 1) << __func__ << " " << e.what() << dendl;
+    return false;
+  }
+
+  ldout(cct, 25) << __func__ << " assembled frame " << bl.length()
+                 << " bytes " << tx_frame_asm << dendl;
+  connection->outgoing_bl.append(bl);
+  return true;
+}
+
+void ProtocolV2::handle_message_ack(uint64_t seq) {
+  if (connection->policy.lossy) {  // lossy connections don't keep sent messages
+    return;
+  }
+
+  ldout(cct, 15) << __func__ << " seq=" << seq << dendl;
+
+  // trim sent list
+  static const int max_pending = 128;
+  int i = 0;
+  Message *pending[max_pending];
+  connection->write_lock.lock();
+  while (!sent.empty() && sent.front()->get_seq() <= seq && i < max_pending) {
+    Message *m = sent.front();
+    sent.pop_front();
+    pending[i++] = m;
+    ldout(cct, 10) << __func__ << " got ack seq " << seq
+                   << " >= " << m->get_seq() << " on " << m << " " << *m
+                   << dendl;
+  }
+  connection->write_lock.unlock();
+  for (int k = 0; k < i; k++) {
+    pending[k]->put();
+  }
+}
+
+void ProtocolV2::write_event() {
+  ldout(cct, 10) << __func__ << dendl;
+  ssize_t r = 0;
+
+  connection->write_lock.lock();
+  if (can_write) {
+    if (keepalive) {
+      ldout(cct, 10) << __func__ << " appending keepalive" << dendl;
+      auto keepalive_frame = KeepAliveFrame::Encode();
+      if (!append_frame(keepalive_frame)) {
+        connection->write_lock.unlock();
+        connection->lock.lock();
+        fault();
+        connection->lock.unlock();
+        return;
+      }
+      keepalive = false;
+    }
+
+    auto start = ceph::mono_clock::now();
+    bool more;
+    do {
+      const auto out_entry = _get_next_outgoing();
+      if (!out_entry.m) {
+        break;
+      }
+
+      if (!connection->policy.lossy) {
+        // put on sent list
+        sent.push_back(out_entry.m);
+        out_entry.m->get();
+      }
+      more = !out_queue.empty();
+      connection->write_lock.unlock();
+
+      // send_message or requeue messages may not encode message
+      if (!out_entry.is_prepared) {
+        prepare_send_message(connection->get_features(), out_entry.m);
+      }
+
+      r = write_message(out_entry.m, more);
+
+      connection->write_lock.lock();
+      if (r == 0) {
+        ;
+      } else if (r < 0) {
+        ldout(cct, 1) << __func__ << " send msg failed" << dendl;
+        break;
+      } else if (r > 0)
+        break;
+    } while (can_write);
+    write_in_progress = false;
+
+    // if r > 0 mean data still lefted, so no need _try_send.
+    if (r == 0) {
+      uint64_t left = ack_left;
+      if (left) {
+        ceph_le64 s;
+        s = in_seq;
+        ldout(cct, 10) << __func__ << " try send msg ack, acked " << left
+                       << " messages" << dendl;
+        auto ack_frame = AckFrame::Encode(in_seq);
+        if (append_frame(ack_frame)) {
+          ack_left -= left;
+          left = ack_left;
+          r = connection->_try_send(left);
+        } else {
+          r = -EILSEQ;
+        }
+      } else if (is_queued()) {
+        r = connection->_try_send();
+      }
+    }
+    connection->write_lock.unlock();
+
+    connection->logger->tinc(l_msgr_running_send_time,
+                             ceph::mono_clock::now() - start);
+    if (r < 0) {
+      ldout(cct, 1) << __func__ << " send msg failed" << dendl;
+      connection->lock.lock();
+      fault();
+      connection->lock.unlock();
+      return;
+    }
+  } else {
+    write_in_progress = false;
+    connection->write_lock.unlock();
+    connection->lock.lock();
+    connection->write_lock.lock();
+    if (state == STANDBY && !connection->policy.server && is_queued()) {
+      ldout(cct, 10) << __func__ << " policy.server is false" << dendl;
+      if (server_cookie) {  // only increment connect_seq if there is a session
+        connect_seq++;
+      }
+      connection->_connect();
+    } else if (connection->cs && state != NONE && state != CLOSED &&
+               state != START_CONNECT) {
+      r = connection->_try_send();
+      if (r < 0) {
+        ldout(cct, 1) << __func__ << " send outcoming bl failed" << dendl;
+        connection->write_lock.unlock();
+        fault();
+        connection->lock.unlock();
+        return;
+      }
+    }
+    connection->write_lock.unlock();
+    connection->lock.unlock();
+  }
+}
+
+bool ProtocolV2::is_queued() {
+  return !out_queue.empty() || connection->is_queued();
+}
+
+CtPtr ProtocolV2::read(CONTINUATION_RXBPTR_TYPE<ProtocolV2> &next,
+                       rx_buffer_t &&buffer) {
+  const auto len = buffer->length();
+  const auto buf = buffer->c_str();
+  next.node = std::move(buffer);
+  ssize_t r = connection->read(len, buf,
+    [&next, this](char *buffer, int r) {
+      if (unlikely(pre_auth.enabled) && r >= 0) {
+        pre_auth.rxbuf.append(*next.node);
+	ceph_assert(!cct->_conf->ms_die_on_bug ||
+		    pre_auth.rxbuf.length() < 20000000);
+      }
+      next.r = r;
+      run_continuation(next);
+    });
+  if (r <= 0) {
+    // error or done synchronously
+    if (unlikely(pre_auth.enabled) && r >= 0) {
+      pre_auth.rxbuf.append(*next.node);
+      ceph_assert(!cct->_conf->ms_die_on_bug ||
+		  pre_auth.rxbuf.length() < 20000000);
+    }
+    next.r = r;
+    return &next;
+  }
+
+  return nullptr;
+}
+
+template <class F>
+CtPtr ProtocolV2::write(const std::string &desc,
+                        CONTINUATION_TYPE<ProtocolV2> &next,
+                        F &frame) {
+  ceph::bufferlist bl;
+  try {
+    bl = frame.get_buffer(tx_frame_asm);
+  } catch (ceph::crypto::onwire::TxHandlerError &e) {
+    ldout(cct, 1) << __func__ << " " << e.what() << dendl;
+    return _fault();
+  }
+
+  ldout(cct, 25) << __func__ << " assembled frame " << bl.length()
+                 << " bytes " << tx_frame_asm << dendl;
+  return write(desc, next, bl);
+}
+
+CtPtr ProtocolV2::write(const std::string &desc,
+                        CONTINUATION_TYPE<ProtocolV2> &next,
+                        bufferlist &buffer) {
+  if (unlikely(pre_auth.enabled)) {
+    pre_auth.txbuf.append(buffer);
+    ceph_assert(!cct->_conf->ms_die_on_bug ||
+		pre_auth.txbuf.length() < 20000000);
+  }
+
+  ssize_t r =
+      connection->write(buffer, [&next, desc, this](int r) {
+        if (r < 0) {
+          ldout(cct, 1) << __func__ << " " << desc << " write failed r=" << r
+                        << " (" << cpp_strerror(r) << ")" << dendl;
+          connection->inject_delay();
+          _fault();
+        }
+        run_continuation(next);
+      });
+
+  if (r < 0) {
+    ldout(cct, 1) << __func__ << " " << desc << " write failed r=" << r
+                  << " (" << cpp_strerror(r) << ")" << dendl;
+    return _fault();
+  } else if (r == 0) {
+    next.setParams();
+    return &next;
+  }
+
+  return nullptr;
+}
+
+CtPtr ProtocolV2::_banner_exchange(CtRef callback) {
+  ldout(cct, 20) << __func__ << dendl;
+  bannerExchangeCallback = &callback;
+
+  bufferlist banner_payload;
+  encode((uint64_t)CEPH_MSGR2_SUPPORTED_FEATURES, banner_payload, 0);
+  encode((uint64_t)CEPH_MSGR2_REQUIRED_FEATURES, banner_payload, 0);
+
+  bufferlist bl;
+  bl.append(CEPH_BANNER_V2_PREFIX, strlen(CEPH_BANNER_V2_PREFIX));
+  encode((uint16_t)banner_payload.length(), bl, 0);
+  bl.claim_append(banner_payload);
+
+  INTERCEPT(state == BANNER_CONNECTING ? 3 : 4);
+
+  return WRITE(bl, "banner", _wait_for_peer_banner);
+}
+
+CtPtr ProtocolV2::_wait_for_peer_banner() {
+  unsigned banner_len = strlen(CEPH_BANNER_V2_PREFIX) + sizeof(__le16);
+  return READ(banner_len, _handle_peer_banner);
+}
+
+CtPtr ProtocolV2::_handle_peer_banner(rx_buffer_t &&buffer, int r) {
+  ldout(cct, 20) << __func__ << " r=" << r << dendl;
+
+  if (r < 0) {
+    ldout(cct, 1) << __func__ << " read peer banner failed r=" << r << " ("
+                  << cpp_strerror(r) << ")" << dendl;
+    return _fault();
+  }
+
+  unsigned banner_prefix_len = strlen(CEPH_BANNER_V2_PREFIX);
+
+  if (memcmp(buffer->c_str(), CEPH_BANNER_V2_PREFIX, banner_prefix_len)) {
+    if (memcmp(buffer->c_str(), CEPH_BANNER, strlen(CEPH_BANNER)) == 0) {
+      lderr(cct) << __func__ << " peer " << *connection->peer_addrs
+                 << " is using msgr V1 protocol" << dendl;
+      return _fault();
+    }
+    ldout(cct, 1) << __func__ << " accept peer sent bad banner" << dendl;
+    return _fault();
+  }
+
+  uint16_t payload_len;
+  bufferlist bl;
+  buffer->set_offset(banner_prefix_len);
+  buffer->set_length(sizeof(__le16));
+  bl.push_back(std::move(buffer));
+  auto ti = bl.cbegin();
+  try {
+    decode(payload_len, ti);
+  } catch (const buffer::error &e) {
+    lderr(cct) << __func__ << " decode banner payload len failed " << dendl;
+    return _fault();
+  }
+
+  INTERCEPT(state == BANNER_CONNECTING ? 5 : 6);
+
+  return READ(payload_len, _handle_peer_banner_payload);
+}
+
+CtPtr ProtocolV2::_handle_peer_banner_payload(rx_buffer_t &&buffer, int r) {
+  ldout(cct, 20) << __func__ << " r=" << r << dendl;
+
+  if (r < 0) {
+    ldout(cct, 1) << __func__ << " read peer banner payload failed r=" << r
+                  << " (" << cpp_strerror(r) << ")" << dendl;
+    return _fault();
+  }
+
+  uint64_t peer_supported_features;
+  uint64_t peer_required_features;
+
+  bufferlist bl;
+  bl.push_back(std::move(buffer));
+  auto ti = bl.cbegin();
+  try {
+    decode(peer_supported_features, ti);
+    decode(peer_required_features, ti);
+  } catch (const buffer::error &e) {
+    lderr(cct) << __func__ << " decode banner payload failed " << dendl;
+    return _fault();
+  }
+
+  ldout(cct, 1) << __func__ << " supported=" << std::hex
+                << peer_supported_features << " required=" << std::hex
+                << peer_required_features << std::dec << dendl;
+
+  // Check feature bit compatibility
+
+  uint64_t supported_features = CEPH_MSGR2_SUPPORTED_FEATURES;
+  uint64_t required_features = CEPH_MSGR2_REQUIRED_FEATURES;
+
+  if ((required_features & peer_supported_features) != required_features) {
+    ldout(cct, 1) << __func__ << " peer does not support all required features"
+                  << " required=" << std::hex << required_features
+                  << " supported=" << std::hex << peer_supported_features
+                  << std::dec << dendl;
+    stop();
+    connection->dispatch_queue->queue_reset(connection);
+    return nullptr;
+  }
+  if ((supported_features & peer_required_features) != peer_required_features) {
+    ldout(cct, 1) << __func__ << " we do not support all peer required features"
+                  << " required=" << std::hex << peer_required_features
+                  << " supported=" << supported_features << std::dec << dendl;
+    stop();
+    connection->dispatch_queue->queue_reset(connection);
+    return nullptr;
+  }
+
+  this->peer_supported_features = peer_supported_features;
+  if (peer_required_features == 0) {
+    this->connection_features = msgr2_required;
+  }
+
+  // if the peer supports msgr2.1, switch to it
+  bool is_rev1 = HAVE_MSGR2_FEATURE(peer_supported_features, REVISION_1);
+  tx_frame_asm.set_is_rev1(is_rev1);
+  rx_frame_asm.set_is_rev1(is_rev1);
+
+  if (state == BANNER_CONNECTING) {
+    state = HELLO_CONNECTING;
+  }
+  else {
+    ceph_assert(state == BANNER_ACCEPTING);
+    state = HELLO_ACCEPTING;
+  }
+
+  auto hello = HelloFrame::Encode(messenger->get_mytype(),
+                                  connection->target_addr);
+
+  INTERCEPT(state == HELLO_CONNECTING ? 7 : 8);
+
+  return WRITE(hello, "hello frame", read_frame);
+}
+
+CtPtr ProtocolV2::handle_hello(ceph::bufferlist &payload)
+{
+  ldout(cct, 20) << __func__
+		 << " payload.length()=" << payload.length() << dendl;
+
+  if (state != HELLO_CONNECTING && state != HELLO_ACCEPTING) {
+    lderr(cct) << __func__ << " not in hello exchange state!" << dendl;
+    return _fault();
+  }
+
+  auto hello = HelloFrame::Decode(payload);
+
+  ldout(cct, 5) << __func__ << " received hello:"
+                << " peer_type=" << (int)hello.entity_type()
+                << " peer_addr_for_me=" << hello.peer_addr() << dendl;
+
+  sockaddr_storage ss;
+  socklen_t len = sizeof(ss);
+  getsockname(connection->cs.fd(), (sockaddr *)&ss, &len);
+  ldout(cct, 5) << __func__ << " getsockname says I am " << (sockaddr *)&ss
+		<< " when talking to " << connection->target_addr << dendl;
+
+  if (connection->get_peer_type() == -1) {
+    connection->set_peer_type(hello.entity_type());
+
+    ceph_assert(state == HELLO_ACCEPTING);
+    connection->policy = messenger->get_policy(hello.entity_type());
+    ldout(cct, 10) << __func__ << " accept of host_type "
+                   << (int)hello.entity_type()
+                   << ", policy.lossy=" << connection->policy.lossy
+                   << " policy.server=" << connection->policy.server
+                   << " policy.standby=" << connection->policy.standby
+                   << " policy.resetcheck=" << connection->policy.resetcheck
+                   << dendl;
+  } else {
+    ceph_assert(state == HELLO_CONNECTING);
+    if (connection->get_peer_type() != hello.entity_type()) {
+      ldout(cct, 1) << __func__ << " connection peer type does not match what"
+                    << " peer advertises " << connection->get_peer_type()
+                    << " != " << (int)hello.entity_type() << dendl;
+      stop();
+      connection->dispatch_queue->queue_reset(connection);
+      return nullptr;
+    }
+  }
+
+  if (messenger->get_myaddrs().empty() ||
+      messenger->get_myaddrs().front().is_blank_ip()) {
+    entity_addr_t a;
+    if (cct->_conf->ms_learn_addr_from_peer) {
+      ldout(cct, 1) << __func__ << " peer " << connection->target_addr
+		    << " says I am " << hello.peer_addr() << " (socket says "
+		    << (sockaddr*)&ss << ")" << dendl;
+      a = hello.peer_addr();
+    } else {
+      ldout(cct, 1) << __func__ << " socket to  " << connection->target_addr
+		    << " says I am " << (sockaddr*)&ss
+		    << " (peer says " << hello.peer_addr() << ")" << dendl;
+      a.set_sockaddr((sockaddr *)&ss);
+    }
+    a.set_type(entity_addr_t::TYPE_MSGR2); // anything but NONE; learned_addr ignores this
+    a.set_port(0);
+    connection->lock.unlock();
+    messenger->learned_addr(a);
+    if (cct->_conf->ms_inject_internal_delays &&
+        cct->_conf->ms_inject_socket_failures) {
+      if (rand() % cct->_conf->ms_inject_socket_failures == 0) {
+        ldout(cct, 10) << __func__ << " sleep for "
+                       << cct->_conf->ms_inject_internal_delays << dendl;
+        utime_t t;
+        t.set_from_double(cct->_conf->ms_inject_internal_delays);
+        t.sleep();
+      }
+    }
+    connection->lock.lock();
+    if (state != HELLO_CONNECTING) {
+      ldout(cct, 1) << __func__
+                    << " state changed while learned_addr, mark_down or "
+                    << " replacing must be happened just now" << dendl;
+      return nullptr;
+    }
+  }
+
+
+
+  CtPtr callback;
+  callback = bannerExchangeCallback;
+  bannerExchangeCallback = nullptr;
+  ceph_assert(callback);
+  return callback;
+}
+
+CtPtr ProtocolV2::read_frame() {
+  if (state == CLOSED) {
+    return nullptr;
+  }
+
+  ldout(cct, 20) << __func__ << dendl;
+  rx_preamble.clear();
+  rx_epilogue.clear();
+  rx_segments_data.clear();
+
+  return READ(rx_frame_asm.get_preamble_onwire_len(),
+              handle_read_frame_preamble_main);
+}
+
+CtPtr ProtocolV2::handle_read_frame_preamble_main(rx_buffer_t &&buffer, int r) {
+  ldout(cct, 20) << __func__ << " r=" << r << dendl;
+
+  if (r < 0) {
+    ldout(cct, 1) << __func__ << " read frame preamble failed r=" << r
+                  << " (" << cpp_strerror(r) << ")" << dendl;
+    return _fault();
+  }
+
+  rx_preamble.push_back(std::move(buffer));
+
+  ldout(cct, 30) << __func__ << " preamble\n";
+  rx_preamble.hexdump(*_dout);
+  *_dout << dendl;
+
+  try {
+    next_tag = rx_frame_asm.disassemble_preamble(rx_preamble);
+  } catch (FrameError& e) {
+    ldout(cct, 1) << __func__ << " " << e.what() << dendl;
+    return _fault();
+  } catch (ceph::crypto::onwire::MsgAuthError&) {
+    ldout(cct, 1) << __func__ << "bad auth tag" << dendl;
+    return _fault();
+  }
+
+  ldout(cct, 25) << __func__ << " disassembled preamble " << rx_frame_asm
+                 << dendl;
+
+  if (session_stream_handlers.rx) {
+    ldout(cct, 30) << __func__ << " preamble after decrypt\n";
+    rx_preamble.hexdump(*_dout);
+    *_dout << dendl;
+  }
+
+  // does it need throttle?
+  if (next_tag == Tag::MESSAGE) {
+    if (state != READY) {
+      lderr(cct) << __func__ << " not in ready state!" << dendl;
+      return _fault();
+    }
+    state = THROTTLE_MESSAGE;
+    return CONTINUE(throttle_message);
+  } else {
+    return read_frame_segment();
+  }
+}
+
+CtPtr ProtocolV2::handle_read_frame_dispatch() {
+  ldout(cct, 10) << __func__
+                 << " tag=" << static_cast<uint32_t>(next_tag) << dendl;
+
+  switch (next_tag) {
+    case Tag::HELLO:
+    case Tag::AUTH_REQUEST:
+    case Tag::AUTH_BAD_METHOD:
+    case Tag::AUTH_REPLY_MORE:
+    case Tag::AUTH_REQUEST_MORE:
+    case Tag::AUTH_DONE:
+    case Tag::AUTH_SIGNATURE:
+    case Tag::CLIENT_IDENT:
+    case Tag::SERVER_IDENT:
+    case Tag::IDENT_MISSING_FEATURES:
+    case Tag::SESSION_RECONNECT:
+    case Tag::SESSION_RESET:
+    case Tag::SESSION_RETRY:
+    case Tag::SESSION_RETRY_GLOBAL:
+    case Tag::SESSION_RECONNECT_OK:
+    case Tag::KEEPALIVE2:
+    case Tag::KEEPALIVE2_ACK:
+    case Tag::ACK:
+    case Tag::WAIT:
+      return handle_frame_payload();
+    case Tag::MESSAGE:
+      return handle_message();
+    default: {
+      lderr(cct) << __func__
+                 << " received unknown tag=" << static_cast<uint32_t>(next_tag)
+                 << dendl;
+      return _fault();
+    }
+  }
+
+  return nullptr;
+}
+
+CtPtr ProtocolV2::read_frame_segment() {
+  size_t seg_idx = rx_segments_data.size();
+  ldout(cct, 20) << __func__ << " seg_idx=" << seg_idx << dendl;
+  rx_segments_data.emplace_back();
+
+  uint32_t onwire_len = rx_frame_asm.get_segment_onwire_len(seg_idx);
+  if (onwire_len == 0) {
+    return _handle_read_frame_segment();
+  }
+
+  rx_buffer_t rx_buffer;
+  uint16_t align = rx_frame_asm.get_segment_align(seg_idx);
+  try {
+    rx_buffer = buffer::ptr_node::create(buffer::create_aligned(
+        onwire_len, align));
+  } catch (std::bad_alloc&) {
+    // Catching because of potential issues with satisfying alignment.
+    ldout(cct, 1) << __func__ << " can't allocate aligned rx_buffer"
+                  << " len=" << onwire_len
+                  << " align=" << align
+                  << dendl;
+    return _fault();
+  }
+
+  return READ_RXBUF(std::move(rx_buffer), handle_read_frame_segment);
+}
+
+CtPtr ProtocolV2::handle_read_frame_segment(rx_buffer_t &&rx_buffer, int r) {
+  ldout(cct, 20) << __func__ << " r=" << r << dendl;
+
+  if (r < 0) {
+    ldout(cct, 1) << __func__ << " read frame segment failed r=" << r << " ("
+                  << cpp_strerror(r) << ")" << dendl;
+    return _fault();
+  }
+
+  rx_segments_data.back().push_back(std::move(rx_buffer));
+  return _handle_read_frame_segment();
+}
+
+CtPtr ProtocolV2::_handle_read_frame_segment() {
+  if (rx_segments_data.size() == rx_frame_asm.get_num_segments()) {
+    // OK, all segments planned to read are read. Can go with epilogue.
+    uint32_t epilogue_onwire_len = rx_frame_asm.get_epilogue_onwire_len();
+    if (epilogue_onwire_len == 0) {
+      return _handle_read_frame_epilogue_main();
+    }
+    return READ(epilogue_onwire_len, handle_read_frame_epilogue_main);
+  }
+  // TODO: for makeshift only. This will be more generic and throttled
+  return read_frame_segment();
+}
+
+CtPtr ProtocolV2::handle_frame_payload() {
+  ceph_assert(!rx_segments_data.empty());
+  auto& payload = rx_segments_data.back();
+
+  ldout(cct, 30) << __func__ << "\n";
+  payload.hexdump(*_dout);
+  *_dout << dendl;
+
+  switch (next_tag) {
+    case Tag::HELLO:
+      return handle_hello(payload);
+    case Tag::AUTH_REQUEST:
+      return handle_auth_request(payload);
+    case Tag::AUTH_BAD_METHOD:
+      return handle_auth_bad_method(payload);
+    case Tag::AUTH_REPLY_MORE:
+      return handle_auth_reply_more(payload);
+    case Tag::AUTH_REQUEST_MORE:
+      return handle_auth_request_more(payload);
+    case Tag::AUTH_DONE:
+      return handle_auth_done(payload);
+    case Tag::AUTH_SIGNATURE:
+      return handle_auth_signature(payload);
+    case Tag::CLIENT_IDENT:
+      return handle_client_ident(payload);
+    case Tag::SERVER_IDENT:
+      return handle_server_ident(payload);
+    case Tag::IDENT_MISSING_FEATURES:
+      return handle_ident_missing_features(payload);
+    case Tag::SESSION_RECONNECT:
+      return handle_reconnect(payload);
+    case Tag::SESSION_RESET:
+      return handle_session_reset(payload);
+    case Tag::SESSION_RETRY:
+      return handle_session_retry(payload);
+    case Tag::SESSION_RETRY_GLOBAL:
+      return handle_session_retry_global(payload);
+    case Tag::SESSION_RECONNECT_OK:
+      return handle_reconnect_ok(payload);
+    case Tag::KEEPALIVE2:
+      return handle_keepalive2(payload);
+    case Tag::KEEPALIVE2_ACK:
+      return handle_keepalive2_ack(payload);
+    case Tag::ACK:
+      return handle_message_ack(payload);
+    case Tag::WAIT:
+      return handle_wait(payload);
+    default:
+      ceph_abort();
+  }
+  return nullptr;
+}
+
+CtPtr ProtocolV2::ready() {
+  ldout(cct, 25) << __func__ << dendl;
+
+  reconnecting = false;
+  replacing = false;
+
+  // make sure no pending tick timer
+  if (connection->last_tick_id) {
+    connection->center->delete_time_event(connection->last_tick_id);
+  }
+  connection->last_tick_id = connection->center->create_time_event(
+      connection->inactive_timeout_us, connection->tick_handler);
+
+  {
+    std::lock_guard<std::mutex> l(connection->write_lock);
+    can_write = true;
+    if (!out_queue.empty()) {
+      connection->center->dispatch_event_external(connection->write_handler);
+    }
+  }
+
+  connection->maybe_start_delay_thread();
+
+  state = READY;
+  ldout(cct, 1) << __func__ << " entity=" << peer_name << " client_cookie="
+                << std::hex << client_cookie << " server_cookie="
+                << server_cookie << std::dec << " in_seq=" << in_seq
+                << " out_seq=" << out_seq << dendl;
+
+  INTERCEPT(15);
+
+  return CONTINUE(read_frame);
+}
+
+CtPtr ProtocolV2::handle_read_frame_epilogue_main(rx_buffer_t &&buffer, int r)
+{
+  ldout(cct, 20) << __func__ << " r=" << r << dendl;
+
+  if (r < 0) {
+    ldout(cct, 1) << __func__ << " read frame epilogue failed r=" << r
+                  << " (" << cpp_strerror(r) << ")" << dendl;
+    return _fault();
+  }
+
+  rx_epilogue.push_back(std::move(buffer));
+  return _handle_read_frame_epilogue_main();
+}
+
+CtPtr ProtocolV2::_handle_read_frame_epilogue_main() {
+  bool aborted;
+  try {
+    rx_frame_asm.disassemble_first_segment(rx_preamble, rx_segments_data[0]);
+    aborted = !rx_frame_asm.disassemble_remaining_segments(
+        rx_segments_data.data(), rx_epilogue);
+  } catch (FrameError& e) {
+    ldout(cct, 1) << __func__ << " " << e.what() << dendl;
+    return _fault();
+  } catch (ceph::crypto::onwire::MsgAuthError&) {
+    ldout(cct, 1) << __func__ << "bad auth tag" << dendl;
+    return _fault();
+  }
+
+  // we do have a mechanism that allows transmitter to start sending message
+  // and abort after putting entire data field on wire. This will be used by
+  // the kernel client to avoid unnecessary buffering.
+  if (aborted) {
+    reset_throttle();
+    state = READY;
+    return CONTINUE(read_frame);
+  }
+  return handle_read_frame_dispatch();
+}
+
+CtPtr ProtocolV2::handle_message() {
+  ldout(cct, 20) << __func__ << dendl;
+  ceph_assert(state == THROTTLE_DONE);
+
+#if defined(WITH_LTTNG) && defined(WITH_EVENTTRACE)
+  ltt_recv_stamp = ceph_clock_now();
+#endif
+  recv_stamp = ceph_clock_now();
+
+  const size_t cur_msg_size = get_current_msg_size();
+  auto msg_frame = MessageFrame::Decode(rx_segments_data);
+
+  // XXX: paranoid copy just to avoid oops
+  ceph_msg_header2 current_header = msg_frame.header();
+
+  ldout(cct, 5) << __func__
+		<< " got " << msg_frame.front_len()
+		<< " + " << msg_frame.middle_len()
+		<< " + " << msg_frame.data_len()
+		<< " byte message."
+		<< " envelope type=" << current_header.type
+		<< " src " << peer_name
+		<< " off " << current_header.data_off
+                << dendl;
+
+  INTERCEPT(16);
+  ceph_msg_header header{current_header.seq,
+                         current_header.tid,
+                         current_header.type,
+                         current_header.priority,
+                         current_header.version,
+                         init_le32(msg_frame.front_len()),
+                         init_le32(msg_frame.middle_len()),
+                         init_le32(msg_frame.data_len()),
+                         current_header.data_off,
+                         peer_name,
+                         current_header.compat_version,
+                         current_header.reserved,
+                         init_le32(0)};
+  ceph_msg_footer footer{init_le32(0), init_le32(0),
+	                 init_le32(0), init_le64(0), current_header.flags};
+
+  Message *message = decode_message(cct, 0, header, footer,
+      msg_frame.front(),
+      msg_frame.middle(),
+      msg_frame.data(),
+      connection);
+  if (!message) {
+    ldout(cct, 1) << __func__ << " decode message failed " << dendl;
+    return _fault();
+  } else {
+    state = READ_MESSAGE_COMPLETE;
+  }
+
+  INTERCEPT(17);
+
+  message->set_byte_throttler(connection->policy.throttler_bytes);
+  message->set_message_throttler(connection->policy.throttler_messages);
+
+  // store reservation size in message, so we don't get confused
+  // by messages entering the dispatch queue through other paths.
+  message->set_dispatch_throttle_size(cur_msg_size);
+
+  message->set_recv_stamp(recv_stamp);
+  message->set_throttle_stamp(throttle_stamp);
+  message->set_recv_complete_stamp(ceph_clock_now());
+
+  // check received seq#.  if it is old, drop the message.
+  // note that incoming messages may skip ahead.  this is convenient for the
+  // client side queueing because messages can't be renumbered, but the (kernel)
+  // client will occasionally pull a message out of the sent queue to send
+  // elsewhere.  in that case it doesn't matter if we "got" it or not.
+  uint64_t cur_seq = in_seq;
+  if (message->get_seq() <= cur_seq) {
+    ldout(cct, 0) << __func__ << " got old message " << message->get_seq()
+                  << " <= " << cur_seq << " " << message << " " << *message
+                  << ", discarding" << dendl;
+    message->put();
+    if (connection->has_feature(CEPH_FEATURE_RECONNECT_SEQ) &&
+        cct->_conf->ms_die_on_old_message) {
+      ceph_assert(0 == "old msgs despite reconnect_seq feature");
+    }
+    return nullptr;
+  }
+  if (message->get_seq() > cur_seq + 1) {
+    ldout(cct, 0) << __func__ << " missed message?  skipped from seq "
+                  << cur_seq << " to " << message->get_seq() << dendl;
+    if (cct->_conf->ms_die_on_skipped_message) {
+      ceph_assert(0 == "skipped incoming seq");
+    }
+  }
+
+#if defined(WITH_LTTNG) && defined(WITH_EVENTTRACE)
+  if (message->get_type() == CEPH_MSG_OSD_OP ||
+      message->get_type() == CEPH_MSG_OSD_OPREPLY) {
+    utime_t ltt_processed_stamp = ceph_clock_now();
+    double usecs_elapsed =
+        (ltt_processed_stamp.to_nsec() - ltt_recv_stamp.to_nsec()) / 1000;
+    ostringstream buf;
+    if (message->get_type() == CEPH_MSG_OSD_OP)
+      OID_ELAPSED_WITH_MSG(message, usecs_elapsed, "TIME_TO_DECODE_OSD_OP",
+                           false);
+    else
+      OID_ELAPSED_WITH_MSG(message, usecs_elapsed, "TIME_TO_DECODE_OSD_OPREPLY",
+                           false);
+  }
+#endif
+
+  // note last received message.
+  in_seq = message->get_seq();
+  ldout(cct, 5) << __func__ << " received message m=" << message
+                << " seq=" << message->get_seq()
+                << " from=" << message->get_source() << " type=" << header.type
+                << " " << *message << dendl;
+
+  bool need_dispatch_writer = false;
+  if (!connection->policy.lossy) {
+    ack_left++;
+    need_dispatch_writer = true;
+  }
+
+  state = READY;
+
+  connection->logger->inc(l_msgr_recv_messages);
+  connection->logger->inc(l_msgr_recv_bytes,
+                          rx_frame_asm.get_frame_onwire_len());
+
+  messenger->ms_fast_preprocess(message);
+  auto fast_dispatch_time = ceph::mono_clock::now();
+  connection->logger->tinc(l_msgr_running_recv_time,
+                           fast_dispatch_time - connection->recv_start_time);
+  if (connection->delay_state) {
+    double delay_period = 0;
+    if (rand() % 10000 < cct->_conf->ms_inject_delay_probability * 10000.0) {
+      delay_period =
+          cct->_conf->ms_inject_delay_max * (double)(rand() % 10000) / 10000.0;
+      ldout(cct, 1) << "queue_received will delay after "
+                    << (ceph_clock_now() + delay_period) << " on " << message
+                    << " " << *message << dendl;
+    }
+    connection->delay_state->queue(delay_period, message);
+  } else if (messenger->ms_can_fast_dispatch(message)) {
+    connection->lock.unlock();
+    connection->dispatch_queue->fast_dispatch(message);
+    connection->recv_start_time = ceph::mono_clock::now();
+    connection->logger->tinc(l_msgr_running_fast_dispatch_time,
+                             connection->recv_start_time - fast_dispatch_time);
+    connection->lock.lock();
+  } else {
+    connection->dispatch_queue->enqueue(message, message->get_priority(),
+                                        connection->conn_id);
+  }
+
+  handle_message_ack(current_header.ack_seq);
+
+  // we might have been reused by another connection
+  // let's check if that is the case
+  if (state != READY) {
+    // yes, that was the case, let's do nothing
+    return nullptr;
+  }
+
+  if (need_dispatch_writer && connection->is_connected()) {
+    connection->center->dispatch_event_external(connection->write_handler);
+  }
+
+  return CONTINUE(read_frame);
+}
+
+
+CtPtr ProtocolV2::throttle_message() {
+  ldout(cct, 20) << __func__ << dendl;
+
+  if (connection->policy.throttler_messages) {
+    ldout(cct, 10) << __func__ << " wants " << 1
+                   << " message from policy throttler "
+                   << connection->policy.throttler_messages->get_current()
+                   << "/" << connection->policy.throttler_messages->get_max()
+                   << dendl;
+    if (!connection->policy.throttler_messages->get_or_fail()) {
+      ldout(cct, 10) << __func__ << " wants 1 message from policy throttle "
+                     << connection->policy.throttler_messages->get_current()
+                     << "/" << connection->policy.throttler_messages->get_max()
+                     << " failed, just wait." << dendl;
+      // following thread pool deal with th full message queue isn't a
+      // short time, so we can wait a ms.
+      if (connection->register_time_events.empty()) {
+        connection->register_time_events.insert(
+            connection->center->create_time_event(1000,
+                                                  connection->wakeup_handler));
+      }
+      return nullptr;
+    }
+  }
+
+  state = THROTTLE_BYTES;
+  return CONTINUE(throttle_bytes);
+}
+
+CtPtr ProtocolV2::throttle_bytes() {
+  ldout(cct, 20) << __func__ << dendl;
+
+  const size_t cur_msg_size = get_current_msg_size();
+  if (cur_msg_size) {
+    if (connection->policy.throttler_bytes) {
+      ldout(cct, 10) << __func__ << " wants " << cur_msg_size
+                     << " bytes from policy throttler "
+                     << connection->policy.throttler_bytes->get_current() << "/"
+                     << connection->policy.throttler_bytes->get_max() << dendl;
+      if (!connection->policy.throttler_bytes->get_or_fail(cur_msg_size)) {
+        ldout(cct, 10) << __func__ << " wants " << cur_msg_size
+                       << " bytes from policy throttler "
+                       << connection->policy.throttler_bytes->get_current()
+                       << "/" << connection->policy.throttler_bytes->get_max()
+                       << " failed, just wait." << dendl;
+        // following thread pool deal with th full message queue isn't a
+        // short time, so we can wait a ms.
+        if (connection->register_time_events.empty()) {
+          connection->register_time_events.insert(
+              connection->center->create_time_event(
+                  1000, connection->wakeup_handler));
+        }
+        return nullptr;
+      }
+    }
+  }
+
+  state = THROTTLE_DISPATCH_QUEUE;
+  return CONTINUE(throttle_dispatch_queue);
+}
+
+CtPtr ProtocolV2::throttle_dispatch_queue() {
+  ldout(cct, 20) << __func__ << dendl;
+
+  const size_t cur_msg_size = get_current_msg_size();
+  if (cur_msg_size) {
+    if (!connection->dispatch_queue->dispatch_throttler.get_or_fail(
+            cur_msg_size)) {
+      ldout(cct, 10)
+          << __func__ << " wants " << cur_msg_size
+          << " bytes from dispatch throttle "
+          << connection->dispatch_queue->dispatch_throttler.get_current() << "/"
+          << connection->dispatch_queue->dispatch_throttler.get_max()
+          << " failed, just wait." << dendl;
+      // following thread pool deal with th full message queue isn't a
+      // short time, so we can wait a ms.
+      if (connection->register_time_events.empty()) {
+        connection->register_time_events.insert(
+            connection->center->create_time_event(1000,
+                                                  connection->wakeup_handler));
+      }
+      return nullptr;
+    }
+  }
+
+  throttle_stamp = ceph_clock_now();
+  state = THROTTLE_DONE;
+
+  return read_frame_segment();
+}
+
+CtPtr ProtocolV2::handle_keepalive2(ceph::bufferlist &payload)
+{
+  ldout(cct, 20) << __func__
+		 << " payload.length()=" << payload.length() << dendl;
+
+  if (state != READY) {
+    lderr(cct) << __func__ << " not in ready state!" << dendl;
+    return _fault();
+  }
+
+  auto keepalive_frame = KeepAliveFrame::Decode(payload);
+
+  ldout(cct, 30) << __func__ << " got KEEPALIVE2 tag ..." << dendl;
+
+  connection->write_lock.lock();
+  auto keepalive_ack_frame = KeepAliveFrameAck::Encode(keepalive_frame.timestamp());
+  if (!append_frame(keepalive_ack_frame)) {
+    connection->write_lock.unlock();
+    return _fault();
+  }
+  connection->write_lock.unlock();
+
+  ldout(cct, 20) << __func__ << " got KEEPALIVE2 "
+                 << keepalive_frame.timestamp() << dendl;
+  connection->set_last_keepalive(ceph_clock_now());
+
+  if (is_connected()) {
+    connection->center->dispatch_event_external(connection->write_handler);
+  }
+
+  return CONTINUE(read_frame);
+}
+
+CtPtr ProtocolV2::handle_keepalive2_ack(ceph::bufferlist &payload)
+{
+  ldout(cct, 20) << __func__
+		 << " payload.length()=" << payload.length() << dendl;
+
+  if (state != READY) {
+    lderr(cct) << __func__ << " not in ready state!" << dendl;
+    return _fault();
+  }
+
+  auto keepalive_ack_frame = KeepAliveFrameAck::Decode(payload);
+  connection->set_last_keepalive_ack(keepalive_ack_frame.timestamp());
+  ldout(cct, 20) << __func__ << " got KEEPALIVE_ACK" << dendl;
+
+  return CONTINUE(read_frame);
+}
+
+CtPtr ProtocolV2::handle_message_ack(ceph::bufferlist &payload)
+{
+  ldout(cct, 20) << __func__
+		 << " payload.length()=" << payload.length() << dendl;
+
+  if (state != READY) {
+    lderr(cct) << __func__ << " not in ready state!" << dendl;
+    return _fault();
+  }
+
+  auto ack = AckFrame::Decode(payload);
+  handle_message_ack(ack.seq());
+  return CONTINUE(read_frame);
+}
+
+/* Client Protocol Methods */
+
+CtPtr ProtocolV2::start_client_banner_exchange() {
+  ldout(cct, 20) << __func__ << dendl;
+
+  INTERCEPT(1);
+
+  state = BANNER_CONNECTING;
+
+  global_seq = messenger->get_global_seq();
+
+  return _banner_exchange(CONTINUATION(post_client_banner_exchange));
+}
+
+CtPtr ProtocolV2::post_client_banner_exchange() {
+  ldout(cct, 20) << __func__ << dendl;
+
+  state = AUTH_CONNECTING;
+
+  return send_auth_request();
+}
+
+CtPtr ProtocolV2::send_auth_request(std::vector<uint32_t> &allowed_methods) {
+  ldout(cct, 20) << __func__ << " peer_type " << (int)connection->peer_type
+		 << " auth_client " << messenger->auth_client << dendl;
+  ceph_assert(messenger->auth_client);
+
+  bufferlist bl;
+  vector<uint32_t> preferred_modes;
+  auto am = auth_meta;
+  connection->lock.unlock();
+  int r = messenger->auth_client->get_auth_request(
+    connection, am.get(),
+    &am->auth_method, &preferred_modes, &bl);
+  connection->lock.lock();
+  if (state != AUTH_CONNECTING) {
+    ldout(cct, 1) << __func__ << " state changed!" << dendl;
+    return _fault();
+  }
+  if (r < 0) {
+    ldout(cct, 0) << __func__ << " get_initial_auth_request returned " << r
+		  << dendl;
+    stop();
+    connection->dispatch_queue->queue_reset(connection);
+    return nullptr;
+  }
+
+  INTERCEPT(9);
+
+  auto frame = AuthRequestFrame::Encode(auth_meta->auth_method, preferred_modes,
+                                        bl);
+  return WRITE(frame, "auth request", read_frame);
+}
+
+CtPtr ProtocolV2::handle_auth_bad_method(ceph::bufferlist &payload) {
+  ldout(cct, 20) << __func__
+		 << " payload.length()=" << payload.length() << dendl;
+
+  if (state != AUTH_CONNECTING) {
+    lderr(cct) << __func__ << " not in auth connect state!" << dendl;
+    return _fault();
+  }
+
+  auto bad_method = AuthBadMethodFrame::Decode(payload);
+  ldout(cct, 1) << __func__ << " method=" << bad_method.method()
+		<< " result " << cpp_strerror(bad_method.result())
+                << ", allowed methods=" << bad_method.allowed_methods()
+		<< ", allowed modes=" << bad_method.allowed_modes()
+                << dendl;
+  ceph_assert(messenger->auth_client);
+  auto am = auth_meta;
+  connection->lock.unlock();
+  int r = messenger->auth_client->handle_auth_bad_method(
+    connection,
+    am.get(),
+    bad_method.method(), bad_method.result(),
+    bad_method.allowed_methods(),
+    bad_method.allowed_modes());
+  connection->lock.lock();
+  if (state != AUTH_CONNECTING || r < 0) {
+    return _fault();
+  }
+  return send_auth_request(bad_method.allowed_methods());
+}
+
+CtPtr ProtocolV2::handle_auth_reply_more(ceph::bufferlist &payload)
+{
+  ldout(cct, 20) << __func__
+		 << " payload.length()=" << payload.length() << dendl;
+
+  if (state != AUTH_CONNECTING) {
+    lderr(cct) << __func__ << " not in auth connect state!" << dendl;
+    return _fault();
+  }
+
+  auto auth_more = AuthReplyMoreFrame::Decode(payload);
+  ldout(cct, 5) << __func__
+                << " auth reply more len=" << auth_more.auth_payload().length()
+                << dendl;
+  ceph_assert(messenger->auth_client);
+  ceph::bufferlist reply;
+  auto am = auth_meta;
+  connection->lock.unlock();
+  int r = messenger->auth_client->handle_auth_reply_more(
+    connection, am.get(), auth_more.auth_payload(), &reply);
+  connection->lock.lock();
+  if (state != AUTH_CONNECTING) {
+    ldout(cct, 1) << __func__ << " state changed!" << dendl;
+    return _fault();
+  }
+  if (r < 0) {
+    lderr(cct) << __func__ << " auth_client handle_auth_reply_more returned "
+	       << r << dendl;
+    return _fault();
+  }
+  auto more_reply = AuthRequestMoreFrame::Encode(reply);
+  return WRITE(more_reply, "auth request more", read_frame);
+}
+
+CtPtr ProtocolV2::handle_auth_done(ceph::bufferlist &payload)
+{
+  ldout(cct, 20) << __func__
+		 << " payload.length()=" << payload.length() << dendl;
+
+  if (state != AUTH_CONNECTING) {
+    lderr(cct) << __func__ << " not in auth connect state!" << dendl;
+    return _fault();
+  }
+
+  auto auth_done = AuthDoneFrame::Decode(payload);
+
+  ceph_assert(messenger->auth_client);
+  auto am = auth_meta;
+  connection->lock.unlock();
+  int r = messenger->auth_client->handle_auth_done(
+    connection,
+    am.get(),
+    auth_done.global_id(),
+    auth_done.con_mode(),
+    auth_done.auth_payload(),
+    &am->session_key,
+    &am->connection_secret);
+  connection->lock.lock();
+  if (state != AUTH_CONNECTING) {
+    ldout(cct, 1) << __func__ << " state changed!" << dendl;
+    return _fault();
+  }
+  if (r < 0) {
+    return _fault();
+  }
+  auth_meta->con_mode = auth_done.con_mode();
+  bool is_rev1 = HAVE_MSGR2_FEATURE(peer_supported_features, REVISION_1);
+  session_stream_handlers = ceph::crypto::onwire::rxtx_t::create_handler_pair(
+      cct, *auth_meta, /*new_nonce_format=*/is_rev1, /*crossed=*/false);
+
+  state = AUTH_CONNECTING_SIGN;
+
+  const auto sig = auth_meta->session_key.empty() ? sha256_digest_t() :
+    auth_meta->session_key.hmac_sha256(cct, pre_auth.rxbuf);
+  auto sig_frame = AuthSignatureFrame::Encode(sig);
+  pre_auth.enabled = false;
+  pre_auth.rxbuf.clear();
+  return WRITE(sig_frame, "auth signature", read_frame);
+}
+
+CtPtr ProtocolV2::finish_client_auth() {
+  if (!server_cookie) {
+    ceph_assert(connect_seq == 0);
+    state = SESSION_CONNECTING;
+    return send_client_ident();
+  } else {  // reconnecting to previous session
+    state = SESSION_RECONNECTING;
+    ceph_assert(connect_seq > 0);
+    return send_reconnect();
+  }
+}
+
+CtPtr ProtocolV2::send_client_ident() {
+  ldout(cct, 20) << __func__ << dendl;
+
+  if (!connection->policy.lossy && !client_cookie) {
+    client_cookie = ceph::util::generate_random_number<uint64_t>(1, -1ll);
+  }
+
+  uint64_t flags = 0;
+  if (connection->policy.lossy) {
+    flags |= CEPH_MSG_CONNECT_LOSSY;
+  }
+
+  auto client_ident = ClientIdentFrame::Encode(
+      messenger->get_myaddrs(),
+      connection->target_addr,
+      messenger->get_myname().num(),
+      global_seq,
+      connection->policy.features_supported,
+      connection->policy.features_required | msgr2_required,
+      flags,
+      client_cookie);
+
+  ldout(cct, 5) << __func__ << " sending identification: "
+                << "addrs=" << messenger->get_myaddrs()
+                << " target=" << connection->target_addr
+                << " gid=" << messenger->get_myname().num()
+                << " global_seq=" << global_seq
+                << " features_supported=" << std::hex
+                << connection->policy.features_supported
+                << " features_required="
+		            << (connection->policy.features_required | msgr2_required)
+                << " flags=" << flags
+                << " cookie=" << client_cookie << std::dec << dendl;
+
+  INTERCEPT(11);
+
+  return WRITE(client_ident, "client ident", read_frame);
+}
+
+CtPtr ProtocolV2::send_reconnect() {
+  ldout(cct, 20) << __func__ << dendl;
+
+  auto reconnect = ReconnectFrame::Encode(messenger->get_myaddrs(),
+                                          client_cookie,
+                                          server_cookie,
+                                          global_seq,
+                                          connect_seq,
+                                          in_seq);
+
+  ldout(cct, 5) << __func__ << " reconnect to session: client_cookie="
+                << std::hex << client_cookie << " server_cookie="
+                << server_cookie << std::dec
+                << " gs=" << global_seq << " cs=" << connect_seq
+                << " ms=" << in_seq << dendl;
+
+  INTERCEPT(13);
+
+  return WRITE(reconnect, "reconnect", read_frame);
+}
+
+CtPtr ProtocolV2::handle_ident_missing_features(ceph::bufferlist &payload)
+{
+  ldout(cct, 20) << __func__
+		 << " payload.length()=" << payload.length() << dendl;
+
+  if (state != SESSION_CONNECTING) {
+    lderr(cct) << __func__ << " not in session connect state!" << dendl;
+    return _fault();
+  }
+
+  auto ident_missing =
+      IdentMissingFeaturesFrame::Decode(payload);
+  lderr(cct) << __func__
+             << " client does not support all server features: " << std::hex
+             << ident_missing.features() << std::dec << dendl;
+
+  return _fault();
+}
+
+CtPtr ProtocolV2::handle_session_reset(ceph::bufferlist &payload)
+{
+  ldout(cct, 20) << __func__
+		 << " payload.length()=" << payload.length() << dendl;
+
+  if (state != SESSION_RECONNECTING) {
+    lderr(cct) << __func__ << " not in session reconnect state!" << dendl;
+    return _fault();
+  }
+
+  auto reset = ResetFrame::Decode(payload);
+
+  ldout(cct, 1) << __func__ << " received session reset full=" << reset.full()
+                << dendl;
+  if (reset.full()) {
+    reset_session();
+  } else {
+    server_cookie = 0;
+    connect_seq = 0;
+    in_seq = 0;
+  }
+
+  state = SESSION_CONNECTING;
+  return send_client_ident();
+}
+
+CtPtr ProtocolV2::handle_session_retry(ceph::bufferlist &payload)
+{
+  ldout(cct, 20) << __func__
+		 << " payload.length()=" << payload.length() << dendl;
+
+  if (state != SESSION_RECONNECTING) {
+    lderr(cct) << __func__ << " not in session reconnect state!" << dendl;
+    return _fault();
+  }
+
+  auto retry = RetryFrame::Decode(payload);
+  connect_seq = retry.connect_seq() + 1;
+
+  ldout(cct, 1) << __func__
+                << " received session retry connect_seq=" << retry.connect_seq()
+                << ", inc to cs=" << connect_seq << dendl;
+
+  return send_reconnect();
+}
+
+CtPtr ProtocolV2::handle_session_retry_global(ceph::bufferlist &payload)
+{
+  ldout(cct, 20) << __func__
+		 << " payload.length()=" << payload.length() << dendl;
+
+  if (state != SESSION_RECONNECTING) {
+    lderr(cct) << __func__ << " not in session reconnect state!" << dendl;
+    return _fault();
+  }
+
+  auto retry = RetryGlobalFrame::Decode(payload);
+  global_seq = messenger->get_global_seq(retry.global_seq());
+
+  ldout(cct, 1) << __func__ << " received session retry global global_seq="
+                << retry.global_seq() << ", choose new gs=" << global_seq
+                << dendl;
+
+  return send_reconnect();
+}
+
+CtPtr ProtocolV2::handle_wait(ceph::bufferlist &payload) {
+  ldout(cct, 20) << __func__
+		 << " received WAIT (connection race)"
+		 << " payload.length()=" << payload.length()
+		 << dendl;
+
+  if (state != SESSION_CONNECTING && state != SESSION_RECONNECTING) {
+    lderr(cct) << __func__ << " not in session (re)connect state!" << dendl;
+    return _fault();
+  }
+
+  state = WAIT;
+  WaitFrame::Decode(payload);
+  return _fault();
+}
+
+CtPtr ProtocolV2::handle_reconnect_ok(ceph::bufferlist &payload)
+{
+  ldout(cct, 20) << __func__
+		 << " payload.length()=" << payload.length() << dendl;
+
+  if (state != SESSION_RECONNECTING) {
+    lderr(cct) << __func__ << " not in session reconnect state!" << dendl;
+    return _fault();
+  }
+
+  auto reconnect_ok = ReconnectOkFrame::Decode(payload);
+  ldout(cct, 5) << __func__
+                << " reconnect accepted: sms=" << reconnect_ok.msg_seq()
+                << dendl;
+
+  out_seq = discard_requeued_up_to(out_seq, reconnect_ok.msg_seq());
+
+  backoff = utime_t();
+  ldout(cct, 10) << __func__ << " reconnect success " << connect_seq
+                 << ", lossy = " << connection->policy.lossy << ", features "
+                 << connection->get_features() << dendl;
+
+  if (connection->delay_state) {
+    ceph_assert(connection->delay_state->ready());
+  }
+
+  connection->dispatch_queue->queue_connect(connection);
+  messenger->ms_deliver_handle_fast_connect(connection);
+
+  return ready();
+}
+
+CtPtr ProtocolV2::handle_server_ident(ceph::bufferlist &payload)
+{
+  ldout(cct, 20) << __func__
+		 << " payload.length()=" << payload.length() << dendl;
+
+  if (state != SESSION_CONNECTING) {
+    lderr(cct) << __func__ << " not in session connect state!" << dendl;
+    return _fault();
+  }
+
+  auto server_ident = ServerIdentFrame::Decode(payload);
+  ldout(cct, 5) << __func__ << " received server identification:"
+                << " addrs=" << server_ident.addrs()
+                << " gid=" << server_ident.gid()
+                << " global_seq=" << server_ident.global_seq()
+                << " features_supported=" << std::hex
+                << server_ident.supported_features()
+                << " features_required=" << server_ident.required_features()
+                << " flags=" << server_ident.flags()
+                << " cookie=" << server_ident.cookie() << std::dec << dendl;
+
+  // is this who we intended to talk to?
+  // be a bit forgiving here, since we may be connecting based on addresses parsed out
+  // of mon_host or something.
+  if (!server_ident.addrs().contains(connection->target_addr)) {
+    ldout(cct,1) << __func__ << " peer identifies as " << server_ident.addrs()
+		 << ", does not include " << connection->target_addr << dendl;
+    return _fault();
+  }
+
+  server_cookie = server_ident.cookie();
+
+  connection->set_peer_addrs(server_ident.addrs());
+  peer_name = entity_name_t(connection->get_peer_type(), server_ident.gid());
+  connection->set_features(server_ident.supported_features() &
+                           connection->policy.features_supported);
+  peer_global_seq = server_ident.global_seq();
+
+  connection->policy.lossy = server_ident.flags() & CEPH_MSG_CONNECT_LOSSY;
+
+  backoff = utime_t();
+  ldout(cct, 10) << __func__ << " connect success " << connect_seq
+                 << ", lossy = " << connection->policy.lossy << ", features "
+                 << connection->get_features() << dendl;
+
+  if (connection->delay_state) {
+    ceph_assert(connection->delay_state->ready());
+  }
+
+  connection->dispatch_queue->queue_connect(connection);
+  messenger->ms_deliver_handle_fast_connect(connection);
+
+  return ready();
+}
+
+/* Server Protocol Methods */
+
+CtPtr ProtocolV2::start_server_banner_exchange() {
+  ldout(cct, 20) << __func__ << dendl;
+
+  INTERCEPT(2);
+
+  state = BANNER_ACCEPTING;
+
+  return _banner_exchange(CONTINUATION(post_server_banner_exchange));
+}
+
+CtPtr ProtocolV2::post_server_banner_exchange() {
+  ldout(cct, 20) << __func__ << dendl;
+
+  state = AUTH_ACCEPTING;
+
+  return CONTINUE(read_frame);
+}
+
+CtPtr ProtocolV2::handle_auth_request(ceph::bufferlist &payload) {
+  ldout(cct, 20) << __func__ << " payload.length()=" << payload.length()
+                 << dendl;
+
+  if (state != AUTH_ACCEPTING) {
+    lderr(cct) << __func__ << " not in auth accept state!" << dendl;
+    return _fault();
+  }
+
+  auto request = AuthRequestFrame::Decode(payload);
+  ldout(cct, 10) << __func__ << " AuthRequest(method=" << request.method()
+		 << ", preferred_modes=" << request.preferred_modes()
+                 << ", payload_len=" << request.auth_payload().length() << ")"
+                 << dendl;
+  auth_meta->auth_method = request.method();
+  auth_meta->con_mode = messenger->auth_server->pick_con_mode(
+    connection->get_peer_type(), auth_meta->auth_method,
+    request.preferred_modes());
+  if (auth_meta->con_mode == CEPH_CON_MODE_UNKNOWN) {
+    return _auth_bad_method(-EOPNOTSUPP);
+  }
+  return _handle_auth_request(request.auth_payload(), false);
+}
+
+CtPtr ProtocolV2::_auth_bad_method(int r)
+{
+  ceph_assert(r < 0);
+  std::vector<uint32_t> allowed_methods;
+  std::vector<uint32_t> allowed_modes;
+  messenger->auth_server->get_supported_auth_methods(
+    connection->get_peer_type(), &allowed_methods, &allowed_modes);
+  ldout(cct, 1) << __func__ << " auth_method " << auth_meta->auth_method
+		<< " r " << cpp_strerror(r)
+		<< ", allowed_methods " << allowed_methods
+		<< ", allowed_modes " << allowed_modes
+		<< dendl;
+  auto bad_method = AuthBadMethodFrame::Encode(auth_meta->auth_method, r,
+                                               allowed_methods, allowed_modes);
+  return WRITE(bad_method, "bad auth method", read_frame);
+}
+
+CtPtr ProtocolV2::_handle_auth_request(bufferlist& auth_payload, bool more)
+{
+  if (!messenger->auth_server) {
+    return _fault();
+  }
+  bufferlist reply;
+  auto am = auth_meta;
+  connection->lock.unlock();
+  int r = messenger->auth_server->handle_auth_request(
+    connection, am.get(),
+    more, am->auth_method, auth_payload,
+    &reply);
+  connection->lock.lock();
+  if (state != AUTH_ACCEPTING && state != AUTH_ACCEPTING_MORE) {
+    ldout(cct, 1) << __func__
+                  << " state changed while accept, it must be mark_down"
+                  << dendl;
+    ceph_assert(state == CLOSED);
+    return _fault();
+  }
+  if (r == 1) {
+    INTERCEPT(10);
+    state = AUTH_ACCEPTING_SIGN;
+
+    auto auth_done = AuthDoneFrame::Encode(connection->peer_global_id,
+                                           auth_meta->con_mode,
+                                           reply);
+    return WRITE(auth_done, "auth done", finish_auth);
+  } else if (r == 0) {
+    state = AUTH_ACCEPTING_MORE;
+
+    auto more = AuthReplyMoreFrame::Encode(reply);
+    return WRITE(more, "auth reply more", read_frame);
+  } else if (r == -EBUSY) {
+    // kick the client and maybe they'll come back later
+    return _fault();
+  } else {
+    return _auth_bad_method(r);
+  }
+}
+
+CtPtr ProtocolV2::finish_auth()
+{
+  ceph_assert(auth_meta);
+  // TODO: having a possibility to check whether we're server or client could
+  // allow reusing finish_auth().
+  bool is_rev1 = HAVE_MSGR2_FEATURE(peer_supported_features, REVISION_1);
+  session_stream_handlers = ceph::crypto::onwire::rxtx_t::create_handler_pair(
+      cct, *auth_meta, /*new_nonce_format=*/is_rev1, /*crossed=*/true);
+
+  const auto sig = auth_meta->session_key.empty() ? sha256_digest_t() :
+    auth_meta->session_key.hmac_sha256(cct, pre_auth.rxbuf);
+  auto sig_frame = AuthSignatureFrame::Encode(sig);
+  pre_auth.enabled = false;
+  pre_auth.rxbuf.clear();
+  return WRITE(sig_frame, "auth signature", read_frame);
+}
+
+CtPtr ProtocolV2::handle_auth_request_more(ceph::bufferlist &payload)
+{
+  ldout(cct, 20) << __func__
+		 << " payload.length()=" << payload.length() << dendl;
+
+  if (state != AUTH_ACCEPTING_MORE) {
+    lderr(cct) << __func__ << " not in auth accept more state!" << dendl;
+    return _fault();
+  }
+
+  auto auth_more = AuthRequestMoreFrame::Decode(payload);
+  return _handle_auth_request(auth_more.auth_payload(), true);
+}
+
+CtPtr ProtocolV2::handle_auth_signature(ceph::bufferlist &payload)
+{
+  ldout(cct, 20) << __func__
+		 << " payload.length()=" << payload.length() << dendl;
+
+  if (state != AUTH_ACCEPTING_SIGN && state != AUTH_CONNECTING_SIGN) {
+    lderr(cct) << __func__
+               << " pre-auth verification signature seen in wrong state!"
+               << dendl;
+    return _fault();
+  }
+
+  auto sig_frame = AuthSignatureFrame::Decode(payload);
+
+  const auto actual_tx_sig = auth_meta->session_key.empty() ?
+    sha256_digest_t() : auth_meta->session_key.hmac_sha256(cct, pre_auth.txbuf);
+  if (sig_frame.signature() != actual_tx_sig) {
+    ldout(cct, 2) << __func__ << " pre-auth signature mismatch"
+                  << " actual_tx_sig=" << actual_tx_sig
+                  << " sig_frame.signature()=" << sig_frame.signature()
+                  << dendl;
+    return _fault();
+  } else {
+    ldout(cct, 20) << __func__ << " pre-auth signature success"
+                   << " sig_frame.signature()=" << sig_frame.signature()
+                   << dendl;
+    pre_auth.txbuf.clear();
+  }
+
+  if (state == AUTH_ACCEPTING_SIGN) {
+    // server had sent AuthDone and client responded with correct pre-auth
+    // signature. we can start accepting new sessions/reconnects.
+    state = SESSION_ACCEPTING;
+    return CONTINUE(read_frame);
+  } else if (state == AUTH_CONNECTING_SIGN) {
+    // this happened at client side
+    return finish_client_auth();
+  } else {
+    ceph_assert_always("state corruption" == nullptr);
+  }
+}
+
+CtPtr ProtocolV2::handle_client_ident(ceph::bufferlist &payload)
+{
+  ldout(cct, 20) << __func__
+		 << " payload.length()=" << payload.length() << dendl;
+
+  if (state != SESSION_ACCEPTING) {
+    lderr(cct) << __func__ << " not in session accept state!" << dendl;
+    return _fault();
+  }
+
+  auto client_ident = ClientIdentFrame::Decode(payload);
+
+  ldout(cct, 5) << __func__ << " received client identification:"
+                << " addrs=" << client_ident.addrs()
+		            << " target=" << client_ident.target_addr()
+                << " gid=" << client_ident.gid()
+                << " global_seq=" << client_ident.global_seq()
+                << " features_supported=" << std::hex
+                << client_ident.supported_features()
+                << " features_required=" << client_ident.required_features()
+                << " flags=" << client_ident.flags()
+                << " cookie=" << client_ident.cookie() << std::dec << dendl;
+
+  if (client_ident.addrs().empty() ||
+      client_ident.addrs().front() == entity_addr_t()) {
+    ldout(cct,5) << __func__ << " oops, client_ident.addrs() is empty" << dendl;
+    return _fault();  // a v2 peer should never do this
+  }
+  if (!messenger->get_myaddrs().contains(client_ident.target_addr())) {
+    ldout(cct,5) << __func__ << " peer is trying to reach "
+		 << client_ident.target_addr()
+		 << " which is not us (" << messenger->get_myaddrs() << ")"
+		 << dendl;
+    return _fault();
+  }
+
+  connection->set_peer_addrs(client_ident.addrs());
+  connection->target_addr = connection->_infer_target_addr(client_ident.addrs());
+
+  peer_name = entity_name_t(connection->get_peer_type(), client_ident.gid());
+  connection->set_peer_id(client_ident.gid());
+
+  client_cookie = client_ident.cookie();
+
+  uint64_t feat_missing =
+    (connection->policy.features_required | msgr2_required) &
+    ~(uint64_t)client_ident.supported_features();
+  if (feat_missing) {
+    ldout(cct, 1) << __func__ << " peer missing required features " << std::hex
+                  << feat_missing << std::dec << dendl;
+    auto ident_missing_features =
+        IdentMissingFeaturesFrame::Encode(feat_missing);
+
+    return WRITE(ident_missing_features, "ident missing features", read_frame);
+  }
+
+  connection_features =
+      client_ident.supported_features() & connection->policy.features_supported;
+
+  peer_global_seq = client_ident.global_seq();
+
+  // Looks good so far, let's check if there is already an existing connection
+  // to this peer.
+
+  connection->lock.unlock();
+  AsyncConnectionRef existing = messenger->lookup_conn(*connection->peer_addrs);
+
+  if (existing &&
+      existing->protocol->proto_type != 2) {
+    ldout(cct,1) << __func__ << " existing " << existing << " proto "
+		 << existing->protocol.get() << " version is "
+		 << existing->protocol->proto_type << ", marking down" << dendl;
+    existing->mark_down();
+    existing = nullptr;
+  }
+
+  connection->inject_delay();
+
+  connection->lock.lock();
+  if (state != SESSION_ACCEPTING) {
+    ldout(cct, 1) << __func__
+                  << " state changed while accept, it must be mark_down"
+                  << dendl;
+    ceph_assert(state == CLOSED);
+    return _fault();
+  }
+
+  if (existing) {
+    return handle_existing_connection(existing);
+  }
+
+  // if everything is OK reply with server identification
+  return send_server_ident();
+}
+
+CtPtr ProtocolV2::handle_reconnect(ceph::bufferlist &payload)
+{
+  ldout(cct, 20) << __func__
+		 << " payload.length()=" << payload.length() << dendl;
+
+  if (state != SESSION_ACCEPTING) {
+    lderr(cct) << __func__ << " not in session accept state!" << dendl;
+    return _fault();
+  }
+
+  auto reconnect = ReconnectFrame::Decode(payload);
+
+  ldout(cct, 5) << __func__
+                << " received reconnect:" 
+                << " client_cookie=" << std::hex << reconnect.client_cookie()
+                << " server_cookie=" << reconnect.server_cookie() << std::dec
+                << " gs=" << reconnect.global_seq()
+                << " cs=" << reconnect.connect_seq()
+                << " ms=" << reconnect.msg_seq()
+		            << dendl;
+
+  // Should we check if one of the ident.addrs match connection->target_addr
+  // as we do in ProtocolV1?
+  connection->set_peer_addrs(reconnect.addrs());
+  connection->target_addr = connection->_infer_target_addr(reconnect.addrs());
+  peer_global_seq = reconnect.global_seq();
+
+  connection->lock.unlock();
+  AsyncConnectionRef existing = messenger->lookup_conn(*connection->peer_addrs);
+
+  if (existing &&
+      existing->protocol->proto_type != 2) {
+    ldout(cct,1) << __func__ << " existing " << existing << " proto "
+		 << existing->protocol.get() << " version is "
+		 << existing->protocol->proto_type << ", marking down" << dendl;
+    existing->mark_down();
+    existing = nullptr;
+  }
+
+  connection->inject_delay();
+
+  connection->lock.lock();
+  if (state != SESSION_ACCEPTING) {
+    ldout(cct, 1) << __func__
+                  << " state changed while accept, it must be mark_down"
+                  << dendl;
+    ceph_assert(state == CLOSED);
+    return _fault();
+  }
+
+  if (!existing) {
+    // there is no existing connection therefore cannot reconnect to previous
+    // session
+    ldout(cct, 0) << __func__
+                  << " no existing connection exists, reseting client" << dendl;
+    auto reset = ResetFrame::Encode(true);
+    return WRITE(reset, "session reset", read_frame);
+  }
+
+  std::lock_guard<std::mutex> l(existing->lock);
+
+  ProtocolV2 *exproto = dynamic_cast<ProtocolV2 *>(existing->protocol.get());
+  if (!exproto) {
+    ldout(cct, 1) << __func__ << " existing=" << existing << dendl;
+    ceph_assert(false);
+  }
+
+  if (exproto->state == CLOSED) {
+    ldout(cct, 5) << __func__ << " existing " << existing
+                  << " already closed. Reseting client" << dendl;
+    auto reset = ResetFrame::Encode(true);
+    return WRITE(reset, "session reset", read_frame);
+  }
+
+  if (exproto->replacing) {
+    ldout(cct, 1) << __func__
+                  << " existing racing replace happened while replacing."
+                  << " existing=" << existing << dendl;
+    auto retry = RetryGlobalFrame::Encode(exproto->peer_global_seq);
+    return WRITE(retry, "session retry", read_frame);
+  }
+
+  if (exproto->client_cookie != reconnect.client_cookie()) {
+    ldout(cct, 1) << __func__ << " existing=" << existing
+                  << " client cookie mismatch, I must have reseted:"
+                  << " cc=" << std::hex << exproto->client_cookie
+                  << " rcc=" << reconnect.client_cookie()
+                  << ", reseting client." << std::dec
+                  << dendl;
+    auto reset = ResetFrame::Encode(connection->policy.resetcheck);
+    return WRITE(reset, "session reset", read_frame);
+  } else if (exproto->server_cookie == 0) {
+    // this happens when:
+    //   - a connects to b
+    //   - a sends client_ident
+    //   - b gets client_ident, sends server_ident and sets cookie X
+    //   - connection fault
+    //   - b reconnects to a with cookie X, connect_seq=1
+    //   - a has cookie==0
+    ldout(cct, 1) << __func__ << " I was a client and didn't received the"
+                  << " server_ident. Asking peer to resume session"
+                  << " establishment" << dendl;
+    auto reset = ResetFrame::Encode(false);
+    return WRITE(reset, "session reset", read_frame);
+  }
+
+  if (exproto->peer_global_seq > reconnect.global_seq()) {
+    ldout(cct, 5) << __func__
+                  << " stale global_seq: sgs=" << exproto->peer_global_seq
+                  << " cgs=" << reconnect.global_seq()
+                  << ", ask client to retry global" << dendl;
+    auto retry = RetryGlobalFrame::Encode(exproto->peer_global_seq);
+
+    INTERCEPT(18);
+
+    return WRITE(retry, "session retry", read_frame);
+  }
+
+  if (exproto->connect_seq > reconnect.connect_seq()) {
+    ldout(cct, 5) << __func__
+                  << " stale connect_seq scs=" << exproto->connect_seq
+                  << " ccs=" << reconnect.connect_seq()
+                  << " , ask client to retry" << dendl;
+    auto retry = RetryFrame::Encode(exproto->connect_seq);
+    return WRITE(retry, "session retry", read_frame);
+  }
+
+  if (exproto->connect_seq == reconnect.connect_seq()) {
+    // reconnect race: both peers are sending reconnect messages
+    if (existing->peer_addrs->msgr2_addr() >
+            messenger->get_myaddrs().msgr2_addr() &&
+        !existing->policy.server) {
+      // the existing connection wins
+      ldout(cct, 1)
+          << __func__
+          << " reconnect race detected, this connection loses to existing="
+          << existing << dendl;
+
+      auto wait = WaitFrame::Encode();
+      return WRITE(wait, "wait", read_frame);
+    } else {
+      // this connection wins
+      ldout(cct, 1) << __func__
+                    << " reconnect race detected, replacing existing="
+                    << existing << " socket by this connection's socket"
+                    << dendl;
+    }
+  }
+
+  ldout(cct, 1) << __func__ << " reconnect to existing=" << existing << dendl;
+
+  reconnecting = true;
+
+  // everything looks good
+  exproto->connect_seq = reconnect.connect_seq();
+  exproto->message_seq = reconnect.msg_seq();
+
+  return reuse_connection(existing, exproto);
+}
+
+CtPtr ProtocolV2::handle_existing_connection(AsyncConnectionRef existing) {
+  ldout(cct, 20) << __func__ << " existing=" << existing << dendl;
+
+  std::lock_guard<std::mutex> l(existing->lock);
+
+  ProtocolV2 *exproto = dynamic_cast<ProtocolV2 *>(existing->protocol.get());
+  if (!exproto) {
+    ldout(cct, 1) << __func__ << " existing=" << existing << dendl;
+    ceph_assert(false);
+  }
+
+  if (exproto->state == CLOSED) {
+    ldout(cct, 1) << __func__ << " existing " << existing << " already closed."
+                  << dendl;
+    return send_server_ident();
+  }
+
+  if (exproto->replacing) {
+    ldout(cct, 1) << __func__
+                  << " existing racing replace happened while replacing."
+                  << " existing=" << existing << dendl;
+    auto wait = WaitFrame::Encode();
+    return WRITE(wait, "wait", read_frame);
+  }
+
+  if (exproto->peer_global_seq > peer_global_seq) {
+    ldout(cct, 1) << __func__ << " this is a stale connection, peer_global_seq="
+                  << peer_global_seq
+                  << " existing->peer_global_seq=" << exproto->peer_global_seq
+                  << ", stopping this connection." << dendl;
+    stop();
+    connection->dispatch_queue->queue_reset(connection);
+    return nullptr;
+  }
+
+  if (existing->policy.lossy) {
+    // existing connection can be thrown out in favor of this one
+    ldout(cct, 1)
+        << __func__ << " existing=" << existing
+        << " is a lossy channel. Stopping existing in favor of this connection"
+        << dendl;
+    existing->protocol->stop();
+    existing->dispatch_queue->queue_reset(existing.get());
+    return send_server_ident();
+  }
+
+  if (exproto->server_cookie && exproto->client_cookie &&
+      exproto->client_cookie != client_cookie) {
+    // Found previous session
+    // peer has reseted and we're going to reuse the existing connection
+    // by replacing the communication socket
+    ldout(cct, 1) << __func__ << " found previous session existing=" << existing
+                  << ", peer must have reseted." << dendl;
+    if (connection->policy.resetcheck) {
+      exproto->reset_session();
+    }
+    return reuse_connection(existing, exproto);
+  }
+
+  if (exproto->client_cookie == client_cookie) {
+    // session establishment interrupted between client_ident and server_ident,
+    // continuing...
+    ldout(cct, 1) << __func__ << " found previous session existing=" << existing
+                  << ", continuing session establishment." << dendl;
+    return reuse_connection(existing, exproto);
+  }
+
+  if (exproto->state == READY || exproto->state == STANDBY) {
+    ldout(cct, 1) << __func__ << " existing=" << existing
+                  << " is READY/STANDBY, lets reuse it" << dendl;
+    return reuse_connection(existing, exproto);
+  }
+
+  // Looks like a connection race: server and client are both connecting to
+  // each other at the same time.
+  if (connection->peer_addrs->msgr2_addr() <
+          messenger->get_myaddrs().msgr2_addr() ||
+      existing->policy.server) {
+    // this connection wins
+    ldout(cct, 1) << __func__
+                  << " connection race detected, replacing existing="
+                  << existing << " socket by this connection's socket" << dendl;
+    return reuse_connection(existing, exproto);
+  } else {
+    // the existing connection wins
+    ldout(cct, 1)
+        << __func__
+        << " connection race detected, this connection loses to existing="
+        << existing << dendl;
+    ceph_assert(connection->peer_addrs->msgr2_addr() >
+                messenger->get_myaddrs().msgr2_addr());
+
+    // make sure we follow through with opening the existing
+    // connection (if it isn't yet open) since we know the peer
+    // has something to send to us.
+    existing->send_keepalive();
+    auto wait = WaitFrame::Encode();
+    return WRITE(wait, "wait", read_frame);
+  }
+}
+
+CtPtr ProtocolV2::reuse_connection(AsyncConnectionRef existing,
+                                   ProtocolV2 *exproto) {
+  ldout(cct, 20) << __func__ << " existing=" << existing
+                 << " reconnect=" << reconnecting << dendl;
+
+  connection->inject_delay();
+
+  std::lock_guard<std::mutex> l(existing->write_lock);
+
+  connection->center->delete_file_event(connection->cs.fd(),
+                                        EVENT_READABLE | EVENT_WRITABLE);
+
+  if (existing->delay_state) {
+    existing->delay_state->flush();
+    ceph_assert(!connection->delay_state);
+  }
+  exproto->reset_recv_state();
+  exproto->pre_auth.enabled = false;
+
+  if (!reconnecting) {
+    exproto->peer_supported_features = peer_supported_features;
+    exproto->tx_frame_asm.set_is_rev1(tx_frame_asm.get_is_rev1());
+    exproto->rx_frame_asm.set_is_rev1(rx_frame_asm.get_is_rev1());
+
+    exproto->client_cookie = client_cookie;
+    exproto->peer_name = peer_name;
+    exproto->connection_features = connection_features;
+    existing->set_features(connection_features);
+  }
+  exproto->peer_global_seq = peer_global_seq;
+
+  ceph_assert(connection->center->in_thread());
+  auto temp_cs = std::move(connection->cs);
+  EventCenter *new_center = connection->center;
+  Worker *new_worker = connection->worker;
+  // we can steal the session_stream_handlers under the assumption
+  // this happens in the event center's thread as there should be
+  // no user outside its boundaries (simlarly to e.g. outgoing_bl).
+  auto temp_stream_handlers = std::move(session_stream_handlers);
+  exproto->auth_meta = auth_meta;
+
+  ldout(messenger->cct, 5) << __func__ << " stop myself to swap existing"
+                           << dendl;
+
+  // avoid _stop shutdown replacing socket
+  // queue a reset on the new connection, which we're dumping for the old
+  stop();
+
+  connection->dispatch_queue->queue_reset(connection);
+
+  exproto->can_write = false;
+  exproto->write_in_progress = false;
+  exproto->reconnecting = reconnecting;
+  exproto->replacing = true;
+  existing->state_offset = 0;
+  // avoid previous thread modify event
+  exproto->state = NONE;
+  existing->state = AsyncConnection::STATE_NONE;
+  // Discard existing prefetch buffer in `recv_buf`
+  existing->recv_start = existing->recv_end = 0;
+  // there shouldn't exist any buffer
+  ceph_assert(connection->recv_start == connection->recv_end);
+
+  auto deactivate_existing = std::bind(
+      [ existing,
+        new_worker,
+        new_center,
+        exproto,
+        temp_stream_handlers=std::move(temp_stream_handlers)
+      ](ConnectedSocket &cs) mutable {
+        // we need to delete time event in original thread
+        {
+          std::lock_guard<std::mutex> l(existing->lock);
+          existing->write_lock.lock();
+          exproto->requeue_sent();
+          // XXX: do we really need the locking for `outgoing_bl`? There is
+          // a comment just above its definition saying "lockfree, only used
+          // in own thread". I'm following lockfull schema just in the case.
+          // From performance point of view it should be fine – this happens
+          // far away from hot paths.
+          existing->outgoing_bl.clear();
+          existing->open_write = false;
+          exproto->session_stream_handlers = std::move(temp_stream_handlers);
+          existing->write_lock.unlock();
+          if (exproto->state == NONE) {
+            existing->shutdown_socket();
+            existing->cs = std::move(cs);
+            existing->worker->references--;
+            new_worker->references++;
+            existing->logger = new_worker->get_perf_counter();
+            existing->worker = new_worker;
+            existing->center = new_center;
+            if (existing->delay_state)
+              existing->delay_state->set_center(new_center);
+          } else if (exproto->state == CLOSED) {
+            auto back_to_close = std::bind(
+                [](ConnectedSocket &cs) mutable { cs.close(); }, std::move(cs));
+            new_center->submit_to(new_center->get_id(),
+                                  std::move(back_to_close), true);
+            return;
+          } else {
+            ceph_abort();
+          }
+        }
+
+        // Before changing existing->center, it may already exists some
+        // events in existing->center's queue. Then if we mark down
+        // `existing`, it will execute in another thread and clean up
+        // connection. Previous event will result in segment fault
+        auto transfer_existing = [existing, exproto]() mutable {
+          std::lock_guard<std::mutex> l(existing->lock);
+          if (exproto->state == CLOSED) return;
+          ceph_assert(exproto->state == NONE);
+
+          exproto->state = SESSION_ACCEPTING;
+          // we have called shutdown_socket above
+          ceph_assert(existing->last_tick_id == 0);
+          // restart timer since we are going to re-build connection
+          existing->last_connect_started = ceph::coarse_mono_clock::now();
+          existing->last_tick_id = existing->center->create_time_event(
+            existing->connect_timeout_us, existing->tick_handler);
+          existing->state = AsyncConnection::STATE_CONNECTION_ESTABLISHED;
+          existing->center->create_file_event(existing->cs.fd(), EVENT_READABLE,
+                                              existing->read_handler);
+          if (!exproto->reconnecting) {
+            exproto->run_continuation(exproto->send_server_ident());
+          } else {
+            exproto->run_continuation(exproto->send_reconnect_ok());
+          }
+        };
+        if (existing->center->in_thread())
+          transfer_existing();
+        else
+          existing->center->submit_to(existing->center->get_id(),
+                                      std::move(transfer_existing), true);
+      },
+      std::move(temp_cs));
+
+  existing->center->submit_to(existing->center->get_id(),
+                              std::move(deactivate_existing), true);
+  return nullptr;
+}
+
+CtPtr ProtocolV2::send_server_ident() {
+  ldout(cct, 20) << __func__ << dendl;
+
+  // this is required for the case when this connection is being replaced
+  out_seq = discard_requeued_up_to(out_seq, 0);
+  in_seq = 0;
+
+  if (!connection->policy.lossy) {
+    server_cookie = ceph::util::generate_random_number<uint64_t>(1, -1ll);
+  }
+
+  uint64_t flags = 0;
+  if (connection->policy.lossy) {
+    flags = flags | CEPH_MSG_CONNECT_LOSSY;
+  }
+
+  uint64_t gs = messenger->get_global_seq();
+  auto server_ident = ServerIdentFrame::Encode(
+          messenger->get_myaddrs(),
+          messenger->get_myname().num(),
+          gs,
+          connection->policy.features_supported,
+          connection->policy.features_required | msgr2_required,
+          flags,
+          server_cookie);
+
+  ldout(cct, 5) << __func__ << " sending identification:"
+                << " addrs=" << messenger->get_myaddrs()
+                << " gid=" << messenger->get_myname().num()
+                << " global_seq=" << gs << " features_supported=" << std::hex
+                << connection->policy.features_supported
+                << " features_required="
+		            << (connection->policy.features_required | msgr2_required)
+                << " flags=" << flags
+                << " cookie=" << server_cookie << std::dec << dendl;
+
+  connection->lock.unlock();
+  // Because "replacing" will prevent other connections preempt this addr,
+  // it's safe that here we don't acquire Connection's lock
+  ssize_t r = messenger->accept_conn(connection);
+
+  connection->inject_delay();
+
+  connection->lock.lock();
+
+  if (r < 0) {
+    ldout(cct, 1) << __func__ << " existing race replacing process for addr = "
+                  << connection->peer_addrs->msgr2_addr()
+                  << " just fail later one(this)" << dendl;
+    connection->inject_delay();
+    return _fault();
+  }
+  if (state != SESSION_ACCEPTING) {
+    ldout(cct, 1) << __func__
+                  << " state changed while accept_conn, it must be mark_down"
+                  << dendl;
+    ceph_assert(state == CLOSED || state == NONE);
+    messenger->unregister_conn(connection);
+    connection->inject_delay();
+    return _fault();
+  }
+
+  connection->set_features(connection_features);
+
+  // notify
+  connection->dispatch_queue->queue_accept(connection);
+  messenger->ms_deliver_handle_fast_accept(connection);
+
+  INTERCEPT(12);
+
+  return WRITE(server_ident, "server ident", server_ready);
+}
+
+CtPtr ProtocolV2::server_ready() {
+  ldout(cct, 20) << __func__ << dendl;
+
+  if (connection->delay_state) {
+    ceph_assert(connection->delay_state->ready());
+  }
+
+  return ready();
+}
+
+CtPtr ProtocolV2::send_reconnect_ok() {
+  ldout(cct, 20) << __func__ << dendl;
+
+  out_seq = discard_requeued_up_to(out_seq, message_seq);
+
+  uint64_t ms = in_seq;
+  auto reconnect_ok = ReconnectOkFrame::Encode(ms);
+
+  ldout(cct, 5) << __func__ << " sending reconnect_ok: msg_seq=" << ms << dendl;
+
+  connection->lock.unlock();
+  // Because "replacing" will prevent other connections preempt this addr,
+  // it's safe that here we don't acquire Connection's lock
+  ssize_t r = messenger->accept_conn(connection);
+
+  connection->inject_delay();
+
+  connection->lock.lock();
+
+  if (r < 0) {
+    ldout(cct, 1) << __func__ << " existing race replacing process for addr = "
+                  << connection->peer_addrs->msgr2_addr()
+                  << " just fail later one(this)" << dendl;
+    connection->inject_delay();
+    return _fault();
+  }
+  if (state != SESSION_ACCEPTING) {
+    ldout(cct, 1) << __func__
+                  << " state changed while accept_conn, it must be mark_down"
+                  << dendl;
+    ceph_assert(state == CLOSED || state == NONE);
+    messenger->unregister_conn(connection);
+    connection->inject_delay();
+    return _fault();
+  }
+
+  // notify
+  connection->dispatch_queue->queue_accept(connection);
+  messenger->ms_deliver_handle_fast_accept(connection);
+
+  INTERCEPT(14);
+
+  return WRITE(reconnect_ok, "reconnect ok", server_ready);
+}