From e6918187568dbd01842d8d1d2c808ce16a894239 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 21 Apr 2024 13:54:28 +0200 Subject: Adding upstream version 18.2.2. Signed-off-by: Daniel Baumann --- src/msg/async/frames_v2.h | 900 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 900 insertions(+) create mode 100644 src/msg/async/frames_v2.h (limited to 'src/msg/async/frames_v2.h') diff --git a/src/msg/async/frames_v2.h b/src/msg/async/frames_v2.h new file mode 100644 index 000000000..9431d6e2d --- /dev/null +++ b/src/msg/async/frames_v2.h @@ -0,0 +1,900 @@ +#ifndef _MSG_ASYNC_FRAMES_V2_ +#define _MSG_ASYNC_FRAMES_V2_ + +#include "include/types.h" +#include "common/Clock.h" +#include "crypto_onwire.h" +#include "compression_onwire.h" +#include +#include +#include + +#include + +/** + * Protocol V2 Frame Structures + * + * Documentation in: doc/dev/msgr2.rst + **/ + +namespace ceph::msgr::v2 { + +// We require these features from any peer, period, in order to encode +// a entity_addrvec_t. +const uint64_t msgr2_required = CEPH_FEATUREMASK_MSG_ADDR2; + +// We additionally assume the peer has the below features *purely for +// the purpose of encoding the frames themselves*. The only complex +// types in the frames are entity_addr_t and entity_addrvec_t, and we +// specifically want the peer to understand the (new in nautilus) +// TYPE_ANY. We treat narrow this assumption to frames because we +// expect there may be future clients (the kernel) that understand +// msgr v2 and understand this encoding but don't necessarily have +// everything else that SERVER_NAUTILUS implies. Yes, a fresh feature +// bit would be a cleaner approach, but those are scarce these days. +const uint64_t msgr2_frame_assumed = + msgr2_required | + CEPH_FEATUREMASK_SERVER_NAUTILUS; + +enum class Tag : __u8 { + HELLO = 1, + AUTH_REQUEST, + AUTH_BAD_METHOD, + AUTH_REPLY_MORE, + AUTH_REQUEST_MORE, + AUTH_DONE, + AUTH_SIGNATURE, + CLIENT_IDENT, + SERVER_IDENT, + IDENT_MISSING_FEATURES, + SESSION_RECONNECT, + SESSION_RESET, + SESSION_RETRY, + SESSION_RETRY_GLOBAL, + SESSION_RECONNECT_OK, + WAIT, + MESSAGE, + KEEPALIVE2, + KEEPALIVE2_ACK, + ACK, + COMPRESSION_REQUEST, + COMPRESSION_DONE +}; + +struct segment_t { + // TODO: this will be dropped with support for `allocation policies`. + // We need them because of the rx_buffers zero-copy optimization. + static constexpr __u16 PAGE_SIZE_ALIGNMENT = 4096; + + static constexpr __u16 DEFAULT_ALIGNMENT = sizeof(void *); + + ceph_le32 length; + ceph_le16 alignment; +} __attribute__((packed)); + +struct SegmentIndex { + struct Msg { + static constexpr std::size_t HEADER = 0; + static constexpr std::size_t FRONT = 1; + static constexpr std::size_t MIDDLE = 2; + static constexpr std::size_t DATA = 3; + }; + + struct Control { + static constexpr std::size_t PAYLOAD = 0; + }; +}; + +static constexpr uint8_t CRYPTO_BLOCK_SIZE { 16 }; + +static constexpr std::size_t MAX_NUM_SEGMENTS = 4; + +// V2 preamble consists of one or more preamble blocks depending on +// the number of segments a particular frame needs. Each block holds +// up to MAX_NUM_SEGMENTS segments and has its own CRC. +// +// XXX: currently the multi-segment facility is NOT implemented. +struct preamble_block_t { + // Tag. For multi-segmented frames the value is the same + // between subsequent preamble blocks. + __u8 tag; + + // Number of segments to go in entire frame. First preable block has + // set this to just #segments, second #segments - MAX_NUM_SEGMENTS, + // third to #segments - MAX_NUM_SEGMENTS and so on. + __u8 num_segments; + + segment_t segments[MAX_NUM_SEGMENTS]; + + __u8 flags; + __u8 _reserved; + + // CRC32 for this single preamble block. + ceph_le32 crc; +} __attribute__((packed)); +static_assert(sizeof(preamble_block_t) % CRYPTO_BLOCK_SIZE == 0); +static_assert(std::is_standard_layout::value); + +struct epilogue_crc_rev0_block_t { + __u8 late_flags; // FRAME_LATE_FLAG_ABORTED + ceph_le32 crc_values[MAX_NUM_SEGMENTS]; +} __attribute__((packed)); +static_assert(std::is_standard_layout_v); + +struct epilogue_crc_rev1_block_t { + __u8 late_status; // FRAME_LATE_STATUS_* + ceph_le32 crc_values[MAX_NUM_SEGMENTS - 1]; +} __attribute__((packed)); +static_assert(std::is_standard_layout_v); + +struct epilogue_secure_rev0_block_t { + __u8 late_flags; // FRAME_LATE_FLAG_ABORTED + __u8 padding[CRYPTO_BLOCK_SIZE - sizeof(late_flags)]; +} __attribute__((packed)); +static_assert(sizeof(epilogue_secure_rev0_block_t) % CRYPTO_BLOCK_SIZE == 0); +static_assert(std::is_standard_layout_v); + +// epilogue_secure_rev0_block_t with late_flags changed to late_status +struct epilogue_secure_rev1_block_t { + __u8 late_status; // FRAME_LATE_STATUS_* + __u8 padding[CRYPTO_BLOCK_SIZE - sizeof(late_status)]; +} __attribute__((packed)); +static_assert(sizeof(epilogue_secure_rev1_block_t) % CRYPTO_BLOCK_SIZE == 0); +static_assert(std::is_standard_layout_v); + +static constexpr uint32_t FRAME_CRC_SIZE = 4; +static constexpr uint32_t FRAME_PREAMBLE_INLINE_SIZE = 48; +static_assert(FRAME_PREAMBLE_INLINE_SIZE % CRYPTO_BLOCK_SIZE == 0); +// just for performance, nothing should break otherwise +static_assert(sizeof(ceph_msg_header2) <= FRAME_PREAMBLE_INLINE_SIZE); +static constexpr uint32_t FRAME_PREAMBLE_WITH_INLINE_SIZE = + sizeof(preamble_block_t) + FRAME_PREAMBLE_INLINE_SIZE; + +// A frame can be aborted by the sender after transmitting the +// preamble and the first segment. The remainder of the frame +// is filled with zeros, up until the epilogue. +// +// This flag is for msgr2.0. Note that in crc mode, late_flags +// is not covered by any crc -- a single bit flip can result in +// a completed frame being dropped or in an aborted frame with +// garbage segment payloads being dispatched. +#define FRAME_LATE_FLAG_ABORTED (1<<0) + +// For msgr2.1, FRAME_LATE_STATUS_ABORTED has the same meaning +// as FRAME_LATE_FLAG_ABORTED and late_status replaces late_flags. +// Bit error detection in crc mode is achieved by using a 4-bit +// nibble per flag with two code words that are far apart in terms +// of Hamming Distance (HD=4, same as provided by CRC32-C for +// input lengths over ~5K). +#define FRAME_LATE_STATUS_ABORTED 0x1 +#define FRAME_LATE_STATUS_COMPLETE 0xe +#define FRAME_LATE_STATUS_ABORTED_MASK 0xf + +#define FRAME_LATE_STATUS_RESERVED_TRUE 0x10 +#define FRAME_LATE_STATUS_RESERVED_FALSE 0xe0 +#define FRAME_LATE_STATUS_RESERVED_MASK 0xf0 + +// For msgr 2.1, FRAME_EARLY_X flags are sent as part of epilogue. +// +// This flag indicates whether frame segments have been compressed by +// sender, and used in segments' disassemblig phase. +#define FRAME_EARLY_DATA_COMPRESSED 0X1 + +struct FrameError : std::runtime_error { + using runtime_error::runtime_error; +}; + +class FrameAssembler { +public: + // crypto must be non-null + FrameAssembler(const ceph::crypto::onwire::rxtx_t* crypto, bool is_rev1, + bool with_data_crc, const ceph::compression::onwire::rxtx_t* compression) + : m_crypto(crypto), m_is_rev1(is_rev1), m_with_data_crc(with_data_crc), + m_compression(compression) {} + + void set_is_rev1(bool is_rev1) { + m_descs.clear(); + m_flags = 0; + m_is_rev1 = is_rev1; + } + + bool get_is_rev1() { + return m_is_rev1; + } + + size_t get_num_segments() const { + ceph_assert(!m_descs.empty()); + return m_descs.size(); + } + + uint32_t get_segment_logical_len(size_t seg_idx) const { + ceph_assert(seg_idx < m_descs.size()); + return m_descs[seg_idx].logical_len; + } + + uint16_t get_segment_align(size_t seg_idx) const { + ceph_assert(seg_idx < m_descs.size()); + return m_descs[seg_idx].align; + } + + // Preamble: + // + // preamble_block_t + // [preamble inline buffer + auth tag -- only in msgr2.1 secure mode] + // + // The preamble is generated unconditionally. + // + // In msgr2.1 secure mode, the first segment is inlined into the + // preamble inline buffer, either fully or partially. + uint32_t get_preamble_onwire_len() const { + if (m_is_rev1 && m_crypto->rx) { + return FRAME_PREAMBLE_WITH_INLINE_SIZE + get_auth_tag_len(); + } + return sizeof(preamble_block_t); + } + + // Segment: + // + // segment payload + // [zero padding -- only in secure mode] + // [crc or auth tag -- only in msgr2.1, only for the first segment] + // + // For an empty segment, nothing is generated. In msgr2.1 secure + // mode, if the first segment gets fully inlined into the preamble + // inline buffer, it is considered empty. + uint32_t get_segment_onwire_len(size_t seg_idx) const { + ceph_assert(seg_idx < m_descs.size()); + if (m_crypto->rx) { + uint32_t padded_len = get_segment_padded_len(seg_idx); + if (m_is_rev1 && seg_idx == 0) { + if (padded_len > FRAME_PREAMBLE_INLINE_SIZE) { + return padded_len + get_auth_tag_len() - FRAME_PREAMBLE_INLINE_SIZE; + } + return 0; + } + return padded_len; + } + if (m_is_rev1 && seg_idx == 0 && m_descs[0].logical_len > 0) { + return m_descs[0].logical_len + FRAME_CRC_SIZE; + } + return m_descs[seg_idx].logical_len; + } + + // Epilogue: + // + // epilogue_*_block_t + // [auth tag -- only in secure mode] + // + // For msgr2.0, the epilogue is generated unconditionally. In + // crc mode, it stores crcs for all segments; the preamble is + // covered by its own crc. In secure mode, the epilogue auth tag + // covers the whole frame. + // + // For msgr2.1, the epilogue is generated only if the frame has + // more than one segment (i.e. at least one of second to fourth + // segments is not empty). In crc mode, it stores crcs for + // second to fourh segments; the preamble and the first segment + // are covered by their own crcs. In secure mode, the epilogue + // auth tag covers second to fourth segments; the preamble and the + // first segment (if not fully inlined into the preamble inline + // buffer) are covered by their own auth tags. + // + // Note that the auth tag format is an implementation detail of a + // particular cipher. FrameAssembler is concerned only with where + // the auth tag is placed (at the end of the ciphertext) and how + // long it is (RxHandler::get_extra_size_at_final()). This is to + // provide room for other encryption algorithms: currently we use + // AES-128-GCM with 16-byte tags, but it is possible to switch to + // e.g. AES-128-CBC + HMAC-SHA512 without affecting the protocol + // (except for the cipher negotiation, of course). + // + // Additionally, each variant of the epilogue contains either + // late_flags or late_status field that directs handling of frames + // with more than one segment. + uint32_t get_epilogue_onwire_len() const { + ceph_assert(!m_descs.empty()); + if (m_is_rev1 && m_descs.size() == 1) { + return 0; + } + if (m_crypto->rx) { + return (m_is_rev1 ? sizeof(epilogue_secure_rev1_block_t) : + sizeof(epilogue_secure_rev0_block_t)) + get_auth_tag_len(); + } + return m_is_rev1 ? sizeof(epilogue_crc_rev1_block_t) : + sizeof(epilogue_crc_rev0_block_t); + } + + uint64_t get_frame_logical_len() const; + uint64_t get_frame_onwire_len() const; + + bufferlist assemble_frame(Tag tag, bufferlist segment_bls[], + const uint16_t segment_aligns[], + size_t segment_count); + + Tag disassemble_preamble(bufferlist& preamble_bl); + + bool disassemble_segments(bufferlist& preamble_bl, + bufferlist segments_bls[], + bufferlist& epilogue_bl) const; + +private: + struct segment_desc_t { + uint32_t logical_len; + uint16_t align; + }; + + uint32_t get_segment_padded_len(size_t seg_idx) const { + return p2roundup(m_descs[seg_idx].logical_len, + CRYPTO_BLOCK_SIZE); + } + + uint32_t get_auth_tag_len() const { + return m_crypto->rx->get_extra_size_at_final(); + } + + bool is_compressed() const { + return m_flags & FRAME_EARLY_DATA_COMPRESSED; + } + + void asm_compress(bufferlist segment_bls[]); + + bufferlist asm_crc_rev0(const preamble_block_t& preamble, + bufferlist segment_bls[]) const; + bufferlist asm_secure_rev0(const preamble_block_t& preamble, + bufferlist segment_bls[]) const; + bufferlist asm_crc_rev1(const preamble_block_t& preamble, + bufferlist segment_bls[]) const; + bufferlist asm_secure_rev1(const preamble_block_t& preamble, + bufferlist segment_bls[]) const; + + // Like msgr1, and unlike msgr2.0, msgr2.1 allows interpreting the + // first segment before reading in the rest of the frame. + // + // For msgr2.1 (set_is_rev1(true)), you may: + // + // - read in the first segment + // - call disassemble_first_segment() + // - use the contents of the first segment, for example to + // look up user-provided buffers based on ceph_msg_header2::tid + // - read in the remaining segments, possibly directly into + // user-provided buffers + // - read in epilogue + // - call disassemble_remaining_segments() + // - call disasm_all_decompress() + // + // For msgr2.0 (set_is_rev1(false)), disassemble_first_segment() is + // a noop. To accomodate, disassemble_remaining_segments() always + // takes all segments and skips over the first segment in msgr2.1 + // case. You must: + // + // - read in all segments + // - read in epilogue + // - call disassemble_remaining_segments() + // - call disasm_all_decompress() + // + // disassemble_remaining_segments() returns true if the frame is + // ready for dispatching, or false if it was aborted by the sender + // and must be dropped. + void disassemble_first_segment(bufferlist& preamble_bl, + bufferlist& segment_bl) const; + bool disassemble_remaining_segments(bufferlist segment_bls[], + bufferlist& epilogue_bl) const; + void disassemble_decompress(bufferlist segment_bls[]) const; + + bool disasm_all_crc_rev0(bufferlist segment_bls[], + bufferlist& epilogue_bl) const; + bool disasm_all_secure_rev0(bufferlist segment_bls[], + bufferlist& epilogue_bl) const; + void disasm_first_crc_rev1(bufferlist& preamble_bl, + bufferlist& segment_bl) const; + bool disasm_remaining_crc_rev1(bufferlist segment_bls[], + bufferlist& epilogue_bl) const; + void disasm_first_secure_rev1(bufferlist& preamble_bl, + bufferlist& segment_bl) const; + bool disasm_remaining_secure_rev1(bufferlist segment_bls[], + bufferlist& epilogue_bl) const; + + void fill_preamble(Tag tag, preamble_block_t& preamble) const; + friend std::ostream& operator<<(std::ostream& os, + const FrameAssembler& frame_asm); + + boost::container::static_vector m_descs; + __u8 m_flags; + const ceph::crypto::onwire::rxtx_t* m_crypto; + bool m_is_rev1; // msgr2.1? + bool m_with_data_crc; + const ceph::compression::onwire::rxtx_t* m_compression; +}; + +template +struct Frame { + static constexpr size_t SegmentsNumV = sizeof...(SegmentAlignmentVs); + static_assert(SegmentsNumV > 0 && SegmentsNumV <= MAX_NUM_SEGMENTS); +protected: + std::array segments; + +private: + static constexpr std::array alignments { + SegmentAlignmentVs... + }; + +public: + ceph::bufferlist get_buffer(FrameAssembler& tx_frame_asm) { + auto bl = tx_frame_asm.assemble_frame(T::tag, segments.data(), + alignments.data(), SegmentsNumV); + ceph_assert(bl.length() == tx_frame_asm.get_frame_onwire_len()); + return bl; + } +}; + +// ControlFrames are used to manage transceiver state (like connections) and +// orchestrate transfers of MessageFrames. They use only single segment with +// marshalling facilities -- derived classes specify frame structure through +// Args pack while ControlFrame provides common encode/decode machinery. +template +class ControlFrame : public Frame { +protected: + ceph::bufferlist &get_payload_segment() { + return this->segments[SegmentIndex::Control::PAYLOAD]; + } + + // this tuple is only used when decoding values from a payload segment + std::tuple _values; + + // FIXME: for now, we assume specific features for the purpoess of encoding + // the frames themselves (*not* messages in message frames!). + uint64_t features = msgr2_frame_assumed; + + template + inline void _encode_payload_each(T &t) { + if constexpr (std::is_same const>()) { + encode((uint32_t)t.size(), this->get_payload_segment(), features); + for (const auto &elem : t) { + encode(elem, this->get_payload_segment(), features); + } + } else { + encode(t, this->get_payload_segment(), features); + } + } + + template + inline void _decode_payload_each(T &t, bufferlist::const_iterator &ti) const { + if constexpr (std::is_same>()) { + uint32_t size; + decode(size, ti); + t.resize(size); + for (uint32_t i = 0; i < size; ++i) { + decode(t[i], ti); + } + } else { + decode(t, ti); + } + } + + template + inline void _decode_payload(bufferlist::const_iterator &ti, + std::index_sequence) const { + (_decode_payload_each((Args &)std::get(_values), ti), ...); + } + + template + inline decltype(auto) get_val() { + return std::get(_values); + } + + ControlFrame() + : Frame() { + } + + void _encode(const Args &... args) { + (_encode_payload_each(args), ...); + } + + void _decode(const ceph::bufferlist &bl) { + auto ti = bl.cbegin(); + _decode_payload(ti, std::index_sequence_for()); + } + +public: + static C Encode(const Args &... args) { + C c; + c._encode(args...); + return c; + } + + static C Decode(const ceph::bufferlist &payload) { + C c; + c._decode(payload); + return c; + } +}; + +struct HelloFrame : public ControlFrame { // peer address + static const Tag tag = Tag::HELLO; + using ControlFrame::Encode; + using ControlFrame::Decode; + + inline uint8_t &entity_type() { return get_val<0>(); } + inline entity_addr_t &peer_addr() { return get_val<1>(); } + +protected: + using ControlFrame::ControlFrame; +}; + +struct AuthRequestFrame : public ControlFrame, // preferred modes + bufferlist> { // auth payload + static const Tag tag = Tag::AUTH_REQUEST; + using ControlFrame::Encode; + using ControlFrame::Decode; + + inline uint32_t &method() { return get_val<0>(); } + inline std::vector &preferred_modes() { return get_val<1>(); } + inline bufferlist &auth_payload() { return get_val<2>(); } + +protected: + using ControlFrame::ControlFrame; +}; + +struct AuthBadMethodFrame : public ControlFrame, // allowed methods + std::vector> { // allowed modes + static const Tag tag = Tag::AUTH_BAD_METHOD; + using ControlFrame::Encode; + using ControlFrame::Decode; + + inline uint32_t &method() { return get_val<0>(); } + inline int32_t &result() { return get_val<1>(); } + inline std::vector &allowed_methods() { return get_val<2>(); } + inline std::vector &allowed_modes() { return get_val<3>(); } + +protected: + using ControlFrame::ControlFrame; +}; + +struct AuthReplyMoreFrame : public ControlFrame { // auth payload + static const Tag tag = Tag::AUTH_REPLY_MORE; + using ControlFrame::Encode; + using ControlFrame::Decode; + + inline bufferlist &auth_payload() { return get_val<0>(); } + +protected: + using ControlFrame::ControlFrame; +}; + +struct AuthRequestMoreFrame : public ControlFrame { // auth payload + static const Tag tag = Tag::AUTH_REQUEST_MORE; + using ControlFrame::Encode; + using ControlFrame::Decode; + + inline bufferlist &auth_payload() { return get_val<0>(); } + +protected: + using ControlFrame::ControlFrame; +}; + +struct AuthDoneFrame : public ControlFrame { // auth method payload + static const Tag tag = Tag::AUTH_DONE; + using ControlFrame::Encode; + using ControlFrame::Decode; + + inline uint64_t &global_id() { return get_val<0>(); } + inline uint32_t &con_mode() { return get_val<1>(); } + inline bufferlist &auth_payload() { return get_val<2>(); } + +protected: + using ControlFrame::ControlFrame; +}; + +struct AuthSignatureFrame + : public ControlFrame { + static const Tag tag = Tag::AUTH_SIGNATURE; + using ControlFrame::Encode; + using ControlFrame::Decode; + + inline sha256_digest_t &signature() { return get_val<0>(); } + +protected: + using ControlFrame::ControlFrame; +}; + +struct ClientIdentFrame + : public ControlFrame { // client cookie + static const Tag tag = Tag::CLIENT_IDENT; + using ControlFrame::Encode; + using ControlFrame::Decode; + + inline entity_addrvec_t &addrs() { return get_val<0>(); } + inline entity_addr_t &target_addr() { return get_val<1>(); } + inline int64_t &gid() { return get_val<2>(); } + inline uint64_t &global_seq() { return get_val<3>(); } + inline uint64_t &supported_features() { return get_val<4>(); } + inline uint64_t &required_features() { return get_val<5>(); } + inline uint64_t &flags() { return get_val<6>(); } + inline uint64_t &cookie() { return get_val<7>(); } + +protected: + using ControlFrame::ControlFrame; +}; + +struct ServerIdentFrame + : public ControlFrame { // server cookie + static const Tag tag = Tag::SERVER_IDENT; + using ControlFrame::Encode; + using ControlFrame::Decode; + + inline entity_addrvec_t &addrs() { return get_val<0>(); } + inline int64_t &gid() { return get_val<1>(); } + inline uint64_t &global_seq() { return get_val<2>(); } + inline uint64_t &supported_features() { return get_val<3>(); } + inline uint64_t &required_features() { return get_val<4>(); } + inline uint64_t &flags() { return get_val<5>(); } + inline uint64_t &cookie() { return get_val<6>(); } + +protected: + using ControlFrame::ControlFrame; +}; + +struct ReconnectFrame + : public ControlFrame { // message sequence + static const Tag tag = Tag::SESSION_RECONNECT; + using ControlFrame::Encode; + using ControlFrame::Decode; + + inline entity_addrvec_t &addrs() { return get_val<0>(); } + inline uint64_t &client_cookie() { return get_val<1>(); } + inline uint64_t &server_cookie() { return get_val<2>(); } + inline uint64_t &global_seq() { return get_val<3>(); } + inline uint64_t &connect_seq() { return get_val<4>(); } + inline uint64_t &msg_seq() { return get_val<5>(); } + +protected: + using ControlFrame::ControlFrame; +}; + +struct ResetFrame : public ControlFrame { // full reset + static const Tag tag = Tag::SESSION_RESET; + using ControlFrame::Encode; + using ControlFrame::Decode; + + inline bool &full() { return get_val<0>(); } + +protected: + using ControlFrame::ControlFrame; +}; + +struct RetryFrame : public ControlFrame { // connection seq + static const Tag tag = Tag::SESSION_RETRY; + using ControlFrame::Encode; + using ControlFrame::Decode; + + inline uint64_t &connect_seq() { return get_val<0>(); } + +protected: + using ControlFrame::ControlFrame; +}; + +struct RetryGlobalFrame : public ControlFrame { // global seq + static const Tag tag = Tag::SESSION_RETRY_GLOBAL; + using ControlFrame::Encode; + using ControlFrame::Decode; + + inline uint64_t &global_seq() { return get_val<0>(); } + +protected: + using ControlFrame::ControlFrame; +}; + +struct WaitFrame : public ControlFrame { + static const Tag tag = Tag::WAIT; + using ControlFrame::Encode; + using ControlFrame::Decode; + +protected: + using ControlFrame::ControlFrame; +}; + +struct ReconnectOkFrame : public ControlFrame { // message seq + static const Tag tag = Tag::SESSION_RECONNECT_OK; + using ControlFrame::Encode; + using ControlFrame::Decode; + + inline uint64_t &msg_seq() { return get_val<0>(); } + +protected: + using ControlFrame::ControlFrame; +}; + +struct IdentMissingFeaturesFrame + : public ControlFrame { // missing features mask + static const Tag tag = Tag::IDENT_MISSING_FEATURES; + using ControlFrame::Encode; + using ControlFrame::Decode; + + inline uint64_t &features() { return get_val<0>(); } + +protected: + using ControlFrame::ControlFrame; +}; + +struct KeepAliveFrame : public ControlFrame { // timestamp + static const Tag tag = Tag::KEEPALIVE2; + using ControlFrame::Encode; + using ControlFrame::Decode; + + static KeepAliveFrame Encode() { + return KeepAliveFrame::Encode(ceph_clock_now()); + } + + inline utime_t ×tamp() { return get_val<0>(); } + +protected: + using ControlFrame::ControlFrame; +}; + +struct KeepAliveFrameAck : public ControlFrame { // ack timestamp + static const Tag tag = Tag::KEEPALIVE2_ACK; + using ControlFrame::Encode; + using ControlFrame::Decode; + + inline utime_t ×tamp() { return get_val<0>(); } + +protected: + using ControlFrame::ControlFrame; +}; + +struct AckFrame : public ControlFrame { // message sequence + static const Tag tag = Tag::ACK; + using ControlFrame::Encode; + using ControlFrame::Decode; + + inline uint64_t &seq() { return get_val<0>(); } + +protected: + using ControlFrame::ControlFrame; +}; + +using segment_bls_t = + boost::container::static_vector; + +// This class is used for encoding/decoding header of the message frame. +// Body is processed almost independently with the sole junction point +// being the `extra_payload_len` passed to get_buffer(). +struct MessageFrame : public Frame { + static const Tag tag = Tag::MESSAGE; + + static MessageFrame Encode(const ceph_msg_header2 &msg_header, + const ceph::bufferlist &front, + const ceph::bufferlist &middle, + const ceph::bufferlist &data) { + MessageFrame f; + f.segments[SegmentIndex::Msg::HEADER].append( + reinterpret_cast(&msg_header), sizeof(msg_header)); + + f.segments[SegmentIndex::Msg::FRONT] = front; + f.segments[SegmentIndex::Msg::MIDDLE] = middle; + f.segments[SegmentIndex::Msg::DATA] = data; + + return f; + } + + static MessageFrame Decode(segment_bls_t& recv_segments) { + MessageFrame f; + // transfer segments' bufferlists. If a MessageFrame contains less + // SegmentsNumV segments, the missing ones will be seen as zeroed. + for (__u8 idx = 0; idx < std::size(recv_segments); idx++) { + f.segments[idx] = std::move(recv_segments[idx]); + } + return f; + } + + inline const ceph_msg_header2 &header() { + auto& hdrbl = segments[SegmentIndex::Msg::HEADER]; + return reinterpret_cast(*hdrbl.c_str()); + } + + ceph::bufferlist &front() { + return segments[SegmentIndex::Msg::FRONT]; + } + + ceph::bufferlist &middle() { + return segments[SegmentIndex::Msg::MIDDLE]; + } + + ceph::bufferlist &data() { + return segments[SegmentIndex::Msg::DATA]; + } + + uint32_t front_len() const { + return segments[SegmentIndex::Msg::FRONT].length(); + } + + uint32_t middle_len() const { + return segments[SegmentIndex::Msg::MIDDLE].length(); + } + + uint32_t data_len() const { + return segments[SegmentIndex::Msg::DATA].length(); + } + +protected: + using Frame::Frame; +}; + +struct CompressionRequestFrame : public ControlFrame> { // preferred methods + static const Tag tag = Tag::COMPRESSION_REQUEST; + using ControlFrame::Encode; + using ControlFrame::Decode; + + inline bool &is_compress() { return get_val<0>(); } + inline std::vector &preferred_methods() { return get_val<1>(); } + +protected: + using ControlFrame::ControlFrame; +}; + +struct CompressionDoneFrame : public ControlFrame { // method + static const Tag tag = Tag::COMPRESSION_DONE; + using ControlFrame::Encode; + using ControlFrame::Decode; + + inline bool &is_compress() { return get_val<0>(); } + inline uint32_t &method() { return get_val<1>(); } + +protected: + using ControlFrame::ControlFrame; +}; + +} // namespace ceph::msgr::v2 + +#endif // _MSG_ASYNC_FRAMES_V2_ -- cgit v1.2.3