summaryrefslogtreecommitdiffstats
path: root/src/messages/MOSDRepOp.h
blob: 5ecec0007e3f1e39116ed6a1c32350c388d9d548 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
 * Ceph - scalable distributed file system
 *
 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
 *
 * This is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License version 2.1, as published by the Free Software
 * Foundation.  See file COPYING.
 *
 */


#ifndef CEPH_MOSDREPOP_H
#define CEPH_MOSDREPOP_H

#include "MOSDFastDispatchOp.h"

/*
 * OSD sub op - for internal ops on pobjects between primary and replicas(/stripes/whatever)
 */

class MOSDRepOp final : public MOSDFastDispatchOp {
private:
  static constexpr int HEAD_VERSION = 3;
  static constexpr int COMPAT_VERSION = 1;

public:
  epoch_t map_epoch, min_epoch;

  // metadata from original request
  osd_reqid_t reqid;

  spg_t pgid;

  ceph::buffer::list::const_iterator p;
  // Decoding flags. Decoding is only needed for messages caught by pipe reader.
  bool final_decode_needed;

  // subop
  pg_shard_t from;
  hobject_t poid;

  __u8 acks_wanted;

  // transaction to exec
  ceph::buffer::list logbl;
  pg_stat_t pg_stats;

  // subop metadata
  eversion_t version;

  // piggybacked osd/og state
  eversion_t pg_trim_to;   // primary->replica: trim to here
  eversion_t min_last_complete_ondisk; // lower bound on committed version

  hobject_t new_temp_oid;      ///< new temp object that we must now start tracking
  hobject_t discard_temp_oid;  ///< previously used temp object that we can now stop tracking

  /// non-empty if this transaction involves a hit_set history update
  std::optional<pg_hit_set_history_t> updated_hit_set_history;

  epoch_t get_map_epoch() const override {
    return map_epoch;
  }
  epoch_t get_min_epoch() const override {
    return min_epoch;
  }
  spg_t get_spg() const override {
    return pgid;
  }

  int get_cost() const override {
    return data.length();
  }

  void decode_payload() override {
    using ceph::decode;
    p = payload.cbegin();
    // split to partial and final
    decode(map_epoch, p);
    if (header.version >= 2) {
      decode(min_epoch, p);
      decode_trace(p);
    } else {
      min_epoch = map_epoch;
    }
    decode(reqid, p);
    decode(pgid, p);
  }

  void finish_decode() {
    using ceph::decode;
    if (!final_decode_needed)
      return; // Message is already final decoded
    decode(poid, p);

    decode(acks_wanted, p);
    decode(version, p);
    decode(logbl, p);
    decode(pg_stats, p);
    decode(pg_trim_to, p);


    decode(new_temp_oid, p);
    decode(discard_temp_oid, p);

    decode(from, p);
    decode(updated_hit_set_history, p);

    if (header.version >= 3) {
      decode(min_last_complete_ondisk, p);
    } else {
      /* This field used to mean pg_roll_foward_to, but ReplicatedBackend
       * simply assumes that we're rolling foward to version. */
      eversion_t pg_roll_forward_to;
      decode(pg_roll_forward_to, p);
    }
    final_decode_needed = false;
  }

  void encode_payload(uint64_t features) override {
    using ceph::encode;
    encode(map_epoch, payload);
    if (HAVE_FEATURE(features, SERVER_LUMINOUS)) {
      header.version = HEAD_VERSION;
      encode(min_epoch, payload);
      encode_trace(payload, features);
    } else {
      header.version = 1;
    }
    encode(reqid, payload);
    encode(pgid, payload);
    encode(poid, payload);

    encode(acks_wanted, payload);
    encode(version, payload);
    encode(logbl, payload);
    encode(pg_stats, payload);
    encode(pg_trim_to, payload);
    encode(new_temp_oid, payload);
    encode(discard_temp_oid, payload);
    encode(from, payload);
    encode(updated_hit_set_history, payload);
    encode(min_last_complete_ondisk, payload);
  }

  MOSDRepOp()
    : MOSDFastDispatchOp{MSG_OSD_REPOP, HEAD_VERSION, COMPAT_VERSION},
      map_epoch(0),
      final_decode_needed(true), acks_wanted (0) {}
  MOSDRepOp(osd_reqid_t r, pg_shard_t from,
	    spg_t p, const hobject_t& po, int aw,
	    epoch_t mape, epoch_t min_epoch, ceph_tid_t rtid, eversion_t v)
    : MOSDFastDispatchOp{MSG_OSD_REPOP, HEAD_VERSION, COMPAT_VERSION},
      map_epoch(mape),
      min_epoch(min_epoch),
      reqid(r),
      pgid(p),
      final_decode_needed(false),
      from(from),
      poid(po),
      acks_wanted(aw),
      version(v) {
    set_tid(rtid);
  }

  void set_rollback_to(const eversion_t &rollback_to) {
    header.version = 2;
    min_last_complete_ondisk = rollback_to;
  }
private:
  ~MOSDRepOp() final {}

public:
  std::string_view get_type_name() const override { return "osd_repop"; }
  void print(std::ostream& out) const override {
    out << "osd_repop(" << reqid
	<< " " << pgid << " e" << map_epoch << "/" << min_epoch;
    if (!final_decode_needed) {
      out << " " << poid << " v " << version;
      if (updated_hit_set_history)
        out << ", has_updated_hit_set_history";
      if (header.version < 3) {
	out << ", rollback_to(legacy)=" << min_last_complete_ondisk;
      } else {
	out << ", mlcod=" << min_last_complete_ondisk;
      }
    }
    out << ")";
  }
private:
  template<class T, typename... Args>
  friend boost::intrusive_ptr<T> ceph::make_message(Args&&... args);
};

#endif