1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
|
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
// vim: ts=8 sw=2 smarttab
/*
* This is a simple example RADOS class, designed to be usable as a
* template for implementing new methods.
*
* Our goal here is to illustrate the interface between the OSD and
* the class and demonstrate what kinds of things a class can do.
*
* Note that any *real* class will probably have a much more
* sophisticated protocol dealing with the in and out data buffers.
* For the model that we've settled on for handling that in a clean
* way, please refer to cls_lock or cls_version: they are relatively
* simple examples of how the parameters can be encoded in a way that
* allows for forward and backward compatibility between client and
* class revisions.
*/
/*
* A quick note about bufferlists:
*
* The bufferlist class allows memory buffers to be concatenated,
* truncated, spliced, "copied," encoded/embedded, and decoded. For
* most operations no actual data is ever copied, making bufferlists
* very convenient for efficiently passing data around.
*
* bufferlist is actually a typedef of buffer::list, and is defined in
* include/buffer.h (and implemented in common/buffer.cc).
*/
#include <algorithm>
#include <string>
#include <sstream>
#include <cerrno>
#include "objclass/objclass.h"
#include "osd/osd_types.h"
using std::string;
using std::ostringstream;
using ceph::bufferlist;
using ceph::decode;
using ceph::encode;
// Class metadata macros; their expansion is not visible in this file.
// NOTE(review): presumably these declare the class version (1.0) and
// the class name ("hello") for the OSD class loader -- confirm against
// objclass/objclass.h.
CLS_VER(1,0)
CLS_NAME(hello)
/**
 * say_hello - a "read" method whose reply ignores the object
 *
 * Example of a method that computes its answer from the request
 * payload alone: the local object is neither read nor modified.
 *
 * @param hctx method context (not used by this method)
 * @param in   name to greet; may be empty, at most 100 bytes
 * @param out  receives "Hello, <in>!" or "Hello, world!" if in is empty
 * @return 0 on success, -EINVAL if the input exceeds 100 bytes
 */
static int say_hello(cls_method_context_t hctx, bufferlist *in, bufferlist *out)
{
  // validate the client-supplied buffer first.  a class is free to
  // define whatever input format it likes; this one only caps the
  // size.
  if (in->length() > 100) {
    return -EINVAL;
  }

  // assemble the greeting piece by piece
  out->append("Hello, ");
  if (in->length() != 0) {
    out->append(*in);
  } else {
    out->append("world");
  }
  out->append("!");

  // whatever we return here is handed back to the librados caller
  return 0;
}
/**
 * record_hello - a "write" method that creates an object
 *
 * Creates the object (only if it does not exist yet) with a greeting
 * as its content and stores the request origin in the "said_by"
 * xattr.  The individual write calls made here (write_full, setxattr)
 * are accumulated and applied as one atomic transaction.
 *
 * @param hctx method context used for all object operations
 * @param in   name to greet; may be empty, at most 100 bytes
 * @param out  unused; left empty
 * @return 0 on success, -EINVAL if the input exceeds 100 bytes,
 *         -EEXIST if the object can already be stat'ed, or the
 *         negative error code of a failed write/setxattr
 */
static int record_hello(cls_method_context_t hctx, bufferlist *in, bufferlist *out)
{
  // anything can be written to the ceph-osd debug log.  every message
  // carries an integer level, and a smaller level is "louder".  the
  // debug_cls option on the ceph-osd decides how much is kept, just
  // like the other log levels.  at level 20 this line will normally
  // only show up when debug_cls is 20 or higher.
  CLS_LOG(20, "in record_hello");

  // validate the client-supplied buffer.  a class is free to define
  // whatever input format it likes; this one only caps the size.
  if (in->length() > 100) {
    return -EINVAL;
  }

  // only say hello to non-existent objects.  note that any stat
  // result other than 0 is treated as "does not exist".
  const int stat_rc = cls_cxx_stat(hctx, NULL, NULL);
  if (stat_rc == 0) {
    return -EEXIST;
  }

  // build the object content
  bufferlist greeting;
  greeting.append("Hello, ");
  if (in->length() != 0) {
    greeting.append(*in);
  } else {
    greeting.append("world");
  }
  greeting.append("!");

  // create/write the object
  int rc = cls_cxx_write_full(hctx, &greeting);
  if (rc < 0) {
    return rc;
  }

  // remember who said it: format the request origin as text and keep
  // it in an xattr
  entity_inst_t requester;
  cls_get_request_origin(hctx, &requester);
  ostringstream requester_text;
  requester_text << requester;
  bufferlist said_by;
  said_by.append(requester_text.str());
  rc = cls_cxx_setxattr(hctx, "said_by", &said_by);
  if (rc < 0) {
    return rc;
  }

  // For write operations, there are two possible outcomes:
  //
  //  * On failure we return a negative error code.  The out buffer
  //    may hold any data we like and it is returned to the caller.
  //    No change is made to the object.
  //
  //  * On success we return 0 and leave the out buffer empty.  This
  //    is because the OSD does not log write result codes or output
  //    buffers, and a replayed/resent operation (e.g., after a TCP
  //    disconnect) has to be idempotent.
  //
  // By default a positive return value or data left in the out
  // buffer is dropped and the client sees 0; write_return_data in
  // this file shows the opt-in exception to that rule.
  return 0;
}
/**
 * write_return_data - a "write" method that tries to return data
 *
 * Sets the xattr "foo" to "bar" and then attempts to hand both a
 * positive return code and some output data back to the client.  The
 * same function is also registered under the legacy method name
 * "writes_dont_return_data".
 *
 * @param hctx method context used for the xattr update
 * @param in   must be empty; any input is rejected
 * @param out  receives an error message on -EINVAL, or the data we
 *             try to return on success
 * @return 42 on success, -EINVAL if any input was supplied, or the
 *         negative error code of a failed setxattr
 */
static int write_return_data(cls_method_context_t hctx, bufferlist *in, bufferlist *out)
{
  // make some change to the object
  bufferlist foo_value;
  foo_value.append("bar");
  const int rc = cls_cxx_setxattr(hctx, "foo", &foo_value);
  if (rc < 0) {
    return rc;
  }

  if (in->length() > 0) {
    // returning anything < 0 (an error) aborts the whole
    // operation/transaction, so the setxattr above never takes
    // effect.  data *can* still be returned alongside an error.
    out->append("too much input data!");
    return -EINVAL;
  }

  // try to return some data.  it only reaches the client when the
  // client set the CEPH_OSD_FLAG_RETURNVEC flag on the op.
  out->append("you might see this");

  // likewise, the client only sees a >0 value if the RETURNVEC flag
  // is set; otherwise it sees 0.
  return 42;
}
/**
 * write_too_much_return_data - a "write" method with an oversized reply
 *
 * Sets the xattr "foo" to "bar" and then fills the out buffer with
 * ten copies of a long sentence.
 *
 * @param hctx method context used for the xattr update
 * @param in   unused
 * @param out  receives the oversized reply
 * @return 42 on success, or the negative error code of a failed
 *         setxattr
 */
static int write_too_much_return_data(cls_method_context_t hctx, bufferlist *in, bufferlist *out)
{
  // make some change to the object
  bufferlist foo_value;
  foo_value.append("bar");
  const int rc = cls_cxx_setxattr(hctx, "foo", &foo_value);
  if (rc < 0) {
    return rc;
  }

  // try to return too much data.  ten copies should be enough to
  // exceed osd_max_write_op_reply_len, which defaults to a pretty
  // small number.
  unsigned copies_left = 10;
  while (copies_left > 0) {
    out->append("you should not see this because it is toooooo long. ");
    --copies_left;
  }

  return 42;
}
/**
 * replay - a "read" method that fetches a previously recorded hello
 *
 * Reads up to 1100 bytes from the start of the object straight into
 * the out buffer.
 *
 * @param hctx method context used for the read
 * @param in   unused
 * @param out  receives the object content
 * @return 0 on success, or the negative error code of a failed read
 */
static int replay(cls_method_context_t hctx, bufferlist *in, bufferlist *out)
{
  // read the content out of the on-disk object.  a method's behavior
  // can depend on the request alone, or on the request plus the
  // on-disk state; which one applies is declared with the RD flag
  // when the method is registered (see the class init function at the
  // end of this file).
  const int rc = cls_cxx_read(hctx, 0, 1100, out);

  // the return value does not have to be the length of the returned
  // data; since this is a read it may be any value we want: positive,
  // zero or negative.  we report errors as-is and success as 0.
  return rc < 0 ? rc : 0;
}
/**
 * turn_it_to_11 - a "write" method that mutates existing object data
 *
 * A write method can depend on previous object content (i.e., perform
 * a read/modify/write operation).  This atomically transitions the
 * object state from the old content to the upper-cased content and
 * records the request origin in the "amplified_by" xattr.
 *
 * Only the first 1100 bytes of the object are read, and the object is
 * then fully replaced with the upper-cased version of those bytes.
 *
 * @param hctx method context used for all object operations
 * @param in   must be empty
 * @param out  unused; left empty
 * @return 0 on success, -EINVAL if any input was supplied, or the
 *         negative error code of a failed read/write/setxattr
 */
static int turn_it_to_11(cls_method_context_t hctx, bufferlist *in, bufferlist *out)
{
  // see if the input data from the client matches what this method
  // expects to receive.  your class can fill this buffer with what it
  // wants.
  if (in->length() != 0)
    return -EINVAL;

  bufferlist previous;
  int r = cls_cxx_read(hctx, 0, 1100, &previous);
  if (r < 0)
    return r;

  std::string str(previous.c_str(), previous.length());

  // object data is arbitrary bytes.  toupper() requires an argument
  // that is representable as unsigned char (or EOF); handing it a
  // plain char is undefined for bytes >= 0x80 where char is signed,
  // so convert each byte through unsigned char first.
  std::transform(str.begin(), str.end(), str.begin(),
                 [](unsigned char c) {
                   return static_cast<char>(::toupper(c));
                 });
  previous.clear();
  previous.append(str);

  // replace previous byte data content (write_full == truncate(0) + write)
  r = cls_cxx_write_full(hctx, &previous);
  if (r < 0)
    return r;

  // record who did it
  entity_inst_t origin;
  cls_get_request_origin(hctx, &origin);
  ostringstream ss;
  ss << origin;
  bufferlist attrbl;
  attrbl.append(ss.str());
  r = cls_cxx_setxattr(hctx, "amplified_by", &attrbl);
  if (r < 0)
    return r;

  // return value is 0 for success; out buffer is empty.
  return 0;
}
/**
 * bad_reader - counter-example: a method that does not behave
 *
 * Registered as WR only, yet it tries to read up to 100 bytes from
 * the start of the object into the out buffer.
 *
 * @return whatever the read call returns
 */
static int bad_reader(cls_method_context_t hctx, bufferlist *in, bufferlist *out)
{
  const int rc = cls_cxx_read(hctx, 0, 100, out);
  return rc;
}
/**
 * bad_writer - counter-example: a method that does not behave
 *
 * Registered as RD only, yet it tries to overwrite the object with
 * the request payload.
 *
 * @return whatever the write_full call returns
 */
static int bad_writer(cls_method_context_t hctx, bufferlist *in, bufferlist *out)
{
  const int rc = cls_cxx_write_full(hctx, in);
  return rc;
}
class PGLSHelloFilter : public PGLSFilter {
string val;
public:
int init(bufferlist::const_iterator& params) override {
try {
decode(xattr, params);
decode(val, params);
} catch (ceph::buffer::error &e) {
return -EINVAL;
}
return 0;
}
~PGLSHelloFilter() override {}
bool filter(const hobject_t& obj,
const bufferlist& xattr_data) const override
{
return xattr_data.contents_equal(val.c_str(), val.size());
}
};
// Factory handed to cls_register_cxx_filter(): returns a newly
// allocated PGLSHelloFilter through its base-class pointer.
PGLSFilter *hello_filter()
{
  PGLSFilter *created = new PGLSHelloFilter();
  return created;
}
/**
 * initialize class
 *
 * Three things happen here: the class itself is registered, then each
 * of its methods, and finally the example PGLS filter.
 */
CLS_INIT(hello)
{
  // a level 0 log message always appears in the ceph-osd log file.
  CLS_LOG(0, "loading cls_hello");

  cls_handle_t h_class;
  cls_register("hello", &h_class);

  // Every method is registered with a combination of these flags:
  //
  //   RD : whether this method (may) read prior object state
  //   WR : whether this method (may) write or update the object
  //
  // A method can be RD, WR, neither, or both.  If a method does
  // neither, the data it returns to the caller is a function of the
  // request and not the object contents.

  cls_method_handle_t h_say_hello;
  cls_register_cxx_method(h_class, "say_hello",
                          CLS_METHOD_RD,
                          say_hello, &h_say_hello);

  cls_method_handle_t h_record_hello;
  cls_register_cxx_method(h_class, "record_hello",
                          CLS_METHOD_WR | CLS_METHOD_PROMOTE,
                          record_hello, &h_record_hello);

  cls_method_handle_t h_write_return_data;
  cls_register_cxx_method(h_class, "write_return_data",
                          CLS_METHOD_WR,
                          write_return_data, &h_write_return_data);

  // legacy alias for this method for pre-octopus clients
  cls_method_handle_t h_writes_dont_return_data;
  cls_register_cxx_method(h_class, "writes_dont_return_data",
                          CLS_METHOD_WR,
                          write_return_data, &h_writes_dont_return_data);

  cls_method_handle_t h_write_too_much_return_data;
  cls_register_cxx_method(h_class, "write_too_much_return_data",
                          CLS_METHOD_WR,
                          write_too_much_return_data,
                          &h_write_too_much_return_data);

  cls_method_handle_t h_replay;
  cls_register_cxx_method(h_class, "replay",
                          CLS_METHOD_RD,
                          replay, &h_replay);

  // RD | WR is a read-modify-write method.
  cls_method_handle_t h_turn_it_to_11;
  cls_register_cxx_method(h_class, "turn_it_to_11",
                          CLS_METHOD_RD | CLS_METHOD_WR | CLS_METHOD_PROMOTE,
                          turn_it_to_11, &h_turn_it_to_11);

  // counter-examples: the flags deliberately contradict what the
  // methods do
  cls_method_handle_t h_bad_reader;
  cls_register_cxx_method(h_class, "bad_reader", CLS_METHOD_WR,
                          bad_reader, &h_bad_reader);

  cls_method_handle_t h_bad_writer;
  cls_register_cxx_method(h_class, "bad_writer", CLS_METHOD_RD,
                          bad_writer, &h_bad_writer);

  // A PGLS filter
  cls_register_cxx_filter(h_class, "hello", hello_filter);
}
|