summaryrefslogtreecommitdiffstats
path: root/src/mds/ScatterLock.h
blob: f654fd2ff3197980cd064b34b4a5ed2b4bedfeb8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- 
// vim: ts=8 sw=2 smarttab
/*
 * Ceph - scalable distributed file system
 *
 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
 *
 * This is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License version 2.1, as published by the Free Software 
 * Foundation.  See file COPYING.
 * 
 */


#ifndef CEPH_SCATTERLOCK_H
#define CEPH_SCATTERLOCK_H

#include "SimpleLock.h"

#include "MDSContext.h"

/**
 * ScatterLock - a SimpleLock that additionally tracks "scatter" bookkeeping.
 *
 * On top of the SimpleLock state machine this class keeps a handful of flag
 * bits in state_flags (scatter/unscatter wanted, dirty, flushing, flushed)
 * and a lazily allocated side structure (more_bits_t) that holds an xlist
 * item and an update timestamp.
 *
 * The side structure is created on demand by more() and released again by
 * clear_dirty(); the destructor asserts that it has been released.
 */
class ScatterLock : public SimpleLock {
public:
  ScatterLock(MDSCacheObject *o, LockType *lt) :
    SimpleLock(o, lt) {}

  /// The lazily allocated side structure must already be gone (see
  /// clear_dirty()) by the time the lock is destroyed.
  ~ScatterLock() override {
    ceph_assert(!_more);
  }

  bool is_scatterlock() const override {
    return true;
  }

  /// True when SimpleLock considers the lock sync-and-unlocked and, in
  /// addition, the lock is neither dirty nor in the middle of a flush.
  bool is_sync_and_unlocked() const {
    return
      SimpleLock::is_sync_and_unlocked() &&
      !is_dirty() &&
      !is_flushing();
  }

  /**
   * Whether the lock may be scatter-pinned in its current state.
   *
   * @param loner  not consulted by this implementation
   * @return true only when the state is LOCK_SYNC or LOCK_MIX
   */
  bool can_scatter_pin(client_t loner) const {
    /*
      LOCK : NOT okay because it can MIX and force replicas to journal something
      TSYN : also not okay for same reason
      EXCL : also not okay

      MIX  : okay, replica can stall before sending AC_SYNCACK
      SYNC : okay, replica can stall before sending AC_MIXACK or AC_LOCKACK
    */
    return
      get_state() == LOCK_SYNC ||
      get_state() == LOCK_MIX;
  }

  /**
   * Move an xlocked IFILE lock into LOCK_XLOCKSNAP and queue a waiter.
   *
   * Asserts that this is a CEPH_LOCK_IFILE lock currently in LOCK_XLOCK or
   * LOCK_XLOCKDONE.
   *
   * @param c  context registered as a WAIT_STABLE waiter
   */
  void set_xlock_snap_sync(MDSContext *c)
  {
    ceph_assert(get_type() == CEPH_LOCK_IFILE);
    ceph_assert(state == LOCK_XLOCK || state == LOCK_XLOCKDONE);
    state = LOCK_XLOCKSNAP;
    add_waiter(WAIT_STABLE, c);
  }

  /// Returns the xlist item for this lock, allocating the side structure if
  /// it does not exist yet.
  xlist<ScatterLock*>::item *get_updated_item() { return &more()->item_updated; }

  /// Returns the recorded update stamp, or a default-constructed utime_t when
  /// no side structure has been allocated.  Never allocates.
  utime_t get_update_stamp() const {
    return _more ? _more->update_stamp : utime_t();
  }

  /// Records the update stamp, allocating the side structure if needed.
  void set_update_stamp(utime_t t) { more()->update_stamp = t; }

  // -- scatter / unscatter "wanted" hints (plain flag bits) --
  void set_scatter_wanted() {
    state_flags |= SCATTER_WANTED;
  }
  void set_unscatter_wanted() {
    state_flags |= UNSCATTER_WANTED;
  }
  void clear_scatter_wanted() {
    state_flags &= ~SCATTER_WANTED;
  }
  void clear_unscatter_wanted() {
    state_flags &= ~UNSCATTER_WANTED;
  }
  bool get_scatter_wanted() const {
    return (state_flags & SCATTER_WANTED) != 0;
  }
  bool get_unscatter_wanted() const {
    return (state_flags & UNSCATTER_WANTED) != 0;
  }

  // -- dirty / flushing / flushed flag queries --
  bool is_dirty() const override {
    return (state_flags & DIRTY) != 0;
  }
  bool is_flushing() const override {
    return (state_flags & FLUSHING) != 0;
  }
  bool is_flushed() const override {
    return (state_flags & FLUSHED) != 0;
  }
  bool is_dirty_or_flushing() const {
    return is_dirty() || is_flushing();
  }

  /// Mark the lock dirty.  No-op if it is already dirty.
  void mark_dirty() {
    if (!is_dirty()) {
      // finish_flush() only drops PIN_DIRTYSCATTERED once the flush is over
      // and the lock is not dirty again, so while flushing no new pin is
      // taken here.
      if (!is_flushing())
	parent->get(MDSCacheObject::PIN_DIRTYSCATTERED);
      set_dirty();
    }
  }

  /// Begin a flush: DIRTY becomes FLUSHING.  No-op if the lock is not dirty.
  /// Clearing DIRTY also releases the side structure (see clear_dirty()).
  void start_flush() {
    if (is_dirty()) {
      set_flushing();
      clear_dirty();
    }
  }

  /// Complete a flush: FLUSHING becomes FLUSHED.  No-op if not flushing.
  /// If the lock was not re-dirtied in the meantime, the PIN_DIRTYSCATTERED
  /// reference is dropped and the parent is told to clear its
  /// dirty-scattered state for this lock type.
  void finish_flush() {
    if (is_flushing()) {
      clear_flushing();
      set_flushed();
      if (!is_dirty()) {
	parent->put(MDSCacheObject::PIN_DIRTYSCATTERED);
	parent->clear_dirty_scattered(get_type());
      }
    }
  }
  void clear_flushed() override {
    state_flags &= ~FLUSHED;
  }

  /// Run a complete start_flush / finish_flush / clear_flushed cycle in one
  /// step.
  void remove_dirty() {
    start_flush();
    finish_flush();
    clear_flushed();
  }

  /**
   * Adjust our state from the state a replica reported during rejoin.
   *
   * @param rstate   lock state reported by the replica
   * @param locktoo  when true, a reported LOCK_LOCK is adopted as well
   *
   * Any of LOCK_MIX, LOCK_MIX_LOCK or LOCK_MIX_SYNC puts us in LOCK_MIX;
   * every other reported state leaves our state untouched.
   */
  void infer_state_from_strong_rejoin(int rstate, bool locktoo) {
    if (rstate == LOCK_MIX ||
	rstate == LOCK_MIX_LOCK || // replica still has wrlocks?
	rstate == LOCK_MIX_SYNC)
      state = LOCK_MIX;
    else if (locktoo && rstate == LOCK_LOCK)
      state = LOCK_LOCK;
  }

  /**
   * Encode the state that replica @p rep should assume after rejoin.
   *
   * @param bl   buffer the 16-bit state is appended to
   * @param rep  replica the state is being encoded for
   *
   * Starts from get_replica_state(); if we are still gathering from @p rep
   * the state is overridden with LOCK_MIX_SYNC or LOCK_MIX_LOCK.  When the
   * encoded state is one of the MIX states the lock is also marked as
   * needing recovery.
   */
  void encode_state_for_rejoin(ceph::buffer::list& bl, int rep) {
    __s16 s = get_replica_state();
    if (is_gathering(rep)) {
      // the recovering mds may hold rejoined wrlocks
      if (state == LOCK_MIX_SYNC)
	s = LOCK_MIX_SYNC;
      else
	s = LOCK_MIX_LOCK;
    }

    // If there is a recovering mds that had replicated an object when it
    // failed, and the scatterlock in that object was in MIX state, it is
    // possible that the recovering mds needs to take a wrlock on the
    // scatterlock when it replays unsafe requests. So this mds should delay
    // taking an rdlock on the scatterlock until the recovering mds finishes
    // replaying unsafe requests. Otherwise unsafe requests may get replayed
    // after the current request.
    //
    // For example:
    // The recovering mds is the auth mds of a dirfrag, and this mds is the
    // auth mds of the corresponding inode. When doing 'rm -rf' on the
    // directory, this mds should delay the rmdir request until the
    // recovering mds has replayed the unlink requests.
    if (s == LOCK_MIX || s == LOCK_MIX_LOCK || s == LOCK_MIX_SYNC)
      mark_need_recover();

    using ceph::encode;
    encode(s, bl);
  }

  /// Decode rejoin state via SimpleLock, then turn an in-progress flush back
  /// into plain dirty (FLUSHING is cleared, DIRTY is set).
  void decode_state_rejoin(ceph::buffer::list::const_iterator& p, MDSContext::vec& waiters, bool survivor) {
    SimpleLock::decode_state_rejoin(p, waiters, survivor);
    if (is_flushing()) {
      set_dirty();
      clear_flushing();
    }
  }

  /**
   * Remove replica @p from from the lock.
   *
   * @param from    replica being removed
   * @param rejoin  true when called as part of rejoin
   * @return false, without touching the base class, when @p rejoin is set
   *         and the lock is in LOCK_MIX, LOCK_MIX_SYNC, LOCK_MIX_LOCK2,
   *         LOCK_MIX_TSYN or LOCK_MIX_EXCL; otherwise whatever
   *         SimpleLock::remove_replica(from) returns.
   */
  bool remove_replica(int from, bool rejoin) {
    if (rejoin &&
	(state == LOCK_MIX ||
	 state == LOCK_MIX_SYNC ||
	 state == LOCK_MIX_LOCK2 ||
	 state == LOCK_MIX_TSYN ||
	 state == LOCK_MIX_EXCL))
      return false;
    return SimpleLock::remove_replica(from);
  }

  /// Print "(<SimpleLock fields>[ dirty][ flushing][ flushed][ scatter_wanted])".
  void print(std::ostream& out) const override {
    out << "(";
    _print(out);
    if (is_dirty())
      out << " dirty";
    if (is_flushing())
      out << " flushing";
    if (is_flushed())
      out << " flushed";
    if (get_scatter_wanted())
      out << " scatter_wanted";
    out << ")";
  }

private:
  /// Rarely needed per-lock data, allocated only on demand by more().
  struct more_bits_t {
    xlist<ScatterLock*>::item item_updated;
    utime_t update_stamp;

    explicit more_bits_t(ScatterLock *lock) :
      item_updated(lock)
    {}
  };

  /// Returns the side structure, creating it on first use.
  more_bits_t *more() {
    if (!_more)
      _more = std::make_unique<more_bits_t>(this);
    return _more.get();
  }

  // Flag bits stored in state_flags; these start at bit 8.
  enum {
    SCATTER_WANTED   = 1 << 8,
    UNSCATTER_WANTED = 1 << 9,
    DIRTY            = 1 << 10,
    FLUSHING         = 1 << 11,
    FLUSHED          = 1 << 12,
  };

  void set_flushing() {
    state_flags |= FLUSHING;
  }
  void clear_flushing() {
    state_flags &= ~FLUSHING;
  }
  void set_flushed() {
    state_flags |= FLUSHED;
  }
  void set_dirty() {
    state_flags |= DIRTY;
  }

  /// Clear DIRTY and release the side structure: the xlist item is unlinked
  /// from whatever list it is on and the stored update stamp is discarded.
  void clear_dirty() {
    state_flags &= ~DIRTY;
    if (_more) {
      _more->item_updated.remove_myself();
      _more.reset();
    }
  }

  mutable std::unique_ptr<more_bits_t> _more;
};

#endif