summary refs log tree commit diff stats
path: root/fs/dlm
diff options
context:
space:
mode:
Diffstat (limited to 'fs/dlm')
-rw-r--r-- fs/dlm/ast.c 14
-rw-r--r-- fs/dlm/dlm_internal.h 3
-rw-r--r-- fs/dlm/lock.c 108
-rw-r--r-- fs/dlm/plock.c 44
-rw-r--r-- fs/dlm/user.c 15
5 files changed, 111 insertions, 73 deletions
diff --git a/fs/dlm/ast.c b/fs/dlm/ast.c
index decedc4ee1..1f2f70a1b8 100644
--- a/fs/dlm/ast.c
+++ b/fs/dlm/ast.c
@@ -12,7 +12,6 @@
#include <trace/events/dlm.h>
#include "dlm_internal.h"
-#include "lvb_table.h"
#include "memory.h"
#include "lock.h"
#include "user.h"
@@ -43,7 +42,6 @@ int dlm_enqueue_lkb_callback(struct dlm_lkb *lkb, uint32_t flags, int mode,
struct dlm_ls *ls = lkb->lkb_resource->res_ls;
int rv = DLM_ENQUEUE_CALLBACK_SUCCESS;
struct dlm_callback *cb;
- int copy_lvb = 0;
int prev_mode;
if (flags & DLM_CB_BAST) {
@@ -75,17 +73,6 @@ int dlm_enqueue_lkb_callback(struct dlm_lkb *lkb, uint32_t flags, int mode,
goto out;
}
}
- } else if (flags & DLM_CB_CAST) {
- if (test_bit(DLM_DFL_USER_BIT, &lkb->lkb_dflags)) {
- if (lkb->lkb_last_cast)
- prev_mode = lkb->lkb_last_cb->mode;
- else
- prev_mode = -1;
-
- if (!status && lkb->lkb_lksb->sb_lvbptr &&
- dlm_lvb_operations[prev_mode + 1][mode + 1])
- copy_lvb = 1;
- }
}
cb = dlm_allocate_cb();
@@ -98,7 +85,6 @@ int dlm_enqueue_lkb_callback(struct dlm_lkb *lkb, uint32_t flags, int mode,
cb->mode = mode;
cb->sb_status = status;
cb->sb_flags = (sbflags & 0x000000FF);
- cb->copy_lvb = copy_lvb;
kref_init(&cb->ref);
if (!test_and_set_bit(DLM_IFL_CB_PENDING_BIT, &lkb->lkb_iflags))
rv = DLM_ENQUEUE_CALLBACK_NEED_SCHED;
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 511d0b984f..3b4dbce849 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -222,7 +222,6 @@ struct dlm_callback {
int sb_status; /* copy to lksb status */
uint8_t sb_flags; /* copy to lksb flags */
int8_t mode; /* rq mode of bast, gr mode of cast */
- int copy_lvb;
struct list_head list;
struct kref ref;
@@ -247,7 +246,7 @@ struct dlm_lkb {
int8_t lkb_highbast; /* highest mode bast sent for */
int8_t lkb_wait_type; /* type of reply waiting for */
- atomic_t lkb_wait_count;
+ int8_t lkb_wait_count;
int lkb_wait_nodeid; /* for debugging */
struct list_head lkb_statequeue; /* rsb g/c/w list */
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 652c51fbbf..fd752dd038 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -1407,7 +1407,6 @@ static int add_to_waiters(struct dlm_lkb *lkb, int mstype, int to_nodeid)
{
struct dlm_ls *ls = lkb->lkb_resource->res_ls;
int error = 0;
- int wc;
mutex_lock(&ls->ls_waiters_mutex);
@@ -1429,17 +1428,20 @@ static int add_to_waiters(struct dlm_lkb *lkb, int mstype, int to_nodeid)
error = -EBUSY;
goto out;
}
- wc = atomic_inc_return(&lkb->lkb_wait_count);
+ lkb->lkb_wait_count++;
hold_lkb(lkb);
log_debug(ls, "addwait %x cur %d overlap %d count %d f %x",
- lkb->lkb_id, lkb->lkb_wait_type, mstype, wc,
- dlm_iflags_val(lkb));
+ lkb->lkb_id, lkb->lkb_wait_type, mstype,
+ lkb->lkb_wait_count, dlm_iflags_val(lkb));
goto out;
}
- wc = atomic_fetch_inc(&lkb->lkb_wait_count);
- DLM_ASSERT(!wc, dlm_print_lkb(lkb); printk("wait_count %d\n", wc););
+ DLM_ASSERT(!lkb->lkb_wait_count,
+ dlm_print_lkb(lkb);
+ printk("wait_count %d\n", lkb->lkb_wait_count););
+
+ lkb->lkb_wait_count++;
lkb->lkb_wait_type = mstype;
lkb->lkb_wait_nodeid = to_nodeid; /* for debugging */
hold_lkb(lkb);
@@ -1502,7 +1504,7 @@ static int _remove_from_waiters(struct dlm_lkb *lkb, int mstype,
log_debug(ls, "remwait %x convert_reply zap overlap_cancel",
lkb->lkb_id);
lkb->lkb_wait_type = 0;
- atomic_dec(&lkb->lkb_wait_count);
+ lkb->lkb_wait_count--;
unhold_lkb(lkb);
goto out_del;
}
@@ -1529,15 +1531,16 @@ static int _remove_from_waiters(struct dlm_lkb *lkb, int mstype,
if (overlap_done && lkb->lkb_wait_type) {
log_error(ls, "remwait error %x reply %d wait_type %d overlap",
lkb->lkb_id, mstype, lkb->lkb_wait_type);
- atomic_dec(&lkb->lkb_wait_count);
+ lkb->lkb_wait_count--;
unhold_lkb(lkb);
lkb->lkb_wait_type = 0;
}
- DLM_ASSERT(atomic_read(&lkb->lkb_wait_count), dlm_print_lkb(lkb););
+ DLM_ASSERT(lkb->lkb_wait_count, dlm_print_lkb(lkb););
clear_bit(DLM_IFL_RESEND_BIT, &lkb->lkb_iflags);
- if (atomic_dec_and_test(&lkb->lkb_wait_count))
+ lkb->lkb_wait_count--;
+ if (!lkb->lkb_wait_count)
list_del_init(&lkb->lkb_wait_reply);
unhold_lkb(lkb);
return 0;
@@ -2666,7 +2669,7 @@ static int validate_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
goto out;
/* lock not allowed if there's any op in progress */
- if (lkb->lkb_wait_type || atomic_read(&lkb->lkb_wait_count))
+ if (lkb->lkb_wait_type || lkb->lkb_wait_count)
goto out;
if (is_overlap(lkb))
@@ -2728,7 +2731,7 @@ static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args)
/* normal unlock not allowed if there's any op in progress */
if (!(args->flags & (DLM_LKF_CANCEL | DLM_LKF_FORCEUNLOCK)) &&
- (lkb->lkb_wait_type || atomic_read(&lkb->lkb_wait_count)))
+ (lkb->lkb_wait_type || lkb->lkb_wait_count))
goto out;
/* an lkb may be waiting for an rsb lookup to complete where the
@@ -5011,21 +5014,32 @@ static struct dlm_lkb *find_resend_waiter(struct dlm_ls *ls)
return lkb;
}
-/* Deal with lookups and lkb's marked RESEND from _pre. We may now be the
- master or dir-node for r. Processing the lkb may result in it being placed
- back on waiters. */
-
-/* We do this after normal locking has been enabled and any saved messages
- (in requestqueue) have been processed. We should be confident that at
- this point we won't get or process a reply to any of these waiting
- operations. But, new ops may be coming in on the rsbs/locks here from
- userspace or remotely. */
-
-/* there may have been an overlap unlock/cancel prior to recovery or after
- recovery. if before, the lkb may still have a pos wait_count; if after, the
- overlap flag would just have been set and nothing new sent. we can be
- confident here than any replies to either the initial op or overlap ops
- prior to recovery have been received. */
+/*
+ * Forced state reset for locks that were in the middle of remote operations
+ * when recovery happened (i.e. lkbs that were on the waiters list, waiting
+ * for a reply from a remote operation.) The lkbs remaining on the waiters
+ * list need to be reevaluated; some may need resending to a different node
+ * than previously, and some may now need local handling rather than remote.
+ *
+ * First, the lkb state for the voided remote operation is forcibly reset,
+ * equivalent to what remove_from_waiters() would normally do:
+ * . lkb removed from ls_waiters list
+ * . lkb wait_type cleared
+ * . lkb wait_count cleared
+ * . lkb ref count decremented for each wait_count (almost always 1,
+ * but possibly 2 in case of cancel/unlock overlapping, which means
+ * two remote replies were being expected for the lkb.)
+ *
+ * Second, the lkb is reprocessed like an original operation would be,
+ * by passing it to _request_lock or _convert_lock, which will either
+ * process the lkb operation locally, or send it to a remote node again
+ * and put the lkb back onto the waiters list.
+ *
+ * When reprocessing the lkb, we may find that it's flagged for an overlapping
+ * force-unlock or cancel, either from before recovery began, or after recovery
+ * finished. If this is the case, the unlock/cancel is done directly, and the
+ * original operation is not initiated again (no _request_lock/_convert_lock.)
+ */
int dlm_recover_waiters_post(struct dlm_ls *ls)
{
@@ -5040,6 +5054,11 @@ int dlm_recover_waiters_post(struct dlm_ls *ls)
break;
}
+ /*
+ * Find an lkb from the waiters list that's been affected by
+ * recovery node changes, and needs to be reprocessed. Does
+ * hold_lkb(), adding a refcount.
+ */
lkb = find_resend_waiter(ls);
if (!lkb)
break;
@@ -5048,6 +5067,11 @@ int dlm_recover_waiters_post(struct dlm_ls *ls)
hold_rsb(r);
lock_rsb(r);
+ /*
+ * If the lkb has been flagged for a force unlock or cancel,
+ * then the reprocessing below will be replaced by just doing
+ * the unlock/cancel directly.
+ */
mstype = lkb->lkb_wait_type;
oc = test_and_clear_bit(DLM_IFL_OVERLAP_CANCEL_BIT,
&lkb->lkb_iflags);
@@ -5061,22 +5085,40 @@ int dlm_recover_waiters_post(struct dlm_ls *ls)
r->res_nodeid, lkb->lkb_nodeid, lkb->lkb_wait_nodeid,
dlm_dir_nodeid(r), oc, ou);
- /* At this point we assume that we won't get a reply to any
- previous op or overlap op on this lock. First, do a big
- remove_from_waiters() for all previous ops. */
+ /*
+ * No reply to the pre-recovery operation will now be received,
+ * so a forced equivalent of remove_from_waiters() is needed to
+ * reset the waiters state that was in place before recovery.
+ */
clear_bit(DLM_IFL_RESEND_BIT, &lkb->lkb_iflags);
+
+ /* Forcibly clear wait_type */
lkb->lkb_wait_type = 0;
- /* drop all wait_count references we still
- * hold a reference for this iteration.
+
+ /*
+ * Forcibly reset wait_count and associated refcount. The
+ * wait_count will almost always be 1, but in case of an
+ * overlapping unlock/cancel it could be 2: see where
+ * add_to_waiters() finds the lkb is already on the waiters
+ * list and does lkb_wait_count++; hold_lkb().
*/
- while (!atomic_dec_and_test(&lkb->lkb_wait_count))
+ while (lkb->lkb_wait_count) {
+ lkb->lkb_wait_count--;
unhold_lkb(lkb);
+ }
+ /* Forcibly remove from waiters list */
mutex_lock(&ls->ls_waiters_mutex);
list_del_init(&lkb->lkb_wait_reply);
mutex_unlock(&ls->ls_waiters_mutex);
+ /*
+ * The lkb is now clear of all prior waiters state and can be
+ * processed locally, or sent to remote node again, or directly
+ * cancelled/unlocked.
+ */
+
if (oc || ou) {
/* do an unlock or cancel instead of resending */
switch (mstype) {
diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c
index d814c51213..9ca83ef70e 100644
--- a/fs/dlm/plock.c
+++ b/fs/dlm/plock.c
@@ -138,14 +138,14 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
}
op->info.optype = DLM_PLOCK_OP_LOCK;
- op->info.pid = fl->fl_pid;
- op->info.ex = (fl->fl_type == F_WRLCK);
- op->info.wait = !!(fl->fl_flags & FL_SLEEP);
+ op->info.pid = fl->c.flc_pid;
+ op->info.ex = lock_is_write(fl);
+ op->info.wait = !!(fl->c.flc_flags & FL_SLEEP);
op->info.fsid = ls->ls_global_id;
op->info.number = number;
op->info.start = fl->fl_start;
op->info.end = fl->fl_end;
- op->info.owner = (__u64)(long)fl->fl_owner;
+ op->info.owner = (__u64)(long) fl->c.flc_owner;
/* async handling */
if (fl->fl_lmops && fl->fl_lmops->lm_grant) {
op_data = kzalloc(sizeof(*op_data), GFP_NOFS);
@@ -258,7 +258,7 @@ static int dlm_plock_callback(struct plock_op *op)
}
/* got fs lock; bookkeep locally as well: */
- flc->fl_flags &= ~FL_SLEEP;
+ flc->c.flc_flags &= ~FL_SLEEP;
if (posix_lock_file(file, flc, NULL)) {
/*
* This can only happen in the case of kmalloc() failure.
@@ -291,7 +291,7 @@ int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
struct dlm_ls *ls;
struct plock_op *op;
int rv;
- unsigned char fl_flags = fl->fl_flags;
+ unsigned char saved_flags = fl->c.flc_flags;
ls = dlm_find_lockspace_local(lockspace);
if (!ls)
@@ -304,7 +304,7 @@ int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
}
/* cause the vfs unlock to return ENOENT if lock is not found */
- fl->fl_flags |= FL_EXISTS;
+ fl->c.flc_flags |= FL_EXISTS;
rv = locks_lock_file_wait(file, fl);
if (rv == -ENOENT) {
@@ -317,14 +317,14 @@ int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
}
op->info.optype = DLM_PLOCK_OP_UNLOCK;
- op->info.pid = fl->fl_pid;
+ op->info.pid = fl->c.flc_pid;
op->info.fsid = ls->ls_global_id;
op->info.number = number;
op->info.start = fl->fl_start;
op->info.end = fl->fl_end;
- op->info.owner = (__u64)(long)fl->fl_owner;
+ op->info.owner = (__u64)(long) fl->c.flc_owner;
- if (fl->fl_flags & FL_CLOSE) {
+ if (fl->c.flc_flags & FL_CLOSE) {
op->info.flags |= DLM_PLOCK_FL_CLOSE;
send_op(op);
rv = 0;
@@ -345,7 +345,7 @@ out_free:
dlm_release_plock_op(op);
out:
dlm_put_lockspace(ls);
- fl->fl_flags = fl_flags;
+ fl->c.flc_flags = saved_flags;
return rv;
}
EXPORT_SYMBOL_GPL(dlm_posix_unlock);
@@ -375,14 +375,14 @@ int dlm_posix_cancel(dlm_lockspace_t *lockspace, u64 number, struct file *file,
return -EINVAL;
memset(&info, 0, sizeof(info));
- info.pid = fl->fl_pid;
- info.ex = (fl->fl_type == F_WRLCK);
+ info.pid = fl->c.flc_pid;
+ info.ex = lock_is_write(fl);
info.fsid = ls->ls_global_id;
dlm_put_lockspace(ls);
info.number = number;
info.start = fl->fl_start;
info.end = fl->fl_end;
- info.owner = (__u64)(long)fl->fl_owner;
+ info.owner = (__u64)(long) fl->c.flc_owner;
rv = do_lock_cancel(&info);
switch (rv) {
@@ -437,13 +437,13 @@ int dlm_posix_get(dlm_lockspace_t *lockspace, u64 number, struct file *file,
}
op->info.optype = DLM_PLOCK_OP_GET;
- op->info.pid = fl->fl_pid;
- op->info.ex = (fl->fl_type == F_WRLCK);
+ op->info.pid = fl->c.flc_pid;
+ op->info.ex = lock_is_write(fl);
op->info.fsid = ls->ls_global_id;
op->info.number = number;
op->info.start = fl->fl_start;
op->info.end = fl->fl_end;
- op->info.owner = (__u64)(long)fl->fl_owner;
+ op->info.owner = (__u64)(long) fl->c.flc_owner;
send_op(op);
wait_event(recv_wq, (op->done != 0));
@@ -455,16 +455,16 @@ int dlm_posix_get(dlm_lockspace_t *lockspace, u64 number, struct file *file,
rv = op->info.rv;
- fl->fl_type = F_UNLCK;
+ fl->c.flc_type = F_UNLCK;
if (rv == -ENOENT)
rv = 0;
else if (rv > 0) {
locks_init_lock(fl);
- fl->fl_type = (op->info.ex) ? F_WRLCK : F_RDLCK;
- fl->fl_flags = FL_POSIX;
- fl->fl_pid = op->info.pid;
+ fl->c.flc_type = (op->info.ex) ? F_WRLCK : F_RDLCK;
+ fl->c.flc_flags = FL_POSIX;
+ fl->c.flc_pid = op->info.pid;
if (op->info.nodeid != dlm_our_nodeid())
- fl->fl_pid = -fl->fl_pid;
+ fl->c.flc_pid = -fl->c.flc_pid;
fl->fl_start = op->info.start;
fl->fl_end = op->info.end;
rv = 0;
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index 12a483deee..9f9b684488 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -21,6 +21,7 @@
#include "dlm_internal.h"
#include "lockspace.h"
#include "lock.h"
+#include "lvb_table.h"
#include "user.h"
#include "ast.h"
#include "config.h"
@@ -805,7 +806,8 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count,
struct dlm_lkb *lkb;
DECLARE_WAITQUEUE(wait, current);
struct dlm_callback *cb;
- int rv, ret;
+ int rv, ret, copy_lvb = 0;
+ int old_mode, new_mode;
if (count == sizeof(struct dlm_device_version)) {
rv = copy_version_to_user(buf, count);
@@ -862,6 +864,9 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count,
lkb = list_first_entry(&proc->asts, struct dlm_lkb, lkb_cb_list);
+ /* dlm_dequeue_lkb_callback() sets a new lkb_last_cast */
+ old_mode = lkb->lkb_last_cast->mode;
+
rv = dlm_dequeue_lkb_callback(lkb, &cb);
switch (rv) {
case DLM_DEQUEUE_CALLBACK_EMPTY:
@@ -890,6 +895,12 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count,
if (cb->flags & DLM_CB_BAST) {
trace_dlm_bast(lkb->lkb_resource->res_ls, lkb, cb->mode);
} else if (cb->flags & DLM_CB_CAST) {
+ new_mode = cb->mode;
+
+ if (!cb->sb_status && lkb->lkb_lksb->sb_lvbptr &&
+ dlm_lvb_operations[old_mode + 1][new_mode + 1])
+ copy_lvb = 1;
+
lkb->lkb_lksb->sb_status = cb->sb_status;
lkb->lkb_lksb->sb_flags = cb->sb_flags;
trace_dlm_ast(lkb->lkb_resource->res_ls, lkb);
@@ -897,7 +908,7 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count,
ret = copy_result_to_user(lkb->lkb_ua,
test_bit(DLM_PROC_FLAGS_COMPAT, &proc->flags),
- cb->flags, cb->mode, cb->copy_lvb, buf, count);
+ cb->flags, cb->mode, copy_lvb, buf, count);
kref_put(&cb->ref, dlm_release_callback);