Adding upstream version 2:4.20.0+dfsg.upstream/2%4.20.0+dfsg

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-19 17:20:00 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-19 17:20:00 +0000
commit: 8daa83a594a2e98f39d764422bfbdbc62c9efd44 (patch)
tree: 4099e8021376c7d8c05bdf8503093d80e9c7bad0 /source3/locking/brlock.c
parent: Initial commit. (diff)
download: samba-8daa83a594a2e98f39d764422bfbdbc62c9efd44.tar.xz
samba-8daa83a594a2e98f39d764422bfbdbc62c9efd44.zip
1 files changed, 1998 insertions, 0 deletions
diff --git a/source3/locking/brlock.c b/source3/locking/brlock.c
new file mode 100644
index 0000000..905da04
--- /dev/null
+++ b/source3/locking/brlock.c
@@ -0,0 +1,1998 @@
+/*
+   Unix SMB/CIFS implementation.
+   byte range locking code
+   Updated to handle range splits/merges.
+
+   Copyright (C) Andrew Tridgell 1992-2000
+   Copyright (C) Jeremy Allison 1992-2000
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/* This module implements a tdb based byte range locking service,
+   replacing the fcntl() based byte range locking previously
+   used. This allows us to provide the same semantics as NT */
+
+#include "includes.h"
+#include "system/filesys.h"
+#include "lib/util/server_id.h"
+#include "locking/proto.h"
+#include "smbd/globals.h"
+#include "dbwrap/dbwrap.h"
+#include "dbwrap/dbwrap_open.h"
+#include "serverid.h"
+#include "messages.h"
+#include "util_tdb.h"
+
+#undef DBGC_CLASS
+#define DBGC_CLASS DBGC_LOCKING
+
+#define ZERO_ZERO 0
+
+/* The open brlock.tdb database. */
+
+static struct db_context *brlock_db;
+
+struct byte_range_lock {
+	struct files_struct *fsp;
+	TALLOC_CTX *req_mem_ctx;
+	const struct GUID *req_guid;
+	unsigned int num_locks;
+	bool modified;
+	struct lock_struct *lock_data;
+	struct db_record *record;
+};
+
+/****************************************************************************
+ Debug info at level 10 for lock struct.
+****************************************************************************/
+
+static void print_lock_struct(unsigned int i, const struct lock_struct *pls)
+{
+	struct server_id_buf tmp;
+
+	DBG_DEBUG("[%u]: smblctx = %"PRIu64", tid = %"PRIu32", pid = %s, "
+		  "start = %"PRIu64", size = %"PRIu64", fnum = %"PRIu64", "
+		  "%s %s\n",
+		  i,
+		  pls->context.smblctx,
+		  pls->context.tid,
+		  server_id_str_buf(pls->context.pid, &tmp),
+		  pls->start,
+		  pls->size,
+		  pls->fnum,
+		  lock_type_name(pls->lock_type),
+		  lock_flav_name(pls->lock_flav));
+}
+
+unsigned int brl_num_locks(const struct byte_range_lock *brl)
+{
+	return brl->num_locks;
+}
+
+struct files_struct *brl_fsp(struct byte_range_lock *brl)
+{
+	return brl->fsp;
+}
+
+TALLOC_CTX *brl_req_mem_ctx(const struct byte_range_lock *brl)
+{
+	if (brl->req_mem_ctx == NULL) {
+		return talloc_get_type_abort(brl, struct byte_range_lock);
+	}
+
+	return brl->req_mem_ctx;
+}
+
+const struct GUID *brl_req_guid(const struct byte_range_lock *brl)
+{
+	if (brl->req_guid == NULL) {
+		static const struct GUID brl_zero_req_guid;
+		return &brl_zero_req_guid;
+	}
+
+	return brl->req_guid;
+}
+
+/****************************************************************************
+ See if two locking contexts are equal.
+****************************************************************************/
+
+static bool brl_same_context(const struct lock_context *ctx1,
+			     const struct lock_context *ctx2)
+{
+	return (server_id_equal(&ctx1->pid, &ctx2->pid) &&
+		(ctx1->smblctx == ctx2->smblctx) &&
+		(ctx1->tid == ctx2->tid));
+}
+
+bool byte_range_valid(uint64_t ofs, uint64_t len)
+{
+	uint64_t max_len = UINT64_MAX - ofs;
+	uint64_t effective_len;
+
+	/*
+	 * [MS-FSA] specifies this:
+	 *
+	 * If (((FileOffset + Length - 1) < FileOffset) && Length != 0) {
+	 *   return STATUS_INVALID_LOCK_RANGE
+	 * }
+	 *
+	 * We avoid integer wrapping and calculate
+	 * max and effective len instead.
+	 */
+
+	if (len == 0) {
+		return true;
+	}
+
+	effective_len = len - 1;
+	if (effective_len <= max_len) {
+		return true;
+	}
+
+	return false;
+}
+
+bool byte_range_overlap(uint64_t ofs1,
+			uint64_t len1,
+			uint64_t ofs2,
+			uint64_t len2)
+{
+	uint64_t last1;
+	uint64_t last2;
+	bool valid;
+
+	/*
+	 * This is based on [MS-FSA] 2.1.4.10
+	 * Algorithm for Determining If a Range Access
+	 * Conflicts with Byte-Range Locks
+	 */
+
+	/*
+	 * The {0, 0} range doesn't conflict with any byte-range lock
+	 */
+	if (ofs1 == 0 && len1 == 0) {
+		return false;
+	}
+	if (ofs2 == 0 && len2 == 0) {
+		return false;
+	}
+
+	/*
+	 * The caller should have checked that the ranges are
+	 * valid. But currently we gracefully handle
+	 * the overflow of a read/write check.
+	 */
+	valid = byte_range_valid(ofs1, len1);
+	if (valid) {
+		last1 = ofs1 + len1 - 1;
+	} else {
+		last1 = UINT64_MAX;
+	}
+	valid = byte_range_valid(ofs2, len2);
+	if (valid) {
+		last2 = ofs2 + len2 - 1;
+	} else {
+		last2 = UINT64_MAX;
+	}
+
+	/*
+	 * If one range starts after the last
+	 * byte of the other range there's
+	 * no conflict.
+	 */
+	if (ofs1 > last2) {
+		return false;
+	}
+	if (ofs2 > last1) {
+		return false;
+	}
+
+	return true;
+}
+
+/****************************************************************************
+ See if lck1 and lck2 overlap.
+****************************************************************************/
+
+static bool brl_overlap(const struct lock_struct *lck1,
+                        const struct lock_struct *lck2)
+{
+	return byte_range_overlap(lck1->start,
+				  lck1->size,
+				  lck2->start,
+				  lck2->size);
+}
+
+/****************************************************************************
+ See if lock2 can be added when lock1 is in place.
+****************************************************************************/
+
+static bool brl_conflict(const struct lock_struct *lck1,
+			 const struct lock_struct *lck2)
+{
+	/* Read locks never conflict. */
+	if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
+		return False;
+	}
+
+	/* A READ lock can stack on top of a WRITE lock if they have the same
+	 * context & fnum. */
+	if (lck1->lock_type == WRITE_LOCK && lck2->lock_type == READ_LOCK &&
+	    brl_same_context(&lck1->context, &lck2->context) &&
+	    lck1->fnum == lck2->fnum) {
+		return False;
+	}
+
+	return brl_overlap(lck1, lck2);
+}
+
+/****************************************************************************
+ See if lock2 can be added when lock1 is in place - when both locks are POSIX
+ flavour. POSIX locks ignore fnum - they only care about dev/ino which we
+ know already match.
+****************************************************************************/
+
+static bool brl_conflict_posix(const struct lock_struct *lck1,
+			 	const struct lock_struct *lck2)
+{
+#if defined(DEVELOPER)
+	SMB_ASSERT(lck1->lock_flav == POSIX_LOCK);
+	SMB_ASSERT(lck2->lock_flav == POSIX_LOCK);
+#endif
+
+	/* Read locks never conflict. */
+	if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
+		return False;
+	}
+
+	/* Locks on the same context don't conflict. Ignore fnum. */
+	if (brl_same_context(&lck1->context, &lck2->context)) {
+		return False;
+	}
+
+	/* One is read, the other write, or the context is different,
+	   do they overlap ? */
+	return brl_overlap(lck1, lck2);
+}
+
+#if ZERO_ZERO
+static bool brl_conflict1(const struct lock_struct *lck1,
+			 const struct lock_struct *lck2)
+{
+	if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
+		return False;
+	}
+
+	if (brl_same_context(&lck1->context, &lck2->context) &&
+	    lck2->lock_type == READ_LOCK && lck1->fnum == lck2->fnum) {
+		return False;
+	}
+
+	if (lck2->start == 0 && lck2->size == 0 && lck1->size != 0) {
+		return True;
+	}
+
+	if (lck1->start >= (lck2->start + lck2->size) ||
+	    lck2->start >= (lck1->start + lck1->size)) {
+		return False;
+	}
+
+	return True;
+}
+#endif
+
+/****************************************************************************
+ Check to see if this lock conflicts, but ignore our own locks on the
+ same fnum only. This is the read/write lock check code path.
+ This is never used in the POSIX lock case.
+****************************************************************************/
+
+static bool brl_conflict_other(const struct lock_struct *lock,
+			       const struct lock_struct *rw_probe)
+{
+	if (lock->lock_type == READ_LOCK && rw_probe->lock_type == READ_LOCK) {
+		return False;
+	}
+
+	if (lock->lock_flav == POSIX_LOCK &&
+	    rw_probe->lock_flav == POSIX_LOCK) {
+		/*
+		 * POSIX flavour locks never conflict here - this is only called
+		 * in the read/write path.
+		 */
+		return False;
+	}
+
+	if (!brl_overlap(lock, rw_probe)) {
+		/*
+		 * I/O can only conflict when overlapping a lock, thus let it
+		 * pass
+		 */
+		return false;
+	}
+
+	if (!brl_same_context(&lock->context, &rw_probe->context)) {
+		/*
+		 * Different process, conflict
+		 */
+		return true;
+	}
+
+	if (lock->fnum != rw_probe->fnum) {
+		/*
+		 * Different file handle, conflict
+		 */
+		return true;
+	}
+
+	if ((lock->lock_type == READ_LOCK) &&
+	    (rw_probe->lock_type == WRITE_LOCK)) {
+		/*
+		 * Incoming WRITE locks conflict with existing READ locks even
+		 * if the context is the same. JRA. See LOCKTEST7 in
+		 * smbtorture.
+		 */
+		return true;
+	}
+
+	/*
+	 * I/O request compatible with existing lock, let it pass without
+	 * conflict
+	 */
+
+	return false;
+}
+
+/****************************************************************************
+ Open up the brlock.tdb database.
+****************************************************************************/
+
+void brl_init(bool read_only)
+{
+	int tdb_flags;
+	char *db_path;
+
+	if (brlock_db) {
+		return;
+	}
+
+	tdb_flags = SMBD_VOLATILE_TDB_FLAGS | TDB_SEQNUM;
+
+	db_path = lock_path(talloc_tos(), "brlock.tdb");
+	if (db_path == NULL) {
+		DEBUG(0, ("out of memory!\n"));
+		return;
+	}
+
+	brlock_db = db_open(NULL, db_path,
+			    SMBD_VOLATILE_TDB_HASH_SIZE, tdb_flags,
+			    read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644,
+			    DBWRAP_LOCK_ORDER_2, DBWRAP_FLAG_NONE);
+	if (!brlock_db) {
+		DEBUG(0,("Failed to open byte range locking database %s\n",
+			 db_path));
+		TALLOC_FREE(db_path);
+		return;
+	}
+	TALLOC_FREE(db_path);
+}
+
+/****************************************************************************
+ Close down the brlock.tdb database.
+****************************************************************************/
+
+void brl_shutdown(void)
+{
+	TALLOC_FREE(brlock_db);
+}
+
+#if ZERO_ZERO
+/****************************************************************************
+ Compare two locks for sorting.
+****************************************************************************/
+
+static int lock_compare(const struct lock_struct *lck1,
+			 const struct lock_struct *lck2)
+{
+	if (lck1->start != lck2->start) {
+		return (lck1->start - lck2->start);
+	}
+	if (lck2->size != lck1->size) {
+		return ((int)lck1->size - (int)lck2->size);
+	}
+	return 0;
+}
+#endif
+
+/****************************************************************************
+ Lock a range of bytes - Windows lock semantics.
+****************************************************************************/
+
+NTSTATUS brl_lock_windows_default(struct byte_range_lock *br_lck,
+				  struct lock_struct *plock)
+{
+	unsigned int i;
+	files_struct *fsp = br_lck->fsp;
+	struct lock_struct *locks = br_lck->lock_data;
+	NTSTATUS status;
+	bool valid;
+
+	SMB_ASSERT(plock->lock_type != UNLOCK_LOCK);
+
+	valid = byte_range_valid(plock->start, plock->size);
+	if (!valid) {
+		return NT_STATUS_INVALID_LOCK_RANGE;
+	}
+
+	for (i=0; i < br_lck->num_locks; i++) {
+		/* Do any Windows or POSIX locks conflict ? */
+		if (brl_conflict(&locks[i], plock)) {
+			if (!serverid_exists(&locks[i].context.pid)) {
+				locks[i].context.pid.pid = 0;
+				br_lck->modified = true;
+				continue;
+			}
+			/* Remember who blocked us. */
+			plock->context.smblctx = locks[i].context.smblctx;
+			return NT_STATUS_LOCK_NOT_GRANTED;
+		}
+#if ZERO_ZERO
+		if (plock->start == 0 && plock->size == 0 &&
+				locks[i].size == 0) {
+			break;
+		}
+#endif
+	}
+
+	contend_level2_oplocks_begin(fsp, LEVEL2_CONTEND_WINDOWS_BRL);
+
+	/* We can get the Windows lock, now see if it needs to
+	   be mapped into a lower level POSIX one, and if so can
+	   we get it ? */
+
+	if (lp_posix_locking(fsp->conn->params)) {
+		int errno_ret;
+		if (!set_posix_lock_windows_flavour(fsp,
+				plock->start,
+				plock->size,
+				plock->lock_type,
+				&plock->context,
+				locks,
+				br_lck->num_locks,
+				&errno_ret)) {
+
+			/* We don't know who blocked us. */
+			plock->context.smblctx = 0xFFFFFFFFFFFFFFFFLL;
+
+			if (errno_ret == EACCES || errno_ret == EAGAIN) {
+				status = NT_STATUS_LOCK_NOT_GRANTED;
+				goto fail;
+			} else {
+				status = map_nt_error_from_unix(errno);
+				goto fail;
+			}
+		}
+	}
+
+	/* no conflicts - add it to the list of locks */
+	locks = talloc_realloc(br_lck, locks, struct lock_struct,
+			       (br_lck->num_locks + 1));
+	if (!locks) {
+		status = NT_STATUS_NO_MEMORY;
+		goto fail;
+	}
+
+	memcpy(&locks[br_lck->num_locks], plock, sizeof(struct lock_struct));
+	br_lck->num_locks += 1;
+	br_lck->lock_data = locks;
+	br_lck->modified = True;
+
+	return NT_STATUS_OK;
+ fail:
+	contend_level2_oplocks_end(fsp, LEVEL2_CONTEND_WINDOWS_BRL);
+	return status;
+}
+
+/****************************************************************************
+ Cope with POSIX range splits and merges.
+****************************************************************************/
+
+static unsigned int brlock_posix_split_merge(struct lock_struct *lck_arr,	/* Output array. */
+						struct lock_struct *ex,		/* existing lock. */
+						struct lock_struct *plock)	/* proposed lock. */
+{
+	bool lock_types_differ = (ex->lock_type != plock->lock_type);
+
+	/* We can't merge non-conflicting locks on different context - ignore fnum. */
+
+	if (!brl_same_context(&ex->context, &plock->context)) {
+		/* Just copy. */
+		memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
+		return 1;
+	}
+
+	/* We now know we have the same context. */
+
+	/* Did we overlap ? */
+
+/*********************************************
+                                        +---------+
+                                        | ex      |
+                                        +---------+
+                         +-------+
+                         | plock |
+                         +-------+
+OR....
+        +---------+
+        |  ex     |
+        +---------+
+**********************************************/
+
+	if ( (ex->start > (plock->start + plock->size)) ||
+		(plock->start > (ex->start + ex->size))) {
+
+		/* No overlap with this lock - copy existing. */
+
+		memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
+		return 1;
+	}
+
+/*********************************************
+        +---------------------------+
+        |          ex               |
+        +---------------------------+
+        +---------------------------+
+        |       plock               | -> replace with plock.
+        +---------------------------+
+OR
+             +---------------+
+             |       ex      |
+             +---------------+
+        +---------------------------+
+        |       plock               | -> replace with plock.
+        +---------------------------+
+
+**********************************************/
+
+	if ( (ex->start >= plock->start) &&
+		(ex->start + ex->size <= plock->start + plock->size) ) {
+
+		/* Replace - discard existing lock. */
+
+		return 0;
+	}
+
+/*********************************************
+Adjacent after.
+                        +-------+
+                        |  ex   |
+                        +-------+
+        +---------------+
+        |   plock       |
+        +---------------+
+
+BECOMES....
+        +---------------+-------+
+        |   plock       | ex    | - different lock types.
+        +---------------+-------+
+OR.... (merge)
+        +-----------------------+
+        |   plock               | - same lock type.
+        +-----------------------+
+**********************************************/
+
+	if (plock->start + plock->size == ex->start) {
+
+		/* If the lock types are the same, we merge, if different, we
+		   add the remainder of the old lock. */
+
+		if (lock_types_differ) {
+			/* Add existing. */
+			memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
+			return 1;
+		} else {
+			/* Merge - adjust incoming lock as we may have more
+			 * merging to come. */
+			plock->size += ex->size;
+			return 0;
+		}
+	}
+
+/*********************************************
+Adjacent before.
+        +-------+
+        |  ex   |
+        +-------+
+                +---------------+
+                |   plock       |
+                +---------------+
+BECOMES....
+        +-------+---------------+
+        | ex    |   plock       | - different lock types
+        +-------+---------------+
+
+OR.... (merge)
+        +-----------------------+
+        |      plock            | - same lock type.
+        +-----------------------+
+
+**********************************************/
+
+	if (ex->start + ex->size == plock->start) {
+
+		/* If the lock types are the same, we merge, if different, we
+		   add the existing lock. */
+
+		if (lock_types_differ) {
+			memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
+			return 1;
+		} else {
+			/* Merge - adjust incoming lock as we may have more
+			 * merging to come. */
+			plock->start = ex->start;
+			plock->size += ex->size;
+			return 0;
+		}
+	}
+
+/*********************************************
+Overlap after.
+        +-----------------------+
+        |          ex           |
+        +-----------------------+
+        +---------------+
+        |   plock       |
+        +---------------+
+OR
+               +----------------+
+               |       ex       |
+               +----------------+
+        +---------------+
+        |   plock       |
+        +---------------+
+
+BECOMES....
+        +---------------+-------+
+        |   plock       | ex    | - different lock types.
+        +---------------+-------+
+OR.... (merge)
+        +-----------------------+
+        |   plock               | - same lock type.
+        +-----------------------+
+**********************************************/
+
+	if ( (ex->start >= plock->start) &&
+		(ex->start <= plock->start + plock->size) &&
+		(ex->start + ex->size > plock->start + plock->size) ) {
+
+		/* If the lock types are the same, we merge, if different, we
+		   add the remainder of the old lock. */
+
+		if (lock_types_differ) {
+			/* Add remaining existing. */
+			memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
+			/* Adjust existing start and size. */
+			lck_arr[0].start = plock->start + plock->size;
+			lck_arr[0].size = (ex->start + ex->size) - (plock->start + plock->size);
+			return 1;
+		} else {
+			/* Merge - adjust incoming lock as we may have more
+			 * merging to come. */
+			plock->size += (ex->start + ex->size) - (plock->start + plock->size);
+			return 0;
+		}
+	}
+
+/*********************************************
+Overlap before.
+        +-----------------------+
+        |  ex                   |
+        +-----------------------+
+                +---------------+
+                |   plock       |
+                +---------------+
+OR
+        +-------------+
+        |  ex         |
+        +-------------+
+                +---------------+
+                |   plock       |
+                +---------------+
+
+BECOMES....
+        +-------+---------------+
+        | ex    |   plock       | - different lock types
+        +-------+---------------+
+
+OR.... (merge)
+        +-----------------------+
+        |      plock            | - same lock type.
+        +-----------------------+
+
+**********************************************/
+
+	if ( (ex->start < plock->start) &&
+			(ex->start + ex->size >= plock->start) &&
+			(ex->start + ex->size <= plock->start + plock->size) ) {
+
+		/* If the lock types are the same, we merge, if different, we
+		   add the truncated old lock. */
+
+		if (lock_types_differ) {
+			memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
+			/* Adjust existing size. */
+			lck_arr[0].size = plock->start - ex->start;
+			return 1;
+		} else {
+			/* Merge - adjust incoming lock as we may have more
+			 * merging to come. MUST ADJUST plock SIZE FIRST ! */
+			plock->size += (plock->start - ex->start);
+			plock->start = ex->start;
+			return 0;
+		}
+	}
+
+/*********************************************
+Complete overlap.
+        +---------------------------+
+        |        ex                 |
+        +---------------------------+
+                +---------+
+                |  plock  |
+                +---------+
+BECOMES.....
+        +-------+---------+---------+
+        | ex    |  plock  | ex      | - different lock types.
+        +-------+---------+---------+
+OR
+        +---------------------------+
+        |        plock              | - same lock type.
+        +---------------------------+
+**********************************************/
+
+	if ( (ex->start < plock->start) && (ex->start + ex->size > plock->start + plock->size) ) {
+
+		if (lock_types_differ) {
+
+			/* We have to split ex into two locks here. */
+
+			memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
+			memcpy(&lck_arr[1], ex, sizeof(struct lock_struct));
+
+			/* Adjust first existing size. */
+			lck_arr[0].size = plock->start - ex->start;
+
+			/* Adjust second existing start and size. */
+			lck_arr[1].start = plock->start + plock->size;
+			lck_arr[1].size = (ex->start + ex->size) - (plock->start + plock->size);
+			return 2;
+		} else {
+			/* Just eat the existing locks, merge them into plock. */
+			plock->start = ex->start;
+			plock->size = ex->size;
+			return 0;
+		}
+	}
+
+	/* Never get here. */
+	smb_panic("brlock_posix_split_merge");
+	/* Notreached. */
+
+	/* Keep some compilers happy. */
+	return 0;
+}
+
+/****************************************************************************
+ Lock a range of bytes - POSIX lock semantics.
+ We must cope with range splits and merges.
+****************************************************************************/
+
+static NTSTATUS brl_lock_posix(struct byte_range_lock *br_lck,
+			       struct lock_struct *plock)
+{
+	unsigned int i, count, posix_count;
+	struct lock_struct *locks = br_lck->lock_data;
+	struct lock_struct *tp;
+	bool break_oplocks = false;
+	NTSTATUS status;
+
+	/* No zero-zero locks for POSIX. */
+	if (plock->start == 0 && plock->size == 0) {
+		return NT_STATUS_INVALID_PARAMETER;
+	}
+
+	/* Don't allow 64-bit lock wrap. */
+	if (plock->start + plock->size - 1 < plock->start) {
+		return NT_STATUS_INVALID_PARAMETER;
+	}
+
+	/* The worst case scenario here is we have to split an
+	   existing POSIX lock range into two, and add our lock,
+	   so we need at most 2 more entries. */
+
+	tp = talloc_array(br_lck, struct lock_struct, br_lck->num_locks + 2);
+	if (!tp) {
+		return NT_STATUS_NO_MEMORY;
+	}
+
+	count = posix_count = 0;
+
+	for (i=0; i < br_lck->num_locks; i++) {
+		struct lock_struct *curr_lock = &locks[i];
+
+		if (curr_lock->lock_flav == WINDOWS_LOCK) {
+			/* Do any Windows flavour locks conflict ? */
+			if (brl_conflict(curr_lock, plock)) {
+				if (!serverid_exists(&curr_lock->context.pid)) {
+					curr_lock->context.pid.pid = 0;
+					br_lck->modified = true;
+					continue;
+				}
+				/* No games with error messages. */
+				TALLOC_FREE(tp);
+				/* Remember who blocked us. */
+				plock->context.smblctx = curr_lock->context.smblctx;
+				return NT_STATUS_LOCK_NOT_GRANTED;
+			}
+			/* Just copy the Windows lock into the new array. */
+			memcpy(&tp[count], curr_lock, sizeof(struct lock_struct));
+			count++;
+		} else {
+			unsigned int tmp_count = 0;
+
+			/* POSIX conflict semantics are different. */
+			if (brl_conflict_posix(curr_lock, plock)) {
+				if (!serverid_exists(&curr_lock->context.pid)) {
+					curr_lock->context.pid.pid = 0;
+					br_lck->modified = true;
+					continue;
+				}
+				/* Can't block ourselves with POSIX locks. */
+				/* No games with error messages. */
+				TALLOC_FREE(tp);
+				/* Remember who blocked us. */
+				plock->context.smblctx = curr_lock->context.smblctx;
+				return NT_STATUS_LOCK_NOT_GRANTED;
+			}
+
+			/* Work out overlaps. */
+			tmp_count += brlock_posix_split_merge(&tp[count], curr_lock, plock);
+			posix_count += tmp_count;
+			count += tmp_count;
+		}
+	}
+
+	/*
+	 * Break oplocks while we hold a brl. Since lock() and unlock() calls
+	 * are not symmetric with POSIX semantics, we cannot guarantee our
+	 * contend_level2_oplocks_begin/end calls will be acquired and
+	 * released one-for-one as with Windows semantics. Therefore we only
+	 * call contend_level2_oplocks_begin if this is the first POSIX brl on
+	 * the file.
+	 */
+	break_oplocks = (posix_count == 0);
+	if (break_oplocks) {
+		contend_level2_oplocks_begin(br_lck->fsp,
+					     LEVEL2_CONTEND_POSIX_BRL);
+	}
+
+	/* Try and add the lock in order, sorted by lock start. */
+	for (i=0; i < count; i++) {
+		struct lock_struct *curr_lock = &tp[i];
+
+		if (curr_lock->start <= plock->start) {
+			continue;
+		}
+	}
+
+	if (i < count) {
+		memmove(&tp[i+1], &tp[i],
+			(count - i)*sizeof(struct lock_struct));
+	}
+	memcpy(&tp[i], plock, sizeof(struct lock_struct));
+	count++;
+
+	/* We can get the POSIX lock, now see if it needs to
+	   be mapped into a lower level POSIX one, and if so can
+	   we get it ? */
+
+	if (lp_posix_locking(br_lck->fsp->conn->params)) {
+		int errno_ret;
+
+		/* The lower layer just needs to attempt to
+		   get the system POSIX lock. We've weeded out
+		   any conflicts above. */
+
+		if (!set_posix_lock_posix_flavour(br_lck->fsp,
+				plock->start,
+				plock->size,
+				plock->lock_type,
+				&plock->context,
+				&errno_ret)) {
+
+			/* We don't know who blocked us. */
+			plock->context.smblctx = 0xFFFFFFFFFFFFFFFFLL;
+
+			if (errno_ret == EACCES || errno_ret == EAGAIN) {
+				TALLOC_FREE(tp);
+				status = NT_STATUS_LOCK_NOT_GRANTED;
+				goto fail;
+			} else {
+				TALLOC_FREE(tp);
+				status = map_nt_error_from_unix(errno);
+				goto fail;
+			}
+		}
+	}
+
+	/* If we didn't use all the allocated size,
+	 * Realloc so we don't leak entries per lock call. */
+	if (count < br_lck->num_locks + 2) {
+		tp = talloc_realloc(br_lck, tp, struct lock_struct, count);
+		if (!tp) {
+			status = NT_STATUS_NO_MEMORY;
+			goto fail;
+		}
+	}
+
+	br_lck->num_locks = count;
+	TALLOC_FREE(br_lck->lock_data);
+	br_lck->lock_data = tp;
+	locks = tp;
+	br_lck->modified = True;
+
+	/* A successful downgrade from write to read lock can trigger a lock
+	   re-evalutation where waiting readers can now proceed. */
+
+	return NT_STATUS_OK;
+ fail:
+	if (break_oplocks) {
+		contend_level2_oplocks_end(br_lck->fsp,
+					   LEVEL2_CONTEND_POSIX_BRL);
+	}
+	return status;
+}
+
+/****************************************************************************
+ Lock a range of bytes.
+****************************************************************************/
+
+NTSTATUS brl_lock(
+	struct byte_range_lock *br_lck,
+	uint64_t smblctx,
+	struct server_id pid,
+	br_off start,
+	br_off size,
+	enum brl_type lock_type,
+	enum brl_flavour lock_flav,
+	struct server_id *blocker_pid,
+	uint64_t *psmblctx)
+{
+	NTSTATUS ret;
+	struct lock_struct lock;
+
+	ZERO_STRUCT(lock);
+
+#if !ZERO_ZERO
+	if (start == 0 && size == 0) {
+		DEBUG(0,("client sent 0/0 lock - please report this\n"));
+	}
+#endif
+
+	lock = (struct lock_struct) {
+		.context.smblctx = smblctx,
+		.context.pid = pid,
+		.context.tid = br_lck->fsp->conn->cnum,
+		.start = start,
+		.size = size,
+		.fnum = br_lck->fsp->fnum,
+		.lock_type = lock_type,
+		.lock_flav = lock_flav
+	};
+
+	if (lock_flav == WINDOWS_LOCK) {
+		ret = SMB_VFS_BRL_LOCK_WINDOWS(
+			br_lck->fsp->conn, br_lck, &lock);
+	} else {
+		ret = brl_lock_posix(br_lck, &lock);
+	}
+
+#if ZERO_ZERO
+	/* sort the lock list */
+	TYPESAFE_QSORT(br_lck->lock_data, (size_t)br_lck->num_locks, lock_compare);
+#endif
+	/* If we're returning an error, return who blocked us. */
+	if (!NT_STATUS_IS_OK(ret) && psmblctx) {
+		*blocker_pid = lock.context.pid;
+		*psmblctx = lock.context.smblctx;
+	}
+	return ret;
+}
+
+/****************************************************************************
+ Unlock a range of bytes - Windows semantics.
+****************************************************************************/
+
+bool brl_unlock_windows_default(struct byte_range_lock *br_lck,
+				const struct lock_struct *plock)
+{
+	unsigned int i;
+	struct lock_struct *locks = br_lck->lock_data;
+	enum brl_type deleted_lock_type = READ_LOCK; /* shut the compiler up.... */
+
+	SMB_ASSERT(plock->lock_type == UNLOCK_LOCK);
+
+#if ZERO_ZERO
+	/* Delete write locks by preference... The lock list
+	   is sorted in the zero zero case. */
+
+	for (i = 0; i < br_lck->num_locks; i++) {
+		struct lock_struct *lock = &locks[i];
+
+		if (lock->lock_type == WRITE_LOCK &&
+		    brl_same_context(&lock->context, &plock->context) &&
+		    lock->fnum == plock->fnum &&
+		    lock->lock_flav == WINDOWS_LOCK &&
+		    lock->start == plock->start &&
+		    lock->size == plock->size) {
+
+			/* found it - delete it */
+			deleted_lock_type = lock->lock_type;
+			break;
+		}
+	}
+
+	if (i != br_lck->num_locks) {
+		/* We found it - don't search again. */
+		goto unlock_continue;
+	}
+#endif
+
+	for (i = 0; i < br_lck->num_locks; i++) {
+		struct lock_struct *lock = &locks[i];
+
+		/* Only remove our own locks that match in start, size, and flavour. */
+		if (brl_same_context(&lock->context, &plock->context) &&
+					lock->fnum == plock->fnum &&
+					lock->lock_flav == WINDOWS_LOCK &&
+					lock->start == plock->start &&
+					lock->size == plock->size ) {
+			deleted_lock_type = lock->lock_type;
+			break;
+		}
+	}
+
+	if (i == br_lck->num_locks) {
+		/* we didn't find it */
+		return False;
+	}
+
+#if ZERO_ZERO
+  unlock_continue:
+#endif
+
+	ARRAY_DEL_ELEMENT(locks, i, br_lck->num_locks);
+	br_lck->num_locks -= 1;
+	br_lck->modified = True;
+
+	/* Unlock the underlying POSIX regions. */
+	if(lp_posix_locking(br_lck->fsp->conn->params)) {
+		release_posix_lock_windows_flavour(br_lck->fsp,
+				plock->start,
+				plock->size,
+				deleted_lock_type,
+				&plock->context,
+				locks,
+				br_lck->num_locks);
+	}
+
+	contend_level2_oplocks_end(br_lck->fsp, LEVEL2_CONTEND_WINDOWS_BRL);
+	return True;
+}
+
+/****************************************************************************
+ Unlock a range of bytes - POSIX semantics.
+****************************************************************************/
+
+static bool brl_unlock_posix(struct byte_range_lock *br_lck,
+			     struct lock_struct *plock)
+{
+	unsigned int i, count;
+	struct lock_struct *tp;
+	struct lock_struct *locks = br_lck->lock_data;
+	bool overlap_found = False;
+
+	/* No zero-zero locks for POSIX. */
+	if (plock->start == 0 && plock->size == 0) {
+		return False;
+	}
+
+	/* Don't allow 64-bit lock wrap. */
+	if (plock->start + plock->size < plock->start ||
+			plock->start + plock->size < plock->size) {
+		DEBUG(10,("brl_unlock_posix: lock wrap\n"));
+		return False;
+	}
+
+	/* The worst case scenario here is we have to split an
+	   existing POSIX lock range into two, so we need at most
+	   1 more entry. */
+
+	tp = talloc_array(br_lck, struct lock_struct, br_lck->num_locks + 1);
+	if (!tp) {
+		DEBUG(10,("brl_unlock_posix: malloc fail\n"));
+		return False;
+	}
+
+	count = 0;
+	for (i = 0; i < br_lck->num_locks; i++) {
+		struct lock_struct *lock = &locks[i];
+		unsigned int tmp_count;
+
+		/* Only remove our own locks - ignore fnum. */
+		if (!brl_same_context(&lock->context, &plock->context)) {
+			memcpy(&tp[count], lock, sizeof(struct lock_struct));
+			count++;
+			continue;
+		}
+
+		if (lock->lock_flav == WINDOWS_LOCK) {
+			/* Do any Windows flavour locks conflict ? */
+			if (brl_conflict(lock, plock)) {
+				TALLOC_FREE(tp);
+				return false;
+			}
+			/* Just copy the Windows lock into the new array. */
+			memcpy(&tp[count], lock, sizeof(struct lock_struct));
+			count++;
+			continue;
+		}
+
+		/* Work out overlaps. */
+		tmp_count = brlock_posix_split_merge(&tp[count], lock, plock);
+
+		if (tmp_count == 0) {
+			/* plock overlapped the existing lock completely,
+			   or replaced it. Don't copy the existing lock. */
+			overlap_found = true;
+		} else if (tmp_count == 1) {
+			/* Either no overlap, (simple copy of existing lock) or
+			 * an overlap of an existing lock. */
+			/* If the lock changed size, we had an overlap. */
+			if (tp[count].size != lock->size) {
+				overlap_found = true;
+			}
+			count += tmp_count;
+		} else if (tmp_count == 2) {
+			/* We split a lock range in two. */
+			overlap_found = true;
+			count += tmp_count;
+
+			/* Optimisation... */
+			/* We know we're finished here as we can't overlap any
+			   more POSIX locks. Copy the rest of the lock array. */
+
+			if (i < br_lck->num_locks - 1) {
+				memcpy(&tp[count], &locks[i+1],
+					sizeof(*locks)*((br_lck->num_locks-1) - i));
+				count += ((br_lck->num_locks-1) - i);
+			}
+			break;
+		}
+
+	}
+
+	if (!overlap_found) {
+		/* Just ignore - no change. */
+		TALLOC_FREE(tp);
+		DEBUG(10,("brl_unlock_posix: No overlap - unlocked.\n"));
+		return True;
+	}
+
+	/* Unlock any POSIX regions. */
+	if(lp_posix_locking(br_lck->fsp->conn->params)) {
+		release_posix_lock_posix_flavour(br_lck->fsp,
+						plock->start,
+						plock->size,
+						&plock->context,
+						tp,
+						count);
+	}
+
+	/* Realloc so we don't leak entries per unlock call. */
+	if (count) {
+		tp = talloc_realloc(br_lck, tp, struct lock_struct, count);
+		if (!tp) {
+			DEBUG(10,("brl_unlock_posix: realloc fail\n"));
+			return False;
+		}
+	} else {
+		/* We deleted the last lock. */
+		TALLOC_FREE(tp);
+		tp = NULL;
+	}
+
+	contend_level2_oplocks_end(br_lck->fsp,
+				   LEVEL2_CONTEND_POSIX_BRL);
+
+	br_lck->num_locks = count;
+	TALLOC_FREE(br_lck->lock_data);
+	locks = tp;
+	br_lck->lock_data = tp;
+	br_lck->modified = True;
+
+	return True;
+}
+
+/****************************************************************************
+ Unlock a range of bytes.
+****************************************************************************/
+
+bool brl_unlock(struct byte_range_lock *br_lck,
+		uint64_t smblctx,
+		struct server_id pid,
+		br_off start,
+		br_off size,
+		enum brl_flavour lock_flav)
+{
+	struct lock_struct lock;
+
+	lock.context.smblctx = smblctx;
+	lock.context.pid = pid;
+	lock.context.tid = br_lck->fsp->conn->cnum;
+	lock.start = start;
+	lock.size = size;
+	lock.fnum = br_lck->fsp->fnum;
+	lock.lock_type = UNLOCK_LOCK;
+	lock.lock_flav = lock_flav;
+
+	if (lock_flav == WINDOWS_LOCK) {
+		return SMB_VFS_BRL_UNLOCK_WINDOWS(
+			br_lck->fsp->conn, br_lck, &lock);
+	} else {
+		return brl_unlock_posix(br_lck, &lock);
+	}
+}
+
+/****************************************************************************
+ Test if we could add a lock if we wanted to.
+ Returns True if the region required is currently unlocked, False if locked.
+****************************************************************************/
+
+bool brl_locktest(struct byte_range_lock *br_lck,
+		  const struct lock_struct *rw_probe)
+{
+	bool ret = True;
+	unsigned int i;
+	struct lock_struct *locks = br_lck->lock_data;
+	files_struct *fsp = br_lck->fsp;
+
+	/* Make sure existing locks don't conflict */
+	for (i=0; i < br_lck->num_locks; i++) {
+		/*
+		 * Our own locks don't conflict.
+		 */
+		if (brl_conflict_other(&locks[i], rw_probe)) {
+			if (br_lck->record == NULL) {
+				/* readonly */
+				return false;
+			}
+
+			if (!serverid_exists(&locks[i].context.pid)) {
+				locks[i].context.pid.pid = 0;
+				br_lck->modified = true;
+				continue;
+			}
+
+			return False;
+		}
+	}
+
+	/*
+	 * There is no lock held by an SMB daemon, check to
+	 * see if there is a POSIX lock from a UNIX or NFS process.
+	 * This only conflicts with Windows locks, not POSIX locks.
+	 */
+
+	if(lp_posix_locking(fsp->conn->params) &&
+	   (rw_probe->lock_flav == WINDOWS_LOCK)) {
+		/*
+		 * Make copies -- is_posix_locked might modify the values
+		 */
+
+		br_off start = rw_probe->start;
+		br_off size = rw_probe->size;
+		enum brl_type lock_type = rw_probe->lock_type;
+
+		ret = is_posix_locked(fsp, &start, &size, &lock_type, WINDOWS_LOCK);
+
+		DEBUG(10, ("brl_locktest: posix start=%ju len=%ju %s for %s "
+			   "file %s\n", (uintmax_t)start, (uintmax_t)size,
+			   ret ? "locked" : "unlocked",
+			   fsp_fnum_dbg(fsp), fsp_str_dbg(fsp)));
+
+		/* We need to return the inverse of is_posix_locked. */
+		ret = !ret;
+        }
+
+	/* no conflicts - we could have added it */
+	return ret;
+}
+
+/****************************************************************************
+ Query for existing locks.
+****************************************************************************/
+
+NTSTATUS brl_lockquery(struct byte_range_lock *br_lck,
+		uint64_t *psmblctx,
+		struct server_id pid,
+		br_off *pstart,
+		br_off *psize,
+		enum brl_type *plock_type,
+		enum brl_flavour lock_flav)
+{
+	unsigned int i;
+	struct lock_struct lock;
+	const struct lock_struct *locks = br_lck->lock_data;
+	files_struct *fsp = br_lck->fsp;
+
+	lock.context.smblctx = *psmblctx;
+	lock.context.pid = pid;
+	lock.context.tid = br_lck->fsp->conn->cnum;
+	lock.start = *pstart;
+	lock.size = *psize;
+	lock.fnum = fsp->fnum;
+	lock.lock_type = *plock_type;
+	lock.lock_flav = lock_flav;
+
+	/* Make sure existing locks don't conflict */
+	for (i=0; i < br_lck->num_locks; i++) {
+		const struct lock_struct *exlock = &locks[i];
+		bool conflict = False;
+
+		if (exlock->lock_flav == WINDOWS_LOCK) {
+			conflict = brl_conflict(exlock, &lock);
+		} else {
+			conflict = brl_conflict_posix(exlock, &lock);
+		}
+
+		if (conflict) {
+			*psmblctx = exlock->context.smblctx;
+        		*pstart = exlock->start;
+		        *psize = exlock->size;
+        		*plock_type = exlock->lock_type;
+			return NT_STATUS_LOCK_NOT_GRANTED;
+		}
+	}
+
+	/*
+	 * There is no lock held by an SMB daemon, check to
+	 * see if there is a POSIX lock from a UNIX or NFS process.
+	 */
+
+	if(lp_posix_locking(fsp->conn->params)) {
+		bool ret = is_posix_locked(fsp, pstart, psize, plock_type, POSIX_LOCK);
+
+		DEBUG(10, ("brl_lockquery: posix start=%ju len=%ju %s for %s "
+			   "file %s\n", (uintmax_t)*pstart,
+			   (uintmax_t)*psize, ret ? "locked" : "unlocked",
+			   fsp_fnum_dbg(fsp), fsp_str_dbg(fsp)));
+
+		if (ret) {
+			/* Hmmm. No clue what to set smblctx to - use -1. */
+			*psmblctx = 0xFFFFFFFFFFFFFFFFLL;
+			return NT_STATUS_LOCK_NOT_GRANTED;
+		}
+        }
+
+	return NT_STATUS_OK;
+}
+
+
+/****************************************************************************
+ Remove any locks associated with a open file.
+ We return True if this process owns any other Windows locks on this
+ fd and so we should not immediately close the fd.
+****************************************************************************/
+
+void brl_close_fnum(struct byte_range_lock *br_lck)
+{
+	files_struct *fsp = br_lck->fsp;
+	uint32_t tid = fsp->conn->cnum;
+	uint64_t fnum = fsp->fnum;
+	unsigned int i;
+	struct lock_struct *locks = br_lck->lock_data;
+	struct server_id pid = messaging_server_id(fsp->conn->sconn->msg_ctx);
+	struct lock_struct *locks_copy;
+	unsigned int num_locks_copy;
+
+	/* Copy the current lock array. */
+	if (br_lck->num_locks) {
+		locks_copy = (struct lock_struct *)talloc_memdup(br_lck, locks, br_lck->num_locks * sizeof(struct lock_struct));
+		if (!locks_copy) {
+			smb_panic("brl_close_fnum: talloc failed");
+			}
+	} else {
+		locks_copy = NULL;
+	}
+
+	num_locks_copy = br_lck->num_locks;
+
+	for (i=0; i < num_locks_copy; i++) {
+		struct lock_struct *lock = &locks_copy[i];
+
+		if (lock->context.tid == tid &&
+		    server_id_equal(&lock->context.pid, &pid) &&
+				(lock->fnum == fnum)) {
+			brl_unlock(
+				br_lck,
+				lock->context.smblctx,
+				pid,
+				lock->start,
+				lock->size,
+				lock->lock_flav);
+		}
+	}
+}
+
+bool brl_mark_disconnected(struct files_struct *fsp)
+{
+	uint32_t tid = fsp->conn->cnum;
+	uint64_t smblctx;
+	uint64_t fnum = fsp->fnum;
+	unsigned int i;
+	struct server_id self = messaging_server_id(fsp->conn->sconn->msg_ctx);
+	struct byte_range_lock *br_lck = NULL;
+
+	if (fsp->op == NULL) {
+		return false;
+	}
+
+	smblctx = fsp->op->global->open_persistent_id;
+
+	if (!fsp->op->global->durable) {
+		return false;
+	}
+
+	if (fsp->current_lock_count == 0) {
+		return true;
+	}
+
+	br_lck = brl_get_locks(talloc_tos(), fsp);
+	if (br_lck == NULL) {
+		return false;
+	}
+
+	for (i=0; i < br_lck->num_locks; i++) {
+		struct lock_struct *lock = &br_lck->lock_data[i];
+
+		/*
+		 * as this is a durable handle, we only expect locks
+		 * of the current file handle!
+		 */
+
+		if (lock->context.smblctx != smblctx) {
+			TALLOC_FREE(br_lck);
+			return false;
+		}
+
+		if (lock->context.tid != tid) {
+			TALLOC_FREE(br_lck);
+			return false;
+		}
+
+		if (!server_id_equal(&lock->context.pid, &self)) {
+			TALLOC_FREE(br_lck);
+			return false;
+		}
+
+		if (lock->fnum != fnum) {
+			TALLOC_FREE(br_lck);
+			return false;
+		}
+
+		server_id_set_disconnected(&lock->context.pid);
+		lock->context.tid = TID_FIELD_INVALID;
+		lock->fnum = FNUM_FIELD_INVALID;
+	}
+
+	br_lck->modified = true;
+	TALLOC_FREE(br_lck);
+	return true;
+}
+
+bool brl_reconnect_disconnected(struct files_struct *fsp)
+{
+	uint32_t tid = fsp->conn->cnum;
+	uint64_t smblctx;
+	uint64_t fnum = fsp->fnum;
+	unsigned int i;
+	struct server_id self = messaging_server_id(fsp->conn->sconn->msg_ctx);
+	struct byte_range_lock *br_lck = NULL;
+
+	if (fsp->op == NULL) {
+		return false;
+	}
+
+	smblctx = fsp->op->global->open_persistent_id;
+
+	if (!fsp->op->global->durable) {
+		return false;
+	}
+
+	/*
+	 * When reconnecting, we do not want to validate the brlock entries
+	 * and thereby remove our own (disconnected) entries but reactivate
+	 * them instead.
+	 */
+
+	br_lck = brl_get_locks(talloc_tos(), fsp);
+	if (br_lck == NULL) {
+		return false;
+	}
+
+	if (br_lck->num_locks == 0) {
+		TALLOC_FREE(br_lck);
+		return true;
+	}
+
+	for (i=0; i < br_lck->num_locks; i++) {
+		struct lock_struct *lock = &br_lck->lock_data[i];
+
+		/*
+		 * as this is a durable handle we only expect locks
+		 * of the current file handle!
+		 */
+
+		if (lock->context.smblctx != smblctx) {
+			TALLOC_FREE(br_lck);
+			return false;
+		}
+
+		if (lock->context.tid != TID_FIELD_INVALID) {
+			TALLOC_FREE(br_lck);
+			return false;
+		}
+
+		if (!server_id_is_disconnected(&lock->context.pid)) {
+			TALLOC_FREE(br_lck);
+			return false;
+		}
+
+		if (lock->fnum != FNUM_FIELD_INVALID) {
+			TALLOC_FREE(br_lck);
+			return false;
+		}
+
+		lock->context.pid = self;
+		lock->context.tid = tid;
+		lock->fnum = fnum;
+	}
+
+	fsp->current_lock_count = br_lck->num_locks;
+	br_lck->modified = true;
+	TALLOC_FREE(br_lck);
+	return true;
+}
+
+struct brl_forall_cb {
+	void (*fn)(struct file_id id, struct server_id pid,
+		   enum brl_type lock_type,
+		   enum brl_flavour lock_flav,
+		   br_off start, br_off size,
+		   void *private_data);
+	void *private_data;
+};
+
+/****************************************************************************
+ Traverse the whole database with this function, calling traverse_callback
+ on each lock.
+****************************************************************************/
+
+static int brl_traverse_fn(struct db_record *rec, void *state)
+{
+	struct brl_forall_cb *cb = (struct brl_forall_cb *)state;
+	struct lock_struct *locks;
+	struct file_id *key;
+	unsigned int i;
+	unsigned int num_locks = 0;
+	TDB_DATA dbkey;
+	TDB_DATA value;
+
+	dbkey = dbwrap_record_get_key(rec);
+	value = dbwrap_record_get_value(rec);
+
+	/* In a traverse function we must make a copy of
+	   dbuf before modifying it. */
+
+	locks = (struct lock_struct *)talloc_memdup(
+		talloc_tos(), value.dptr, value.dsize);
+	if (!locks) {
+		return -1; /* Terminate traversal. */
+	}
+
+	key = (struct file_id *)dbkey.dptr;
+	num_locks = value.dsize/sizeof(*locks);
+
+	if (cb->fn) {
+		for ( i=0; i<num_locks; i++) {
+			cb->fn(*key,
+				locks[i].context.pid,
+				locks[i].lock_type,
+				locks[i].lock_flav,
+				locks[i].start,
+				locks[i].size,
+				cb->private_data);
+		}
+	}
+
+	TALLOC_FREE(locks);
+	return 0;
+}
+
+/*******************************************************************
+ Call the specified function on each lock in the database.
+********************************************************************/
+
+int brl_forall(void (*fn)(struct file_id id, struct server_id pid,
+			  enum brl_type lock_type,
+			  enum brl_flavour lock_flav,
+			  br_off start, br_off size,
+			  void *private_data),
+	       void *private_data)
+{
+	struct brl_forall_cb cb;
+	NTSTATUS status;
+	int count = 0;
+
+	if (!brlock_db) {
+		return 0;
+	}
+	cb.fn = fn;
+	cb.private_data = private_data;
+	status = dbwrap_traverse(brlock_db, brl_traverse_fn, &cb, &count);
+
+	if (!NT_STATUS_IS_OK(status)) {
+		return -1;
+	} else {
+		return count;
+	}
+}
+
+/*******************************************************************
+ Store a potentially modified set of byte range lock data back into
+ the database.
+ Unlock the record.
+********************************************************************/
+
+static void byte_range_lock_flush(struct byte_range_lock *br_lck)
+{
+	unsigned i;
+	struct lock_struct *locks = br_lck->lock_data;
+
+	if (!br_lck->modified) {
+		DEBUG(10, ("br_lck not modified\n"));
+		goto done;
+	}
+
+	i = 0;
+
+	while (i < br_lck->num_locks) {
+		if (locks[i].context.pid.pid == 0) {
+			/*
+			 * Autocleanup, the process conflicted and does not
+			 * exist anymore.
+			 */
+			locks[i] = locks[br_lck->num_locks-1];
+			br_lck->num_locks -= 1;
+		} else {
+			i += 1;
+		}
+	}
+
+	if (br_lck->num_locks == 0) {
+		/* No locks - delete this entry. */
+		NTSTATUS status = dbwrap_record_delete(br_lck->record);
+		if (!NT_STATUS_IS_OK(status)) {
+			DEBUG(0, ("delete_rec returned %s\n",
+				  nt_errstr(status)));
+			smb_panic("Could not delete byte range lock entry");
+		}
+	} else {
+		TDB_DATA data = {
+			.dsize = br_lck->num_locks * sizeof(struct lock_struct),
+			.dptr = (uint8_t *)br_lck->lock_data,
+		};
+		NTSTATUS status;
+
+		status = dbwrap_record_store(br_lck->record, data, TDB_REPLACE);
+		if (!NT_STATUS_IS_OK(status)) {
+			DEBUG(0, ("store returned %s\n", nt_errstr(status)));
+			smb_panic("Could not store byte range mode entry");
+		}
+	}
+
+	DEBUG(10, ("seqnum=%d\n", dbwrap_get_seqnum(brlock_db)));
+
+ done:
+	br_lck->modified = false;
+	TALLOC_FREE(br_lck->record);
+}
+
+static int byte_range_lock_destructor(struct byte_range_lock *br_lck)
+{
+	byte_range_lock_flush(br_lck);
+	return 0;
+}
+
+static bool brl_parse_data(struct byte_range_lock *br_lck, TDB_DATA data)
+{
+	size_t data_len;
+
+	if (data.dsize == 0) {
+		return true;
+	}
+	if (data.dsize % sizeof(struct lock_struct) != 0) {
+		DEBUG(1, ("Invalid data size: %u\n", (unsigned)data.dsize));
+		return false;
+	}
+
+	br_lck->num_locks = data.dsize / sizeof(struct lock_struct);
+	data_len = br_lck->num_locks * sizeof(struct lock_struct);
+
+	br_lck->lock_data = talloc_memdup(br_lck, data.dptr, data_len);
+	if (br_lck->lock_data == NULL) {
+		DEBUG(1, ("talloc_memdup failed\n"));
+		return false;
+	}
+	return true;
+}
+
+/*******************************************************************
+ Fetch a set of byte range lock data from the database.
+ Leave the record locked.
+ TALLOC_FREE(brl) will release the lock in the destructor.
+********************************************************************/
+
+struct byte_range_lock *brl_get_locks(TALLOC_CTX *mem_ctx, files_struct *fsp)
+{
+	TDB_DATA key, data;
+	struct byte_range_lock *br_lck;
+
+	br_lck = talloc_zero(mem_ctx, struct byte_range_lock);
+	if (br_lck == NULL) {
+		return NULL;
+	}
+
+	br_lck->fsp = fsp;
+
+	key.dptr = (uint8_t *)&fsp->file_id;
+	key.dsize = sizeof(struct file_id);
+
+	br_lck->record = dbwrap_fetch_locked(brlock_db, br_lck, key);
+
+	if (br_lck->record == NULL) {
+		DEBUG(3, ("Could not lock byte range lock entry\n"));
+		TALLOC_FREE(br_lck);
+		return NULL;
+	}
+
+	data = dbwrap_record_get_value(br_lck->record);
+
+	if (!brl_parse_data(br_lck, data)) {
+		TALLOC_FREE(br_lck);
+		return NULL;
+	}
+
+	talloc_set_destructor(br_lck, byte_range_lock_destructor);
+
+	if (DEBUGLEVEL >= 10) {
+		unsigned int i;
+		struct file_id_buf buf;
+		struct lock_struct *locks = br_lck->lock_data;
+		DBG_DEBUG("%u current locks on file_id %s\n",
+			  br_lck->num_locks,
+			  file_id_str_buf(fsp->file_id, &buf));
+		for( i = 0; i < br_lck->num_locks; i++) {
+			print_lock_struct(i, &locks[i]);
+		}
+	}
+
+	return br_lck;
+}
+
+struct byte_range_lock *brl_get_locks_for_locking(TALLOC_CTX *mem_ctx,
+						  files_struct *fsp,
+						  TALLOC_CTX *req_mem_ctx,
+						  const struct GUID *req_guid)
+{
+	struct byte_range_lock *br_lck = NULL;
+
+	br_lck = brl_get_locks(mem_ctx, fsp);
+	if (br_lck == NULL) {
+		return NULL;
+	}
+	SMB_ASSERT(req_mem_ctx != NULL);
+	br_lck->req_mem_ctx = req_mem_ctx;
+	SMB_ASSERT(req_guid != NULL);
+	br_lck->req_guid = req_guid;
+
+	return br_lck;
+}
+
+struct brl_get_locks_readonly_state {
+	TALLOC_CTX *mem_ctx;
+	struct byte_range_lock **br_lock;
+};
+
+static void brl_get_locks_readonly_parser(TDB_DATA key, TDB_DATA data,
+					  void *private_data)
+{
+	struct brl_get_locks_readonly_state *state =
+		(struct brl_get_locks_readonly_state *)private_data;
+	struct byte_range_lock *br_lck;
+
+	br_lck = talloc_pooled_object(
+		state->mem_ctx, struct byte_range_lock, 1, data.dsize);
+	if (br_lck == NULL) {
+		*state->br_lock = NULL;
+		return;
+	}
+	*br_lck = (struct byte_range_lock) { 0 };
+	if (!brl_parse_data(br_lck, data)) {
+		*state->br_lock = NULL;
+		return;
+	}
+	*state->br_lock = br_lck;
+}
+
+struct byte_range_lock *brl_get_locks_readonly(files_struct *fsp)
+{
+	struct byte_range_lock *br_lock = NULL;
+	struct brl_get_locks_readonly_state state;
+	NTSTATUS status;
+
+	DEBUG(10, ("seqnum=%d, fsp->brlock_seqnum=%d\n",
+		   dbwrap_get_seqnum(brlock_db), fsp->brlock_seqnum));
+
+	if ((fsp->brlock_rec != NULL)
+	    && (dbwrap_get_seqnum(brlock_db) == fsp->brlock_seqnum)) {
+		/*
+		 * We have cached the brlock_rec and the database did not
+		 * change.
+		 */
+		return fsp->brlock_rec;
+	}
+
+	/*
+	 * Parse the record fresh from the database
+	 */
+
+	state.mem_ctx = fsp;
+	state.br_lock = &br_lock;
+
+	status = dbwrap_parse_record(
+		brlock_db,
+		make_tdb_data((uint8_t *)&fsp->file_id,
+			      sizeof(fsp->file_id)),
+		brl_get_locks_readonly_parser, &state);
+
+	if (NT_STATUS_EQUAL(status,NT_STATUS_NOT_FOUND)) {
+		/*
+		 * No locks on this file. Return an empty br_lock.
+		 */
+		br_lock = talloc_zero(fsp, struct byte_range_lock);
+		if (br_lock == NULL) {
+			return NULL;
+		}
+
+	} else if (!NT_STATUS_IS_OK(status)) {
+		DEBUG(3, ("Could not parse byte range lock record: "
+			  "%s\n", nt_errstr(status)));
+		return NULL;
+	}
+	if (br_lock == NULL) {
+		return NULL;
+	}
+
+	br_lock->fsp = fsp;
+	br_lock->modified = false;
+	br_lock->record = NULL;
+
+	/*
+	 * Cache the brlock struct, invalidated when the dbwrap_seqnum
+	 * changes. See beginning of this routine.
+	 */
+	TALLOC_FREE(fsp->brlock_rec);
+	fsp->brlock_rec = br_lock;
+	fsp->brlock_seqnum = dbwrap_get_seqnum(brlock_db);
+
+	return br_lock;
+}
+
+bool brl_cleanup_disconnected(struct file_id fid, uint64_t open_persistent_id)
+{
+	bool ret = false;
+	TALLOC_CTX *frame = talloc_stackframe();
+	TDB_DATA key, val;
+	struct db_record *rec;
+	struct lock_struct *lock;
+	unsigned n, num;
+	struct file_id_buf buf;
+	NTSTATUS status;
+
+	key = make_tdb_data((void*)&fid, sizeof(fid));
+
+	rec = dbwrap_fetch_locked(brlock_db, frame, key);
+	if (rec == NULL) {
+		DBG_INFO("failed to fetch record for file %s\n",
+			 file_id_str_buf(fid, &buf));
+		goto done;
+	}
+
+	val = dbwrap_record_get_value(rec);
+	lock = (struct lock_struct*)val.dptr;
+	num = val.dsize / sizeof(struct lock_struct);
+	if (lock == NULL) {
+		DBG_DEBUG("no byte range locks for file %s\n",
+			  file_id_str_buf(fid, &buf));
+		ret = true;
+		goto done;
+	}
+
+	for (n=0; n<num; n++) {
+		struct lock_context *ctx = &lock[n].context;
+
+		if (!server_id_is_disconnected(&ctx->pid)) {
+			struct server_id_buf tmp;
+			DBG_INFO("byte range lock "
+				 "%s used by server %s, do not cleanup\n",
+				 file_id_str_buf(fid, &buf),
+				 server_id_str_buf(ctx->pid, &tmp));
+			goto done;
+		}
+
+		if (ctx->smblctx != open_persistent_id)	{
+			DBG_INFO("byte range lock %s expected smblctx %"PRIu64" "
+				 "but found %"PRIu64", do not cleanup\n",
+				 file_id_str_buf(fid, &buf),
+				 open_persistent_id,
+				 ctx->smblctx);
+			goto done;
+		}
+	}
+
+	status = dbwrap_record_delete(rec);
+	if (!NT_STATUS_IS_OK(status)) {
+		DBG_INFO("failed to delete record "
+			 "for file %s from %s, open %"PRIu64": %s\n",
+			 file_id_str_buf(fid, &buf),
+			 dbwrap_name(brlock_db),
+			 open_persistent_id,
+			 nt_errstr(status));
+		goto done;
+	}
+
+	DBG_DEBUG("file %s cleaned up %u entries from open %"PRIu64"\n",
+		  file_id_str_buf(fid, &buf),
+		  num,
+		  open_persistent_id);
+
+	ret = true;
+done:
+	talloc_free(frame);
+	return ret;
+}
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-19 17:20:00 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-19 17:20:00 +0000
commit	8daa83a594a2e98f39d764422bfbdbc62c9efd44 (patch)
tree	4099e8021376c7d8c05bdf8503093d80e9c7bad0 /source3/locking/brlock.c
parent	Initial commit. (diff)
download	samba-8daa83a594a2e98f39d764422bfbdbc62c9efd44.tar.xz samba-8daa83a594a2e98f39d764422bfbdbc62c9efd44.zip