summaryrefslogtreecommitdiffstats
path: root/security/landlock
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--security/landlock/Kconfig21
-rw-r--r--security/landlock/Makefile4
-rw-r--r--security/landlock/common.h20
-rw-r--r--security/landlock/cred.c46
-rw-r--r--security/landlock/cred.h58
-rw-r--r--security/landlock/fs.c1311
-rw-r--r--security/landlock/fs.h95
-rw-r--r--security/landlock/limits.h27
-rw-r--r--security/landlock/object.c67
-rw-r--r--security/landlock/object.h91
-rw-r--r--security/landlock/ptrace.c120
-rw-r--r--security/landlock/ptrace.h14
-rw-r--r--security/landlock/ruleset.c475
-rw-r--r--security/landlock/ruleset.h180
-rw-r--r--security/landlock/setup.c41
-rw-r--r--security/landlock/setup.h18
-rw-r--r--security/landlock/syscalls.c456
17 files changed, 3044 insertions, 0 deletions
diff --git a/security/landlock/Kconfig b/security/landlock/Kconfig
new file mode 100644
index 0000000000..c1e862a384
--- /dev/null
+++ b/security/landlock/Kconfig
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+config SECURITY_LANDLOCK
+ bool "Landlock support"
+ depends on SECURITY
+ select SECURITY_PATH
+ help
+ Landlock is a sandboxing mechanism that enables processes to restrict
+ themselves (and their future children) by gradually enforcing
+ tailored access control policies. A Landlock security policy is a
+ set of access rights (e.g. open a file in read-only, make a
+ directory, etc.) tied to a file hierarchy. Such policy can be
+ configured and enforced by any processes for themselves using the
+ dedicated system calls: landlock_create_ruleset(),
+ landlock_add_rule(), and landlock_restrict_self().
+
+ See Documentation/userspace-api/landlock.rst for further information.
+
+ If you are unsure how to answer this question, answer N. Otherwise,
+ you should also prepend "landlock," to the content of CONFIG_LSM to
+ enable Landlock at boot time.
diff --git a/security/landlock/Makefile b/security/landlock/Makefile
new file mode 100644
index 0000000000..7bbd2f413b
--- /dev/null
+++ b/security/landlock/Makefile
@@ -0,0 +1,4 @@
+obj-$(CONFIG_SECURITY_LANDLOCK) := landlock.o
+
+landlock-y := setup.o syscalls.o object.o ruleset.o \
+ cred.o ptrace.o fs.o
diff --git a/security/landlock/common.h b/security/landlock/common.h
new file mode 100644
index 0000000000..5dc0fe1570
--- /dev/null
+++ b/security/landlock/common.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Landlock LSM - Common constants and helpers
+ *
+ * Copyright © 2016-2020 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2018-2020 ANSSI
+ */
+
+#ifndef _SECURITY_LANDLOCK_COMMON_H
+#define _SECURITY_LANDLOCK_COMMON_H
+
+#define LANDLOCK_NAME "landlock"
+
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+
+#define pr_fmt(fmt) LANDLOCK_NAME ": " fmt
+
+#endif /* _SECURITY_LANDLOCK_COMMON_H */
diff --git a/security/landlock/cred.c b/security/landlock/cred.c
new file mode 100644
index 0000000000..13dff2a315
--- /dev/null
+++ b/security/landlock/cred.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Landlock LSM - Credential hooks
+ *
+ * Copyright © 2017-2020 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2018-2020 ANSSI
+ */
+
+#include <linux/cred.h>
+#include <linux/lsm_hooks.h>
+
+#include "common.h"
+#include "cred.h"
+#include "ruleset.h"
+#include "setup.h"
+
+static int hook_cred_prepare(struct cred *const new,
+ const struct cred *const old, const gfp_t gfp)
+{
+ struct landlock_ruleset *const old_dom = landlock_cred(old)->domain;
+
+ if (old_dom) {
+ landlock_get_ruleset(old_dom);
+ landlock_cred(new)->domain = old_dom;
+ }
+ return 0;
+}
+
+static void hook_cred_free(struct cred *const cred)
+{
+ struct landlock_ruleset *const dom = landlock_cred(cred)->domain;
+
+ if (dom)
+ landlock_put_ruleset_deferred(dom);
+}
+
+static struct security_hook_list landlock_hooks[] __ro_after_init = {
+ LSM_HOOK_INIT(cred_prepare, hook_cred_prepare),
+ LSM_HOOK_INIT(cred_free, hook_cred_free),
+};
+
+__init void landlock_add_cred_hooks(void)
+{
+ security_add_hooks(landlock_hooks, ARRAY_SIZE(landlock_hooks),
+ LANDLOCK_NAME);
+}
diff --git a/security/landlock/cred.h b/security/landlock/cred.h
new file mode 100644
index 0000000000..af89ab00e6
--- /dev/null
+++ b/security/landlock/cred.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Landlock LSM - Credential hooks
+ *
+ * Copyright © 2019-2020 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2019-2020 ANSSI
+ */
+
+#ifndef _SECURITY_LANDLOCK_CRED_H
+#define _SECURITY_LANDLOCK_CRED_H
+
+#include <linux/cred.h>
+#include <linux/init.h>
+#include <linux/rcupdate.h>
+
+#include "ruleset.h"
+#include "setup.h"
+
+struct landlock_cred_security {
+ struct landlock_ruleset *domain;
+};
+
+static inline struct landlock_cred_security *
+landlock_cred(const struct cred *cred)
+{
+ return cred->security + landlock_blob_sizes.lbs_cred;
+}
+
+static inline const struct landlock_ruleset *landlock_get_current_domain(void)
+{
+ return landlock_cred(current_cred())->domain;
+}
+
+/*
+ * The call needs to come from an RCU read-side critical section.
+ */
+static inline const struct landlock_ruleset *
+landlock_get_task_domain(const struct task_struct *const task)
+{
+ return landlock_cred(__task_cred(task))->domain;
+}
+
+static inline bool landlocked(const struct task_struct *const task)
+{
+ bool has_dom;
+
+ if (task == current)
+ return !!landlock_get_current_domain();
+
+ rcu_read_lock();
+ has_dom = !!landlock_get_task_domain(task);
+ rcu_read_unlock();
+ return has_dom;
+}
+
+__init void landlock_add_cred_hooks(void);
+
+#endif /* _SECURITY_LANDLOCK_CRED_H */
diff --git a/security/landlock/fs.c b/security/landlock/fs.c
new file mode 100644
index 0000000000..1c0c198f6f
--- /dev/null
+++ b/security/landlock/fs.c
@@ -0,0 +1,1311 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Landlock LSM - Filesystem management and hooks
+ *
+ * Copyright © 2016-2020 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2018-2020 ANSSI
+ * Copyright © 2021-2022 Microsoft Corporation
+ */
+
+#include <linux/atomic.h>
+#include <linux/bitops.h>
+#include <linux/bits.h>
+#include <linux/compiler_types.h>
+#include <linux/dcache.h>
+#include <linux/err.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/limits.h>
+#include <linux/list.h>
+#include <linux/lsm_hooks.h>
+#include <linux/mount.h>
+#include <linux/namei.h>
+#include <linux/path.h>
+#include <linux/rcupdate.h>
+#include <linux/spinlock.h>
+#include <linux/stat.h>
+#include <linux/types.h>
+#include <linux/wait_bit.h>
+#include <linux/workqueue.h>
+#include <uapi/linux/landlock.h>
+
+#include "common.h"
+#include "cred.h"
+#include "fs.h"
+#include "limits.h"
+#include "object.h"
+#include "ruleset.h"
+#include "setup.h"
+
+/* Underlying object management */
+
+static void release_inode(struct landlock_object *const object)
+ __releases(object->lock)
+{
+ struct inode *const inode = object->underobj;
+ struct super_block *sb;
+
+ if (!inode) {
+ spin_unlock(&object->lock);
+ return;
+ }
+
+ /*
+ * Protects against concurrent use by hook_sb_delete() of the reference
+ * to the underlying inode.
+ */
+ object->underobj = NULL;
+ /*
+ * Makes sure that if the filesystem is concurrently unmounted,
+ * hook_sb_delete() will wait for us to finish iput().
+ */
+ sb = inode->i_sb;
+ atomic_long_inc(&landlock_superblock(sb)->inode_refs);
+ spin_unlock(&object->lock);
+ /*
+ * Because object->underobj was not NULL, hook_sb_delete() and
+ * get_inode_object() guarantee that it is safe to reset
+ * landlock_inode(inode)->object while it is not NULL. It is therefore
+ * not necessary to lock inode->i_lock.
+ */
+ rcu_assign_pointer(landlock_inode(inode)->object, NULL);
+ /*
+ * Now, new rules can safely be tied to @inode with get_inode_object().
+ */
+
+ iput(inode);
+ if (atomic_long_dec_and_test(&landlock_superblock(sb)->inode_refs))
+ wake_up_var(&landlock_superblock(sb)->inode_refs);
+}
+
+static const struct landlock_object_underops landlock_fs_underops = {
+ .release = release_inode
+};
+
+/* Ruleset management */
+
+static struct landlock_object *get_inode_object(struct inode *const inode)
+{
+ struct landlock_object *object, *new_object;
+ struct landlock_inode_security *inode_sec = landlock_inode(inode);
+
+ rcu_read_lock();
+retry:
+ object = rcu_dereference(inode_sec->object);
+ if (object) {
+ if (likely(refcount_inc_not_zero(&object->usage))) {
+ rcu_read_unlock();
+ return object;
+ }
+ /*
+ * We are racing with release_inode(), the object is going
+ * away. Wait for release_inode(), then retry.
+ */
+ spin_lock(&object->lock);
+ spin_unlock(&object->lock);
+ goto retry;
+ }
+ rcu_read_unlock();
+
+ /*
+ * If there is no object tied to @inode, then create a new one (without
+ * holding any locks).
+ */
+ new_object = landlock_create_object(&landlock_fs_underops, inode);
+ if (IS_ERR(new_object))
+ return new_object;
+
+ /*
+ * Protects against concurrent calls to get_inode_object() or
+ * hook_sb_delete().
+ */
+ spin_lock(&inode->i_lock);
+ if (unlikely(rcu_access_pointer(inode_sec->object))) {
+ /* Someone else just created the object, bail out and retry. */
+ spin_unlock(&inode->i_lock);
+ kfree(new_object);
+
+ rcu_read_lock();
+ goto retry;
+ }
+
+ /*
+ * @inode will be released by hook_sb_delete() on its superblock
+ * shutdown, or by release_inode() when no more ruleset references the
+ * related object.
+ */
+ ihold(inode);
+ rcu_assign_pointer(inode_sec->object, new_object);
+ spin_unlock(&inode->i_lock);
+ return new_object;
+}
+
+/* All access rights that can be tied to files. */
+/* clang-format off */
+#define ACCESS_FILE ( \
+ LANDLOCK_ACCESS_FS_EXECUTE | \
+ LANDLOCK_ACCESS_FS_WRITE_FILE | \
+ LANDLOCK_ACCESS_FS_READ_FILE | \
+ LANDLOCK_ACCESS_FS_TRUNCATE)
+/* clang-format on */
+
+/*
+ * All access rights that are denied by default whether they are handled or not
+ * by a ruleset/layer. This must be ORed with all ruleset->fs_access_masks[]
+ * entries when we need to get the absolute handled access masks.
+ */
+/* clang-format off */
+#define ACCESS_INITIALLY_DENIED ( \
+ LANDLOCK_ACCESS_FS_REFER)
+/* clang-format on */
+
+/*
+ * @path: Should have been checked by get_path_from_fd().
+ */
+int landlock_append_fs_rule(struct landlock_ruleset *const ruleset,
+ const struct path *const path,
+ access_mask_t access_rights)
+{
+ int err;
+ struct landlock_object *object;
+
+ /* Files only get access rights that make sense. */
+ if (!d_is_dir(path->dentry) &&
+ (access_rights | ACCESS_FILE) != ACCESS_FILE)
+ return -EINVAL;
+ if (WARN_ON_ONCE(ruleset->num_layers != 1))
+ return -EINVAL;
+
+ /* Transforms relative access rights to absolute ones. */
+ access_rights |=
+ LANDLOCK_MASK_ACCESS_FS &
+ ~(ruleset->fs_access_masks[0] | ACCESS_INITIALLY_DENIED);
+ object = get_inode_object(d_backing_inode(path->dentry));
+ if (IS_ERR(object))
+ return PTR_ERR(object);
+ mutex_lock(&ruleset->lock);
+ err = landlock_insert_rule(ruleset, object, access_rights);
+ mutex_unlock(&ruleset->lock);
+ /*
+ * No need to check for an error because landlock_insert_rule()
+ * increments the refcount for the new object if needed.
+ */
+ landlock_put_object(object);
+ return err;
+}
+
+/* Access-control management */
+
+/*
+ * The lifetime of the returned rule is tied to @domain.
+ *
+ * Returns NULL if no rule is found or if @dentry is negative.
+ */
+static inline const struct landlock_rule *
+find_rule(const struct landlock_ruleset *const domain,
+ const struct dentry *const dentry)
+{
+ const struct landlock_rule *rule;
+ const struct inode *inode;
+
+ /* Ignores nonexistent leafs. */
+ if (d_is_negative(dentry))
+ return NULL;
+
+ inode = d_backing_inode(dentry);
+ rcu_read_lock();
+ rule = landlock_find_rule(
+ domain, rcu_dereference(landlock_inode(inode)->object));
+ rcu_read_unlock();
+ return rule;
+}
+
+/*
+ * @layer_masks is read and may be updated according to the access request and
+ * the matching rule.
+ *
+ * Returns true if the request is allowed (i.e. relevant layer masks for the
+ * request are empty).
+ */
+static inline bool
+unmask_layers(const struct landlock_rule *const rule,
+ const access_mask_t access_request,
+ layer_mask_t (*const layer_masks)[LANDLOCK_NUM_ACCESS_FS])
+{
+ size_t layer_level;
+
+ if (!access_request || !layer_masks)
+ return true;
+ if (!rule)
+ return false;
+
+ /*
+ * An access is granted if, for each policy layer, at least one rule
+ * encountered on the pathwalk grants the requested access,
+ * regardless of its position in the layer stack. We must then check
+ * the remaining layers for each inode, from the first added layer to
+ * the last one. When there is multiple requested accesses, for each
+ * policy layer, the full set of requested accesses may not be granted
+ * by only one rule, but by the union (binary OR) of multiple rules.
+ * E.g. /a/b <execute> + /a <read> => /a/b <execute + read>
+ */
+ for (layer_level = 0; layer_level < rule->num_layers; layer_level++) {
+ const struct landlock_layer *const layer =
+ &rule->layers[layer_level];
+ const layer_mask_t layer_bit = BIT_ULL(layer->level - 1);
+ const unsigned long access_req = access_request;
+ unsigned long access_bit;
+ bool is_empty;
+
+ /*
+ * Records in @layer_masks which layer grants access to each
+ * requested access.
+ */
+ is_empty = true;
+ for_each_set_bit(access_bit, &access_req,
+ ARRAY_SIZE(*layer_masks)) {
+ if (layer->access & BIT_ULL(access_bit))
+ (*layer_masks)[access_bit] &= ~layer_bit;
+ is_empty = is_empty && !(*layer_masks)[access_bit];
+ }
+ if (is_empty)
+ return true;
+ }
+ return false;
+}
+
+/*
+ * Allows access to pseudo filesystems that will never be mountable (e.g.
+ * sockfs, pipefs), but can still be reachable through
+ * /proc/<pid>/fd/<file-descriptor>
+ */
+static inline bool is_nouser_or_private(const struct dentry *dentry)
+{
+ return (dentry->d_sb->s_flags & SB_NOUSER) ||
+ (d_is_positive(dentry) &&
+ unlikely(IS_PRIVATE(d_backing_inode(dentry))));
+}
+
+static inline access_mask_t
+get_handled_accesses(const struct landlock_ruleset *const domain)
+{
+ access_mask_t access_dom = ACCESS_INITIALLY_DENIED;
+ size_t layer_level;
+
+ for (layer_level = 0; layer_level < domain->num_layers; layer_level++)
+ access_dom |= domain->fs_access_masks[layer_level];
+ return access_dom & LANDLOCK_MASK_ACCESS_FS;
+}
+
+/**
+ * init_layer_masks - Initialize layer masks from an access request
+ *
+ * Populates @layer_masks such that for each access right in @access_request,
+ * the bits for all the layers are set where this access right is handled.
+ *
+ * @domain: The domain that defines the current restrictions.
+ * @access_request: The requested access rights to check.
+ * @layer_masks: The layer masks to populate.
+ *
+ * Returns: An access mask where each access right bit is set which is handled
+ * in any of the active layers in @domain.
+ */
+static inline access_mask_t
+init_layer_masks(const struct landlock_ruleset *const domain,
+ const access_mask_t access_request,
+ layer_mask_t (*const layer_masks)[LANDLOCK_NUM_ACCESS_FS])
+{
+ access_mask_t handled_accesses = 0;
+ size_t layer_level;
+
+ memset(layer_masks, 0, sizeof(*layer_masks));
+ /* An empty access request can happen because of O_WRONLY | O_RDWR. */
+ if (!access_request)
+ return 0;
+
+ /* Saves all handled accesses per layer. */
+ for (layer_level = 0; layer_level < domain->num_layers; layer_level++) {
+ const unsigned long access_req = access_request;
+ unsigned long access_bit;
+
+ for_each_set_bit(access_bit, &access_req,
+ ARRAY_SIZE(*layer_masks)) {
+ /*
+ * Artificially handles all initially denied by default
+ * access rights.
+ */
+ if (BIT_ULL(access_bit) &
+ (domain->fs_access_masks[layer_level] |
+ ACCESS_INITIALLY_DENIED)) {
+ (*layer_masks)[access_bit] |=
+ BIT_ULL(layer_level);
+ handled_accesses |= BIT_ULL(access_bit);
+ }
+ }
+ }
+ return handled_accesses;
+}
+
+/*
+ * Check that a destination file hierarchy has more restrictions than a source
+ * file hierarchy. This is only used for link and rename actions.
+ *
+ * @layer_masks_child2: Optional child masks.
+ */
+static inline bool no_more_access(
+ const layer_mask_t (*const layer_masks_parent1)[LANDLOCK_NUM_ACCESS_FS],
+ const layer_mask_t (*const layer_masks_child1)[LANDLOCK_NUM_ACCESS_FS],
+ const bool child1_is_directory,
+ const layer_mask_t (*const layer_masks_parent2)[LANDLOCK_NUM_ACCESS_FS],
+ const layer_mask_t (*const layer_masks_child2)[LANDLOCK_NUM_ACCESS_FS],
+ const bool child2_is_directory)
+{
+ unsigned long access_bit;
+
+ for (access_bit = 0; access_bit < ARRAY_SIZE(*layer_masks_parent2);
+ access_bit++) {
+ /* Ignores accesses that only make sense for directories. */
+ const bool is_file_access =
+ !!(BIT_ULL(access_bit) & ACCESS_FILE);
+
+ if (child1_is_directory || is_file_access) {
+ /*
+ * Checks if the destination restrictions are a
+ * superset of the source ones (i.e. inherited access
+ * rights without child exceptions):
+ * restrictions(parent2) >= restrictions(child1)
+ */
+ if ((((*layer_masks_parent1)[access_bit] &
+ (*layer_masks_child1)[access_bit]) |
+ (*layer_masks_parent2)[access_bit]) !=
+ (*layer_masks_parent2)[access_bit])
+ return false;
+ }
+
+ if (!layer_masks_child2)
+ continue;
+ if (child2_is_directory || is_file_access) {
+ /*
+ * Checks inverted restrictions for RENAME_EXCHANGE:
+ * restrictions(parent1) >= restrictions(child2)
+ */
+ if ((((*layer_masks_parent2)[access_bit] &
+ (*layer_masks_child2)[access_bit]) |
+ (*layer_masks_parent1)[access_bit]) !=
+ (*layer_masks_parent1)[access_bit])
+ return false;
+ }
+ }
+ return true;
+}
+
+/*
+ * Removes @layer_masks accesses that are not requested.
+ *
+ * Returns true if the request is allowed, false otherwise.
+ */
+static inline bool
+scope_to_request(const access_mask_t access_request,
+ layer_mask_t (*const layer_masks)[LANDLOCK_NUM_ACCESS_FS])
+{
+ const unsigned long access_req = access_request;
+ unsigned long access_bit;
+
+ if (WARN_ON_ONCE(!layer_masks))
+ return true;
+
+ for_each_clear_bit(access_bit, &access_req, ARRAY_SIZE(*layer_masks))
+ (*layer_masks)[access_bit] = 0;
+ return !memchr_inv(layer_masks, 0, sizeof(*layer_masks));
+}
+
+/*
+ * Returns true if there is at least one access right different than
+ * LANDLOCK_ACCESS_FS_REFER.
+ */
+static inline bool
+is_eacces(const layer_mask_t (*const layer_masks)[LANDLOCK_NUM_ACCESS_FS],
+ const access_mask_t access_request)
+{
+ unsigned long access_bit;
+ /* LANDLOCK_ACCESS_FS_REFER alone must return -EXDEV. */
+ const unsigned long access_check = access_request &
+ ~LANDLOCK_ACCESS_FS_REFER;
+
+ if (!layer_masks)
+ return false;
+
+ for_each_set_bit(access_bit, &access_check, ARRAY_SIZE(*layer_masks)) {
+ if ((*layer_masks)[access_bit])
+ return true;
+ }
+ return false;
+}
+
+/**
+ * is_access_to_paths_allowed - Check accesses for requests with a common path
+ *
+ * @domain: Domain to check against.
+ * @path: File hierarchy to walk through.
+ * @access_request_parent1: Accesses to check, once @layer_masks_parent1 is
+ * equal to @layer_masks_parent2 (if any). This is tied to the unique
+ * requested path for most actions, or the source in case of a refer action
+ * (i.e. rename or link), or the source and destination in case of
+ * RENAME_EXCHANGE.
+ * @layer_masks_parent1: Pointer to a matrix of layer masks per access
+ * masks, identifying the layers that forbid a specific access. Bits from
+ * this matrix can be unset according to the @path walk. An empty matrix
+ * means that @domain allows all possible Landlock accesses (i.e. not only
+ * those identified by @access_request_parent1). This matrix can
+ * initially refer to domain layer masks and, when the accesses for the
+ * destination and source are the same, to requested layer masks.
+ * @dentry_child1: Dentry to the initial child of the parent1 path. This
+ * pointer must be NULL for non-refer actions (i.e. not link nor rename).
+ * @access_request_parent2: Similar to @access_request_parent1 but for a
+ * request involving a source and a destination. This refers to the
+ * destination, except in case of RENAME_EXCHANGE where it also refers to
+ * the source. Must be set to 0 when using a simple path request.
+ * @layer_masks_parent2: Similar to @layer_masks_parent1 but for a refer
+ * action. This must be NULL otherwise.
+ * @dentry_child2: Dentry to the initial child of the parent2 path. This
+ * pointer is only set for RENAME_EXCHANGE actions and must be NULL
+ * otherwise.
+ *
+ * This helper first checks that the destination has a superset of restrictions
+ * compared to the source (if any) for a common path. Because of
+ * RENAME_EXCHANGE actions, source and destinations may be swapped. It then
+ * checks that the collected accesses and the remaining ones are enough to
+ * allow the request.
+ *
+ * Returns:
+ * - true if the access request is granted;
+ * - false otherwise.
+ */
+static bool is_access_to_paths_allowed(
+ const struct landlock_ruleset *const domain,
+ const struct path *const path,
+ const access_mask_t access_request_parent1,
+ layer_mask_t (*const layer_masks_parent1)[LANDLOCK_NUM_ACCESS_FS],
+ const struct dentry *const dentry_child1,
+ const access_mask_t access_request_parent2,
+ layer_mask_t (*const layer_masks_parent2)[LANDLOCK_NUM_ACCESS_FS],
+ const struct dentry *const dentry_child2)
+{
+ bool allowed_parent1 = false, allowed_parent2 = false, is_dom_check,
+ child1_is_directory = true, child2_is_directory = true;
+ struct path walker_path;
+ access_mask_t access_masked_parent1, access_masked_parent2;
+ layer_mask_t _layer_masks_child1[LANDLOCK_NUM_ACCESS_FS],
+ _layer_masks_child2[LANDLOCK_NUM_ACCESS_FS];
+ layer_mask_t(*layer_masks_child1)[LANDLOCK_NUM_ACCESS_FS] = NULL,
+ (*layer_masks_child2)[LANDLOCK_NUM_ACCESS_FS] = NULL;
+
+ if (!access_request_parent1 && !access_request_parent2)
+ return true;
+ if (WARN_ON_ONCE(!domain || !path))
+ return true;
+ if (is_nouser_or_private(path->dentry))
+ return true;
+ if (WARN_ON_ONCE(domain->num_layers < 1 || !layer_masks_parent1))
+ return false;
+
+ if (unlikely(layer_masks_parent2)) {
+ if (WARN_ON_ONCE(!dentry_child1))
+ return false;
+ /*
+ * For a double request, first check for potential privilege
+ * escalation by looking at domain handled accesses (which are
+ * a superset of the meaningful requested accesses).
+ */
+ access_masked_parent1 = access_masked_parent2 =
+ get_handled_accesses(domain);
+ is_dom_check = true;
+ } else {
+ if (WARN_ON_ONCE(dentry_child1 || dentry_child2))
+ return false;
+ /* For a simple request, only check for requested accesses. */
+ access_masked_parent1 = access_request_parent1;
+ access_masked_parent2 = access_request_parent2;
+ is_dom_check = false;
+ }
+
+ if (unlikely(dentry_child1)) {
+ unmask_layers(find_rule(domain, dentry_child1),
+ init_layer_masks(domain, LANDLOCK_MASK_ACCESS_FS,
+ &_layer_masks_child1),
+ &_layer_masks_child1);
+ layer_masks_child1 = &_layer_masks_child1;
+ child1_is_directory = d_is_dir(dentry_child1);
+ }
+ if (unlikely(dentry_child2)) {
+ unmask_layers(find_rule(domain, dentry_child2),
+ init_layer_masks(domain, LANDLOCK_MASK_ACCESS_FS,
+ &_layer_masks_child2),
+ &_layer_masks_child2);
+ layer_masks_child2 = &_layer_masks_child2;
+ child2_is_directory = d_is_dir(dentry_child2);
+ }
+
+ walker_path = *path;
+ path_get(&walker_path);
+ /*
+ * We need to walk through all the hierarchy to not miss any relevant
+ * restriction.
+ */
+ while (true) {
+ struct dentry *parent_dentry;
+ const struct landlock_rule *rule;
+
+ /*
+ * If at least all accesses allowed on the destination are
+ * already allowed on the source, respectively if there is at
+ * least as much as restrictions on the destination than on the
+ * source, then we can safely refer files from the source to
+ * the destination without risking a privilege escalation.
+ * This also applies in the case of RENAME_EXCHANGE, which
+ * implies checks on both direction. This is crucial for
+ * standalone multilayered security policies. Furthermore,
+ * this helps avoid policy writers to shoot themselves in the
+ * foot.
+ */
+ if (unlikely(is_dom_check &&
+ no_more_access(
+ layer_masks_parent1, layer_masks_child1,
+ child1_is_directory, layer_masks_parent2,
+ layer_masks_child2,
+ child2_is_directory))) {
+ allowed_parent1 = scope_to_request(
+ access_request_parent1, layer_masks_parent1);
+ allowed_parent2 = scope_to_request(
+ access_request_parent2, layer_masks_parent2);
+
+ /* Stops when all accesses are granted. */
+ if (allowed_parent1 && allowed_parent2)
+ break;
+
+ /*
+ * Now, downgrades the remaining checks from domain
+ * handled accesses to requested accesses.
+ */
+ is_dom_check = false;
+ access_masked_parent1 = access_request_parent1;
+ access_masked_parent2 = access_request_parent2;
+ }
+
+ rule = find_rule(domain, walker_path.dentry);
+ allowed_parent1 = unmask_layers(rule, access_masked_parent1,
+ layer_masks_parent1);
+ allowed_parent2 = unmask_layers(rule, access_masked_parent2,
+ layer_masks_parent2);
+
+ /* Stops when a rule from each layer grants access. */
+ if (allowed_parent1 && allowed_parent2)
+ break;
+
+jump_up:
+ if (walker_path.dentry == walker_path.mnt->mnt_root) {
+ if (follow_up(&walker_path)) {
+ /* Ignores hidden mount points. */
+ goto jump_up;
+ } else {
+ /*
+ * Stops at the real root. Denies access
+ * because not all layers have granted access.
+ */
+ break;
+ }
+ }
+ if (unlikely(IS_ROOT(walker_path.dentry))) {
+ /*
+ * Stops at disconnected root directories. Only allows
+ * access to internal filesystems (e.g. nsfs, which is
+ * reachable through /proc/<pid>/ns/<namespace>).
+ */
+ allowed_parent1 = allowed_parent2 =
+ !!(walker_path.mnt->mnt_flags & MNT_INTERNAL);
+ break;
+ }
+ parent_dentry = dget_parent(walker_path.dentry);
+ dput(walker_path.dentry);
+ walker_path.dentry = parent_dentry;
+ }
+ path_put(&walker_path);
+
+ return allowed_parent1 && allowed_parent2;
+}
+
+static inline int check_access_path(const struct landlock_ruleset *const domain,
+ const struct path *const path,
+ access_mask_t access_request)
+{
+ layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = {};
+
+ access_request = init_layer_masks(domain, access_request, &layer_masks);
+ if (is_access_to_paths_allowed(domain, path, access_request,
+ &layer_masks, NULL, 0, NULL, NULL))
+ return 0;
+ return -EACCES;
+}
+
+static inline int current_check_access_path(const struct path *const path,
+ const access_mask_t access_request)
+{
+ const struct landlock_ruleset *const dom =
+ landlock_get_current_domain();
+
+ if (!dom)
+ return 0;
+ return check_access_path(dom, path, access_request);
+}
+
+static inline access_mask_t get_mode_access(const umode_t mode)
+{
+ switch (mode & S_IFMT) {
+ case S_IFLNK:
+ return LANDLOCK_ACCESS_FS_MAKE_SYM;
+ case 0:
+ /* A zero mode translates to S_IFREG. */
+ case S_IFREG:
+ return LANDLOCK_ACCESS_FS_MAKE_REG;
+ case S_IFDIR:
+ return LANDLOCK_ACCESS_FS_MAKE_DIR;
+ case S_IFCHR:
+ return LANDLOCK_ACCESS_FS_MAKE_CHAR;
+ case S_IFBLK:
+ return LANDLOCK_ACCESS_FS_MAKE_BLOCK;
+ case S_IFIFO:
+ return LANDLOCK_ACCESS_FS_MAKE_FIFO;
+ case S_IFSOCK:
+ return LANDLOCK_ACCESS_FS_MAKE_SOCK;
+ default:
+ WARN_ON_ONCE(1);
+ return 0;
+ }
+}
+
+static inline access_mask_t maybe_remove(const struct dentry *const dentry)
+{
+ if (d_is_negative(dentry))
+ return 0;
+ return d_is_dir(dentry) ? LANDLOCK_ACCESS_FS_REMOVE_DIR :
+ LANDLOCK_ACCESS_FS_REMOVE_FILE;
+}
+
+/**
+ * collect_domain_accesses - Walk through a file path and collect accesses
+ *
+ * @domain: Domain to check against.
+ * @mnt_root: Last directory to check.
+ * @dir: Directory to start the walk from.
+ * @layer_masks_dom: Where to store the collected accesses.
+ *
+ * This helper is useful to begin a path walk from the @dir directory to a
+ * @mnt_root directory used as a mount point. This mount point is the common
+ * ancestor between the source and the destination of a renamed and linked
+ * file. While walking from @dir to @mnt_root, we record all the domain's
+ * allowed accesses in @layer_masks_dom.
+ *
+ * This is similar to is_access_to_paths_allowed() but much simpler because it
+ * only handles walking on the same mount point and only checks one set of
+ * accesses.
+ *
+ * Returns:
+ * - true if all the domain access rights are allowed for @dir;
+ * - false if the walk reached @mnt_root.
+ */
+static bool collect_domain_accesses(
+ const struct landlock_ruleset *const domain,
+ const struct dentry *const mnt_root, struct dentry *dir,
+ layer_mask_t (*const layer_masks_dom)[LANDLOCK_NUM_ACCESS_FS])
+{
+ unsigned long access_dom;
+ bool ret = false;
+
+ if (WARN_ON_ONCE(!domain || !mnt_root || !dir || !layer_masks_dom))
+ return true;
+ if (is_nouser_or_private(dir))
+ return true;
+
+ access_dom = init_layer_masks(domain, LANDLOCK_MASK_ACCESS_FS,
+ layer_masks_dom);
+
+ dget(dir);
+ while (true) {
+ struct dentry *parent_dentry;
+
+ /* Gets all layers allowing all domain accesses. */
+ if (unmask_layers(find_rule(domain, dir), access_dom,
+ layer_masks_dom)) {
+ /*
+ * Stops when all handled accesses are allowed by at
+ * least one rule in each layer.
+ */
+ ret = true;
+ break;
+ }
+
+ /* We should not reach a root other than @mnt_root. */
+ if (dir == mnt_root || WARN_ON_ONCE(IS_ROOT(dir)))
+ break;
+
+ parent_dentry = dget_parent(dir);
+ dput(dir);
+ dir = parent_dentry;
+ }
+ dput(dir);
+ return ret;
+}
+
+/**
+ * current_check_refer_path - Check if a rename or link action is allowed
+ *
+ * @old_dentry: File or directory requested to be moved or linked.
+ * @new_dir: Destination parent directory.
+ * @new_dentry: Destination file or directory.
+ * @removable: Sets to true if it is a rename operation.
+ * @exchange: Sets to true if it is a rename operation with RENAME_EXCHANGE.
+ *
+ * Because of its unprivileged constraints, Landlock relies on file hierarchies
+ * (and not only inodes) to tie access rights to files. Being able to link or
+ * rename a file hierarchy brings some challenges. Indeed, moving or linking a
+ * file (i.e. creating a new reference to an inode) can have an impact on the
+ * actions allowed for a set of files if it would change its parent directory
+ * (i.e. reparenting).
+ *
+ * To avoid trivial access right bypasses, Landlock first checks if the file or
+ * directory requested to be moved would gain new access rights inherited from
+ * its new hierarchy. Before returning any error, Landlock then checks that
+ * the parent source hierarchy and the destination hierarchy would allow the
+ * link or rename action. If it is not the case, an error with EACCES is
+ * returned to inform user space that there is no way to remove or create the
+ * requested source file type. If it should be allowed but the new inherited
+ * access rights would be greater than the source access rights, then the
+ * kernel returns an error with EXDEV. Prioritizing EACCES over EXDEV enables
+ * user space to abort the whole operation if there is no way to do it, or to
+ * manually copy the source to the destination if this remains allowed, e.g.
+ * because file creation is allowed on the destination directory but not direct
+ * linking.
+ *
+ * To achieve this goal, the kernel needs to compare two file hierarchies: the
+ * one identifying the source file or directory (including itself), and the
+ * destination one. This can be seen as a multilayer partial ordering problem.
+ * The kernel walks through these paths and collects in a matrix the access
+ * rights that are denied per layer. These matrices are then compared to see
+ * if the destination one has more (or the same) restrictions as the source
+ * one. If this is the case, the requested action will not return EXDEV, which
+ * doesn't mean the action is allowed. The parent hierarchy of the source
+ * (i.e. parent directory), and the destination hierarchy must also be checked
+ * to verify that they explicitly allow such action (i.e. referencing,
+ * creation and potentially removal rights). The kernel implementation is then
+ * required to rely on potentially four matrices of access rights: one for the
+ * source file or directory (i.e. the child), a potentially other one for the
+ * other source/destination (in case of RENAME_EXCHANGE), one for the source
+ * parent hierarchy and a last one for the destination hierarchy. These
+ * ephemeral matrices take some space on the stack, which limits the number of
+ * layers to a deemed reasonable number: 16.
+ *
+ * Returns:
+ * - 0 if access is allowed;
+ * - -EXDEV if @old_dentry would inherit new access rights from @new_dir;
+ * - -EACCES if file removal or creation is denied.
+ */
+static int current_check_refer_path(struct dentry *const old_dentry,
+ const struct path *const new_dir,
+ struct dentry *const new_dentry,
+ const bool removable, const bool exchange)
+{
+ const struct landlock_ruleset *const dom =
+ landlock_get_current_domain();
+ bool allow_parent1, allow_parent2;
+ access_mask_t access_request_parent1, access_request_parent2;
+ struct path mnt_dir;
+ layer_mask_t layer_masks_parent1[LANDLOCK_NUM_ACCESS_FS],
+ layer_masks_parent2[LANDLOCK_NUM_ACCESS_FS];
+
+ if (!dom)
+ return 0;
+ if (WARN_ON_ONCE(dom->num_layers < 1))
+ return -EACCES;
+ if (unlikely(d_is_negative(old_dentry)))
+ return -ENOENT;
+ if (exchange) {
+ if (unlikely(d_is_negative(new_dentry)))
+ return -ENOENT;
+ access_request_parent1 =
+ get_mode_access(d_backing_inode(new_dentry)->i_mode);
+ } else {
+ access_request_parent1 = 0;
+ }
+ access_request_parent2 =
+ get_mode_access(d_backing_inode(old_dentry)->i_mode);
+ if (removable) {
+ access_request_parent1 |= maybe_remove(old_dentry);
+ access_request_parent2 |= maybe_remove(new_dentry);
+ }
+
+ /* The mount points are the same for old and new paths, cf. EXDEV. */
+ if (old_dentry->d_parent == new_dir->dentry) {
+ /*
+ * The LANDLOCK_ACCESS_FS_REFER access right is not required
+ * for same-directory referer (i.e. no reparenting).
+ */
+ access_request_parent1 = init_layer_masks(
+ dom, access_request_parent1 | access_request_parent2,
+ &layer_masks_parent1);
+ if (is_access_to_paths_allowed(
+ dom, new_dir, access_request_parent1,
+ &layer_masks_parent1, NULL, 0, NULL, NULL))
+ return 0;
+ return -EACCES;
+ }
+
+ access_request_parent1 |= LANDLOCK_ACCESS_FS_REFER;
+ access_request_parent2 |= LANDLOCK_ACCESS_FS_REFER;
+
+ /* Saves the common mount point. */
+ mnt_dir.mnt = new_dir->mnt;
+ mnt_dir.dentry = new_dir->mnt->mnt_root;
+
+ /* new_dir->dentry is equal to new_dentry->d_parent */
+ allow_parent1 = collect_domain_accesses(dom, mnt_dir.dentry,
+ old_dentry->d_parent,
+ &layer_masks_parent1);
+ allow_parent2 = collect_domain_accesses(
+ dom, mnt_dir.dentry, new_dir->dentry, &layer_masks_parent2);
+
+ if (allow_parent1 && allow_parent2)
+ return 0;
+
+ /*
+ * To be able to compare source and destination domain access rights,
+ * take into account the @old_dentry access rights aggregated with its
+ * parent access rights. This will be useful to compare with the
+ * destination parent access rights.
+ */
+ if (is_access_to_paths_allowed(
+ dom, &mnt_dir, access_request_parent1, &layer_masks_parent1,
+ old_dentry, access_request_parent2, &layer_masks_parent2,
+ exchange ? new_dentry : NULL))
+ return 0;
+
+ /*
+ * This prioritizes EACCES over EXDEV for all actions, including
+ * renames with RENAME_EXCHANGE.
+ */
+ if (likely(is_eacces(&layer_masks_parent1, access_request_parent1) ||
+ is_eacces(&layer_masks_parent2, access_request_parent2)))
+ return -EACCES;
+
+ /*
+ * Gracefully forbids reparenting if the destination directory
+ * hierarchy is not a superset of restrictions of the source directory
+ * hierarchy, or if LANDLOCK_ACCESS_FS_REFER is not allowed by the
+ * source or the destination.
+ */
+ return -EXDEV;
+}
+
+/* Inode hooks */
+
+static void hook_inode_free_security(struct inode *const inode)
+{
+ /*
+ * All inodes must already have been untied from their object by
+ * release_inode() or hook_sb_delete().
+ */
+ WARN_ON_ONCE(landlock_inode(inode)->object);
+}
+
+/* Super-block hooks */
+
+/*
+ * Release the inodes used in a security policy.
+ *
+ * Cf. fsnotify_unmount_inodes() and invalidate_inodes()
+ */
+static void hook_sb_delete(struct super_block *const sb)
+{
+ struct inode *inode, *prev_inode = NULL;
+
+ if (!landlock_initialized)
+ return;
+
+ spin_lock(&sb->s_inode_list_lock);
+ list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
+ struct landlock_object *object;
+
+ /* Only handles referenced inodes. */
+ if (!atomic_read(&inode->i_count))
+ continue;
+
+ /*
+ * Protects against concurrent modification of inode (e.g.
+ * from get_inode_object()).
+ */
+ spin_lock(&inode->i_lock);
+ /*
+ * Checks I_FREEING and I_WILL_FREE to protect against a race
+ * condition when release_inode() just called iput(), which
+ * could lead to a NULL dereference of inode->security or a
+ * second call to iput() for the same Landlock object. Also
+ * checks I_NEW because such inode cannot be tied to an object.
+ */
+ if (inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW)) {
+ spin_unlock(&inode->i_lock);
+ continue;
+ }
+
+ rcu_read_lock();
+ object = rcu_dereference(landlock_inode(inode)->object);
+ if (!object) {
+ rcu_read_unlock();
+ spin_unlock(&inode->i_lock);
+ continue;
+ }
+ /* Keeps a reference to this inode until the next loop walk. */
+ __iget(inode);
+ spin_unlock(&inode->i_lock);
+
+ /*
+ * If there is no concurrent release_inode() ongoing, then we
+ * are in charge of calling iput() on this inode, otherwise we
+ * will just wait for it to finish.
+ */
+ spin_lock(&object->lock);
+ if (object->underobj == inode) {
+ object->underobj = NULL;
+ spin_unlock(&object->lock);
+ rcu_read_unlock();
+
+ /*
+ * Because object->underobj was not NULL,
+ * release_inode() and get_inode_object() guarantee
+ * that it is safe to reset
+ * landlock_inode(inode)->object while it is not NULL.
+ * It is therefore not necessary to lock inode->i_lock.
+ */
+ rcu_assign_pointer(landlock_inode(inode)->object, NULL);
+ /*
+ * At this point, we own the ihold() reference that was
+ * originally set up by get_inode_object() and the
+ * __iget() reference that we just set in this loop
+ * walk. Therefore the following call to iput() will
+ * not sleep nor drop the inode because there is now at
+ * least two references to it.
+ */
+ iput(inode);
+ } else {
+ spin_unlock(&object->lock);
+ rcu_read_unlock();
+ }
+
+ if (prev_inode) {
+ /*
+ * At this point, we still own the __iget() reference
+ * that we just set in this loop walk. Therefore we
+ * can drop the list lock and know that the inode won't
+ * disappear from under us until the next loop walk.
+ */
+ spin_unlock(&sb->s_inode_list_lock);
+ /*
+ * We can now actually put the inode reference from the
+ * previous loop walk, which is not needed anymore.
+ */
+ iput(prev_inode);
+ cond_resched();
+ spin_lock(&sb->s_inode_list_lock);
+ }
+ prev_inode = inode;
+ }
+ spin_unlock(&sb->s_inode_list_lock);
+
+ /* Puts the inode reference from the last loop walk, if any. */
+ if (prev_inode)
+ iput(prev_inode);
+ /* Waits for pending iput() in release_inode(). */
+ wait_var_event(&landlock_superblock(sb)->inode_refs,
+ !atomic_long_read(&landlock_superblock(sb)->inode_refs));
+}
+
+/*
+ * Because a Landlock security policy is defined according to the filesystem
+ * topology (i.e. the mount namespace), changing it may grant access to files
+ * not previously allowed.
+ *
+ * To make it simple, deny any filesystem topology modification by landlocked
+ * processes. Non-landlocked processes may still change the namespace of a
+ * landlocked process, but this kind of threat must be handled by a system-wide
+ * access-control security policy.
+ *
+ * This could be lifted in the future if Landlock can safely handle mount
+ * namespace updates requested by a landlocked process. Indeed, we could
+ * update the current domain (which is currently read-only) by taking into
+ * account the accesses of the source and the destination of a new mount point.
+ * However, it would also require to make all the child domains dynamically
+ * inherit these new constraints. Anyway, for backward compatibility reasons,
+ * a dedicated user space option would be required (e.g. as a ruleset flag).
+ */
+static int hook_sb_mount(const char *const dev_name,
+ const struct path *const path, const char *const type,
+ const unsigned long flags, void *const data)
+{
+ if (!landlock_get_current_domain())
+ return 0;
+ return -EPERM;
+}
+
+static int hook_move_mount(const struct path *const from_path,
+ const struct path *const to_path)
+{
+ if (!landlock_get_current_domain())
+ return 0;
+ return -EPERM;
+}
+
+/*
+ * Removing a mount point may reveal a previously hidden file hierarchy, which
+ * may then grant access to files, which may have previously been forbidden.
+ */
+static int hook_sb_umount(struct vfsmount *const mnt, const int flags)
+{
+ if (!landlock_get_current_domain())
+ return 0;
+ return -EPERM;
+}
+
+static int hook_sb_remount(struct super_block *const sb, void *const mnt_opts)
+{
+ if (!landlock_get_current_domain())
+ return 0;
+ return -EPERM;
+}
+
+/*
+ * pivot_root(2), like mount(2), changes the current mount namespace. It must
+ * then be forbidden for a landlocked process.
+ *
+ * However, chroot(2) may be allowed because it only changes the relative root
+ * directory of the current process. Moreover, it can be used to restrict the
+ * view of the filesystem.
+ */
+static int hook_sb_pivotroot(const struct path *const old_path,
+ const struct path *const new_path)
+{
+ if (!landlock_get_current_domain())
+ return 0;
+ return -EPERM;
+}
+
+/* Path hooks */
+
+static int hook_path_link(struct dentry *const old_dentry,
+ const struct path *const new_dir,
+ struct dentry *const new_dentry)
+{
+ return current_check_refer_path(old_dentry, new_dir, new_dentry, false,
+ false);
+}
+
+static int hook_path_rename(const struct path *const old_dir,
+ struct dentry *const old_dentry,
+ const struct path *const new_dir,
+ struct dentry *const new_dentry,
+ const unsigned int flags)
+{
+ /* old_dir refers to old_dentry->d_parent and new_dir->mnt */
+ return current_check_refer_path(old_dentry, new_dir, new_dentry, true,
+ !!(flags & RENAME_EXCHANGE));
+}
+
+static int hook_path_mkdir(const struct path *const dir,
+ struct dentry *const dentry, const umode_t mode)
+{
+ return current_check_access_path(dir, LANDLOCK_ACCESS_FS_MAKE_DIR);
+}
+
+static int hook_path_mknod(const struct path *const dir,
+ struct dentry *const dentry, const umode_t mode,
+ const unsigned int dev)
+{
+ const struct landlock_ruleset *const dom =
+ landlock_get_current_domain();
+
+ if (!dom)
+ return 0;
+ return check_access_path(dom, dir, get_mode_access(mode));
+}
+
+static int hook_path_symlink(const struct path *const dir,
+ struct dentry *const dentry,
+ const char *const old_name)
+{
+ return current_check_access_path(dir, LANDLOCK_ACCESS_FS_MAKE_SYM);
+}
+
+static int hook_path_unlink(const struct path *const dir,
+ struct dentry *const dentry)
+{
+ return current_check_access_path(dir, LANDLOCK_ACCESS_FS_REMOVE_FILE);
+}
+
+static int hook_path_rmdir(const struct path *const dir,
+ struct dentry *const dentry)
+{
+ return current_check_access_path(dir, LANDLOCK_ACCESS_FS_REMOVE_DIR);
+}
+
+static int hook_path_truncate(const struct path *const path)
+{
+ return current_check_access_path(path, LANDLOCK_ACCESS_FS_TRUNCATE);
+}
+
+/* File hooks */
+
+/**
+ * get_required_file_open_access - Get access needed to open a file
+ *
+ * @file: File being opened.
+ *
+ * Returns the access rights that are required for opening the given file,
+ * depending on the file type and open mode.
+ */
+static inline access_mask_t
+get_required_file_open_access(const struct file *const file)
+{
+ access_mask_t access = 0;
+
+ if (file->f_mode & FMODE_READ) {
+ /* A directory can only be opened in read mode. */
+ if (S_ISDIR(file_inode(file)->i_mode))
+ return LANDLOCK_ACCESS_FS_READ_DIR;
+ access = LANDLOCK_ACCESS_FS_READ_FILE;
+ }
+ if (file->f_mode & FMODE_WRITE)
+ access |= LANDLOCK_ACCESS_FS_WRITE_FILE;
+ /* __FMODE_EXEC is indeed part of f_flags, not f_mode. */
+ if (file->f_flags & __FMODE_EXEC)
+ access |= LANDLOCK_ACCESS_FS_EXECUTE;
+ return access;
+}
+
+static int hook_file_alloc_security(struct file *const file)
+{
+ /*
+ * Grants all access rights, even if most of them are not checked later
+ * on. It is more consistent.
+ *
+ * Notably, file descriptors for regular files can also be acquired
+ * without going through the file_open hook, for example when using
+ * memfd_create(2).
+ */
+ landlock_file(file)->allowed_access = LANDLOCK_MASK_ACCESS_FS;
+ return 0;
+}
+
+static int hook_file_open(struct file *const file)
+{
+ layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = {};
+ access_mask_t open_access_request, full_access_request, allowed_access;
+ const access_mask_t optional_access = LANDLOCK_ACCESS_FS_TRUNCATE;
+ const struct landlock_ruleset *const dom =
+ landlock_get_current_domain();
+
+ if (!dom)
+ return 0;
+
+ /*
+ * Because a file may be opened with O_PATH, get_required_file_open_access()
+ * may return 0. This case will be handled with a future Landlock
+ * evolution.
+ */
+ open_access_request = get_required_file_open_access(file);
+
+ /*
+ * We look up more access than what we immediately need for open(), so
+ * that we can later authorize operations on opened files.
+ */
+ full_access_request = open_access_request | optional_access;
+
+ if (is_access_to_paths_allowed(
+ dom, &file->f_path,
+ init_layer_masks(dom, full_access_request, &layer_masks),
+ &layer_masks, NULL, 0, NULL, NULL)) {
+ allowed_access = full_access_request;
+ } else {
+ unsigned long access_bit;
+ const unsigned long access_req = full_access_request;
+
+ /*
+ * Calculate the actual allowed access rights from layer_masks.
+ * Add each access right to allowed_access which has not been
+ * vetoed by any layer.
+ */
+ allowed_access = 0;
+ for_each_set_bit(access_bit, &access_req,
+ ARRAY_SIZE(layer_masks)) {
+ if (!layer_masks[access_bit])
+ allowed_access |= BIT_ULL(access_bit);
+ }
+ }
+
+ /*
+ * For operations on already opened files (i.e. ftruncate()), it is the
+ * access rights at the time of open() which decide whether the
+ * operation is permitted. Therefore, we record the relevant subset of
+ * file access rights in the opened struct file.
+ */
+ landlock_file(file)->allowed_access = allowed_access;
+
+ if ((open_access_request & allowed_access) == open_access_request)
+ return 0;
+
+ return -EACCES;
+}
+
+static int hook_file_truncate(struct file *const file)
+{
+ /*
+ * Allows truncation if the truncate right was available at the time of
+ * opening the file, to get a consistent access check as for read, write
+ * and execute operations.
+ *
+ * Note: For checks done based on the file's Landlock allowed access, we
+ * enforce them independently of whether the current thread is in a
+ * Landlock domain, so that open files passed between independent
+ * processes retain their behaviour.
+ */
+ if (landlock_file(file)->allowed_access & LANDLOCK_ACCESS_FS_TRUNCATE)
+ return 0;
+ return -EACCES;
+}
+
+static struct security_hook_list landlock_hooks[] __ro_after_init = {
+ LSM_HOOK_INIT(inode_free_security, hook_inode_free_security),
+
+ LSM_HOOK_INIT(sb_delete, hook_sb_delete),
+ LSM_HOOK_INIT(sb_mount, hook_sb_mount),
+ LSM_HOOK_INIT(move_mount, hook_move_mount),
+ LSM_HOOK_INIT(sb_umount, hook_sb_umount),
+ LSM_HOOK_INIT(sb_remount, hook_sb_remount),
+ LSM_HOOK_INIT(sb_pivotroot, hook_sb_pivotroot),
+
+ LSM_HOOK_INIT(path_link, hook_path_link),
+ LSM_HOOK_INIT(path_rename, hook_path_rename),
+ LSM_HOOK_INIT(path_mkdir, hook_path_mkdir),
+ LSM_HOOK_INIT(path_mknod, hook_path_mknod),
+ LSM_HOOK_INIT(path_symlink, hook_path_symlink),
+ LSM_HOOK_INIT(path_unlink, hook_path_unlink),
+ LSM_HOOK_INIT(path_rmdir, hook_path_rmdir),
+ LSM_HOOK_INIT(path_truncate, hook_path_truncate),
+
+ LSM_HOOK_INIT(file_alloc_security, hook_file_alloc_security),
+ LSM_HOOK_INIT(file_open, hook_file_open),
+ LSM_HOOK_INIT(file_truncate, hook_file_truncate),
+};
+
+__init void landlock_add_fs_hooks(void)
+{
+ security_add_hooks(landlock_hooks, ARRAY_SIZE(landlock_hooks),
+ LANDLOCK_NAME);
+}
diff --git a/security/landlock/fs.h b/security/landlock/fs.h
new file mode 100644
index 0000000000..488e481368
--- /dev/null
+++ b/security/landlock/fs.h
@@ -0,0 +1,95 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Landlock LSM - Filesystem management and hooks
+ *
+ * Copyright © 2017-2020 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2018-2020 ANSSI
+ */
+
+#ifndef _SECURITY_LANDLOCK_FS_H
+#define _SECURITY_LANDLOCK_FS_H
+
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/rcupdate.h>
+
+#include "ruleset.h"
+#include "setup.h"
+
+/**
+ * struct landlock_inode_security - Inode security blob
+ *
+ * Enable to reference a &struct landlock_object tied to an inode (i.e.
+ * underlying object).
+ */
+struct landlock_inode_security {
+ /**
+ * @object: Weak pointer to an allocated object. All assignments of a
+ * new object are protected by the underlying inode->i_lock. However,
+ * atomically disassociating @object from the inode is only protected
+ * by @object->lock, from the time @object's usage refcount drops to
+ * zero to the time this pointer is nulled out (cf. release_inode() and
+ * hook_sb_delete()). Indeed, such disassociation doesn't require
+ * inode->i_lock thanks to the careful rcu_access_pointer() check
+ * performed by get_inode_object().
+ */
+ struct landlock_object __rcu *object;
+};
+
+/**
+ * struct landlock_file_security - File security blob
+ *
+ * This information is populated when opening a file in hook_file_open, and
+ * tracks the relevant Landlock access rights that were available at the time
+ * of opening the file. Other LSM hooks use these rights in order to authorize
+ * operations on already opened files.
+ */
+struct landlock_file_security {
+ /**
+ * @allowed_access: Access rights that were available at the time of
+ * opening the file. This is not necessarily the full set of access
+ * rights available at that time, but it's the necessary subset as
+ * needed to authorize later operations on the open file.
+ */
+ access_mask_t allowed_access;
+};
+
+/**
+ * struct landlock_superblock_security - Superblock security blob
+ *
+ * Enable hook_sb_delete() to wait for concurrent calls to release_inode().
+ */
+struct landlock_superblock_security {
+ /**
+ * @inode_refs: Number of pending inodes (from this superblock) that
+ * are being released by release_inode().
+ * Cf. struct super_block->s_fsnotify_inode_refs .
+ */
+ atomic_long_t inode_refs;
+};
+
+static inline struct landlock_file_security *
+landlock_file(const struct file *const file)
+{
+ return file->f_security + landlock_blob_sizes.lbs_file;
+}
+
+static inline struct landlock_inode_security *
+landlock_inode(const struct inode *const inode)
+{
+ return inode->i_security + landlock_blob_sizes.lbs_inode;
+}
+
+static inline struct landlock_superblock_security *
+landlock_superblock(const struct super_block *const superblock)
+{
+ return superblock->s_security + landlock_blob_sizes.lbs_superblock;
+}
+
+__init void landlock_add_fs_hooks(void);
+
+int landlock_append_fs_rule(struct landlock_ruleset *const ruleset,
+ const struct path *const path,
+ access_mask_t access_hierarchy);
+
+#endif /* _SECURITY_LANDLOCK_FS_H */
diff --git a/security/landlock/limits.h b/security/landlock/limits.h
new file mode 100644
index 0000000000..82288f0e9e
--- /dev/null
+++ b/security/landlock/limits.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Landlock LSM - Limits for different components
+ *
+ * Copyright © 2016-2020 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2018-2020 ANSSI
+ */
+
+#ifndef _SECURITY_LANDLOCK_LIMITS_H
+#define _SECURITY_LANDLOCK_LIMITS_H
+
+#include <linux/bitops.h>
+#include <linux/limits.h>
+#include <uapi/linux/landlock.h>
+
+/* clang-format off */
+
+#define LANDLOCK_MAX_NUM_LAYERS 16
+#define LANDLOCK_MAX_NUM_RULES U32_MAX
+
+#define LANDLOCK_LAST_ACCESS_FS LANDLOCK_ACCESS_FS_TRUNCATE
+#define LANDLOCK_MASK_ACCESS_FS ((LANDLOCK_LAST_ACCESS_FS << 1) - 1)
+#define LANDLOCK_NUM_ACCESS_FS __const_hweight64(LANDLOCK_MASK_ACCESS_FS)
+
+/* clang-format on */
+
+#endif /* _SECURITY_LANDLOCK_LIMITS_H */
diff --git a/security/landlock/object.c b/security/landlock/object.c
new file mode 100644
index 0000000000..1f50612f01
--- /dev/null
+++ b/security/landlock/object.c
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Landlock LSM - Object management
+ *
+ * Copyright © 2016-2020 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2018-2020 ANSSI
+ */
+
+#include <linux/bug.h>
+#include <linux/compiler_types.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/rcupdate.h>
+#include <linux/refcount.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include "object.h"
+
+struct landlock_object *
+landlock_create_object(const struct landlock_object_underops *const underops,
+ void *const underobj)
+{
+ struct landlock_object *new_object;
+
+ if (WARN_ON_ONCE(!underops || !underobj))
+ return ERR_PTR(-ENOENT);
+ new_object = kzalloc(sizeof(*new_object), GFP_KERNEL_ACCOUNT);
+ if (!new_object)
+ return ERR_PTR(-ENOMEM);
+ refcount_set(&new_object->usage, 1);
+ spin_lock_init(&new_object->lock);
+ new_object->underops = underops;
+ new_object->underobj = underobj;
+ return new_object;
+}
+
+/*
+ * The caller must own the object (i.e. thanks to object->usage) to safely put
+ * it.
+ */
+void landlock_put_object(struct landlock_object *const object)
+{
+ /*
+ * The call to @object->underops->release(object) might sleep, e.g.
+ * because of iput().
+ */
+ might_sleep();
+ if (!object)
+ return;
+
+ /*
+ * If the @object's refcount cannot drop to zero, we can just decrement
+ * the refcount without holding a lock. Otherwise, the decrement must
+ * happen under @object->lock for synchronization with things like
+ * get_inode_object().
+ */
+ if (refcount_dec_and_lock(&object->usage, &object->lock)) {
+ __acquire(&object->lock);
+ /*
+ * With @object->lock initially held, remove the reference from
+ * @object->underobj to @object (if it still exists).
+ */
+ object->underops->release(object);
+ kfree_rcu(object, rcu_free);
+ }
+}
diff --git a/security/landlock/object.h b/security/landlock/object.h
new file mode 100644
index 0000000000..5f28c35e8a
--- /dev/null
+++ b/security/landlock/object.h
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Landlock LSM - Object management
+ *
+ * Copyright © 2016-2020 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2018-2020 ANSSI
+ */
+
+#ifndef _SECURITY_LANDLOCK_OBJECT_H
+#define _SECURITY_LANDLOCK_OBJECT_H
+
+#include <linux/compiler_types.h>
+#include <linux/refcount.h>
+#include <linux/spinlock.h>
+
+struct landlock_object;
+
+/**
+ * struct landlock_object_underops - Operations on an underlying object
+ */
+struct landlock_object_underops {
+ /**
+ * @release: Releases the underlying object (e.g. iput() for an inode).
+ */
+ void (*release)(struct landlock_object *const object)
+ __releases(object->lock);
+};
+
+/**
+ * struct landlock_object - Security blob tied to a kernel object
+ *
+ * The goal of this structure is to enable to tie a set of ephemeral access
+ * rights (pertaining to different domains) to a kernel object (e.g an inode)
+ * in a safe way. This implies to handle concurrent use and modification.
+ *
+ * The lifetime of a &struct landlock_object depends on the rules referring to
+ * it.
+ */
+struct landlock_object {
+ /**
+ * @usage: This counter is used to tie an object to the rules matching
+ * it or to keep it alive while adding a new rule. If this counter
+ * reaches zero, this struct must not be modified, but this counter can
+ * still be read from within an RCU read-side critical section. When
+ * adding a new rule to an object with a usage counter of zero, we must
+ * wait until the pointer to this object is set to NULL (or recycled).
+ */
+ refcount_t usage;
+ /**
+ * @lock: Protects against concurrent modifications. This lock must be
+ * held from the time @usage drops to zero until any weak references
+ * from @underobj to this object have been cleaned up.
+ *
+ * Lock ordering: inode->i_lock nests inside this.
+ */
+ spinlock_t lock;
+ /**
+ * @underobj: Used when cleaning up an object and to mark an object as
+ * tied to its underlying kernel structure. This pointer is protected
+ * by @lock. Cf. landlock_release_inodes() and release_inode().
+ */
+ void *underobj;
+ union {
+ /**
+ * @rcu_free: Enables lockless use of @usage, @lock and
+ * @underobj from within an RCU read-side critical section.
+ * @rcu_free and @underops are only used by
+ * landlock_put_object().
+ */
+ struct rcu_head rcu_free;
+ /**
+ * @underops: Enables landlock_put_object() to release the
+ * underlying object (e.g. inode).
+ */
+ const struct landlock_object_underops *underops;
+ };
+};
+
+struct landlock_object *
+landlock_create_object(const struct landlock_object_underops *const underops,
+ void *const underobj);
+
+void landlock_put_object(struct landlock_object *const object);
+
+static inline void landlock_get_object(struct landlock_object *const object)
+{
+ if (object)
+ refcount_inc(&object->usage);
+}
+
+#endif /* _SECURITY_LANDLOCK_OBJECT_H */
diff --git a/security/landlock/ptrace.c b/security/landlock/ptrace.c
new file mode 100644
index 0000000000..8a06d6c492
--- /dev/null
+++ b/security/landlock/ptrace.c
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Landlock LSM - Ptrace hooks
+ *
+ * Copyright © 2017-2020 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2019-2020 ANSSI
+ */
+
+#include <asm/current.h>
+#include <linux/cred.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/lsm_hooks.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
+
+#include "common.h"
+#include "cred.h"
+#include "ptrace.h"
+#include "ruleset.h"
+#include "setup.h"
+
+/**
+ * domain_scope_le - Checks domain ordering for scoped ptrace
+ *
+ * @parent: Parent domain.
+ * @child: Potential child of @parent.
+ *
+ * Checks if the @parent domain is less or equal to (i.e. an ancestor, which
+ * means a subset of) the @child domain.
+ */
+static bool domain_scope_le(const struct landlock_ruleset *const parent,
+ const struct landlock_ruleset *const child)
+{
+ const struct landlock_hierarchy *walker;
+
+ if (!parent)
+ return true;
+ if (!child)
+ return false;
+ for (walker = child->hierarchy; walker; walker = walker->parent) {
+ if (walker == parent->hierarchy)
+ /* @parent is in the scoped hierarchy of @child. */
+ return true;
+ }
+ /* There is no relationship between @parent and @child. */
+ return false;
+}
+
+static bool task_is_scoped(const struct task_struct *const parent,
+ const struct task_struct *const child)
+{
+ bool is_scoped;
+ const struct landlock_ruleset *dom_parent, *dom_child;
+
+ rcu_read_lock();
+ dom_parent = landlock_get_task_domain(parent);
+ dom_child = landlock_get_task_domain(child);
+ is_scoped = domain_scope_le(dom_parent, dom_child);
+ rcu_read_unlock();
+ return is_scoped;
+}
+
+static int task_ptrace(const struct task_struct *const parent,
+ const struct task_struct *const child)
+{
+ /* Quick return for non-landlocked tasks. */
+ if (!landlocked(parent))
+ return 0;
+ if (task_is_scoped(parent, child))
+ return 0;
+ return -EPERM;
+}
+
+/**
+ * hook_ptrace_access_check - Determines whether the current process may access
+ * another
+ *
+ * @child: Process to be accessed.
+ * @mode: Mode of attachment.
+ *
+ * If the current task has Landlock rules, then the child must have at least
+ * the same rules. Else denied.
+ *
+ * Determines whether a process may access another, returning 0 if permission
+ * granted, -errno if denied.
+ */
+static int hook_ptrace_access_check(struct task_struct *const child,
+ const unsigned int mode)
+{
+ return task_ptrace(current, child);
+}
+
+/**
+ * hook_ptrace_traceme - Determines whether another process may trace the
+ * current one
+ *
+ * @parent: Task proposed to be the tracer.
+ *
+ * If the parent has Landlock rules, then the current task must have the same
+ * or more rules. Else denied.
+ *
+ * Determines whether the nominated task is permitted to trace the current
+ * process, returning 0 if permission is granted, -errno if denied.
+ */
+static int hook_ptrace_traceme(struct task_struct *const parent)
+{
+ return task_ptrace(parent, current);
+}
+
+static struct security_hook_list landlock_hooks[] __ro_after_init = {
+ LSM_HOOK_INIT(ptrace_access_check, hook_ptrace_access_check),
+ LSM_HOOK_INIT(ptrace_traceme, hook_ptrace_traceme),
+};
+
+__init void landlock_add_ptrace_hooks(void)
+{
+ security_add_hooks(landlock_hooks, ARRAY_SIZE(landlock_hooks),
+ LANDLOCK_NAME);
+}
diff --git a/security/landlock/ptrace.h b/security/landlock/ptrace.h
new file mode 100644
index 0000000000..265b220ae3
--- /dev/null
+++ b/security/landlock/ptrace.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Landlock LSM - Ptrace hooks
+ *
+ * Copyright © 2017-2019 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2019 ANSSI
+ */
+
+#ifndef _SECURITY_LANDLOCK_PTRACE_H
+#define _SECURITY_LANDLOCK_PTRACE_H
+
+__init void landlock_add_ptrace_hooks(void);
+
+#endif /* _SECURITY_LANDLOCK_PTRACE_H */
diff --git a/security/landlock/ruleset.c b/security/landlock/ruleset.c
new file mode 100644
index 0000000000..996484f98b
--- /dev/null
+++ b/security/landlock/ruleset.c
@@ -0,0 +1,475 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Landlock LSM - Ruleset management
+ *
+ * Copyright © 2016-2020 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2018-2020 ANSSI
+ */
+
+#include <linux/bits.h>
+#include <linux/bug.h>
+#include <linux/compiler_types.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/lockdep.h>
+#include <linux/overflow.h>
+#include <linux/rbtree.h>
+#include <linux/refcount.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
+
+#include "limits.h"
+#include "object.h"
+#include "ruleset.h"
+
+static struct landlock_ruleset *create_ruleset(const u32 num_layers)
+{
+ struct landlock_ruleset *new_ruleset;
+
+ new_ruleset =
+ kzalloc(struct_size(new_ruleset, fs_access_masks, num_layers),
+ GFP_KERNEL_ACCOUNT);
+ if (!new_ruleset)
+ return ERR_PTR(-ENOMEM);
+ refcount_set(&new_ruleset->usage, 1);
+ mutex_init(&new_ruleset->lock);
+ new_ruleset->root = RB_ROOT;
+ new_ruleset->num_layers = num_layers;
+ /*
+ * hierarchy = NULL
+ * num_rules = 0
+ * fs_access_masks[] = 0
+ */
+ return new_ruleset;
+}
+
+struct landlock_ruleset *
+landlock_create_ruleset(const access_mask_t fs_access_mask)
+{
+ struct landlock_ruleset *new_ruleset;
+
+ /* Informs about useless ruleset. */
+ if (!fs_access_mask)
+ return ERR_PTR(-ENOMSG);
+ new_ruleset = create_ruleset(1);
+ if (!IS_ERR(new_ruleset))
+ new_ruleset->fs_access_masks[0] = fs_access_mask;
+ return new_ruleset;
+}
+
+static void build_check_rule(void)
+{
+ const struct landlock_rule rule = {
+ .num_layers = ~0,
+ };
+
+ BUILD_BUG_ON(rule.num_layers < LANDLOCK_MAX_NUM_LAYERS);
+}
+
+static struct landlock_rule *
+create_rule(struct landlock_object *const object,
+ const struct landlock_layer (*const layers)[], const u32 num_layers,
+ const struct landlock_layer *const new_layer)
+{
+ struct landlock_rule *new_rule;
+ u32 new_num_layers;
+
+ build_check_rule();
+ if (new_layer) {
+ /* Should already be checked by landlock_merge_ruleset(). */
+ if (WARN_ON_ONCE(num_layers >= LANDLOCK_MAX_NUM_LAYERS))
+ return ERR_PTR(-E2BIG);
+ new_num_layers = num_layers + 1;
+ } else {
+ new_num_layers = num_layers;
+ }
+ new_rule = kzalloc(struct_size(new_rule, layers, new_num_layers),
+ GFP_KERNEL_ACCOUNT);
+ if (!new_rule)
+ return ERR_PTR(-ENOMEM);
+ RB_CLEAR_NODE(&new_rule->node);
+ landlock_get_object(object);
+ new_rule->object = object;
+ new_rule->num_layers = new_num_layers;
+ /* Copies the original layer stack. */
+ memcpy(new_rule->layers, layers,
+ flex_array_size(new_rule, layers, num_layers));
+ if (new_layer)
+ /* Adds a copy of @new_layer on the layer stack. */
+ new_rule->layers[new_rule->num_layers - 1] = *new_layer;
+ return new_rule;
+}
+
+static void free_rule(struct landlock_rule *const rule)
+{
+ might_sleep();
+ if (!rule)
+ return;
+ landlock_put_object(rule->object);
+ kfree(rule);
+}
+
+static void build_check_ruleset(void)
+{
+ const struct landlock_ruleset ruleset = {
+ .num_rules = ~0,
+ .num_layers = ~0,
+ };
+ typeof(ruleset.fs_access_masks[0]) fs_access_mask = ~0;
+
+ BUILD_BUG_ON(ruleset.num_rules < LANDLOCK_MAX_NUM_RULES);
+ BUILD_BUG_ON(ruleset.num_layers < LANDLOCK_MAX_NUM_LAYERS);
+ BUILD_BUG_ON(fs_access_mask < LANDLOCK_MASK_ACCESS_FS);
+}
+
+/**
+ * insert_rule - Create and insert a rule in a ruleset
+ *
+ * @ruleset: The ruleset to be updated.
+ * @object: The object to build the new rule with. The underlying kernel
+ * object must be held by the caller.
+ * @layers: One or multiple layers to be copied into the new rule.
+ * @num_layers: The number of @layers entries.
+ *
+ * When user space requests to add a new rule to a ruleset, @layers only
+ * contains one entry and this entry is not assigned to any level. In this
+ * case, the new rule will extend @ruleset, similarly to a boolean OR between
+ * access rights.
+ *
+ * When merging a ruleset in a domain, or copying a domain, @layers will be
+ * added to @ruleset as new constraints, similarly to a boolean AND between
+ * access rights.
+ */
+static int insert_rule(struct landlock_ruleset *const ruleset,
+ struct landlock_object *const object,
+ const struct landlock_layer (*const layers)[],
+ size_t num_layers)
+{
+ struct rb_node **walker_node;
+ struct rb_node *parent_node = NULL;
+ struct landlock_rule *new_rule;
+
+ might_sleep();
+ lockdep_assert_held(&ruleset->lock);
+ if (WARN_ON_ONCE(!object || !layers))
+ return -ENOENT;
+ walker_node = &(ruleset->root.rb_node);
+ while (*walker_node) {
+ struct landlock_rule *const this =
+ rb_entry(*walker_node, struct landlock_rule, node);
+
+ if (this->object != object) {
+ parent_node = *walker_node;
+ if (this->object < object)
+ walker_node = &((*walker_node)->rb_right);
+ else
+ walker_node = &((*walker_node)->rb_left);
+ continue;
+ }
+
+ /* Only a single-level layer should match an existing rule. */
+ if (WARN_ON_ONCE(num_layers != 1))
+ return -EINVAL;
+
+ /* If there is a matching rule, updates it. */
+ if ((*layers)[0].level == 0) {
+ /*
+ * Extends access rights when the request comes from
+ * landlock_add_rule(2), i.e. @ruleset is not a domain.
+ */
+ if (WARN_ON_ONCE(this->num_layers != 1))
+ return -EINVAL;
+ if (WARN_ON_ONCE(this->layers[0].level != 0))
+ return -EINVAL;
+ this->layers[0].access |= (*layers)[0].access;
+ return 0;
+ }
+
+ if (WARN_ON_ONCE(this->layers[0].level == 0))
+ return -EINVAL;
+
+ /*
+ * Intersects access rights when it is a merge between a
+ * ruleset and a domain.
+ */
+ new_rule = create_rule(object, &this->layers, this->num_layers,
+ &(*layers)[0]);
+ if (IS_ERR(new_rule))
+ return PTR_ERR(new_rule);
+ rb_replace_node(&this->node, &new_rule->node, &ruleset->root);
+ free_rule(this);
+ return 0;
+ }
+
+ /* There is no match for @object. */
+ build_check_ruleset();
+ if (ruleset->num_rules >= LANDLOCK_MAX_NUM_RULES)
+ return -E2BIG;
+ new_rule = create_rule(object, layers, num_layers, NULL);
+ if (IS_ERR(new_rule))
+ return PTR_ERR(new_rule);
+ rb_link_node(&new_rule->node, parent_node, walker_node);
+ rb_insert_color(&new_rule->node, &ruleset->root);
+ ruleset->num_rules++;
+ return 0;
+}
+
+static void build_check_layer(void)
+{
+ const struct landlock_layer layer = {
+ .level = ~0,
+ .access = ~0,
+ };
+
+ BUILD_BUG_ON(layer.level < LANDLOCK_MAX_NUM_LAYERS);
+ BUILD_BUG_ON(layer.access < LANDLOCK_MASK_ACCESS_FS);
+}
+
+/* @ruleset must be locked by the caller. */
+int landlock_insert_rule(struct landlock_ruleset *const ruleset,
+ struct landlock_object *const object,
+ const access_mask_t access)
+{
+ struct landlock_layer layers[] = { {
+ .access = access,
+ /* When @level is zero, insert_rule() extends @ruleset. */
+ .level = 0,
+ } };
+
+ build_check_layer();
+ return insert_rule(ruleset, object, &layers, ARRAY_SIZE(layers));
+}
+
+static inline void get_hierarchy(struct landlock_hierarchy *const hierarchy)
+{
+ if (hierarchy)
+ refcount_inc(&hierarchy->usage);
+}
+
+static void put_hierarchy(struct landlock_hierarchy *hierarchy)
+{
+ while (hierarchy && refcount_dec_and_test(&hierarchy->usage)) {
+ const struct landlock_hierarchy *const freeme = hierarchy;
+
+ hierarchy = hierarchy->parent;
+ kfree(freeme);
+ }
+}
+
+static int merge_ruleset(struct landlock_ruleset *const dst,
+ struct landlock_ruleset *const src)
+{
+ struct landlock_rule *walker_rule, *next_rule;
+ int err = 0;
+
+ might_sleep();
+ /* Should already be checked by landlock_merge_ruleset() */
+ if (WARN_ON_ONCE(!src))
+ return 0;
+ /* Only merge into a domain. */
+ if (WARN_ON_ONCE(!dst || !dst->hierarchy))
+ return -EINVAL;
+
+ /* Locks @dst first because we are its only owner. */
+ mutex_lock(&dst->lock);
+ mutex_lock_nested(&src->lock, SINGLE_DEPTH_NESTING);
+
+ /* Stacks the new layer. */
+ if (WARN_ON_ONCE(src->num_layers != 1 || dst->num_layers < 1)) {
+ err = -EINVAL;
+ goto out_unlock;
+ }
+ dst->fs_access_masks[dst->num_layers - 1] = src->fs_access_masks[0];
+
+ /* Merges the @src tree. */
+ rbtree_postorder_for_each_entry_safe(walker_rule, next_rule, &src->root,
+ node) {
+ struct landlock_layer layers[] = { {
+ .level = dst->num_layers,
+ } };
+
+ if (WARN_ON_ONCE(walker_rule->num_layers != 1)) {
+ err = -EINVAL;
+ goto out_unlock;
+ }
+ if (WARN_ON_ONCE(walker_rule->layers[0].level != 0)) {
+ err = -EINVAL;
+ goto out_unlock;
+ }
+ layers[0].access = walker_rule->layers[0].access;
+ err = insert_rule(dst, walker_rule->object, &layers,
+ ARRAY_SIZE(layers));
+ if (err)
+ goto out_unlock;
+ }
+
+out_unlock:
+ mutex_unlock(&src->lock);
+ mutex_unlock(&dst->lock);
+ return err;
+}
+
+static int inherit_ruleset(struct landlock_ruleset *const parent,
+ struct landlock_ruleset *const child)
+{
+ struct landlock_rule *walker_rule, *next_rule;
+ int err = 0;
+
+ might_sleep();
+ if (!parent)
+ return 0;
+
+ /* Locks @child first because we are its only owner. */
+ mutex_lock(&child->lock);
+ mutex_lock_nested(&parent->lock, SINGLE_DEPTH_NESTING);
+
+ /* Copies the @parent tree. */
+ rbtree_postorder_for_each_entry_safe(walker_rule, next_rule,
+ &parent->root, node) {
+ err = insert_rule(child, walker_rule->object,
+ &walker_rule->layers,
+ walker_rule->num_layers);
+ if (err)
+ goto out_unlock;
+ }
+
+ if (WARN_ON_ONCE(child->num_layers <= parent->num_layers)) {
+ err = -EINVAL;
+ goto out_unlock;
+ }
+ /* Copies the parent layer stack and leaves a space for the new layer. */
+ memcpy(child->fs_access_masks, parent->fs_access_masks,
+ flex_array_size(parent, fs_access_masks, parent->num_layers));
+
+ if (WARN_ON_ONCE(!parent->hierarchy)) {
+ err = -EINVAL;
+ goto out_unlock;
+ }
+ get_hierarchy(parent->hierarchy);
+ child->hierarchy->parent = parent->hierarchy;
+
+out_unlock:
+ mutex_unlock(&parent->lock);
+ mutex_unlock(&child->lock);
+ return err;
+}
+
+static void free_ruleset(struct landlock_ruleset *const ruleset)
+{
+ struct landlock_rule *freeme, *next;
+
+ might_sleep();
+ rbtree_postorder_for_each_entry_safe(freeme, next, &ruleset->root, node)
+ free_rule(freeme);
+ put_hierarchy(ruleset->hierarchy);
+ kfree(ruleset);
+}
+
+void landlock_put_ruleset(struct landlock_ruleset *const ruleset)
+{
+ might_sleep();
+ if (ruleset && refcount_dec_and_test(&ruleset->usage))
+ free_ruleset(ruleset);
+}
+
+static void free_ruleset_work(struct work_struct *const work)
+{
+ struct landlock_ruleset *ruleset;
+
+ ruleset = container_of(work, struct landlock_ruleset, work_free);
+ free_ruleset(ruleset);
+}
+
+void landlock_put_ruleset_deferred(struct landlock_ruleset *const ruleset)
+{
+ if (ruleset && refcount_dec_and_test(&ruleset->usage)) {
+ INIT_WORK(&ruleset->work_free, free_ruleset_work);
+ schedule_work(&ruleset->work_free);
+ }
+}
+
+/**
+ * landlock_merge_ruleset - Merge a ruleset with a domain
+ *
+ * @parent: Parent domain.
+ * @ruleset: New ruleset to be merged.
+ *
+ * Returns the intersection of @parent and @ruleset, or returns @parent if
+ * @ruleset is empty, or returns a duplicate of @ruleset if @parent is empty.
+ */
+struct landlock_ruleset *
+landlock_merge_ruleset(struct landlock_ruleset *const parent,
+ struct landlock_ruleset *const ruleset)
+{
+ struct landlock_ruleset *new_dom;
+ u32 num_layers;
+ int err;
+
+ might_sleep();
+ if (WARN_ON_ONCE(!ruleset || parent == ruleset))
+ return ERR_PTR(-EINVAL);
+
+ if (parent) {
+ if (parent->num_layers >= LANDLOCK_MAX_NUM_LAYERS)
+ return ERR_PTR(-E2BIG);
+ num_layers = parent->num_layers + 1;
+ } else {
+ num_layers = 1;
+ }
+
+ /* Creates a new domain... */
+ new_dom = create_ruleset(num_layers);
+ if (IS_ERR(new_dom))
+ return new_dom;
+ new_dom->hierarchy =
+ kzalloc(sizeof(*new_dom->hierarchy), GFP_KERNEL_ACCOUNT);
+ if (!new_dom->hierarchy) {
+ err = -ENOMEM;
+ goto out_put_dom;
+ }
+ refcount_set(&new_dom->hierarchy->usage, 1);
+
+ /* ...as a child of @parent... */
+ err = inherit_ruleset(parent, new_dom);
+ if (err)
+ goto out_put_dom;
+
+ /* ...and including @ruleset. */
+ err = merge_ruleset(new_dom, ruleset);
+ if (err)
+ goto out_put_dom;
+
+ return new_dom;
+
+out_put_dom:
+ landlock_put_ruleset(new_dom);
+ return ERR_PTR(err);
+}
+
+/*
+ * The returned access has the same lifetime as @ruleset.
+ */
+const struct landlock_rule *
+landlock_find_rule(const struct landlock_ruleset *const ruleset,
+ const struct landlock_object *const object)
+{
+ const struct rb_node *node;
+
+ if (!object)
+ return NULL;
+ node = ruleset->root.rb_node;
+ while (node) {
+ struct landlock_rule *this =
+ rb_entry(node, struct landlock_rule, node);
+
+ if (this->object == object)
+ return this;
+ if (this->object < object)
+ node = node->rb_right;
+ else
+ node = node->rb_left;
+ }
+ return NULL;
+}
diff --git a/security/landlock/ruleset.h b/security/landlock/ruleset.h
new file mode 100644
index 0000000000..55b1df8f66
--- /dev/null
+++ b/security/landlock/ruleset.h
@@ -0,0 +1,180 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Landlock LSM - Ruleset management
+ *
+ * Copyright © 2016-2020 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2018-2020 ANSSI
+ */
+
+#ifndef _SECURITY_LANDLOCK_RULESET_H
+#define _SECURITY_LANDLOCK_RULESET_H
+
+#include <linux/bitops.h>
+#include <linux/build_bug.h>
+#include <linux/mutex.h>
+#include <linux/rbtree.h>
+#include <linux/refcount.h>
+#include <linux/workqueue.h>
+
+#include "limits.h"
+#include "object.h"
+
+typedef u16 access_mask_t;
+/* Makes sure all filesystem access rights can be stored. */
+static_assert(BITS_PER_TYPE(access_mask_t) >= LANDLOCK_NUM_ACCESS_FS);
+/* Makes sure for_each_set_bit() and for_each_clear_bit() calls are OK. */
+static_assert(sizeof(unsigned long) >= sizeof(access_mask_t));
+
+typedef u16 layer_mask_t;
+/* Makes sure all layers can be checked. */
+static_assert(BITS_PER_TYPE(layer_mask_t) >= LANDLOCK_MAX_NUM_LAYERS);
+
+/**
+ * struct landlock_layer - Access rights for a given layer
+ */
+struct landlock_layer {
+ /**
+ * @level: Position of this layer in the layer stack.
+ */
+ u16 level;
+ /**
+ * @access: Bitfield of allowed actions on the kernel object. They are
+ * relative to the object type (e.g. %LANDLOCK_ACTION_FS_READ).
+ */
+ access_mask_t access;
+};
+
+/**
+ * struct landlock_rule - Access rights tied to an object
+ */
+struct landlock_rule {
+ /**
+ * @node: Node in the ruleset's red-black tree.
+ */
+ struct rb_node node;
+ /**
+ * @object: Pointer to identify a kernel object (e.g. an inode). This
+ * is used as a key for this ruleset element. This pointer is set once
+ * and never modified. It always points to an allocated object because
+ * each rule increments the refcount of its object.
+ */
+ struct landlock_object *object;
+ /**
+ * @num_layers: Number of entries in @layers.
+ */
+ u32 num_layers;
+ /**
+ * @layers: Stack of layers, from the latest to the newest, implemented
+ * as a flexible array member (FAM).
+ */
+ struct landlock_layer layers[] __counted_by(num_layers);
+};
+
+/**
+ * struct landlock_hierarchy - Node in a ruleset hierarchy
+ */
+struct landlock_hierarchy {
+ /**
+ * @parent: Pointer to the parent node, or NULL if it is a root
+ * Landlock domain.
+ */
+ struct landlock_hierarchy *parent;
+ /**
+ * @usage: Number of potential children domains plus their parent
+ * domain.
+ */
+ refcount_t usage;
+};
+
+/**
+ * struct landlock_ruleset - Landlock ruleset
+ *
+ * This data structure must contain unique entries, be updatable, and quick to
+ * match an object.
+ */
+struct landlock_ruleset {
+ /**
+ * @root: Root of a red-black tree containing &struct landlock_rule
+ * nodes. Once a ruleset is tied to a process (i.e. as a domain), this
+ * tree is immutable until @usage reaches zero.
+ */
+ struct rb_root root;
+ /**
+ * @hierarchy: Enables hierarchy identification even when a parent
+ * domain vanishes. This is needed for the ptrace protection.
+ */
+ struct landlock_hierarchy *hierarchy;
+ union {
+ /**
+ * @work_free: Enables to free a ruleset within a lockless
+ * section. This is only used by
+ * landlock_put_ruleset_deferred() when @usage reaches zero.
+ * The fields @lock, @usage, @num_rules, @num_layers and
+ * @fs_access_masks are then unused.
+ */
+ struct work_struct work_free;
+ struct {
+ /**
+ * @lock: Protects against concurrent modifications of
+ * @root, if @usage is greater than zero.
+ */
+ struct mutex lock;
+ /**
+ * @usage: Number of processes (i.e. domains) or file
+ * descriptors referencing this ruleset.
+ */
+ refcount_t usage;
+ /**
+ * @num_rules: Number of non-overlapping (i.e. not for
+ * the same object) rules in this ruleset.
+ */
+ u32 num_rules;
+ /**
+ * @num_layers: Number of layers that are used in this
+ * ruleset. This enables to check that all the layers
+ * allow an access request. A value of 0 identifies a
+ * non-merged ruleset (i.e. not a domain).
+ */
+ u32 num_layers;
+ /**
+ * @fs_access_masks: Contains the subset of filesystem
+ * actions that are restricted by a ruleset. A domain
+ * saves all layers of merged rulesets in a stack
+ * (FAM), starting from the first layer to the last
+ * one. These layers are used when merging rulesets,
+ * for user space backward compatibility (i.e.
+ * future-proof), and to properly handle merged
+ * rulesets without overlapping access rights. These
+ * layers are set once and never changed for the
+ * lifetime of the ruleset.
+ */
+ access_mask_t fs_access_masks[];
+ };
+ };
+};
+
+struct landlock_ruleset *
+landlock_create_ruleset(const access_mask_t fs_access_mask);
+
+void landlock_put_ruleset(struct landlock_ruleset *const ruleset);
+void landlock_put_ruleset_deferred(struct landlock_ruleset *const ruleset);
+
+int landlock_insert_rule(struct landlock_ruleset *const ruleset,
+ struct landlock_object *const object,
+ const access_mask_t access);
+
+struct landlock_ruleset *
+landlock_merge_ruleset(struct landlock_ruleset *const parent,
+ struct landlock_ruleset *const ruleset);
+
+const struct landlock_rule *
+landlock_find_rule(const struct landlock_ruleset *const ruleset,
+ const struct landlock_object *const object);
+
+static inline void landlock_get_ruleset(struct landlock_ruleset *const ruleset)
+{
+ if (ruleset)
+ refcount_inc(&ruleset->usage);
+}
+
+#endif /* _SECURITY_LANDLOCK_RULESET_H */
diff --git a/security/landlock/setup.c b/security/landlock/setup.c
new file mode 100644
index 0000000000..0f6113528f
--- /dev/null
+++ b/security/landlock/setup.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Landlock LSM - Security framework setup
+ *
+ * Copyright © 2016-2020 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2018-2020 ANSSI
+ */
+
+#include <linux/init.h>
+#include <linux/lsm_hooks.h>
+
+#include "common.h"
+#include "cred.h"
+#include "fs.h"
+#include "ptrace.h"
+#include "setup.h"
+
+bool landlock_initialized __ro_after_init = false;
+
+struct lsm_blob_sizes landlock_blob_sizes __ro_after_init = {
+ .lbs_cred = sizeof(struct landlock_cred_security),
+ .lbs_file = sizeof(struct landlock_file_security),
+ .lbs_inode = sizeof(struct landlock_inode_security),
+ .lbs_superblock = sizeof(struct landlock_superblock_security),
+};
+
+static int __init landlock_init(void)
+{
+ landlock_add_cred_hooks();
+ landlock_add_ptrace_hooks();
+ landlock_add_fs_hooks();
+ landlock_initialized = true;
+ pr_info("Up and running.\n");
+ return 0;
+}
+
+DEFINE_LSM(LANDLOCK_NAME) = {
+ .name = LANDLOCK_NAME,
+ .init = landlock_init,
+ .blobs = &landlock_blob_sizes,
+};
diff --git a/security/landlock/setup.h b/security/landlock/setup.h
new file mode 100644
index 0000000000..1daffab1ab
--- /dev/null
+++ b/security/landlock/setup.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Landlock LSM - Security framework setup
+ *
+ * Copyright © 2016-2020 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2018-2020 ANSSI
+ */
+
+#ifndef _SECURITY_LANDLOCK_SETUP_H
+#define _SECURITY_LANDLOCK_SETUP_H
+
+#include <linux/lsm_hooks.h>
+
+extern bool landlock_initialized;
+
+extern struct lsm_blob_sizes landlock_blob_sizes;
+
+#endif /* _SECURITY_LANDLOCK_SETUP_H */
diff --git a/security/landlock/syscalls.c b/security/landlock/syscalls.c
new file mode 100644
index 0000000000..245cc650a4
--- /dev/null
+++ b/security/landlock/syscalls.c
@@ -0,0 +1,456 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Landlock LSM - System call implementations and user space interfaces
+ *
+ * Copyright © 2016-2020 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2018-2020 ANSSI
+ */
+
+#include <asm/current.h>
+#include <linux/anon_inodes.h>
+#include <linux/build_bug.h>
+#include <linux/capability.h>
+#include <linux/compiler_types.h>
+#include <linux/dcache.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/limits.h>
+#include <linux/mount.h>
+#include <linux/path.h>
+#include <linux/sched.h>
+#include <linux/security.h>
+#include <linux/stddef.h>
+#include <linux/syscalls.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+#include <uapi/linux/landlock.h>
+
+#include "cred.h"
+#include "fs.h"
+#include "limits.h"
+#include "ruleset.h"
+#include "setup.h"
+
+/**
+ * copy_min_struct_from_user - Safe future-proof argument copying
+ *
+ * Extend copy_struct_from_user() to check for consistent user buffer.
+ *
+ * @dst: Kernel space pointer or NULL.
+ * @ksize: Actual size of the data pointed to by @dst.
+ * @ksize_min: Minimal required size to be copied.
+ * @src: User space pointer or NULL.
+ * @usize: (Alleged) size of the data pointed to by @src.
+ */
+static __always_inline int
+copy_min_struct_from_user(void *const dst, const size_t ksize,
+ const size_t ksize_min, const void __user *const src,
+ const size_t usize)
+{
+ /* Checks buffer inconsistencies. */
+ BUILD_BUG_ON(!dst);
+ if (!src)
+ return -EFAULT;
+
+ /* Checks size ranges. */
+ BUILD_BUG_ON(ksize <= 0);
+ BUILD_BUG_ON(ksize < ksize_min);
+ if (usize < ksize_min)
+ return -EINVAL;
+ if (usize > PAGE_SIZE)
+ return -E2BIG;
+
+ /* Copies user buffer and fills with zeros. */
+ return copy_struct_from_user(dst, ksize, src, usize);
+}
+
+/*
+ * This function only contains arithmetic operations with constants, leading to
+ * BUILD_BUG_ON(). The related code is evaluated and checked at build time,
+ * but it is then ignored thanks to compiler optimizations.
+ */
+static void build_check_abi(void)
+{
+ struct landlock_ruleset_attr ruleset_attr;
+ struct landlock_path_beneath_attr path_beneath_attr;
+ size_t ruleset_size, path_beneath_size;
+
+ /*
+ * For each user space ABI structures, first checks that there is no
+ * hole in them, then checks that all architectures have the same
+ * struct size.
+ */
+ ruleset_size = sizeof(ruleset_attr.handled_access_fs);
+ BUILD_BUG_ON(sizeof(ruleset_attr) != ruleset_size);
+ BUILD_BUG_ON(sizeof(ruleset_attr) != 8);
+
+ path_beneath_size = sizeof(path_beneath_attr.allowed_access);
+ path_beneath_size += sizeof(path_beneath_attr.parent_fd);
+ BUILD_BUG_ON(sizeof(path_beneath_attr) != path_beneath_size);
+ BUILD_BUG_ON(sizeof(path_beneath_attr) != 12);
+}
+
+/* Ruleset handling */
+
+static int fop_ruleset_release(struct inode *const inode,
+ struct file *const filp)
+{
+ struct landlock_ruleset *ruleset = filp->private_data;
+
+ landlock_put_ruleset(ruleset);
+ return 0;
+}
+
+static ssize_t fop_dummy_read(struct file *const filp, char __user *const buf,
+ const size_t size, loff_t *const ppos)
+{
+ /* Dummy handler to enable FMODE_CAN_READ. */
+ return -EINVAL;
+}
+
+static ssize_t fop_dummy_write(struct file *const filp,
+ const char __user *const buf, const size_t size,
+ loff_t *const ppos)
+{
+ /* Dummy handler to enable FMODE_CAN_WRITE. */
+ return -EINVAL;
+}
+
+/*
+ * A ruleset file descriptor enables to build a ruleset by adding (i.e.
+ * writing) rule after rule, without relying on the task's context. This
+ * reentrant design is also used in a read way to enforce the ruleset on the
+ * current task.
+ */
+static const struct file_operations ruleset_fops = {
+ .release = fop_ruleset_release,
+ .read = fop_dummy_read,
+ .write = fop_dummy_write,
+};
+
+#define LANDLOCK_ABI_VERSION 3
+
+/**
+ * sys_landlock_create_ruleset - Create a new ruleset
+ *
+ * @attr: Pointer to a &struct landlock_ruleset_attr identifying the scope of
+ * the new ruleset.
+ * @size: Size of the pointed &struct landlock_ruleset_attr (needed for
+ * backward and forward compatibility).
+ * @flags: Supported value: %LANDLOCK_CREATE_RULESET_VERSION.
+ *
+ * This system call enables to create a new Landlock ruleset, and returns the
+ * related file descriptor on success.
+ *
+ * If @flags is %LANDLOCK_CREATE_RULESET_VERSION and @attr is NULL and @size is
+ * 0, then the returned value is the highest supported Landlock ABI version
+ * (starting at 1).
+ *
+ * Possible returned errors are:
+ *
+ * - %EOPNOTSUPP: Landlock is supported by the kernel but disabled at boot time;
+ * - %EINVAL: unknown @flags, or unknown access, or too small @size;
+ * - %E2BIG or %EFAULT: @attr or @size inconsistencies;
+ * - %ENOMSG: empty &landlock_ruleset_attr.handled_access_fs.
+ */
+SYSCALL_DEFINE3(landlock_create_ruleset,
+ const struct landlock_ruleset_attr __user *const, attr,
+ const size_t, size, const __u32, flags)
+{
+ struct landlock_ruleset_attr ruleset_attr;
+ struct landlock_ruleset *ruleset;
+ int err, ruleset_fd;
+
+ /* Build-time checks. */
+ build_check_abi();
+
+ if (!landlock_initialized)
+ return -EOPNOTSUPP;
+
+ if (flags) {
+ if ((flags == LANDLOCK_CREATE_RULESET_VERSION) && !attr &&
+ !size)
+ return LANDLOCK_ABI_VERSION;
+ return -EINVAL;
+ }
+
+ /* Copies raw user space buffer. */
+ err = copy_min_struct_from_user(&ruleset_attr, sizeof(ruleset_attr),
+ offsetofend(typeof(ruleset_attr),
+ handled_access_fs),
+ attr, size);
+ if (err)
+ return err;
+
+ /* Checks content (and 32-bits cast). */
+ if ((ruleset_attr.handled_access_fs | LANDLOCK_MASK_ACCESS_FS) !=
+ LANDLOCK_MASK_ACCESS_FS)
+ return -EINVAL;
+
+ /* Checks arguments and transforms to kernel struct. */
+ ruleset = landlock_create_ruleset(ruleset_attr.handled_access_fs);
+ if (IS_ERR(ruleset))
+ return PTR_ERR(ruleset);
+
+ /* Creates anonymous FD referring to the ruleset. */
+ ruleset_fd = anon_inode_getfd("[landlock-ruleset]", &ruleset_fops,
+ ruleset, O_RDWR | O_CLOEXEC);
+ if (ruleset_fd < 0)
+ landlock_put_ruleset(ruleset);
+ return ruleset_fd;
+}
+
+/*
+ * Returns an owned ruleset from a FD. It is thus needed to call
+ * landlock_put_ruleset() on the return value.
+ */
+static struct landlock_ruleset *get_ruleset_from_fd(const int fd,
+ const fmode_t mode)
+{
+ struct fd ruleset_f;
+ struct landlock_ruleset *ruleset;
+
+ ruleset_f = fdget(fd);
+ if (!ruleset_f.file)
+ return ERR_PTR(-EBADF);
+
+ /* Checks FD type and access right. */
+ if (ruleset_f.file->f_op != &ruleset_fops) {
+ ruleset = ERR_PTR(-EBADFD);
+ goto out_fdput;
+ }
+ if (!(ruleset_f.file->f_mode & mode)) {
+ ruleset = ERR_PTR(-EPERM);
+ goto out_fdput;
+ }
+ ruleset = ruleset_f.file->private_data;
+ if (WARN_ON_ONCE(ruleset->num_layers != 1)) {
+ ruleset = ERR_PTR(-EINVAL);
+ goto out_fdput;
+ }
+ landlock_get_ruleset(ruleset);
+
+out_fdput:
+ fdput(ruleset_f);
+ return ruleset;
+}
+
+/* Path handling */
+
+/*
+ * @path: Must call put_path(@path) after the call if it succeeded.
+ */
+static int get_path_from_fd(const s32 fd, struct path *const path)
+{
+ struct fd f;
+ int err = 0;
+
+ BUILD_BUG_ON(!__same_type(
+ fd, ((struct landlock_path_beneath_attr *)NULL)->parent_fd));
+
+ /* Handles O_PATH. */
+ f = fdget_raw(fd);
+ if (!f.file)
+ return -EBADF;
+ /*
+ * Forbids ruleset FDs, internal filesystems (e.g. nsfs), including
+ * pseudo filesystems that will never be mountable (e.g. sockfs,
+ * pipefs).
+ */
+ if ((f.file->f_op == &ruleset_fops) ||
+ (f.file->f_path.mnt->mnt_flags & MNT_INTERNAL) ||
+ (f.file->f_path.dentry->d_sb->s_flags & SB_NOUSER) ||
+ d_is_negative(f.file->f_path.dentry) ||
+ IS_PRIVATE(d_backing_inode(f.file->f_path.dentry))) {
+ err = -EBADFD;
+ goto out_fdput;
+ }
+ *path = f.file->f_path;
+ path_get(path);
+
+out_fdput:
+ fdput(f);
+ return err;
+}
+
+/**
+ * sys_landlock_add_rule - Add a new rule to a ruleset
+ *
+ * @ruleset_fd: File descriptor tied to the ruleset that should be extended
+ * with the new rule.
+ * @rule_type: Identify the structure type pointed to by @rule_attr (only
+ * %LANDLOCK_RULE_PATH_BENEATH for now).
+ * @rule_attr: Pointer to a rule (only of type &struct
+ * landlock_path_beneath_attr for now).
+ * @flags: Must be 0.
+ *
+ * This system call enables to define a new rule and add it to an existing
+ * ruleset.
+ *
+ * Possible returned errors are:
+ *
+ * - %EOPNOTSUPP: Landlock is supported by the kernel but disabled at boot time;
+ * - %EINVAL: @flags is not 0, or inconsistent access in the rule (i.e.
+ * &landlock_path_beneath_attr.allowed_access is not a subset of the
+ * ruleset handled accesses);
+ * - %ENOMSG: Empty accesses (e.g. &landlock_path_beneath_attr.allowed_access);
+ * - %EBADF: @ruleset_fd is not a file descriptor for the current thread, or a
+ * member of @rule_attr is not a file descriptor as expected;
+ * - %EBADFD: @ruleset_fd is not a ruleset file descriptor, or a member of
+ * @rule_attr is not the expected file descriptor type;
+ * - %EPERM: @ruleset_fd has no write access to the underlying ruleset;
+ * - %EFAULT: @rule_attr inconsistency.
+ */
+SYSCALL_DEFINE4(landlock_add_rule, const int, ruleset_fd,
+ const enum landlock_rule_type, rule_type,
+ const void __user *const, rule_attr, const __u32, flags)
+{
+ struct landlock_path_beneath_attr path_beneath_attr;
+ struct path path;
+ struct landlock_ruleset *ruleset;
+ int res, err;
+
+ if (!landlock_initialized)
+ return -EOPNOTSUPP;
+
+ /* No flag for now. */
+ if (flags)
+ return -EINVAL;
+
+ /* Gets and checks the ruleset. */
+ ruleset = get_ruleset_from_fd(ruleset_fd, FMODE_CAN_WRITE);
+ if (IS_ERR(ruleset))
+ return PTR_ERR(ruleset);
+
+ if (rule_type != LANDLOCK_RULE_PATH_BENEATH) {
+ err = -EINVAL;
+ goto out_put_ruleset;
+ }
+
+ /* Copies raw user space buffer, only one type for now. */
+ res = copy_from_user(&path_beneath_attr, rule_attr,
+ sizeof(path_beneath_attr));
+ if (res) {
+ err = -EFAULT;
+ goto out_put_ruleset;
+ }
+
+ /*
+ * Informs about useless rule: empty allowed_access (i.e. deny rules)
+ * are ignored in path walks.
+ */
+ if (!path_beneath_attr.allowed_access) {
+ err = -ENOMSG;
+ goto out_put_ruleset;
+ }
+ /*
+ * Checks that allowed_access matches the @ruleset constraints
+ * (ruleset->fs_access_masks[0] is automatically upgraded to 64-bits).
+ */
+ if ((path_beneath_attr.allowed_access | ruleset->fs_access_masks[0]) !=
+ ruleset->fs_access_masks[0]) {
+ err = -EINVAL;
+ goto out_put_ruleset;
+ }
+
+ /* Gets and checks the new rule. */
+ err = get_path_from_fd(path_beneath_attr.parent_fd, &path);
+ if (err)
+ goto out_put_ruleset;
+
+ /* Imports the new rule. */
+ err = landlock_append_fs_rule(ruleset, &path,
+ path_beneath_attr.allowed_access);
+ path_put(&path);
+
+out_put_ruleset:
+ landlock_put_ruleset(ruleset);
+ return err;
+}
+
+/* Enforcement */
+
+/**
+ * sys_landlock_restrict_self - Enforce a ruleset on the calling thread
+ *
+ * @ruleset_fd: File descriptor tied to the ruleset to merge with the target.
+ * @flags: Must be 0.
+ *
+ * This system call enables to enforce a Landlock ruleset on the current
+ * thread. Enforcing a ruleset requires that the task has %CAP_SYS_ADMIN in its
+ * namespace or is running with no_new_privs. This avoids scenarios where
+ * unprivileged tasks can affect the behavior of privileged children.
+ *
+ * Possible returned errors are:
+ *
+ * - %EOPNOTSUPP: Landlock is supported by the kernel but disabled at boot time;
+ * - %EINVAL: @flags is not 0.
+ * - %EBADF: @ruleset_fd is not a file descriptor for the current thread;
+ * - %EBADFD: @ruleset_fd is not a ruleset file descriptor;
+ * - %EPERM: @ruleset_fd has no read access to the underlying ruleset, or the
+ * current thread is not running with no_new_privs, or it doesn't have
+ * %CAP_SYS_ADMIN in its namespace.
+ * - %E2BIG: The maximum number of stacked rulesets is reached for the current
+ * thread.
+ */
+SYSCALL_DEFINE2(landlock_restrict_self, const int, ruleset_fd, const __u32,
+ flags)
+{
+ struct landlock_ruleset *new_dom, *ruleset;
+ struct cred *new_cred;
+ struct landlock_cred_security *new_llcred;
+ int err;
+
+ if (!landlock_initialized)
+ return -EOPNOTSUPP;
+
+ /*
+ * Similar checks as for seccomp(2), except that an -EPERM may be
+ * returned.
+ */
+ if (!task_no_new_privs(current) &&
+ !ns_capable_noaudit(current_user_ns(), CAP_SYS_ADMIN))
+ return -EPERM;
+
+ /* No flag for now. */
+ if (flags)
+ return -EINVAL;
+
+ /* Gets and checks the ruleset. */
+ ruleset = get_ruleset_from_fd(ruleset_fd, FMODE_CAN_READ);
+ if (IS_ERR(ruleset))
+ return PTR_ERR(ruleset);
+
+ /* Prepares new credentials. */
+ new_cred = prepare_creds();
+ if (!new_cred) {
+ err = -ENOMEM;
+ goto out_put_ruleset;
+ }
+ new_llcred = landlock_cred(new_cred);
+
+ /*
+ * There is no possible race condition while copying and manipulating
+ * the current credentials because they are dedicated per thread.
+ */
+ new_dom = landlock_merge_ruleset(new_llcred->domain, ruleset);
+ if (IS_ERR(new_dom)) {
+ err = PTR_ERR(new_dom);
+ goto out_put_creds;
+ }
+
+ /* Replaces the old (prepared) domain. */
+ landlock_put_ruleset(new_llcred->domain);
+ new_llcred->domain = new_dom;
+
+ landlock_put_ruleset(ruleset);
+ return commit_creds(new_cred);
+
+out_put_creds:
+ abort_creds(new_cred);
+
+out_put_ruleset:
+ landlock_put_ruleset(ruleset);
+ return err;
+}