diff options
Diffstat (limited to 'fs/verity')
-rw-r--r-- | fs/verity/Kconfig | 52 | ||||
-rw-r--r-- | fs/verity/Makefile | 11 | ||||
-rw-r--r-- | fs/verity/enable.c | 405 | ||||
-rw-r--r-- | fs/verity/fsverity_private.h | 148 | ||||
-rw-r--r-- | fs/verity/hash_algs.c | 251 | ||||
-rw-r--r-- | fs/verity/init.c | 74 | ||||
-rw-r--r-- | fs/verity/measure.c | 102 | ||||
-rw-r--r-- | fs/verity/open.c | 417 | ||||
-rw-r--r-- | fs/verity/read_metadata.c | 194 | ||||
-rw-r--r-- | fs/verity/signature.c | 122 | ||||
-rw-r--r-- | fs/verity/verify.c | 364 |
11 files changed, 2140 insertions, 0 deletions
diff --git a/fs/verity/Kconfig b/fs/verity/Kconfig new file mode 100644 index 0000000000..e1036e5353 --- /dev/null +++ b/fs/verity/Kconfig @@ -0,0 +1,52 @@ +# SPDX-License-Identifier: GPL-2.0 + +config FS_VERITY + bool "FS Verity (read-only file-based authenticity protection)" + select CRYPTO + select CRYPTO_HASH_INFO + # SHA-256 is implied as it's intended to be the default hash algorithm. + # To avoid bloat, other wanted algorithms must be selected explicitly. + # Note that CRYPTO_SHA256 denotes the generic C implementation, but + # some architectures provided optimized implementations of the same + # algorithm that may be used instead. In this case, CRYPTO_SHA256 may + # be omitted even if SHA-256 is being used. + imply CRYPTO_SHA256 + help + This option enables fs-verity. fs-verity is the dm-verity + mechanism implemented at the file level. On supported + filesystems (currently ext4, f2fs, and btrfs), userspace can + use an ioctl to enable verity for a file, which causes the + filesystem to build a Merkle tree for the file. The filesystem + will then transparently verify any data read from the file + against the Merkle tree. The file is also made read-only. + + This serves as an integrity check, but the availability of the + Merkle tree root hash also allows efficiently supporting + various use cases where normally the whole file would need to + be hashed at once, such as: (a) auditing (logging the file's + hash), or (b) authenticity verification (comparing the hash + against a known good value, e.g. from a digital signature). + + fs-verity is especially useful on large files where not all + the contents may actually be needed. Also, fs-verity verifies + data each time it is paged back in, which provides better + protection against malicious disks vs. an ahead-of-time hash. + + If unsure, say N. 
+ +config FS_VERITY_BUILTIN_SIGNATURES + bool "FS Verity builtin signature support" + depends on FS_VERITY + select SYSTEM_DATA_VERIFICATION + help + This option adds support for in-kernel verification of + fs-verity builtin signatures. + + Please take great care before using this feature. It is not + the only way to do signatures with fs-verity, and the + alternatives (such as userspace signature verification, and + IMA appraisal) can be much better. For details about the + limitations of this feature, see + Documentation/filesystems/fsverity.rst. + + If unsure, say N. diff --git a/fs/verity/Makefile b/fs/verity/Makefile new file mode 100644 index 0000000000..435559a4fa --- /dev/null +++ b/fs/verity/Makefile @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-$(CONFIG_FS_VERITY) += enable.o \ + hash_algs.o \ + init.o \ + measure.o \ + open.o \ + read_metadata.o \ + verify.o + +obj-$(CONFIG_FS_VERITY_BUILTIN_SIGNATURES) += signature.o diff --git a/fs/verity/enable.c b/fs/verity/enable.c new file mode 100644 index 0000000000..c284f46d1b --- /dev/null +++ b/fs/verity/enable.c @@ -0,0 +1,405 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Ioctl to enable verity on a file + * + * Copyright 2019 Google LLC + */ + +#include "fsverity_private.h" + +#include <crypto/hash.h> +#include <linux/mount.h> +#include <linux/sched/signal.h> +#include <linux/uaccess.h> + +struct block_buffer { + u32 filled; + bool is_root_hash; + u8 *data; +}; + +/* Hash a block, writing the result to the next level's pending block buffer. 
*/ +static int hash_one_block(struct inode *inode, + const struct merkle_tree_params *params, + struct block_buffer *cur) +{ + struct block_buffer *next = cur + 1; + int err; + + /* + * Safety check to prevent a buffer overflow in case of a filesystem bug + * that allows the file size to change despite deny_write_access(), or a + * bug in the Merkle tree logic itself + */ + if (WARN_ON_ONCE(next->is_root_hash && next->filled != 0)) + return -EINVAL; + + /* Zero-pad the block if it's shorter than the block size. */ + memset(&cur->data[cur->filled], 0, params->block_size - cur->filled); + + err = fsverity_hash_block(params, inode, cur->data, + &next->data[next->filled]); + if (err) + return err; + next->filled += params->digest_size; + cur->filled = 0; + return 0; +} + +static int write_merkle_tree_block(struct inode *inode, const u8 *buf, + unsigned long index, + const struct merkle_tree_params *params) +{ + u64 pos = (u64)index << params->log_blocksize; + int err; + + err = inode->i_sb->s_vop->write_merkle_tree_block(inode, buf, pos, + params->block_size); + if (err) + fsverity_err(inode, "Error %d writing Merkle tree block %lu", + err, index); + return err; +} + +/* + * Build the Merkle tree for the given file using the given parameters, and + * return the root hash in @root_hash. + * + * The tree is written to a filesystem-specific location as determined by the + * ->write_merkle_tree_block() method. However, the blocks that comprise the + * tree are the same for all filesystems. 
+ */ +static int build_merkle_tree(struct file *filp, + const struct merkle_tree_params *params, + u8 *root_hash) +{ + struct inode *inode = file_inode(filp); + const u64 data_size = inode->i_size; + const int num_levels = params->num_levels; + struct block_buffer _buffers[1 + FS_VERITY_MAX_LEVELS + 1] = {}; + struct block_buffer *buffers = &_buffers[1]; + unsigned long level_offset[FS_VERITY_MAX_LEVELS]; + int level; + u64 offset; + int err; + + if (data_size == 0) { + /* Empty file is a special case; root hash is all 0's */ + memset(root_hash, 0, params->digest_size); + return 0; + } + + /* + * Allocate the block buffers. Buffer "-1" is for data blocks. + * Buffers 0 <= level < num_levels are for the actual tree levels. + * Buffer 'num_levels' is for the root hash. + */ + for (level = -1; level < num_levels; level++) { + buffers[level].data = kzalloc(params->block_size, GFP_KERNEL); + if (!buffers[level].data) { + err = -ENOMEM; + goto out; + } + } + buffers[num_levels].data = root_hash; + buffers[num_levels].is_root_hash = true; + + BUILD_BUG_ON(sizeof(level_offset) != sizeof(params->level_start)); + memcpy(level_offset, params->level_start, sizeof(level_offset)); + + /* Hash each data block, also hashing the tree blocks as they fill up */ + for (offset = 0; offset < data_size; offset += params->block_size) { + ssize_t bytes_read; + loff_t pos = offset; + + buffers[-1].filled = min_t(u64, params->block_size, + data_size - offset); + bytes_read = __kernel_read(filp, buffers[-1].data, + buffers[-1].filled, &pos); + if (bytes_read < 0) { + err = bytes_read; + fsverity_err(inode, "Error %d reading file data", err); + goto out; + } + if (bytes_read != buffers[-1].filled) { + err = -EINVAL; + fsverity_err(inode, "Short read of file data"); + goto out; + } + err = hash_one_block(inode, params, &buffers[-1]); + if (err) + goto out; + for (level = 0; level < num_levels; level++) { + if (buffers[level].filled + params->digest_size <= + params->block_size) { + /* Next 
block at @level isn't full yet */ + break; + } + /* Next block at @level is full */ + + err = hash_one_block(inode, params, &buffers[level]); + if (err) + goto out; + err = write_merkle_tree_block(inode, + buffers[level].data, + level_offset[level], + params); + if (err) + goto out; + level_offset[level]++; + } + if (fatal_signal_pending(current)) { + err = -EINTR; + goto out; + } + cond_resched(); + } + /* Finish all nonempty pending tree blocks. */ + for (level = 0; level < num_levels; level++) { + if (buffers[level].filled != 0) { + err = hash_one_block(inode, params, &buffers[level]); + if (err) + goto out; + err = write_merkle_tree_block(inode, + buffers[level].data, + level_offset[level], + params); + if (err) + goto out; + } + } + /* The root hash was filled by the last call to hash_one_block(). */ + if (WARN_ON_ONCE(buffers[num_levels].filled != params->digest_size)) { + err = -EINVAL; + goto out; + } + err = 0; +out: + for (level = -1; level < num_levels; level++) + kfree(buffers[level].data); + return err; +} + +static int enable_verity(struct file *filp, + const struct fsverity_enable_arg *arg) +{ + struct inode *inode = file_inode(filp); + const struct fsverity_operations *vops = inode->i_sb->s_vop; + struct merkle_tree_params params = { }; + struct fsverity_descriptor *desc; + size_t desc_size = struct_size(desc, signature, arg->sig_size); + struct fsverity_info *vi; + int err; + + /* Start initializing the fsverity_descriptor */ + desc = kzalloc(desc_size, GFP_KERNEL); + if (!desc) + return -ENOMEM; + desc->version = 1; + desc->hash_algorithm = arg->hash_algorithm; + desc->log_blocksize = ilog2(arg->block_size); + + /* Get the salt if the user provided one */ + if (arg->salt_size && + copy_from_user(desc->salt, u64_to_user_ptr(arg->salt_ptr), + arg->salt_size)) { + err = -EFAULT; + goto out; + } + desc->salt_size = arg->salt_size; + + /* Get the builtin signature if the user provided one */ + if (arg->sig_size && + copy_from_user(desc->signature, 
u64_to_user_ptr(arg->sig_ptr), + arg->sig_size)) { + err = -EFAULT; + goto out; + } + desc->sig_size = cpu_to_le32(arg->sig_size); + + desc->data_size = cpu_to_le64(inode->i_size); + + /* Prepare the Merkle tree parameters */ + err = fsverity_init_merkle_tree_params(&params, inode, + arg->hash_algorithm, + desc->log_blocksize, + desc->salt, desc->salt_size); + if (err) + goto out; + + /* + * Start enabling verity on this file, serialized by the inode lock. + * Fail if verity is already enabled or is already being enabled. + */ + inode_lock(inode); + if (IS_VERITY(inode)) + err = -EEXIST; + else + err = vops->begin_enable_verity(filp); + inode_unlock(inode); + if (err) + goto out; + + /* + * Build the Merkle tree. Don't hold the inode lock during this, since + * on huge files this may take a very long time and we don't want to + * force unrelated syscalls like chown() to block forever. We don't + * need the inode lock here because deny_write_access() already prevents + * the file from being written to or truncated, and we still serialize + * ->begin_enable_verity() and ->end_enable_verity() using the inode + * lock and only allow one process to be here at a time on a given file. + */ + BUILD_BUG_ON(sizeof(desc->root_hash) < FS_VERITY_MAX_DIGEST_SIZE); + err = build_merkle_tree(filp, &params, desc->root_hash); + if (err) { + fsverity_err(inode, "Error %d building Merkle tree", err); + goto rollback; + } + + /* + * Create the fsverity_info. Don't bother trying to save work by + * reusing the merkle_tree_params from above. Instead, just create the + * fsverity_info from the fsverity_descriptor as if it were just loaded + * from disk. This is simpler, and it serves as an extra check that the + * metadata we're writing is valid before actually enabling verity. + */ + vi = fsverity_create_info(inode, desc); + if (IS_ERR(vi)) { + err = PTR_ERR(vi); + goto rollback; + } + + /* + * Tell the filesystem to finish enabling verity on the file. 
+ * Serialized with ->begin_enable_verity() by the inode lock. + */ + inode_lock(inode); + err = vops->end_enable_verity(filp, desc, desc_size, params.tree_size); + inode_unlock(inode); + if (err) { + fsverity_err(inode, "%ps() failed with err %d", + vops->end_enable_verity, err); + fsverity_free_info(vi); + } else if (WARN_ON_ONCE(!IS_VERITY(inode))) { + err = -EINVAL; + fsverity_free_info(vi); + } else { + /* Successfully enabled verity */ + + /* + * Readers can start using ->i_verity_info immediately, so it + * can't be rolled back once set. So don't set it until just + * after the filesystem has successfully enabled verity. + */ + fsverity_set_info(inode, vi); + } +out: + kfree(params.hashstate); + kfree(desc); + return err; + +rollback: + inode_lock(inode); + (void)vops->end_enable_verity(filp, NULL, 0, params.tree_size); + inode_unlock(inode); + goto out; +} + +/** + * fsverity_ioctl_enable() - enable verity on a file + * @filp: file to enable verity on + * @uarg: user pointer to fsverity_enable_arg + * + * Enable fs-verity on a file. See the "FS_IOC_ENABLE_VERITY" section of + * Documentation/filesystems/fsverity.rst for the documentation. + * + * Return: 0 on success, -errno on failure + */ +int fsverity_ioctl_enable(struct file *filp, const void __user *uarg) +{ + struct inode *inode = file_inode(filp); + struct fsverity_enable_arg arg; + int err; + + if (copy_from_user(&arg, uarg, sizeof(arg))) + return -EFAULT; + + if (arg.version != 1) + return -EINVAL; + + if (arg.__reserved1 || + memchr_inv(arg.__reserved2, 0, sizeof(arg.__reserved2))) + return -EINVAL; + + if (!is_power_of_2(arg.block_size)) + return -EINVAL; + + if (arg.salt_size > sizeof_field(struct fsverity_descriptor, salt)) + return -EMSGSIZE; + + if (arg.sig_size > FS_VERITY_MAX_SIGNATURE_SIZE) + return -EMSGSIZE; + + /* + * Require a regular file with write access. But the actual fd must + * still be readonly so that we can lock out all writers. 
This is + * needed to guarantee that no writable fds exist to the file once it + * has verity enabled, and to stabilize the data being hashed. + */ + + err = file_permission(filp, MAY_WRITE); + if (err) + return err; + /* + * __kernel_read() is used while building the Merkle tree. So, we can't + * allow file descriptors that were opened for ioctl access only, using + * the special nonstandard access mode 3. O_RDONLY only, please! + */ + if (!(filp->f_mode & FMODE_READ)) + return -EBADF; + + if (IS_APPEND(inode)) + return -EPERM; + + if (S_ISDIR(inode->i_mode)) + return -EISDIR; + + if (!S_ISREG(inode->i_mode)) + return -EINVAL; + + err = mnt_want_write_file(filp); + if (err) /* -EROFS */ + return err; + + err = deny_write_access(filp); + if (err) /* -ETXTBSY */ + goto out_drop_write; + + err = enable_verity(filp, &arg); + + /* + * We no longer drop the inode's pagecache after enabling verity. This + * used to be done to try to avoid a race condition where pages could be + * evicted after being used in the Merkle tree construction, then + * re-instantiated by a concurrent read. Such pages are unverified, and + * the backing storage could have filled them with different content, so + * they shouldn't be used to fulfill reads once verity is enabled. + * + * But, dropping the pagecache has a big performance impact, and it + * doesn't fully solve the race condition anyway. So for those reasons, + * and also because this race condition isn't very important relatively + * speaking (especially for small-ish files, where the chance of a page + * being used, evicted, *and* re-instantiated all while enabling verity + * is quite small), we no longer drop the inode's pagecache. + */ + + /* + * allow_write_access() is needed to pair with deny_write_access(). + * Regardless, the filesystem won't allow writing to verity files. 
+ */ + allow_write_access(filp); +out_drop_write: + mnt_drop_write_file(filp); + return err; +} +EXPORT_SYMBOL_GPL(fsverity_ioctl_enable); diff --git a/fs/verity/fsverity_private.h b/fs/verity/fsverity_private.h new file mode 100644 index 0000000000..d071a6e325 --- /dev/null +++ b/fs/verity/fsverity_private.h @@ -0,0 +1,148 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * fs-verity: read-only file-based authenticity protection + * + * Copyright 2019 Google LLC + */ + +#ifndef _FSVERITY_PRIVATE_H +#define _FSVERITY_PRIVATE_H + +#define pr_fmt(fmt) "fs-verity: " fmt + +#include <linux/fsverity.h> + +/* + * Implementation limit: maximum depth of the Merkle tree. For now 8 is plenty; + * it's enough for over U64_MAX bytes of data using SHA-256 and 4K blocks. + */ +#define FS_VERITY_MAX_LEVELS 8 + +/* A hash algorithm supported by fs-verity */ +struct fsverity_hash_alg { + struct crypto_shash *tfm; /* hash tfm, allocated on demand */ + const char *name; /* crypto API name, e.g. sha256 */ + unsigned int digest_size; /* digest size in bytes, e.g. 32 for SHA-256 */ + unsigned int block_size; /* block size in bytes, e.g. 64 for SHA-256 */ + /* + * The HASH_ALGO_* constant for this algorithm. This is different from + * FS_VERITY_HASH_ALG_*, which uses a different numbering scheme. 
+ */ + enum hash_algo algo_id; +}; + +/* Merkle tree parameters: hash algorithm, initial hash state, and topology */ +struct merkle_tree_params { + const struct fsverity_hash_alg *hash_alg; /* the hash algorithm */ + const u8 *hashstate; /* initial hash state or NULL */ + unsigned int digest_size; /* same as hash_alg->digest_size */ + unsigned int block_size; /* size of data and tree blocks */ + unsigned int hashes_per_block; /* number of hashes per tree block */ + unsigned int blocks_per_page; /* PAGE_SIZE / block_size */ + u8 log_digestsize; /* log2(digest_size) */ + u8 log_blocksize; /* log2(block_size) */ + u8 log_arity; /* log2(hashes_per_block) */ + u8 log_blocks_per_page; /* log2(blocks_per_page) */ + unsigned int num_levels; /* number of levels in Merkle tree */ + u64 tree_size; /* Merkle tree size in bytes */ + unsigned long tree_pages; /* Merkle tree size in pages */ + + /* + * Starting block index for each tree level, ordered from leaf level (0) + * to root level ('num_levels - 1') + */ + unsigned long level_start[FS_VERITY_MAX_LEVELS]; +}; + +/* + * fsverity_info - cached verity metadata for an inode + * + * When a verity file is first opened, an instance of this struct is allocated + * and stored in ->i_verity_info; it remains until the inode is evicted. It + * caches information about the Merkle tree that's needed to efficiently verify + * data read from the file. It also caches the file digest. The Merkle tree + * pages themselves are not cached here, but the filesystem may cache them. 
+ */ +struct fsverity_info { + struct merkle_tree_params tree_params; + u8 root_hash[FS_VERITY_MAX_DIGEST_SIZE]; + u8 file_digest[FS_VERITY_MAX_DIGEST_SIZE]; + const struct inode *inode; + unsigned long *hash_block_verified; + spinlock_t hash_page_init_lock; +}; + +#define FS_VERITY_MAX_SIGNATURE_SIZE (FS_VERITY_MAX_DESCRIPTOR_SIZE - \ + sizeof(struct fsverity_descriptor)) + +/* hash_algs.c */ + +extern struct fsverity_hash_alg fsverity_hash_algs[]; + +const struct fsverity_hash_alg *fsverity_get_hash_alg(const struct inode *inode, + unsigned int num); +const u8 *fsverity_prepare_hash_state(const struct fsverity_hash_alg *alg, + const u8 *salt, size_t salt_size); +int fsverity_hash_block(const struct merkle_tree_params *params, + const struct inode *inode, const void *data, u8 *out); +int fsverity_hash_buffer(const struct fsverity_hash_alg *alg, + const void *data, size_t size, u8 *out); +void __init fsverity_check_hash_algs(void); + +/* init.c */ + +void __printf(3, 4) __cold +fsverity_msg(const struct inode *inode, const char *level, + const char *fmt, ...); + +#define fsverity_warn(inode, fmt, ...) \ + fsverity_msg((inode), KERN_WARNING, fmt, ##__VA_ARGS__) +#define fsverity_err(inode, fmt, ...) 
\ + fsverity_msg((inode), KERN_ERR, fmt, ##__VA_ARGS__) + +/* open.c */ + +int fsverity_init_merkle_tree_params(struct merkle_tree_params *params, + const struct inode *inode, + unsigned int hash_algorithm, + unsigned int log_blocksize, + const u8 *salt, size_t salt_size); + +struct fsverity_info *fsverity_create_info(const struct inode *inode, + struct fsverity_descriptor *desc); + +void fsverity_set_info(struct inode *inode, struct fsverity_info *vi); + +void fsverity_free_info(struct fsverity_info *vi); + +int fsverity_get_descriptor(struct inode *inode, + struct fsverity_descriptor **desc_ret); + +void __init fsverity_init_info_cache(void); + +/* signature.c */ + +#ifdef CONFIG_FS_VERITY_BUILTIN_SIGNATURES +extern int fsverity_require_signatures; +int fsverity_verify_signature(const struct fsverity_info *vi, + const u8 *signature, size_t sig_size); + +void __init fsverity_init_signature(void); +#else /* !CONFIG_FS_VERITY_BUILTIN_SIGNATURES */ +static inline int +fsverity_verify_signature(const struct fsverity_info *vi, + const u8 *signature, size_t sig_size) +{ + return 0; +} + +static inline void fsverity_init_signature(void) +{ +} +#endif /* !CONFIG_FS_VERITY_BUILTIN_SIGNATURES */ + +/* verify.c */ + +void __init fsverity_init_workqueue(void); + +#endif /* _FSVERITY_PRIVATE_H */ diff --git a/fs/verity/hash_algs.c b/fs/verity/hash_algs.c new file mode 100644 index 0000000000..6b08b1d9a7 --- /dev/null +++ b/fs/verity/hash_algs.c @@ -0,0 +1,251 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs-verity hash algorithms + * + * Copyright 2019 Google LLC + */ + +#include "fsverity_private.h" + +#include <crypto/hash.h> + +/* The hash algorithms supported by fs-verity */ +struct fsverity_hash_alg fsverity_hash_algs[] = { + [FS_VERITY_HASH_ALG_SHA256] = { + .name = "sha256", + .digest_size = SHA256_DIGEST_SIZE, + .block_size = SHA256_BLOCK_SIZE, + .algo_id = HASH_ALGO_SHA256, + }, + [FS_VERITY_HASH_ALG_SHA512] = { + .name = "sha512", + .digest_size = 
SHA512_DIGEST_SIZE, + .block_size = SHA512_BLOCK_SIZE, + .algo_id = HASH_ALGO_SHA512, + }, +}; + +static DEFINE_MUTEX(fsverity_hash_alg_init_mutex); + +/** + * fsverity_get_hash_alg() - validate and prepare a hash algorithm + * @inode: optional inode for logging purposes + * @num: the hash algorithm number + * + * Get the struct fsverity_hash_alg for the given hash algorithm number, and + * ensure it has a hash transform ready to go. The hash transforms are + * allocated on-demand so that we don't waste resources unnecessarily, and + * because the crypto modules may be initialized later than fs/verity/. + * + * Return: pointer to the hash alg on success, else an ERR_PTR() + */ +const struct fsverity_hash_alg *fsverity_get_hash_alg(const struct inode *inode, + unsigned int num) +{ + struct fsverity_hash_alg *alg; + struct crypto_shash *tfm; + int err; + + if (num >= ARRAY_SIZE(fsverity_hash_algs) || + !fsverity_hash_algs[num].name) { + fsverity_warn(inode, "Unknown hash algorithm number: %u", num); + return ERR_PTR(-EINVAL); + } + alg = &fsverity_hash_algs[num]; + + /* pairs with smp_store_release() below */ + if (likely(smp_load_acquire(&alg->tfm) != NULL)) + return alg; + + mutex_lock(&fsverity_hash_alg_init_mutex); + + if (alg->tfm != NULL) + goto out_unlock; + + tfm = crypto_alloc_shash(alg->name, 0, 0); + if (IS_ERR(tfm)) { + if (PTR_ERR(tfm) == -ENOENT) { + fsverity_warn(inode, + "Missing crypto API support for hash algorithm \"%s\"", + alg->name); + alg = ERR_PTR(-ENOPKG); + goto out_unlock; + } + fsverity_err(inode, + "Error allocating hash algorithm \"%s\": %ld", + alg->name, PTR_ERR(tfm)); + alg = ERR_CAST(tfm); + goto out_unlock; + } + + err = -EINVAL; + if (WARN_ON_ONCE(alg->digest_size != crypto_shash_digestsize(tfm))) + goto err_free_tfm; + if (WARN_ON_ONCE(alg->block_size != crypto_shash_blocksize(tfm))) + goto err_free_tfm; + + pr_info("%s using implementation \"%s\"\n", + alg->name, crypto_shash_driver_name(tfm)); + + /* pairs with 
smp_load_acquire() above */ + smp_store_release(&alg->tfm, tfm); + goto out_unlock; + +err_free_tfm: + crypto_free_shash(tfm); + alg = ERR_PTR(err); +out_unlock: + mutex_unlock(&fsverity_hash_alg_init_mutex); + return alg; +} + +/** + * fsverity_prepare_hash_state() - precompute the initial hash state + * @alg: hash algorithm + * @salt: a salt which is to be prepended to all data to be hashed + * @salt_size: salt size in bytes, possibly 0 + * + * Return: NULL if the salt is empty, otherwise the kmalloc()'ed precomputed + * initial hash state on success or an ERR_PTR() on failure. + */ +const u8 *fsverity_prepare_hash_state(const struct fsverity_hash_alg *alg, + const u8 *salt, size_t salt_size) +{ + u8 *hashstate = NULL; + SHASH_DESC_ON_STACK(desc, alg->tfm); + u8 *padded_salt = NULL; + size_t padded_salt_size; + int err; + + desc->tfm = alg->tfm; + + if (salt_size == 0) + return NULL; + + hashstate = kmalloc(crypto_shash_statesize(alg->tfm), GFP_KERNEL); + if (!hashstate) + return ERR_PTR(-ENOMEM); + + /* + * Zero-pad the salt to the next multiple of the input size of the hash + * algorithm's compression function, e.g. 64 bytes for SHA-256 or 128 + * bytes for SHA-512. This ensures that the hash algorithm won't have + * any bytes buffered internally after processing the salt, thus making + * salted hashing just as fast as unsalted hashing. 
+ */ + padded_salt_size = round_up(salt_size, alg->block_size); + padded_salt = kzalloc(padded_salt_size, GFP_KERNEL); + if (!padded_salt) { + err = -ENOMEM; + goto err_free; + } + memcpy(padded_salt, salt, salt_size); + err = crypto_shash_init(desc); + if (err) + goto err_free; + + err = crypto_shash_update(desc, padded_salt, padded_salt_size); + if (err) + goto err_free; + + err = crypto_shash_export(desc, hashstate); + if (err) + goto err_free; +out: + kfree(padded_salt); + return hashstate; + +err_free: + kfree(hashstate); + hashstate = ERR_PTR(err); + goto out; +} + +/** + * fsverity_hash_block() - hash a single data or hash block + * @params: the Merkle tree's parameters + * @inode: inode for which the hashing is being done + * @data: virtual address of a buffer containing the block to hash + * @out: output digest, size 'params->digest_size' bytes + * + * Hash a single data or hash block. The hash is salted if a salt is specified + * in the Merkle tree parameters. + * + * Return: 0 on success, -errno on failure + */ +int fsverity_hash_block(const struct merkle_tree_params *params, + const struct inode *inode, const void *data, u8 *out) +{ + SHASH_DESC_ON_STACK(desc, params->hash_alg->tfm); + int err; + + desc->tfm = params->hash_alg->tfm; + + if (params->hashstate) { + err = crypto_shash_import(desc, params->hashstate); + if (err) { + fsverity_err(inode, + "Error %d importing hash state", err); + return err; + } + err = crypto_shash_finup(desc, data, params->block_size, out); + } else { + err = crypto_shash_digest(desc, data, params->block_size, out); + } + if (err) + fsverity_err(inode, "Error %d computing block hash", err); + return err; +} + +/** + * fsverity_hash_buffer() - hash some data + * @alg: the hash algorithm to use + * @data: the data to hash + * @size: size of data to hash, in bytes + * @out: output digest, size 'alg->digest_size' bytes + * + * Return: 0 on success, -errno on failure + */ +int fsverity_hash_buffer(const struct fsverity_hash_alg 
*alg, + const void *data, size_t size, u8 *out) +{ + return crypto_shash_tfm_digest(alg->tfm, data, size, out); +} + +void __init fsverity_check_hash_algs(void) +{ + size_t i; + + /* + * Sanity check the hash algorithms (could be a build-time check, but + * they're in an array) + */ + for (i = 0; i < ARRAY_SIZE(fsverity_hash_algs); i++) { + const struct fsverity_hash_alg *alg = &fsverity_hash_algs[i]; + + if (!alg->name) + continue; + + /* + * 0 must never be allocated as an FS_VERITY_HASH_ALG_* value, + * as it is reserved for users that use 0 to mean unspecified or + * a default value. fs/verity/ itself doesn't care and doesn't + * have a default algorithm, but some users make use of this. + */ + BUG_ON(i == 0); + + BUG_ON(alg->digest_size > FS_VERITY_MAX_DIGEST_SIZE); + + /* + * For efficiency, the implementation currently assumes the + * digest and block sizes are powers of 2. This limitation can + * be lifted if the code is updated to handle other values. + */ + BUG_ON(!is_power_of_2(alg->digest_size)); + BUG_ON(!is_power_of_2(alg->block_size)); + + /* Verify that there is a valid mapping to HASH_ALGO_*. 
*/ + BUG_ON(alg->algo_id == 0); + BUG_ON(alg->digest_size != hash_digest_size[alg->algo_id]); + } +} diff --git a/fs/verity/init.c b/fs/verity/init.c new file mode 100644 index 0000000000..a29f062f60 --- /dev/null +++ b/fs/verity/init.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * fs-verity module initialization and logging + * + * Copyright 2019 Google LLC + */ + +#include "fsverity_private.h" + +#include <linux/ratelimit.h> + +#ifdef CONFIG_SYSCTL +static struct ctl_table_header *fsverity_sysctl_header; + +static struct ctl_table fsverity_sysctl_table[] = { +#ifdef CONFIG_FS_VERITY_BUILTIN_SIGNATURES + { + .procname = "require_signatures", + .data = &fsverity_require_signatures, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, +#endif + { } +}; + +static void __init fsverity_init_sysctl(void) +{ + fsverity_sysctl_header = register_sysctl("fs/verity", + fsverity_sysctl_table); + if (!fsverity_sysctl_header) + panic("fsverity sysctl registration failed"); +} +#else /* CONFIG_SYSCTL */ +static inline void fsverity_init_sysctl(void) +{ +} +#endif /* !CONFIG_SYSCTL */ + +void fsverity_msg(const struct inode *inode, const char *level, + const char *fmt, ...) 
+{ + static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, + DEFAULT_RATELIMIT_BURST); + struct va_format vaf; + va_list args; + + if (!__ratelimit(&rs)) + return; + + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + if (inode) + printk("%sfs-verity (%s, inode %lu): %pV\n", + level, inode->i_sb->s_id, inode->i_ino, &vaf); + else + printk("%sfs-verity: %pV\n", level, &vaf); + va_end(args); +} + +static int __init fsverity_init(void) +{ + fsverity_check_hash_algs(); + fsverity_init_info_cache(); + fsverity_init_workqueue(); + fsverity_init_sysctl(); + fsverity_init_signature(); + return 0; +} +late_initcall(fsverity_init) diff --git a/fs/verity/measure.c b/fs/verity/measure.c new file mode 100644 index 0000000000..eec5956141 --- /dev/null +++ b/fs/verity/measure.c @@ -0,0 +1,102 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Ioctl to get a verity file's digest + * + * Copyright 2019 Google LLC + */ + +#include "fsverity_private.h" + +#include <linux/uaccess.h> + +/** + * fsverity_ioctl_measure() - get a verity file's digest + * @filp: file to get digest of + * @_uarg: user pointer to fsverity_digest + * + * Retrieve the file digest that the kernel is enforcing for reads from a verity + * file. See the "FS_IOC_MEASURE_VERITY" section of + * Documentation/filesystems/fsverity.rst for the documentation. + * + * Return: 0 on success, -errno on failure + */ +int fsverity_ioctl_measure(struct file *filp, void __user *_uarg) +{ + const struct inode *inode = file_inode(filp); + struct fsverity_digest __user *uarg = _uarg; + const struct fsverity_info *vi; + const struct fsverity_hash_alg *hash_alg; + struct fsverity_digest arg; + + vi = fsverity_get_info(inode); + if (!vi) + return -ENODATA; /* not a verity file */ + hash_alg = vi->tree_params.hash_alg; + + /* + * The user specifies the digest_size their buffer has space for; we can + * return the digest if it fits in the available space. 
We write back + * the actual size, which may be shorter than the user-specified size. + */ + + if (get_user(arg.digest_size, &uarg->digest_size)) + return -EFAULT; + if (arg.digest_size < hash_alg->digest_size) + return -EOVERFLOW; + + memset(&arg, 0, sizeof(arg)); + arg.digest_algorithm = hash_alg - fsverity_hash_algs; + arg.digest_size = hash_alg->digest_size; + + if (copy_to_user(uarg, &arg, sizeof(arg))) + return -EFAULT; + + if (copy_to_user(uarg->digest, vi->file_digest, hash_alg->digest_size)) + return -EFAULT; + + return 0; +} +EXPORT_SYMBOL_GPL(fsverity_ioctl_measure); + +/** + * fsverity_get_digest() - get a verity file's digest + * @inode: inode to get digest of + * @raw_digest: (out) the raw file digest + * @alg: (out) the digest's algorithm, as a FS_VERITY_HASH_ALG_* value + * @halg: (out) the digest's algorithm, as a HASH_ALGO_* value + * + * Retrieves the fsverity digest of the given file. The file must have been + * opened at least once since the inode was last loaded into the inode cache; + * otherwise this function will not recognize when fsverity is enabled. + * + * The file's fsverity digest consists of @raw_digest in combination with either + * @alg or @halg. (The caller can choose which one of @alg or @halg to use.) + * + * IMPORTANT: Callers *must* make use of one of the two algorithm IDs, since + * @raw_digest is meaningless without knowing which algorithm it uses! fsverity + * provides no security guarantee for users who ignore the algorithm ID, even if + * they use the digest size (since algorithms can share the same digest size). + * + * Return: The size of the raw digest in bytes, or 0 if the file doesn't have + * fsverity enabled. 
+ */
+int fsverity_get_digest(struct inode *inode,
+			u8 raw_digest[FS_VERITY_MAX_DIGEST_SIZE],
+			u8 *alg, enum hash_algo *halg)
+{
+	const struct fsverity_info *vi;
+	const struct fsverity_hash_alg *hash_alg;
+
+	vi = fsverity_get_info(inode);
+	if (!vi)
+		return 0; /* not a verity file */
+
+	hash_alg = vi->tree_params.hash_alg;
+	memcpy(raw_digest, vi->file_digest, hash_alg->digest_size);
+	/*
+	 * Both algorithm outputs are optional and are skipped when NULL. @alg
+	 * receives hash_alg's element offset from fsverity_hash_algs.
+	 */
+	if (alg)
+		*alg = hash_alg - fsverity_hash_algs;
+	if (halg)
+		*halg = hash_alg->algo_id;
+	return hash_alg->digest_size;
+}
+EXPORT_SYMBOL_GPL(fsverity_get_digest);
diff --git a/fs/verity/open.c b/fs/verity/open.c
new file mode 100644
index 0000000000..6c31a871b8
--- /dev/null
+++ b/fs/verity/open.c
@@ -0,0 +1,417 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Opening fs-verity files
+ *
+ * Copyright 2019 Google LLC
+ */
+
+#include "fsverity_private.h"
+
+#include <linux/mm.h>
+#include <linux/slab.h>
+
+/*
+ * Slab cache from which every struct fsverity_info is allocated and to which
+ * it is returned; created by fsverity_init_info_cache().
+ */
+static struct kmem_cache *fsverity_info_cachep;
+
+/**
+ * fsverity_init_merkle_tree_params() - initialize Merkle tree parameters
+ * @params: the parameters struct to initialize
+ * @inode: the inode for which the Merkle tree is being built
+ * @hash_algorithm: number of hash algorithm to use
+ * @log_blocksize: log base 2 of block size to use
+ * @salt: pointer to salt (optional)
+ * @salt_size: size of salt, possibly 0
+ *
+ * Validate the hash algorithm and block size, then compute the tree topology
+ * (num levels, num blocks in each level, etc.) and initialize @params.
+ *
+ * On failure, @params is left fully zeroed and no hash state remains allocated.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+int fsverity_init_merkle_tree_params(struct merkle_tree_params *params,
+				     const struct inode *inode,
+				     unsigned int hash_algorithm,
+				     unsigned int log_blocksize,
+				     const u8 *salt, size_t salt_size)
+{
+	const struct fsverity_hash_alg *hash_alg;
+	int err;
+	u64 blocks;
+	u64 blocks_in_level[FS_VERITY_MAX_LEVELS];
+	u64 offset;
+	int level;
+
+	memset(params, 0, sizeof(*params));
+
+	hash_alg = fsverity_get_hash_alg(inode, hash_algorithm);
+	if (IS_ERR(hash_alg))
+		return PTR_ERR(hash_alg);
+	params->hash_alg = hash_alg;
+	params->digest_size = hash_alg->digest_size;
+
+	params->hashstate = fsverity_prepare_hash_state(hash_alg, salt,
+							salt_size);
+	if (IS_ERR(params->hashstate)) {
+		err = PTR_ERR(params->hashstate);
+		/* Don't let out_err pass an ERR_PTR value to kfree(). */
+		params->hashstate = NULL;
+		fsverity_err(inode, "Error %d preparing hash state", err);
+		goto out_err;
+	}
+
+	/*
+	 * fs/verity/ directly assumes that the Merkle tree block size is a
+	 * power of 2 less than or equal to PAGE_SIZE. Another restriction
+	 * arises from the interaction between fs/verity/ and the filesystems
+	 * themselves: filesystems expect to be able to verify a single
+	 * filesystem block of data at a time. Therefore, the Merkle tree block
+	 * size must also be less than or equal to the filesystem block size.
+	 *
+	 * The above are the only hard limitations, so in theory the Merkle tree
+	 * block size could be as small as twice the digest size. However,
+	 * that's not useful, and it would result in some unusually deep and
+	 * large Merkle trees. So we currently require that the Merkle tree
+	 * block size be at least 1024 bytes. That's small enough to test the
+	 * sub-page block case on systems with 4K pages, but not too small.
+	 */
+	if (log_blocksize < 10 || log_blocksize > PAGE_SHIFT ||
+	    log_blocksize > inode->i_blkbits) {
+		fsverity_warn(inode, "Unsupported log_blocksize: %u",
+			      log_blocksize);
+		err = -EINVAL;
+		goto out_err;
+	}
+	params->log_blocksize = log_blocksize;
+	params->block_size = 1 << log_blocksize;
+	params->log_blocks_per_page = PAGE_SHIFT - log_blocksize;
+	params->blocks_per_page = 1 << params->log_blocks_per_page;
+
+	/* ilog2() below is only exact for a power-of-2 digest size. */
+	if (WARN_ON_ONCE(!is_power_of_2(params->digest_size))) {
+		err = -EINVAL;
+		goto out_err;
+	}
+	if (params->block_size < 2 * params->digest_size) {
+		fsverity_warn(inode,
+			      "Merkle tree block size (%u) too small for hash algorithm \"%s\"",
+			      params->block_size, hash_alg->name);
+		err = -EINVAL;
+		goto out_err;
+	}
+	params->log_digestsize = ilog2(params->digest_size);
+	params->log_arity = log_blocksize - params->log_digestsize;
+	params->hashes_per_block = 1 << params->log_arity;
+
+	/*
+	 * Compute the number of levels in the Merkle tree and create a map from
+	 * level to the starting block of that level. Level 'num_levels - 1' is
+	 * the root and is stored first. Level 0 is the level directly "above"
+	 * the data blocks and is stored last.
+	 */
+
+	/* Compute number of levels and the number of blocks in each level */
+	blocks = ((u64)inode->i_size + params->block_size - 1) >> log_blocksize;
+	while (blocks > 1) {
+		if (params->num_levels >= FS_VERITY_MAX_LEVELS) {
+			fsverity_err(inode, "Too many levels in Merkle tree");
+			err = -EFBIG;
+			goto out_err;
+		}
+		blocks = (blocks + params->hashes_per_block - 1) >>
+			 params->log_arity;
+		blocks_in_level[params->num_levels++] = blocks;
+	}
+
+	/* Compute the starting block of each level */
+	offset = 0;
+	for (level = (int)params->num_levels - 1; level >= 0; level--) {
+		params->level_start[level] = offset;
+		offset += blocks_in_level[level];
+	}
+
+	/*
+	 * With block_size != PAGE_SIZE, an in-memory bitmap will need to be
+	 * allocated to track the "verified" status of hash blocks. Don't allow
+	 * this bitmap to get too large. For now, limit it to 1 MiB, which
+	 * limits the file size to about 4.4 TB with SHA-256 and 4K blocks.
+	 *
+	 * Together with the fact that the data, and thus also the Merkle tree,
+	 * cannot have more than ULONG_MAX pages, this implies that hash block
+	 * indices can always fit in an 'unsigned long'. But to be safe, we
+	 * explicitly check for that too. Note, this is only for hash block
+	 * indices; data block indices might not fit in an 'unsigned long'.
+	 *
+	 * At this point 'offset' is the total number of hash blocks in the
+	 * tree, i.e. the sum of blocks_in_level[] over all levels.
+	 */
+	if ((params->block_size != PAGE_SIZE && offset > 1 << 23) ||
+	    offset > ULONG_MAX) {
+		fsverity_err(inode, "Too many blocks in Merkle tree");
+		err = -EFBIG;
+		goto out_err;
+	}
+
+	params->tree_size = offset << log_blocksize;
+	params->tree_pages = PAGE_ALIGN(params->tree_size) >> PAGE_SHIFT;
+	return 0;
+
+out_err:
+	/* Undo everything: release the hash state and hand back a zeroed struct. */
+	kfree(params->hashstate);
+	memset(params, 0, sizeof(*params));
+	return err;
+}
+
+/*
+ * Compute the file digest by hashing the fsverity_descriptor excluding the
+ * builtin signature and with the sig_size field set to 0.
+ *
+ * @desc->sig_size is zeroed only while the hash is computed and is restored
+ * before returning, whether or not hashing succeeded.
+ */
+static int compute_file_digest(const struct fsverity_hash_alg *hash_alg,
+			       struct fsverity_descriptor *desc,
+			       u8 *file_digest)
+{
+	__le32 sig_size = desc->sig_size;
+	int err;
+
+	desc->sig_size = 0;
+	err = fsverity_hash_buffer(hash_alg, desc, sizeof(*desc), file_digest);
+	desc->sig_size = sig_size;
+
+	return err;
+}
+
+/*
+ * Create a new fsverity_info from the given fsverity_descriptor (with optional
+ * appended builtin signature), and check the signature if present. The
+ * fsverity_descriptor must have already undergone basic validation.
+ */
+struct fsverity_info *fsverity_create_info(const struct inode *inode,
+					   struct fsverity_descriptor *desc)
+{
+	struct fsverity_info *vi;
+	int err;
+
+	vi = kmem_cache_zalloc(fsverity_info_cachep, GFP_KERNEL);
+	if (!vi)
+		return ERR_PTR(-ENOMEM);
+	vi->inode = inode;
+
+	err = fsverity_init_merkle_tree_params(&vi->tree_params, inode,
+					       desc->hash_algorithm,
+					       desc->log_blocksize,
+					       desc->salt, desc->salt_size);
+	if (err) {
+		fsverity_err(inode,
+			     "Error %d initializing Merkle tree parameters",
+			     err);
+		goto fail;
+	}
+
+	memcpy(vi->root_hash, desc->root_hash, vi->tree_params.digest_size);
+
+	err = compute_file_digest(vi->tree_params.hash_alg, desc,
+				  vi->file_digest);
+	if (err) {
+		fsverity_err(inode, "Error %d computing file digest", err);
+		goto fail;
+	}
+
+	/* Needs vi->file_digest, so this must follow compute_file_digest(). */
+	err = fsverity_verify_signature(vi, desc->signature,
+					le32_to_cpu(desc->sig_size));
+	if (err)
+		goto fail;
+
+	if (vi->tree_params.block_size != PAGE_SIZE) {
+		/*
+		 * When the Merkle tree block size and page size differ, we use
+		 * a bitmap to keep track of which hash blocks have been
+		 * verified. This bitmap must contain one bit per hash block,
+		 * including alignment to a page boundary at the end.
+		 *
+		 * Eventually, to support extremely large files in an efficient
+		 * way, it might be necessary to make pages of this bitmap
+		 * reclaimable. But for now, simply allocating the whole bitmap
+		 * is a simple solution that works well on the files on which
+		 * fsverity is realistically used. E.g., with SHA-256 and 4K
+		 * blocks, a 100MB file only needs a 24-byte bitmap, and the
+		 * bitmap for any file under 17GB fits in a 4K page.
+		 */
+		unsigned long num_bits =
+			vi->tree_params.tree_pages <<
+			vi->tree_params.log_blocks_per_page;
+
+		vi->hash_block_verified = kvcalloc(BITS_TO_LONGS(num_bits),
+						   sizeof(unsigned long),
+						   GFP_KERNEL);
+		if (!vi->hash_block_verified) {
+			err = -ENOMEM;
+			goto fail;
+		}
+		spin_lock_init(&vi->hash_page_init_lock);
+	}
+
+	return vi;
+
+fail:
+	/* Releases whatever part of @vi was set up before the failure. */
+	fsverity_free_info(vi);
+	return ERR_PTR(err);
+}
+
+/*
+ * Install @vi as @inode's ->i_verity_info. If another task installed one
+ * first, @vi is freed here and the existing fsverity_info stays in place.
+ */
+void fsverity_set_info(struct inode *inode, struct fsverity_info *vi)
+{
+	/*
+	 * Multiple tasks may race to set ->i_verity_info, so use
+	 * cmpxchg_release(). This pairs with the smp_load_acquire() in
+	 * fsverity_get_info(). I.e., here we publish ->i_verity_info with a
+	 * RELEASE barrier so that other tasks can ACQUIRE it.
+	 */
+	if (cmpxchg_release(&inode->i_verity_info, NULL, vi) != NULL) {
+		/* Lost the race, so free the fsverity_info we allocated. */
+		fsverity_free_info(vi);
+		/*
+		 * Afterwards, the caller may access ->i_verity_info directly,
+		 * so make sure to ACQUIRE the winning fsverity_info.
+		 */
+		(void)fsverity_get_info(inode);
+	}
+}
+
+/*
+ * Free @vi together with the hash state and the "verified" bitmap it owns.
+ * A NULL @vi is ignored.
+ */
+void fsverity_free_info(struct fsverity_info *vi)
+{
+	if (!vi)
+		return;
+	kfree(vi->tree_params.hashstate);
+	kvfree(vi->hash_block_verified);
+	kmem_cache_free(fsverity_info_cachep, vi);
+}
+
+/*
+ * Basic validation of a descriptor of @desc_size bytes that was read for
+ * @inode. Checks, in order: the size covers the fixed-size struct, the version
+ * is 1, the reserved field is all zeroes, salt_size fits in the salt array,
+ * data_size equals the inode's i_size, and sig_size does not exceed the bytes
+ * that follow the fixed-size struct. Logs an error and returns false at the
+ * first check that fails; returns true if all pass.
+ */
+static bool validate_fsverity_descriptor(struct inode *inode,
+					 const struct fsverity_descriptor *desc,
+					 size_t desc_size)
+{
+	if (desc_size < sizeof(*desc)) {
+		fsverity_err(inode, "Unrecognized descriptor size: %zu bytes",
+			     desc_size);
+		return false;
+	}
+
+	if (desc->version != 1) {
+		fsverity_err(inode, "Unrecognized descriptor version: %u",
+			     desc->version);
+		return false;
+	}
+
+	if (memchr_inv(desc->__reserved, 0, sizeof(desc->__reserved))) {
+		fsverity_err(inode, "Reserved bits set in descriptor");
+		return false;
+	}
+
+	if (desc->salt_size > sizeof(desc->salt)) {
+		fsverity_err(inode, "Invalid salt_size: %u", desc->salt_size);
+		return false;
+	}
+
+	if (le64_to_cpu(desc->data_size) != inode->i_size) {
+		fsverity_err(inode,
+			     "Wrong data_size: %llu (desc) != %lld (inode)",
+			     le64_to_cpu(desc->data_size), inode->i_size);
+		return false;
+	}
+
+	/* Cannot underflow: desc_size >= sizeof(*desc) was checked above. */
+	if (le32_to_cpu(desc->sig_size) > desc_size - sizeof(*desc)) {
+		fsverity_err(inode, "Signature overflows verity descriptor");
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * Read the inode's fsverity_descriptor (with optional appended builtin
+ * signature) from the filesystem, and do basic validation of it.
+ */
+int fsverity_get_descriptor(struct inode *inode,
+			    struct fsverity_descriptor **desc_ret)
+{
+	int res;
+	struct fsverity_descriptor *desc;
+
+	/* First call, with no buffer: ask only for the descriptor's size. */
+	res = inode->i_sb->s_vop->get_verity_descriptor(inode, NULL, 0);
+	if (res < 0) {
+		fsverity_err(inode,
+			     "Error %d getting verity descriptor size", res);
+		return res;
+	}
+	if (res > FS_VERITY_MAX_DESCRIPTOR_SIZE) {
+		fsverity_err(inode, "Verity descriptor is too large (%d bytes)",
+			     res);
+		return -EMSGSIZE;
+	}
+	desc = kmalloc(res, GFP_KERNEL);
+	if (!desc)
+		return -ENOMEM;
+	/* Second call: read the descriptor into the buffer just allocated. */
+	res = inode->i_sb->s_vop->get_verity_descriptor(inode, desc, res);
+	if (res < 0) {
+		fsverity_err(inode, "Error %d reading verity descriptor", res);
+		kfree(desc);
+		return res;
+	}
+
+	if (!validate_fsverity_descriptor(inode, desc, res)) {
+		kfree(desc);
+		return -EINVAL;
+	}
+
+	/* Success: ownership of @desc passes to the caller, who must kfree() it. */
+	*desc_ret = desc;
+	return 0;
+}
+
+/* Ensure the inode has an ->i_verity_info */
+static int ensure_verity_info(struct inode *inode)
+{
+	struct fsverity_info *vi = fsverity_get_info(inode);
+	struct fsverity_descriptor *desc;
+	int err;
+
+	if (vi)
+		return 0;
+
+	err = fsverity_get_descriptor(inode, &desc);
+	if (err)
+		return err;
+
+	vi = fsverity_create_info(inode, desc);
+	if (IS_ERR(vi)) {
+		err = PTR_ERR(vi);
+		goto out_free_desc;
+	}
+
+	fsverity_set_info(inode, vi);
+	err = 0;
+out_free_desc:
+	/* The descriptor is freed on both the success and the failure path. */
+	kfree(desc);
+	return err;
+}
+
+/*
+ * Refuse to open @filp for writing (-EPERM); otherwise make sure @inode has
+ * its ->i_verity_info set up.
+ */
+int __fsverity_file_open(struct inode *inode, struct file *filp)
+{
+	if (filp->f_mode & FMODE_WRITE)
+		return -EPERM;
+	return ensure_verity_info(inode);
+}
+EXPORT_SYMBOL_GPL(__fsverity_file_open);
+
+/* Refuse any attribute change that includes ATTR_SIZE (-EPERM). */
+int __fsverity_prepare_setattr(struct dentry *dentry, struct iattr *attr)
+{
+	if (attr->ia_valid & ATTR_SIZE)
+		return -EPERM;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(__fsverity_prepare_setattr);
+
+/* Free the inode's fsverity_info, if any, and clear ->i_verity_info. */
+void __fsverity_cleanup_inode(struct inode *inode)
+{
+	fsverity_free_info(inode->i_verity_info);
+	inode->i_verity_info = NULL;
+}
+EXPORT_SYMBOL_GPL(__fsverity_cleanup_inode);
+
+/*
+ * Create the slab cache for struct fsverity_info. The file_digest field is the
+ * one named to KMEM_CACHE_USERCOPY(); fsverity_ioctl_measure() copies that
+ * field to userspace.
+ */
+void __init fsverity_init_info_cache(void)
+{
+	fsverity_info_cachep = KMEM_CACHE_USERCOPY(
+					fsverity_info,
+					SLAB_RECLAIM_ACCOUNT | SLAB_PANIC,
+					file_digest);
+}
diff --git a/fs/verity/read_metadata.c b/fs/verity/read_metadata.c
new file mode 100644
index 0000000000..f58432772d
--- /dev/null
+++ b/fs/verity/read_metadata.c
@@ -0,0 +1,194 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Ioctl to read verity metadata
+ *
+ * Copyright 2021 Google LLC
+ */
+
+#include "fsverity_private.h"
+
+#include <linux/backing-dev.h>
+#include <linux/highmem.h>
+#include <linux/sched/signal.h>
+#include <linux/uaccess.h>
+
+/*
+ * Copy up to @length bytes of the Merkle tree, starting at byte @offset, to
+ * the user buffer @buf. The range is clamped to the size of the tree.
+ *
+ * Returns the number of bytes copied if any were copied, even when an error
+ * or fatal signal then stopped the loop. If nothing was copied, returns 0 when
+ * @offset is at or past the end of the clamped range, else the -errno.
+ */
+static int fsverity_read_merkle_tree(struct inode *inode,
+				     const struct fsverity_info *vi,
+				     void __user *buf, u64 offset, int length)
+{
+	const struct fsverity_operations *vops = inode->i_sb->s_vop;
+	u64 end_offset;
+	unsigned int offs_in_page;
+	pgoff_t index, last_index;
+	int retval = 0;
+	int err = 0;
+
+	end_offset = min(offset + length, vi->tree_params.tree_size);
+	if (offset >= end_offset)
+		return 0;
+	offs_in_page = offset_in_page(offset);
+	last_index = (end_offset - 1) >> PAGE_SHIFT;
+
+	/*
+	 * Iterate through each Merkle tree page in the requested range and copy
+	 * the requested portion to userspace. Note that the Merkle tree block
+	 * size isn't important here, as we are returning a byte stream; i.e.,
+	 * we can just work with pages even if the tree block size != PAGE_SIZE.
+	 */
+	for (index = offset >> PAGE_SHIFT; index <= last_index; index++) {
+		unsigned long num_ra_pages =
+			min_t(unsigned long, last_index - index + 1,
+			      inode->i_sb->s_bdi->io_pages);
+		unsigned int bytes_to_copy = min_t(u64, end_offset - offset,
+						   PAGE_SIZE - offs_in_page);
+		struct page *page;
+		const void *virt;
+
+		page = vops->read_merkle_tree_page(inode, index, num_ra_pages);
+		if (IS_ERR(page)) {
+			err = PTR_ERR(page);
+			fsverity_err(inode,
+				     "Error %d reading Merkle tree page %lu",
+				     err, index);
+			break;
+		}
+
+		virt = kmap_local_page(page);
+		if (copy_to_user(buf, virt + offs_in_page, bytes_to_copy)) {
+			kunmap_local(virt);
+			put_page(page);
+			err = -EFAULT;
+			break;
+		}
+		kunmap_local(virt);
+		put_page(page);
+
+		retval += bytes_to_copy;
+		buf += bytes_to_copy;
+		offset += bytes_to_copy;
+
+		if (fatal_signal_pending(current))  {
+			err = -EINTR;
+			break;
+		}
+		cond_resched();
+		/* Only the first page can start at a nonzero in-page offset. */
+		offs_in_page = 0;
+	}
+	return retval ? retval : err;
+}
+
+/* Copy the requested portion of the buffer to userspace.
*/
+static int fsverity_read_buffer(void __user *dst, u64 offset, int length,
+				const void *src, size_t src_length)
+{
+	/* Reading at or past the end of the buffer copies nothing. */
+	if (offset >= src_length)
+		return 0;
+	src += offset;
+	src_length -= offset;
+
+	/* Clamp the request to what remains after @offset. */
+	length = min_t(size_t, length, src_length);
+
+	if (copy_to_user(dst, src, length))
+		return -EFAULT;
+
+	return length;
+}
+
+/*
+ * Copy the requested portion of the inode's verity descriptor to @buf, without
+ * the builtin signature and with sig_size reported as 0.
+ */
+static int fsverity_read_descriptor(struct inode *inode,
+				    void __user *buf, u64 offset, int length)
+{
+	struct fsverity_descriptor *desc;
+	size_t desc_size;
+	int res;
+
+	res = fsverity_get_descriptor(inode, &desc);
+	if (res)
+		return res;
+
+	/* don't include the builtin signature */
+	desc_size = offsetof(struct fsverity_descriptor, signature);
+	desc->sig_size = 0;
+
+	res = fsverity_read_buffer(buf, offset, length, desc, desc_size);
+
+	kfree(desc);
+	return res;
+}
+
+/*
+ * Copy the requested portion of the inode's builtin signature to @buf.
+ * Returns -ENODATA if the descriptor's sig_size is 0.
+ */
+static int fsverity_read_signature(struct inode *inode,
+				   void __user *buf, u64 offset, int length)
+{
+	struct fsverity_descriptor *desc;
+	int res;
+
+	res = fsverity_get_descriptor(inode, &desc);
+	if (res)
+		return res;
+
+	if (desc->sig_size == 0) {
+		res = -ENODATA;
+		goto out;
+	}
+
+	/*
+	 * Include only the builtin signature. fsverity_get_descriptor()
+	 * already verified that sig_size is in-bounds.
+	 */
+	res = fsverity_read_buffer(buf, offset, length, desc->signature,
+				   le32_to_cpu(desc->sig_size));
+out:
+	kfree(desc);
+	return res;
+}
+
+/**
+ * fsverity_ioctl_read_metadata() - read verity metadata from a file
+ * @filp: file to read the metadata from
+ * @uarg: user pointer to fsverity_read_metadata_arg
+ *
+ * Return: length read on success, 0 on EOF, -errno on failure
+ */
+int fsverity_ioctl_read_metadata(struct file *filp, const void __user *uarg)
+{
+	struct inode *inode = file_inode(filp);
+	const struct fsverity_info *vi;
+	struct fsverity_read_metadata_arg arg;
+	int length;
+	void __user *buf;
+
+	vi = fsverity_get_info(inode);
+	if (!vi)
+		return -ENODATA; /* not a verity file */
+	/*
+	 * Note that we don't have to explicitly check that the file is open for
+	 * reading, since verity files can only be opened for reading.
+	 */
+
+	if (copy_from_user(&arg, uarg, sizeof(arg)))
+		return -EFAULT;
+
+	if (arg.__reserved)
+		return -EINVAL;
+
+	/* offset + length must not overflow. */
+	if (arg.offset + arg.length < arg.offset)
+		return -EINVAL;
+
+	/* Ensure that the return value will fit in INT_MAX. */
+	length = min_t(u64, arg.length, INT_MAX);
+
+	buf = u64_to_user_ptr(arg.buf_ptr);
+
+	/* Dispatch on the kind of metadata requested; unknown kinds are rejected. */
+	switch (arg.metadata_type) {
+	case FS_VERITY_METADATA_TYPE_MERKLE_TREE:
+		return fsverity_read_merkle_tree(inode, vi, buf, arg.offset,
+						 length);
+	case FS_VERITY_METADATA_TYPE_DESCRIPTOR:
+		return fsverity_read_descriptor(inode, buf, arg.offset, length);
+	case FS_VERITY_METADATA_TYPE_SIGNATURE:
+		return fsverity_read_signature(inode, buf, arg.offset, length);
+	default:
+		return -EINVAL;
+	}
+}
+EXPORT_SYMBOL_GPL(fsverity_ioctl_read_metadata);
diff --git a/fs/verity/signature.c b/fs/verity/signature.c
new file mode 100644
index 0000000000..90c07573dd
--- /dev/null
+++ b/fs/verity/signature.c
@@ -0,0 +1,122 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Verification of builtin signatures
+ *
+ * Copyright 2019 Google LLC
+ */
+
+/*
+ * This file implements verification of fs-verity builtin signatures. Please
+ * take great care before using this feature. It is not the only way to do
+ * signatures with fs-verity, and the alternatives (such as userspace signature
+ * verification, and IMA appraisal) can be much better. For details about the
+ * limitations of this feature, see Documentation/filesystems/fsverity.rst.
+ */
+
+#include "fsverity_private.h"
+
+#include <linux/cred.h>
+#include <linux/key.h>
+#include <linux/slab.h>
+#include <linux/verification.h>
+
+/*
+ * /proc/sys/fs/verity/require_signatures
+ * If 1, all verity files must have a valid builtin signature.
+ */
+int fsverity_require_signatures;
+
+/*
+ * Keyring that contains the trusted X.509 certificates.
+ *
+ * Only root (kuid=0) can modify this. Also, root may use
+ * keyctl_restrict_keyring() to prevent any more additions.
+ */
+static struct key *fsverity_keyring;
+
+/**
+ * fsverity_verify_signature() - check a verity file's signature
+ * @vi: the file's fsverity_info
+ * @signature: the file's built-in signature
+ * @sig_size: size of signature in bytes, or 0 if no signature
+ *
+ * If the file includes a signature of its fs-verity file digest, verify it
+ * against the certificates in the fs-verity keyring.
+ *
+ * Return: 0 on success (signature valid or not required); -errno on failure
+ */
+int fsverity_verify_signature(const struct fsverity_info *vi,
+			      const u8 *signature, size_t sig_size)
+{
+	const struct inode *inode = vi->inode;
+	const struct fsverity_hash_alg *hash_alg = vi->tree_params.hash_alg;
+	struct fsverity_formatted_digest *d;
+	int err;
+
+	/* Unsigned file: accepted unless require_signatures is set. */
+	if (sig_size == 0) {
+		if (fsverity_require_signatures) {
+			fsverity_err(inode,
+				     "require_signatures=1, rejecting unsigned file!");
+			return -EPERM;
+		}
+		return 0;
+	}
+
+	if (fsverity_keyring->keys.nr_leaves_on_tree == 0) {
+		/*
+		 * The ".fs-verity" keyring is empty, due to builtin signatures
+		 * being supported by the kernel but not actually being used.
+		 * In this case, verify_pkcs7_signature() would always return an
+		 * error, usually ENOKEY. It could also be EBADMSG if the
+		 * PKCS#7 is malformed, but that isn't very important to
+		 * distinguish. So, just skip to ENOKEY to avoid the attack
+		 * surface of the PKCS#7 parser, which would otherwise be
+		 * reachable by any task able to execute FS_IOC_ENABLE_VERITY.
+		 */
+		fsverity_err(inode,
+			     "fs-verity keyring is empty, rejecting signed file!");
+		return -ENOKEY;
+	}
+
+	/*
+	 * The signed message is a fsverity_formatted_digest: the magic
+	 * "FSVerity", the algorithm number and digest size as little-endian
+	 * 16-bit values, then the raw file digest.
+	 */
+	d = kzalloc(sizeof(*d) + hash_alg->digest_size, GFP_KERNEL);
+	if (!d)
+		return -ENOMEM;
+	memcpy(d->magic, "FSVerity", 8);
+	d->digest_algorithm = cpu_to_le16(hash_alg - fsverity_hash_algs);
+	d->digest_size = cpu_to_le16(hash_alg->digest_size);
+	memcpy(d->digest, vi->file_digest, hash_alg->digest_size);
+
+	err = verify_pkcs7_signature(d, sizeof(*d) + hash_alg->digest_size,
+				     signature, sig_size, fsverity_keyring,
+				     VERIFYING_UNSPECIFIED_SIGNATURE,
+				     NULL, NULL);
+	kfree(d);
+
+	/* Log a message specific to the error, then return the error as-is. */
+	if (err) {
+		if (err == -ENOKEY)
+			fsverity_err(inode,
+				     "File's signing cert isn't in the fs-verity keyring");
+		else if (err == -EKEYREJECTED)
+			fsverity_err(inode, "Incorrect file signature");
+		else if (err == -EBADMSG)
+			fsverity_err(inode, "Malformed file signature");
+		else
+			fsverity_err(inode, "Error %d verifying file signature",
+				     err);
+		return err;
+	}
+
+	return 0;
+}
+
+/* Allocate the ".fs-verity" keyring; panics if the allocation fails. */
+void __init fsverity_init_signature(void)
+{
+	fsverity_keyring =
+		keyring_alloc(".fs-verity", KUIDT_INIT(0), KGIDT_INIT(0),
+			      current_cred(), KEY_POS_SEARCH |
+				KEY_USR_VIEW | KEY_USR_READ | KEY_USR_WRITE |
+				KEY_USR_SEARCH | KEY_USR_SETATTR,
+			      KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL);
+	if (IS_ERR(fsverity_keyring))
+		panic("failed to allocate \".fs-verity\" keyring");
+}
diff --git a/fs/verity/verify.c b/fs/verity/verify.c
new file mode 100644
index 0000000000..904ccd7e8e
--- /dev/null
+++ b/fs/verity/verify.c
@@ -0,0 +1,364 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Data verification functions, i.e. hooks for ->readahead()
+ *
+ * Copyright 2019 Google LLC
+ */
+
+#include "fsverity_private.h"
+
+#include <crypto/hash.h>
+#include <linux/bio.h>
+
+static struct workqueue_struct *fsverity_read_workqueue;
+
+/*
+ * Returns true if the hash block with index @hblock_idx in the tree, located in
+ * @hpage, has already been verified.
+ */
+static bool is_hash_block_verified(struct fsverity_info *vi, struct page *hpage,
+				   unsigned long hblock_idx)
+{
+	bool verified;
+	unsigned int blocks_per_page;
+	unsigned int i;
+
+	/*
+	 * When the Merkle tree block size and page size are the same, then the
+	 * ->hash_block_verified bitmap isn't allocated, and we use PG_checked
+	 * to directly indicate whether the page's block has been verified.
+	 *
+	 * Using PG_checked also guarantees that we re-verify hash pages that
+	 * get evicted and re-instantiated from the backing storage, as new
+	 * pages always start out with PG_checked cleared.
+	 */
+	if (!vi->hash_block_verified)
+		return PageChecked(hpage);
+
+	/*
+	 * When the Merkle tree block size and page size differ, we use a bitmap
+	 * to indicate whether each hash block has been verified.
+	 *
+	 * However, we still need to ensure that hash pages that get evicted and
+	 * re-instantiated from the backing storage are re-verified. To do
+	 * this, we use PG_checked again, but now it doesn't really mean
+	 * "checked". Instead, now it just serves as an indicator for whether
+	 * the hash page is newly instantiated or not.
+	 *
+	 * The first thread that sees PG_checked=0 must clear the corresponding
+	 * bitmap bits, then set PG_checked=1. This requires a spinlock. To
+	 * avoid having to take this spinlock in the common case of
+	 * PG_checked=1, we start with an opportunistic lockless read.
+	 */
+	if (PageChecked(hpage)) {
+		/*
+		 * A read memory barrier is needed here to give ACQUIRE
+		 * semantics to the above PageChecked() test.
+		 */
+		smp_rmb();
+		return test_bit(hblock_idx, vi->hash_block_verified);
+	}
+	spin_lock(&vi->hash_page_init_lock);
+	/* Re-check under the lock: another thread may have initialized the page. */
+	if (PageChecked(hpage)) {
+		verified = test_bit(hblock_idx, vi->hash_block_verified);
+	} else {
+		/*
+		 * Newly instantiated page: clear the bit of every hash block
+		 * in it, starting from the first block of the page.
+		 */
+		blocks_per_page = vi->tree_params.blocks_per_page;
+		hblock_idx = round_down(hblock_idx, blocks_per_page);
+		for (i = 0; i < blocks_per_page; i++)
+			clear_bit(hblock_idx + i, vi->hash_block_verified);
+		/*
+		 * A write memory barrier is needed here to give RELEASE
+		 * semantics to the below SetPageChecked() operation.
+		 */
+		smp_wmb();
+		SetPageChecked(hpage);
+		verified = false;
+	}
+	spin_unlock(&vi->hash_page_init_lock);
+	return verified;
+}
+
+/*
+ * Verify a single data block against the file's Merkle tree.
+ *
+ * In principle, we need to verify the entire path to the root node. However,
+ * for efficiency the filesystem may cache the hash blocks. Therefore we need
+ * only ascend the tree until an already-verified hash block is seen, and then
+ * verify the path to that block.
+ *
+ * Return: %true if the data block is valid, else %false.
+ */
+static bool
+verify_data_block(struct inode *inode, struct fsverity_info *vi,
+		  const void *data, u64 data_pos, unsigned long max_ra_pages)
+{
+	const struct merkle_tree_params *params = &vi->tree_params;
+	const unsigned int hsize = params->digest_size;
+	int level;
+	u8 _want_hash[FS_VERITY_MAX_DIGEST_SIZE];
+	const u8 *want_hash;
+	u8 real_hash[FS_VERITY_MAX_DIGEST_SIZE];
+	/* The hash blocks that are traversed, indexed by level */
+	struct {
+		/* Page containing the hash block */
+		struct page *page;
+		/* Mapped address of the hash block (will be within @page) */
+		const void *addr;
+		/* Index of the hash block in the tree overall */
+		unsigned long index;
+		/* Byte offset of the wanted hash relative to @addr */
+		unsigned int hoffset;
+	} hblocks[FS_VERITY_MAX_LEVELS];
+	/*
+	 * The index of the previous level's block within that level; also the
+	 * index of that block's hash within the current level.
+	 */
+	u64 hidx = data_pos >> params->log_blocksize;
+
+	/* Up to 1 + FS_VERITY_MAX_LEVELS pages may be mapped at once */
+	BUILD_BUG_ON(1 + FS_VERITY_MAX_LEVELS > KM_MAX_IDX);
+
+	if (unlikely(data_pos >= inode->i_size)) {
+		/*
+		 * This can happen in the data page spanning EOF when the Merkle
+		 * tree block size is less than the page size. The Merkle tree
+		 * doesn't cover data blocks fully past EOF. But the entire
+		 * page spanning EOF can be visible to userspace via a mmap, and
+		 * any part past EOF should be all zeroes. Therefore, we need
+		 * to verify that any data blocks fully past EOF are all zeroes.
+		 */
+		if (memchr_inv(data, 0, params->block_size)) {
+			fsverity_err(inode,
+				     "FILE CORRUPTED! Data past EOF is not zeroed");
+			return false;
+		}
+		return true;
+	}
+
+	/*
+	 * Starting at the leaf level, ascend the tree saving hash blocks along
+	 * the way until we find a hash block that has already been verified, or
+	 * until we reach the root.
+	 */
+	for (level = 0; level < params->num_levels; level++) {
+		unsigned long next_hidx;
+		unsigned long hblock_idx;
+		pgoff_t hpage_idx;
+		unsigned int hblock_offset_in_page;
+		unsigned int hoffset;
+		struct page *hpage;
+		const void *haddr;
+
+		/*
+		 * The index of the block in the current level; also the index
+		 * of that block's hash within the next level.
+		 */
+		next_hidx = hidx >> params->log_arity;
+
+		/* Index of the hash block in the tree overall */
+		hblock_idx = params->level_start[level] + next_hidx;
+
+		/* Index of the hash page in the tree overall */
+		hpage_idx = hblock_idx >> params->log_blocks_per_page;
+
+		/* Byte offset of the hash block within the page */
+		hblock_offset_in_page =
+			(hblock_idx << params->log_blocksize) & ~PAGE_MASK;
+
+		/* Byte offset of the hash within the block */
+		hoffset = (hidx << params->log_digestsize) &
+			  (params->block_size - 1);
+
+		/* Readahead is requested only at level 0, capped at the tree's end. */
+		hpage = inode->i_sb->s_vop->read_merkle_tree_page(inode,
+				hpage_idx, level == 0 ? min(max_ra_pages,
+					params->tree_pages - hpage_idx) : 0);
+		if (IS_ERR(hpage)) {
+			fsverity_err(inode,
+				     "Error %ld reading Merkle tree page %lu",
+				     PTR_ERR(hpage), hpage_idx);
+			goto error;
+		}
+		haddr = kmap_local_page(hpage) + hblock_offset_in_page;
+		if (is_hash_block_verified(vi, hpage, hblock_idx)) {
+			/*
+			 * Copy the wanted hash out before unmapping, since
+			 * this block is not kept in hblocks[].
+			 */
+			memcpy(_want_hash, haddr + hoffset, hsize);
+			want_hash = _want_hash;
+			kunmap_local(haddr);
+			put_page(hpage);
+			goto descend;
+		}
+		hblocks[level].page = hpage;
+		hblocks[level].addr = haddr;
+		hblocks[level].index = hblock_idx;
+		hblocks[level].hoffset = hoffset;
+		hidx = next_hidx;
+	}
+
+	/* No verified hash block was found on the way up: start from the root. */
+	want_hash = vi->root_hash;
+descend:
+	/* Descend the tree verifying hash blocks. */
+	for (; level > 0; level--) {
+		struct page *hpage = hblocks[level - 1].page;
+		const void *haddr = hblocks[level - 1].addr;
+		unsigned long hblock_idx = hblocks[level - 1].index;
+		unsigned int hoffset = hblocks[level - 1].hoffset;
+
+		if (fsverity_hash_block(params, inode, haddr, real_hash) != 0)
+			goto error;
+		if (memcmp(want_hash, real_hash, hsize) != 0)
+			goto corrupted;
+		/*
+		 * Mark the hash block as verified. This must be atomic and
+		 * idempotent, as the same hash block might be verified by
+		 * multiple threads concurrently.
+		 */
+		if (vi->hash_block_verified)
+			set_bit(hblock_idx, vi->hash_block_verified);
+		else
+			SetPageChecked(hpage);
+		memcpy(_want_hash, haddr + hoffset, hsize);
+		want_hash = _want_hash;
+		kunmap_local(haddr);
+		put_page(hpage);
+	}
+
+	/* Finally, verify the data block. */
+	if (fsverity_hash_block(params, inode, data, real_hash) != 0)
+		goto error;
+	if (memcmp(want_hash, real_hash, hsize) != 0)
+		goto corrupted;
+	return true;
+
+corrupted:
+	fsverity_err(inode,
+		     "FILE CORRUPTED! 
pos=%llu, level=%d, want_hash=%s:%*phN, real_hash=%s:%*phN",
+		     data_pos, level - 1,
+		     params->hash_alg->name, hsize, want_hash,
+		     params->hash_alg->name, hsize, real_hash);
+error:
+	/* Unmap and release the hash blocks still held in hblocks[0..level-1]. */
+	for (; level > 0; level--) {
+		kunmap_local(hblocks[level - 1].addr);
+		put_page(hblocks[level - 1].page);
+	}
+	return false;
+}
+
+/*
+ * Verify @len bytes of @data_folio starting at byte @offset within the folio,
+ * one Merkle tree block at a time. Returns false (with a one-time warning) if
+ * @len is 0, if @len or @offset is not block-aligned, or if the folio is not
+ * locked or is already uptodate; returns false at the first block that fails
+ * verification; otherwise returns true.
+ */
+static bool
+verify_data_blocks(struct folio *data_folio, size_t len, size_t offset,
+		   unsigned long max_ra_pages)
+{
+	struct inode *inode = data_folio->mapping->host;
+	struct fsverity_info *vi = inode->i_verity_info;
+	const unsigned int block_size = vi->tree_params.block_size;
+	u64 pos = (u64)data_folio->index << PAGE_SHIFT;
+
+	if (WARN_ON_ONCE(len <= 0 || !IS_ALIGNED(len | offset, block_size)))
+		return false;
+	if (WARN_ON_ONCE(!folio_test_locked(data_folio) ||
+			 folio_test_uptodate(data_folio)))
+		return false;
+	do {
+		void *data;
+		bool valid;
+
+		data = kmap_local_folio(data_folio, offset);
+		valid = verify_data_block(inode, vi, data, pos + offset,
+					  max_ra_pages);
+		kunmap_local(data);
+		if (!valid)
+			return false;
+		offset += block_size;
+		len -= block_size;
+	} while (len);
+	return true;
+}
+
+/**
+ * fsverity_verify_blocks() - verify data in a folio
+ * @folio: the folio containing the data to verify
+ * @len: the length of the data to verify in the folio
+ * @offset: the offset of the data to verify in the folio
+ *
+ * Verify data that has just been read from a verity file. The data must be
+ * located in a pagecache folio that is still locked and not yet uptodate. The
+ * length and offset of the data must be Merkle tree block size aligned.
+ *
+ * Return: %true if the data is valid, else %false.
+ */
+bool fsverity_verify_blocks(struct folio *folio, size_t len, size_t offset)
+{
+	/* max_ra_pages == 0: request no extra Merkle tree pages as readahead. */
+	return verify_data_blocks(folio, len, offset, 0);
+}
+EXPORT_SYMBOL_GPL(fsverity_verify_blocks);
+
+#ifdef CONFIG_BLOCK
+/**
+ * fsverity_verify_bio() - verify a 'read' bio that has just completed
+ * @bio: the bio to verify
+ *
+ * Verify the bio's data against the file's Merkle tree. All bio data segments
+ * must be aligned to the file's Merkle tree block size. If any data fails
+ * verification, then bio->bi_status is set to an error status.
+ *
+ * This is a helper function for use by the ->readahead() method of filesystems
+ * that issue bios to read data directly into the page cache. Filesystems that
+ * populate the page cache without issuing bios (e.g. non block-based
+ * filesystems) must instead call fsverity_verify_page() directly on each page.
+ * All filesystems must also call fsverity_verify_page() on holes.
+ */
+void fsverity_verify_bio(struct bio *bio)
+{
+	struct folio_iter fi;
+	unsigned long max_ra_pages = 0;
+
+	if (bio->bi_opf & REQ_RAHEAD) {
+		/*
+		 * If this bio is for data readahead, then we also do readahead
+		 * of the first (largest) level of the Merkle tree. Namely,
+		 * when a Merkle tree page is read, we also try to piggy-back on
+		 * some additional pages -- up to 1/4 the number of data pages.
+		 *
+		 * This improves sequential read performance, as it greatly
+		 * reduces the number of I/O requests made to the Merkle tree.
+		 */
+		max_ra_pages = bio->bi_iter.bi_size >> (PAGE_SHIFT + 2);
+	}
+
+	bio_for_each_folio_all(fi, bio) {
+		/* Stop at the first folio that fails and mark the bio as failed. */
+		if (!verify_data_blocks(fi.folio, fi.length, fi.offset,
+					max_ra_pages)) {
+			bio->bi_status = BLK_STS_IOERR;
+			break;
+		}
+	}
+}
+EXPORT_SYMBOL_GPL(fsverity_verify_bio);
+#endif /* CONFIG_BLOCK */
+
+/**
+ * fsverity_enqueue_verify_work() - enqueue work on the fs-verity workqueue
+ * @work: the work to enqueue
+ *
+ * Enqueue verification work for asynchronous processing.
+ */
+void fsverity_enqueue_verify_work(struct work_struct *work)
+{
+	queue_work(fsverity_read_workqueue, work);
+}
+EXPORT_SYMBOL_GPL(fsverity_enqueue_verify_work);
+
+/*
+ * Allocate the workqueue that fsverity_enqueue_verify_work() queues onto;
+ * panics if the allocation fails.
+ */
+void __init fsverity_init_workqueue(void)
+{
+	/*
+	 * Use a high-priority workqueue to prioritize verification work, which
+	 * blocks reads from completing, over regular application tasks.
+	 *
+	 * For performance reasons, don't use an unbound workqueue. Using an
+	 * unbound workqueue for crypto operations causes excessive scheduler
+	 * latency on ARM64.
+	 */
+	fsverity_read_workqueue = alloc_workqueue("fsverity_read_queue",
+						  WQ_HIGHPRI,
+						  num_online_cpus());
+	if (!fsverity_read_workqueue)
+		panic("failed to allocate fsverity_read_queue");
+}  |