summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-08 17:45:29 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-08 17:45:29 +0000
commit83506c85f8d4332b3edfdc8f1fd07aa691415350 (patch)
tree316b9630e093bb3b80e5d6e1c304151b5597901e /fs
parentAdding upstream version 5.10.209. (diff)
downloadlinux-upstream/5.10.216.tar.xz
linux-upstream/5.10.216.zip
Adding upstream version 5.10.216.upstream/5.10.216
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/callback.c3
-rw-r--r--fs/afs/server.c7
-rw-r--r--fs/afs/volume.c4
-rw-r--r--fs/aio.c17
-rw-r--r--fs/btrfs/backref.c12
-rw-r--r--fs/btrfs/ctree.h2
-rw-r--r--fs/btrfs/delayed-inode.c3
-rw-r--r--fs/btrfs/dev-replace.c24
-rw-r--r--fs/btrfs/dir-item.c122
-rw-r--r--fs/btrfs/disk-io.c13
-rw-r--r--fs/btrfs/export.c9
-rw-r--r--fs/btrfs/extent-tree.c3
-rw-r--r--fs/btrfs/inode.c91
-rw-r--r--fs/btrfs/ioctl.c35
-rw-r--r--fs/btrfs/qgroup.c16
-rw-r--r--fs/btrfs/ref-verify.c6
-rw-r--r--fs/btrfs/send.c12
-rw-r--r--fs/btrfs/tree-checker.c27
-rw-r--r--fs/btrfs/tree-log.c14
-rw-r--r--fs/btrfs/volumes.c14
-rw-r--r--fs/cachefiles/bind.c3
-rw-r--r--fs/ceph/caps.c12
-rw-r--r--fs/cifs/smb2misc.c2
-rw-r--r--fs/cifs/smb2ops.c33
-rw-r--r--fs/cifs/smb2pdu.c106
-rw-r--r--fs/cifs/smb2pdu.h42
-rw-r--r--fs/cifs/smb2proto.h12
-rw-r--r--fs/dcache.c7
-rw-r--r--fs/ecryptfs/inode.c8
-rw-r--r--fs/erofs/decompressor.c24
-rw-r--r--fs/exec.c4
-rw-r--r--fs/ext4/extents.c116
-rw-r--r--fs/ext4/extents_status.c14
-rw-r--r--fs/ext4/extents_status.h6
-rw-r--r--fs/ext4/inode.c65
-rw-r--r--fs/ext4/mballoc.c116
-rw-r--r--fs/ext4/move_extent.c6
-rw-r--r--fs/ext4/resize.c40
-rw-r--r--fs/ext4/super.c12
-rw-r--r--fs/f2fs/file.c13
-rw-r--r--fs/f2fs/recovery.c25
-rw-r--r--fs/fat/nfs.c6
-rw-r--r--fs/fhandle.c2
-rw-r--r--fs/fuse/dir.c4
-rw-r--r--fs/fuse/fuse_i.h1
-rw-r--r--fs/fuse/inode.c7
-rw-r--r--fs/hugetlbfs/inode.c23
-rw-r--r--fs/ioctl.c3
-rw-r--r--fs/isofs/inode.c18
-rw-r--r--fs/jbd2/checkpoint.c137
-rw-r--r--fs/jfs/jfs_dmap.c57
-rw-r--r--fs/jfs/jfs_dtree.c7
-rw-r--r--fs/jfs/jfs_imap.c3
-rw-r--r--fs/jfs/jfs_mount.c6
-rw-r--r--fs/kernfs/dir.c12
-rw-r--r--fs/namei.c60
-rw-r--r--fs/nfs/direct.c11
-rw-r--r--fs/nfs/nfs42.h7
-rw-r--r--fs/nfs/nfs4proc.c16
-rw-r--r--fs/nfs/nfsroot.c4
-rw-r--r--fs/nfs/write.c2
-rw-r--r--fs/nilfs2/btree.c9
-rw-r--r--fs/nilfs2/dat.c27
-rw-r--r--fs/nilfs2/dir.c2
-rw-r--r--fs/nilfs2/direct.c9
-rw-r--r--fs/nilfs2/file.c8
-rw-r--r--fs/nilfs2/inode.c2
-rw-r--r--fs/nilfs2/recovery.c7
-rw-r--r--fs/nilfs2/segment.c8
-rw-r--r--fs/pipe.c19
-rw-r--r--fs/pstore/ram.c1
-rw-r--r--fs/pstore/zone.c2
-rw-r--r--fs/quota/dquot.c226
-rw-r--r--fs/select.c2
-rw-r--r--fs/sysfs/file.c2
-rw-r--r--fs/sysv/itree.c10
-rw-r--r--fs/ubifs/dir.c2
-rw-r--r--fs/ubifs/file.c13
-rw-r--r--fs/vboxsf/super.c3
-rw-r--r--fs/zonefs/super.c68
80 files changed, 1130 insertions, 776 deletions
diff --git a/fs/afs/callback.c b/fs/afs/callback.c
index 7d9b23d98..229308c7f 100644
--- a/fs/afs/callback.c
+++ b/fs/afs/callback.c
@@ -70,13 +70,14 @@ static struct afs_volume *afs_lookup_volume_rcu(struct afs_cell *cell,
{
struct afs_volume *volume = NULL;
struct rb_node *p;
- int seq = 0;
+ int seq = 1;
do {
/* Unfortunately, rbtree walking doesn't give reliable results
* under just the RCU read lock, so we have to check for
* changes.
*/
+ seq++; /* 2 on the 1st/lockless path, otherwise odd */
read_seqbegin_or_lock(&cell->volume_lock, &seq);
p = rcu_dereference_raw(cell->volumes.rb_node);
diff --git a/fs/afs/server.c b/fs/afs/server.c
index 684a2b02b..733e3c470 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -27,7 +27,7 @@ struct afs_server *afs_find_server(struct afs_net *net,
const struct afs_addr_list *alist;
struct afs_server *server = NULL;
unsigned int i;
- int seq = 0, diff;
+ int seq = 1, diff;
rcu_read_lock();
@@ -35,6 +35,7 @@ struct afs_server *afs_find_server(struct afs_net *net,
if (server)
afs_unuse_server_notime(net, server, afs_server_trace_put_find_rsq);
server = NULL;
+ seq++; /* 2 on the 1st/lockless path, otherwise odd */
read_seqbegin_or_lock(&net->fs_addr_lock, &seq);
if (srx->transport.family == AF_INET6) {
@@ -90,7 +91,7 @@ struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uu
{
struct afs_server *server = NULL;
struct rb_node *p;
- int diff, seq = 0;
+ int diff, seq = 1;
_enter("%pU", uuid);
@@ -102,7 +103,7 @@ struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uu
if (server)
afs_unuse_server(net, server, afs_server_trace_put_uuid_rsq);
server = NULL;
-
+ seq++; /* 2 on the 1st/lockless path, otherwise odd */
read_seqbegin_or_lock(&net->fs_lock, &seq);
p = net->fs_servers.rb_node;
diff --git a/fs/afs/volume.c b/fs/afs/volume.c
index f84194b79..fb19c6928 100644
--- a/fs/afs/volume.c
+++ b/fs/afs/volume.c
@@ -302,7 +302,7 @@ static int afs_update_volume_status(struct afs_volume *volume, struct key *key)
{
struct afs_server_list *new, *old, *discard;
struct afs_vldb_entry *vldb;
- char idbuf[16];
+ char idbuf[24];
int ret, idsz;
_enter("");
@@ -310,7 +310,7 @@ static int afs_update_volume_status(struct afs_volume *volume, struct key *key)
/* We look up an ID by passing it as a decimal string in the
* operation's name parameter.
*/
- idsz = sprintf(idbuf, "%llu", volume->vid);
+ idsz = snprintf(idbuf, sizeof(idbuf), "%llu", volume->vid);
vldb = afs_vl_lookup_vldb(volume->cell, key, idbuf, idsz);
if (IS_ERR(vldb)) {
diff --git a/fs/aio.c b/fs/aio.c
index 5934ea84b..93b6bbf01 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -565,13 +565,24 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel)
{
- struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, rw);
- struct kioctx *ctx = req->ki_ctx;
+ struct aio_kiocb *req;
+ struct kioctx *ctx;
unsigned long flags;
+ /*
+ * kiocb didn't come from aio or is neither a read nor a write, hence
+ * ignore it.
+ */
+ if (!(iocb->ki_flags & IOCB_AIO_RW))
+ return;
+
+ req = container_of(iocb, struct aio_kiocb, rw);
+
if (WARN_ON_ONCE(!list_empty(&req->ki_list)))
return;
+ ctx = req->ki_ctx;
+
spin_lock_irqsave(&ctx->ctx_lock, flags);
list_add_tail(&req->ki_list, &ctx->active_reqs);
req->ki_cancel = cancel;
@@ -1454,7 +1465,7 @@ static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb)
req->ki_complete = aio_complete_rw;
req->private = NULL;
req->ki_pos = iocb->aio_offset;
- req->ki_flags = iocb_flags(req->ki_filp);
+ req->ki_flags = iocb_flags(req->ki_filp) | IOCB_AIO_RW;
if (iocb->aio_flags & IOCB_FLAG_RESFD)
req->ki_flags |= IOCB_EVENTFD;
req->ki_hint = ki_hint_validate(file_write_hint(req->ki_filp));
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index d2f77b124..f1731eeb8 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -2315,20 +2315,14 @@ struct btrfs_data_container *init_data_container(u32 total_bytes)
size_t alloc_bytes;
alloc_bytes = max_t(size_t, total_bytes, sizeof(*data));
- data = kvmalloc(alloc_bytes, GFP_KERNEL);
+ data = kvzalloc(alloc_bytes, GFP_KERNEL);
if (!data)
return ERR_PTR(-ENOMEM);
- if (total_bytes >= sizeof(*data)) {
+ if (total_bytes >= sizeof(*data))
data->bytes_left = total_bytes - sizeof(*data);
- data->bytes_missing = 0;
- } else {
+ else
data->bytes_missing = sizeof(*data) - total_bytes;
- data->bytes_left = 0;
- }
-
- data->elem_cnt = 0;
- data->elem_missed = 0;
return data;
}
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 67831868e..3ddb09f2b 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2879,7 +2879,7 @@ struct btrfs_dir_item *
btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path, u64 dir,
- u64 objectid, const char *name, int name_len,
+ u64 index, const char *name, int name_len,
int mod);
struct btrfs_dir_item *
btrfs_search_dir_index_item(struct btrfs_root *root,
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index bcffe7886..cdfc791b3 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1135,6 +1135,9 @@ __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
if (ret)
return ret;
+ ret = btrfs_record_root_in_trans(trans, node->root);
+ if (ret)
+ return ret;
ret = btrfs_update_delayed_inode(trans, node->root, path, node);
return ret;
}
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index be6935d19..9a6055659 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -566,6 +566,23 @@ leave:
return ret;
}
+static int btrfs_check_replace_dev_names(struct btrfs_ioctl_dev_replace_args *args)
+{
+ if (args->start.srcdevid == 0) {
+ if (memchr(args->start.srcdev_name, 0,
+ sizeof(args->start.srcdev_name)) == NULL)
+ return -ENAMETOOLONG;
+ } else {
+ args->start.srcdev_name[0] = 0;
+ }
+
+ if (memchr(args->start.tgtdev_name, 0,
+ sizeof(args->start.tgtdev_name)) == NULL)
+ return -ENAMETOOLONG;
+
+ return 0;
+}
+
int btrfs_dev_replace_by_ioctl(struct btrfs_fs_info *fs_info,
struct btrfs_ioctl_dev_replace_args *args)
{
@@ -578,10 +595,9 @@ int btrfs_dev_replace_by_ioctl(struct btrfs_fs_info *fs_info,
default:
return -EINVAL;
}
-
- if ((args->start.srcdevid == 0 && args->start.srcdev_name[0] == '\0') ||
- args->start.tgtdev_name[0] == '\0')
- return -EINVAL;
+ ret = btrfs_check_replace_dev_names(args);
+ if (ret < 0)
+ return ret;
ret = btrfs_dev_replace_start(fs_info, args->start.tgtdev_name,
args->start.srcdevid,
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index 863367c2c..98c6faa8c 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -171,10 +171,40 @@ out_free:
return 0;
}
+static struct btrfs_dir_item *btrfs_lookup_match_dir(
+ struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct btrfs_path *path,
+ struct btrfs_key *key, const char *name,
+ int name_len, int mod)
+{
+ const int ins_len = (mod < 0 ? -1 : 0);
+ const int cow = (mod != 0);
+ int ret;
+
+ ret = btrfs_search_slot(trans, root, key, path, ins_len, cow);
+ if (ret < 0)
+ return ERR_PTR(ret);
+ if (ret > 0)
+ return ERR_PTR(-ENOENT);
+
+ return btrfs_match_dir_item_name(root->fs_info, path, name, name_len);
+}
+
/*
- * lookup a directory item based on name. 'dir' is the objectid
- * we're searching in, and 'mod' tells us if you plan on deleting the
- * item (use mod < 0) or changing the options (use mod > 0)
+ * Lookup for a directory item by name.
+ *
+ * @trans: The transaction handle to use. Can be NULL if @mod is 0.
+ * @root: The root of the target tree.
+ * @path: Path to use for the search.
+ * @dir: The inode number (objectid) of the directory.
+ * @name: The name associated to the directory entry we are looking for.
+ * @name_len: The length of the name.
+ * @mod: Used to indicate if the tree search is meant for a read only
+ * lookup, for a modification lookup or for a deletion lookup, so
+ * its value should be 0, 1 or -1, respectively.
+ *
+ * Returns: NULL if the dir item does not exists, an error pointer if an error
+ * happened, or a pointer to a dir item if a dir item exists for the given name.
*/
struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
@@ -182,23 +212,18 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
const char *name, int name_len,
int mod)
{
- int ret;
struct btrfs_key key;
- int ins_len = mod < 0 ? -1 : 0;
- int cow = mod != 0;
+ struct btrfs_dir_item *di;
key.objectid = dir;
key.type = BTRFS_DIR_ITEM_KEY;
-
key.offset = btrfs_name_hash(name, name_len);
- ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
- if (ret < 0)
- return ERR_PTR(ret);
- if (ret > 0)
+ di = btrfs_lookup_match_dir(trans, root, path, &key, name, name_len, mod);
+ if (IS_ERR(di) && PTR_ERR(di) == -ENOENT)
return NULL;
- return btrfs_match_dir_item_name(root->fs_info, path, name, name_len);
+ return di;
}
int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir,
@@ -212,7 +237,6 @@ int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir,
int slot;
struct btrfs_path *path;
-
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
@@ -221,20 +245,20 @@ int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir,
key.type = BTRFS_DIR_ITEM_KEY;
key.offset = btrfs_name_hash(name, name_len);
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
-
- /* return back any errors */
- if (ret < 0)
- goto out;
+ di = btrfs_lookup_match_dir(NULL, root, path, &key, name, name_len, 0);
+ if (IS_ERR(di)) {
+ ret = PTR_ERR(di);
+ /* Nothing found, we're safe */
+ if (ret == -ENOENT) {
+ ret = 0;
+ goto out;
+ }
- /* nothing found, we're safe */
- if (ret > 0) {
- ret = 0;
- goto out;
+ if (ret < 0)
+ goto out;
}
/* we found an item, look for our name in the item */
- di = btrfs_match_dir_item_name(root->fs_info, path, name, name_len);
if (di) {
/* our exact name was found */
ret = -EEXIST;
@@ -261,35 +285,42 @@ out:
}
/*
- * lookup a directory item based on index. 'dir' is the objectid
- * we're searching in, and 'mod' tells us if you plan on deleting the
- * item (use mod < 0) or changing the options (use mod > 0)
+ * Lookup for a directory index item by name and index number.
+ *
+ * @trans: The transaction handle to use. Can be NULL if @mod is 0.
+ * @root: The root of the target tree.
+ * @path: Path to use for the search.
+ * @dir: The inode number (objectid) of the directory.
+ * @index: The index number.
+ * @name: The name associated to the directory entry we are looking for.
+ * @name_len: The length of the name.
+ * @mod: Used to indicate if the tree search is meant for a read only
+ * lookup, for a modification lookup or for a deletion lookup, so
+ * its value should be 0, 1 or -1, respectively.
*
- * The name is used to make sure the index really points to the name you were
- * looking for.
+ * Returns: NULL if the dir index item does not exists, an error pointer if an
+ * error happened, or a pointer to a dir item if the dir index item exists and
+ * matches the criteria (name and index number).
*/
struct btrfs_dir_item *
btrfs_lookup_dir_index_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path, u64 dir,
- u64 objectid, const char *name, int name_len,
+ u64 index, const char *name, int name_len,
int mod)
{
- int ret;
+ struct btrfs_dir_item *di;
struct btrfs_key key;
- int ins_len = mod < 0 ? -1 : 0;
- int cow = mod != 0;
key.objectid = dir;
key.type = BTRFS_DIR_INDEX_KEY;
- key.offset = objectid;
+ key.offset = index;
- ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
- if (ret < 0)
- return ERR_PTR(ret);
- if (ret > 0)
- return ERR_PTR(-ENOENT);
- return btrfs_match_dir_item_name(root->fs_info, path, name, name_len);
+ di = btrfs_lookup_match_dir(trans, root, path, &key, name, name_len, mod);
+ if (di == ERR_PTR(-ENOENT))
+ return NULL;
+
+ return di;
}
struct btrfs_dir_item *
@@ -346,21 +377,18 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
const char *name, u16 name_len,
int mod)
{
- int ret;
struct btrfs_key key;
- int ins_len = mod < 0 ? -1 : 0;
- int cow = mod != 0;
+ struct btrfs_dir_item *di;
key.objectid = dir;
key.type = BTRFS_XATTR_ITEM_KEY;
key.offset = btrfs_name_hash(name, name_len);
- ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
- if (ret < 0)
- return ERR_PTR(ret);
- if (ret > 0)
+
+ di = btrfs_lookup_match_dir(trans, root, path, &key, name, name_len, mod);
+ if (IS_ERR(di) && PTR_ERR(di) == -ENOENT)
return NULL;
- return btrfs_match_dir_item_name(root->fs_info, path, name, name_len);
+ return di;
}
/*
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 0e25a3f64..019f0925f 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1553,8 +1553,17 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
again:
root = btrfs_lookup_fs_root(fs_info, objectid);
if (root) {
- /* Shouldn't get preallocated anon_dev for cached roots */
- ASSERT(!anon_dev);
+ /*
+ * Some other caller may have read out the newly inserted
+ * subvolume already (for things like backref walk etc). Not
+ * that common but still possible. In that case, we just need
+ * to free the anon_dev.
+ */
+ if (unlikely(anon_dev)) {
+ free_anon_bdev(anon_dev);
+ anon_dev = 0;
+ }
+
if (check_ref && btrfs_root_refs(&root->root_item) == 0) {
btrfs_put_root(root);
return ERR_PTR(-ENOENT);
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index bfa2bf445..d908afa1f 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -161,8 +161,15 @@ struct dentry *btrfs_get_parent(struct dentry *child)
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
goto fail;
+ if (ret == 0) {
+ /*
+ * Key with offset of -1 found, there would have to exist an
+ * inode with such number or a root with such id.
+ */
+ ret = -EUCLEAN;
+ goto fail;
+ }
- BUG_ON(ret == 0); /* Key with offset of -1 found */
if (path->slots[0] == 0) {
ret = -ENOENT;
goto fail;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 8f62e1710..3ba43a400 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1202,7 +1202,8 @@ static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
u64 bytes_left, end;
u64 aligned_start = ALIGN(start, 1 << 9);
- if (WARN_ON(start != aligned_start)) {
+ /* Adjust the range to be aligned to 512B sectors if necessary. */
+ if (start != aligned_start) {
len -= aligned_start - start;
len = round_down(len, 1 << 9);
start = aligned_start;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index c900a3966..591caac2b 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4007,7 +4007,8 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)
struct btrfs_block_rsv block_rsv;
u64 root_flags;
int ret;
- int err;
+
+ down_write(&fs_info->subvol_sem);
/*
* Don't allow to delete a subvolume with send in progress. This is
@@ -4020,25 +4021,25 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)
btrfs_warn(fs_info,
"attempt to delete subvolume %llu during send",
dest->root_key.objectid);
- return -EPERM;
+ ret = -EPERM;
+ goto out_up_write;
}
if (atomic_read(&dest->nr_swapfiles)) {
spin_unlock(&dest->root_item_lock);
btrfs_warn(fs_info,
"attempt to delete subvolume %llu with active swapfile",
root->root_key.objectid);
- return -EPERM;
+ ret = -EPERM;
+ goto out_up_write;
}
root_flags = btrfs_root_flags(&dest->root_item);
btrfs_set_root_flags(&dest->root_item,
root_flags | BTRFS_ROOT_SUBVOL_DEAD);
spin_unlock(&dest->root_item_lock);
- down_write(&fs_info->subvol_sem);
-
- err = may_destroy_subvol(dest);
- if (err)
- goto out_up_write;
+ ret = may_destroy_subvol(dest);
+ if (ret)
+ goto out_undead;
btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP);
/*
@@ -4046,13 +4047,13 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)
* two for dir entries,
* two for root ref/backref.
*/
- err = btrfs_subvolume_reserve_metadata(root, &block_rsv, 5, true);
- if (err)
- goto out_up_write;
+ ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, 5, true);
+ if (ret)
+ goto out_undead;
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
- err = PTR_ERR(trans);
+ ret = PTR_ERR(trans);
goto out_release;
}
trans->block_rsv = &block_rsv;
@@ -4062,7 +4063,6 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)
ret = btrfs_unlink_subvol(trans, dir, dentry);
if (ret) {
- err = ret;
btrfs_abort_transaction(trans, ret);
goto out_end_trans;
}
@@ -4080,7 +4080,6 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)
dest->root_key.objectid);
if (ret) {
btrfs_abort_transaction(trans, ret);
- err = ret;
goto out_end_trans;
}
}
@@ -4090,7 +4089,6 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)
dest->root_key.objectid);
if (ret && ret != -ENOENT) {
btrfs_abort_transaction(trans, ret);
- err = ret;
goto out_end_trans;
}
if (!btrfs_is_empty_uuid(dest->root_item.received_uuid)) {
@@ -4100,7 +4098,6 @@ int btrfs_delete_subvolume(struct inode *dir, struct dentry *dentry)
dest->root_key.objectid);
if (ret && ret != -ENOENT) {
btrfs_abort_transaction(trans, ret);
- err = ret;
goto out_end_trans;
}
}
@@ -4111,20 +4108,20 @@ out_end_trans:
trans->block_rsv = NULL;
trans->bytes_reserved = 0;
ret = btrfs_end_transaction(trans);
- if (ret && !err)
- err = ret;
inode->i_flags |= S_DEAD;
out_release:
btrfs_subvolume_release_metadata(root, &block_rsv);
-out_up_write:
- up_write(&fs_info->subvol_sem);
- if (err) {
+out_undead:
+ if (ret) {
spin_lock(&dest->root_item_lock);
root_flags = btrfs_root_flags(&dest->root_item);
btrfs_set_root_flags(&dest->root_item,
root_flags & ~BTRFS_ROOT_SUBVOL_DEAD);
spin_unlock(&dest->root_item_lock);
- } else {
+ }
+out_up_write:
+ up_write(&fs_info->subvol_sem);
+ if (!ret) {
d_invalidate(dentry);
btrfs_prune_dentries(dest);
ASSERT(dest->send_in_progress == 0);
@@ -4136,7 +4133,7 @@ out_up_write:
}
}
- return err;
+ return ret;
}
static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
@@ -8971,8 +8968,6 @@ static int btrfs_rename_exchange(struct inode *old_dir,
/* force full log commit if subvolume involved. */
btrfs_set_log_full_commit(trans);
} else {
- btrfs_pin_log_trans(root);
- root_log_pinned = true;
ret = btrfs_insert_inode_ref(trans, dest,
new_dentry->d_name.name,
new_dentry->d_name.len,
@@ -8989,8 +8984,6 @@ static int btrfs_rename_exchange(struct inode *old_dir,
/* force full log commit if subvolume involved. */
btrfs_set_log_full_commit(trans);
} else {
- btrfs_pin_log_trans(dest);
- dest_log_pinned = true;
ret = btrfs_insert_inode_ref(trans, root,
old_dentry->d_name.name,
old_dentry->d_name.len,
@@ -9021,6 +9014,29 @@ static int btrfs_rename_exchange(struct inode *old_dir,
BTRFS_I(new_inode), 1);
}
+ /*
+ * Now pin the logs of the roots. We do it to ensure that no other task
+ * can sync the logs while we are in progress with the rename, because
+ * that could result in an inconsistency in case any of the inodes that
+ * are part of this rename operation were logged before.
+ *
+ * We pin the logs even if at this precise moment none of the inodes was
+ * logged before. This is because right after we checked for that, some
+ * other task fsyncing some other inode not involved with this rename
+ * operation could log that one of our inodes exists.
+ *
+ * We don't need to pin the logs before the above calls to
+ * btrfs_insert_inode_ref(), since those don't ever need to change a log.
+ */
+ if (old_ino != BTRFS_FIRST_FREE_OBJECTID) {
+ btrfs_pin_log_trans(root);
+ root_log_pinned = true;
+ }
+ if (new_ino != BTRFS_FIRST_FREE_OBJECTID) {
+ btrfs_pin_log_trans(dest);
+ dest_log_pinned = true;
+ }
+
/* src is a subvolume */
if (old_ino == BTRFS_FIRST_FREE_OBJECTID) {
ret = btrfs_unlink_subvol(trans, old_dir, old_dentry);
@@ -9270,8 +9286,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
/* force full log commit if subvolume involved. */
btrfs_set_log_full_commit(trans);
} else {
- btrfs_pin_log_trans(root);
- log_pinned = true;
ret = btrfs_insert_inode_ref(trans, dest,
new_dentry->d_name.name,
new_dentry->d_name.len,
@@ -9295,6 +9309,25 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
ret = btrfs_unlink_subvol(trans, old_dir, old_dentry);
} else {
+ /*
+ * Now pin the log. We do it to ensure that no other task can
+ * sync the log while we are in progress with the rename, as
+ * that could result in an inconsistency in case any of the
+ * inodes that are part of this rename operation were logged
+ * before.
+ *
+ * We pin the log even if at this precise moment none of the
+ * inodes was logged before. This is because right after we
+ * checked for that, some other task fsyncing some other inode
+ * not involved with this rename operation could log that one of
+ * our inodes exists.
+ *
+ * We don't need to pin the logs before the above call to
+ * btrfs_insert_inode_ref(), since that does not need to change
+ * a log.
+ */
+ btrfs_pin_log_trans(root);
+ log_pinned = true;
ret = __btrfs_unlink_inode(trans, root, BTRFS_I(old_dir),
BTRFS_I(d_inode(old_dentry)),
old_dentry->d_name.name,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index f06824bea..ab8ed1877 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -798,6 +798,9 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
struct btrfs_trans_handle *trans;
int ret;
+ if (btrfs_root_refs(&root->root_item) == 0)
+ return -ENOENT;
+
if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
return -EINVAL;
@@ -3145,7 +3148,7 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
{
struct inode *inode = file_inode(file);
struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_ioctl_defrag_range_args *range;
+ struct btrfs_ioctl_defrag_range_args range = {0};
int ret;
ret = mnt_want_write_file(file);
@@ -3177,33 +3180,28 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
goto out;
}
- range = kzalloc(sizeof(*range), GFP_KERNEL);
- if (!range) {
- ret = -ENOMEM;
- goto out;
- }
-
if (argp) {
- if (copy_from_user(range, argp,
- sizeof(*range))) {
+ if (copy_from_user(&range, argp, sizeof(range))) {
ret = -EFAULT;
- kfree(range);
+ goto out;
+ }
+ if (range.flags & ~BTRFS_DEFRAG_RANGE_FLAGS_SUPP) {
+ ret = -EOPNOTSUPP;
goto out;
}
/* compression requires us to start the IO */
- if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) {
- range->flags |= BTRFS_DEFRAG_RANGE_START_IO;
- range->extent_thresh = (u32)-1;
+ if ((range.flags & BTRFS_DEFRAG_RANGE_COMPRESS)) {
+ range.flags |= BTRFS_DEFRAG_RANGE_START_IO;
+ range.extent_thresh = (u32)-1;
}
} else {
/* the rest are all set to zero by kzalloc */
- range->len = (u64)-1;
+ range.len = (u64)-1;
}
ret = btrfs_defrag_file(file_inode(file), file,
- range, BTRFS_OLDEST_GENERATION, 0);
+ &range, BTRFS_OLDEST_GENERATION, 0);
if (ret > 0)
ret = 0;
- kfree(range);
break;
default:
ret = -EINVAL;
@@ -4318,6 +4316,11 @@ static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg)
goto out;
}
+ if (sa->create && is_fstree(sa->qgroupid)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
trans = btrfs_join_transaction(root);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index a67323c2d..50669ff93 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1608,6 +1608,15 @@ out:
return ret;
}
+static bool qgroup_has_usage(struct btrfs_qgroup *qgroup)
+{
+ return (qgroup->rfer > 0 || qgroup->rfer_cmpr > 0 ||
+ qgroup->excl > 0 || qgroup->excl_cmpr > 0 ||
+ qgroup->rsv.values[BTRFS_QGROUP_RSV_DATA] > 0 ||
+ qgroup->rsv.values[BTRFS_QGROUP_RSV_META_PREALLOC] > 0 ||
+ qgroup->rsv.values[BTRFS_QGROUP_RSV_META_PERTRANS] > 0);
+}
+
int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
@@ -1627,6 +1636,11 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid)
goto out;
}
+ if (is_fstree(qgroupid) && qgroup_has_usage(qgroup)) {
+ ret = -EBUSY;
+ goto out;
+ }
+
/* Check if there are no children of this qgroup */
if (!list_empty(&qgroup->members)) {
ret = -EBUSY;
@@ -4100,6 +4114,8 @@ void btrfs_qgroup_convert_reserved_meta(struct btrfs_root *root, int num_bytes)
BTRFS_QGROUP_RSV_META_PREALLOC);
trace_qgroup_meta_convert(root, num_bytes);
qgroup_convert_meta(fs_info, root->root_key.objectid, num_bytes);
+ if (!sb_rdonly(fs_info->sb))
+ add_root_meta_rsv(root, num_bytes, BTRFS_QGROUP_RSV_META_PERTRANS);
}
/*
diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c
index bd3bb94cc..c3711598a 100644
--- a/fs/btrfs/ref-verify.c
+++ b/fs/btrfs/ref-verify.c
@@ -899,8 +899,10 @@ int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info,
out_unlock:
spin_unlock(&fs_info->ref_verify_lock);
out:
- if (ret)
+ if (ret) {
+ btrfs_free_ref_cache(fs_info);
btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY);
+ }
return ret;
}
@@ -1029,8 +1031,8 @@ int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info)
}
}
if (ret) {
- btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY);
btrfs_free_ref_cache(fs_info);
+ btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY);
}
btrfs_free_path(path);
return ret;
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index af9701afc..0519a3557 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -966,7 +966,15 @@ static int iterate_inode_ref(struct btrfs_root *root, struct btrfs_path *path,
ret = PTR_ERR(start);
goto out;
}
- BUG_ON(start < p->buf);
+ if (unlikely(start < p->buf)) {
+ btrfs_err(root->fs_info,
+ "send: path ref buffer underflow for key (%llu %u %llu)",
+ found_key->objectid,
+ found_key->type,
+ found_key->offset);
+ ret = -EINVAL;
+ goto out;
+ }
}
p->start = start;
} else {
@@ -7285,7 +7293,7 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
}
if (arg->flags & ~BTRFS_SEND_FLAG_MASK) {
- ret = -EINVAL;
+ ret = -EOPNOTSUPP;
goto out;
}
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index 32f1b15b2..5b952f69b 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -1189,7 +1189,8 @@ static void extent_err(const struct extent_buffer *eb, int slot,
}
static int check_extent_item(struct extent_buffer *leaf,
- struct btrfs_key *key, int slot)
+ struct btrfs_key *key, int slot,
+ struct btrfs_key *prev_key)
{
struct btrfs_fs_info *fs_info = leaf->fs_info;
struct btrfs_extent_item *ei;
@@ -1334,7 +1335,7 @@ static int check_extent_item(struct extent_buffer *leaf,
if (ptr + btrfs_extent_inline_ref_size(inline_type) > end) {
extent_err(leaf, slot,
"inline ref item overflows extent item, ptr %lu iref size %u end %lu",
- ptr, inline_type, end);
+ ptr, btrfs_extent_inline_ref_size(inline_type), end);
return -EUCLEAN;
}
@@ -1400,6 +1401,26 @@ static int check_extent_item(struct extent_buffer *leaf,
total_refs, inline_refs);
return -EUCLEAN;
}
+
+ if ((prev_key->type == BTRFS_EXTENT_ITEM_KEY) ||
+ (prev_key->type == BTRFS_METADATA_ITEM_KEY)) {
+ u64 prev_end = prev_key->objectid;
+
+ if (prev_key->type == BTRFS_METADATA_ITEM_KEY)
+ prev_end += fs_info->nodesize;
+ else
+ prev_end += prev_key->offset;
+
+ if (unlikely(prev_end > key->objectid)) {
+ extent_err(leaf, slot,
+ "previous extent [%llu %u %llu] overlaps current extent [%llu %u %llu]",
+ prev_key->objectid, prev_key->type,
+ prev_key->offset, key->objectid, key->type,
+ key->offset);
+ return -EUCLEAN;
+ }
+ }
+
return 0;
}
@@ -1568,7 +1589,7 @@ static int check_leaf_item(struct extent_buffer *leaf,
break;
case BTRFS_EXTENT_ITEM_KEY:
case BTRFS_METADATA_ITEM_KEY:
- ret = check_extent_item(leaf, key, slot);
+ ret = check_extent_item(leaf, key, slot, prev_key);
break;
case BTRFS_TREE_BLOCK_REF_KEY:
case BTRFS_SHARED_DATA_REF_KEY:
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 10a0913ff..34e9eb501 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -912,8 +912,7 @@ static noinline int inode_in_dir(struct btrfs_root *root,
di = btrfs_lookup_dir_index_item(NULL, root, path, dirid,
index, name, name_len, 0);
if (IS_ERR(di)) {
- if (PTR_ERR(di) != -ENOENT)
- ret = PTR_ERR(di);
+ ret = PTR_ERR(di);
goto out;
} else if (di) {
btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location);
@@ -1149,8 +1148,7 @@ next:
di = btrfs_lookup_dir_index_item(trans, root, path, btrfs_ino(dir),
ref_index, name, namelen, 0);
if (IS_ERR(di)) {
- if (PTR_ERR(di) != -ENOENT)
- return PTR_ERR(di);
+ return PTR_ERR(di);
} else if (di) {
ret = drop_one_dir_item(trans, root, path, dir, di);
if (ret)
@@ -1976,9 +1974,6 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
goto out;
}
- if (dst_di == ERR_PTR(-ENOENT))
- dst_di = NULL;
-
if (IS_ERR(dst_di)) {
ret = PTR_ERR(dst_di);
goto out;
@@ -2286,7 +2281,7 @@ again:
dir_key->offset,
name, name_len, 0);
}
- if (!log_di || log_di == ERR_PTR(-ENOENT)) {
+ if (!log_di) {
btrfs_dir_item_key_to_cpu(eb, di, &location);
btrfs_release_path(path);
btrfs_release_path(log_path);
@@ -3495,8 +3490,7 @@ out_unlock:
if (err == -ENOSPC) {
btrfs_set_log_full_commit(trans);
err = 0;
- } else if (err < 0 && err != -ENOENT) {
- /* ENOENT can be returned if the entry hasn't been fsynced yet */
+ } else if (err < 0) {
btrfs_abort_transaction(trans, err);
}
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index eaf5cd043..09c23626f 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1471,7 +1471,7 @@ static bool contains_pending_extent(struct btrfs_device *device, u64 *start,
if (in_range(physical_start, *start, len) ||
in_range(*start, physical_start,
- physical_end - physical_start)) {
+ physical_end + 1 - physical_start)) {
*start = physical_end + 1;
return true;
}
@@ -3178,7 +3178,17 @@ again:
mutex_unlock(&fs_info->delete_unused_bgs_mutex);
goto error;
}
- BUG_ON(ret == 0); /* Corruption */
+ if (ret == 0) {
+ /*
+ * On the first search we would find chunk tree with
+ * offset -1, which is not possible. On subsequent
+ * loops this would find an existing item on an invalid
+ * offset (one less than the previous one, wrong
+ * alignment and size).
+ */
+ ret = -EUCLEAN;
+ goto error;
+ }
ret = btrfs_previous_item(chunk_root, path, key.objectid,
key.type);
diff --git a/fs/cachefiles/bind.c b/fs/cachefiles/bind.c
index dfb14dbdd..3b39552c2 100644
--- a/fs/cachefiles/bind.c
+++ b/fs/cachefiles/bind.c
@@ -245,6 +245,8 @@ error_open_root:
kmem_cache_free(cachefiles_object_jar, fsdef);
error_root_object:
cachefiles_end_secure(cache, saved_cred);
+ put_cred(cache->cache_cred);
+ cache->cache_cred = NULL;
pr_err("Failed to register: %d\n", ret);
return ret;
}
@@ -265,6 +267,7 @@ void cachefiles_daemon_unbind(struct cachefiles_cache *cache)
dput(cache->graveyard);
mntput(cache->mnt);
+ put_cred(cache->cache_cred);
kfree(cache->rootdirname);
kfree(cache->secctx);
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 432dc2a16..8e43d07ff 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1402,7 +1402,7 @@ static void __prep_cap(struct cap_msg_args *arg, struct ceph_cap *cap,
if (flushing & CEPH_CAP_XATTR_EXCL) {
arg->old_xattr_buf = __ceph_build_xattrs_blob(ci);
arg->xattr_version = ci->i_xattrs.version;
- arg->xattr_buf = ci->i_xattrs.blob;
+ arg->xattr_buf = ceph_buffer_get(ci->i_xattrs.blob);
} else {
arg->xattr_buf = NULL;
arg->old_xattr_buf = NULL;
@@ -1468,6 +1468,7 @@ static void __send_cap(struct cap_msg_args *arg, struct ceph_inode_info *ci)
encode_cap_msg(msg, arg);
ceph_con_send(&arg->session->s_con, msg);
ceph_buffer_put(arg->old_xattr_buf);
+ ceph_buffer_put(arg->xattr_buf);
if (arg->wake)
wake_up_all(&ci->i_cap_wq);
}
@@ -4598,12 +4599,14 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry,
struct inode *dir,
int mds, int drop, int unless)
{
- struct dentry *parent = NULL;
struct ceph_mds_request_release *rel = *p;
struct ceph_dentry_info *di = ceph_dentry(dentry);
int force = 0;
int ret;
+ /* This shouldn't happen */
+ BUG_ON(!dir);
+
/*
* force an record for the directory caps if we have a dentry lease.
* this is racy (can't take i_ceph_lock and d_lock together), but it
@@ -4613,14 +4616,9 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry,
spin_lock(&dentry->d_lock);
if (di->lease_session && di->lease_session->s_mds == mds)
force = 1;
- if (!dir) {
- parent = dget(dentry->d_parent);
- dir = d_inode(parent);
- }
spin_unlock(&dentry->d_lock);
ret = ceph_encode_inode_release(p, dir, mds, drop, unless, force);
- dput(parent);
spin_lock(&dentry->d_lock);
if (ret && di->lease_session && di->lease_session->s_mds == mds) {
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index b98bba887..660e00eb4 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -117,7 +117,7 @@ static __u32 get_neg_ctxt_len(struct smb2_sync_hdr *hdr, __u32 len,
} else if (nc_offset + 1 == non_ctxlen) {
cifs_dbg(FYI, "no SPNEGO security blob in negprot rsp\n");
size_of_pad_before_neg_ctxts = 0;
- } else if (non_ctxlen == SMB311_NEGPROT_BASE_SIZE)
+ } else if (non_ctxlen == SMB311_NEGPROT_BASE_SIZE + 1)
/* has padding, but no SPNEGO blob */
size_of_pad_before_neg_ctxts = nc_offset - non_ctxlen + 1;
else
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 26edaeb42..b2a7238a3 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -82,6 +82,7 @@ smb2_add_credits(struct TCP_Server_Info *server,
*val = 65000; /* Don't get near 64K credits, avoid srv bugs */
pr_warn_once("server overflowed SMB3 credits\n");
}
+ WARN_ON_ONCE(server->in_flight == 0);
server->in_flight--;
if (server->in_flight == 0 && (optype & CIFS_OP_MASK) != CIFS_NEG_OP)
rc = change_conf(server);
@@ -818,10 +819,12 @@ int open_shroot(unsigned int xid, struct cifs_tcon *tcon,
if (o_rsp->OplockLevel == SMB2_OPLOCK_LEVEL_LEASE) {
kref_get(&tcon->crfid.refcount);
tcon->crfid.has_lease = true;
- smb2_parse_contexts(server, o_rsp,
+ rc = smb2_parse_contexts(server, rsp_iov,
&oparms.fid->epoch,
oparms.fid->lease_key, &oplock,
NULL, NULL);
+ if (rc)
+ goto oshr_exit;
} else
goto oshr_exit;
@@ -4892,6 +4895,7 @@ receive_encrypted_standard(struct TCP_Server_Info *server,
struct smb2_sync_hdr *shdr;
unsigned int pdu_length = server->pdu_size;
unsigned int buf_size;
+ unsigned int next_cmd;
struct mid_q_entry *mid_entry;
int next_is_large;
char *next_buffer = NULL;
@@ -4920,14 +4924,15 @@ receive_encrypted_standard(struct TCP_Server_Info *server,
next_is_large = server->large_buf;
one_more:
shdr = (struct smb2_sync_hdr *)buf;
- if (shdr->NextCommand) {
+ next_cmd = le32_to_cpu(shdr->NextCommand);
+ if (next_cmd) {
+ if (WARN_ON_ONCE(next_cmd > pdu_length))
+ return -1;
if (next_is_large)
next_buffer = (char *)cifs_buf_get();
else
next_buffer = (char *)cifs_small_buf_get();
- memcpy(next_buffer,
- buf + le32_to_cpu(shdr->NextCommand),
- pdu_length - le32_to_cpu(shdr->NextCommand));
+ memcpy(next_buffer, buf + next_cmd, pdu_length - next_cmd);
}
mid_entry = smb2_find_mid(server, buf);
@@ -4951,8 +4956,8 @@ one_more:
else
ret = cifs_handle_standard(server, mid_entry);
- if (ret == 0 && shdr->NextCommand) {
- pdu_length -= le32_to_cpu(shdr->NextCommand);
+ if (ret == 0 && next_cmd) {
+ pdu_length -= next_cmd;
server->large_buf = next_is_large;
if (next_is_large)
server->bigbuf = buf = next_buffer;
@@ -5561,7 +5566,7 @@ struct smb_version_values smb20_values = {
.header_size = sizeof(struct smb2_sync_hdr),
.header_preamble_size = 0,
.max_header_size = MAX_SMB2_HDR_SIZE,
- .read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
+ .read_rsp_size = sizeof(struct smb2_read_rsp),
.lock_cmd = SMB2_LOCK,
.cap_unix = 0,
.cap_nt_find = SMB2_NT_FIND,
@@ -5583,7 +5588,7 @@ struct smb_version_values smb21_values = {
.header_size = sizeof(struct smb2_sync_hdr),
.header_preamble_size = 0,
.max_header_size = MAX_SMB2_HDR_SIZE,
- .read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
+ .read_rsp_size = sizeof(struct smb2_read_rsp),
.lock_cmd = SMB2_LOCK,
.cap_unix = 0,
.cap_nt_find = SMB2_NT_FIND,
@@ -5604,7 +5609,7 @@ struct smb_version_values smb3any_values = {
.header_size = sizeof(struct smb2_sync_hdr),
.header_preamble_size = 0,
.max_header_size = MAX_SMB2_HDR_SIZE,
- .read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
+ .read_rsp_size = sizeof(struct smb2_read_rsp),
.lock_cmd = SMB2_LOCK,
.cap_unix = 0,
.cap_nt_find = SMB2_NT_FIND,
@@ -5625,7 +5630,7 @@ struct smb_version_values smbdefault_values = {
.header_size = sizeof(struct smb2_sync_hdr),
.header_preamble_size = 0,
.max_header_size = MAX_SMB2_HDR_SIZE,
- .read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
+ .read_rsp_size = sizeof(struct smb2_read_rsp),
.lock_cmd = SMB2_LOCK,
.cap_unix = 0,
.cap_nt_find = SMB2_NT_FIND,
@@ -5646,7 +5651,7 @@ struct smb_version_values smb30_values = {
.header_size = sizeof(struct smb2_sync_hdr),
.header_preamble_size = 0,
.max_header_size = MAX_SMB2_HDR_SIZE,
- .read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
+ .read_rsp_size = sizeof(struct smb2_read_rsp),
.lock_cmd = SMB2_LOCK,
.cap_unix = 0,
.cap_nt_find = SMB2_NT_FIND,
@@ -5667,7 +5672,7 @@ struct smb_version_values smb302_values = {
.header_size = sizeof(struct smb2_sync_hdr),
.header_preamble_size = 0,
.max_header_size = MAX_SMB2_HDR_SIZE,
- .read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
+ .read_rsp_size = sizeof(struct smb2_read_rsp),
.lock_cmd = SMB2_LOCK,
.cap_unix = 0,
.cap_nt_find = SMB2_NT_FIND,
@@ -5688,7 +5693,7 @@ struct smb_version_values smb311_values = {
.header_size = sizeof(struct smb2_sync_hdr),
.header_preamble_size = 0,
.max_header_size = MAX_SMB2_HDR_SIZE,
- .read_rsp_size = sizeof(struct smb2_read_rsp) - 1,
+ .read_rsp_size = sizeof(struct smb2_read_rsp),
.lock_cmd = SMB2_LOCK,
.cap_unix = 0,
.cap_nt_find = SMB2_NT_FIND,
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 76679dc4e..aa3211d8c 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1261,7 +1261,7 @@ SMB2_sess_sendreceive(struct SMB2_sess_data *sess_data)
/* Testing shows that buffer offset must be at location of Buffer[0] */
req->SecurityBufferOffset =
- cpu_to_le16(sizeof(struct smb2_sess_setup_req) - 1 /* pad */);
+ cpu_to_le16(sizeof(struct smb2_sess_setup_req));
req->SecurityBufferLength = cpu_to_le16(sess_data->iov[1].iov_len);
memset(&rqst, 0, sizeof(struct smb_rqst));
@@ -1760,8 +1760,7 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
iov[0].iov_len = total_len - 1;
/* Testing shows that buffer offset must be at location of Buffer[0] */
- req->PathOffset = cpu_to_le16(sizeof(struct smb2_tree_connect_req)
- - 1 /* pad */);
+ req->PathOffset = cpu_to_le16(sizeof(struct smb2_tree_connect_req));
req->PathLength = cpu_to_le16(unc_path_len - 2);
iov[1].iov_base = unc_path;
iov[1].iov_len = unc_path_len;
@@ -1992,17 +1991,18 @@ parse_posix_ctxt(struct create_context *cc, struct smb2_file_all_info *info,
posix->nlink, posix->mode, posix->reparse_tag);
}
-void
-smb2_parse_contexts(struct TCP_Server_Info *server,
- struct smb2_create_rsp *rsp,
- unsigned int *epoch, char *lease_key, __u8 *oplock,
- struct smb2_file_all_info *buf,
- struct create_posix_rsp *posix)
+int smb2_parse_contexts(struct TCP_Server_Info *server,
+ struct kvec *rsp_iov,
+ unsigned int *epoch,
+ char *lease_key, __u8 *oplock,
+ struct smb2_file_all_info *buf,
+ struct create_posix_rsp *posix)
{
- char *data_offset;
+ struct smb2_create_rsp *rsp = rsp_iov->iov_base;
struct create_context *cc;
- unsigned int next;
- unsigned int remaining;
+ size_t rem, off, len;
+ size_t doff, dlen;
+ size_t noff, nlen;
char *name;
static const char smb3_create_tag_posix[] = {
0x93, 0xAD, 0x25, 0x50, 0x9C,
@@ -2011,45 +2011,63 @@ smb2_parse_contexts(struct TCP_Server_Info *server,
};
*oplock = 0;
- data_offset = (char *)rsp + le32_to_cpu(rsp->CreateContextsOffset);
- remaining = le32_to_cpu(rsp->CreateContextsLength);
- cc = (struct create_context *)data_offset;
+
+ off = le32_to_cpu(rsp->CreateContextsOffset);
+ rem = le32_to_cpu(rsp->CreateContextsLength);
+ if (check_add_overflow(off, rem, &len) || len > rsp_iov->iov_len)
+ return -EINVAL;
+ cc = (struct create_context *)((u8 *)rsp + off);
/* Initialize inode number to 0 in case no valid data in qfid context */
if (buf)
buf->IndexNumber = 0;
- while (remaining >= sizeof(struct create_context)) {
- name = le16_to_cpu(cc->NameOffset) + (char *)cc;
- if (le16_to_cpu(cc->NameLength) == 4 &&
- strncmp(name, SMB2_CREATE_REQUEST_LEASE, 4) == 0)
- *oplock = server->ops->parse_lease_buf(cc, epoch,
- lease_key);
- else if (buf && (le16_to_cpu(cc->NameLength) == 4) &&
- strncmp(name, SMB2_CREATE_QUERY_ON_DISK_ID, 4) == 0)
- parse_query_id_ctxt(cc, buf);
- else if ((le16_to_cpu(cc->NameLength) == 16)) {
- if (posix &&
- memcmp(name, smb3_create_tag_posix, 16) == 0)
+ while (rem >= sizeof(*cc)) {
+ doff = le16_to_cpu(cc->DataOffset);
+ dlen = le32_to_cpu(cc->DataLength);
+ if (check_add_overflow(doff, dlen, &len) || len > rem)
+ return -EINVAL;
+
+ noff = le16_to_cpu(cc->NameOffset);
+ nlen = le16_to_cpu(cc->NameLength);
+ if (noff + nlen > doff)
+ return -EINVAL;
+
+ name = (char *)cc + noff;
+ switch (nlen) {
+ case 4:
+ if (!strncmp(name, SMB2_CREATE_REQUEST_LEASE, 4)) {
+ *oplock = server->ops->parse_lease_buf(cc, epoch,
+ lease_key);
+ } else if (buf &&
+ !strncmp(name, SMB2_CREATE_QUERY_ON_DISK_ID, 4)) {
+ parse_query_id_ctxt(cc, buf);
+ }
+ break;
+ case 16:
+ if (posix && !memcmp(name, smb3_create_tag_posix, 16))
parse_posix_ctxt(cc, buf, posix);
+ break;
+ default:
+ cifs_dbg(FYI, "%s: unhandled context (nlen=%zu dlen=%zu)\n",
+ __func__, nlen, dlen);
+ if (IS_ENABLED(CONFIG_CIFS_DEBUG2))
+ cifs_dump_mem("context data: ", cc, dlen);
+ break;
}
- /* else {
- cifs_dbg(FYI, "Context not matched with len %d\n",
- le16_to_cpu(cc->NameLength));
- cifs_dump_mem("Cctxt name: ", name, 4);
- } */
-
- next = le32_to_cpu(cc->Next);
- if (!next)
+
+ off = le32_to_cpu(cc->Next);
+ if (!off)
break;
- remaining -= next;
- cc = (struct create_context *)((char *)cc + next);
+ if (check_sub_overflow(rem, off, &rem))
+ return -EINVAL;
+ cc = (struct create_context *)((u8 *)cc + off);
}
if (rsp->OplockLevel != SMB2_OPLOCK_LEVEL_LEASE)
*oplock = rsp->OplockLevel;
- return;
+ return 0;
}
static int
@@ -2916,8 +2934,8 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
}
- smb2_parse_contexts(server, rsp, &oparms->fid->epoch,
- oparms->fid->lease_key, oplock, buf, posix);
+ rc = smb2_parse_contexts(server, &rsp_iov, &oparms->fid->epoch,
+ oparms->fid->lease_key, oplock, buf, posix);
creat_exit:
SMB2_open_free(&rqst);
free_rsp_buf(resp_buftype, rsp);
@@ -4676,7 +4694,7 @@ int SMB2_query_directory_init(const unsigned int xid,
memcpy(bufptr, &asteriks, len);
req->FileNameOffset =
- cpu_to_le16(sizeof(struct smb2_query_directory_req) - 1);
+ cpu_to_le16(sizeof(struct smb2_query_directory_req));
req->FileNameLength = cpu_to_le16(len);
/*
* BB could be 30 bytes or so longer if we used SMB2 specific
@@ -4873,7 +4891,7 @@ SMB2_set_info_init(struct cifs_tcon *tcon, struct TCP_Server_Info *server,
req->AdditionalInformation = cpu_to_le32(additional_info);
req->BufferOffset =
- cpu_to_le16(sizeof(struct smb2_set_info_req) - 1);
+ cpu_to_le16(sizeof(struct smb2_set_info_req));
req->BufferLength = cpu_to_le32(*size);
memcpy(req->Buffer, *data, *size);
@@ -5105,9 +5123,9 @@ build_qfs_info_req(struct kvec *iov, struct cifs_tcon *tcon,
req->VolatileFileId = volatile_fid;
/* 1 for pad */
req->InputBufferOffset =
- cpu_to_le16(sizeof(struct smb2_query_info_req) - 1);
+ cpu_to_le16(sizeof(struct smb2_query_info_req));
req->OutputBufferLength = cpu_to_le32(
- outbuf_len + sizeof(struct smb2_query_info_rsp) - 1);
+ outbuf_len + sizeof(struct smb2_query_info_rsp));
iov->iov_base = (char *)req;
iov->iov_len = total_len;
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index 89a732b31..eaa873175 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -220,7 +220,7 @@ struct smb2_err_rsp {
__le16 StructureSize;
__le16 Reserved; /* MBZ */
__le32 ByteCount; /* even if zero, at least one byte follows */
- __u8 ErrorData[1]; /* variable length */
+ __u8 ErrorData[]; /* variable length */
} __packed;
#define SYMLINK_ERROR_TAG 0x4c4d5953
@@ -464,7 +464,7 @@ struct smb2_negotiate_rsp {
__le16 SecurityBufferOffset;
__le16 SecurityBufferLength;
__le32 NegotiateContextOffset; /* Pre:SMB3.1.1 was reserved/ignored */
- __u8 Buffer[1]; /* variable length GSS security buffer */
+ __u8 Buffer[]; /* variable length GSS security buffer */
} __packed;
/* Flags */
@@ -481,7 +481,7 @@ struct smb2_sess_setup_req {
__le16 SecurityBufferOffset;
__le16 SecurityBufferLength;
__u64 PreviousSessionId;
- __u8 Buffer[1]; /* variable length GSS security buffer */
+ __u8 Buffer[]; /* variable length GSS security buffer */
} __packed;
/* Currently defined SessionFlags */
@@ -494,7 +494,7 @@ struct smb2_sess_setup_rsp {
__le16 SessionFlags;
__le16 SecurityBufferOffset;
__le16 SecurityBufferLength;
- __u8 Buffer[1]; /* variable length GSS security buffer */
+ __u8 Buffer[]; /* variable length GSS security buffer */
} __packed;
struct smb2_logoff_req {
@@ -520,7 +520,7 @@ struct smb2_tree_connect_req {
__le16 Flags; /* Reserved MBZ for dialects prior to SMB3.1.1 */
__le16 PathOffset;
__le16 PathLength;
- __u8 Buffer[1]; /* variable length */
+ __u8 Buffer[]; /* variable length */
} __packed;
/* See MS-SMB2 section 2.2.9.2 */
@@ -828,7 +828,7 @@ struct smb2_create_rsp {
__u64 VolatileFileId; /* opaque endianness */
__le32 CreateContextsOffset;
__le32 CreateContextsLength;
- __u8 Buffer[1];
+ __u8 Buffer[];
} __packed;
struct create_context {
@@ -1289,7 +1289,7 @@ struct smb2_read_plain_req {
__le32 RemainingBytes;
__le16 ReadChannelInfoOffset;
__le16 ReadChannelInfoLength;
- __u8 Buffer[1];
+ __u8 Buffer[];
} __packed;
/* Read flags */
@@ -1304,7 +1304,7 @@ struct smb2_read_rsp {
__le32 DataLength;
__le32 DataRemaining;
__u32 Flags;
- __u8 Buffer[1];
+ __u8 Buffer[];
} __packed;
/* For write request Flags field below the following flags are defined: */
@@ -1324,7 +1324,7 @@ struct smb2_write_req {
__le16 WriteChannelInfoOffset;
__le16 WriteChannelInfoLength;
__le32 Flags;
- __u8 Buffer[1];
+ __u8 Buffer[];
} __packed;
struct smb2_write_rsp {
@@ -1335,7 +1335,7 @@ struct smb2_write_rsp {
__le32 DataLength;
__le32 DataRemaining;
__u32 Reserved2;
- __u8 Buffer[1];
+ __u8 Buffer[];
} __packed;
/* notify flags */
@@ -1371,7 +1371,7 @@ struct smb2_change_notify_rsp {
__le16 StructureSize; /* Must be 9 */
__le16 OutputBufferOffset;
__le32 OutputBufferLength;
- __u8 Buffer[1]; /* array of file notify structs */
+ __u8 Buffer[]; /* array of file notify structs */
} __packed;
#define SMB2_LOCKFLAG_SHARED_LOCK 0x0001
@@ -1394,7 +1394,10 @@ struct smb2_lock_req {
__u64 PersistentFileId; /* opaque endianness */
__u64 VolatileFileId; /* opaque endianness */
/* Followed by at least one */
- struct smb2_lock_element locks[1];
+ union {
+ struct smb2_lock_element lock;
+ DECLARE_FLEX_ARRAY(struct smb2_lock_element, locks);
+ };
} __packed;
struct smb2_lock_rsp {
@@ -1434,7 +1437,7 @@ struct smb2_query_directory_req {
__le16 FileNameOffset;
__le16 FileNameLength;
__le32 OutputBufferLength;
- __u8 Buffer[1];
+ __u8 Buffer[];
} __packed;
struct smb2_query_directory_rsp {
@@ -1442,7 +1445,7 @@ struct smb2_query_directory_rsp {
__le16 StructureSize; /* Must be 9 */
__le16 OutputBufferOffset;
__le32 OutputBufferLength;
- __u8 Buffer[1];
+ __u8 Buffer[];
} __packed;
/* Possible InfoType values */
@@ -1483,7 +1486,7 @@ struct smb2_query_info_req {
__le32 Flags;
__u64 PersistentFileId; /* opaque endianness */
__u64 VolatileFileId; /* opaque endianness */
- __u8 Buffer[1];
+ __u8 Buffer[];
} __packed;
struct smb2_query_info_rsp {
@@ -1491,7 +1494,7 @@ struct smb2_query_info_rsp {
__le16 StructureSize; /* Must be 9 */
__le16 OutputBufferOffset;
__le32 OutputBufferLength;
- __u8 Buffer[1];
+ __u8 Buffer[];
} __packed;
/*
@@ -1514,7 +1517,7 @@ struct smb2_set_info_req {
__le32 AdditionalInformation;
__u64 PersistentFileId; /* opaque endianness */
__u64 VolatileFileId; /* opaque endianness */
- __u8 Buffer[1];
+ __u8 Buffer[];
} __packed;
struct smb2_set_info_rsp {
@@ -1716,7 +1719,10 @@ struct smb2_file_all_info { /* data block encoding of response to level 18 */
__le32 Mode;
__le32 AlignmentRequirement;
__le32 FileNameLength;
- char FileName[1];
+ union {
+ char __pad; /* Legacy structure padding */
+ DECLARE_FLEX_ARRAY(char, FileName);
+ };
} __packed; /* level 18 Query */
struct smb2_file_eof_info { /* encoding of request for level 10 */
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index ed2b4fb01..3184a5efc 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -270,11 +270,13 @@ extern int smb3_validate_negotiate(const unsigned int, struct cifs_tcon *);
extern enum securityEnum smb2_select_sectype(struct TCP_Server_Info *,
enum securityEnum);
-extern void smb2_parse_contexts(struct TCP_Server_Info *server,
- struct smb2_create_rsp *rsp,
- unsigned int *epoch, char *lease_key,
- __u8 *oplock, struct smb2_file_all_info *buf,
- struct create_posix_rsp *posix);
+int smb2_parse_contexts(struct TCP_Server_Info *server,
+ struct kvec *rsp_iov,
+ unsigned int *epoch,
+ char *lease_key, __u8 *oplock,
+ struct smb2_file_all_info *buf,
+ struct create_posix_rsp *posix);
+
extern int smb3_encryption_required(const struct cifs_tcon *tcon);
extern int smb2_validate_iov(unsigned int offset, unsigned int buffer_length,
struct kvec *iov, unsigned int min_buf_size);
diff --git a/fs/dcache.c b/fs/dcache.c
index ea0485861..976c7474d 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -759,12 +759,12 @@ static inline bool fast_dput(struct dentry *dentry)
*/
if (unlikely(ret < 0)) {
spin_lock(&dentry->d_lock);
- if (dentry->d_lockref.count > 1) {
- dentry->d_lockref.count--;
+ if (WARN_ON_ONCE(dentry->d_lockref.count <= 0)) {
spin_unlock(&dentry->d_lock);
return true;
}
- return false;
+ dentry->d_lockref.count--;
+ goto locked;
}
/*
@@ -815,6 +815,7 @@ static inline bool fast_dput(struct dentry *dentry)
* else could have killed it and marked it dead. Either way, we
* don't need to do anything else.
*/
+locked:
if (dentry->d_lockref.count) {
spin_unlock(&dentry->d_lock);
return true;
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index e23752d9a..c867a0d62 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -76,6 +76,14 @@ static struct inode *__ecryptfs_get_inode(struct inode *lower_inode,
if (lower_inode->i_sb != ecryptfs_superblock_to_lower(sb))
return ERR_PTR(-EXDEV);
+
+ /* Reject dealing with casefold directories. */
+ if (IS_CASEFOLDED(lower_inode)) {
+ pr_err_ratelimited("%s: Can't handle casefolded directory.\n",
+ __func__);
+ return ERR_PTR(-EREMOTE);
+ }
+
if (!igrab(lower_inode))
return ERR_PTR(-ESTALE);
inode = iget5_locked(sb, (unsigned long)lower_inode,
diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c
index f921580b5..36693924d 100644
--- a/fs/erofs/decompressor.c
+++ b/fs/erofs/decompressor.c
@@ -24,7 +24,8 @@ struct z_erofs_decompressor {
*/
int (*prepare_destpages)(struct z_erofs_decompress_req *rq,
struct list_head *pagepool);
- int (*decompress)(struct z_erofs_decompress_req *rq, u8 *out);
+ int (*decompress)(struct z_erofs_decompress_req *rq, u8 *out,
+ u8 *obase);
char *name;
};
@@ -114,10 +115,13 @@ static void *generic_copy_inplace_data(struct z_erofs_decompress_req *rq,
return tmp;
}
-static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq, u8 *out)
+static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq, u8 *out,
+ u8 *obase)
{
+ const uint nrpages_out = PAGE_ALIGN(rq->pageofs_out +
+ rq->outputsize) >> PAGE_SHIFT;
unsigned int inputmargin, inlen;
- u8 *src;
+ u8 *src, *src2;
bool copied, support_0padding;
int ret;
@@ -125,6 +129,7 @@ static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq, u8 *out)
return -EOPNOTSUPP;
src = kmap_atomic(*rq->in);
+ src2 = src;
inputmargin = 0;
support_0padding = false;
@@ -148,16 +153,15 @@ static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq, u8 *out)
if (rq->inplace_io) {
const uint oend = (rq->pageofs_out +
rq->outputsize) & ~PAGE_MASK;
- const uint nr = PAGE_ALIGN(rq->pageofs_out +
- rq->outputsize) >> PAGE_SHIFT;
-
if (rq->partial_decoding || !support_0padding ||
- rq->out[nr - 1] != rq->in[0] ||
+ rq->out[nrpages_out - 1] != rq->in[0] ||
rq->inputsize - oend <
LZ4_DECOMPRESS_INPLACE_MARGIN(inlen)) {
src = generic_copy_inplace_data(rq, src, inputmargin);
inputmargin = 0;
copied = true;
+ } else {
+ src = obase + ((nrpages_out - 1) << PAGE_SHIFT);
}
}
@@ -187,7 +191,7 @@ static int z_erofs_lz4_decompress(struct z_erofs_decompress_req *rq, u8 *out)
if (copied)
erofs_put_pcpubuf(src);
else
- kunmap_atomic(src);
+ kunmap_atomic(src2);
return ret;
}
@@ -257,7 +261,7 @@ static int z_erofs_decompress_generic(struct z_erofs_decompress_req *rq,
return PTR_ERR(dst);
rq->inplace_io = false;
- ret = alg->decompress(rq, dst);
+ ret = alg->decompress(rq, dst, NULL);
if (!ret)
copy_from_pcpubuf(rq->out, dst, rq->pageofs_out,
rq->outputsize);
@@ -291,7 +295,7 @@ static int z_erofs_decompress_generic(struct z_erofs_decompress_req *rq,
dst_maptype = 2;
dstmap_out:
- ret = alg->decompress(rq, dst + rq->pageofs_out);
+ ret = alg->decompress(rq, dst + rq->pageofs_out, dst);
if (!dst_maptype)
kunmap_atomic(dst);
diff --git a/fs/exec.c b/fs/exec.c
index 983295c0b..ebe901195 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -888,6 +888,7 @@ int transfer_args_to_stack(struct linux_binprm *bprm,
goto out;
}
+ bprm->exec += *sp_location - MAX_ARG_PAGES * PAGE_SIZE;
*sp_location = sp;
out:
@@ -1392,6 +1393,9 @@ int begin_new_exec(struct linux_binprm * bprm)
out_unlock:
up_write(&me->signal->exec_update_lock);
+ if (!bprm->cred)
+ mutex_unlock(&me->signal->cred_guard_mutex);
+
out:
return retval;
}
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 193b13630..9e1259272 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2222,7 +2222,7 @@ static int ext4_fill_es_cache_info(struct inode *inode,
/*
- * ext4_ext_determine_hole - determine hole around given block
+ * ext4_ext_find_hole - find hole around given block according to the given path
* @inode: inode we lookup in
* @path: path in extent tree to @lblk
* @lblk: pointer to logical block around which we want to determine hole
@@ -2234,9 +2234,9 @@ static int ext4_fill_es_cache_info(struct inode *inode,
* The function returns the length of a hole starting at @lblk. We update @lblk
* to the beginning of the hole if we managed to find it.
*/
-static ext4_lblk_t ext4_ext_determine_hole(struct inode *inode,
- struct ext4_ext_path *path,
- ext4_lblk_t *lblk)
+static ext4_lblk_t ext4_ext_find_hole(struct inode *inode,
+ struct ext4_ext_path *path,
+ ext4_lblk_t *lblk)
{
int depth = ext_depth(inode);
struct ext4_extent *ex;
@@ -2264,30 +2264,6 @@ static ext4_lblk_t ext4_ext_determine_hole(struct inode *inode,
}
/*
- * ext4_ext_put_gap_in_cache:
- * calculate boundaries of the gap that the requested block fits into
- * and cache this gap
- */
-static void
-ext4_ext_put_gap_in_cache(struct inode *inode, ext4_lblk_t hole_start,
- ext4_lblk_t hole_len)
-{
- struct extent_status es;
-
- ext4_es_find_extent_range(inode, &ext4_es_is_delayed, hole_start,
- hole_start + hole_len - 1, &es);
- if (es.es_len) {
- /* There's delayed extent containing lblock? */
- if (es.es_lblk <= hole_start)
- return;
- hole_len = min(es.es_lblk - hole_start, hole_len);
- }
- ext_debug(inode, " -> %u:%u\n", hole_start, hole_len);
- ext4_es_insert_extent(inode, hole_start, hole_len, ~0,
- EXTENT_STATUS_HOLE);
-}
-
-/*
* ext4_ext_rm_idx:
* removes index from the index block.
*/
@@ -3131,8 +3107,9 @@ static int ext4_zeroout_es(struct inode *inode, struct ext4_extent *ex)
if (ee_len == 0)
return 0;
- return ext4_es_insert_extent(inode, ee_block, ee_len, ee_pblock,
- EXTENT_STATUS_WRITTEN);
+ ext4_es_insert_extent(inode, ee_block, ee_len, ee_pblock,
+ EXTENT_STATUS_WRITTEN);
+ return 0;
}
/* FIXME!! we need to try to merge to left or right after zero-out */
@@ -4058,6 +4035,69 @@ static int get_implied_cluster_alloc(struct super_block *sb,
return 0;
}
+/*
+ * Determine hole length around the given logical block, first try to
+ * locate and expand the hole from the given @path, and then adjust it
+ * if it's partially or completely converted to delayed extents, insert
+ * it into the extent cache tree if it's indeed a hole, finally return
+ * the length of the determined extent.
+ */
+static ext4_lblk_t ext4_ext_determine_insert_hole(struct inode *inode,
+ struct ext4_ext_path *path,
+ ext4_lblk_t lblk)
+{
+ ext4_lblk_t hole_start, len;
+ struct extent_status es;
+
+ hole_start = lblk;
+ len = ext4_ext_find_hole(inode, path, &hole_start);
+again:
+ ext4_es_find_extent_range(inode, &ext4_es_is_delayed, hole_start,
+ hole_start + len - 1, &es);
+ if (!es.es_len)
+ goto insert_hole;
+
+ /*
+ * There's a delalloc extent in the hole, handle it if the delalloc
+ * extent is in front of, behind and straddle the queried range.
+ */
+ if (lblk >= es.es_lblk + es.es_len) {
+ /*
+ * The delalloc extent is in front of the queried range,
+ * find again from the queried start block.
+ */
+ len -= lblk - hole_start;
+ hole_start = lblk;
+ goto again;
+ } else if (in_range(lblk, es.es_lblk, es.es_len)) {
+ /*
+ * The delalloc extent containing lblk, it must have been
+ * added after ext4_map_blocks() checked the extent status
+ * tree, adjust the length to the delalloc extent's after
+ * lblk.
+ */
+ len = es.es_lblk + es.es_len - lblk;
+ return len;
+ } else {
+ /*
+ * The delalloc extent is partially or completely behind
+ * the queried range, update hole length until the
+ * beginning of the delalloc extent.
+ */
+ len = min(es.es_lblk - hole_start, len);
+ }
+
+insert_hole:
+ /* Put just found gap into cache to speed up subsequent requests */
+ ext_debug(inode, " -> %u:%u\n", hole_start, len);
+ ext4_es_insert_extent(inode, hole_start, len, ~0, EXTENT_STATUS_HOLE);
+
+ /* Update hole_len to reflect hole size after lblk */
+ if (hole_start != lblk)
+ len -= lblk - hole_start;
+
+ return len;
+}
/*
* Block allocation/map/preallocation routine for extents based files
@@ -4175,22 +4215,12 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
* we couldn't try to create block if create flag is zero
*/
if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
- ext4_lblk_t hole_start, hole_len;
+ ext4_lblk_t len;
- hole_start = map->m_lblk;
- hole_len = ext4_ext_determine_hole(inode, path, &hole_start);
- /*
- * put just found gap into cache to speed up
- * subsequent requests
- */
- ext4_ext_put_gap_in_cache(inode, hole_start, hole_len);
+ len = ext4_ext_determine_insert_hole(inode, path, map->m_lblk);
- /* Update hole_len to reflect hole size after map->m_lblk */
- if (hole_start != map->m_lblk)
- hole_len -= map->m_lblk - hole_start;
map->m_pblk = 0;
- map->m_len = min_t(unsigned int, map->m_len, hole_len);
-
+ map->m_len = min_t(unsigned int, map->m_len, len);
goto out;
}
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index cccbdfd49..f37e62546 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -846,12 +846,10 @@ out:
/*
* ext4_es_insert_extent() adds information to an inode's extent
* status tree.
- *
- * Return 0 on success, error code on failure.
*/
-int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
- ext4_lblk_t len, ext4_fsblk_t pblk,
- unsigned int status)
+void ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
+ ext4_lblk_t len, ext4_fsblk_t pblk,
+ unsigned int status)
{
struct extent_status newes;
ext4_lblk_t end = lblk + len - 1;
@@ -863,13 +861,13 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
bool revise_pending = false;
if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
- return 0;
+ return;
es_debug("add [%u/%u) %llu %x to extent status tree of inode %lu\n",
lblk, len, pblk, status, inode->i_ino);
if (!len)
- return 0;
+ return;
BUG_ON(end < lblk);
@@ -938,7 +936,7 @@ error:
goto retry;
ext4_es_print_tree(inode);
- return 0;
+ return;
}
/*
diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h
index 4ec30a798..481ec4381 100644
--- a/fs/ext4/extents_status.h
+++ b/fs/ext4/extents_status.h
@@ -127,9 +127,9 @@ extern int __init ext4_init_es(void);
extern void ext4_exit_es(void);
extern void ext4_es_init_tree(struct ext4_es_tree *tree);
-extern int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
- ext4_lblk_t len, ext4_fsblk_t pblk,
- unsigned int status);
+extern void ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
+ ext4_lblk_t len, ext4_fsblk_t pblk,
+ unsigned int status);
extern void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk,
ext4_lblk_t len, ext4_fsblk_t pblk,
unsigned int status);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 365c4d3a4..8b48ed351 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -589,10 +589,8 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
ext4_es_scan_range(inode, &ext4_es_is_delayed, map->m_lblk,
map->m_lblk + map->m_len - 1))
status |= EXTENT_STATUS_DELAYED;
- ret = ext4_es_insert_extent(inode, map->m_lblk,
- map->m_len, map->m_pblk, status);
- if (ret < 0)
- retval = ret;
+ ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
+ map->m_pblk, status);
}
up_read((&EXT4_I(inode)->i_data_sem));
@@ -701,12 +699,8 @@ found:
ext4_es_scan_range(inode, &ext4_es_is_delayed, map->m_lblk,
map->m_lblk + map->m_len - 1))
status |= EXTENT_STATUS_DELAYED;
- ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
- map->m_pblk, status);
- if (ret < 0) {
- retval = ret;
- goto out_sem;
- }
+ ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
+ map->m_pblk, status);
}
out_sem:
@@ -1734,11 +1728,8 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
/* Lookup extent status tree firstly */
if (ext4_es_lookup_extent(inode, iblock, NULL, &es)) {
- if (ext4_es_is_hole(&es)) {
- retval = 0;
- down_read(&EXT4_I(inode)->i_data_sem);
+ if (ext4_es_is_hole(&es))
goto add_delayed;
- }
/*
* Delayed extent could be allocated by fallocate.
@@ -1780,27 +1771,11 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
retval = ext4_ext_map_blocks(NULL, inode, map, 0);
else
retval = ext4_ind_map_blocks(NULL, inode, map, 0);
-
-add_delayed:
- if (retval == 0) {
- int ret;
-
- /*
- * XXX: __block_prepare_write() unmaps passed block,
- * is it OK?
- */
-
- ret = ext4_insert_delayed_block(inode, map->m_lblk);
- if (ret != 0) {
- retval = ret;
- goto out_unlock;
- }
-
- map_bh(bh, inode->i_sb, invalid_block);
- set_buffer_new(bh);
- set_buffer_delay(bh);
- } else if (retval > 0) {
- int ret;
+ if (retval < 0) {
+ up_read(&EXT4_I(inode)->i_data_sem);
+ return retval;
+ }
+ if (retval > 0) {
unsigned int status;
if (unlikely(retval != map->m_len)) {
@@ -1813,15 +1788,23 @@ add_delayed:
status = map->m_flags & EXT4_MAP_UNWRITTEN ?
EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
- ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
- map->m_pblk, status);
- if (ret != 0)
- retval = ret;
+ ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
+ map->m_pblk, status);
+ up_read(&EXT4_I(inode)->i_data_sem);
+ return retval;
}
+ up_read(&EXT4_I(inode)->i_data_sem);
-out_unlock:
- up_read((&EXT4_I(inode)->i_data_sem));
+add_delayed:
+ down_write(&EXT4_I(inode)->i_data_sem);
+ retval = ext4_insert_delayed_block(inode, map->m_lblk);
+ up_write(&EXT4_I(inode)->i_data_sem);
+ if (retval)
+ return retval;
+ map_bh(bh, inode->i_sb, invalid_block);
+ set_buffer_new(bh);
+ set_buffer_delay(bh);
return retval;
}
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 3babc07ae..c1d632725 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -823,6 +823,24 @@ void ext4_mb_generate_buddy(struct super_block *sb,
atomic64_add(period, &sbi->s_mb_generation_time);
}
+static void mb_regenerate_buddy(struct ext4_buddy *e4b)
+{
+ int count;
+ int order = 1;
+ void *buddy;
+
+ while ((buddy = mb_find_buddy(e4b, order++, &count)))
+ ext4_set_bits(buddy, 0, count);
+
+ e4b->bd_info->bb_fragments = 0;
+ memset(e4b->bd_info->bb_counters, 0,
+ sizeof(*e4b->bd_info->bb_counters) *
+ (e4b->bd_sb->s_blocksize_bits + 2));
+
+ ext4_mb_generate_buddy(e4b->bd_sb, e4b->bd_buddy,
+ e4b->bd_bitmap, e4b->bd_group, e4b->bd_info);
+}
+
/* The buddy information is attached the buddy cache inode
* for convenience. The information regarding each group
* is loaded via ext4_mb_load_buddy. The information involve
@@ -1476,11 +1494,6 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
mb_check_buddy(e4b);
mb_free_blocks_double(inode, e4b, first, count);
- this_cpu_inc(discard_pa_seq);
- e4b->bd_info->bb_free += count;
- if (first < e4b->bd_info->bb_first_free)
- e4b->bd_info->bb_first_free = first;
-
/* access memory sequentially: check left neighbour,
* clear range and then check right neighbour
*/
@@ -1494,21 +1507,31 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
struct ext4_sb_info *sbi = EXT4_SB(sb);
ext4_fsblk_t blocknr;
+ /*
+ * Fastcommit replay can free already freed blocks which
+ * corrupts allocation info. Regenerate it.
+ */
+ if (sbi->s_mount_state & EXT4_FC_REPLAY) {
+ mb_regenerate_buddy(e4b);
+ goto check;
+ }
+
blocknr = ext4_group_first_block_no(sb, e4b->bd_group);
blocknr += EXT4_C2B(sbi, block);
- if (!(sbi->s_mount_state & EXT4_FC_REPLAY)) {
- ext4_grp_locked_error(sb, e4b->bd_group,
- inode ? inode->i_ino : 0,
- blocknr,
- "freeing already freed block (bit %u); block bitmap corrupt.",
- block);
- ext4_mark_group_bitmap_corrupted(
- sb, e4b->bd_group,
+ ext4_grp_locked_error(sb, e4b->bd_group,
+ inode ? inode->i_ino : 0, blocknr,
+ "freeing already freed block (bit %u); block bitmap corrupt.",
+ block);
+ ext4_mark_group_bitmap_corrupted(sb, e4b->bd_group,
EXT4_GROUP_INFO_BBITMAP_CORRUPT);
- }
- goto done;
+ return;
}
+ this_cpu_inc(discard_pa_seq);
+ e4b->bd_info->bb_free += count;
+ if (first < e4b->bd_info->bb_first_free)
+ e4b->bd_info->bb_first_free = first;
+
/* let's maintain fragments counter */
if (left_is_free && right_is_free)
e4b->bd_info->bb_fragments--;
@@ -1533,8 +1556,8 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
if (first <= last)
mb_buddy_mark_free(e4b, first >> 1, last >> 1);
-done:
mb_set_largest_free_order(sb, e4b->bd_info);
+check:
mb_check_buddy(e4b);
}
@@ -1854,6 +1877,9 @@ int ext4_mb_try_best_found(struct ext4_allocation_context *ac,
return err;
ext4_lock_group(ac->ac_sb, group);
+ if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info)))
+ goto out;
+
max = mb_find_extent(e4b, ex.fe_start, ex.fe_len, &ex);
if (max > 0) {
@@ -1861,6 +1887,7 @@ int ext4_mb_try_best_found(struct ext4_allocation_context *ac,
ext4_mb_use_best_found(ac, e4b);
}
+out:
ext4_unlock_group(ac->ac_sb, group);
ext4_mb_unload_buddy(e4b);
@@ -1889,12 +1916,10 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
if (err)
return err;
- if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) {
- ext4_mb_unload_buddy(e4b);
- return 0;
- }
-
ext4_lock_group(ac->ac_sb, group);
+ if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info)))
+ goto out;
+
max = mb_find_extent(e4b, ac->ac_g_ex.fe_start,
ac->ac_g_ex.fe_len, &ex);
ex.fe_logical = 0xDEADFA11; /* debug value */
@@ -1927,6 +1952,7 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
ac->ac_b_ex = ex;
ext4_mb_use_best_found(ac, e4b);
}
+out:
ext4_unlock_group(ac->ac_sb, group);
ext4_mb_unload_buddy(e4b);
@@ -2565,7 +2591,10 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
for (i = 0; i <= 13; i++)
seq_printf(seq, " %-5u", i <= blocksize_bits + 1 ?
sg.info.bb_counters[i] : 0);
- seq_puts(seq, " ]\n");
+ seq_puts(seq, " ]");
+ if (EXT4_MB_GRP_BBITMAP_CORRUPT(&sg.info))
+ seq_puts(seq, " Block bitmap corrupted!");
+ seq_puts(seq, "\n");
return 0;
}
@@ -4136,10 +4165,16 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
.fe_len = ac->ac_g_ex.fe_len,
};
loff_t orig_goal_end = extent_logical_end(sbi, &ex);
+ loff_t o_ex_end = extent_logical_end(sbi, &ac->ac_o_ex);
- /* we can't allocate as much as normalizer wants.
- * so, found space must get proper lstart
- * to cover original request */
+ /*
+ * We can't allocate as much as normalizer wants, so we try
+ * to get proper lstart to cover the original request, except
+ * when the goal doesn't cover the original request as below:
+ *
+ * orig_ex:2045/2055(10), isize:8417280 -> normalized:0/2048
+ * best_ex:0/200(200) -> adjusted: 1848/2048(200)
+ */
BUG_ON(ac->ac_g_ex.fe_logical > ac->ac_o_ex.fe_logical);
BUG_ON(ac->ac_g_ex.fe_len < ac->ac_o_ex.fe_len);
@@ -4151,7 +4186,7 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
* 1. Check if best ex can be kept at end of goal and still
* cover original start
* 2. Else, check if best ex can be kept at start of goal and
- * still cover original start
+ * still cover original end
* 3. Else, keep the best ex at start of original request.
*/
ex.fe_len = ac->ac_b_ex.fe_len;
@@ -4161,7 +4196,7 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
goto adjust_bex;
ex.fe_logical = ac->ac_g_ex.fe_logical;
- if (ac->ac_o_ex.fe_logical < extent_logical_end(sbi, &ex))
+ if (o_ex_end <= extent_logical_end(sbi, &ex))
goto adjust_bex;
ex.fe_logical = ac->ac_o_ex.fe_logical;
@@ -4169,7 +4204,6 @@ adjust_bex:
ac->ac_b_ex.fe_logical = ex.fe_logical;
BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical);
- BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len);
BUG_ON(extent_logical_end(sbi, &ex) > orig_goal_end);
}
@@ -5895,11 +5929,16 @@ __acquires(bitlock)
static ext4_grpblk_t ext4_last_grp_cluster(struct super_block *sb,
ext4_group_t grp)
{
- if (grp < ext4_get_groups_count(sb))
- return EXT4_CLUSTERS_PER_GROUP(sb) - 1;
- return (ext4_blocks_count(EXT4_SB(sb)->s_es) -
- ext4_group_first_block_no(sb, grp) - 1) >>
- EXT4_CLUSTER_BITS(sb);
+ unsigned long nr_clusters_in_group;
+
+ if (grp < (ext4_get_groups_count(sb) - 1))
+ nr_clusters_in_group = EXT4_CLUSTERS_PER_GROUP(sb);
+ else
+ nr_clusters_in_group = (ext4_blocks_count(EXT4_SB(sb)->s_es) -
+ ext4_group_first_block_no(sb, grp))
+ >> EXT4_CLUSTER_BITS(sb);
+
+ return nr_clusters_in_group - 1;
}
static bool ext4_trim_interrupted(void)
@@ -5911,13 +5950,15 @@ static int ext4_try_to_trim_range(struct super_block *sb,
struct ext4_buddy *e4b, ext4_grpblk_t start,
ext4_grpblk_t max, ext4_grpblk_t minblocks)
{
- ext4_grpblk_t next, count, free_count;
+ ext4_grpblk_t next, count, free_count, last, origin_start;
bool set_trimmed = false;
void *bitmap;
+ last = ext4_last_grp_cluster(sb, e4b->bd_group);
bitmap = e4b->bd_bitmap;
- if (start == 0 && max >= ext4_last_grp_cluster(sb, e4b->bd_group))
+ if (start == 0 && max >= last)
set_trimmed = true;
+ origin_start = start;
start = max(e4b->bd_info->bb_first_free, start);
count = 0;
free_count = 0;
@@ -5926,7 +5967,10 @@ static int ext4_try_to_trim_range(struct super_block *sb,
start = mb_find_next_zero_bit(bitmap, max + 1, start);
if (start > max)
break;
- next = mb_find_next_bit(bitmap, max + 1, start);
+
+ next = mb_find_next_bit(bitmap, last + 1, start);
+ if (origin_start == 0 && next >= last)
+ set_trimmed = true;
if ((next - start) >= minblocks) {
int ret = ext4_trim_extent(sb, start, next - start, e4b);
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 64a579734..f8dd5d972 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -615,6 +615,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
goto out;
o_end = o_start + len;
+ *moved_len = 0;
while (o_start < o_end) {
struct ext4_extent *ex;
ext4_lblk_t cur_blk, next_blk;
@@ -670,7 +671,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
*/
ext4_double_up_write_data_sem(orig_inode, donor_inode);
/* Swap original branches with new branches */
- move_extent_per_page(o_filp, donor_inode,
+ *moved_len += move_extent_per_page(o_filp, donor_inode,
orig_page_index, donor_page_index,
offset_in_page, cur_len,
unwritten, &ret);
@@ -680,9 +681,6 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
o_start += cur_len;
d_start += cur_len;
}
- *moved_len = o_start - orig_blk;
- if (*moved_len > len)
- *moved_len = len;
out:
if (*moved_len) {
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 9b4199a1e..5d45ec29e 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -227,17 +227,24 @@ struct ext4_new_flex_group_data {
in the flex group */
__u16 *bg_flags; /* block group flags of groups
in @groups */
+ ext4_group_t resize_bg; /* number of allocated
+ new_group_data */
ext4_group_t count; /* number of groups in @groups
*/
};
/*
+ * Avoiding memory allocation failures due to too many groups added each time.
+ */
+#define MAX_RESIZE_BG 16384
+
+/*
* alloc_flex_gd() allocates a ext4_new_flex_group_data with size of
* @flexbg_size.
*
* Returns NULL on failure otherwise address of the allocated structure.
*/
-static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned long flexbg_size)
+static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned int flexbg_size)
{
struct ext4_new_flex_group_data *flex_gd;
@@ -245,17 +252,18 @@ static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned long flexbg_size)
if (flex_gd == NULL)
goto out3;
- if (flexbg_size >= UINT_MAX / sizeof(struct ext4_new_group_data))
- goto out2;
- flex_gd->count = flexbg_size;
+ if (unlikely(flexbg_size > MAX_RESIZE_BG))
+ flex_gd->resize_bg = MAX_RESIZE_BG;
+ else
+ flex_gd->resize_bg = flexbg_size;
- flex_gd->groups = kmalloc_array(flexbg_size,
+ flex_gd->groups = kmalloc_array(flex_gd->resize_bg,
sizeof(struct ext4_new_group_data),
GFP_NOFS);
if (flex_gd->groups == NULL)
goto out2;
- flex_gd->bg_flags = kmalloc_array(flexbg_size, sizeof(__u16),
+ flex_gd->bg_flags = kmalloc_array(flex_gd->resize_bg, sizeof(__u16),
GFP_NOFS);
if (flex_gd->bg_flags == NULL)
goto out1;
@@ -292,7 +300,7 @@ static void free_flex_gd(struct ext4_new_flex_group_data *flex_gd)
*/
static int ext4_alloc_group_tables(struct super_block *sb,
struct ext4_new_flex_group_data *flex_gd,
- int flexbg_size)
+ unsigned int flexbg_size)
{
struct ext4_new_group_data *group_data = flex_gd->groups;
ext4_fsblk_t start_blk;
@@ -393,12 +401,12 @@ next_group:
group = group_data[0].group;
printk(KERN_DEBUG "EXT4-fs: adding a flex group with "
- "%d groups, flexbg size is %d:\n", flex_gd->count,
+ "%u groups, flexbg size is %u:\n", flex_gd->count,
flexbg_size);
for (i = 0; i < flex_gd->count; i++) {
ext4_debug(
- "adding %s group %u: %u blocks (%d free, %d mdata blocks)\n",
+ "adding %s group %u: %u blocks (%u free, %u mdata blocks)\n",
ext4_bg_has_super(sb, group + i) ? "normal" :
"no-super", group + i,
group_data[i].blocks_count,
@@ -1537,7 +1545,8 @@ exit_journal:
int gdb_num = group / EXT4_DESC_PER_BLOCK(sb);
int gdb_num_end = ((group + flex_gd->count - 1) /
EXT4_DESC_PER_BLOCK(sb));
- int meta_bg = ext4_has_feature_meta_bg(sb);
+ int meta_bg = ext4_has_feature_meta_bg(sb) &&
+ gdb_num >= le32_to_cpu(es->s_first_meta_bg);
sector_t padding_blocks = meta_bg ? 0 : sbi->s_sbh->b_blocknr -
ext4_group_first_block_no(sb, 0);
sector_t old_gdb = 0;
@@ -1562,8 +1571,7 @@ exit:
static int ext4_setup_next_flex_gd(struct super_block *sb,
struct ext4_new_flex_group_data *flex_gd,
- ext4_fsblk_t n_blocks_count,
- unsigned long flexbg_size)
+ ext4_fsblk_t n_blocks_count)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = sbi->s_es;
@@ -1587,7 +1595,7 @@ static int ext4_setup_next_flex_gd(struct super_block *sb,
BUG_ON(last);
ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &last);
- last_group = group | (flexbg_size - 1);
+ last_group = group | (flex_gd->resize_bg - 1);
if (last_group > n_group)
last_group = n_group;
@@ -1941,8 +1949,9 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
ext4_fsblk_t o_blocks_count;
ext4_fsblk_t n_blocks_count_retry = 0;
unsigned long last_update_time = 0;
- int err = 0, flexbg_size = 1 << sbi->s_log_groups_per_flex;
+ int err = 0;
int meta_bg;
+ unsigned int flexbg_size = ext4_flex_bg_size(sbi);
/* See if the device is actually as big as what was requested */
bh = ext4_sb_bread(sb, n_blocks_count - 1, 0);
@@ -2083,8 +2092,7 @@ retry:
/* Add flex groups. Note that a regular group is a
* flex group with 1 group.
*/
- while (ext4_setup_next_flex_gd(sb, flex_gd, n_blocks_count,
- flexbg_size)) {
+ while (ext4_setup_next_flex_gd(sb, flex_gd, n_blocks_count)) {
if (jiffies - last_update_time > HZ * 10) {
if (last_update_time)
ext4_msg(sb, KERN_INFO,
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index e386d67cf..0149d3c2c 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -6205,6 +6205,10 @@ static int ext4_write_dquot(struct dquot *dquot)
if (IS_ERR(handle))
return PTR_ERR(handle);
ret = dquot_commit(dquot);
+ if (ret < 0)
+ ext4_error_err(dquot->dq_sb, -ret,
+ "Failed to commit dquot type %d",
+ dquot->dq_id.type);
err = ext4_journal_stop(handle);
if (!ret)
ret = err;
@@ -6221,6 +6225,10 @@ static int ext4_acquire_dquot(struct dquot *dquot)
if (IS_ERR(handle))
return PTR_ERR(handle);
ret = dquot_acquire(dquot);
+ if (ret < 0)
+ ext4_error_err(dquot->dq_sb, -ret,
+ "Failed to acquire dquot type %d",
+ dquot->dq_id.type);
err = ext4_journal_stop(handle);
if (!ret)
ret = err;
@@ -6240,6 +6248,10 @@ static int ext4_release_dquot(struct dquot *dquot)
return PTR_ERR(handle);
}
ret = dquot_release(dquot);
+ if (ret < 0)
+ ext4_error_err(dquot->dq_sb, -ret,
+ "Failed to release dquot type %d",
+ dquot->dq_id.type);
err = ext4_journal_stop(handle);
if (!ret)
ret = err;
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 55818bd51..40e805014 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -3663,7 +3663,13 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count)
goto next;
}
- if (__is_valid_data_blkaddr(blkaddr)) {
+ /*
+ * compressed cluster was not released due to it
+ * fails in release_compress_blocks(), so NEW_ADDR
+ * is a possible case.
+ */
+ if (blkaddr == NEW_ADDR ||
+ __is_valid_data_blkaddr(blkaddr)) {
compr_blocks++;
continue;
}
@@ -3673,6 +3679,11 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count)
}
reserved = cluster_size - compr_blocks;
+
+ /* for the case all blocks in cluster were reserved */
+ if (reserved == 1)
+ goto next;
+
ret = inc_valid_block_count(sbi, dn->inode, &reserved);
if (ret)
return ret;
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index c3c527afd..cd56af93d 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -641,7 +641,16 @@ retry_dn:
*/
if (dest == NEW_ADDR) {
f2fs_truncate_data_blocks_range(&dn, 1);
- f2fs_reserve_new_block(&dn);
+ do {
+ err = f2fs_reserve_new_block(&dn);
+ if (err == -ENOSPC) {
+ f2fs_bug_on(sbi, 1);
+ break;
+ }
+ } while (err &&
+ IS_ENABLED(CONFIG_F2FS_FAULT_INJECTION));
+ if (err)
+ goto err;
continue;
}
@@ -649,12 +658,14 @@ retry_dn:
if (f2fs_is_valid_blkaddr(sbi, dest, META_POR)) {
if (src == NULL_ADDR) {
- err = f2fs_reserve_new_block(&dn);
- while (err &&
- IS_ENABLED(CONFIG_F2FS_FAULT_INJECTION))
+ do {
err = f2fs_reserve_new_block(&dn);
- /* We should not get -ENOSPC */
- f2fs_bug_on(sbi, err);
+ if (err == -ENOSPC) {
+ f2fs_bug_on(sbi, 1);
+ break;
+ }
+ } while (err &&
+ IS_ENABLED(CONFIG_F2FS_FAULT_INJECTION));
if (err)
goto err;
}
@@ -844,6 +855,8 @@ skip:
if (!err && fix_curseg_write_pointer && !f2fs_readonly(sbi->sb) &&
f2fs_sb_has_blkzoned(sbi)) {
err = f2fs_fix_curseg_write_pointer(sbi);
+ if (!err)
+ err = f2fs_check_write_pointer(sbi);
ret = err;
}
diff --git a/fs/fat/nfs.c b/fs/fat/nfs.c
index af191371c..bab63eeaf 100644
--- a/fs/fat/nfs.c
+++ b/fs/fat/nfs.c
@@ -130,6 +130,12 @@ fat_encode_fh_nostale(struct inode *inode, __u32 *fh, int *lenp,
fid->parent_i_gen = parent->i_generation;
type = FILEID_FAT_WITH_PARENT;
*lenp = FAT_FID_SIZE_WITH_PARENT;
+ } else {
+ /*
+ * We need to initialize this field because the fh is actually
+ * 12 bytes long
+ */
+ fid->parent_i_pos_hi = 0;
}
return type;
diff --git a/fs/fhandle.c b/fs/fhandle.c
index 01263ffbc..9a5f153c8 100644
--- a/fs/fhandle.c
+++ b/fs/fhandle.c
@@ -37,7 +37,7 @@ static long do_sys_name_to_handle(struct path *path,
if (f_handle.handle_bytes > MAX_HANDLE_SZ)
return -EINVAL;
- handle = kmalloc(sizeof(struct file_handle) + f_handle.handle_bytes,
+ handle = kzalloc(sizeof(struct file_handle) + f_handle.handle_bytes,
GFP_KERNEL);
if (!handle)
return -ENOMEM;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index b0c701c00..d131f34cd 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -451,6 +451,10 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name
goto out_put_forget;
if (fuse_invalid_attr(&outarg->attr))
goto out_put_forget;
+ if (outarg->nodeid == FUSE_ROOT_ID && outarg->generation != 0) {
+ pr_warn_once("root generation should be zero\n");
+ outarg->generation = 0;
+ }
*inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
&outarg->attr, entry_attr_timeout(outarg),
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index ceaa68683..33eb5fefc 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -873,7 +873,6 @@ static inline bool fuse_stale_inode(const struct inode *inode, int generation,
static inline void fuse_make_bad(struct inode *inode)
{
- remove_inode_hash(inode);
set_bit(FUSE_I_BAD, &get_fuse_inode(inode)->state);
}
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 9ea175ff9..4a7ebccd3 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -352,8 +352,11 @@ retry:
} else if (fuse_stale_inode(inode, generation, attr)) {
/* nodeid was reused, any I/O on the old inode should fail */
fuse_make_bad(inode);
- iput(inode);
- goto retry;
+ if (inode != d_inode(sb->s_root)) {
+ remove_inode_hash(inode);
+ iput(inode);
+ goto retry;
+ }
}
done:
fi = get_fuse_inode(inode);
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 5181e6d4e..bf3cda498 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -135,6 +135,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
loff_t len, vma_len;
int ret;
struct hstate *h = hstate_file(file);
+ vm_flags_t vm_flags;
/*
* vma address alignment (but not the pgoff alignment) has
@@ -176,10 +177,20 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
file_accessed(file);
ret = -ENOMEM;
- if (hugetlb_reserve_pages(inode,
+
+ vm_flags = vma->vm_flags;
+ /*
+ * for SHM_HUGETLB, the pages are reserved in the shmget() call so skip
+ * reserving here. Note: only for SHM hugetlbfs file, the inode
+ * flag S_PRIVATE is set.
+ */
+ if (inode->i_flags & S_PRIVATE)
+ vm_flags |= VM_NORESERVE;
+
+ if (!hugetlb_reserve_pages(inode,
vma->vm_pgoff >> huge_page_order(h),
len >> huge_page_shift(h), vma,
- vma->vm_flags))
+ vm_flags))
goto out;
ret = 0;
@@ -1234,6 +1245,7 @@ static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *par
{
struct hugetlbfs_fs_context *ctx = fc->fs_private;
struct fs_parse_result result;
+ struct hstate *h;
char *rest;
unsigned long ps;
int opt;
@@ -1278,11 +1290,12 @@ static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *par
case Opt_pagesize:
ps = memparse(param->string, &rest);
- ctx->hstate = size_to_hstate(ps);
- if (!ctx->hstate) {
+ h = size_to_hstate(ps);
+ if (!h) {
pr_err("Unsupported page size %lu MB\n", ps >> 20);
return -EINVAL;
}
+ ctx->hstate = h;
return 0;
case Opt_min_size:
@@ -1498,7 +1511,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size,
inode->i_size = size;
clear_nlink(inode);
- if (hugetlb_reserve_pages(inode, 0,
+ if (!hugetlb_reserve_pages(inode, 0,
size >> huge_page_shift(hstate_inode(inode)), NULL,
acctflag))
file = ERR_PTR(-ENOMEM);
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 7bcc60091..b2d06f016 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -799,8 +799,7 @@ COMPAT_SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd,
if (!f.file)
return -EBADF;
- /* RED-PEN how should LSM module know it's handling 32bit? */
- error = security_file_ioctl(f.file, cmd, arg);
+ error = security_file_ioctl_compat(f.file, cmd, arg);
if (error)
goto out;
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index f62b5a501..4c763f573 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -907,8 +907,22 @@ root_found:
* we then decide whether to use the Joliet descriptor.
*/
inode = isofs_iget(s, sbi->s_firstdatazone, 0);
- if (IS_ERR(inode))
- goto out_no_root;
+
+ /*
+ * Fix for broken CDs with a corrupt root inode but a correct Joliet
+ * root directory.
+ */
+ if (IS_ERR(inode)) {
+ if (joliet_level && sbi->s_firstdatazone != first_data_zone) {
+ printk(KERN_NOTICE
+ "ISOFS: root inode is unusable. "
+ "Disabling Rock Ridge and switching to Joliet.");
+ sbi->s_rock = 0;
+ inode = NULL;
+ } else {
+ goto out_no_root;
+ }
+ }
/*
* Fix for broken CDs with Rock Ridge and empty ISO root directory but
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 472932b9e..7898983c9 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -58,28 +58,6 @@ static inline void __buffer_unlink(struct journal_head *jh)
}
/*
- * Move a buffer from the checkpoint list to the checkpoint io list
- *
- * Called with j_list_lock held
- */
-static inline void __buffer_relink_io(struct journal_head *jh)
-{
- transaction_t *transaction = jh->b_cp_transaction;
-
- __buffer_unlink_first(jh);
-
- if (!transaction->t_checkpoint_io_list) {
- jh->b_cpnext = jh->b_cpprev = jh;
- } else {
- jh->b_cpnext = transaction->t_checkpoint_io_list;
- jh->b_cpprev = transaction->t_checkpoint_io_list->b_cpprev;
- jh->b_cpprev->b_cpnext = jh;
- jh->b_cpnext->b_cpprev = jh;
- }
- transaction->t_checkpoint_io_list = jh;
-}
-
-/*
* Try to release a checkpointed buffer from its transaction.
* Returns 1 if we released it and 2 if we also released the
* whole transaction.
@@ -91,8 +69,7 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
int ret = 0;
struct buffer_head *bh = jh2bh(jh);
- if (jh->b_transaction == NULL && !buffer_locked(bh) &&
- !buffer_dirty(bh) && !buffer_write_io_error(bh)) {
+ if (!jh->b_transaction && !buffer_locked(bh) && !buffer_dirty(bh)) {
JBUFFER_TRACE(jh, "remove from checkpoint list");
ret = __jbd2_journal_remove_checkpoint(jh) + 1;
}
@@ -191,6 +168,7 @@ __flush_batch(journal_t *journal, int *batch_count)
struct buffer_head *bh = journal->j_chkpt_bhs[i];
BUFFER_TRACE(bh, "brelse");
__brelse(bh);
+ journal->j_chkpt_bhs[i] = NULL;
}
*batch_count = 0;
}
@@ -228,7 +206,6 @@ int jbd2_log_do_checkpoint(journal_t *journal)
* OK, we need to start writing disk blocks. Take one transaction
* and write it.
*/
- result = 0;
spin_lock(&journal->j_list_lock);
if (!journal->j_checkpoint_transactions)
goto out;
@@ -251,15 +228,6 @@ restart:
jh = transaction->t_checkpoint_list;
bh = jh2bh(jh);
- if (buffer_locked(bh)) {
- get_bh(bh);
- spin_unlock(&journal->j_list_lock);
- wait_on_buffer(bh);
- /* the journal_head may have gone by now */
- BUFFER_TRACE(bh, "brelse");
- __brelse(bh);
- goto retry;
- }
if (jh->b_transaction != NULL) {
transaction_t *t = jh->b_transaction;
tid_t tid = t->t_tid;
@@ -294,32 +262,50 @@ restart:
spin_lock(&journal->j_list_lock);
goto restart;
}
- if (!buffer_dirty(bh)) {
- if (unlikely(buffer_write_io_error(bh)) && !result)
- result = -EIO;
+ if (!trylock_buffer(bh)) {
+ /*
+ * The buffer is locked, it may be writing back, or
+ * flushing out in the last couple of cycles, or
+ * re-adding into a new transaction, need to check
+ * it again until it's unlocked.
+ */
+ get_bh(bh);
+ spin_unlock(&journal->j_list_lock);
+ wait_on_buffer(bh);
+ /* the journal_head may have gone by now */
+ BUFFER_TRACE(bh, "brelse");
+ __brelse(bh);
+ goto retry;
+ } else if (!buffer_dirty(bh)) {
+ unlock_buffer(bh);
BUFFER_TRACE(bh, "remove from checkpoint");
- if (__jbd2_journal_remove_checkpoint(jh))
- /* The transaction was released; we're done */
+ /*
+ * If the transaction was released or the checkpoint
+ * list was empty, we're done.
+ */
+ if (__jbd2_journal_remove_checkpoint(jh) ||
+ !transaction->t_checkpoint_list)
goto out;
- continue;
+ } else {
+ unlock_buffer(bh);
+ /*
+ * We are about to write the buffer, it could be
+ * raced by some other transaction shrink or buffer
+ * re-log logic once we release the j_list_lock,
+ * leave it on the checkpoint list and check status
+ * again to make sure it's clean.
+ */
+ BUFFER_TRACE(bh, "queue");
+ get_bh(bh);
+ J_ASSERT_BH(bh, !buffer_jwrite(bh));
+ journal->j_chkpt_bhs[batch_count++] = bh;
+ transaction->t_chp_stats.cs_written++;
+ transaction->t_checkpoint_list = jh->b_cpnext;
}
- /*
- * Important: we are about to write the buffer, and
- * possibly block, while still holding the journal
- * lock. We cannot afford to let the transaction
- * logic start messing around with this buffer before
- * we write it to disk, as that would break
- * recoverability.
- */
- BUFFER_TRACE(bh, "queue");
- get_bh(bh);
- J_ASSERT_BH(bh, !buffer_jwrite(bh));
- journal->j_chkpt_bhs[batch_count++] = bh;
- __buffer_relink_io(jh);
- transaction->t_chp_stats.cs_written++;
+
if ((batch_count == JBD2_NR_BATCH) ||
- need_resched() ||
- spin_needbreak(&journal->j_list_lock))
+ need_resched() || spin_needbreak(&journal->j_list_lock) ||
+ jh2bh(transaction->t_checkpoint_list) == journal->j_chkpt_bhs[0])
goto unlock_and_flush;
}
@@ -333,46 +319,9 @@ restart:
goto restart;
}
- /*
- * Now we issued all of the transaction's buffers, let's deal
- * with the buffers that are out for I/O.
- */
-restart2:
- /* Did somebody clean up the transaction in the meanwhile? */
- if (journal->j_checkpoint_transactions != transaction ||
- transaction->t_tid != this_tid)
- goto out;
-
- while (transaction->t_checkpoint_io_list) {
- jh = transaction->t_checkpoint_io_list;
- bh = jh2bh(jh);
- if (buffer_locked(bh)) {
- get_bh(bh);
- spin_unlock(&journal->j_list_lock);
- wait_on_buffer(bh);
- /* the journal_head may have gone by now */
- BUFFER_TRACE(bh, "brelse");
- __brelse(bh);
- spin_lock(&journal->j_list_lock);
- goto restart2;
- }
- if (unlikely(buffer_write_io_error(bh)) && !result)
- result = -EIO;
-
- /*
- * Now in whatever state the buffer currently is, we
- * know that it has been written out and so we can
- * drop it from the list
- */
- if (__jbd2_journal_remove_checkpoint(jh))
- break;
- }
out:
spin_unlock(&journal->j_list_lock);
- if (result < 0)
- jbd2_journal_abort(journal, result);
- else
- result = jbd2_cleanup_journal_tail(journal);
+ result = jbd2_cleanup_journal_tail(journal);
return (result < 0) ? result : 0;
}
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index 72eb5ed54..9b6849b9b 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -63,10 +63,10 @@
*/
static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
int nblocks);
-static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval);
-static int dbBackSplit(dmtree_t * tp, int leafno);
-static int dbJoin(dmtree_t * tp, int leafno, int newval);
-static void dbAdjTree(dmtree_t * tp, int leafno, int newval);
+static void dbSplit(dmtree_t *tp, int leafno, int splitsz, int newval, bool is_ctl);
+static int dbBackSplit(dmtree_t *tp, int leafno, bool is_ctl);
+static int dbJoin(dmtree_t *tp, int leafno, int newval, bool is_ctl);
+static void dbAdjTree(dmtree_t *tp, int leafno, int newval, bool is_ctl);
static int dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc,
int level);
static int dbAllocAny(struct bmap * bmp, s64 nblocks, int l2nb, s64 * results);
@@ -2171,7 +2171,7 @@ static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
* system.
*/
if (dp->tree.stree[word] == NOFREE)
- dbBackSplit((dmtree_t *) & dp->tree, word);
+ dbBackSplit((dmtree_t *)&dp->tree, word, false);
dbAllocBits(bmp, dp, blkno, nblocks);
}
@@ -2257,7 +2257,7 @@ static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
* the binary system of the leaves if need be.
*/
dbSplit(tp, word, BUDMIN,
- dbMaxBud((u8 *) & dp->wmap[word]));
+ dbMaxBud((u8 *)&dp->wmap[word]), false);
word += 1;
} else {
@@ -2297,7 +2297,7 @@ static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
* system of the leaves to reflect the current
* allocation (size).
*/
- dbSplit(tp, word, size, NOFREE);
+ dbSplit(tp, word, size, NOFREE, false);
/* get the number of dmap words handled */
nw = BUDSIZE(size, BUDMIN);
@@ -2404,7 +2404,7 @@ static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
/* update the leaf for this dmap word.
*/
rc = dbJoin(tp, word,
- dbMaxBud((u8 *) & dp->wmap[word]));
+ dbMaxBud((u8 *)&dp->wmap[word]), false);
if (rc)
return rc;
@@ -2437,7 +2437,7 @@ static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
/* update the leaf.
*/
- rc = dbJoin(tp, word, size);
+ rc = dbJoin(tp, word, size, false);
if (rc)
return rc;
@@ -2589,14 +2589,14 @@ dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, int level)
* that it is at the front of a binary buddy system.
*/
if (oldval == NOFREE) {
- rc = dbBackSplit((dmtree_t *) dcp, leafno);
+ rc = dbBackSplit((dmtree_t *)dcp, leafno, true);
if (rc)
return rc;
oldval = dcp->stree[ti];
}
- dbSplit((dmtree_t *) dcp, leafno, dcp->budmin, newval);
+ dbSplit((dmtree_t *) dcp, leafno, dcp->budmin, newval, true);
} else {
- rc = dbJoin((dmtree_t *) dcp, leafno, newval);
+ rc = dbJoin((dmtree_t *) dcp, leafno, newval, true);
if (rc)
return rc;
}
@@ -2625,7 +2625,7 @@ dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, int level)
*/
if (alloc) {
dbJoin((dmtree_t *) dcp, leafno,
- oldval);
+ oldval, true);
} else {
/* the dbJoin() above might have
* caused a larger binary buddy system
@@ -2635,9 +2635,9 @@ dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, int level)
*/
if (dcp->stree[ti] == NOFREE)
dbBackSplit((dmtree_t *)
- dcp, leafno);
+ dcp, leafno, true);
dbSplit((dmtree_t *) dcp, leafno,
- dcp->budmin, oldval);
+ dcp->budmin, oldval, true);
}
/* release the buffer and return the error.
@@ -2685,7 +2685,7 @@ dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, int level)
*
* serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
*/
-static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval)
+static void dbSplit(dmtree_t *tp, int leafno, int splitsz, int newval, bool is_ctl)
{
int budsz;
int cursz;
@@ -2707,7 +2707,7 @@ static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval)
while (cursz >= splitsz) {
/* update the buddy's leaf with its new value.
*/
- dbAdjTree(tp, leafno ^ budsz, cursz);
+ dbAdjTree(tp, leafno ^ budsz, cursz, is_ctl);
/* on to the next size and buddy.
*/
@@ -2719,7 +2719,7 @@ static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval)
/* adjust the dmap tree to reflect the specified leaf's new
* value.
*/
- dbAdjTree(tp, leafno, newval);
+ dbAdjTree(tp, leafno, newval, is_ctl);
}
@@ -2750,7 +2750,7 @@ static void dbSplit(dmtree_t * tp, int leafno, int splitsz, int newval)
*
* serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
*/
-static int dbBackSplit(dmtree_t * tp, int leafno)
+static int dbBackSplit(dmtree_t *tp, int leafno, bool is_ctl)
{
int budsz, bud, w, bsz, size;
int cursz;
@@ -2801,7 +2801,7 @@ static int dbBackSplit(dmtree_t * tp, int leafno)
* system in two.
*/
cursz = leaf[bud] - 1;
- dbSplit(tp, bud, cursz, cursz);
+ dbSplit(tp, bud, cursz, cursz, is_ctl);
break;
}
}
@@ -2829,7 +2829,7 @@ static int dbBackSplit(dmtree_t * tp, int leafno)
*
* RETURN VALUES: none
*/
-static int dbJoin(dmtree_t * tp, int leafno, int newval)
+static int dbJoin(dmtree_t *tp, int leafno, int newval, bool is_ctl)
{
int budsz, buddy;
s8 *leaf;
@@ -2884,12 +2884,12 @@ static int dbJoin(dmtree_t * tp, int leafno, int newval)
if (leafno < buddy) {
/* leafno is the left buddy.
*/
- dbAdjTree(tp, buddy, NOFREE);
+ dbAdjTree(tp, buddy, NOFREE, is_ctl);
} else {
/* buddy is the left buddy and becomes
* leafno.
*/
- dbAdjTree(tp, leafno, NOFREE);
+ dbAdjTree(tp, leafno, NOFREE, is_ctl);
leafno = buddy;
}
@@ -2902,7 +2902,7 @@ static int dbJoin(dmtree_t * tp, int leafno, int newval)
/* update the leaf value.
*/
- dbAdjTree(tp, leafno, newval);
+ dbAdjTree(tp, leafno, newval, is_ctl);
return 0;
}
@@ -2923,15 +2923,20 @@ static int dbJoin(dmtree_t * tp, int leafno, int newval)
*
* RETURN VALUES: none
*/
-static void dbAdjTree(dmtree_t * tp, int leafno, int newval)
+static void dbAdjTree(dmtree_t *tp, int leafno, int newval, bool is_ctl)
{
int lp, pp, k;
- int max;
+ int max, size;
+
+ size = is_ctl ? CTLTREESIZE : TREESIZE;
/* pick up the index of the leaf for this leafno.
*/
lp = leafno + le32_to_cpu(tp->dmt_leafidx);
+ if (WARN_ON_ONCE(lp >= size || lp < 0))
+ return;
+
/* is the current value the same as the old value ? if so,
* there is nothing to do.
*/
diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c
index 837d42f61..a222a9d71 100644
--- a/fs/jfs/jfs_dtree.c
+++ b/fs/jfs/jfs_dtree.c
@@ -633,6 +633,11 @@ int dtSearch(struct inode *ip, struct component_name * key, ino_t * data,
for (base = 0, lim = p->header.nextindex; lim; lim >>= 1) {
index = base + (lim >> 1);
+ if (stbl[index] < 0) {
+ rc = -EIO;
+ goto out;
+ }
+
if (p->header.flag & BT_LEAF) {
/* uppercase leaf name to compare */
cmp =
@@ -1970,7 +1975,7 @@ static int dtSplitRoot(tid_t tid,
do {
f = &rp->slot[fsi];
fsi = f->next;
- } while (fsi != -1);
+ } while (fsi >= 0);
f->next = n;
}
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index 14f918a48..b0965f3ef 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -2181,6 +2181,9 @@ static int diNewExt(struct inomap * imap, struct iag * iagp, int extno)
/* get the ag and iag numbers for this iag.
*/
agno = BLKTOAG(le64_to_cpu(iagp->agstart), sbi);
+ if (agno >= MAXAG || agno < 0)
+ return -EIO;
+
iagno = le32_to_cpu(iagp->iagnum);
/* check if this is the last free extent within the
diff --git a/fs/jfs/jfs_mount.c b/fs/jfs/jfs_mount.c
index aa4ff7bca..55702b31a 100644
--- a/fs/jfs/jfs_mount.c
+++ b/fs/jfs/jfs_mount.c
@@ -172,15 +172,15 @@ int jfs_mount(struct super_block *sb)
}
jfs_info("jfs_mount: ipimap:0x%p", ipimap);
- /* map further access of per fileset inodes by the fileset inode */
- sbi->ipimap = ipimap;
-
/* initialize fileset inode allocation map */
if ((rc = diMount(ipimap))) {
jfs_err("jfs_mount: diMount failed w/rc = %d", rc);
goto err_ipimap;
}
+ /* map further access of per fileset inodes by the fileset inode */
+ sbi->ipimap = ipimap;
+
return rc;
/*
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index c91ee05cc..0ba056e06 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -696,6 +696,18 @@ struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
{
struct kernfs_node *kn;
+ if (parent->mode & S_ISGID) {
+ /* this code block imitates inode_init_owner() for
+ * kernfs
+ */
+
+ if (parent->iattr)
+ gid = parent->iattr->ia_gid;
+
+ if (flags & KERNFS_DIR)
+ mode |= S_ISGID;
+ }
+
kn = __kernfs_new_node(kernfs_root(parent), parent,
name, mode, uid, gid, flags);
if (kn) {
diff --git a/fs/namei.c b/fs/namei.c
index 3ff954a2b..cb37d7c47 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2771,20 +2771,14 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2)
p = d_ancestor(p2, p1);
if (p) {
inode_lock_nested(p2->d_inode, I_MUTEX_PARENT);
- inode_lock_nested(p1->d_inode, I_MUTEX_CHILD);
+ inode_lock_nested(p1->d_inode, I_MUTEX_PARENT2);
return p;
}
p = d_ancestor(p1, p2);
- if (p) {
- inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
- inode_lock_nested(p2->d_inode, I_MUTEX_CHILD);
- return p;
- }
-
- lock_two_inodes(p1->d_inode, p2->d_inode,
- I_MUTEX_PARENT, I_MUTEX_PARENT2);
- return NULL;
+ inode_lock_nested(p1->d_inode, I_MUTEX_PARENT);
+ inode_lock_nested(p2->d_inode, I_MUTEX_PARENT2);
+ return p;
}
EXPORT_SYMBOL(lock_rename);
@@ -4260,11 +4254,12 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname
*
* a) we can get into loop creation.
* b) race potential - two innocent renames can create a loop together.
- * That's where 4.4 screws up. Current fix: serialization on
+ * That's where 4.4BSD screws up. Current fix: serialization on
* sb->s_vfs_rename_mutex. We might be more accurate, but that's another
* story.
- * c) we have to lock _four_ objects - parents and victim (if it exists),
- * and source.
+ * c) we may have to lock up to _four_ objects - parents and victim (if it exists),
+ * and source (if it's a non-directory or a subdirectory that moves to
+ * different parent).
* And that - after we got ->i_mutex on parents (until then we don't know
* whether the target exists). Solution: try to be smart with locking
* order for inodes. We rely on the fact that tree topology may change
@@ -4293,6 +4288,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
bool new_is_dir = false;
unsigned max_links = new_dir->i_sb->s_max_links;
struct name_snapshot old_name;
+ bool lock_old_subdir, lock_new_subdir;
if (source == target)
return 0;
@@ -4342,15 +4338,32 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
take_dentry_name_snapshot(&old_name, old_dentry);
dget(new_dentry);
/*
- * Lock all moved children. Moved directories may need to change parent
- * pointer so they need the lock to prevent against concurrent
- * directory changes moving parent pointer. For regular files we've
- * historically always done this. The lockdep locking subclasses are
- * somewhat arbitrary but RENAME_EXCHANGE in particular can swap
- * regular files and directories so it's difficult to tell which
- * subclasses to use.
+ * Lock children.
+ * The source subdirectory needs to be locked on cross-directory
+ * rename or cross-directory exchange since its parent changes.
+ * The target subdirectory needs to be locked on cross-directory
+ * exchange due to parent change and on any rename due to becoming
+ * a victim.
+ * Non-directories need locking in all cases (for NFS reasons);
+ * they get locked after any subdirectories (in inode address order).
+ *
+ * NOTE: WE ONLY LOCK UNRELATED DIRECTORIES IN CROSS-DIRECTORY CASE.
+ * NEVER, EVER DO THAT WITHOUT ->s_vfs_rename_mutex.
*/
- lock_two_inodes(source, target, I_MUTEX_NORMAL, I_MUTEX_NONDIR2);
+ lock_old_subdir = new_dir != old_dir;
+ lock_new_subdir = new_dir != old_dir || !(flags & RENAME_EXCHANGE);
+ if (is_dir) {
+ if (lock_old_subdir)
+ inode_lock_nested(source, I_MUTEX_CHILD);
+ if (target && (!new_is_dir || lock_new_subdir))
+ inode_lock(target);
+ } else if (new_is_dir) {
+ if (lock_new_subdir)
+ inode_lock_nested(target, I_MUTEX_CHILD);
+ inode_lock(source);
+ } else {
+ lock_two_nondirectories(source, target);
+ }
error = -EBUSY;
if (is_local_mountpoint(old_dentry) || is_local_mountpoint(new_dentry))
@@ -4394,8 +4407,9 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
d_exchange(old_dentry, new_dentry);
}
out:
- inode_unlock(source);
- if (target)
+ if (!is_dir || lock_old_subdir)
+ inode_unlock(source);
+ if (target && (!new_is_dir || lock_new_subdir))
inode_unlock(target);
dput(new_dentry);
if (!error) {
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 5d86ffa72..499519f0f 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -678,10 +678,17 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
LIST_HEAD(mds_list);
nfs_init_cinfo_from_dreq(&cinfo, dreq);
+ nfs_commit_begin(cinfo.mds);
nfs_scan_commit(dreq->inode, &mds_list, &cinfo);
res = nfs_generic_commit_list(dreq->inode, &mds_list, 0, &cinfo);
- if (res < 0) /* res == -ENOMEM */
- nfs_direct_write_reschedule(dreq);
+ if (res < 0) { /* res == -ENOMEM */
+ spin_lock(&dreq->lock);
+ if (dreq->flags == 0)
+ dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+ spin_unlock(&dreq->lock);
+ }
+ if (nfs_commit_end(cinfo.mds))
+ nfs_direct_write_complete(dreq);
}
static void nfs_direct_write_clear_reqs(struct nfs_direct_req *dreq)
diff --git a/fs/nfs/nfs42.h b/fs/nfs/nfs42.h
index 0fe5aacbc..e7192d0ee 100644
--- a/fs/nfs/nfs42.h
+++ b/fs/nfs/nfs42.h
@@ -54,11 +54,14 @@ int nfs42_proc_removexattr(struct inode *inode, const char *name);
* They would be 7 bytes long in the eventual buffer ("user.x\0"), and
* 8 bytes long XDR-encoded.
*
- * Include the trailing eof word as well.
+ * Include the trailing eof word as well and make the result a multiple
+ * of 4 bytes.
*/
static inline u32 nfs42_listxattr_xdrsize(u32 buflen)
{
- return ((buflen / (XATTR_USER_PREFIX_LEN + 2)) * 8) + 4;
+ u32 size = 8 * buflen / (XATTR_USER_PREFIX_LEN + 2) + 4;
+
+ return (size + 3) & ~3;
}
#endif /* CONFIG_NFS_V4_2 */
#endif /* __LINUX_FS_NFS_NFS4_2_H */
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 7c3c96ed6..8e546e6a5 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -10370,29 +10370,33 @@ const struct nfs4_minor_version_ops *nfs_v4_minor_ops[] = {
static ssize_t nfs4_listxattr(struct dentry *dentry, char *list, size_t size)
{
ssize_t error, error2, error3;
+ size_t left = size;
- error = generic_listxattr(dentry, list, size);
+ error = generic_listxattr(dentry, list, left);
if (error < 0)
return error;
if (list) {
list += error;
- size -= error;
+ left -= error;
}
- error2 = nfs4_listxattr_nfs4_label(d_inode(dentry), list, size);
+ error2 = nfs4_listxattr_nfs4_label(d_inode(dentry), list, left);
if (error2 < 0)
return error2;
if (list) {
list += error2;
- size -= error2;
+ left -= error2;
}
- error3 = nfs4_listxattr_nfs4_user(d_inode(dentry), list, size);
+ error3 = nfs4_listxattr_nfs4_user(d_inode(dentry), list, left);
if (error3 < 0)
return error3;
- return error + error2 + error3;
+ error += error2 + error3;
+ if (size && error > size)
+ return -ERANGE;
+ return error;
}
static void nfs4_enable_swap(struct inode *inode)
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index fa1483088..c2cf4ff62 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -175,10 +175,10 @@ static int __init root_nfs_cat(char *dest, const char *src,
size_t len = strlen(dest);
if (len && dest[len - 1] != ',')
- if (strlcat(dest, ",", destlen) > destlen)
+ if (strlcat(dest, ",", destlen) >= destlen)
return -1;
- if (strlcat(dest, src, destlen) > destlen)
+ if (strlcat(dest, src, destlen) >= destlen)
return -1;
return 0;
}
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index d3cd099ff..4cf060691 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1637,7 +1637,7 @@ static int wait_on_commit(struct nfs_mds_commit_info *cinfo)
!atomic_read(&cinfo->rpcs_out));
}
-static void nfs_commit_begin(struct nfs_mds_commit_info *cinfo)
+void nfs_commit_begin(struct nfs_mds_commit_info *cinfo)
{
atomic_inc(&cinfo->rpcs_out);
}
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index 65cd599cb..4905b7cd7 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -724,7 +724,7 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree,
dat = nilfs_bmap_get_dat(btree);
ret = nilfs_dat_translate(dat, ptr, &blocknr);
if (ret < 0)
- goto out;
+ goto dat_error;
ptr = blocknr;
}
cnt = 1;
@@ -743,7 +743,7 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree,
if (dat) {
ret = nilfs_dat_translate(dat, ptr2, &blocknr);
if (ret < 0)
- goto out;
+ goto dat_error;
ptr2 = blocknr;
}
if (ptr2 != ptr + cnt || ++cnt == maxblocks)
@@ -782,6 +782,11 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree,
out:
nilfs_btree_free_path(path);
return ret;
+
+ dat_error:
+ if (ret == -ENOENT)
+ ret = -EINVAL; /* Notify bmap layer of metadata corruption */
+ goto out;
}
static void nilfs_btree_promote_key(struct nilfs_bmap *btree,
diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c
index 8fedc7104..22b1ca5c3 100644
--- a/fs/nilfs2/dat.c
+++ b/fs/nilfs2/dat.c
@@ -40,8 +40,21 @@ static inline struct nilfs_dat_info *NILFS_DAT_I(struct inode *dat)
static int nilfs_dat_prepare_entry(struct inode *dat,
struct nilfs_palloc_req *req, int create)
{
- return nilfs_palloc_get_entry_block(dat, req->pr_entry_nr,
- create, &req->pr_entry_bh);
+ int ret;
+
+ ret = nilfs_palloc_get_entry_block(dat, req->pr_entry_nr,
+ create, &req->pr_entry_bh);
+ if (unlikely(ret == -ENOENT)) {
+ nilfs_err(dat->i_sb,
+ "DAT doesn't have a block to manage vblocknr = %llu",
+ (unsigned long long)req->pr_entry_nr);
+ /*
+ * Return internal code -EINVAL to notify bmap layer of
+ * metadata corruption.
+ */
+ ret = -EINVAL;
+ }
+ return ret;
}
static void nilfs_dat_commit_entry(struct inode *dat,
@@ -123,11 +136,7 @@ static void nilfs_dat_commit_free(struct inode *dat,
int nilfs_dat_prepare_start(struct inode *dat, struct nilfs_palloc_req *req)
{
- int ret;
-
- ret = nilfs_dat_prepare_entry(dat, req, 0);
- WARN_ON(ret == -ENOENT);
- return ret;
+ return nilfs_dat_prepare_entry(dat, req, 0);
}
void nilfs_dat_commit_start(struct inode *dat, struct nilfs_palloc_req *req,
@@ -154,10 +163,8 @@ int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req)
int ret;
ret = nilfs_dat_prepare_entry(dat, req, 0);
- if (ret < 0) {
- WARN_ON(ret == -ENOENT);
+ if (ret < 0)
return ret;
- }
kaddr = kmap_atomic(req->pr_entry_bh->b_page);
entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr,
diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c
index 81394e22d..eb7de9e2a 100644
--- a/fs/nilfs2/dir.c
+++ b/fs/nilfs2/dir.c
@@ -243,7 +243,7 @@ nilfs_filetype_table[NILFS_FT_MAX] = {
#define S_SHIFT 12
static unsigned char
-nilfs_type_by_mode[S_IFMT >> S_SHIFT] = {
+nilfs_type_by_mode[(S_IFMT >> S_SHIFT) + 1] = {
[S_IFREG >> S_SHIFT] = NILFS_FT_REG_FILE,
[S_IFDIR >> S_SHIFT] = NILFS_FT_DIR,
[S_IFCHR >> S_SHIFT] = NILFS_FT_CHRDEV,
diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c
index f35310195..7faf8c285 100644
--- a/fs/nilfs2/direct.c
+++ b/fs/nilfs2/direct.c
@@ -66,7 +66,7 @@ static int nilfs_direct_lookup_contig(const struct nilfs_bmap *direct,
dat = nilfs_bmap_get_dat(direct);
ret = nilfs_dat_translate(dat, ptr, &blocknr);
if (ret < 0)
- return ret;
+ goto dat_error;
ptr = blocknr;
}
@@ -79,7 +79,7 @@ static int nilfs_direct_lookup_contig(const struct nilfs_bmap *direct,
if (dat) {
ret = nilfs_dat_translate(dat, ptr2, &blocknr);
if (ret < 0)
- return ret;
+ goto dat_error;
ptr2 = blocknr;
}
if (ptr2 != ptr + cnt)
@@ -87,6 +87,11 @@ static int nilfs_direct_lookup_contig(const struct nilfs_bmap *direct,
}
*ptrp = ptr;
return cnt;
+
+ dat_error:
+ if (ret == -ENOENT)
+ ret = -EINVAL; /* Notify bmap layer of metadata corruption */
+ return ret;
}
static __u64
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index e1bd592ce..5611a3534 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -105,7 +105,13 @@ static vm_fault_t nilfs_page_mkwrite(struct vm_fault *vmf)
nilfs_transaction_commit(inode->i_sb);
mapped:
- wait_for_stable_page(page);
+ /*
+ * Since checksumming including data blocks is performed to determine
+ * the validity of the log to be written and used for recovery, it is
+ * necessary to wait for writeback to finish here, regardless of the
+ * stable write requirement of the backing device.
+ */
+ wait_on_page_writeback(page);
out:
sb_end_pagefault(inode->i_sb);
return block_page_mkwrite_return(ret);
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index d144e08a9..06f4deb55 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -112,7 +112,7 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff,
"%s (ino=%lu): a race condition while inserting a data block at offset=%llu",
__func__, inode->i_ino,
(unsigned long long)blkoff);
- err = 0;
+ err = -EAGAIN;
}
nilfs_transaction_abort(inode->i_sb);
goto out;
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c
index 2217f904a..188b8cc52 100644
--- a/fs/nilfs2/recovery.c
+++ b/fs/nilfs2/recovery.c
@@ -472,9 +472,10 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs,
static int nilfs_recovery_copy_block(struct the_nilfs *nilfs,
struct nilfs_recovery_block *rb,
- struct page *page)
+ loff_t pos, struct page *page)
{
struct buffer_head *bh_org;
+ size_t from = pos & ~PAGE_MASK;
void *kaddr;
bh_org = __bread(nilfs->ns_bdev, rb->blocknr, nilfs->ns_blocksize);
@@ -482,7 +483,7 @@ static int nilfs_recovery_copy_block(struct the_nilfs *nilfs,
return -EIO;
kaddr = kmap_atomic(page);
- memcpy(kaddr + bh_offset(bh_org), bh_org->b_data, bh_org->b_size);
+ memcpy(kaddr + from, bh_org->b_data, bh_org->b_size);
kunmap_atomic(kaddr);
brelse(bh_org);
return 0;
@@ -521,7 +522,7 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs,
goto failed_inode;
}
- err = nilfs_recovery_copy_block(nilfs, rb, page);
+ err = nilfs_recovery_copy_block(nilfs, rb, pos, page);
if (unlikely(err))
goto failed_page;
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 418055ac9..be0ca35b8 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -1707,7 +1707,6 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci)
list_for_each_entry(bh, &segbuf->sb_payload_buffers,
b_assoc_buffers) {
- set_buffer_async_write(bh);
if (bh == segbuf->sb_super_root) {
if (bh->b_page != bd_page) {
lock_page(bd_page);
@@ -1718,6 +1717,7 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci)
}
break;
}
+ set_buffer_async_write(bh);
if (bh->b_page != fs_page) {
nilfs_begin_page_io(fs_page);
fs_page = bh->b_page;
@@ -1803,7 +1803,6 @@ static void nilfs_abort_logs(struct list_head *logs, int err)
list_for_each_entry(bh, &segbuf->sb_payload_buffers,
b_assoc_buffers) {
- clear_buffer_async_write(bh);
if (bh == segbuf->sb_super_root) {
clear_buffer_uptodate(bh);
if (bh->b_page != bd_page) {
@@ -1812,6 +1811,7 @@ static void nilfs_abort_logs(struct list_head *logs, int err)
}
break;
}
+ clear_buffer_async_write(bh);
if (bh->b_page != fs_page) {
nilfs_end_page_io(fs_page, err);
fs_page = bh->b_page;
@@ -1899,8 +1899,9 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
BIT(BH_Delay) | BIT(BH_NILFS_Volatile) |
BIT(BH_NILFS_Redirected));
- set_mask_bits(&bh->b_state, clear_bits, set_bits);
if (bh == segbuf->sb_super_root) {
+ set_buffer_uptodate(bh);
+ clear_buffer_dirty(bh);
if (bh->b_page != bd_page) {
end_page_writeback(bd_page);
bd_page = bh->b_page;
@@ -1908,6 +1909,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
update_sr = true;
break;
}
+ set_mask_bits(&bh->b_state, clear_bits, set_bits);
if (bh->b_page != fs_page) {
nilfs_end_page_io(fs_page, 0);
fs_page = bh->b_page;
diff --git a/fs/pipe.c b/fs/pipe.c
index dbb090e1b..588fe37d8 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -435,12 +435,10 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
goto out;
}
-#ifdef CONFIG_WATCH_QUEUE
- if (pipe->watch_queue) {
+ if (pipe_has_watch_queue(pipe)) {
ret = -EXDEV;
goto out;
}
-#endif
/*
* If it wasn't empty we try to merge new data into
@@ -1302,6 +1300,11 @@ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
pipe->tail = tail;
pipe->head = head;
+ if (!pipe_has_watch_queue(pipe)) {
+ pipe->max_usage = nr_slots;
+ pipe->nr_accounted = nr_slots;
+ }
+
spin_unlock_irq(&pipe->rd_wait.lock);
/* This might have made more room for writers */
@@ -1319,10 +1322,8 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg)
unsigned int nr_slots, size;
long ret = 0;
-#ifdef CONFIG_WATCH_QUEUE
- if (pipe->watch_queue)
+ if (pipe_has_watch_queue(pipe))
return -EBUSY;
-#endif
size = round_pipe_size(arg);
nr_slots = size >> PAGE_SHIFT;
@@ -1355,8 +1356,6 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg)
if (ret < 0)
goto out_revert_acct;
- pipe->max_usage = nr_slots;
- pipe->nr_accounted = nr_slots;
return pipe->max_usage * PAGE_SIZE;
out_revert_acct:
@@ -1375,10 +1374,8 @@ struct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice)
if (file->f_op != &pipefifo_fops || !pipe)
return NULL;
-#ifdef CONFIG_WATCH_QUEUE
- if (for_splice && pipe->watch_queue)
+ if (for_splice && pipe_has_watch_queue(pipe))
return NULL;
-#endif
return pipe;
}
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 98e579ce0..44fc3b396 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -519,6 +519,7 @@ static int ramoops_init_przs(const char *name,
}
zone_sz = mem_sz / *cnt;
+ zone_sz = ALIGN_DOWN(zone_sz, 2);
if (!zone_sz) {
dev_err(dev, "%s zone size == 0\n", name);
goto fail;
diff --git a/fs/pstore/zone.c b/fs/pstore/zone.c
index b50fc33f2..2426fb679 100644
--- a/fs/pstore/zone.c
+++ b/fs/pstore/zone.c
@@ -973,6 +973,8 @@ static ssize_t psz_kmsg_read(struct pstore_zone *zone,
char *buf = kasprintf(GFP_KERNEL, "%s: Total %d times\n",
kmsg_dump_reason_str(record->reason),
record->count);
+ if (!buf)
+ return -ENOMEM;
hlen = strlen(buf);
record->buf = krealloc(buf, hlen + size, GFP_KERNEL);
if (!record->buf) {
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 4bb4b4b79..6a7b7d447 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -401,15 +401,17 @@ int dquot_mark_dquot_dirty(struct dquot *dquot)
EXPORT_SYMBOL(dquot_mark_dquot_dirty);
/* Dirtify all the dquots - this can block when journalling */
-static inline int mark_all_dquot_dirty(struct dquot * const *dquot)
+static inline int mark_all_dquot_dirty(struct dquot __rcu * const *dquots)
{
int ret, err, cnt;
+ struct dquot *dquot;
ret = err = 0;
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (dquot[cnt])
+ dquot = srcu_dereference(dquots[cnt], &dquot_srcu);
+ if (dquot)
/* Even in case of error we have to continue */
- ret = mark_dquot_dirty(dquot[cnt]);
+ ret = mark_dquot_dirty(dquot);
if (!err)
err = ret;
}
@@ -1006,14 +1008,15 @@ out:
}
EXPORT_SYMBOL(dqget);
-static inline struct dquot **i_dquot(struct inode *inode)
+static inline struct dquot __rcu **i_dquot(struct inode *inode)
{
- return inode->i_sb->s_op->get_dquots(inode);
+ /* Force __rcu for now until filesystems are fixed */
+ return (struct dquot __rcu **)inode->i_sb->s_op->get_dquots(inode);
}
static int dqinit_needed(struct inode *inode, int type)
{
- struct dquot * const *dquots;
+ struct dquot __rcu * const *dquots;
int cnt;
if (IS_NOQUOTA(inode))
@@ -1086,59 +1089,7 @@ out:
return err;
}
-/*
- * Remove references to dquots from inode and add dquot to list for freeing
- * if we have the last reference to dquot
- */
-static void remove_inode_dquot_ref(struct inode *inode, int type,
- struct list_head *tofree_head)
-{
- struct dquot **dquots = i_dquot(inode);
- struct dquot *dquot = dquots[type];
-
- if (!dquot)
- return;
-
- dquots[type] = NULL;
- if (list_empty(&dquot->dq_free)) {
- /*
- * The inode still has reference to dquot so it can't be in the
- * free list
- */
- spin_lock(&dq_list_lock);
- list_add(&dquot->dq_free, tofree_head);
- spin_unlock(&dq_list_lock);
- } else {
- /*
- * Dquot is already in a list to put so we won't drop the last
- * reference here.
- */
- dqput(dquot);
- }
-}
-
-/*
- * Free list of dquots
- * Dquots are removed from inodes and no new references can be got so we are
- * the only ones holding reference
- */
-static void put_dquot_list(struct list_head *tofree_head)
-{
- struct list_head *act_head;
- struct dquot *dquot;
-
- act_head = tofree_head->next;
- while (act_head != tofree_head) {
- dquot = list_entry(act_head, struct dquot, dq_free);
- act_head = act_head->next;
- /* Remove dquot from the list so we won't have problems... */
- list_del_init(&dquot->dq_free);
- dqput(dquot);
- }
-}
-
-static void remove_dquot_ref(struct super_block *sb, int type,
- struct list_head *tofree_head)
+static void remove_dquot_ref(struct super_block *sb, int type)
{
struct inode *inode;
#ifdef CONFIG_QUOTA_DEBUG
@@ -1155,11 +1106,18 @@ static void remove_dquot_ref(struct super_block *sb, int type,
*/
spin_lock(&dq_data_lock);
if (!IS_NOQUOTA(inode)) {
+ struct dquot __rcu **dquots = i_dquot(inode);
+ struct dquot *dquot = srcu_dereference_check(
+ dquots[type], &dquot_srcu,
+ lockdep_is_held(&dq_data_lock));
+
#ifdef CONFIG_QUOTA_DEBUG
if (unlikely(inode_get_rsv_space(inode) > 0))
reserved = 1;
#endif
- remove_inode_dquot_ref(inode, type, tofree_head);
+ rcu_assign_pointer(dquots[type], NULL);
+ if (dquot)
+ dqput(dquot);
}
spin_unlock(&dq_data_lock);
}
@@ -1176,13 +1134,8 @@ static void remove_dquot_ref(struct super_block *sb, int type,
/* Gather all references from inodes and drop them */
static void drop_dquot_ref(struct super_block *sb, int type)
{
- LIST_HEAD(tofree_head);
-
- if (sb->dq_op) {
- remove_dquot_ref(sb, type, &tofree_head);
- synchronize_srcu(&dquot_srcu);
- put_dquot_list(&tofree_head);
- }
+ if (sb->dq_op)
+ remove_dquot_ref(sb, type);
}
static inline
@@ -1515,7 +1468,8 @@ static int inode_quota_active(const struct inode *inode)
static int __dquot_initialize(struct inode *inode, int type)
{
int cnt, init_needed = 0;
- struct dquot **dquots, *got[MAXQUOTAS] = {};
+ struct dquot __rcu **dquots;
+ struct dquot *got[MAXQUOTAS] = {};
struct super_block *sb = inode->i_sb;
qsize_t rsv;
int ret = 0;
@@ -1590,7 +1544,7 @@ static int __dquot_initialize(struct inode *inode, int type)
if (!got[cnt])
continue;
if (!dquots[cnt]) {
- dquots[cnt] = got[cnt];
+ rcu_assign_pointer(dquots[cnt], got[cnt]);
got[cnt] = NULL;
/*
* Make quota reservation system happy if someone
@@ -1598,12 +1552,16 @@ static int __dquot_initialize(struct inode *inode, int type)
*/
rsv = inode_get_rsv_space(inode);
if (unlikely(rsv)) {
+ struct dquot *dquot = srcu_dereference_check(
+ dquots[cnt], &dquot_srcu,
+ lockdep_is_held(&dq_data_lock));
+
spin_lock(&inode->i_lock);
/* Get reservation again under proper lock */
rsv = __inode_get_rsv_space(inode);
- spin_lock(&dquots[cnt]->dq_dqb_lock);
- dquots[cnt]->dq_dqb.dqb_rsvspace += rsv;
- spin_unlock(&dquots[cnt]->dq_dqb_lock);
+ spin_lock(&dquot->dq_dqb_lock);
+ dquot->dq_dqb.dqb_rsvspace += rsv;
+ spin_unlock(&dquot->dq_dqb_lock);
spin_unlock(&inode->i_lock);
}
}
@@ -1625,7 +1583,7 @@ EXPORT_SYMBOL(dquot_initialize);
bool dquot_initialize_needed(struct inode *inode)
{
- struct dquot **dquots;
+ struct dquot __rcu **dquots;
int i;
if (!inode_quota_active(inode))
@@ -1650,13 +1608,14 @@ EXPORT_SYMBOL(dquot_initialize_needed);
static void __dquot_drop(struct inode *inode)
{
int cnt;
- struct dquot **dquots = i_dquot(inode);
+ struct dquot __rcu **dquots = i_dquot(inode);
struct dquot *put[MAXQUOTAS];
spin_lock(&dq_data_lock);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- put[cnt] = dquots[cnt];
- dquots[cnt] = NULL;
+ put[cnt] = srcu_dereference_check(dquots[cnt], &dquot_srcu,
+ lockdep_is_held(&dq_data_lock));
+ rcu_assign_pointer(dquots[cnt], NULL);
}
spin_unlock(&dq_data_lock);
dqput_all(put);
@@ -1664,7 +1623,7 @@ static void __dquot_drop(struct inode *inode)
void dquot_drop(struct inode *inode)
{
- struct dquot * const *dquots;
+ struct dquot __rcu * const *dquots;
int cnt;
if (IS_NOQUOTA(inode))
@@ -1737,7 +1696,8 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
int cnt, ret = 0, index;
struct dquot_warn warn[MAXQUOTAS];
int reserve = flags & DQUOT_SPACE_RESERVE;
- struct dquot **dquots;
+ struct dquot __rcu **dquots;
+ struct dquot *dquot;
if (!inode_quota_active(inode)) {
if (reserve) {
@@ -1757,27 +1717,26 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
index = srcu_read_lock(&dquot_srcu);
spin_lock(&inode->i_lock);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (!dquots[cnt])
+ dquot = srcu_dereference(dquots[cnt], &dquot_srcu);
+ if (!dquot)
continue;
if (reserve) {
- ret = dquot_add_space(dquots[cnt], 0, number, flags,
- &warn[cnt]);
+ ret = dquot_add_space(dquot, 0, number, flags, &warn[cnt]);
} else {
- ret = dquot_add_space(dquots[cnt], number, 0, flags,
- &warn[cnt]);
+ ret = dquot_add_space(dquot, number, 0, flags, &warn[cnt]);
}
if (ret) {
/* Back out changes we already did */
for (cnt--; cnt >= 0; cnt--) {
- if (!dquots[cnt])
+ dquot = srcu_dereference(dquots[cnt], &dquot_srcu);
+ if (!dquot)
continue;
- spin_lock(&dquots[cnt]->dq_dqb_lock);
+ spin_lock(&dquot->dq_dqb_lock);
if (reserve)
- dquot_free_reserved_space(dquots[cnt],
- number);
+ dquot_free_reserved_space(dquot, number);
else
- dquot_decr_space(dquots[cnt], number);
- spin_unlock(&dquots[cnt]->dq_dqb_lock);
+ dquot_decr_space(dquot, number);
+ spin_unlock(&dquot->dq_dqb_lock);
}
spin_unlock(&inode->i_lock);
goto out_flush_warn;
@@ -1807,7 +1766,8 @@ int dquot_alloc_inode(struct inode *inode)
{
int cnt, ret = 0, index;
struct dquot_warn warn[MAXQUOTAS];
- struct dquot * const *dquots;
+ struct dquot __rcu * const *dquots;
+ struct dquot *dquot;
if (!inode_quota_active(inode))
return 0;
@@ -1818,17 +1778,19 @@ int dquot_alloc_inode(struct inode *inode)
index = srcu_read_lock(&dquot_srcu);
spin_lock(&inode->i_lock);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (!dquots[cnt])
+ dquot = srcu_dereference(dquots[cnt], &dquot_srcu);
+ if (!dquot)
continue;
- ret = dquot_add_inodes(dquots[cnt], 1, &warn[cnt]);
+ ret = dquot_add_inodes(dquot, 1, &warn[cnt]);
if (ret) {
for (cnt--; cnt >= 0; cnt--) {
- if (!dquots[cnt])
+ dquot = srcu_dereference(dquots[cnt], &dquot_srcu);
+ if (!dquot)
continue;
/* Back out changes we already did */
- spin_lock(&dquots[cnt]->dq_dqb_lock);
- dquot_decr_inodes(dquots[cnt], 1);
- spin_unlock(&dquots[cnt]->dq_dqb_lock);
+ spin_lock(&dquot->dq_dqb_lock);
+ dquot_decr_inodes(dquot, 1);
+ spin_unlock(&dquot->dq_dqb_lock);
}
goto warn_put_all;
}
@@ -1849,7 +1811,8 @@ EXPORT_SYMBOL(dquot_alloc_inode);
*/
int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
{
- struct dquot **dquots;
+ struct dquot __rcu **dquots;
+ struct dquot *dquot;
int cnt, index;
if (!inode_quota_active(inode)) {
@@ -1865,9 +1828,8 @@ int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
spin_lock(&inode->i_lock);
/* Claim reserved quotas to allocated quotas */
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (dquots[cnt]) {
- struct dquot *dquot = dquots[cnt];
-
+ dquot = srcu_dereference(dquots[cnt], &dquot_srcu);
+ if (dquot) {
spin_lock(&dquot->dq_dqb_lock);
if (WARN_ON_ONCE(dquot->dq_dqb.dqb_rsvspace < number))
number = dquot->dq_dqb.dqb_rsvspace;
@@ -1891,7 +1853,8 @@ EXPORT_SYMBOL(dquot_claim_space_nodirty);
*/
void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number)
{
- struct dquot **dquots;
+ struct dquot __rcu **dquots;
+ struct dquot *dquot;
int cnt, index;
if (!inode_quota_active(inode)) {
@@ -1907,9 +1870,8 @@ void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number)
spin_lock(&inode->i_lock);
/* Claim reserved quotas to allocated quotas */
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (dquots[cnt]) {
- struct dquot *dquot = dquots[cnt];
-
+ dquot = srcu_dereference(dquots[cnt], &dquot_srcu);
+ if (dquot) {
spin_lock(&dquot->dq_dqb_lock);
if (WARN_ON_ONCE(dquot->dq_dqb.dqb_curspace < number))
number = dquot->dq_dqb.dqb_curspace;
@@ -1935,7 +1897,8 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags)
{
unsigned int cnt;
struct dquot_warn warn[MAXQUOTAS];
- struct dquot **dquots;
+ struct dquot __rcu **dquots;
+ struct dquot *dquot;
int reserve = flags & DQUOT_SPACE_RESERVE, index;
if (!inode_quota_active(inode)) {
@@ -1956,17 +1919,18 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags)
int wtype;
warn[cnt].w_type = QUOTA_NL_NOWARN;
- if (!dquots[cnt])
+ dquot = srcu_dereference(dquots[cnt], &dquot_srcu);
+ if (!dquot)
continue;
- spin_lock(&dquots[cnt]->dq_dqb_lock);
- wtype = info_bdq_free(dquots[cnt], number);
+ spin_lock(&dquot->dq_dqb_lock);
+ wtype = info_bdq_free(dquot, number);
if (wtype != QUOTA_NL_NOWARN)
- prepare_warning(&warn[cnt], dquots[cnt], wtype);
+ prepare_warning(&warn[cnt], dquot, wtype);
if (reserve)
- dquot_free_reserved_space(dquots[cnt], number);
+ dquot_free_reserved_space(dquot, number);
else
- dquot_decr_space(dquots[cnt], number);
- spin_unlock(&dquots[cnt]->dq_dqb_lock);
+ dquot_decr_space(dquot, number);
+ spin_unlock(&dquot->dq_dqb_lock);
}
if (reserve)
*inode_reserved_space(inode) -= number;
@@ -1990,7 +1954,8 @@ void dquot_free_inode(struct inode *inode)
{
unsigned int cnt;
struct dquot_warn warn[MAXQUOTAS];
- struct dquot * const *dquots;
+ struct dquot __rcu * const *dquots;
+ struct dquot *dquot;
int index;
if (!inode_quota_active(inode))
@@ -2001,16 +1966,16 @@ void dquot_free_inode(struct inode *inode)
spin_lock(&inode->i_lock);
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
int wtype;
-
warn[cnt].w_type = QUOTA_NL_NOWARN;
- if (!dquots[cnt])
+ dquot = srcu_dereference(dquots[cnt], &dquot_srcu);
+ if (!dquot)
continue;
- spin_lock(&dquots[cnt]->dq_dqb_lock);
- wtype = info_idq_free(dquots[cnt], 1);
+ spin_lock(&dquot->dq_dqb_lock);
+ wtype = info_idq_free(dquot, 1);
if (wtype != QUOTA_NL_NOWARN)
- prepare_warning(&warn[cnt], dquots[cnt], wtype);
- dquot_decr_inodes(dquots[cnt], 1);
- spin_unlock(&dquots[cnt]->dq_dqb_lock);
+ prepare_warning(&warn[cnt], dquot, wtype);
+ dquot_decr_inodes(dquot, 1);
+ spin_unlock(&dquot->dq_dqb_lock);
}
spin_unlock(&inode->i_lock);
mark_all_dquot_dirty(dquots);
@@ -2036,8 +2001,9 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
qsize_t cur_space;
qsize_t rsv_space = 0;
qsize_t inode_usage = 1;
+ struct dquot __rcu **dquots;
struct dquot *transfer_from[MAXQUOTAS] = {};
- int cnt, ret = 0;
+ int cnt, index, ret = 0;
char is_valid[MAXQUOTAS] = {};
struct dquot_warn warn_to[MAXQUOTAS];
struct dquot_warn warn_from_inodes[MAXQUOTAS];
@@ -2068,6 +2034,7 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
}
cur_space = __inode_get_bytes(inode);
rsv_space = __inode_get_rsv_space(inode);
+ dquots = i_dquot(inode);
/*
* Build the transfer_from list, check limits, and update usage in
* the target structures.
@@ -2082,7 +2049,8 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
if (!sb_has_quota_active(inode->i_sb, cnt))
continue;
is_valid[cnt] = 1;
- transfer_from[cnt] = i_dquot(inode)[cnt];
+ transfer_from[cnt] = srcu_dereference_check(dquots[cnt],
+ &dquot_srcu, lockdep_is_held(&dq_data_lock));
ret = dquot_add_inodes(transfer_to[cnt], inode_usage,
&warn_to[cnt]);
if (ret)
@@ -2121,13 +2089,21 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
rsv_space);
spin_unlock(&transfer_from[cnt]->dq_dqb_lock);
}
- i_dquot(inode)[cnt] = transfer_to[cnt];
+ rcu_assign_pointer(dquots[cnt], transfer_to[cnt]);
}
spin_unlock(&inode->i_lock);
spin_unlock(&dq_data_lock);
- mark_all_dquot_dirty(transfer_from);
- mark_all_dquot_dirty(transfer_to);
+ /*
+ * These arrays are local and we hold dquot references so we don't need
+ * the srcu protection but still take dquot_srcu to avoid warning in
+ * mark_all_dquot_dirty().
+ */
+ index = srcu_read_lock(&dquot_srcu);
+ mark_all_dquot_dirty((struct dquot __rcu **)transfer_from);
+ mark_all_dquot_dirty((struct dquot __rcu **)transfer_to);
+ srcu_read_unlock(&dquot_srcu, index);
+
flush_warnings(warn_to);
flush_warnings(warn_from_inodes);
flush_warnings(warn_from_space);
diff --git a/fs/select.c b/fs/select.c
index 5edffee11..668a52005 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -475,7 +475,7 @@ static inline void wait_key_set(poll_table *wait, unsigned long in,
wait->_key |= POLLOUT_SET;
}
-static int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time)
+static noinline_for_stack int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time)
{
ktime_t expire, *to = NULL;
struct poll_wqueues table;
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 96d0da65e..f51b23968 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -429,6 +429,8 @@ struct kernfs_node *sysfs_break_active_protection(struct kobject *kobj,
kn = kernfs_find_and_get(kobj->sd, attr->name);
if (kn)
kernfs_break_active_protection(kn);
+ else
+ kobject_put(kobj);
return kn;
}
EXPORT_SYMBOL_GPL(sysfs_break_active_protection);
diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c
index e3d1673b8..ef9bcfeec 100644
--- a/fs/sysv/itree.c
+++ b/fs/sysv/itree.c
@@ -82,9 +82,6 @@ static inline sysv_zone_t *block_end(struct buffer_head *bh)
return (sysv_zone_t*)((char*)bh->b_data + bh->b_size);
}
-/*
- * Requires read_lock(&pointers_lock) or write_lock(&pointers_lock)
- */
static Indirect *get_branch(struct inode *inode,
int depth,
int offsets[],
@@ -104,15 +101,18 @@ static Indirect *get_branch(struct inode *inode,
bh = sb_bread(sb, block);
if (!bh)
goto failure;
+ read_lock(&pointers_lock);
if (!verify_chain(chain, p))
goto changed;
add_chain(++p, bh, (sysv_zone_t*)bh->b_data + *++offsets);
+ read_unlock(&pointers_lock);
if (!p->key)
goto no_block;
}
return NULL;
changed:
+ read_unlock(&pointers_lock);
brelse(bh);
*err = -EAGAIN;
goto no_block;
@@ -218,9 +218,7 @@ static int get_block(struct inode *inode, sector_t iblock, struct buffer_head *b
goto out;
reread:
- read_lock(&pointers_lock);
partial = get_branch(inode, depth, offsets, chain, &err);
- read_unlock(&pointers_lock);
/* Simplest case - block found, no allocation needed */
if (!partial) {
@@ -290,9 +288,9 @@ static Indirect *find_shared(struct inode *inode,
*top = 0;
for (k = depth; k > 1 && !offsets[k-1]; k--)
;
+ partial = get_branch(inode, k, offsets, chain, &err);
write_lock(&pointers_lock);
- partial = get_branch(inode, k, offsets, chain, &err);
if (!partial)
partial = chain + k-1;
/*
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index bc562b107..11cd921df 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -1198,6 +1198,8 @@ out_cancel:
dir_ui->ui_size = dir->i_size;
mutex_unlock(&dir_ui->ui_mutex);
out_inode:
+ /* Free inode->i_link before inode is marked as bad. */
+ fscrypt_free_inode(inode);
make_bad_inode(inode);
iput(inode);
out_fname:
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 19fdcda04..18df7a825 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -262,9 +262,6 @@ static int write_begin_slow(struct address_space *mapping,
return err;
}
}
-
- SetPageUptodate(page);
- ClearPageError(page);
}
if (PagePrivate(page))
@@ -463,9 +460,6 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
return err;
}
}
-
- SetPageUptodate(page);
- ClearPageError(page);
}
err = allocate_budget(c, page, ui, appending);
@@ -475,10 +469,8 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
* If we skipped reading the page because we were going to
* write all of it, then it is not up to date.
*/
- if (skipped_read) {
+ if (skipped_read)
ClearPageChecked(page);
- ClearPageUptodate(page);
- }
/*
* Budgeting failed which means it would have to force
* write-back but didn't, because we set the @fast flag in the
@@ -569,6 +561,9 @@ static int ubifs_write_end(struct file *file, struct address_space *mapping,
goto out;
}
+ if (len == PAGE_SIZE)
+ SetPageUptodate(page);
+
if (!PagePrivate(page)) {
attach_page_private(page, (void *)1);
atomic_long_inc(&c->dirty_pg_cnt);
diff --git a/fs/vboxsf/super.c b/fs/vboxsf/super.c
index c578e772c..f11bcbac7 100644
--- a/fs/vboxsf/super.c
+++ b/fs/vboxsf/super.c
@@ -151,7 +151,7 @@ static int vboxsf_fill_super(struct super_block *sb, struct fs_context *fc)
if (!sbi->nls) {
vbg_err("vboxsf: Count not load '%s' nls\n", nls_name);
err = -EINVAL;
- goto fail_free;
+ goto fail_destroy_idr;
}
}
@@ -224,6 +224,7 @@ fail_free:
ida_simple_remove(&vboxsf_bdi_ida, sbi->bdi_id);
if (sbi->nls)
unload_nls(sbi->nls);
+fail_destroy_idr:
idr_destroy(&sbi->ino_idr);
kfree(sbi);
return err;
diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
index b9522eee1..f9ecade9e 100644
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -319,16 +319,18 @@ static loff_t zonefs_check_zone_condition(struct inode *inode,
}
}
-struct zonefs_ioerr_data {
- struct inode *inode;
- bool write;
-};
-
static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
void *data)
{
- struct zonefs_ioerr_data *err = data;
- struct inode *inode = err->inode;
+ struct blk_zone *z = data;
+
+ *z = *zone;
+ return 0;
+}
+
+static void zonefs_handle_io_error(struct inode *inode, struct blk_zone *zone,
+ bool write)
+{
struct zonefs_inode_info *zi = ZONEFS_I(inode);
struct super_block *sb = inode->i_sb;
struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
@@ -344,8 +346,8 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
isize = i_size_read(inode);
if (zone->cond != BLK_ZONE_COND_OFFLINE &&
zone->cond != BLK_ZONE_COND_READONLY &&
- !err->write && isize == data_size)
- return 0;
+ !write && isize == data_size)
+ return;
/*
* At this point, we detected either a bad zone or an inconsistency
@@ -366,8 +368,9 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
* In all cases, warn about inode size inconsistency and handle the
* IO error according to the zone condition and to the mount options.
*/
- if (zi->i_ztype == ZONEFS_ZTYPE_SEQ && isize != data_size)
- zonefs_warn(sb, "inode %lu: invalid size %lld (should be %lld)\n",
+ if (isize != data_size)
+ zonefs_warn(sb,
+ "inode %lu: invalid size %lld (should be %lld)\n",
inode->i_ino, isize, data_size);
/*
@@ -427,8 +430,6 @@ static int zonefs_io_error_cb(struct blk_zone *zone, unsigned int idx,
zonefs_update_stats(inode, data_size);
zonefs_i_size_write(inode, data_size);
zi->i_wpoffset = data_size;
-
- return 0;
}
/*
@@ -442,23 +443,25 @@ static void __zonefs_io_error(struct inode *inode, bool write)
{
struct zonefs_inode_info *zi = ZONEFS_I(inode);
struct super_block *sb = inode->i_sb;
- struct zonefs_sb_info *sbi = ZONEFS_SB(sb);
unsigned int noio_flag;
- unsigned int nr_zones = 1;
- struct zonefs_ioerr_data err = {
- .inode = inode,
- .write = write,
- };
+ struct blk_zone zone;
int ret;
/*
- * The only files that have more than one zone are conventional zone
- * files with aggregated conventional zones, for which the inode zone
- * size is always larger than the device zone size.
+ * Conventional zone have no write pointer and cannot become read-only
+ * or offline. So simply fake a report for a single or aggregated zone
+ * and let zonefs_handle_io_error() correct the zone inode information
+ * according to the mount options.
*/
- if (zi->i_zone_size > bdev_zone_sectors(sb->s_bdev))
- nr_zones = zi->i_zone_size >>
- (sbi->s_zone_sectors_shift + SECTOR_SHIFT);
+ if (zi->i_ztype != ZONEFS_ZTYPE_SEQ) {
+ zone.start = zi->i_zsector;
+ zone.len = zi->i_max_size >> SECTOR_SHIFT;
+ zone.wp = zone.start + zone.len;
+ zone.type = BLK_ZONE_TYPE_CONVENTIONAL;
+ zone.cond = BLK_ZONE_COND_NOT_WP;
+ zone.capacity = zone.len;
+ goto handle_io_error;
+ }
/*
* Memory allocations in blkdev_report_zones() can trigger a memory
@@ -469,12 +472,19 @@ static void __zonefs_io_error(struct inode *inode, bool write)
* the GFP_NOIO context avoids both problems.
*/
noio_flag = memalloc_noio_save();
- ret = blkdev_report_zones(sb->s_bdev, zi->i_zsector, nr_zones,
- zonefs_io_error_cb, &err);
- if (ret != nr_zones)
+ ret = blkdev_report_zones(sb->s_bdev, zi->i_zsector, 1,
+ zonefs_io_error_cb, &zone);
+ memalloc_noio_restore(noio_flag);
+ if (ret != 1) {
zonefs_err(sb, "Get inode %lu zone information failed %d\n",
inode->i_ino, ret);
- memalloc_noio_restore(noio_flag);
+ zonefs_warn(sb, "remounting filesystem read-only\n");
+ sb->s_flags |= SB_RDONLY;
+ return;
+ }
+
+handle_io_error:
+ zonefs_handle_io_error(inode, &zone, write);
}
static void zonefs_io_error(struct inode *inode, bool write)