summary | refs | log | tree | commit | diff | stats
path: root/fs/ext4
diff options
context:
space:
mode:
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-08 04:15:07 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-08 04:15:07 +0000
commit: 7fd92ba31ac1c688b59aa93cc03b748f920df8fe (patch)
tree: baa580b97c260c790730e2525483eb1953f3c39d /fs/ext4
parent: Adding upstream version 4.19.269. (diff)
download: linux-7fd92ba31ac1c688b59aa93cc03b748f920df8fe.tar.xz
download: linux-7fd92ba31ac1c688b59aa93cc03b748f920df8fe.zip
Adding upstream version 4.19.282. [upstream/4.19.282]
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- fs/ext4/ext4.h 2
-rw-r--r-- fs/ext4/fsmap.c 2
-rw-r--r-- fs/ext4/indirect.c 9
-rw-r--r-- fs/ext4/inline.c 12
-rw-r--r-- fs/ext4/inode.c 68
-rw-r--r-- fs/ext4/ioctl.c 11
-rw-r--r-- fs/ext4/namei.c 16
-rw-r--r-- fs/ext4/page-io.c 11
-rw-r--r-- fs/ext4/resize.c 6
-rw-r--r-- fs/ext4/super.c 50
-rw-r--r-- fs/ext4/sysfs.c 7
-rw-r--r-- fs/ext4/xattr.c 253
-rw-r--r-- fs/ext4/xattr.h 7
13 files changed, 266 insertions, 188 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 6938dff9f..e58b162ad 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -503,7 +503,7 @@ enum {
*
* It's not paranoia if the Murphy's Law really *is* out to get you. :-)
*/
-#define TEST_FLAG_VALUE(FLAG) (EXT4_##FLAG##_FL == (1 << EXT4_INODE_##FLAG))
+#define TEST_FLAG_VALUE(FLAG) (EXT4_##FLAG##_FL == (1U << EXT4_INODE_##FLAG))
#define CHECK_FLAG_VALUE(FLAG) BUILD_BUG_ON(!TEST_FLAG_VALUE(FLAG))
static inline void ext4_check_flag_values(void)
diff --git a/fs/ext4/fsmap.c b/fs/ext4/fsmap.c
index 6f3f245f3..6b52ace14 100644
--- a/fs/ext4/fsmap.c
+++ b/fs/ext4/fsmap.c
@@ -486,6 +486,8 @@ static int ext4_getfsmap_datadev(struct super_block *sb,
keys[0].fmr_physical = bofs;
if (keys[1].fmr_physical >= eofs)
keys[1].fmr_physical = eofs - 1;
+ if (keys[1].fmr_physical < keys[0].fmr_physical)
+ return 0;
start_fsb = keys[0].fmr_physical;
end_fsb = keys[1].fmr_physical;
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index a5442528a..0cc0d22c0 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -148,6 +148,7 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth,
struct super_block *sb = inode->i_sb;
Indirect *p = chain;
struct buffer_head *bh;
+ unsigned int key;
int ret = -EIO;
*err = 0;
@@ -156,7 +157,13 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth,
if (!p->key)
goto no_block;
while (--depth) {
- bh = sb_getblk(sb, le32_to_cpu(p->key));
+ key = le32_to_cpu(p->key);
+ if (key > ext4_blocks_count(EXT4_SB(sb)->s_es)) {
+ /* the block was out of range */
+ ret = -EFSCORRUPTED;
+ goto failure;
+ }
+ bh = sb_getblk(sb, key);
if (unlikely(!bh)) {
ret = -ENOMEM;
goto failure;
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index b1c6b9398..72387e142 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -157,7 +157,6 @@ int ext4_find_inline_data_nolock(struct inode *inode)
(void *)ext4_raw_inode(&is.iloc));
EXT4_I(inode)->i_inline_size = EXT4_MIN_INLINE_DATA_SIZE +
le32_to_cpu(is.s.here->e_value_size);
- ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
}
out:
brelse(is.iloc.bh);
@@ -207,7 +206,7 @@ out:
/*
* write the buffer to the inline inode.
* If 'create' is set, we don't need to do the extra copy in the xattr
- * value since it is already handled by ext4_xattr_ibody_inline_set.
+ * value since it is already handled by ext4_xattr_ibody_set.
* That saves us one memcpy.
*/
static void ext4_write_inline_data(struct inode *inode, struct ext4_iloc *iloc,
@@ -289,7 +288,7 @@ static int ext4_create_inline_data(handle_t *handle,
BUG_ON(!is.s.not_found);
- error = ext4_xattr_ibody_inline_set(handle, inode, &i, &is);
+ error = ext4_xattr_ibody_set(handle, inode, &i, &is);
if (error) {
if (error == -ENOSPC)
ext4_clear_inode_state(inode,
@@ -361,7 +360,7 @@ static int ext4_update_inline_data(handle_t *handle, struct inode *inode,
i.value = value;
i.value_len = len;
- error = ext4_xattr_ibody_inline_set(handle, inode, &i, &is);
+ error = ext4_xattr_ibody_set(handle, inode, &i, &is);
if (error)
goto out;
@@ -434,7 +433,7 @@ static int ext4_destroy_inline_data_nolock(handle_t *handle,
if (error)
goto out;
- error = ext4_xattr_ibody_inline_set(handle, inode, &i, &is);
+ error = ext4_xattr_ibody_set(handle, inode, &i, &is);
if (error)
goto out;
@@ -1978,8 +1977,7 @@ int ext4_inline_data_truncate(struct inode *inode, int *has_inline)
i.value = value;
i.value_len = i_size > EXT4_MIN_INLINE_DATA_SIZE ?
i_size - EXT4_MIN_INLINE_DATA_SIZE : 0;
- err = ext4_xattr_ibody_inline_set(handle, inode,
- &i, &is);
+ err = ext4_xattr_ibody_set(handle, inode, &i, &is);
if (err)
goto out_error;
}
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 95139c992..7aaf4dafd 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -207,6 +207,8 @@ void ext4_evict_inode(struct inode *inode)
trace_ext4_evict_inode(inode);
+ if (EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)
+ ext4_evict_ea_inode(inode);
if (inode->i_nlink) {
/*
* When journalling data dirty buffers are tracked only in the
@@ -1426,7 +1428,8 @@ static int ext4_write_end(struct file *file,
int inline_data = ext4_has_inline_data(inode);
trace_ext4_write_end(inode, pos, len, copied);
- if (inline_data) {
+ if (inline_data &&
+ ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) {
ret = ext4_write_inline_data_end(inode, pos, len,
copied, page);
if (ret < 0) {
@@ -4549,7 +4552,7 @@ int ext4_truncate(struct inode *inode)
trace_ext4_truncate_enter(inode);
if (!ext4_can_truncate(inode))
- return 0;
+ goto out_trace;
ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
@@ -4560,16 +4563,15 @@ int ext4_truncate(struct inode *inode)
int has_inline = 1;
err = ext4_inline_data_truncate(inode, &has_inline);
- if (err)
- return err;
- if (has_inline)
- return 0;
+ if (err || has_inline)
+ goto out_trace;
}
/* If we zero-out tail of the page, we have to create jinode for jbd2 */
if (inode->i_size & (inode->i_sb->s_blocksize - 1)) {
- if (ext4_inode_attach_jinode(inode) < 0)
- return 0;
+ err = ext4_inode_attach_jinode(inode);
+ if (err)
+ goto out_trace;
}
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
@@ -4578,8 +4580,10 @@ int ext4_truncate(struct inode *inode)
credits = ext4_blocks_for_truncate(inode);
handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
- if (IS_ERR(handle))
- return PTR_ERR(handle);
+ if (IS_ERR(handle)) {
+ err = PTR_ERR(handle);
+ goto out_trace;
+ }
if (inode->i_size & (inode->i_sb->s_blocksize - 1))
ext4_block_truncate_page(handle, mapping, inode->i_size);
@@ -4628,6 +4632,7 @@ out_stop:
ext4_mark_inode_dirty(handle, inode);
ext4_journal_stop(handle);
+out_trace:
trace_ext4_truncate_exit(inode);
return err;
}
@@ -4663,9 +4668,17 @@ static int __ext4_get_inode_loc(struct inode *inode,
inodes_per_block = EXT4_SB(sb)->s_inodes_per_block;
inode_offset = ((inode->i_ino - 1) %
EXT4_INODES_PER_GROUP(sb));
- block = ext4_inode_table(sb, gdp) + (inode_offset / inodes_per_block);
iloc->offset = (inode_offset % inodes_per_block) * EXT4_INODE_SIZE(sb);
+ block = ext4_inode_table(sb, gdp);
+ if ((block <= le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) ||
+ (block >= ext4_blocks_count(EXT4_SB(sb)->s_es))) {
+ ext4_error(sb, "Invalid inode table block %llu in "
+ "block_group %u", block, iloc->block_group);
+ return -EFSCORRUPTED;
+ }
+ block += (inode_offset / inodes_per_block);
+
bh = sb_getblk(sb, block);
if (unlikely(!bh))
return -ENOMEM;
@@ -4853,8 +4866,13 @@ static inline int ext4_iget_extra_inode(struct inode *inode,
if (EXT4_INODE_HAS_XATTR_SPACE(inode) &&
*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) {
+ int err;
+
ext4_set_inode_state(inode, EXT4_STATE_XATTR);
- return ext4_find_inline_data_nolock(inode);
+ err = ext4_find_inline_data_nolock(inode);
+ if (!err && ext4_has_inline_data(inode))
+ ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
+ return err;
} else
EXT4_I(inode)->i_inline_off = 0;
return 0;
@@ -4930,13 +4948,6 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
goto bad_inode;
raw_inode = ext4_raw_inode(&iloc);
- if ((ino == EXT4_ROOT_INO) && (raw_inode->i_links_count == 0)) {
- ext4_error_inode(inode, function, line, 0,
- "iget: root inode unallocated");
- ret = -EFSCORRUPTED;
- goto bad_inode;
- }
-
if ((flags & EXT4_IGET_HANDLE) &&
(raw_inode->i_links_count == 0) && (raw_inode->i_mode == 0)) {
ret = -ESTALE;
@@ -5007,11 +5018,16 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
* NeilBrown 1999oct15
*/
if (inode->i_nlink == 0) {
- if ((inode->i_mode == 0 ||
+ if ((inode->i_mode == 0 || flags & EXT4_IGET_SPECIAL ||
!(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) &&
ino != EXT4_BOOT_LOADER_INO) {
- /* this inode is deleted */
- ret = -ESTALE;
+ /* this inode is deleted or unallocated */
+ if (flags & EXT4_IGET_SPECIAL) {
+ ext4_error_inode(inode, function, line, 0,
+ "iget: special inode unallocated");
+ ret = -EFSCORRUPTED;
+ } else
+ ret = -ESTALE;
goto bad_inode;
}
/* The only unlinked inodes we let through here have
@@ -6033,6 +6049,14 @@ static int __ext4_expand_extra_isize(struct inode *inode,
return 0;
}
+ /*
+ * We may need to allocate external xattr block so we need quotas
+ * initialized. Here we can be called with various locks held so we
+ * cannot afford to initialize quotas ourselves. So just bail.
+ */
+ if (dquot_initialize_needed(inode))
+ return -EAGAIN;
+
/* try to expand with EAs present */
error = ext4_expand_extra_isize_ea(inode, new_extra_isize,
raw_inode, handle);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 484cb68c3..b930e8d55 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -169,7 +169,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
/* Protect extent tree against block allocations via delalloc */
ext4_double_down_write_data_sem(inode, inode_bl);
- if (inode_bl->i_nlink == 0) {
+ if (is_bad_inode(inode_bl) || !S_ISREG(inode_bl->i_mode)) {
/* this inode has never been used as a BOOT_LOADER */
set_nlink(inode_bl, 1);
i_uid_write(inode_bl, 0);
@@ -178,6 +178,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
ei_bl->i_flags = 0;
inode_set_iversion(inode_bl, 1);
i_size_write(inode_bl, 0);
+ EXT4_I(inode_bl)->i_disksize = inode_bl->i_size;
inode_bl->i_mode = S_IFREG;
if (ext4_has_feature_extents(sb)) {
ext4_set_inode_flag(inode_bl, EXT4_INODE_EXTENTS);
@@ -449,6 +450,10 @@ static int ext4_ioctl_setproject(struct file *filp, __u32 projid)
if (ext4_is_quota_file(inode))
return err;
+ err = dquot_initialize(inode);
+ if (err)
+ return err;
+
err = ext4_get_inode_loc(inode, &iloc);
if (err)
return err;
@@ -464,10 +469,6 @@ static int ext4_ioctl_setproject(struct file *filp, __u32 projid)
brelse(iloc.bh);
}
- err = dquot_initialize(inode);
- if (err)
- return err;
-
handle = ext4_journal_start(inode, EXT4_HT_QUOTA,
EXT4_QUOTA_INIT_BLOCKS(sb) +
EXT4_QUOTA_DEL_BLOCKS(sb) + 3);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index a878b9a8d..db9bba347 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1419,11 +1419,10 @@ static struct buffer_head *__ext4_find_entry(struct inode *dir,
int has_inline_data = 1;
ret = ext4_find_inline_entry(dir, fname, res_dir,
&has_inline_data);
- if (has_inline_data) {
- if (inlined)
- *inlined = 1;
+ if (inlined)
+ *inlined = has_inline_data;
+ if (has_inline_data)
goto cleanup_and_exit;
- }
}
if ((namelen <= 2) && (name[0] == '.') &&
@@ -3515,7 +3514,8 @@ static void ext4_resetent(handle_t *handle, struct ext4_renament *ent,
* so the old->de may no longer valid and need to find it again
* before reset old inode info.
*/
- old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL);
+ old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de,
+ &old.inlined);
if (IS_ERR(old.bh))
retval = PTR_ERR(old.bh);
if (!old.bh)
@@ -3677,7 +3677,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
return retval;
}
- old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL);
+ old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de,
+ &old.inlined);
if (IS_ERR(old.bh))
return PTR_ERR(old.bh);
/*
@@ -3870,6 +3871,9 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
retval = dquot_initialize(old.dir);
if (retval)
return retval;
+ retval = dquot_initialize(old.inode);
+ if (retval)
+ return retval;
retval = dquot_initialize(new.dir);
if (retval)
return retval;
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 3de933354..bf910f266 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -388,7 +388,8 @@ static int io_submit_init_bio(struct ext4_io_submit *io,
static int io_submit_add_bh(struct ext4_io_submit *io,
struct inode *inode,
- struct page *page,
+ struct page *pagecache_page,
+ struct page *bounce_page,
struct buffer_head *bh)
{
int ret;
@@ -403,10 +404,11 @@ submit_and_retry:
return ret;
io->io_bio->bi_write_hint = inode->i_write_hint;
}
- ret = bio_add_page(io->io_bio, page, bh->b_size, bh_offset(bh));
+ ret = bio_add_page(io->io_bio, bounce_page ?: pagecache_page,
+ bh->b_size, bh_offset(bh));
if (ret != bh->b_size)
goto submit_and_retry;
- wbc_account_io(io->io_wbc, page, bh->b_size);
+ wbc_account_io(io->io_wbc, pagecache_page, bh->b_size);
io->io_next_block++;
return 0;
}
@@ -514,8 +516,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
do {
if (!buffer_async_write(bh))
continue;
- ret = io_submit_add_bh(io, inode,
- data_page ? data_page : page, bh);
+ ret = io_submit_add_bh(io, inode, page, data_page, bh);
if (ret) {
/*
* We only get here on ENOMEM. Not much else
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 8737d1bcd..288213cad 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -1567,8 +1567,8 @@ exit_journal:
int meta_bg = ext4_has_feature_meta_bg(sb);
sector_t old_gdb = 0;
- update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es,
- sizeof(struct ext4_super_block), 0);
+ update_backups(sb, ext4_group_first_block_no(sb, 0),
+ (char *)es, sizeof(struct ext4_super_block), 0);
for (; gdb_num <= gdb_num_end; gdb_num++) {
struct buffer_head *gdb_bh;
@@ -1775,7 +1775,7 @@ errout:
if (test_opt(sb, DEBUG))
printk(KERN_DEBUG "EXT4-fs: extended group to %llu "
"blocks\n", ext4_blocks_count(es));
- update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr,
+ update_backups(sb, ext4_group_first_block_no(sb, 0),
(char *)es, sizeof(struct ext4_super_block), 0);
}
return err;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index f00cc301d..e54a5be15 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1081,6 +1081,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
return NULL;
inode_set_iversion(&ei->vfs_inode, 1);
+ ei->i_flags = 0;
spin_lock_init(&ei->i_raw_lock);
INIT_LIST_HEAD(&ei->i_prealloc_list);
spin_lock_init(&ei->i_prealloc_lock);
@@ -4302,30 +4303,31 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
ext4_has_feature_journal_needs_recovery(sb)) {
ext4_msg(sb, KERN_ERR, "required journal recovery "
"suppressed and not mounted read-only");
- goto failed_mount_wq;
+ goto failed_mount3a;
} else {
/* Nojournal mode, all journal mount options are illegal */
- if (test_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM)) {
- ext4_msg(sb, KERN_ERR, "can't mount with "
- "journal_checksum, fs mounted w/o journal");
- goto failed_mount_wq;
- }
if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
ext4_msg(sb, KERN_ERR, "can't mount with "
"journal_async_commit, fs mounted w/o journal");
- goto failed_mount_wq;
+ goto failed_mount3a;
+ }
+
+ if (test_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM)) {
+ ext4_msg(sb, KERN_ERR, "can't mount with "
+ "journal_checksum, fs mounted w/o journal");
+ goto failed_mount3a;
}
if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) {
ext4_msg(sb, KERN_ERR, "can't mount with "
"commit=%lu, fs mounted w/o journal",
sbi->s_commit_interval / HZ);
- goto failed_mount_wq;
+ goto failed_mount3a;
}
if (EXT4_MOUNT_DATA_FLAGS &
(sbi->s_mount_opt ^ sbi->s_def_mount_opt)) {
ext4_msg(sb, KERN_ERR, "can't mount with "
"data=, fs mounted w/o journal");
- goto failed_mount_wq;
+ goto failed_mount3a;
}
sbi->s_def_mount_opt &= ~EXT4_MOUNT_JOURNAL_CHECKSUM;
clear_opt(sb, JOURNAL_CHECKSUM);
@@ -4731,7 +4733,7 @@ static struct inode *ext4_get_journal_inode(struct super_block *sb,
jbd_debug(2, "Journal inode found at %p: %lld bytes\n",
journal_inode, journal_inode->i_size);
- if (!S_ISREG(journal_inode->i_mode)) {
+ if (!S_ISREG(journal_inode->i_mode) || IS_ENCRYPTED(journal_inode)) {
ext4_msg(sb, KERN_ERR, "invalid journal inode");
iput(journal_inode);
return NULL;
@@ -5839,6 +5841,20 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
return err;
}
+static inline bool ext4_check_quota_inum(int type, unsigned long qf_inum)
+{
+ switch (type) {
+ case USRQUOTA:
+ return qf_inum == EXT4_USR_QUOTA_INO;
+ case GRPQUOTA:
+ return qf_inum == EXT4_GRP_QUOTA_INO;
+ case PRJQUOTA:
+ return qf_inum >= EXT4_GOOD_OLD_FIRST_INO;
+ default:
+ BUG();
+ }
+}
+
static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
unsigned int flags)
{
@@ -5855,9 +5871,16 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
if (!qf_inums[type])
return -EPERM;
+ if (!ext4_check_quota_inum(type, qf_inums[type])) {
+ ext4_error(sb, "Bad quota inum: %lu, type: %d",
+ qf_inums[type], type);
+ return -EUCLEAN;
+ }
+
qf_inode = ext4_iget(sb, qf_inums[type], EXT4_IGET_SPECIAL);
if (IS_ERR(qf_inode)) {
- ext4_error(sb, "Bad quota inode # %lu", qf_inums[type]);
+ ext4_error(sb, "Bad quota inode: %lu, type: %d",
+ qf_inums[type], type);
return PTR_ERR(qf_inode);
}
@@ -5896,8 +5919,9 @@ static int ext4_enable_quotas(struct super_block *sb)
if (err) {
ext4_warning(sb,
"Failed to enable quota tracking "
- "(type=%d, err=%d). Please run "
- "e2fsck to fix.", type, err);
+ "(type=%d, err=%d, ino=%lu). "
+ "Please run e2fsck to fix.", type,
+ err, qf_inums[type]);
for (type--; type >= 0; type--) {
struct inode *inode;
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index 9212a026a..74722ce72 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -349,6 +349,11 @@ static void ext4_sb_release(struct kobject *kobj)
complete(&sbi->s_kobj_unregister);
}
+static void ext4_feat_release(struct kobject *kobj)
+{
+ kfree(kobj);
+}
+
static const struct sysfs_ops ext4_attr_ops = {
.show = ext4_attr_show,
.store = ext4_attr_store,
@@ -363,7 +368,7 @@ static struct kobj_type ext4_sb_ktype = {
static struct kobj_type ext4_feat_ktype = {
.default_attrs = ext4_feat_attrs,
.sysfs_ops = &ext4_attr_ops,
- .release = (void (*)(struct kobject *))kfree,
+ .release = ext4_feat_release,
};
static struct kobject *ext4_root;
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 497649c69..1b73a7f81 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -384,6 +384,17 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
struct inode *inode;
int err;
+ /*
+ * We have to check for this corruption early as otherwise
+ * iget_locked() could wait indefinitely for the state of our
+ * parent inode.
+ */
+ if (parent->i_ino == ea_ino) {
+ ext4_error(parent->i_sb,
+ "Parent and EA inode have the same ino %lu", ea_ino);
+ return -EFSCORRUPTED;
+ }
+
inode = ext4_iget(parent->i_sb, ea_ino, EXT4_IGET_NORMAL);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
@@ -434,6 +445,21 @@ error:
return err;
}
+/* Remove entry from mbcache when EA inode is getting evicted */
+void ext4_evict_ea_inode(struct inode *inode)
+{
+ struct mb_cache_entry *oe;
+
+ if (!EA_INODE_CACHE(inode))
+ return;
+ /* Wait for entry to get unused so that we can remove it */
+ while ((oe = mb_cache_entry_delete_or_get(EA_INODE_CACHE(inode),
+ ext4_xattr_inode_get_hash(inode), inode->i_ino))) {
+ mb_cache_entry_wait_unused(oe);
+ mb_cache_entry_put(EA_INODE_CACHE(inode), oe);
+ }
+}
+
static int
ext4_xattr_inode_verify_hashes(struct inode *ea_inode,
struct ext4_xattr_entry *entry, void *buffer,
@@ -1019,10 +1045,8 @@ static int ext4_xattr_ensure_credits(handle_t *handle, struct inode *inode,
static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
int ref_change)
{
- struct mb_cache *ea_inode_cache = EA_INODE_CACHE(ea_inode);
struct ext4_iloc iloc;
s64 ref_count;
- u32 hash;
int ret;
inode_lock(ea_inode);
@@ -1047,14 +1071,6 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
set_nlink(ea_inode, 1);
ext4_orphan_del(handle, ea_inode);
-
- if (ea_inode_cache) {
- hash = ext4_xattr_inode_get_hash(ea_inode);
- mb_cache_entry_create(ea_inode_cache,
- GFP_NOFS, hash,
- ea_inode->i_ino,
- true /* reusable */);
- }
}
} else {
WARN_ONCE(ref_count < 0, "EA inode %lu ref_count=%lld",
@@ -1067,12 +1083,6 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
clear_nlink(ea_inode);
ext4_orphan_add(handle, ea_inode);
-
- if (ea_inode_cache) {
- hash = ext4_xattr_inode_get_hash(ea_inode);
- mb_cache_entry_delete(ea_inode_cache, hash,
- ea_inode->i_ino);
- }
}
}
@@ -1253,6 +1263,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
if (error)
goto out;
+retry_ref:
lock_buffer(bh);
hash = le32_to_cpu(BHDR(bh)->h_hash);
ref = le32_to_cpu(BHDR(bh)->h_refcount);
@@ -1262,9 +1273,18 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
* This must happen under buffer lock for
* ext4_xattr_block_set() to reliably detect freed block
*/
- if (ea_block_cache)
- mb_cache_entry_delete(ea_block_cache, hash,
- bh->b_blocknr);
+ if (ea_block_cache) {
+ struct mb_cache_entry *oe;
+
+ oe = mb_cache_entry_delete_or_get(ea_block_cache, hash,
+ bh->b_blocknr);
+ if (oe) {
+ unlock_buffer(bh);
+ mb_cache_entry_wait_unused(oe);
+ mb_cache_entry_put(ea_block_cache, oe);
+ goto retry_ref;
+ }
+ }
get_bh(bh);
unlock_buffer(bh);
@@ -1288,7 +1308,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
ce = mb_cache_entry_get(ea_block_cache, hash,
bh->b_blocknr);
if (ce) {
- ce->e_reusable = 1;
+ set_bit(MBE_REUSABLE_B, &ce->e_flags);
mb_cache_entry_put(ea_block_cache, ce);
}
}
@@ -1427,6 +1447,13 @@ static struct inode *ext4_xattr_inode_create(handle_t *handle,
uid_t owner[2] = { i_uid_read(inode), i_gid_read(inode) };
int err;
+ if (inode->i_sb->s_root == NULL) {
+ ext4_warning(inode->i_sb,
+ "refuse to create EA inode when umounting");
+ WARN_ON(1);
+ return ERR_PTR(-EINVAL);
+ }
+
/*
* Let the next inode be the goal, so we try and allocate the EA inode
* in the same group, or nearby one.
@@ -1446,6 +1473,9 @@ static struct inode *ext4_xattr_inode_create(handle_t *handle,
if (!err)
err = ext4_inode_attach_jinode(ea_inode);
if (err) {
+ if (ext4_xattr_inode_dec_ref(handle, ea_inode))
+ ext4_warning_inode(ea_inode,
+ "cleanup dec ref error %d", err);
iput(ea_inode);
return ERR_PTR(err);
}
@@ -1872,6 +1902,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
#define header(x) ((struct ext4_xattr_header *)(x))
if (s->base) {
+ int offset = (char *)s->here - bs->bh->b_data;
+
BUFFER_TRACE(bs->bh, "get_write_access");
error = ext4_journal_get_write_access(handle, bs->bh);
if (error)
@@ -1886,9 +1918,20 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
* ext4_xattr_block_set() to reliably detect modified
* block
*/
- if (ea_block_cache)
- mb_cache_entry_delete(ea_block_cache, hash,
- bs->bh->b_blocknr);
+ if (ea_block_cache) {
+ struct mb_cache_entry *oe;
+
+ oe = mb_cache_entry_delete_or_get(ea_block_cache,
+ hash, bs->bh->b_blocknr);
+ if (oe) {
+ /*
+ * Xattr block is getting reused. Leave
+ * it alone.
+ */
+ mb_cache_entry_put(ea_block_cache, oe);
+ goto clone_block;
+ }
+ }
ea_bdebug(bs->bh, "modifying in-place");
error = ext4_xattr_set_entry(i, s, handle, inode,
true /* is_block */);
@@ -1903,50 +1946,47 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
if (error)
goto cleanup;
goto inserted;
- } else {
- int offset = (char *)s->here - bs->bh->b_data;
+ }
+clone_block:
+ unlock_buffer(bs->bh);
+ ea_bdebug(bs->bh, "cloning");
+ s->base = kmemdup(BHDR(bs->bh), bs->bh->b_size, GFP_NOFS);
+ error = -ENOMEM;
+ if (s->base == NULL)
+ goto cleanup;
+ s->first = ENTRY(header(s->base)+1);
+ header(s->base)->h_refcount = cpu_to_le32(1);
+ s->here = ENTRY(s->base + offset);
+ s->end = s->base + bs->bh->b_size;
- unlock_buffer(bs->bh);
- ea_bdebug(bs->bh, "cloning");
- s->base = kmalloc(bs->bh->b_size, GFP_NOFS);
- error = -ENOMEM;
- if (s->base == NULL)
+ /*
+ * If existing entry points to an xattr inode, we need
+ * to prevent ext4_xattr_set_entry() from decrementing
+ * ref count on it because the reference belongs to the
+ * original block. In this case, make the entry look
+ * like it has an empty value.
+ */
+ if (!s->not_found && s->here->e_value_inum) {
+ ea_ino = le32_to_cpu(s->here->e_value_inum);
+ error = ext4_xattr_inode_iget(inode, ea_ino,
+ le32_to_cpu(s->here->e_hash),
+ &tmp_inode);
+ if (error)
goto cleanup;
- memcpy(s->base, BHDR(bs->bh), bs->bh->b_size);
- s->first = ENTRY(header(s->base)+1);
- header(s->base)->h_refcount = cpu_to_le32(1);
- s->here = ENTRY(s->base + offset);
- s->end = s->base + bs->bh->b_size;
-
- /*
- * If existing entry points to an xattr inode, we need
- * to prevent ext4_xattr_set_entry() from decrementing
- * ref count on it because the reference belongs to the
- * original block. In this case, make the entry look
- * like it has an empty value.
- */
- if (!s->not_found && s->here->e_value_inum) {
- ea_ino = le32_to_cpu(s->here->e_value_inum);
- error = ext4_xattr_inode_iget(inode, ea_ino,
- le32_to_cpu(s->here->e_hash),
- &tmp_inode);
- if (error)
- goto cleanup;
- if (!ext4_test_inode_state(tmp_inode,
- EXT4_STATE_LUSTRE_EA_INODE)) {
- /*
- * Defer quota free call for previous
- * inode until success is guaranteed.
- */
- old_ea_inode_quota = le32_to_cpu(
- s->here->e_value_size);
- }
- iput(tmp_inode);
-
- s->here->e_value_inum = 0;
- s->here->e_value_size = 0;
+ if (!ext4_test_inode_state(tmp_inode,
+ EXT4_STATE_LUSTRE_EA_INODE)) {
+ /*
+ * Defer quota free call for previous
+ * inode until success is guaranteed.
+ */
+ old_ea_inode_quota = le32_to_cpu(
+ s->here->e_value_size);
}
+ iput(tmp_inode);
+
+ s->here->e_value_inum = 0;
+ s->here->e_value_size = 0;
}
} else {
/* Allocate a buffer where we construct the new block. */
@@ -2013,18 +2053,13 @@ inserted:
lock_buffer(new_bh);
/*
* We have to be careful about races with
- * freeing, rehashing or adding references to
- * xattr block. Once we hold buffer lock xattr
- * block's state is stable so we can check
- * whether the block got freed / rehashed or
- * not. Since we unhash mbcache entry under
- * buffer lock when freeing / rehashing xattr
- * block, checking whether entry is still
- * hashed is reliable. Same rules hold for
- * e_reusable handling.
+ * adding references to xattr block. Once we
+ * hold buffer lock xattr block's state is
+ * stable so we can check the additional
+ * reference fits.
*/
- if (hlist_bl_unhashed(&ce->e_hash_list) ||
- !ce->e_reusable) {
+ ref = le32_to_cpu(BHDR(new_bh)->h_refcount) + 1;
+ if (ref > EXT4_XATTR_REFCOUNT_MAX) {
/*
* Undo everything and check mbcache
* again.
@@ -2039,10 +2074,9 @@ inserted:
new_bh = NULL;
goto inserted;
}
- ref = le32_to_cpu(BHDR(new_bh)->h_refcount) + 1;
BHDR(new_bh)->h_refcount = cpu_to_le32(ref);
- if (ref >= EXT4_XATTR_REFCOUNT_MAX)
- ce->e_reusable = 0;
+ if (ref == EXT4_XATTR_REFCOUNT_MAX)
+ clear_bit(MBE_REUSABLE_B, &ce->e_flags);
ea_bdebug(new_bh, "reusing; refcount now=%d",
ref);
ext4_xattr_block_csum_set(inode, new_bh);
@@ -2070,19 +2104,11 @@ inserted:
goal = ext4_group_first_block_no(sb,
EXT4_I(inode)->i_block_group);
-
- /* non-extent files can't have physical blocks past 2^32 */
- if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
- goal = goal & EXT4_MAX_BLOCK_FILE_PHYS;
-
block = ext4_new_meta_blocks(handle, inode, goal, 0,
NULL, &error);
if (error)
goto cleanup;
- if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
- BUG_ON(block > EXT4_MAX_BLOCK_FILE_PHYS);
-
ea_idebug(inode, "creating block %llu",
(unsigned long long)block);
@@ -2210,7 +2236,7 @@ int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i,
return 0;
}
-int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode,
+int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
struct ext4_xattr_info *i,
struct ext4_xattr_ibody_find *is)
{
@@ -2235,30 +2261,6 @@ int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode,
return 0;
}
-static int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
- struct ext4_xattr_info *i,
- struct ext4_xattr_ibody_find *is)
-{
- struct ext4_xattr_ibody_header *header;
- struct ext4_xattr_search *s = &is->s;
- int error;
-
- if (EXT4_I(inode)->i_extra_isize == 0)
- return -ENOSPC;
- error = ext4_xattr_set_entry(i, s, handle, inode, false /* is_block */);
- if (error)
- return error;
- header = IHDR(inode, ext4_raw_inode(&is->iloc));
- if (!IS_LAST_ENTRY(s->first)) {
- header->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC);
- ext4_set_inode_state(inode, EXT4_STATE_XATTR);
- } else {
- header->h_magic = cpu_to_le32(0);
- ext4_clear_inode_state(inode, EXT4_STATE_XATTR);
- }
- return 0;
-}
-
static int ext4_xattr_value_same(struct ext4_xattr_search *s,
struct ext4_xattr_info *i)
{
@@ -2575,9 +2577,8 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode,
is = kzalloc(sizeof(struct ext4_xattr_ibody_find), GFP_NOFS);
bs = kzalloc(sizeof(struct ext4_xattr_block_find), GFP_NOFS);
- buffer = kmalloc(value_size, GFP_NOFS);
b_entry_name = kmalloc(entry->e_name_len + 1, GFP_NOFS);
- if (!is || !bs || !buffer || !b_entry_name) {
+ if (!is || !bs || !b_entry_name) {
error = -ENOMEM;
goto out;
}
@@ -2589,12 +2590,18 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode,
/* Save the entry name and the entry value */
if (entry->e_value_inum) {
+ buffer = kvmalloc(value_size, GFP_NOFS);
+ if (!buffer) {
+ error = -ENOMEM;
+ goto out;
+ }
+
error = ext4_xattr_inode_get(inode, entry, buffer, value_size);
if (error)
goto out;
} else {
size_t value_offs = le16_to_cpu(entry->e_value_offs);
- memcpy(buffer, (void *)IFIRST(header) + value_offs, value_size);
+ buffer = (void *)IFIRST(header) + value_offs;
}
memcpy(b_entry_name, entry->e_name, entry->e_name_len);
@@ -2609,25 +2616,26 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode,
if (error)
goto out;
- /* Remove the chosen entry from the inode */
- error = ext4_xattr_ibody_set(handle, inode, &i, is);
- if (error)
- goto out;
-
i.value = buffer;
i.value_len = value_size;
error = ext4_xattr_block_find(inode, &i, bs);
if (error)
goto out;
- /* Add entry which was removed from the inode into the block */
+ /* Move ea entry from the inode into the block */
error = ext4_xattr_block_set(handle, inode, &i, bs);
if (error)
goto out;
- error = 0;
+
+ /* Remove the chosen entry from the inode */
+ i.value = NULL;
+ i.value_len = 0;
+ error = ext4_xattr_ibody_set(handle, inode, &i, is);
+
out:
kfree(b_entry_name);
- kfree(buffer);
+ if (entry->e_value_inum && buffer)
+ kvfree(buffer);
if (is)
brelse(is->iloc.bh);
if (bs)
@@ -2802,6 +2810,9 @@ shift:
(void *)header, total_ino);
EXT4_I(inode)->i_extra_isize = new_extra_isize;
+ if (ext4_has_inline_data(inode))
+ error = ext4_find_inline_data_nolock(inode);
+
cleanup:
if (error && (mnt_count != le16_to_cpu(sbi->s_es->s_mnt_count))) {
ext4_warning(inode->i_sb, "Unable to expand inode %lu. Delete some EAs or run e2fsck.",
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index 990084e00..66911f8a1 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -190,6 +190,7 @@ extern void ext4_xattr_inode_array_free(struct ext4_xattr_inode_array *array);
extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
struct ext4_inode *raw_inode, handle_t *handle);
+extern void ext4_evict_ea_inode(struct inode *inode);
extern const struct xattr_handler *ext4_xattr_handlers[];
@@ -198,9 +199,9 @@ extern int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i,
extern int ext4_xattr_ibody_get(struct inode *inode, int name_index,
const char *name,
void *buffer, size_t buffer_size);
-extern int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode,
- struct ext4_xattr_info *i,
- struct ext4_xattr_ibody_find *is);
+extern int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
+ struct ext4_xattr_info *i,
+ struct ext4_xattr_ibody_find *is);
extern struct mb_cache *ext4_xattr_create_cache(void);
extern void ext4_xattr_destroy_cache(struct mb_cache *);