Diffstat (limited to 'fs/hugetlbfs')
-rw-r--r--  fs/hugetlbfs/inode.c  111
1 file changed, 44 insertions(+), 67 deletions(-)
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 60fce26ff9..6f57fa75a6 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -83,29 +83,6 @@ static const struct fs_parameter_spec hugetlb_fs_parameters[] = {
{}
};
-#ifdef CONFIG_NUMA
-static inline void hugetlb_set_vma_policy(struct vm_area_struct *vma,
- struct inode *inode, pgoff_t index)
-{
- vma->vm_policy = mpol_shared_policy_lookup(&HUGETLBFS_I(inode)->policy,
- index);
-}
-
-static inline void hugetlb_drop_vma_policy(struct vm_area_struct *vma)
-{
- mpol_cond_put(vma->vm_policy);
-}
-#else
-static inline void hugetlb_set_vma_policy(struct vm_area_struct *vma,
- struct inode *inode, pgoff_t index)
-{
-}
-
-static inline void hugetlb_drop_vma_policy(struct vm_area_struct *vma)
-{
-}
-#endif
-
/*
* Mask used when checking the page offset value passed in via system
* calls. This value will be converted to a loff_t which is signed.
@@ -123,6 +100,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
loff_t len, vma_len;
int ret;
struct hstate *h = hstate_file(file);
+ vm_flags_t vm_flags;
/*
* vma address alignment (but not the pgoff alignment) has
@@ -135,7 +113,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
vm_flags_set(vma, VM_HUGETLB | VM_DONTEXPAND);
vma->vm_ops = &hugetlb_vm_ops;
- ret = seal_check_future_write(info->seals, vma);
+ ret = seal_check_write(info->seals, vma);
if (ret)
return ret;
@@ -164,10 +142,20 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
file_accessed(file);
ret = -ENOMEM;
+
+ vm_flags = vma->vm_flags;
+ /*
+ * for SHM_HUGETLB, the pages are reserved in the shmget() call so skip
+ * reserving here. Note: only for SHM hugetlbfs file, the inode
+ * flag S_PRIVATE is set.
+ */
+ if (inode->i_flags & S_PRIVATE)
+ vm_flags |= VM_NORESERVE;
+
if (!hugetlb_reserve_pages(inode,
vma->vm_pgoff >> huge_page_order(h),
len >> huge_page_shift(h), vma,
- vma->vm_flags))
+ vm_flags))
goto out;
ret = 0;
@@ -334,7 +322,7 @@ static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to)
ssize_t retval = 0;
while (iov_iter_count(to)) {
- struct page *page;
+ struct folio *folio;
size_t nr, copied, want;
/* nr is the maximum number of bytes to copy from this page */
@@ -352,18 +340,18 @@ static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to)
}
nr = nr - offset;
- /* Find the page */
- page = find_lock_page(mapping, index);
- if (unlikely(page == NULL)) {
+ /* Find the folio */
+ folio = filemap_lock_hugetlb_folio(h, mapping, index);
+ if (IS_ERR(folio)) {
/*
* We have a HOLE, zero out the user-buffer for the
* length of the hole or request.
*/
copied = iov_iter_zero(nr, to);
} else {
- unlock_page(page);
+ folio_unlock(folio);
- if (!PageHWPoison(page))
+ if (!folio_test_hwpoison(folio))
want = nr;
else {
/*
@@ -371,19 +359,19 @@ static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to)
* touching the 1st raw HWPOISON subpage after
* offset.
*/
- want = adjust_range_hwpoison(page, offset, nr);
+ want = adjust_range_hwpoison(&folio->page, offset, nr);
if (want == 0) {
- put_page(page);
+ folio_put(folio);
retval = -EIO;
break;
}
}
/*
- * We have the page, copy it to user space buffer.
+ * We have the folio, copy it to user space buffer.
*/
- copied = copy_page_to_iter(page, offset, want, to);
- put_page(page);
+ copied = copy_folio_to_iter(folio, offset, want, to);
+ folio_put(folio);
}
offset += copied;
retval += copied;
@@ -661,21 +649,20 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
{
struct hstate *h = hstate_inode(inode);
struct address_space *mapping = &inode->i_data;
- const pgoff_t start = lstart >> huge_page_shift(h);
- const pgoff_t end = lend >> huge_page_shift(h);
+ const pgoff_t end = lend >> PAGE_SHIFT;
struct folio_batch fbatch;
pgoff_t next, index;
int i, freed = 0;
bool truncate_op = (lend == LLONG_MAX);
folio_batch_init(&fbatch);
- next = start;
+ next = lstart >> PAGE_SHIFT;
while (filemap_get_folios(mapping, &next, end - 1, &fbatch)) {
for (i = 0; i < folio_batch_count(&fbatch); ++i) {
struct folio *folio = fbatch.folios[i];
u32 hash = 0;
- index = folio->index;
+ index = folio->index >> huge_page_order(h);
hash = hugetlb_fault_mutex_hash(mapping, index);
mutex_lock(&hugetlb_fault_mutex_table[hash]);
@@ -693,7 +680,9 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
}
if (truncate_op)
- (void)hugetlb_unreserve_pages(inode, start, LONG_MAX, freed);
+ (void)hugetlb_unreserve_pages(inode,
+ lstart >> huge_page_shift(h),
+ LONG_MAX, freed);
}
static void hugetlbfs_evict_inode(struct inode *inode)
@@ -741,7 +730,7 @@ static void hugetlbfs_zero_partial_page(struct hstate *h,
pgoff_t idx = start >> huge_page_shift(h);
struct folio *folio;
- folio = filemap_lock_folio(mapping, idx);
+ folio = filemap_lock_hugetlb_folio(h, mapping, idx);
if (IS_ERR(folio))
return;
@@ -852,8 +841,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
/*
* Initialize a pseudo vma as this is required by the huge page
- * allocation routines. If NUMA is configured, use page index
- * as input to create an allocation policy.
+ * allocation routines.
*/
vma_init(&pseudo_vma, mm);
vm_flags_init(&pseudo_vma, VM_HUGETLB | VM_MAYSHARE | VM_SHARED);
@@ -886,7 +874,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
mutex_lock(&hugetlb_fault_mutex_table[hash]);
/* See if already present in mapping to avoid alloc/free */
- folio = filemap_get_folio(mapping, index);
+ folio = filemap_get_folio(mapping, index << huge_page_order(h));
if (!IS_ERR(folio)) {
folio_put(folio);
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
@@ -901,9 +889,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
* folios in these areas, we need to consume the reserves
* to keep reservation accounting consistent.
*/
- hugetlb_set_vma_policy(&pseudo_vma, inode, index);
folio = alloc_hugetlb_folio(&pseudo_vma, addr, 0);
- hugetlb_drop_vma_policy(&pseudo_vma);
if (IS_ERR(folio)) {
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
error = PTR_ERR(folio);
@@ -980,7 +966,7 @@ static struct inode *hugetlbfs_get_root(struct super_block *sb,
inode->i_mode = S_IFDIR | ctx->mode;
inode->i_uid = ctx->uid;
inode->i_gid = ctx->gid;
- inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
+ simple_inode_init_ts(inode);
inode->i_op = &hugetlbfs_dir_inode_operations;
inode->i_fop = &simple_dir_operations;
/* directory inodes start off with i_nlink == 2 (for "." entry) */
@@ -1024,7 +1010,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb,
lockdep_set_class(&inode->i_mapping->i_mmap_rwsem,
&hugetlbfs_i_mmap_rwsem_key);
inode->i_mapping->a_ops = &hugetlbfs_aops;
- inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
+ simple_inode_init_ts(inode);
inode->i_mapping->private_data = resv_map;
info->seals = F_SEAL_SEAL;
switch (mode & S_IFMT) {
@@ -1067,7 +1053,7 @@ static int hugetlbfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
inode = hugetlbfs_get_inode(dir->i_sb, dir, mode, dev);
if (!inode)
return -ENOSPC;
- dir->i_mtime = inode_set_ctime_current(dir);
+ inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir));
d_instantiate(dentry, inode);
dget(dentry);/* Extra count - pin the dentry in core */
return 0;
@@ -1099,7 +1085,7 @@ static int hugetlbfs_tmpfile(struct mnt_idmap *idmap,
inode = hugetlbfs_get_inode(dir->i_sb, dir, mode | S_IFREG, 0);
if (!inode)
return -ENOSPC;
- dir->i_mtime = inode_set_ctime_current(dir);
+ inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir));
d_tmpfile(file, inode);
return finish_open_simple(file, 0);
}
@@ -1121,7 +1107,7 @@ static int hugetlbfs_symlink(struct mnt_idmap *idmap,
} else
iput(inode);
}
- dir->i_mtime = inode_set_ctime_current(dir);
+ inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir));
return error;
}
@@ -1204,7 +1190,9 @@ static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb);
struct hstate *h = hstate_inode(d_inode(dentry));
+ u64 id = huge_encode_dev(dentry->d_sb->s_dev);
+ buf->f_fsid = u64_to_fsid(id);
buf->f_type = HUGETLBFS_MAGIC;
buf->f_bsize = huge_page_size(h);
if (sbinfo) {
@@ -1282,18 +1270,6 @@ static struct inode *hugetlbfs_alloc_inode(struct super_block *sb)
hugetlbfs_inc_free_inodes(sbinfo);
return NULL;
}
-
- /*
- * Any time after allocation, hugetlbfs_destroy_inode can be called
- * for the inode. mpol_free_shared_policy is unconditionally called
- * as part of hugetlbfs_destroy_inode. So, initialize policy here
- * in case of a quick call to destroy.
- *
- * Note that the policy is initialized even if we are creating a
- * private inode. This simplifies hugetlbfs_destroy_inode.
- */
- mpol_shared_policy_init(&p->policy, NULL);
-
return &p->vfs_inode;
}
@@ -1305,7 +1281,6 @@ static void hugetlbfs_free_inode(struct inode *inode)
static void hugetlbfs_destroy_inode(struct inode *inode)
{
hugetlbfs_inc_free_inodes(HUGETLBFS_SB(inode->i_sb));
- mpol_free_shared_policy(&HUGETLBFS_I(inode)->policy);
}
static const struct address_space_operations hugetlbfs_aops = {
@@ -1390,6 +1365,7 @@ static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *par
{
struct hugetlbfs_fs_context *ctx = fc->fs_private;
struct fs_parse_result result;
+ struct hstate *h;
char *rest;
unsigned long ps;
int opt;
@@ -1434,11 +1410,12 @@ static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *par
case Opt_pagesize:
ps = memparse(param->string, &rest);
- ctx->hstate = size_to_hstate(ps);
- if (!ctx->hstate) {
+ h = size_to_hstate(ps);
+ if (!h) {
pr_err("Unsupported page size %lu MB\n", ps / SZ_1M);
return -EINVAL;
}
+ ctx->hstate = h;
return 0;
case Opt_min_size:
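
Several hunks above convert between PAGE_SIZE-based page-cache indices and huge-page-sized indices: the ">> huge_page_order(h)" shift in remove_inode_hugepages() and the "<< huge_page_order(h)" shift in hugetlbfs_fallocate(). The user-space sketch below is an editor's illustration only, not part of the patch; it assumes 4 KiB base pages and 2 MiB huge pages purely for the example values.

/*
 * Illustrative sketch (not from the kernel tree): how a byte offset
 * maps to a PAGE_SIZE-based page-cache index (folio->index) and to a
 * huge-page-sized index, mirroring the shifts in the hunks above.
 * PAGE_SHIFT/HPAGE_SHIFT values are assumptions for the example.
 */
#include <stdio.h>

#define PAGE_SHIFT   12UL                       /* 4 KiB base page */
#define HPAGE_SHIFT  21UL                       /* 2 MiB huge page */
#define HPAGE_ORDER  (HPAGE_SHIFT - PAGE_SHIFT) /* huge_page_order(h) */

int main(void)
{
	unsigned long offset = 5UL << HPAGE_SHIFT;  /* byte offset into the file */

	/* PAGE_SIZE-based page-cache index, as stored in folio->index */
	unsigned long pgcache_index = offset >> PAGE_SHIFT;

	/* huge-page-sized index, as used for the fault mutex hash */
	unsigned long hpage_index = pgcache_index >> HPAGE_ORDER;

	printf("byte offset       %lu\n", offset);
	printf("page-cache index  %lu\n", pgcache_index);
	printf("huge-page index   %lu (back to cache index: %lu)\n",
	       hpage_index, hpage_index << HPAGE_ORDER);
	return 0;
}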