diff options
Diffstat (limited to 'fs/hugetlbfs/inode.c')
-rw-r--r-- | fs/hugetlbfs/inode.c | 111 |
1 files changed, 44 insertions, 67 deletions
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 60fce26ff9..6f57fa75a6 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -83,29 +83,6 @@ static const struct fs_parameter_spec hugetlb_fs_parameters[] = { {} }; -#ifdef CONFIG_NUMA -static inline void hugetlb_set_vma_policy(struct vm_area_struct *vma, - struct inode *inode, pgoff_t index) -{ - vma->vm_policy = mpol_shared_policy_lookup(&HUGETLBFS_I(inode)->policy, - index); -} - -static inline void hugetlb_drop_vma_policy(struct vm_area_struct *vma) -{ - mpol_cond_put(vma->vm_policy); -} -#else -static inline void hugetlb_set_vma_policy(struct vm_area_struct *vma, - struct inode *inode, pgoff_t index) -{ -} - -static inline void hugetlb_drop_vma_policy(struct vm_area_struct *vma) -{ -} -#endif - /* * Mask used when checking the page offset value passed in via system * calls. This value will be converted to a loff_t which is signed. @@ -123,6 +100,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) loff_t len, vma_len; int ret; struct hstate *h = hstate_file(file); + vm_flags_t vm_flags; /* * vma address alignment (but not the pgoff alignment) has @@ -135,7 +113,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) vm_flags_set(vma, VM_HUGETLB | VM_DONTEXPAND); vma->vm_ops = &hugetlb_vm_ops; - ret = seal_check_future_write(info->seals, vma); + ret = seal_check_write(info->seals, vma); if (ret) return ret; @@ -164,10 +142,20 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) file_accessed(file); ret = -ENOMEM; + + vm_flags = vma->vm_flags; + /* + * for SHM_HUGETLB, the pages are reserved in the shmget() call so skip + * reserving here. Note: only for SHM hugetlbfs file, the inode + * flag S_PRIVATE is set. + */ + if (inode->i_flags & S_PRIVATE) + vm_flags |= VM_NORESERVE; + if (!hugetlb_reserve_pages(inode, vma->vm_pgoff >> huge_page_order(h), len >> huge_page_shift(h), vma, - vma->vm_flags)) + vm_flags)) goto out; ret = 0; @@ -334,7 +322,7 @@ static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to) ssize_t retval = 0; while (iov_iter_count(to)) { - struct page *page; + struct folio *folio; size_t nr, copied, want; /* nr is the maximum number of bytes to copy from this page */ @@ -352,18 +340,18 @@ static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to) } nr = nr - offset; - /* Find the page */ - page = find_lock_page(mapping, index); - if (unlikely(page == NULL)) { + /* Find the folio */ + folio = filemap_lock_hugetlb_folio(h, mapping, index); + if (IS_ERR(folio)) { /* * We have a HOLE, zero out the user-buffer for the * length of the hole or request. */ copied = iov_iter_zero(nr, to); } else { - unlock_page(page); + folio_unlock(folio); - if (!PageHWPoison(page)) + if (!folio_test_hwpoison(folio)) want = nr; else { /* @@ -371,19 +359,19 @@ static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to) * touching the 1st raw HWPOISON subpage after * offset. */ - want = adjust_range_hwpoison(page, offset, nr); + want = adjust_range_hwpoison(&folio->page, offset, nr); if (want == 0) { - put_page(page); + folio_put(folio); retval = -EIO; break; } } /* - * We have the page, copy it to user space buffer. + * We have the folio, copy it to user space buffer. */ - copied = copy_page_to_iter(page, offset, want, to); - put_page(page); + copied = copy_folio_to_iter(folio, offset, want, to); + folio_put(folio); } offset += copied; retval += copied; @@ -661,21 +649,20 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart, { struct hstate *h = hstate_inode(inode); struct address_space *mapping = &inode->i_data; - const pgoff_t start = lstart >> huge_page_shift(h); - const pgoff_t end = lend >> huge_page_shift(h); + const pgoff_t end = lend >> PAGE_SHIFT; struct folio_batch fbatch; pgoff_t next, index; int i, freed = 0; bool truncate_op = (lend == LLONG_MAX); folio_batch_init(&fbatch); - next = start; + next = lstart >> PAGE_SHIFT; while (filemap_get_folios(mapping, &next, end - 1, &fbatch)) { for (i = 0; i < folio_batch_count(&fbatch); ++i) { struct folio *folio = fbatch.folios[i]; u32 hash = 0; - index = folio->index; + index = folio->index >> huge_page_order(h); hash = hugetlb_fault_mutex_hash(mapping, index); mutex_lock(&hugetlb_fault_mutex_table[hash]); @@ -693,7 +680,9 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart, } if (truncate_op) - (void)hugetlb_unreserve_pages(inode, start, LONG_MAX, freed); + (void)hugetlb_unreserve_pages(inode, + lstart >> huge_page_shift(h), + LONG_MAX, freed); } static void hugetlbfs_evict_inode(struct inode *inode) @@ -741,7 +730,7 @@ static void hugetlbfs_zero_partial_page(struct hstate *h, pgoff_t idx = start >> huge_page_shift(h); struct folio *folio; - folio = filemap_lock_folio(mapping, idx); + folio = filemap_lock_hugetlb_folio(h, mapping, idx); if (IS_ERR(folio)) return; @@ -852,8 +841,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset, /* * Initialize a pseudo vma as this is required by the huge page - * allocation routines. If NUMA is configured, use page index - * as input to create an allocation policy. + * allocation routines. */ vma_init(&pseudo_vma, mm); vm_flags_init(&pseudo_vma, VM_HUGETLB | VM_MAYSHARE | VM_SHARED); @@ -886,7 +874,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset, mutex_lock(&hugetlb_fault_mutex_table[hash]); /* See if already present in mapping to avoid alloc/free */ - folio = filemap_get_folio(mapping, index); + folio = filemap_get_folio(mapping, index << huge_page_order(h)); if (!IS_ERR(folio)) { folio_put(folio); mutex_unlock(&hugetlb_fault_mutex_table[hash]); @@ -901,9 +889,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset, * folios in these areas, we need to consume the reserves * to keep reservation accounting consistent. */ - hugetlb_set_vma_policy(&pseudo_vma, inode, index); folio = alloc_hugetlb_folio(&pseudo_vma, addr, 0); - hugetlb_drop_vma_policy(&pseudo_vma); if (IS_ERR(folio)) { mutex_unlock(&hugetlb_fault_mutex_table[hash]); error = PTR_ERR(folio); @@ -980,7 +966,7 @@ static struct inode *hugetlbfs_get_root(struct super_block *sb, inode->i_mode = S_IFDIR | ctx->mode; inode->i_uid = ctx->uid; inode->i_gid = ctx->gid; - inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode); + simple_inode_init_ts(inode); inode->i_op = &hugetlbfs_dir_inode_operations; inode->i_fop = &simple_dir_operations; /* directory inodes start off with i_nlink == 2 (for "." entry) */ @@ -1024,7 +1010,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, lockdep_set_class(&inode->i_mapping->i_mmap_rwsem, &hugetlbfs_i_mmap_rwsem_key); inode->i_mapping->a_ops = &hugetlbfs_aops; - inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode); + simple_inode_init_ts(inode); inode->i_mapping->private_data = resv_map; info->seals = F_SEAL_SEAL; switch (mode & S_IFMT) { @@ -1067,7 +1053,7 @@ static int hugetlbfs_mknod(struct mnt_idmap *idmap, struct inode *dir, inode = hugetlbfs_get_inode(dir->i_sb, dir, mode, dev); if (!inode) return -ENOSPC; - dir->i_mtime = inode_set_ctime_current(dir); + inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); d_instantiate(dentry, inode); dget(dentry);/* Extra count - pin the dentry in core */ return 0; @@ -1099,7 +1085,7 @@ static int hugetlbfs_tmpfile(struct mnt_idmap *idmap, inode = hugetlbfs_get_inode(dir->i_sb, dir, mode | S_IFREG, 0); if (!inode) return -ENOSPC; - dir->i_mtime = inode_set_ctime_current(dir); + inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); d_tmpfile(file, inode); return finish_open_simple(file, 0); } @@ -1121,7 +1107,7 @@ static int hugetlbfs_symlink(struct mnt_idmap *idmap, } else iput(inode); } - dir->i_mtime = inode_set_ctime_current(dir); + inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); return error; } @@ -1204,7 +1190,9 @@ static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb); struct hstate *h = hstate_inode(d_inode(dentry)); + u64 id = huge_encode_dev(dentry->d_sb->s_dev); + buf->f_fsid = u64_to_fsid(id); buf->f_type = HUGETLBFS_MAGIC; buf->f_bsize = huge_page_size(h); if (sbinfo) { @@ -1282,18 +1270,6 @@ static struct inode *hugetlbfs_alloc_inode(struct super_block *sb) hugetlbfs_inc_free_inodes(sbinfo); return NULL; } - - /* - * Any time after allocation, hugetlbfs_destroy_inode can be called - * for the inode. mpol_free_shared_policy is unconditionally called - * as part of hugetlbfs_destroy_inode. So, initialize policy here - * in case of a quick call to destroy. - * - * Note that the policy is initialized even if we are creating a - * private inode. This simplifies hugetlbfs_destroy_inode. - */ - mpol_shared_policy_init(&p->policy, NULL); - return &p->vfs_inode; } @@ -1305,7 +1281,6 @@ static void hugetlbfs_free_inode(struct inode *inode) static void hugetlbfs_destroy_inode(struct inode *inode) { hugetlbfs_inc_free_inodes(HUGETLBFS_SB(inode->i_sb)); - mpol_free_shared_policy(&HUGETLBFS_I(inode)->policy); } static const struct address_space_operations hugetlbfs_aops = { @@ -1390,6 +1365,7 @@ static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *par { struct hugetlbfs_fs_context *ctx = fc->fs_private; struct fs_parse_result result; + struct hstate *h; char *rest; unsigned long ps; int opt; @@ -1434,11 +1410,12 @@ static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *par case Opt_pagesize: ps = memparse(param->string, &rest); - ctx->hstate = size_to_hstate(ps); - if (!ctx->hstate) { + h = size_to_hstate(ps); + if (!h) { pr_err("Unsupported page size %lu MB\n", ps / SZ_1M); return -EINVAL; } + ctx->hstate = h; return 0; case Opt_min_size: |