diff options
Diffstat (limited to 'fs')
105 files changed, 1214 insertions, 693 deletions
diff --git a/fs/affs/file.c b/fs/affs/file.c index ba084b0b2..82bb38370 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -878,7 +878,7 @@ affs_truncate(struct inode *inode) if (inode->i_size > AFFS_I(inode)->mmu_private) { struct address_space *mapping = inode->i_mapping; struct page *page; - void *fsdata; + void *fsdata = NULL; loff_t isize = inode->i_size; int res; @@ -332,6 +332,9 @@ static int aio_ring_mremap(struct vm_area_struct *vma) spin_lock(&mm->ioctx_lock); rcu_read_lock(); table = rcu_dereference(mm->ioctx_table); + if (!table) + goto out_unlock; + for (i = 0; i < table->nr; i++) { struct kioctx *ctx; @@ -345,6 +348,7 @@ static int aio_ring_mremap(struct vm_area_struct *vma) } } +out_unlock: rcu_read_unlock(); spin_unlock(&mm->ioctx_lock); return res; diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index c3deb2e35..e7a9e8b56 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -244,6 +244,7 @@ static int load_aout_binary(struct linux_binprm * bprm) set_personality(PER_LINUX); #endif setup_new_exec(bprm); + install_exec_creds(bprm); current->mm->end_code = ex.a_text + (current->mm->start_code = N_TXTADDR(ex)); @@ -256,7 +257,6 @@ static int load_aout_binary(struct linux_binprm * bprm) if (retval < 0) return retval; - install_exec_creds(bprm); if (N_MAGIC(ex) == OMAGIC) { unsigned long text_addr, map_size; diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index b53bb3729..64d0b8380 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -357,6 +357,7 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm) current->personality |= READ_IMPLIES_EXEC; setup_new_exec(bprm); + install_exec_creds(bprm); set_binfmt(&elf_fdpic_format); @@ -438,9 +439,9 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm) current->mm->start_stack = current->mm->start_brk + stack_size; #endif - install_exec_creds(bprm); - if (create_elf_fdpic_tables(bprm, current->mm, - &exec_params, &interp_params) < 0) + retval = create_elf_fdpic_tables(bprm, current->mm, &exec_params, + &interp_params); + if (retval < 0) goto error; kdebug("- start_code %lx", current->mm->start_code); diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index a6f97d86f..a909743b1 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -541,6 +541,7 @@ static int load_flat_file(struct linux_binprm *bprm, /* OK, This is the point of no return */ set_personality(PER_LINUX_32BIT); setup_new_exec(bprm); + install_exec_creds(bprm); } /* @@ -965,8 +966,6 @@ static int load_flat_binary(struct linux_binprm *bprm) } } - install_exec_creds(bprm); - set_binfmt(&flat_format); #ifdef CONFIG_MMU diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 27a04f492..8fe7edd2b 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -42,10 +42,10 @@ static LIST_HEAD(entries); static int enabled = 1; enum {Enabled, Magic}; -#define MISC_FMT_PRESERVE_ARGV0 (1 << 31) -#define MISC_FMT_OPEN_BINARY (1 << 30) -#define MISC_FMT_CREDENTIALS (1 << 29) -#define MISC_FMT_OPEN_FILE (1 << 28) +#define MISC_FMT_PRESERVE_ARGV0 (1UL << 31) +#define MISC_FMT_OPEN_BINARY (1UL << 30) +#define MISC_FMT_CREDENTIALS (1UL << 29) +#define MISC_FMT_OPEN_FILE (1UL << 28) typedef struct { struct list_head list; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 752b5d265..4f2513388 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3234,13 +3234,10 @@ static long btrfs_ioctl_dev_info(struct btrfs_fs_info *fs_info, di_args->bytes_used = btrfs_device_get_bytes_used(dev); di_args->total_bytes = btrfs_device_get_total_bytes(dev); memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid)); - if (dev->name) { - strncpy(di_args->path, rcu_str_deref(dev->name), - sizeof(di_args->path) - 1); - di_args->path[sizeof(di_args->path) - 1] = 0; - } else { + if (dev->name) + strscpy(di_args->path, rcu_str_deref(dev->name), sizeof(di_args->path)); + else di_args->path[0] = '\0'; - } out: rcu_read_unlock(); diff --git a/fs/btrfs/rcu-string.h b/fs/btrfs/rcu-string.h index a97dc74a4..02f15321c 100644 --- a/fs/btrfs/rcu-string.h +++ b/fs/btrfs/rcu-string.h @@ -18,7 +18,11 @@ static inline struct rcu_string *rcu_string_strdup(const char *src, gfp_t mask) (len * sizeof(char)), mask); if (!ret) return ret; - strncpy(ret->str, src, len); + /* Warn if the source got unexpectedly truncated. */ + if (WARN_ON(strscpy(ret->str, src, len) < 0)) { + kfree(ret); + return NULL; + } return ret; } diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index eb2f8e84f..1f535cd99 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -1306,6 +1306,7 @@ static int find_extent_clone(struct send_ctx *sctx, u64 disk_byte; u64 num_bytes; u64 extent_item_pos; + u64 extent_refs; u64 flags = 0; struct btrfs_file_extent_item *fi; struct extent_buffer *eb = path->nodes[0]; @@ -1373,14 +1374,22 @@ static int find_extent_clone(struct send_ctx *sctx, ei = btrfs_item_ptr(tmp_path->nodes[0], tmp_path->slots[0], struct btrfs_extent_item); + extent_refs = btrfs_extent_refs(tmp_path->nodes[0], ei); /* * Backreference walking (iterate_extent_inodes() below) is currently * too expensive when an extent has a large number of references, both * in time spent and used memory. So for now just fallback to write * operations instead of clone operations when an extent has more than * a certain amount of references. + * + * Also, if we have only one reference and only the send root as a clone + * source - meaning no clone roots were given in the struct + * btrfs_ioctl_send_args passed to the send ioctl - then it's our + * reference and there's no point in doing backref walking which is + * expensive, so exit early. */ - if (btrfs_extent_refs(tmp_path->nodes[0], ei) > SEND_MAX_EXTENT_REFS) { + if ((extent_refs == 1 && sctx->clone_roots_cnt == 1) || + extent_refs > SEND_MAX_EXTENT_REFS) { ret = -ENOENT; goto out; } @@ -6817,10 +6826,10 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg) /* * Check that we don't overflow at later allocations, we request * clone_sources_count + 1 items, and compare to unsigned long inside - * access_ok. + * access_ok. Also set an upper limit for allocation size so this can't + * easily exhaust memory. Max number of clone sources is about 200K. */ - if (arg->clone_sources_count > - ULONG_MAX / sizeof(struct clone_root) - 1) { + if (arg->clone_sources_count > SZ_8M / sizeof(struct clone_root)) { ret = -EINVAL; goto out; } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 2b4d33b58..0294f519c 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1418,7 +1418,7 @@ again: goto out; } - while (1) { + while (search_start < search_end) { l = path->nodes[0]; slot = path->slots[0]; if (slot >= btrfs_header_nritems(l)) { @@ -1441,6 +1441,9 @@ again: if (key.type != BTRFS_DEV_EXTENT_KEY) goto next; + if (key.offset > search_end) + break; + if (key.offset > search_start) { hole_size = key.offset - search_start; @@ -1515,6 +1518,7 @@ next: else ret = 0; + ASSERT(max_hole_start + max_hole_size <= search_end); out: btrfs_free_path(path); *start = max_hole_start; diff --git a/fs/char_dev.c b/fs/char_dev.c index 5fffd5050..2c3d519b2 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -553,7 +553,7 @@ int cdev_device_add(struct cdev *cdev, struct device *dev) } rc = device_add(dev); - if (rc) + if (rc && dev->devt) cdev_del(cdev); return rc; diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index f047e8787..c1d5daa4b 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -121,7 +121,10 @@ extern const struct dentry_operations cifs_ci_dentry_ops; #ifdef CONFIG_CIFS_DFS_UPCALL extern struct vfsmount *cifs_dfs_d_automount(struct path *path); #else -#define cifs_dfs_d_automount NULL +static inline struct vfsmount *cifs_dfs_d_automount(struct path *path) +{ + return ERR_PTR(-EREMOTE); +} #endif /* Functions related to symlinks */ diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index cb70f0c6a..d16fd8d1f 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -4895,8 +4895,13 @@ CIFSGetDFSRefer(const unsigned int xid, struct cifs_ses *ses, return -ENODEV; getDFSRetry: - rc = smb_init(SMB_COM_TRANSACTION2, 15, ses->tcon_ipc, (void **) &pSMB, - (void **) &pSMBr); + /* + * Use smb_init_no_reconnect() instead of smb_init() as + * CIFSGetDFSRefer() may be called from cifs_reconnect_tcon() and thus + * causing an infinite recursion. + */ + rc = smb_init_no_reconnect(SMB_COM_TRANSACTION2, 15, ses->tcon_ipc, + (void **)&pSMB, (void **)&pSMBr); if (rc) return rc; diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index b5aba1c89..37e91f27f 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -429,7 +429,8 @@ cifs_reconnect(struct TCP_Server_Info *server) server->ssocket->state, server->ssocket->flags); sock_release(server->ssocket); server->ssocket = NULL; - } + } else if (cifs_rdma_enabled(server)) + smbd_destroy(server); server->sequence_number = 0; server->session_estab = false; kfree(server->session_key.response); @@ -799,10 +800,8 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server) wake_up_all(&server->request_q); /* give those requests time to exit */ msleep(125); - if (cifs_rdma_enabled(server) && server->smbd_conn) { - smbd_destroy(server->smbd_conn); - server->smbd_conn = NULL; - } + if (cifs_rdma_enabled(server)) + smbd_destroy(server); if (server->ssocket) { sock_release(server->ssocket); server->ssocket = NULL; @@ -2940,7 +2939,7 @@ cifs_set_cifscreds(struct smb_vol *vol __attribute__((unused)), static struct cifs_ses * cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info) { - int rc = -ENOMEM; + int rc = 0; unsigned int xid; struct cifs_ses *ses; struct sockaddr_in *addr = (struct sockaddr_in *)&server->dstaddr; @@ -2982,6 +2981,8 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info) return ses; } + rc = -ENOMEM; + cifs_dbg(FYI, "Existing smb sess not found\n"); ses = sesInfoAlloc(); if (ses == NULL) diff --git a/fs/cifs/link.c b/fs/cifs/link.c index 5b1c33d92..f590149e2 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -481,6 +481,7 @@ smb3_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon, oparms.disposition = FILE_CREATE; oparms.fid = &fid; oparms.reconnect = false; + oparms.mode = 0644; rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL, NULL); diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index f906984eb..118bcb351 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -475,7 +475,7 @@ SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon) if (rc == -EOPNOTSUPP) { cifs_dbg(FYI, "server does not support query network interfaces\n"); - goto out; + ret_data_len = 0; } else if (rc != 0) { cifs_dbg(VFS, "error %d on ioctl to get interface list\n", rc); goto out; diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 3485b9bf9..50c6405be 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -3152,12 +3152,15 @@ smb2_readv_callback(struct mid_q_entry *mid) (struct smb2_sync_hdr *)rdata->iov[0].iov_base; unsigned int credits_received = 0; struct smb_rqst rqst = { .rq_iov = &rdata->iov[1], - .rq_nvec = 1, - .rq_pages = rdata->pages, - .rq_offset = rdata->page_offset, - .rq_npages = rdata->nr_pages, - .rq_pagesz = rdata->pagesz, - .rq_tailsz = rdata->tailsz }; + .rq_nvec = 1, }; + + if (rdata->got_bytes) { + rqst.rq_pages = rdata->pages; + rqst.rq_offset = rdata->page_offset; + rqst.rq_npages = rdata->nr_pages; + rqst.rq_pagesz = rdata->pagesz; + rqst.rq_tailsz = rdata->tailsz; + } cifs_dbg(FYI, "%s: mid=%llu state=%d result=%d bytes=%u\n", __func__, mid->mid, mid->mid_state, rdata->result, diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c index 784628ec4..ea1d8cfab 100644 --- a/fs/cifs/smbdirect.c +++ b/fs/cifs/smbdirect.c @@ -320,6 +320,9 @@ static int smbd_conn_upcall( info->transport_status = SMBD_DISCONNECTED; smbd_process_disconnected(info); + wake_up(&info->disconn_wait); + wake_up_interruptible(&info->wait_reassembly_queue); + wake_up_interruptible_all(&info->wait_send_queue); break; default: @@ -1478,21 +1481,102 @@ static void idle_connection_timer(struct work_struct *work) info->keep_alive_interval*HZ); } -/* Destroy this SMBD connection, called from upper layer */ -void smbd_destroy(struct smbd_connection *info) +/* + * Destroy the transport and related RDMA and memory resources + * Need to go through all the pending counters and make sure on one is using + * the transport while it is destroyed + */ +void smbd_destroy(struct TCP_Server_Info *server) { + struct smbd_connection *info = server->smbd_conn; + struct smbd_response *response; + unsigned long flags; + + if (!info) { + log_rdma_event(INFO, "rdma session already destroyed\n"); + return; + } + log_rdma_event(INFO, "destroying rdma session\n"); + if (info->transport_status != SMBD_DISCONNECTED) { + rdma_disconnect(server->smbd_conn->id); + log_rdma_event(INFO, "wait for transport being disconnected\n"); + wait_event( + info->disconn_wait, + info->transport_status == SMBD_DISCONNECTED); + } - /* Kick off the disconnection process */ - smbd_disconnect_rdma_connection(info); + log_rdma_event(INFO, "destroying qp\n"); + ib_drain_qp(info->id->qp); + rdma_destroy_qp(info->id); + + log_rdma_event(INFO, "cancelling idle timer\n"); + cancel_delayed_work_sync(&info->idle_timer_work); + log_rdma_event(INFO, "cancelling send immediate work\n"); + cancel_delayed_work_sync(&info->send_immediate_work); + + log_rdma_event(INFO, "wait for all send posted to IB to finish\n"); + wait_event(info->wait_send_pending, + atomic_read(&info->send_pending) == 0); + wait_event(info->wait_send_payload_pending, + atomic_read(&info->send_payload_pending) == 0); + + /* It's not posssible for upper layer to get to reassembly */ + log_rdma_event(INFO, "drain the reassembly queue\n"); + do { + spin_lock_irqsave(&info->reassembly_queue_lock, flags); + response = _get_first_reassembly(info); + if (response) { + list_del(&response->list); + spin_unlock_irqrestore( + &info->reassembly_queue_lock, flags); + put_receive_buffer(info, response); + } else + spin_unlock_irqrestore( + &info->reassembly_queue_lock, flags); + } while (response); + info->reassembly_data_length = 0; + + log_rdma_event(INFO, "free receive buffers\n"); + wait_event(info->wait_receive_queues, + info->count_receive_queue + info->count_empty_packet_queue + == info->receive_credit_max); + destroy_receive_buffers(info); - log_rdma_event(INFO, "wait for transport being destroyed\n"); - wait_event(info->wait_destroy, - info->transport_status == SMBD_DESTROYED); + /* + * For performance reasons, memory registration and deregistration + * are not locked by srv_mutex. It is possible some processes are + * blocked on transport srv_mutex while holding memory registration. + * Release the transport srv_mutex to allow them to hit the failure + * path when sending data, and then release memory registartions. + */ + log_rdma_event(INFO, "freeing mr list\n"); + wake_up_interruptible_all(&info->wait_mr); + while (atomic_read(&info->mr_used_count)) { + mutex_unlock(&server->srv_mutex); + msleep(1000); + mutex_lock(&server->srv_mutex); + } + destroy_mr_list(info); + + ib_free_cq(info->send_cq); + ib_free_cq(info->recv_cq); + ib_dealloc_pd(info->pd); + rdma_destroy_id(info->id); + + /* free mempools */ + mempool_destroy(info->request_mempool); + kmem_cache_destroy(info->request_cache); + + mempool_destroy(info->response_mempool); + kmem_cache_destroy(info->response_cache); + + info->transport_status = SMBD_DESTROYED; destroy_workqueue(info->workqueue); log_rdma_event(INFO, "rdma session destroyed\n"); kfree(info); + server->smbd_conn = NULL; } /* @@ -1514,17 +1598,9 @@ int smbd_reconnect(struct TCP_Server_Info *server) */ if (server->smbd_conn->transport_status == SMBD_CONNECTED) { log_rdma_event(INFO, "disconnecting transport\n"); - smbd_disconnect_rdma_connection(server->smbd_conn); + smbd_destroy(server); } - /* wait until the transport is destroyed */ - if (!wait_event_timeout(server->smbd_conn->wait_destroy, - server->smbd_conn->transport_status == SMBD_DESTROYED, 5*HZ)) - return -EAGAIN; - - destroy_workqueue(server->smbd_conn->workqueue); - kfree(server->smbd_conn); - create_conn: log_rdma_event(INFO, "creating rdma session\n"); server->smbd_conn = smbd_get_connection( @@ -1741,12 +1817,13 @@ static struct smbd_connection *_smbd_get_connection( conn_param.retry_count = SMBD_CM_RETRY; conn_param.rnr_retry_count = SMBD_CM_RNR_RETRY; conn_param.flow_control = 0; - init_waitqueue_head(&info->wait_destroy); log_rdma_event(INFO, "connecting to IP %pI4 port %d\n", &addr_in->sin_addr, port); init_waitqueue_head(&info->conn_wait); + init_waitqueue_head(&info->disconn_wait); + init_waitqueue_head(&info->wait_reassembly_queue); rc = rdma_connect(info->id, &conn_param); if (rc) { log_rdma_event(ERR, "rdma_connect() failed with %i\n", rc); @@ -1770,8 +1847,6 @@ static struct smbd_connection *_smbd_get_connection( } init_waitqueue_head(&info->wait_send_queue); - init_waitqueue_head(&info->wait_reassembly_queue); - INIT_DELAYED_WORK(&info->idle_timer_work, idle_connection_timer); INIT_DELAYED_WORK(&info->send_immediate_work, send_immediate_work); queue_delayed_work(info->workqueue, &info->idle_timer_work, @@ -1812,7 +1887,8 @@ static struct smbd_connection *_smbd_get_connection( allocate_mr_failed: /* At this point, need to a full transport shutdown */ - smbd_destroy(info); + server->smbd_conn = info; + smbd_destroy(server); return NULL; negotiation_failed: @@ -2378,6 +2454,7 @@ static int allocate_mr_list(struct smbd_connection *info) atomic_set(&info->mr_ready_count, 0); atomic_set(&info->mr_used_count, 0); init_waitqueue_head(&info->wait_for_mr_cleanup); + INIT_WORK(&info->mr_recovery_work, smbd_mr_recovery_work); /* Allocate more MRs (2x) than hardware responder_resources */ for (i = 0; i < info->responder_resources * 2; i++) { smbdirect_mr = kzalloc(sizeof(*smbdirect_mr), GFP_KERNEL); @@ -2406,13 +2483,13 @@ static int allocate_mr_list(struct smbd_connection *info) list_add_tail(&smbdirect_mr->list, &info->mr_list); atomic_inc(&info->mr_ready_count); } - INIT_WORK(&info->mr_recovery_work, smbd_mr_recovery_work); return 0; out: kfree(smbdirect_mr); list_for_each_entry_safe(smbdirect_mr, tmp, &info->mr_list, list) { + list_del(&smbdirect_mr->list); ib_dereg_mr(smbdirect_mr->mr); kfree(smbdirect_mr->sgl); kfree(smbdirect_mr); diff --git a/fs/cifs/smbdirect.h b/fs/cifs/smbdirect.h index b5c240ff2..b0ca7df41 100644 --- a/fs/cifs/smbdirect.h +++ b/fs/cifs/smbdirect.h @@ -71,6 +71,7 @@ struct smbd_connection { struct completion ri_done; wait_queue_head_t conn_wait; wait_queue_head_t wait_destroy; + wait_queue_head_t disconn_wait; struct completion negotiate_completion; bool negotiate_done; @@ -288,7 +289,7 @@ struct smbd_connection *smbd_get_connection( /* Reconnect SMBDirect session */ int smbd_reconnect(struct TCP_Server_Info *server); /* Destroy SMBDirect session */ -void smbd_destroy(struct smbd_connection *info); +void smbd_destroy(struct TCP_Server_Info *server); /* Interface for carrying upper layer I/O through send/recv */ int smbd_recv(struct smbd_connection *info, struct msghdr *msg); @@ -331,7 +332,7 @@ struct smbd_connection {}; static inline void *smbd_get_connection( struct TCP_Server_Info *server, struct sockaddr *dstaddr) {return NULL;} static inline int smbd_reconnect(struct TCP_Server_Info *server) {return -1; } -static inline void smbd_destroy(struct smbd_connection *info) {} +static inline void smbd_destroy(struct TCP_Server_Info *server) {} static inline int smbd_recv(struct smbd_connection *info, struct msghdr *msg) {return -1; } static inline int smbd_send(struct TCP_Server_Info *server, int num_rqst, struct smb_rqst *rqst) {return -1; } #endif diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index 4fce1da7d..a57d080d2 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -330,8 +330,8 @@ ssize_t debugfs_attr_read(struct file *file, char __user *buf, } EXPORT_SYMBOL_GPL(debugfs_attr_read); -ssize_t debugfs_attr_write(struct file *file, const char __user *buf, - size_t len, loff_t *ppos) +static ssize_t debugfs_attr_write_xsigned(struct file *file, const char __user *buf, + size_t len, loff_t *ppos, bool is_signed) { struct dentry *dentry = F_DENTRY(file); ssize_t ret; @@ -339,12 +339,28 @@ ssize_t debugfs_attr_write(struct file *file, const char __user *buf, ret = debugfs_file_get(dentry); if (unlikely(ret)) return ret; - ret = simple_attr_write(file, buf, len, ppos); + if (is_signed) + ret = simple_attr_write_signed(file, buf, len, ppos); + else + ret = simple_attr_write(file, buf, len, ppos); debugfs_file_put(dentry); return ret; } + +ssize_t debugfs_attr_write(struct file *file, const char __user *buf, + size_t len, loff_t *ppos) +{ + return debugfs_attr_write_xsigned(file, buf, len, ppos, false); +} EXPORT_SYMBOL_GPL(debugfs_attr_write); +ssize_t debugfs_attr_write_signed(struct file *file, const char __user *buf, + size_t len, loff_t *ppos) +{ + return debugfs_attr_write_xsigned(file, buf, len, ppos, true); +} +EXPORT_SYMBOL_GPL(debugfs_attr_write_signed); + static struct dentry *debugfs_create_mode_unsafe(const char *name, umode_t mode, struct dentry *parent, void *value, const struct file_operations *fops, @@ -742,11 +758,11 @@ static int debugfs_atomic_t_get(void *data, u64 *val) *val = atomic_read((atomic_t *)data); return 0; } -DEFINE_DEBUGFS_ATTRIBUTE(fops_atomic_t, debugfs_atomic_t_get, +DEFINE_DEBUGFS_ATTRIBUTE_SIGNED(fops_atomic_t, debugfs_atomic_t_get, debugfs_atomic_t_set, "%lld\n"); -DEFINE_DEBUGFS_ATTRIBUTE(fops_atomic_t_ro, debugfs_atomic_t_get, NULL, +DEFINE_DEBUGFS_ATTRIBUTE_SIGNED(fops_atomic_t_ro, debugfs_atomic_t_get, NULL, "%lld\n"); -DEFINE_DEBUGFS_ATTRIBUTE(fops_atomic_t_wo, NULL, debugfs_atomic_t_set, +DEFINE_DEBUGFS_ATTRIBUTE_SIGNED(fops_atomic_t_wo, NULL, debugfs_atomic_t_set, "%lld\n"); /** diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 6938dff9f..e58b162ad 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -503,7 +503,7 @@ enum { * * It's not paranoia if the Murphy's Law really *is* out to get you. :-) */ -#define TEST_FLAG_VALUE(FLAG) (EXT4_##FLAG##_FL == (1 << EXT4_INODE_##FLAG)) +#define TEST_FLAG_VALUE(FLAG) (EXT4_##FLAG##_FL == (1U << EXT4_INODE_##FLAG)) #define CHECK_FLAG_VALUE(FLAG) BUILD_BUG_ON(!TEST_FLAG_VALUE(FLAG)) static inline void ext4_check_flag_values(void) diff --git a/fs/ext4/fsmap.c b/fs/ext4/fsmap.c index 6f3f245f3..6b52ace14 100644 --- a/fs/ext4/fsmap.c +++ b/fs/ext4/fsmap.c @@ -486,6 +486,8 @@ static int ext4_getfsmap_datadev(struct super_block *sb, keys[0].fmr_physical = bofs; if (keys[1].fmr_physical >= eofs) keys[1].fmr_physical = eofs - 1; + if (keys[1].fmr_physical < keys[0].fmr_physical) + return 0; start_fsb = keys[0].fmr_physical; end_fsb = keys[1].fmr_physical; diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index a5442528a..0cc0d22c0 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -148,6 +148,7 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth, struct super_block *sb = inode->i_sb; Indirect *p = chain; struct buffer_head *bh; + unsigned int key; int ret = -EIO; *err = 0; @@ -156,7 +157,13 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth, if (!p->key) goto no_block; while (--depth) { - bh = sb_getblk(sb, le32_to_cpu(p->key)); + key = le32_to_cpu(p->key); + if (key > ext4_blocks_count(EXT4_SB(sb)->s_es)) { + /* the block was out of range */ + ret = -EFSCORRUPTED; + goto failure; + } + bh = sb_getblk(sb, key); if (unlikely(!bh)) { ret = -ENOMEM; goto failure; diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index b1c6b9398..72387e142 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -157,7 +157,6 @@ int ext4_find_inline_data_nolock(struct inode *inode) (void *)ext4_raw_inode(&is.iloc)); EXT4_I(inode)->i_inline_size = EXT4_MIN_INLINE_DATA_SIZE + le32_to_cpu(is.s.here->e_value_size); - ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); } out: brelse(is.iloc.bh); @@ -207,7 +206,7 @@ out: /* * write the buffer to the inline inode. * If 'create' is set, we don't need to do the extra copy in the xattr - * value since it is already handled by ext4_xattr_ibody_inline_set. + * value since it is already handled by ext4_xattr_ibody_set. * That saves us one memcpy. */ static void ext4_write_inline_data(struct inode *inode, struct ext4_iloc *iloc, @@ -289,7 +288,7 @@ static int ext4_create_inline_data(handle_t *handle, BUG_ON(!is.s.not_found); - error = ext4_xattr_ibody_inline_set(handle, inode, &i, &is); + error = ext4_xattr_ibody_set(handle, inode, &i, &is); if (error) { if (error == -ENOSPC) ext4_clear_inode_state(inode, @@ -361,7 +360,7 @@ static int ext4_update_inline_data(handle_t *handle, struct inode *inode, i.value = value; i.value_len = len; - error = ext4_xattr_ibody_inline_set(handle, inode, &i, &is); + error = ext4_xattr_ibody_set(handle, inode, &i, &is); if (error) goto out; @@ -434,7 +433,7 @@ static int ext4_destroy_inline_data_nolock(handle_t *handle, if (error) goto out; - error = ext4_xattr_ibody_inline_set(handle, inode, &i, &is); + error = ext4_xattr_ibody_set(handle, inode, &i, &is); if (error) goto out; @@ -1978,8 +1977,7 @@ int ext4_inline_data_truncate(struct inode *inode, int *has_inline) i.value = value; i.value_len = i_size > EXT4_MIN_INLINE_DATA_SIZE ? i_size - EXT4_MIN_INLINE_DATA_SIZE : 0; - err = ext4_xattr_ibody_inline_set(handle, inode, - &i, &is); + err = ext4_xattr_ibody_set(handle, inode, &i, &is); if (err) goto out_error; } diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 95139c992..7aaf4dafd 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -207,6 +207,8 @@ void ext4_evict_inode(struct inode *inode) trace_ext4_evict_inode(inode); + if (EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL) + ext4_evict_ea_inode(inode); if (inode->i_nlink) { /* * When journalling data dirty buffers are tracked only in the @@ -1426,7 +1428,8 @@ static int ext4_write_end(struct file *file, int inline_data = ext4_has_inline_data(inode); trace_ext4_write_end(inode, pos, len, copied); - if (inline_data) { + if (inline_data && + ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) { ret = ext4_write_inline_data_end(inode, pos, len, copied, page); if (ret < 0) { @@ -4549,7 +4552,7 @@ int ext4_truncate(struct inode *inode) trace_ext4_truncate_enter(inode); if (!ext4_can_truncate(inode)) - return 0; + goto out_trace; ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS); @@ -4560,16 +4563,15 @@ int ext4_truncate(struct inode *inode) int has_inline = 1; err = ext4_inline_data_truncate(inode, &has_inline); - if (err) - return err; - if (has_inline) - return 0; + if (err || has_inline) + goto out_trace; } /* If we zero-out tail of the page, we have to create jinode for jbd2 */ if (inode->i_size & (inode->i_sb->s_blocksize - 1)) { - if (ext4_inode_attach_jinode(inode) < 0) - return 0; + err = ext4_inode_attach_jinode(inode); + if (err) + goto out_trace; } if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) @@ -4578,8 +4580,10 @@ int ext4_truncate(struct inode *inode) credits = ext4_blocks_for_truncate(inode); handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits); - if (IS_ERR(handle)) - return PTR_ERR(handle); + if (IS_ERR(handle)) { + err = PTR_ERR(handle); + goto out_trace; + } if (inode->i_size & (inode->i_sb->s_blocksize - 1)) ext4_block_truncate_page(handle, mapping, inode->i_size); @@ -4628,6 +4632,7 @@ out_stop: ext4_mark_inode_dirty(handle, inode); ext4_journal_stop(handle); +out_trace: trace_ext4_truncate_exit(inode); return err; } @@ -4663,9 +4668,17 @@ static int __ext4_get_inode_loc(struct inode *inode, inodes_per_block = EXT4_SB(sb)->s_inodes_per_block; inode_offset = ((inode->i_ino - 1) % EXT4_INODES_PER_GROUP(sb)); - block = ext4_inode_table(sb, gdp) + (inode_offset / inodes_per_block); iloc->offset = (inode_offset % inodes_per_block) * EXT4_INODE_SIZE(sb); + block = ext4_inode_table(sb, gdp); + if ((block <= le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) || + (block >= ext4_blocks_count(EXT4_SB(sb)->s_es))) { + ext4_error(sb, "Invalid inode table block %llu in " + "block_group %u", block, iloc->block_group); + return -EFSCORRUPTED; + } + block += (inode_offset / inodes_per_block); + bh = sb_getblk(sb, block); if (unlikely(!bh)) return -ENOMEM; @@ -4853,8 +4866,13 @@ static inline int ext4_iget_extra_inode(struct inode *inode, if (EXT4_INODE_HAS_XATTR_SPACE(inode) && *magic == cpu_to_le32(EXT4_XATTR_MAGIC)) { + int err; + ext4_set_inode_state(inode, EXT4_STATE_XATTR); - return ext4_find_inline_data_nolock(inode); + err = ext4_find_inline_data_nolock(inode); + if (!err && ext4_has_inline_data(inode)) + ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); + return err; } else EXT4_I(inode)->i_inline_off = 0; return 0; @@ -4930,13 +4948,6 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, goto bad_inode; raw_inode = ext4_raw_inode(&iloc); - if ((ino == EXT4_ROOT_INO) && (raw_inode->i_links_count == 0)) { - ext4_error_inode(inode, function, line, 0, - "iget: root inode unallocated"); - ret = -EFSCORRUPTED; - goto bad_inode; - } - if ((flags & EXT4_IGET_HANDLE) && (raw_inode->i_links_count == 0) && (raw_inode->i_mode == 0)) { ret = -ESTALE; @@ -5007,11 +5018,16 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, * NeilBrown 1999oct15 */ if (inode->i_nlink == 0) { - if ((inode->i_mode == 0 || + if ((inode->i_mode == 0 || flags & EXT4_IGET_SPECIAL || !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) && ino != EXT4_BOOT_LOADER_INO) { - /* this inode is deleted */ - ret = -ESTALE; + /* this inode is deleted or unallocated */ + if (flags & EXT4_IGET_SPECIAL) { + ext4_error_inode(inode, function, line, 0, + "iget: special inode unallocated"); + ret = -EFSCORRUPTED; + } else + ret = -ESTALE; goto bad_inode; } /* The only unlinked inodes we let through here have @@ -6033,6 +6049,14 @@ static int __ext4_expand_extra_isize(struct inode *inode, return 0; } + /* + * We may need to allocate external xattr block so we need quotas + * initialized. Here we can be called with various locks held so we + * cannot affort to initialize quotas ourselves. So just bail. + */ + if (dquot_initialize_needed(inode)) + return -EAGAIN; + /* try to expand with EAs present */ error = ext4_expand_extra_isize_ea(inode, new_extra_isize, raw_inode, handle); diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 484cb68c3..b930e8d55 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -169,7 +169,7 @@ static long swap_inode_boot_loader(struct super_block *sb, /* Protect extent tree against block allocations via delalloc */ ext4_double_down_write_data_sem(inode, inode_bl); - if (inode_bl->i_nlink == 0) { + if (is_bad_inode(inode_bl) || !S_ISREG(inode_bl->i_mode)) { /* this inode has never been used as a BOOT_LOADER */ set_nlink(inode_bl, 1); i_uid_write(inode_bl, 0); @@ -178,6 +178,7 @@ static long swap_inode_boot_loader(struct super_block *sb, ei_bl->i_flags = 0; inode_set_iversion(inode_bl, 1); i_size_write(inode_bl, 0); + EXT4_I(inode_bl)->i_disksize = inode_bl->i_size; inode_bl->i_mode = S_IFREG; if (ext4_has_feature_extents(sb)) { ext4_set_inode_flag(inode_bl, EXT4_INODE_EXTENTS); @@ -449,6 +450,10 @@ static int ext4_ioctl_setproject(struct file *filp, __u32 projid) if (ext4_is_quota_file(inode)) return err; + err = dquot_initialize(inode); + if (err) + return err; + err = ext4_get_inode_loc(inode, &iloc); if (err) return err; @@ -464,10 +469,6 @@ static int ext4_ioctl_setproject(struct file *filp, __u32 projid) brelse(iloc.bh); } - err = dquot_initialize(inode); - if (err) - return err; - handle = ext4_journal_start(inode, EXT4_HT_QUOTA, EXT4_QUOTA_INIT_BLOCKS(sb) + EXT4_QUOTA_DEL_BLOCKS(sb) + 3); diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index a878b9a8d..db9bba347 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1419,11 +1419,10 @@ static struct buffer_head *__ext4_find_entry(struct inode *dir, int has_inline_data = 1; ret = ext4_find_inline_entry(dir, fname, res_dir, &has_inline_data); - if (has_inline_data) { - if (inlined) - *inlined = 1; + if (inlined) + *inlined = has_inline_data; + if (has_inline_data) goto cleanup_and_exit; - } } if ((namelen <= 2) && (name[0] == '.') && @@ -3515,7 +3514,8 @@ static void ext4_resetent(handle_t *handle, struct ext4_renament *ent, * so the old->de may no longer valid and need to find it again * before reset old inode info. */ - old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL); + old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, + &old.inlined); if (IS_ERR(old.bh)) retval = PTR_ERR(old.bh); if (!old.bh) @@ -3677,7 +3677,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, return retval; } - old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL); + old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, + &old.inlined); if (IS_ERR(old.bh)) return PTR_ERR(old.bh); /* @@ -3870,6 +3871,9 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry, retval = dquot_initialize(old.dir); if (retval) return retval; + retval = dquot_initialize(old.inode); + if (retval) + return retval; retval = dquot_initialize(new.dir); if (retval) return retval; diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 3de933354..bf910f266 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -388,7 +388,8 @@ static int io_submit_init_bio(struct ext4_io_submit *io, static int io_submit_add_bh(struct ext4_io_submit *io, struct inode *inode, - struct page *page, + struct page *pagecache_page, + struct page *bounce_page, struct buffer_head *bh) { int ret; @@ -403,10 +404,11 @@ submit_and_retry: return ret; io->io_bio->bi_write_hint = inode->i_write_hint; } - ret = bio_add_page(io->io_bio, page, bh->b_size, bh_offset(bh)); + ret = bio_add_page(io->io_bio, bounce_page ?: pagecache_page, + bh->b_size, bh_offset(bh)); if (ret != bh->b_size) goto submit_and_retry; - wbc_account_io(io->io_wbc, page, bh->b_size); + wbc_account_io(io->io_wbc, pagecache_page, bh->b_size); io->io_next_block++; return 0; } @@ -514,8 +516,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io, do { if (!buffer_async_write(bh)) continue; - ret = io_submit_add_bh(io, inode, - data_page ? data_page : page, bh); + ret = io_submit_add_bh(io, inode, page, data_page, bh); if (ret) { /* * We only get here on ENOMEM. Not much else diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 8737d1bcd..288213cad 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1567,8 +1567,8 @@ exit_journal: int meta_bg = ext4_has_feature_meta_bg(sb); sector_t old_gdb = 0; - update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es, - sizeof(struct ext4_super_block), 0); + update_backups(sb, ext4_group_first_block_no(sb, 0), + (char *)es, sizeof(struct ext4_super_block), 0); for (; gdb_num <= gdb_num_end; gdb_num++) { struct buffer_head *gdb_bh; @@ -1775,7 +1775,7 @@ errout: if (test_opt(sb, DEBUG)) printk(KERN_DEBUG "EXT4-fs: extended group to %llu " "blocks\n", ext4_blocks_count(es)); - update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr, + update_backups(sb, ext4_group_first_block_no(sb, 0), (char *)es, sizeof(struct ext4_super_block), 0); } return err; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index f00cc301d..e54a5be15 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1081,6 +1081,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) return NULL; inode_set_iversion(&ei->vfs_inode, 1); + ei->i_flags = 0; spin_lock_init(&ei->i_raw_lock); INIT_LIST_HEAD(&ei->i_prealloc_list); spin_lock_init(&ei->i_prealloc_lock); @@ -4302,30 +4303,31 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) ext4_has_feature_journal_needs_recovery(sb)) { ext4_msg(sb, KERN_ERR, "required journal recovery " "suppressed and not mounted read-only"); - goto failed_mount_wq; + goto failed_mount3a; } else { /* Nojournal mode, all journal mount options are illegal */ - if (test_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM)) { - ext4_msg(sb, KERN_ERR, "can't mount with " - "journal_checksum, fs mounted w/o journal"); - goto failed_mount_wq; - } if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { ext4_msg(sb, KERN_ERR, "can't mount with " "journal_async_commit, fs mounted w/o journal"); - goto failed_mount_wq; + goto failed_mount3a; + } + + if (test_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM)) { + ext4_msg(sb, KERN_ERR, "can't mount with " + "journal_checksum, fs mounted w/o journal"); + goto failed_mount3a; } if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) { ext4_msg(sb, KERN_ERR, "can't mount with " "commit=%lu, fs mounted w/o journal", sbi->s_commit_interval / HZ); - goto failed_mount_wq; + goto failed_mount3a; } if (EXT4_MOUNT_DATA_FLAGS & (sbi->s_mount_opt ^ sbi->s_def_mount_opt)) { ext4_msg(sb, KERN_ERR, "can't mount with " "data=, fs mounted w/o journal"); - goto failed_mount_wq; + goto failed_mount3a; } sbi->s_def_mount_opt &= ~EXT4_MOUNT_JOURNAL_CHECKSUM; clear_opt(sb, JOURNAL_CHECKSUM); @@ -4731,7 +4733,7 @@ static struct inode *ext4_get_journal_inode(struct super_block *sb, jbd_debug(2, "Journal inode found at %p: %lld bytes\n", journal_inode, journal_inode->i_size); - if (!S_ISREG(journal_inode->i_mode)) { + if (!S_ISREG(journal_inode->i_mode) || IS_ENCRYPTED(journal_inode)) { ext4_msg(sb, KERN_ERR, "invalid journal inode"); iput(journal_inode); return NULL; @@ -5839,6 +5841,20 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, return err; } +static inline bool ext4_check_quota_inum(int type, unsigned long qf_inum) +{ + switch (type) { + case USRQUOTA: + return qf_inum == EXT4_USR_QUOTA_INO; + case GRPQUOTA: + return qf_inum == EXT4_GRP_QUOTA_INO; + case PRJQUOTA: + return qf_inum >= EXT4_GOOD_OLD_FIRST_INO; + default: + BUG(); + } +} + static int ext4_quota_enable(struct super_block *sb, int type, int format_id, unsigned int flags) { @@ -5855,9 +5871,16 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id, if (!qf_inums[type]) return -EPERM; + if (!ext4_check_quota_inum(type, qf_inums[type])) { + ext4_error(sb, "Bad quota inum: %lu, type: %d", + qf_inums[type], type); + return -EUCLEAN; + } + qf_inode = ext4_iget(sb, qf_inums[type], EXT4_IGET_SPECIAL); if (IS_ERR(qf_inode)) { - ext4_error(sb, "Bad quota inode # %lu", qf_inums[type]); + ext4_error(sb, "Bad quota inode: %lu, type: %d", + qf_inums[type], type); return PTR_ERR(qf_inode); } @@ -5896,8 +5919,9 @@ static int ext4_enable_quotas(struct super_block *sb) if (err) { ext4_warning(sb, "Failed to enable quota tracking " - "(type=%d, err=%d). Please run " - "e2fsck to fix.", type, err); + "(type=%d, err=%d, ino=%lu). " + "Please run e2fsck to fix.", type, + err, qf_inums[type]); for (type--; type >= 0; type--) { struct inode *inode; diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index 9212a026a..74722ce72 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -349,6 +349,11 @@ static void ext4_sb_release(struct kobject *kobj) complete(&sbi->s_kobj_unregister); } +static void ext4_feat_release(struct kobject *kobj) +{ + kfree(kobj); +} + static const struct sysfs_ops ext4_attr_ops = { .show = ext4_attr_show, .store = ext4_attr_store, @@ -363,7 +368,7 @@ static struct kobj_type ext4_sb_ktype = { static struct kobj_type ext4_feat_ktype = { .default_attrs = ext4_feat_attrs, .sysfs_ops = &ext4_attr_ops, - .release = (void (*)(struct kobject *))kfree, + .release = ext4_feat_release, }; static struct kobject *ext4_root; diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 497649c69..1b73a7f81 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -384,6 +384,17 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino, struct inode *inode; int err; + /* + * We have to check for this corruption early as otherwise + * iget_locked() could wait indefinitely for the state of our + * parent inode. + */ + if (parent->i_ino == ea_ino) { + ext4_error(parent->i_sb, + "Parent and EA inode have the same ino %lu", ea_ino); + return -EFSCORRUPTED; + } + inode = ext4_iget(parent->i_sb, ea_ino, EXT4_IGET_NORMAL); if (IS_ERR(inode)) { err = PTR_ERR(inode); @@ -434,6 +445,21 @@ error: return err; } +/* Remove entry from mbcache when EA inode is getting evicted */ +void ext4_evict_ea_inode(struct inode *inode) +{ + struct mb_cache_entry *oe; + + if (!EA_INODE_CACHE(inode)) + return; + /* Wait for entry to get unused so that we can remove it */ + while ((oe = mb_cache_entry_delete_or_get(EA_INODE_CACHE(inode), + ext4_xattr_inode_get_hash(inode), inode->i_ino))) { + mb_cache_entry_wait_unused(oe); + mb_cache_entry_put(EA_INODE_CACHE(inode), oe); + } +} + static int ext4_xattr_inode_verify_hashes(struct inode *ea_inode, struct ext4_xattr_entry *entry, void *buffer, @@ -1019,10 +1045,8 @@ static int ext4_xattr_ensure_credits(handle_t *handle, struct inode *inode, static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode, int ref_change) { - struct mb_cache *ea_inode_cache = EA_INODE_CACHE(ea_inode); struct ext4_iloc iloc; s64 ref_count; - u32 hash; int ret; inode_lock(ea_inode); @@ -1047,14 +1071,6 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode, set_nlink(ea_inode, 1); ext4_orphan_del(handle, ea_inode); - - if (ea_inode_cache) { - hash = ext4_xattr_inode_get_hash(ea_inode); - mb_cache_entry_create(ea_inode_cache, - GFP_NOFS, hash, - ea_inode->i_ino, - true /* reusable */); - } } } else { WARN_ONCE(ref_count < 0, "EA inode %lu ref_count=%lld", @@ -1067,12 +1083,6 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode, clear_nlink(ea_inode); ext4_orphan_add(handle, ea_inode); - - if (ea_inode_cache) { - hash = ext4_xattr_inode_get_hash(ea_inode); - mb_cache_entry_delete(ea_inode_cache, hash, - ea_inode->i_ino); - } } } @@ -1253,6 +1263,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode, if (error) goto out; +retry_ref: lock_buffer(bh); hash = le32_to_cpu(BHDR(bh)->h_hash); ref = le32_to_cpu(BHDR(bh)->h_refcount); @@ -1262,9 +1273,18 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode, * This must happen under buffer lock for * ext4_xattr_block_set() to reliably detect freed block */ - if (ea_block_cache) - mb_cache_entry_delete(ea_block_cache, hash, - bh->b_blocknr); + if (ea_block_cache) { + struct mb_cache_entry *oe; + + oe = mb_cache_entry_delete_or_get(ea_block_cache, hash, + bh->b_blocknr); + if (oe) { + unlock_buffer(bh); + mb_cache_entry_wait_unused(oe); + mb_cache_entry_put(ea_block_cache, oe); + goto retry_ref; + } + } get_bh(bh); unlock_buffer(bh); @@ -1288,7 +1308,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode, ce = mb_cache_entry_get(ea_block_cache, hash, bh->b_blocknr); if (ce) { - ce->e_reusable = 1; + set_bit(MBE_REUSABLE_B, &ce->e_flags); mb_cache_entry_put(ea_block_cache, ce); } } @@ -1427,6 +1447,13 @@ static struct inode *ext4_xattr_inode_create(handle_t *handle, uid_t owner[2] = { i_uid_read(inode), i_gid_read(inode) }; int err; + if (inode->i_sb->s_root == NULL) { + ext4_warning(inode->i_sb, + "refuse to create EA inode when umounting"); + WARN_ON(1); + return ERR_PTR(-EINVAL); + } + /* * Let the next inode be the goal, so we try and allocate the EA inode * in the same group, or nearby one. @@ -1446,6 +1473,9 @@ static struct inode *ext4_xattr_inode_create(handle_t *handle, if (!err) err = ext4_inode_attach_jinode(ea_inode); if (err) { + if (ext4_xattr_inode_dec_ref(handle, ea_inode)) + ext4_warning_inode(ea_inode, + "cleanup dec ref error %d", err); iput(ea_inode); return ERR_PTR(err); } @@ -1872,6 +1902,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, #define header(x) ((struct ext4_xattr_header *)(x)) if (s->base) { + int offset = (char *)s->here - bs->bh->b_data; + BUFFER_TRACE(bs->bh, "get_write_access"); error = ext4_journal_get_write_access(handle, bs->bh); if (error) @@ -1886,9 +1918,20 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, * ext4_xattr_block_set() to reliably detect modified * block */ - if (ea_block_cache) - mb_cache_entry_delete(ea_block_cache, hash, - bs->bh->b_blocknr); + if (ea_block_cache) { + struct mb_cache_entry *oe; + + oe = mb_cache_entry_delete_or_get(ea_block_cache, + hash, bs->bh->b_blocknr); + if (oe) { + /* + * Xattr block is getting reused. Leave + * it alone. + */ + mb_cache_entry_put(ea_block_cache, oe); + goto clone_block; + } + } ea_bdebug(bs->bh, "modifying in-place"); error = ext4_xattr_set_entry(i, s, handle, inode, true /* is_block */); @@ -1903,50 +1946,47 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, if (error) goto cleanup; goto inserted; - } else { - int offset = (char *)s->here - bs->bh->b_data; + } +clone_block: + unlock_buffer(bs->bh); + ea_bdebug(bs->bh, "cloning"); + s->base = kmemdup(BHDR(bs->bh), bs->bh->b_size, GFP_NOFS); + error = -ENOMEM; + if (s->base == NULL) + goto cleanup; + s->first = ENTRY(header(s->base)+1); + header(s->base)->h_refcount = cpu_to_le32(1); + s->here = ENTRY(s->base + offset); + s->end = s->base + bs->bh->b_size; - unlock_buffer(bs->bh); - ea_bdebug(bs->bh, "cloning"); - s->base = kmalloc(bs->bh->b_size, GFP_NOFS); - error = -ENOMEM; - if (s->base == NULL) + /* + * If existing entry points to an xattr inode, we need + * to prevent ext4_xattr_set_entry() from decrementing + * ref count on it because the reference belongs to the + * original block. In this case, make the entry look + * like it has an empty value. + */ + if (!s->not_found && s->here->e_value_inum) { + ea_ino = le32_to_cpu(s->here->e_value_inum); + error = ext4_xattr_inode_iget(inode, ea_ino, + le32_to_cpu(s->here->e_hash), + &tmp_inode); + if (error) goto cleanup; - memcpy(s->base, BHDR(bs->bh), bs->bh->b_size); - s->first = ENTRY(header(s->base)+1); - header(s->base)->h_refcount = cpu_to_le32(1); - s->here = ENTRY(s->base + offset); - s->end = s->base + bs->bh->b_size; - - /* - * If existing entry points to an xattr inode, we need - * to prevent ext4_xattr_set_entry() from decrementing - * ref count on it because the reference belongs to the - * original block. In this case, make the entry look - * like it has an empty value. - */ - if (!s->not_found && s->here->e_value_inum) { - ea_ino = le32_to_cpu(s->here->e_value_inum); - error = ext4_xattr_inode_iget(inode, ea_ino, - le32_to_cpu(s->here->e_hash), - &tmp_inode); - if (error) - goto cleanup; - if (!ext4_test_inode_state(tmp_inode, - EXT4_STATE_LUSTRE_EA_INODE)) { - /* - * Defer quota free call for previous - * inode until success is guaranteed. - */ - old_ea_inode_quota = le32_to_cpu( - s->here->e_value_size); - } - iput(tmp_inode); - - s->here->e_value_inum = 0; - s->here->e_value_size = 0; + if (!ext4_test_inode_state(tmp_inode, + EXT4_STATE_LUSTRE_EA_INODE)) { + /* + * Defer quota free call for previous + * inode until success is guaranteed. + */ + old_ea_inode_quota = le32_to_cpu( + s->here->e_value_size); } + iput(tmp_inode); + + s->here->e_value_inum = 0; + s->here->e_value_size = 0; } } else { /* Allocate a buffer where we construct the new block. */ @@ -2013,18 +2053,13 @@ inserted: lock_buffer(new_bh); /* * We have to be careful about races with - * freeing, rehashing or adding references to - * xattr block. Once we hold buffer lock xattr - * block's state is stable so we can check - * whether the block got freed / rehashed or - * not. Since we unhash mbcache entry under - * buffer lock when freeing / rehashing xattr - * block, checking whether entry is still - * hashed is reliable. Same rules hold for - * e_reusable handling. + * adding references to xattr block. Once we + * hold buffer lock xattr block's state is + * stable so we can check the additional + * reference fits. */ - if (hlist_bl_unhashed(&ce->e_hash_list) || - !ce->e_reusable) { + ref = le32_to_cpu(BHDR(new_bh)->h_refcount) + 1; + if (ref > EXT4_XATTR_REFCOUNT_MAX) { /* * Undo everything and check mbcache * again. @@ -2039,10 +2074,9 @@ inserted: new_bh = NULL; goto inserted; } - ref = le32_to_cpu(BHDR(new_bh)->h_refcount) + 1; BHDR(new_bh)->h_refcount = cpu_to_le32(ref); - if (ref >= EXT4_XATTR_REFCOUNT_MAX) - ce->e_reusable = 0; + if (ref == EXT4_XATTR_REFCOUNT_MAX) + clear_bit(MBE_REUSABLE_B, &ce->e_flags); ea_bdebug(new_bh, "reusing; refcount now=%d", ref); ext4_xattr_block_csum_set(inode, new_bh); @@ -2070,19 +2104,11 @@ inserted: goal = ext4_group_first_block_no(sb, EXT4_I(inode)->i_block_group); - - /* non-extent files can't have physical blocks past 2^32 */ - if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) - goal = goal & EXT4_MAX_BLOCK_FILE_PHYS; - block = ext4_new_meta_blocks(handle, inode, goal, 0, NULL, &error); if (error) goto cleanup; - if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) - BUG_ON(block > EXT4_MAX_BLOCK_FILE_PHYS); - ea_idebug(inode, "creating block %llu", (unsigned long long)block); @@ -2210,7 +2236,7 @@ int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i, return 0; } -int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode, +int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode, struct ext4_xattr_info *i, struct ext4_xattr_ibody_find *is) { @@ -2235,30 +2261,6 @@ int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode, return 0; } -static int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode, - struct ext4_xattr_info *i, - struct ext4_xattr_ibody_find *is) -{ - struct ext4_xattr_ibody_header *header; - struct ext4_xattr_search *s = &is->s; - int error; - - if (EXT4_I(inode)->i_extra_isize == 0) - return -ENOSPC; - error = ext4_xattr_set_entry(i, s, handle, inode, false /* is_block */); - if (error) - return error; - header = IHDR(inode, ext4_raw_inode(&is->iloc)); - if (!IS_LAST_ENTRY(s->first)) { - header->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC); - ext4_set_inode_state(inode, EXT4_STATE_XATTR); - } else { - header->h_magic = cpu_to_le32(0); - ext4_clear_inode_state(inode, EXT4_STATE_XATTR); - } - return 0; -} - static int ext4_xattr_value_same(struct ext4_xattr_search *s, struct ext4_xattr_info *i) { @@ -2575,9 +2577,8 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode, is = kzalloc(sizeof(struct ext4_xattr_ibody_find), GFP_NOFS); bs = kzalloc(sizeof(struct ext4_xattr_block_find), GFP_NOFS); - buffer = kmalloc(value_size, GFP_NOFS); b_entry_name = kmalloc(entry->e_name_len + 1, GFP_NOFS); - if (!is || !bs || !buffer || !b_entry_name) { + if (!is || !bs || !b_entry_name) { error = -ENOMEM; goto out; } @@ -2589,12 +2590,18 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode, /* Save the entry name and the entry value */ if (entry->e_value_inum) { + buffer = kvmalloc(value_size, GFP_NOFS); + if (!buffer) { + error = -ENOMEM; + goto out; + } + error = ext4_xattr_inode_get(inode, entry, buffer, value_size); if (error) goto out; } else { size_t value_offs = le16_to_cpu(entry->e_value_offs); - memcpy(buffer, (void *)IFIRST(header) + value_offs, value_size); + buffer = (void *)IFIRST(header) + value_offs; } memcpy(b_entry_name, entry->e_name, entry->e_name_len); @@ -2609,25 +2616,26 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode, if (error) goto out; - /* Remove the chosen entry from the inode */ - error = ext4_xattr_ibody_set(handle, inode, &i, is); - if (error) - goto out; - i.value = buffer; i.value_len = value_size; error = ext4_xattr_block_find(inode, &i, bs); if (error) goto out; - /* Add entry which was removed from the inode into the block */ + /* Move ea entry from the inode into the block */ error = ext4_xattr_block_set(handle, inode, &i, bs); if (error) goto out; - error = 0; + + /* Remove the chosen entry from the inode */ + i.value = NULL; + i.value_len = 0; + error = ext4_xattr_ibody_set(handle, inode, &i, is); + out: kfree(b_entry_name); - kfree(buffer); + if (entry->e_value_inum && buffer) + kvfree(buffer); if (is) brelse(is->iloc.bh); if (bs) @@ -2802,6 +2810,9 @@ shift: (void *)header, total_ino); EXT4_I(inode)->i_extra_isize = new_extra_isize; + if (ext4_has_inline_data(inode)) + error = ext4_find_inline_data_nolock(inode); + cleanup: if (error && (mnt_count != le16_to_cpu(sbi->s_es->s_mnt_count))) { ext4_warning(inode->i_sb, "Unable to expand inode %lu. Delete some EAs or run e2fsck.", diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h index 990084e00..66911f8a1 100644 --- a/fs/ext4/xattr.h +++ b/fs/ext4/xattr.h @@ -190,6 +190,7 @@ extern void ext4_xattr_inode_array_free(struct ext4_xattr_inode_array *array); extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, struct ext4_inode *raw_inode, handle_t *handle); +extern void ext4_evict_ea_inode(struct inode *inode); extern const struct xattr_handler *ext4_xattr_handlers[]; @@ -198,9 +199,9 @@ extern int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i, extern int ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name, void *buffer, size_t buffer_size); -extern int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode, - struct ext4_xattr_info *i, - struct ext4_xattr_ibody_find *is); +extern int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode, + struct ext4_xattr_info *i, + struct ext4_xattr_ibody_find *is); extern struct mb_cache *ext4_xattr_create_cache(void); extern void ext4_xattr_destroy_cache(struct mb_cache *); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index c63f5e326..56b2dadd6 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -464,7 +464,7 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) } if (fio->io_wbc && !is_read_io(fio->op)) - wbc_account_io(fio->io_wbc, page, PAGE_SIZE); + wbc_account_io(fio->io_wbc, fio->page, PAGE_SIZE); bio_set_op_attrs(bio, fio->op, fio->op_flags); @@ -533,7 +533,7 @@ alloc_new: } if (fio->io_wbc) - wbc_account_io(fio->io_wbc, bio_page, PAGE_SIZE); + wbc_account_io(fio->io_wbc, fio->page, PAGE_SIZE); io->last_block_in_bio = fio->new_blkaddr; f2fs_trace_ios(fio, 0); diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 36d6b561b..e85ed4aa9 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -375,7 +375,8 @@ static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs, struct extent_node *en; bool ret = false; - f2fs_bug_on(sbi, !et); + if (!et) + return false; trace_f2fs_lookup_extent_tree_start(inode, pgofs); diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 6bf78cf63..7ad78aa9c 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -408,18 +408,17 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage, dentry_blk = page_address(page); + /* + * Start by zeroing the full block, to ensure that all unused space is + * zeroed and no uninitialized memory is leaked to disk. + */ + memset(dentry_blk, 0, F2FS_BLKSIZE); + make_dentry_ptr_inline(dir, &src, inline_dentry); make_dentry_ptr_block(dir, &dst, dentry_blk); /* copy data from inline dentry block to new dentry block */ memcpy(dst.bitmap, src.bitmap, src.nr_bitmap); - memset(dst.bitmap + src.nr_bitmap, 0, dst.nr_bitmap - src.nr_bitmap); - /* - * we do not need to zero out remainder part of dentry and filename - * field, since we have used bitmap for marking the usage status of - * them, besides, we can also ignore copying/zeroing reserved space - * of dentry block, because them haven't been used so far. - */ memcpy(dst.dentry, src.dentry, SIZE_OF_DIR_ENTRY * src.max); memcpy(dst.filename, src.filename, src.max * F2FS_SLOT_LEN); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 6fbf04713..7596fce92 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1382,7 +1382,7 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi, if (i + 1 < dpolicy->granularity) break; - if (i < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered) + if (i + 1 < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered) return __issue_discard_cmd_orderly(sbi, dpolicy); pend_list = &dcc->pend_list[i]; @@ -627,6 +627,7 @@ int __close_fd(struct files_struct *files, unsigned fd) fdt = files_fdtable(files); if (fd >= fdt->max_fds) goto out_unlock; + fd = array_index_nospec(fd, fdt->max_fds); file = fdt->fd[fd]; if (!file) goto out_unlock; diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 31e8270d0..d9866d89f 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -179,7 +179,6 @@ static int __gfs2_jdata_writepage(struct page *page, struct writeback_control *w { struct inode *inode = page->mapping->host; struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_sbd *sdp = GFS2_SB(inode); if (PageChecked(page)) { ClearPageChecked(page); @@ -187,7 +186,7 @@ static int __gfs2_jdata_writepage(struct page *page, struct writeback_control *w create_empty_buffers(page, inode->i_sb->s_blocksize, BIT(BH_Dirty)|BIT(BH_Uptodate)); } - gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize); + gfs2_page_add_databufs(ip, page, 0, PAGE_SIZE); } return gfs2_write_full_page(page, gfs2_get_block_noalloc, wbc); } @@ -481,8 +480,6 @@ int stuffed_readpage(struct gfs2_inode *ip, struct page *page) return error; kaddr = kmap_atomic(page); - if (dsize > gfs2_max_stuffed_size(ip)) - dsize = gfs2_max_stuffed_size(ip); memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize); memset(kaddr + dsize, 0, PAGE_SIZE - dsize); kunmap_atomic(kaddr); diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 150cec85c..ccafd45b6 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -72,9 +72,6 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh, void *kaddr = kmap(page); u64 dsize = i_size_read(inode); - if (dsize > gfs2_max_stuffed_size(ip)) - dsize = gfs2_max_stuffed_size(ip); - memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize); memset(kaddr + dsize, 0, PAGE_SIZE - dsize); kunmap(page); diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 20f08f439..a7a423adf 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -388,6 +388,9 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) ip->i_depth = (u8)depth; ip->i_entries = be32_to_cpu(str->di_entries); + if (gfs2_is_stuffed(ip) && ip->i_inode.i_size > gfs2_max_stuffed_size(ip)) + goto corrupt; + if (S_ISREG(ip->i_inode.i_mode)) gfs2_set_aops(&ip->i_inode); diff --git a/fs/hfs/bnode.c b/fs/hfs/bnode.c index c0a73a6ff..397e02a56 100644 --- a/fs/hfs/bnode.c +++ b/fs/hfs/bnode.c @@ -281,6 +281,7 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid) tree->node_hash[hash] = node; tree->node_hash_cnt++; } else { + hfs_bnode_get(node2); spin_unlock(&tree->hash_lock); kfree(node); wait_event(node2->lock_wq, !test_bit(HFS_BNODE_NEW, &node2->flags)); diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index da243c84e..ee2ea5532 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -453,14 +453,16 @@ int hfs_write_inode(struct inode *inode, struct writeback_control *wbc) /* panic? */ return -EIO; + res = -EIO; + if (HFS_I(main_inode)->cat_key.CName.len > HFS_NAMELEN) + goto out; fd.search_key->cat = HFS_I(main_inode)->cat_key; if (hfs_brec_find(&fd)) - /* panic? */ goto out; if (S_ISDIR(main_inode->i_mode)) { if (fd.entrylength < sizeof(struct hfs_cat_dir)) - /* panic? */; + goto out; hfs_bnode_read(fd.bnode, &rec, fd.entryoffset, sizeof(struct hfs_cat_dir)); if (rec.type != HFS_CDR_DIR || @@ -473,6 +475,8 @@ int hfs_write_inode(struct inode *inode, struct writeback_control *wbc) hfs_bnode_write(fd.bnode, &rec, fd.entryoffset, sizeof(struct hfs_cat_dir)); } else if (HFS_IS_RSRC(inode)) { + if (fd.entrylength < sizeof(struct hfs_cat_file)) + goto out; hfs_bnode_read(fd.bnode, &rec, fd.entryoffset, sizeof(struct hfs_cat_file)); hfs_inode_write_fork(inode, rec.file.RExtRec, @@ -481,7 +485,7 @@ int hfs_write_inode(struct inode *inode, struct writeback_control *wbc) sizeof(struct hfs_cat_file)); } else { if (fd.entrylength < sizeof(struct hfs_cat_file)) - /* panic? */; + goto out; hfs_bnode_read(fd.bnode, &rec, fd.entryoffset, sizeof(struct hfs_cat_file)); if (rec.type != HFS_CDR_FIL || @@ -498,9 +502,10 @@ int hfs_write_inode(struct inode *inode, struct writeback_control *wbc) hfs_bnode_write(fd.bnode, &rec, fd.entryoffset, sizeof(struct hfs_cat_file)); } + res = 0; out: hfs_find_exit(&fd); - return 0; + return res; } static struct dentry *hfs_file_lookup(struct inode *dir, struct dentry *dentry, diff --git a/fs/hfs/trans.c b/fs/hfs/trans.c index 39f5e343b..fdb0edb8a 100644 --- a/fs/hfs/trans.c +++ b/fs/hfs/trans.c @@ -109,7 +109,7 @@ void hfs_asc2mac(struct super_block *sb, struct hfs_name *out, const struct qstr if (nls_io) { wchar_t ch; - while (srclen > 0) { + while (srclen > 0 && dstlen > 0) { size = nls_io->char2uni(src, srclen, &ch); if (size < 0) { ch = '?'; diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index dd7ad9f13..db2e1c750 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -198,6 +198,8 @@ struct hfsplus_sb_info { #define HFSPLUS_SB_HFSX 3 #define HFSPLUS_SB_CASEFOLD 4 #define HFSPLUS_SB_NOBARRIER 5 +#define HFSPLUS_SB_UID 6 +#define HFSPLUS_SB_GID 7 static inline struct hfsplus_sb_info *HFSPLUS_SB(struct super_block *sb) { diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index d7ab9d8c4..c7073a151 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -187,11 +187,11 @@ static void hfsplus_get_perms(struct inode *inode, mode = be16_to_cpu(perms->mode); i_uid_write(inode, be32_to_cpu(perms->owner)); - if (!i_uid_read(inode) && !mode) + if ((test_bit(HFSPLUS_SB_UID, &sbi->flags)) || (!i_uid_read(inode) && !mode)) inode->i_uid = sbi->uid; i_gid_write(inode, be32_to_cpu(perms->group)); - if (!i_gid_read(inode) && !mode) + if ((test_bit(HFSPLUS_SB_GID, &sbi->flags)) || (!i_gid_read(inode) && !mode)) inode->i_gid = sbi->gid; if (dir) { @@ -476,8 +476,7 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd) if (type == HFSPLUS_FOLDER) { struct hfsplus_cat_folder *folder = &entry.folder; - if (fd->entrylength < sizeof(struct hfsplus_cat_folder)) - /* panic? */; + WARN_ON(fd->entrylength < sizeof(struct hfsplus_cat_folder)); hfs_bnode_read(fd->bnode, &entry, fd->entryoffset, sizeof(struct hfsplus_cat_folder)); hfsplus_get_perms(inode, &folder->permissions, 1); @@ -497,8 +496,7 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd) } else if (type == HFSPLUS_FILE) { struct hfsplus_cat_file *file = &entry.file; - if (fd->entrylength < sizeof(struct hfsplus_cat_file)) - /* panic? */; + WARN_ON(fd->entrylength < sizeof(struct hfsplus_cat_file)); hfs_bnode_read(fd->bnode, &entry, fd->entryoffset, sizeof(struct hfsplus_cat_file)); @@ -555,8 +553,7 @@ int hfsplus_cat_write_inode(struct inode *inode) if (S_ISDIR(main_inode->i_mode)) { struct hfsplus_cat_folder *folder = &entry.folder; - if (fd.entrylength < sizeof(struct hfsplus_cat_folder)) - /* panic? */; + WARN_ON(fd.entrylength < sizeof(struct hfsplus_cat_folder)); hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, sizeof(struct hfsplus_cat_folder)); /* simple node checks? */ @@ -581,8 +578,7 @@ int hfsplus_cat_write_inode(struct inode *inode) } else { struct hfsplus_cat_file *file = &entry.file; - if (fd.entrylength < sizeof(struct hfsplus_cat_file)) - /* panic? */; + WARN_ON(fd.entrylength < sizeof(struct hfsplus_cat_file)); hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, sizeof(struct hfsplus_cat_file)); hfsplus_inode_write_fork(inode, &file->data_fork); diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c index 047e05c57..c94a58762 100644 --- a/fs/hfsplus/options.c +++ b/fs/hfsplus/options.c @@ -140,6 +140,8 @@ int hfsplus_parse_options(char *input, struct hfsplus_sb_info *sbi) if (!uid_valid(sbi->uid)) { pr_err("invalid uid specified\n"); return 0; + } else { + set_bit(HFSPLUS_SB_UID, &sbi->flags); } break; case opt_gid: @@ -151,6 +153,8 @@ int hfsplus_parse_options(char *input, struct hfsplus_sb_info *sbi) if (!gid_valid(sbi->gid)) { pr_err("invalid gid specified\n"); return 0; + } else { + set_bit(HFSPLUS_SB_GID, &sbi->flags); } break; case opt_part: diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index eb4535eba..3b1356b10 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -294,11 +294,11 @@ static void hfsplus_put_super(struct super_block *sb) hfsplus_sync_fs(sb, 1); } + iput(sbi->alloc_file); + iput(sbi->hidden_dir); hfs_btree_close(sbi->attr_tree); hfs_btree_close(sbi->cat_tree); hfs_btree_close(sbi->ext_tree); - iput(sbi->alloc_file); - iput(sbi->hidden_dir); kfree(sbi->s_vhdr_buf); kfree(sbi->s_backup_vhdr_buf); unload_nls(sbi->nls); diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c index 3047872fd..bf3d8a451 100644 --- a/fs/jffs2/file.c +++ b/fs/jffs2/file.c @@ -137,19 +137,18 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb); pgoff_t index = pos >> PAGE_SHIFT; - uint32_t pageofs = index << PAGE_SHIFT; int ret = 0; jffs2_dbg(1, "%s()\n", __func__); - if (pageofs > inode->i_size) { - /* Make new hole frag from old EOF to new page */ + if (pos > inode->i_size) { + /* Make new hole frag from old EOF to new position */ struct jffs2_raw_inode ri; struct jffs2_full_dnode *fn; uint32_t alloc_len; - jffs2_dbg(1, "Writing new hole frag 0x%x-0x%x between current EOF and new page\n", - (unsigned int)inode->i_size, pageofs); + jffs2_dbg(1, "Writing new hole frag 0x%x-0x%x between current EOF and new position\n", + (unsigned int)inode->i_size, (uint32_t)pos); ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len, ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE); @@ -169,10 +168,10 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, ri.mode = cpu_to_jemode(inode->i_mode); ri.uid = cpu_to_je16(i_uid_read(inode)); ri.gid = cpu_to_je16(i_gid_read(inode)); - ri.isize = cpu_to_je32(max((uint32_t)inode->i_size, pageofs)); + ri.isize = cpu_to_je32((uint32_t)pos); ri.atime = ri.ctime = ri.mtime = cpu_to_je32(JFFS2_NOW()); ri.offset = cpu_to_je32(inode->i_size); - ri.dsize = cpu_to_je32(pageofs - inode->i_size); + ri.dsize = cpu_to_je32((uint32_t)pos - inode->i_size); ri.csize = cpu_to_je32(0); ri.compr = JFFS2_COMPR_ZERO; ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8)); @@ -202,7 +201,7 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, goto out_err; } jffs2_complete_reservation(c); - inode->i_size = pageofs; + inode->i_size = pos; mutex_unlock(&f->sem); } diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index 1014f2a24..3ad0a33e0 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c @@ -168,7 +168,7 @@ int dbMount(struct inode *ipbmap) struct bmap *bmp; struct dbmap_disk *dbmp_le; struct metapage *mp; - int i; + int i, err; /* * allocate/initialize the in-memory bmap descriptor @@ -183,8 +183,8 @@ int dbMount(struct inode *ipbmap) BMAPBLKNO << JFS_SBI(ipbmap->i_sb)->l2nbperpage, PSIZE, 0); if (mp == NULL) { - kfree(bmp); - return -EIO; + err = -EIO; + goto err_kfree_bmp; } /* copy the on-disk bmap descriptor to its in-memory version. */ @@ -194,9 +194,8 @@ int dbMount(struct inode *ipbmap) bmp->db_l2nbperpage = le32_to_cpu(dbmp_le->dn_l2nbperpage); bmp->db_numag = le32_to_cpu(dbmp_le->dn_numag); if (!bmp->db_numag) { - release_metapage(mp); - kfree(bmp); - return -EINVAL; + err = -EINVAL; + goto err_release_metapage; } bmp->db_maxlevel = le32_to_cpu(dbmp_le->dn_maxlevel); @@ -207,6 +206,17 @@ int dbMount(struct inode *ipbmap) bmp->db_agwidth = le32_to_cpu(dbmp_le->dn_agwidth); bmp->db_agstart = le32_to_cpu(dbmp_le->dn_agstart); bmp->db_agl2size = le32_to_cpu(dbmp_le->dn_agl2size); + if (bmp->db_agl2size > L2MAXL2SIZE - L2MAXAG || + bmp->db_agl2size < 0) { + err = -EINVAL; + goto err_release_metapage; + } + + if (((bmp->db_mapsize - 1) >> bmp->db_agl2size) > MAXAG) { + err = -EINVAL; + goto err_release_metapage; + } + for (i = 0; i < MAXAG; i++) bmp->db_agfree[i] = le64_to_cpu(dbmp_le->dn_agfree[i]); bmp->db_agsize = le64_to_cpu(dbmp_le->dn_agsize); @@ -227,6 +237,12 @@ int dbMount(struct inode *ipbmap) BMAP_LOCK_INIT(bmp); return (0); + +err_release_metapage: + release_metapage(mp); +err_kfree_bmp: + kfree(bmp); + return err; } diff --git a/fs/libfs.c b/fs/libfs.c index be57e6483..fd5f6c106 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -864,8 +864,8 @@ out: EXPORT_SYMBOL_GPL(simple_attr_read); /* interpret the buffer as a number to call the set function with */ -ssize_t simple_attr_write(struct file *file, const char __user *buf, - size_t len, loff_t *ppos) +static ssize_t simple_attr_write_xsigned(struct file *file, const char __user *buf, + size_t len, loff_t *ppos, bool is_signed) { struct simple_attr *attr; unsigned long long val; @@ -886,7 +886,10 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf, goto out; attr->set_buf[size] = '\0'; - ret = kstrtoull(attr->set_buf, 0, &val); + if (is_signed) + ret = kstrtoll(attr->set_buf, 0, &val); + else + ret = kstrtoull(attr->set_buf, 0, &val); if (ret) goto out; ret = attr->set(attr->data, val); @@ -896,8 +899,21 @@ out: mutex_unlock(&attr->mutex); return ret; } + +ssize_t simple_attr_write(struct file *file, const char __user *buf, + size_t len, loff_t *ppos) +{ + return simple_attr_write_xsigned(file, buf, len, ppos, false); +} EXPORT_SYMBOL_GPL(simple_attr_write); +ssize_t simple_attr_write_signed(struct file *file, const char __user *buf, + size_t len, loff_t *ppos) +{ + return simple_attr_write_xsigned(file, buf, len, ppos, true); +} +EXPORT_SYMBOL_GPL(simple_attr_write_signed); + /** * generic_fh_to_dentry - generic helper for the fh_to_dentry export operation * @sb: filesystem to do the file handle conversion on diff --git a/fs/mbcache.c b/fs/mbcache.c index 081ccf0ca..2e2d4de4c 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c @@ -10,7 +10,7 @@ /* * Mbcache is a simple key-value store. Keys need not be unique, however * key-value pairs are expected to be unique (we use this fact in - * mb_cache_entry_delete()). + * mb_cache_entry_delete_or_get()). * * Ext2 and ext4 use this cache for deduplication of extended attribute blocks. * Ext4 also uses it for deduplication of xattr values stored in inodes. @@ -89,12 +89,19 @@ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key, return -ENOMEM; INIT_LIST_HEAD(&entry->e_list); - /* One ref for hash, one ref returned */ - atomic_set(&entry->e_refcnt, 1); + /* + * We create entry with two references. One reference is kept by the + * hash table, the other reference is used to protect us from + * mb_cache_entry_delete_or_get() until the entry is fully setup. This + * avoids nesting of cache->c_list_lock into hash table bit locks which + * is problematic for RT. + */ + atomic_set(&entry->e_refcnt, 2); entry->e_key = key; entry->e_value = value; - entry->e_reusable = reusable; - entry->e_referenced = 0; + entry->e_flags = 0; + if (reusable) + set_bit(MBE_REUSABLE_B, &entry->e_flags); head = mb_cache_entry_head(cache, key); hlist_bl_lock(head); hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) { @@ -106,24 +113,41 @@ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key, } hlist_bl_add_head(&entry->e_hash_list, head); hlist_bl_unlock(head); - spin_lock(&cache->c_list_lock); list_add_tail(&entry->e_list, &cache->c_list); - /* Grab ref for LRU list */ - atomic_inc(&entry->e_refcnt); cache->c_entry_count++; spin_unlock(&cache->c_list_lock); + mb_cache_entry_put(cache, entry); return 0; } EXPORT_SYMBOL(mb_cache_entry_create); -void __mb_cache_entry_free(struct mb_cache_entry *entry) +void __mb_cache_entry_free(struct mb_cache *cache, struct mb_cache_entry *entry) { + struct hlist_bl_head *head; + + head = mb_cache_entry_head(cache, entry->e_key); + hlist_bl_lock(head); + hlist_bl_del(&entry->e_hash_list); + hlist_bl_unlock(head); kmem_cache_free(mb_entry_cache, entry); } EXPORT_SYMBOL(__mb_cache_entry_free); +/* + * mb_cache_entry_wait_unused - wait to be the last user of the entry + * + * @entry - entry to work on + * + * Wait to be the last user of the entry. + */ +void mb_cache_entry_wait_unused(struct mb_cache_entry *entry) +{ + wait_var_event(&entry->e_refcnt, atomic_read(&entry->e_refcnt) <= 2); +} +EXPORT_SYMBOL(mb_cache_entry_wait_unused); + static struct mb_cache_entry *__entry_find(struct mb_cache *cache, struct mb_cache_entry *entry, u32 key) @@ -141,10 +165,10 @@ static struct mb_cache_entry *__entry_find(struct mb_cache *cache, while (node) { entry = hlist_bl_entry(node, struct mb_cache_entry, e_hash_list); - if (entry->e_key == key && entry->e_reusable) { - atomic_inc(&entry->e_refcnt); + if (entry->e_key == key && + test_bit(MBE_REUSABLE_B, &entry->e_flags) && + atomic_inc_not_zero(&entry->e_refcnt)) goto out; - } node = node->next; } entry = NULL; @@ -204,10 +228,9 @@ struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key, head = mb_cache_entry_head(cache, key); hlist_bl_lock(head); hlist_bl_for_each_entry(entry, node, head, e_hash_list) { - if (entry->e_key == key && entry->e_value == value) { - atomic_inc(&entry->e_refcnt); + if (entry->e_key == key && entry->e_value == value && + atomic_inc_not_zero(&entry->e_refcnt)) goto out; - } } entry = NULL; out: @@ -216,7 +239,7 @@ out: } EXPORT_SYMBOL(mb_cache_entry_get); -/* mb_cache_entry_delete - remove a cache entry +/* mb_cache_entry_delete - try to remove a cache entry * @cache - cache we work with * @key - key * @value - value @@ -253,6 +276,43 @@ void mb_cache_entry_delete(struct mb_cache *cache, u32 key, u64 value) } EXPORT_SYMBOL(mb_cache_entry_delete); +/* mb_cache_entry_delete_or_get - remove a cache entry if it has no users + * @cache - cache we work with + * @key - key + * @value - value + * + * Remove entry from cache @cache with key @key and value @value. The removal + * happens only if the entry is unused. The function returns NULL in case the + * entry was successfully removed or there's no entry in cache. Otherwise the + * function grabs reference of the entry that we failed to delete because it + * still has users and return it. + */ +struct mb_cache_entry *mb_cache_entry_delete_or_get(struct mb_cache *cache, + u32 key, u64 value) +{ + struct mb_cache_entry *entry; + + entry = mb_cache_entry_get(cache, key, value); + if (!entry) + return NULL; + + /* + * Drop the ref we got from mb_cache_entry_get() and the initial hash + * ref if we are the last user + */ + if (atomic_cmpxchg(&entry->e_refcnt, 2, 0) != 2) + return entry; + + spin_lock(&cache->c_list_lock); + if (!list_empty(&entry->e_list)) + list_del_init(&entry->e_list); + cache->c_entry_count--; + spin_unlock(&cache->c_list_lock); + __mb_cache_entry_free(cache, entry); + return NULL; +} +EXPORT_SYMBOL(mb_cache_entry_delete_or_get); + /* mb_cache_entry_touch - cache entry got used * @cache - cache the entry belongs to * @entry - entry that got used @@ -262,7 +322,7 @@ EXPORT_SYMBOL(mb_cache_entry_delete); void mb_cache_entry_touch(struct mb_cache *cache, struct mb_cache_entry *entry) { - entry->e_referenced = 1; + set_bit(MBE_REFERENCED_B, &entry->e_flags); } EXPORT_SYMBOL(mb_cache_entry_touch); @@ -280,34 +340,24 @@ static unsigned long mb_cache_shrink(struct mb_cache *cache, unsigned long nr_to_scan) { struct mb_cache_entry *entry; - struct hlist_bl_head *head; unsigned long shrunk = 0; spin_lock(&cache->c_list_lock); while (nr_to_scan-- && !list_empty(&cache->c_list)) { entry = list_first_entry(&cache->c_list, struct mb_cache_entry, e_list); - if (entry->e_referenced) { - entry->e_referenced = 0; + /* Drop initial hash reference if there is no user */ + if (test_bit(MBE_REFERENCED_B, &entry->e_flags) || + atomic_cmpxchg(&entry->e_refcnt, 1, 0) != 1) { + clear_bit(MBE_REFERENCED_B, &entry->e_flags); list_move_tail(&entry->e_list, &cache->c_list); continue; } list_del_init(&entry->e_list); cache->c_entry_count--; - /* - * We keep LRU list reference so that entry doesn't go away - * from under us. - */ spin_unlock(&cache->c_list_lock); - head = mb_cache_entry_head(cache, entry->e_key); - hlist_bl_lock(head); - if (!hlist_bl_unhashed(&entry->e_hash_list)) { - hlist_bl_del_init(&entry->e_hash_list); - atomic_dec(&entry->e_refcnt); - } - hlist_bl_unlock(head); - if (mb_cache_entry_put(cache, entry)) - shrunk++; + __mb_cache_entry_free(cache, entry); + shrunk++; cond_resched(); spin_lock(&cache->c_list_lock); } @@ -399,11 +449,6 @@ void mb_cache_destroy(struct mb_cache *cache) * point. */ list_for_each_entry_safe(entry, next, &cache->c_list, e_list) { - if (!hlist_bl_unhashed(&entry->e_hash_list)) { - hlist_bl_del_init(&entry->e_hash_list); - atomic_dec(&entry->e_refcnt); - } else - WARN_ON(1); list_del(&entry->e_list); WARN_ON(atomic_read(&entry->e_refcnt) != 1); mb_cache_entry_put(cache, entry); diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index e8e825497..015d39ac2 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -837,6 +837,12 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, return &fl->generic_hdr; } +static bool +filelayout_lseg_is_striped(const struct nfs4_filelayout_segment *flseg) +{ + return flseg->num_fh > 1; +} + /* * filelayout_pg_test(). Called by nfs_can_coalesce_requests() * @@ -857,6 +863,8 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, size = pnfs_generic_pg_test(pgio, prev, req); if (!size) return 0; + else if (!filelayout_lseg_is_striped(FILELAYOUT_LSEG(pgio->pg_lseg))) + return size; /* see if req and prev are in the same stripe */ if (prev) { diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 5ac7bf24c..2d4383186 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -190,7 +190,7 @@ struct nfs4_state { unsigned int n_wronly; /* Number of write-only references */ unsigned int n_rdwr; /* Number of read/write references */ fmode_t state; /* State on the server (R,W, or RW) */ - atomic_t count; + refcount_t count; wait_queue_head_t waitq; }; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 9a0f48f7f..365161946 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1792,7 +1792,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata) out: return ERR_PTR(ret); out_return_state: - atomic_inc(&state->count); + refcount_inc(&state->count); return state; } @@ -1851,8 +1851,7 @@ _nfs4_opendata_reclaim_to_nfs4_state(struct nfs4_opendata *data) if (!data->rpc_done) { if (data->rpc_status) return ERR_PTR(data->rpc_status); - /* cached opens have already been processed */ - goto update; + return nfs4_try_open_cached(data); } ret = nfs_refresh_inode(inode, &data->f_attr); @@ -1861,10 +1860,11 @@ _nfs4_opendata_reclaim_to_nfs4_state(struct nfs4_opendata *data) if (data->o_res.delegation_type != 0) nfs4_opendata_check_deleg(data, state); -update: - update_open_stateid(state, &data->o_res.stateid, NULL, - data->o_arg.fmode); - atomic_inc(&state->count); + + if (!update_open_stateid(state, &data->o_res.stateid, + NULL, data->o_arg.fmode)) + return ERR_PTR(-EAGAIN); + refcount_inc(&state->count); return state; } @@ -1902,7 +1902,7 @@ nfs4_opendata_find_nfs4_state(struct nfs4_opendata *data) return ERR_CAST(inode); if (data->state != NULL && data->state->inode == inode) { state = data->state; - atomic_inc(&state->count); + refcount_inc(&state->count); } else state = nfs4_get_open_state(inode, data->owner); iput(inode); @@ -1928,8 +1928,11 @@ _nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data) if (data->o_res.delegation_type != 0) nfs4_opendata_check_deleg(data, state); - update_open_stateid(state, &data->o_res.stateid, NULL, - data->o_arg.fmode); + if (!update_open_stateid(state, &data->o_res.stateid, + NULL, data->o_arg.fmode)) { + nfs4_put_open_state(state); + state = ERR_PTR(-EAGAIN); + } out: nfs_release_seqid(data->o_arg.seqid); return state; @@ -1975,23 +1978,23 @@ static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context if (opendata == NULL) return ERR_PTR(-ENOMEM); opendata->state = state; - atomic_inc(&state->count); + refcount_inc(&state->count); return opendata; } static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, - fmode_t fmode) + fmode_t fmode) { struct nfs4_state *newstate; + struct nfs_server *server = NFS_SB(opendata->dentry->d_sb); + int openflags = opendata->o_arg.open_flags; int ret; if (!nfs4_mode_match_open_stateid(opendata->state, fmode)) return 0; - opendata->o_arg.open_flags = 0; opendata->o_arg.fmode = fmode; - opendata->o_arg.share_access = nfs4_map_atomic_open_share( - NFS_SB(opendata->dentry->d_sb), - fmode, 0); + opendata->o_arg.share_access = + nfs4_map_atomic_open_share(server, fmode, openflags); memset(&opendata->o_res, 0, sizeof(opendata->o_res)); memset(&opendata->c_res, 0, sizeof(opendata->c_res)); nfs4_init_opendata_res(opendata); @@ -2569,10 +2572,15 @@ static int _nfs4_open_expired(struct nfs_open_context *ctx, struct nfs4_state *s struct nfs4_opendata *opendata; int ret; - opendata = nfs4_open_recoverdata_alloc(ctx, state, - NFS4_OPEN_CLAIM_FH); + opendata = nfs4_open_recoverdata_alloc(ctx, state, NFS4_OPEN_CLAIM_FH); if (IS_ERR(opendata)) return PTR_ERR(opendata); + /* + * We're not recovering a delegation, so ask for no delegation. + * Otherwise the recovery thread could deadlock with an outstanding + * delegation return. + */ + opendata->o_arg.open_flags = O_DIRECT; ret = nfs4_open_recover(opendata, state); if (ret == -ESTALE) d_drop(ctx->dentry); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 5ab021f87..e5b4c6987 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -675,7 +675,7 @@ nfs4_alloc_open_state(void) state = kzalloc(sizeof(*state), GFP_NOFS); if (!state) return NULL; - atomic_set(&state->count, 1); + refcount_set(&state->count, 1); INIT_LIST_HEAD(&state->lock_states); spin_lock_init(&state->state_lock); seqlock_init(&state->seqlock); @@ -709,7 +709,7 @@ __nfs4_find_state_byowner(struct inode *inode, struct nfs4_state_owner *owner) continue; if (!nfs4_valid_open_stateid(state)) continue; - if (atomic_inc_not_zero(&state->count)) + if (refcount_inc_not_zero(&state->count)) return state; } return NULL; @@ -763,7 +763,7 @@ void nfs4_put_open_state(struct nfs4_state *state) struct inode *inode = state->inode; struct nfs4_state_owner *owner = state->owner; - if (!atomic_dec_and_lock(&state->count, &owner->so_lock)) + if (!refcount_dec_and_lock(&state->count, &owner->so_lock)) return; spin_lock(&inode->i_lock); list_del(&state->inode_states); @@ -1247,6 +1247,8 @@ void nfs4_schedule_state_manager(struct nfs_client *clp) if (IS_ERR(task)) { printk(KERN_ERR "%s: kthread_run: %ld\n", __func__, PTR_ERR(task)); + if (!nfs_client_init_is_complete(clp)) + nfs_mark_client_ready(clp, PTR_ERR(task)); nfs4_clear_state_manager_bit(clp); nfs_put_client(clp); module_put(THIS_MODULE); @@ -1594,7 +1596,7 @@ restart: continue; if (state->state == 0) continue; - atomic_inc(&state->count); + refcount_inc(&state->count); spin_unlock(&sp->so_lock); status = ops->recover_open(sp, state); if (status >= 0) { diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 56e48642c..f0021e3b8 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -4277,12 +4277,10 @@ static int decode_attr_security_label(struct xdr_stream *xdr, uint32_t *bitmap, if (unlikely(!p)) goto out_overflow; if (len < NFS4_MAXLABELLEN) { - if (label) { - if (label->len) { - if (label->len < len) - return -ERANGE; - memcpy(label->label, p, len); - } + if (label && label->len) { + if (label->len < len) + return -ERANGE; + memcpy(label->label, p, len); label->len = len; label->pi = pi; label->lfs = lfs; diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 7ee417b68..519d994c0 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -800,7 +800,6 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c } else { if (!conn->cb_xprt) return -EINVAL; - clp->cl_cb_conn.cb_xprt = conn->cb_xprt; clp->cl_cb_session = ses; args.bc_xprt = conn->cb_xprt; args.prognumber = clp->cl_cb_session->se_cb_prog; @@ -820,6 +819,9 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c rpc_shutdown_client(client); return PTR_ERR(cred); } + + if (clp->cl_minorversion != 0) + clp->cl_cb_conn.cb_xprt = conn->cb_xprt; clp->cl_cb_client = client; clp->cl_cb_cred = cred; return 0; diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c index f4cf1c079..cf81b5bc3 100644 --- a/fs/nfsd/nfs4layouts.c +++ b/fs/nfsd/nfs4layouts.c @@ -322,11 +322,11 @@ nfsd4_recall_file_layout(struct nfs4_layout_stateid *ls) if (ls->ls_recalled) goto out_unlock; - ls->ls_recalled = true; - atomic_inc(&ls->ls_stid.sc_file->fi_lo_recalls); if (list_empty(&ls->ls_layouts)) goto out_unlock; + ls->ls_recalled = true; + atomic_inc(&ls->ls_stid.sc_file->fi_lo_recalls); trace_nfsd_layout_recall(&ls->ls_stid.sc_stateid); refcount_inc(&ls->ls_stid.sc_count); diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 96b79bd90..c82f89832 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3102,6 +3102,17 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen, case nfserr_noent: xdr_truncate_encode(xdr, start_offset); goto skip_entry; + case nfserr_jukebox: + /* + * The pseudoroot should only display dentries that lead to + * exports. If we get EJUKEBOX here, then we can't tell whether + * this entry should be included. Just fail the whole READDIR + * with NFS4ERR_DELAY in that case, and hope that the situation + * will resolve itself by the client's next attempt. + */ + if (cd->rd_fhp->fh_export->ex_flags & NFSEXP_V4ROOT) + goto fail; + /* fallthrough */ default: /* * If the client requested the RDATTR_ERROR attribute, diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index 919d1238c..a0e37530d 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -480,9 +480,18 @@ static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr, ret = nilfs_btnode_submit_block(btnc, ptr, 0, REQ_OP_READ, 0, &bh, &submit_ptr); if (ret) { - if (ret != -EEXIST) - return ret; - goto out_check; + if (likely(ret == -EEXIST)) + goto out_check; + if (ret == -ENOENT) { + /* + * Block address translation failed due to invalid + * value of 'ptr'. In this case, return internal code + * -EINVAL (broken bmap) to notify bmap layer of fatal + * metadata corruption. + */ + ret = -EINVAL; + } + return ret; } if (ra) { diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 9b96d79ee..dfb2083b8 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -70,7 +70,7 @@ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs, if (argv->v_index > ~(__u64)0 - argv->v_nmembs) return -EINVAL; - buf = (void *)__get_free_pages(GFP_NOFS, 0); + buf = (void *)get_zeroed_page(GFP_NOFS); if (unlikely(!buf)) return -ENOMEM; maxmembs = PAGE_SIZE / argv->v_size; @@ -1135,7 +1135,14 @@ static int nilfs_ioctl_set_alloc_range(struct inode *inode, void __user *argp) minseg = range[0] + segbytes - 1; do_div(minseg, segbytes); + + if (range[1] < 4096) + goto out; + maxseg = NILFS_SB2_OFFSET_BYTES(range[1]); + if (maxseg < segbytes) + goto out; + do_div(maxseg, segbytes); maxseg--; diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 11914b358..b23ed9a35 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -435,6 +435,23 @@ static int nilfs_segctor_reset_segment_buffer(struct nilfs_sc_info *sci) return 0; } +/** + * nilfs_segctor_zeropad_segsum - zero pad the rest of the segment summary area + * @sci: segment constructor object + * + * nilfs_segctor_zeropad_segsum() zero-fills unallocated space at the end of + * the current segment summary block. + */ +static void nilfs_segctor_zeropad_segsum(struct nilfs_sc_info *sci) +{ + struct nilfs_segsum_pointer *ssp; + + ssp = sci->sc_blk_cnt > 0 ? &sci->sc_binfo_ptr : &sci->sc_finfo_ptr; + if (ssp->offset < ssp->bh->b_size) + memset(ssp->bh->b_data + ssp->offset, 0, + ssp->bh->b_size - ssp->offset); +} + static int nilfs_segctor_feed_segment(struct nilfs_sc_info *sci) { sci->sc_nblk_this_inc += sci->sc_curseg->sb_sum.nblocks; @@ -443,6 +460,7 @@ static int nilfs_segctor_feed_segment(struct nilfs_sc_info *sci) * The current segment is filled up * (internal code) */ + nilfs_segctor_zeropad_segsum(sci); sci->sc_curseg = NILFS_NEXT_SEGBUF(sci->sc_curseg); return nilfs_segctor_reset_segment_buffer(sci); } @@ -547,6 +565,7 @@ static int nilfs_segctor_add_file_block(struct nilfs_sc_info *sci, goto retry; } if (unlikely(required)) { + nilfs_segctor_zeropad_segsum(sci); err = nilfs_segbuf_extend_segsum(segbuf); if (unlikely(err)) goto failed; @@ -1531,6 +1550,7 @@ static int nilfs_segctor_collect(struct nilfs_sc_info *sci, nadd = min_t(int, nadd << 1, SC_MAX_SEGDELTA); sci->sc_stage = prev_stage; } + nilfs_segctor_zeropad_segsum(sci); nilfs_segctor_truncate_segments(sci, sci->sc_curseg, nilfs->ns_sufile); return 0; @@ -2609,11 +2629,10 @@ static int nilfs_segctor_thread(void *arg) goto loop; end_thread: - spin_unlock(&sci->sc_state_lock); - /* end sync. */ sci->sc_task = NULL; wake_up(&sci->sc_wait_task); /* for nilfs_segctor_kill_thread() */ + spin_unlock(&sci->sc_state_lock); return 0; } diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 5e4d7d191..221a54faa 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -411,6 +411,15 @@ int nilfs_resize_fs(struct super_block *sb, __u64 newsize) goto out; /* + * Prevent underflow in second superblock position calculation. + * The exact minimum size check is done in nilfs_sufile_resize(). + */ + if (newsize < 4096) { + ret = -ENOSPC; + goto out; + } + + /* * Write lock is required to protect some functions depending * on the number of segments, the number of reserved segments, * and so forth. @@ -475,6 +484,7 @@ static void nilfs_put_super(struct super_block *sb) up_write(&nilfs->ns_sem); } + nilfs_sysfs_delete_device_group(nilfs); iput(nilfs->ns_sufile); iput(nilfs->ns_cpfile); iput(nilfs->ns_dat); @@ -1101,6 +1111,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent) nilfs_put_root(fsroot); failed_unload: + nilfs_sysfs_delete_device_group(nilfs); iput(nilfs->ns_sufile); iput(nilfs->ns_cpfile); iput(nilfs->ns_dat); diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index fb61c33c6..24f626e7d 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -13,6 +13,7 @@ #include <linux/blkdev.h> #include <linux/backing-dev.h> #include <linux/random.h> +#include <linux/log2.h> #include <linux/crc32.h> #include "nilfs.h" #include "segment.h" @@ -86,7 +87,6 @@ void destroy_nilfs(struct the_nilfs *nilfs) { might_sleep(); if (nilfs_init(nilfs)) { - nilfs_sysfs_delete_device_group(nilfs); brelse(nilfs->ns_sbh[0]); brelse(nilfs->ns_sbh[1]); } @@ -274,6 +274,10 @@ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb) goto failed; } + err = nilfs_sysfs_create_device_group(sb); + if (unlikely(err)) + goto sysfs_error; + if (valid_fs) goto skip_recovery; @@ -335,6 +339,9 @@ int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb) goto failed; failed_unload: + nilfs_sysfs_delete_device_group(nilfs); + + sysfs_error: iput(nilfs->ns_cpfile); iput(nilfs->ns_sufile); iput(nilfs->ns_dat); @@ -448,11 +455,33 @@ static int nilfs_valid_sb(struct nilfs_super_block *sbp) return crc == le32_to_cpu(sbp->s_sum); } -static int nilfs_sb2_bad_offset(struct nilfs_super_block *sbp, u64 offset) +/** + * nilfs_sb2_bad_offset - check the location of the second superblock + * @sbp: superblock raw data buffer + * @offset: byte offset of second superblock calculated from device size + * + * nilfs_sb2_bad_offset() checks if the position on the second + * superblock is valid or not based on the filesystem parameters + * stored in @sbp. If @offset points to a location within the segment + * area, or if the parameters themselves are not normal, it is + * determined to be invalid. + * + * Return Value: true if invalid, false if valid. + */ +static bool nilfs_sb2_bad_offset(struct nilfs_super_block *sbp, u64 offset) { - return offset < ((le64_to_cpu(sbp->s_nsegments) * - le32_to_cpu(sbp->s_blocks_per_segment)) << - (le32_to_cpu(sbp->s_log_block_size) + 10)); + unsigned int shift_bits = le32_to_cpu(sbp->s_log_block_size); + u32 blocks_per_segment = le32_to_cpu(sbp->s_blocks_per_segment); + u64 nsegments = le64_to_cpu(sbp->s_nsegments); + u64 index; + + if (blocks_per_segment < NILFS_SEG_MIN_BLOCKS || + shift_bits > ilog2(NILFS_MAX_BLOCK_SIZE) - BLOCK_SIZE_BITS) + return true; + + index = offset >> (shift_bits + BLOCK_SIZE_BITS); + do_div(index, blocks_per_segment); + return index < nsegments; } static void nilfs_release_super_block(struct the_nilfs *nilfs) @@ -494,9 +523,15 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs, { struct nilfs_super_block **sbp = nilfs->ns_sbp; struct buffer_head **sbh = nilfs->ns_sbh; - u64 sb2off = NILFS_SB2_OFFSET_BYTES(nilfs->ns_bdev->bd_inode->i_size); + u64 sb2off, devsize = nilfs->ns_bdev->bd_inode->i_size; int valid[2], swp = 0; + if (devsize < NILFS_SEG_MIN_BLOCKS * NILFS_MIN_BLOCK_SIZE + 4096) { + nilfs_msg(sb, KERN_ERR, "device size too small"); + return -EINVAL; + } + sb2off = NILFS_SB2_OFFSET_BYTES(devsize); + sbp[0] = nilfs_read_super_block(sb, NILFS_SB_OFFSET_BYTES, blocksize, &sbh[0]); sbp[1] = nilfs_read_super_block(sb, sb2off, blocksize, &sbh[1]); @@ -639,10 +674,6 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data) if (err) goto failed_sbh; - err = nilfs_sysfs_create_device_group(sb); - if (err) - goto failed_sbh; - set_nilfs_init(nilfs); err = 0; out: diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index b6948813e..1353db3f7 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -2003,11 +2003,25 @@ int ocfs2_write_end_nolock(struct address_space *mapping, } if (unlikely(copied < len) && wc->w_target_page) { + loff_t new_isize; + if (!PageUptodate(wc->w_target_page)) copied = 0; - ocfs2_zero_new_buffers(wc->w_target_page, start+copied, - start+len); + new_isize = max_t(loff_t, i_size_read(inode), pos + copied); + if (new_isize > page_offset(wc->w_target_page)) + ocfs2_zero_new_buffers(wc->w_target_page, start+copied, + start+len); + else { + /* + * When page is fully beyond new isize (data copy + * failed), do not bother zeroing the page. Invalidate + * it instead so that writeback does not get confused + * put page & buffer dirty bits into inconsistent + * state. + */ + block_invalidatepage(wc->w_target_page, 0, PAGE_SIZE); + } } if (wc->w_target_page) flush_dcache_page(wc->w_target_page); diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c index 1565dd8e8..fbbc30f20 100644 --- a/fs/ocfs2/move_extents.c +++ b/fs/ocfs2/move_extents.c @@ -115,14 +115,6 @@ static int __ocfs2_move_extent(handle_t *handle, */ replace_rec.e_flags = ext_flags & ~OCFS2_EXT_REFCOUNTED; - ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), - context->et.et_root_bh, - OCFS2_JOURNAL_ACCESS_WRITE); - if (ret) { - mlog_errno(ret); - goto out; - } - ret = ocfs2_split_extent(handle, &context->et, path, index, &replace_rec, context->meta_ac, &context->dealloc); @@ -131,8 +123,6 @@ static int __ocfs2_move_extent(handle_t *handle, goto out; } - ocfs2_journal_dirty(handle, context->et.et_root_bh); - context->new_phys_cpos = new_p_cpos; /* @@ -454,7 +444,7 @@ static int ocfs2_find_victim_alloc_group(struct inode *inode, bg = (struct ocfs2_group_desc *)gd_bh->b_data; if (vict_blkno < (le64_to_cpu(bg->bg_blkno) + - le16_to_cpu(bg->bg_bits))) { + (le16_to_cpu(bg->bg_bits) << bits_per_unit))) { *ret_bh = gd_bh; *vict_bit = (vict_blkno - blkno) >> @@ -569,6 +559,7 @@ static void ocfs2_probe_alloc_group(struct inode *inode, struct buffer_head *bh, last_free_bits++; if (last_free_bits == move_len) { + i -= move_len; *goal_bit = i; *phys_cpos = base_cpos + i; break; @@ -1040,18 +1031,19 @@ int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp) context->range = ⦥ + /* + * ok, the default theshold for the defragmentation + * is 1M, since our maximum clustersize was 1M also. + * any thought? + */ + if (!range.me_threshold) + range.me_threshold = 1024 * 1024; + + if (range.me_threshold > i_size_read(inode)) + range.me_threshold = i_size_read(inode); + if (range.me_flags & OCFS2_MOVE_EXT_FL_AUTO_DEFRAG) { context->auto_defrag = 1; - /* - * ok, the default theshold for the defragmentation - * is 1M, since our maximum clustersize was 1M also. - * any thought? - */ - if (!range.me_threshold) - range.me_threshold = 1024 * 1024; - - if (range.me_threshold > i_size_read(inode)) - range.me_threshold = i_size_read(inode); if (range.me_flags & OCFS2_MOVE_EXT_FL_PART_DEFRAG) context->partial = 1; diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index e7eb08ac4..10d691530 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c @@ -715,6 +715,8 @@ static struct ctl_table_header *ocfs2_table_header; static int __init ocfs2_stack_glue_init(void) { + int ret; + strcpy(cluster_stack_name, OCFS2_STACK_PLUGIN_O2CB); ocfs2_table_header = register_sysctl_table(ocfs2_root_table); @@ -724,7 +726,11 @@ static int __init ocfs2_stack_glue_init(void) return -ENOMEM; /* or something. */ } - return ocfs2_sysfs_init(); + ret = ocfs2_sysfs_init(); + if (ret) + unregister_sysctl_table(ocfs2_table_header); + + return ret; } static void __exit ocfs2_stack_glue_exit(void) diff --git a/fs/orangefs/orangefs-debugfs.c b/fs/orangefs/orangefs-debugfs.c index e24738c69..f79c015fa 100644 --- a/fs/orangefs/orangefs-debugfs.c +++ b/fs/orangefs/orangefs-debugfs.c @@ -254,6 +254,8 @@ out: void orangefs_debugfs_cleanup(void) { debugfs_remove_recursive(debug_dir); + kfree(debug_help_string); + debug_help_string = NULL; } /* open ORANGEFS_KMOD_DEBUG_HELP_FILE */ @@ -709,6 +711,7 @@ int orangefs_prepare_debugfs_help_string(int at_boot) memset(debug_help_string, 0, DEBUG_HELP_STRING_SIZE); strlcat(debug_help_string, new, string_size); mutex_unlock(&orangefs_help_file_lock); + kfree(new); } rc = 0; diff --git a/fs/orangefs/orangefs-mod.c b/fs/orangefs/orangefs-mod.c index 85ef87245..c8818163e 100644 --- a/fs/orangefs/orangefs-mod.c +++ b/fs/orangefs/orangefs-mod.c @@ -141,7 +141,7 @@ static int __init orangefs_init(void) gossip_err("%s: could not initialize device subsystem %d!\n", __func__, ret); - goto cleanup_device; + goto cleanup_sysfs; } ret = register_filesystem(&orangefs_fs_type); @@ -153,11 +153,11 @@ static int __init orangefs_init(void) goto out; } - orangefs_sysfs_exit(); - -cleanup_device: orangefs_dev_cleanup(); +cleanup_sysfs: + orangefs_sysfs_exit(); + sysfs_init_failed: debugfs_init_failed: diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 06dc64962..9fa64dbde 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -561,28 +561,42 @@ static int ovl_create_or_link(struct dentry *dentry, struct inode *inode, goto out_revert_creds; } - err = -ENOMEM; - override_cred = prepare_creds(); - if (override_cred) { + if (!attr->hardlink) { + err = -ENOMEM; + override_cred = prepare_creds(); + if (!override_cred) + goto out_revert_creds; + /* + * In the creation cases(create, mkdir, mknod, symlink), + * ovl should transfer current's fs{u,g}id to underlying + * fs. Because underlying fs want to initialize its new + * inode owner using current's fs{u,g}id. And in this + * case, the @inode is a new inode that is initialized + * in inode_init_owner() to current's fs{u,g}id. So use + * the inode's i_{u,g}id to override the cred's fs{u,g}id. + * + * But in the other hardlink case, ovl_link() does not + * create a new inode, so just use the ovl mounter's + * fs{u,g}id. + */ override_cred->fsuid = inode->i_uid; override_cred->fsgid = inode->i_gid; - if (!attr->hardlink) { - err = security_dentry_create_files_as(dentry, - attr->mode, &dentry->d_name, old_cred, - override_cred); - if (err) { - put_cred(override_cred); - goto out_revert_creds; - } + err = security_dentry_create_files_as(dentry, + attr->mode, &dentry->d_name, old_cred, + override_cred); + if (err) { + put_cred(override_cred); + goto out_revert_creds; } put_cred(override_creds(override_cred)); put_cred(override_cred); - - if (!ovl_dentry_is_whiteout(dentry)) - err = ovl_create_upper(dentry, inode, attr); - else - err = ovl_create_over_whiteout(dentry, inode, attr); } + + if (!ovl_dentry_is_whiteout(dentry)) + err = ovl_create_upper(dentry, inode, attr); + else + err = ovl_create_over_whiteout(dentry, inode, attr); + out_revert_creds: revert_creds(old_cred); return err; diff --git a/fs/pnode.c b/fs/pnode.c index 7910ae91f..d27b7b97c 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -245,7 +245,7 @@ static int propagate_one(struct mount *m) } do { struct mount *parent = last_source->mnt_parent; - if (last_source == first_source) + if (peers(last_source, first_source)) break; done = parent->mnt_master == p; if (done && peers(n, parent)) diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index c95f32b83..7c62a5265 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -13,6 +13,7 @@ #include <linux/namei.h> #include <linux/mm.h> #include <linux/module.h> +#include <linux/kmemleak.h> #include "internal.h" static const struct dentry_operations proc_sys_dentry_operations; @@ -1376,6 +1377,38 @@ struct ctl_table_header *register_sysctl(const char *path, struct ctl_table *tab } EXPORT_SYMBOL(register_sysctl); +/** + * __register_sysctl_init() - register sysctl table to path + * @path: path name for sysctl base + * @table: This is the sysctl table that needs to be registered to the path + * @table_name: The name of sysctl table, only used for log printing when + * registration fails + * + * The sysctl interface is used by userspace to query or modify at runtime + * a predefined value set on a variable. These variables however have default + * values pre-set. Code which depends on these variables will always work even + * if register_sysctl() fails. If register_sysctl() fails you'd just loose the + * ability to query or modify the sysctls dynamically at run time. Chances of + * register_sysctl() failing on init are extremely low, and so for both reasons + * this function does not return any error as it is used by initialization code. + * + * Context: Can only be called after your respective sysctl base path has been + * registered. So for instance, most base directories are registered early on + * init before init levels are processed through proc_sys_init() and + * sysctl_init(). + */ +void __init __register_sysctl_init(const char *path, struct ctl_table *table, + const char *table_name) +{ + struct ctl_table_header *hdr = register_sysctl(path, table); + + if (unlikely(!hdr)) { + pr_err("failed when register_sysctl %s to %s\n", table_name, path); + return; + } + kmemleak_not_leak(hdr); +} + static char *append_path(const char *path, char *pos, const char *name) { int namelen; diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index dc63d4c60..2f2afc3c6 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -701,9 +701,7 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask, page = device_private_entry_to_page(swpent); } if (page) { - int mapcount = page_mapcount(page); - - if (mapcount >= 2) + if (page_mapcount(page) >= 2 || hugetlb_pmd_shared(pte)) mss->shared_hugetlb += huge_page_size(hstate_vma(vma)); else mss->private_hugetlb += huge_page_size(hstate_vma(vma)); diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig index 503086f7f..d5fb6d95d 100644 --- a/fs/pstore/Kconfig +++ b/fs/pstore/Kconfig @@ -117,6 +117,7 @@ config PSTORE_CONSOLE config PSTORE_PMSG bool "Log user space messages" depends on PSTORE + select RT_MUTEXES help When the option is enabled, pstore will export a character interface /dev/pmsg0 to log user space messages. On reboot diff --git a/fs/pstore/pmsg.c b/fs/pstore/pmsg.c index 24db02de1..ffc13ea19 100644 --- a/fs/pstore/pmsg.c +++ b/fs/pstore/pmsg.c @@ -15,9 +15,10 @@ #include <linux/device.h> #include <linux/fs.h> #include <linux/uaccess.h> +#include <linux/rtmutex.h> #include "internal.h" -static DEFINE_MUTEX(pmsg_lock); +static DEFINE_RT_MUTEX(pmsg_lock); static ssize_t write_pmsg(struct file *file, const char __user *buf, size_t count, loff_t *ppos) @@ -36,9 +37,9 @@ static ssize_t write_pmsg(struct file *file, const char __user *buf, if (!access_ok(VERIFY_READ, buf, count)) return -EFAULT; - mutex_lock(&pmsg_lock); + rt_mutex_lock(&pmsg_lock); ret = psinfo->write_user(&record, buf); - mutex_unlock(&pmsg_lock); + rt_mutex_unlock(&pmsg_lock); return ret ? ret : count; } diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index bafbab2dd..33294dee7 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -753,6 +753,7 @@ static int ramoops_probe(struct platform_device *pdev) /* Make sure we didn't get bogus platform data pointer. */ if (!pdata) { pr_err("NULL platform data\n"); + err = -EINVAL; goto fail_out; } @@ -760,6 +761,7 @@ static int ramoops_probe(struct platform_device *pdev) !pdata->ftrace_size && !pdata->pmsg_size)) { pr_err("The memory size and the record/console size must be " "non-zero\n"); + err = -EINVAL; goto fail_out; } diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c index 3c777ec80..60dff7180 100644 --- a/fs/pstore/ram_core.c +++ b/fs/pstore/ram_core.c @@ -426,7 +426,11 @@ static void *persistent_ram_vmap(phys_addr_t start, size_t size, phys_addr_t addr = page_start + i * PAGE_SIZE; pages[i] = pfn_to_page(addr >> PAGE_SHIFT); } - vaddr = vmap(pages, page_count, VM_MAP, prot); + /* + * VM_IOREMAP used here to bypass this region during vread() + * and kmap_atomic() (i.e. kcore) to avoid __va() failures. + */ + vaddr = vmap(pages, page_count, VM_MAP | VM_IOREMAP, prot); kfree(pages); /* diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index ddb379abd..770a2b143 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -2298,28 +2298,62 @@ EXPORT_SYMBOL(dquot_quota_off); * Turn quotas on on a device */ -/* - * Helper function to turn quotas on when we already have the inode of - * quota file and no quota information is loaded. - */ -static int vfs_load_quota_inode(struct inode *inode, int type, int format_id, +static int vfs_setup_quota_inode(struct inode *inode, int type) +{ + struct super_block *sb = inode->i_sb; + struct quota_info *dqopt = sb_dqopt(sb); + + if (is_bad_inode(inode)) + return -EUCLEAN; + if (!S_ISREG(inode->i_mode)) + return -EACCES; + if (IS_RDONLY(inode)) + return -EROFS; + if (sb_has_quota_loaded(sb, type)) + return -EBUSY; + + dqopt->files[type] = igrab(inode); + if (!dqopt->files[type]) + return -EIO; + if (!(dqopt->flags & DQUOT_QUOTA_SYS_FILE)) { + /* We don't want quota and atime on quota files (deadlocks + * possible) Also nobody should write to the file - we use + * special IO operations which ignore the immutable bit. */ + inode_lock(inode); + inode->i_flags |= S_NOQUOTA; + inode_unlock(inode); + /* + * When S_NOQUOTA is set, remove dquot references as no more + * references can be added + */ + __dquot_drop(inode); + } + return 0; +} + +static void vfs_cleanup_quota_inode(struct super_block *sb, int type) +{ + struct quota_info *dqopt = sb_dqopt(sb); + struct inode *inode = dqopt->files[type]; + + if (!(dqopt->flags & DQUOT_QUOTA_SYS_FILE)) { + inode_lock(inode); + inode->i_flags &= ~S_NOQUOTA; + inode_unlock(inode); + } + dqopt->files[type] = NULL; + iput(inode); +} + +int dquot_load_quota_sb(struct super_block *sb, int type, int format_id, unsigned int flags) { struct quota_format_type *fmt = find_quota_format(format_id); - struct super_block *sb = inode->i_sb; struct quota_info *dqopt = sb_dqopt(sb); int error; if (!fmt) return -ESRCH; - if (!S_ISREG(inode->i_mode)) { - error = -EACCES; - goto out_fmt; - } - if (IS_RDONLY(inode)) { - error = -EROFS; - goto out_fmt; - } if (!sb->s_op->quota_write || !sb->s_op->quota_read || (type == PRJQUOTA && sb->dq_op->get_projid == NULL)) { error = -EINVAL; @@ -2351,27 +2385,9 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id, invalidate_bdev(sb->s_bdev); } - if (!(dqopt->flags & DQUOT_QUOTA_SYS_FILE)) { - /* We don't want quota and atime on quota files (deadlocks - * possible) Also nobody should write to the file - we use - * special IO operations which ignore the immutable bit. */ - inode_lock(inode); - inode->i_flags |= S_NOQUOTA; - inode_unlock(inode); - /* - * When S_NOQUOTA is set, remove dquot references as no more - * references can be added - */ - __dquot_drop(inode); - } - - error = -EIO; - dqopt->files[type] = igrab(inode); - if (!dqopt->files[type]) - goto out_file_flags; error = -EINVAL; if (!fmt->qf_ops->check_quota_file(sb, type)) - goto out_file_init; + goto out_fmt; dqopt->ops[type] = fmt->qf_ops; dqopt->info[type].dqi_format = fmt; @@ -2379,7 +2395,7 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id, INIT_LIST_HEAD(&dqopt->info[type].dqi_dirty_list); error = dqopt->ops[type]->read_file_info(sb, type); if (error < 0) - goto out_file_init; + goto out_fmt; if (dqopt->flags & DQUOT_QUOTA_SYS_FILE) { spin_lock(&dq_data_lock); dqopt->info[type].dqi_flags |= DQF_SYS_FILE; @@ -2394,18 +2410,30 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id, dquot_disable(sb, type, flags); return error; -out_file_init: - dqopt->files[type] = NULL; - iput(inode); -out_file_flags: - inode_lock(inode); - inode->i_flags &= ~S_NOQUOTA; - inode_unlock(inode); out_fmt: put_quota_format(fmt); return error; } +EXPORT_SYMBOL(dquot_load_quota_sb); + +/* + * Helper function to turn quotas on when we already have the inode of + * quota file and no quota information is loaded. + */ +static int vfs_load_quota_inode(struct inode *inode, int type, int format_id, + unsigned int flags) +{ + int err; + + err = vfs_setup_quota_inode(inode, type); + if (err < 0) + return err; + err = dquot_load_quota_sb(inode->i_sb, type, format_id, flags); + if (err < 0) + vfs_cleanup_quota_inode(inode->i_sb, type); + return err; +} /* Reenable quotas on remount RW */ int dquot_resume(struct super_block *sb, int type) diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index 959a066b7..2843b7cf4 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c @@ -695,6 +695,7 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, umode_t mod out_failed: reiserfs_write_unlock(dir->i_sb); + reiserfs_security_free(&security); return retval; } @@ -778,6 +779,7 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode out_failed: reiserfs_write_unlock(dir->i_sb); + reiserfs_security_free(&security); return retval; } @@ -876,6 +878,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode retval = journal_end(&th); out_failed: reiserfs_write_unlock(dir->i_sb); + reiserfs_security_free(&security); return retval; } @@ -1191,6 +1194,7 @@ static int reiserfs_symlink(struct inode *parent_dir, retval = journal_end(&th); out_failed: reiserfs_write_unlock(parent_dir->i_sb); + reiserfs_security_free(&security); return retval; } diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 831a542c2..e5be1d747 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -1443,7 +1443,6 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) unsigned long safe_mask = 0; unsigned int commit_max_age = (unsigned int)-1; struct reiserfs_journal *journal = SB_JOURNAL(s); - char *new_opts; int err; char *qf_names[REISERFS_MAXQUOTAS]; unsigned int qfmt = 0; @@ -1451,10 +1450,6 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) int i; #endif - new_opts = kstrdup(arg, GFP_KERNEL); - if (arg && !new_opts) - return -ENOMEM; - sync_filesystem(s); reiserfs_write_lock(s); @@ -1605,7 +1600,6 @@ out_ok_unlocked: out_err_unlock: reiserfs_write_unlock(s); out_err: - kfree(new_opts); return err; } diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c index 20be9a0e5..59d87f9f7 100644 --- a/fs/reiserfs/xattr_security.c +++ b/fs/reiserfs/xattr_security.c @@ -49,6 +49,7 @@ int reiserfs_security_init(struct inode *dir, struct inode *inode, int error; sec->name = NULL; + sec->value = NULL; /* Don't add selinux attributes on xattrs - they'll never get used */ if (IS_PRIVATE(dir)) @@ -94,7 +95,6 @@ int reiserfs_security_write(struct reiserfs_transaction_handle *th, void reiserfs_security_free(struct reiserfs_security_handle *sec) { - kfree(sec->name); kfree(sec->value); sec->name = NULL; sec->value = NULL; diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h index 10e93345b..75886e6d9 100644 --- a/fs/squashfs/squashfs_fs.h +++ b/fs/squashfs/squashfs_fs.h @@ -196,7 +196,7 @@ static inline int squashfs_block_size(__le32 raw) #define SQUASHFS_ID_BLOCK_BYTES(A) (SQUASHFS_ID_BLOCKS(A) *\ sizeof(u64)) /* xattr id lookup table defines */ -#define SQUASHFS_XATTR_BYTES(A) ((A) * sizeof(struct squashfs_xattr_id)) +#define SQUASHFS_XATTR_BYTES(A) (((u64) (A)) * sizeof(struct squashfs_xattr_id)) #define SQUASHFS_XATTR_BLOCK(A) (SQUASHFS_XATTR_BYTES(A) / \ SQUASHFS_METADATA_SIZE) diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h index 5234c19a0..7ec30a112 100644 --- a/fs/squashfs/squashfs_fs_sb.h +++ b/fs/squashfs/squashfs_fs_sb.h @@ -76,7 +76,7 @@ struct squashfs_sb_info { long long bytes_used; unsigned int inodes; unsigned int fragments; - int xattr_ids; + unsigned int xattr_ids; unsigned int ids; }; #endif diff --git a/fs/squashfs/xattr.h b/fs/squashfs/xattr.h index 86b0a0073..f360f27e3 100644 --- a/fs/squashfs/xattr.h +++ b/fs/squashfs/xattr.h @@ -23,12 +23,12 @@ #ifdef CONFIG_SQUASHFS_XATTR extern __le64 *squashfs_read_xattr_id_table(struct super_block *, u64, - u64 *, int *); + u64 *, unsigned int *); extern int squashfs_xattr_lookup(struct super_block *, unsigned int, int *, unsigned int *, unsigned long long *); #else static inline __le64 *squashfs_read_xattr_id_table(struct super_block *sb, - u64 start, u64 *xattr_table_start, int *xattr_ids) + u64 start, u64 *xattr_table_start, unsigned int *xattr_ids) { struct squashfs_xattr_id_table *id_table; diff --git a/fs/squashfs/xattr_id.c b/fs/squashfs/xattr_id.c index 7f718d2bf..fe266b3e9 100644 --- a/fs/squashfs/xattr_id.c +++ b/fs/squashfs/xattr_id.c @@ -69,7 +69,7 @@ int squashfs_xattr_lookup(struct super_block *sb, unsigned int index, * Read uncompressed xattr id lookup table indexes from disk into memory */ __le64 *squashfs_read_xattr_id_table(struct super_block *sb, u64 table_start, - u64 *xattr_table_start, int *xattr_ids) + u64 *xattr_table_start, unsigned int *xattr_ids) { struct squashfs_sb_info *msblk = sb->s_fs_info; unsigned int len, indexes; diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 011e39149..cd70dbeea 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -599,7 +599,7 @@ int sysfs_emit_at(char *buf, int at, const char *fmt, ...) va_list args; int len; - if (WARN(!buf || offset_in_page(buf) || at < 0 || at >= PAGE_SIZE, + if (WARN(!buf || at < 0 || at >= PAGE_SIZE, "invalid sysfs_emit_at: buf:%p at:%d\n", buf, at)) return 0; diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c index bcb67b0ca..31f66053e 100644 --- a/fs/sysv/itree.c +++ b/fs/sysv/itree.c @@ -438,7 +438,7 @@ static unsigned sysv_nblocks(struct super_block *s, loff_t size) res += blocks; direct = 1; } - return blocks; + return res; } int sysv_getattr(const struct path *path, struct kstat *stat, diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index 7ef22baf9..30c7bd63c 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c @@ -224,11 +224,10 @@ long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs) subtract_lebs += 1; /* - * The GC journal head LEB is not really accessible. And since - * different write types go to different heads, we may count only on - * one head's space. + * Since different write types go to different heads, we should + * reserve one leb for each head. */ - subtract_lebs += c->jhead_cnt - 1; + subtract_lebs += c->jhead_cnt; /* We also reserve one LEB for deletions, which bypass budgeting */ subtract_lebs += 1; @@ -415,7 +414,7 @@ static int calc_dd_growth(const struct ubifs_info *c, dd_growth = req->dirtied_page ? c->bi.page_budget : 0; if (req->dirtied_ino) - dd_growth += c->bi.inode_budget << (req->dirtied_ino - 1); + dd_growth += c->bi.inode_budget * req->dirtied_ino; if (req->mod_dent) dd_growth += c->bi.dent_budget; dd_growth += req->dirtied_ino_d; diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 111905ddb..3b93b14e0 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -1141,7 +1141,6 @@ static int ubifs_symlink(struct inode *dir, struct dentry *dentry, int err, sz_change, len = strlen(symname); struct fscrypt_str disk_link; struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, - .new_ino_d = ALIGN(len, 8), .dirtied_ino = 1 }; struct fscrypt_name nm; @@ -1157,6 +1156,7 @@ static int ubifs_symlink(struct inode *dir, struct dentry *dentry, * Budget request settings: new inode, new direntry and changing parent * directory inode. */ + req.new_ino_d = ALIGN(disk_link.len - 1, 8); err = ubifs_budget_space(c, &req); if (err) return err; @@ -1309,9 +1309,13 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry, old_dentry, old_inode->i_ino, old_dir->i_ino, new_dentry, new_dir->i_ino, flags); - if (unlink) + if (unlink) { ubifs_assert(c, inode_is_locked(new_inode)); + /* Budget for old inode's data when its nlink > 1. */ + req.dirtied_ino_d = ALIGN(ubifs_inode(new_inode)->data_len, 8); + } + if (unlink && is_dir) { err = ubifs_check_dir_empty(new_inode); if (err) @@ -1549,6 +1553,10 @@ static int ubifs_xrename(struct inode *old_dir, struct dentry *old_dentry, return err; } + err = ubifs_budget_space(c, &req); + if (err) + goto out; + lock_4_inodes(old_dir, new_dir, NULL, NULL); time = current_time(old_dir); @@ -1574,6 +1582,7 @@ static int ubifs_xrename(struct inode *old_dir, struct dentry *old_dentry, unlock_4_inodes(old_dir, new_dir, NULL, NULL); ubifs_release_budget(c, &req); +out: fscrypt_free_filename(&fst_nm); fscrypt_free_filename(&snd_nm); return err; diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 3dbb5ac63..ae836e8bb 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1043,7 +1043,7 @@ static int ubifs_writepage(struct page *page, struct writeback_control *wbc) if (page->index >= synced_i_size >> PAGE_SHIFT) { err = inode->i_sb->s_op->write_inode(inode, NULL); if (err) - goto out_unlock; + goto out_redirty; /* * The inode has been written, but the write-buffer has * not been synchronized, so in case of an unclean @@ -1071,11 +1071,17 @@ static int ubifs_writepage(struct page *page, struct writeback_control *wbc) if (i_size > synced_i_size) { err = inode->i_sb->s_op->write_inode(inode, NULL); if (err) - goto out_unlock; + goto out_redirty; } return do_writepage(page, len); - +out_redirty: + /* + * redirty_page_for_writepage() won't call ubifs_dirty_inode() because + * it passes I_DIRTY_PAGES flag while calling __mark_inode_dirty(), so + * there is no need to do space budget for dirty inode. + */ + redirty_page_for_writepage(wbc, page); out_unlock: unlock_page(page); return err; diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index f15ac3795..4665c4d7d 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c @@ -279,11 +279,18 @@ static struct ubifs_znode *dirty_cow_znode(struct ubifs_info *c, if (zbr->len) { err = insert_old_idx(c, zbr->lnum, zbr->offs); if (unlikely(err)) - return ERR_PTR(err); + /* + * Obsolete znodes will be freed by tnc_destroy_cnext() + * or free_obsolete_znodes(), copied up znodes should + * be added back to tnc and freed by + * ubifs_destroy_tnc_subtree(). + */ + goto out; err = add_idx_dirt(c, zbr->lnum, zbr->len); } else err = 0; +out: zbr->znode = zn; zbr->lnum = 0; zbr->offs = 0; @@ -3046,6 +3053,21 @@ static void tnc_destroy_cnext(struct ubifs_info *c) cnext = cnext->cnext; if (ubifs_zn_obsolete(znode)) kfree(znode); + else if (!ubifs_zn_cow(znode)) { + /* + * Don't forget to update clean znode count after + * committing failed, because ubifs will check this + * count while closing tnc. Non-obsolete znode could + * be re-dirtied during committing process, so dirty + * flag is untrustable. The flag 'COW_ZNODE' is set + * for each dirty znode before committing, and it is + * cleared as long as the znode become clean, so we + * can statistic clean znode count according to this + * flag. + */ + atomic_long_inc(&c->clean_zn_cnt); + atomic_long_inc(&ubifs_clean_zn_cnt); + } } while (cnext && cnext != c->cnext); } diff --git a/fs/udf/directory.c b/fs/udf/directory.c index d95230130..73720320f 100644 --- a/fs/udf/directory.c +++ b/fs/udf/directory.c @@ -34,7 +34,7 @@ struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t *nf_pos, fibh->soffset = fibh->eoffset; if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { - fi = udf_get_fileident(iinfo->i_ext.i_data - + fi = udf_get_fileident(iinfo->i_data - (iinfo->i_efe ? sizeof(struct extendedFileEntry) : sizeof(struct fileEntry)), diff --git a/fs/udf/file.c b/fs/udf/file.c index cd31e4f6d..8fff7ffc3 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -50,7 +50,7 @@ static void __udf_adinicb_readpage(struct page *page) * So just sample it once and use the same value everywhere. */ kaddr = kmap_atomic(page); - memcpy(kaddr, iinfo->i_ext.i_data + iinfo->i_lenEAttr, isize); + memcpy(kaddr, iinfo->i_data + iinfo->i_lenEAttr, isize); memset(kaddr + isize, 0, PAGE_SIZE - isize); flush_dcache_page(page); SetPageUptodate(page); @@ -76,8 +76,7 @@ static int udf_adinicb_writepage(struct page *page, BUG_ON(!PageLocked(page)); kaddr = kmap_atomic(page); - memcpy(iinfo->i_ext.i_data + iinfo->i_lenEAttr, kaddr, - i_size_read(inode)); + memcpy(iinfo->i_data + iinfo->i_lenEAttr, kaddr, i_size_read(inode)); SetPageUptodate(page); kunmap_atomic(kaddr); mark_inode_dirty(inode); @@ -148,26 +147,24 @@ static ssize_t udf_file_write_iter(struct kiocb *iocb, struct iov_iter *from) goto out; down_write(&iinfo->i_data_sem); - if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { - loff_t end = iocb->ki_pos + iov_iter_count(from); - - if (inode->i_sb->s_blocksize < - (udf_file_entry_alloc_offset(inode) + end)) { - err = udf_expand_file_adinicb(inode); - if (err) { - inode_unlock(inode); - udf_debug("udf_expand_adinicb: err=%d\n", err); - return err; - } - } else { - iinfo->i_lenAlloc = max(end, inode->i_size); - up_write(&iinfo->i_data_sem); + if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB && + inode->i_sb->s_blocksize < (udf_file_entry_alloc_offset(inode) + + iocb->ki_pos + iov_iter_count(from))) { + err = udf_expand_file_adinicb(inode); + if (err) { + inode_unlock(inode); + udf_debug("udf_expand_adinicb: err=%d\n", err); + return err; } } else up_write(&iinfo->i_data_sem); retval = __generic_file_write_iter(iocb, from); out: + down_write(&iinfo->i_data_sem); + if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB && retval > 0) + iinfo->i_lenAlloc = inode->i_size; + up_write(&iinfo->i_data_sem); inode_unlock(inode); if (retval > 0) { @@ -215,7 +212,7 @@ long udf_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return put_user(UDF_I(inode)->i_lenEAttr, (int __user *)arg); case UDF_GETEABLOCK: return copy_to_user((char __user *)arg, - UDF_I(inode)->i_ext.i_data, + UDF_I(inode)->i_data, UDF_I(inode)->i_lenEAttr) ? -EFAULT : 0; default: return -ENOIOCTLCMD; diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c index f8e5872f7..cdaa86e07 100644 --- a/fs/udf/ialloc.c +++ b/fs/udf/ialloc.c @@ -67,16 +67,16 @@ struct inode *udf_new_inode(struct inode *dir, umode_t mode) iinfo->i_efe = 1; if (UDF_VERS_USE_EXTENDED_FE > sbi->s_udfrev) sbi->s_udfrev = UDF_VERS_USE_EXTENDED_FE; - iinfo->i_ext.i_data = kzalloc(inode->i_sb->s_blocksize - - sizeof(struct extendedFileEntry), - GFP_KERNEL); + iinfo->i_data = kzalloc(inode->i_sb->s_blocksize - + sizeof(struct extendedFileEntry), + GFP_KERNEL); } else { iinfo->i_efe = 0; - iinfo->i_ext.i_data = kzalloc(inode->i_sb->s_blocksize - - sizeof(struct fileEntry), - GFP_KERNEL); + iinfo->i_data = kzalloc(inode->i_sb->s_blocksize - + sizeof(struct fileEntry), + GFP_KERNEL); } - if (!iinfo->i_ext.i_data) { + if (!iinfo->i_data) { iput(inode); return ERR_PTR(-ENOMEM); } diff --git a/fs/udf/inode.c b/fs/udf/inode.c index ec8089a31..77421e656 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -150,8 +150,8 @@ void udf_evict_inode(struct inode *inode) truncate_inode_pages_final(&inode->i_data); invalidate_inode_buffers(inode); clear_inode(inode); - kfree(iinfo->i_ext.i_data); - iinfo->i_ext.i_data = NULL; + kfree(iinfo->i_data); + iinfo->i_data = NULL; udf_clear_extent_cache(inode); if (want_delete) { udf_free_inode(inode); @@ -278,14 +278,14 @@ int udf_expand_file_adinicb(struct inode *inode) kaddr = kmap_atomic(page); memset(kaddr + iinfo->i_lenAlloc, 0x00, PAGE_SIZE - iinfo->i_lenAlloc); - memcpy(kaddr, iinfo->i_ext.i_data + iinfo->i_lenEAttr, + memcpy(kaddr, iinfo->i_data + iinfo->i_lenEAttr, iinfo->i_lenAlloc); flush_dcache_page(page); SetPageUptodate(page); kunmap_atomic(kaddr); } down_write(&iinfo->i_data_sem); - memset(iinfo->i_ext.i_data + iinfo->i_lenEAttr, 0x00, + memset(iinfo->i_data + iinfo->i_lenEAttr, 0x00, iinfo->i_lenAlloc); iinfo->i_lenAlloc = 0; if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_SHORT_AD)) @@ -303,8 +303,7 @@ int udf_expand_file_adinicb(struct inode *inode) lock_page(page); down_write(&iinfo->i_data_sem); kaddr = kmap_atomic(page); - memcpy(iinfo->i_ext.i_data + iinfo->i_lenEAttr, kaddr, - inode->i_size); + memcpy(iinfo->i_data + iinfo->i_lenEAttr, kaddr, inode->i_size); kunmap_atomic(kaddr); unlock_page(page); iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB; @@ -392,8 +391,7 @@ struct buffer_head *udf_expand_dir_adinicb(struct inode *inode, } mark_buffer_dirty_inode(dbh, inode); - memset(iinfo->i_ext.i_data + iinfo->i_lenEAttr, 0, - iinfo->i_lenAlloc); + memset(iinfo->i_data + iinfo->i_lenEAttr, 0, iinfo->i_lenAlloc); iinfo->i_lenAlloc = 0; eloc.logicalBlockNum = *block; eloc.partitionReferenceNum = @@ -434,6 +432,12 @@ static int udf_get_block(struct inode *inode, sector_t block, iinfo->i_next_alloc_goal++; } + /* + * Block beyond EOF and prealloc extents? Just discard preallocation + * as it is not useful and complicates things. + */ + if (((loff_t)block) << inode->i_blkbits > iinfo->i_lenExtents) + udf_discard_prealloc(inode); udf_clear_extent_cache(inode); phys = inode_getblk(inode, block, &err, &new); if (!phys) @@ -483,8 +487,6 @@ static int udf_do_extend_file(struct inode *inode, uint32_t add; int count = 0, fake = !(last_ext->extLength & UDF_EXTENT_LENGTH_MASK); struct super_block *sb = inode->i_sb; - struct kernel_lb_addr prealloc_loc = {}; - uint32_t prealloc_len = 0; struct udf_inode_info *iinfo; int err; @@ -505,19 +507,6 @@ static int udf_do_extend_file(struct inode *inode, ~(sb->s_blocksize - 1); } - /* Last extent are just preallocated blocks? */ - if ((last_ext->extLength & UDF_EXTENT_FLAG_MASK) == - EXT_NOT_RECORDED_ALLOCATED) { - /* Save the extent so that we can reattach it to the end */ - prealloc_loc = last_ext->extLocation; - prealloc_len = last_ext->extLength; - /* Mark the extent as a hole */ - last_ext->extLength = EXT_NOT_RECORDED_NOT_ALLOCATED | - (last_ext->extLength & UDF_EXTENT_LENGTH_MASK); - last_ext->extLocation.logicalBlockNum = 0; - last_ext->extLocation.partitionReferenceNum = 0; - } - /* Can we merge with the previous extent? */ if ((last_ext->extLength & UDF_EXTENT_FLAG_MASK) == EXT_NOT_RECORDED_NOT_ALLOCATED) { @@ -530,8 +519,10 @@ static int udf_do_extend_file(struct inode *inode, } if (fake) { - udf_add_aext(inode, last_pos, &last_ext->extLocation, - last_ext->extLength, 1); + err = udf_add_aext(inode, last_pos, &last_ext->extLocation, + last_ext->extLength, 1); + if (err < 0) + goto out_err; count++; } else { struct kernel_lb_addr tmploc; @@ -545,7 +536,7 @@ static int udf_do_extend_file(struct inode *inode, * more extents, we may need to enter possible following * empty indirect extent. */ - if (new_block_bytes || prealloc_len) + if (new_block_bytes) udf_next_aext(inode, last_pos, &tmploc, &tmplen, 0); } @@ -565,7 +556,7 @@ static int udf_do_extend_file(struct inode *inode, err = udf_add_aext(inode, last_pos, &last_ext->extLocation, last_ext->extLength, 1); if (err) - return err; + goto out_err; count++; } if (new_block_bytes) { @@ -574,22 +565,11 @@ static int udf_do_extend_file(struct inode *inode, err = udf_add_aext(inode, last_pos, &last_ext->extLocation, last_ext->extLength, 1); if (err) - return err; + goto out_err; count++; } out: - /* Do we have some preallocated blocks saved? */ - if (prealloc_len) { - err = udf_add_aext(inode, last_pos, &prealloc_loc, - prealloc_len, 1); - if (err) - return err; - last_ext->extLocation = prealloc_loc; - last_ext->extLength = prealloc_len; - count++; - } - /* last_pos should point to the last written extent... */ if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) last_pos->offset -= sizeof(struct short_ad); @@ -599,19 +579,28 @@ out: return -EIO; return count; +out_err: + /* Remove extents we've created so far */ + udf_clear_extent_cache(inode); + udf_truncate_extents(inode); + return err; } /* Extend the final block of the file to final_block_len bytes */ static void udf_do_extend_final_block(struct inode *inode, struct extent_position *last_pos, struct kernel_long_ad *last_ext, - uint32_t final_block_len) + uint32_t new_elen) { - struct super_block *sb = inode->i_sb; uint32_t added_bytes; - added_bytes = final_block_len - - (last_ext->extLength & (sb->s_blocksize - 1)); + /* + * Extent already large enough? It may be already rounded up to block + * size... + */ + if (new_elen <= (last_ext->extLength & UDF_EXTENT_LENGTH_MASK)) + return; + added_bytes = new_elen - (last_ext->extLength & UDF_EXTENT_LENGTH_MASK); last_ext->extLength += added_bytes; UDF_I(inode)->i_lenExtents += added_bytes; @@ -628,12 +617,12 @@ static int udf_extend_file(struct inode *inode, loff_t newsize) int8_t etype; struct super_block *sb = inode->i_sb; sector_t first_block = newsize >> sb->s_blocksize_bits, offset; - unsigned long partial_final_block; + loff_t new_elen; int adsize; struct udf_inode_info *iinfo = UDF_I(inode); struct kernel_long_ad extent; int err = 0; - int within_final_block; + bool within_last_ext; if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) adsize = sizeof(struct short_ad); @@ -642,8 +631,17 @@ static int udf_extend_file(struct inode *inode, loff_t newsize) else BUG(); + /* + * When creating hole in file, just don't bother with preserving + * preallocation. It likely won't be very useful anyway. + */ + udf_discard_prealloc(inode); + etype = inode_bmap(inode, first_block, &epos, &eloc, &elen, &offset); - within_final_block = (etype != -1); + within_last_ext = (etype != -1); + /* We don't expect extents past EOF... */ + WARN_ON_ONCE(within_last_ext && + elen > ((loff_t)offset + 1) << inode->i_blkbits); if ((!epos.bh && epos.offset == udf_file_entry_alloc_offset(inode)) || (epos.bh && epos.offset == sizeof(struct allocExtDesc))) { @@ -659,19 +657,17 @@ static int udf_extend_file(struct inode *inode, loff_t newsize) extent.extLength |= etype << 30; } - partial_final_block = newsize & (sb->s_blocksize - 1); + new_elen = ((loff_t)offset << inode->i_blkbits) | + (newsize & (sb->s_blocksize - 1)); /* File has extent covering the new size (could happen when extending * inside a block)? */ - if (within_final_block) { + if (within_last_ext) { /* Extending file within the last file block */ - udf_do_extend_final_block(inode, &epos, &extent, - partial_final_block); + udf_do_extend_final_block(inode, &epos, &extent, new_elen); } else { - loff_t add = ((loff_t)offset << sb->s_blocksize_bits) | - partial_final_block; - err = udf_do_extend_file(inode, &epos, &extent, add); + err = udf_do_extend_file(inode, &epos, &extent, new_elen); } if (err < 0) @@ -772,10 +768,11 @@ static sector_t inode_getblk(struct inode *inode, sector_t block, goto out_free; } - /* Are we beyond EOF? */ + /* Are we beyond EOF and preallocated extent? */ if (etype == -1) { int ret; loff_t hole_len; + isBeyondEOF = true; if (count) { if (c) @@ -801,19 +798,17 @@ static sector_t inode_getblk(struct inode *inode, sector_t block, c = 0; offset = 0; count += ret; - /* We are not covered by a preallocated extent? */ - if ((laarr[0].extLength & UDF_EXTENT_FLAG_MASK) != - EXT_NOT_RECORDED_ALLOCATED) { - /* Is there any real extent? - otherwise we overwrite - * the fake one... */ - if (count) - c = !c; - laarr[c].extLength = EXT_NOT_RECORDED_NOT_ALLOCATED | - inode->i_sb->s_blocksize; - memset(&laarr[c].extLocation, 0x00, - sizeof(struct kernel_lb_addr)); - count++; - } + /* + * Is there any real extent? - otherwise we overwrite the fake + * one... + */ + if (count) + c = !c; + laarr[c].extLength = EXT_NOT_RECORDED_NOT_ALLOCATED | + inode->i_sb->s_blocksize; + memset(&laarr[c].extLocation, 0x00, + sizeof(struct kernel_lb_addr)); + count++; endnum = c + 1; lastblock = 1; } else { @@ -1090,23 +1085,8 @@ static void udf_merge_extents(struct inode *inode, struct kernel_long_ad *laarr, blocksize - 1) >> blocksize_bits)))) { if (((li->extLength & UDF_EXTENT_LENGTH_MASK) + - (lip1->extLength & UDF_EXTENT_LENGTH_MASK) + - blocksize - 1) & ~UDF_EXTENT_LENGTH_MASK) { - lip1->extLength = (lip1->extLength - - (li->extLength & - UDF_EXTENT_LENGTH_MASK) + - UDF_EXTENT_LENGTH_MASK) & - ~(blocksize - 1); - li->extLength = (li->extLength & - UDF_EXTENT_FLAG_MASK) + - (UDF_EXTENT_LENGTH_MASK + 1) - - blocksize; - lip1->extLocation.logicalBlockNum = - li->extLocation.logicalBlockNum + - ((li->extLength & - UDF_EXTENT_LENGTH_MASK) >> - blocksize_bits); - } else { + (lip1->extLength & UDF_EXTENT_LENGTH_MASK) + + blocksize - 1) <= UDF_EXTENT_LENGTH_MASK) { li->extLength = lip1->extLength + (((li->extLength & UDF_EXTENT_LENGTH_MASK) + @@ -1259,7 +1239,7 @@ set_size: if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { down_write(&iinfo->i_data_sem); udf_clear_extent_cache(inode); - memset(iinfo->i_ext.i_data + iinfo->i_lenEAttr + newsize, + memset(iinfo->i_data + iinfo->i_lenEAttr + newsize, 0x00, bsize - newsize - udf_file_entry_alloc_offset(inode)); iinfo->i_lenAlloc = newsize; @@ -1395,6 +1375,7 @@ reread: ret = -EIO; goto out; } + iinfo->i_hidden = hidden_inode; iinfo->i_unique = 0; iinfo->i_lenEAttr = 0; iinfo->i_lenExtents = 0; @@ -1408,7 +1389,7 @@ reread: sizeof(struct extendedFileEntry)); if (ret) goto out; - memcpy(iinfo->i_ext.i_data, + memcpy(iinfo->i_data, bh->b_data + sizeof(struct extendedFileEntry), bs - sizeof(struct extendedFileEntry)); } else if (fe->descTag.tagIdent == cpu_to_le16(TAG_IDENT_FE)) { @@ -1417,7 +1398,7 @@ reread: ret = udf_alloc_i_data(inode, bs - sizeof(struct fileEntry)); if (ret) goto out; - memcpy(iinfo->i_ext.i_data, + memcpy(iinfo->i_data, bh->b_data + sizeof(struct fileEntry), bs - sizeof(struct fileEntry)); } else if (fe->descTag.tagIdent == cpu_to_le16(TAG_IDENT_USE)) { @@ -1430,7 +1411,7 @@ reread: sizeof(struct unallocSpaceEntry)); if (ret) goto out; - memcpy(iinfo->i_ext.i_data, + memcpy(iinfo->i_data, bh->b_data + sizeof(struct unallocSpaceEntry), bs - sizeof(struct unallocSpaceEntry)); return 0; @@ -1488,6 +1469,8 @@ reread: iinfo->i_lenEAttr = le32_to_cpu(fe->lengthExtendedAttr); iinfo->i_lenAlloc = le32_to_cpu(fe->lengthAllocDescs); iinfo->i_checkpoint = le32_to_cpu(fe->checkpoint); + iinfo->i_streamdir = 0; + iinfo->i_lenStreams = 0; } else { inode->i_blocks = le64_to_cpu(efe->logicalBlocksRecorded) << (inode->i_sb->s_blocksize_bits - 9); @@ -1501,6 +1484,16 @@ reread: iinfo->i_lenEAttr = le32_to_cpu(efe->lengthExtendedAttr); iinfo->i_lenAlloc = le32_to_cpu(efe->lengthAllocDescs); iinfo->i_checkpoint = le32_to_cpu(efe->checkpoint); + + /* Named streams */ + iinfo->i_streamdir = (efe->streamDirectoryICB.extLength != 0); + iinfo->i_locStreamdir = + lelb_to_cpu(efe->streamDirectoryICB.extLocation); + iinfo->i_lenStreams = le64_to_cpu(efe->objectSize); + if (iinfo->i_lenStreams >= inode->i_size) + iinfo->i_lenStreams -= inode->i_size; + else + iinfo->i_lenStreams = 0; } inode->i_generation = iinfo->i_unique; @@ -1597,8 +1590,8 @@ out: static int udf_alloc_i_data(struct inode *inode, size_t size) { struct udf_inode_info *iinfo = UDF_I(inode); - iinfo->i_ext.i_data = kmalloc(size, GFP_KERNEL); - if (!iinfo->i_ext.i_data) + iinfo->i_data = kmalloc(size, GFP_KERNEL); + if (!iinfo->i_data) return -ENOMEM; return 0; } @@ -1672,7 +1665,7 @@ static int udf_update_inode(struct inode *inode, int do_sync) use->lengthAllocDescs = cpu_to_le32(iinfo->i_lenAlloc); memcpy(bh->b_data + sizeof(struct unallocSpaceEntry), - iinfo->i_ext.i_data, inode->i_sb->s_blocksize - + iinfo->i_data, inode->i_sb->s_blocksize - sizeof(struct unallocSpaceEntry)); use->descTag.tagIdent = cpu_to_le16(TAG_IDENT_USE); crclen = sizeof(struct unallocSpaceEntry); @@ -1702,8 +1695,12 @@ static int udf_update_inode(struct inode *inode, int do_sync) if (S_ISDIR(inode->i_mode) && inode->i_nlink > 0) fe->fileLinkCount = cpu_to_le16(inode->i_nlink - 1); - else - fe->fileLinkCount = cpu_to_le16(inode->i_nlink); + else { + if (iinfo->i_hidden) + fe->fileLinkCount = cpu_to_le16(0); + else + fe->fileLinkCount = cpu_to_le16(inode->i_nlink); + } fe->informationLength = cpu_to_le64(inode->i_size); @@ -1741,7 +1738,7 @@ static int udf_update_inode(struct inode *inode, int do_sync) if (iinfo->i_efe == 0) { memcpy(bh->b_data + sizeof(struct fileEntry), - iinfo->i_ext.i_data, + iinfo->i_data, inode->i_sb->s_blocksize - sizeof(struct fileEntry)); fe->logicalBlocksRecorded = cpu_to_le64(lb_recorded); @@ -1760,12 +1757,22 @@ static int udf_update_inode(struct inode *inode, int do_sync) crclen = sizeof(struct fileEntry); } else { memcpy(bh->b_data + sizeof(struct extendedFileEntry), - iinfo->i_ext.i_data, + iinfo->i_data, inode->i_sb->s_blocksize - sizeof(struct extendedFileEntry)); - efe->objectSize = cpu_to_le64(inode->i_size); + efe->objectSize = + cpu_to_le64(inode->i_size + iinfo->i_lenStreams); efe->logicalBlocksRecorded = cpu_to_le64(lb_recorded); + if (iinfo->i_streamdir) { + struct long_ad *icb_lad = &efe->streamDirectoryICB; + + icb_lad->extLocation = + cpu_to_lelb(iinfo->i_locStreamdir); + icb_lad->extLength = + cpu_to_le32(inode->i_sb->s_blocksize); + } + udf_adjust_time(iinfo, inode->i_atime); udf_adjust_time(iinfo, inode->i_mtime); udf_adjust_time(iinfo, inode->i_ctime); @@ -1864,8 +1871,13 @@ struct inode *__udf_iget(struct super_block *sb, struct kernel_lb_addr *ino, if (!inode) return ERR_PTR(-ENOMEM); - if (!(inode->i_state & I_NEW)) + if (!(inode->i_state & I_NEW)) { + if (UDF_I(inode)->i_hidden != hidden_inode) { + iput(inode); + return ERR_PTR(-EFSCORRUPTED); + } return inode; + } memcpy(&UDF_I(inode)->i_location, ino, sizeof(struct kernel_lb_addr)); err = udf_read_inode(inode, hidden_inode); @@ -2046,7 +2058,7 @@ void udf_write_aext(struct inode *inode, struct extent_position *epos, struct udf_inode_info *iinfo = UDF_I(inode); if (!epos->bh) - ptr = iinfo->i_ext.i_data + epos->offset - + ptr = iinfo->i_data + epos->offset - udf_file_entry_alloc_offset(inode) + iinfo->i_lenEAttr; else @@ -2138,7 +2150,7 @@ int8_t udf_current_aext(struct inode *inode, struct extent_position *epos, if (!epos->bh) { if (!epos->offset) epos->offset = udf_file_entry_alloc_offset(inode); - ptr = iinfo->i_ext.i_data + epos->offset - + ptr = iinfo->i_data + epos->offset - udf_file_entry_alloc_offset(inode) + iinfo->i_lenEAttr; alen = udf_file_entry_alloc_offset(inode) + diff --git a/fs/udf/misc.c b/fs/udf/misc.c index 853bcff51..1614d308d 100644 --- a/fs/udf/misc.c +++ b/fs/udf/misc.c @@ -52,9 +52,9 @@ struct genericFormat *udf_add_extendedattr(struct inode *inode, uint32_t size, uint16_t crclen; struct udf_inode_info *iinfo = UDF_I(inode); - ea = iinfo->i_ext.i_data; + ea = iinfo->i_data; if (iinfo->i_lenEAttr) { - ad = iinfo->i_ext.i_data + iinfo->i_lenEAttr; + ad = iinfo->i_data + iinfo->i_lenEAttr; } else { ad = ea; size += sizeof(struct extendedAttrHeaderDesc); @@ -153,7 +153,7 @@ struct genericFormat *udf_get_extendedattr(struct inode *inode, uint32_t type, uint32_t offset; struct udf_inode_info *iinfo = UDF_I(inode); - ea = iinfo->i_ext.i_data; + ea = iinfo->i_data; if (iinfo->i_lenEAttr) { struct extendedAttrHeaderDesc *eahd; diff --git a/fs/udf/namei.c b/fs/udf/namei.c index d13ded8e2..05dd1f45b 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -478,8 +478,7 @@ add: if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { block = dinfo->i_location.logicalBlockNum; fi = (struct fileIdentDesc *) - (dinfo->i_ext.i_data + - fibh->soffset - + (dinfo->i_data + fibh->soffset - udf_ext0_offset(dir) + dinfo->i_lenEAttr); } else { @@ -962,7 +961,7 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry, mark_buffer_dirty_inode(epos.bh, inode); ea = epos.bh->b_data + udf_ext0_offset(inode); } else - ea = iinfo->i_ext.i_data + iinfo->i_lenEAttr; + ea = iinfo->i_data + iinfo->i_lenEAttr; eoffset = sb->s_blocksize - udf_ext0_offset(inode); pc = (struct pathComponent *)ea; @@ -1106,8 +1105,9 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry, return -EINVAL; ofi = udf_find_entry(old_dir, &old_dentry->d_name, &ofibh, &ocfi); - if (IS_ERR(ofi)) { - retval = PTR_ERR(ofi); + if (!ofi || IS_ERR(ofi)) { + if (IS_ERR(ofi)) + retval = PTR_ERR(ofi); goto end_rename; } @@ -1116,8 +1116,7 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry, brelse(ofibh.sbh); tloc = lelb_to_cpu(ocfi.icb.extLocation); - if (!ofi || udf_get_lb_pblock(old_dir->i_sb, &tloc, 0) - != old_inode->i_ino) + if (udf_get_lb_pblock(old_dir->i_sb, &tloc, 0) != old_inode->i_ino) goto end_rename; nfi = udf_find_entry(new_dir, &new_dentry->d_name, &nfibh, &ncfi); @@ -1142,7 +1141,7 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry, retval = -EIO; if (old_iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { dir_fi = udf_get_fileident( - old_iinfo->i_ext.i_data - + old_iinfo->i_data - (old_iinfo->i_efe ? sizeof(struct extendedFileEntry) : sizeof(struct fileEntry)), diff --git a/fs/udf/partition.c b/fs/udf/partition.c index 090baff83..4cbf40575 100644 --- a/fs/udf/partition.c +++ b/fs/udf/partition.c @@ -65,7 +65,7 @@ uint32_t udf_get_pblock_virt15(struct super_block *sb, uint32_t block, } if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { - loc = le32_to_cpu(((__le32 *)(iinfo->i_ext.i_data + + loc = le32_to_cpu(((__le32 *)(iinfo->i_data + vdata->s_start_offset))[block]); goto translate; } diff --git a/fs/udf/super.c b/fs/udf/super.c index b7fb7cd35..bce48a077 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -146,9 +146,12 @@ static struct inode *udf_alloc_inode(struct super_block *sb) ei->i_unique = 0; ei->i_lenExtents = 0; + ei->i_lenStreams = 0; ei->i_next_alloc_block = 0; ei->i_next_alloc_goal = 0; ei->i_strat4096 = 0; + ei->i_streamdir = 0; + ei->i_hidden = 0; init_rwsem(&ei->i_data_sem); ei->cached_extent.lstart = -1; spin_lock_init(&ei->i_extent_cache_lock); @@ -172,7 +175,7 @@ static void init_once(void *foo) { struct udf_inode_info *ei = (struct udf_inode_info *)foo; - ei->i_ext.i_data = NULL; + ei->i_data = NULL; inode_init_once(&ei->vfs_inode); } @@ -572,6 +575,11 @@ static int udf_parse_options(char *options, struct udf_options *uopt, if (!remount) { if (uopt->nls_map) unload_nls(uopt->nls_map); + /* + * load_nls() failure is handled later in + * udf_fill_super() after all options are + * parsed. + */ uopt->nls_map = load_nls(args[0].from); uopt->flags |= (1 << UDF_FLAG_NLS_MAP); } @@ -1200,7 +1208,7 @@ static int udf_load_vat(struct super_block *sb, int p_index, int type1_index) vat20 = (struct virtualAllocationTable20 *)bh->b_data; } else { vat20 = (struct virtualAllocationTable20 *) - vati->i_ext.i_data; + vati->i_data; } map->s_type_specific.s_virtual.s_start_offset = diff --git a/fs/udf/symlink.c b/fs/udf/symlink.c index 6023c97c6..aef3e4d90 100644 --- a/fs/udf/symlink.c +++ b/fs/udf/symlink.c @@ -122,7 +122,7 @@ static int udf_symlink_filler(struct file *file, struct page *page) down_read(&iinfo->i_data_sem); if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) { - symlink = iinfo->i_ext.i_data + iinfo->i_lenEAttr; + symlink = iinfo->i_data + iinfo->i_lenEAttr; } else { bh = sb_bread(inode->i_sb, pos); diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c index 94220ba85..b0c71edc8 100644 --- a/fs/udf/truncate.c +++ b/fs/udf/truncate.c @@ -120,60 +120,42 @@ void udf_truncate_tail_extent(struct inode *inode) void udf_discard_prealloc(struct inode *inode) { - struct extent_position epos = { NULL, 0, {0, 0} }; + struct extent_position epos = {}; + struct extent_position prev_epos = {}; struct kernel_lb_addr eloc; uint32_t elen; uint64_t lbcount = 0; int8_t etype = -1, netype; - int adsize; struct udf_inode_info *iinfo = UDF_I(inode); + int bsize = 1 << inode->i_blkbits; if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB || - inode->i_size == iinfo->i_lenExtents) + ALIGN(inode->i_size, bsize) == ALIGN(iinfo->i_lenExtents, bsize)) return; - if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT) - adsize = sizeof(struct short_ad); - else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) - adsize = sizeof(struct long_ad); - else - adsize = 0; - epos.block = iinfo->i_location; /* Find the last extent in the file */ - while ((netype = udf_next_aext(inode, &epos, &eloc, &elen, 1)) != -1) { - etype = netype; + while ((netype = udf_next_aext(inode, &epos, &eloc, &elen, 0)) != -1) { + brelse(prev_epos.bh); + prev_epos = epos; + if (prev_epos.bh) + get_bh(prev_epos.bh); + + etype = udf_next_aext(inode, &epos, &eloc, &elen, 1); lbcount += elen; } if (etype == (EXT_NOT_RECORDED_ALLOCATED >> 30)) { - epos.offset -= adsize; lbcount -= elen; - extent_trunc(inode, &epos, &eloc, etype, elen, 0); - if (!epos.bh) { - iinfo->i_lenAlloc = - epos.offset - - udf_file_entry_alloc_offset(inode); - mark_inode_dirty(inode); - } else { - struct allocExtDesc *aed = - (struct allocExtDesc *)(epos.bh->b_data); - aed->lengthAllocDescs = - cpu_to_le32(epos.offset - - sizeof(struct allocExtDesc)); - if (!UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_STRICT) || - UDF_SB(inode->i_sb)->s_udfrev >= 0x0201) - udf_update_tag(epos.bh->b_data, epos.offset); - else - udf_update_tag(epos.bh->b_data, - sizeof(struct allocExtDesc)); - mark_buffer_dirty_inode(epos.bh, inode); - } + udf_delete_aext(inode, prev_epos); + udf_free_blocks(inode->i_sb, inode, &eloc, 0, + DIV_ROUND_UP(elen, 1 << inode->i_blkbits)); } /* This inode entry is in-memory only and thus we don't have to mark * the inode dirty */ iinfo->i_lenExtents = lbcount; brelse(epos.bh); + brelse(prev_epos.bh); } static void udf_update_alloc_ext_desc(struct inode *inode, diff --git a/fs/udf/udf_i.h b/fs/udf/udf_i.h index 2ef0e212f..b77bf713a 100644 --- a/fs/udf/udf_i.h +++ b/fs/udf/udf_i.h @@ -42,12 +42,12 @@ struct udf_inode_info { unsigned i_efe : 1; /* extendedFileEntry */ unsigned i_use : 1; /* unallocSpaceEntry */ unsigned i_strat4096 : 1; - unsigned reserved : 26; - union { - struct short_ad *i_sad; - struct long_ad *i_lad; - __u8 *i_data; - } i_ext; + unsigned i_streamdir : 1; + unsigned i_hidden : 1; /* hidden system inode */ + unsigned reserved : 24; + __u8 *i_data; + struct kernel_lb_addr i_locStreamdir; + __u64 i_lenStreams; struct rw_semaphore i_data_sem; struct udf_ext_cache cached_extent; /* Spinlock for protecting extent cache */ diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h index d12e507e9..aa58173b4 100644 --- a/fs/udf/udf_sb.h +++ b/fs/udf/udf_sb.h @@ -57,6 +57,8 @@ #define MF_DUPLICATE_MD 0x01 #define MF_MIRROR_FE_LOADED 0x02 +#define EFSCORRUPTED EUCLEAN + struct udf_meta_data { __u32 s_meta_file_loc; __u32 s_mirror_file_loc; diff --git a/fs/xattr.c b/fs/xattr.c index 470ee0af3..5c3407e18 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -1012,7 +1012,7 @@ static int xattr_list_one(char **buffer, ssize_t *remaining_size, ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs, char *buffer, size_t size) { - bool trusted = capable(CAP_SYS_ADMIN); + bool trusted = ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN); struct simple_xattr *xattr; ssize_t remaining_size = size; int err = 0; |