diff options
Diffstat (limited to 'source3/modules/vfs_aio_pthread.c')
-rw-r--r-- | source3/modules/vfs_aio_pthread.c | 538 |
1 files changed, 538 insertions, 0 deletions
diff --git a/source3/modules/vfs_aio_pthread.c b/source3/modules/vfs_aio_pthread.c new file mode 100644 index 0000000..b099a6b --- /dev/null +++ b/source3/modules/vfs_aio_pthread.c @@ -0,0 +1,538 @@ +/* + * Simulate Posix AIO using pthreads. + * + * Based on the aio_fork work from Volker and Volker's pthreadpool library. + * + * Copyright (C) Volker Lendecke 2008 + * Copyright (C) Jeremy Allison 2012 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include "includes.h" +#include "system/filesys.h" +#include "system/shmem.h" +#include "smbd/smbd.h" +#include "smbd/globals.h" +#include "../lib/pthreadpool/pthreadpool_tevent.h" +#ifdef HAVE_LINUX_FALLOC_H +#include <linux/falloc.h> +#endif + +#if defined(HAVE_OPENAT) && defined(HAVE_LINUX_THREAD_CREDENTIALS) + +/* + * We must have openat() to do any thread-based + * asynchronous opens. We also must be using + * thread-specific credentials (Linux-only + * for now). + */ + +struct aio_open_private_data { + struct aio_open_private_data *prev, *next; + /* Inputs. */ + int dir_fd; + bool opened_dir_fd; + int flags; + mode_t mode; + uint64_t mid; + bool in_progress; + struct smb_filename *fsp_name; + struct smb_filename *smb_fname; + connection_struct *conn; + struct smbXsrv_connection *xconn; + const struct security_unix_token *ux_tok; + uint64_t initial_allocation_size; + /* Returns. */ + int ret_fd; + int ret_errno; +}; + +/* List of outstanding requests we have. */ +static struct aio_open_private_data *open_pd_list; + +static void aio_open_do(struct aio_open_private_data *opd); +static void opd_free(struct aio_open_private_data *opd); + +/************************************************************************ + Find the open private data by mid. +***********************************************************************/ + +static struct aio_open_private_data *find_open_private_data_by_mid(uint64_t mid) +{ + struct aio_open_private_data *opd; + + for (opd = open_pd_list; opd != NULL; opd = opd->next) { + if (opd->mid == mid) { + return opd; + } + } + + return NULL; +} + +/************************************************************************ + Callback when an open completes. +***********************************************************************/ + +static void aio_open_handle_completion(struct tevent_req *subreq) +{ + struct aio_open_private_data *opd = + tevent_req_callback_data(subreq, + struct aio_open_private_data); + int ret; + + ret = pthreadpool_tevent_job_recv(subreq); + TALLOC_FREE(subreq); + + /* + * We're no longer in flight. Remove the + * destructor used to preserve opd so + * a talloc_free actually removes it. + */ + talloc_set_destructor(opd, NULL); + + if (opd->conn == NULL) { + /* + * We were shutdown closed in flight. No one + * wants the result, and state has been reparented + * to the NULL context, so just free it so we + * don't leak memory. + */ + DBG_NOTICE("aio open request for %s abandoned in flight\n", + opd->fsp_name->base_name); + if (opd->ret_fd != -1) { + close(opd->ret_fd); + opd->ret_fd = -1; + } + /* + * Find outstanding event and reschedule so the client + * gets an error message return from the open. + */ + schedule_deferred_open_message_smb(opd->xconn, opd->mid); + opd_free(opd); + return; + } + + if (ret != 0) { + bool ok; + + if (ret != EAGAIN) { + smb_panic("aio_open_handle_completion"); + /* notreached. */ + return; + } + /* + * Make sure we run as the user again + */ + ok = change_to_user_and_service(opd->conn, opd->conn->vuid); + if (!ok) { + smb_panic("Can't change to user"); + return; + } + /* + * If we get EAGAIN from pthreadpool_tevent_job_recv() this + * means the lower level pthreadpool failed to create a new + * thread. Fallback to sync processing in that case to allow + * some progress for the client. + */ + aio_open_do(opd); + } + + DEBUG(10,("aio_open_handle_completion: mid %llu " + "for file %s completed\n", + (unsigned long long)opd->mid, + opd->fsp_name->base_name)); + + opd->in_progress = false; + + /* Find outstanding event and reschedule. */ + if (!schedule_deferred_open_message_smb(opd->xconn, opd->mid)) { + /* + * Outstanding event didn't exist or was + * cancelled. Free up the fd and throw + * away the result. + */ + if (opd->ret_fd != -1) { + close(opd->ret_fd); + opd->ret_fd = -1; + } + opd_free(opd); + } +} + +/***************************************************************** + The core of the async open code - the worker function. Note we + use the new openat() system call to avoid any problems with + current working directory changes plus we change credentials + on the thread to prevent any security race conditions. +*****************************************************************/ + +static void aio_open_worker(void *private_data) +{ + struct aio_open_private_data *opd = + (struct aio_open_private_data *)private_data; + + /* Become the correct credential on this thread. */ + if (set_thread_credentials(opd->ux_tok->uid, + opd->ux_tok->gid, + (size_t)opd->ux_tok->ngroups, + opd->ux_tok->groups) != 0) { + opd->ret_fd = -1; + opd->ret_errno = errno; + return; + } + + aio_open_do(opd); +} + +static void aio_open_do(struct aio_open_private_data *opd) +{ + opd->ret_fd = openat(opd->dir_fd, + opd->smb_fname->base_name, + opd->flags, + opd->mode); + + if (opd->ret_fd == -1) { + opd->ret_errno = errno; + } else { + /* Create was successful. */ + opd->ret_errno = 0; + +#if defined(HAVE_LINUX_FALLOCATE) + /* + * See if we can set the initial + * allocation size. We don't record + * the return for this as it's an + * optimization - the upper layer + * will also do this for us once + * the open returns. + */ + if (opd->initial_allocation_size) { + (void)fallocate(opd->ret_fd, + FALLOC_FL_KEEP_SIZE, + 0, + (off_t)opd->initial_allocation_size); + } +#endif + } +} + +/************************************************************************ + Open private data teardown. +***********************************************************************/ + +static void opd_free(struct aio_open_private_data *opd) +{ + if (opd->opened_dir_fd && opd->dir_fd != -1) { + close(opd->dir_fd); + } + DLIST_REMOVE(open_pd_list, opd); + TALLOC_FREE(opd); +} + +/************************************************************************ + Create and initialize a private data struct for async open. +***********************************************************************/ + +static struct aio_open_private_data *create_private_open_data( + TALLOC_CTX *ctx, + const struct files_struct *dirfsp, + const struct smb_filename *smb_fname, + const files_struct *fsp, + int flags, + mode_t mode) +{ + struct aio_open_private_data *opd = talloc_zero(ctx, + struct aio_open_private_data); + + if (!opd) { + return NULL; + } + + *opd = (struct aio_open_private_data) { + .dir_fd = -1, + .ret_fd = -1, + .ret_errno = EINPROGRESS, + .flags = flags, + .mode = mode, + .mid = fsp->mid, + .in_progress = true, + .conn = fsp->conn, + /* + * TODO: In future we need a proper algorithm + * to find the correct connection for a fsp. + * For now we only have one connection, so this is correct... + */ + .xconn = fsp->conn->sconn->client->connections, + .initial_allocation_size = fsp->initial_allocation_size, + }; + + /* Copy our current credentials. */ + opd->ux_tok = copy_unix_token(opd, get_current_utok(fsp->conn)); + if (opd->ux_tok == NULL) { + opd_free(opd); + return NULL; + } + + /* + * Copy the full fsp_name and smb_fname which is the basename. + */ + opd->smb_fname = cp_smb_filename(opd, smb_fname); + if (opd->smb_fname == NULL) { + opd_free(opd); + return NULL; + } + + opd->fsp_name = cp_smb_filename(opd, fsp->fsp_name); + if (opd->fsp_name == NULL) { + opd_free(opd); + return NULL; + } + + if (fsp_get_pathref_fd(dirfsp) != AT_FDCWD) { + opd->dir_fd = fsp_get_pathref_fd(dirfsp); + } else { +#if defined(O_DIRECTORY) + opd->dir_fd = open(".", O_RDONLY|O_DIRECTORY); +#else + opd->dir_fd = open(".", O_RDONLY); +#endif + opd->opened_dir_fd = true; + } + if (opd->dir_fd == -1) { + opd_free(opd); + return NULL; + } + + DLIST_ADD_END(open_pd_list, opd); + return opd; +} + +static int opd_inflight_destructor(struct aio_open_private_data *opd) +{ + /* + * Setting conn to NULL allows us to + * discover the connection was torn + * down which kills the fsp that owns + * opd. + */ + DBG_NOTICE("aio open request for %s cancelled\n", + opd->fsp_name->base_name); + opd->conn = NULL; + /* Don't let opd go away. */ + return -1; +} + +/***************************************************************** + Setup an async open. +*****************************************************************/ + +static int open_async(const struct files_struct *dirfsp, + const struct smb_filename *smb_fname, + const files_struct *fsp, + int flags, + mode_t mode) +{ + struct aio_open_private_data *opd = NULL; + struct tevent_req *subreq = NULL; + + /* + * Allocate off fsp->conn, not NULL or fsp. As we're going + * async fsp will get talloc_free'd when we return + * EINPROGRESS/NT_STATUS_MORE_PROCESSING_REQUIRED. A new fsp + * pointer gets allocated on every re-run of the + * open code path. Allocating on fsp->conn instead + * of NULL allows use to get notified via destructor + * if the conn is force-closed or we shutdown. + * opd is always safely freed in all codepath so no + * memory leaks. + */ + opd = create_private_open_data(fsp->conn, + dirfsp, + smb_fname, + fsp, + flags, + mode); + if (opd == NULL) { + DEBUG(10, ("open_async: Could not create private data.\n")); + return -1; + } + + subreq = pthreadpool_tevent_job_send(opd, + fsp->conn->sconn->ev_ctx, + fsp->conn->sconn->pool, + aio_open_worker, opd); + if (subreq == NULL) { + opd_free(opd); + return -1; + } + tevent_req_set_callback(subreq, aio_open_handle_completion, opd); + + DEBUG(5,("open_async: mid %llu created for file %s\n", + (unsigned long long)opd->mid, + opd->fsp_name->base_name)); + + /* + * Add a destructor to protect us from connection + * teardown whilst the open thread is in flight. + */ + talloc_set_destructor(opd, opd_inflight_destructor); + + /* Cause the calling code to reschedule us. */ + errno = EINPROGRESS; /* Maps to NT_STATUS_MORE_PROCESSING_REQUIRED. */ + return -1; +} + +/***************************************************************** + Look for a matching SMB2 mid. If we find it we're rescheduled, + just return the completed open. +*****************************************************************/ + +static bool find_completed_open(files_struct *fsp, + int *p_fd, + int *p_errno) +{ + struct aio_open_private_data *opd; + + opd = find_open_private_data_by_mid(fsp->mid); + if (!opd) { + return false; + } + + if (opd->in_progress) { + DEBUG(0,("find_completed_open: mid %llu " + "still in progress for " + "file %s. PANIC !\n", + (unsigned long long)opd->mid, + opd->fsp_name->base_name)); + /* Disaster ! This is an open timeout. Just panic. */ + smb_panic("find_completed_open - in_progress\n"); + /* notreached. */ + return false; + } + + *p_fd = opd->ret_fd; + *p_errno = opd->ret_errno; + + DEBUG(5,("find_completed_open: mid %llu returning " + "fd = %d, errno = %d (%s) " + "for file %s\n", + (unsigned long long)opd->mid, + opd->ret_fd, + opd->ret_errno, + strerror(opd->ret_errno), + smb_fname_str_dbg(fsp->fsp_name))); + + /* Now we can free the opd. */ + opd_free(opd); + return true; +} + +/***************************************************************** + The core open function. Only go async on O_CREAT|O_EXCL + opens to prevent any race conditions. +*****************************************************************/ + +static int aio_pthread_openat_fn(vfs_handle_struct *handle, + const struct files_struct *dirfsp, + const struct smb_filename *smb_fname, + struct files_struct *fsp, + const struct vfs_open_how *how) +{ + int my_errno = 0; + int fd = -1; + bool aio_allow_open = lp_parm_bool( + SNUM(handle->conn), "aio_pthread", "aio open", false); + + if (how->resolve != 0) { + errno = ENOSYS; + return -1; + } + + if (is_named_stream(smb_fname)) { + /* Don't handle stream opens. */ + errno = ENOENT; + return -1; + } + + if (fsp->conn->sconn->pool == NULL) { + /* + * a threadpool is required for async support + */ + aio_allow_open = false; + } + + if (fsp->conn->sconn->client != NULL && + fsp->conn->sconn->client->server_multi_channel_enabled) { + /* + * This module is not compatible with multi channel yet. + */ + aio_allow_open = false; + } + + if (fsp->fsp_flags.is_pathref) { + /* Use SMB_VFS_NEXT_OPENAT() to call openat() with O_PATH. */ + aio_allow_open = false; + } + + if (!(how->flags & O_CREAT)) { + /* Only creates matter. */ + aio_allow_open = false; + } + + if (!(how->flags & O_EXCL)) { + /* Only creates with O_EXCL matter. */ + aio_allow_open = false; + } + + if (!aio_allow_open) { + /* aio opens turned off. */ + return SMB_VFS_NEXT_OPENAT(handle, + dirfsp, + smb_fname, + fsp, + how); + } + + /* + * See if this is a reentrant call - i.e. is this a + * restart of an existing open that just completed. + */ + + if (find_completed_open(fsp, + &fd, + &my_errno)) { + errno = my_errno; + return fd; + } + + /* Ok, it's a create exclusive call - pass it to a thread helper. */ + return open_async(dirfsp, smb_fname, fsp, how->flags, how->mode); +} +#endif + +static struct vfs_fn_pointers vfs_aio_pthread_fns = { +#if defined(HAVE_OPENAT) && defined(HAVE_LINUX_THREAD_CREDENTIALS) + .openat_fn = aio_pthread_openat_fn, +#endif +}; + +static_decl_vfs; +NTSTATUS vfs_aio_pthread_init(TALLOC_CTX *ctx) +{ + return smb_register_vfs(SMB_VFS_INTERFACE_VERSION, + "aio_pthread", &vfs_aio_pthread_fns); +} |