From f215e02bf85f68d3a6106c2a1f4f7f063f819064 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Thu, 11 Apr 2024 10:17:27 +0200 Subject: Adding upstream version 7.0.14-dfsg. Signed-off-by: Daniel Baumann --- .../Additions/linux/sharedfolders/.scm-settings | 39 + .../Additions/linux/sharedfolders/Makefile.kmk | 57 + .../Additions/linux/sharedfolders/Makefile.module | 119 + src/VBox/Additions/linux/sharedfolders/dirops.c | 1427 +++++++ .../Additions/linux/sharedfolders/files_vboxsf | 107 + src/VBox/Additions/linux/sharedfolders/lnkops.c | 305 ++ .../Additions/linux/sharedfolders/mount.vboxsf.c | 702 ++++ src/VBox/Additions/linux/sharedfolders/regops.c | 3902 ++++++++++++++++++++ .../linux/sharedfolders/testcase/tstmmap.c | 126 + src/VBox/Additions/linux/sharedfolders/utils.c | 1288 +++++++ src/VBox/Additions/linux/sharedfolders/vbsfmount.c | 113 + src/VBox/Additions/linux/sharedfolders/vbsfmount.h | 142 + src/VBox/Additions/linux/sharedfolders/vfsmod.c | 1753 +++++++++ src/VBox/Additions/linux/sharedfolders/vfsmod.h | 483 +++ 14 files changed, 10563 insertions(+) create mode 100644 src/VBox/Additions/linux/sharedfolders/.scm-settings create mode 100644 src/VBox/Additions/linux/sharedfolders/Makefile.kmk create mode 100644 src/VBox/Additions/linux/sharedfolders/Makefile.module create mode 100644 src/VBox/Additions/linux/sharedfolders/dirops.c create mode 100755 src/VBox/Additions/linux/sharedfolders/files_vboxsf create mode 100644 src/VBox/Additions/linux/sharedfolders/lnkops.c create mode 100644 src/VBox/Additions/linux/sharedfolders/mount.vboxsf.c create mode 100644 src/VBox/Additions/linux/sharedfolders/regops.c create mode 100644 src/VBox/Additions/linux/sharedfolders/testcase/tstmmap.c create mode 100644 src/VBox/Additions/linux/sharedfolders/utils.c create mode 100644 src/VBox/Additions/linux/sharedfolders/vbsfmount.c create mode 100644 src/VBox/Additions/linux/sharedfolders/vbsfmount.h create mode 100644 src/VBox/Additions/linux/sharedfolders/vfsmod.c create mode 100644 src/VBox/Additions/linux/sharedfolders/vfsmod.h (limited to 'src/VBox/Additions/linux/sharedfolders') diff --git a/src/VBox/Additions/linux/sharedfolders/.scm-settings b/src/VBox/Additions/linux/sharedfolders/.scm-settings new file mode 100644 index 00000000..64190d85 --- /dev/null +++ b/src/VBox/Additions/linux/sharedfolders/.scm-settings @@ -0,0 +1,39 @@ +# $Id: .scm-settings $ +## @file +# Source code massager settings for linux shared folders module. +# + +# +# Copyright (C) 2010-2023 Oracle and/or its affiliates. +# +# This file is part of VirtualBox base platform packages, as +# available from https://www.virtualbox.org. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation, in version 3 of the +# License. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see . +# +# SPDX-License-Identifier: GPL-3.0-only +# + + +/*.c|/*.h: --no-convert-tabs +/Makefile.module: --treat-as Makefile + +# MIT licence to make it easier to re-import code from the in-kernel version. 
+/dirops.c: --license-mit +/lnkops.c: --license-mit +/regops.c: --license-mit +/utils.c: --license-mit +/vbsfmount.h: --license-mit +/vfsmod.c: --license-mit +/vfsmod.h: --license-mit diff --git a/src/VBox/Additions/linux/sharedfolders/Makefile.kmk b/src/VBox/Additions/linux/sharedfolders/Makefile.kmk new file mode 100644 index 00000000..50992172 --- /dev/null +++ b/src/VBox/Additions/linux/sharedfolders/Makefile.kmk @@ -0,0 +1,57 @@ +# $Id: Makefile.kmk $ +## @file +# Sub-Makefile for the vboxsf (linux shared folders module). +# + +# +# Copyright (C) 2006-2023 Oracle and/or its affiliates. +# +# This file is part of VirtualBox base platform packages, as +# available from https://www.virtualbox.org. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation, in version 3 of the +# License. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see . +# +# SPDX-License-Identifier: GPL-3.0-only +# + +SUB_DEPTH = ../../../../.. +include $(KBUILD_PATH)/subheader.kmk + + +# +# Populate FILES_VBOXSF_NOBIN +# +INSTALLS += vboxsf-src +include $(PATH_SUB_CURRENT)/files_vboxsf +vboxsf-src_INST = $(INST_ADDITIONS)src/vboxsf/ +vboxsf-src_SOURCES = \ + $(subst $(DQUOTE),,$(FILES_VBOXSF_NOBIN)) +vboxsf-src_EXEC_SOURCES = \ + $(subst $(DQUOTE),,$(FILES_VBOXSF_BIN)) + +# Build test for the Guest Additions kernel modules (kmk check). +$(evalcall2 VBOX_LINUX_KMOD_TEST_BUILD_RULE_FN,vboxsf-src,vboxguest-src,) + +# +# The mount util. +# +PROGRAMS += mount.vboxsf +mount.vboxsf_TEMPLATE = VBoxGuestR3Exe +mount.vboxsf_DEFS = _GNU_SOURCE +mount.vboxsf_SOURCES = \ + mount.vboxsf.c \ + vbsfmount.c + +include $(FILE_KBUILD_SUB_FOOTER) + diff --git a/src/VBox/Additions/linux/sharedfolders/Makefile.module b/src/VBox/Additions/linux/sharedfolders/Makefile.module new file mode 100644 index 00000000..3eeb976c --- /dev/null +++ b/src/VBox/Additions/linux/sharedfolders/Makefile.module @@ -0,0 +1,119 @@ +# $Id: Makefile.module $ +## @file +# VBox Linux Shared Folders VFS Module Makefile. +# +# (For 2.6.x this file must be 'Makefile'!) +# + +# +# Copyright (C) 2006-2023 Oracle and/or its affiliates. +# +# This file is part of VirtualBox base platform packages, as +# available from https://www.virtualbox.org. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation, in version 3 of the +# License. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see . +# +# SPDX-License-Identifier: GPL-3.0-only +# + +# Linux kbuild sets this to our source directory if we are called from there +obj ?= $(CURDIR) +include $(obj)/Makefile-header.gmk +VBOXSF_DIR = $(VBOX_MODULE_SRC_DIR) + +# Allow building directly from the subdirectory without assuming the toplevel +# makefile has done the copying. 
Not the default use case, but can be handy. +ifndef KBUILD_EXTRA_SYMBOLS +KBUILD_EXTRA_SYMBOLS=$(abspath $(VBOXSF_DIR)/../vboxguest/Module.symvers) +endif + +VBOXMOD_NAME = vboxsf +VBOXMOD_OBJS = \ + vfsmod.o \ + dirops.o \ + lnkops.o \ + regops.o \ + utils.o \ + VBoxGuestR0LibGenericRequest.o \ + VBoxGuestR0LibHGCM.o \ + VBoxGuestR0LibIdc.o \ + VBoxGuestR0LibIdc-unix.o \ + VBoxGuestR0LibInit.o \ + VBoxGuestR0LibPhysHeap.o \ + VBoxGuestR0LibSharedFolders.o +ifeq ($(VBOX_KBUILD_TARGET_ARCH),x86) +VBOXMOD_OBJS += \ + divdi3.o \ + moddi3.o \ + udivdi3.o \ + udivmoddi4.o \ + umoddi3.o \ + qdivrem.o +endif +VBOXMOD_INCL = \ + $(VBOXSF_DIR) \ + $(VBOXSF_DIR)include \ + $(VBOXSF_DIR)r0drv/linux +VBOXMOD_DEFS = \ + RT_OS_LINUX \ + IN_RING0 \ + IN_RT_R0 \ + IN_SUP_R0 \ + VBOX \ + VBOX_WITH_HGCM \ + IN_MODULE \ + IN_GUEST \ + IN_GUEST_R0 \ + RT_NO_EXPORT_SYMBOL +ifeq ($(VBOX_KBUILD_TARGET_ARCH),amd64) +VBOXMOD_DEFS += VBOX_WITH_64_BITS_GUESTS +endif +ifneq ($(filter %uek.x86_64,$(KERN_VER)),) +VBOXMOD_DEFS += VBOX_UEK +endif +VBOXMOD_CFLAGS := $(call VBOX_GCC_CHECK_CC,-Wno-declaration-after-statement,-Wno-declaration-after-statement,,) +VBOXMOD_CFLAGS += $(call VBOX_GCC_CHECK_CC,-fno-pie,-fno-pie,,) +ifneq ($(KERN_VERSION),24) +VBOXMOD_CFLAGS += -include $(VBOXSF_DIR)/include/VBox/VBoxGuestMangling.h +## @todo r-bird: What's with -fshort-wchar here?? We either need that or we dont, right? It should be 2.6+ only. +VBOXMOD_CFLAGS += -fshort-wchar +endif +ifdef VBOX_NO_OMIT_FRAME_POINTER +VBOXMOD_CFLAGS += -fno-omit-frame-pointer +endif + +ifneq ($(KERN_VERSION),24) +# special hack for Fedora Core 6 2.6.18 (fc6), rhel5 2.6.18 (el5), +# ClarkConnect 4.3 (cc4) and ClarkConnect 5 (v5) + ifeq ($(KERNELRELEASE),) +VBOXMOD_CFLAGS += $(foreach inc,$(KERN_INCL),\ + $(if $(wildcard $(inc)/linux/utsrelease.h),\ + $(if $(shell grep '"2.6.18.*fc6.*"' $(inc)/linux/utsrelease.h; \ + grep '"2.6.18.*el5.*"' $(inc)/linux/utsrelease.h; \ + grep '"2.6.18.*v5.*"' $(inc)/linux/utsrelease.h; \ + grep '"2.6.18.*cc4.*"' $(inc)/linux/utsrelease.h),\ + -DKERNEL_FC6,),)) + else +VBOXMOD_CFLAGS += $(if $(shell echo "$(KERNELRELEASE)"|grep '2.6.18.*fc6.*';\ + echo "$(KERNELRELEASE)"|grep '2.6.18.*el5.*';\ + echo "$(KERNELRELEASE)"|grep '2.6.18.*v5.*';\ + echo "$(KERNELRELEASE)"|grep '2.6.18.*cc4.*'),\ + -DKERNEL_FC6,) + endif +endif + +VBOXMOD_CLEAN = . linux r0drv r0drv/linux + +include $(obj)/Makefile-footer.gmk + diff --git a/src/VBox/Additions/linux/sharedfolders/dirops.c b/src/VBox/Additions/linux/sharedfolders/dirops.c new file mode 100644 index 00000000..fb8c8d59 --- /dev/null +++ b/src/VBox/Additions/linux/sharedfolders/dirops.c @@ -0,0 +1,1427 @@ +/* $Id: dirops.c $ */ +/** @file + * vboxsf - VBox Linux Shared Folders VFS, directory inode and file operations. + */ + +/* + * Copyright (C) 2006-2023 Oracle and/or its affiliates. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#include "vfsmod.h" +#include + +#if RTLNX_VER_MAX(4,7,0) +# define d_in_lookup(a_pDirEntry) (d_unhashed(a_pDirEntry)) +#endif + + + +/** + * Open a directory (implements file_operations::open). + * + * @returns 0 on success, negative errno otherwise. + * @param inode inode + * @param file file + */ +static int vbsf_dir_open(struct inode *inode, struct file *file) +{ + struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(inode->i_sb); + struct vbsf_inode_info *sf_i = VBSF_GET_INODE_INFO(inode); + struct dentry *dentry = VBSF_GET_F_DENTRY(file); + struct vbsf_dir_info *sf_d; + int rc; + + SFLOGFLOW(("vbsf_dir_open: inode=%p file=%p %s\n", inode, file, sf_i && sf_i->path ? sf_i->path->String.ach : NULL)); + AssertReturn(pSuperInfo, -EINVAL); + AssertReturn(sf_i, -EINVAL); + AssertReturn(!file->private_data, 0); + + /* + * Allocate and initialize our directory info structure. + * We delay buffer allocation until vbsf_getdent is actually used. + */ + sf_d = kmalloc(sizeof(*sf_d), GFP_KERNEL); + if (sf_d) { + VBOXSFCREATEREQ *pReq; + RT_ZERO(*sf_d); + sf_d->u32Magic = VBSF_DIR_INFO_MAGIC; + sema_init(&sf_d->Lock, 1); + + /* + * Try open the directory. + */ + pReq = (VBOXSFCREATEREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF(VBOXSFCREATEREQ, StrPath.String) + sf_i->path->u16Size); + if (pReq) { + RT_BCOPY_UNFORTIFIED(&pReq->StrPath, sf_i->path, SHFLSTRING_HEADER_SIZE + sf_i->path->u16Size); + RT_ZERO(pReq->CreateParms); + pReq->CreateParms.Handle = SHFL_HANDLE_NIL; + pReq->CreateParms.CreateFlags = SHFL_CF_DIRECTORY + | SHFL_CF_ACT_OPEN_IF_EXISTS + | SHFL_CF_ACT_FAIL_IF_NEW + | SHFL_CF_ACCESS_READ; + + LogFunc(("calling VbglR0SfHostReqCreate on folder %s, flags %#x\n", + sf_i->path->String.utf8, pReq->CreateParms.CreateFlags)); + rc = VbglR0SfHostReqCreate(pSuperInfo->map.root, pReq); + if (RT_SUCCESS(rc)) { + if (pReq->CreateParms.Result == SHFL_FILE_EXISTS) { + Assert(pReq->CreateParms.Handle != SHFL_HANDLE_NIL); + + /* + * Update the inode info with fresh stats and increase the TTL for the + * dentry cache chain that got us here. 
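+                                        * A successful open also stashes the host handle in sf_d->Handle below
+                                        * and links it to the inode via vbsf_handle_append(), so that
+                                        * vbsf_dir_release() can close it when the last reference is dropped.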
+ */ + vbsf_update_inode(inode, sf_i, &pReq->CreateParms.Info, pSuperInfo, + true /*fLocked*/ /** @todo inode locking */, 0 /*fSetAttrs*/); + vbsf_dentry_chain_increase_ttl(dentry); + + sf_d->Handle.hHost = pReq->CreateParms.Handle; + sf_d->Handle.cRefs = 1; + sf_d->Handle.fFlags = VBSF_HANDLE_F_READ | VBSF_HANDLE_F_DIR | VBSF_HANDLE_F_MAGIC; + vbsf_handle_append(sf_i, &sf_d->Handle); + + file->private_data = sf_d; + VbglR0PhysHeapFree(pReq); + SFLOGFLOW(("vbsf_dir_open(%p,%p): returns 0; hHost=%#llx\n", inode, file, sf_d->Handle.hHost)); + return 0; + + } + Assert(pReq->CreateParms.Handle == SHFL_HANDLE_NIL); + + /* + * Directory does not exist, so we probably got some invalid + * dir cache and inode info. + */ + /** @todo do more to invalidate dentry and inode here. */ + vbsf_dentry_invalidate_ttl(dentry); + sf_i->force_restat = true; + rc = -ENOENT; + } else + rc = -EPERM; + VbglR0PhysHeapFree(pReq); + } else { + LogRelMaxFunc(64, ("failed to allocate %zu bytes for '%s'\n", + RT_UOFFSETOF(VBOXSFCREATEREQ, StrPath.String) + sf_i->path->u16Size, sf_i->path->String.ach)); + rc = -ENOMEM; + } + sf_d->u32Magic = VBSF_DIR_INFO_MAGIC_DEAD; + kfree(sf_d); + } else + rc = -ENOMEM; + SFLOGFLOW(("vbsf_dir_open(%p,%p): returns %d\n", inode, file, rc)); + return rc; +} + + +/** + * This is called when reference count of [file] goes to zero. Notify + * the host that it can free whatever is associated with this directory + * and deallocate our own internal buffers + * + * @param inode inode + * @param file file + * returns 0 on success, Linux error code otherwise + */ +static int vbsf_dir_release(struct inode *inode, struct file *file) +{ + struct vbsf_dir_info *sf_d = (struct vbsf_dir_info *)file->private_data; + + SFLOGFLOW(("vbsf_dir_release(%p,%p): sf_d=%p hHost=%#llx\n", inode, file, sf_d, sf_d ? sf_d->Handle.hHost : SHFL_HANDLE_NIL)); + + if (sf_d) { + struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(inode->i_sb); + + /* Invalidate the non-handle part. */ + sf_d->u32Magic = VBSF_DIR_INFO_MAGIC_DEAD; + sf_d->cEntriesLeft = 0; + sf_d->cbValid = 0; + sf_d->pEntry = NULL; + sf_d->fNoMoreFiles = false; + if (sf_d->pBuf) { + kfree(sf_d->pBuf); + sf_d->pBuf = NULL; + } + + /* Closes the handle and frees the structure when the last reference is released. */ + vbsf_handle_release(&sf_d->Handle, pSuperInfo, "vbsf_dir_release"); + } + + return 0; +} + + +/** + * Translate RTFMODE into DT_xxx (in conjunction to rtDirType()). + * returns d_type + * @param fMode file mode + */ +DECLINLINE(int) vbsf_get_d_type(RTFMODE fMode) +{ + switch (fMode & RTFS_TYPE_MASK) { + case RTFS_TYPE_FIFO: return DT_FIFO; + case RTFS_TYPE_DEV_CHAR: return DT_CHR; + case RTFS_TYPE_DIRECTORY: return DT_DIR; + case RTFS_TYPE_DEV_BLOCK: return DT_BLK; + case RTFS_TYPE_FILE: return DT_REG; + case RTFS_TYPE_SYMLINK: return DT_LNK; + case RTFS_TYPE_SOCKET: return DT_SOCK; + case RTFS_TYPE_WHITEOUT: return DT_WHT; + } + return DT_UNKNOWN; +} + + +/** + * Refills the buffer with more entries. + * + * @returns 0 on success, negative errno on error, + */ +static int vbsf_dir_read_more(struct vbsf_dir_info *sf_d, struct vbsf_super_info *pSuperInfo, bool fRestart) +{ + int rc; + VBOXSFLISTDIRREQ *pReq; + + /* + * Don't call the host again if we've reached the end of the + * directory entries already. + */ + if (sf_d->fNoMoreFiles) { + if (!fRestart) { + SFLOGFLOW(("vbsf_dir_read_more: no more files\n")); + return 0; + } + sf_d->fNoMoreFiles = false; + } + + /* + * Make sure we've got some kind of buffers. 
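+     * We first try the directory buffer size configured for the superblock
+     * (cbDirBuf) and fall back to a single 4K buffer if that allocation fails.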
+ */ + if (sf_d->pBuf) { + /* Likely, except for the first time. */ + } else { + sf_d->pBuf = (PSHFLDIRINFO)kmalloc(pSuperInfo->cbDirBuf, GFP_KERNEL); + if (sf_d->pBuf) + sf_d->cbBuf = pSuperInfo->cbDirBuf; + else { + sf_d->pBuf = (PSHFLDIRINFO)kmalloc(_4K, GFP_KERNEL); + if (!sf_d->pBuf) { + LogRelMax(10, ("vbsf_dir_read_more: Failed to allocate buffer!\n")); + return -ENOMEM; + } + sf_d->cbBuf = _4K; + } + } + + /* + * Allocate a request buffer. + */ + pReq = (VBOXSFLISTDIRREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq)); + if (pReq) { + rc = VbglR0SfHostReqListDirContig2x(pSuperInfo->map.root, pReq, sf_d->Handle.hHost, NULL, NIL_RTGCPHYS64, + fRestart ? SHFL_LIST_RESTART : SHFL_LIST_NONE, + sf_d->pBuf, virt_to_phys(sf_d->pBuf), sf_d->cbBuf); + if (RT_SUCCESS(rc)) { + sf_d->pEntry = sf_d->pBuf; + sf_d->cbValid = pReq->Parms.cb32Buffer.u.value32; + sf_d->cEntriesLeft = pReq->Parms.c32Entries.u.value32; + sf_d->fNoMoreFiles = pReq->Parms.f32More.u.value32 == 0; + } else { + sf_d->pEntry = sf_d->pBuf; + sf_d->cbValid = 0; + sf_d->cEntriesLeft = 0; + if (rc == VERR_NO_MORE_FILES) { + sf_d->fNoMoreFiles = true; + rc = 0; + } else { + /* In theory we could end up here with a buffer overflow, but + with a 4KB minimum buffer size that's very unlikely with the + typical filename length of today's file systems (2019). */ + LogRelMax(16, ("vbsf_dir_read_more: VbglR0SfHostReqListDirContig2x -> %Rrc\n", rc)); + rc = -EPROTO; + } + } + VbglR0PhysHeapFree(pReq); + } else + rc = -ENOMEM; + SFLOGFLOW(("vbsf_dir_read_more: returns %d; cbValid=%#x cEntriesLeft=%#x fNoMoreFiles=%d\n", + rc, sf_d->cbValid, sf_d->cEntriesLeft, sf_d->fNoMoreFiles)); + return rc; +} + + +/** + * Helper function for when we need to convert the name, avoids wasting stack in + * the UTF-8 code path. + */ +DECL_NO_INLINE(static, bool) vbsf_dir_emit_nls( +# if RTLNX_VER_MIN(3,11,0) + struct dir_context *ctx, +# else + void *opaque, filldir_t filldir, loff_t offPos, +# endif + const char *pszSrcName, uint16_t cchSrcName, ino_t d_ino, int d_type, + struct vbsf_super_info *pSuperInfo) +{ + char szDstName[NAME_MAX]; + int rc = vbsf_nlscpy(pSuperInfo, szDstName, sizeof(szDstName), pszSrcName, cchSrcName); + if (rc == 0) { +#if RTLNX_VER_MIN(3,11,0) + return dir_emit(ctx, szDstName, strlen(szDstName), d_ino, d_type); +#else + return filldir(opaque, szDstName, strlen(szDstName), offPos, d_ino, d_type) == 0; +#endif + } + + /* Assuming this is a buffer overflow issue, just silently skip it. */ + SFLOGFLOW(("vbsf_dir_emit_nls: vbsf_nlscopy failed with %d for '%s'\n", rc, pszSrcName)); + return true; +} + + +/** + * This is called when vfs wants to populate internal buffers with + * directory [dir]s contents. [opaque] is an argument to the + * [filldir]. [filldir] magically modifies it's argument - [opaque] + * and takes following additional arguments (which i in turn get from + * the host via vbsf_getdent): + * + * name : name of the entry (i must also supply it's length huh?) + * type : type of the entry (FILE | DIR | etc) (i ellect to use DT_UNKNOWN) + * pos : position/index of the entry + * ino : inode number of the entry (i fake those) + * + * [dir] contains: + * f_pos : cursor into the directory listing + * private_data : mean of communication with the host side + * + * Extract elements from the directory listing (incrementing f_pos + * along the way) and feed them to [filldir] until: + * + * a. there are no more entries (i.e. vbsf_getdent set done to 1) + * b. failure to compute fake inode number + * c. 
filldir returns an error (see comment on that) + */ +#if RTLNX_VER_MIN(3,11,0) +static int vbsf_dir_iterate(struct file *dir, struct dir_context *ctx) +#else +static int vbsf_dir_read(struct file *dir, void *opaque, filldir_t filldir) +#endif +{ +#if RTLNX_VER_MIN(3,11,0) + loff_t offPos = ctx->pos; +#else + loff_t offPos = dir->f_pos; +#endif + struct vbsf_dir_info *sf_d = (struct vbsf_dir_info *)dir->private_data; + struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(VBSF_GET_F_DENTRY(dir)->d_sb); + int rc; + + /* + * Lock the directory info structures. + */ + if (RT_LIKELY(down_interruptible(&sf_d->Lock) == 0)) { + /* likely */ + } else + return -ERESTARTSYS; + + /* + * Any seek performed in the mean time? + */ + if (offPos == sf_d->offPos) { + /* likely */ + } else { + /* Restart the search if iPos is lower than the current buffer position. */ + loff_t offCurEntry = sf_d->offPos; + if (offPos < offCurEntry) { + rc = vbsf_dir_read_more(sf_d, pSuperInfo, true /*fRestart*/); + if (rc == 0) + offCurEntry = 0; + else { + up(&sf_d->Lock); + return rc; + } + } + + /* Skip ahead to offPos. */ + while (offCurEntry < offPos) { + uint32_t cEntriesLeft = sf_d->cEntriesLeft; + if ((uint64_t)(offPos - offCurEntry) >= cEntriesLeft) { + /* Skip the current buffer and read the next: */ + offCurEntry += cEntriesLeft; + sf_d->offPos = offCurEntry; + sf_d->cEntriesLeft = 0; + rc = vbsf_dir_read_more(sf_d, pSuperInfo, false /*fRestart*/); + if (rc != 0 || sf_d->cEntriesLeft == 0) { + up(&sf_d->Lock); + return rc; + } + } else { + do + { + PSHFLDIRINFO pEntry = sf_d->pEntry; + pEntry = (PSHFLDIRINFO)&pEntry->name.String.utf8[pEntry->name.u16Length]; + AssertLogRelBreakStmt( cEntriesLeft == 1 + || (uintptr_t)pEntry - (uintptr_t)sf_d->pBuf + <= sf_d->cbValid - RT_UOFFSETOF(SHFLDIRINFO, name.String), + sf_d->cEntriesLeft = 0); + sf_d->cEntriesLeft = --cEntriesLeft; + sf_d->offPos = ++offCurEntry; + } while (offPos < sf_d->offPos); + } + } + } + + /* + * Handle '.' and '..' specially so we get the inode numbers right. + * We'll skip any '.' or '..' returned by the host (included in pos, + * however, to simplify the above skipping code). + */ + if (offPos < 2) { +#if RTLNX_VER_MIN(3,11,0) + if (offPos == 0) { + if (dir_emit_dot(dir, ctx)) + dir->f_pos = ctx->pos = sf_d->offPos = offPos = 1; + else { + up(&sf_d->Lock); + return 0; + } + } + if (offPos == 1) { + if (dir_emit_dotdot(dir, ctx)) + dir->f_pos = ctx->pos = sf_d->offPos = offPos = 2; + else { + up(&sf_d->Lock); + return 0; + } + } +#else + if (offPos == 0) { + rc = filldir(opaque, ".", 1, 0, VBSF_GET_F_DENTRY(dir)->d_inode->i_ino, DT_DIR); + if (!rc) + dir->f_pos = sf_d->offPos = offPos = 1; + else { + up(&sf_d->Lock); + return 0; + } + } + if (offPos == 1) { +# if RTLNX_VER_MIN(2,5,5) + rc = filldir(opaque, "..", 2, 1, parent_ino(VBSF_GET_F_DENTRY(dir)), DT_DIR); +# else + rc = filldir(opaque, "..", 2, 1, VBSF_GET_F_DENTRY(dir)->d_parent->d_inode->i_ino, DT_DIR); +# endif + if (!rc) + dir->f_pos = sf_d->offPos = offPos = 2; + else { + up(&sf_d->Lock); + return 0; + } + } +#endif + } + + /* + * Produce stuff. + */ + Assert(offPos == sf_d->offPos); + for (;;) { + PSHFLDIRINFO pBuf; + PSHFLDIRINFO pEntry; + + /* + * Do we need to read more? 
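+         * If the host has no further entries either, we are done and
+         * return 0 to end the listing.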
+ */ + uint32_t cbValid = sf_d->cbValid; + uint32_t cEntriesLeft = sf_d->cEntriesLeft; + if (!cEntriesLeft) { + rc = vbsf_dir_read_more(sf_d, pSuperInfo, false /*fRestart*/); + if (rc == 0) { + cEntriesLeft = sf_d->cEntriesLeft; + if (!cEntriesLeft) { + up(&sf_d->Lock); + return 0; + } + cbValid = sf_d->cbValid; + } else { + up(&sf_d->Lock); + return rc; + } + } + + /* + * Feed entries to the caller. + */ + pBuf = sf_d->pBuf; + pEntry = sf_d->pEntry; + do { + /* + * Validate the entry in case the host is messing with us. + * We're ASSUMING the host gives us a zero terminated string (UTF-8) here. + */ + uintptr_t const offEntryInBuf = (uintptr_t)pEntry - (uintptr_t)pBuf; + uint16_t cbSrcName; + uint16_t cchSrcName; + AssertLogRelMsgBreak(offEntryInBuf + RT_UOFFSETOF(SHFLDIRINFO, name.String) <= cbValid, + ("%#llx + %#x vs %#x\n", offEntryInBuf, RT_UOFFSETOF(SHFLDIRINFO, name.String), cbValid)); + cbSrcName = pEntry->name.u16Size; + cchSrcName = pEntry->name.u16Length; + AssertLogRelBreak(offEntryInBuf + RT_UOFFSETOF(SHFLDIRINFO, name.String) + cbSrcName <= cbValid); + AssertLogRelBreak(cchSrcName < cbSrcName); + AssertLogRelBreak(pEntry->name.String.ach[cchSrcName] == '\0'); + + /* + * Filter out '.' and '..' entires. + */ + if ( cchSrcName > 2 + || pEntry->name.String.ach[0] != '.' + || ( cchSrcName == 2 + && pEntry->name.String.ach[1] != '.')) { + int const d_type = vbsf_get_d_type(pEntry->Info.Attr.fMode); + ino_t const d_ino = (ino_t)offPos + 0xbeef; /* very fake */ + bool fContinue; + if (pSuperInfo->fNlsIsUtf8) { +#if RTLNX_VER_MIN(3,11,0) + fContinue = dir_emit(ctx, pEntry->name.String.ach, cchSrcName, d_ino, d_type); +#else + fContinue = filldir(opaque, pEntry->name.String.ach, cchSrcName, offPos, d_ino, d_type) == 0; +#endif + } else { +#if RTLNX_VER_MIN(3,11,0) + fContinue = vbsf_dir_emit_nls(ctx, pEntry->name.String.ach, cchSrcName, d_ino, d_type, pSuperInfo); +#else + fContinue = vbsf_dir_emit_nls(opaque, filldir, offPos, pEntry->name.String.ach, cchSrcName, + d_ino, d_type, pSuperInfo); +#endif + } + if (fContinue) { + /* likely */ + } else { + sf_d->cEntriesLeft = cEntriesLeft; + sf_d->pEntry = pEntry; + sf_d->offPos = offPos; + up(&sf_d->Lock); + return 0; + } + } + + /* + * Advance to the next entry. + */ + pEntry = (PSHFLDIRINFO)((uintptr_t)pEntry + RT_UOFFSETOF(SHFLDIRINFO, name.String) + cbSrcName); + offPos += 1; + dir->f_pos = offPos; +#if RTLNX_VER_MIN(3,11,0) + ctx->pos = offPos; +#endif + cEntriesLeft -= 1; + } while (cEntriesLeft > 0); + + /* Done with all available entries. */ + sf_d->offPos = offPos + cEntriesLeft; + sf_d->pEntry = pBuf; + sf_d->cEntriesLeft = 0; + } +} + + +/** + * Directory file operations. + */ +struct file_operations vbsf_dir_fops = { + .open = vbsf_dir_open, +#if RTLNX_VER_MIN(4,7,0) + .iterate_shared = vbsf_dir_iterate, +#elif RTLNX_VER_MIN(3,11,0) + .iterate = vbsf_dir_iterate, +#else + .readdir = vbsf_dir_read, +#endif + .release = vbsf_dir_release, + .read = generic_read_dir, +#if RTLNX_VER_MIN(2,6,37) + .llseek = generic_file_llseek +#endif +}; + + + +/********************************************************************************************************************************* +* Directory Inode Operations * +*********************************************************************************************************************************/ + +/** + * Worker for vbsf_inode_lookup(), vbsf_create_worker() and + * vbsf_inode_instantiate(). 
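+ *
+ * @returns Pointer to the new inode on success, NULL on failure.  On success
+ *          the inode takes ownership of @a path; on failure the caller still
+ *          owns it and must free it.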
+ */ +static struct inode *vbsf_create_inode(struct inode *parent, struct dentry *dentry, PSHFLSTRING path, + PSHFLFSOBJINFO pObjInfo, struct vbsf_super_info *pSuperInfo, bool fInstantiate) +{ + /* + * Allocate memory for our additional inode info and create an inode. + */ + struct vbsf_inode_info *sf_new_i = (struct vbsf_inode_info *)kmalloc(sizeof(*sf_new_i), GFP_KERNEL); + if (sf_new_i) { + ino_t iNodeNo = iunique(parent->i_sb, 16); +#if RTLNX_VER_MIN(2,4,25) + struct inode *pInode = iget_locked(parent->i_sb, iNodeNo); +#else + struct inode *pInode = iget(parent->i_sb, iNodeNo); +#endif + if (pInode) { + /* + * Initialize the two structures. + */ +#ifdef VBOX_STRICT + sf_new_i->u32Magic = SF_INODE_INFO_MAGIC; +#endif + sf_new_i->path = path; + sf_new_i->force_restat = false; + sf_new_i->ts_up_to_date = jiffies; + RTListInit(&sf_new_i->HandleList); + sf_new_i->handle = SHFL_HANDLE_NIL; + + VBSF_SET_INODE_INFO(pInode, sf_new_i); + vbsf_init_inode(pInode, sf_new_i, pObjInfo, pSuperInfo); + + /* + * Before we unlock the new inode, we may need to call d_instantiate. + */ + if (fInstantiate) + d_instantiate(dentry, pInode); +#if RTLNX_VER_MIN(2,4,25) + unlock_new_inode(pInode); +#endif + return pInode; + + } + LogFunc(("iget failed\n")); + kfree(sf_new_i); + } else + LogRelFunc(("could not allocate memory for new inode info\n")); + return NULL; +} + + +/** Helper for vbsf_create_worker() and vbsf_inode_lookup() that wraps + * d_add() and setting d_op. */ +DECLINLINE(void) vbsf_d_add_inode(struct dentry *dentry, struct inode *pNewInode) +{ +#if RTLNX_VER_MIN(2,6,38) + Assert(dentry->d_op == &vbsf_dentry_ops); /* (taken from the superblock) */ +#else + dentry->d_op = &vbsf_dentry_ops; +#endif + d_add(dentry, pNewInode); +} + + +/** + * This is called when vfs failed to locate dentry in the cache. The + * job of this function is to allocate inode and link it to dentry. + * [dentry] contains the name to be looked in the [parent] directory. + * Failure to locate the name is not a "hard" error, in this case NULL + * inode is added to [dentry] and vfs should proceed trying to create + * the entry via other means. NULL(or "positive" pointer) ought to be + * returned in case of success and "negative" pointer on error + */ +static struct dentry *vbsf_inode_lookup(struct inode *parent, struct dentry *dentry +#if RTLNX_VER_MIN(3,6,0) + , unsigned int flags +#elif RTLNX_VER_MIN(2,6,0) + , struct nameidata *nd +#endif + ) +{ + struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(parent->i_sb); + struct vbsf_inode_info *sf_i = VBSF_GET_INODE_INFO(parent); + SHFLSTRING *path; + struct dentry *dret; + int rc; + +#if RTLNX_VER_MIN(3,6,0) + SFLOGFLOW(("vbsf_inode_lookup: parent=%p dentry=%p flags=%#x\n", parent, dentry, flags)); +#elif RTLNX_VER_MIN(2,6,0) + SFLOGFLOW(("vbsf_inode_lookup: parent=%p dentry=%p nd=%p{.flags=%#x}\n", parent, dentry, nd, nd ? nd->flags : 0)); +#else + SFLOGFLOW(("vbsf_inode_lookup: parent=%p dentry=%p\n", parent, dentry)); +#endif + + Assert(pSuperInfo); + Assert(sf_i && sf_i->u32Magic == SF_INODE_INFO_MAGIC); + + /* + * Build the path. We'll associate the path with dret's inode on success. + */ + rc = vbsf_path_from_dentry(pSuperInfo, sf_i, dentry, &path, __func__); + if (rc == 0) { + /* + * Do a lookup on the host side. 
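+         * SHFL_CF_LOOKUP requests a stat-only operation, so no host handle
+         * should be opened or returned for this request.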
+ */ + VBOXSFCREATEREQ *pReq = (VBOXSFCREATEREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq) + path->u16Size); + if (pReq) { + struct inode *pInode = NULL; + + RT_ZERO(*pReq); + RT_BCOPY_UNFORTIFIED(&pReq->StrPath, path, SHFLSTRING_HEADER_SIZE + path->u16Size); + pReq->CreateParms.Handle = SHFL_HANDLE_NIL; + pReq->CreateParms.CreateFlags = SHFL_CF_LOOKUP | SHFL_CF_ACT_FAIL_IF_NEW; + + SFLOG2(("vbsf_inode_lookup: Calling VbglR0SfHostReqCreate on %s\n", path->String.utf8)); + rc = VbglR0SfHostReqCreate(pSuperInfo->map.root, pReq); + if (RT_SUCCESS(rc)) { + if (pReq->CreateParms.Result == SHFL_FILE_EXISTS) { + /* + * Create an inode for the result. Since this also confirms + * the existence of all parent dentries, we increase their TTL. + */ + pInode = vbsf_create_inode(parent, dentry, path, &pReq->CreateParms.Info, pSuperInfo, false /*fInstantiate*/); + if (rc == 0) { + path = NULL; /* given to the inode */ + dret = dentry; + } else + dret = (struct dentry *)ERR_PTR(-ENOMEM); + vbsf_dentry_chain_increase_parent_ttl(dentry); + } else if ( pReq->CreateParms.Result == SHFL_FILE_NOT_FOUND + || pReq->CreateParms.Result == SHFL_PATH_NOT_FOUND /*this probably should happen*/) { + dret = dentry; + } else { + AssertMsgFailed(("%d\n", pReq->CreateParms.Result)); + dret = (struct dentry *)ERR_PTR(-EPROTO); + } + } else if (rc == VERR_INVALID_NAME) { + SFLOGFLOW(("vbsf_inode_lookup: VERR_INVALID_NAME\n")); + dret = dentry; /* this can happen for names like 'foo*' on a Windows host */ + } else if (rc == VERR_FILENAME_TOO_LONG) { + SFLOG(("vbsf_inode_lookup: VbglR0SfHostReqCreate failed on %s: VERR_FILENAME_TOO_LONG\n", path->String.utf8)); + dret = (struct dentry *)ERR_PTR(-ENAMETOOLONG); + } else { + SFLOG(("vbsf_inode_lookup: VbglR0SfHostReqCreate failed on %s: %Rrc\n", path->String.utf8, rc)); + dret = (struct dentry *)ERR_PTR(-EPROTO); + } + VbglR0PhysHeapFree(pReq); + + /* + * When dret is set to dentry we got something to insert, + * though it may be negative (pInode == NULL). + */ + if (dret == dentry) { + vbsf_dentry_set_update_jiffies(dentry, jiffies); + vbsf_d_add_inode(dentry, pInode); + dret = NULL; + } + } else { + SFLOGFLOW(("vbsf_inode_lookup: -ENOMEM (phys heap)\n")); + dret = (struct dentry *)ERR_PTR(-ENOMEM); + } + if (path) + kfree(path); + } else { + SFLOG(("vbsf_inode_lookup: vbsf_path_from_dentry failed: %d\n", rc)); + dret = (struct dentry *)ERR_PTR(rc); + } + return dret; +} + + +/** + * This should allocate memory for vbsf_inode_info, compute a unique inode + * number, get an inode from vfs, initialize inode info, instantiate + * dentry. + * + * @param parent inode entry of the directory + * @param dentry directory cache entry + * @param path path name. Consumed on success. + * @param info file information + * @param handle handle + * @returns 0 on success, Linux error code otherwise + */ +static int vbsf_inode_instantiate(struct inode *parent, struct dentry *dentry, PSHFLSTRING path, + PSHFLFSOBJINFO info, SHFLHANDLE handle) +{ + struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(parent->i_sb); + struct inode *pInode = vbsf_create_inode(parent, dentry, path, info, pSuperInfo, true /*fInstantiate*/); + if (pInode) { + /* Store this handle if we leave the handle open. */ + struct vbsf_inode_info *sf_new_i = VBSF_GET_INODE_INFO(pInode); + sf_new_i->handle = handle; + return 0; + } + return -ENOMEM; +} + + +/** + * Create a new regular file / directory. 
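+ *
+ * Shared worker for vbsf_inode_create(), vbsf_inode_mkdir() and, on newer
+ * kernels, vbsf_inode_atomic_open().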
+ * + * @param parent inode of the directory + * @param dentry directory cache entry + * @param mode file mode + * @param fCreateFlags SHFL_CF_XXX. + * @param fStashHandle Whether the resulting handle should be stashed in + * the inode for a subsequent open call. + * @param fDoLookup Whether we're doing a lookup and need to d_add the + * inode we create to dentry. + * @param phHostFile Where to return the handle to the create file/dir. + * @param pfCreated Where to indicate whether the file/dir was created + * or not. Optional. + * @returns 0 on success, Linux error code otherwise + */ +static int vbsf_create_worker(struct inode *parent, struct dentry *dentry, umode_t mode, uint32_t fCreateFlags, + bool fStashHandle, bool fDoLookup, SHFLHANDLE *phHostFile, bool *pfCreated) + +{ +#ifdef SFLOG_ENABLED + const char * const pszPrefix = S_ISDIR(mode) ? "vbsf_create_worker/dir:" : "vbsf_create_worker/file:"; +#endif + struct vbsf_inode_info *sf_parent_i = VBSF_GET_INODE_INFO(parent); + struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(parent->i_sb); + PSHFLSTRING path; + int rc; + + if (pfCreated) + *pfCreated = false; + AssertReturn(sf_parent_i, -EINVAL); + AssertReturn(pSuperInfo, -EINVAL); + + /* + * Build a path. We'll donate this to the inode on success. + */ + rc = vbsf_path_from_dentry(pSuperInfo, sf_parent_i, dentry, &path, __func__); + if (rc == 0) { + /* + * Allocate, initialize and issue the SHFL_CREATE request. + */ + /** @todo combine with vbsf_path_from_dentry? */ + union CreateAuxReq + { + VBOXSFCREATEREQ Create; + VBOXSFCLOSEREQ Close; + } *pReq = (union CreateAuxReq *)VbglR0PhysHeapAlloc(RT_UOFFSETOF(VBOXSFCREATEREQ, StrPath.String) + path->u16Size); + if (pReq) { + RT_BCOPY_UNFORTIFIED(&pReq->Create.StrPath, path, SHFLSTRING_HEADER_SIZE + path->u16Size); + RT_ZERO(pReq->Create.CreateParms); + pReq->Create.CreateParms.Handle = SHFL_HANDLE_NIL; + pReq->Create.CreateParms.CreateFlags = fCreateFlags; + pReq->Create.CreateParms.Info.Attr.fMode = (S_ISDIR(mode) ? RTFS_TYPE_DIRECTORY : RTFS_TYPE_FILE) + | sf_access_permissions_to_vbox(mode); + pReq->Create.CreateParms.Info.Attr.enmAdditional = SHFLFSOBJATTRADD_NOTHING; + + SFLOGFLOW(("%s calling VbglR0SfHostReqCreate(%s, %#x)\n", pszPrefix, path->String.ach, pReq->Create.CreateParms.CreateFlags)); + rc = VbglR0SfHostReqCreate(pSuperInfo->map.root, &pReq->Create); + if (RT_SUCCESS(rc)) { + SFLOGFLOW(("%s VbglR0SfHostReqCreate returned %Rrc Result=%d Handle=%#llx\n", + pszPrefix, rc, pReq->Create.CreateParms.Result, pReq->Create.CreateParms.Handle)); + + /* + * Work the dentry cache and inode restatting. + */ + if ( pReq->Create.CreateParms.Result == SHFL_FILE_CREATED + || pReq->Create.CreateParms.Result == SHFL_FILE_REPLACED) { + vbsf_dentry_chain_increase_parent_ttl(dentry); + sf_parent_i->force_restat = 1; + } else if ( pReq->Create.CreateParms.Result == SHFL_FILE_EXISTS + || pReq->Create.CreateParms.Result == SHFL_FILE_NOT_FOUND) + vbsf_dentry_chain_increase_parent_ttl(dentry); + + /* + * If we got a handle back, we're good. Create an inode for it and return. 
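+                 * Depending on the caller, the handle is either returned
+                 * (phHostFile), stashed in the inode (fStashHandle), or closed
+                 * again by the cleanup code at the bottom of the function.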
+ */ + if (pReq->Create.CreateParms.Handle != SHFL_HANDLE_NIL) { + struct inode *pNewInode = vbsf_create_inode(parent, dentry, path, &pReq->Create.CreateParms.Info, pSuperInfo, + !fDoLookup /*fInstantiate*/); + if (pNewInode) { + struct vbsf_inode_info *sf_new_i = VBSF_GET_INODE_INFO(pNewInode); + if (phHostFile) { + *phHostFile = pReq->Create.CreateParms.Handle; + pReq->Create.CreateParms.Handle = SHFL_HANDLE_NIL; + } else if (fStashHandle) { + sf_new_i->handle = pReq->Create.CreateParms.Handle; + pReq->Create.CreateParms.Handle = SHFL_HANDLE_NIL; + } + if (pfCreated) + *pfCreated = pReq->Create.CreateParms.Result == SHFL_FILE_CREATED; + if (fDoLookup) + vbsf_d_add_inode(dentry, pNewInode); + path = NULL; + } else { + SFLOGFLOW(("%s vbsf_create_inode failed: -ENOMEM (path %s)\n", pszPrefix, rc, path->String.ach)); + rc = -ENOMEM; + } + } else if (pReq->Create.CreateParms.Result == SHFL_FILE_EXISTS) { + /* + * For atomic_open (at least), we should create an inode and + * convert the dentry from a negative to a positive one. + */ + SFLOGFLOW(("%s SHFL_FILE_EXISTS for %s\n", pszPrefix, sf_parent_i->path->String.ach)); + if (fDoLookup) { + struct inode *pNewInode = vbsf_create_inode(parent, dentry, path, &pReq->Create.CreateParms.Info, + pSuperInfo, false /*fInstantiate*/); + if (pNewInode) + vbsf_d_add_inode(dentry, pNewInode); + path = NULL; + } + rc = -EEXIST; + } else if (pReq->Create.CreateParms.Result == SHFL_FILE_NOT_FOUND) { + SFLOGFLOW(("%s SHFL_FILE_NOT_FOUND for %s\n", pszPrefix, sf_parent_i->path->String.ach)); + rc = -ENOENT; + } else if (pReq->Create.CreateParms.Result == SHFL_PATH_NOT_FOUND) { + SFLOGFLOW(("%s SHFL_PATH_NOT_FOUND for %s\n", pszPrefix, sf_parent_i->path->String.ach)); + rc = -ENOENT; + } else { + AssertMsgFailed(("result=%d creating '%s'\n", pReq->Create.CreateParms.Result, sf_parent_i->path->String.ach)); + rc = -EPERM; + } + } else { + int const vrc = rc; + rc = -RTErrConvertToErrno(vrc); + SFLOGFLOW(("%s SHFL_FN_CREATE(%s) failed vrc=%Rrc rc=%d\n", pszPrefix, path->String.ach, vrc, rc)); + } + + /* Cleanups. */ + if (pReq->Create.CreateParms.Handle != SHFL_HANDLE_NIL) { + AssertCompile(RTASSERT_OFFSET_OF(VBOXSFCREATEREQ, CreateParms.Handle) > sizeof(VBOXSFCLOSEREQ)); /* no aliasing issues */ + int rc2 = VbglR0SfHostReqClose(pSuperInfo->map.root, &pReq->Close, pReq->Create.CreateParms.Handle); + if (RT_FAILURE(rc2)) + SFLOGFLOW(("%s VbglR0SfHostReqCloseSimple failed rc=%Rrc\n", pszPrefix, rc2)); + } + VbglR0PhysHeapFree(pReq); + } else + rc = -ENOMEM; + if (path) + kfree(path); + } + return rc; +} + + +#if RTLNX_VER_MIN(3,16,0) +/** + * More atomic way of handling creation. + * + * Older kernels would first to a lookup that created the file, followed by + * an open call. We've got this horrid vbsf_inode_info::handle member because + * of that approach. The call combines the lookup and open. + */ +static int vbsf_inode_atomic_open(struct inode *pDirInode, struct dentry *dentry, struct file *file, unsigned fOpen, + umode_t fMode +# if RTLNX_VER_MAX(4,19,0) + , int *opened +# endif + ) +{ + SFLOGFLOW(("vbsf_inode_atomic_open: pDirInode=%p dentry=%p file=%p fOpen=%#x, fMode=%#x\n", pDirInode, dentry, file, fOpen, fMode)); + int rc; + + /* Code assumes negative dentry. */ + Assert(dentry->d_inode == NULL); + + /** @todo see if we can do this for non-create calls too, as it may save us a + * host call to revalidate the dentry. (Can't see anyone else doing + * this, so playing it safe for now.) */ + if (fOpen & O_CREAT) { + /* + * Prepare our file info structure. 
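+         * We must set up file->private_data ourselves here, since
+         * finish_open() below is handed generic_file_open rather than our
+         * regular open handler.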
+ */ + struct vbsf_reg_info *sf_r = kmalloc(sizeof(*sf_r), GFP_KERNEL); + if (sf_r) { + bool fCreated = false; + uint32_t fCreateFlags; + + RTListInit(&sf_r->Handle.Entry); + sf_r->Handle.cRefs = 1; + sf_r->Handle.fFlags = !(fOpen & O_DIRECTORY) + ? VBSF_HANDLE_F_FILE | VBSF_HANDLE_F_MAGIC + : VBSF_HANDLE_F_DIR | VBSF_HANDLE_F_MAGIC; + sf_r->Handle.hHost = SHFL_HANDLE_NIL; + + /* + * Try create it. + */ + /* vbsf_create_worker uses the type from fMode, so match it up to O_DIRECTORY. */ + AssertMsg(!(fMode & S_IFMT) || (fMode & S_IFMT) == (fOpen & O_DIRECTORY ? S_IFDIR : S_IFREG), ("0%o\n", fMode)); + if (!(fOpen & O_DIRECTORY)) + fMode = (fMode & ~S_IFMT) | S_IFREG; + else + fMode = (fMode & ~S_IFMT) | S_IFDIR; + + fCreateFlags = vbsf_linux_oflags_to_vbox(fOpen, &sf_r->Handle.fFlags, __FUNCTION__); + + rc = vbsf_create_worker(pDirInode, dentry, fMode, fCreateFlags, false /*fStashHandle*/, true /*fDoLookup*/, + &sf_r->Handle.hHost, &fCreated); + if (rc == 0) { + struct inode *inode = dentry->d_inode; + struct vbsf_inode_info *sf_i = VBSF_GET_INODE_INFO(inode); + + /* + * Set FMODE_CREATED according to the action taken by SHFL_CREATE + * and call finish_open() to do the remaining open() work. + */ +# if RTLNX_VER_MIN(4,19,0) + if (fCreated) + file->f_mode |= FMODE_CREATED; + rc = finish_open(file, dentry, generic_file_open); +# else + if (fCreated) + *opened |= FILE_CREATED; + rc = finish_open(file, dentry, generic_file_open, opened); +# endif + if (rc == 0) { + /* + * Now that the file is fully opened, associate sf_r with it + * and link the handle to the inode. + */ + vbsf_handle_append(sf_i, &sf_r->Handle); + file->private_data = sf_r; + SFLOGFLOW(("vbsf_inode_atomic_open: create succeeded; hHost=%#llx path='%s'\n", + rc, sf_r->Handle.hHost, sf_i->path->String.ach)); + sf_r = NULL; /* don't free it */ + } else { + struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(pDirInode->i_sb); + SFLOGFLOW(("vbsf_inode_atomic_open: finish_open failed: %d (path='%s'\n", rc, sf_i->path->String.ach)); + VbglR0SfHostReqCloseSimple(pSuperInfo->map.root, sf_r->Handle.hHost); + sf_r->Handle.hHost = SHFL_HANDLE_NIL; + } + } else + SFLOGFLOW(("vbsf_inode_atomic_open: vbsf_create_worker failed: %d\n", rc)); + if (sf_r) + kfree(sf_r); + } else { + LogRelMaxFunc(64, ("could not allocate reg info\n")); + rc = -ENOMEM; + } + } + /* + * Not creating anything. + * Do we need to do a lookup or should we just fail? + */ + else if (d_in_lookup(dentry)) { + struct dentry *pResult = vbsf_inode_lookup(pDirInode, dentry, 0 /*fFlags*/); + if (!IS_ERR(pResult)) + rc = finish_no_open(file, pResult); + else + rc = PTR_ERR(pResult); + SFLOGFLOW(("vbsf_inode_atomic_open: open -> %d (%p)\n", rc, pResult)); + } else { + SFLOGFLOW(("vbsf_inode_atomic_open: open -> -ENOENT\n")); + rc = -ENOENT; + } + return rc; +} +#endif /* 3.6.0 */ + + +/** + * Create a new regular file. + * + * @param idmap idmap of the mount. + * @param parent inode of the directory + * @param dentry directory cache entry + * @param mode file mode + * @param excl Possible O_EXCL... 
+ * @returns 0 on success, Linux error code otherwise + */ +#if RTLNX_VER_MIN(6,3,0) || defined(DOXYGEN_RUNNING) +static int vbsf_inode_create(struct mnt_idmap *idmap, struct inode *parent, struct dentry *dentry, umode_t mode, bool excl) +#elif RTLNX_VER_MIN(5,12,0) +static int vbsf_inode_create(struct user_namespace *ns, struct inode *parent, struct dentry *dentry, umode_t mode, bool excl) +#elif RTLNX_VER_MIN(3,6,0) +static int vbsf_inode_create(struct inode *parent, struct dentry *dentry, umode_t mode, bool excl) +#elif RTLNX_VER_MIN(3,3,0) +static int vbsf_inode_create(struct inode *parent, struct dentry *dentry, umode_t mode, struct nameidata *nd) +#elif RTLNX_VER_MIN(2,5,75) +static int vbsf_inode_create(struct inode *parent, struct dentry *dentry, int mode, struct nameidata *nd) +#else +static int vbsf_inode_create(struct inode *parent, struct dentry *dentry, int mode) +#endif +{ + uint32_t fCreateFlags = SHFL_CF_ACT_CREATE_IF_NEW + | SHFL_CF_ACT_FAIL_IF_EXISTS + | SHFL_CF_ACCESS_READWRITE; +#if RTLNX_VER_RANGE(2,5,75, 3,6,0) + /* Clear the RD flag if write-only access requested. Otherwise assume we + need write access to create stuff. */ + if (!(nd->intent.open.flags & 1) ) { + fCreateFlags &= SHFL_CF_ACCESS_READWRITE; + fCreateFlags |= SHFL_CF_ACCESS_WRITE; + } + /* (file since 2.6.15) */ +#endif + TRACE(); + AssertMsg(!(mode & S_IFMT) || (mode & S_IFMT) == S_IFREG, ("0%o\n", mode)); + return vbsf_create_worker(parent, dentry, (mode & ~S_IFMT) | S_IFREG, fCreateFlags, + true /*fStashHandle*/, false /*fDoLookup*/, NULL /*phHandle*/, NULL /*fCreated*/); +} + + +/** + * Create a new directory. + * + * @param idmap idmap of the mount. + * @param parent inode of the directory + * @param dentry directory cache entry + * @param mode file mode + * @returns 0 on success, Linux error code otherwise + */ +#if RTLNX_VER_MIN(6,3,0) || defined(DOXYGEN_RUNNING) +static int vbsf_inode_mkdir(struct mnt_idmap *idmap, struct inode *parent, struct dentry *dentry, umode_t mode) +#elif RTLNX_VER_MIN(5,12,0) +static int vbsf_inode_mkdir(struct user_namespace *ns, struct inode *parent, struct dentry *dentry, umode_t mode) +#elif RTLNX_VER_MIN(3,3,0) +static int vbsf_inode_mkdir(struct inode *parent, struct dentry *dentry, umode_t mode) +#else +static int vbsf_inode_mkdir(struct inode *parent, struct dentry *dentry, int mode) +#endif +{ + TRACE(); + AssertMsg(!(mode & S_IFMT) || (mode & S_IFMT) == S_IFDIR, ("0%o\n", mode)); + return vbsf_create_worker(parent, dentry, (mode & ~S_IFMT) | S_IFDIR, + SHFL_CF_ACT_CREATE_IF_NEW + | SHFL_CF_ACT_FAIL_IF_EXISTS + | SHFL_CF_ACCESS_READWRITE + | SHFL_CF_DIRECTORY, + false /*fStashHandle*/, false /*fDoLookup*/, NULL /*phHandle*/, NULL /*fCreated*/); +} + + +/** + * Remove a regular file / directory. 
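+ *
+ * Shared worker for vbsf_inode_unlink() and vbsf_inode_rmdir().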
+ * + * @param parent inode of the directory + * @param dentry directory cache entry + * @param fDirectory true if directory, false otherwise + * @returns 0 on success, Linux error code otherwise + */ +static int vbsf_unlink_worker(struct inode *parent, struct dentry *dentry, int fDirectory) +{ + struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(parent->i_sb); + struct vbsf_inode_info *sf_parent_i = VBSF_GET_INODE_INFO(parent); + SHFLSTRING *path; + int rc; + + TRACE(); + + rc = vbsf_path_from_dentry(pSuperInfo, sf_parent_i, dentry, &path, __func__); + if (!rc) { + VBOXSFREMOVEREQ *pReq = (VBOXSFREMOVEREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF(VBOXSFREMOVEREQ, StrPath.String) + + path->u16Size); + if (pReq) { + RT_BCOPY_UNFORTIFIED(&pReq->StrPath, path, SHFLSTRING_HEADER_SIZE + path->u16Size); + uint32_t fFlags = fDirectory ? SHFL_REMOVE_DIR : SHFL_REMOVE_FILE; + if (dentry->d_inode && ((dentry->d_inode->i_mode & S_IFLNK) == S_IFLNK)) + fFlags |= SHFL_REMOVE_SYMLINK; + + rc = VbglR0SfHostReqRemove(pSuperInfo->map.root, pReq, fFlags); + + if (dentry->d_inode) { + struct vbsf_inode_info *sf_i = VBSF_GET_INODE_INFO(dentry->d_inode); + sf_i->force_restat = true; + } + + if (RT_SUCCESS(rc)) { + sf_parent_i->force_restat = true; /* directory access/change time changed */ + rc = 0; + } else if (rc == VERR_FILE_NOT_FOUND || rc == VERR_PATH_NOT_FOUND) { + /* Probably deleted on the host while the guest had it cached, so don't complain: */ + LogFunc(("(%d): VbglR0SfRemove(%s) failed rc=%Rrc; calling d_drop on %p\n", + fDirectory, path->String.ach, rc, dentry)); + sf_parent_i->force_restat = true; + d_drop(dentry); + rc = 0; + } else { + LogFunc(("(%d): VbglR0SfRemove(%s) failed rc=%Rrc\n", fDirectory, path->String.ach, rc)); + rc = -RTErrConvertToErrno(rc); + } + VbglR0PhysHeapFree(pReq); + } else + rc = -ENOMEM; + kfree(path); + } + return rc; +} + + +/** + * Remove a regular file. + * + * @param parent inode of the directory + * @param dentry directory cache entry + * @returns 0 on success, Linux error code otherwise + */ +static int vbsf_inode_unlink(struct inode *parent, struct dentry *dentry) +{ + TRACE(); + return vbsf_unlink_worker(parent, dentry, false /*fDirectory*/); +} + + +/** + * Remove a directory. + * + * @param parent inode of the directory + * @param dentry directory cache entry + * @returns 0 on success, Linux error code otherwise + */ +static int vbsf_inode_rmdir(struct inode *parent, struct dentry *dentry) +{ + TRACE(); + return vbsf_unlink_worker(parent, dentry, true /*fDirectory*/); +} + + +/** + * Rename a regular file / directory. + * + * @param idmap idmap of the mount. 
+ * @param old_parent inode of the old parent directory + * @param old_dentry old directory cache entry + * @param new_parent inode of the new parent directory + * @param new_dentry new directory cache entry + * @param flags flags + * @returns 0 on success, Linux error code otherwise + */ +#if RTLNX_VER_MIN(6,3,0) || defined(DOXYGEN_RUNNING) +static int vbsf_inode_rename(struct mnt_idmap *idmap, + struct inode *old_parent, struct dentry *old_dentry, + struct inode *new_parent, struct dentry *new_dentry, unsigned flags) +#elif RTLNX_VER_MIN(5,12,0) +static int vbsf_inode_rename(struct user_namespace *ns, + struct inode *old_parent, struct dentry *old_dentry, + struct inode *new_parent, struct dentry *new_dentry, unsigned flags) +#else +static int vbsf_inode_rename(struct inode *old_parent, struct dentry *old_dentry, + struct inode *new_parent, struct dentry *new_dentry, unsigned flags) +#endif +{ + /* + * Deal with flags. + */ + int rc; + uint32_t fRename = (old_dentry->d_inode->i_mode & S_IFDIR ? SHFL_RENAME_DIR : SHFL_RENAME_FILE) + | SHFL_RENAME_REPLACE_IF_EXISTS; +#if RTLNX_VER_MIN(3,15,0) + if (!(flags & ~RENAME_NOREPLACE)) { + if (flags & RENAME_NOREPLACE) + fRename &= ~SHFL_RENAME_REPLACE_IF_EXISTS; +#endif + /* + * Check that they are on the same mount. + */ + struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(old_parent->i_sb); + if (pSuperInfo == VBSF_GET_SUPER_INFO(new_parent->i_sb)) { + /* + * Build the new path. + */ + struct vbsf_inode_info *sf_new_parent_i = VBSF_GET_INODE_INFO(new_parent); + PSHFLSTRING pNewPath; + rc = vbsf_path_from_dentry(pSuperInfo, sf_new_parent_i, new_dentry, &pNewPath, __func__); + if (rc == 0) { + /* + * Create and issue the rename request. + */ + VBOXSFRENAMEWITHSRCBUFREQ *pReq; + pReq = (VBOXSFRENAMEWITHSRCBUFREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF(VBOXSFRENAMEWITHSRCBUFREQ, StrDstPath.String) + + pNewPath->u16Size); + if (pReq) { + struct vbsf_inode_info *sf_file_i = VBSF_GET_INODE_INFO(old_dentry->d_inode); + PSHFLSTRING pOldPath = sf_file_i->path; + + RT_BCOPY_UNFORTIFIED(&pReq->StrDstPath, pNewPath, SHFLSTRING_HEADER_SIZE + pNewPath->u16Size); + rc = VbglR0SfHostReqRenameWithSrcContig(pSuperInfo->map.root, pReq, pOldPath, virt_to_phys(pOldPath), fRename); + VbglR0PhysHeapFree(pReq); + if (RT_SUCCESS(rc)) { + /* + * On success we replace the path in the inode and trigger + * restatting of both parent directories. 
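+                     * The inode keeps pNewPath from here on; the old path
+                     * buffer is freed and pNewPath is set to NULL so the
+                     * common exit code does not free it again.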
+ */ + struct vbsf_inode_info *sf_old_parent_i = VBSF_GET_INODE_INFO(old_parent); + SFLOGFLOW(("vbsf_inode_rename: %s -> %s (%#x)\n", pOldPath->String.ach, pNewPath->String.ach, fRename)); + + sf_file_i->path = pNewPath; + kfree(pOldPath); + pNewPath = NULL; + + sf_new_parent_i->force_restat = 1; + sf_old_parent_i->force_restat = 1; + + vbsf_dentry_chain_increase_parent_ttl(old_dentry); + vbsf_dentry_chain_increase_parent_ttl(new_dentry); + + rc = 0; + } else { + SFLOGFLOW(("vbsf_inode_rename: VbglR0SfHostReqRenameWithSrcContig(%s,%s,%#x) failed -> %d\n", + pOldPath->String.ach, pNewPath->String.ach, fRename, rc)); + if (rc == VERR_IS_A_DIRECTORY || rc == VERR_IS_A_FILE) + vbsf_dentry_invalidate_ttl(old_dentry); + rc = -RTErrConvertToErrno(rc); + } + } else { + SFLOGFLOW(("vbsf_inode_rename: failed to allocate request (%#x bytes)\n", + RT_UOFFSETOF(VBOXSFRENAMEWITHSRCBUFREQ, StrDstPath.String) + pNewPath->u16Size)); + rc = -ENOMEM; + } + if (pNewPath) + kfree(pNewPath); + } else + SFLOGFLOW(("vbsf_inode_rename: vbsf_path_from_dentry failed: %d\n", rc)); + } else { + SFLOGFLOW(("vbsf_inode_rename: rename with different roots (%#x vs %#x)\n", + pSuperInfo->map.root, VBSF_GET_SUPER_INFO(new_parent->i_sb)->map.root)); + rc = -EXDEV; + } +#if RTLNX_VER_MIN(3,15,0) + } else { + SFLOGFLOW(("vbsf_inode_rename: Unsupported flags: %#x\n", flags)); + rc = -EINVAL; + } +#else + RT_NOREF(flags); +#endif + return rc; +} + + +#if RTLNX_VER_MAX(4,9,0) +/** + * The traditional rename interface without any flags. + */ +static int vbsf_inode_rename_no_flags(struct inode *old_parent, struct dentry *old_dentry, + struct inode *new_parent, struct dentry *new_dentry) +{ + return vbsf_inode_rename(old_parent, old_dentry, new_parent, new_dentry, 0); +} +#endif + + +/** + * Create a symbolic link. + */ +#if RTLNX_VER_MIN(6,3,0) +static int vbsf_inode_symlink(struct mnt_idmap *idmap, struct inode *parent, struct dentry *dentry, const char *target) +#elif RTLNX_VER_MIN(5,12,0) +static int vbsf_inode_symlink(struct user_namespace *ns, struct inode *parent, struct dentry *dentry, const char *target) +#else +static int vbsf_inode_symlink(struct inode *parent, struct dentry *dentry, const char *target) +#endif +{ + /* + * Turn the target into a string (contiguous physcial memory). + */ + /** @todo we can save a kmalloc here if we switch to embedding the target rather + * than the symlink path into the request. Will require more NLS helpers. */ + struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(parent->i_sb); + PSHFLSTRING pTarget = NULL; + int rc = vbsf_nls_to_shflstring(pSuperInfo, target, &pTarget); + if (rc == 0) { + /* + * Create a full path for the symlink name. + */ + struct vbsf_inode_info *sf_i = VBSF_GET_INODE_INFO(parent); + PSHFLSTRING pPath = NULL; + rc = vbsf_path_from_dentry(pSuperInfo, sf_i, dentry, &pPath, __func__); + if (rc == 0) { + /* + * Create the request and issue it. + */ + uint32_t const cbReq = RT_UOFFSETOF(VBOXSFCREATESYMLINKREQ, StrSymlinkPath.String) + pPath->u16Size; + VBOXSFCREATESYMLINKREQ *pReq = (VBOXSFCREATESYMLINKREQ *)VbglR0PhysHeapAlloc(cbReq); + if (pReq) { + RT_ZERO(*pReq); + RT_BCOPY_UNFORTIFIED(&pReq->StrSymlinkPath, pPath, SHFLSTRING_HEADER_SIZE + pPath->u16Size); + + rc = VbglR0SfHostReqCreateSymlinkContig(pSuperInfo->map.root, pTarget, virt_to_phys(pTarget), pReq); + if (RT_SUCCESS(rc)) { + sf_i->force_restat = 1; + + /* + * Instantiate a new inode for the symlink. 
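+                 * On success the inode consumes pPath, which is therefore set
+                 * to NULL so the cleanup below does not free it.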
+ */ + rc = vbsf_inode_instantiate(parent, dentry, pPath, &pReq->ObjInfo, SHFL_HANDLE_NIL); + if (rc == 0) { + SFLOGFLOW(("vbsf_inode_symlink: Successfully created '%s' -> '%s'\n", pPath->String.ach, pTarget->String.ach)); + pPath = NULL; /* consumed by inode */ + vbsf_dentry_chain_increase_ttl(dentry); + } else { + SFLOGFLOW(("vbsf_inode_symlink: Failed to create inode for '%s': %d\n", pPath->String.ach, rc)); + vbsf_dentry_chain_increase_parent_ttl(dentry); + vbsf_dentry_invalidate_ttl(dentry); + } + } else { + int const vrc = rc; + if (vrc == VERR_WRITE_PROTECT) + rc = -EPERM; /* EPERM: Symlink creation not supported according to the linux manpage as of 2017-09-15. + "VBoxInternal2/SharedFoldersEnableSymlinksCreate/" is not 1. */ + else + rc = -RTErrConvertToErrno(vrc); + SFLOGFLOW(("vbsf_inode_symlink: VbglR0SfHostReqCreateSymlinkContig failed for '%s' -> '%s': %Rrc (-> %d)\n", + pPath->String.ach, pTarget->String.ach, vrc, rc)); + } + VbglR0PhysHeapFree(pReq); + } else { + SFLOGFLOW(("vbsf_inode_symlink: failed to allocate %u phys heap for the request!\n", cbReq)); + rc = -ENOMEM; + } + if (pPath) + kfree(pPath); + } + kfree(pTarget); + } + return rc; +} + + +/** + * Directory inode operations. + */ +struct inode_operations vbsf_dir_iops = { + .lookup = vbsf_inode_lookup, +#if RTLNX_VER_MIN(3,16,0) + .atomic_open = vbsf_inode_atomic_open, +#endif + .create = vbsf_inode_create, + .symlink = vbsf_inode_symlink, + .mkdir = vbsf_inode_mkdir, + .rmdir = vbsf_inode_rmdir, + .unlink = vbsf_inode_unlink, +#if RTLNX_VER_MIN(4,9,0) + .rename = vbsf_inode_rename, +#else +# if RTLNX_VER_MAX(3,17,0) + .rename = vbsf_inode_rename_no_flags, +# endif +# if RTLNX_VER_MIN(3,15,0) + .rename2 = vbsf_inode_rename, +# endif +#endif +#if RTLNX_VER_MIN(2,5,18) + .getattr = vbsf_inode_getattr, +#else + .revalidate = vbsf_inode_revalidate, +#endif + .setattr = vbsf_inode_setattr, +}; + diff --git a/src/VBox/Additions/linux/sharedfolders/files_vboxsf b/src/VBox/Additions/linux/sharedfolders/files_vboxsf new file mode 100755 index 00000000..8187ad33 --- /dev/null +++ b/src/VBox/Additions/linux/sharedfolders/files_vboxsf @@ -0,0 +1,107 @@ +#!/bin/sh +# $Id: files_vboxsf $ +## @file +# Shared file between Makefile.kmk and export_modules.sh. +# + +# +# Copyright (C) 2007-2023 Oracle and/or its affiliates. +# +# This file is part of VirtualBox base platform packages, as +# available from https://www.virtualbox.org. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation, in version 3 of the +# License. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, see . 
+# +# SPDX-License-Identifier: GPL-3.0-only +# + +FILES_VBOXSF_NOBIN=" \ + ${PATH_ROOT}/include/iprt/nocrt/limits.h=>include/iprt/nocrt/limits.h \ + ${PATH_ROOT}/include/iprt/alloc.h=>include/iprt/alloc.h \ + ${PATH_ROOT}/include/iprt/asm.h=>include/iprt/asm.h \ + ${PATH_ROOT}/include/iprt/asm-amd64-x86.h=>include/iprt/asm-amd64-x86.h \ + ${PATH_ROOT}/include/iprt/asm-math.h=>include/iprt/asm-math.h \ + ${PATH_ROOT}/include/iprt/assert.h=>include/iprt/assert.h \ + ${PATH_ROOT}/include/iprt/assertcompile.h=>include/iprt/assertcompile.h \ + ${PATH_ROOT}/include/iprt/cdefs.h=>include/iprt/cdefs.h \ + ${PATH_ROOT}/include/iprt/err.h=>include/iprt/err.h \ + ${PATH_ROOT}/include/iprt/errcore.h=>include/iprt/errcore.h \ + ${PATH_ROOT}/include/iprt/fs.h=>include/iprt/fs.h \ + ${PATH_ROOT}/include/iprt/latin1.h=>include/iprt/latin1.h \ + ${PATH_ROOT}/include/iprt/list.h=>include/iprt/list.h \ + ${PATH_ROOT}/include/iprt/log.h=>include/iprt/log.h \ + ${PATH_ROOT}/include/iprt/mangling.h=>include/iprt/mangling.h \ + ${PATH_ROOT}/include/iprt/mem.h=>include/iprt/mem.h \ + ${PATH_ROOT}/include/iprt/memobj.h=>include/iprt/memobj.h \ + ${PATH_ROOT}/include/iprt/param.h=>include/iprt/param.h \ + ${PATH_ROOT}/include/iprt/path.h=>include/iprt/path.h \ + ${PATH_ROOT}/include/iprt/semaphore.h=>include/iprt/semaphore.h \ + ${PATH_ROOT}/include/iprt/stdarg.h=>include/iprt/stdarg.h \ + ${PATH_ROOT}/include/iprt/stdint.h=>include/iprt/stdint.h \ + ${PATH_ROOT}/include/iprt/string.h=>include/iprt/string.h \ + ${PATH_ROOT}/include/iprt/time.h=>include/iprt/time.h \ + ${PATH_ROOT}/include/iprt/types.h=>include/iprt/types.h \ + ${PATH_ROOT}/include/iprt/uint64.h=>include/iprt/uint64.h \ + ${PATH_ROOT}/include/iprt/uni.h=>include/iprt/uni.h \ + ${PATH_ROOT}/include/iprt/utf16.h=>include/iprt/utf16.h \ + ${PATH_ROOT}/include/iprt/x86-helpers.h=>include/iprt/x86-helpers.h \ + ${PATH_ROOT}/include/iprt/linux/version.h=>include/iprt/linux/version.h \ + ${PATH_ROOT}/include/VBox/cdefs.h=>include/VBox/cdefs.h \ + ${PATH_ROOT}/include/VBox/err.h=>include/VBox/err.h \ + ${PATH_ROOT}/include/VBox/log.h=>include/VBox/log.h \ + ${PATH_ROOT}/include/VBox/ostypes.h=>include/VBox/ostypes.h \ + ${PATH_ROOT}/include/VBox/param.h=>include/VBox/param.h \ + ${PATH_ROOT}/include/VBox/shflsvc.h=>include/VBox/shflsvc.h \ + ${PATH_ROOT}/include/VBox/types.h=>include/VBox/types.h \ + ${PATH_ROOT}/include/VBox/VBoxGuest.h=>include/VBox/VBoxGuest.h \ + ${PATH_ROOT}/include/VBox/VBoxGuestCoreTypes.h=>include/VBox/VBoxGuestCoreTypes.h \ + ${PATH_ROOT}/include/VBox/VBoxGuestLib.h=>include/VBox/VBoxGuestLib.h \ + ${PATH_ROOT}/include/VBox/VBoxGuestLibSharedFolders.h=>include/VBox/VBoxGuestLibSharedFolders.h \ + ${PATH_ROOT}/include/VBox/VBoxGuestLibSharedFoldersInline.h=>include/VBox/VBoxGuestLibSharedFoldersInline.h \ + ${PATH_ROOT}/include/VBox/VBoxGuestMangling.h=>include/VBox/VBoxGuestMangling.h \ + ${PATH_ROOT}/include/VBox/VMMDev.h=>include/VBox/VMMDev.h \ + ${PATH_ROOT}/include/VBox/VMMDevCoreTypes.h=>include/VBox/VMMDevCoreTypes.h \ + ${PATH_ROOT}/src/VBox/Additions/common/VBoxGuest/lib/VBoxGuestR0LibInternal.h=>VBoxGuestR0LibInternal.h \ + ${PATH_ROOT}/src/VBox/Additions/common/VBoxGuest/lib/VBoxGuestR0LibGenericRequest.cpp=>VBoxGuestR0LibGenericRequest.c \ + ${PATH_ROOT}/src/VBox/Additions/common/VBoxGuest/lib/VBoxGuestR0LibHGCM.cpp=>VBoxGuestR0LibHGCM.c \ + ${PATH_ROOT}/src/VBox/Additions/common/VBoxGuest/lib/VBoxGuestR0LibIdc.cpp=>VBoxGuestR0LibIdc.c \ + 
${PATH_ROOT}/src/VBox/Additions/common/VBoxGuest/lib/VBoxGuestR0LibIdc-unix.cpp=>VBoxGuestR0LibIdc-unix.c \ + ${PATH_ROOT}/src/VBox/Additions/common/VBoxGuest/lib/VBoxGuestR0LibInit.cpp=>VBoxGuestR0LibInit.c \ + ${PATH_ROOT}/src/VBox/Additions/common/VBoxGuest/lib/VBoxGuestR0LibPhysHeap.cpp=>VBoxGuestR0LibPhysHeap.c \ + ${PATH_ROOT}/src/VBox/Additions/common/VBoxGuest/lib/VBoxGuestR0LibSharedFolders.c=>VBoxGuestR0LibSharedFolders.c \ + ${PATH_ROOT}/src/VBox/Installer/linux/Makefile-header.gmk=>Makefile-header.gmk \ + ${PATH_ROOT}/src/VBox/Installer/linux/Makefile-footer.gmk=>Makefile-footer.gmk \ + ${PATH_ROOT}/src/VBox/Runtime/common/math/gcc/divdi3.c=>divdi3.c \ + ${PATH_ROOT}/src/VBox/Runtime/common/math/gcc/moddi3.c=>moddi3.c \ + ${PATH_ROOT}/src/VBox/Runtime/common/math/gcc/qdivrem.c=>qdivrem.c \ + ${PATH_ROOT}/src/VBox/Runtime/common/math/gcc/quad.h=>quad.h \ + ${PATH_ROOT}/src/VBox/Runtime/common/math/gcc/udivdi3.c=>udivdi3.c \ + ${PATH_ROOT}/src/VBox/Runtime/common/math/gcc/udivmoddi4.c=>udivmoddi4.c \ + ${PATH_ROOT}/src/VBox/Runtime/common/math/gcc/umoddi3.c=>umoddi3.c \ + ${PATH_ROOT}/src/VBox/Runtime/r0drv/linux/the-linux-kernel.h=>r0drv/linux/the-linux-kernel.h \ + ${PATH_ROOT}/src/VBox/Additions/linux/sharedfolders/Makefile.module=>Makefile \ + ${PATH_ROOT}/src/VBox/Additions/linux/sharedfolders/dirops.c=>dirops.c \ + ${PATH_ROOT}/src/VBox/Additions/linux/sharedfolders/lnkops.c=>lnkops.c \ + ${PATH_ROOT}/src/VBox/Additions/linux/sharedfolders/regops.c=>regops.c \ + ${PATH_ROOT}/src/VBox/Additions/linux/sharedfolders/utils.c=>utils.c \ + ${PATH_ROOT}/src/VBox/Additions/linux/sharedfolders/vbsfmount.h=>vbsfmount.h \ + ${PATH_ROOT}/src/VBox/Additions/linux/sharedfolders/vfsmod.c=>vfsmod.c \ + ${PATH_ROOT}/src/VBox/Additions/linux/sharedfolders/vfsmod.h=>vfsmod.h \ + ${PATH_OUT}/version-generated.h=>version-generated.h \ + ${PATH_OUT}/revision-generated.h=>revision-generated.h \ + ${PATH_OUT}/product-generated.h=>product-generated.h \ +" + +FILES_VBOXSF_BIN=" \ +" diff --git a/src/VBox/Additions/linux/sharedfolders/lnkops.c b/src/VBox/Additions/linux/sharedfolders/lnkops.c new file mode 100644 index 00000000..0d37a155 --- /dev/null +++ b/src/VBox/Additions/linux/sharedfolders/lnkops.c @@ -0,0 +1,305 @@ +/* $Id: lnkops.c $ */ +/** @file + * vboxsf - VBox Linux Shared Folders VFS, operations for symbolic links. + */ + +/* + * Copyright (C) 2010-2023 Oracle and/or its affiliates. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#include "vfsmod.h" + + +/** + * Converts error codes as best we can. + */ +DECLINLINE(int) vbsf_convert_symlink_error(int vrc) +{ + if ( vrc == VERR_IS_A_DIRECTORY + || vrc == VERR_IS_A_FIFO + || vrc == VERR_IS_A_FILE + || vrc == VERR_IS_A_BLOCK_DEVICE + || vrc == VERR_IS_A_CHAR_DEVICE + || vrc == VERR_IS_A_SOCKET + || vrc == VERR_NOT_SYMLINK) + return -EINVAL; + if (vrc == VERR_PATH_NOT_FOUND) + return -ENOTDIR; + if (vrc == VERR_FILE_NOT_FOUND) + return -ENOENT; + return -EPROTO; +} + + +/** + * Does the NLS conversion of the symlink target. + */ +static int vbsf_symlink_nls_convert_slow(struct vbsf_super_info *pSuperInfo, char *pszTarget, size_t cbTargetBuf) +{ + int rc; + size_t const cchUtf8 = RTStrNLen(pszTarget, cbTargetBuf); + if (cchUtf8 < cbTargetBuf) { + /* + * If the target is short and there is a lot of space left in the target + * buffer (typically PAGE_SIZE in size), we move the input to the end + * instead of allocating a temporary buffer for it. This works because + * there shouldn't be anything that is more than 8x worse than UTF-8 + * when it comes to efficiency. + */ + char *pszFree = NULL; + char *pszUtf8; + if (cchUtf8 - 1 <= cbTargetBuf / 8) { + pszUtf8 = &pszTarget[cbTargetBuf - cchUtf8 - 1]; + cbTargetBuf -= cchUtf8 - 1; + } else { + pszFree = pszUtf8 = kmalloc(cchUtf8 + 1, GFP_KERNEL); + if (RT_UNLIKELY(!pszUtf8)) { + LogRelMax(50, ("vbsf_symlink_nls_convert_slow: failed to allocate %u bytes\n", cchUtf8 + 1)); + return -ENOMEM; + } + } + memcpy(pszUtf8, pszTarget, cchUtf8); + pszUtf8[cchUtf8] = '\0'; + + rc = vbsf_nlscpy(pSuperInfo, pszTarget, cbTargetBuf, pszUtf8, cchUtf8); + if (pszFree) + kfree(pszFree); + } else { + SFLOGFLOW(("vbsf_symlink_nls_convert_slow: Impossible! Unterminated target!\n")); + rc = -ENAMETOOLONG; + } + return rc; +} + + +/** + * Does NLS conversion if needed. + */ +DECLINLINE(int) vbsf_symlink_nls_convert(struct vbsf_super_info *pSuperInfo, char *pszTarget, size_t cbTargetBuf) +{ + if (pSuperInfo->fNlsIsUtf8) + return 0; + return vbsf_symlink_nls_convert_slow(pSuperInfo, pszTarget, cbTargetBuf); +} + +#if RTLNX_VER_MIN(4,5,0) + +/** + * Get symbolic link. 
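+ *
+ * Note: from 4.5 on the VFS may call ->get_link() in RCU walk mode, in which
+ * case the dentry is NULL; the code below then returns -ECHILD so the lookup
+ * is retried in ref-walk mode, since reading the target requires a sleeping
+ * host round-trip.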
+ */ +static const char *vbsf_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done) +{ + char *pszTarget; + if (dentry) { + pszTarget = (char *)kzalloc(PAGE_SIZE, GFP_KERNEL); + if (pszTarget) { + struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(inode->i_sb); + struct vbsf_inode_info *sf_i = VBSF_GET_INODE_INFO(inode); + int rc = VbglR0SfHostReqReadLinkContigSimple(pSuperInfo->map.root, sf_i->path->String.ach, sf_i->path->u16Length, + pszTarget, virt_to_phys(pszTarget), RT_MIN(PATH_MAX, PAGE_SIZE - 1)); + if (RT_SUCCESS(rc)) { + pszTarget[PAGE_SIZE - 1] = '\0'; + SFLOGFLOW(("vbsf_get_link: %s -> %s\n", sf_i->path->String.ach, pszTarget)); + rc = vbsf_symlink_nls_convert(pSuperInfo, pszTarget, PAGE_SIZE); + if (rc == 0) { + vbsf_dentry_chain_increase_ttl(dentry); + set_delayed_call(done, kfree_link, pszTarget); + return pszTarget; + } + } else { + SFLOGFLOW(("vbsf_get_link: VbglR0SfHostReqReadLinkContigSimple failed on '%s': %Rrc\n", + sf_i->path->String.ach, rc)); + } + kfree(pszTarget); + pszTarget = ERR_PTR(vbsf_convert_symlink_error(rc)); + } else + pszTarget = ERR_PTR(-ENOMEM); + } else + pszTarget = ERR_PTR(-ECHILD); + return pszTarget; +} + +#else /* < 4.5 */ + +# if RTLNX_VER_MAX(2,6,8) +/** + * Reads the link into the given buffer. + */ +static int vbsf_readlink(struct dentry *dentry, char *buffer, int len) +{ + int rc; + char *pszTarget = (char *)get_zeroed_page(GFP_KERNEL); + if (pszTarget) { + struct inode *inode = dentry->d_inode; + struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(inode->i_sb); + struct vbsf_inode_info *sf_i = VBSF_GET_INODE_INFO(inode); + rc = VbglR0SfHostReqReadLinkContigSimple(pSuperInfo->map.root, sf_i->path->String.ach, sf_i->path->u16Length, + pszTarget, virt_to_phys(pszTarget), RT_MIN(PATH_MAX, PAGE_SIZE - 1)); + if (RT_SUCCESS(rc)) { + pszTarget[PAGE_SIZE - 1] = '\0'; + SFLOGFLOW(("vbsf_readlink: %s -> %*s\n", sf_i->path->String.ach, pszTarget)); + rc = vbsf_symlink_nls_convert(pSuperInfo, pszTarget, PAGE_SIZE); + if (rc == 0) { + vbsf_dentry_chain_increase_ttl(dentry); + rc = vfs_readlink(dentry, buffer, len, pszTarget); + } + } else { + SFLOGFLOW(("vbsf_readlink: VbglR0SfHostReqReadLinkContigSimple failed on '%s': %Rrc\n", sf_i->path->String.ach, rc)); + rc = vbsf_convert_symlink_error(rc); + } + free_page((unsigned long)pszTarget); + } else + rc = -ENOMEM; + return rc; +} +# endif /* < 2.6.8 */ + +/** + * Follow link in dentry. + */ +# if RTLNX_VER_MIN(4,2,0) +static const char *vbsf_follow_link(struct dentry *dentry, void **cookie) +# elif RTLNX_VER_MIN(2,6,13) +static void *vbsf_follow_link(struct dentry *dentry, struct nameidata *nd) +# else +static int vbsf_follow_link(struct dentry *dentry, struct nameidata *nd) +# endif +{ + int rc; + char *pszTarget = (char *)get_zeroed_page(GFP_KERNEL); + if (pszTarget) { + struct inode *inode = dentry->d_inode; + struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(inode->i_sb); + struct vbsf_inode_info *sf_i = VBSF_GET_INODE_INFO(inode); + + rc = VbglR0SfHostReqReadLinkContigSimple(pSuperInfo->map.root, sf_i->path->String.ach, sf_i->path->u16Length, + pszTarget, virt_to_phys(pszTarget), RT_MIN(PATH_MAX, PAGE_SIZE - 1)); + if (RT_SUCCESS(rc)) { + pszTarget[PAGE_SIZE - 1] = '\0'; + SFLOGFLOW(("vbsf_follow_link: %s -> %s\n", sf_i->path->String.ach, pszTarget)); + rc = vbsf_symlink_nls_convert(pSuperInfo, pszTarget, PAGE_SIZE); + if (rc == 0) { + /* + * Succeeded. 
For 2.6.8 and later the page gets associated + * with the caller-cookie or nameidata structure and freed + * later by vbsf_put_link(). On earlier kernels we have to + * call vfs_follow_link() which will try continue the walking + * using the buffer we pass it here. + */ + vbsf_dentry_chain_increase_ttl(dentry); +# if RTLNX_VER_MIN(4,2,0) + *cookie = pszTarget; + return pszTarget; +# elif RTLNX_VER_MIN(2,6,8) + nd_set_link(nd, pszTarget); +# if RTLNX_VER_MIN(2,6,13) + return NULL; +# else + return 0; +# endif +# else /* < 2.6.8 */ + rc = vfs_follow_link(nd, pszTarget); + free_page((unsigned long)pszTarget); + return rc; +# endif + } + + /* + * Failed. + */ + } else { + LogFunc(("VbglR0SfReadLink failed, caller=%s, rc=%Rrc\n", __func__, rc)); + rc = vbsf_convert_symlink_error(rc); + } + free_page((unsigned long)pszTarget); + } else { + rc = -ENOMEM; + } +# if RTLNX_VER_MIN(4,2,0) + *cookie = ERR_PTR(rc); + return (const char *)ERR_PTR(rc); +# elif RTLNX_VER_MIN(2,6,8) + nd_set_link(nd, (char *)ERR_PTR(rc)); +# if RTLNX_VER_MIN(2,6,13) + return NULL; +# else + return 0; +# endif +# else /* < 2.6.8 */ + return rc; +# endif /* < 2.6.8 */ +} + +# if RTLNX_VER_MIN(2,6,8) +/** + * For freeing target link buffer allocated by vbsf_follow_link. + * + * For kernels before 2.6.8 memory isn't being kept around. + */ +# if RTLNX_VER_MIN(4,2,0) +static void vbsf_put_link(struct inode *inode, void *cookie) +# elif RTLNX_VER_MIN(2,6,13) +static void vbsf_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie) +# else +static void vbsf_put_link(struct dentry *dentry, struct nameidata *nd) +# endif +{ +# if RTLNX_VER_MIN(2,6,13) + char *page = cookie; +# else + char *page = nd_get_link(nd); +# endif + SFLOGFLOW(("vbsf_put_link: page=%p\n", page)); + if (!IS_ERR(page)) + free_page((unsigned long)page); +} +# endif /* >= 2.6.8 */ + +#endif /* < 4.5.0 */ + +/** + * Symlink inode operations. + */ +struct inode_operations vbsf_lnk_iops = { +#if RTLNX_VER_MAX(4,10,0) +# if RTLNX_VER_MIN(2,6,8) + .readlink = generic_readlink, +# else + .readlink = vbsf_readlink, +# endif +#endif +#if RTLNX_VER_MIN(4,5,0) + .get_link = vbsf_get_link +#else + .follow_link = vbsf_follow_link, +# if RTLNX_VER_MIN(2,6,8) + .put_link = vbsf_put_link, +# endif +#endif +}; + diff --git a/src/VBox/Additions/linux/sharedfolders/mount.vboxsf.c b/src/VBox/Additions/linux/sharedfolders/mount.vboxsf.c new file mode 100644 index 00000000..bcf0c59b --- /dev/null +++ b/src/VBox/Additions/linux/sharedfolders/mount.vboxsf.c @@ -0,0 +1,702 @@ +/* $Id: mount.vboxsf.c $ */ +/** @file + * VirtualBox Guest Additions for Linux - mount(8) helper. + * + * Parses options provided by mount (or user directly) + * Packs them into struct vbsfmount and passes to mount(2) + * Optionally adds entries to mtab + */ + +/* + * Copyright (C) 2006-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only
+ */
+
+
+#ifndef _GNU_SOURCE
+# define _GNU_SOURCE
+#endif
+
+/* #define DEBUG */
+#include <errno.h>
+#include <fcntl.h>
+#include <ctype.h>
+#include <getopt.h>
+#include <mntent.h>
+#include <pwd.h>
+#include <grp.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <sys/utsname.h>
+#include <sys/wait.h>
+#include <iconv.h>
+#include <limits.h>
+
+#include "vbsfmount.h"
+
+#include <iprt/assertcompile.h>
+#include <iprt/param.h> /* PAGE_SIZE (used by MAX_MNTOPT_STR) */
+#include <iprt/string.h>
+
+
+/*********************************************************************************************************************************
+*   Defined Constants And Macros                                                                                                 *
+*********************************************************************************************************************************/
+#define PANIC_ATTR __attribute ((noreturn, __format__ (__printf__, 1, 2)))
+
+
+/*********************************************************************************************************************************
+*   Structures and Typedefs                                                                                                      *
+*********************************************************************************************************************************/
+struct vbsf_mount_opts
+{
+    unsigned long fFlags; /**< MS_XXX */
+
+    /** @name Preformatted option=value or empty if not specified.
+     * Helps eliminate duplicate options as well as simplifying concatenation.
+     * @{ */
+    char szTTL[32];
+    char szMsDirCacheTTL[32];
+    char szMsInodeTTL[32];
+    char szMaxIoPages[32];
+    char szDirBuf[32];
+    char szCacheMode[32];
+    char szUid[32];
+    char szGid[32];
+    char szDMode[32];
+    char szFMode[32];
+    char szDMask[32];
+    char szFMask[32];
+    char szIoCharset[32];
+    /** @} */
+
+    bool fSloppy;
+    char *pszConvertCp;
+};
+
+
+static void PANIC_ATTR
+panic(const char *fmt, ...)
+{
+    va_list ap;
+
+    va_start(ap, fmt);
+    vfprintf(stderr, fmt, ap);
+    va_end(ap);
+    exit(EXIT_FAILURE);
+}
+
+static void PANIC_ATTR
+panic_err(const char *fmt, ...)
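+/* Note: unlike panic(), this helper also appends ": " + strerror(errno) and a
+   newline to the formatted message before exiting, so a failing call is
+   reported as e.g. "could not allocate memory: Cannot allocate memory". */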
+{ + va_list ap; + int errno_code = errno; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + fprintf(stderr, ": %s\n", strerror(errno_code)); + exit(EXIT_FAILURE); +} + +static int +safe_atoi(const char *s, size_t size, int base) +{ + char *endptr; + long long int val = strtoll(s, &endptr, base); + + if ( val < INT_MIN + || ( val > INT_MAX + && (base != 8 || val != UINT_MAX) ) /* hack for printf("%o", -1) - 037777777777 */ + || endptr < s + size) + { + errno = ERANGE; + panic_err("could not convert %.*s to integer, result = %lld (%d)", + (int)size, s, val, (int)val); + } + return (int)val; +} + +static unsigned +safe_atoiu(const char *s, size_t size, int base) +{ + char *endptr; + long long int val = strtoll(s, &endptr, base); + + if ( val < 0 + || val > UINT_MAX + || endptr < s + size) + { + errno = ERANGE; + panic_err("could not convert %.*s to unsigned integer, result = %lld (%#llx)", + (int)size, s, val, val); + } + return (unsigned)val; +} + +static void +process_mount_opts(const char *s, struct vbsf_mount_opts *opts) +{ + const char *next = s; + size_t len; + typedef enum handler_opt + { + HO_RW, + HO_RO, + HO_UID, + HO_GID, + HO_TTL, + HO_DENTRY_TTL, + HO_INODE_TTL, + HO_MAX_IO_PAGES, + HO_DIR_BUF, + HO_CACHE, + HO_DMODE, + HO_FMODE, + HO_UMASK, + HO_DMASK, + HO_FMASK, + HO_IOCHARSET, + HO_NLS, + HO_CONVERTCP, + HO_NOEXEC, + HO_EXEC, + HO_NODEV, + HO_DEV, + HO_NOSUID, + HO_SUID, + HO_REMOUNT, + HO_NOAUTO, + HO_NIGNORE + } handler_opt; + struct + { + const char *name; + handler_opt opt; + int has_arg; + const char *desc; + } handlers[] = + { + {"rw", HO_RW, 0, "mount read write (default)"}, + {"ro", HO_RO, 0, "mount read only"}, + {"uid", HO_UID, 1, "default file owner user id"}, + {"gid", HO_GID, 1, "default file owner group id"}, + {"ttl", HO_TTL, 1, "time to live for dentries & inode info"}, + {"dcachettl", HO_DENTRY_TTL, 1, "time to live for dentries"}, + {"inodettl", HO_INODE_TTL, 1, "time to live for inode info"}, + {"maxiopages", HO_MAX_IO_PAGES, 1, "max buffer size for I/O with host"}, + {"dirbuf", HO_DIR_BUF, 1, "directory buffer size (0 for default)"}, + {"cache", HO_CACHE, 1, "cache mode: none, strict (default), read, readwrite"}, + {"iocharset", HO_IOCHARSET, 1, "i/o charset (default utf8)"}, + {"nls", HO_NLS, 1, "i/o charset (default utf8)"}, + {"convertcp", HO_CONVERTCP, 1, "convert share name from given charset to utf8"}, + {"dmode", HO_DMODE, 1, "mode of all directories"}, + {"fmode", HO_FMODE, 1, "mode of all regular files"}, + {"umask", HO_UMASK, 1, "umask of directories and regular files"}, + {"dmask", HO_DMASK, 1, "umask of directories"}, + {"fmask", HO_FMASK, 1, "umask of regular files"}, + {"noexec", HO_NOEXEC, 0, NULL}, /* don't document these options directly here */ + {"exec", HO_EXEC, 0, NULL}, /* as they are well known and described in the */ + {"nodev", HO_NODEV, 0, NULL}, /* usual manpages */ + {"dev", HO_DEV, 0, NULL}, + {"nosuid", HO_NOSUID, 0, NULL}, + {"suid", HO_SUID, 0, NULL}, + {"remount", HO_REMOUNT, 0, NULL}, + {"noauto", HO_NOAUTO, 0, NULL}, + {"_netdev", HO_NIGNORE, 0, NULL}, + {"relatime", HO_NIGNORE, 0, NULL}, + {NULL, 0, 0, NULL} + }, *handler; + + while (next) + { + const char *val; + size_t key_len, val_len; + + s = next; + next = strchr(s, ','); + if (!next) + { + len = strlen(s); + } + else + { + len = next - s; + next += 1; + if (!*next) + next = 0; + } + + val = NULL; + val_len = 0; + for (key_len = 0; key_len < len; ++key_len) + { + if (s[key_len] == '=') + { + if (key_len + 1 < len) + { + val = s + key_len + 1; + 
val_len = len - key_len - 1; + } + break; + } + } + + for (handler = handlers; handler->name; ++handler) + { + size_t j; + for (j = 0; j < key_len && handler->name[j] == s[j]; ++j) + ; + + if (j == key_len && !handler->name[j]) + { + if (handler->has_arg) + { + if (!(val && *val)) + { + panic("%.*s requires an argument (i.e. %.*s=)\n", + (int)len, s, (int)len, s); + } + } + + switch (handler->opt) + { + case HO_RW: + opts->fFlags &= ~MS_RDONLY; + break; + case HO_RO: + opts->fFlags |= MS_RDONLY; + break; + case HO_NOEXEC: + opts->fFlags |= MS_NOEXEC; + break; + case HO_EXEC: + opts->fFlags &= ~MS_NOEXEC; + break; + case HO_NODEV: + opts->fFlags |= MS_NODEV; + break; + case HO_DEV: + opts->fFlags &= ~MS_NODEV; + break; + case HO_NOSUID: + opts->fFlags |= MS_NOSUID; + break; + case HO_SUID: + opts->fFlags &= ~MS_NOSUID; + break; + case HO_REMOUNT: + opts->fFlags |= MS_REMOUNT; + break; + case HO_TTL: + snprintf(opts->szTTL, sizeof(opts->szTTL), + "ttl=%d", safe_atoi(val, val_len, 10)); + break; + case HO_DENTRY_TTL: + snprintf(opts->szMsDirCacheTTL, sizeof(opts->szMsDirCacheTTL), + "dcachettl=%d", safe_atoi(val, val_len, 10)); + break; + case HO_INODE_TTL: + snprintf(opts->szMsInodeTTL, sizeof(opts->szMsInodeTTL), + "inodettl=%d", safe_atoi(val, val_len, 10)); + break; + case HO_MAX_IO_PAGES: + snprintf(opts->szMaxIoPages, sizeof(opts->szMaxIoPages), + "maxiopages=%d", safe_atoiu(val, val_len, 10)); + break; + case HO_DIR_BUF: + snprintf(opts->szDirBuf, sizeof(opts->szDirBuf), + "dirbuf=%d", safe_atoiu(val, val_len, 10)); + break; + case HO_CACHE: +#define IS_EQUAL(a_sz) (val_len == sizeof(a_sz) - 1U && strncmp(val, a_sz, sizeof(a_sz) - 1U) == 0) + if (IS_EQUAL("default")) + strcpy(opts->szCacheMode, "cache=default"); + else if (IS_EQUAL("none")) + strcpy(opts->szCacheMode, "cache=none"); + else if (IS_EQUAL("strict")) + strcpy(opts->szCacheMode, "cache=strict"); + else if (IS_EQUAL("read")) + strcpy(opts->szCacheMode, "cache=read"); + else if (IS_EQUAL("readwrite")) + strcpy(opts->szCacheMode, "cache=readwrite"); + else + panic("invalid cache mode '%.*s'\n" + "Valid cache modes are: default, none, strict, read, readwrite\n", + (int)val_len, val); + break; + case HO_UID: + /** @todo convert string to id. */ + snprintf(opts->szUid, sizeof(opts->szUid), + "uid=%d", safe_atoi(val, val_len, 10)); + break; + case HO_GID: + /** @todo convert string to id. */ + snprintf(opts->szGid, sizeof(opts->szGid), + "gid=%d", safe_atoi(val, val_len, 10)); + break; + case HO_DMODE: + snprintf(opts->szDMode, sizeof(opts->szDMode), + "dmode=0%o", safe_atoi(val, val_len, 8)); + break; + case HO_FMODE: + snprintf(opts->szFMode, sizeof(opts->szFMode), + "fmode=0%o", safe_atoi(val, val_len, 8)); + break; + case HO_UMASK: + { + int fMask = safe_atoi(val, val_len, 8); + snprintf(opts->szDMask, sizeof(opts->szDMask), "dmask=0%o", fMask); + snprintf(opts->szFMask, sizeof(opts->szFMask), "fmask=0%o", fMask); + break; + } + case HO_DMASK: + snprintf(opts->szDMask, sizeof(opts->szDMask), + "dmask=0%o", safe_atoi(val, val_len, 8)); + break; + case HO_FMASK: + snprintf(opts->szFMask, sizeof(opts->szFMask), + "fmask=0%o", safe_atoi(val, val_len, 8)); + break; + case HO_IOCHARSET: + case HO_NLS: + if (val_len >= MAX_NLS_NAME) + panic("the character set name for I/O is too long: %*.*s\n", (int)val_len, (int)val_len, val); + snprintf(opts->szIoCharset, sizeof(opts->szIoCharset), + "%s=%*.*s", handler->opt == HO_IOCHARSET ? 
"iocharset" : "nls", (int)val_len, (int)val_len, val); + break; + case HO_CONVERTCP: + opts->pszConvertCp = malloc(val_len + 1); + if (!opts->pszConvertCp) + panic_err("could not allocate memory"); + memcpy(opts->pszConvertCp, val, val_len); + opts->pszConvertCp[val_len] = '\0'; + break; + case HO_NOAUTO: + case HO_NIGNORE: + break; + } + break; + } + continue; + } + + if ( !handler->name + && !opts->fSloppy) + { + fprintf(stderr, "unknown mount option `%.*s'\n", (int)len, s); + fprintf(stderr, "valid options:\n"); + + for (handler = handlers; handler->name; ++handler) + { + if (handler->desc) + fprintf(stderr, " %-10s%s %s\n", handler->name, + handler->has_arg ? "=" : "", handler->desc); + } + exit(EXIT_FAILURE); + } + } +} + +/** Appends @a pszOptVal to pszOpts if not empty. */ +static size_t append_option(char *pszOpts, size_t cbOpts, size_t offOpts, const char *pszOptVal) +{ + if (*pszOptVal != '\0') + { + size_t cchOptVal = strlen(pszOptVal); + if (offOpts + (offOpts > 0) + cchOptVal < cbOpts) + { + if (offOpts) + pszOpts[offOpts++] = ','; + memcpy(&pszOpts[offOpts], pszOptVal, cchOptVal); + offOpts += cchOptVal; + pszOpts[offOpts] = '\0'; + } + else + panic("Too many options!"); + } + return offOpts; +} + +static void +convertcp(char *in_codeset, char *pszSharedFolder, char *pszDst) +{ + char *i = pszSharedFolder; + char *o = pszDst; + size_t ib = strlen(pszSharedFolder); + size_t ob = MAX_HOST_NAME - 1; + iconv_t cd; + + cd = iconv_open("UTF-8", in_codeset); + if (cd == (iconv_t)-1) + { + panic_err("could not convert share name, iconv_open `%s' failed", + in_codeset); + } + + while (ib) + { + size_t c = iconv(cd, &i, &ib, &o, &ob); + if (c == (size_t)-1) + { + panic_err("could not convert share name(%s) at %d", + pszSharedFolder, (int)(strlen(pszSharedFolder) - ib)); + } + } + *o = 0; +} + + +/** + * Print out a usage message and exit. + * + * @returns 1 + * @param argv0 The name of the application + */ +static int usage(char *argv0) +{ + printf("Usage: %s [OPTIONS] NAME MOUNTPOINT\n" + "Mount the VirtualBox shared folder NAME from the host system to MOUNTPOINT.\n" + "\n" + " -w mount the shared folder writable (the default)\n" + " -r mount the shared folder read-only\n" + " -n do not create an mtab entry\n" + " -s sloppy parsing, ignore unrecognized mount options\n" + " -o OPTION[,OPTION...] use the mount options specified\n" + "\n", argv0); + printf("Available mount options are:\n" + " rw mount writable (the default)\n" + " ro mount read only\n" + " uid=UID set the default file owner user id to UID\n" + " gid=GID set the default file owner group id to GID\n"); + printf(" ttl=MILLIESECSONDS set the \"time to live\" for both the directory cache\n" + " and inode info. -1 for kernel default, 0 disables it.\n" + " dcachettl=MILLIES set the \"time to live\" for the directory cache,\n" + " overriding the 'ttl' option. Ignored if negative.\n" + " inodettl=MILLIES set the \"time to live\" for the inode information,\n" + " overriding the 'ttl' option. Ignored if negative.\n"); + printf(" maxiopages=PAGES set the max host I/O buffers size in pages. Uses\n" + " default if zero.\n" + " dirbuf=BYTES set the directory enumeration buffer size in bytes.\n" + " Uses default size if zero.\n"); + printf(" cache=MODE set the caching mode for the mount. 
Allowed values:\n"
+           "                            default:   use the kernel default (strict)\n"
+           "                            none:      no caching; may experience guest side\n"
+           "                                       coherence issues between mmap and read.\n");
+    printf("                            strict:    no caching, except for writably mapped\n"
+           "                                       files (for guest side coherence)\n"
+           "                            read:      read via the page cache; host changes\n"
+           "                                       may be completely ignored\n");
+    printf("                            readwrite: read and write via the page cache; host\n"
+           "                                       changes may be completely ignored and\n"
+           "                                       guest changes take a while to reach the host\n");
+    printf("     dmode=MODE         override the mode of all directories to (octal) MODE\n"
+           "     fmode=MODE         override the mode of all regular files to (octal) MODE\n"
+           "     umask=UMASK        set the umask to (octal) UMASK\n");
+    printf("     dmask=UMASK        set the umask applied to directories only\n"
+           "     fmask=UMASK        set the umask applied to regular files only\n"
+           "     iocharset=CHARSET  use the character set CHARSET for I/O operations\n"
+           "                        (default set is utf8)\n"
+           "     convertcp=CHARSET  convert the folder name from CHARSET to utf8\n"
+           "\n");
+    printf("Less commonly used options:\n"
+           "     noexec,exec,nodev,dev,nosuid,suid\n");
+    return EXIT_FAILURE;
+}
+
+int
+main(int argc, char **argv)
+{
+    int c;
+    int err;
+    int saved_errno;
+    int nomtab = 0;
+    char *pszSharedFolder;
+    char *pszMountPoint;
+    struct utsname uts;
+    int major, minor, patch;
+    size_t offOpts;
+    static const char s_szSfNameOpt[] = "sf_name=";
+    char szSharedFolderIconved[sizeof(s_szSfNameOpt) - 1 + MAX_HOST_NAME];
+    char szOpts[MAX_MNTOPT_STR];
+    struct vbsf_mount_opts opts =
+    {
+        MS_NODEV,
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        "",
+        false, /*fSloppy*/
+        NULL,
+    };
+
+    AssertCompile(sizeof(uid_t) == sizeof(int));
+    AssertCompile(sizeof(gid_t) == sizeof(int));
+
+    if (getuid())
+        panic("Only root can mount shared folders from the host.\n");
+
+    if (!argv[0])
+        argv[0] = "mount.vboxsf";
+
+    /*
+     * Parse options.
+     */
+    while ((c = getopt(argc, argv, "rwsno:h")) != -1)
+    {
+        switch (c)
+        {
+            default:
+                fprintf(stderr, "unknown option `%c:%#x'\n", c, c);
+                RT_FALL_THRU();
+            case '?':
+            case 'h':
+                return usage(argv[0]);
+
+            case 'r':
+                opts.fFlags |= MS_RDONLY;
+                break;
+
+            case 'w':
+                opts.fFlags &= ~MS_RDONLY;
+                break;
+
+            case 's':
+                opts.fSloppy = true;
+                break;
+
+            case 'o':
+                process_mount_opts(optarg, &opts);
+                break;
+
+            case 'n':
+                nomtab = 1;
+                break;
+        }
+    }
+
+    if (argc - optind < 2)
+        return usage(argv[0]);
+
+    pszSharedFolder = argv[optind];
+    pszMountPoint = argv[optind + 1];
+    if (opts.pszConvertCp)
+    {
+        convertcp(opts.pszConvertCp, pszSharedFolder, &szSharedFolderIconved[sizeof(s_szSfNameOpt) - 1]);
+        pszSharedFolder = &szSharedFolderIconved[sizeof(s_szSfNameOpt) - 1];
+    }
+
+    /*
+     * Concat option strings.
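+     * Every recognized option was preformatted above as a "name=value"
+     * string, so the final mount(2) data string is assembled by a series of
+     * comma separated appends via append_option().  For example the
+     * (hypothetical) invocation
+     *      mount.vboxsf -o ttl=200,uid=1000,dmode=0755 myshare /mnt/share
+     * results in szOpts = "ttl=200,uid=1000,dmode=0755" below.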
+ */ + offOpts = 0; + szOpts[0] = '\0'; + offOpts = append_option(szOpts, sizeof(szOpts), offOpts, opts.szTTL); + offOpts = append_option(szOpts, sizeof(szOpts), offOpts, opts.szMsDirCacheTTL); + offOpts = append_option(szOpts, sizeof(szOpts), offOpts, opts.szMsInodeTTL); + offOpts = append_option(szOpts, sizeof(szOpts), offOpts, opts.szMaxIoPages); + offOpts = append_option(szOpts, sizeof(szOpts), offOpts, opts.szDirBuf); + offOpts = append_option(szOpts, sizeof(szOpts), offOpts, opts.szCacheMode); + offOpts = append_option(szOpts, sizeof(szOpts), offOpts, opts.szUid); + offOpts = append_option(szOpts, sizeof(szOpts), offOpts, opts.szGid); + offOpts = append_option(szOpts, sizeof(szOpts), offOpts, opts.szDMode); + offOpts = append_option(szOpts, sizeof(szOpts), offOpts, opts.szFMode); + offOpts = append_option(szOpts, sizeof(szOpts), offOpts, opts.szDMask); + offOpts = append_option(szOpts, sizeof(szOpts), offOpts, opts.szFMask); + offOpts = append_option(szOpts, sizeof(szOpts), offOpts, opts.szIoCharset); + + /* For pre-2.6 kernels we have to supply the shared folder name as a + string option because the kernel hides the device name from us. */ + RT_ZERO(uts); + if ( uname(&uts) == -1 + || sscanf(uts.release, "%d.%d.%d", &major, &minor, &patch) != 3) + major = minor = patch = 5; + + if (KERNEL_VERSION(major, minor, patch) < KERNEL_VERSION(2,6,0)) + { + memcpy(szSharedFolderIconved, s_szSfNameOpt, sizeof(s_szSfNameOpt) - 1); + if (!opts.pszConvertCp) + { + if (strlen(pszSharedFolder) >= MAX_HOST_NAME) + panic("%s: shared folder name is too long (max %d)", argv[0], (int)MAX_HOST_NAME - 1); + strcpy(&szSharedFolderIconved[sizeof(s_szSfNameOpt) - 1], pszSharedFolder); + } + offOpts = append_option(szOpts, sizeof(szOpts), offOpts, szSharedFolderIconved); + } + + /* + * Do the actual mounting. + */ + err = mount(pszSharedFolder, pszMountPoint, "vboxsf", opts.fFlags, szOpts); + saved_errno = errno; + + if (err) + { + if (saved_errno == ENXIO) + panic("%s: shared folder '%s' was not found (check VM settings / spelling)\n", argv[0], pszSharedFolder); + else + panic_err("%s: mounting failed with the error", argv[0]); + } + + if (!nomtab) + { + err = vbsfmount_complete(pszSharedFolder, pszMountPoint, opts.fFlags, szOpts); + switch (err) + { + case 0: /* Success. */ + break; + + case 1: + panic_err("%s: Could not update mount table (out of memory).", argv[0]); + break; + + case 2: + panic_err("%s: Could not open mount table for update.", argv[0]); + break; + + case 3: + /* panic_err("%s: Could not add an entry to the mount table.", argv[0]); */ + break; + + default: + panic_err("%s: Unknown error while completing mount operation: %d", argv[0], err); + break; + } + } + + exit(EXIT_SUCCESS); +} + diff --git a/src/VBox/Additions/linux/sharedfolders/regops.c b/src/VBox/Additions/linux/sharedfolders/regops.c new file mode 100644 index 00000000..ca604090 --- /dev/null +++ b/src/VBox/Additions/linux/sharedfolders/regops.c @@ -0,0 +1,3902 @@ +/* $Id: regops.c $ */ +/** @file + * vboxsf - VBox Linux Shared Folders VFS, regular file inode and file operations. + */ + +/* + * Copyright (C) 2006-2023 Oracle and/or its affiliates. 
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+/*********************************************************************************************************************************
+*   Header Files                                                                                                                 *
+*********************************************************************************************************************************/
+#include "vfsmod.h"
+#include <linux/uio.h>
+#if RTLNX_VER_MIN(2,5,32)
+# include <linux/aio.h> /* struct kiocb before 4.1 */
+#endif
+#if RTLNX_VER_MIN(2,5,12)
+# include <linux/buffer_head.h>
+#endif
+#if RTLNX_VER_RANGE(2,5,12, 2,6,31)
+# include <linux/writeback.h>
+#endif
+#if RTLNX_VER_RANGE(2,6,23, 3,16,0)
+# include <linux/splice.h>
+#endif
+#if RTLNX_VER_RANGE(2,6,17, 2,6,23)
+# include <linux/pipe_fs_i.h>
+#endif
+#if RTLNX_VER_MIN(2,4,10)
+# include <linux/swap.h> /* for mark_page_accessed */
+#endif
+#include <iprt/err.h>
+
+#if RTLNX_VER_MAX(2,6,18)
+# define SEEK_END 2
+#endif
+
+#if RTLNX_VER_MIN(6,4,0)
+# define VBOX_ITER_IOV_ADDR(a_iter)     iter_iov_addr(a_iter)
+#elif RTLNX_VER_MIN(3,19,0)
+# define VBOX_ITER_IOV_ADDR(a_iter)     ((a_iter)->kvec->iov_base + (a_iter)->iov_offset)
+#else
+# define VBOX_ITER_IOV_ADDR(a_iter)     ((a_iter)->iov->iov_base + (a_iter)->iov_offset)
+#endif
+
+#if RTLNX_VER_MAX(3,16,0)
+# define iter_is_iovec(a_pIter) ( !((a_pIter)->type & ITER_KVEC) )
+#elif RTLNX_VER_MAX(3,19,0)
+# define iter_is_iovec(a_pIter) ( !((a_pIter)->type & (ITER_KVEC | ITER_BVEC)) )
+#endif
+
+#if RTLNX_VER_MAX(4,17,0)
+# define vm_fault_t int
+#endif
+
+#if RTLNX_VER_MAX(2,5,20)
+# define pgoff_t unsigned long
+#endif
+
+#if RTLNX_VER_MAX(2,5,12)
+# define PageUptodate(a_pPage) Page_Uptodate(a_pPage)
+#endif
+
+
+/*********************************************************************************************************************************
+*   Defined Constants And Macros                                                                                                 *
+*********************************************************************************************************************************/
+/** @def VBSF_GET_ITER_TYPE
+ * Accessor for getting the iov iter type member, which changed name in 5.14. */
+#if RTLNX_VER_MIN(5,14,0)
+# define VBSF_GET_ITER_TYPE(a_pIter) ((a_pIter)->iter_type)
+#else
+# define VBSF_GET_ITER_TYPE(a_pIter) ((a_pIter)->type)
+#endif
+
+/** Starting with 6.4.0 the iter_iov() macro must be used to access the iov
+ *  field of struct iov_iter. */
+#if RTLNX_VER_MIN(6,4,0) || RTLNX_RHEL_RANGE(9,4, 9,99)
+# define VBSF_GET_ITER_IOV(_iter) iter_iov(_iter)
+#else
+# define VBSF_GET_ITER_IOV(_iter) ((_iter)->iov)
+#endif
+
+/** @def VBOX_IOV_ITER_IS_KVEC
+ * Test if iov iter type is ITER_KVEC.
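+ *
+ * A KVEC iterator describes kernel-space memory, which is why the fallback
+ * copy routines further down use memcpy() for it rather than the user-space
+ * copy_to_user()/copy_from_user() pair.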
*/ +#if RTLNX_VER_MIN(4,20,0) +# define VBOX_IOV_ITER_IS_KVEC(a_iter) iov_iter_is_kvec(a_iter) +#else +# define VBOX_IOV_ITER_IS_KVEC(a_iter) (VBSF_GET_ITER_TYPE(iter) & ITER_KVEC) +#endif + + +/********************************************************************************************************************************* +* Structures and Typedefs * +*********************************************************************************************************************************/ +#if RTLNX_VER_MAX(3,16,0) +struct vbsf_iov_iter { + unsigned int type; + unsigned int v_write : 1; + size_t iov_offset; + size_t nr_segs; + struct iovec const *iov; +# ifdef VBOX_STRICT + struct iovec const *iov_org; + size_t nr_segs_org; +# endif +}; +# ifdef VBOX_STRICT +# define VBSF_IOV_ITER_INITIALIZER(a_cSegs, a_pIov, a_fWrite) \ + { vbsf_iov_iter_detect_type(a_pIov, a_cSegs), a_fWrite, 0, a_cSegs, a_pIov, a_pIov, a_cSegs } +# else +# define VBSF_IOV_ITER_INITIALIZER(a_cSegs, a_pIov, a_fWrite) \ + { vbsf_iov_iter_detect_type(a_pIov, a_cSegs), a_fWrite, 0, a_cSegs, a_pIov } +# endif +# define ITER_KVEC 1 +# define iov_iter vbsf_iov_iter +#endif + +#if RTLNX_VER_MIN(2,6,19) +/** Used by vbsf_iter_lock_pages() to keep the first page of the next segment. */ +struct vbsf_iter_stash { + struct page *pPage; + size_t off; + size_t cb; +# if RTLNX_VER_MAX(4,11,0) + size_t offFromEnd; + struct iov_iter Copy; +# endif +}; +#endif /* >= 3.16.0 */ +/** Initializer for struct vbsf_iter_stash. */ +#if RTLNX_VER_MIN(4,11,0) +# define VBSF_ITER_STASH_INITIALIZER { NULL, 0 } +#else +# define VBSF_ITER_STASH_INITIALIZER { NULL, 0, ~(size_t)0 } +#endif + + +/********************************************************************************************************************************* +* Internal Functions * +*********************************************************************************************************************************/ +DECLINLINE(void) vbsf_put_page(struct page *pPage); +static void vbsf_unlock_user_pages(struct page **papPages, size_t cPages, bool fSetDirty, bool fLockPgHack); +static void vbsf_reg_write_sync_page_cache(struct address_space *mapping, loff_t offFile, uint32_t cbRange, + uint8_t const *pbSrcBuf, struct page **papSrcPages, + uint32_t offSrcPage, size_t cSrcPages); + + +/********************************************************************************************************************************* +* Provide more recent uio.h functionality to older kernels. * +*********************************************************************************************************************************/ +#if RTLNX_VER_RANGE(2,6,19, 3,16,0) + +/** + * Detects the vector type. + */ +static int vbsf_iov_iter_detect_type(struct iovec const *paIov, size_t cSegs) +{ + /* Check the first segment with a non-zero length. */ + while (cSegs-- > 0) { + if (paIov->iov_len > 0) { + if (access_ok(VERIFY_READ, paIov->iov_base, paIov->iov_len)) +#if RTLNX_VER_MIN(5,10,0) + return (uintptr_t)paIov->iov_base >= TASK_SIZE_MAX ? ITER_KVEC : 0; +#else + return (uintptr_t)paIov->iov_base >= USER_DS.seg ? 
ITER_KVEC : 0; +#endif + AssertMsgFailed(("%p LB %#zx\n", paIov->iov_base, paIov->iov_len)); + break; + } + paIov++; + } + return 0; +} + + +# undef iov_iter_count +# define iov_iter_count(a_pIter) vbsf_iov_iter_count(a_pIter) +static size_t vbsf_iov_iter_count(struct vbsf_iov_iter const *iter) +{ + size_t cbRet = 0; + size_t cLeft = iter->nr_segs; + struct iovec const *iov = iter->iov; + while (cLeft-- > 0) { + cbRet += iov->iov_len; + iov++; + } + return cbRet - iter->iov_offset; +} + + +# undef iov_iter_single_seg_count +# define iov_iter_single_seg_count(a_pIter) vbsf_iov_iter_single_seg_count(a_pIter) +static size_t vbsf_iov_iter_single_seg_count(struct vbsf_iov_iter const *iter) +{ + if (iter->nr_segs > 0) + return iter->iov->iov_len - iter->iov_offset; + return 0; +} + + +# undef iov_iter_advance +# define iov_iter_advance(a_pIter, a_cbSkip) vbsf_iov_iter_advance(a_pIter, a_cbSkip) +static void vbsf_iov_iter_advance(struct vbsf_iov_iter *iter, size_t cbSkip) +{ + SFLOG2(("vbsf_iov_iter_advance: cbSkip=%#zx\n", cbSkip)); + if (iter->nr_segs > 0) { + size_t const cbLeftCur = iter->iov->iov_len - iter->iov_offset; + Assert(iter->iov_offset <= iter->iov->iov_len); + if (cbLeftCur > cbSkip) { + iter->iov_offset += cbSkip; + } else { + cbSkip -= cbLeftCur; + iter->iov_offset = 0; + iter->iov++; + iter->nr_segs--; + while (iter->nr_segs > 0) { + size_t const cbSeg = iter->iov->iov_len; + if (cbSeg > cbSkip) { + iter->iov_offset = cbSkip; + break; + } + cbSkip -= cbSeg; + iter->iov++; + iter->nr_segs--; + } + } + } +} + + +# undef iov_iter_get_pages +# define iov_iter_get_pages(a_pIter, a_papPages, a_cbMax, a_cMaxPages, a_poffPg0) \ + vbsf_iov_iter_get_pages(a_pIter, a_papPages, a_cbMax, a_cMaxPages, a_poffPg0) +static ssize_t vbsf_iov_iter_get_pages(struct vbsf_iov_iter *iter, struct page **papPages, + size_t cbMax, unsigned cMaxPages, size_t *poffPg0) +{ + while (iter->nr_segs > 0) { + size_t const cbLeft = iter->iov->iov_len - iter->iov_offset; + Assert(iter->iov->iov_len >= iter->iov_offset); + if (cbLeft > 0) { + uintptr_t uPtrFrom = (uintptr_t)iter->iov->iov_base + iter->iov_offset; + size_t offPg0 = *poffPg0 = uPtrFrom & PAGE_OFFSET_MASK; + size_t cPagesLeft = RT_ALIGN_Z(offPg0 + cbLeft, PAGE_SIZE) >> PAGE_SHIFT; + size_t cPages = RT_MIN(cPagesLeft, cMaxPages); + struct task_struct *pTask = current; + size_t cPagesLocked; + + down_read(&pTask->mm->mmap_sem); + cPagesLocked = get_user_pages(pTask, pTask->mm, uPtrFrom, cPages, iter->v_write, 1 /*force*/, papPages, NULL); + up_read(&pTask->mm->mmap_sem); + if (cPagesLocked == cPages) { + size_t cbRet = (cPages << PAGE_SHIFT) - offPg0; + if (cPages == cPagesLeft) { + size_t offLastPg = (uPtrFrom + cbLeft) & PAGE_OFFSET_MASK; + if (offLastPg) + cbRet -= PAGE_SIZE - offLastPg; + } + Assert(cbRet <= cbLeft); + return cbRet; + } + if (cPagesLocked > 0) + vbsf_unlock_user_pages(papPages, cPagesLocked, false /*fSetDirty*/, false /*fLockPgHack*/); + return -EFAULT; + } + iter->iov_offset = 0; + iter->iov++; + iter->nr_segs--; + } + AssertFailed(); + return 0; +} + + +# undef iov_iter_truncate +# define iov_iter_truncate(iter, cbNew) vbsf_iov_iter_truncate(iter, cbNew) +static void vbsf_iov_iter_truncate(struct vbsf_iov_iter *iter, size_t cbNew) +{ + /* we have no counter or stuff, so it's a no-op. 
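+     * The emulated iterator keeps no separate byte counter that could be
+     * clamped; iov_iter_count() above recomputes the remaining size from the
+     * segment array every time it is called.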
*/ + RT_NOREF(iter, cbNew); +} + + +# undef iov_iter_revert +# define iov_iter_revert(a_pIter, a_cbRewind) vbsf_iov_iter_revert(a_pIter, a_cbRewind) +void vbsf_iov_iter_revert(struct vbsf_iov_iter *iter, size_t cbRewind) +{ + SFLOG2(("vbsf_iov_iter_revert: cbRewind=%#zx\n", cbRewind)); + if (iter->iov_offset > 0) { + if (cbRewind <= iter->iov_offset) { + iter->iov_offset -= cbRewind; + return; + } + cbRewind -= iter->iov_offset; + iter->iov_offset = 0; + } + + while (cbRewind > 0) { + struct iovec const *pIov = --iter->iov; + size_t const cbSeg = pIov->iov_len; + iter->nr_segs++; + + Assert((uintptr_t)pIov >= (uintptr_t)iter->iov_org); + Assert(iter->nr_segs <= iter->nr_segs_org); + + if (cbRewind <= cbSeg) { + iter->iov_offset = cbSeg - cbRewind; + break; + } + cbRewind -= cbSeg; + } +} + +#endif /* 2.6.19 <= linux < 3.16.0 */ +#if RTLNX_VER_RANGE(3,16,0, 3,16,35) + +/** This is for implementing cMaxPage on 3.16 which doesn't have it. */ +static ssize_t vbsf_iov_iter_get_pages_3_16(struct iov_iter *iter, struct page **papPages, + size_t cbMax, unsigned cMaxPages, size_t *poffPg0) +{ + if (!(iter->type & ITER_BVEC)) { + size_t const offPg0 = iter->iov_offset & PAGE_OFFSET_MASK; + size_t const cbMaxPages = ((size_t)cMaxPages << PAGE_SHIFT) - offPg0; + if (cbMax > cbMaxPages) + cbMax = cbMaxPages; + } + /* else: BVEC works a page at a time and shouldn't have much of a problem here. */ + return iov_iter_get_pages(iter, papPages, cbMax, poffPg0); +} +# undef iov_iter_get_pages +# define iov_iter_get_pages(a_pIter, a_papPages, a_cbMax, a_cMaxPages, a_poffPg0) \ + vbsf_iov_iter_get_pages_3_16(a_pIter, a_papPages, a_cbMax, a_cMaxPages, a_poffPg0) + +#endif /* 3.16.0-3.16.34 */ +#if RTLNX_VER_RANGE(2,6,19, 3,18,0) + +static size_t copy_from_iter(uint8_t *pbDst, size_t cbToCopy, struct iov_iter *pSrcIter) +{ + size_t const cbTotal = cbToCopy; + Assert(iov_iter_count(pSrcIter) >= cbToCopy); +# if RTLNX_VER_MIN(3,16,0) + if (pSrcIter->type & ITER_BVEC) { + while (cbToCopy > 0) { + size_t const offPage = (uintptr_t)pbDst & PAGE_OFFSET_MASK; + size_t const cbThisCopy = RT_MIN(PAGE_SIZE - offPage, cbToCopy); + struct page *pPage = rtR0MemObjLinuxVirtToPage(pbDst); + size_t cbCopied = copy_page_from_iter(pPage, offPage, cbThisCopy, pSrcIter); + AssertStmt(cbCopied <= cbThisCopy, cbCopied = cbThisCopy); + pbDst += cbCopied; + cbToCopy -= cbCopied; + if (cbCopied != cbToCopy) + break; + } + } else +# endif + { + while (cbToCopy > 0) { + size_t cbThisCopy = iov_iter_single_seg_count(pSrcIter); + if (cbThisCopy > 0) { + if (cbThisCopy > cbToCopy) + cbThisCopy = cbToCopy; + if (pSrcIter->type & ITER_KVEC) + memcpy(pbDst, (void *)VBOX_ITER_IOV_ADDR(pSrcIter), cbThisCopy); + else if (copy_from_user(pbDst, VBOX_ITER_IOV_ADDR(pSrcIter), cbThisCopy) != 0) + break; + pbDst += cbThisCopy; + cbToCopy -= cbThisCopy; + } + iov_iter_advance(pSrcIter, cbThisCopy); + } + } + return cbTotal - cbToCopy; +} + + +static size_t copy_to_iter(uint8_t const *pbSrc, size_t cbToCopy, struct iov_iter *pDstIter) +{ + size_t const cbTotal = cbToCopy; + Assert(iov_iter_count(pDstIter) >= cbToCopy); +# if RTLNX_VER_MIN(3,16,0) + if (pDstIter->type & ITER_BVEC) { + while (cbToCopy > 0) { + size_t const offPage = (uintptr_t)pbSrc & PAGE_OFFSET_MASK; + size_t const cbThisCopy = RT_MIN(PAGE_SIZE - offPage, cbToCopy); + struct page *pPage = rtR0MemObjLinuxVirtToPage((void *)pbSrc); + size_t cbCopied = copy_page_to_iter(pPage, offPage, cbThisCopy, pDstIter); + AssertStmt(cbCopied <= cbThisCopy, cbCopied = cbThisCopy); + pbSrc += cbCopied; + 
cbToCopy -= cbCopied;
+            if (cbCopied != cbThisCopy)
+                break;
+        }
+    } else
+# endif
+    {
+        while (cbToCopy > 0) {
+            size_t cbThisCopy = iov_iter_single_seg_count(pDstIter);
+            if (cbThisCopy > 0) {
+                if (cbThisCopy > cbToCopy)
+                    cbThisCopy = cbToCopy;
+                if (pDstIter->type & ITER_KVEC)
+                    memcpy((void *)VBOX_ITER_IOV_ADDR(pDstIter), pbSrc, cbThisCopy);
+                else if (copy_to_user(VBOX_ITER_IOV_ADDR(pDstIter), pbSrc, cbThisCopy) != 0) {
+                    break;
+                }
+                pbSrc    += cbThisCopy;
+                cbToCopy -= cbThisCopy;
+            }
+            iov_iter_advance(pDstIter, cbThisCopy);
+        }
+    }
+    return cbTotal - cbToCopy;
+}
+
+#endif /* 2.6.19 <= linux < 3.18.0 */
+
+
+
+/*********************************************************************************************************************************
+*   Handle management                                                                                                            *
+*********************************************************************************************************************************/
+
+/**
+ * Called when an inode is released to unlink all handles that might possibly
+ * still be associated with it.
+ *
+ * @param   pInodeInfo  The inode which handles to drop.
+ */
+void vbsf_handle_drop_chain(struct vbsf_inode_info *pInodeInfo)
+{
+    struct vbsf_handle *pCur, *pNext;
+    unsigned long fSavedFlags;
+    SFLOGFLOW(("vbsf_handle_drop_chain: %p\n", pInodeInfo));
+    spin_lock_irqsave(&g_SfHandleLock, fSavedFlags);
+
+    RTListForEachSafe(&pInodeInfo->HandleList, pCur, pNext, struct vbsf_handle, Entry) {
+        AssertMsg(   (pCur->fFlags & (VBSF_HANDLE_F_MAGIC_MASK | VBSF_HANDLE_F_ON_LIST))
+                  == (VBSF_HANDLE_F_MAGIC | VBSF_HANDLE_F_ON_LIST), ("%p %#x\n", pCur, pCur->fFlags));
+        pCur->fFlags &= ~VBSF_HANDLE_F_ON_LIST; /* clear it so vbsf_handle_release_slow() won't unlink it a second time */
+        RTListNodeRemove(&pCur->Entry);
+    }
+
+    spin_unlock_irqrestore(&g_SfHandleLock, fSavedFlags);
+}
+
+
+/**
+ * Locates a handle that has all the flags in @a fFlagsSet set and all the
+ * flags in @a fFlagsClear clear.
+ *
+ * @returns Pointer to handle on success (retained), use vbsf_handle_release() to
+ *          release it.  NULL if no suitable handle was found.
+ * @param   pInodeInfo  The inode info to search.
+ * @param   fFlagsSet   The flags that must be set.
+ * @param   fFlagsClear The flags that must be clear.
+ */
+struct vbsf_handle *vbsf_handle_find(struct vbsf_inode_info *pInodeInfo, uint32_t fFlagsSet, uint32_t fFlagsClear)
+{
+    struct vbsf_handle *pCur;
+    unsigned long fSavedFlags;
+    spin_lock_irqsave(&g_SfHandleLock, fSavedFlags);
+
+    RTListForEach(&pInodeInfo->HandleList, pCur, struct vbsf_handle, Entry) {
+        AssertMsg(   (pCur->fFlags & (VBSF_HANDLE_F_MAGIC_MASK | VBSF_HANDLE_F_ON_LIST))
+                  == (VBSF_HANDLE_F_MAGIC | VBSF_HANDLE_F_ON_LIST), ("%p %#x\n", pCur, pCur->fFlags));
+        if ((pCur->fFlags & (fFlagsSet | fFlagsClear)) == fFlagsSet) {
+            uint32_t cRefs = ASMAtomicIncU32(&pCur->cRefs);
+            if (cRefs > 1) {
+                spin_unlock_irqrestore(&g_SfHandleLock, fSavedFlags);
+                SFLOGFLOW(("vbsf_handle_find: returns %p\n", pCur));
+                return pCur;
+            }
+            /* Oops, already being closed (safe as it's only ever increased here). */
+            ASMAtomicDecU32(&pCur->cRefs);
+        }
+    }
+
+    spin_unlock_irqrestore(&g_SfHandleLock, fSavedFlags);
+    SFLOGFLOW(("vbsf_handle_find: returns NULL!\n"));
+    return NULL;
+}
+
+
+/**
+ * Slow worker for vbsf_handle_release() that does the freeing.
+ *
+ * @returns 0 (ref count).
+ * @param   pHandle     The handle to release.
+ * @param   pSuperInfo  The info structure for the shared folder associated with
+ *                      the handle.
+ * @param   pszCaller   The caller name (for logging failures).
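+ *
+ * @note    Callers normally go through the vbsf_handle_release() inline
+ *          wrapper, which only ends up here once the reference count reaches
+ *          zero.  A simplified, illustrative usage sketch (not a verbatim
+ *          excerpt from this module):
+ * @code
+ *    struct vbsf_handle *pHandle = vbsf_handle_find(sf_i, VBSF_HANDLE_F_READ, 0);
+ *    if (pHandle) {
+ *        // ... issue host I/O against pHandle->hHost ...
+ *        vbsf_handle_release(pHandle, pSuperInfo, "some_caller");
+ *    }
+ * @endcode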
+ */ +uint32_t vbsf_handle_release_slow(struct vbsf_handle *pHandle, struct vbsf_super_info *pSuperInfo, const char *pszCaller) +{ + int rc; + unsigned long fSavedFlags; + + SFLOGFLOW(("vbsf_handle_release_slow: %p (%s)\n", pHandle, pszCaller)); + + /* + * Remove from the list. + */ + spin_lock_irqsave(&g_SfHandleLock, fSavedFlags); + + AssertMsg((pHandle->fFlags & VBSF_HANDLE_F_MAGIC_MASK) == VBSF_HANDLE_F_MAGIC, ("%p %#x\n", pHandle, pHandle->fFlags)); + Assert(pHandle->pInodeInfo); + Assert(pHandle->pInodeInfo && pHandle->pInodeInfo->u32Magic == SF_INODE_INFO_MAGIC); + + if (pHandle->fFlags & VBSF_HANDLE_F_ON_LIST) { + pHandle->fFlags &= ~VBSF_HANDLE_F_ON_LIST; + RTListNodeRemove(&pHandle->Entry); + } + + spin_unlock_irqrestore(&g_SfHandleLock, fSavedFlags); + + /* + * Actually destroy it. + */ + rc = VbglR0SfHostReqCloseSimple(pSuperInfo->map.root, pHandle->hHost); + if (RT_FAILURE(rc)) + LogFunc(("Caller %s: VbglR0SfHostReqCloseSimple %#RX64 failed with rc=%Rrc\n", pszCaller, pHandle->hHost, rc)); + pHandle->hHost = SHFL_HANDLE_NIL; + pHandle->fFlags = VBSF_HANDLE_F_MAGIC_DEAD; + kfree(pHandle); + return 0; +} + + +/** + * Appends a handle to a handle list. + * + * @param pInodeInfo The inode to add it to. + * @param pHandle The handle to add. + */ +void vbsf_handle_append(struct vbsf_inode_info *pInodeInfo, struct vbsf_handle *pHandle) +{ +#ifdef VBOX_STRICT + struct vbsf_handle *pCur; +#endif + unsigned long fSavedFlags; + + SFLOGFLOW(("vbsf_handle_append: %p (to %p)\n", pHandle, pInodeInfo)); + AssertMsg((pHandle->fFlags & (VBSF_HANDLE_F_MAGIC_MASK | VBSF_HANDLE_F_ON_LIST)) == VBSF_HANDLE_F_MAGIC, + ("%p %#x\n", pHandle, pHandle->fFlags)); + Assert(pInodeInfo->u32Magic == SF_INODE_INFO_MAGIC); + + spin_lock_irqsave(&g_SfHandleLock, fSavedFlags); + + AssertMsg((pHandle->fFlags & (VBSF_HANDLE_F_MAGIC_MASK | VBSF_HANDLE_F_ON_LIST)) == VBSF_HANDLE_F_MAGIC, + ("%p %#x\n", pHandle, pHandle->fFlags)); +#ifdef VBOX_STRICT + RTListForEach(&pInodeInfo->HandleList, pCur, struct vbsf_handle, Entry) { + Assert(pCur != pHandle); + AssertMsg( (pCur->fFlags & (VBSF_HANDLE_F_MAGIC_MASK | VBSF_HANDLE_F_ON_LIST)) + == (VBSF_HANDLE_F_MAGIC | VBSF_HANDLE_F_ON_LIST), ("%p %#x\n", pCur, pCur->fFlags)); + } + pHandle->pInodeInfo = pInodeInfo; +#endif + + pHandle->fFlags |= VBSF_HANDLE_F_ON_LIST; + RTListAppend(&pInodeInfo->HandleList, &pHandle->Entry); + + spin_unlock_irqrestore(&g_SfHandleLock, fSavedFlags); +} + + + +/********************************************************************************************************************************* +* Misc * +*********************************************************************************************************************************/ + +#if RTLNX_VER_MAX(2,6,6) +/** Any writable mappings? */ +DECLINLINE(bool) mapping_writably_mapped(struct address_space const *mapping) +{ +# if RTLNX_VER_MIN(2,5,6) + return !list_empty(&mapping->i_mmap_shared); +# else + return mapping->i_mmap_shared != NULL; +# endif +} +#endif + + +#if RTLNX_VER_MAX(2,5,12) +/** Missing in 2.4.x, so just stub it for now. */ +DECLINLINE(bool) PageWriteback(struct page const *page) +{ + return false; +} +#endif + + +/** + * Helper for deciding wheter we should do a read via the page cache or not. + * + * By default we will only use the page cache if there is a writable memory + * mapping of the file with a chance that it may have modified any of the pages + * already. 
+ */ +DECLINLINE(bool) vbsf_should_use_cached_read(struct file *file, struct address_space *mapping, struct vbsf_super_info *pSuperInfo) +{ + if ( (file->f_flags & O_DIRECT) + || pSuperInfo->enmCacheMode == kVbsfCacheMode_None) + return false; + if ( pSuperInfo->enmCacheMode == kVbsfCacheMode_Read + || pSuperInfo->enmCacheMode == kVbsfCacheMode_ReadWrite) + return true; + Assert(pSuperInfo->enmCacheMode == kVbsfCacheMode_Strict); + return mapping + && mapping->nrpages > 0 + && mapping_writably_mapped(mapping); +} + + + +/********************************************************************************************************************************* +* Pipe / splice stuff mainly for 2.6.17 >= linux < 2.6.31 (where no fallbacks were available) * +*********************************************************************************************************************************/ + +#if RTLNX_VER_RANGE(2,6,17, 3,16,0) + +# if RTLNX_VER_MAX(2,6,30) +# define LOCK_PIPE(a_pPipe) do { if ((a_pPipe)->inode) mutex_lock(&(a_pPipe)->inode->i_mutex); } while (0) +# define UNLOCK_PIPE(a_pPipe) do { if ((a_pPipe)->inode) mutex_unlock(&(a_pPipe)->inode->i_mutex); } while (0) +# else +# define LOCK_PIPE(a_pPipe) pipe_lock(a_pPipe) +# define UNLOCK_PIPE(a_pPipe) pipe_unlock(a_pPipe) +# endif + + +/** Waits for the pipe buffer status to change. */ +static void vbsf_wait_pipe(struct pipe_inode_info *pPipe) +{ + DEFINE_WAIT(WaitStuff); +# ifdef TASK_NONINTERACTIVE + prepare_to_wait(&pPipe->wait, &WaitStuff, TASK_INTERRUPTIBLE | TASK_NONINTERACTIVE); +# else + prepare_to_wait(&pPipe->wait, &WaitStuff, TASK_INTERRUPTIBLE); +# endif + UNLOCK_PIPE(pPipe); + + schedule(); + + finish_wait(&pPipe->wait, &WaitStuff); + LOCK_PIPE(pPipe); +} + + +/** Worker for vbsf_feed_pages_to_pipe that wakes up readers. */ +static void vbsf_wake_up_pipe(struct pipe_inode_info *pPipe, bool fReaders) +{ + smp_mb(); + if (waitqueue_active(&pPipe->wait)) + wake_up_interruptible_sync(&pPipe->wait); + if (fReaders) + kill_fasync(&pPipe->fasync_readers, SIGIO, POLL_IN); + else + kill_fasync(&pPipe->fasync_writers, SIGIO, POLL_OUT); +} + +#endif +#if RTLNX_VER_RANGE(2,6,17, 2,6,31) + +/** Verify pipe buffer content (needed for page-cache to ensure idle page). */ +static int vbsf_pipe_buf_confirm(struct pipe_inode_info *pPipe, struct pipe_buffer *pPipeBuf) +{ + /*SFLOG3(("vbsf_pipe_buf_confirm: %p\n", pPipeBuf));*/ + return 0; +} + + +/** Maps the buffer page. */ +static void *vbsf_pipe_buf_map(struct pipe_inode_info *pPipe, struct pipe_buffer *pPipeBuf, int atomic) +{ + void *pvRet; + if (!atomic) + pvRet = kmap(pPipeBuf->page); + else { + pPipeBuf->flags |= PIPE_BUF_FLAG_ATOMIC; + pvRet = kmap_atomic(pPipeBuf->page, KM_USER0); + } + /*SFLOG3(("vbsf_pipe_buf_map: %p -> %p\n", pPipeBuf, pvRet));*/ + return pvRet; +} + + +/** Unmaps the buffer page. */ +static void vbsf_pipe_buf_unmap(struct pipe_inode_info *pPipe, struct pipe_buffer *pPipeBuf, void *pvMapping) +{ + /*SFLOG3(("vbsf_pipe_buf_unmap: %p/%p\n", pPipeBuf, pvMapping)); */ + if (!(pPipeBuf->flags & PIPE_BUF_FLAG_ATOMIC)) + kunmap(pPipeBuf->page); + else { + pPipeBuf->flags &= ~PIPE_BUF_FLAG_ATOMIC; + kunmap_atomic(pvMapping, KM_USER0); + } +} + + +/** Gets a reference to the page. */ +static void vbsf_pipe_buf_get(struct pipe_inode_info *pPipe, struct pipe_buffer *pPipeBuf) +{ + page_cache_get(pPipeBuf->page); + /*SFLOG3(("vbsf_pipe_buf_get: %p (return count=%d)\n", pPipeBuf, page_count(pPipeBuf->page)));*/ +} + + +/** Release the buffer page (counter to vbsf_pipe_buf_get). 
+static void vbsf_pipe_buf_release(struct pipe_inode_info *pPipe, struct pipe_buffer *pPipeBuf)
+{
+    /*SFLOG3(("vbsf_pipe_buf_release: %p (incoming count=%d)\n", pPipeBuf, page_count(pPipeBuf->page)));*/
+    page_cache_release(pPipeBuf->page);
+}
+
+
+/** Attempts to steal the page.
+ * @returns 0 on success, 1 on failure. */
+static int vbsf_pipe_buf_steal(struct pipe_inode_info *pPipe, struct pipe_buffer *pPipeBuf)
+{
+    if (page_count(pPipeBuf->page) == 1) {
+        lock_page(pPipeBuf->page);
+        SFLOG3(("vbsf_pipe_buf_steal: %p -> 0\n", pPipeBuf));
+        return 0;
+    }
+    SFLOG3(("vbsf_pipe_buf_steal: %p -> 1\n", pPipeBuf));
+    return 1;
+}
+
+
+/**
+ * Pipe buffer operations used by vbsf_feed_pages_to_pipe.
+ */
+static struct pipe_buf_operations vbsf_pipe_buf_ops = {
+    .can_merge = 0,
+# if RTLNX_VER_MIN(2,6,23)
+    .confirm   = vbsf_pipe_buf_confirm,
+# else
+    .pin       = vbsf_pipe_buf_confirm,
+# endif
+    .map       = vbsf_pipe_buf_map,
+    .unmap     = vbsf_pipe_buf_unmap,
+    .get       = vbsf_pipe_buf_get,
+    .release   = vbsf_pipe_buf_release,
+    .steal     = vbsf_pipe_buf_steal,
+};
+
+
+/**
+ * Feeds the pages to the pipe.
+ *
+ * Pages given to the pipe are set to NULL in papPages.
+ */
+static ssize_t vbsf_feed_pages_to_pipe(struct pipe_inode_info *pPipe, struct page **papPages, size_t cPages, uint32_t offPg0,
+                                       uint32_t cbActual, unsigned fFlags)
+{
+    ssize_t cbRet       = 0;
+    size_t  iPage       = 0;
+    bool    fNeedWakeUp = false;
+
+    LOCK_PIPE(pPipe);
+    for (;;) {
+        if (   pPipe->readers > 0
+            && pPipe->nrbufs < PIPE_BUFFERS) {
+            struct pipe_buffer *pPipeBuf   = &pPipe->bufs[(pPipe->curbuf + pPipe->nrbufs) % PIPE_BUFFERS];
+            uint32_t const      cbThisPage = RT_MIN(cbActual, PAGE_SIZE - offPg0);
+            pPipeBuf->len     = cbThisPage;
+            pPipeBuf->offset  = offPg0;
+# if RTLNX_VER_MIN(2,6,23)
+            pPipeBuf->private = 0;
+# endif
+            pPipeBuf->ops     = &vbsf_pipe_buf_ops;
+            pPipeBuf->flags   = fFlags & SPLICE_F_GIFT ? PIPE_BUF_FLAG_GIFT : 0;
+            pPipeBuf->page    = papPages[iPage];
+
+            papPages[iPage++] = NULL;
+            pPipe->nrbufs++;
+            fNeedWakeUp |= pPipe->inode != NULL;
+            offPg0 = 0;
+            cbRet += cbThisPage;
+
+            /* done? */
+            cbActual -= cbThisPage;
+            if (!cbActual)
+                break;
+        } else if (pPipe->readers == 0) {
+            SFLOGFLOW(("vbsf_feed_pages_to_pipe: no readers!\n"));
+            send_sig(SIGPIPE, current, 0);
+            if (cbRet == 0)
+                cbRet = -EPIPE;
+            break;
+        } else if (fFlags & SPLICE_F_NONBLOCK) {
+            if (cbRet == 0)
+                cbRet = -EAGAIN;
+            break;
+        } else if (signal_pending(current)) {
+            if (cbRet == 0)
+                cbRet = -ERESTARTSYS;
+            SFLOGFLOW(("vbsf_feed_pages_to_pipe: pending signal! (%zd)\n", cbRet));
+            break;
+        } else {
+            if (fNeedWakeUp) {
+                vbsf_wake_up_pipe(pPipe, true /*fReaders*/);
+                fNeedWakeUp = false;
+            }
+            pPipe->waiting_writers++;
+            vbsf_wait_pipe(pPipe);
+            pPipe->waiting_writers--;
+        }
+    }
+    UNLOCK_PIPE(pPipe);
+
+    if (fNeedWakeUp)
+        vbsf_wake_up_pipe(pPipe, true /*fReaders*/);
+
+    return cbRet;
+}
+
+
+/**
+ * For splicing from a file to a pipe.
+ */
+static ssize_t vbsf_splice_read(struct file *file, loff_t *poffset, struct pipe_inode_info *pipe, size_t len, unsigned int flags)
+{
+    struct inode           *inode      = VBSF_GET_F_DENTRY(file)->d_inode;
+    struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(inode->i_sb);
+    ssize_t                 cbRet;
+
+    SFLOGFLOW(("vbsf_splice_read: file=%p poffset=%p{%#RX64} pipe=%p len=%#zx flags=%#x\n", file, poffset, *poffset, pipe, len, flags));
+    if (vbsf_should_use_cached_read(file, inode->i_mapping, pSuperInfo)) {
+        cbRet = generic_file_splice_read(file, poffset, pipe, len, flags);
+    } else {
+        /*
+         * Create a read request.
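+         * The request ends in a variable-length page list, so PIPE_BUFFERS
+         * caps how many pages we ask for in one go.  Worked example
+         * (illustrative numbers, 4KiB pages, PIPE_BUFFERS = 16): a 100KiB
+         * splice starting at file offset 0x1800 spans
+         *     RT_ALIGN_Z(0x800 + 100*1024, 4096) >> PAGE_SHIFT = 26
+         * pages, which gets clamped to the 16 the pipe can accept.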
+ */ + loff_t offFile = *poffset; + size_t cPages = RT_MIN(RT_ALIGN_Z((offFile & ~PAGE_CACHE_MASK) + len, PAGE_CACHE_SIZE) >> PAGE_CACHE_SHIFT, + PIPE_BUFFERS); + VBOXSFREADPGLSTREQ *pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFREADPGLSTREQ, + PgLst.aPages[cPages])); + if (pReq) { + /* + * Allocate pages. + */ + struct page *apPages[PIPE_BUFFERS]; + size_t i; + pReq->PgLst.offFirstPage = (uint16_t)offFile & (uint16_t)PAGE_OFFSET_MASK; + cbRet = 0; + for (i = 0; i < cPages; i++) { + struct page *pPage; + apPages[i] = pPage = alloc_page(GFP_USER); + if (pPage) { + pReq->PgLst.aPages[i] = page_to_phys(pPage); +# ifdef VBOX_STRICT + ASMMemFill32(kmap(pPage), PAGE_SIZE, UINT32_C(0xdeadbeef)); + kunmap(pPage); +# endif + } else { + cbRet = -ENOMEM; + break; + } + } + if (cbRet == 0) { + /* + * Do the reading. + */ + uint32_t const cbToRead = RT_MIN((cPages << PAGE_SHIFT) - (offFile & PAGE_OFFSET_MASK), len); + struct vbsf_reg_info *sf_r = (struct vbsf_reg_info *)file->private_data; + int vrc = VbglR0SfHostReqReadPgLst(pSuperInfo->map.root, pReq, sf_r->Handle.hHost, offFile, cbToRead, cPages); + if (RT_SUCCESS(vrc)) { + /* + * Get the number of bytes read, jettison the request + * and, in case of EOF, any unnecessary pages. + */ + uint32_t cbActual = pReq->Parms.cb32Read.u.value32; + AssertStmt(cbActual <= cbToRead, cbActual = cbToRead); + SFLOG2(("vbsf_splice_read: read -> %#x bytes @ %#RX64\n", cbActual, offFile)); + + VbglR0PhysHeapFree(pReq); + pReq = NULL; + + /* + * Now, feed it to the pipe thingy. + * This will take ownership of the all pages no matter what happens. + */ + cbRet = vbsf_feed_pages_to_pipe(pipe, apPages, cPages, offFile & PAGE_OFFSET_MASK, cbActual, flags); + if (cbRet > 0) + *poffset = offFile + cbRet; + } else { + cbRet = -RTErrConvertToErrno(vrc); + SFLOGFLOW(("vbsf_splice_read: Read failed: %Rrc -> %zd\n", vrc, cbRet)); + } + i = cPages; + } + + while (i-- > 0) + if (apPages[i]) + __free_pages(apPages[i], 0); + if (pReq) + VbglR0PhysHeapFree(pReq); + } else { + cbRet = -ENOMEM; + } + } + SFLOGFLOW(("vbsf_splice_read: returns %zd (%#zx), *poffset=%#RX64\n", cbRet, cbRet, *poffset)); + return cbRet; +} + +#endif /* 2.6.17 <= LINUX_VERSION_CODE < 2.6.31 */ +#if RTLNX_VER_RANGE(2,6,17, 3,16,0) + +/** + * For splicing from a pipe to a file. + * + * Since we can combine buffers and request allocations, this should be faster + * than the default implementation. + */ +static ssize_t vbsf_splice_write(struct pipe_inode_info *pPipe, struct file *file, loff_t *poffset, size_t len, unsigned int flags) +{ + struct inode *inode = VBSF_GET_F_DENTRY(file)->d_inode; + struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(inode->i_sb); + ssize_t cbRet; + + SFLOGFLOW(("vbsf_splice_write: pPipe=%p file=%p poffset=%p{%#RX64} len=%#zx flags=%#x\n", pPipe, file, poffset, *poffset, len, flags)); + /** @todo later if (false) { + cbRet = generic_file_splice_write(pPipe, file, poffset, len, flags); + } else */ { + /* + * Prepare a write request. + */ +# ifdef PIPE_BUFFERS + uint32_t const cMaxPages = RT_MIN(PIPE_BUFFERS, RT_ALIGN_Z(len, PAGE_SIZE) >> PAGE_SHIFT); +# else + uint32_t const cMaxPages = RT_MIN(RT_MAX(RT_MIN(pPipe->buffers, 256), PIPE_DEF_BUFFERS), + RT_ALIGN_Z(len, PAGE_SIZE) >> PAGE_SHIFT); +# endif + VBOXSFWRITEPGLSTREQ *pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFREADPGLSTREQ, + PgLst.aPages[cMaxPages])); + if (pReq) { + /* + * Feed from the pipe. 
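+             * Unlike the generic splice code, which pushes one pipe buffer at
+             * a time, the loop below coalesces adjacent page-aligned buffers
+             * so that a single host request can cover several of them; that
+             * is the speed advantage claimed in the function docs above.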
+             */
+            struct vbsf_reg_info *sf_r        = (struct vbsf_reg_info *)file->private_data;
+            struct address_space *mapping     = inode->i_mapping;
+            loff_t                offFile     = *poffset;
+            bool                  fNeedWakeUp = false;
+            cbRet = 0;
+
+            LOCK_PIPE(pPipe);
+
+            for (;;) {
+                unsigned cBufs = pPipe->nrbufs;
+                /*SFLOG2(("vbsf_splice_write: nrbufs=%#x curbuf=%#x\n", cBufs, pPipe->curbuf));*/
+                if (cBufs) {
+                    /*
+                     * There is data available.  Write it to the file.
+                     */
+                    int                 vrc;
+                    struct pipe_buffer *pPipeBuf      = &pPipe->bufs[pPipe->curbuf];
+                    uint32_t            cPagesToWrite = 1;
+                    uint32_t            cbToWrite     = pPipeBuf->len;
+
+                    Assert(pPipeBuf->offset < PAGE_SIZE);
+                    Assert(pPipeBuf->offset + pPipeBuf->len <= PAGE_SIZE);
+
+                    pReq->PgLst.offFirstPage = pPipeBuf->offset & PAGE_OFFSET_MASK;
+                    pReq->PgLst.aPages[0]    = page_to_phys(pPipeBuf->page);
+
+                    /* Add any adjacent page buffers: */
+                    while (   cPagesToWrite < cBufs
+                           && cPagesToWrite < cMaxPages
+                           && ((pReq->PgLst.offFirstPage + cbToWrite) & PAGE_OFFSET_MASK) == 0) {
+# ifdef PIPE_BUFFERS
+                        struct pipe_buffer *pPipeBuf2 = &pPipe->bufs[(pPipe->curbuf + cPagesToWrite) % PIPE_BUFFERS];
+# else
+                        struct pipe_buffer *pPipeBuf2 = &pPipe->bufs[(pPipe->curbuf + cPagesToWrite) % pPipe->buffers];
+# endif
+                        Assert(pPipeBuf2->len <= PAGE_SIZE);
+                        Assert(pPipeBuf2->offset < PAGE_SIZE);
+                        if (pPipeBuf2->offset != 0)
+                            break;
+                        pReq->PgLst.aPages[cPagesToWrite] = page_to_phys(pPipeBuf2->page);
+                        cbToWrite     += pPipeBuf2->len;
+                        cPagesToWrite += 1;
+                    }
+
+                    /* Check that we don't have signals pending before we issue the write, as
+                       we'll only end up having to cancel the HGCM request 99% of the time: */
+                    if (!signal_pending(current)) {
+                        struct vbsf_inode_info *sf_i = VBSF_GET_INODE_INFO(inode);
+                        vrc = VbglR0SfHostReqWritePgLst(pSuperInfo->map.root, pReq, sf_r->Handle.hHost, offFile,
+                                                        cbToWrite, cPagesToWrite);
+                        sf_i->ModificationTimeAtOurLastWrite = sf_i->ModificationTime;
+                    } else
+                        vrc = VERR_INTERRUPTED;
+                    if (RT_SUCCESS(vrc)) {
+                        /*
+                         * Get the number of bytes actually written, update file position
+                         * and return value, and advance the pipe buffer.
+                         */
+                        uint32_t cbActual = pReq->Parms.cb32Write.u.value32;
+                        AssertStmt(cbActual <= cbToWrite, cbActual = cbToWrite);
+                        SFLOG2(("vbsf_splice_write: write -> %#x bytes @ %#RX64\n", cbActual, offFile));
+
+                        cbRet += cbActual;
+
+                        while (cbActual > 0) {
+                            uint32_t cbAdvance = RT_MIN(pPipeBuf->len, cbActual);
+
+                            vbsf_reg_write_sync_page_cache(mapping, offFile, cbAdvance, NULL,
+                                                           &pPipeBuf->page, pPipeBuf->offset, 1);
+
+                            offFile          += cbAdvance;
+                            cbActual         -= cbAdvance;
+                            pPipeBuf->offset += cbAdvance;
+                            pPipeBuf->len    -= cbAdvance;
+
+                            if (!pPipeBuf->len) {
+                                struct pipe_buf_operations const *pOps = pPipeBuf->ops;
+                                pPipeBuf->ops = NULL;
+                                pOps->release(pPipe, pPipeBuf);
+
+# ifdef PIPE_BUFFERS
+                                pPipe->curbuf  = (pPipe->curbuf + 1) % PIPE_BUFFERS;
+# else
+                                pPipe->curbuf  = (pPipe->curbuf + 1) % pPipe->buffers;
+# endif
+                                pPipe->nrbufs -= 1;
+                                pPipeBuf = &pPipe->bufs[pPipe->curbuf];
+
+# if RTLNX_VER_MAX(2,6,30)
+                                fNeedWakeUp |= pPipe->inode != NULL;
+# else
+                                fNeedWakeUp = true;
+# endif
+                            } else {
+                                Assert(cbActual == 0);
+                                break;
+                            }
+                        }
+
+                        *poffset = offFile;
+                    } else {
+                        if (cbRet == 0)
+                            cbRet = vrc == VERR_INTERRUPTED ? -ERESTARTSYS : -RTErrConvertToErrno(vrc);
+                        SFLOGFLOW(("vbsf_splice_write: Write failed: %Rrc -> %zd (cbRet=%#zx)\n",
+                                   vrc, -RTErrConvertToErrno(vrc), cbRet));
+                        break;
+                    }
+                } else {
+                    /*
+                     * Wait for data to become available, if there is a chance that'll happen.
+                     */
+                    /* Quit if there are no writers (think EOF): */
+                    if (pPipe->writers == 0) {
+                        SFLOGFLOW(("vbsf_splice_write: No buffers.  No writers.  The show is done!\n"));
+                        break;
+                    }
+
+                    /* Quit if we've written some and no writers are waiting on the lock: */
+                    if (cbRet > 0 && pPipe->waiting_writers == 0) {
+                        SFLOGFLOW(("vbsf_splice_write: No waiting writers, returning what we've got.\n"));
+                        break;
+                    }
+
+                    /* Quit with EAGAIN if non-blocking: */
+                    if (flags & SPLICE_F_NONBLOCK) {
+                        if (cbRet == 0)
+                            cbRet = -EAGAIN;
+                        break;
+                    }
+
+                    /* Quit if we've got pending signals: */
+                    if (signal_pending(current)) {
+                        if (cbRet == 0)
+                            cbRet = -ERESTARTSYS;
+                        SFLOGFLOW(("vbsf_splice_write: pending signal! (%zd)\n", cbRet));
+                        break;
+                    }
+
+                    /* Wake up writers before we start waiting: */
+                    if (fNeedWakeUp) {
+                        vbsf_wake_up_pipe(pPipe, false /*fReaders*/);
+                        fNeedWakeUp = false;
+                    }
+                    vbsf_wait_pipe(pPipe);
+                }
+            } /* feed loop */
+
+            if (fNeedWakeUp)
+                vbsf_wake_up_pipe(pPipe, false /*fReaders*/);
+
+            UNLOCK_PIPE(pPipe);
+
+            VbglR0PhysHeapFree(pReq);
+        } else {
+            cbRet = -ENOMEM;
+        }
+    }
+    SFLOGFLOW(("vbsf_splice_write: returns %zd (%#zx), *poffset=%#RX64\n", cbRet, cbRet, *poffset));
+    return cbRet;
+}
+
+#endif /* 2.6.17 <= LINUX_VERSION_CODE < 3.16.0 */
+
+#if RTLNX_VER_RANGE(2,5,30,  2,6,23)
+/**
+ * Our own sendfile implementation that does not go via the page cache like
+ * generic_file_sendfile() does.
+ */
+static ssize_t vbsf_reg_sendfile(struct file *pFile, loff_t *poffFile, size_t cbToSend, read_actor_t pfnActor,
+# if RTLNX_VER_MIN(2,6,8)
+                                 void *pvUser
+# else
+                                 void __user *pvUser
+# endif
+                                 )
+{
+    struct inode           *inode      = VBSF_GET_F_DENTRY(pFile)->d_inode;
+    struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(inode->i_sb);
+    ssize_t                 cbRet;
+    SFLOGFLOW(("vbsf_reg_sendfile: pFile=%p poffFile=%p{%#RX64} cbToSend=%#zx pfnActor=%p pvUser=%p\n",
+               pFile, poffFile, poffFile ? *poffFile : 0, cbToSend, pfnActor, pvUser));
+    Assert(pSuperInfo);
+
+    /*
+     * Return immediately if asked to send nothing.
+     */
+    if (cbToSend == 0)
+        return 0;
+
+    /*
+     * Like for vbsf_reg_read() and vbsf_reg_read_iter(), we allow going via
+     * the page cache in some cases or configs.
+     */
+    if (vbsf_should_use_cached_read(pFile, inode->i_mapping, pSuperInfo)) {
+        cbRet = generic_file_sendfile(pFile, poffFile, cbToSend, pfnActor, pvUser);
+        SFLOGFLOW(("vbsf_reg_sendfile: returns %#zx *poffFile=%#RX64 [generic_file_sendfile]\n", cbRet, poffFile ? *poffFile : UINT64_MAX));
+    } else {
+        /*
+         * Allocate a request and a bunch of pages for reading from the file.
+         */
+        struct page        *apPages[16];
+        loff_t              offFile = poffFile ? *poffFile : 0;
+        size_t const        cPages  = cbToSend + ((size_t)offFile & PAGE_OFFSET_MASK) >= RT_ELEMENTS(apPages) * PAGE_SIZE
+                                    ? RT_ELEMENTS(apPages)
+                                    : RT_ALIGN_Z(cbToSend + ((size_t)offFile & PAGE_OFFSET_MASK), PAGE_SIZE) >> PAGE_SHIFT;
+        size_t              iPage;
+        VBOXSFREADPGLSTREQ *pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFREADPGLSTREQ,
+                                                                                              PgLst.aPages[cPages]));
+        if (pReq) {
+            Assert(cPages > 0);
+            cbRet = 0;
+            for (iPage = 0; iPage < cPages; iPage++) {
+                struct page *pPage;
+                apPages[iPage] = pPage = alloc_page(GFP_USER);
+                if (pPage) {
+                    Assert(page_count(pPage) == 1);
+                    pReq->PgLst.aPages[iPage] = page_to_phys(pPage);
+                } else {
+                    while (iPage-- > 0)
+                        vbsf_put_page(apPages[iPage]);
+                    cbRet = -ENOMEM;
+                    break;
+                }
+            }
+            if (cbRet == 0) {
+                /*
+                 * Do the job.
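+                 * Note on the actor contract (hedged, from our reading of the
+                 * 2.6.x sources): pfnActor copies data out of a page towards
+                 * whatever sendfile() is targeting and returns the number of
+                 * bytes it consumed, or 0 on failure with RdDesc.error set:
+                 *
+                 *     cb = pfnActor(&RdDesc, pPage, offInPage, cbWanted);
+                 *     // cb < cbWanted: target is full or failed, stop feeding.
+                 *
+                 * (offInPage and cbWanted are illustrative names.)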
+ */ + struct vbsf_reg_info *sf_r = (struct vbsf_reg_info *)pFile->private_data; + read_descriptor_t RdDesc; + RdDesc.count = cbToSend; +# if RTLNX_VER_MIN(2,6,8) + RdDesc.arg.data = pvUser; +# else + RdDesc.buf = pvUser; +# endif + RdDesc.written = 0; + RdDesc.error = 0; + + Assert(sf_r); + Assert((sf_r->Handle.fFlags & VBSF_HANDLE_F_MAGIC_MASK) == VBSF_HANDLE_F_MAGIC); + + while (cbToSend > 0) { + /* + * Read another chunk. For paranoid reasons, we keep data where the page cache + * would keep it, i.e. page offset bits corresponds to the file offset bits. + */ + uint32_t const offPg0 = (uint32_t)offFile & (uint32_t)PAGE_OFFSET_MASK; + uint32_t const cbToRead = RT_MIN((cPages << PAGE_SHIFT) - offPg0, cbToSend); + uint32_t const cPagesToRead = RT_ALIGN_Z(cbToRead + offPg0, PAGE_SIZE) >> PAGE_SHIFT; + int vrc; + pReq->PgLst.offFirstPage = (uint16_t)offPg0; + if (!signal_pending(current)) + vrc = VbglR0SfHostReqReadPgLst(pSuperInfo->map.root, pReq, sf_r->Handle.hHost, offFile, + cbToRead, cPagesToRead); + else + vrc = VERR_INTERRUPTED; + if (RT_SUCCESS(vrc)) { + /* + * Pass what we read to the actor. + */ + uint32_t off = offPg0; + uint32_t cbActual = pReq->Parms.cb32Read.u.value32; + bool const fIsEof = cbActual < cbToRead; + AssertStmt(cbActual <= cbToRead, cbActual = cbToRead); + SFLOG3(("vbsf_reg_sendfile: Read %#x bytes (offPg0=%#x), wanted %#x ...\n", cbActual, offPg0, cbToRead)); + + iPage = 0; + while (cbActual > 0) { + uint32_t const cbPage = RT_MIN(cbActual, PAGE_SIZE - off); + int const cbRetActor = pfnActor(&RdDesc, apPages[iPage], off, cbPage); + Assert(cbRetActor >= 0); /* Returns zero on failure, with RdDesc.error holding the status code. */ + + AssertMsg(iPage < cPages && iPage < cPagesToRead, ("iPage=%#x cPages=%#x cPagesToRead=%#x\n", iPage, cPages, cPagesToRead)); + + offFile += cbRetActor; + if ((uint32_t)cbRetActor == cbPage && RdDesc.count > 0) { + cbActual -= cbPage; + cbToSend -= cbPage; + iPage++; + } else { + SFLOG3(("vbsf_reg_sendfile: cbRetActor=%#x (%d) cbPage=%#x RdDesc{count=%#lx error=%d} iPage=%#x/%#x/%#x cbToSend=%#zx\n", + cbRetActor, cbRetActor, cbPage, RdDesc.count, RdDesc.error, iPage, cPagesToRead, cPages, cbToSend)); + vrc = VERR_CALLBACK_RETURN; + break; + } + off = 0; + } + + /* + * Are we done yet? + */ + if (RT_FAILURE_NP(vrc) || cbToSend == 0 || RdDesc.error != 0 || fIsEof) { + break; + } + + /* + * Replace pages held by the actor. + */ + vrc = VINF_SUCCESS; + for (iPage = 0; iPage < cPages; iPage++) { + struct page *pPage = apPages[iPage]; + if (page_count(pPage) != 1) { + struct page *pNewPage = alloc_page(GFP_USER); + if (pNewPage) { + SFLOGFLOW(("vbsf_reg_sendfile: Replacing page #%x: %p -> %p\n", iPage, pPage, pNewPage)); + vbsf_put_page(pPage); + apPages[iPage] = pNewPage; + } else { + SFLOGFLOW(("vbsf_reg_sendfile: Failed to allocate a replacement page.\n")); + vrc = VERR_NO_MEMORY; + break; + } + } + } + if (RT_FAILURE(vrc)) + break; /* RdDesc.written should be non-zero, so don't bother with setting error. */ + } else { + RdDesc.error = vrc == VERR_INTERRUPTED ? -ERESTARTSYS : -RTErrConvertToErrno(vrc); + SFLOGFLOW(("vbsf_reg_sendfile: Read failed: %Rrc -> %zd (RdDesc.error=%#d)\n", + vrc, -RTErrConvertToErrno(vrc), RdDesc.error)); + break; + } + } + + /* + * Free memory. + */ + for (iPage = 0; iPage < cPages; iPage++) + vbsf_put_page(apPages[iPage]); + + /* + * Set the return values. 
+ */ + if (RdDesc.written) { + cbRet = RdDesc.written; + if (poffFile) + *poffFile = offFile; + } else { + cbRet = RdDesc.error; + } + } + VbglR0PhysHeapFree(pReq); + } else { + cbRet = -ENOMEM; + } + SFLOGFLOW(("vbsf_reg_sendfile: returns %#zx offFile=%#RX64\n", cbRet, offFile)); + } + return cbRet; +} +#endif /* 2.5.30 <= LINUX_VERSION_CODE < 2.6.23 */ + + +/********************************************************************************************************************************* +* File operations on regular files * +*********************************************************************************************************************************/ + +/** Wrapper around put_page / page_cache_release. */ +DECLINLINE(void) vbsf_put_page(struct page *pPage) +{ +#if RTLNX_VER_MIN(4,6,0) + put_page(pPage); +#else + page_cache_release(pPage); +#endif +} + + +/** Wrapper around get_page / page_cache_get. */ +DECLINLINE(void) vbsf_get_page(struct page *pPage) +{ +#if RTLNX_VER_MIN(4,6,0) + get_page(pPage); +#else + page_cache_get(pPage); +#endif +} + + +/** Companion to vbsf_lock_user_pages(). */ +static void vbsf_unlock_user_pages(struct page **papPages, size_t cPages, bool fSetDirty, bool fLockPgHack) +{ + /* We don't mark kernel pages dirty: */ + if (fLockPgHack) + fSetDirty = false; + + while (cPages-- > 0) + { + struct page *pPage = papPages[cPages]; + Assert((ssize_t)cPages >= 0); + if (fSetDirty && !PageReserved(pPage)) + set_page_dirty(pPage); + vbsf_put_page(pPage); + } +} + + +/** + * Worker for vbsf_lock_user_pages_failed_check_kernel() and + * vbsf_iter_lock_pages(). + */ +static int vbsf_lock_kernel_pages(uint8_t *pbStart, bool fWrite, size_t cPages, struct page **papPages) +{ + uintptr_t const uPtrFrom = (uintptr_t)pbStart; + uintptr_t const uPtrLast = (uPtrFrom & ~(uintptr_t)PAGE_OFFSET_MASK) + (cPages << PAGE_SHIFT) - 1; + uint8_t *pbPage = (uint8_t *)uPtrLast; + size_t iPage = cPages; + + /* + * Touch the pages first (paranoia^2). + */ + if (fWrite) { + uint8_t volatile *pbProbe = (uint8_t volatile *)uPtrFrom; + while (iPage-- > 0) { + *pbProbe = *pbProbe; + pbProbe += PAGE_SIZE; + } + } else { + uint8_t const *pbProbe = (uint8_t const *)uPtrFrom; + while (iPage-- > 0) { + ASMProbeReadByte(pbProbe); + pbProbe += PAGE_SIZE; + } + } + + /* + * Get the pages. + * Note! Fixes here probably applies to rtR0MemObjNativeLockKernel as well. + */ + iPage = cPages; + if ( uPtrFrom >= (unsigned long)__va(0) + && uPtrLast < (unsigned long)high_memory) { + /* The physical page mapping area: */ + while (iPage-- > 0) { + struct page *pPage = papPages[iPage] = virt_to_page(pbPage); + vbsf_get_page(pPage); + pbPage -= PAGE_SIZE; + } + } else { + /* This is vmalloc or some such thing, so go thru page tables: */ + while (iPage-- > 0) { + struct page *pPage = rtR0MemObjLinuxVirtToPage(pbPage); + if (pPage) { + papPages[iPage] = pPage; + vbsf_get_page(pPage); + pbPage -= PAGE_SIZE; + } else { + while (++iPage < cPages) { + pPage = papPages[iPage]; + vbsf_put_page(pPage); + } + return -EFAULT; + } + } + } + return 0; +} + + +/** + * Catches kernel_read() and kernel_write() calls and works around them. + * + * The file_operations::read and file_operations::write callbacks supposedly + * hands us the user buffers to read into and write out of. To allow the kernel + * to read and write without allocating buffers in userland, they kernel_read() + * and kernel_write() increases the user space address limit before calling us + * so that copyin/copyout won't reject it. 
+ * Our problem is that get_user_pages() works on the userspace address space
+ * structures and will not be fooled by an increased addr_limit.
+ *
+ * This code tries to detect this situation and fakes the page locking for the
+ * kernel buffer.
+ */
+static int vbsf_lock_user_pages_failed_check_kernel(uintptr_t uPtrFrom, size_t cPages, bool fWrite, int rcFailed,
+                                                    struct page **papPages, bool *pfLockPgHack)
+{
+    /*
+     * Check that this is valid user memory that is actually in the kernel range.
+     */
+#if RTLNX_VER_MIN(5,10,0)
+    if (   access_ok((void *)uPtrFrom, cPages << PAGE_SHIFT)
+        && uPtrFrom >= TASK_SIZE_MAX)
+#elif RTLNX_VER_MIN(5,0,0) || RTLNX_RHEL_MIN(8,1)
+    if (   access_ok((void *)uPtrFrom, cPages << PAGE_SHIFT)
+        && uPtrFrom >= USER_DS.seg)
+#else
+    if (   access_ok(fWrite ? VERIFY_WRITE : VERIFY_READ, (void *)uPtrFrom, cPages << PAGE_SHIFT)
+        && uPtrFrom >= USER_DS.seg)
+#endif
+    {
+        int rc = vbsf_lock_kernel_pages((uint8_t *)uPtrFrom, fWrite, cPages, papPages);
+        if (rc == 0) {
+            *pfLockPgHack = true;
+            return 0;
+        }
+    }
+
+    return rcFailed;
+}
+
+
+/** Wrapper around get_user_pages(). */
+DECLINLINE(int) vbsf_lock_user_pages(uintptr_t uPtrFrom, size_t cPages, bool fWrite, struct page **papPages, bool *pfLockPgHack)
+{
+# if RTLNX_VER_MIN(4,9,0) \
+  || (defined(CONFIG_SUSE_KERNEL) && RTLNX_VER_RANGE(4,4,73,  4,4,74) /** @todo Figure out when & what exactly. */) \
+  || (defined(CONFIG_SUSE_KERNEL) && RTLNX_VER_RANGE(4,4,75,  4,4,90) /** @todo Figure out when & what exactly. */) \
+  || (defined(CONFIG_SUSE_KERNEL) && RTLNX_VER_RANGE(4,4,92,  4,5,0)  /** @todo Figure out when & what exactly. */)
+    ssize_t cPagesLocked = get_user_pages_unlocked(uPtrFrom, cPages, papPages,
+                                                   fWrite ? FOLL_WRITE | FOLL_FORCE : FOLL_FORCE);
+# elif RTLNX_VER_MIN(4,6,0)
+    ssize_t cPagesLocked = get_user_pages_unlocked(uPtrFrom, cPages, fWrite, 1 /*force*/, papPages);
+# elif RTLNX_VER_RANGE(4,4,168,  4,5,0)
+    ssize_t cPagesLocked = get_user_pages_unlocked(current, current->mm, uPtrFrom, cPages, papPages,
+                                                   fWrite ? FOLL_WRITE | FOLL_FORCE : FOLL_FORCE);
+# elif RTLNX_VER_MIN(4,0,0)
+    ssize_t cPagesLocked = get_user_pages_unlocked(current, current->mm, uPtrFrom, cPages, fWrite, 1 /*force*/, papPages);
+# else
+    struct task_struct *pTask = current;
+    ssize_t cPagesLocked;
+    down_read(&pTask->mm->mmap_sem);
+    cPagesLocked = get_user_pages(pTask, pTask->mm, uPtrFrom, cPages, fWrite, 1 /*force*/, papPages, NULL);
+    up_read(&pTask->mm->mmap_sem);
+# endif
+    *pfLockPgHack = false;
+    if (cPagesLocked == cPages)
+        return 0;
+
+    /*
+     * It failed.
+     */
+    if (cPagesLocked < 0)
+        return vbsf_lock_user_pages_failed_check_kernel(uPtrFrom, cPages, fWrite, (int)cPagesLocked, papPages, pfLockPgHack);
+
+    vbsf_unlock_user_pages(papPages, cPagesLocked, false /*fSetDirty*/, false /*fLockPgHack*/);
+
+    /* We could use uPtrFrom + cPagesLocked to get the correct status here... */
+    return -EFAULT;
+}
+
+#if RTLNX_VER_MAX(5,10,0) /* No regular .read/.write for 5.10 and later; only .read_iter/.write_iter, otherwise in-kernel reads/writes fail. */
+
+/**
+ * Read function used when accessing files that are memory mapped.
+ *
+ * We read from the page cache here to present a coherent picture of the
+ * file content.
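+ *
+ * Example of why this matters (illustrative): a process stores through an
+ * mmap'ed view of the file, leaving the modified page dirty in the page
+ * cache; a generic_file_read_iter() based read sees the new bytes right away,
+ * whereas a direct host read would return stale data until writeback runs.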
+ */ +static ssize_t vbsf_reg_read_mapped(struct file *file, char /*__user*/ *buf, size_t size, loff_t *off) +{ +# if RTLNX_VER_MIN(3,16,0) + struct iovec iov = { .iov_base = buf, .iov_len = size }; + struct iov_iter iter; + struct kiocb kiocb; + ssize_t cbRet; + + init_sync_kiocb(&kiocb, file); + kiocb.ki_pos = *off; + iov_iter_init(&iter, READ, &iov, 1, size); + + cbRet = generic_file_read_iter(&kiocb, &iter); + + *off = kiocb.ki_pos; + return cbRet; + +# elif RTLNX_VER_MIN(2,6,19) + struct iovec iov = { .iov_base = buf, .iov_len = size }; + struct kiocb kiocb; + ssize_t cbRet; + + init_sync_kiocb(&kiocb, file); + kiocb.ki_pos = *off; + + cbRet = generic_file_aio_read(&kiocb, &iov, 1, *off); + if (cbRet == -EIOCBQUEUED) + cbRet = wait_on_sync_kiocb(&kiocb); + + *off = kiocb.ki_pos; + return cbRet; + +# else /* 2.6.18 or earlier: */ + return generic_file_read(file, buf, size, off); +# endif +} + + +/** + * Fallback case of vbsf_reg_read() that locks the user buffers and let the host + * write directly to them. + */ +static ssize_t vbsf_reg_read_locking(struct file *file, char /*__user*/ *buf, size_t size, loff_t *off, + struct vbsf_super_info *pSuperInfo, struct vbsf_reg_info *sf_r) +{ + /* + * Lock pages and execute the read, taking care not to pass the host + * more than it can handle in one go or more than we care to allocate + * page arrays for. The latter limit is set at just short of 32KB due + * to how the physical heap works. + */ + struct page *apPagesStack[16]; + struct page **papPages = &apPagesStack[0]; + struct page **papPagesFree = NULL; + VBOXSFREADPGLSTREQ *pReq; + loff_t offFile = *off; + ssize_t cbRet = -ENOMEM; + size_t cPages = (((uintptr_t)buf & PAGE_OFFSET_MASK) + size + PAGE_OFFSET_MASK) >> PAGE_SHIFT; + size_t cMaxPages = RT_MIN(RT_MAX(pSuperInfo->cMaxIoPages, 1), cPages); + bool fLockPgHack; + + pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFREADPGLSTREQ, PgLst.aPages[cMaxPages])); + while (!pReq && cMaxPages > 4) { + cMaxPages /= 2; + pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFREADPGLSTREQ, PgLst.aPages[cMaxPages])); + } + if (pReq && cMaxPages > RT_ELEMENTS(apPagesStack)) + papPagesFree = papPages = kmalloc(cMaxPages * sizeof(sizeof(papPages[0])), GFP_KERNEL); + if (pReq && papPages) { + cbRet = 0; + for (;;) { + /* + * Figure out how much to process now and lock the user pages. + */ + int rc; + size_t cbChunk = (uintptr_t)buf & PAGE_OFFSET_MASK; + pReq->PgLst.offFirstPage = (uint16_t)cbChunk; + cPages = RT_ALIGN_Z(cbChunk + size, PAGE_SIZE) >> PAGE_SHIFT; + if (cPages <= cMaxPages) + cbChunk = size; + else { + cPages = cMaxPages; + cbChunk = (cMaxPages << PAGE_SHIFT) - cbChunk; + } + + rc = vbsf_lock_user_pages((uintptr_t)buf, cPages, true /*fWrite*/, papPages, &fLockPgHack); + if (rc == 0) { + size_t iPage = cPages; + while (iPage-- > 0) + pReq->PgLst.aPages[iPage] = page_to_phys(papPages[iPage]); + } else { + cbRet = rc; + break; + } + + /* + * Issue the request and unlock the pages. + */ + rc = VbglR0SfHostReqReadPgLst(pSuperInfo->map.root, pReq, sf_r->Handle.hHost, offFile, cbChunk, cPages); + + Assert(cPages <= cMaxPages); + vbsf_unlock_user_pages(papPages, cPages, true /*fSetDirty*/, fLockPgHack); + + if (RT_SUCCESS(rc)) { + /* + * Success, advance position and buffer. 
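+                 * Note that a short read (cbActual < cbChunk) is taken as end
+                 * of file: we commit the offset below and stop rather than
+                 * re-issue the request.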
+                 */
+                uint32_t cbActual = pReq->Parms.cb32Read.u.value32;
+                AssertStmt(cbActual <= cbChunk, cbActual = cbChunk);
+                cbRet   += cbActual;
+                offFile += cbActual;
+                buf      = (uint8_t *)buf + cbActual;
+                size    -= cbActual;
+
+                /*
+                 * Are we done already?  If so commit the new file offset.
+                 */
+                if (!size || cbActual < cbChunk) {
+                    *off = offFile;
+                    break;
+                }
+            } else if (rc == VERR_NO_MEMORY && cMaxPages > 4) {
+                /*
+                 * The host probably doesn't have enough heap to handle the
+                 * request, reduce the page count and retry.
+                 */
+                cMaxPages /= 4;
+                Assert(cMaxPages > 0);
+            } else {
+                /*
+                 * If we've successfully read stuff, return it rather than
+                 * the error.  (Not sure if this is such a great idea...)
+                 */
+                if (cbRet > 0) {
+                    SFLOGFLOW(("vbsf_reg_read: read at %#RX64 -> %Rrc; got cbRet=%#zx already\n", offFile, rc, cbRet));
+                    *off = offFile;
+                } else {
+                    SFLOGFLOW(("vbsf_reg_read: read at %#RX64 -> %Rrc\n", offFile, rc));
+                    cbRet = -EPROTO;
+                }
+                break;
+            }
+        }
+    }
+    if (papPagesFree)
+        kfree(papPages);
+    if (pReq)
+        VbglR0PhysHeapFree(pReq);
+    SFLOGFLOW(("vbsf_reg_read: returns %zd (%#zx), *off=%RX64 [lock]\n", cbRet, cbRet, *off));
+    return cbRet;
+}
+
+
+/**
+ * Read from a regular file.
+ *
+ * @param   file        the file
+ * @param   buf         the buffer
+ * @param   size        length of the buffer
+ * @param   off         offset within the file (in/out).
+ * @returns the number of read bytes on success, Linux error code otherwise
+ */
+static ssize_t vbsf_reg_read(struct file *file, char /*__user*/ *buf, size_t size, loff_t *off)
+{
+    struct inode           *inode      = VBSF_GET_F_DENTRY(file)->d_inode;
+    struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(inode->i_sb);
+    struct vbsf_reg_info   *sf_r       = file->private_data;
+    struct address_space   *mapping    = inode->i_mapping;
+
+    SFLOGFLOW(("vbsf_reg_read: inode=%p file=%p buf=%p size=%#zx off=%#llx\n", inode, file, buf, size, *off));
+
+    if (!S_ISREG(inode->i_mode)) {
+        LogFunc(("read from non regular file %d\n", inode->i_mode));
+        return -EINVAL;
+    }
+
+    /** @todo XXX Check read permission according to inode->i_mode! */
+
+    if (!size)
+        return 0;
+
+    /*
+     * If there is a mapping and O_DIRECT isn't in effect, we must at a
+     * minimum heed dirty pages in the mapping and read from them.  For
+     * simplicity though, we just do page cache reading when there are
+     * writable mappings around with any kind of pages loaded.
+     */
+    if (vbsf_should_use_cached_read(file, mapping, pSuperInfo))
+        return vbsf_reg_read_mapped(file, buf, size, off);
+
+    /*
+     * For small requests, try to use an embedded buffer provided we get a heap block
+     * that does not cross page boundaries (see host code).
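+     * Worked example (illustrative, 4KiB pages): the cut-off is
+     * 4096 / 4 * 3 = 3072 bytes minus the request header, so reads up to
+     * roughly 3KiB are copied through the request buffer itself and anything
+     * larger falls through to the page-locking path below.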
+ */ + if (size <= PAGE_SIZE / 4 * 3 - RT_UOFFSETOF(VBOXSFREADEMBEDDEDREQ, abData[0]) /* see allocator */) { + uint32_t const cbReq = RT_UOFFSETOF(VBOXSFREADEMBEDDEDREQ, abData[0]) + size; + VBOXSFREADEMBEDDEDREQ *pReq = (VBOXSFREADEMBEDDEDREQ *)VbglR0PhysHeapAlloc(cbReq); + if (pReq) { + if ((PAGE_SIZE - ((uintptr_t)pReq & PAGE_OFFSET_MASK)) >= cbReq) { + ssize_t cbRet; + int vrc = VbglR0SfHostReqReadEmbedded(pSuperInfo->map.root, pReq, sf_r->Handle.hHost, *off, (uint32_t)size); + if (RT_SUCCESS(vrc)) { + cbRet = pReq->Parms.cb32Read.u.value32; + AssertStmt(cbRet <= (ssize_t)size, cbRet = size); + if (copy_to_user(buf, pReq->abData, cbRet) == 0) + *off += cbRet; + else + cbRet = -EFAULT; + } else + cbRet = -EPROTO; + VbglR0PhysHeapFree(pReq); + SFLOGFLOW(("vbsf_reg_read: returns %zd (%#zx), *off=%RX64 [embed]\n", cbRet, cbRet, *off)); + return cbRet; + } + VbglR0PhysHeapFree(pReq); + } + } + +# if 0 /* Turns out this is slightly slower than locking the pages even for 4KB reads (4.19/amd64). */ + /* + * For medium sized requests try use a bounce buffer. + */ + if (size <= _64K /** @todo make this configurable? */) { + void *pvBounce = kmalloc(size, GFP_KERNEL); + if (pvBounce) { + VBOXSFREADPGLSTREQ *pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq)); + if (pReq) { + ssize_t cbRet; + int vrc = VbglR0SfHostReqReadContig(pSuperInfo->map.root, pReq, sf_r->Handle.hHost, *off, + (uint32_t)size, pvBounce, virt_to_phys(pvBounce)); + if (RT_SUCCESS(vrc)) { + cbRet = pReq->Parms.cb32Read.u.value32; + AssertStmt(cbRet <= (ssize_t)size, cbRet = size); + if (copy_to_user(buf, pvBounce, cbRet) == 0) + *off += cbRet; + else + cbRet = -EFAULT; + } else + cbRet = -EPROTO; + VbglR0PhysHeapFree(pReq); + kfree(pvBounce); + SFLOGFLOW(("vbsf_reg_read: returns %zd (%#zx), *off=%RX64 [bounce]\n", cbRet, cbRet, *off)); + return cbRet; + } + kfree(pvBounce); + } + } +# endif + + return vbsf_reg_read_locking(file, buf, size, off, pSuperInfo, sf_r); +} + +#endif /* < 5.10.0 */ + +/** + * Helper the synchronizes the page cache content with something we just wrote + * to the host. + */ +static void vbsf_reg_write_sync_page_cache(struct address_space *mapping, loff_t offFile, uint32_t cbRange, + uint8_t const *pbSrcBuf, struct page **papSrcPages, + uint32_t offSrcPage, size_t cSrcPages) +{ + Assert(offSrcPage < PAGE_SIZE); + if (mapping && mapping->nrpages > 0) { + /* + * Work the pages in the write range. + */ + while (cbRange > 0) { + /* + * Lookup the page at offFile. We're fine if there aren't + * any there. We're skip if it's dirty or is being written + * back, at least for now. + */ + size_t const offDstPage = offFile & PAGE_OFFSET_MASK; + size_t const cbToCopy = RT_MIN(PAGE_SIZE - offDstPage, cbRange); + pgoff_t const idxPage = offFile >> PAGE_SHIFT; + struct page *pDstPage = find_lock_page(mapping, idxPage); + if (pDstPage) { + if ( pDstPage->mapping == mapping /* ignore if re-purposed (paranoia) */ + && pDstPage->index == idxPage + && !PageDirty(pDstPage) /* ignore if dirty */ + && !PageWriteback(pDstPage) /* ignore if being written back */ ) { + /* + * Map the page and do the copying. 
+ */ + uint8_t *pbDst = (uint8_t *)kmap(pDstPage); + if (pbSrcBuf) + memcpy(&pbDst[offDstPage], pbSrcBuf, cbToCopy); + else { + uint32_t const cbSrc0 = PAGE_SIZE - offSrcPage; + uint8_t const *pbSrc = (uint8_t const *)kmap(papSrcPages[0]); + AssertMsg(cSrcPages >= 1, ("offFile=%#llx cbRange=%#zx cbToCopy=%#zx\n", offFile, cbRange, cbToCopy)); + memcpy(&pbDst[offDstPage], &pbSrc[offSrcPage], RT_MIN(cbToCopy, cbSrc0)); + kunmap(papSrcPages[0]); + if (cbToCopy > cbSrc0) { + AssertMsg(cSrcPages >= 2, ("offFile=%#llx cbRange=%#zx cbToCopy=%#zx\n", offFile, cbRange, cbToCopy)); + pbSrc = (uint8_t const *)kmap(papSrcPages[1]); + memcpy(&pbDst[offDstPage + cbSrc0], pbSrc, cbToCopy - cbSrc0); + kunmap(papSrcPages[1]); + } + } + kunmap(pDstPage); + flush_dcache_page(pDstPage); + if (cbToCopy == PAGE_SIZE) + SetPageUptodate(pDstPage); +# if RTLNX_VER_MIN(2,4,10) + mark_page_accessed(pDstPage); +# endif + } else + SFLOGFLOW(("vbsf_reg_write_sync_page_cache: Skipping page %p: mapping=%p (vs %p) writeback=%d offset=%#lx (vs%#lx)\n", + pDstPage, pDstPage->mapping, mapping, PageWriteback(pDstPage), pDstPage->index, idxPage)); + unlock_page(pDstPage); + vbsf_put_page(pDstPage); + } + + /* + * Advance. + */ + if (pbSrcBuf) + pbSrcBuf += cbToCopy; + else + { + offSrcPage += cbToCopy; + Assert(offSrcPage < PAGE_SIZE * 2); + if (offSrcPage >= PAGE_SIZE) { + offSrcPage &= PAGE_OFFSET_MASK; + papSrcPages++; +# ifdef VBOX_STRICT + Assert(cSrcPages > 0); + cSrcPages--; +# endif + } + } + offFile += cbToCopy; + cbRange -= cbToCopy; + } + } + RT_NOREF(cSrcPages); +} + +#if RTLNX_VER_MAX(5,10,0) /* No regular .read/.write for 5.10, only .read_iter/.write_iter or in-kernel reads/writes fail. */ + +/** + * Fallback case of vbsf_reg_write() that locks the user buffers and let the host + * write directly to them. + */ +static ssize_t vbsf_reg_write_locking(struct file *file, const char /*__user*/ *buf, size_t size, loff_t *off, loff_t offFile, + struct inode *inode, struct vbsf_inode_info *sf_i, + struct vbsf_super_info *pSuperInfo, struct vbsf_reg_info *sf_r) +{ + /* + * Lock pages and execute the write, taking care not to pass the host + * more than it can handle in one go or more than we care to allocate + * page arrays for. The latter limit is set at just short of 32KB due + * to how the physical heap works. + */ + struct page *apPagesStack[16]; + struct page **papPages = &apPagesStack[0]; + struct page **papPagesFree = NULL; + VBOXSFWRITEPGLSTREQ *pReq; + ssize_t cbRet = -ENOMEM; + size_t cPages = (((uintptr_t)buf & PAGE_OFFSET_MASK) + size + PAGE_OFFSET_MASK) >> PAGE_SHIFT; + size_t cMaxPages = RT_MIN(RT_MAX(pSuperInfo->cMaxIoPages, 1), cPages); + bool fLockPgHack; + + pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFWRITEPGLSTREQ, PgLst.aPages[cMaxPages])); + while (!pReq && cMaxPages > 4) { + cMaxPages /= 2; + pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFWRITEPGLSTREQ, PgLst.aPages[cMaxPages])); + } + if (pReq && cMaxPages > RT_ELEMENTS(apPagesStack)) + papPagesFree = papPages = kmalloc(cMaxPages * sizeof(sizeof(papPages[0])), GFP_KERNEL); + if (pReq && papPages) { + cbRet = 0; + for (;;) { + /* + * Figure out how much to process now and lock the user pages. 
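+             * Worked example (illustrative numbers): with buf ending in 0x800
+             * and size = 0x20000, the span is
+             *     RT_ALIGN_Z(0x800 + 0x20000, PAGE_SIZE) >> PAGE_SHIFT = 33
+             * pages; with cMaxPages = 16 this round sends
+             *     (16 << PAGE_SHIFT) - 0x800 = 0xF800
+             * bytes and the loop comes back for the rest.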
+ */ + int rc; + size_t cbChunk = (uintptr_t)buf & PAGE_OFFSET_MASK; + pReq->PgLst.offFirstPage = (uint16_t)cbChunk; + cPages = RT_ALIGN_Z(cbChunk + size, PAGE_SIZE) >> PAGE_SHIFT; + if (cPages <= cMaxPages) + cbChunk = size; + else { + cPages = cMaxPages; + cbChunk = (cMaxPages << PAGE_SHIFT) - cbChunk; + } + + rc = vbsf_lock_user_pages((uintptr_t)buf, cPages, false /*fWrite*/, papPages, &fLockPgHack); + if (rc == 0) { + size_t iPage = cPages; + while (iPage-- > 0) + pReq->PgLst.aPages[iPage] = page_to_phys(papPages[iPage]); + } else { + cbRet = rc; + break; + } + + /* + * Issue the request and unlock the pages. + */ + rc = VbglR0SfHostReqWritePgLst(pSuperInfo->map.root, pReq, sf_r->Handle.hHost, offFile, cbChunk, cPages); + sf_i->ModificationTimeAtOurLastWrite = sf_i->ModificationTime; + if (RT_SUCCESS(rc)) { + /* + * Success, advance position and buffer. + */ + uint32_t cbActual = pReq->Parms.cb32Write.u.value32; + AssertStmt(cbActual <= cbChunk, cbActual = cbChunk); + + vbsf_reg_write_sync_page_cache(inode->i_mapping, offFile, cbActual, NULL /*pbKrnlBuf*/, + papPages, (uintptr_t)buf & PAGE_OFFSET_MASK, cPages); + Assert(cPages <= cMaxPages); + vbsf_unlock_user_pages(papPages, cPages, false /*fSetDirty*/, fLockPgHack); + + cbRet += cbActual; + buf = (uint8_t *)buf + cbActual; + size -= cbActual; + + offFile += cbActual; + if ((file->f_flags & O_APPEND) && (g_fSfFeatures & SHFL_FEATURE_WRITE_UPDATES_OFFSET)) + offFile = pReq->Parms.off64Write.u.value64; + if (offFile > i_size_read(inode)) + i_size_write(inode, offFile); + + sf_i->force_restat = 1; /* mtime (and size) may have changed */ + + /* + * Are we done already? If so commit the new file offset. + */ + if (!size || cbActual < cbChunk) { + *off = offFile; + break; + } + } else { + vbsf_unlock_user_pages(papPages, cPages, false /*fSetDirty*/, fLockPgHack); + if (rc == VERR_NO_MEMORY && cMaxPages > 4) { + /* + * The host probably doesn't have enough heap to handle the + * request, reduce the page count and retry. + */ + cMaxPages /= 4; + Assert(cMaxPages > 0); + } else { + /* + * If we've successfully written stuff, return it rather than + * the error. (Not sure if this is such a great idea...) + */ + if (cbRet > 0) { + SFLOGFLOW(("vbsf_reg_write: write at %#RX64 -> %Rrc; got cbRet=%#zx already\n", offFile, rc, cbRet)); + *off = offFile; + } else { + SFLOGFLOW(("vbsf_reg_write: write at %#RX64 -> %Rrc\n", offFile, rc)); + cbRet = -EPROTO; + } + break; + } + } + } + } + if (papPagesFree) + kfree(papPages); + if (pReq) + VbglR0PhysHeapFree(pReq); + SFLOGFLOW(("vbsf_reg_write: returns %zd (%#zx), *off=%RX64 [lock]\n", cbRet, cbRet, *off)); + return cbRet; +} + + +/** + * Write to a regular file. 
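+ *
+ * Mirrors vbsf_reg_read(): small writes travel in an embedded request, larger
+ * ones lock the user pages so the host can access them directly, and any
+ * overlapping page-cache pages are synchronized afterwards via
+ * vbsf_reg_write_sync_page_cache().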
+ * + * @param file the file + * @param buf the buffer + * @param size length of the buffer + * @param off offset within the file + * @returns the number of written bytes on success, Linux error code otherwise + */ +static ssize_t vbsf_reg_write(struct file *file, const char *buf, size_t size, loff_t * off) +{ + struct inode *inode = VBSF_GET_F_DENTRY(file)->d_inode; + struct vbsf_inode_info *sf_i = VBSF_GET_INODE_INFO(inode); + struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(inode->i_sb); + struct vbsf_reg_info *sf_r = file->private_data; + struct address_space *mapping = inode->i_mapping; + loff_t pos; + + SFLOGFLOW(("vbsf_reg_write: inode=%p file=%p buf=%p size=%#zx off=%#llx\n", inode, file, buf, size, *off)); + Assert(sf_i); + Assert(pSuperInfo); + Assert(sf_r); + AssertReturn(S_ISREG(inode->i_mode), -EINVAL); + + pos = *off; + if (file->f_flags & O_APPEND) + pos = i_size_read(inode); + + /** @todo XXX Check write permission according to inode->i_mode! */ + + if (!size) { + if (file->f_flags & O_APPEND) /** @todo check if this is the consensus behavior... */ + *off = pos; + return 0; + } + + /** @todo Implement the read-write caching mode. */ + + /* + * If there are active writable mappings, coordinate with any + * pending writes via those. + */ + if ( mapping + && mapping->nrpages > 0 + && mapping_writably_mapped(mapping)) { +# if RTLNX_VER_MIN(2,6,32) + int err = filemap_fdatawait_range(mapping, pos, pos + size - 1); + if (err) + return err; +# else + /** @todo ... */ +# endif + } + + /* + * For small requests, try use an embedded buffer provided we get a heap block + * that does not cross page boundraries (see host code). + */ + if (size <= PAGE_SIZE / 4 * 3 - RT_UOFFSETOF(VBOXSFWRITEEMBEDDEDREQ, abData[0]) /* see allocator */) { + uint32_t const cbReq = RT_UOFFSETOF(VBOXSFWRITEEMBEDDEDREQ, abData[0]) + size; + VBOXSFWRITEEMBEDDEDREQ *pReq = (VBOXSFWRITEEMBEDDEDREQ *)VbglR0PhysHeapAlloc(cbReq); + if ( pReq + && (PAGE_SIZE - ((uintptr_t)pReq & PAGE_OFFSET_MASK)) >= cbReq) { + ssize_t cbRet; + if (copy_from_user(pReq->abData, buf, size) == 0) { + int vrc = VbglR0SfHostReqWriteEmbedded(pSuperInfo->map.root, pReq, sf_r->Handle.hHost, + pos, (uint32_t)size); + sf_i->ModificationTimeAtOurLastWrite = sf_i->ModificationTime; + if (RT_SUCCESS(vrc)) { + cbRet = pReq->Parms.cb32Write.u.value32; + AssertStmt(cbRet <= (ssize_t)size, cbRet = size); + vbsf_reg_write_sync_page_cache(mapping, pos, (uint32_t)cbRet, pReq->abData, + NULL /*papSrcPages*/, 0 /*offSrcPage0*/, 0 /*cSrcPages*/); + pos += cbRet; + if ((file->f_flags & O_APPEND) && (g_fSfFeatures & SHFL_FEATURE_WRITE_UPDATES_OFFSET)) + pos = pReq->Parms.off64Write.u.value64; + *off = pos; + if (pos > i_size_read(inode)) + i_size_write(inode, pos); + } else + cbRet = -EPROTO; + sf_i->force_restat = 1; /* mtime (and size) may have changed */ + } else + cbRet = -EFAULT; + + VbglR0PhysHeapFree(pReq); + SFLOGFLOW(("vbsf_reg_write: returns %zd (%#zx), *off=%RX64 [embed]\n", cbRet, cbRet, *off)); + return cbRet; + } + if (pReq) + VbglR0PhysHeapFree(pReq); + } + +# if 0 /* Turns out this is slightly slower than locking the pages even for 4KB reads (4.19/amd64). */ + /* + * For medium sized requests try use a bounce buffer. + */ + if (size <= _64K /** @todo make this configurable? 
*/) { + void *pvBounce = kmalloc(size, GFP_KERNEL); + if (pvBounce) { + if (copy_from_user(pvBounce, buf, size) == 0) { + VBOXSFWRITEPGLSTREQ *pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq)); + if (pReq) { + ssize_t cbRet; + int vrc = VbglR0SfHostReqWriteContig(pSuperInfo->map.root, pReq, sf_r->handle, pos, + (uint32_t)size, pvBounce, virt_to_phys(pvBounce)); + sf_i->ModificationTimeAtOurLastWrite = sf_i->ModificationTime; + if (RT_SUCCESS(vrc)) { + cbRet = pReq->Parms.cb32Write.u.value32; + AssertStmt(cbRet <= (ssize_t)size, cbRet = size); + vbsf_reg_write_sync_page_cache(mapping, pos, (uint32_t)cbRet, (uint8_t const *)pvBounce, + NULL /*papSrcPages*/, 0 /*offSrcPage0*/, 0 /*cSrcPages*/); + pos += cbRet; + *off = pos; + if (pos > i_size_read(inode)) + i_size_write(inode, pos); + } else + cbRet = -EPROTO; + sf_i->force_restat = 1; /* mtime (and size) may have changed */ + VbglR0PhysHeapFree(pReq); + kfree(pvBounce); + SFLOGFLOW(("vbsf_reg_write: returns %zd (%#zx), *off=%RX64 [bounce]\n", cbRet, cbRet, *off)); + return cbRet; + } + kfree(pvBounce); + } else { + kfree(pvBounce); + SFLOGFLOW(("vbsf_reg_write: returns -EFAULT, *off=%RX64 [bounce]\n", *off)); + return -EFAULT; + } + } + } +# endif + + return vbsf_reg_write_locking(file, buf, size, off, pos, inode, sf_i, pSuperInfo, sf_r); +} + +#endif /* < 5.10.0 */ +#if RTLNX_VER_MIN(2,6,19) +/* See kernel 6.0.0 change eba2d3d798295dc43cae8fade102f9d083a2a741. */ +# if RTLNX_VER_MIN(6,0,0) || RTLNX_RHEL_RANGE(9,4, 9,99) +# define VBOX_IOV_GET_PAGES iov_iter_get_pages2 +# else +# define VBOX_IOV_GET_PAGES iov_iter_get_pages +# endif + +/** + * Companion to vbsf_iter_lock_pages(). + */ +DECLINLINE(void) vbsf_iter_unlock_pages(struct iov_iter *iter, struct page **papPages, size_t cPages, bool fSetDirty) +{ + /* We don't mark kernel pages dirty (KVECs, BVECs, PIPEs): */ + if (!iter_is_iovec(iter)) + fSetDirty = false; + + while (cPages-- > 0) + { + struct page *pPage = papPages[cPages]; + if (fSetDirty && !PageReserved(pPage)) + set_page_dirty(pPage); + vbsf_put_page(pPage); + } +} + + +/** + * Locks up to @a cMaxPages from the I/O vector iterator, advancing the + * iterator. + * + * @returns 0 on success, negative errno value on failure. + * @param iter The iterator to lock pages from. + * @param fWrite Whether to write (true) or read (false) lock the pages. + * @param pStash Where we stash peek results. + * @param cMaxPages The maximum number of pages to get. + * @param papPages Where to return the locked pages. + * @param pcPages Where to return the number of pages. + * @param poffPage0 Where to return the offset into the first page. + * @param pcbChunk Where to return the number of bytes covered. + */ +static int vbsf_iter_lock_pages(struct iov_iter *iter, bool fWrite, struct vbsf_iter_stash *pStash, size_t cMaxPages, + struct page **papPages, size_t *pcPages, size_t *poffPage0, size_t *pcbChunk) +{ + size_t cbChunk = 0; + size_t cPages = 0; + size_t offPage0 = 0; + int rc = 0; + + Assert(iov_iter_count(iter) + pStash->cb > 0); + if (!VBOX_IOV_ITER_IS_KVEC(iter)) + { + /* + * Do we have a stashed page? 
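+         * A page is stashed by the probing code further down when a segment
+         * starts at a non-zero offset into its first page: it cannot be glued
+         * onto the chunk being built at that point, so it is parked here and
+         * used to seed the next round instead.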
+ */ + if (pStash->pPage) { + papPages[0] = pStash->pPage; + offPage0 = pStash->off; + cbChunk = pStash->cb; + cPages = 1; + pStash->pPage = NULL; + pStash->off = 0; + pStash->cb = 0; + if ( offPage0 + cbChunk < PAGE_SIZE + || iov_iter_count(iter) == 0) { + *poffPage0 = offPage0; + *pcbChunk = cbChunk; + *pcPages = cPages; + SFLOGFLOW(("vbsf_iter_lock_pages: returns %d - cPages=%#zx offPage0=%#zx cbChunk=%zx (stashed)\n", + rc, cPages, offPage0, cbChunk)); + return 0; + } + cMaxPages -= 1; + SFLOG3(("vbsf_iter_lock_pages: Picked up stashed page: %#zx LB %#zx\n", offPage0, cbChunk)); + } else { +# if RTLNX_VER_MAX(4,11,0) + /* + * Copy out our starting point to assist rewinding. + */ + pStash->offFromEnd = iov_iter_count(iter); + pStash->Copy = *iter; +# endif + } + + /* + * Get pages segment by segment. + */ + do { + /* + * Make a special case of the first time thru here, since that's + * the most typical scenario. + */ + ssize_t cbSegRet; + if (cPages == 0) { +# if RTLNX_VER_MAX(3,19,0) + while (!iov_iter_single_seg_count(iter)) /* Old code didn't skip empty segments which caused EFAULTs. */ + iov_iter_advance(iter, 0); +# endif + cbSegRet = VBOX_IOV_GET_PAGES(iter, papPages, iov_iter_count(iter), cMaxPages, &offPage0); + if (cbSegRet > 0) { +# if RTLNX_VER_MAX(6,0,0) + iov_iter_advance(iter, cbSegRet); +#endif + cbChunk = (size_t)cbSegRet; + cPages = RT_ALIGN_Z(offPage0 + cbSegRet, PAGE_SIZE) >> PAGE_SHIFT; + cMaxPages -= cPages; + SFLOG3(("vbsf_iter_lock_pages: iov_iter_get_pages -> %#zx @ %#zx; %#zx pages [first]\n", cbSegRet, offPage0, cPages)); + if ( cMaxPages == 0 + || ((offPage0 + (size_t)cbSegRet) & PAGE_OFFSET_MASK)) + break; + } else { + AssertStmt(cbSegRet < 0, cbSegRet = -EFAULT); + rc = (int)cbSegRet; + break; + } + } else { + /* + * Probe first page of new segment to check that we've got a zero offset and + * can continue on the current chunk. Stash the page if the offset isn't zero. + */ + size_t offPgProbe; + size_t cbSeg = iov_iter_single_seg_count(iter); + while (!cbSeg) { + iov_iter_advance(iter, 0); + cbSeg = iov_iter_single_seg_count(iter); + } + cbSegRet = VBOX_IOV_GET_PAGES(iter, &papPages[cPages], iov_iter_count(iter), 1, &offPgProbe); + if (cbSegRet > 0) { +# if RTLNX_VER_MAX(6,0,0) + iov_iter_advance(iter, cbSegRet); /** @todo maybe not do this if we stash the page? */ +#endif + Assert(offPgProbe + cbSegRet <= PAGE_SIZE); + if (offPgProbe == 0) { + cbChunk += cbSegRet; + cPages += 1; + cMaxPages -= 1; + SFLOG3(("vbsf_iter_lock_pages: iov_iter_get_pages(1) -> %#zx @ %#zx\n", cbSegRet, offPgProbe)); + if ( cMaxPages == 0 + || cbSegRet != PAGE_SIZE) + break; + + /* + * Get the rest of the segment (if anything remaining). 
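+                         * The probe confirmed a page-aligned start, so the
+                         * remainder can be grabbed in one call; a partial
+                         * page at its end simply terminates the chunk below.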
+ */ + cbSeg -= cbSegRet; + if (cbSeg > 0) { + cbSegRet = VBOX_IOV_GET_PAGES(iter, &papPages[cPages], iov_iter_count(iter), cMaxPages, &offPgProbe); + if (cbSegRet > 0) { + size_t const cPgRet = RT_ALIGN_Z((size_t)cbSegRet, PAGE_SIZE) >> PAGE_SHIFT; + Assert(offPgProbe == 0); +# if RTLNX_VER_MAX(6,0,0) + iov_iter_advance(iter, cbSegRet); +# endif + SFLOG3(("vbsf_iter_lock_pages: iov_iter_get_pages() -> %#zx; %#zx pages\n", cbSegRet, cPgRet)); + cPages += cPgRet; + cMaxPages -= cPgRet; + cbChunk += cbSegRet; + if ( cMaxPages == 0 + || ((size_t)cbSegRet & PAGE_OFFSET_MASK)) + break; + } else { + AssertStmt(cbSegRet < 0, cbSegRet = -EFAULT); + rc = (int)cbSegRet; + break; + } + } + } else { + /* The segment didn't start at a page boundrary, so stash it for + the next round: */ + SFLOGFLOW(("vbsf_iter_lock_pages: iov_iter_get_pages(1) -> %#zx @ %#zx; stashed\n", cbSegRet, offPgProbe)); + Assert(papPages[cPages]); + pStash->pPage = papPages[cPages]; + pStash->off = offPgProbe; + pStash->cb = cbSegRet; + break; + } + } else { + AssertStmt(cbSegRet < 0, cbSegRet = -EFAULT); + rc = (int)cbSegRet; + break; + } + } + Assert(cMaxPages > 0); + } while (iov_iter_count(iter) > 0); + + } else { + /* + * The silly iov_iter_get_pages_alloc() function doesn't handle KVECs, + * so everyone needs to do that by themselves. + * + * Note! Fixes here may apply to rtR0MemObjNativeLockKernel() + * and vbsf_lock_user_pages_failed_check_kernel() as well. + */ +# if RTLNX_VER_MAX(4,11,0) + pStash->offFromEnd = iov_iter_count(iter); + pStash->Copy = *iter; +# endif + do { + uint8_t *pbBuf; + size_t offStart; + size_t cPgSeg; + + size_t cbSeg = iov_iter_single_seg_count(iter); + while (!cbSeg) { + iov_iter_advance(iter, 0); + cbSeg = iov_iter_single_seg_count(iter); + } + + pbBuf = VBOX_ITER_IOV_ADDR(iter); + offStart = (uintptr_t)pbBuf & PAGE_OFFSET_MASK; + if (!cPages) + offPage0 = offStart; + else if (offStart) + break; + + cPgSeg = RT_ALIGN_Z(cbSeg, PAGE_SIZE) >> PAGE_SHIFT; + if (cPgSeg > cMaxPages) { + cPgSeg = cMaxPages; + cbSeg = (cPgSeg << PAGE_SHIFT) - offStart; + } + + rc = vbsf_lock_kernel_pages(pbBuf, fWrite, cPgSeg, &papPages[cPages]); + if (rc == 0) { + iov_iter_advance(iter, cbSeg); + cbChunk += cbSeg; + cPages += cPgSeg; + cMaxPages -= cPgSeg; + if ( cMaxPages == 0 + || ((offStart + cbSeg) & PAGE_OFFSET_MASK) != 0) + break; + } else + break; + } while (iov_iter_count(iter) > 0); + } + + /* + * Clean up if we failed; set return values. + */ + if (rc == 0) { + /* likely */ + } else { + if (cPages > 0) + vbsf_iter_unlock_pages(iter, papPages, cPages, false /*fSetDirty*/); + offPage0 = cbChunk = cPages = 0; + } + *poffPage0 = offPage0; + *pcbChunk = cbChunk; + *pcPages = cPages; + SFLOGFLOW(("vbsf_iter_lock_pages: returns %d - cPages=%#zx offPage0=%#zx cbChunk=%zx\n", rc, cPages, offPage0, cbChunk)); + return rc; +} + + +/** + * Rewinds the I/O vector. + */ +static bool vbsf_iter_rewind(struct iov_iter *iter, struct vbsf_iter_stash *pStash, size_t cbToRewind, size_t cbChunk) +{ + size_t cbExtra; + if (!pStash->pPage) { + cbExtra = 0; + } else { + cbExtra = pStash->cb; + vbsf_put_page(pStash->pPage); + pStash->pPage = NULL; + pStash->cb = 0; + pStash->off = 0; + } + +# if RTLNX_VER_MIN(4,11,0) || RTLNX_VER_MAX(3,16,0) + iov_iter_revert(iter, cbToRewind + cbExtra); + return true; +# else + /** @todo impl this */ + return false; +# endif +} + + +/** + * Cleans up the page locking stash. 
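+ *
+ * Must be called once iteration stops, whether the transfer succeeded or not,
+ * since a stashed page holds a page reference that would otherwise be leaked.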
+ */ +DECLINLINE(void) vbsf_iter_cleanup_stash(struct iov_iter *iter, struct vbsf_iter_stash *pStash) +{ + if (pStash->pPage) + vbsf_iter_rewind(iter, pStash, 0, 0); +} + + +/** + * Calculates the longest span of pages we could transfer to the host in a + * single request. + * + * @returns Page count, non-zero. + * @param iter The I/O vector iterator to inspect. + */ +static size_t vbsf_iter_max_span_of_pages(struct iov_iter *iter) +{ + size_t cPages; +# if RTLNX_VER_MIN(3,16,0) + if (iter_is_iovec(iter) || (VBSF_GET_ITER_TYPE(iter) & ITER_KVEC)) { +# endif + const struct iovec *pCurIov = VBSF_GET_ITER_IOV(iter); + size_t cLeft = iter->nr_segs; + size_t cPagesSpan = 0; + + /* iovect and kvec are identical, except for the __user tagging of iov_base. */ + AssertCompileMembersSameSizeAndOffset(struct iovec, iov_base, struct kvec, iov_base); + AssertCompileMembersSameSizeAndOffset(struct iovec, iov_len, struct kvec, iov_len); + AssertCompile(sizeof(struct iovec) == sizeof(struct kvec)); + + cPages = 1; + AssertReturn(cLeft > 0, cPages); + + /* Special case: segment offset. */ + if (iter->iov_offset > 0) { + if (iter->iov_offset < pCurIov->iov_len) { + size_t const cbSegLeft = pCurIov->iov_len - iter->iov_offset; + size_t const offPage0 = ((uintptr_t)pCurIov->iov_base + iter->iov_offset) & PAGE_OFFSET_MASK; + cPages = cPagesSpan = RT_ALIGN_Z(offPage0 + cbSegLeft, PAGE_SIZE) >> PAGE_SHIFT; + if ((offPage0 + cbSegLeft) & PAGE_OFFSET_MASK) + cPagesSpan = 0; + } + SFLOGFLOW(("vbsf_iter: seg[0]= %p LB %#zx\n", pCurIov->iov_base, pCurIov->iov_len)); + pCurIov++; + cLeft--; + } + + /* Full segments. */ + while (cLeft-- > 0) { + if (pCurIov->iov_len > 0) { + size_t const offPage0 = (uintptr_t)pCurIov->iov_base & PAGE_OFFSET_MASK; + if (offPage0 == 0) { + if (!(pCurIov->iov_len & PAGE_OFFSET_MASK)) { + cPagesSpan += pCurIov->iov_len >> PAGE_SHIFT; + } else { + cPagesSpan += RT_ALIGN_Z(pCurIov->iov_len, PAGE_SIZE) >> PAGE_SHIFT; + if (cPagesSpan > cPages) + cPages = cPagesSpan; + cPagesSpan = 0; + } + } else { + if (cPagesSpan > cPages) + cPages = cPagesSpan; + if (!((offPage0 + pCurIov->iov_len) & PAGE_OFFSET_MASK)) { + cPagesSpan = pCurIov->iov_len >> PAGE_SHIFT; + } else { + cPagesSpan += RT_ALIGN_Z(offPage0 + pCurIov->iov_len, PAGE_SIZE) >> PAGE_SHIFT; + if (cPagesSpan > cPages) + cPages = cPagesSpan; + cPagesSpan = 0; + } + } + } + SFLOGFLOW(("vbsf_iter: seg[%u]= %p LB %#zx\n", iter->nr_segs - cLeft, pCurIov->iov_base, pCurIov->iov_len)); + pCurIov++; + } + if (cPagesSpan > cPages) + cPages = cPagesSpan; +# if RTLNX_VER_MIN(3,16,0) + } else { + /* Won't bother with accurate counts for the next two types, just make + some rough estimates (does pipes have segments?): */ + size_t cSegs = VBSF_GET_ITER_TYPE(iter) & ITER_BVEC ? RT_MAX(1, iter->nr_segs) : 1; + cPages = (iov_iter_count(iter) + (PAGE_SIZE * 2 - 2) * cSegs) >> PAGE_SHIFT; + } +# endif + SFLOGFLOW(("vbsf_iter_max_span_of_pages: returns %#zx\n", cPages)); + return cPages; +} + + +/** + * Worker for vbsf_reg_read_iter() that deals with larger reads using page + * locking. + */ +static ssize_t vbsf_reg_read_iter_locking(struct kiocb *kio, struct iov_iter *iter, size_t cbToRead, + struct vbsf_super_info *pSuperInfo, struct vbsf_reg_info *sf_r) +{ + /* + * Estimate how many pages we may possible submit in a single request so + * that we can allocate matching request buffer and page array. 
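+     * For example (illustrative): three iovec segments of 4KiB, 4KiB and 2KiB
+     * that all start page aligned can ride in a single three-page request,
+     * whereas a segment starting mid-page ends the span, because the host page
+     * list only carries an offset for its first page.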
+ */ + struct page *apPagesStack[16]; + struct page **papPages = &apPagesStack[0]; + struct page **papPagesFree = NULL; + VBOXSFREADPGLSTREQ *pReq; + ssize_t cbRet = 0; + size_t cMaxPages = vbsf_iter_max_span_of_pages(iter); + cMaxPages = RT_MIN(RT_MAX(pSuperInfo->cMaxIoPages, 2), cMaxPages); + + pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFREADPGLSTREQ, PgLst.aPages[cMaxPages])); + while (!pReq && cMaxPages > 4) { + cMaxPages /= 2; + pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFREADPGLSTREQ, PgLst.aPages[cMaxPages])); + } + if (pReq && cMaxPages > RT_ELEMENTS(apPagesStack)) + papPagesFree = papPages = kmalloc(cMaxPages * sizeof(sizeof(papPages[0])), GFP_KERNEL); + if (pReq && papPages) { + + /* + * The read loop. + */ + struct vbsf_iter_stash Stash = VBSF_ITER_STASH_INITIALIZER; + do { + /* + * Grab as many pages as we can. This means that if adjacent + * segments both starts and ends at a page boundrary, we can + * do them both in the same transfer from the host. + */ + size_t cPages = 0; + size_t cbChunk = 0; + size_t offPage0 = 0; + int rc = vbsf_iter_lock_pages(iter, true /*fWrite*/, &Stash, cMaxPages, papPages, &cPages, &offPage0, &cbChunk); + if (rc == 0) { + size_t iPage = cPages; + while (iPage-- > 0) + pReq->PgLst.aPages[iPage] = page_to_phys(papPages[iPage]); + pReq->PgLst.offFirstPage = (uint16_t)offPage0; + AssertStmt(cbChunk <= cbToRead, cbChunk = cbToRead); + } else { + cbRet = rc; + break; + } + + /* + * Issue the request and unlock the pages. + */ + rc = VbglR0SfHostReqReadPgLst(pSuperInfo->map.root, pReq, sf_r->Handle.hHost, kio->ki_pos, cbChunk, cPages); + SFLOGFLOW(("vbsf_reg_read_iter_locking: VbglR0SfHostReqReadPgLst -> %d (cbActual=%#x cbChunk=%#zx of %#zx cPages=%#zx offPage0=%#x\n", + rc, pReq->Parms.cb32Read.u.value32, cbChunk, cbToRead, cPages, offPage0)); + + vbsf_iter_unlock_pages(iter, papPages, cPages, true /*fSetDirty*/); + + if (RT_SUCCESS(rc)) { + /* + * Success, advance position and buffer. + */ + uint32_t cbActual = pReq->Parms.cb32Read.u.value32; + AssertStmt(cbActual <= cbChunk, cbActual = cbChunk); + cbRet += cbActual; + kio->ki_pos += cbActual; + cbToRead -= cbActual; + + /* + * Are we done already? + */ + if (!cbToRead) + break; + if (cbActual < cbChunk) { /* We ASSUME end-of-file here. */ + if (vbsf_iter_rewind(iter, &Stash, cbChunk - cbActual, cbActual)) + iov_iter_truncate(iter, 0); + break; + } + } else { + /* + * Try rewind the iter structure. + */ + bool const fRewindOkay = vbsf_iter_rewind(iter, &Stash, cbChunk, cbChunk); + if (rc == VERR_NO_MEMORY && cMaxPages > 4 && fRewindOkay) { + /* + * The host probably doesn't have enough heap to handle the + * request, reduce the page count and retry. + */ + cMaxPages /= 4; + Assert(cMaxPages > 0); + } else { + /* + * If we've successfully read stuff, return it rather than + * the error. (Not sure if this is such a great idea...) + */ + if (cbRet <= 0) + cbRet = -EPROTO; + break; + } + } + } while (cbToRead > 0); + + vbsf_iter_cleanup_stash(iter, &Stash); + } + else + cbRet = -ENOMEM; + if (papPagesFree) + kfree(papPages); + if (pReq) + VbglR0PhysHeapFree(pReq); + SFLOGFLOW(("vbsf_reg_read_iter_locking: returns %#zx (%zd)\n", cbRet, cbRet)); + return cbRet; +} + + +/** + * Read into I/O vector iterator. + * + * @returns Number of bytes read on success, negative errno on error. + * @param kio The kernel I/O control block (or something like that). + * @param iter The I/O vector iterator describing the buffer. 
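+ *
+ * This is wired up as the file_operations::read_iter callback further down in
+ * this file (::aio_read on pre-3.16 kernels), so it services read(2), readv(2)
+ * and friends as well as in-kernel reads via kernel_read() on newer kernels.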
+ */
+# if RTLNX_VER_MIN(3,16,0)
+static ssize_t vbsf_reg_read_iter(struct kiocb *kio, struct iov_iter *iter)
+# else
+static ssize_t vbsf_reg_aio_read(struct kiocb *kio, const struct iovec *iov, unsigned long cSegs, loff_t offFile)
+# endif
+{
+# if RTLNX_VER_MAX(3,16,0)
+    struct vbsf_iov_iter  fake_iter = VBSF_IOV_ITER_INITIALIZER(cSegs, iov, 0 /*write*/);
+    struct vbsf_iov_iter *iter      = &fake_iter;
+# endif
+    size_t                cbToRead = iov_iter_count(iter);
+    struct inode         *inode    = VBSF_GET_F_DENTRY(kio->ki_filp)->d_inode;
+    struct address_space *mapping  = inode->i_mapping;
+
+    struct vbsf_reg_info   *sf_r       = kio->ki_filp->private_data;
+    struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(inode->i_sb);
+
+    SFLOGFLOW(("vbsf_reg_read_iter: inode=%p file=%p size=%#zx off=%#llx type=%#x\n",
+               inode, kio->ki_filp, cbToRead, kio->ki_pos, VBSF_GET_ITER_TYPE(iter) ));
+    AssertReturn(S_ISREG(inode->i_mode), -EINVAL);
+
+    /*
+     * Do we have anything at all to do here?
+     */
+    if (!cbToRead)
+        return 0;
+
+    /*
+     * If there is a mapping and O_DIRECT isn't in effect, we must heed dirty
+     * pages in the mapping and read from them.  For simplicity though, we
+     * just do page cache reading when there are writable mappings around
+     * with any kind of pages loaded.
+     */
+    if (vbsf_should_use_cached_read(kio->ki_filp, mapping, pSuperInfo)) {
+# if RTLNX_VER_MIN(3,16,0)
+        return generic_file_read_iter(kio, iter);
+# else
+        return generic_file_aio_read(kio, iov, cSegs, offFile);
+# endif
+    }
+
+    /*
+     * For now we reject async I/O requests.
+     */
+    if (!is_sync_kiocb(kio)) {
+        SFLOGFLOW(("vbsf_reg_read_iter: async I/O not yet supported\n")); /** @todo extend FsPerf with AIO tests. */
+        return -EOPNOTSUPP;
+    }
+
+    /*
+     * For small requests, try to use an embedded buffer provided we get a heap
+     * block that does not cross page boundaries (see host code).
+     */
+    if (cbToRead <= PAGE_SIZE / 4 * 3 - RT_UOFFSETOF(VBOXSFREADEMBEDDEDREQ, abData[0]) /* see allocator */) {
+        uint32_t const         cbReq = RT_UOFFSETOF(VBOXSFREADEMBEDDEDREQ, abData[0]) + cbToRead;
+        VBOXSFREADEMBEDDEDREQ *pReq  = (VBOXSFREADEMBEDDEDREQ *)VbglR0PhysHeapAlloc(cbReq);
+        if (pReq) {
+            if ((PAGE_SIZE - ((uintptr_t)pReq & PAGE_OFFSET_MASK)) >= cbReq) {
+                ssize_t cbRet;
+                int vrc = VbglR0SfHostReqReadEmbedded(pSuperInfo->map.root, pReq, sf_r->Handle.hHost,
+                                                      kio->ki_pos, (uint32_t)cbToRead);
+                if (RT_SUCCESS(vrc)) {
+                    cbRet = pReq->Parms.cb32Read.u.value32;
+                    AssertStmt(cbRet <= (ssize_t)cbToRead, cbRet = cbToRead);
+                    if (copy_to_iter(pReq->abData, cbRet, iter) == cbRet) {
+                        kio->ki_pos += cbRet;
+                        if (cbRet < cbToRead)
+                            iov_iter_truncate(iter, 0);
+                    } else
+                        cbRet = -EFAULT;
+                } else
+                    cbRet = -EPROTO;
+                VbglR0PhysHeapFree(pReq);
+                SFLOGFLOW(("vbsf_reg_read_iter: returns %#zx (%zd)\n", cbRet, cbRet));
+                return cbRet;
+            }
+            VbglR0PhysHeapFree(pReq);
+        }
+    }
+
+    /*
+     * Otherwise do the page locking thing.
+     */
+    return vbsf_reg_read_iter_locking(kio, iter, cbToRead, pSuperInfo, sf_r);
+}
+
+
+/**
+ * Worker for vbsf_reg_write_iter() that deals with larger writes using page
+ * locking.
+ */
+static ssize_t vbsf_reg_write_iter_locking(struct kiocb *kio, struct iov_iter *iter, size_t cbToWrite, loff_t offFile,
+                                           struct vbsf_super_info *pSuperInfo, struct vbsf_reg_info *sf_r, struct inode *inode,
+                                           struct vbsf_inode_info *sf_i, struct address_space *mapping, bool fAppend)
+{
+    /*
+     * Estimate how many pages we may possibly submit in a single request so
+     * that we can allocate matching request buffer and page array.
+     */
+    struct page         *apPagesStack[16];
+    struct page        **papPages     = &apPagesStack[0];
+    struct page        **papPagesFree = NULL;
+    VBOXSFWRITEPGLSTREQ *pReq;
+    ssize_t              cbRet        = 0;
+    size_t               cMaxPages    = vbsf_iter_max_span_of_pages(iter);
+    cMaxPages = RT_MIN(RT_MAX(pSuperInfo->cMaxIoPages, 2), cMaxPages);
+
+    pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFWRITEPGLSTREQ, PgLst.aPages[cMaxPages]));
+    while (!pReq && cMaxPages > 4) {
+        cMaxPages /= 2;
+        pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFWRITEPGLSTREQ, PgLst.aPages[cMaxPages]));
+    }
+    if (pReq && cMaxPages > RT_ELEMENTS(apPagesStack))
+        papPagesFree = papPages = kmalloc(cMaxPages * sizeof(papPages[0]), GFP_KERNEL);
+    if (pReq && papPages) {
+
+        /*
+         * The write loop.
+         */
+        struct vbsf_iter_stash Stash = VBSF_ITER_STASH_INITIALIZER;
+        do {
+            /*
+             * Grab as many pages as we can.  This means that if adjacent
+             * segments both start and end at a page boundary, we can
+             * do them both in the same transfer to the host.
+             */
+            size_t cPages   = 0;
+            size_t cbChunk  = 0;
+            size_t offPage0 = 0;
+            int rc = vbsf_iter_lock_pages(iter, false /*fWrite*/, &Stash, cMaxPages, papPages, &cPages, &offPage0, &cbChunk);
+            if (rc == 0) {
+                size_t iPage = cPages;
+                while (iPage-- > 0)
+                    pReq->PgLst.aPages[iPage] = page_to_phys(papPages[iPage]);
+                pReq->PgLst.offFirstPage = (uint16_t)offPage0;
+                AssertStmt(cbChunk <= cbToWrite, cbChunk = cbToWrite);
+            } else {
+                cbRet = rc;
+                break;
+            }
+
+            /*
+             * Issue the request and unlock the pages.
+             */
+            rc = VbglR0SfHostReqWritePgLst(pSuperInfo->map.root, pReq, sf_r->Handle.hHost, offFile, cbChunk, cPages);
+            sf_i->ModificationTimeAtOurLastWrite = sf_i->ModificationTime;
+            SFLOGFLOW(("vbsf_reg_write_iter_locking: VbglR0SfHostReqWritePgLst -> %d (cbActual=%#x cbChunk=%#zx of %#zx cPages=%#zx offPage0=%#x)\n",
+                       rc, pReq->Parms.cb32Write.u.value32, cbChunk, cbToWrite, cPages, offPage0));
+            if (RT_SUCCESS(rc)) {
+                /*
+                 * Success, advance position and buffer.
+                 */
+                uint32_t cbActual = pReq->Parms.cb32Write.u.value32;
+                AssertStmt(cbActual <= cbChunk, cbActual = cbChunk);
+
+                vbsf_reg_write_sync_page_cache(mapping, offFile, cbActual, NULL /*pbSrcBuf*/, papPages, offPage0, cPages);
+                vbsf_iter_unlock_pages(iter, papPages, cPages, false /*fSetDirty*/);
+
+                cbRet     += cbActual;
+                cbToWrite -= cbActual;
+
+                offFile += cbActual;
+                if (fAppend && (g_fSfFeatures & SHFL_FEATURE_WRITE_UPDATES_OFFSET))
+                    offFile = pReq->Parms.off64Write.u.value64;
+                kio->ki_pos = offFile;
+                if (offFile > i_size_read(inode))
+                    i_size_write(inode, offFile);
+
+                sf_i->force_restat = 1; /* mtime (and size) may have changed */
+
+                /*
+                 * Are we done already?
+                 */
+                if (!cbToWrite)
+                    break;
+                if (cbActual < cbChunk) { /* We ASSUME end-of-file here. */
+                    if (vbsf_iter_rewind(iter, &Stash, cbChunk - cbActual, cbActual))
+                        iov_iter_truncate(iter, 0);
+                    break;
+                }
+            } else {
+                /*
+                 * Try to rewind the iter structure.
+                 */
+                bool fRewindOkay;
+                vbsf_iter_unlock_pages(iter, papPages, cPages, false /*fSetDirty*/);
+                fRewindOkay = vbsf_iter_rewind(iter, &Stash, cbChunk, cbChunk);
+                if (rc == VERR_NO_MEMORY && cMaxPages > 4 && fRewindOkay) {
+                    /*
+                     * The host probably doesn't have enough heap to handle the
+                     * request, reduce the page count and retry.
+                     */
+                    cMaxPages /= 4;
+                    Assert(cMaxPages > 0);
+                } else {
+                    /*
+                     * If we've successfully written stuff, return it rather than
+                     * the error.  (Not sure if this is such a great idea...)
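+                     * Returning the short count does at least match ordinary
+                     * short-write semantics, where the caller is expected to
+                     * retry the remainder.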
+                     */
+                    if (cbRet <= 0)
+                        cbRet = -EPROTO;
+                    break;
+                }
+            }
+        } while (cbToWrite > 0);
+
+        vbsf_iter_cleanup_stash(iter, &Stash);
+    }
+    else
+        cbRet = -ENOMEM;
+    if (papPagesFree)
+        kfree(papPages);
+    if (pReq)
+        VbglR0PhysHeapFree(pReq);
+    SFLOGFLOW(("vbsf_reg_write_iter_locking: returns %#zx (%zd)\n", cbRet, cbRet));
+    return cbRet;
+}
+
+
+/**
+ * Write from I/O vector iterator.
+ *
+ * @returns Number of bytes written on success, negative errno on error.
+ * @param   kio         The kernel I/O control block (or something like that).
+ * @param   iter        The I/O vector iterator describing the buffer.
+ */
+# if RTLNX_VER_MIN(3,16,0)
+static ssize_t vbsf_reg_write_iter(struct kiocb *kio, struct iov_iter *iter)
+# else
+static ssize_t vbsf_reg_aio_write(struct kiocb *kio, const struct iovec *iov, unsigned long cSegs, loff_t offFile)
+# endif
+{
+# if RTLNX_VER_MAX(3,16,0)
+    struct vbsf_iov_iter  fake_iter = VBSF_IOV_ITER_INITIALIZER(cSegs, iov, 1 /*write*/);
+    struct vbsf_iov_iter *iter      = &fake_iter;
+# endif
+    size_t                  cbToWrite = iov_iter_count(iter);
+    struct inode           *inode     = VBSF_GET_F_DENTRY(kio->ki_filp)->d_inode;
+    struct vbsf_inode_info *sf_i      = VBSF_GET_INODE_INFO(inode);
+    struct address_space   *mapping   = inode->i_mapping;
+
+    struct vbsf_reg_info   *sf_r       = kio->ki_filp->private_data;
+    struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(inode->i_sb);
+# if RTLNX_VER_MIN(3,16,0)
+    loff_t                  offFile    = kio->ki_pos;
+# endif
+# if RTLNX_VER_MIN(4,1,0)
+    bool const              fAppend    = RT_BOOL(kio->ki_flags & IOCB_APPEND);
+# else
+    bool const              fAppend    = RT_BOOL(kio->ki_filp->f_flags & O_APPEND);
+# endif
+
+
+    SFLOGFLOW(("vbsf_reg_write_iter: inode=%p file=%p size=%#zx off=%#llx type=%#x\n",
+               inode, kio->ki_filp, cbToWrite, offFile, VBSF_GET_ITER_TYPE(iter) ));
+    AssertReturn(S_ISREG(inode->i_mode), -EINVAL);
+
+    /*
+     * Enforce APPEND flag (more later).
+     */
+    if (fAppend)
+        kio->ki_pos = offFile = i_size_read(inode);
+
+    /*
+     * Do we have anything at all to do here?
+     */
+    if (!cbToWrite)
+        return 0;
+
+    /** @todo Implement the read-write caching mode. */
+
+    /*
+     * For now we reject async I/O requests.
+     */
+    if (!is_sync_kiocb(kio)) {
+        SFLOGFLOW(("vbsf_reg_write_iter: async I/O not yet supported\n")); /** @todo extend FsPerf with AIO tests. */
+        return -EOPNOTSUPP;
+    }
+
+    /*
+     * If there are active writable mappings, coordinate with any
+     * pending writes via those.
+     */
+    if (   mapping
+        && mapping->nrpages > 0
+        && mapping_writably_mapped(mapping)) {
+# if RTLNX_VER_MIN(2,6,32)
+        int err = filemap_fdatawait_range(mapping, offFile, offFile + cbToWrite - 1);
+        if (err)
+            return err;
+# else
+        /** @todo ... */
+# endif
+    }
+
+    /*
+     * For small requests, try to use an embedded buffer provided we get a heap
+     * block that does not cross page boundaries (see host code).
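+     *
+     * With 4 KiB pages the check below caps embedded transfers at
+     * PAGE_SIZE / 4 * 3 = 3072 bytes minus the request header, so a typical
+     * small write(2) avoids the whole page-list setup.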
+     */
+    if (cbToWrite <= PAGE_SIZE / 4 * 3 - RT_UOFFSETOF(VBOXSFWRITEEMBEDDEDREQ, abData[0]) /* see allocator */) {
+        uint32_t const          cbReq = RT_UOFFSETOF(VBOXSFWRITEEMBEDDEDREQ, abData[0]) + cbToWrite;
+        VBOXSFWRITEEMBEDDEDREQ *pReq  = (VBOXSFWRITEEMBEDDEDREQ *)VbglR0PhysHeapAlloc(cbReq);
+        if (pReq) {
+            if ((PAGE_SIZE - ((uintptr_t)pReq & PAGE_OFFSET_MASK)) >= cbReq) {
+                ssize_t cbRet;
+                if (copy_from_iter(pReq->abData, cbToWrite, iter) == cbToWrite) {
+                    int vrc = VbglR0SfHostReqWriteEmbedded(pSuperInfo->map.root, pReq, sf_r->Handle.hHost,
+                                                           offFile, (uint32_t)cbToWrite);
+                    sf_i->ModificationTimeAtOurLastWrite = sf_i->ModificationTime;
+                    if (RT_SUCCESS(vrc)) {
+                        cbRet = pReq->Parms.cb32Write.u.value32;
+                        AssertStmt(cbRet <= (ssize_t)cbToWrite, cbRet = cbToWrite);
+                        vbsf_reg_write_sync_page_cache(mapping, offFile, (uint32_t)cbRet, pReq->abData,
+                                                       NULL /*papSrcPages*/, 0 /*offSrcPage0*/, 0 /*cSrcPages*/);
+
+                        offFile += cbRet;
+                        if (fAppend && (g_fSfFeatures & SHFL_FEATURE_WRITE_UPDATES_OFFSET))
+                            offFile = pReq->Parms.off64Write.u.value64;
+                        kio->ki_pos = offFile;
+                        if (offFile > i_size_read(inode))
+                            i_size_write(inode, offFile);
+
+# if RTLNX_VER_MIN(4,11,0)
+                        if ((size_t)cbRet < cbToWrite)
+                            iov_iter_revert(iter, cbToWrite - cbRet);
+# endif
+                    } else
+                        cbRet = -EPROTO;
+                    sf_i->force_restat = 1; /* mtime (and size) may have changed */
+                } else
+                    cbRet = -EFAULT;
+                VbglR0PhysHeapFree(pReq);
+                SFLOGFLOW(("vbsf_reg_write_iter: returns %#zx (%zd)\n", cbRet, cbRet));
+                return cbRet;
+            }
+            VbglR0PhysHeapFree(pReq);
+        }
+    }
+
+    /*
+     * Otherwise do the page locking thing.
+     */
+    return vbsf_reg_write_iter_locking(kio, iter, cbToWrite, offFile, pSuperInfo, sf_r, inode, sf_i, mapping, fAppend);
+}
+
+#endif /* >= 2.6.19 */
+
+/**
+ * Used by vbsf_reg_open() and vbsf_inode_atomic_open() to convert the Linux
+ * O_XXX open flags into shared folders create flags.
+ *
+ * @returns shared folders create flags.
+ * @param   fLnxOpen    The linux O_XXX flags to convert.
+ * @param   pfHandle    Pointer to vbsf_handle::fFlags.
+ * @param   pszCaller   Caller, for logging purposes.
+ */
+uint32_t vbsf_linux_oflags_to_vbox(unsigned fLnxOpen, uint32_t *pfHandle, const char *pszCaller)
+{
+    uint32_t fVBoxFlags = SHFL_CF_ACCESS_DENYNONE;
+
+    /*
+     * Disposition.
+     */
+    if (fLnxOpen & O_CREAT) {
+        Log(("%s: O_CREAT set\n", pszCaller));
+        fVBoxFlags |= SHFL_CF_ACT_CREATE_IF_NEW;
+        if (fLnxOpen & O_EXCL) {
+            Log(("%s: O_EXCL set\n", pszCaller));
+            fVBoxFlags |= SHFL_CF_ACT_FAIL_IF_EXISTS;
+        } else if (fLnxOpen & O_TRUNC) {
+            Log(("%s: O_TRUNC set\n", pszCaller));
+            fVBoxFlags |= SHFL_CF_ACT_OVERWRITE_IF_EXISTS;
+        } else
+            fVBoxFlags |= SHFL_CF_ACT_OPEN_IF_EXISTS;
+    } else {
+        fVBoxFlags |= SHFL_CF_ACT_FAIL_IF_NEW;
+        if (fLnxOpen & O_TRUNC) {
+            Log(("%s: O_TRUNC set\n", pszCaller));
+            fVBoxFlags |= SHFL_CF_ACT_OVERWRITE_IF_EXISTS;
+        }
+    }
+
+    /*
+     * Access.
+     */
+    switch (fLnxOpen & O_ACCMODE) {
+        case O_RDONLY:
+            fVBoxFlags |= SHFL_CF_ACCESS_READ;
+            *pfHandle  |= VBSF_HANDLE_F_READ;
+            break;
+
+        case O_WRONLY:
+            fVBoxFlags |= SHFL_CF_ACCESS_WRITE;
+            *pfHandle  |= VBSF_HANDLE_F_WRITE;
+            break;
+
+        case O_RDWR:
+            fVBoxFlags |= SHFL_CF_ACCESS_READWRITE;
+            *pfHandle  |= VBSF_HANDLE_F_READ | VBSF_HANDLE_F_WRITE;
+            break;
+
+        default:
+            BUG();
+    }
+
+    if (fLnxOpen & O_APPEND) {
+        Log(("%s: O_APPEND set\n", pszCaller));
+        fVBoxFlags |= SHFL_CF_ACCESS_APPEND;
+        *pfHandle  |= VBSF_HANDLE_F_APPEND;
+    }
+
+    /*
+     * Only directories?
+     */
+    if (fLnxOpen & O_DIRECTORY) {
+        Log(("%s: O_DIRECTORY set\n", pszCaller));
+        fVBoxFlags |= SHFL_CF_DIRECTORY;
+    }
+
+    return fVBoxFlags;
+}
+
+
+/**
+ * Open a regular file.
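+ *
+ * For example, open(2) with O_CREAT | O_WRONLY | O_TRUNC comes out of
+ * vbsf_linux_oflags_to_vbox() above as SHFL_CF_ACCESS_DENYNONE
+ * | SHFL_CF_ACT_CREATE_IF_NEW | SHFL_CF_ACT_OVERWRITE_IF_EXISTS
+ * | SHFL_CF_ACCESS_WRITE.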
+ *
+ * @param   inode       the inode
+ * @param   file        the file
+ * @returns 0 on success, Linux error code otherwise
+ */
+static int vbsf_reg_open(struct inode *inode, struct file *file)
+{
+    int rc, rc_linux = 0;
+    struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(inode->i_sb);
+    struct vbsf_inode_info *sf_i       = VBSF_GET_INODE_INFO(inode);
+    struct dentry          *dentry     = VBSF_GET_F_DENTRY(file);
+    struct vbsf_reg_info   *sf_r;
+    VBOXSFCREATEREQ        *pReq;
+
+    SFLOGFLOW(("vbsf_reg_open: inode=%p file=%p flags=%#x %s\n", inode, file, file->f_flags, sf_i ? sf_i->path->String.ach : NULL));
+    Assert(pSuperInfo);
+    Assert(sf_i);
+
+    sf_r = kmalloc(sizeof(*sf_r), GFP_KERNEL);
+    if (!sf_r) {
+        LogRelFunc(("could not allocate reg info\n"));
+        return -ENOMEM;
+    }
+
+    RTListInit(&sf_r->Handle.Entry);
+    sf_r->Handle.cRefs  = 1;
+    sf_r->Handle.fFlags = VBSF_HANDLE_F_FILE | VBSF_HANDLE_F_MAGIC;
+    sf_r->Handle.hHost  = SHFL_HANDLE_NIL;
+
+    /* Already open? */
+    if (sf_i->handle != SHFL_HANDLE_NIL) {
+        /*
+         * This inode was created with vbsf_create_worker().  Check the CreateFlags:
+         * O_CREAT, O_TRUNC: inherently true (file was just created).  Not sure
+         * about the access flags (SHFL_CF_ACCESS_*).
+         */
+        sf_i->force_restat = 1;
+        sf_r->Handle.hHost = sf_i->handle;
+        sf_i->handle = SHFL_HANDLE_NIL;
+        file->private_data = sf_r;
+
+        sf_r->Handle.fFlags |= VBSF_HANDLE_F_READ | VBSF_HANDLE_F_WRITE; /** @todo fix */
+        vbsf_handle_append(sf_i, &sf_r->Handle);
+        SFLOGFLOW(("vbsf_reg_open: returns 0 (#1) - sf_i=%p hHost=%#llx\n", sf_i, sf_r->Handle.hHost));
+        return 0;
+    }
+
+    pReq = (VBOXSFCREATEREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq) + sf_i->path->u16Size);
+    if (!pReq) {
+        kfree(sf_r);
+        LogRelFunc(("Failed to allocate a VBOXSFCREATEREQ buffer!\n"));
+        return -ENOMEM;
+    }
+    RT_BCOPY_UNFORTIFIED(&pReq->StrPath, sf_i->path, SHFLSTRING_HEADER_SIZE + sf_i->path->u16Size);
+    RT_ZERO(pReq->CreateParms);
+    pReq->CreateParms.Handle = SHFL_HANDLE_NIL;
+
+    /* We check the value of pReq->CreateParms.Handle afterwards to
+     * find out if the call succeeded or failed, as the API does not seem
+     * to cleanly distinguish error and informational messages.
+     *
+     * Furthermore, we must set pReq->CreateParms.Handle to SHFL_HANDLE_NIL
+     * to make the shared folders host service use our fMode parameter. */
+
+    /* We ignore O_EXCL, as the Linux kernel seems to call create
+       beforehand itself, so O_EXCL should always fail. */
+    pReq->CreateParms.CreateFlags = vbsf_linux_oflags_to_vbox(file->f_flags & ~O_EXCL, &sf_r->Handle.fFlags, __FUNCTION__);
+    pReq->CreateParms.Info.Attr.fMode = inode->i_mode;
+    LogFunc(("vbsf_reg_open: calling VbglR0SfHostReqCreate, file %s, flags=%#x, %#x\n",
+             sf_i->path->String.utf8, file->f_flags, pReq->CreateParms.CreateFlags));
+    rc = VbglR0SfHostReqCreate(pSuperInfo->map.root, pReq);
+    if (RT_FAILURE(rc)) {
+        LogFunc(("VbglR0SfHostReqCreate failed flags=%d,%#x rc=%Rrc\n", file->f_flags, pReq->CreateParms.CreateFlags, rc));
+        kfree(sf_r);
+        VbglR0PhysHeapFree(pReq);
+        return -RTErrConvertToErrno(rc);
+    }
+
+    if (pReq->CreateParms.Handle != SHFL_HANDLE_NIL) {
+        vbsf_dentry_chain_increase_ttl(dentry);
+        vbsf_update_inode(inode, sf_i, &pReq->CreateParms.Info, pSuperInfo, false /*fInodeLocked*/, 0 /*fSetAttrs*/);
+        rc_linux = 0;
+    } else {
+        switch (pReq->CreateParms.Result) {
+            case SHFL_PATH_NOT_FOUND:
+                vbsf_dentry_invalidate_ttl(dentry);
+                rc_linux = -ENOENT;
+                break;
+            case SHFL_FILE_NOT_FOUND:
+                vbsf_dentry_invalidate_ttl(dentry);
+                /** @todo sf_dentry_increase_parent_ttl(file->f_dentry); if we can trust it. */
+                rc_linux = -ENOENT;
+                break;
+            case SHFL_FILE_EXISTS:
+                vbsf_dentry_chain_increase_ttl(dentry);
+                vbsf_update_inode(inode, sf_i, &pReq->CreateParms.Info, pSuperInfo, false /*fInodeLocked*/, 0 /*fSetAttrs*/);
+                rc_linux = -EEXIST;
+                break;
+            default:
+                vbsf_dentry_chain_increase_parent_ttl(dentry);
+                rc_linux = 0;
+                break;
+        }
+    }
+
+    sf_r->Handle.hHost = pReq->CreateParms.Handle;
+    file->private_data = sf_r;
+    vbsf_handle_append(sf_i, &sf_r->Handle);
+    VbglR0PhysHeapFree(pReq);
+    SFLOGFLOW(("vbsf_reg_open: returns 0 (#2) - sf_i=%p hHost=%#llx\n", sf_i, sf_r->Handle.hHost));
+    return rc_linux;
+}
+
+
+/**
+ * Close a regular file.
+ *
+ * @param   inode       the inode
+ * @param   file        the file
+ * @returns 0 on success, Linux error code otherwise
+ */
+static int vbsf_reg_release(struct inode *inode, struct file *file)
+{
+    struct vbsf_inode_info *sf_i = VBSF_GET_INODE_INFO(inode);
+    struct vbsf_reg_info   *sf_r = file->private_data;
+
+    SFLOGFLOW(("vbsf_reg_release: inode=%p file=%p\n", inode, file));
+    if (sf_r) {
+        struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(inode->i_sb);
+        struct address_space   *mapping    = inode->i_mapping;
+        Assert(pSuperInfo);
+
+        /* If we're closing the last handle for this inode, make sure we flush
+           the mapping or we'll end up in vbsf_writepage without a handle. */
+        if (   mapping
+            && mapping->nrpages > 0
+            /** @todo && last writable handle */ ) {
+#if RTLNX_VER_MIN(2,4,25)
+            if (filemap_fdatawrite(mapping) != -EIO)
+#else
+            if (   filemap_fdatasync(mapping) == 0
+                && fsync_inode_data_buffers(inode) == 0)
+#endif
+                filemap_fdatawait(inode->i_mapping);
+        }
+
+        /* Release sf_r, closing the handle if we're the last user. */
+        file->private_data = NULL;
+        vbsf_handle_release(&sf_r->Handle, pSuperInfo, "vbsf_reg_release");
+
+        sf_i->handle = SHFL_HANDLE_NIL;
+    }
+    return 0;
+}
+
+
+/**
+ * Wrapper around generic/default seek function that ensures that we've got
+ * the up-to-date file size when doing anything relative to EOF.
+ *
+ * The issue is that the host may extend the file while we weren't looking and
+ * if the caller wishes to append data, it may end up overwriting existing data
+ * if we operate with a stale size.  So, we always retrieve the file size on EOF
+ * relative seeks.
+ */
+static loff_t vbsf_reg_llseek(struct file *file, loff_t off, int whence)
+{
+    SFLOGFLOW(("vbsf_reg_llseek: file=%p off=%lld whence=%d\n", file, off, whence));
+
+    switch (whence) {
+#ifdef SEEK_HOLE
+        case SEEK_HOLE:
+        case SEEK_DATA:
+#endif
+        case SEEK_END: {
+            struct vbsf_reg_info *sf_r = file->private_data;
+            int rc = vbsf_inode_revalidate_with_handle(VBSF_GET_F_DENTRY(file), sf_r->Handle.hHost,
+                                                       true /*fForce*/, false /*fInodeLocked*/);
+            if (rc == 0)
+                break;
+            return rc;
+        }
+    }
+
+#if RTLNX_VER_MIN(2,4,8)
+    return generic_file_llseek(file, off, whence);
+#else
+    return default_llseek(file, off, whence);
+#endif
+}
+
+
+/**
+ * Flush region of file - chiefly mmap/msync.
+ *
+ * We cannot use the noop_fsync / simple_sync_file here as that means
+ * msync(,,MS_SYNC) will return before the data hits the host, thereby
+ * causing coherency issues with O_DIRECT access to the same file as
+ * well as any host interaction with the file.
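+ *
+ * Put differently: an msync(addr, cb, MS_SYNC) on a shared mapping must not
+ * return until the dirtied pages have actually reached the host, or an
+ * O_DIRECT reader of the same range could still see stale data.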
+ */
+#if RTLNX_VER_MIN(3,1,0) \
+ || (defined(CONFIG_SUSE_KERNEL) && RTLNX_VER_MIN(3,0,101) /** @todo figure when exactly */)
+static int vbsf_reg_fsync(struct file *file, loff_t start, loff_t end, int datasync)
+{
+# if RTLNX_VER_MIN(3,16,0)
+    return __generic_file_fsync(file, start, end, datasync);
+# else
+    return generic_file_fsync(file, start, end, datasync);
+# endif
+}
+#elif RTLNX_VER_MIN(2,6,35)
+static int vbsf_reg_fsync(struct file *file, int datasync)
+{
+    return generic_file_fsync(file, datasync);
+}
+#else /* < 2.6.35 */
+static int vbsf_reg_fsync(struct file *file, struct dentry *dentry, int datasync)
+{
+# if RTLNX_VER_MIN(2,6,31)
+    return simple_fsync(file, dentry, datasync);
+# else
+    int rc;
+    struct inode *inode = dentry->d_inode;
+    AssertReturn(inode, -EINVAL);
+
+    /** @todo What about file_fsync()? (<= 2.5.11) */
+
+# if RTLNX_VER_MIN(2,5,12)
+    rc = sync_mapping_buffers(inode->i_mapping);
+    if (   rc == 0
+        && (inode->i_state & I_DIRTY)
+        && ((inode->i_state & I_DIRTY_DATASYNC) || !datasync)
+       ) {
+        struct writeback_control wbc = {
+            .sync_mode = WB_SYNC_ALL,
+            .nr_to_write = 0
+        };
+        rc = sync_inode(inode, &wbc);
+    }
+# else /* < 2.5.12 */
+    /** @todo
+     * Something is buggy here or in the 2.4.21-27.EL kernel I'm testing on.
+     *
+     * In theory we shouldn't need to do anything here, since msync will call
+     * writepage() on each dirty page and we write them out synchronously.  So, the
+     * problem is elsewhere...  Doesn't happen all the time either.  Sigh.
+     */
+    rc = fsync_inode_buffers(inode);
+# if RTLNX_VER_MIN(2,4,10)
+    if (rc == 0 && datasync)
+        rc = fsync_inode_data_buffers(inode);
+# endif
+
+# endif /* < 2.5.12 */
+    return rc;
+# endif
+}
+#endif /* < 2.6.35 */
+
+
+#if RTLNX_VER_MIN(4,5,0)
+/**
+ * Copy a data block from one file to another on the host side.
+ */
+static ssize_t vbsf_reg_copy_file_range(struct file *pFileSrc, loff_t offSrc, struct file *pFileDst, loff_t offDst,
+                                        size_t cbRange, unsigned int fFlags)
+{
+    ssize_t cbRet;
+    if (g_uSfLastFunction >= SHFL_FN_COPY_FILE_PART) {
+        struct inode           *pInodeSrc     = pFileSrc->f_inode;
+        struct vbsf_inode_info *pInodeInfoSrc = VBSF_GET_INODE_INFO(pInodeSrc);
+        struct vbsf_super_info *pSuperInfoSrc = VBSF_GET_SUPER_INFO(pInodeSrc->i_sb);
+        struct vbsf_reg_info   *pFileInfoSrc  = (struct vbsf_reg_info *)pFileSrc->private_data;
+        struct inode           *pInodeDst     = pFileDst->f_inode;
+        struct vbsf_inode_info *pInodeInfoDst = VBSF_GET_INODE_INFO(pInodeDst);
+        struct vbsf_super_info *pSuperInfoDst = VBSF_GET_SUPER_INFO(pInodeDst->i_sb);
+        struct vbsf_reg_info   *pFileInfoDst  = (struct vbsf_reg_info *)pFileDst->private_data;
+        VBOXSFCOPYFILEPARTREQ  *pReq;
+
+        /*
+         * Some extra validation.
+         */
+        AssertPtrReturn(pInodeInfoSrc, -EOPNOTSUPP);
+        Assert(pInodeInfoSrc->u32Magic == SF_INODE_INFO_MAGIC);
+        AssertPtrReturn(pInodeInfoDst, -EOPNOTSUPP);
+        Assert(pInodeInfoDst->u32Magic == SF_INODE_INFO_MAGIC);
+
+# if RTLNX_VER_MAX(4,11,0)
+        if (!S_ISREG(pInodeSrc->i_mode) || !S_ISREG(pInodeDst->i_mode))
+            return S_ISDIR(pInodeSrc->i_mode) || S_ISDIR(pInodeDst->i_mode) ? -EISDIR : -EINVAL;
+# endif
+
+        /*
+         * Allocate the request and issue it.
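+         *
+         * (This is what backs the guest-side copy_file_range(2) syscall; e.g.
+         *      copy_file_range(fdSrc, &offSrc, fdDst, &offDst, cbRange, 0)
+         *  ends up here and lets the host copy the bytes without pumping them
+         *  through the guest.  The fd/off names are illustrative only.)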
+ */ + pReq = (VBOXSFCOPYFILEPARTREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq)); + if (pReq) { + int vrc = VbglR0SfHostReqCopyFilePart(pSuperInfoSrc->map.root, pFileInfoSrc->Handle.hHost, offSrc, + pSuperInfoDst->map.root, pFileInfoDst->Handle.hHost, offDst, + cbRange, 0 /*fFlags*/, pReq); + if (RT_SUCCESS(vrc)) + cbRet = pReq->Parms.cb64ToCopy.u.value64; + else if (vrc == VERR_NOT_IMPLEMENTED) + cbRet = -EOPNOTSUPP; + else + cbRet = -RTErrConvertToErrno(vrc); + + VbglR0PhysHeapFree(pReq); + } else + cbRet = -ENOMEM; + } else { + cbRet = -EOPNOTSUPP; + } + SFLOGFLOW(("vbsf_reg_copy_file_range: returns %zd\n", cbRet)); + return cbRet; +} +#endif /* > 4.5 */ + + +#ifdef SFLOG_ENABLED +/* + * This is just for logging page faults and such. + */ + +/** Pointer to the ops generic_file_mmap returns the first time it's called. */ +static struct vm_operations_struct const *g_pGenericFileVmOps = NULL; +/** Merge of g_LoggingVmOpsTemplate and g_pGenericFileVmOps. */ +static struct vm_operations_struct g_LoggingVmOps; + + +/* Generic page fault callback: */ +# if RTLNX_VER_MIN(4,11,0) +static vm_fault_t vbsf_vmlog_fault(struct vm_fault *vmf) +{ + vm_fault_t rc; + SFLOGFLOW(("vbsf_vmlog_fault: vmf=%p flags=%#x addr=%p\n", vmf, vmf->flags, vmf->address)); + rc = g_pGenericFileVmOps->fault(vmf); + SFLOGFLOW(("vbsf_vmlog_fault: returns %d\n", rc)); + return rc; +} +# elif RTLNX_VER_MIN(2,6,23) +static int vbsf_vmlog_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + int rc; +# if RTLNX_VER_MIN(4,10,0) + SFLOGFLOW(("vbsf_vmlog_fault: vma=%p vmf=%p flags=%#x addr=%p\n", vma, vmf, vmf->flags, vmf->address)); +# else + SFLOGFLOW(("vbsf_vmlog_fault: vma=%p vmf=%p flags=%#x addr=%p\n", vma, vmf, vmf->flags, vmf->virtual_address)); +# endif + rc = g_pGenericFileVmOps->fault(vma, vmf); + SFLOGFLOW(("vbsf_vmlog_fault: returns %d\n", rc)); + return rc; +} +# endif + + +/* Special/generic page fault handler: */ +# if RTLNX_VER_MIN(2,6,26) +# elif RTLNX_VER_MIN(2,6,1) +static struct page *vbsf_vmlog_nopage(struct vm_area_struct *vma, unsigned long address, int *type) +{ + struct page *page; + SFLOGFLOW(("vbsf_vmlog_nopage: vma=%p address=%p type=%p:{%#x}\n", vma, address, type, type ? 
*type : 0)); + page = g_pGenericFileVmOps->nopage(vma, address, type); + SFLOGFLOW(("vbsf_vmlog_nopage: returns %p\n", page)); + return page; +} +# else +static struct page *vbsf_vmlog_nopage(struct vm_area_struct *vma, unsigned long address, int write_access_or_unused) +{ + struct page *page; + SFLOGFLOW(("vbsf_vmlog_nopage: vma=%p address=%p wau=%d\n", vma, address, write_access_or_unused)); + page = g_pGenericFileVmOps->nopage(vma, address, write_access_or_unused); + SFLOGFLOW(("vbsf_vmlog_nopage: returns %p\n", page)); + return page; +} +# endif /* < 2.6.26 */ + + +/* Special page fault callback for making something writable: */ +# if RTLNX_VER_MIN(4,11,0) +static vm_fault_t vbsf_vmlog_page_mkwrite(struct vm_fault *vmf) +{ + vm_fault_t rc; + SFLOGFLOW(("vbsf_vmlog_page_mkwrite: vmf=%p flags=%#x addr=%p\n", vmf, vmf->flags, vmf->address)); + rc = g_pGenericFileVmOps->page_mkwrite(vmf); + SFLOGFLOW(("vbsf_vmlog_page_mkwrite: returns %d\n", rc)); + return rc; +} +# elif RTLNX_VER_MIN(2,6,30) +static int vbsf_vmlog_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + int rc; +# if RTLNX_VER_MIN(4,10,0) + SFLOGFLOW(("vbsf_vmlog_page_mkwrite: vma=%p vmf=%p flags=%#x addr=%p\n", vma, vmf, vmf->flags, vmf->address)); +# else + SFLOGFLOW(("vbsf_vmlog_page_mkwrite: vma=%p vmf=%p flags=%#x addr=%p\n", vma, vmf, vmf->flags, vmf->virtual_address)); +# endif + rc = g_pGenericFileVmOps->page_mkwrite(vma, vmf); + SFLOGFLOW(("vbsf_vmlog_page_mkwrite: returns %d\n", rc)); + return rc; +} +# elif RTLNX_VER_MIN(2,6,18) +static int vbsf_vmlog_page_mkwrite(struct vm_area_struct *vma, struct page *page) +{ + int rc; + SFLOGFLOW(("vbsf_vmlog_page_mkwrite: vma=%p page=%p\n", vma, page)); + rc = g_pGenericFileVmOps->page_mkwrite(vma, page); + SFLOGFLOW(("vbsf_vmlog_page_mkwrite: returns %d\n", rc)); + return rc; +} +# endif + + +/* Special page fault callback for mapping pages: */ +# if RTLNX_VER_MIN(5,12,0) +static vm_fault_t vbsf_vmlog_map_pages(struct vm_fault *vmf, pgoff_t start, pgoff_t end) +{ + vm_fault_t rc; + SFLOGFLOW(("vbsf_vmlog_map_pages: vmf=%p (flags=%#x addr=%p) start=%p end=%p\n", vmf, vmf->flags, vmf->address, start, end)); + rc = g_pGenericFileVmOps->map_pages(vmf, start, end); + SFLOGFLOW(("vbsf_vmlog_map_pages: returns\n")); + return rc; +} +# elif RTLNX_VER_MIN(4,10,0) +static void vbsf_vmlog_map_pages(struct vm_fault *vmf, pgoff_t start, pgoff_t end) +{ + SFLOGFLOW(("vbsf_vmlog_map_pages: vmf=%p (flags=%#x addr=%p) start=%p end=%p\n", vmf, vmf->flags, vmf->address, start, end)); + g_pGenericFileVmOps->map_pages(vmf, start, end); + SFLOGFLOW(("vbsf_vmlog_map_pages: returns\n")); +} +# elif RTLNX_VER_MIN(4,8,0) +static void vbsf_vmlog_map_pages(struct fault_env *fenv, pgoff_t start, pgoff_t end) +{ + SFLOGFLOW(("vbsf_vmlog_map_pages: fenv=%p (flags=%#x addr=%p) start=%p end=%p\n", fenv, fenv->flags, fenv->address, start, end)); + g_pGenericFileVmOps->map_pages(fenv, start, end); + SFLOGFLOW(("vbsf_vmlog_map_pages: returns\n")); +} +# elif RTLNX_VER_MIN(3,15,0) +static void vbsf_vmlog_map_pages(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + SFLOGFLOW(("vbsf_vmlog_map_pages: vma=%p vmf=%p (flags=%#x addr=%p)\n", vma, vmf, vmf->flags, vmf->virtual_address)); + g_pGenericFileVmOps->map_pages(vma, vmf); + SFLOGFLOW(("vbsf_vmlog_map_pages: returns\n")); +} +# endif + + +/** Overload template. 
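+ * An entry here only takes effect when the vm_operations installed by
+ * generic_file_mmap() implement the same callback; vbsf_reg_mmap() below
+ * merges this table with the generic one pointer by pointer.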
+ */
+static struct vm_operations_struct const g_LoggingVmOpsTemplate = {
+# if RTLNX_VER_MIN(2,6,23)
+    .fault = vbsf_vmlog_fault,
+# endif
+# if RTLNX_VER_MAX(2,6,26)
+    .nopage = vbsf_vmlog_nopage,
+# endif
+# if RTLNX_VER_MIN(2,6,18)
+    .page_mkwrite = vbsf_vmlog_page_mkwrite,
+# endif
+# if RTLNX_VER_MIN(3,15,0)
+    .map_pages = vbsf_vmlog_map_pages,
+# endif
+};
+
+/** file_operations::mmap wrapper for logging purposes. */
+extern int vbsf_reg_mmap(struct file *file, struct vm_area_struct *vma)
+{
+    int rc;
+    SFLOGFLOW(("vbsf_reg_mmap: file=%p vma=%p\n", file, vma));
+    rc = generic_file_mmap(file, vma);
+    if (rc == 0) {
+        /* Merge the ops and template the first time thru (there's a race here). */
+        if (g_pGenericFileVmOps == NULL) {
+            uintptr_t const    *puSrc1 = (uintptr_t *)vma->vm_ops;
+            uintptr_t const    *puSrc2 = (uintptr_t *)&g_LoggingVmOpsTemplate;
+            uintptr_t volatile *puDst  = (uintptr_t *)&g_LoggingVmOps;
+            size_t              cbLeft = sizeof(g_LoggingVmOps) / sizeof(*puDst);
+            while (cbLeft-- > 0) {
+                *puDst = *puSrc2 && *puSrc1 ? *puSrc2 : *puSrc1;
+                puSrc1++;
+                puSrc2++;
+                puDst++;
+            }
+            g_pGenericFileVmOps = vma->vm_ops;
+            vma->vm_ops = &g_LoggingVmOps;
+        } else if (g_pGenericFileVmOps == vma->vm_ops)
+            vma->vm_ops = &g_LoggingVmOps;
+        else
+            SFLOGFLOW(("vbsf_reg_mmap: Warning: vm_ops=%p, expected %p!\n", vma->vm_ops, g_pGenericFileVmOps));
+    }
+    SFLOGFLOW(("vbsf_reg_mmap: returns %d\n", rc));
+    return rc;
+}
+
+#endif /* SFLOG_ENABLED */
+
+
+/**
+ * File operations for regular files.
+ *
+ * Note on splice_read/splice_write/sendfile:
+ *   - Splice was introduced in 2.6.17.  The generic_file_splice_read/write
+ *     methods go thru the page cache, which is undesirable and is why we
+ *     need to cook our own versions of the code as long as we cannot track
+ *     host-side writes and correctly invalidate the guest page-cache.
+ *   - Sendfile was reimplemented using splice in 2.6.23.
+ *   - The default_file_splice_read/write no-page-cache fallback functions
+ *     were introduced in 2.6.31.  The write one works in page units.
+ *   - Since linux 3.16 there is iter_file_splice_write that uses iter_write.
+ *   - Since linux 4.9 the generic_file_splice_read function started using
+ *     read_iter.
+ */
+struct file_operations vbsf_reg_fops = {
+    .open            = vbsf_reg_open,
+#if RTLNX_VER_MAX(5,10,0) /* No regular .read/.write for 5.10 and later; only .read_iter/.write_iter, otherwise in-kernel reads/writes fail. */
+    .read            = vbsf_reg_read,
+    .write           = vbsf_reg_write,
+#endif
+#if RTLNX_VER_MIN(3,16,0)
+    .read_iter       = vbsf_reg_read_iter,
+    .write_iter      = vbsf_reg_write_iter,
+#elif RTLNX_VER_MIN(2,6,19)
+    .aio_read        = vbsf_reg_aio_read,
+    .aio_write       = vbsf_reg_aio_write,
+#endif
+    .release         = vbsf_reg_release,
+#ifdef SFLOG_ENABLED
+    .mmap            = vbsf_reg_mmap,
+#else
+    .mmap            = generic_file_mmap,
+#endif
+#if RTLNX_VER_RANGE(2,6,17, 2,6,31)
+    .splice_read     = vbsf_splice_read,
+#endif
+#if RTLNX_VER_MIN(3,16,0)
+    .splice_write    = iter_file_splice_write,
+#elif RTLNX_VER_MIN(2,6,17)
+    .splice_write    = vbsf_splice_write,
+#endif
+#if RTLNX_VER_RANGE(2,5,30, 2,6,23)
+    .sendfile        = vbsf_reg_sendfile,
+#endif
+    .llseek          = vbsf_reg_llseek,
+    .fsync           = vbsf_reg_fsync,
+#if RTLNX_VER_MIN(4,5,0)
+    .copy_file_range = vbsf_reg_copy_file_range,
+#endif
+};
+
+
+/**
+ * Inode operations for regular files.
+ */ +struct inode_operations vbsf_reg_iops = { +#if RTLNX_VER_MIN(2,5,18) + .getattr = vbsf_inode_getattr, +#else + .revalidate = vbsf_inode_revalidate, +#endif + .setattr = vbsf_inode_setattr, +}; + + + +/********************************************************************************************************************************* +* Address Space Operations on Regular Files (for mmap, sendfile, direct I/O) * +*********************************************************************************************************************************/ + +/** + * Used to read the content of a page into the page cache. + * + * Needed for mmap and reads+writes when the file is mmapped in a + * shared+writeable fashion. + */ +#if RTLNX_VER_MIN(5,19,0)|| RTLNX_RHEL_RANGE(9,3, 9,99) +static int vbsf_read_folio(struct file *file, struct folio *folio) +{ + struct page *page = &folio->page; +#else +static int vbsf_readpage(struct file *file, struct page *page) +{ +#endif + struct inode *inode = VBSF_GET_F_DENTRY(file)->d_inode; + int err; + + SFLOGFLOW(("vbsf_readpage: inode=%p file=%p page=%p off=%#llx\n", inode, file, page, (uint64_t)page->index << PAGE_SHIFT)); + Assert(PageLocked(page)); + + if (PageUptodate(page)) { + unlock_page(page); + return 0; + } + + if (!is_bad_inode(inode)) { + VBOXSFREADPGLSTREQ *pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq)); + if (pReq) { + struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(inode->i_sb); + struct vbsf_reg_info *sf_r = file->private_data; + uint32_t cbRead; + int vrc; + + pReq->PgLst.offFirstPage = 0; + pReq->PgLst.aPages[0] = page_to_phys(page); + vrc = VbglR0SfHostReqReadPgLst(pSuperInfo->map.root, + pReq, + sf_r->Handle.hHost, + (uint64_t)page->index << PAGE_SHIFT, + PAGE_SIZE, + 1 /*cPages*/); + + cbRead = pReq->Parms.cb32Read.u.value32; + AssertStmt(cbRead <= PAGE_SIZE, cbRead = PAGE_SIZE); + VbglR0PhysHeapFree(pReq); + + if (RT_SUCCESS(vrc)) { + if (cbRead == PAGE_SIZE) { + /* likely */ + } else { + uint8_t *pbMapped = (uint8_t *)kmap(page); + RT_BZERO(&pbMapped[cbRead], PAGE_SIZE - cbRead); + kunmap(page); + /** @todo truncate the inode file size? */ + } + + flush_dcache_page(page); + SetPageUptodate(page); + unlock_page(page); + return 0; + } + err = -RTErrConvertToErrno(vrc); + } else + err = -ENOMEM; + } else + err = -EIO; + SetPageError(page); + unlock_page(page); + return err; +} + + +/** + * Used to write out the content of a dirty page cache page to the host file. + * + * Needed for mmap and writes when the file is mmapped in a shared+writeable + * fashion. + */ +#if RTLNX_VER_MIN(2,5,52) +static int vbsf_writepage(struct page *page, struct writeback_control *wbc) +#else +static int vbsf_writepage(struct page *page) +#endif +{ + struct address_space *mapping = page->mapping; + struct inode *inode = mapping->host; + struct vbsf_inode_info *sf_i = VBSF_GET_INODE_INFO(inode); + struct vbsf_handle *pHandle = vbsf_handle_find(sf_i, VBSF_HANDLE_F_WRITE, VBSF_HANDLE_F_APPEND); + int err; + + SFLOGFLOW(("vbsf_writepage: inode=%p page=%p off=%#llx pHandle=%p (%#llx)\n", + inode, page, (uint64_t)page->index << PAGE_SHIFT, pHandle, pHandle ? pHandle->hHost : 0)); + + if (pHandle) { + struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(inode->i_sb); + VBOXSFWRITEPGLSTREQ *pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq)); + if (pReq) { + uint64_t const cbFile = i_size_read(inode); + uint64_t const offInFile = (uint64_t)page->index << PAGE_SHIFT; + uint32_t const cbToWrite = page->index != (cbFile >> PAGE_SHIFT) ? 
PAGE_SIZE + : (uint32_t)cbFile & (uint32_t)PAGE_OFFSET_MASK; + int vrc; + + pReq->PgLst.offFirstPage = 0; + pReq->PgLst.aPages[0] = page_to_phys(page); + vrc = VbglR0SfHostReqWritePgLst(pSuperInfo->map.root, + pReq, + pHandle->hHost, + offInFile, + cbToWrite, + 1 /*cPages*/); + sf_i->ModificationTimeAtOurLastWrite = sf_i->ModificationTime; + AssertMsgStmt(pReq->Parms.cb32Write.u.value32 == cbToWrite || RT_FAILURE(vrc), /* lazy bird */ + ("%#x vs %#x\n", pReq->Parms.cb32Write, cbToWrite), + vrc = VERR_WRITE_ERROR); + VbglR0PhysHeapFree(pReq); + + if (RT_SUCCESS(vrc)) { + /* Update the inode if we've extended the file. */ + /** @todo is this necessary given the cbToWrite calc above? */ + uint64_t const offEndOfWrite = offInFile + cbToWrite; + if ( offEndOfWrite > cbFile + && offEndOfWrite > i_size_read(inode)) + i_size_write(inode, offEndOfWrite); + + /* Update and unlock the page. */ + if (PageError(page)) + ClearPageError(page); + SetPageUptodate(page); + unlock_page(page); + + vbsf_handle_release(pHandle, pSuperInfo, "vbsf_writepage"); + return 0; + } + + /* + * We failed. + */ + err = -EIO; + } else + err = -ENOMEM; + vbsf_handle_release(pHandle, pSuperInfo, "vbsf_writepage"); + } else { + /** @todo we could re-open the file here and deal with this... */ + static uint64_t volatile s_cCalls = 0; + if (s_cCalls++ < 16) + printk("vbsf_writepage: no writable handle for %s..\n", sf_i->path->String.ach); + err = -EIO; + } + SetPageError(page); + unlock_page(page); + return err; +} + + +#if RTLNX_VER_MIN(2,6,24) +/** + * Called when writing thru the page cache (which we shouldn't be doing). + */ +static inline void vbsf_write_begin_warn(loff_t pos, unsigned len, unsigned flags) +{ + /** @todo r=bird: We shouldn't ever get here, should we? Because we don't use + * the page cache for any writes AFAIK. We could just as well use + * simple_write_begin & simple_write_end here if we think we really + * need to have non-NULL function pointers in the table... */ + static uint64_t volatile s_cCalls = 0; + if (s_cCalls++ < 16) { + printk("vboxsf: Unexpected call to vbsf_write_begin(pos=%#llx len=%#x flags=%#x)! Please report.\n", + (unsigned long long)pos, len, flags); + RTLogBackdoorPrintf("vboxsf: Unexpected call to vbsf_write_begin(pos=%#llx len=%#x flags=%#x)! Please report.\n", + (unsigned long long)pos, len, flags); +# ifdef WARN_ON + WARN_ON(1); +# endif + } +} + +# if RTLNX_VER_MIN(5,19,0) || RTLNX_RHEL_RANGE(9,3, 9,99) +int vbsf_write_begin(struct file *file, struct address_space *mapping, loff_t pos, + unsigned len, struct page **pagep, void **fsdata) +{ + vbsf_write_begin_warn(pos, len, 0); + return simple_write_begin(file, mapping, pos, len, pagep, fsdata); +} +# else +int vbsf_write_begin(struct file *file, struct address_space *mapping, loff_t pos, + unsigned len, unsigned flags, struct page **pagep, void **fsdata) +{ + vbsf_write_begin_warn(pos, len, flags); + return simple_write_begin(file, mapping, pos, len, flags, pagep, fsdata); +} +# endif + +#endif /* KERNEL_VERSION >= 2.6.24 */ + +#if RTLNX_VER_MIN(5,14,0) +/** + * Companion to vbsf_write_begin (i.e. shouldn't be called). + */ +static int vbsf_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned int len, unsigned int copied, + struct page *page, void *fsdata) +{ + static uint64_t volatile s_cCalls = 0; + if (s_cCalls++ < 16) + { + printk("vboxsf: Unexpected call to vbsf_write_end(pos=%#llx len=%#x)! 
Please report.\n", + (unsigned long long)pos, len); + RTLogBackdoorPrintf("vboxsf: Unexpected call to vbsf_write_end(pos=%#llx len=%#x)! Please report.\n", + (unsigned long long)pos, len); +# ifdef WARN_ON + WARN_ON(1); +# endif + } + return -ENOTSUPP; +} +#endif /* KERNEL_VERSION >= 5.14.0 */ + + +#if RTLNX_VER_MIN(2,4,10) + +# ifdef VBOX_UEK +# undef iov_iter /* HACK ALERT! Don't put anything needing vbsf_iov_iter after this fun! */ +# endif + +/** + * This is needed to make open accept O_DIRECT as well as dealing with direct + * I/O requests if we don't intercept them earlier. + */ +# if RTLNX_VER_MIN(4, 7, 0) \ + || (defined(CONFIG_SUSE_KERNEL) && RTLNX_VER_RANGE(4,4,73, 4,4,74) /** @todo Figure out when exactly. */) \ + || (defined(CONFIG_SUSE_KERNEL) && RTLNX_VER_RANGE(4,4,75, 4,4,90) /** @todo Figure out when exactly. */) \ + || (defined(CONFIG_SUSE_KERNEL) && RTLNX_VER_RANGE(4,4,92, 4,5,0) /** @todo Figure out when exactly. */) +static ssize_t vbsf_direct_IO(struct kiocb *iocb, struct iov_iter *iter) +# elif RTLNX_VER_MIN(4, 1, 0) +static ssize_t vbsf_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset) +# elif RTLNX_VER_MIN(3, 16, 0) || defined(VBOX_UEK) +static ssize_t vbsf_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t offset) +# elif RTLNX_VER_MIN(2, 6, 6) +static ssize_t vbsf_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs) +# elif RTLNX_VER_MIN(2, 5, 55) +static int vbsf_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs) +# elif RTLNX_VER_MIN(2, 5, 41) +static int vbsf_direct_IO(int rw, struct file *file, const struct iovec *iov, loff_t offset, unsigned long nr_segs) +# elif RTLNX_VER_MIN(2, 5, 35) +static int vbsf_direct_IO(int rw, struct inode *inode, const struct iovec *iov, loff_t offset, unsigned long nr_segs) +# elif RTLNX_VER_MIN(2, 5, 26) +static int vbsf_direct_IO(int rw, struct inode *inode, char *buf, loff_t offset, size_t count) +# elif LINUX_VERSION_CODE == KERNEL_VERSION(2, 4, 21) && defined(I_NEW) /* RHEL3 Frankenkernel. */ +static int vbsf_direct_IO(int rw, struct file *file, struct kiobuf *buf, unsigned long whatever1, int whatever2) +# else +static int vbsf_direct_IO(int rw, struct inode *inode, struct kiobuf *buf, unsigned long whatever1, int whatever2) +# endif +{ + TRACE(); + return -EINVAL; +} + +#endif + +/** + * Address space (for the page cache) operations for regular files. + * + * @todo the FsPerf touch/flush (mmap) test fails on 4.4.0 (ubuntu 16.04 lts). + */ +struct address_space_operations vbsf_reg_aops = { +#if RTLNX_VER_MIN(5,19,0) || RTLNX_RHEL_RANGE(9,3, 9,99) + .read_folio = vbsf_read_folio, +#else + .readpage = vbsf_readpage, +#endif + .writepage = vbsf_writepage, + /** @todo Need .writepages if we want msync performance... 
*/ +#if RTLNX_VER_MIN(5,18,0) || RTLNX_RHEL_RANGE(9,2, 9,99) + .dirty_folio = filemap_dirty_folio, +#elif RTLNX_VER_MIN(2,5,12) + .set_page_dirty = __set_page_dirty_buffers, +#endif +#if RTLNX_VER_MIN(5,14,0) + .write_begin = vbsf_write_begin, + .write_end = vbsf_write_end, +#elif RTLNX_VER_MIN(2,6,24) + .write_begin = vbsf_write_begin, + .write_end = simple_write_end, +#elif RTLNX_VER_MIN(2,5,45) + .prepare_write = simple_prepare_write, + .commit_write = simple_commit_write, +#endif +#if RTLNX_VER_MIN(2,4,10) + .direct_IO = vbsf_direct_IO, +#endif +}; diff --git a/src/VBox/Additions/linux/sharedfolders/testcase/tstmmap.c b/src/VBox/Additions/linux/sharedfolders/testcase/tstmmap.c new file mode 100644 index 00000000..9af2b4e4 --- /dev/null +++ b/src/VBox/Additions/linux/sharedfolders/testcase/tstmmap.c @@ -0,0 +1,126 @@ +/* $Id: tstmmap.c $ */ +/** @file + * vboxsf - Simple writable mmap testcase. + */ + +/* + * Copyright (C) 2019-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + * + * SPDX-License-Identifier: GPL-3.0-only + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#include +#include +#include +#include +#include +#include + + +int main(int argc, char **argv) +{ + uint8_t abBuf[4096]; + int fd; + size_t cErrors = 0; + size_t cbFile; + size_t offFile; + uint8_t *pbMapping; + const char *pszFile = "tstmmap-file1"; + if (argc > 1) + pszFile = argv[1]; + + fd = open(pszFile, O_CREAT | O_TRUNC | O_RDWR, 0660); + if (fd < 0) + { + fprintf(stderr, "error creating file: %s\n", pszFile); + return 1; + } + + /* write 64 KB to the file: */ + memset(abBuf, 0xf6, sizeof(abBuf)); + for (cbFile = 0; cbFile < 0x10000; cbFile += sizeof(abBuf)) + if (write(fd, abBuf, sizeof(abBuf)) != sizeof(abBuf)) + { + fprintf(stderr, "error writing file: %s\n", pszFile); + return 1; + } + fsync(fd); + + /* Map the file: */ + pbMapping = (uint8_t *)mmap(NULL, cbFile, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if (pbMapping == (void *)-1) + { + fprintf(stderr, "error mapping file: %s\n", pszFile); + return 1; + } + + /* Modify the mapping and sync it: */ + memset(pbMapping, 0xf7, cbFile); + if (msync(pbMapping, cbFile, MS_SYNC) != 0) + { + fprintf(stderr, "error msync'ing file: %s\n", pszFile); + return 1; + } + + /* Unmap and close it: */ + if (munmap(pbMapping, cbFile) != 0) + fprintf(stderr, "error munmap'ing file: %s\n", pszFile); + close(fd); + + /* + * Open it again and check the content. 
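+     * Every byte must now read back as the 0xf7 written through the mapping.
+     * (Typical invocation, with an illustrative path: ./tstmmap /mnt/shared/f;
+     * the file argument is optional.)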
+ */ + fd = open(pszFile, O_RDWR, 0); + if (fd < 0) + { + fprintf(stderr, "error reopening file: %s\n", pszFile); + return 1; + } + + while (offFile < cbFile && cErrors < 42) + { + size_t offBuf; + ssize_t cbRead = read(fd, abBuf, sizeof(abBuf)); + if (cbRead != (ssize_t)sizeof(abBuf)) + { + fprintf(stderr, "error reading file: %zd, off %#zx (%s)\n", cbRead, offFile, pszFile); + return 1; + } + + for (offBuf = 0; offBuf < sizeof(abBuf); offBuf++) + if (abBuf[offBuf] != 0xf7) + { + fprintf(stderr, "mismatch at %#zx: %#x, expected %#x\n", offFile + offBuf, abBuf[offBuf], 0xf7); + cErrors++; + if (cErrors > 42) + break; + } + + offFile += sizeof(abBuf); + } + + close(fd); + + return cErrors == 0 ? 0 : 1; +} + diff --git a/src/VBox/Additions/linux/sharedfolders/utils.c b/src/VBox/Additions/linux/sharedfolders/utils.c new file mode 100644 index 00000000..b75997a8 --- /dev/null +++ b/src/VBox/Additions/linux/sharedfolders/utils.c @@ -0,0 +1,1288 @@ +/* $Id: utils.c $ */ +/** @file + * vboxsf - VBox Linux Shared Folders VFS, utility functions. + * + * Utility functions (mainly conversion from/to VirtualBox/Linux data structures). + */ + +/* + * Copyright (C) 2006-2023 Oracle and/or its affiliates. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */
+
+#include "vfsmod.h"
+#include
+#include
+#include
+
+
+int vbsf_nlscpy(struct vbsf_super_info *pSuperInfo, char *name, size_t name_bound_len,
+                const unsigned char *utf8_name, size_t utf8_len)
+{
+    Assert(name_bound_len > 1);
+    Assert(RTStrNLen(utf8_name, utf8_len) == utf8_len);
+
+    if (pSuperInfo->nls) {
+        const char *in            = utf8_name;
+        size_t      in_bound_len  = utf8_len;
+        char       *out           = name;
+        size_t      out_bound_len = name_bound_len - 1;
+
+        while (in_bound_len) {
+#if RTLNX_VER_MIN(2,6,31)
+            unicode_t uni;
+            int cbInEnc = utf8_to_utf32(in, in_bound_len, &uni);
+#else
+            linux_wchar_t uni;
+            int cbInEnc = utf8_mbtowc(&uni, in, in_bound_len);
+#endif
+            if (cbInEnc >= 0) {
+                int cbOutEnc = pSuperInfo->nls->uni2char(uni, out, out_bound_len);
+                if (cbOutEnc >= 0) {
+                    /*SFLOG3(("vbsf_nlscpy: cbOutEnc=%d cbInEnc=%d uni=%#x in_bound_len=%u\n", cbOutEnc, cbInEnc, uni, in_bound_len));*/
+                    out           += cbOutEnc;
+                    out_bound_len -= cbOutEnc;
+
+                    in            += cbInEnc;
+                    in_bound_len  -= cbInEnc;
+                } else {
+                    SFLOG(("vbsf_nlscpy: nls->uni2char failed with %d on %#x (pos %u in '%s'), out_bound_len=%u\n",
+                           cbOutEnc, uni, in - (const char *)utf8_name, (const char *)utf8_name, (unsigned)out_bound_len));
+                    return cbOutEnc;
+                }
+            } else {
+                SFLOG(("vbsf_nlscpy: utf8_to_utf32/utf8_mbtowc failed with %d on %x (pos %u in '%s'), in_bound_len=%u!\n",
+                       cbInEnc, *in, in - (const char *)utf8_name, (const char *)utf8_name, (unsigned)in_bound_len));
+                return -EINVAL;
+            }
+        }
+
+        *out = '\0';
+    } else {
+        if (utf8_len + 1 > name_bound_len)
+            return -ENAMETOOLONG;
+
+        memcpy(name, utf8_name, utf8_len + 1);
+    }
+    return 0;
+}
+
+
+/**
+ * Converts the given NLS string to a host one, kmalloc'ing
+ * the output buffer (use kfree on result).
+ */
+int vbsf_nls_to_shflstring(struct vbsf_super_info *pSuperInfo, const char *pszNls, PSHFLSTRING *ppString)
+{
+    int          rc;
+    size_t const cchNls  = strlen(pszNls);
+    PSHFLSTRING  pString = NULL;
+    if (pSuperInfo->nls) {
+        /*
+         * NLS -> UTF-8 w/ SHFL string header.
+         */
+        /* Calc length first: */
+        size_t cchUtf8 = 0;
+        size_t offNls  = 0;
+        while (offNls < cchNls) {
+            linux_wchar_t uc; /* Note! We renamed the type due to clashes. */
+            int const cbNlsCodepoint = pSuperInfo->nls->char2uni(&pszNls[offNls], cchNls - offNls, &uc);
+            if (cbNlsCodepoint >= 0) {
+                char achTmp[16];
+#if RTLNX_VER_MIN(2,6,31)
+                int cbUtf8Codepoint = utf32_to_utf8(uc, achTmp, sizeof(achTmp));
+#else
+                int cbUtf8Codepoint = utf8_wctomb(achTmp, uc, sizeof(achTmp));
+#endif
+                if (cbUtf8Codepoint > 0) {
+                    cchUtf8 += cbUtf8Codepoint;
+                    offNls  += cbNlsCodepoint;
+                } else {
+                    Log(("vbsf_nls_to_shflstring: utf32_to_utf8/utf8_wctomb(%#x) failed: %d\n", uc, cbUtf8Codepoint));
+                    return -EINVAL;
+                }
+            } else {
+                Log(("vbsf_nls_to_shflstring: nls->char2uni(%.*Rhxs) failed: %d\n",
+                     RT_MIN(8, cchNls - offNls), &pszNls[offNls], cbNlsCodepoint));
+                return -EINVAL;
+            }
+        }
+        if (cchUtf8 + 1 < _64K) {
+            /* Allocate: */
+            pString = (PSHFLSTRING)kmalloc(SHFLSTRING_HEADER_SIZE + cchUtf8 + 1, GFP_KERNEL);
+            if (pString) {
+                char *pchDst = pString->String.ach;
+                pString->u16Length = (uint16_t)cchUtf8;
+                pString->u16Size   = (uint16_t)(cchUtf8 + 1);
+
+                /* Do the conversion (cchUtf8 is counted down): */
+                rc     = 0;
+                offNls = 0;
+                while (offNls < cchNls) {
+                    linux_wchar_t uc; /* Note! We renamed the type due to clashes. */
+                    int const cbNlsCodepoint = pSuperInfo->nls->char2uni(&pszNls[offNls], cchNls - offNls, &uc);
+                    if (cbNlsCodepoint >= 0) {
+#if RTLNX_VER_MIN(2,6,31)
+                        int cbUtf8Codepoint = utf32_to_utf8(uc, pchDst, cchUtf8);
+#else
+                        int cbUtf8Codepoint = utf8_wctomb(pchDst, uc, cchUtf8);
+#endif
+                        if (cbUtf8Codepoint > 0) {
+                            AssertBreakStmt(cbUtf8Codepoint <= cchUtf8, rc = -EINVAL);
+                            cchUtf8 -= cbUtf8Codepoint;
+                            pchDst  += cbUtf8Codepoint;
+                            offNls  += cbNlsCodepoint;
+                        } else {
+                            Log(("vbsf_nls_to_shflstring: utf32_to_utf8/utf8_wctomb(%#x) failed! %d, cchUtf8=%zu\n",
+                                 uc, cbUtf8Codepoint, cchUtf8));
+                            rc = -EINVAL;
+                            break;
+                        }
+                    } else {
+                        Log(("vbsf_nls_to_shflstring: nls->char2uni(%.*Rhxs) failed! %d\n",
+                             RT_MIN(8, cchNls - offNls), &pszNls[offNls], cbNlsCodepoint));
+                        rc = -EINVAL;
+                        break;
+                    }
+                }
+                if (rc == 0) {
+                    /*
+                     * Succeeded.  Just terminate the string and we're good.
+                     */
+                    Assert(pchDst - pString->String.ach == pString->u16Length);
+                    *pchDst = '\0';
+                } else {
+                    kfree(pString);
+                    pString = NULL;
+                }
+            } else {
+                Log(("vbsf_nls_to_shflstring: failed to allocate %u bytes\n", SHFLSTRING_HEADER_SIZE + cchUtf8 + 1));
+                rc = -ENOMEM;
+            }
+        } else {
+            Log(("vbsf_nls_to_shflstring: too long: %zu bytes (%zu nls bytes)\n", cchUtf8, cchNls));
+            rc = -ENAMETOOLONG;
+        }
+    } else {
+        /*
+         * UTF-8 -> UTF-8 w/ SHFL string header.
+         */
+        if (cchNls + 1 < _64K) {
+            pString = (PSHFLSTRING)kmalloc(SHFLSTRING_HEADER_SIZE + cchNls + 1, GFP_KERNEL);
+            if (pString) {
+                pString->u16Length = (uint16_t)cchNls;
+                pString->u16Size   = (uint16_t)(cchNls + 1);
+                RT_BCOPY_UNFORTIFIED(pString->String.ach, pszNls, cchNls);
+                pString->String.ach[cchNls] = '\0';
+                rc = 0;
+            } else {
+                Log(("vbsf_nls_to_shflstring: failed to allocate %u bytes\n", SHFLSTRING_HEADER_SIZE + cchNls + 1));
+                rc = -ENOMEM;
+            }
+        } else {
+            Log(("vbsf_nls_to_shflstring: too long: %zu bytes\n", cchNls));
+            rc = -ENAMETOOLONG;
+        }
+    }
+    *ppString = pString;
+    return rc;
+}
+
+
+/**
+ * Convert from VBox to linux time.
+ */
+#if RTLNX_VER_MAX(2,6,0)
+DECLINLINE(void) vbsf_time_to_linux(time_t *pLinuxDst, PCRTTIMESPEC pVBoxSrc)
+{
+    int64_t t = RTTimeSpecGetNano(pVBoxSrc);
+    do_div(t, RT_NS_1SEC);
+    *pLinuxDst = t;
+}
+#else /* >= 2.6.0 */
+# if RTLNX_VER_MAX(4,18,0)
+DECLINLINE(void) vbsf_time_to_linux(struct timespec *pLinuxDst, PCRTTIMESPEC pVBoxSrc)
+# else
+DECLINLINE(void) vbsf_time_to_linux(struct timespec64 *pLinuxDst, PCRTTIMESPEC pVBoxSrc)
+# endif
+{
+    int64_t t = RTTimeSpecGetNano(pVBoxSrc);
+    pLinuxDst->tv_nsec = do_div(t, RT_NS_1SEC);
+    pLinuxDst->tv_sec  = t;
+}
+#endif /* >= 2.6.0 */
+
+
+/**
+ * Convert from linux to VBox time.
+ */
+#if RTLNX_VER_MAX(2,6,0)
+DECLINLINE(void) vbsf_time_to_vbox(PRTTIMESPEC pVBoxDst, time_t *pLinuxSrc)
+{
+    RTTimeSpecSetNano(pVBoxDst, RT_NS_1SEC_64 * *pLinuxSrc);
+}
+#else /* >= 2.6.0 */
+# if RTLNX_VER_MAX(4,18,0)
+DECLINLINE(void) vbsf_time_to_vbox(PRTTIMESPEC pVBoxDst, struct timespec const *pLinuxSrc)
+# else
+DECLINLINE(void) vbsf_time_to_vbox(PRTTIMESPEC pVBoxDst, struct timespec64 const *pLinuxSrc)
+# endif
+{
+    RTTimeSpecSetNano(pVBoxDst, pLinuxSrc->tv_nsec + pLinuxSrc->tv_sec * (int64_t)RT_NS_1SEC);
+}
+#endif /* >= 2.6.0 */
+
+
+/**
+ * Converts VBox access permissions to Linux ones (mode & 0777).
+ *
+ * @note Currently identical.
+ * @sa sf_access_permissions_to_vbox + */ +DECLINLINE(int) sf_access_permissions_to_linux(uint32_t fAttr) +{ + /* Access bits should be the same: */ + AssertCompile(RTFS_UNIX_IRUSR == S_IRUSR); + AssertCompile(RTFS_UNIX_IWUSR == S_IWUSR); + AssertCompile(RTFS_UNIX_IXUSR == S_IXUSR); + AssertCompile(RTFS_UNIX_IRGRP == S_IRGRP); + AssertCompile(RTFS_UNIX_IWGRP == S_IWGRP); + AssertCompile(RTFS_UNIX_IXGRP == S_IXGRP); + AssertCompile(RTFS_UNIX_IROTH == S_IROTH); + AssertCompile(RTFS_UNIX_IWOTH == S_IWOTH); + AssertCompile(RTFS_UNIX_IXOTH == S_IXOTH); + + return fAttr & RTFS_UNIX_ALL_ACCESS_PERMS; +} + + +/** + * Produce the Linux mode mask, given VBox, mount options and file type. + */ +DECLINLINE(int) sf_file_mode_to_linux(uint32_t fVBoxMode, int fFixedMode, int fClearMask, int fType) +{ + int fLnxMode = sf_access_permissions_to_linux(fVBoxMode); + if (fFixedMode != ~0) + fLnxMode = fFixedMode & 0777; + fLnxMode &= ~fClearMask; + fLnxMode |= fType; + return fLnxMode; +} + +/** + * Update inode timestamps. + * + * @param pInode Linux inode object. + * @param pObjInfo VBox vboxsf object. + */ +static void vbsf_update_inode_timestamps(struct inode *pInode, PSHFLFSOBJINFO pObjInfo) +{ +#if RTLNX_VER_MIN(6,6,0) + struct timespec64 ts; + vbsf_time_to_linux(&ts, &pObjInfo->ChangeTime); + inode_set_ctime_to_ts(pInode, ts); +#else + vbsf_time_to_linux(&pInode->i_atime, &pObjInfo->AccessTime); + vbsf_time_to_linux(&pInode->i_ctime, &pObjInfo->ChangeTime); + vbsf_time_to_linux(&pInode->i_mtime, &pObjInfo->ModificationTime); +#endif +} + +/** + * Initializes the @a inode attributes based on @a pObjInfo and @a pSuperInfo + * options. + */ +void vbsf_init_inode(struct inode *inode, struct vbsf_inode_info *sf_i, PSHFLFSOBJINFO pObjInfo, + struct vbsf_super_info *pSuperInfo) +{ + PCSHFLFSOBJATTR pAttr = &pObjInfo->Attr; + + TRACE(); + + sf_i->ts_up_to_date = jiffies; + sf_i->force_restat = 0; + + if (RTFS_IS_DIRECTORY(pAttr->fMode)) { + inode->i_mode = sf_file_mode_to_linux(pAttr->fMode, pSuperInfo->dmode, pSuperInfo->dmask, S_IFDIR); + inode->i_op = &vbsf_dir_iops; + inode->i_fop = &vbsf_dir_fops; + + /* XXX: this probably should be set to the number of entries + in the directory plus two (. ..) */ + set_nlink(inode, 1); + } + else if (RTFS_IS_SYMLINK(pAttr->fMode)) { + /** @todo r=bird: Aren't System V symlinks w/o any mode mask? IIRC there is + * no lchmod on Linux. */ + inode->i_mode = sf_file_mode_to_linux(pAttr->fMode, pSuperInfo->fmode, pSuperInfo->fmask, S_IFLNK); + inode->i_op = &vbsf_lnk_iops; + set_nlink(inode, 1); + } else { + inode->i_mode = sf_file_mode_to_linux(pAttr->fMode, pSuperInfo->fmode, pSuperInfo->fmask, S_IFREG); + inode->i_op = &vbsf_reg_iops; + inode->i_fop = &vbsf_reg_fops; + inode->i_mapping->a_ops = &vbsf_reg_aops; +#if RTLNX_VER_RANGE(2,5,17, 4,0,0) + inode->i_mapping->backing_dev_info = &pSuperInfo->bdi; /* This is needed for mmap. */ +#endif + set_nlink(inode, 1); + } + +#if RTLNX_VER_MIN(3,5,0) + inode->i_uid = make_kuid(current_user_ns(), pSuperInfo->uid); + inode->i_gid = make_kgid(current_user_ns(), pSuperInfo->gid); +#else + inode->i_uid = pSuperInfo->uid; + inode->i_gid = pSuperInfo->gid; +#endif + + inode->i_size = pObjInfo->cbObject; +#if RTLNX_VER_MAX(2,6,19) && !defined(KERNEL_FC6) + inode->i_blksize = 4096; +#endif +#if RTLNX_VER_MIN(2,4,11) + inode->i_blkbits = 12; +#endif + /* i_blocks always in units of 512 bytes! 
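+       E.g. cbAllocated = 4096 gives i_blocks = (4096 + 511) / 512 = 8.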
*/
+ inode->i_blocks = (pObjInfo->cbAllocated + 511) / 512;
+
+ vbsf_update_inode_timestamps(inode, pObjInfo);
+
+ sf_i->BirthTime = pObjInfo->BirthTime;
+ sf_i->ModificationTime = pObjInfo->ModificationTime;
+ RTTimeSpecSetSeconds(&sf_i->ModificationTimeAtOurLastWrite, 0);
+}
+
+
+/**
+ * Update the inode with new object info from the host.
+ *
+ * Called by sf_inode_revalidate() and sf_inode_revalidate_with_handle().
+ */
+void vbsf_update_inode(struct inode *pInode, struct vbsf_inode_info *pInodeInfo, PSHFLFSOBJINFO pObjInfo,
+ struct vbsf_super_info *pSuperInfo, bool fInodeLocked, unsigned fSetAttrs)
+{
+ PCSHFLFSOBJATTR pAttr = &pObjInfo->Attr;
+ int fMode;
+
+ TRACE();
+
+#if RTLNX_VER_MIN(4,5,0)
+ if (!fInodeLocked)
+ inode_lock(pInode);
+#endif
+
+ /*
+ * Calc new mode mask and update it if it changed.
+ */
+ if (RTFS_IS_DIRECTORY(pAttr->fMode))
+ fMode = sf_file_mode_to_linux(pAttr->fMode, pSuperInfo->dmode, pSuperInfo->dmask, S_IFDIR);
+ else if (RTFS_IS_SYMLINK(pAttr->fMode))
+ /** @todo r=bird: Aren't System V symlinks w/o any mode mask? IIRC there is
+ * no lchmod on Linux. */
+ fMode = sf_file_mode_to_linux(pAttr->fMode, pSuperInfo->fmode, pSuperInfo->fmask, S_IFLNK);
+ else
+ fMode = sf_file_mode_to_linux(pAttr->fMode, pSuperInfo->fmode, pSuperInfo->fmask, S_IFREG);
+
+ if (fMode == pInode->i_mode) {
+ /* likely */
+ } else {
+ if ((fMode & S_IFMT) == (pInode->i_mode & S_IFMT))
+ pInode->i_mode = fMode;
+ else {
+ SFLOGFLOW(("vbsf_update_inode: Changed from %o to %o (%s)\n",
+ pInode->i_mode & S_IFMT, fMode & S_IFMT, pInodeInfo->path->String.ach));
+ /** @todo we probably need to be more drastic... */
+ vbsf_init_inode(pInode, pInodeInfo, pObjInfo, pSuperInfo);
+
+#if RTLNX_VER_MIN(4,5,0)
+ if (!fInodeLocked)
+ inode_unlock(pInode);
+#endif
+ return;
+ }
+ }
+
+ /*
+ * Update the sizes.
+ * Note! i_blocks is always in units of 512 bytes!
+ */
+ pInode->i_blocks = (pObjInfo->cbAllocated + 511) / 512;
+ i_size_write(pInode, pObjInfo->cbObject);
+
+ /*
+ * Update the timestamps.
+ */
+ vbsf_update_inode_timestamps(pInode, pObjInfo);
+ pInodeInfo->BirthTime = pObjInfo->BirthTime;
+
+ /*
+ * Mark it as up to date.
+ * Best to do this before we start with any expensive map invalidation.
+ */
+ pInodeInfo->ts_up_to_date = jiffies;
+ pInodeInfo->force_restat = 0;
+
+ /*
+ * If the modification time changed, we may have to invalidate the page
+ * cache pages associated with this inode if we suspect the change was
+ * made by the host. How suspicious we are depends on the cache mode.
+ *
+ * Note! The invalidate_inode_pages() call is pretty weak. It will _not_
+ * touch pages that are already mapped into an address space, but it
+ * will help if the file isn't currently mmap'ed or if we're in read
+ * or read/write caching mode.
+ */
+ if (!RTTimeSpecIsEqual(&pInodeInfo->ModificationTime, &pObjInfo->ModificationTime)) {
+ if (RTFS_IS_FILE(pAttr->fMode)) {
+ if (!(fSetAttrs & (ATTR_MTIME | ATTR_SIZE))) {
+ bool fInvalidate;
+ if (pSuperInfo->enmCacheMode == kVbsfCacheMode_None) {
+ fInvalidate = true; /* No-caching: always invalidate.
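+                        (the host may change the file behind our back at any time in this mode)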
*/ + } else { + if (RTTimeSpecIsEqual(&pInodeInfo->ModificationTimeAtOurLastWrite, &pInodeInfo->ModificationTime)) { + fInvalidate = false; /* Could be our write, so don't invalidate anything */ + RTTimeSpecSetSeconds(&pInodeInfo->ModificationTimeAtOurLastWrite, 0); + } else { + /*RTLogBackdoorPrintf("vbsf_update_inode: Invalidating the mapping %s - %RU64 vs %RU64 vs %RU64 - %#x\n", + pInodeInfo->path->String.ach, + RTTimeSpecGetNano(&pInodeInfo->ModificationTimeAtOurLastWrite), + RTTimeSpecGetNano(&pInodeInfo->ModificationTime), + RTTimeSpecGetNano(&pObjInfo->ModificationTime), fSetAttrs);*/ + fInvalidate = true; /* We haven't modified the file recently, so probably a host update. */ + } + } + pInodeInfo->ModificationTime = pObjInfo->ModificationTime; + + if (fInvalidate) { + struct address_space *mapping = pInode->i_mapping; + if (mapping && mapping->nrpages > 0) { + SFLOGFLOW(("vbsf_update_inode: Invalidating the mapping %s (%#x)\n", pInodeInfo->path->String.ach, fSetAttrs)); +#if RTLNX_VER_MIN(2,6,34) + invalidate_mapping_pages(mapping, 0, ~(pgoff_t)0); +#elif RTLNX_VER_MIN(2,5,41) + invalidate_inode_pages(mapping); +#else + invalidate_inode_pages(pInode); +#endif + } + } + } else { + RTTimeSpecSetSeconds(&pInodeInfo->ModificationTimeAtOurLastWrite, 0); + pInodeInfo->ModificationTime = pObjInfo->ModificationTime; + } + } else + pInodeInfo->ModificationTime = pObjInfo->ModificationTime; + } + + /* + * Done. + */ +#if RTLNX_VER_MIN(4,5,0) + if (!fInodeLocked) + inode_unlock(pInode); +#endif +} + + +/** @note Currently only used for the root directory during (re-)mount. */ +int vbsf_stat(const char *caller, struct vbsf_super_info *pSuperInfo, SHFLSTRING *path, PSHFLFSOBJINFO result, int ok_to_fail) +{ + int rc; + VBOXSFCREATEREQ *pReq; + NOREF(caller); + + TRACE(); + + pReq = (VBOXSFCREATEREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq) + path->u16Size); + if (pReq) { + RT_ZERO(*pReq); + RT_BCOPY_UNFORTIFIED(&pReq->StrPath, path, SHFLSTRING_HEADER_SIZE + path->u16Size); + pReq->CreateParms.Handle = SHFL_HANDLE_NIL; + pReq->CreateParms.CreateFlags = SHFL_CF_LOOKUP | SHFL_CF_ACT_FAIL_IF_NEW; + + LogFunc(("Calling VbglR0SfHostReqCreate on %s\n", path->String.utf8)); + rc = VbglR0SfHostReqCreate(pSuperInfo->map.root, pReq); + if (RT_SUCCESS(rc)) { + if (pReq->CreateParms.Result == SHFL_FILE_EXISTS) { + *result = pReq->CreateParms.Info; + rc = 0; + } else { + if (!ok_to_fail) + LogFunc(("VbglR0SfHostReqCreate on %s: file does not exist: %d (caller=%s)\n", + path->String.utf8, pReq->CreateParms.Result, caller)); + rc = -ENOENT; + } + } else if (rc == VERR_INVALID_NAME) { + rc = -ENOENT; /* this can happen for names like 'foo*' on a Windows host */ + } else { + LogFunc(("VbglR0SfHostReqCreate failed on %s: %Rrc (caller=%s)\n", path->String.utf8, rc, caller)); + rc = -EPROTO; + } + VbglR0PhysHeapFree(pReq); + } + else + rc = -ENOMEM; + return rc; +} + + +/** + * Revalidate an inode, inner worker. + * + * @sa sf_inode_revalidate() + */ +int vbsf_inode_revalidate_worker(struct dentry *dentry, bool fForced, bool fInodeLocked) +{ + int rc; + struct inode *pInode = dentry ? dentry->d_inode : NULL; + if (pInode) { + struct vbsf_inode_info *sf_i = VBSF_GET_INODE_INFO(pInode); + struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(pInode->i_sb); + AssertReturn(sf_i, -EINVAL); + AssertReturn(pSuperInfo, -EINVAL); + + /* + * Can we get away without any action here? 
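+         * (The inode is trusted while "jiffies - ts_up_to_date" is still
+         * below the mount's inode TTL and no restat has been forced;
+         * otherwise we re-query the host below.)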
+ */ + if ( !fForced + && !sf_i->force_restat + && jiffies - sf_i->ts_up_to_date < pSuperInfo->cJiffiesInodeTTL) + rc = 0; + else { + /* + * No, we have to query the file info from the host. + * Try get a handle we can query, any kind of handle will do here. + */ + struct vbsf_handle *pHandle = vbsf_handle_find(sf_i, 0, 0); + if (pHandle) { + /* Query thru pHandle. */ + VBOXSFOBJINFOREQ *pReq = (VBOXSFOBJINFOREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq)); + if (pReq) { + RT_ZERO(*pReq); + rc = VbglR0SfHostReqQueryObjInfo(pSuperInfo->map.root, pReq, pHandle->hHost); + if (RT_SUCCESS(rc)) { + /* + * Reset the TTL and copy the info over into the inode structure. + */ + vbsf_update_inode(pInode, sf_i, &pReq->ObjInfo, pSuperInfo, fInodeLocked, 0 /*fSetAttrs*/); + } else if (rc == VERR_INVALID_HANDLE) { + rc = -ENOENT; /* Restore.*/ + } else { + LogFunc(("VbglR0SfHostReqQueryObjInfo failed on %#RX64: %Rrc\n", pHandle->hHost, rc)); + rc = -RTErrConvertToErrno(rc); + } + VbglR0PhysHeapFree(pReq); + } else + rc = -ENOMEM; + vbsf_handle_release(pHandle, pSuperInfo, "vbsf_inode_revalidate_worker"); + + } else { + /* Query via path. */ + SHFLSTRING *pPath = sf_i->path; + VBOXSFCREATEREQ *pReq = (VBOXSFCREATEREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq) + pPath->u16Size); + if (pReq) { + RT_ZERO(*pReq); + RT_BCOPY_UNFORTIFIED(&pReq->StrPath, pPath, SHFLSTRING_HEADER_SIZE + pPath->u16Size); + pReq->CreateParms.Handle = SHFL_HANDLE_NIL; + pReq->CreateParms.CreateFlags = SHFL_CF_LOOKUP | SHFL_CF_ACT_FAIL_IF_NEW; + + rc = VbglR0SfHostReqCreate(pSuperInfo->map.root, pReq); + if (RT_SUCCESS(rc)) { + if (pReq->CreateParms.Result == SHFL_FILE_EXISTS) { + /* + * Reset the TTL and copy the info over into the inode structure. + */ + vbsf_update_inode(pInode, sf_i, &pReq->CreateParms.Info, pSuperInfo, fInodeLocked, 0 /*fSetAttrs*/); + rc = 0; + } else { + rc = -ENOENT; + } + } else if (rc == VERR_INVALID_NAME) { + rc = -ENOENT; /* this can happen for names like 'foo*' on a Windows host */ + } else { + LogFunc(("VbglR0SfHostReqCreate failed on %s: %Rrc\n", pPath->String.ach, rc)); + rc = -EPROTO; + } + VbglR0PhysHeapFree(pReq); + } + else + rc = -ENOMEM; + } + } + } else { + LogFunc(("no dentry(%p) or inode(%p)\n", dentry, pInode)); + rc = -EINVAL; + } + return rc; +} + + +#if RTLNX_VER_MAX(2,5,18) +/** + * Revalidate an inode for 2.4. + * + * This is called in the stat(), lstat() and readlink() code paths. In the stat + * cases the caller will use the result afterwards to produce the stat data. + * + * @note 2.4.x has a getattr() inode operation too, but it is not used. + */ +int vbsf_inode_revalidate(struct dentry *dentry) +{ + /* + * We pretend the inode is locked here, as 2.4.x does not have inode level locking. + */ + return vbsf_inode_revalidate_worker(dentry, false /*fForced*/, true /*fInodeLocked*/); +} +#endif /* < 2.5.18 */ + + +/** + * Similar to sf_inode_revalidate, but uses associated host file handle as that + * is quite a bit faster. + */ +int vbsf_inode_revalidate_with_handle(struct dentry *dentry, SHFLHANDLE hHostFile, bool fForced, bool fInodeLocked) +{ + int err; + struct inode *pInode = dentry ? dentry->d_inode : NULL; + if (!pInode) { + LogFunc(("no dentry(%p) or inode(%p)\n", dentry, pInode)); + err = -EINVAL; + } else { + struct vbsf_inode_info *sf_i = VBSF_GET_INODE_INFO(pInode); + struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(pInode->i_sb); + AssertReturn(sf_i, -EINVAL); + AssertReturn(pSuperInfo, -EINVAL); + + /* + * Can we get away without any action here? 
+ */ + if ( !fForced + && !sf_i->force_restat + && jiffies - sf_i->ts_up_to_date < pSuperInfo->cJiffiesInodeTTL) + err = 0; + else { + /* + * No, we have to query the file info from the host. + */ + VBOXSFOBJINFOREQ *pReq = (VBOXSFOBJINFOREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq)); + if (pReq) { + RT_ZERO(*pReq); + err = VbglR0SfHostReqQueryObjInfo(pSuperInfo->map.root, pReq, hHostFile); + if (RT_SUCCESS(err)) { + /* + * Reset the TTL and copy the info over into the inode structure. + */ + vbsf_update_inode(pInode, sf_i, &pReq->ObjInfo, pSuperInfo, fInodeLocked, 0 /*fSetAttrs*/); + } else { + LogFunc(("VbglR0SfHostReqQueryObjInfo failed on %#RX64: %Rrc\n", hHostFile, err)); + err = -RTErrConvertToErrno(err); + } + VbglR0PhysHeapFree(pReq); + } else + err = -ENOMEM; + } + } + return err; +} + + +/* on 2.6 this is a proxy for [sf_inode_revalidate] which (as a side + effect) updates inode attributes for [dentry] (given that [dentry] + has inode at all) from these new attributes we derive [kstat] via + [generic_fillattr] */ +#if RTLNX_VER_MIN(2,5,18) +# if RTLNX_VER_MIN(6,3,0) +int vbsf_inode_getattr(struct mnt_idmap *idmap, const struct path *path, + struct kstat *kstat, u32 request_mask, unsigned int flags) +# elif RTLNX_VER_MIN(5,12,0) +int vbsf_inode_getattr(struct user_namespace *ns, const struct path *path, + struct kstat *kstat, u32 request_mask, unsigned int flags) +# elif RTLNX_VER_MIN(4,11,0) +int vbsf_inode_getattr(const struct path *path, struct kstat *kstat, u32 request_mask, unsigned int flags) +# else +int vbsf_inode_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *kstat) +# endif +{ + int rc; +# if RTLNX_VER_MIN(4,11,0) + struct dentry *dentry = path->dentry; +# endif + +# if RTLNX_VER_MIN(4,11,0) + SFLOGFLOW(("vbsf_inode_getattr: dentry=%p request_mask=%#x flags=%#x\n", dentry, request_mask, flags)); +# else + SFLOGFLOW(("vbsf_inode_getattr: dentry=%p\n", dentry)); +# endif + +# if RTLNX_VER_MIN(4,11,0) + /* + * With the introduction of statx() userland can control whether we + * update the inode information or not. + */ + switch (flags & AT_STATX_SYNC_TYPE) { + default: + rc = vbsf_inode_revalidate_worker(dentry, false /*fForced*/, false /*fInodeLocked*/); + break; + + case AT_STATX_FORCE_SYNC: + rc = vbsf_inode_revalidate_worker(dentry, true /*fForced*/, false /*fInodeLocked*/); + break; + + case AT_STATX_DONT_SYNC: + rc = 0; + break; + } +# else + rc = vbsf_inode_revalidate_worker(dentry, false /*fForced*/, false /*fInodeLocked*/); +# endif + if (rc == 0) { + /* Do generic filling in of info. */ +# if RTLNX_VER_MIN(6,6,0) + generic_fillattr(idmap, request_mask, dentry->d_inode, kstat); +# elif RTLNX_VER_MIN(6,3,0) + generic_fillattr(idmap, dentry->d_inode, kstat); +# elif RTLNX_VER_MIN(5,12,0) + generic_fillattr(ns, dentry->d_inode, kstat); +# else + generic_fillattr(dentry->d_inode, kstat); +# endif + + /* Add birth time. 
*/ +# if RTLNX_VER_MIN(4,11,0) + if (dentry->d_inode) { + struct vbsf_inode_info *pInodeInfo = VBSF_GET_INODE_INFO(dentry->d_inode); + if (pInodeInfo) { + vbsf_time_to_linux(&kstat->btime, &pInodeInfo->BirthTime); + kstat->result_mask |= STATX_BTIME; + } + } +# endif + + /* + * FsPerf shows the following numbers for sequential file access against + * a tmpfs folder on an AMD 1950X host running debian buster/sid: + * + * block size = r128600 ----- r128755 ----- + * reads reads writes + * 4096 KB = 2254 MB/s 4953 MB/s 3668 MB/s + * 2048 KB = 2368 MB/s 4908 MB/s 3541 MB/s + * 1024 KB = 2208 MB/s 4011 MB/s 3291 MB/s + * 512 KB = 1908 MB/s 3399 MB/s 2721 MB/s + * 256 KB = 1625 MB/s 2679 MB/s 2251 MB/s + * 128 KB = 1413 MB/s 1967 MB/s 1684 MB/s + * 64 KB = 1152 MB/s 1409 MB/s 1265 MB/s + * 32 KB = 726 MB/s 815 MB/s 783 MB/s + * 16 KB = 683 MB/s 475 MB/s + * 8 KB = 294 MB/s 286 MB/s + * 4 KB = 145 MB/s 156 MB/s 149 MB/s + * + */ + if (S_ISREG(kstat->mode)) + kstat->blksize = _1M; + else if (S_ISDIR(kstat->mode)) + /** @todo this may need more tuning after we rewrite the directory handling. */ + kstat->blksize = _16K; + } + return rc; +} +#endif /* >= 2.5.18 */ + + +/** + * Modify inode attributes. + */ +#if RTLNX_VER_MIN(6,3,0) +int vbsf_inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *iattr) +#elif RTLNX_VER_MIN(5,12,0) +int vbsf_inode_setattr(struct user_namespace *ns, struct dentry *dentry, struct iattr *iattr) +#else +int vbsf_inode_setattr(struct dentry *dentry, struct iattr *iattr) +#endif +{ + struct inode *pInode = dentry->d_inode; + struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(pInode->i_sb); + struct vbsf_inode_info *sf_i = VBSF_GET_INODE_INFO(pInode); + int vrc; + int rc; + + SFLOGFLOW(("vbsf_inode_setattr: dentry=%p inode=%p ia_valid=%#x %s\n", + dentry, pInode, iattr->ia_valid, sf_i ? sf_i->path->String.ach : NULL)); + AssertReturn(sf_i, -EINVAL); + + /* + * Do minimal attribute permission checks. We set ATTR_FORCE since we cannot + * preserve ownership and such and would end up with EPERM here more often than + * we would like. For instance it would cause 'cp' to complain about EPERM + * from futimes() when asked to preserve times, see ticketref:18569. + */ + iattr->ia_valid |= ATTR_FORCE; +#if (RTLNX_VER_RANGE(3,16,39, 3,17,0)) || RTLNX_VER_MIN(4,9,0) || (RTLNX_VER_RANGE(4,1,37, 4,2,0)) || RTLNX_UBUNTU_ABI_MIN(4,4,255,208) +# if RTLNX_VER_MIN(6,3,0) + rc = setattr_prepare(idmap, dentry, iattr); +# elif RTLNX_VER_MIN(5,12,0) + rc = setattr_prepare(ns, dentry, iattr); +# else + rc = setattr_prepare(dentry, iattr); +# endif +#else + rc = inode_change_ok(pInode, iattr); +#endif + if (rc == 0) { + /* + * Don't modify MTIME and CTIME for open(O_TRUNC) and ftruncate, those + * operations will set those timestamps automatically. Saves a host call. + */ + unsigned fAttrs = iattr->ia_valid; +#if RTLNX_VER_MIN(2,6,15) + fAttrs &= ~ATTR_FILE; +#endif + if ( fAttrs == (ATTR_SIZE | ATTR_MTIME | ATTR_CTIME) +#if RTLNX_VER_MIN(2,6,24) + || (fAttrs & (ATTR_OPEN | ATTR_SIZE)) == (ATTR_OPEN | ATTR_SIZE) +#endif + ) + fAttrs &= ~(ATTR_MTIME | ATTR_CTIME); + + /* + * We only implement a handful of attributes, so ignore any attempts + * at setting bits we don't support. + */ + if (fAttrs & (ATTR_MODE | ATTR_ATIME | ATTR_MTIME | ATTR_CTIME | ATTR_SIZE)) { + /* + * Try find a handle which allows us to modify the attributes, otherwise + * open the file/dir/whatever. 
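+             * (ATTR_SIZE requires a handle with VBSF_HANDLE_F_WRITE; plain
+             * mode/timestamp changes can reuse any cached handle.)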
+ */ + union SetAttrReqs + { + VBOXSFCREATEREQ Create; + VBOXSFOBJINFOREQ Info; + VBOXSFSETFILESIZEREQ SetSize; + VBOXSFCLOSEREQ Close; + } *pReq; + size_t cbReq; + SHFLHANDLE hHostFile; + /** @todo ATTR_FILE (2.6.15+) could be helpful here if we like. */ + struct vbsf_handle *pHandle = fAttrs & ATTR_SIZE + ? vbsf_handle_find(sf_i, VBSF_HANDLE_F_WRITE, 0) + : vbsf_handle_find(sf_i, 0, 0); + if (pHandle) { + hHostFile = pHandle->hHost; + cbReq = RT_MAX(sizeof(VBOXSFOBJINFOREQ), sizeof(VBOXSFSETFILESIZEREQ)); + pReq = (union SetAttrReqs *)VbglR0PhysHeapAlloc(cbReq); + if (pReq) { + /* likely */ + } else + rc = -ENOMEM; + } else { + hHostFile = SHFL_HANDLE_NIL; + cbReq = RT_MAX(sizeof(pReq->Info), sizeof(pReq->Create) + SHFLSTRING_HEADER_SIZE + sf_i->path->u16Size); + pReq = (union SetAttrReqs *)VbglR0PhysHeapAlloc(cbReq); + if (pReq) { + RT_ZERO(pReq->Create.CreateParms); + pReq->Create.CreateParms.Handle = SHFL_HANDLE_NIL; + pReq->Create.CreateParms.CreateFlags = SHFL_CF_ACT_OPEN_IF_EXISTS + | SHFL_CF_ACT_FAIL_IF_NEW + | SHFL_CF_ACCESS_ATTR_WRITE; + if (fAttrs & ATTR_SIZE) + pReq->Create.CreateParms.CreateFlags |= SHFL_CF_ACCESS_WRITE; + RT_BCOPY_UNFORTIFIED(&pReq->Create.StrPath, sf_i->path, SHFLSTRING_HEADER_SIZE + sf_i->path->u16Size); + vrc = VbglR0SfHostReqCreate(pSuperInfo->map.root, &pReq->Create); + if (RT_SUCCESS(vrc)) { + if (pReq->Create.CreateParms.Result == SHFL_FILE_EXISTS) { + hHostFile = pReq->Create.CreateParms.Handle; + Assert(hHostFile != SHFL_HANDLE_NIL); + vbsf_dentry_chain_increase_ttl(dentry); + } else { + LogFunc(("file %s does not exist\n", sf_i->path->String.utf8)); + vbsf_dentry_invalidate_ttl(dentry); + sf_i->force_restat = true; + rc = -ENOENT; + } + } else { + rc = -RTErrConvertToErrno(vrc); + LogFunc(("VbglR0SfCreate(%s) failed vrc=%Rrc rc=%d\n", sf_i->path->String.ach, vrc, rc)); + } + } else + rc = -ENOMEM; + } + if (rc == 0) { + /* + * Set mode and/or timestamps. + */ + if (fAttrs & (ATTR_MODE | ATTR_ATIME | ATTR_MTIME | ATTR_CTIME)) { + /* Fill in the attributes. Start by setting all to zero + since the host will ignore zeroed fields. */ + RT_ZERO(pReq->Info.ObjInfo); + + if (fAttrs & ATTR_MODE) { + pReq->Info.ObjInfo.Attr.fMode = sf_access_permissions_to_vbox(iattr->ia_mode); + if (iattr->ia_mode & S_IFDIR) + pReq->Info.ObjInfo.Attr.fMode |= RTFS_TYPE_DIRECTORY; + else if (iattr->ia_mode & S_IFLNK) + pReq->Info.ObjInfo.Attr.fMode |= RTFS_TYPE_SYMLINK; + else + pReq->Info.ObjInfo.Attr.fMode |= RTFS_TYPE_FILE; + } + if (fAttrs & ATTR_ATIME) + vbsf_time_to_vbox(&pReq->Info.ObjInfo.AccessTime, &iattr->ia_atime); + if (fAttrs & ATTR_MTIME) + vbsf_time_to_vbox(&pReq->Info.ObjInfo.ModificationTime, &iattr->ia_mtime); + if (fAttrs & ATTR_CTIME) + vbsf_time_to_vbox(&pReq->Info.ObjInfo.ChangeTime, &iattr->ia_ctime); + + /* Make the change. */ + vrc = VbglR0SfHostReqSetObjInfo(pSuperInfo->map.root, &pReq->Info, hHostFile); + if (RT_SUCCESS(vrc)) { + vbsf_update_inode(pInode, sf_i, &pReq->Info.ObjInfo, pSuperInfo, true /*fLocked*/, fAttrs); + } else { + rc = -RTErrConvertToErrno(vrc); + LogFunc(("VbglR0SfHostReqSetObjInfo(%s) failed vrc=%Rrc rc=%d\n", sf_i->path->String.ach, vrc, rc)); + } + } + + /* + * Change the file size. + * Note! Old API is more convenient here as it gives us up to date + * inode info back. 
+ */ + if ((fAttrs & ATTR_SIZE) && rc == 0) { + /*vrc = VbglR0SfHostReqSetFileSize(pSuperInfo->map.root, &pReq->SetSize, hHostFile, iattr->ia_size); + if (RT_SUCCESS(vrc)) { + i_size_write(pInode, iattr->ia_size); + } else if (vrc == VERR_NOT_IMPLEMENTED)*/ { + /* Fallback for pre 6.0 hosts: */ + RT_ZERO(pReq->Info.ObjInfo); + pReq->Info.ObjInfo.cbObject = iattr->ia_size; + vrc = VbglR0SfHostReqSetFileSizeOld(pSuperInfo->map.root, &pReq->Info, hHostFile); + if (RT_SUCCESS(vrc)) + vbsf_update_inode(pInode, sf_i, &pReq->Info.ObjInfo, pSuperInfo, true /*fLocked*/, fAttrs); + } + if (RT_SUCCESS(vrc)) { + /** @todo there is potentially more to be done here if there are mappings of + * the lovely file. */ + } else { + rc = -RTErrConvertToErrno(vrc); + LogFunc(("VbglR0SfHostReqSetFileSize(%s, %#llx) failed vrc=%Rrc rc=%d\n", + sf_i->path->String.ach, (unsigned long long)iattr->ia_size, vrc, rc)); + } + } + + /* + * Clean up. + */ + if (!pHandle) { + vrc = VbglR0SfHostReqClose(pSuperInfo->map.root, &pReq->Close, hHostFile); + if (RT_FAILURE(vrc)) + LogFunc(("VbglR0SfHostReqClose(%s [%#llx]) failed vrc=%Rrc\n", sf_i->path->String.utf8, hHostFile, vrc)); + } + } + if (pReq) + VbglR0PhysHeapFree(pReq); + if (pHandle) + vbsf_handle_release(pHandle, pSuperInfo, "vbsf_inode_setattr"); + } else + SFLOGFLOW(("vbsf_inode_setattr: Nothing to do here: %#x (was %#x).\n", fAttrs, iattr->ia_valid)); + } + return rc; +} + + +static int vbsf_make_path(const char *caller, struct vbsf_inode_info *sf_i, + const char *d_name, size_t d_len, SHFLSTRING **result) +{ + size_t path_len, shflstring_len; + SHFLSTRING *tmp; + uint16_t p_len; + uint8_t *p_name; + int fRoot = 0; + + TRACE(); + p_len = sf_i->path->u16Length; + p_name = sf_i->path->String.utf8; + + if (p_len == 1 && *p_name == '/') { + path_len = d_len + 1; + fRoot = 1; + } else { + /* lengths of constituents plus terminating zero plus slash */ + path_len = p_len + d_len + 2; + if (path_len > 0xffff) { + LogFunc(("path too long. 
caller=%s, path_len=%zu\n",
+ caller, path_len));
+ return -ENAMETOOLONG;
+ }
+ }
+
+ shflstring_len = offsetof(SHFLSTRING, String.utf8) + path_len;
+ tmp = kmalloc(shflstring_len, GFP_KERNEL);
+ if (!tmp) {
+ LogRelFunc(("kmalloc failed, caller=%s\n", caller));
+ return -ENOMEM;
+ }
+ tmp->u16Length = path_len - 1;
+ tmp->u16Size = path_len;
+
+ if (fRoot)
+ RT_BCOPY_UNFORTIFIED(&tmp->String.utf8[0], d_name, d_len + 1);
+ else {
+ RT_BCOPY_UNFORTIFIED(&tmp->String.utf8[0], p_name, p_len);
+ tmp->String.utf8[p_len] = '/';
+ RT_BCOPY_UNFORTIFIED(&tmp->String.utf8[p_len + 1], d_name, d_len);
+ tmp->String.utf8[p_len + 1 + d_len] = '\0';
+ }
+
+ *result = tmp;
+ return 0;
+}
+
+
+/**
+ * [dentry] contains a string encoded in the coding system that corresponds
+ * to [pSuperInfo]->nls; we must convert it to UTF8 here and pass it down to
+ * [vbsf_make_path], which will allocate the SHFLSTRING and fill it in.
+ */
+int vbsf_path_from_dentry(struct vbsf_super_info *pSuperInfo, struct vbsf_inode_info *sf_i, struct dentry *dentry,
+ SHFLSTRING **result, const char *caller)
+{
+ int err;
+ const char *d_name;
+ size_t d_len;
+ const char *name;
+ size_t len = 0;
+
+ TRACE();
+ d_name = dentry->d_name.name;
+ d_len = dentry->d_name.len;
+
+ if (pSuperInfo->nls) {
+ size_t in_len, i, out_bound_len;
+ const char *in;
+ char *out;
+
+ in = d_name;
+ in_len = d_len;
+
+ out_bound_len = PATH_MAX;
+ out = kmalloc(out_bound_len, GFP_KERNEL);
+ name = out;
+
+ for (i = 0; i < d_len; ++i) {
+ /* We renamed the linux kernel wchar_t type to linux_wchar_t in
+ the-linux-kernel.h, as it conflicts with the C++ type of that name. */
+ linux_wchar_t uni;
+ int nb;
+
+ nb = pSuperInfo->nls->char2uni(in, in_len, &uni);
+ if (nb < 0) {
+ LogFunc(("nls->char2uni failed %x %d\n",
+ *in, in_len));
+ err = -EINVAL;
+ goto fail1;
+ }
+ in_len -= nb;
+ in += nb;
+
+#if RTLNX_VER_MIN(2,6,31)
+ nb = utf32_to_utf8(uni, out, out_bound_len);
+#else
+ nb = utf8_wctomb(out, uni, out_bound_len);
+#endif
+ if (nb < 0) {
+ LogFunc(("nls->uni2char failed %x %d\n",
+ uni, out_bound_len));
+ err = -EINVAL;
+ goto fail1;
+ }
+ out_bound_len -= nb;
+ out += nb;
+ len += nb;
+ }
+ if (len >= PATH_MAX - 1) {
+ err = -ENAMETOOLONG;
+ goto fail1;
+ }
+
+ LogFunc(("result(%d) = %.*s\n", len, len, name));
+ *out = 0;
+ } else {
+ name = d_name;
+ len = d_len;
+ }
+
+ err = vbsf_make_path(caller, sf_i, name, len, result);
+ if (name != d_name)
+ kfree(name);
+
+ return err;
+
+ fail1:
+ kfree(name);
+ return err;
+}
+
+
+/**
+ * This is called during name resolution/lookup to check if the @a dentry in the
+ * cache is still valid. The actual validation job is handled by
+ * vbsf_inode_revalidate_worker().
+ *
+ * @note Caller holds no relevant locks, just a dentry reference.
+ */
+#if RTLNX_VER_MIN(3,6,0)
+static int vbsf_dentry_revalidate(struct dentry *dentry, unsigned flags)
+#elif RTLNX_VER_MIN(2,6,0)
+static int vbsf_dentry_revalidate(struct dentry *dentry, struct nameidata *nd)
+#else
+static int vbsf_dentry_revalidate(struct dentry *dentry, int flags)
+#endif
+{
+#if RTLNX_VER_RANGE(2,6,0, 3,6,0)
+ int const flags = nd ? nd->flags : 0;
+#endif
+
+ int rc;
+
+ Assert(dentry);
+ SFLOGFLOW(("vbsf_dentry_revalidate: %p %#x %s\n", dentry, flags,
+ dentry->d_inode ? VBSF_GET_INODE_INFO(dentry->d_inode)->path->String.ach : ""));
+
+ /*
+ * See Documentation/filesystems/vfs.txt why we skip LOOKUP_RCU.
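+     * (RCU-walk must not sleep, and revalidating here may mean blocking
+     *  host calls, so we return -ECHILD and let the VFS retry the lookup
+     *  in ref-walk mode.)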
+ * + * Also recommended: https://lwn.net/Articles/649115/ + * https://lwn.net/Articles/649729/ + * https://lwn.net/Articles/650786/ + * + */ +#if RTLNX_VER_MIN(2,6,38) + if (flags & LOOKUP_RCU) { + rc = -ECHILD; + SFLOGFLOW(("vbsf_dentry_revalidate: RCU -> -ECHILD\n")); + } else +#endif + { + /* + * Do we have an inode or not? If not it's probably a negative cache + * entry, otherwise most likely a positive one. + */ + struct inode *pInode = dentry->d_inode; + if (pInode) { + /* + * Positive entry. + * + * Note! We're more aggressive here than other remote file systems, + * current (4.19) CIFS will for instance revalidate the inode + * and ignore the dentry timestamp for positive entries. + */ + unsigned long const cJiffiesAge = jiffies - vbsf_dentry_get_update_jiffies(dentry); + struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(dentry->d_sb); + if (cJiffiesAge < pSuperInfo->cJiffiesDirCacheTTL) { + SFLOGFLOW(("vbsf_dentry_revalidate: age: %lu vs. TTL %lu -> 1\n", cJiffiesAge, pSuperInfo->cJiffiesDirCacheTTL)); + rc = 1; + } else if (!vbsf_inode_revalidate_worker(dentry, true /*fForced*/, false /*fInodeLocked*/)) { + vbsf_dentry_set_update_jiffies(dentry, jiffies); + SFLOGFLOW(("vbsf_dentry_revalidate: age: %lu vs. TTL %lu -> reval -> 1\n", cJiffiesAge, pSuperInfo->cJiffiesDirCacheTTL)); + rc = 1; + } else { + SFLOGFLOW(("vbsf_dentry_revalidate: age: %lu vs. TTL %lu -> reval -> 0\n", cJiffiesAge, pSuperInfo->cJiffiesDirCacheTTL)); + rc = 0; + } + } else { + /* + * Negative entry. + * + * Invalidate dentries for open and renames here as we'll revalidate + * these when taking the actual action (also good for case preservation + * if we do case-insensitive mounts against windows + mac hosts at some + * later point). + */ +#if RTLNX_VER_MIN(2,6,28) + if (flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET)) +#elif RTLNX_VER_MIN(2,5,75) + if (flags & LOOKUP_CREATE) +#else + if (0) +#endif + { + SFLOGFLOW(("vbsf_dentry_revalidate: negative: create or rename target -> 0\n")); + rc = 0; + } else { + /* Can we skip revalidation based on TTL? */ + unsigned long const cJiffiesAge = vbsf_dentry_get_update_jiffies(dentry) - jiffies; + struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(dentry->d_sb); + if (cJiffiesAge < pSuperInfo->cJiffiesDirCacheTTL) { + SFLOGFLOW(("vbsf_dentry_revalidate: negative: age: %lu vs. TTL %lu -> 1\n", cJiffiesAge, pSuperInfo->cJiffiesDirCacheTTL)); + rc = 1; + } else { + /* We could revalidate it here, but we could instead just + have the caller kick it out. */ + /** @todo stat the direntry and see if it exists now. */ + SFLOGFLOW(("vbsf_dentry_revalidate: negative: age: %lu vs. TTL %lu -> 0\n", cJiffiesAge, pSuperInfo->cJiffiesDirCacheTTL)); + rc = 0; + } + } + } + } + return rc; +} + +#ifdef SFLOG_ENABLED + +/** For logging purposes only. */ +# if RTLNX_VER_MIN(2,6,38) +static int vbsf_dentry_delete(const struct dentry *pDirEntry) +# else +static int vbsf_dentry_delete(struct dentry *pDirEntry) +# endif +{ + SFLOGFLOW(("vbsf_dentry_delete: %p\n", pDirEntry)); + return 0; +} + +# if RTLNX_VER_MIN(4,8,0) +/** For logging purposes only. */ +static int vbsf_dentry_init(struct dentry *pDirEntry) +{ + SFLOGFLOW(("vbsf_dentry_init: %p\n", pDirEntry)); + return 0; +} +# endif + +#endif /* SFLOG_ENABLED */ + +/** + * Directory entry operations. + * + * Since 2.6.38 this is used via the super_block::s_d_op member. 
+ */ +struct dentry_operations vbsf_dentry_ops = { + .d_revalidate = vbsf_dentry_revalidate, +#ifdef SFLOG_ENABLED + .d_delete = vbsf_dentry_delete, +# if RTLNX_VER_MIN(4,8,0) + .d_init = vbsf_dentry_init, +# endif +#endif +}; + diff --git a/src/VBox/Additions/linux/sharedfolders/vbsfmount.c b/src/VBox/Additions/linux/sharedfolders/vbsfmount.c new file mode 100644 index 00000000..72d9210e --- /dev/null +++ b/src/VBox/Additions/linux/sharedfolders/vbsfmount.c @@ -0,0 +1,113 @@ +/* $Id: vbsfmount.c $ */ +/** @file + * vbsfmount - Commonly used code to mount shared folders on Linux-based + * systems. Currently used by mount.vboxsf and VBoxService. + */ + +/* + * Copyright (C) 2010-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + * + * SPDX-License-Identifier: GPL-3.0-only + */ + + +/********************************************************************************************************************************* +* Header Files * +*********************************************************************************************************************************/ +#ifndef _GNU_SOURCE +# define _GNU_SOURCE +#endif +#include +#include +#include +#include +#include +#include +#include +#include + +#include "vbsfmount.h" + + +/** @todo Use defines for return values! */ +int vbsfmount_complete(const char *pszSharedFolder, const char *pszMountPoint, + unsigned long fFlags, const char *pszOpts) +{ + /* + * Combine pszOpts and fFlags. + */ + int rc; + size_t const cchFlags = (fFlags & MS_NOSUID ? strlen(MNTOPT_NOSUID) + 1 : 0) + + (fFlags & MS_RDONLY ? strlen(MNTOPT_RO) : strlen(MNTOPT_RW)); + size_t const cchOpts = pszOpts ? 1 + strlen(pszOpts) : 0; + char *pszBuf = (char *)malloc(cchFlags + cchOpts + 8); + if (pszBuf) + { + char *psz = pszBuf; + FILE *pMTab; + + strcpy(psz, fFlags & MS_RDONLY ? MNTOPT_RO : MNTOPT_RW); + psz += strlen(psz); + + if (fFlags & MS_NOSUID) + { + *psz++ = ','; + strcpy(psz, MNTOPT_NOSUID); + psz += strlen(psz); + } + + if (cchOpts) + { + *psz++ = ','; + strcpy(psz, pszOpts); + } + + assert(strlen(pszBuf) <= cchFlags + cchOpts); + + /* + * Open the mtab and update it: + */ + pMTab = setmntent(MOUNTED, "a+"); + if (pMTab) + { + struct mntent Entry; + Entry.mnt_fsname = (char*)pszSharedFolder; + Entry.mnt_dir = (char *)pszMountPoint; + Entry.mnt_type = "vboxsf"; + Entry.mnt_opts = pszBuf; + Entry.mnt_freq = 0; + Entry.mnt_passno = 0; + + if (!addmntent(pMTab, &Entry)) + rc = 0; /* success. */ + else + rc = 3; /* Could not add an entry to the mount table. */ + + endmntent(pMTab); + } + else + rc = 2; /* Could not open mount table for update. 
*/
+ free(pszBuf);
+ }
+ else
+ rc = 1; /* allocation error */
+ return rc;
+}
+
diff --git a/src/VBox/Additions/linux/sharedfolders/vbsfmount.h b/src/VBox/Additions/linux/sharedfolders/vbsfmount.h
new file mode 100644
index 00000000..663cc8be
--- /dev/null
+++ b/src/VBox/Additions/linux/sharedfolders/vbsfmount.h
@@ -0,0 +1,142 @@
+/* $Id: vbsfmount.h $ */
+/** @file
+ * vboxsf - VBox Linux Shared Folders VFS, mount(2) parameter structure.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef GA_INCLUDED_SRC_linux_sharedfolders_vbsfmount_h
+#define GA_INCLUDED_SRC_linux_sharedfolders_vbsfmount_h
+#ifndef RT_WITHOUT_PRAGMA_ONCE
+# pragma once
+#endif
+
+/* Linux constrains the size of the data mount argument to PAGE_SIZE - 1. */
+#define MAX_MNTOPT_STR PAGE_SIZE
+#define MAX_HOST_NAME 256
+#define MAX_NLS_NAME 32
+#define VBSF_DEFAULT_TTL_MS 200
+
+#define VBSF_MOUNT_SIGNATURE_BYTE_0 '\377'
+#define VBSF_MOUNT_SIGNATURE_BYTE_1 '\376'
+#define VBSF_MOUNT_SIGNATURE_BYTE_2 '\375'
+
+/**
+ * VBox Linux Shared Folders VFS caching mode.
+ */
+enum vbsf_cache_mode {
+ /** Use the kernel module's default caching mode (kVbsfCacheMode_Strict). */
+ kVbsfCacheMode_Default = 0,
+ /** No caching, go to the host for everything. This will have some minor
+ * coherency issues for memory mapping with unsynced dirty pages. */
+ kVbsfCacheMode_None,
+ /** No caching, except for files with writable memory mappings.
+ * (Note to future: if we do oplock like stuff, it goes in here.) */
+ kVbsfCacheMode_Strict,
+ /** Use page cache for reads.
+ * This improves guest performance for read intensive jobs, like compiling
+ * and building. The flip side is that the guest may not see host modifications
+ * in a timely manner and possibly update files with out-of-date cache information,
+ * as there exists no protocol for the host to notify the guest about file
+ * modifications. */
+ kVbsfCacheMode_Read,
+ /** Use page cache for both reads and writes as far as that's possible.
+ * This is good for guest performance, but the price is that the guest may
+ * ignore host changes and the host may not see guest changes in a timely
+ * manner. */
+ kVbsfCacheMode_ReadWrite,
+ /** End of valid values (exclusive). */
+ kVbsfCacheMode_End,
+ /** Make sure the enum is sizeof(int32_t). */
+ kVbsfCacheMode_32BitHack = 0x7fffffff
+};
+
+/**
+ * VBox Linux Shared Folders VFS mount options.
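+ *
+ * A typical userland invocation filling this structure (illustrative
+ * values only) would be:
+ * @code
+ *   mount -t vboxsf -o uid=1000,gid=1000,dmode=0770,cache=strict myshare /mnt/share
+ * @endcode
+ * mount.vboxsf parses the option string and passes the result to mount(2).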
+ */ +struct vbsf_mount_info_new { + /** + * The old version of the mount_info struct started with a + * char name[MAX_HOST_NAME] field, where name cannot be '\0'. + * So the new version of the mount_info struct starts with a + * nullchar field which is always 0 so that we can detect and + * reject the old structure being passed. + */ + char nullchar; + /** Signature */ + char signature[3]; + /** Length of the whole structure */ + int length; + /** Share name */ + char name[MAX_HOST_NAME]; + /** Name of an I/O charset */ + char nls_name[MAX_NLS_NAME]; + /** User ID for all entries, default 0=root */ + int uid; + /** Group ID for all entries, default 0=root */ + int gid; + /** Directory entry and inode time to live in milliseconds. + * -1 for kernel default, 0 to disable caching. + * @sa vbsf_mount_info_new::msDirCacheTTL, vbsf_mount_info_new::msInodeTTL */ + int ttl; + /** Mode for directories if != -1. */ + int dmode; + /** Mode for regular files if != -1. */ + int fmode; + /** umask applied to directories */ + int dmask; + /** umask applied to regular files */ + int fmask; + /** Mount tag for VBoxService automounter. + * @since 6.0.0 */ + char szTag[32]; + /** Max pages to read & write at a time. + * @since 6.0.6 */ + uint32_t cMaxIoPages; + /** The directory content buffer size. Set to 0 for kernel module default. + * Larger value reduces the number of host calls on large directories. */ + uint32_t cbDirBuf; + /** The time to live for directory entries (in milliseconds). @a ttl is used + * if negative. + * @since 6.0.6 */ + int32_t msDirCacheTTL; + /** The time to live for inode information (in milliseconds). @a ttl is used + * if negative. + * @since 6.0.6 */ + int32_t msInodeTTL; + /** The cache and coherency mode. + * @since 6.0.6 */ + enum vbsf_cache_mode enmCacheMode; +}; +#ifdef AssertCompileSize +AssertCompileSize(struct vbsf_mount_info_new, 2*4 + MAX_HOST_NAME + MAX_NLS_NAME + 7*4 + 32 + 5*4); +#endif + +/** Completes the mount operation by adding the new mount point to mtab if required. */ +int vbsfmount_complete(const char *pszSharedFolder, const char *pszMountPoint, + unsigned long fFlags, const char *pszOpts); + +#endif /* !GA_INCLUDED_SRC_linux_sharedfolders_vbsfmount_h */ diff --git a/src/VBox/Additions/linux/sharedfolders/vfsmod.c b/src/VBox/Additions/linux/sharedfolders/vfsmod.c new file mode 100644 index 00000000..18324fe1 --- /dev/null +++ b/src/VBox/Additions/linux/sharedfolders/vfsmod.c @@ -0,0 +1,1753 @@ +/* $Id: vfsmod.c $ */ +/** @file + * vboxsf - VBox Linux Shared Folders VFS, module init/term, super block management. + */ + +/* + * Copyright (C) 2006-2023 Oracle and/or its affiliates. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * @note Anyone wishing to make changes here might wish to take a look at
+ * https://github.com/torvalds/linux/blob/master/Documentation/filesystems/vfs.txt
+ * which seems to be the closest there is to official documentation on
+ * writing filesystem drivers for Linux.
+ *
+ * See also: http://us1.samba.org/samba/ftp/cifs-cvs/ols2006-fs-tutorial-smf.odp
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#include "vfsmod.h"
+#include "version-generated.h"
+#include "revision-generated.h"
+#include "product-generated.h"
+#if RTLNX_VER_MIN(5,0,0) || RTLNX_RHEL_MIN(8,4)
+# include /* for MS_REMOUNT */
+#elif RTLNX_VER_MAX(3,3,0)
+# include
+#endif
+#include
+#include
+#if RTLNX_VER_RANGE(2,5,62, 5,8,0)
+# include
+#endif
+#include
+#include
+#if RTLNX_VER_MIN(5,1,0)
+# include
+# include
+#elif RTLNX_VER_MIN(2,6,0)
+# include
+#endif
+
+
+/*********************************************************************************************************************************
+* Defined Constants And Macros *
+*********************************************************************************************************************************/
+#define VBSF_DEFAULT_MAX_IO_PAGES RT_MIN(_16K / sizeof(RTGCPHYS64) /* => 8MB buffer */, VMMDEV_MAX_HGCM_DATA_SIZE >> PAGE_SHIFT)
+#define VBSF_DEFAULT_DIR_BUF_SIZE _64K
+
+
+/*********************************************************************************************************************************
+* Global Variables *
+*********************************************************************************************************************************/
+VBGLSFCLIENT g_SfClient;
+uint32_t g_fHostFeatures = 0;
+/** Last valid shared folders function number. */
+uint32_t g_uSfLastFunction = SHFL_FN_SET_FILE_SIZE;
+/** Shared folders features (SHFL_FEATURE_XXX). */
+uint64_t g_fSfFeatures = 0;
+
+/** Protects all the vbsf_inode_info::HandleList lists. */
+spinlock_t g_SfHandleLock;
+
+/** The 'follow_symlinks' module parameter.
+ * @todo Figure out how to do this for 2.4.x! */
+static int g_fFollowSymlinks = 0;
+
+/* forward declaration */
+static struct super_operations g_vbsf_super_ops;
+
+
+
+/**
+ * Copies options from the mount info structure into @a pSuperInfo.
+ *
+ * This is used both by vbsf_super_info_alloc_and_map_it() and
+ * vbsf_remount_fs().
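+ * Structure growth is detected via info->length, so every field added to
+ * struct vbsf_mount_info_new after the original layout is guarded by an
+ * RT_UOFFSETOF() check below before it is read.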
+ */ +static void vbsf_super_info_copy_remount_options(struct vbsf_super_info *pSuperInfo, struct vbsf_mount_info_new *info) +{ + pSuperInfo->uid = info->uid; + pSuperInfo->gid = info->gid; + + if ((unsigned)info->length >= RT_UOFFSETOF(struct vbsf_mount_info_new, szTag)) { + /* new fields */ + pSuperInfo->dmode = info->dmode; + pSuperInfo->fmode = info->fmode; + pSuperInfo->dmask = info->dmask; + pSuperInfo->fmask = info->fmask; + } else { + pSuperInfo->dmode = ~0; + pSuperInfo->fmode = ~0; + } + + if ((unsigned)info->length >= RT_UOFFSETOF(struct vbsf_mount_info_new, cMaxIoPages)) { + AssertCompile(sizeof(pSuperInfo->szTag) >= sizeof(info->szTag)); + RT_BCOPY_UNFORTIFIED(pSuperInfo->szTag, info->szTag, sizeof(info->szTag)); + pSuperInfo->szTag[sizeof(pSuperInfo->szTag) - 1] = '\0'; + } else { + pSuperInfo->szTag[0] = '\0'; + } + + /* The max number of pages in an I/O request. This must take into + account that the physical heap generally grows in 64 KB chunks, + so we should not try push that limit. It also needs to take + into account that the host will allocate temporary heap buffers + for the I/O bytes we send/receive, so don't push the host heap + too hard as we'd have to retry with smaller requests when this + happens, which isn't too efficient. */ + pSuperInfo->cMaxIoPages = VBSF_DEFAULT_MAX_IO_PAGES; + if ( (unsigned)info->length >= sizeof(struct vbsf_mount_info_new) + && info->cMaxIoPages > 0) { + if (info->cMaxIoPages <= VMMDEV_MAX_HGCM_DATA_SIZE >> PAGE_SHIFT) + pSuperInfo->cMaxIoPages = RT_MAX(info->cMaxIoPages, 2); /* read_iter/write_iter requires a minimum of 2. */ + else + printk(KERN_WARNING "vboxsf: max I/O page count (%#x) is out of range, using default (%#x) instead.\n", + info->cMaxIoPages, pSuperInfo->cMaxIoPages); + } + + pSuperInfo->cbDirBuf = VBSF_DEFAULT_DIR_BUF_SIZE; + if ( (unsigned)info->length >= RT_UOFFSETOF(struct vbsf_mount_info_new, cbDirBuf) + && info->cbDirBuf > 0) { + if (info->cbDirBuf <= _16M) + pSuperInfo->cbDirBuf = RT_ALIGN_32(info->cbDirBuf, PAGE_SIZE); + else + printk(KERN_WARNING "vboxsf: max directory buffer size (%#x) is out of range, using default (%#x) instead.\n", + info->cMaxIoPages, pSuperInfo->cMaxIoPages); + } + + /* + * TTLs. + */ + pSuperInfo->msTTL = info->ttl; + if (info->ttl > 0) + pSuperInfo->cJiffiesDirCacheTTL = msecs_to_jiffies(info->ttl); + else if (info->ttl == 0 || info->ttl != -1) + pSuperInfo->cJiffiesDirCacheTTL = pSuperInfo->msTTL = 0; + else + pSuperInfo->cJiffiesDirCacheTTL = msecs_to_jiffies(VBSF_DEFAULT_TTL_MS); + pSuperInfo->cJiffiesInodeTTL = pSuperInfo->cJiffiesDirCacheTTL; + + pSuperInfo->msDirCacheTTL = -1; + if ( (unsigned)info->length >= RT_UOFFSETOF(struct vbsf_mount_info_new, msDirCacheTTL) + && info->msDirCacheTTL >= 0) { + if (info->msDirCacheTTL > 0) { + pSuperInfo->msDirCacheTTL = info->msDirCacheTTL; + pSuperInfo->cJiffiesDirCacheTTL = msecs_to_jiffies(info->msDirCacheTTL); + } else { + pSuperInfo->msDirCacheTTL = 0; + pSuperInfo->cJiffiesDirCacheTTL = 0; + } + } + + pSuperInfo->msInodeTTL = -1; + if ( (unsigned)info->length >= RT_UOFFSETOF(struct vbsf_mount_info_new, msInodeTTL) + && info->msInodeTTL >= 0) { + if (info->msInodeTTL > 0) { + pSuperInfo->msInodeTTL = info->msInodeTTL; + pSuperInfo->cJiffiesInodeTTL = msecs_to_jiffies(info->msInodeTTL); + } else { + pSuperInfo->msInodeTTL = 0; + pSuperInfo->cJiffiesInodeTTL = 0; + } + } + + /* + * Caching. 
+ */ + pSuperInfo->enmCacheMode = kVbsfCacheMode_Strict; + if ((unsigned)info->length >= RT_UOFFSETOF(struct vbsf_mount_info_new, enmCacheMode)) { + switch (info->enmCacheMode) { + case kVbsfCacheMode_Default: + case kVbsfCacheMode_Strict: + break; + case kVbsfCacheMode_None: + case kVbsfCacheMode_Read: + case kVbsfCacheMode_ReadWrite: + pSuperInfo->enmCacheMode = info->enmCacheMode; + break; + default: + printk(KERN_WARNING "vboxsf: cache mode (%#x) is out of range, using default instead.\n", info->enmCacheMode); + break; + } + } +} + +/** + * Allocate the super info structure and try map the host share. + */ +static int vbsf_super_info_alloc_and_map_it(struct vbsf_mount_info_new *info, struct vbsf_super_info **sf_gp) +{ + int rc; + SHFLSTRING *str_name; + size_t name_len, str_len; + struct vbsf_super_info *pSuperInfo; + + TRACE(); + *sf_gp = NULL; /* (old gcc maybe used initialized) */ + + name_len = RTStrNLen(info->name, sizeof(info->name)); + if (name_len >= sizeof(info->name)) { + SFLOGRELBOTH(("vboxsf: Specified shared folder name is not zero terminated!\n")); + return -EINVAL; + } + if (RTStrNLen(info->nls_name, sizeof(info->nls_name)) >= sizeof(info->nls_name)) { + SFLOGRELBOTH(("vboxsf: Specified nls name is not zero terminated!\n")); + return -EINVAL; + } + + /* + * Allocate memory. + */ + str_len = offsetof(SHFLSTRING, String.utf8) + name_len + 1; + str_name = (PSHFLSTRING)kmalloc(str_len, GFP_KERNEL); + pSuperInfo = (struct vbsf_super_info *)kmalloc(sizeof(*pSuperInfo), GFP_KERNEL); + if (pSuperInfo && str_name) { + RT_ZERO(*pSuperInfo); + + str_name->u16Length = name_len; + str_name->u16Size = name_len + 1; + RT_BCOPY_UNFORTIFIED(str_name->String.utf8, info->name, name_len + 1); + + /* + * Init the NLS support, if needed. + */ + rc = 0; +#define _IS_UTF8(_str) (strcmp(_str, "utf8") == 0) +#define _IS_EMPTY(_str) (strcmp(_str, "") == 0) + + /* Check if NLS charset is valid and not points to UTF8 table */ + pSuperInfo->fNlsIsUtf8 = true; + if (info->nls_name[0]) { + if (_IS_UTF8(info->nls_name)) { + SFLOGFLOW(("vbsf_super_info_alloc_and_map_it: nls=utf8\n")); + pSuperInfo->nls = NULL; + } else { + pSuperInfo->fNlsIsUtf8 = false; + pSuperInfo->nls = load_nls(info->nls_name); + if (pSuperInfo->nls) { + SFLOGFLOW(("vbsf_super_info_alloc_and_map_it: nls=%s -> %p\n", info->nls_name, pSuperInfo->nls)); + } else { + SFLOGRELBOTH(("vboxsf: Failed to load nls '%s'!\n", info->nls_name)); + rc = -EINVAL; + } + } + } else { +#ifdef CONFIG_NLS_DEFAULT + /* If no NLS charset specified, try to load the default + * one if it's not points to UTF8. */ + if (!_IS_UTF8(CONFIG_NLS_DEFAULT) + && !_IS_EMPTY(CONFIG_NLS_DEFAULT)) { + pSuperInfo->fNlsIsUtf8 = false; + pSuperInfo->nls = load_nls_default(); + SFLOGFLOW(("vbsf_super_info_alloc_and_map_it: CONFIG_NLS_DEFAULT=%s -> %p\n", CONFIG_NLS_DEFAULT, pSuperInfo->nls)); + } else { + SFLOGFLOW(("vbsf_super_info_alloc_and_map_it: nls=utf8 (default %s)\n", CONFIG_NLS_DEFAULT)); + pSuperInfo->nls = NULL; + } +#else + SFLOGFLOW(("vbsf_super_info_alloc_and_map_it: nls=utf8 (no default)\n")); + pSuperInfo->nls = NULL; +#endif + } +#undef _IS_UTF8 +#undef _IS_EMPTY + if (rc == 0) { + /* + * Try mount it. + */ + rc = VbglR0SfHostReqMapFolderWithContigSimple(str_name, virt_to_phys(str_name), RTPATH_DELIMITER, + true /*fCaseSensitive*/, &pSuperInfo->map.root); + if (RT_SUCCESS(rc)) { + kfree(str_name); + + /* The rest is shared with remount. 
*/
+ vbsf_super_info_copy_remount_options(pSuperInfo, info);
+
+ *sf_gp = pSuperInfo;
+ return 0;
+ }
+
+ /*
+ * bail out:
+ */
+ if (rc == VERR_FILE_NOT_FOUND) {
+ LogRel(("vboxsf: SHFL_FN_MAP_FOLDER failed for '%s': share not found\n", info->name));
+ rc = -ENXIO;
+ } else {
+ LogRel(("vboxsf: SHFL_FN_MAP_FOLDER failed for '%s': %Rrc\n", info->name, rc));
+ rc = -EPROTO;
+ }
+ if (pSuperInfo->nls)
+ unload_nls(pSuperInfo->nls);
+ }
+ } else {
+ SFLOGRELBOTH(("vboxsf: Could not allocate memory for super info!\n"));
+ rc = -ENOMEM;
+ }
+ if (str_name)
+ kfree(str_name);
+ if (pSuperInfo)
+ kfree(pSuperInfo);
+ return rc;
+}
+
+/* unmap the share and free super info [pSuperInfo] */
+static void vbsf_super_info_free(struct vbsf_super_info *pSuperInfo)
+{
+ int rc;
+
+ TRACE();
+ rc = VbglR0SfHostReqUnmapFolderSimple(pSuperInfo->map.root);
+ if (RT_FAILURE(rc))
+ LogFunc(("VbglR0SfHostReqUnmapFolderSimple failed rc=%Rrc\n", rc));
+
+ if (pSuperInfo->nls)
+ unload_nls(pSuperInfo->nls);
+
+ kfree(pSuperInfo);
+}
+
+
+/**
+ * Initialize backing device related matters.
+ */
+static int vbsf_init_backing_dev(struct super_block *sb, struct vbsf_super_info *pSuperInfo)
+{
+ int rc = 0;
+#if RTLNX_VER_MIN(2,6,0)
+ /* Each new shared folder map gets a new uint64_t identifier,
+ * allocated in sequence. We ASSUME the sequence will not wrap. */
+# if RTLNX_VER_MIN(2,6,26)
+ static uint64_t s_u64Sequence = 0;
+ uint64_t idSeqMine = ASMAtomicIncU64(&s_u64Sequence);
+# endif
+ struct backing_dev_info *bdi;
+
+# if RTLNX_VER_RANGE(4,0,0, 4,2,0)
+ pSuperInfo->bdi_org = sb->s_bdi;
+# endif
+
+# if RTLNX_VER_MIN(4,12,0)
+ rc = super_setup_bdi_name(sb, "vboxsf-%llu", (unsigned long long)idSeqMine);
+ if (!rc)
+ bdi = sb->s_bdi;
+ else
+ return rc;
+# else
+ bdi = &pSuperInfo->bdi;
+# endif
+
+ bdi->ra_pages = 0; /* No readahead */
+
+# if RTLNX_VER_MIN(2,6,12)
+ bdi->capabilities = 0
+# ifdef BDI_CAP_MAP_DIRECT
+ | BDI_CAP_MAP_DIRECT /* MAP_SHARED */
+# endif
+# ifdef BDI_CAP_MAP_COPY
+ | BDI_CAP_MAP_COPY /* MAP_PRIVATE */
+# endif
+# ifdef BDI_CAP_READ_MAP
+ | BDI_CAP_READ_MAP /* can be mapped for reading */
+# endif
+# ifdef BDI_CAP_WRITE_MAP
+ | BDI_CAP_WRITE_MAP /* can be mapped for writing */
+# endif
+# ifdef BDI_CAP_EXEC_MAP
+ | BDI_CAP_EXEC_MAP /* can be mapped for execution */
+# endif
+# ifdef BDI_CAP_STRICTLIMIT
+# if RTLNX_VER_MIN(4,19,0) /* Trouble with 3.16.x/debian8. Process stops after dirty page throttling.
+ * Only tested successfully with 4.19. Maybe skip altogether? */
+ | BDI_CAP_STRICTLIMIT;
+# endif
+# endif
+ ;
+# ifdef BDI_CAP_STRICTLIMIT
+ /* Smallest possible amount of dirty pages: 1% of RAM. We set this to
+ try to reduce the amount of data that's out of sync with the host side.
+ Besides, writepages isn't implemented, so flushing is extremely slow.
+ Note! Extremely slow linux 3.0.0 msync doesn't seem to be related to this setting. */
+ bdi_set_max_ratio(bdi, 1);
+# endif
+# endif /* >= 2.6.12 */
+
+# if RTLNX_VER_RANGE(2,6,24, 4,12,0)
+ rc = bdi_init(&pSuperInfo->bdi);
+# if RTLNX_VER_MIN(2,6,26)
+ if (!rc)
+ rc = bdi_register(&pSuperInfo->bdi, NULL, "vboxsf-%llu", (unsigned long long)idSeqMine);
+# endif /* >= 2.6.26 */
+# endif /* 4.11.0 > version >= 2.6.24 */
+
+# if RTLNX_VER_RANGE(2,6,34, 4,12,0)
+ if (!rc)
+ sb->s_bdi = bdi;
+# endif
+
+#endif /* >= 2.6.0 */
+ return rc;
+}
+
+
+/**
+ * Undoes what vbsf_init_backing_dev did.
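+ * (Only does real work for the 2.6.24 .. 4.11 range where we registered
+ * the BDI ourselves; on 4.12+ the BDI from super_setup_bdi_name() is
+ * expected to be released by the VFS.)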
+ */ +static void vbsf_done_backing_dev(struct super_block *sb, struct vbsf_super_info *pSuperInfo) +{ +#if RTLNX_VER_RANGE(2,6,24, 4,12,0) + bdi_destroy(&pSuperInfo->bdi); /* includes bdi_unregister() */ + + /* Paranoia: Make sb->s_bdi not point at pSuperInfo->bdi, in case someone + trouches it after this point (we may screw up something). */ +# if RTLNX_VER_RANGE(4,0,0, 4,2,0) + sb->s_bdi = pSuperInfo->bdi_org; /* (noop_backing_dev_info is not exported) */ +# elif RTLNX_VER_RANGE(2,6,34, 4,10,0) + sb->s_bdi = &noop_backing_dev_info; +# endif +#endif +} + + +/** + * Creates the root inode and attaches it to the super block. + * + * @returns 0 on success, negative errno on failure. + * @param sb The super block. + * @param pSuperInfo Our super block info. + */ +static int vbsf_create_root_inode(struct super_block *sb, struct vbsf_super_info *pSuperInfo) +{ + SHFLFSOBJINFO fsinfo; + int rc; + + /* + * Allocate and initialize the memory for our inode info structure. + */ + struct vbsf_inode_info *sf_i = kmalloc(sizeof(*sf_i), GFP_KERNEL); + SHFLSTRING *path = kmalloc(sizeof(SHFLSTRING) + 1, GFP_KERNEL); + if (sf_i && path) { + sf_i->handle = SHFL_HANDLE_NIL; + sf_i->force_restat = false; + RTListInit(&sf_i->HandleList); +#ifdef VBOX_STRICT + sf_i->u32Magic = SF_INODE_INFO_MAGIC; +#endif + sf_i->path = path; + + path->u16Length = 1; + path->u16Size = 2; + path->String.utf8[0] = '/'; + path->String.utf8[1] = 0; + + /* + * Stat the root directory (for inode info). + */ + rc = vbsf_stat(__func__, pSuperInfo, sf_i->path, &fsinfo, 0); + if (rc == 0) { + /* + * Create the actual inode structure. + * Note! ls -la does display '.' and '..' entries with st_ino == 0, so root is #1. + */ +#if RTLNX_VER_MIN(2,4,25) + struct inode *iroot = iget_locked(sb, 1); +#else + struct inode *iroot = iget(sb, 1); +#endif + if (iroot) { + vbsf_init_inode(iroot, sf_i, &fsinfo, pSuperInfo); + VBSF_SET_INODE_INFO(iroot, sf_i); + +#if RTLNX_VER_MIN(2,4,25) + unlock_new_inode(iroot); +#endif + + /* + * Now make it a root inode. + */ +#if RTLNX_VER_MIN(3,4,0) + sb->s_root = d_make_root(iroot); +#else + sb->s_root = d_alloc_root(iroot); +#endif + if (sb->s_root) { + + return 0; + } + + SFLOGRELBOTH(("vboxsf: d_make_root failed!\n")); +#if RTLNX_VER_MAX(3,4,0) /* d_make_root calls iput */ + iput(iroot); +#endif + /* iput() will call vbsf_evict_inode()/vbsf_clear_inode(). 
*/ + sf_i = NULL; + path = NULL; + + rc = -ENOMEM; + } else { + SFLOGRELBOTH(("vboxsf: failed to allocate root inode!\n")); + rc = -ENOMEM; + } + } else + SFLOGRELBOTH(("vboxsf: could not stat root of share: %d\n", rc)); + } else { + SFLOGRELBOTH(("vboxsf: Could not allocate memory for root inode info!\n")); + rc = -ENOMEM; + } + if (sf_i) + kfree(sf_i); + if (path) + kfree(path); + return rc; +} + + +#if RTLNX_VER_MAX(5,1,0) +static void vbsf_init_mount_info(struct vbsf_mount_info_new *mount_info, + const char *sf_name) +{ + mount_info->ttl = mount_info->msDirCacheTTL = mount_info->msInodeTTL = -1; + mount_info->dmode = mount_info->fmode = ~0U; + mount_info->enmCacheMode = kVbsfCacheMode_Strict; + mount_info->length = sizeof(struct vbsf_mount_info_new); + if (sf_name) { +# if RTLNX_VER_MAX(2,5,69) + strncpy(mount_info->name, sf_name, sizeof(mount_info->name)); + mount_info->name[sizeof(mount_info->name)-1] = 0; +# else + strlcpy(mount_info->name, sf_name, sizeof(mount_info->name)); +# endif + } +} +#endif + +#if RTLNX_VER_RANGE(2,6,0, 5,1,0) +/** + * The following section of code uses the Linux match_token() family of + * routines to parse string-based mount options. + */ +enum { + Opt_iocharset, /* nls_name[] */ + Opt_nls, /* alias for iocharset */ + Opt_uid, + Opt_gid, + Opt_ttl, + Opt_dmode, + Opt_fmode, + Opt_dmask, + Opt_fmask, + Opt_umask, + Opt_maxiopages, + Opt_dirbuf, + Opt_dcachettl, + Opt_inodettl, + Opt_cachemode, /* enum vbsf_cache_mode */ + Opt_tag, + Opt_err +}; + +# if RTLNX_VER_MAX(2,6,28) +static match_table_t vbsf_tokens = { +# else +static const match_table_t vbsf_tokens = { +# endif + { Opt_iocharset, "iocharset=%s" }, + { Opt_nls, "nls=%s" }, + { Opt_uid, "uid=%u" }, + { Opt_gid, "gid=%u" }, + { Opt_ttl, "ttl=%u" }, + { Opt_dmode, "dmode=%o" }, + { Opt_fmode, "fmode=%o" }, + { Opt_dmask, "dmask=%o" }, + { Opt_fmask, "fmask=%o" }, + { Opt_umask, "umask=%o" }, + { Opt_maxiopages, "maxiopages=%u" }, + { Opt_dirbuf, "dirbuf=%u" }, + { Opt_dcachettl, "dcachettl=%u" }, + { Opt_inodettl, "inodettl=%u" }, + { Opt_cachemode, "cache=%s" }, + { Opt_tag, "tag=%s" }, /* private option for automounter */ + { Opt_err, NULL } +}; + +static int vbsf_parse_mount_options(char *options, + struct vbsf_mount_info_new *mount_info) +{ + substring_t args[MAX_OPT_ARGS]; + int option; + int token; + char *p; + char *iocharset; + char *cachemode; + char *tag; + + if (!options) + return -EINVAL; + + while ((p = strsep(&options, ",")) != NULL) { + if (!*p) + continue; + + token = match_token(p, vbsf_tokens, args); + switch (token) { + case Opt_iocharset: + case Opt_nls: + iocharset = match_strdup(&args[0]); + if (!iocharset) { + SFLOGRELBOTH(("vboxsf: Could not allocate memory for iocharset!\n")); + return -ENOMEM; + } + strlcpy(mount_info->nls_name, iocharset, + sizeof(mount_info->nls_name)); + kfree(iocharset); + break; + case Opt_uid: + if (match_int(&args[0], &option)) + return -EINVAL; + mount_info->uid = option; + break; + case Opt_gid: + if (match_int(&args[0], &option)) + return -EINVAL; + mount_info->gid = option; + break; + case Opt_ttl: + if (match_int(&args[0], &option)) + return -EINVAL; + mount_info->ttl = option; + break; + case Opt_dmode: + if (match_octal(&args[0], &option)) + return -EINVAL; + mount_info->dmode = option; + break; + case Opt_fmode: + if (match_octal(&args[0], &option)) + return -EINVAL; + mount_info->fmode = option; + break; + case Opt_dmask: + if (match_octal(&args[0], &option)) + return -EINVAL; + mount_info->dmask = option; + break; + case Opt_fmask: + if 
(match_octal(&args[0], &option)) + return -EINVAL; + mount_info->fmask = option; + break; + case Opt_umask: + if (match_octal(&args[0], &option)) + return -EINVAL; + mount_info->dmask = mount_info->fmask = option; + break; + case Opt_maxiopages: + if (match_int(&args[0], &option)) + return -EINVAL; + mount_info->cMaxIoPages = option; + break; + case Opt_dirbuf: + if (match_int(&args[0], &option)) + return -EINVAL; + mount_info->cbDirBuf = option; + break; + case Opt_dcachettl: + if (match_int(&args[0], &option)) + return -EINVAL; + mount_info->msDirCacheTTL = option; + break; + case Opt_inodettl: + if (match_int(&args[0], &option)) + return -EINVAL; + mount_info->msInodeTTL = option; + break; + case Opt_cachemode: { + cachemode = match_strdup(&args[0]); + if (!cachemode) { + SFLOGRELBOTH(("vboxsf: Could not allocate memory for cachemode!\n")); + return -ENOMEM; + } + if (!strcmp(cachemode, "default") || !strcmp(cachemode, "strict")) + mount_info->enmCacheMode = kVbsfCacheMode_Strict; + else if (!strcmp(cachemode, "none")) + mount_info->enmCacheMode = kVbsfCacheMode_None; + else if (!strcmp(cachemode, "read")) + mount_info->enmCacheMode = kVbsfCacheMode_Read; + else if (!strcmp(cachemode, "readwrite")) + mount_info->enmCacheMode = kVbsfCacheMode_ReadWrite; + else + printk(KERN_WARNING "vboxsf: cache mode (%s) is out of range, using default instead.\n", cachemode); + kfree(cachemode); + break; + } + case Opt_tag: + tag = match_strdup(&args[0]); + if (!tag) { + SFLOGRELBOTH(("vboxsf: Could not allocate memory for automount tag!\n")); + return -ENOMEM; + } + strlcpy(mount_info->szTag, tag, sizeof(mount_info->szTag)); + kfree(tag); + break; + default: + printk(KERN_ERR "unrecognised mount option \"%s\"", p); + return -EINVAL; + } + } + + return 0; +} +#endif /* 5.1.0 > version >= 2.6.0 */ + + +#if RTLNX_VER_MAX(2,6,0) +/** + * Linux kernel versions older than 2.6.0 don't have the match_token() routines + * so we parse the string-based mount options manually here. 
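+ *
+ * For illustration, an option string (with hypothetical values) such as
+ *      uid=1000,gid=1000,fmode=0644,cache=read
+ * is split at every ',' and each key=value pair is then compared against the
+ * known option names, which is what match_token() would otherwise have done.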
+ */ +static int vbsf_parse_mount_options(char *options, + struct vbsf_mount_info_new *mount_info) +{ + char *value; + char *option; + + if (!options) + return -EINVAL; + +# if RTLNX_VER_MIN(2,3,9) + while ((option = strsep(&options, ",")) != NULL) { +# else + for (option = strtok(options, ","); option; option = strtok(NULL, ",")) { +# endif + if (!*option) + continue; + + value = strchr(option, '='); + if (value) + *value++ = '\0'; + + if (!strcmp(option, "iocharset") || !strcmp(option, "nls")) { + if (!value || !*value) + return -EINVAL; + strncpy(mount_info->nls_name, value, sizeof(mount_info->nls_name)); + mount_info->nls_name[sizeof(mount_info->nls_name)-1] = 0; + } else if (!strcmp(option, "uid")) { + mount_info->uid = simple_strtoul(value, &value, 0); + if (*value) + return -EINVAL; + } else if (!strcmp(option, "gid")) { + mount_info->gid = simple_strtoul(value, &value, 0); + if (*value) + return -EINVAL; + } else if (!strcmp(option, "ttl")) { + mount_info->ttl = simple_strtoul(value, &value, 0); + if (*value) + return -EINVAL; + } else if (!strcmp(option, "dmode")) { + mount_info->dmode = simple_strtoul(value, &value, 8); + if (*value) + return -EINVAL; + } else if (!strcmp(option, "fmode")) { + mount_info->fmode = simple_strtoul(value, &value, 8); + if (*value) + return -EINVAL; + } else if (!strcmp(option, "dmask")) { + mount_info->dmask = simple_strtoul(value, &value, 8); + if (*value) + return -EINVAL; + } else if (!strcmp(option, "fmask")) { + mount_info->fmask = simple_strtoul(value, &value, 8); + if (*value) + return -EINVAL; + } else if (!strcmp(option, "umask")) { + mount_info->dmask = mount_info->fmask = simple_strtoul(value, + &value, 8); + if (*value) + return -EINVAL; + } else if (!strcmp(option, "maxiopages")) { + mount_info->cMaxIoPages = simple_strtoul(value, &value, 0); + if (*value) + return -EINVAL; + } else if (!strcmp(option, "dirbuf")) { + mount_info->cbDirBuf = simple_strtoul(value, &value, 0); + if (*value) + return -EINVAL; + } else if (!strcmp(option, "dcachettl")) { + mount_info->msDirCacheTTL = simple_strtoul(value, &value, 0); + if (*value) + return -EINVAL; + } else if (!strcmp(option, "inodettl")) { + mount_info->msInodeTTL = simple_strtoul(value, &value, 0); + if (*value) + return -EINVAL; + } else if (!strcmp(option, "cache")) { + if (!value || !*value) + return -EINVAL; + if (!strcmp(value, "default") || !strcmp(value, "strict")) + mount_info->enmCacheMode = kVbsfCacheMode_Strict; + else if (!strcmp(value, "none")) + mount_info->enmCacheMode = kVbsfCacheMode_None; + else if (!strcmp(value, "read")) + mount_info->enmCacheMode = kVbsfCacheMode_Read; + else if (!strcmp(value, "readwrite")) + mount_info->enmCacheMode = kVbsfCacheMode_ReadWrite; + else + printk(KERN_WARNING "vboxsf: cache mode (%s) is out of range, using default instead.\n", value); + } else if (!strcmp(option, "tag")) { + if (!value || !*value) + return -EINVAL; + strncpy(mount_info->szTag, value, sizeof(mount_info->szTag)); + mount_info->szTag[sizeof(mount_info->szTag)-1] = 0; + } else if (!strcmp(option, "sf_name")) { + if (!value || !*value) + return -EINVAL; + strncpy(mount_info->name, value, sizeof(mount_info->name)); + mount_info->name[sizeof(mount_info->name)-1] = 0; + } else { + printk(KERN_ERR "unrecognised mount option \"%s\"", option); + return -EINVAL; + } + } + + return 0; +} +#endif + + +/** + * This is called by vbsf_read_super_24(), vbsf_read_super_26(), and + * vbsf_get_tree() when vfs mounts the fs and wants to read the super_block. 
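+ * (All three callers funnel into this one function; the pre-5.1 entry points
+ * hand the mount information over via the data pointer, while the fs_context
+ * path reads it from fc->fs_private instead.)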
+ * + * Calls vbsf_super_info_alloc_and_map_it() to map the folder and allocate super + * information structure. + * + * Initializes @a sb, initializes root inode and dentry. + * + * Should respect @a flags. + */ +#if RTLNX_VER_MIN(5,1,0) +static int vbsf_read_super_aux(struct super_block *sb, struct fs_context *fc) +#else +static int vbsf_read_super_aux(struct super_block *sb, void *data, int flags) +#endif +{ + int rc; + struct vbsf_super_info *pSuperInfo; + + TRACE(); +#if RTLNX_VER_MAX(5,1,0) + if (!data) { + SFLOGRELBOTH(("vboxsf: No mount data. Is mount.vboxsf installed (typically in /sbin)?\n")); + return -EINVAL; + } + + if (flags & MS_REMOUNT) { + SFLOGRELBOTH(("vboxsf: Remounting is not supported!\n")); + return -ENOSYS; + } +#endif + + /* + * Create our super info structure and map the shared folder. + */ +#if RTLNX_VER_MIN(5,1,0) + struct vbsf_mount_info_new *info = fc->fs_private; + rc = vbsf_super_info_alloc_and_map_it(info, &pSuperInfo); +#else + rc = vbsf_super_info_alloc_and_map_it((struct vbsf_mount_info_new *)data, &pSuperInfo); +#endif + if (rc == 0) { + /* + * Initialize the super block structure (must be done before + * root inode creation). + */ + sb->s_magic = 0xface; + sb->s_blocksize = 1024; +#if RTLNX_VER_MIN(2,4,3) + /* Required for seek/sendfile (see 'loff_t max' in fs/read_write.c / do_sendfile()). */ +# if defined MAX_LFS_FILESIZE + sb->s_maxbytes = MAX_LFS_FILESIZE; +# elif BITS_PER_LONG == 32 + sb->s_maxbytes = (loff_t)ULONG_MAX << PAGE_SHIFT; +# else + sb->s_maxbytes = INT64_MAX; +# endif +#endif +#if RTLNX_VER_MIN(2,6,11) + sb->s_time_gran = 1; /* This might be a little optimistic for windows hosts, where it should be 100. */ +#endif + sb->s_op = &g_vbsf_super_ops; +#if RTLNX_VER_MIN(2,6,38) + sb->s_d_op = &vbsf_dentry_ops; +#endif + + /* + * Initialize the backing device. This is important for memory mapped + * files among other things. + */ + rc = vbsf_init_backing_dev(sb, pSuperInfo); + if (rc == 0) { + /* + * Create the root inode and we're done. + */ + rc = vbsf_create_root_inode(sb, pSuperInfo); + if (rc == 0) { + VBSF_SET_SUPER_INFO(sb, pSuperInfo); + SFLOGFLOW(("vbsf_read_super_aux: returns successfully\n")); + return 0; + } + vbsf_done_backing_dev(sb, pSuperInfo); + } else + SFLOGRELBOTH(("vboxsf: backing device information initialization failed: %d\n", rc)); + vbsf_super_info_free(pSuperInfo); + } + return rc; +} + + +/** + * This is called when vfs is about to destroy the @a inode. + * + * We must free the inode info structure here. + */ +#if RTLNX_VER_MIN(2,6,36) +static void vbsf_evict_inode(struct inode *inode) +#else +static void vbsf_clear_inode(struct inode *inode) +#endif +{ + struct vbsf_inode_info *sf_i; + + TRACE(); + + /* + * Flush stuff. + */ +#if RTLNX_VER_MIN(2,6,36) + truncate_inode_pages(&inode->i_data, 0); +# if RTLNX_VER_MIN(3,5,0) + clear_inode(inode); +# else + end_writeback(inode); +# endif +#endif + /* + * Clean up our inode info. + */ + sf_i = VBSF_GET_INODE_INFO(inode); + if (sf_i) { + VBSF_SET_INODE_INFO(inode, NULL); + + Assert(sf_i->u32Magic == SF_INODE_INFO_MAGIC); + BUG_ON(!sf_i->path); + kfree(sf_i->path); + vbsf_handle_drop_chain(sf_i); +# ifdef VBOX_STRICT + sf_i->u32Magic = SF_INODE_INFO_MAGIC_DEAD; +# endif + kfree(sf_i); + } +} + + +/* this is called by vfs when it wants to populate [inode] with data. 
+   the only thing that is known about the inode at this point is its index,
+   hence we can't do anything here and must leave it to lookup/whatever to
+   properly fill in [inode] later on. */
+#if RTLNX_VER_MAX(2,6,25)
+static void vbsf_read_inode(struct inode *inode)
+{
+}
+#endif
+
+
+/* vfs is done with [sb] (umount was called); call [vbsf_super_info_free] to
+   unmap the folder and free [pSuperInfo] */
+static void vbsf_put_super(struct super_block *sb)
+{
+    struct vbsf_super_info *pSuperInfo;
+
+    pSuperInfo = VBSF_GET_SUPER_INFO(sb);
+    BUG_ON(!pSuperInfo);
+    vbsf_done_backing_dev(sb, pSuperInfo);
+    vbsf_super_info_free(pSuperInfo);
+}
+
+
+/**
+ * Get file system statistics.
+ */
+#if RTLNX_VER_MIN(2,6,18)
+static int vbsf_statfs(struct dentry *dentry, struct kstatfs *stat)
+#elif RTLNX_VER_MIN(2,5,73)
+static int vbsf_statfs(struct super_block *sb, struct kstatfs *stat)
+#else
+static int vbsf_statfs(struct super_block *sb, struct statfs *stat)
+#endif
+{
+#if RTLNX_VER_MIN(2,6,18)
+    struct super_block *sb = dentry->d_inode->i_sb;
+#endif
+    int rc;
+    VBOXSFVOLINFOREQ *pReq = (VBOXSFVOLINFOREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq));
+    if (pReq) {
+        SHFLVOLINFO            *pVolInfo   = &pReq->VolInfo;
+        struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(sb);
+        rc = VbglR0SfHostReqQueryVolInfo(pSuperInfo->map.root, pReq, SHFL_HANDLE_ROOT);
+        if (RT_SUCCESS(rc)) {
+            stat->f_type   = UINT32_C(0x786f4256); /* 'VBox' little endian */
+            stat->f_bsize  = pVolInfo->ulBytesPerAllocationUnit;
+#if RTLNX_VER_MIN(2,5,73)
+            stat->f_frsize = pVolInfo->ulBytesPerAllocationUnit;
+#endif
+            stat->f_blocks = pVolInfo->ullTotalAllocationBytes
+                           / pVolInfo->ulBytesPerAllocationUnit;
+            stat->f_bfree  = pVolInfo->ullAvailableAllocationBytes
+                           / pVolInfo->ulBytesPerAllocationUnit;
+            stat->f_bavail = pVolInfo->ullAvailableAllocationBytes
+                           / pVolInfo->ulBytesPerAllocationUnit;
+            stat->f_files  = 1000;
+            stat->f_ffree  = 1000000; /* don't return 0 here since the guest may think
+                                       * that it is not possible to create any more files */
+            stat->f_fsid.val[0] = 0;
+            stat->f_fsid.val[1] = 0;
+            stat->f_namelen = 255;
+#if RTLNX_VER_MIN(2,6,36)
+            stat->f_flags = 0; /* not valid */
+#endif
+            RT_ZERO(stat->f_spare);
+            rc = 0;
+        } else
+            rc = -RTErrConvertToErrno(rc);
+        VbglR0PhysHeapFree(pReq);
+    } else
+        rc = -ENOMEM;
+    return rc;
+}
+
+#if RTLNX_VER_MIN(5,1,0)
+static int vbsf_remount_fs(struct super_block *sb,
+                           struct vbsf_mount_info_new *info)
+#else
+static int vbsf_remount_fs(struct super_block *sb, int *flags, char *data)
+#endif
+{
+#if RTLNX_VER_MIN(2,4,23)
+    struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(sb);
+    struct vbsf_inode_info *sf_i;
+    struct inode *iroot;
+    SHFLFSOBJINFO fsinfo;
+    int err;
+    Assert(pSuperInfo);
+
+# if RTLNX_VER_MIN(5,1,0)
+    vbsf_super_info_copy_remount_options(pSuperInfo, info);
+# else
+    if (VBSF_IS_MOUNT_VBOXSF_DATA(data)) {
+        vbsf_super_info_copy_remount_options(pSuperInfo, (struct vbsf_mount_info_new *)data);
+    } else {
+        struct vbsf_mount_info_new mount_opts = { '\0' };
+        vbsf_init_mount_info(&mount_opts, NULL);
+        err = vbsf_parse_mount_options(data, &mount_opts);
+        if (err)
+            return err;
+        vbsf_super_info_copy_remount_options(pSuperInfo, &mount_opts);
+    }
+# endif
+
+    /* '.' and '..' entries are st_ino == 0, so root is #1. */
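+    /* Refresh the root inode so that any changed permission related options
+       from the new option set (dmode/fmode and their masks) can take effect
+       right away. */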
+    iroot = ilookup(sb, 1);
+    if (!iroot)
+        return -ENOSYS;
+
+    sf_i = VBSF_GET_INODE_INFO(iroot);
+    err = vbsf_stat(__func__, pSuperInfo, sf_i->path, &fsinfo, 0);
+    BUG_ON(err != 0);
+    vbsf_init_inode(iroot, sf_i, &fsinfo, pSuperInfo);
+    iput(iroot);
+    return 0;
+#else  /* < 2.4.23 */
+    return -ENOSYS;
+#endif /* < 2.4.23 */
+}
+
+
+/**
+ * Show mount options.
+ *
+ * This is needed by the VBoxService automounter in order for it to pick up
+ * the 'szTag' option value it sets on its mount.
+ */
+#if RTLNX_VER_MAX(3,3,0)
+static int vbsf_show_options(struct seq_file *m, struct vfsmount *mnt)
+#else
+static int vbsf_show_options(struct seq_file *m, struct dentry *root)
+#endif
+{
+#if RTLNX_VER_MAX(3,3,0)
+    struct super_block *sb = mnt->mnt_sb;
+#else
+    struct super_block *sb = root->d_sb;
+#endif
+    struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(sb);
+    if (pSuperInfo) {
+        /* Performance related options: */
+        if (pSuperInfo->msTTL != -1)
+            seq_printf(m, ",ttl=%d", pSuperInfo->msTTL);
+        if (pSuperInfo->msDirCacheTTL >= 0)
+            seq_printf(m, ",dcachettl=%d", pSuperInfo->msDirCacheTTL);
+        if (pSuperInfo->msInodeTTL >= 0)
+            seq_printf(m, ",inodettl=%d", pSuperInfo->msInodeTTL);
+        if (pSuperInfo->cMaxIoPages != VBSF_DEFAULT_MAX_IO_PAGES)
+            seq_printf(m, ",maxiopages=%u", pSuperInfo->cMaxIoPages);
+        if (pSuperInfo->cbDirBuf != VBSF_DEFAULT_DIR_BUF_SIZE)
+            seq_printf(m, ",dirbuf=%u", pSuperInfo->cbDirBuf);
+        switch (pSuperInfo->enmCacheMode) {
+            default: AssertFailed(); RT_FALL_THRU();
+            case kVbsfCacheMode_Strict:
+                break;
+            case kVbsfCacheMode_None:      seq_puts(m, ",cache=none");      break;
+            case kVbsfCacheMode_Read:      seq_puts(m, ",cache=read");      break;
+            case kVbsfCacheMode_ReadWrite: seq_puts(m, ",cache=readwrite"); break;
+        }
+
+        /* Attributes and NLS: */
+        seq_printf(m, ",iocharset=%s", pSuperInfo->nls ? pSuperInfo->nls->charset : "utf8");
+        seq_printf(m, ",uid=%u,gid=%u", pSuperInfo->uid, pSuperInfo->gid);
+        if (pSuperInfo->dmode != ~0)
+            seq_printf(m, ",dmode=0%o", pSuperInfo->dmode);
+        if (pSuperInfo->fmode != ~0)
+            seq_printf(m, ",fmode=0%o", pSuperInfo->fmode);
+        if (pSuperInfo->dmask != 0)
+            seq_printf(m, ",dmask=0%o", pSuperInfo->dmask);
+        if (pSuperInfo->fmask != 0)
+            seq_printf(m, ",fmask=0%o", pSuperInfo->fmask);
+
+        /* Misc: */
+        if (pSuperInfo->szTag[0] != '\0') {
+            seq_puts(m, ",tag=");
+            seq_escape(m, pSuperInfo->szTag, " \t\n\\");
+        }
+    }
+    return 0;
+}
+
+
+/**
+ * Super block operations.
+ */
+static struct super_operations g_vbsf_super_ops = {
+#if RTLNX_VER_MAX(2,6,36)
+    .clear_inode  = vbsf_clear_inode,
+#else
+    .evict_inode  = vbsf_evict_inode,
+#endif
+#if RTLNX_VER_MAX(2,6,25)
+    .read_inode   = vbsf_read_inode,
+#endif
+    .put_super    = vbsf_put_super,
+    .statfs       = vbsf_statfs,
+#if RTLNX_VER_MAX(5,1,0)
+    .remount_fs   = vbsf_remount_fs,
+#endif
+    .show_options = vbsf_show_options
+};
+
+
+
+/*********************************************************************************************************************************
+*   File system type related stuff.
* +*********************************************************************************************************************************/ + +#if RTLNX_VER_RANGE(2,5,4, 5,1,0) + +static int vbsf_read_super_26(struct super_block *sb, void *data, int flags) +{ + int err; + + TRACE(); + err = vbsf_read_super_aux(sb, data, flags); + if (err) + printk(KERN_DEBUG "vbsf_read_super_aux err=%d\n", err); + + return err; +} + +# if RTLNX_VER_MIN(2,6,39) +static struct dentry *sf_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) +{ + TRACE(); + + if (!VBSF_IS_MOUNT_VBOXSF_DATA(data)) { + int rc; + struct vbsf_mount_info_new mount_opts = { '\0' }; + + vbsf_init_mount_info(&mount_opts, dev_name); + rc = vbsf_parse_mount_options(data, &mount_opts); + if (rc) + return ERR_PTR(rc); + return mount_nodev(fs_type, flags, &mount_opts, vbsf_read_super_26); + } else { + return mount_nodev(fs_type, flags, data, vbsf_read_super_26); + } +} +# elif RTLNX_VER_MIN(2,6,18) +static int vbsf_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt) +{ + TRACE(); + + if (!VBSF_IS_MOUNT_VBOXSF_DATA(data)) { + int rc; + struct vbsf_mount_info_new mount_opts = { '\0' }; + + vbsf_init_mount_info(&mount_opts, dev_name); + rc = vbsf_parse_mount_options(data, &mount_opts); + if (rc) + return rc; + return get_sb_nodev(fs_type, flags, &mount_opts, vbsf_read_super_26, + mnt); + } else { + return get_sb_nodev(fs_type, flags, data, vbsf_read_super_26, mnt); + } +} +# else /* 2.6.18 > version >= 2.5.4 */ +static struct super_block *vbsf_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) +{ + TRACE(); + + if (!VBSF_IS_MOUNT_VBOXSF_DATA(data)) { + int rc; + struct vbsf_mount_info_new mount_opts = { '\0' }; + + vbsf_init_mount_info(&mount_opts, dev_name); + rc = vbsf_parse_mount_options(data, &mount_opts); + if (rc) + return ERR_PTR(rc); + return get_sb_nodev(fs_type, flags, &mount_opts, vbsf_read_super_26); + } else { + return get_sb_nodev(fs_type, flags, data, vbsf_read_super_26); + } +} +# endif +#endif /* 5.1.0 > version >= 2.5.4 */ + +#if RTLNX_VER_MAX(2,5,4) /* < 2.5.4 */ + +static struct super_block *vbsf_read_super_24(struct super_block *sb, void *data, int flags) +{ + int err; + + TRACE(); + + if (!VBSF_IS_MOUNT_VBOXSF_DATA(data)) { + int rc; + struct vbsf_mount_info_new mount_opts = { '\0' }; + + vbsf_init_mount_info(&mount_opts, NULL); + rc = vbsf_parse_mount_options(data, &mount_opts); + if (rc) + return ERR_PTR(rc); + err = vbsf_read_super_aux(sb, &mount_opts, flags); + } else { + err = vbsf_read_super_aux(sb, data, flags); + } + if (err) { + printk(KERN_DEBUG "vbsf_read_super_aux err=%d\n", err); + return NULL; + } + + return sb; +} + +static DECLARE_FSTYPE(g_vboxsf_fs_type, "vboxsf", vbsf_read_super_24, 0); + +#endif /* < 2.5.4 */ + +#if RTLNX_VER_MIN(5,1,0) + +/** + * The following section of code uses the Linux filesystem mount API (also + * known as the "filesystem context API") to parse string-based mount options. 
+ * The API is described here: + * https://www.kernel.org/doc/Documentation/filesystems/mount_api.txt + */ +enum vbsf_cache_modes { + VBSF_CACHE_DEFAULT, + VBSF_CACHE_NONE, + VBSF_CACHE_STRICT, + VBSF_CACHE_READ, + VBSF_CACHE_RW +}; + +static const struct constant_table vbsf_param_cache_mode[] = { + { "default", VBSF_CACHE_DEFAULT }, + { "none", VBSF_CACHE_NONE }, + { "strict", VBSF_CACHE_STRICT }, + { "read", VBSF_CACHE_READ }, + { "readwrite", VBSF_CACHE_RW }, + {} +}; + +enum { + Opt_iocharset, /* nls_name[] */ + Opt_nls, /* alias for iocharset */ + Opt_uid, + Opt_gid, + Opt_ttl, + Opt_dmode, + Opt_fmode, + Opt_dmask, + Opt_fmask, + Opt_umask, + Opt_maxiopages, + Opt_dirbuf, + Opt_dcachettl, + Opt_inodettl, + Opt_cachemode, /* enum vbsf_cache_mode */ + Opt_tag +}; + +# if RTLNX_VER_MAX(5,6,0) +static const struct fs_parameter_spec vbsf_fs_specs[] = { +# else +static const struct fs_parameter_spec vbsf_fs_parameters[] = { +# endif + fsparam_string("iocharset", Opt_iocharset), + fsparam_string("nls", Opt_nls), + fsparam_u32 ("uid", Opt_uid), + fsparam_u32 ("gid", Opt_gid), + fsparam_u32 ("ttl", Opt_ttl), + fsparam_u32oct("dmode", Opt_dmode), + fsparam_u32oct("fmode", Opt_fmode), + fsparam_u32oct("dmask", Opt_dmask), + fsparam_u32oct("fmask", Opt_fmask), + fsparam_u32oct("umask", Opt_umask), + fsparam_u32 ("maxiopages", Opt_maxiopages), + fsparam_u32 ("dirbuf", Opt_dirbuf), + fsparam_u32 ("dcachettl", Opt_dcachettl), + fsparam_u32 ("inodettl", Opt_inodettl), +# if RTLNX_VER_MAX(5,6,0) + fsparam_enum ("cache", Opt_cachemode), +# else + fsparam_enum ("cache", Opt_cachemode, vbsf_param_cache_mode), +# endif + fsparam_string("tag", Opt_tag), + {} +}; + +# if RTLNX_VER_MAX(5,6,0) +static const struct fs_parameter_enum vbsf_fs_enums[] = { + { Opt_cachemode, "default", VBSF_CACHE_DEFAULT }, + { Opt_cachemode, "none", VBSF_CACHE_NONE }, + { Opt_cachemode, "strict", VBSF_CACHE_STRICT }, + { Opt_cachemode, "read", VBSF_CACHE_READ }, + { Opt_cachemode, "readwrite", VBSF_CACHE_RW }, + {} +}; + +static const struct fs_parameter_description vbsf_fs_parameters = { + .name = "vboxsf", + .specs = vbsf_fs_specs, + .enums = vbsf_fs_enums +}; +# endif + +/** + * Parse the (string-based) mount options passed in as -o foo,bar=123,etc. 
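+ *
+ * For example, a hypothetical invocation like
+ *      mount -t vboxsf -o uid=1000,dmode=0755,cache=readwrite myshare /mnt
+ * arrives here one parameter at a time; fs_parse() maps each key to its
+ * Opt_xxx token and pre-parses the value into the fs_parse_result.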
+ */ +static int vbsf_parse_param(struct fs_context *fc, struct fs_parameter *param) +{ + struct fs_parse_result result; + struct vbsf_mount_info_new *info = fc->fs_private; + int opt; + +# if RTLNX_VER_MAX(5,6,0) + opt = fs_parse(fc, &vbsf_fs_parameters, param, &result); +# else + opt = fs_parse(fc, vbsf_fs_parameters, param, &result); +# endif + if (opt < 0) + return opt; + + switch (opt) { + case Opt_iocharset: + case Opt_nls: + strlcpy(info->nls_name, param->string, sizeof(info->nls_name)); + break; + case Opt_uid: + info->uid = result.uint_32; + break; + case Opt_gid: + info->gid = result.uint_32; + break; + case Opt_ttl: + info->ttl = result.uint_32; + break; + case Opt_dmode: + if (result.uint_32 & ~0777) + return invalf(fc, "Invalid dmode specified: '%o'", result.uint_32); + info->dmode = result.uint_32; + break; + case Opt_fmode: + if (result.uint_32 & ~0777) + return invalf(fc, "Invalid fmode specified: '%o'", result.uint_32); + info->fmode = result.uint_32; + break; + case Opt_dmask: + if (result.uint_32 & ~07777) + return invalf(fc, "Invalid dmask specified: '%o'", result.uint_32); + info->dmask = result.uint_32; + break; + case Opt_fmask: + if (result.uint_32 & ~07777) + return invalf(fc, "Invalid fmask specified: '%o'", result.uint_32); + info->fmask = result.uint_32; + break; + case Opt_umask: + if (result.uint_32 & ~07777) + return invalf(fc, "Invalid umask specified: '%o'", result.uint_32); + info->dmask = info->fmask = result.uint_32; + break; + case Opt_maxiopages: + info->cMaxIoPages = result.uint_32; + break; + case Opt_dirbuf: + info->cbDirBuf = result.uint_32; + break; + case Opt_dcachettl: + info->msDirCacheTTL = result.uint_32; + break; + case Opt_inodettl: + info->msInodeTTL = result.uint_32; + break; + case Opt_cachemode: + if (result.uint_32 == VBSF_CACHE_DEFAULT || result.uint_32 == VBSF_CACHE_STRICT) + info->enmCacheMode = kVbsfCacheMode_Strict; + else if (result.uint_32 == VBSF_CACHE_NONE) + info->enmCacheMode = kVbsfCacheMode_None; + else if (result.uint_32 == VBSF_CACHE_READ) + info->enmCacheMode = kVbsfCacheMode_Read; + else if (result.uint_32 == VBSF_CACHE_RW) + info->enmCacheMode = kVbsfCacheMode_ReadWrite; + else + printk(KERN_WARNING "vboxsf: cache mode (%u) is out of range, using default instead.\n", result.uint_32); + break; + case Opt_tag: + strlcpy(info->szTag, param->string, sizeof(info->szTag)); + break; + default: + return invalf(fc, "Invalid mount option: '%s'", param->key); + } + + return 0; +} + +/** + * Parse the mount options provided whether by the mount.vboxsf utility + * which supplies the mount information as a page of data or else as a + * string in the following format: key[=val][,key[=val]]*. + */ +static int vbsf_parse_monolithic(struct fs_context *fc, void *data) +{ + struct vbsf_mount_info_new *info = fc->fs_private; + + if (data) { + if (VBSF_IS_MOUNT_VBOXSF_DATA(data)) { + RT_BCOPY_UNFORTIFIED(info, data, sizeof(struct vbsf_mount_info_new)); + } else { + /* this will call vbsf_parse_param() */ + return generic_parse_monolithic(fc, data); + } + } + + return 0; +} + +/** + * Clean up the filesystem-specific part of the filesystem context. + */ +static void vbsf_free_ctx(struct fs_context *fc) +{ + struct vbsf_mount_info_new *info = fc->fs_private; + + if (info) { + kfree(info); + fc->fs_private = NULL; + } +} + +/** + * Create the mountable root and superblock which can then be used later for + * mounting the shared folder. 
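+ * (A mount(8) invocation along the hypothetical lines of
+ * "mount -t vboxsf myshare /mnt" ends up here with fc->source holding the
+ * share name "myshare".)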
+ * The superblock is populated by
+ * vbsf_read_super_aux() which also sets up the shared folder mapping and the
+ * related paperwork in preparation for mounting the shared folder.
+ */
+static int vbsf_get_tree(struct fs_context *fc)
+{
+    struct vbsf_mount_info_new *info = fc->fs_private;
+
+    if (!fc->source) {
+        SFLOGRELBOTH(("vboxsf: No shared folder specified\n"));
+        return invalf(fc, "vboxsf: No shared folder specified");
+    }
+
+    /* fc->source (the shared folder name) is set after vbsf_init_fs_ctx() */
+    strlcpy(info->name, fc->source, sizeof(info->name));
+
+# if RTLNX_VER_MAX(5,3,0)
+    return vfs_get_super(fc, vfs_get_independent_super, vbsf_read_super_aux);
+# else
+    return get_tree_nodev(fc, vbsf_read_super_aux);
+# endif
+}
+
+/**
+ * Reconfigures the superblock based on the mount information stored in the
+ * filesystem context.  Called via '-o remount' (aka mount(2) with MS_REMOUNT)
+ * and is the equivalent of .remount_fs.
+ */
+static int vbsf_reconfigure(struct fs_context *fc)
+{
+    struct vbsf_mount_info_new *info = fc->fs_private;
+    struct super_block *sb = fc->root->d_sb;
+
+    return vbsf_remount_fs(sb, info);
+}
+
+static const struct fs_context_operations vbsf_context_ops = {
+    .parse_param      = vbsf_parse_param,
+    .parse_monolithic = vbsf_parse_monolithic,
+    .free             = vbsf_free_ctx,
+    .get_tree         = vbsf_get_tree,
+    .reconfigure      = vbsf_reconfigure
+};
+
+/**
+ * Set up the filesystem mount context.
+ */
+static int vbsf_init_fs_context(struct fs_context *fc)
+{
+    struct vbsf_mount_info_new *info;
+
+    info = kzalloc(sizeof(*info), GFP_KERNEL);
+    if (!info) {
+        SFLOGRELBOTH(("vboxsf: Could not allocate memory for mount options\n"));
+        return -ENOMEM;
+    }
+
+    /* set default values for the mount information structure */
+    info->ttl = info->msDirCacheTTL = info->msInodeTTL = -1;
+    info->dmode = info->fmode = ~0U;
+    info->enmCacheMode = kVbsfCacheMode_Strict;
+    info->length = sizeof(struct vbsf_mount_info_new);
+
+    fc->fs_private = info;
+    fc->ops = &vbsf_context_ops;
+
+    return 0;
+}
+#endif /* >= 5.1.0 */
+
+
+#if RTLNX_VER_MIN(2,5,4)
+/**
+ * File system registration structure.
+ */
+static struct file_system_type g_vboxsf_fs_type = {
+    .owner = THIS_MODULE,
+    .name = "vboxsf",
+# if RTLNX_VER_MIN(5,1,0)
+    .init_fs_context = vbsf_init_fs_context,
+#  if RTLNX_VER_MAX(5,6,0)
+    .parameters = &vbsf_fs_parameters,
+#  else
+    .parameters = vbsf_fs_parameters,
+#  endif
+# elif RTLNX_VER_MIN(2,6,39)
+    .mount = sf_mount,
+# else
+    .get_sb = vbsf_get_sb,
+# endif
+    .kill_sb = kill_anon_super
+};
+#endif /* >= 2.5.4 */
+
+
+/*********************************************************************************************************************************
+*   Module stuff                                                                                                                 *
+*********************************************************************************************************************************/
+
+/**
+ * Called on module initialization.
+ */
+static int __init init(void)
+{
+    int rc;
+    SFLOGFLOW(("vboxsf: init\n"));
+
+    /*
+     * Must be paranoid about the vbsf_mount_info_new size.
+     */
+    AssertCompile(sizeof(struct vbsf_mount_info_new) <= PAGE_SIZE);
+    if (sizeof(struct vbsf_mount_info_new) > PAGE_SIZE) {
+        printk(KERN_ERR
+               "vboxsf: Mount information structure is too large %lu\n"
+               "vboxsf: Must be less than or equal to %lu\n",
+               (unsigned long)sizeof(struct vbsf_mount_info_new),
+               (unsigned long)PAGE_SIZE);
+        return -EINVAL;
+    }
+
+    /*
+     * Initialize stuff.
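+     * (The teardown in fini() mirrors this bottom up: the file system is
+     * unregistered before the HGCM connection and the VbglR0 layer are taken
+     * down, so the ordering below is deliberate.)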
+     */
+    spin_lock_init(&g_SfHandleLock);
+    rc = VbglR0SfInit();
+    if (RT_SUCCESS(rc)) {
+        /*
+         * Try to connect to the shared folder HGCM service.
+         * It is possible it is not there.
+         */
+        rc = VbglR0SfConnect(&g_SfClient);
+        if (RT_SUCCESS(rc)) {
+            /*
+             * Query host HGCM features and afterwards (must be last) shared folder features.
+             */
+            rc = VbglR0QueryHostFeatures(&g_fHostFeatures);
+            if (RT_FAILURE(rc))
+            {
+                LogRel(("vboxsf: VbglR0QueryHostFeatures failed: rc=%Rrc (ignored)\n", rc));
+                g_fHostFeatures = 0;
+            }
+            VbglR0SfHostReqQueryFeaturesSimple(&g_fSfFeatures, &g_uSfLastFunction);
+            LogRel(("vboxsf: g_fHostFeatures=%#x g_fSfFeatures=%#RX64 g_uSfLastFunction=%u\n",
+                    g_fHostFeatures, g_fSfFeatures, g_uSfLastFunction));
+
+            /*
+             * Tell the shared folder service about our expectations:
+             *      - UTF-8 strings (rather than UTF-16)
+             *      - Whether to return or follow (default) symbolic links.
+             */
+            rc = VbglR0SfHostReqSetUtf8Simple();
+            if (RT_SUCCESS(rc)) {
+                if (!g_fFollowSymlinks) {
+                    rc = VbglR0SfHostReqSetSymlinksSimple();
+                    if (RT_FAILURE(rc))
+                        printk(KERN_WARNING "vboxsf: Host unable to enable showing symlinks, rc=%d\n", rc);
+                }
+                /*
+                 * Now that we're ready for action, try to register the
+                 * file system with the kernel.
+                 */
+                rc = register_filesystem(&g_vboxsf_fs_type);
+                if (rc == 0) {
+                    printk(KERN_INFO "vboxsf: Successfully loaded version " VBOX_VERSION_STRING " r" __stringify(VBOX_SVN_REV) "\n");
+#ifdef VERMAGIC_STRING
+                    LogRel(("vboxsf: Successfully loaded version " VBOX_VERSION_STRING " r" __stringify(VBOX_SVN_REV) " on %s (LINUX_VERSION_CODE=%#x)\n",
+                            VERMAGIC_STRING, LINUX_VERSION_CODE));
+#elif defined(UTS_RELEASE)
+                    LogRel(("vboxsf: Successfully loaded version " VBOX_VERSION_STRING " r" __stringify(VBOX_SVN_REV) " on %s (LINUX_VERSION_CODE=%#x)\n",
+                            UTS_RELEASE, LINUX_VERSION_CODE));
+#else
+                    LogRel(("vboxsf: Successfully loaded version " VBOX_VERSION_STRING " r" __stringify(VBOX_SVN_REV) " (LINUX_VERSION_CODE=%#x)\n", LINUX_VERSION_CODE));
+#endif
+                    return 0;
+                }
+
+                /*
+                 * Failed. Bail out.
+                 */
+                LogRel(("vboxsf: register_filesystem failed: rc=%d\n", rc));
+            } else {
+                LogRel(("vboxsf: VbglR0SfSetUtf8 failed, rc=%Rrc\n", rc));
+                rc = -EPROTO;
+            }
+            VbglR0SfDisconnect(&g_SfClient);
+        } else {
+            LogRel(("vboxsf: VbglR0SfConnect failed, rc=%Rrc\n", rc));
+            rc = rc == VERR_HGCM_SERVICE_NOT_FOUND ? -EHOSTDOWN : -ECONNREFUSED;
+        }
+        VbglR0SfTerm();
+    } else {
+        LogRel(("vboxsf: VbglR0SfInit failed, rc=%Rrc\n", rc));
+        rc = -EPROTO;
+    }
+    return rc;
+}
+
+
+/**
+ * Called on module finalization.
+ */
+static void __exit fini(void)
+{
+    SFLOGFLOW(("vboxsf: fini\n"));
+
+    unregister_filesystem(&g_vboxsf_fs_type);
+    VbglR0SfDisconnect(&g_SfClient);
+    VbglR0SfTerm();
+}
+
+
+/*
+ * Module parameters.
+ */
+#if RTLNX_VER_MIN(2,5,52)
+module_param_named(follow_symlinks, g_fFollowSymlinks, int, 0);
+MODULE_PARM_DESC(follow_symlinks,
+                 "Let host resolve symlinks rather than showing them");
+#endif
+
+
+/*
+ * Module declaration related bits.
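+ *
+ * (With the follow_symlinks parameter above, loading the module as, say,
+ * "modprobe vboxsf follow_symlinks=1" makes the host resolve symbolic links
+ * instead of presenting them to the guest as links; the invocation is shown
+ * for illustration only.)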
+ */ +module_init(init); +module_exit(fini); + +MODULE_DESCRIPTION(VBOX_PRODUCT " VFS Module for Host File System Access"); +MODULE_AUTHOR(VBOX_VENDOR); +MODULE_LICENSE("GPL and additional rights"); +#ifdef MODULE_ALIAS_FS +MODULE_ALIAS_FS("vboxsf"); +#endif +#ifdef MODULE_VERSION +MODULE_VERSION(VBOX_VERSION_STRING " r" RT_XSTR(VBOX_SVN_REV)); +#endif + diff --git a/src/VBox/Additions/linux/sharedfolders/vfsmod.h b/src/VBox/Additions/linux/sharedfolders/vfsmod.h new file mode 100644 index 00000000..b39db721 --- /dev/null +++ b/src/VBox/Additions/linux/sharedfolders/vfsmod.h @@ -0,0 +1,483 @@ +/* $Id: vfsmod.h $ */ +/** @file + * vboxsf - Linux Shared Folders VFS, internal header. + */ + +/* + * Copyright (C) 2006-2023 Oracle and/or its affiliates. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef GA_INCLUDED_SRC_linux_sharedfolders_vfsmod_h +#define GA_INCLUDED_SRC_linux_sharedfolders_vfsmod_h +#ifndef RT_WITHOUT_PRAGMA_ONCE +# pragma once +#endif + +#if 0 /* Enables strict checks. */ +# define RT_STRICT +# define VBOX_STRICT +#endif + +#define LOG_GROUP LOG_GROUP_SHARED_FOLDERS +#include "the-linux-kernel.h" +#include +#include +#include + +#if RTLNX_VER_MIN(2,6,0) +# include +#endif + +#include +#include +#include +#include "vbsfmount.h" + + +/* + * Logging wrappers. + */ +#if 1 +# define TRACE() LogFunc(("tracepoint\n")) +# define SFLOG(aArgs) Log(aArgs) +# define SFLOGFLOW(aArgs) LogFlow(aArgs) +# define SFLOG2(aArgs) Log2(aArgs) +# define SFLOG3(aArgs) Log3(aArgs) +# define SFLOGRELBOTH(aArgs) LogRel(aArgs) +# ifdef LOG_ENABLED +# define SFLOG_ENABLED 1 +# endif +#else +# define TRACE() RTLogBackdoorPrintf("%s: tracepoint\n", __FUNCTION__) +# define SFLOG(aArgs) RTLogBackdoorPrintf aArgs +# define SFLOGFLOW(aArgs) RTLogBackdoorPrintf aArgs +# define SFLOG2(aArgs) RTLogBackdoorPrintf aArgs +# define SFLOG3(aArgs) RTLogBackdoorPrintf aArgs +# define SFLOG_ENABLED 1 +# define SFLOGRELBOTH(aArgs) do { RTLogBackdoorPrintf aArgs; printk aArgs; } while (0) +#endif + + +/* + * inode compatibility glue. 
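+ *
+ * (Pre-2.6 kernels lack i_size_read()/i_size_write(), so minimal atomic
+ * replacements are recreated below; a plain 64-bit i_size access could tear
+ * on 32-bit SMP kernels.)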
+ */
+#if RTLNX_VER_MAX(2,6,0)
+
+DECLINLINE(loff_t) i_size_read(struct inode *pInode)
+{
+    AssertCompile(sizeof(loff_t) == sizeof(uint64_t));
+    return ASMAtomicReadU64((uint64_t volatile *)&pInode->i_size);
+}
+
+DECLINLINE(void) i_size_write(struct inode *pInode, loff_t cbNew)
+{
+    AssertCompile(sizeof(pInode->i_size) == sizeof(uint64_t));
+    ASMAtomicWriteU64((uint64_t volatile *)&pInode->i_size, cbNew);
+}
+
+#endif /* < 2.6.0 */
+
+#if RTLNX_VER_MAX(3,2,0) && !RTLNX_RHEL_MIN(6, 10)
+DECLINLINE(void) set_nlink(struct inode *pInode, unsigned int cLinks)
+{
+    pInode->i_nlink = cLinks;
+}
+#endif
+
+
+/* global variables */
+extern VBGLSFCLIENT g_SfClient;
+extern spinlock_t g_SfHandleLock;
+extern uint32_t g_uSfLastFunction;
+extern uint64_t g_fSfFeatures;
+
+extern struct inode_operations vbsf_dir_iops;
+extern struct inode_operations vbsf_lnk_iops;
+extern struct inode_operations vbsf_reg_iops;
+extern struct file_operations vbsf_dir_fops;
+extern struct file_operations vbsf_reg_fops;
+extern struct dentry_operations vbsf_dentry_ops;
+extern struct address_space_operations vbsf_reg_aops;
+
+
+/**
+ * VBox specific per-mount (shared folder) information.
+ */
+struct vbsf_super_info {
+    VBGLSFMAP map;
+    struct nls_table *nls;
+    /** Set if the NLS table is UTF-8. */
+    bool fNlsIsUtf8;
+    int uid;
+    int gid;
+    int dmode;
+    int fmode;
+    int dmask;
+    int fmask;
+    /** Maximum number of pages to allow in an I/O buffer with the host.
+     * This applies to read and write operations. */
+    uint32_t cMaxIoPages;
+    /** The default directory buffer size. */
+    uint32_t cbDirBuf;
+    /** The time to live for directory entries in jiffies, zero if disabled. */
+    uint32_t cJiffiesDirCacheTTL;
+    /** The time to live for inode information in jiffies, zero if disabled. */
+    uint32_t cJiffiesInodeTTL;
+    /** The cache and coherency mode. */
+    enum vbsf_cache_mode enmCacheMode;
+    /** Mount tag for VBoxService automounter.  @since 6.0 */
+    char szTag[32];
+#if RTLNX_VER_RANGE(2,6,0, 4,12,0)
+    /** The backing device info structure. */
+    struct backing_dev_info bdi;
+#endif
+    /** The mount option value for /proc/mounts. */
+    int32_t msTTL;
+    /** The time to live for directory entries in milliseconds, for /proc/mounts. */
+    int32_t msDirCacheTTL;
+    /** The time to live for inode information in milliseconds, for /proc/mounts. */
+    int32_t msInodeTTL;
+#if RTLNX_VER_RANGE(4,0,0, 4,2,0)
+    /** 4.0 and 4.1 are missing the noop_backing_dev_info export, so save the
+     * initial value here so we can restore it in vbsf_done_backing_dev(). (paranoia) */
+    struct backing_dev_info *bdi_org;
+#endif
+};
+
+/* Following casts are here to prevent assignment of void * to
+   pointers of arbitrary type */
+#if RTLNX_VER_MAX(2,6,0)
+# define VBSF_GET_SUPER_INFO(sb)                ((struct vbsf_super_info *)(sb)->u.generic_sbp)
+# define VBSF_SET_SUPER_INFO(sb, a_pSuperInfo)  do { (sb)->u.generic_sbp = a_pSuperInfo; } while (0)
+#else
+# define VBSF_GET_SUPER_INFO(sb)                ((struct vbsf_super_info *)(sb)->s_fs_info)
+# define VBSF_SET_SUPER_INFO(sb, a_pSuperInfo)  do { (sb)->s_fs_info = a_pSuperInfo; } while (0)
+#endif
+
+
+/**
+ * For associating inodes with host handles.
+ *
+ * This is necessary for address_space_operations::vbsf_writepage and allows
+ * optimizing stat, lookups and other operations on open files and directories.
+ */
+struct vbsf_handle {
+    /** List entry (head vbsf_inode_info::HandleList). */
+    RTLISTNODE Entry;
+    /** Host file/whatever handle. */
+    SHFLHANDLE hHost;
+    /** VBSF_HANDLE_F_XXX */
+    uint32_t fFlags;
+    /** Reference counter.
+     * Close the handle and free the structure when it reaches zero. */
+    uint32_t volatile cRefs;
+#ifdef VBOX_STRICT
+    /** For strictness checks. */
+    struct vbsf_inode_info *pInodeInfo;
+#endif
+};
+
+/** @name VBSF_HANDLE_F_XXX - Handle summary flags (vbsf_handle::fFlags).
+ * @{ */
+#define VBSF_HANDLE_F_READ          UINT32_C(0x00000001)
+#define VBSF_HANDLE_F_WRITE         UINT32_C(0x00000002)
+#define VBSF_HANDLE_F_APPEND        UINT32_C(0x00000004)
+#define VBSF_HANDLE_F_FILE          UINT32_C(0x00000010)
+#define VBSF_HANDLE_F_DIR           UINT32_C(0x00000020)
+#define VBSF_HANDLE_F_ON_LIST       UINT32_C(0x00000080)
+#define VBSF_HANDLE_F_MAGIC_MASK    UINT32_C(0xffffff00)
+#define VBSF_HANDLE_F_MAGIC         UINT32_C(0x75030700) /**< Maurice Ravel (1875-03-07). */
+#define VBSF_HANDLE_F_MAGIC_DEAD    UINT32_C(0x19371228)
+/** @} */
+
+
+/**
+ * VBox specific per-inode information.
+ */
+struct vbsf_inode_info {
+    /** Which file */
+    SHFLSTRING *path;
+    /** Some information was changed, update data on next revalidate */
+    bool force_restat;
+    /** The timestamp (jiffies) where the inode info was last updated. */
+    unsigned long ts_up_to_date;
+    /** The birth time. */
+    RTTIMESPEC BirthTime;
+
+    /** @name Host modification detection stats.
+     * @{ */
+    /** The raw modification time, for mapping invalidation purposes. */
+    RTTIMESPEC ModificationTime;
+    /** Copy of ModificationTime from the last time we wrote to the file. */
+    RTTIMESPEC ModificationTimeAtOurLastWrite;
+    /** @} */
+
+    /** A handle created by vbsf_create_worker() stays valid here until the
+     * file is opened by vbsf_reg_open().
+     * @todo r=bird: figure this one out... */
+    SHFLHANDLE handle;
+
+    /** List of open handles (struct vbsf_handle), protected by g_SfHandleLock. */
+    RTLISTANCHOR HandleList;
+#ifdef VBOX_STRICT
+    uint32_t u32Magic;
+# define SF_INODE_INFO_MAGIC        UINT32_C(0x18620822) /**< Claude Debussy */
+# define SF_INODE_INFO_MAGIC_DEAD   UINT32_C(0x19180325)
+#endif
+};
+
+#if RTLNX_VER_MIN(2,6,19) || defined(KERNEL_FC6)
+/* FC6 kernel 2.6.18, vanilla kernel 2.6.19+ */
+# define VBSF_GET_INODE_INFO(i)       ((struct vbsf_inode_info *) (i)->i_private)
+# define VBSF_SET_INODE_INFO(i, sf_i) (i)->i_private = sf_i
+#else
+/* vanilla kernel up to 2.6.18 */
+# define VBSF_GET_INODE_INFO(i)       ((struct vbsf_inode_info *) (i)->u.generic_ip)
+# define VBSF_SET_INODE_INFO(i, sf_i) (i)->u.generic_ip = sf_i
+#endif
+
+extern void vbsf_init_inode(struct inode *inode, struct vbsf_inode_info *sf_i, PSHFLFSOBJINFO info,
+                            struct vbsf_super_info *pSuperInfo);
+extern void vbsf_update_inode(struct inode *pInode, struct vbsf_inode_info *pInodeInfo, PSHFLFSOBJINFO pObjInfo,
+                              struct vbsf_super_info *pSuperInfo, bool fInodeLocked, unsigned fSetAttrs);
+extern int  vbsf_inode_revalidate_worker(struct dentry *dentry, bool fForced, bool fInodeLocked);
+extern int  vbsf_inode_revalidate_with_handle(struct dentry *dentry, SHFLHANDLE hHostFile, bool fForced, bool fInodeLocked);
+#if RTLNX_VER_MIN(2,5,18)
+# if RTLNX_VER_MIN(6,3,0)
+extern int  vbsf_inode_getattr(struct mnt_idmap *idmap, const struct path *path,
+                               struct kstat *kstat, u32 request_mask, unsigned int query_flags);
+# elif RTLNX_VER_MIN(5,12,0)
+extern int  vbsf_inode_getattr(struct user_namespace *ns, const struct path *path,
+                               struct kstat *kstat, u32 request_mask, unsigned int query_flags);
+# elif RTLNX_VER_MIN(4,11,0)
+extern int  vbsf_inode_getattr(const struct path *path, struct kstat *kstat, u32 request_mask, unsigned int query_flags);
+# else
+extern int  vbsf_inode_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *kstat);
+# endif
+#else  /* < 2.5.44 */
+extern int  vbsf_inode_revalidate(struct dentry *dentry);
+#endif /* < 2.5.44 */
+#if RTLNX_VER_MIN(6,3,0)
+extern int  vbsf_inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *iattr);
+#elif RTLNX_VER_MIN(5,12,0)
+extern int  vbsf_inode_setattr(struct user_namespace *ns, struct dentry *dentry, struct iattr *iattr);
+#else
+extern int  vbsf_inode_setattr(struct dentry *dentry, struct iattr *iattr);
+#endif
+
+
+extern void     vbsf_handle_drop_chain(struct vbsf_inode_info *pInodeInfo);
+extern struct vbsf_handle *vbsf_handle_find(struct vbsf_inode_info *pInodeInfo, uint32_t fFlagsSet, uint32_t fFlagsClear);
+extern uint32_t vbsf_handle_release_slow(struct vbsf_handle *pHandle, struct vbsf_super_info *pSuperInfo,
+                                         const char *pszCaller);
+extern void     vbsf_handle_append(struct vbsf_inode_info *pInodeInfo, struct vbsf_handle *pHandle);
+
+/**
+ * Releases a handle.
+ *
+ * @returns New reference count.
+ * @param   pHandle     The handle to release.
+ * @param   pSuperInfo  The info structure for the shared folder associated
+ *                      with the handle.
+ * @param   pszCaller   The caller name (for logging failures).
+ */
+DECLINLINE(uint32_t) vbsf_handle_release(struct vbsf_handle *pHandle, struct vbsf_super_info *pSuperInfo, const char *pszCaller)
+{
+    uint32_t cRefs;
+
+    Assert((pHandle->fFlags & VBSF_HANDLE_F_MAGIC_MASK) == VBSF_HANDLE_F_MAGIC);
+    Assert(pHandle->pInodeInfo);
+    Assert(pHandle->pInodeInfo && pHandle->pInodeInfo->u32Magic == SF_INODE_INFO_MAGIC);
+
+    cRefs = ASMAtomicDecU32(&pHandle->cRefs);
+    Assert(cRefs < _64M);
+    if (cRefs)
+        return cRefs;
+    return vbsf_handle_release_slow(pHandle, pSuperInfo, pszCaller);
+}
+
+
+/**
+ * VBox specific information for a regular file.
+ */
+struct vbsf_reg_info {
+    /** Handle tracking structure.
+     * @note Must be first! */
+    struct vbsf_handle Handle;
+};
+
+uint32_t vbsf_linux_oflags_to_vbox(unsigned fLnxOpen, uint32_t *pfHandle, const char *pszCaller);
+
+
+/**
+ * VBox specific information for an open directory.
+ */
+struct vbsf_dir_info {
+    /** Handle tracking structure.
+     * @note Must be first! */
+    struct vbsf_handle Handle;
+    /** Semaphore protecting everything below. */
+    struct semaphore Lock;
+    /** A magic number (VBSF_DIR_INFO_MAGIC). */
+    uint32_t u32Magic;
+    /** Size of the buffer for directory entries. */
+    uint32_t cbBuf;
+    /** Buffer for directory entries on the physical heap. */
+    PSHFLDIRINFO pBuf;
+    /** Number of valid bytes in the buffer. */
+    uint32_t cbValid;
+    /** Number of entries left in the buffer. */
+    uint32_t cEntriesLeft;
+    /** The position of the next entry.  Incremented by one for each entry. */
+    loff_t offPos;
+    /** The next entry. */
+    PSHFLDIRINFO pEntry;
+    /** Set if there are no more files. */
+    bool fNoMoreFiles;
+};
+
+/** Magic number for vbsf_dir_info::u32Magic (Robert Anson Heinlein). */
+#define VBSF_DIR_INFO_MAGIC         UINT32_C(0x19070707)
+/** Value of vbsf_dir_info::u32Magic when freed. */
+#define VBSF_DIR_INFO_MAGIC_DEAD    UINT32_C(0x19880508)
+
+
+/**
+ * Sets the update-jiffies value for a dentry.
+ *
+ * This is used together with vbsf_super_info::cJiffiesDirCacheTTL to reduce
+ * re-validation of dentry structures while walking.
+ *
+ * This used to be living in d_time, but since 4.9.0 that seems to have become
+ * unfashionable and d_fsdata is now used for this purpose.  We do this all
+ * the way back, since d_time seems only to have been used by the file system
+ * specific code (at least going back to 2.4.0).
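+ *
+ * Users of these helpers check freshness roughly like this (a sketch only;
+ * cJiffiesDirCacheTTL is the vbsf_super_info field declared further up):
+ *      if (jiffies - vbsf_dentry_get_update_jiffies(pDirEntry)
+ *              < pSuperInfo->cJiffiesDirCacheTTL)
+ *          return 1;   (still fresh, no host round-trip needed)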
+ */
+DECLINLINE(void) vbsf_dentry_set_update_jiffies(struct dentry *pDirEntry, unsigned long uToSet)
+{
+    /*SFLOG3(("vbsf_dentry_set_update_jiffies: %p: %lx -> %#lx\n", pDirEntry, (unsigned long)pDirEntry->d_fsdata, uToSet));*/
+    pDirEntry->d_fsdata = (void *)uToSet;
+}
+
+/**
+ * Get the update-jiffies value for a dentry.
+ */
+DECLINLINE(unsigned long) vbsf_dentry_get_update_jiffies(struct dentry *pDirEntry)
+{
+    return (unsigned long)pDirEntry->d_fsdata;
+}
+
+/**
+ * Invalidates the update TTL for the given directory entry so that it is
+ * revalidated the next time it is used.
+ * @param   pDirEntry   The directory entry cache entry to invalidate.
+ */
+DECLINLINE(void) vbsf_dentry_invalidate_ttl(struct dentry *pDirEntry)
+{
+    vbsf_dentry_set_update_jiffies(pDirEntry, jiffies - INT32_MAX / 2);
+}
+
+/**
+ * Increase the time-to-live of @a pDirEntry and all ancestors.
+ * @param   pDirEntry   The directory entry cache entry whose ancestors
+ *                      we should increase the TTL for.
+ */
+DECLINLINE(void) vbsf_dentry_chain_increase_ttl(struct dentry *pDirEntry)
+{
+#ifdef VBOX_STRICT
+    struct super_block * const pSuper = pDirEntry->d_sb;
+#endif
+    unsigned long const uToSet = jiffies;
+    do {
+        Assert(pDirEntry->d_sb == pSuper);
+        vbsf_dentry_set_update_jiffies(pDirEntry, uToSet);
+        pDirEntry = pDirEntry->d_parent;
+    } while (!IS_ROOT(pDirEntry));
+}
+
+/**
+ * Increase the time-to-live of all ancestors.
+ * @param   pDirEntry   The directory entry cache entry whose ancestors
+ *                      we should increase the TTL for.
+ */
+DECLINLINE(void) vbsf_dentry_chain_increase_parent_ttl(struct dentry *pDirEntry)
+{
+    Assert(!pDirEntry->d_parent || pDirEntry->d_parent->d_sb == pDirEntry->d_sb);
+    pDirEntry = pDirEntry->d_parent;
+    if (pDirEntry)
+        vbsf_dentry_chain_increase_ttl(pDirEntry);
+}
+
+/** Macro for getting the dentry for a struct file. */
+#if RTLNX_VER_MIN(4,6,0)
+# define VBSF_GET_F_DENTRY(f)   file_dentry(f)
+#elif RTLNX_VER_MIN(2,6,20)
+# define VBSF_GET_F_DENTRY(f)   (f->f_path.dentry)
+#else
+# define VBSF_GET_F_DENTRY(f)   (f->f_dentry)
+#endif
+
+/**
+ * Macro for checking if the 'data' argument passed in via mount(2) was supplied
+ * by the mount.vboxsf command line utility as a page of data containing the
+ * vbsf_mount_info_new structure.
+ */
+#define VBSF_IS_MOUNT_VBOXSF_DATA(data) \
+    (((struct vbsf_mount_info_new *)data)->nullchar     == '\0' && \
+     ((struct vbsf_mount_info_new *)data)->signature[0] == VBSF_MOUNT_SIGNATURE_BYTE_0 && \
+     ((struct vbsf_mount_info_new *)data)->signature[1] == VBSF_MOUNT_SIGNATURE_BYTE_1 && \
+     ((struct vbsf_mount_info_new *)data)->signature[2] == VBSF_MOUNT_SIGNATURE_BYTE_2)
+
+extern int  vbsf_stat(const char *caller, struct vbsf_super_info *pSuperInfo, SHFLSTRING *path, PSHFLFSOBJINFO result,
+                      int ok_to_fail);
+extern int  vbsf_path_from_dentry(struct vbsf_super_info *pSuperInfo, struct vbsf_inode_info *sf_i, struct dentry *dentry,
+                                  SHFLSTRING **result, const char *caller);
+extern int  vbsf_nlscpy(struct vbsf_super_info *pSuperInfo, char *name, size_t name_bound_len,
+                        const unsigned char *utf8_name, size_t utf8_len);
+extern int  vbsf_nls_to_shflstring(struct vbsf_super_info *pSuperInfo, const char *pszNls, PSHFLSTRING *ppString);
+
+
+/**
+ * Converts Linux access permissions to VBox ones (mode & 0777).
+ *
+ * @note Currently identical.
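+ * @note For example, a Linux mode of 0754 maps to the identical VBox bit
+ *       pattern; the AssertCompile statements below are what guarantee that
+ *       the two bit layouts agree.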
+ * @sa sf_access_permissions_to_linux + */ +DECLINLINE(uint32_t) sf_access_permissions_to_vbox(int fAttr) +{ + /* Access bits should be the same: */ + AssertCompile(RTFS_UNIX_IRUSR == S_IRUSR); + AssertCompile(RTFS_UNIX_IWUSR == S_IWUSR); + AssertCompile(RTFS_UNIX_IXUSR == S_IXUSR); + AssertCompile(RTFS_UNIX_IRGRP == S_IRGRP); + AssertCompile(RTFS_UNIX_IWGRP == S_IWGRP); + AssertCompile(RTFS_UNIX_IXGRP == S_IXGRP); + AssertCompile(RTFS_UNIX_IROTH == S_IROTH); + AssertCompile(RTFS_UNIX_IWOTH == S_IWOTH); + AssertCompile(RTFS_UNIX_IXOTH == S_IXOTH); + + return fAttr & RTFS_UNIX_ALL_ACCESS_PERMS; +} + +#endif /* !GA_INCLUDED_SRC_linux_sharedfolders_vfsmod_h */ -- cgit v1.2.3