summaryrefslogtreecommitdiffstats
path: root/src/VBox/Additions/linux/sharedfolders
diff options
context:
space:
mode:
Diffstat (limited to 'src/VBox/Additions/linux/sharedfolders')
-rw-r--r--src/VBox/Additions/linux/sharedfolders/.scm-settings39
-rw-r--r--src/VBox/Additions/linux/sharedfolders/Makefile.kmk57
-rw-r--r--src/VBox/Additions/linux/sharedfolders/Makefile.module119
-rw-r--r--src/VBox/Additions/linux/sharedfolders/dirops.c1417
-rwxr-xr-xsrc/VBox/Additions/linux/sharedfolders/files_vboxsf107
-rw-r--r--src/VBox/Additions/linux/sharedfolders/lnkops.c305
-rw-r--r--src/VBox/Additions/linux/sharedfolders/mount.vboxsf.c702
-rw-r--r--src/VBox/Additions/linux/sharedfolders/regops.c3881
-rw-r--r--src/VBox/Additions/linux/sharedfolders/testcase/tstmmap.c126
-rw-r--r--src/VBox/Additions/linux/sharedfolders/utils.c1263
-rw-r--r--src/VBox/Additions/linux/sharedfolders/vbsfmount.c113
-rw-r--r--src/VBox/Additions/linux/sharedfolders/vbsfmount.h142
-rw-r--r--src/VBox/Additions/linux/sharedfolders/vfsmod.c1753
-rw-r--r--src/VBox/Additions/linux/sharedfolders/vfsmod.h478
14 files changed, 10502 insertions, 0 deletions
diff --git a/src/VBox/Additions/linux/sharedfolders/.scm-settings b/src/VBox/Additions/linux/sharedfolders/.scm-settings
new file mode 100644
index 00000000..51bb9cee
--- /dev/null
+++ b/src/VBox/Additions/linux/sharedfolders/.scm-settings
@@ -0,0 +1,39 @@
+# $Id: .scm-settings $
+## @file
+# Source code massager settings for linux shared folders module.
+#
+
+#
+# Copyright (C) 2010-2022 Oracle and/or its affiliates.
+#
+# This file is part of VirtualBox base platform packages, as
+# available from https://www.virtualbox.org.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation, in version 3 of the
+# License.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, see <https://www.gnu.org/licenses>.
+#
+# SPDX-License-Identifier: GPL-3.0-only
+#
+
+
+/*.c|/*.h: --no-convert-tabs
+/Makefile.module: --treat-as Makefile
+
+# MIT licence to make it easier to re-import code from the in-kernel version.
+/dirops.c: --license-mit
+/lnkops.c: --license-mit
+/regops.c: --license-mit
+/utils.c: --license-mit
+/vbsfmount.h: --license-mit
+/vfsmod.c: --license-mit
+/vfsmod.h: --license-mit
diff --git a/src/VBox/Additions/linux/sharedfolders/Makefile.kmk b/src/VBox/Additions/linux/sharedfolders/Makefile.kmk
new file mode 100644
index 00000000..0b7787d0
--- /dev/null
+++ b/src/VBox/Additions/linux/sharedfolders/Makefile.kmk
@@ -0,0 +1,57 @@
+# $Id: Makefile.kmk $
+## @file
+# Sub-Makefile for the vboxsf (linux shared folders module).
+#
+
+#
+# Copyright (C) 2006-2022 Oracle and/or its affiliates.
+#
+# This file is part of VirtualBox base platform packages, as
+# available from https://www.virtualbox.org.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation, in version 3 of the
+# License.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, see <https://www.gnu.org/licenses>.
+#
+# SPDX-License-Identifier: GPL-3.0-only
+#
+
+SUB_DEPTH = ../../../../..
+include $(KBUILD_PATH)/subheader.kmk
+
+
+#
+# Populate FILES_VBOXSF_NOBIN
+#
+INSTALLS += vboxsf-src
+include $(PATH_SUB_CURRENT)/files_vboxsf
+vboxsf-src_INST = $(INST_ADDITIONS)src/vboxsf/
+vboxsf-src_SOURCES = \
+ $(subst $(DQUOTE),,$(FILES_VBOXSF_NOBIN))
+vboxsf-src_EXEC_SOURCES = \
+ $(subst $(DQUOTE),,$(FILES_VBOXSF_BIN))
+
+# Build test for the Guest Additions kernel modules (kmk check).
+$(evalcall2 VBOX_LINUX_KMOD_TEST_BUILD_RULE_FN,vboxsf-src,vboxguest-src,)
+
+#
+# The mount util.
+#
+PROGRAMS += mount.vboxsf
+mount.vboxsf_TEMPLATE = NewVBoxGuestR3Exe
+mount.vboxsf_DEFS = _GNU_SOURCE
+mount.vboxsf_SOURCES = \
+ mount.vboxsf.c \
+ vbsfmount.c
+
+include $(FILE_KBUILD_SUB_FOOTER)
+
diff --git a/src/VBox/Additions/linux/sharedfolders/Makefile.module b/src/VBox/Additions/linux/sharedfolders/Makefile.module
new file mode 100644
index 00000000..d6aca2a1
--- /dev/null
+++ b/src/VBox/Additions/linux/sharedfolders/Makefile.module
@@ -0,0 +1,119 @@
+# $Id: Makefile.module $
+## @file
+# VBox Linux Shared Folders VFS Module Makefile.
+#
+# (For 2.6.x this file must be 'Makefile'!)
+#
+
+#
+# Copyright (C) 2006-2022 Oracle and/or its affiliates.
+#
+# This file is part of VirtualBox base platform packages, as
+# available from https://www.virtualbox.org.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation, in version 3 of the
+# License.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, see <https://www.gnu.org/licenses>.
+#
+# SPDX-License-Identifier: GPL-3.0-only
+#
+
+# Linux kbuild sets this to our source directory if we are called from there
+obj ?= $(CURDIR)
+include $(obj)/Makefile-header.gmk
+VBOXSF_DIR = $(VBOX_MODULE_SRC_DIR)
+
+# Allow building directly from the subdirectory without assuming the toplevel
+# makefile has done the copying. Not the default use case, but can be handy.
+ifndef KBUILD_EXTRA_SYMBOLS
+KBUILD_EXTRA_SYMBOLS=$(abspath $(VBOXSF_DIR)/../vboxguest/Module.symvers)
+endif
+
+VBOXMOD_NAME = vboxsf
+VBOXMOD_OBJS = \
+ vfsmod.o \
+ dirops.o \
+ lnkops.o \
+ regops.o \
+ utils.o \
+ VBoxGuestR0LibGenericRequest.o \
+ VBoxGuestR0LibHGCM.o \
+ VBoxGuestR0LibIdc.o \
+ VBoxGuestR0LibIdc-unix.o \
+ VBoxGuestR0LibInit.o \
+ VBoxGuestR0LibPhysHeap.o \
+ VBoxGuestR0LibSharedFolders.o
+ifeq ($(VBOX_KBUILD_TARGET_ARCH),x86)
+VBOXMOD_OBJS += \
+ divdi3.o \
+ moddi3.o \
+ udivdi3.o \
+ udivmoddi4.o \
+ umoddi3.o \
+ qdivrem.o
+endif
+VBOXMOD_INCL = \
+ $(VBOXSF_DIR) \
+ $(VBOXSF_DIR)include \
+ $(VBOXSF_DIR)r0drv/linux
+VBOXMOD_DEFS = \
+ RT_OS_LINUX \
+ IN_RING0 \
+ IN_RT_R0 \
+ IN_SUP_R0 \
+ VBOX \
+ VBOX_WITH_HGCM \
+ IN_MODULE \
+ IN_GUEST \
+ IN_GUEST_R0 \
+ RT_NO_EXPORT_SYMBOL
+ifeq ($(VBOX_KBUILD_TARGET_ARCH),amd64)
+VBOXMOD_DEFS += VBOX_WITH_64_BITS_GUESTS
+endif
+ifneq ($(filter %uek.x86_64,$(KERN_VER)),)
+VBOXMOD_DEFS += VBOX_UEK
+endif
+VBOXMOD_CFLAGS := $(call VBOX_GCC_CHECK_CC,-Wno-declaration-after-statement,-Wno-declaration-after-statement,,)
+VBOXMOD_CFLAGS += $(call VBOX_GCC_CHECK_CC,-fno-pie,-fno-pie,,)
+ifneq ($(KERN_VERSION),24)
+VBOXMOD_CFLAGS += -include $(VBOXSF_DIR)/include/VBox/VBoxGuestMangling.h
+## @todo r-bird: What's with -fshort-wchar here?? We either need that or we don't, right? It should be 2.6+ only.
+VBOXMOD_CFLAGS += -fshort-wchar
+endif
+ifdef VBOX_NO_OMIT_FRAME_POINTER
+VBOXMOD_CFLAGS += -fno-omit-frame-pointer
+endif
+
+ifneq ($(KERN_VERSION),24)
+# special hack for Fedora Core 6 2.6.18 (fc6), rhel5 2.6.18 (el5),
+# ClarkConnect 4.3 (cc4) and ClarkConnect 5 (v5)
+ ifeq ($(KERNELRELEASE),)
+VBOXMOD_CFLAGS += $(foreach inc,$(KERN_INCL),\
+ $(if $(wildcard $(inc)/linux/utsrelease.h),\
+ $(if $(shell grep '"2.6.18.*fc6.*"' $(inc)/linux/utsrelease.h; \
+ grep '"2.6.18.*el5.*"' $(inc)/linux/utsrelease.h; \
+ grep '"2.6.18.*v5.*"' $(inc)/linux/utsrelease.h; \
+ grep '"2.6.18.*cc4.*"' $(inc)/linux/utsrelease.h),\
+ -DKERNEL_FC6,),))
+ else
+VBOXMOD_CFLAGS += $(if $(shell echo "$(KERNELRELEASE)"|grep '2.6.18.*fc6.*';\
+ echo "$(KERNELRELEASE)"|grep '2.6.18.*el5.*';\
+ echo "$(KERNELRELEASE)"|grep '2.6.18.*v5.*';\
+ echo "$(KERNELRELEASE)"|grep '2.6.18.*cc4.*'),\
+ -DKERNEL_FC6,)
+ endif
+endif
+
+VBOXMOD_CLEAN = . linux r0drv r0drv/linux
+
+include $(obj)/Makefile-footer.gmk
+
diff --git a/src/VBox/Additions/linux/sharedfolders/dirops.c b/src/VBox/Additions/linux/sharedfolders/dirops.c
new file mode 100644
index 00000000..c521e6a3
--- /dev/null
+++ b/src/VBox/Additions/linux/sharedfolders/dirops.c
@@ -0,0 +1,1417 @@
+/* $Id: dirops.c $ */
+/** @file
+ * vboxsf - VBox Linux Shared Folders VFS, directory inode and file operations.
+ */
+
+/*
+ * Copyright (C) 2006-2022 Oracle and/or its affiliates.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#include "vfsmod.h"
+#include <iprt/err.h>
+
+#if RTLNX_VER_MAX(4,7,0)
+# define d_in_lookup(a_pDirEntry) (d_unhashed(a_pDirEntry))
+#endif
+
+
+
+/**
+ * Opens a directory (implements file_operations::open).
+ *
+ * Allocates the per-open struct vbsf_dir_info, asks the host to open the
+ * directory for reading and, on success, links the resulting host handle to
+ * the inode and stores the directory info in file->private_data.  The entry
+ * buffer is not allocated here; vbsf_dir_read_more() does that on first use.
+ *
+ * @returns 0 on success, negative errno otherwise:
+ *          -EINVAL if the super or inode info is missing, -ENOMEM on
+ *          allocation failure, -ENOENT if the host reports that the directory
+ *          does not exist, -EPERM if the host request itself fails.
+ * @param   inode   The directory inode.
+ * @param   file    The file structure being opened.
+ */
+static int vbsf_dir_open(struct inode *inode, struct file *file)
+{
+    struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(inode->i_sb);
+    struct vbsf_inode_info *sf_i       = VBSF_GET_INODE_INFO(inode);
+    struct dentry          *pDirEntry  = VBSF_GET_F_DENTRY(file);
+    struct vbsf_dir_info   *pDirInfo;
+    VBOXSFCREATEREQ        *pCreateReq;
+    int                     rcRet;
+
+    SFLOGFLOW(("vbsf_dir_open: inode=%p file=%p %s\n", inode, file, sf_i && sf_i->path ? sf_i->path->String.ach : NULL));
+    AssertReturn(pSuperInfo, -EINVAL);
+    AssertReturn(sf_i, -EINVAL);
+    AssertReturn(!file->private_data, 0);
+
+    /*
+     * The directory info structure first; bail out right away without it.
+     */
+    pDirInfo = kmalloc(sizeof(*pDirInfo), GFP_KERNEL);
+    if (!pDirInfo) {
+        rcRet = -ENOMEM;
+        SFLOGFLOW(("vbsf_dir_open(%p,%p): returns %d\n", inode, file, rcRet));
+        return rcRet;
+    }
+    RT_ZERO(*pDirInfo);
+    pDirInfo->u32Magic = VBSF_DIR_INFO_MAGIC;
+    sema_init(&pDirInfo->Lock, 1);
+
+    /*
+     * The host request, sized to hold the directory path.
+     */
+    pCreateReq = (VBOXSFCREATEREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF(VBOXSFCREATEREQ, StrPath.String) + sf_i->path->u16Size);
+    if (!pCreateReq) {
+        LogRelMaxFunc(64, ("failed to allocate %zu bytes for '%s'\n",
+                           RT_UOFFSETOF(VBOXSFCREATEREQ, StrPath.String) + sf_i->path->u16Size, sf_i->path->String.ach));
+        rcRet = -ENOMEM;
+        goto l_discard_dir_info;
+    }
+
+    memcpy(&pCreateReq->StrPath, sf_i->path, SHFLSTRING_HEADER_SIZE + sf_i->path->u16Size);
+    RT_ZERO(pCreateReq->CreateParms);
+    pCreateReq->CreateParms.Handle      = SHFL_HANDLE_NIL;
+    pCreateReq->CreateParms.CreateFlags = SHFL_CF_DIRECTORY
+                                        | SHFL_CF_ACT_OPEN_IF_EXISTS
+                                        | SHFL_CF_ACT_FAIL_IF_NEW
+                                        | SHFL_CF_ACCESS_READ;
+
+    LogFunc(("calling VbglR0SfHostReqCreate on folder %s, flags %#x\n",
+             sf_i->path->String.utf8, pCreateReq->CreateParms.CreateFlags));
+    rcRet = VbglR0SfHostReqCreate(pSuperInfo->map.root, pCreateReq);
+    if (!RT_SUCCESS(rcRet)) {
+        rcRet = -EPERM;
+    } else if (pCreateReq->CreateParms.Result != SHFL_FILE_EXISTS) {
+        /*
+         * The host says there is no such directory, so the cached dentry and
+         * inode info that led us here are probably stale.
+         */
+        Assert(pCreateReq->CreateParms.Handle == SHFL_HANDLE_NIL);
+        /** @todo do more to invalidate dentry and inode here. */
+        vbsf_dentry_invalidate_ttl(pDirEntry);
+        sf_i->force_restat = true;
+        rcRet = -ENOENT;
+    } else {
+        Assert(pCreateReq->CreateParms.Handle != SHFL_HANDLE_NIL);
+
+        /*
+         * Refresh the inode with the attributes the host just returned and
+         * extend the TTL of the dentry chain leading to this directory.
+         */
+        vbsf_update_inode(inode, sf_i, &pCreateReq->CreateParms.Info, pSuperInfo,
+                          true /*fLocked*/ /** @todo inode locking */, 0 /*fSetAttrs*/);
+        vbsf_dentry_chain_increase_ttl(pDirEntry);
+
+        /* Set up the handle and attach it to the inode. */
+        pDirInfo->Handle.hHost  = pCreateReq->CreateParms.Handle;
+        pDirInfo->Handle.cRefs  = 1;
+        pDirInfo->Handle.fFlags = VBSF_HANDLE_F_READ | VBSF_HANDLE_F_DIR | VBSF_HANDLE_F_MAGIC;
+        vbsf_handle_append(sf_i, &pDirInfo->Handle);
+
+        file->private_data = pDirInfo;
+        VbglR0PhysHeapFree(pCreateReq);
+        SFLOGFLOW(("vbsf_dir_open(%p,%p): returns 0; hHost=%#llx\n", inode, file, pDirInfo->Handle.hHost));
+        return 0;
+    }
+    VbglR0PhysHeapFree(pCreateReq);
+
+l_discard_dir_info:
+    /* Failure: mark the structure dead before handing it back to the allocator. */
+    pDirInfo->u32Magic = VBSF_DIR_INFO_MAGIC_DEAD;
+    kfree(pDirInfo);
+    SFLOGFLOW(("vbsf_dir_open(%p,%p): returns %d\n", inode, file, rcRet));
+    return rcRet;
+}
+
+
+/**
+ * Releases a directory file (the original comment states this is called when
+ * the reference count of [file] drops to zero).
+ *
+ * Invalidates the non-handle part of the directory info, frees the entry
+ * buffer and drops our reference on the host handle via
+ * vbsf_handle_release().
+ *
+ * @returns 0, always.
+ * @param   inode   The directory inode.
+ * @param   file    The file structure being released.
+ */
+static int vbsf_dir_release(struct inode *inode, struct file *file)
+{
+    struct vbsf_dir_info   *pDirInfo = (struct vbsf_dir_info *)file->private_data;
+    struct vbsf_super_info *pSuperInfo;
+
+    SFLOGFLOW(("vbsf_dir_release(%p,%p): sf_d=%p hHost=%#llx\n", inode, file, pDirInfo, pDirInfo ? pDirInfo->Handle.hHost : SHFL_HANDLE_NIL));
+
+    /* Nothing to tear down if no directory info is attached. */
+    if (!pDirInfo)
+        return 0;
+
+    pSuperInfo = VBSF_GET_SUPER_INFO(inode->i_sb);
+
+    /* Invalidate everything except the embedded handle. */
+    pDirInfo->u32Magic     = VBSF_DIR_INFO_MAGIC_DEAD;
+    pDirInfo->fNoMoreFiles = false;
+    pDirInfo->cEntriesLeft = 0;
+    pDirInfo->cbValid      = 0;
+    pDirInfo->pEntry       = NULL;
+    kfree(pDirInfo->pBuf); /* kfree(NULL) is a no-op */
+    pDirInfo->pBuf         = NULL;
+
+    /* Closes the handle and frees the structure when the last reference is released. */
+    vbsf_handle_release(&pDirInfo->Handle, pSuperInfo, "vbsf_dir_release");
+    return 0;
+}
+
+
+/**
+ * Maps the type bits of an RTFMODE value to the matching DT_xxx directory
+ * entry type.
+ *
+ * @returns DT_xxx value; DT_UNKNOWN when the type bits match none of the
+ *          handled RTFS_TYPE_xxx values.
+ * @param   fMode   The file mode; only the RTFS_TYPE_MASK bits are examined.
+ */
+DECLINLINE(int) vbsf_get_d_type(RTFMODE fMode)
+{
+    int iDirType = DT_UNKNOWN;
+
+    switch (fMode & RTFS_TYPE_MASK) {
+        case RTFS_TYPE_FIFO:       iDirType = DT_FIFO; break;
+        case RTFS_TYPE_DEV_CHAR:   iDirType = DT_CHR;  break;
+        case RTFS_TYPE_DIRECTORY:  iDirType = DT_DIR;  break;
+        case RTFS_TYPE_DEV_BLOCK:  iDirType = DT_BLK;  break;
+        case RTFS_TYPE_FILE:       iDirType = DT_REG;  break;
+        case RTFS_TYPE_SYMLINK:    iDirType = DT_LNK;  break;
+        case RTFS_TYPE_SOCKET:     iDirType = DT_SOCK; break;
+        case RTFS_TYPE_WHITEOUT:   iDirType = DT_WHT;  break;
+        default:                   break;
+    }
+    return iDirType;
+}
+
+
+/**
+ * Fetches the next batch of directory entries from the host into the
+ * directory info buffer.
+ *
+ * The entry buffer is allocated on first use: pSuperInfo->cbDirBuf bytes if
+ * that allocation succeeds, otherwise 4KB.  After the call sf_d->pEntry
+ * points at the start of the buffer and cbValid / cEntriesLeft /
+ * fNoMoreFiles describe what the host returned.
+ *
+ * @returns 0 on success (including "no more files"), negative errno on
+ *          error: -ENOMEM on allocation failure, -EPROTO on any other host
+ *          failure.
+ * @param   sf_d        The directory info of the open directory.
+ * @param   pSuperInfo  The super block info (supplies the mapping root and
+ *                      the preferred buffer size).
+ * @param   fRestart    Whether to restart the enumeration from the top.
+ */
+static int vbsf_dir_read_more(struct vbsf_dir_info *sf_d, struct vbsf_super_info *pSuperInfo, bool fRestart)
+{
+    VBOXSFLISTDIRREQ *pListReq;
+    int               rcRet;
+
+    /*
+     * Once the host has told us the listing is complete, only a restart
+     * justifies bothering it again.
+     */
+    if (sf_d->fNoMoreFiles && !fRestart) {
+        SFLOGFLOW(("vbsf_dir_read_more: no more files\n"));
+        return 0;
+    }
+    sf_d->fNoMoreFiles = false;
+
+    /*
+     * Lazily allocate the entry buffer, falling back on 4KB if the
+     * configured size cannot be had.
+     */
+    if (!sf_d->pBuf) {
+        sf_d->pBuf = (PSHFLDIRINFO)kmalloc(pSuperInfo->cbDirBuf, GFP_KERNEL);
+        if (sf_d->pBuf) {
+            sf_d->cbBuf = pSuperInfo->cbDirBuf;
+        } else {
+            sf_d->pBuf = (PSHFLDIRINFO)kmalloc(_4K, GFP_KERNEL);
+            if (!sf_d->pBuf) {
+                LogRelMax(10, ("vbsf_dir_read_more: Failed to allocate buffer!\n"));
+                return -ENOMEM;
+            }
+            sf_d->cbBuf = _4K;
+        }
+    }
+
+    /*
+     * Issue the list request.
+     */
+    pListReq = (VBOXSFLISTDIRREQ *)VbglR0PhysHeapAlloc(sizeof(*pListReq));
+    if (!pListReq) {
+        rcRet = -ENOMEM;
+    } else {
+        rcRet = VbglR0SfHostReqListDirContig2x(pSuperInfo->map.root, pListReq, sf_d->Handle.hHost, NULL, NIL_RTGCPHYS64,
+                                               fRestart ? SHFL_LIST_RESTART : SHFL_LIST_NONE,
+                                               sf_d->pBuf, virt_to_phys(sf_d->pBuf), sf_d->cbBuf);
+
+        /* Either way the cursor goes back to the start of the buffer. */
+        sf_d->pEntry = sf_d->pBuf;
+        if (RT_SUCCESS(rcRet)) {
+            sf_d->cbValid      = pListReq->Parms.cb32Buffer.u.value32;
+            sf_d->cEntriesLeft = pListReq->Parms.c32Entries.u.value32;
+            sf_d->fNoMoreFiles = pListReq->Parms.f32More.u.value32 == 0;
+        } else {
+            sf_d->cbValid      = 0;
+            sf_d->cEntriesLeft = 0;
+            if (rcRet == VERR_NO_MORE_FILES) {
+                /* End of the listing is not an error for our callers. */
+                sf_d->fNoMoreFiles = true;
+                rcRet = 0;
+            } else {
+                /* In theory we could end up here with a buffer overflow, but
+                   with a 4KB minimum buffer size that's very unlikely with the
+                   typical filename length of today's file systems (2019). */
+                LogRelMax(16, ("vbsf_dir_read_more: VbglR0SfHostReqListDirContig2x -> %Rrc\n", rcRet));
+                rcRet = -EPROTO;
+            }
+        }
+        VbglR0PhysHeapFree(pListReq);
+    }
+
+    SFLOGFLOW(("vbsf_dir_read_more: returns %d; cbValid=%#x cEntriesLeft=%#x fNoMoreFiles=%d\n",
+               rcRet, sf_d->cbValid, sf_d->cEntriesLeft, sf_d->fNoMoreFiles));
+    return rcRet;
+}
+
+
+/**
+ * Emits one directory entry whose name must first be converted with
+ * vbsf_nlscpy().
+ *
+ * Kept out of line so the conversion buffer does not cost stack space in the
+ * UTF-8 code path of the caller.
+ *
+ * @returns true if the caller should continue emitting entries, false if
+ *          the emit callback asked to stop.  A name that fails conversion is
+ *          silently skipped (returns true).
+ */
+DECL_NO_INLINE(static, bool) vbsf_dir_emit_nls(
+# if RTLNX_VER_MIN(3,11,0)
+                                               struct dir_context *ctx,
+# else
+                                               void *opaque, filldir_t filldir, loff_t offPos,
+# endif
+                                               const char *pszSrcName, uint16_t cchSrcName, ino_t d_ino, int d_type,
+                                               struct vbsf_super_info *pSuperInfo)
+{
+    char      szConverted[NAME_MAX];
+    int const rcConv = vbsf_nlscpy(pSuperInfo, szConverted, sizeof(szConverted), pszSrcName, cchSrcName);
+
+    if (rcConv != 0) {
+        /* Assuming this is a buffer overflow issue, just silently skip it. */
+        SFLOGFLOW(("vbsf_dir_emit_nls: vbsf_nlscopy failed with %d for '%s'\n", rcConv, pszSrcName));
+        return true;
+    }
+
+#if RTLNX_VER_MIN(3,11,0)
+    return dir_emit(ctx, szConverted, strlen(szConverted), d_ino, d_type);
+#else
+    return filldir(opaque, szConverted, strlen(szConverted), offPos, d_ino, d_type) == 0;
+#endif
+}
+
+
+/**
+ * Feeds directory entries to the VFS (named vbsf_dir_iterate on 3.11+ kernels
+ * and vbsf_dir_read on older ones).
+ *
+ * Entries are fetched from the host in batches by vbsf_dir_read_more() and
+ * handed to dir_emit() (3.11+) or the [filldir] callback (older kernels) with
+ * the following values:
+ *
+ * name : name of the entry, converted by vbsf_dir_emit_nls() unless the NLS is UTF-8
+ * type : DT_xxx value derived from the host file mode by vbsf_get_d_type()
+ * pos : position/index of the entry
+ * ino : fake inode number (position + 0xbeef)
+ *
+ * [dir] contains:
+ * f_pos : cursor into the directory listing
+ * private_data : the struct vbsf_dir_info set up by vbsf_dir_open()
+ *
+ * The whole operation runs with the directory info semaphore held. If the
+ * requested position differs from the one recorded in the directory info, the
+ * listing is restarted (when seeking backwards) and entries are skipped until
+ * the requested position is reached.
+ *
+ * Entries are produced until:
+ *
+ * a. there are no more entries (vbsf_dir_read_more() yields none); returns 0
+ * b. the emit callback asks us to stop; returns 0
+ * c. vbsf_dir_read_more() fails; returns its negative errno
+ *
+ * @returns 0 on success, negative errno on failure (-ERESTARTSYS if waiting
+ * for the lock was interrupted).
+ */
+#if RTLNX_VER_MIN(3,11,0)
+static int vbsf_dir_iterate(struct file *dir, struct dir_context *ctx)
+#else
+static int vbsf_dir_read(struct file *dir, void *opaque, filldir_t filldir)
+#endif
+{
+#if RTLNX_VER_MIN(3,11,0)
+ loff_t offPos = ctx->pos;
+#else
+ loff_t offPos = dir->f_pos;
+#endif
+ struct vbsf_dir_info *sf_d = (struct vbsf_dir_info *)dir->private_data;
+ struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(VBSF_GET_F_DENTRY(dir)->d_sb);
+ int rc;
+
+ /*
+ * Lock the directory info structures.
+ */
+ if (RT_LIKELY(down_interruptible(&sf_d->Lock) == 0)) {
+ /* likely */
+ } else
+ return -ERESTARTSYS;
+
+ /*
+ * Any seek performed in the mean time?
+ */
+ if (offPos == sf_d->offPos) {
+ /* likely */
+ } else {
+ /* Restart the search if offPos is lower than the current buffer position. */
+ loff_t offCurEntry = sf_d->offPos;
+ if (offPos < offCurEntry) {
+ rc = vbsf_dir_read_more(sf_d, pSuperInfo, true /*fRestart*/);
+ if (rc == 0)
+ offCurEntry = 0;
+ else {
+ up(&sf_d->Lock);
+ return rc;
+ }
+ }
+
+ /* Skip ahead to offPos.
+ NOTE(review): in the entry-by-entry branch below the advanced pEntry is a
+ local that is not stored back into sf_d->pEntry, it is advanced to
+ &utf8[u16Length] whereas the production loop further down advances by
+ RT_UOFFSETOF(SHFLDIRINFO, name.String) + u16Size, and the do/while
+ condition tests offPos < sf_d->offPos. Confirm that this is intended. */
+ while (offCurEntry < offPos) {
+ uint32_t cEntriesLeft = sf_d->cEntriesLeft;
+ if ((uint64_t)(offPos - offCurEntry) >= cEntriesLeft) {
+ /* Skip the current buffer and read the next: */
+ offCurEntry += cEntriesLeft;
+ sf_d->offPos = offCurEntry;
+ sf_d->cEntriesLeft = 0;
+ rc = vbsf_dir_read_more(sf_d, pSuperInfo, false /*fRestart*/);
+ if (rc != 0 || sf_d->cEntriesLeft == 0) {
+ up(&sf_d->Lock);
+ return rc;
+ }
+ } else {
+ do
+ {
+ PSHFLDIRINFO pEntry = sf_d->pEntry;
+ pEntry = (PSHFLDIRINFO)&pEntry->name.String.utf8[pEntry->name.u16Length];
+ AssertLogRelBreakStmt( cEntriesLeft == 1
+ || (uintptr_t)pEntry - (uintptr_t)sf_d->pBuf
+ <= sf_d->cbValid - RT_UOFFSETOF(SHFLDIRINFO, name.String),
+ sf_d->cEntriesLeft = 0);
+ sf_d->cEntriesLeft = --cEntriesLeft;
+ sf_d->offPos = ++offCurEntry;
+ } while (offPos < sf_d->offPos);
+ }
+ }
+ }
+
+ /*
+ * Handle '.' and '..' specially so we get the inode numbers right.
+ * We'll skip any '.' or '..' returned by the host (included in pos,
+ * however, to simplify the above skipping code).
+ */
+ if (offPos < 2) {
+#if RTLNX_VER_MIN(3,11,0)
+ if (offPos == 0) {
+ if (dir_emit_dot(dir, ctx))
+ dir->f_pos = ctx->pos = sf_d->offPos = offPos = 1;
+ else {
+ up(&sf_d->Lock);
+ return 0;
+ }
+ }
+ if (offPos == 1) {
+ if (dir_emit_dotdot(dir, ctx))
+ dir->f_pos = ctx->pos = sf_d->offPos = offPos = 2;
+ else {
+ up(&sf_d->Lock);
+ return 0;
+ }
+ }
+#else
+ if (offPos == 0) {
+ rc = filldir(opaque, ".", 1, 0, VBSF_GET_F_DENTRY(dir)->d_inode->i_ino, DT_DIR);
+ if (!rc)
+ dir->f_pos = sf_d->offPos = offPos = 1;
+ else {
+ up(&sf_d->Lock);
+ return 0;
+ }
+ }
+ if (offPos == 1) {
+# if RTLNX_VER_MIN(2,5,5)
+ rc = filldir(opaque, "..", 2, 1, parent_ino(VBSF_GET_F_DENTRY(dir)), DT_DIR);
+# else
+ rc = filldir(opaque, "..", 2, 1, VBSF_GET_F_DENTRY(dir)->d_parent->d_inode->i_ino, DT_DIR);
+# endif
+ if (!rc)
+ dir->f_pos = sf_d->offPos = offPos = 2;
+ else {
+ up(&sf_d->Lock);
+ return 0;
+ }
+ }
+#endif
+ }
+
+ /*
+ * Produce stuff.
+ */
+ Assert(offPos == sf_d->offPos);
+ for (;;) {
+ PSHFLDIRINFO pBuf;
+ PSHFLDIRINFO pEntry;
+
+ /*
+ * Do we need to read more?
+ */
+ uint32_t cbValid = sf_d->cbValid;
+ uint32_t cEntriesLeft = sf_d->cEntriesLeft;
+ if (!cEntriesLeft) {
+ rc = vbsf_dir_read_more(sf_d, pSuperInfo, false /*fRestart*/);
+ if (rc == 0) {
+ cEntriesLeft = sf_d->cEntriesLeft;
+ if (!cEntriesLeft) {
+ up(&sf_d->Lock);
+ return 0;
+ }
+ cbValid = sf_d->cbValid;
+ } else {
+ up(&sf_d->Lock);
+ return rc;
+ }
+ }
+
+ /*
+ * Feed entries to the caller.
+ */
+ pBuf = sf_d->pBuf;
+ pEntry = sf_d->pEntry;
+ do {
+ /*
+ * Validate the entry in case the host is messing with us.
+ * We're ASSUMING the host gives us a zero terminated string (UTF-8) here.
+ */
+ uintptr_t const offEntryInBuf = (uintptr_t)pEntry - (uintptr_t)pBuf;
+ uint16_t cbSrcName;
+ uint16_t cchSrcName;
+ AssertLogRelMsgBreak(offEntryInBuf + RT_UOFFSETOF(SHFLDIRINFO, name.String) <= cbValid,
+ ("%#llx + %#x vs %#x\n", offEntryInBuf, RT_UOFFSETOF(SHFLDIRINFO, name.String), cbValid));
+ cbSrcName = pEntry->name.u16Size;
+ cchSrcName = pEntry->name.u16Length;
+ AssertLogRelBreak(offEntryInBuf + RT_UOFFSETOF(SHFLDIRINFO, name.String) + cbSrcName <= cbValid);
+ AssertLogRelBreak(cchSrcName < cbSrcName);
+ AssertLogRelBreak(pEntry->name.String.ach[cchSrcName] == '\0');
+
+ /*
+ * Filter out '.' and '..' entries.
+ */
+ if ( cchSrcName > 2
+ || pEntry->name.String.ach[0] != '.'
+ || ( cchSrcName == 2
+ && pEntry->name.String.ach[1] != '.')) {
+ int const d_type = vbsf_get_d_type(pEntry->Info.Attr.fMode);
+ ino_t const d_ino = (ino_t)offPos + 0xbeef; /* very fake */
+ bool fContinue;
+ if (pSuperInfo->fNlsIsUtf8) {
+#if RTLNX_VER_MIN(3,11,0)
+ fContinue = dir_emit(ctx, pEntry->name.String.ach, cchSrcName, d_ino, d_type);
+#else
+ fContinue = filldir(opaque, pEntry->name.String.ach, cchSrcName, offPos, d_ino, d_type) == 0;
+#endif
+ } else {
+#if RTLNX_VER_MIN(3,11,0)
+ fContinue = vbsf_dir_emit_nls(ctx, pEntry->name.String.ach, cchSrcName, d_ino, d_type, pSuperInfo);
+#else
+ fContinue = vbsf_dir_emit_nls(opaque, filldir, offPos, pEntry->name.String.ach, cchSrcName,
+ d_ino, d_type, pSuperInfo);
+#endif
+ }
+ if (fContinue) {
+ /* likely */
+ } else {
+ sf_d->cEntriesLeft = cEntriesLeft;
+ sf_d->pEntry = pEntry;
+ sf_d->offPos = offPos;
+ up(&sf_d->Lock);
+ return 0;
+ }
+ }
+
+ /*
+ * Advance to the next entry.
+ */
+ pEntry = (PSHFLDIRINFO)((uintptr_t)pEntry + RT_UOFFSETOF(SHFLDIRINFO, name.String) + cbSrcName);
+ offPos += 1;
+ dir->f_pos = offPos;
+#if RTLNX_VER_MIN(3,11,0)
+ ctx->pos = offPos;
+#endif
+ cEntriesLeft -= 1;
+ } while (cEntriesLeft > 0);
+
+ /* Done with all available entries. */
+ sf_d->offPos = offPos + cEntriesLeft;
+ sf_d->pEntry = pBuf;
+ sf_d->cEntriesLeft = 0;
+ }
+}
+
+
+/**
+ * Directory file operations.
+ *
+ * Wires vbsf_dir_open() and vbsf_dir_release() into the VFS together with the
+ * directory iterator, whose member name and signature depend on the kernel
+ * version: iterate_shared (4.7+), iterate (3.11+) or readdir (older).
+ * The llseek member is only set on 2.6.37 and later.
+ */
+struct file_operations vbsf_dir_fops = {
+ .open = vbsf_dir_open,
+#if RTLNX_VER_MIN(4,7,0)
+ .iterate_shared = vbsf_dir_iterate,
+#elif RTLNX_VER_MIN(3,11,0)
+ .iterate = vbsf_dir_iterate,
+#else
+ .readdir = vbsf_dir_read,
+#endif
+ .release = vbsf_dir_release,
+ .read = generic_read_dir,
+#if RTLNX_VER_MIN(2,6,37)
+ .llseek = generic_file_llseek
+#endif
+};
+
+
+
+/*********************************************************************************************************************************
+* Directory Inode Operations *
+*********************************************************************************************************************************/
+
+/**
+ * Worker for vbsf_inode_lookup(), vbsf_create_worker() and
+ * vbsf_inode_instantiate().
+ *
+ * Allocates the vbsf inode info, obtains a new inode with a unique number
+ * from the parent's super block, initializes both from @a pObjInfo and
+ * optionally instantiates @a dentry with the new inode.
+ *
+ * @returns The new inode on success, NULL on failure (inode info allocation
+ *          or inode retrieval failed).
+ * @param   parent          The parent directory inode (supplies the super block).
+ * @param   dentry          The dentry to instantiate; only used when
+ *                          @a fInstantiate is true.
+ * @param   path            The path of the new object.  Stored in the new
+ *                          inode info on success; untouched on failure.
+ * @param   pObjInfo        The object info used to initialize the inode.
+ * @param   pSuperInfo      The super block info.
+ * @param   fInstantiate    Whether to call d_instantiate() on @a dentry.
+ */
+static struct inode *vbsf_create_inode(struct inode *parent, struct dentry *dentry, PSHFLSTRING path,
+                                       PSHFLFSOBJINFO pObjInfo, struct vbsf_super_info *pSuperInfo, bool fInstantiate)
+{
+    struct vbsf_inode_info *pInodeInfo;
+    struct inode           *pNewInode;
+    ino_t                   idInode;
+
+    /*
+     * Our private inode info comes first.
+     */
+    pInodeInfo = (struct vbsf_inode_info *)kmalloc(sizeof(*pInodeInfo), GFP_KERNEL);
+    if (!pInodeInfo) {
+        LogRelFunc(("could not allocate memory for new inode info\n"));
+        return NULL;
+    }
+
+    /*
+     * Then the inode itself, under a fresh unique inode number.
+     */
+    idInode = iunique(parent->i_sb, 16);
+#if RTLNX_VER_MIN(2,4,25)
+    pNewInode = iget_locked(parent->i_sb, idInode);
+#else
+    pNewInode = iget(parent->i_sb, idInode);
+#endif
+    if (!pNewInode) {
+        LogFunc(("iget failed\n"));
+        kfree(pInodeInfo);
+        return NULL;
+    }
+
+    /*
+     * Initialize the two structures.
+     */
+#ifdef VBOX_STRICT
+    pInodeInfo->u32Magic      = SF_INODE_INFO_MAGIC;
+#endif
+    pInodeInfo->path          = path;
+    pInodeInfo->force_restat  = false;
+    pInodeInfo->ts_up_to_date = jiffies;
+    RTListInit(&pInodeInfo->HandleList);
+    pInodeInfo->handle        = SHFL_HANDLE_NIL;
+
+    VBSF_SET_INODE_INFO(pNewInode, pInodeInfo);
+    vbsf_init_inode(pNewInode, pInodeInfo, pObjInfo, pSuperInfo);
+
+    /*
+     * Any d_instantiate call has to happen before the new inode is unlocked.
+     */
+    if (fInstantiate)
+        d_instantiate(dentry, pNewInode);
+#if RTLNX_VER_MIN(2,4,25)
+    unlock_new_inode(pNewInode);
+#endif
+    return pNewInode;
+}
+
+
+/** Helper for vbsf_create_worker() and vbsf_inode_lookup() that wraps
+ * d_add() and setting d_op.
+ *
+ * On 2.6.38 and later d_op is only asserted to already be vbsf_dentry_ops;
+ * on older kernels it is assigned here before d_add() is called.
+ *
+ * @param dentry The dentry to add.
+ * @param pNewInode The inode to attach; vbsf_inode_lookup() passes NULL for a
+ * negative entry. */
+DECLINLINE(void) vbsf_d_add_inode(struct dentry *dentry, struct inode *pNewInode)
+{
+#if RTLNX_VER_MIN(2,6,38)
+ Assert(dentry->d_op == &vbsf_dentry_ops); /* (taken from the superblock) */
+#else
+ dentry->d_op = &vbsf_dentry_ops;
+#endif
+ d_add(dentry, pNewInode);
+}
+
+
+/**
+ * This is called when vfs failed to locate dentry in the cache.  The
+ * job of this function is to allocate inode and link it to dentry.
+ * [dentry] contains the name to be looked in the [parent] directory.
+ *
+ * The name is looked up on the host.  If the object exists an inode is
+ * created for it and added to [dentry]; if the host reports that the file or
+ * path was not found (or rejects the name as invalid), a NULL inode is added
+ * so the entry becomes negative and vfs can proceed trying to create the
+ * entry via other means.
+ *
+ * @returns NULL on success (positive or negative entry added), otherwise an
+ *          ERR_PTR() encoded negative errno: -ENOMEM if the request or the
+ *          inode could not be allocated, -ENAMETOOLONG if the host rejects
+ *          the path as too long, -EPROTO on any other host failure or
+ *          unexpected result, or the status of vbsf_path_from_dentry().
+ * @param   parent  The inode of the directory to search.
+ * @param   dentry  The dentry holding the name to look up.
+ */
+static struct dentry *vbsf_inode_lookup(struct inode *parent, struct dentry *dentry
+#if RTLNX_VER_MIN(3,6,0)
+                                        , unsigned int flags
+#elif RTLNX_VER_MIN(2,6,0)
+                                        , struct nameidata *nd
+#endif
+                                        )
+{
+    struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(parent->i_sb);
+    struct vbsf_inode_info *sf_i       = VBSF_GET_INODE_INFO(parent);
+    SHFLSTRING             *path;
+    struct dentry          *dret;
+    int                     rc;
+
+#if RTLNX_VER_MIN(3,6,0)
+    SFLOGFLOW(("vbsf_inode_lookup: parent=%p dentry=%p flags=%#x\n", parent, dentry, flags));
+#elif RTLNX_VER_MIN(2,6,0)
+    SFLOGFLOW(("vbsf_inode_lookup: parent=%p dentry=%p nd=%p{.flags=%#x}\n", parent, dentry, nd, nd ? nd->flags : 0));
+#else
+    SFLOGFLOW(("vbsf_inode_lookup: parent=%p dentry=%p\n", parent, dentry));
+#endif
+
+    Assert(pSuperInfo);
+    Assert(sf_i && sf_i->u32Magic == SF_INODE_INFO_MAGIC);
+
+    /*
+     * Build the path.  We'll associate the path with dret's inode on success.
+     */
+    rc = vbsf_path_from_dentry(pSuperInfo, sf_i, dentry, &path, __func__);
+    if (rc == 0) {
+        /*
+         * Do a lookup on the host side.
+         */
+        VBOXSFCREATEREQ *pReq = (VBOXSFCREATEREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq) + path->u16Size);
+        if (pReq) {
+            struct inode *pInode = NULL;
+
+            RT_ZERO(*pReq);
+            memcpy(&pReq->StrPath, path, SHFLSTRING_HEADER_SIZE + path->u16Size);
+            pReq->CreateParms.Handle      = SHFL_HANDLE_NIL;
+            pReq->CreateParms.CreateFlags = SHFL_CF_LOOKUP | SHFL_CF_ACT_FAIL_IF_NEW;
+
+            SFLOG2(("vbsf_inode_lookup: Calling VbglR0SfHostReqCreate on %s\n", path->String.utf8));
+            rc = VbglR0SfHostReqCreate(pSuperInfo->map.root, pReq);
+            if (RT_SUCCESS(rc)) {
+                if (pReq->CreateParms.Result == SHFL_FILE_EXISTS) {
+                    /*
+                     * Create an inode for the result.  Since this also confirms
+                     * the existence of all parent dentries, we increase their TTL.
+                     */
+                    pInode = vbsf_create_inode(parent, dentry, path, &pReq->CreateParms.Info, pSuperInfo, false /*fInstantiate*/);
+                    /* Test the inode itself rather than the host status: only
+                       a successfully created inode takes ownership of the
+                       path, otherwise the path must be freed below and the
+                       failure reported instead of adding a negative entry. */
+                    if (pInode) {
+                        path = NULL; /* given to the inode */
+                        dret = dentry;
+                    } else
+                        dret = (struct dentry *)ERR_PTR(-ENOMEM);
+                    vbsf_dentry_chain_increase_parent_ttl(dentry);
+                } else if (   pReq->CreateParms.Result == SHFL_FILE_NOT_FOUND
+                           || pReq->CreateParms.Result == SHFL_PATH_NOT_FOUND /*this probably should happen*/) {
+                    dret = dentry;
+                } else {
+                    AssertMsgFailed(("%d\n", pReq->CreateParms.Result));
+                    dret = (struct dentry *)ERR_PTR(-EPROTO);
+                }
+            } else if (rc == VERR_INVALID_NAME) {
+                SFLOGFLOW(("vbsf_inode_lookup: VERR_INVALID_NAME\n"));
+                dret = dentry; /* this can happen for names like 'foo*' on a Windows host */
+            } else if (rc == VERR_FILENAME_TOO_LONG) {
+                SFLOG(("vbsf_inode_lookup: VbglR0SfHostReqCreate failed on %s: VERR_FILENAME_TOO_LONG\n", path->String.utf8));
+                dret = (struct dentry *)ERR_PTR(-ENAMETOOLONG);
+            } else {
+                SFLOG(("vbsf_inode_lookup: VbglR0SfHostReqCreate failed on %s: %Rrc\n", path->String.utf8, rc));
+                dret = (struct dentry *)ERR_PTR(-EPROTO);
+            }
+            VbglR0PhysHeapFree(pReq);
+
+            /*
+             * When dret is set to dentry we got something to insert,
+             * though it may be negative (pInode == NULL).
+             */
+            if (dret == dentry) {
+                vbsf_dentry_set_update_jiffies(dentry, jiffies);
+                vbsf_d_add_inode(dentry, pInode);
+                dret = NULL;
+            }
+        } else {
+            SFLOGFLOW(("vbsf_inode_lookup: -ENOMEM (phys heap)\n"));
+            dret = (struct dentry *)ERR_PTR(-ENOMEM);
+        }
+        /* Still ours unless an inode took it over above. */
+        if (path)
+            kfree(path);
+    } else {
+        SFLOG(("vbsf_inode_lookup: vbsf_path_from_dentry failed: %d\n", rc));
+        dret = (struct dentry *)ERR_PTR(rc);
+    }
+    return dret;
+}
+
+
+/**
+ * Creates an inode for a freshly created object and instantiates the dentry
+ * with it, remembering the host handle in the new inode info.
+ *
+ * The actual work (inode info allocation, unique inode number, inode
+ * initialization, d_instantiate) is done by vbsf_create_inode().
+ *
+ * @param   parent  inode entry of the directory
+ * @param   dentry  directory cache entry
+ * @param   path    path name.  Consumed on success.
+ * @param   info    file information
+ * @param   handle  host handle to store in the new inode info
+ * @returns 0 on success, -ENOMEM if the inode could not be created.
+ */
+static int vbsf_inode_instantiate(struct inode *parent, struct dentry *dentry, PSHFLSTRING path,
+                                  PSHFLFSOBJINFO info, SHFLHANDLE handle)
+{
+    struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(parent->i_sb);
+    struct vbsf_inode_info *pInodeInfo;
+    struct inode           *pNewInode;
+
+    pNewInode = vbsf_create_inode(parent, dentry, path, info, pSuperInfo, true /*fInstantiate*/);
+    if (!pNewInode)
+        return -ENOMEM;
+
+    /* Store this handle if we leave the handle open. */
+    pInodeInfo = VBSF_GET_INODE_INFO(pNewInode);
+    pInodeInfo->handle = handle;
+    return 0;
+}
+
+
+/**
+ * Create a new regular file / directory.
+ *
+ * Builds the host path for @a dentry, issues a SHFL_CREATE request and, when
+ * the host hands back a handle, creates an inode for the object.  The handle
+ * ends up in exactly one place: in @a phHostFile if that is given, else in the
+ * new inode if @a fStashHandle is set, else it is closed again before
+ * returning.
+ *
+ * @param   parent          inode of the directory
+ * @param   dentry          directory cache entry
+ * @param   mode            file mode
+ * @param   fCreateFlags    SHFL_CF_XXX.
+ * @param   fStashHandle    Whether the resulting handle should be stashed in
+ *                          the inode for a subsequent open call.
+ * @param   fDoLookup       Whether we're doing a lookup and need to d_add the
+ *                          inode we create to dentry.
+ * @param   phHostFile      Where to return the handle to the created file/dir.
+ *                          Optional.
+ * @param   pfCreated       Where to indicate whether the file/dir was created
+ *                          or not.  Optional.
+ * @returns 0 on success, Linux error code otherwise
+ */
+static int vbsf_create_worker(struct inode *parent, struct dentry *dentry, umode_t mode, uint32_t fCreateFlags,
+                              bool fStashHandle, bool fDoLookup, SHFLHANDLE *phHostFile, bool *pfCreated)
+{
+#ifdef SFLOG_ENABLED
+    const char * const      pszPrefix   = S_ISDIR(mode) ? "vbsf_create_worker/dir:" : "vbsf_create_worker/file:";
+#endif
+    struct vbsf_inode_info *sf_parent_i = VBSF_GET_INODE_INFO(parent);
+    struct vbsf_super_info *pSuperInfo  = VBSF_GET_SUPER_INFO(parent->i_sb);
+    PSHFLSTRING             path;
+    int                     rc;
+
+    if (pfCreated)
+        *pfCreated = false;
+    AssertReturn(sf_parent_i, -EINVAL);
+    AssertReturn(pSuperInfo, -EINVAL);
+
+    /*
+     * Build a path.  We'll donate this to the inode on success.
+     */
+    rc = vbsf_path_from_dentry(pSuperInfo, sf_parent_i, dentry, &path, __func__);
+    if (rc == 0) {
+        /*
+         * Allocate, initialize and issue the SHFL_CREATE request.
+         * The same buffer is reused for the close request in the cleanup below.
+         */
+        /** @todo combine with vbsf_path_from_dentry? */
+        union CreateAuxReq
+        {
+            VBOXSFCREATEREQ Create;
+            VBOXSFCLOSEREQ  Close;
+        } *pReq = (union CreateAuxReq *)VbglR0PhysHeapAlloc(RT_UOFFSETOF(VBOXSFCREATEREQ, StrPath.String) + path->u16Size);
+        if (pReq) {
+            memcpy(&pReq->Create.StrPath, path, SHFLSTRING_HEADER_SIZE + path->u16Size);
+            RT_ZERO(pReq->Create.CreateParms);
+            pReq->Create.CreateParms.Handle                  = SHFL_HANDLE_NIL;
+            pReq->Create.CreateParms.CreateFlags             = fCreateFlags;
+            pReq->Create.CreateParms.Info.Attr.fMode         = (S_ISDIR(mode) ? RTFS_TYPE_DIRECTORY : RTFS_TYPE_FILE)
+                                                             | sf_access_permissions_to_vbox(mode);
+            pReq->Create.CreateParms.Info.Attr.enmAdditional = SHFLFSOBJATTRADD_NOTHING;
+
+            SFLOGFLOW(("%s calling VbglR0SfHostReqCreate(%s, %#x)\n", pszPrefix, path->String.ach, pReq->Create.CreateParms.CreateFlags));
+            rc = VbglR0SfHostReqCreate(pSuperInfo->map.root, &pReq->Create);
+            if (RT_SUCCESS(rc)) {
+                SFLOGFLOW(("%s VbglR0SfHostReqCreate returned %Rrc Result=%d Handle=%#llx\n",
+                           pszPrefix, rc, pReq->Create.CreateParms.Result, pReq->Create.CreateParms.Handle));
+
+                /*
+                 * Work the dentry cache and inode restatting.
+                 */
+                if (   pReq->Create.CreateParms.Result == SHFL_FILE_CREATED
+                    || pReq->Create.CreateParms.Result == SHFL_FILE_REPLACED) {
+                    vbsf_dentry_chain_increase_parent_ttl(dentry);
+                    sf_parent_i->force_restat = 1;
+                } else if (   pReq->Create.CreateParms.Result == SHFL_FILE_EXISTS
+                           || pReq->Create.CreateParms.Result == SHFL_FILE_NOT_FOUND)
+                    vbsf_dentry_chain_increase_parent_ttl(dentry);
+
+                /*
+                 * If we got a handle back, we're good.  Create an inode for it and return.
+                 */
+                if (pReq->Create.CreateParms.Handle != SHFL_HANDLE_NIL) {
+                    struct inode *pNewInode = vbsf_create_inode(parent, dentry, path, &pReq->Create.CreateParms.Info, pSuperInfo,
+                                                                !fDoLookup /*fInstantiate*/);
+                    if (pNewInode) {
+                        struct vbsf_inode_info *sf_new_i = VBSF_GET_INODE_INFO(pNewInode);
+                        /* Hand the handle over; resetting it to NIL keeps the cleanup below from closing it. */
+                        if (phHostFile) {
+                            *phHostFile = pReq->Create.CreateParms.Handle;
+                            pReq->Create.CreateParms.Handle = SHFL_HANDLE_NIL;
+                        } else if (fStashHandle) {
+                            sf_new_i->handle = pReq->Create.CreateParms.Handle;
+                            pReq->Create.CreateParms.Handle = SHFL_HANDLE_NIL;
+                        }
+                        if (pfCreated)
+                            *pfCreated = pReq->Create.CreateParms.Result == SHFL_FILE_CREATED;
+                        if (fDoLookup)
+                            vbsf_d_add_inode(dentry, pNewInode);
+                        path = NULL; /* given to the inode */
+                    } else {
+                        SFLOGFLOW(("%s vbsf_create_inode failed: -ENOMEM (path %s)\n", pszPrefix, path->String.ach));
+                        rc = -ENOMEM;
+                    }
+                } else if (pReq->Create.CreateParms.Result == SHFL_FILE_EXISTS) {
+                    /*
+                     * For atomic_open (at least), we should create an inode and
+                     * convert the dentry from a negative to a positive one.
+                     */
+                    SFLOGFLOW(("%s SHFL_FILE_EXISTS for %s\n", pszPrefix, sf_parent_i->path->String.ach));
+                    if (fDoLookup) {
+                        struct inode *pNewInode = vbsf_create_inode(parent, dentry, path, &pReq->Create.CreateParms.Info,
+                                                                    pSuperInfo, false /*fInstantiate*/);
+                        if (pNewInode) {
+                            vbsf_d_add_inode(dentry, pNewInode);
+                            path = NULL; /* given to the inode */
+                        }
+                        /* else: the path is still ours and gets freed below. */
+                    }
+                    rc = -EEXIST;
+                } else if (pReq->Create.CreateParms.Result == SHFL_FILE_NOT_FOUND) {
+                    SFLOGFLOW(("%s SHFL_FILE_NOT_FOUND for %s\n", pszPrefix, sf_parent_i->path->String.ach));
+                    rc = -ENOENT;
+                } else if (pReq->Create.CreateParms.Result == SHFL_PATH_NOT_FOUND) {
+                    SFLOGFLOW(("%s SHFL_PATH_NOT_FOUND for %s\n", pszPrefix, sf_parent_i->path->String.ach));
+                    rc = -ENOENT;
+                } else {
+                    AssertMsgFailed(("result=%d creating '%s'\n", pReq->Create.CreateParms.Result, sf_parent_i->path->String.ach));
+                    rc = -EPERM;
+                }
+            } else {
+                int const vrc = rc;
+                rc = -RTErrConvertToErrno(vrc);
+                SFLOGFLOW(("%s SHFL_FN_CREATE(%s) failed vrc=%Rrc rc=%d\n", pszPrefix, path->String.ach, vrc, rc));
+            }
+
+            /* Cleanups: close the handle unless it was handed to the caller or the inode above. */
+            if (pReq->Create.CreateParms.Handle != SHFL_HANDLE_NIL) {
+                AssertCompile(RTASSERT_OFFSET_OF(VBOXSFCREATEREQ, CreateParms.Handle) > sizeof(VBOXSFCLOSEREQ)); /* no aliasing issues */
+                int rc2 = VbglR0SfHostReqClose(pSuperInfo->map.root, &pReq->Close, pReq->Create.CreateParms.Handle);
+                if (RT_FAILURE(rc2))
+                    SFLOGFLOW(("%s VbglR0SfHostReqCloseSimple failed rc=%Rrc\n", pszPrefix, rc2));
+            }
+            VbglR0PhysHeapFree(pReq);
+        } else
+            rc = -ENOMEM;
+        /* Non-NULL here means no inode took ownership of the path. */
+        if (path)
+            kfree(path);
+    }
+    return rc;
+}
+
+
+#if RTLNX_VER_MIN(3,16,0)
+/**
+ * More atomic way of handling creation.
+ *
+ * Older kernels would first do a lookup that created the file, followed by
+ * an open call.  We've got this horrid vbsf_inode_info::handle member because
+ * of that approach.  The call combines the lookup and open.
+ *
+ * Only the O_CREAT case is handled by creating/opening the file here; for
+ * other opens we either perform a plain lookup (when the dentry is still in
+ * lookup) and let the VFS do the open, or fail with -ENOENT.
+ *
+ * @param   pDirInode   inode of the parent directory
+ * @param   dentry      directory cache entry, expected to be negative
+ * @param   file        the file structure being opened
+ * @param   fOpen       the open flags (O_XXX)
+ * @param   fMode       the creation mode
+ * @param   opened      (kernels before 4.19 only) where FILE_CREATED is
+ *                      reported
+ * @returns 0 on success, Linux error code otherwise
+ */
+static int vbsf_inode_atomic_open(struct inode *pDirInode, struct dentry *dentry, struct file *file, unsigned fOpen,
+                                  umode_t fMode
+# if RTLNX_VER_MAX(4,19,0)
+                                  , int *opened
+# endif
+                                  )
+{
+    SFLOGFLOW(("vbsf_inode_atomic_open: pDirInode=%p dentry=%p file=%p fOpen=%#x, fMode=%#x\n", pDirInode, dentry, file, fOpen, fMode));
+    int rc;
+
+    /* Code assumes negative dentry. */
+    Assert(dentry->d_inode == NULL);
+
+    /** @todo see if we can do this for non-create calls too, as it may save us a
+     *        host call to revalidate the dentry. (Can't see anyone else doing
+     *        this, so playing it safe for now.) */
+    if (fOpen & O_CREAT) {
+        /*
+         * Prepare our file info structure.
+         */
+        struct vbsf_reg_info *sf_r = kmalloc(sizeof(*sf_r), GFP_KERNEL);
+        if (sf_r) {
+            bool     fCreated = false;
+            uint32_t fCreateFlags;
+
+            RTListInit(&sf_r->Handle.Entry);
+            sf_r->Handle.cRefs  = 1;
+            sf_r->Handle.fFlags = !(fOpen & O_DIRECTORY)
+                                ? VBSF_HANDLE_F_FILE | VBSF_HANDLE_F_MAGIC
+                                : VBSF_HANDLE_F_DIR  | VBSF_HANDLE_F_MAGIC;
+            sf_r->Handle.hHost  = SHFL_HANDLE_NIL;
+
+            /*
+             * Try create it.
+             */
+            /* vbsf_create_worker uses the type from fMode, so match it up to O_DIRECTORY. */
+            AssertMsg(!(fMode & S_IFMT) || (fMode & S_IFMT) == (fOpen & O_DIRECTORY ? S_IFDIR : S_IFREG), ("0%o\n", fMode));
+            if (!(fOpen & O_DIRECTORY))
+                fMode = (fMode & ~S_IFMT) | S_IFREG;
+            else
+                fMode = (fMode & ~S_IFMT) | S_IFDIR;
+
+            fCreateFlags = vbsf_linux_oflags_to_vbox(fOpen, &sf_r->Handle.fFlags, __FUNCTION__);
+
+            rc = vbsf_create_worker(pDirInode, dentry, fMode, fCreateFlags, false /*fStashHandle*/, true /*fDoLookup*/,
+                                    &sf_r->Handle.hHost, &fCreated);
+            if (rc == 0) {
+                struct inode           *inode = dentry->d_inode;
+                struct vbsf_inode_info *sf_i  = VBSF_GET_INODE_INFO(inode);
+
+                /*
+                 * Set FMODE_CREATED according to the action taken by SHFL_CREATE
+                 * and call finish_open() to do the remaining open() work.
+                 */
+# if RTLNX_VER_MIN(4,19,0)
+                if (fCreated)
+                    file->f_mode |= FMODE_CREATED;
+                rc = finish_open(file, dentry, generic_file_open);
+# else
+                if (fCreated)
+                    *opened |= FILE_CREATED;
+                rc = finish_open(file, dentry, generic_file_open, opened);
+# endif
+                if (rc == 0) {
+                    /*
+                     * Now that the file is fully opened, associate sf_r with it
+                     * and link the handle to the inode.
+                     */
+                    vbsf_handle_append(sf_i, &sf_r->Handle);
+                    file->private_data = sf_r;
+                    SFLOGFLOW(("vbsf_inode_atomic_open: create succeeded; hHost=%#llx path='%s'\n",
+                               sf_r->Handle.hHost, sf_i->path->String.ach));
+                    sf_r = NULL; /* don't free it */
+                } else {
+                    /* The host handle was handed to us by vbsf_create_worker, so we must close it. */
+                    struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(pDirInode->i_sb);
+                    SFLOGFLOW(("vbsf_inode_atomic_open: finish_open failed: %d (path='%s'\n", rc, sf_i->path->String.ach));
+                    VbglR0SfHostReqCloseSimple(pSuperInfo->map.root, sf_r->Handle.hHost);
+                    sf_r->Handle.hHost = SHFL_HANDLE_NIL;
+                }
+            } else
+                SFLOGFLOW(("vbsf_inode_atomic_open: vbsf_create_worker failed: %d\n", rc));
+            /* Still non-NULL means the file did not take ownership of it. */
+            if (sf_r)
+                kfree(sf_r);
+        } else {
+            LogRelMaxFunc(64, ("could not allocate reg info\n"));
+            rc = -ENOMEM;
+        }
+    }
+    /*
+     * Not creating anything.
+     * Do we need to do a lookup or should we just fail?
+     */
+    else if (d_in_lookup(dentry)) {
+        struct dentry *pResult = vbsf_inode_lookup(pDirInode, dentry, 0 /*fFlags*/);
+        if (!IS_ERR(pResult))
+            rc = finish_no_open(file, pResult);
+        else
+            rc = PTR_ERR(pResult);
+        SFLOGFLOW(("vbsf_inode_atomic_open: open -> %d (%p)\n", rc, pResult));
+    } else {
+        SFLOGFLOW(("vbsf_inode_atomic_open: open -> -ENOENT\n"));
+        rc = -ENOENT;
+    }
+    return rc;
+}
+#endif /* 3.16.0 */
+
+
+/**
+ * Create a new regular file.
+ *
+ * Asks the host to create the file (failing if it already exists) via
+ * vbsf_create_worker() and stashes the resulting handle in the new inode.
+ *
+ * @param   ns      The name space (5.12 and later only; not used here).
+ * @param   parent  inode of the directory
+ * @param   dentry  directory cache entry
+ * @param   mode    file mode
+ * @param   excl    Possible O_EXCL... (3.6 and later; not used here).
+ *                  Kernels 2.5.75 up to 3.6 get a nameidata pointer (nd)
+ *                  instead, whose open intent flags select the access mode.
+ * @returns 0 on success, Linux error code otherwise
+ */
+#if RTLNX_VER_MIN(5,12,0) || defined(DOXYGEN_RUNNING)
+static int vbsf_inode_create(struct user_namespace *ns, struct inode *parent, struct dentry *dentry, umode_t mode, bool excl)
+#elif RTLNX_VER_MIN(3,6,0)
+static int vbsf_inode_create(struct inode *parent, struct dentry *dentry, umode_t mode, bool excl)
+#elif RTLNX_VER_MIN(3,3,0)
+static int vbsf_inode_create(struct inode *parent, struct dentry *dentry, umode_t mode, struct nameidata *nd)
+#elif RTLNX_VER_MIN(2,5,75)
+static int vbsf_inode_create(struct inode *parent, struct dentry *dentry, int mode, struct nameidata *nd)
+#else
+static int vbsf_inode_create(struct inode *parent, struct dentry *dentry, int mode)
+#endif
+{
+    uint32_t fCreateFlags = SHFL_CF_ACT_CREATE_IF_NEW
+                          | SHFL_CF_ACT_FAIL_IF_EXISTS
+                          | SHFL_CF_ACCESS_READWRITE;
+#if RTLNX_VER_RANGE(2,5,75,  3,6,0)
+    /* Clear the RD flag if write-only access requested.  Otherwise assume we
+       need write access to create stuff. */
+    if (!(nd->intent.open.flags & 1) ) {
+        /* Mask OUT the access bits only; the create-action bits must survive. */
+        fCreateFlags &= ~SHFL_CF_ACCESS_READWRITE;
+        fCreateFlags |= SHFL_CF_ACCESS_WRITE;
+    }
+    /* (file since 2.6.15) */
+#endif
+    TRACE();
+    AssertMsg(!(mode & S_IFMT) || (mode & S_IFMT) == S_IFREG, ("0%o\n", mode));
+    /* Force the type bits to "regular file" regardless of what the caller passed. */
+    return vbsf_create_worker(parent, dentry, (mode & ~S_IFMT) | S_IFREG, fCreateFlags,
+                              true /*fStashHandle*/, false /*fDoLookup*/, NULL /*phHandle*/, NULL /*fCreated*/);
+}
+
+
+/**
+ * Create a new directory.
+ *
+ * Delegates to vbsf_create_worker() with directory create flags; the host
+ * handle is neither returned nor stashed in the inode.
+ *
+ * @param   ns      The name space (5.12 and later only; not used here).
+ * @param   parent  inode of the directory
+ * @param   dentry  directory cache entry
+ * @param   mode    file mode
+ * @returns 0 on success, Linux error code otherwise
+ */
+#if RTLNX_VER_MIN(5,12,0) || defined(DOXYGEN_RUNNING)
+static int vbsf_inode_mkdir(struct user_namespace *ns, struct inode *parent, struct dentry *dentry, umode_t mode)
+#elif RTLNX_VER_MIN(3,3,0)
+static int vbsf_inode_mkdir(struct inode *parent, struct dentry *dentry, umode_t mode)
+#else
+static int vbsf_inode_mkdir(struct inode *parent, struct dentry *dentry, int mode)
+#endif
+{
+    /* Create-only, read/write access, and tell the host it is a directory. */
+    uint32_t const fDirCreateFlags = SHFL_CF_ACT_CREATE_IF_NEW
+                                   | SHFL_CF_ACT_FAIL_IF_EXISTS
+                                   | SHFL_CF_ACCESS_READWRITE
+                                   | SHFL_CF_DIRECTORY;
+
+    TRACE();
+    AssertMsg(!(mode & S_IFMT) || (mode & S_IFMT) == S_IFDIR, ("0%o\n", mode));
+
+    /* Replace whatever type bits the caller passed with S_IFDIR. */
+    mode = (mode & ~S_IFMT) | S_IFDIR;
+    return vbsf_create_worker(parent, dentry, mode, fDirCreateFlags,
+                              false /*fStashHandle*/, false /*fDoLookup*/, NULL /*phHandle*/, NULL /*fCreated*/);
+}
+
+
+/**
+ * Remove a regular file / directory.
+ *
+ * Sends a remove request for the path of @a dentry to the host.  A "not
+ * found" answer from the host is treated as success and the dentry is
+ * dropped, since the object was probably deleted on the host already.
+ *
+ * @param   parent      inode of the directory
+ * @param   dentry      directory cache entry
+ * @param   fDirectory  true if directory, false otherwise
+ * @returns 0 on success, Linux error code otherwise
+ */
+static int vbsf_unlink_worker(struct inode *parent, struct dentry *dentry, int fDirectory)
+{
+    struct vbsf_super_info *pSuperInfo  = VBSF_GET_SUPER_INFO(parent->i_sb);
+    struct vbsf_inode_info *sf_parent_i = VBSF_GET_INODE_INFO(parent);
+    SHFLSTRING             *path;
+    int                     rc;
+
+    TRACE();
+
+    rc = vbsf_path_from_dentry(pSuperInfo, sf_parent_i, dentry, &path, __func__);
+    if (!rc) {
+        VBOXSFREMOVEREQ *pReq = (VBOXSFREMOVEREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF(VBOXSFREMOVEREQ, StrPath.String)
+                                                                       + path->u16Size);
+        if (pReq) {
+            uint32_t fFlags = fDirectory ? SHFL_REMOVE_DIR : SHFL_REMOVE_FILE;
+
+            memcpy(&pReq->StrPath, path, SHFLSTRING_HEADER_SIZE + path->u16Size);
+            /* S_ISLNK compares the full S_IFMT type field rather than individual bits. */
+            if (dentry->d_inode && S_ISLNK(dentry->d_inode->i_mode))
+                fFlags |= SHFL_REMOVE_SYMLINK;
+
+            rc = VbglR0SfHostReqRemove(pSuperInfo->map.root, pReq, fFlags);
+
+            /* Whatever the outcome, the cached attributes of the object are now suspect. */
+            if (dentry->d_inode) {
+                struct vbsf_inode_info *sf_i = VBSF_GET_INODE_INFO(dentry->d_inode);
+                sf_i->force_restat = true;
+            }
+
+            if (RT_SUCCESS(rc)) {
+                sf_parent_i->force_restat = true; /* directory access/change time changed */
+                rc = 0;
+            } else if (rc == VERR_FILE_NOT_FOUND || rc == VERR_PATH_NOT_FOUND) {
+                /* Probably deleted on the host while the guest had it cached, so don't complain: */
+                LogFunc(("(%d): VbglR0SfRemove(%s) failed rc=%Rrc; calling d_drop on %p\n",
+                         fDirectory, path->String.ach, rc, dentry));
+                sf_parent_i->force_restat = true;
+                d_drop(dentry);
+                rc = 0;
+            } else {
+                LogFunc(("(%d): VbglR0SfRemove(%s) failed rc=%Rrc\n", fDirectory, path->String.ach, rc));
+                rc = -RTErrConvertToErrno(rc);
+            }
+            VbglR0PhysHeapFree(pReq);
+        } else
+            rc = -ENOMEM;
+        kfree(path);
+    }
+    return rc;
+}
+
+
+/**
+ * Remove a regular file.
+ *
+ * Forwards to vbsf_unlink_worker() with fDirectory set to false.
+ *
+ * @param   parent  inode of the directory
+ * @param   dentry  directory cache entry
+ * @returns 0 on success, Linux error code otherwise
+ */
+static int vbsf_inode_unlink(struct inode *parent, struct dentry *dentry)
+{
+    int rc;
+
+    TRACE();
+    rc = vbsf_unlink_worker(parent, dentry, false /*fDirectory*/);
+    return rc;
+}
+
+
+/**
+ * Remove a directory.
+ *
+ * Forwards to vbsf_unlink_worker() with fDirectory set to true.
+ *
+ * @param   parent  inode of the directory
+ * @param   dentry  directory cache entry
+ * @returns 0 on success, Linux error code otherwise
+ */
+static int vbsf_inode_rmdir(struct inode *parent, struct dentry *dentry)
+{
+    int rc;
+
+    TRACE();
+    rc = vbsf_unlink_worker(parent, dentry, true /*fDirectory*/);
+    return rc;
+}
+
+
+/**
+ * Rename a regular file / directory.
+ *
+ * Only renames within one shared folder mount are supported (-EXDEV
+ * otherwise).  On kernels 3.15 and later, RENAME_NOREPLACE is the only flag
+ * accepted; any other flag yields -EINVAL.
+ *
+ * @param   ns          The name space (5.12 and later only; not used here).
+ * @param   old_parent  inode of the old parent directory
+ * @param   old_dentry  old directory cache entry
+ * @param   new_parent  inode of the new parent directory
+ * @param   new_dentry  new directory cache entry
+ * @param   flags       flags
+ * @returns 0 on success, Linux error code otherwise
+ */
+#if RTLNX_VER_MIN(5,12,0) || defined(DOXYGEN_RUNNING)
+static int vbsf_inode_rename(struct user_namespace *ns,
+                             struct inode *old_parent, struct dentry *old_dentry,
+                             struct inode *new_parent, struct dentry *new_dentry, unsigned flags)
+#else
+static int vbsf_inode_rename(struct inode *old_parent, struct dentry *old_dentry,
+                             struct inode *new_parent, struct dentry *new_dentry, unsigned flags)
+#endif
+{
+    /*
+     * Deal with flags.
+     */
+    int      rc;
+    /* S_ISDIR compares the whole S_IFMT type field; a plain '& S_IFDIR' would
+       also match block devices, whose type value contains the S_IFDIR bit. */
+    uint32_t fRename = (S_ISDIR(old_dentry->d_inode->i_mode) ? SHFL_RENAME_DIR : SHFL_RENAME_FILE)
+                     | SHFL_RENAME_REPLACE_IF_EXISTS;
+#if RTLNX_VER_MIN(3,15,0)
+    if (!(flags & ~RENAME_NOREPLACE)) {
+        if (flags & RENAME_NOREPLACE)
+            fRename &= ~SHFL_RENAME_REPLACE_IF_EXISTS;
+#endif
+        /*
+         * Check that they are on the same mount.
+         */
+        struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(old_parent->i_sb);
+        if (pSuperInfo == VBSF_GET_SUPER_INFO(new_parent->i_sb)) {
+            /*
+             * Build the new path.
+             */
+            struct vbsf_inode_info *sf_new_parent_i = VBSF_GET_INODE_INFO(new_parent);
+            PSHFLSTRING             pNewPath;
+            rc = vbsf_path_from_dentry(pSuperInfo, sf_new_parent_i, new_dentry, &pNewPath, __func__);
+            if (rc == 0) {
+                /*
+                 * Create and issue the rename request.
+                 */
+                VBOXSFRENAMEWITHSRCBUFREQ *pReq;
+                pReq = (VBOXSFRENAMEWITHSRCBUFREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF(VBOXSFRENAMEWITHSRCBUFREQ, StrDstPath.String)
+                                                                        + pNewPath->u16Size);
+                if (pReq) {
+                    struct vbsf_inode_info *sf_file_i = VBSF_GET_INODE_INFO(old_dentry->d_inode);
+                    PSHFLSTRING             pOldPath = sf_file_i->path;
+
+                    memcpy(&pReq->StrDstPath, pNewPath, SHFLSTRING_HEADER_SIZE + pNewPath->u16Size);
+                    rc = VbglR0SfHostReqRenameWithSrcContig(pSuperInfo->map.root, pReq, pOldPath, virt_to_phys(pOldPath), fRename);
+                    VbglR0PhysHeapFree(pReq);
+                    if (RT_SUCCESS(rc)) {
+                        /*
+                         * On success we replace the path in the inode and trigger
+                         * restatting of both parent directories.
+                         */
+                        struct vbsf_inode_info *sf_old_parent_i = VBSF_GET_INODE_INFO(old_parent);
+                        SFLOGFLOW(("vbsf_inode_rename: %s -> %s (%#x)\n", pOldPath->String.ach, pNewPath->String.ach, fRename));
+
+                        sf_file_i->path = pNewPath;
+                        kfree(pOldPath);
+                        pNewPath = NULL; /* now owned by the inode */
+
+                        sf_new_parent_i->force_restat = 1;
+                        sf_old_parent_i->force_restat = 1;
+
+                        vbsf_dentry_chain_increase_parent_ttl(old_dentry);
+                        vbsf_dentry_chain_increase_parent_ttl(new_dentry);
+
+                        rc = 0;
+                    } else {
+                        SFLOGFLOW(("vbsf_inode_rename: VbglR0SfHostReqRenameWithSrcContig(%s,%s,%#x) failed -> %d\n",
+                                   pOldPath->String.ach, pNewPath->String.ach, fRename, rc));
+                        /* The host disagrees with our idea of the object type, so distrust the cached dentry. */
+                        if (rc == VERR_IS_A_DIRECTORY || rc == VERR_IS_A_FILE)
+                            vbsf_dentry_invalidate_ttl(old_dentry);
+                        rc = -RTErrConvertToErrno(rc);
+                    }
+                } else {
+                    SFLOGFLOW(("vbsf_inode_rename: failed to allocate request (%#x bytes)\n",
+                               RT_UOFFSETOF(VBOXSFRENAMEWITHSRCBUFREQ, StrDstPath.String) + pNewPath->u16Size));
+                    rc = -ENOMEM;
+                }
+                if (pNewPath)
+                    kfree(pNewPath);
+            } else
+                SFLOGFLOW(("vbsf_inode_rename: vbsf_path_from_dentry failed: %d\n", rc));
+        } else {
+            SFLOGFLOW(("vbsf_inode_rename: rename with different roots (%#x vs %#x)\n",
+                       pSuperInfo->map.root, VBSF_GET_SUPER_INFO(new_parent->i_sb)->map.root));
+            rc = -EXDEV;
+        }
+#if RTLNX_VER_MIN(3,15,0)
+    } else {
+        SFLOGFLOW(("vbsf_inode_rename: Unsupported flags: %#x\n", flags));
+        rc = -EINVAL;
+    }
+#else
+    RT_NOREF(flags);
+#endif
+    return rc;
+}
+
+
+#if RTLNX_VER_MAX(4,9,0)
+/**
+ * The traditional rename interface without any flags.
+ *
+ * Simply calls vbsf_inode_rename() with a flags value of zero.
+ *
+ * @param   old_parent  inode of the old parent directory
+ * @param   old_dentry  old directory cache entry
+ * @param   new_parent  inode of the new parent directory
+ * @param   new_dentry  new directory cache entry
+ * @returns 0 on success, Linux error code otherwise
+ */
+static int vbsf_inode_rename_no_flags(struct inode *old_parent, struct dentry *old_dentry,
+                                      struct inode *new_parent, struct dentry *new_dentry)
+{
+    unsigned const fNoFlags = 0;
+
+    return vbsf_inode_rename(old_parent, old_dentry, new_parent, new_dentry, fNoFlags);
+}
+#endif
+
+
+/**
+ * Create a symbolic link.
+ *
+ * Converts @a target into a SHFLSTRING, builds the full path for @a dentry,
+ * asks the host to create the symlink and finally instantiates an inode for
+ * it.
+ *
+ * @param   ns      The name space (5.12 and later only; not used here).
+ * @param   parent  inode of the directory the link is created in
+ * @param   dentry  directory cache entry for the new link
+ * @param   target  what the link should point to
+ * @returns 0 on success, Linux error code otherwise
+ */
+#if RTLNX_VER_MIN(5,12,0)
+static int vbsf_inode_symlink(struct user_namespace *ns, struct inode *parent, struct dentry *dentry, const char *target)
+#else
+static int vbsf_inode_symlink(struct inode *parent, struct dentry *dentry, const char *target)
+#endif
+{
+    struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(parent->i_sb);
+    struct vbsf_inode_info *sf_i;
+    VBOXSFCREATESYMLINKREQ *pReq;
+    PSHFLSTRING             pTarget = NULL;
+    PSHFLSTRING             pPath   = NULL;
+    uint32_t                cbReq;
+    int                     rc;
+
+    /*
+     * Turn the target into a string (contiguous physical memory).
+     */
+    /** @todo we can save a kmalloc here if we switch to embedding the target rather
+     * than the symlink path into the request.  Will require more NLS helpers. */
+    rc = vbsf_nls_to_shflstring(pSuperInfo, target, &pTarget);
+    if (rc != 0)
+        return rc;
+
+    /*
+     * Create a full path for the symlink name.
+     */
+    sf_i = VBSF_GET_INODE_INFO(parent);
+    rc = vbsf_path_from_dentry(pSuperInfo, sf_i, dentry, &pPath, __func__);
+    if (rc != 0)
+        goto l_free_target;
+
+    /*
+     * Create the request and issue it.
+     */
+    cbReq = RT_UOFFSETOF(VBOXSFCREATESYMLINKREQ, StrSymlinkPath.String) + pPath->u16Size;
+    pReq  = (VBOXSFCREATESYMLINKREQ *)VbglR0PhysHeapAlloc(cbReq);
+    if (!pReq) {
+        SFLOGFLOW(("vbsf_inode_symlink: failed to allocate %u phys heap for the request!\n", cbReq));
+        rc = -ENOMEM;
+        goto l_free_path;
+    }
+
+    RT_ZERO(*pReq);
+    memcpy(&pReq->StrSymlinkPath, pPath, SHFLSTRING_HEADER_SIZE + pPath->u16Size);
+
+    rc = VbglR0SfHostReqCreateSymlinkContig(pSuperInfo->map.root, pTarget, virt_to_phys(pTarget), pReq);
+    if (!RT_SUCCESS(rc)) {
+        int const vrc = rc;
+        if (vrc == VERR_WRITE_PROTECT)
+            rc = -EPERM; /* EPERM: Symlink creation not supported according to the linux manpage as of 2017-09-15.
+                            "VBoxInternal2/SharedFoldersEnableSymlinksCreate/<share>" is not 1. */
+        else
+            rc = -RTErrConvertToErrno(vrc);
+        SFLOGFLOW(("vbsf_inode_symlink: VbglR0SfHostReqCreateSymlinkContig failed for '%s' -> '%s': %Rrc (-> %d)\n",
+                   pPath->String.ach, pTarget->String.ach, vrc, rc));
+    } else {
+        sf_i->force_restat = 1;
+
+        /*
+         * Instantiate a new inode for the symlink.
+         */
+        rc = vbsf_inode_instantiate(parent, dentry, pPath, &pReq->ObjInfo, SHFL_HANDLE_NIL);
+        if (rc != 0) {
+            SFLOGFLOW(("vbsf_inode_symlink: Failed to create inode for '%s': %d\n", pPath->String.ach, rc));
+            vbsf_dentry_chain_increase_parent_ttl(dentry);
+            vbsf_dentry_invalidate_ttl(dentry);
+        } else {
+            SFLOGFLOW(("vbsf_inode_symlink: Successfully created '%s' -> '%s'\n", pPath->String.ach, pTarget->String.ach));
+            pPath = NULL; /* consumed by inode */
+            vbsf_dentry_chain_increase_ttl(dentry);
+        }
+    }
+    VbglR0PhysHeapFree(pReq);
+
+l_free_path:
+    /* Only free the path if the inode did not take it over. */
+    if (pPath)
+        kfree(pPath);
+l_free_target:
+    kfree(pTarget);
+    return rc;
+}
+
+
+/**
+ * Directory inode operations.
+ *
+ * Table wiring the directory handlers into the VFS.  Which members exist and
+ * which rename variant is used depends on the kernel version, hence the
+ * preprocessor conditionals below.
+ */
+struct inode_operations vbsf_dir_iops = {
+ .lookup = vbsf_inode_lookup,
+#if RTLNX_VER_MIN(3,16,0)
+ /* Combined lookup + create + open; only wired up on 3.16 and later. */
+ .atomic_open = vbsf_inode_atomic_open,
+#endif
+ .create = vbsf_inode_create,
+ .symlink = vbsf_inode_symlink,
+ .mkdir = vbsf_inode_mkdir,
+ .rmdir = vbsf_inode_rmdir,
+ .unlink = vbsf_inode_unlink,
+#if RTLNX_VER_MIN(4,9,0)
+ /* 4.9 and later: .rename gets the handler that takes a flags argument. */
+ .rename = vbsf_inode_rename,
+#else
+ /* Before 4.9: flag-less wrapper as .rename (below 3.17) and/or the
+ flags-taking handler as .rename2 (3.15 and later). */
+# if RTLNX_VER_MAX(3,17,0)
+ .rename = vbsf_inode_rename_no_flags,
+# endif
+# if RTLNX_VER_MIN(3,15,0)
+ .rename2 = vbsf_inode_rename,
+# endif
+#endif
+#if RTLNX_VER_MIN(2,5,18)
+ .getattr = vbsf_inode_getattr,
+#else
+ /* Kernels before 2.5.18 use .revalidate instead of .getattr. */
+ .revalidate = vbsf_inode_revalidate,
+#endif
+ .setattr = vbsf_inode_setattr,
+};
+
diff --git a/src/VBox/Additions/linux/sharedfolders/files_vboxsf b/src/VBox/Additions/linux/sharedfolders/files_vboxsf
new file mode 100755
index 00000000..d896212a
--- /dev/null
+++ b/src/VBox/Additions/linux/sharedfolders/files_vboxsf
@@ -0,0 +1,107 @@
+#!/bin/sh
+# $Id: files_vboxsf $
+## @file
+# Shared file between Makefile.kmk and export_modules.sh.
+#
+
+#
+# Copyright (C) 2007-2022 Oracle and/or its affiliates.
+#
+# This file is part of VirtualBox base platform packages, as
+# available from https://www.virtualbox.org.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation, in version 3 of the
+# License.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, see <https://www.gnu.org/licenses>.
+#
+# SPDX-License-Identifier: GPL-3.0-only
+#
+
+FILES_VBOXSF_NOBIN=" \
+ ${PATH_ROOT}/include/iprt/nocrt/limits.h=>include/iprt/nocrt/limits.h \
+ ${PATH_ROOT}/include/iprt/alloc.h=>include/iprt/alloc.h \
+ ${PATH_ROOT}/include/iprt/asm.h=>include/iprt/asm.h \
+ ${PATH_ROOT}/include/iprt/asm-amd64-x86.h=>include/iprt/asm-amd64-x86.h \
+ ${PATH_ROOT}/include/iprt/asm-math.h=>include/iprt/asm-math.h \
+ ${PATH_ROOT}/include/iprt/assert.h=>include/iprt/assert.h \
+ ${PATH_ROOT}/include/iprt/assertcompile.h=>include/iprt/assertcompile.h \
+ ${PATH_ROOT}/include/iprt/cdefs.h=>include/iprt/cdefs.h \
+ ${PATH_ROOT}/include/iprt/err.h=>include/iprt/err.h \
+ ${PATH_ROOT}/include/iprt/errcore.h=>include/iprt/errcore.h \
+ ${PATH_ROOT}/include/iprt/fs.h=>include/iprt/fs.h \
+ ${PATH_ROOT}/include/iprt/latin1.h=>include/iprt/latin1.h \
+ ${PATH_ROOT}/include/iprt/list.h=>include/iprt/list.h \
+ ${PATH_ROOT}/include/iprt/log.h=>include/iprt/log.h \
+ ${PATH_ROOT}/include/iprt/mangling.h=>include/iprt/mangling.h \
+ ${PATH_ROOT}/include/iprt/mem.h=>include/iprt/mem.h \
+ ${PATH_ROOT}/include/iprt/memobj.h=>include/iprt/memobj.h \
+ ${PATH_ROOT}/include/iprt/param.h=>include/iprt/param.h \
+ ${PATH_ROOT}/include/iprt/path.h=>include/iprt/path.h \
+ ${PATH_ROOT}/include/iprt/semaphore.h=>include/iprt/semaphore.h \
+ ${PATH_ROOT}/include/iprt/stdarg.h=>include/iprt/stdarg.h \
+ ${PATH_ROOT}/include/iprt/stdint.h=>include/iprt/stdint.h \
+ ${PATH_ROOT}/include/iprt/string.h=>include/iprt/string.h \
+ ${PATH_ROOT}/include/iprt/time.h=>include/iprt/time.h \
+ ${PATH_ROOT}/include/iprt/types.h=>include/iprt/types.h \
+ ${PATH_ROOT}/include/iprt/uint64.h=>include/iprt/uint64.h \
+ ${PATH_ROOT}/include/iprt/uni.h=>include/iprt/uni.h \
+ ${PATH_ROOT}/include/iprt/utf16.h=>include/iprt/utf16.h \
+ ${PATH_ROOT}/include/iprt/x86-helpers.h=>include/iprt/x86-helpers.h \
+ ${PATH_ROOT}/include/iprt/linux/version.h=>include/iprt/linux/version.h \
+ ${PATH_ROOT}/include/VBox/cdefs.h=>include/VBox/cdefs.h \
+ ${PATH_ROOT}/include/VBox/err.h=>include/VBox/err.h \
+ ${PATH_ROOT}/include/VBox/log.h=>include/VBox/log.h \
+ ${PATH_ROOT}/include/VBox/ostypes.h=>include/VBox/ostypes.h \
+ ${PATH_ROOT}/include/VBox/param.h=>include/VBox/param.h \
+ ${PATH_ROOT}/include/VBox/shflsvc.h=>include/VBox/shflsvc.h \
+ ${PATH_ROOT}/include/VBox/types.h=>include/VBox/types.h \
+ ${PATH_ROOT}/include/VBox/VBoxGuest.h=>include/VBox/VBoxGuest.h \
+ ${PATH_ROOT}/include/VBox/VBoxGuestCoreTypes.h=>include/VBox/VBoxGuestCoreTypes.h \
+ ${PATH_ROOT}/include/VBox/VBoxGuestLib.h=>include/VBox/VBoxGuestLib.h \
+ ${PATH_ROOT}/include/VBox/VBoxGuestLibSharedFolders.h=>include/VBox/VBoxGuestLibSharedFolders.h \
+ ${PATH_ROOT}/include/VBox/VBoxGuestLibSharedFoldersInline.h=>include/VBox/VBoxGuestLibSharedFoldersInline.h \
+ ${PATH_ROOT}/include/VBox/VBoxGuestMangling.h=>include/VBox/VBoxGuestMangling.h \
+ ${PATH_ROOT}/include/VBox/VMMDev.h=>include/VBox/VMMDev.h \
+ ${PATH_ROOT}/include/VBox/VMMDevCoreTypes.h=>include/VBox/VMMDevCoreTypes.h \
+ ${PATH_ROOT}/src/VBox/Additions/common/VBoxGuest/lib/VBoxGuestR0LibInternal.h=>VBoxGuestR0LibInternal.h \
+ ${PATH_ROOT}/src/VBox/Additions/common/VBoxGuest/lib/VBoxGuestR0LibGenericRequest.cpp=>VBoxGuestR0LibGenericRequest.c \
+ ${PATH_ROOT}/src/VBox/Additions/common/VBoxGuest/lib/VBoxGuestR0LibHGCM.cpp=>VBoxGuestR0LibHGCM.c \
+ ${PATH_ROOT}/src/VBox/Additions/common/VBoxGuest/lib/VBoxGuestR0LibIdc.cpp=>VBoxGuestR0LibIdc.c \
+ ${PATH_ROOT}/src/VBox/Additions/common/VBoxGuest/lib/VBoxGuestR0LibIdc-unix.cpp=>VBoxGuestR0LibIdc-unix.c \
+ ${PATH_ROOT}/src/VBox/Additions/common/VBoxGuest/lib/VBoxGuestR0LibInit.cpp=>VBoxGuestR0LibInit.c \
+ ${PATH_ROOT}/src/VBox/Additions/common/VBoxGuest/lib/VBoxGuestR0LibPhysHeap.cpp=>VBoxGuestR0LibPhysHeap.c \
+ ${PATH_ROOT}/src/VBox/Additions/common/VBoxGuest/lib/VBoxGuestR0LibSharedFolders.c=>VBoxGuestR0LibSharedFolders.c \
+ ${PATH_ROOT}/src/VBox/Installer/linux/Makefile-header.gmk=>Makefile-header.gmk \
+ ${PATH_ROOT}/src/VBox/Installer/linux/Makefile-footer.gmk=>Makefile-footer.gmk \
+ ${PATH_ROOT}/src/VBox/Runtime/common/math/gcc/divdi3.c=>divdi3.c \
+ ${PATH_ROOT}/src/VBox/Runtime/common/math/gcc/moddi3.c=>moddi3.c \
+ ${PATH_ROOT}/src/VBox/Runtime/common/math/gcc/qdivrem.c=>qdivrem.c \
+ ${PATH_ROOT}/src/VBox/Runtime/common/math/gcc/quad.h=>quad.h \
+ ${PATH_ROOT}/src/VBox/Runtime/common/math/gcc/udivdi3.c=>udivdi3.c \
+ ${PATH_ROOT}/src/VBox/Runtime/common/math/gcc/udivmoddi4.c=>udivmoddi4.c \
+ ${PATH_ROOT}/src/VBox/Runtime/common/math/gcc/umoddi3.c=>umoddi3.c \
+ ${PATH_ROOT}/src/VBox/Runtime/r0drv/linux/the-linux-kernel.h=>r0drv/linux/the-linux-kernel.h \
+ ${PATH_ROOT}/src/VBox/Additions/linux/sharedfolders/Makefile.module=>Makefile \
+ ${PATH_ROOT}/src/VBox/Additions/linux/sharedfolders/dirops.c=>dirops.c \
+ ${PATH_ROOT}/src/VBox/Additions/linux/sharedfolders/lnkops.c=>lnkops.c \
+ ${PATH_ROOT}/src/VBox/Additions/linux/sharedfolders/regops.c=>regops.c \
+ ${PATH_ROOT}/src/VBox/Additions/linux/sharedfolders/utils.c=>utils.c \
+ ${PATH_ROOT}/src/VBox/Additions/linux/sharedfolders/vbsfmount.h=>vbsfmount.h \
+ ${PATH_ROOT}/src/VBox/Additions/linux/sharedfolders/vfsmod.c=>vfsmod.c \
+ ${PATH_ROOT}/src/VBox/Additions/linux/sharedfolders/vfsmod.h=>vfsmod.h \
+ ${PATH_OUT}/version-generated.h=>version-generated.h \
+ ${PATH_OUT}/revision-generated.h=>revision-generated.h \
+ ${PATH_OUT}/product-generated.h=>product-generated.h \
+"
+
+FILES_VBOXSF_BIN=" \
+"
diff --git a/src/VBox/Additions/linux/sharedfolders/lnkops.c b/src/VBox/Additions/linux/sharedfolders/lnkops.c
new file mode 100644
index 00000000..366d990b
--- /dev/null
+++ b/src/VBox/Additions/linux/sharedfolders/lnkops.c
@@ -0,0 +1,305 @@
+/* $Id: lnkops.c $ */
+/** @file
+ * vboxsf - VBox Linux Shared Folders VFS, operations for symbolic links.
+ */
+
+/*
+ * Copyright (C) 2010-2022 Oracle and/or its affiliates.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#include "vfsmod.h"
+
+
+/**
+ * Translates the VBox status code of a failed read-link request into a
+ * negative Linux errno value.
+ *
+ * @returns -EINVAL when the status says the object is something other than a
+ *          symbolic link, -ENOTDIR for VERR_PATH_NOT_FOUND, -ENOENT for
+ *          VERR_FILE_NOT_FOUND and -EPROTO for every other status.
+ * @param   vrc     The VBox status code to translate.
+ */
+DECLINLINE(int) vbsf_convert_symlink_error(int vrc)
+{
+    switch (vrc) {
+        /* The object exists but is not a symbolic link. */
+        case VERR_IS_A_DIRECTORY:
+        case VERR_IS_A_FIFO:
+        case VERR_IS_A_FILE:
+        case VERR_IS_A_BLOCK_DEVICE:
+        case VERR_IS_A_CHAR_DEVICE:
+        case VERR_IS_A_SOCKET:
+        case VERR_NOT_SYMLINK:
+            return -EINVAL;
+
+        case VERR_PATH_NOT_FOUND:
+            return -ENOTDIR;
+
+        case VERR_FILE_NOT_FOUND:
+            return -ENOENT;
+
+        default:
+            return -EPROTO;
+    }
+}
+
+
+/**
+ * Does the NLS conversion of the symlink target.
+ *
+ * The UTF-8 input in @a pszTarget is first copied aside, either to the tail
+ * end of the buffer itself or to a temporary heap allocation, and is then
+ * converted back into the start of @a pszTarget by vbsf_nlscpy().
+ *
+ * @returns 0 on success (callers test for zero), -ENOMEM if the temporary
+ *          buffer cannot be allocated, -ENAMETOOLONG if the target is not
+ *          terminated within the buffer, otherwise whatever vbsf_nlscpy()
+ *          returns.
+ * @param   pSuperInfo  The super block info, passed on to vbsf_nlscpy().
+ * @param   pszTarget   The target buffer; UTF-8 on input, converted in place.
+ * @param   cbTargetBuf The size of the target buffer in bytes.
+ */
+static int vbsf_symlink_nls_convert_slow(struct vbsf_super_info *pSuperInfo, char *pszTarget, size_t cbTargetBuf)
+{
+    int          rc;
+    size_t const cchUtf8 = RTStrNLen(pszTarget, cbTargetBuf);
+    if (cchUtf8 < cbTargetBuf) {
+        /*
+         * If the target is short and there is a lot of space left in the target
+         * buffer (typically PAGE_SIZE in size), we move the input to the end
+         * instead of allocating a temporary buffer for it.  This works because
+         * there shouldn't be anything that is more than 8x worse than UTF-8
+         * when it comes to efficiency.
+         *
+         * Note: for an empty target cchUtf8 - 1 wraps around, so that case
+         *       takes the allocation path below.
+         */
+        char *pszFree = NULL;
+        char *pszUtf8;
+        if (cchUtf8 - 1 <= cbTargetBuf / 8) {
+            pszUtf8 = &pszTarget[cbTargetBuf - cchUtf8 - 1];
+            /* The parked copy occupies cchUtf8 + 1 bytes (incl. terminator) at
+               the very end; exclude all of it from the output window so the
+               conversion can never overwrite input it has not read yet. */
+            cbTargetBuf -= cchUtf8 + 1;
+        } else {
+            pszFree = pszUtf8 = kmalloc(cchUtf8 + 1, GFP_KERNEL);
+            if (RT_UNLIKELY(!pszUtf8)) {
+                /* Cast so the argument matches the %u conversion on 64-bit hosts. */
+                LogRelMax(50, ("vbsf_symlink_nls_convert_slow: failed to allocate %u bytes\n", (unsigned)(cchUtf8 + 1)));
+                return -ENOMEM;
+            }
+        }
+        memcpy(pszUtf8, pszTarget, cchUtf8);
+        pszUtf8[cchUtf8] = '\0';
+
+        rc = vbsf_nlscpy(pSuperInfo, pszTarget, cbTargetBuf, pszUtf8, cchUtf8);
+        if (pszFree)
+            kfree(pszFree);
+    } else {
+        SFLOGFLOW(("vbsf_symlink_nls_convert_slow: Impossible! Unterminated target!\n"));
+        rc = -ENAMETOOLONG;
+    }
+    return rc;
+}
+
+
+/**
+ * Converts the symlink target to the mount's NLS, unless that is UTF-8 already.
+ *
+ * @returns 0 if no conversion is needed, otherwise the result of
+ *          vbsf_symlink_nls_convert_slow().
+ * @param   pSuperInfo  The super block info (fNlsIsUtf8 selects the fast path).
+ * @param   pszTarget   The target buffer, converted in place when needed.
+ * @param   cbTargetBuf The size of the target buffer in bytes.
+ */
+DECLINLINE(int) vbsf_symlink_nls_convert(struct vbsf_super_info *pSuperInfo, char *pszTarget, size_t cbTargetBuf)
+{
+    int rc = 0;
+    if (!pSuperInfo->fNlsIsUtf8)
+        rc = vbsf_symlink_nls_convert_slow(pSuperInfo, pszTarget, cbTargetBuf);
+    return rc;
+}
+
+#if RTLNX_VER_MIN(4,5,0)
+
+/**
+ * Get symbolic link.
+ *
+ * Reads the link target from the host into a freshly allocated PAGE_SIZE
+ * buffer, converts it to the mount's NLS if needed and hands the buffer to the
+ * caller, registering kfree_link via @a done to release it later.
+ *
+ * @returns The target string on success.  On failure an ERR_PTR() value:
+ *          -ECHILD when @a dentry is NULL, -ENOMEM when the buffer cannot be
+ *          allocated, the converted status of a failed host request, or the
+ *          negative errno of a failed NLS conversion.
+ * @param   dentry  The dentry of the symlink, may be NULL.
+ * @param   inode   The inode of the symlink.
+ * @param   done    Delayed call structure receiving the buffer release callback.
+ */
+static const char *vbsf_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done)
+{
+    char *pszTarget;
+    if (dentry) {
+        pszTarget = (char *)kzalloc(PAGE_SIZE, GFP_KERNEL);
+        if (pszTarget) {
+            struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(inode->i_sb);
+            struct vbsf_inode_info *sf_i       = VBSF_GET_INODE_INFO(inode);
+            int rc = VbglR0SfHostReqReadLinkContigSimple(pSuperInfo->map.root, sf_i->path->String.ach, sf_i->path->u16Length,
+                                                         pszTarget, virt_to_phys(pszTarget), RT_MIN(PATH_MAX, PAGE_SIZE - 1));
+            if (RT_SUCCESS(rc)) {
+                pszTarget[PAGE_SIZE - 1] = '\0';
+                SFLOGFLOW(("vbsf_get_link: %s -> %s\n", sf_i->path->String.ach, pszTarget));
+                rc = vbsf_symlink_nls_convert(pSuperInfo, pszTarget, PAGE_SIZE);
+                if (rc == 0) {
+                    vbsf_dentry_chain_increase_ttl(dentry);
+                    set_delayed_call(done, kfree_link, pszTarget);
+                    return pszTarget;
+                }
+                /* rc is a negative errno from the NLS conversion here and must
+                   not be run through the VBox status code translation. */
+            } else {
+                SFLOGFLOW(("vbsf_get_link: VbglR0SfHostReqReadLinkContigSimple failed on '%s': %Rrc\n",
+                           sf_i->path->String.ach, rc));
+                rc = vbsf_convert_symlink_error(rc);
+            }
+            kfree(pszTarget);
+            pszTarget = ERR_PTR(rc);
+        } else
+            pszTarget = ERR_PTR(-ENOMEM);
+    } else
+        pszTarget = ERR_PTR(-ECHILD);
+    return pszTarget;
+}
+
+#else /* < 4.5 */
+
+# if RTLNX_VER_MAX(2,6,8)
+/**
+ * Reads the link into the given buffer.
+ *
+ * The target is fetched from the host into a temporary zeroed page, converted
+ * to the mount's NLS if needed and then copied out using vfs_readlink().
+ *
+ * @returns The result of vfs_readlink() on success, -ENOMEM if the temporary
+ *          page cannot be allocated, the converted status of a failed host
+ *          request, or the negative errno of a failed NLS conversion.
+ * @param   dentry  The dentry of the symlink.
+ * @param   buffer  The destination buffer, passed on to vfs_readlink().
+ * @param   len     The size of the destination buffer.
+ */
+static int vbsf_readlink(struct dentry *dentry, char *buffer, int len)
+{
+    int   rc;
+    char *pszTarget = (char *)get_zeroed_page(GFP_KERNEL);
+    if (pszTarget) {
+        struct inode           *inode      = dentry->d_inode;
+        struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(inode->i_sb);
+        struct vbsf_inode_info *sf_i       = VBSF_GET_INODE_INFO(inode);
+        rc = VbglR0SfHostReqReadLinkContigSimple(pSuperInfo->map.root, sf_i->path->String.ach, sf_i->path->u16Length,
+                                                 pszTarget, virt_to_phys(pszTarget), RT_MIN(PATH_MAX, PAGE_SIZE - 1));
+        if (RT_SUCCESS(rc)) {
+            pszTarget[PAGE_SIZE - 1] = '\0';
+            /* Plain %s: a '*' width would consume an int argument that is not passed. */
+            SFLOGFLOW(("vbsf_readlink: %s -> %s\n", sf_i->path->String.ach, pszTarget));
+            rc = vbsf_symlink_nls_convert(pSuperInfo, pszTarget, PAGE_SIZE);
+            if (rc == 0) {
+                vbsf_dentry_chain_increase_ttl(dentry);
+                rc = vfs_readlink(dentry, buffer, len, pszTarget);
+            }
+        } else {
+            SFLOGFLOW(("vbsf_readlink: VbglR0SfHostReqReadLinkContigSimple failed on '%s': %Rrc\n", sf_i->path->String.ach, rc));
+            rc = vbsf_convert_symlink_error(rc);
+        }
+        free_page((unsigned long)pszTarget);
+    } else
+        rc = -ENOMEM;
+    return rc;
+}
+# endif /* < 2.6.8 */
+
+/**
+ * Follow link in dentry.
+ *
+ * Reads the link target from the host into a zeroed page and converts it to
+ * the mount's NLS if needed.  How the result is handed back depends on the
+ * kernel version:
+ *  - 4.2 and later: the page is returned and also stored in @a *cookie.
+ *  - 2.6.8 up to 4.2: the page is attached with nd_set_link() and the
+ *    function returns NULL (2.6.13+) or 0.
+ *  - before 2.6.8: vfs_follow_link() is called with the page, the page is
+ *    freed again and the vfs_follow_link() status is returned.
+ *
+ * On failure the page is freed and the negative errno is reported as an
+ * ERR_PTR() (via return value and cookie, or via nd_set_link()), or returned
+ * directly on kernels before 2.6.8.
+ */
+# if RTLNX_VER_MIN(4,2,0)
+static const char *vbsf_follow_link(struct dentry *dentry, void **cookie)
+# elif RTLNX_VER_MIN(2,6,13)
+static void *vbsf_follow_link(struct dentry *dentry, struct nameidata *nd)
+# else
+static int vbsf_follow_link(struct dentry *dentry, struct nameidata *nd)
+# endif
+{
+ int rc;
+ char *pszTarget = (char *)get_zeroed_page(GFP_KERNEL);
+ if (pszTarget) {
+ struct inode *inode = dentry->d_inode;
+ struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(inode->i_sb);
+ struct vbsf_inode_info *sf_i = VBSF_GET_INODE_INFO(inode);
+
+ rc = VbglR0SfHostReqReadLinkContigSimple(pSuperInfo->map.root, sf_i->path->String.ach, sf_i->path->u16Length,
+ pszTarget, virt_to_phys(pszTarget), RT_MIN(PATH_MAX, PAGE_SIZE - 1));
+ if (RT_SUCCESS(rc)) {
+ /* Force termination at the very end of the page regardless of what the host returned. */
+ pszTarget[PAGE_SIZE - 1] = '\0';
+ SFLOGFLOW(("vbsf_follow_link: %s -> %s\n", sf_i->path->String.ach, pszTarget));
+ rc = vbsf_symlink_nls_convert(pSuperInfo, pszTarget, PAGE_SIZE);
+ if (rc == 0) {
+ /*
+ * Succeeded. For 2.6.8 and later the page gets associated
+ * with the caller-cookie or nameidata structure and freed
+ * later by vbsf_put_link(). On earlier kernels we have to
+ * call vfs_follow_link() which will try continue the walking
+ * using the buffer we pass it here.
+ */
+ vbsf_dentry_chain_increase_ttl(dentry);
+# if RTLNX_VER_MIN(4,2,0)
+ *cookie = pszTarget;
+ return pszTarget;
+# elif RTLNX_VER_MIN(2,6,8)
+ nd_set_link(nd, pszTarget);
+# if RTLNX_VER_MIN(2,6,13)
+ return NULL;
+# else
+ return 0;
+# endif
+# else /* < 2.6.8 */
+ rc = vfs_follow_link(nd, pszTarget);
+ free_page((unsigned long)pszTarget);
+ return rc;
+# endif
+ }
+
+ /*
+ * Failed.
+ * (NLS conversion failure: rc already holds a negative errno and is
+ * used as-is below.)
+ */
+ } else {
+ LogFunc(("VbglR0SfReadLink failed, caller=%s, rc=%Rrc\n", __func__, rc));
+ rc = vbsf_convert_symlink_error(rc);
+ }
+ free_page((unsigned long)pszTarget);
+ } else {
+ rc = -ENOMEM;
+ }
+ /* Common failure exit: report rc in the form the kernel version expects. */
+# if RTLNX_VER_MIN(4,2,0)
+ *cookie = ERR_PTR(rc);
+ return (const char *)ERR_PTR(rc);
+# elif RTLNX_VER_MIN(2,6,8)
+ nd_set_link(nd, (char *)ERR_PTR(rc));
+# if RTLNX_VER_MIN(2,6,13)
+ return NULL;
+# else
+ return 0;
+# endif
+# else /* < 2.6.8 */
+ return rc;
+# endif /* < 2.6.8 */
+}
+
+# if RTLNX_VER_MIN(2,6,8)
+/**
+ * For freeing target link buffer allocated by vbsf_follow_link.
+ *
+ * For kernels before 2.6.8 memory isn't being kept around.
+ *
+ * The page is taken from @a cookie on 2.6.13 and later, otherwise it is
+ * fetched from @a nd with nd_get_link().  ERR_PTR() values, which
+ * vbsf_follow_link stores on failure, are not freed.
+ */
+# if RTLNX_VER_MIN(4,2,0)
+static void vbsf_put_link(struct inode *inode, void *cookie)
+# elif RTLNX_VER_MIN(2,6,13)
+static void vbsf_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
+# else
+static void vbsf_put_link(struct dentry *dentry, struct nameidata *nd)
+# endif
+{
+# if RTLNX_VER_MIN(2,6,13)
+ char *page = cookie;
+# else
+ char *page = nd_get_link(nd);
+# endif
+ SFLOGFLOW(("vbsf_put_link: page=%p\n", page));
+ /* Only real pages are released; error pointers carry no allocation. */
+ if (!IS_ERR(page))
+ free_page((unsigned long)page);
+}
+# endif /* >= 2.6.8 */
+
+#endif /* < 4.5.0 */
+
+/**
+ * Symlink inode operations.
+ *
+ * The members that get populated depend on the kernel version:
+ *  - .readlink is only set for kernels before 4.10; it is generic_readlink
+ *    from 2.6.8 on and vbsf_readlink on older kernels.
+ *  - 4.5 and later use .get_link; older kernels use .follow_link, paired
+ *    with .put_link from 2.6.8 on.
+ */
+struct inode_operations vbsf_lnk_iops = {
+#if RTLNX_VER_MAX(4,10,0)
+# if RTLNX_VER_MIN(2,6,8)
+ .readlink = generic_readlink,
+# else
+ .readlink = vbsf_readlink,
+# endif
+#endif
+#if RTLNX_VER_MIN(4,5,0)
+ .get_link = vbsf_get_link
+#else
+ .follow_link = vbsf_follow_link,
+# if RTLNX_VER_MIN(2,6,8)
+ .put_link = vbsf_put_link,
+# endif
+#endif
+};
+
diff --git a/src/VBox/Additions/linux/sharedfolders/mount.vboxsf.c b/src/VBox/Additions/linux/sharedfolders/mount.vboxsf.c
new file mode 100644
index 00000000..3bd12094
--- /dev/null
+++ b/src/VBox/Additions/linux/sharedfolders/mount.vboxsf.c
@@ -0,0 +1,702 @@
+/* $Id: mount.vboxsf.c $ */
+/** @file
+ * VirtualBox Guest Additions for Linux - mount(8) helper.
+ *
+ * Parses the options provided by mount (or by the user directly),
+ * concatenates them into an option string and passes that to mount(2).
+ * Optionally adds an entry to mtab.
+ */
+
+/*
+ * Copyright (C) 2006-2022 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only
+ */
+
+
+#ifndef _GNU_SOURCE
+# define _GNU_SOURCE
+#endif
+
+/* #define DEBUG */
+#include <errno.h>
+#include <fcntl.h>
+#include <ctype.h>
+#include <getopt.h>
+#include <mntent.h>
+#include <pwd.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <mntent.h>
+#include <limits.h>
+#include <iconv.h>
+#include <sys/utsname.h>
+#include <linux/version.h>
+
+#include "vbsfmount.h"
+
+#include <iprt/assertcompile.h>
+#include <iprt/param.h> /* PAGE_SIZE (used by MAX_MNTOPT_STR) */
+#include <iprt/string.h>
+
+
+/*********************************************************************************************************************************
+* Defined Constants And Macros *
+*********************************************************************************************************************************/
+/** Function attributes for the panic helpers: the function does not return and
+ *  takes a printf-style format string as argument 1 with varargs from 2 on. */
+#define PANIC_ATTR __attribute ((noreturn, __format__ (__printf__, 1, 2)))
+
+
+/*********************************************************************************************************************************
+* Structures and Typedefs *
+*********************************************************************************************************************************/
+/**
+ * Mount options collected from the command line before mount(2) is called.
+ */
+struct vbsf_mount_opts
+{
+ unsigned long fFlags; /**< MS_XXX */
+
+ /** @name Preformatted option=value or empty if not specified.
+ * Helps eliminate duplicate options as well as simplifying concatting.
+ * @{ */
+ char szTTL[32];
+ char szMsDirCacheTTL[32];
+ char szMsInodeTTL[32];
+ char szMaxIoPages[32];
+ char szDirBuf[32];
+ char szCacheMode[32];
+ char szUid[32];
+ char szGid[32];
+ char szDMode[32];
+ char szFMode[32];
+ char szDMask[32];
+ char szFMask[32];
+ char szIoCharset[32];
+ /** @} */
+
+ bool fSloppy; /**< Set by -s: unknown mount options are ignored instead of being fatal. */
+ char *pszConvertCp; /**< Heap copy of the convertcp= value, NULL if not specified. */
+};
+
+
+/**
+ * Prints a printf-style message to stderr and terminates the process with
+ * EXIT_FAILURE.  The message is printed as given, no newline is appended.
+ *
+ * @param   fmt     The format string.
+ * @param   ...     Format arguments.
+ */
+static void PANIC_ATTR
+panic(const char *fmt, ...)
+{
+    va_list va;
+
+    va_start(va, fmt);
+    (void)vfprintf(stderr, fmt, va);
+    va_end(va);
+
+    exit(EXIT_FAILURE);
+}
+
+/**
+ * Prints a printf-style message to stderr followed by ": ", the description
+ * of the errno value seen on entry and a newline, then terminates the process
+ * with EXIT_FAILURE.
+ *
+ * @param   fmt     The format string.
+ * @param   ...     Format arguments.
+ */
+static void PANIC_ATTR
+panic_err(const char *fmt, ...)
+{
+    /* Capture errno first, the printing below may change it. */
+    int const iSavedErrno = errno;
+    va_list   va;
+
+    va_start(va, fmt);
+    (void)vfprintf(stderr, fmt, va);
+    va_end(va);
+
+    (void)fprintf(stderr, ": %s\n", strerror(iSavedErrno));
+    exit(EXIT_FAILURE);
+}
+
+/**
+ * Converts an option value to an int, terminating the process via panic_err()
+ * if the value is out of range or the number ends before @a size characters
+ * have been consumed.
+ *
+ * @returns The converted value.
+ * @param   s       Start of the value; need not be terminated at @a size.
+ * @param   size    The length of the value in characters.
+ * @param   base    The number base handed to strtoll().
+ */
+static int
+safe_atoi(const char *s, size_t size, int base)
+{
+    char         *pszStop;
+    long long int llValue = strtoll(s, &pszStop, base);
+    int           fBad;
+
+    if (llValue < INT_MIN)
+        fBad = 1;
+    else if (   llValue > INT_MAX
+             && (base != 8 || llValue != UINT_MAX)) /* hack for printf("%o", -1) - 037777777777 */
+        fBad = 1;
+    else
+        fBad = pszStop < s + size;
+
+    if (fBad)
+    {
+        errno = ERANGE;
+        panic_err("could not convert %.*s to integer, result = %lld (%d)",
+                  (int)size, s, llValue, (int)llValue);
+    }
+    return (int)llValue;
+}
+
+/**
+ * Converts an option value to an unsigned int, terminating the process via
+ * panic_err() if the value is negative, exceeds UINT_MAX or the number ends
+ * before @a size characters have been consumed.
+ *
+ * @returns The converted value.
+ * @param   s       Start of the value; need not be terminated at @a size.
+ * @param   size    The length of the value in characters.
+ * @param   base    The number base handed to strtoll().
+ */
+static unsigned
+safe_atoiu(const char *s, size_t size, int base)
+{
+    char         *pszStop;
+    long long int llValue = strtoll(s, &pszStop, base);
+    int           fBad;
+
+    if (llValue < 0)
+        fBad = 1;
+    else if (llValue > UINT_MAX)
+        fBad = 1;
+    else
+        fBad = pszStop < s + size;
+
+    if (fBad)
+    {
+        errno = ERANGE;
+        panic_err("could not convert %.*s to unsigned integer, result = %lld (%#llx)",
+                  (int)size, s, llValue, llValue);
+    }
+    return (unsigned)llValue;
+}
+
+/**
+ * Parses a comma separated mount option string (the -o argument) and records
+ * the result in @a opts.
+ *
+ * Flag options update opts->fFlags, value options are stored preformatted as
+ * "key=value" strings in their respective opts member.  Options requiring an
+ * argument terminate the process when the argument is missing.  An unknown
+ * option terminates the process after listing the documented options, unless
+ * opts->fSloppy is set, in which case it is skipped.
+ *
+ * @param   s       The option string, "opt[=value][,opt[=value]...]".
+ * @param   opts    The options structure to update.
+ */
+static void
+process_mount_opts(const char *s, struct vbsf_mount_opts *opts)
+{
+    const char *next = s;
+    size_t len;
+    typedef enum handler_opt
+    {
+        HO_RW,
+        HO_RO,
+        HO_UID,
+        HO_GID,
+        HO_TTL,
+        HO_DENTRY_TTL,
+        HO_INODE_TTL,
+        HO_MAX_IO_PAGES,
+        HO_DIR_BUF,
+        HO_CACHE,
+        HO_DMODE,
+        HO_FMODE,
+        HO_UMASK,
+        HO_DMASK,
+        HO_FMASK,
+        HO_IOCHARSET,
+        HO_NLS,
+        HO_CONVERTCP,
+        HO_NOEXEC,
+        HO_EXEC,
+        HO_NODEV,
+        HO_DEV,
+        HO_NOSUID,
+        HO_SUID,
+        HO_REMOUNT,
+        HO_NOAUTO,
+        HO_NIGNORE
+    } handler_opt;
+    struct
+    {
+        const char *name;
+        handler_opt opt;
+        int has_arg;
+        const char *desc;
+    } handlers[] =
+    {
+        {"rw",          HO_RW,              0, "mount read write (default)"},
+        {"ro",          HO_RO,              0, "mount read only"},
+        {"uid",         HO_UID,             1, "default file owner user id"},
+        {"gid",         HO_GID,             1, "default file owner group id"},
+        {"ttl",         HO_TTL,             1, "time to live for dentries & inode info"},
+        {"dcachettl",   HO_DENTRY_TTL,      1, "time to live for dentries"},
+        {"inodettl",    HO_INODE_TTL,       1, "time to live for inode info"},
+        {"maxiopages",  HO_MAX_IO_PAGES,    1, "max buffer size for I/O with host"},
+        {"dirbuf",      HO_DIR_BUF,         1, "directory buffer size (0 for default)"},
+        {"cache",       HO_CACHE,           1, "cache mode: none, strict (default), read, readwrite"},
+        {"iocharset",   HO_IOCHARSET,       1, "i/o charset (default utf8)"},
+        {"nls",         HO_NLS,             1, "i/o charset (default utf8)"},
+        {"convertcp",   HO_CONVERTCP,       1, "convert share name from given charset to utf8"},
+        {"dmode",       HO_DMODE,           1, "mode of all directories"},
+        {"fmode",       HO_FMODE,           1, "mode of all regular files"},
+        {"umask",       HO_UMASK,           1, "umask of directories and regular files"},
+        {"dmask",       HO_DMASK,           1, "umask of directories"},
+        {"fmask",       HO_FMASK,           1, "umask of regular files"},
+        {"noexec",      HO_NOEXEC,          0, NULL}, /* don't document these options directly here */
+        {"exec",        HO_EXEC,            0, NULL}, /* as they are well known and described in the */
+        {"nodev",       HO_NODEV,           0, NULL}, /* usual manpages */
+        {"dev",         HO_DEV,             0, NULL},
+        {"nosuid",      HO_NOSUID,          0, NULL},
+        {"suid",        HO_SUID,            0, NULL},
+        {"remount",     HO_REMOUNT,         0, NULL},
+        {"noauto",      HO_NOAUTO,          0, NULL},
+        {"_netdev",     HO_NIGNORE,         0, NULL},
+        {"relatime",    HO_NIGNORE,         0, NULL},
+        {NULL,          0,                  0, NULL}
+    }, *handler;
+
+    while (next)
+    {
+        const char *val;
+        size_t key_len, val_len;
+
+        /* Isolate the next option; a trailing comma ends the list. */
+        s = next;
+        next = strchr(s, ',');
+        if (!next)
+        {
+            len = strlen(s);
+        }
+        else
+        {
+            len = next - s;
+            next += 1;
+            if (!*next)
+                next = 0;
+        }
+
+        /* Split the option into key and (optional, non-empty) value. */
+        val = NULL;
+        val_len = 0;
+        for (key_len = 0; key_len < len; ++key_len)
+        {
+            if (s[key_len] == '=')
+            {
+                if (key_len + 1 < len)
+                {
+                    val = s + key_len + 1;
+                    val_len = len - key_len - 1;
+                }
+                break;
+            }
+        }
+
+        for (handler = handlers; handler->name; ++handler)
+        {
+            size_t j;
+            for (j = 0; j < key_len && handler->name[j] == s[j]; ++j)
+                ;
+
+            /* Exact key match only: the whole key consumed and the name ends here too. */
+            if (j == key_len && !handler->name[j])
+            {
+                if (handler->has_arg)
+                {
+                    if (!(val && *val))
+                    {
+                        panic("%.*s requires an argument (i.e. %.*s=<arg>)\n",
+                              (int)len, s, (int)len, s);
+                    }
+                }
+
+                switch (handler->opt)
+                {
+                    case HO_RW:
+                        opts->fFlags &= ~MS_RDONLY;
+                        break;
+                    case HO_RO:
+                        opts->fFlags |= MS_RDONLY;
+                        break;
+                    case HO_NOEXEC:
+                        opts->fFlags |= MS_NOEXEC;
+                        break;
+                    case HO_EXEC:
+                        opts->fFlags &= ~MS_NOEXEC;
+                        break;
+                    case HO_NODEV:
+                        opts->fFlags |= MS_NODEV;
+                        break;
+                    case HO_DEV:
+                        opts->fFlags &= ~MS_NODEV;
+                        break;
+                    case HO_NOSUID:
+                        opts->fFlags |= MS_NOSUID;
+                        break;
+                    case HO_SUID:
+                        opts->fFlags &= ~MS_NOSUID;
+                        break;
+                    case HO_REMOUNT:
+                        opts->fFlags |= MS_REMOUNT;
+                        break;
+                    case HO_TTL:
+                        snprintf(opts->szTTL, sizeof(opts->szTTL),
+                                 "ttl=%d", safe_atoi(val, val_len, 10));
+                        break;
+                    case HO_DENTRY_TTL:
+                        snprintf(opts->szMsDirCacheTTL, sizeof(opts->szMsDirCacheTTL),
+                                 "dcachettl=%d", safe_atoi(val, val_len, 10));
+                        break;
+                    case HO_INODE_TTL:
+                        snprintf(opts->szMsInodeTTL, sizeof(opts->szMsInodeTTL),
+                                 "inodettl=%d", safe_atoi(val, val_len, 10));
+                        break;
+                    case HO_MAX_IO_PAGES:
+                        /* %u: the value is unsigned, %d would print values above INT_MAX as negative. */
+                        snprintf(opts->szMaxIoPages, sizeof(opts->szMaxIoPages),
+                                 "maxiopages=%u", safe_atoiu(val, val_len, 10));
+                        break;
+                    case HO_DIR_BUF:
+                        snprintf(opts->szDirBuf, sizeof(opts->szDirBuf),
+                                 "dirbuf=%u", safe_atoiu(val, val_len, 10));
+                        break;
+                    case HO_CACHE:
+#define IS_EQUAL(a_sz) (val_len == sizeof(a_sz) - 1U && strncmp(val, a_sz, sizeof(a_sz) - 1U) == 0)
+                        if (IS_EQUAL("default"))
+                            strcpy(opts->szCacheMode, "cache=default");
+                        else if (IS_EQUAL("none"))
+                            strcpy(opts->szCacheMode, "cache=none");
+                        else if (IS_EQUAL("strict"))
+                            strcpy(opts->szCacheMode, "cache=strict");
+                        else if (IS_EQUAL("read"))
+                            strcpy(opts->szCacheMode, "cache=read");
+                        else if (IS_EQUAL("readwrite"))
+                            strcpy(opts->szCacheMode, "cache=readwrite");
+                        else
+                            panic("invalid cache mode '%.*s'\n"
+                                  "Valid cache modes are: default, none, strict, read, readwrite\n",
+                                  (int)val_len, val);
+                        break;
+                    case HO_UID:
+                        /** @todo convert string to id. */
+                        snprintf(opts->szUid, sizeof(opts->szUid),
+                                 "uid=%d", safe_atoi(val, val_len, 10));
+                        break;
+                    case HO_GID:
+                        /** @todo convert string to id. */
+                        snprintf(opts->szGid, sizeof(opts->szGid),
+                                 "gid=%d", safe_atoi(val, val_len, 10));
+                        break;
+                    case HO_DMODE:
+                        snprintf(opts->szDMode, sizeof(opts->szDMode),
+                                 "dmode=0%o", safe_atoi(val, val_len, 8));
+                        break;
+                    case HO_FMODE:
+                        snprintf(opts->szFMode, sizeof(opts->szFMode),
+                                 "fmode=0%o", safe_atoi(val, val_len, 8));
+                        break;
+                    case HO_UMASK:
+                    {
+                        int fMask = safe_atoi(val, val_len, 8);
+                        snprintf(opts->szDMask, sizeof(opts->szDMask), "dmask=0%o", fMask);
+                        snprintf(opts->szFMask, sizeof(opts->szFMask), "fmask=0%o", fMask);
+                        break;
+                    }
+                    case HO_DMASK:
+                        snprintf(opts->szDMask, sizeof(opts->szDMask),
+                                 "dmask=0%o", safe_atoi(val, val_len, 8));
+                        break;
+                    case HO_FMASK:
+                        snprintf(opts->szFMask, sizeof(opts->szFMask),
+                                 "fmask=0%o", safe_atoi(val, val_len, 8));
+                        break;
+                    case HO_IOCHARSET:
+                    case HO_NLS:
+                    {
+                        int cch;
+                        if (val_len >= MAX_NLS_NAME)
+                            panic("the character set name for I/O is too long: %*.*s\n", (int)val_len, (int)val_len, val);
+                        cch = snprintf(opts->szIoCharset, sizeof(opts->szIoCharset),
+                                       "%s=%*.*s", handler->opt == HO_IOCHARSET ? "iocharset" : "nls", (int)val_len, (int)val_len, val);
+                        /* The buffer also has to hold the "key=" prefix; refuse to
+                           pass a silently truncated charset name on to the kernel. */
+                        if (cch < 0 || (size_t)cch >= sizeof(opts->szIoCharset))
+                            panic("the character set name for I/O is too long: %*.*s\n", (int)val_len, (int)val_len, val);
+                        break;
+                    }
+                    case HO_CONVERTCP:
+                        /* The last convertcp= wins; release an earlier copy (free(NULL) is a no-op). */
+                        free(opts->pszConvertCp);
+                        opts->pszConvertCp = malloc(val_len + 1);
+                        if (!opts->pszConvertCp)
+                            panic_err("could not allocate memory");
+                        memcpy(opts->pszConvertCp, val, val_len);
+                        opts->pszConvertCp[val_len] = '\0';
+                        break;
+                    case HO_NOAUTO:
+                    case HO_NIGNORE:
+                        break;
+                }
+                break;
+            }
+            continue;
+        }
+
+        /* handler->name is NULL here only if no table entry matched. */
+        if (   !handler->name
+            && !opts->fSloppy)
+        {
+            fprintf(stderr, "unknown mount option `%.*s'\n", (int)len, s);
+            fprintf(stderr, "valid options:\n");
+
+            for (handler = handlers; handler->name; ++handler)
+            {
+                if (handler->desc)
+                    fprintf(stderr, " %-10s%s %s\n", handler->name,
+                            handler->has_arg ? "=<arg>" : "", handler->desc);
+            }
+            exit(EXIT_FAILURE);
+        }
+    }
+}
+
+/**
+ * Appends @a pszOptVal to @a pszOpts, comma separated, if it is not empty.
+ *
+ * Terminates the process via panic() when the result would not fit.
+ *
+ * @returns The new length of the option string (unchanged for an empty value).
+ * @param   pszOpts     The option string buffer.
+ * @param   cbOpts      The size of the option string buffer.
+ * @param   offOpts     The current length of the option string.
+ * @param   pszOptVal   The "option=value" string to append, may be empty.
+ */
+static size_t append_option(char *pszOpts, size_t cbOpts, size_t offOpts, const char *pszOptVal)
+{
+    size_t cchOptVal;
+    size_t cchSeparator;
+
+    if (*pszOptVal == '\0')
+        return offOpts;
+
+    cchOptVal    = strlen(pszOptVal);
+    cchSeparator = offOpts > 0 ? 1 : 0;
+    if (offOpts + cchSeparator + cchOptVal >= cbOpts)
+        panic("Too many options!");
+
+    if (cchSeparator)
+        pszOpts[offOpts++] = ',';
+    memcpy(&pszOpts[offOpts], pszOptVal, cchOptVal);
+    offOpts += cchOptVal;
+    pszOpts[offOpts] = '\0';
+    return offOpts;
+}
+
+/**
+ * Converts the shared folder name from the given character set to UTF-8.
+ *
+ * Terminates the process via panic_err() if the converter cannot be opened or
+ * the conversion fails (which includes the output not fitting).
+ *
+ * @param   in_codeset      The name of the character set of the input.
+ * @param   pszSharedFolder The shared folder name to convert.
+ * @param   pszDst          The output buffer; at most MAX_HOST_NAME - 1 bytes
+ *                          plus a terminator are written to it.
+ */
+static void
+convertcp(char *in_codeset, char *pszSharedFolder, char *pszDst)
+{
+    char *i = pszSharedFolder;
+    char *o = pszDst;
+    size_t ib = strlen(pszSharedFolder);
+    size_t ob = MAX_HOST_NAME - 1;
+    iconv_t cd;
+
+    cd = iconv_open("UTF-8", in_codeset);
+    if (cd == (iconv_t)-1)
+    {
+        panic_err("could not convert share name, iconv_open `%s' failed",
+                  in_codeset);
+    }
+
+    while (ib)
+    {
+        size_t c = iconv(cd, &i, &ib, &o, &ob);
+        if (c == (size_t)-1)
+        {
+            panic_err("could not convert share name(%s) at %d",
+                      pszSharedFolder, (int)(strlen(pszSharedFolder) - ib));
+        }
+    }
+    *o = 0;
+
+    /* Release the conversion descriptor, it is not needed any longer. */
+    iconv_close(cd);
+}
+
+
+/**
+ * Print out a usage message.
+ *
+ * @returns EXIT_FAILURE, so that callers can return the result from main().
+ * @param   argv0   The name of the application
+ */
+static int usage(char *argv0)
+{
+    printf("Usage: %s [OPTIONS] NAME MOUNTPOINT\n"
+           "Mount the VirtualBox shared folder NAME from the host system to MOUNTPOINT.\n"
+           "\n"
+           " -w mount the shared folder writable (the default)\n"
+           " -r mount the shared folder read-only\n"
+           " -n do not create an mtab entry\n"
+           " -s sloppy parsing, ignore unrecognized mount options\n"
+           " -o OPTION[,OPTION...] use the mount options specified\n"
+           "\n", argv0);
+    printf("Available mount options are:\n"
+           " rw mount writable (the default)\n"
+           " ro mount read only\n"
+           " uid=UID set the default file owner user id to UID\n"
+           " gid=GID set the default file owner group id to GID\n");
+    printf(" ttl=MILLISECONDS set the \"time to live\" for both the directory cache\n"
+           " and inode info. -1 for kernel default, 0 disables it.\n"
+           " dcachettl=MILLIES set the \"time to live\" for the directory cache,\n"
+           " overriding the 'ttl' option. Ignored if negative.\n"
+           " inodettl=MILLIES set the \"time to live\" for the inode information,\n"
+           " overriding the 'ttl' option. Ignored if negative.\n");
+    printf(" maxiopages=PAGES set the max host I/O buffers size in pages. Uses\n"
+           " default if zero.\n"
+           " dirbuf=BYTES set the directory enumeration buffer size in bytes.\n"
+           " Uses default size if zero.\n");
+    printf(" cache=MODE set the caching mode for the mount. Allowed values:\n"
+           " default: use the kernel default (strict)\n"
+           " none: no caching; may experience guest side\n"
+           " coherence issues between mmap and read.\n");
+    printf(" strict: no caching, except for writably mapped\n"
+           " files (for guest side coherence)\n"
+           " read: read via the page cache; host changes\n"
+           " may be completely ignored\n");
+    printf(" readwrite: read and write via the page cache; host\n"
+           " changes may be completely ignored and\n"
+           " guest changes takes a while to reach the host\n");
+    printf(" dmode=MODE override the mode of all directories to (octal) MODE\n"
+           " fmode=MODE override the mode of all regular files to (octal) MODE\n"
+           " umask=UMASK set the umask to (octal) UMASK\n");
+    /* iocharset and convertcp take their value as key=value like all the
+       other options with arguments, so document them that way. */
+    printf(" dmask=UMASK set the umask applied to directories only\n"
+           " fmask=UMASK set the umask applied to regular files only\n"
+           " iocharset=CHARSET use the character set CHARSET for I/O operations\n"
+           " (default set is utf8)\n"
+           " convertcp=CHARSET convert the folder name from CHARSET to utf8\n"
+           "\n");
+    printf("Less common used options:\n"
+           " noexec,exec,nodev,dev,nosuid,suid\n");
+    return EXIT_FAILURE;
+}
+
+/**
+ * Program entry point: parses the command line, assembles the option string,
+ * mounts the shared folder and, unless -n was given, completes the mount via
+ * vbsfmount_complete().
+ *
+ * Exits with EXIT_SUCCESS on success.  Usage errors return the value of
+ * usage(); all other failures terminate the process through panic() or
+ * panic_err().
+ */
+int
+main(int argc, char **argv)
+{
+ int c;
+ int err;
+ int saved_errno;
+ int nomtab = 0;
+ char *pszSharedFolder;
+ char *pszMountPoint;
+ struct utsname uts;
+ int major, minor, patch;
+ size_t offOpts;
+ static const char s_szSfNameOpt[] = "sf_name=";
+ /* Room for the "sf_name=" prefix followed by the (converted) folder name. */
+ char szSharedFolderIconved[sizeof(s_szSfNameOpt) - 1 + MAX_HOST_NAME];
+ char szOpts[MAX_MNTOPT_STR];
+ /* Positional initializer: fFlags, the 13 preformatted option strings, fSloppy, pszConvertCp. */
+ struct vbsf_mount_opts opts =
+ {
+ MS_NODEV,
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ "",
+ false, /*fSloppy*/
+ NULL,
+ };
+
+ AssertCompile(sizeof(uid_t) == sizeof(int));
+ AssertCompile(sizeof(gid_t) == sizeof(int));
+
+ if (getuid())
+ panic("Only root can mount shared folders from the host.\n");
+
+ if (!argv[0])
+ argv[0] = "mount.vboxsf";
+
+ /*
+ * Parse options.
+ */
+ while ((c = getopt(argc, argv, "rwsno:h")) != -1)
+ {
+ switch (c)
+ {
+ default:
+ fprintf(stderr, "unknown option `%c:%#x'\n", c, c);
+ RT_FALL_THRU();
+ case '?':
+ case 'h':
+ return usage(argv[0]);
+
+ case 'r':
+ opts.fFlags |= MS_RDONLY;
+ break;
+
+ case 'w':
+ opts.fFlags &= ~MS_RDONLY;
+ break;
+
+ case 's':
+ opts.fSloppy = true;
+ break;
+
+ case 'o':
+ process_mount_opts(optarg, &opts);
+ break;
+
+ case 'n':
+ nomtab = 1;
+ break;
+ }
+ }
+
+ /* Two positional arguments are required: folder name and mount point. */
+ if (argc - optind < 2)
+ return usage(argv[0]);
+
+ pszSharedFolder = argv[optind];
+ pszMountPoint = argv[optind + 1];
+ if (opts.pszConvertCp)
+ {
+ /* Convert straight into the buffer, right after the space reserved for the "sf_name=" prefix. */
+ convertcp(opts.pszConvertCp, pszSharedFolder, &szSharedFolderIconved[sizeof(s_szSfNameOpt) - 1]);
+ pszSharedFolder = &szSharedFolderIconved[sizeof(s_szSfNameOpt) - 1];
+ }
+
+ /*
+ * Concat option strings.
+ */
+ offOpts = 0;
+ szOpts[0] = '\0';
+ offOpts = append_option(szOpts, sizeof(szOpts), offOpts, opts.szTTL);
+ offOpts = append_option(szOpts, sizeof(szOpts), offOpts, opts.szMsDirCacheTTL);
+ offOpts = append_option(szOpts, sizeof(szOpts), offOpts, opts.szMsInodeTTL);
+ offOpts = append_option(szOpts, sizeof(szOpts), offOpts, opts.szMaxIoPages);
+ offOpts = append_option(szOpts, sizeof(szOpts), offOpts, opts.szDirBuf);
+ offOpts = append_option(szOpts, sizeof(szOpts), offOpts, opts.szCacheMode);
+ offOpts = append_option(szOpts, sizeof(szOpts), offOpts, opts.szUid);
+ offOpts = append_option(szOpts, sizeof(szOpts), offOpts, opts.szGid);
+ offOpts = append_option(szOpts, sizeof(szOpts), offOpts, opts.szDMode);
+ offOpts = append_option(szOpts, sizeof(szOpts), offOpts, opts.szFMode);
+ offOpts = append_option(szOpts, sizeof(szOpts), offOpts, opts.szDMask);
+ offOpts = append_option(szOpts, sizeof(szOpts), offOpts, opts.szFMask);
+ offOpts = append_option(szOpts, sizeof(szOpts), offOpts, opts.szIoCharset);
+
+ /* For pre-2.6 kernels we have to supply the shared folder name as a
+ string option because the kernel hides the device name from us. */
+ RT_ZERO(uts);
+ /* If the running kernel version cannot be determined, 5.5.5 is assumed. */
+ if ( uname(&uts) == -1
+ || sscanf(uts.release, "%d.%d.%d", &major, &minor, &patch) != 3)
+ major = minor = patch = 5;
+
+ if (KERNEL_VERSION(major, minor, patch) < KERNEL_VERSION(2,6,0))
+ {
+ memcpy(szSharedFolderIconved, s_szSfNameOpt, sizeof(s_szSfNameOpt) - 1);
+ /* With convertcp the name already sits behind the prefix, otherwise copy it there now. */
+ if (!opts.pszConvertCp)
+ {
+ if (strlen(pszSharedFolder) >= MAX_HOST_NAME)
+ panic("%s: shared folder name is too long (max %d)", argv[0], (int)MAX_HOST_NAME - 1);
+ strcpy(&szSharedFolderIconved[sizeof(s_szSfNameOpt) - 1], pszSharedFolder);
+ }
+ offOpts = append_option(szOpts, sizeof(szOpts), offOpts, szSharedFolderIconved);
+ }
+
+ /*
+ * Do the actual mounting.
+ */
+ err = mount(pszSharedFolder, pszMountPoint, "vboxsf", opts.fFlags, szOpts);
+ /* Save errno right away so later library calls cannot clobber it. */
+ saved_errno = errno;
+
+ if (err)
+ {
+ if (saved_errno == ENXIO)
+ panic("%s: shared folder '%s' was not found (check VM settings / spelling)\n", argv[0], pszSharedFolder);
+ else
+ panic_err("%s: mounting failed with the error", argv[0]);
+ }
+
+ if (!nomtab)
+ {
+ err = vbsfmount_complete(pszSharedFolder, pszMountPoint, opts.fFlags, szOpts);
+ switch (err)
+ {
+ case 0: /* Success. */
+ break;
+
+ case 1:
+ panic_err("%s: Could not update mount table (out of memory).", argv[0]);
+ break;
+
+ case 2:
+ panic_err("%s: Could not open mount table for update.", argv[0]);
+ break;
+
+ case 3:
+ /* Status 3 is ignored; the mount itself has already succeeded at this point. */
+ /* panic_err("%s: Could not add an entry to the mount table.", argv[0]); */
+ break;
+
+ default:
+ panic_err("%s: Unknown error while completing mount operation: %d", argv[0], err);
+ break;
+ }
+ }
+
+ exit(EXIT_SUCCESS);
+}
+
diff --git a/src/VBox/Additions/linux/sharedfolders/regops.c b/src/VBox/Additions/linux/sharedfolders/regops.c
new file mode 100644
index 00000000..d88e76c0
--- /dev/null
+++ b/src/VBox/Additions/linux/sharedfolders/regops.c
@@ -0,0 +1,3881 @@
+/* $Id: regops.c $ */
+/** @file
+ * vboxsf - VBox Linux Shared Folders VFS, regular file inode and file operations.
+ */
+
+/*
+ * Copyright (C) 2006-2022 Oracle and/or its affiliates.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#include "vfsmod.h"
+#include <linux/uio.h>
+#if RTLNX_VER_MIN(2,5,32)
+# include <linux/aio.h> /* struct kiocb before 4.1 */
+#endif
+#if RTLNX_VER_MIN(2,5,12)
+# include <linux/buffer_head.h>
+#endif
+#if RTLNX_VER_RANGE(2,5,12, 2,6,31)
+# include <linux/writeback.h>
+#endif
+#if RTLNX_VER_RANGE(2,6,23, 3,16,0)
+# include <linux/splice.h>
+#endif
+#if RTLNX_VER_RANGE(2,6,17, 2,6,23)
+# include <linux/pipe_fs_i.h>
+#endif
+#if RTLNX_VER_MIN(2,4,10)
+# include <linux/swap.h> /* for mark_page_accessed */
+#endif
+#include <iprt/err.h>
+
+#if RTLNX_VER_MAX(2,6,18)
+# define SEEK_END 2
+#endif
+
+#if RTLNX_VER_MAX(3,16,0)
+# define iter_is_iovec(a_pIter) ( !((a_pIter)->type & ITER_KVEC) )
+#elif RTLNX_VER_MAX(3,19,0)
+# define iter_is_iovec(a_pIter) ( !((a_pIter)->type & (ITER_KVEC | ITER_BVEC)) )
+#endif
+
+#if RTLNX_VER_MAX(4,17,0)
+# define vm_fault_t int
+#endif
+
+#if RTLNX_VER_MAX(2,5,20)
+# define pgoff_t unsigned long
+#endif
+
+#if RTLNX_VER_MAX(2,5,12)
+# define PageUptodate(a_pPage) Page_Uptodate(a_pPage)
+#endif
+
+
+/*********************************************************************************************************************************
+* Defined Constants And Macros *
+*********************************************************************************************************************************/
+/** @def VBSF_GET_ITER_TYPE
+ * Accessor for getting iov iter type member which changed name in 5.14. */
+#if RTLNX_VER_MIN(5,14,0)
+# define VBSF_GET_ITER_TYPE(a_pIter) ((a_pIter)->iter_type)
+#else
+# define VBSF_GET_ITER_TYPE(a_pIter) ((a_pIter)->type)
+#endif
+
+
+/*********************************************************************************************************************************
+* Structures and Typedefs *
+*********************************************************************************************************************************/
+#if RTLNX_VER_MAX(3,16,0)
+/**
+ * Minimal stand-in for struct iov_iter on kernels older than 3.16; the
+ * 'iov_iter' define at the end of this section maps the name onto it.
+ */
+struct vbsf_iov_iter {
+ /** Vector type; the initializer sets it from vbsf_iov_iter_detect_type(). */
+ unsigned int type;
+ /** The a_fWrite value handed to the initializer. */
+ unsigned int v_write : 1;
+ /** Byte offset into the current segment (*iov). */
+ size_t iov_offset;
+ /** Number of segments left, counting from iov. */
+ size_t nr_segs;
+ /** The current segment. */
+ struct iovec const *iov;
+# ifdef VBOX_STRICT
+ /** Strict builds only: the segment array as given to the initializer. */
+ struct iovec const *iov_org;
+ /** Strict builds only: the segment count as given to the initializer. */
+ size_t nr_segs_org;
+# endif
+};
+/* Positional initializer; the value order must match the member order above. */
+# ifdef VBOX_STRICT
+# define VBSF_IOV_ITER_INITIALIZER(a_cSegs, a_pIov, a_fWrite) \
+ { vbsf_iov_iter_detect_type(a_pIov, a_cSegs), a_fWrite, 0, a_cSegs, a_pIov, a_pIov, a_cSegs }
+# else
+# define VBSF_IOV_ITER_INITIALIZER(a_cSegs, a_pIov, a_fWrite) \
+ { vbsf_iov_iter_detect_type(a_pIov, a_cSegs), a_fWrite, 0, a_cSegs, a_pIov }
+# endif
+# define ITER_KVEC 1
+# define iov_iter vbsf_iov_iter
+#endif
+
+#if RTLNX_VER_MIN(2,6,19)
+/** Used by vbsf_iter_lock_pages() to keep the first page of the next segment. */
+struct vbsf_iter_stash {
+ struct page *pPage;
+ size_t off;
+ size_t cb;
+# if RTLNX_VER_MAX(4,11,0)
+ size_t offFromEnd;
+ struct iov_iter Copy;
+# endif
+};
+#endif /* >= 2.6.19 */
+/** Initializer for struct vbsf_iter_stash.
+ * NOTE(review): the initializer is positional, so on kernels before 4.11 the
+ * third value (~(size_t)0) lands on 'cb' and 'offFromEnd' is zero-initialized;
+ * confirm that this is the intended member. */
+#if RTLNX_VER_MIN(4,11,0)
+# define VBSF_ITER_STASH_INITIALIZER { NULL, 0 }
+#else
+# define VBSF_ITER_STASH_INITIALIZER { NULL, 0, ~(size_t)0 }
+#endif
+
+
+/*********************************************************************************************************************************
+* Internal Functions *
+*********************************************************************************************************************************/
+DECLINLINE(void) vbsf_put_page(struct page *pPage);
+static void vbsf_unlock_user_pages(struct page **papPages, size_t cPages, bool fSetDirty, bool fLockPgHack);
+static void vbsf_reg_write_sync_page_cache(struct address_space *mapping, loff_t offFile, uint32_t cbRange,
+ uint8_t const *pbSrcBuf, struct page **papSrcPages,
+ uint32_t offSrcPage, size_t cSrcPages);
+
+
+/*********************************************************************************************************************************
+* Provide more recent uio.h functionality to older kernels. *
+*********************************************************************************************************************************/
+#if RTLNX_VER_RANGE(2,6,19, 3,16,0)
+
+/**
+ * Detects the vector type.
+ *
+ * Only the first segment with a non-zero length is examined.
+ *
+ * @returns ITER_KVEC if that segment passes access_ok() and its base address is
+ * at or above the limit compared against below (TASK_SIZE_MAX on
+ * 5.10+, USER_DS.seg otherwise); 0 in all other cases, including when
+ * every segment is empty or access_ok() rejects the segment.
+ * @param paIov The segment array.
+ * @param cSegs Number of entries in @a paIov.
+ */
+static int vbsf_iov_iter_detect_type(struct iovec const *paIov, size_t cSegs)
+{
+ /* Check the first segment with a non-zero length. */
+ while (cSegs-- > 0) {
+ if (paIov->iov_len > 0) {
+ if (access_ok(VERIFY_READ, paIov->iov_base, paIov->iov_len))
+#if RTLNX_VER_MIN(5,10,0)
+ return (uintptr_t)paIov->iov_base >= TASK_SIZE_MAX ? ITER_KVEC : 0;
+#else
+ return (uintptr_t)paIov->iov_base >= USER_DS.seg ? ITER_KVEC : 0;
+#endif
+ /* access_ok() rejected the range: complain in strict builds and give up. */
+ AssertMsgFailed(("%p LB %#zx\n", paIov->iov_base, paIov->iov_len));
+ break;
+ }
+ paIov++;
+ }
+ return 0;
+}
+
+
+# undef iov_iter_count
+# define iov_iter_count(a_pIter) vbsf_iov_iter_count(a_pIter)
+/**
+ * Replacement for iov_iter_count(): number of bytes left in the iterator.
+ *
+ * @returns Sum of the lengths of all remaining segments minus the offset
+ *          already consumed in the current one.
+ * @param   iter    The iterator to measure.
+ */
+static size_t vbsf_iov_iter_count(struct vbsf_iov_iter const *iter)
+{
+    size_t cbTotal = 0;
+    size_t iSeg;
+
+    for (iSeg = 0; iSeg < iter->nr_segs; iSeg++)
+        cbTotal += iter->iov[iSeg].iov_len;
+
+    return cbTotal - iter->iov_offset;
+}
+
+
+# undef iov_iter_single_seg_count
+# define iov_iter_single_seg_count(a_pIter) vbsf_iov_iter_single_seg_count(a_pIter)
+/**
+ * Replacement for iov_iter_single_seg_count(): bytes left in the current
+ * segment.
+ *
+ * @returns Remaining byte count of the current segment, 0 if there are no
+ *          segments left.
+ * @param   iter    The iterator to query.
+ */
+static size_t vbsf_iov_iter_single_seg_count(struct vbsf_iov_iter const *iter)
+{
+    return iter->nr_segs > 0
+         ? iter->iov->iov_len - iter->iov_offset
+         : 0;
+}
+
+
+# undef iov_iter_advance
+# define iov_iter_advance(a_pIter, a_cbSkip) vbsf_iov_iter_advance(a_pIter, a_cbSkip)
+/**
+ * Replacement for iov_iter_advance(): moves the iterator forward by
+ * @a cbSkip bytes.
+ *
+ * A segment that is consumed exactly (or is empty) is stepped over, so the
+ * iterator never rests at the very end of a segment. Nothing happens when no
+ * segments are left.
+ *
+ * @param iter The iterator to advance.
+ * @param cbSkip Number of bytes to skip.
+ */
+static void vbsf_iov_iter_advance(struct vbsf_iov_iter *iter, size_t cbSkip)
+{
+ SFLOG2(("vbsf_iov_iter_advance: cbSkip=%#zx\n", cbSkip));
+ if (iter->nr_segs > 0) {
+ size_t const cbLeftCur = iter->iov->iov_len - iter->iov_offset;
+ Assert(iter->iov_offset <= iter->iov->iov_len);
+ if (cbLeftCur > cbSkip) {
+ /* Staying inside the current segment. */
+ iter->iov_offset += cbSkip;
+ } else {
+ /* Consume the rest of the current segment and move on to the next. */
+ cbSkip -= cbLeftCur;
+ iter->iov_offset = 0;
+ iter->iov++;
+ iter->nr_segs--;
+ /* Step over whole segments until the remainder fits inside one. */
+ while (iter->nr_segs > 0) {
+ size_t const cbSeg = iter->iov->iov_len;
+ if (cbSeg > cbSkip) {
+ iter->iov_offset = cbSkip;
+ break;
+ }
+ cbSkip -= cbSeg;
+ iter->iov++;
+ iter->nr_segs--;
+ }
+ }
+ }
+}
+
+
+# undef iov_iter_get_pages
+# define iov_iter_get_pages(a_pIter, a_papPages, a_cbMax, a_cMaxPages, a_poffPg0) \
+    vbsf_iov_iter_get_pages(a_pIter, a_papPages, a_cbMax, a_cMaxPages, a_poffPg0)
+/**
+ * Replacement for iov_iter_get_pages(): pins the pages backing the start of
+ * the first non-empty segment.
+ *
+ * Empty leading segments are stepped over (modifying @a iter); otherwise the
+ * iterator is not advanced.
+ *
+ * @returns Number of bytes covered by the pinned pages, -EFAULT if not all the
+ *          requested pages could be pinned, 0 (after asserting) if the
+ *          iterator has no data left.
+ * @param   iter        The iterator to take the address range from.
+ * @param   papPages    Where to return the pinned pages.
+ * @param   cbMax       Not consulted by this implementation; only
+ *                      @a cMaxPages limits the amount.
+ *                      NOTE(review): the return value may therefore exceed
+ *                      @a cbMax - confirm that the callers cope with that.
+ * @param   cMaxPages   Maximum number of pages to pin.
+ * @param   poffPg0     Where to return the offset into the first page.
+ */
+static ssize_t vbsf_iov_iter_get_pages(struct vbsf_iov_iter *iter, struct page **papPages,
+                                       size_t cbMax, unsigned cMaxPages, size_t *poffPg0)
+{
+    while (iter->nr_segs > 0) {
+        size_t const cbLeft = iter->iov->iov_len - iter->iov_offset;
+        Assert(iter->iov->iov_len >= iter->iov_offset);
+        if (cbLeft > 0) {
+            uintptr_t           uPtrFrom   = (uintptr_t)iter->iov->iov_base + iter->iov_offset;
+            size_t              offPg0     = *poffPg0 = uPtrFrom & PAGE_OFFSET_MASK;
+            size_t              cPagesLeft = RT_ALIGN_Z(offPg0 + cbLeft, PAGE_SIZE) >> PAGE_SHIFT;
+            size_t              cPages     = RT_MIN(cPagesLeft, cMaxPages);
+            struct task_struct *pTask      = current;
+            /* Signed: get_user_pages() returns a negative errno on failure,
+               which must not be mistaken for a (huge) page count below. */
+            long                cPagesLocked;
+
+            down_read(&pTask->mm->mmap_sem);
+            cPagesLocked = get_user_pages(pTask, pTask->mm, uPtrFrom, cPages, iter->v_write, 1 /*force*/, papPages, NULL);
+            up_read(&pTask->mm->mmap_sem);
+            if (cPagesLocked >= 0 && (size_t)cPagesLocked == cPages) {
+                size_t cbRet = (cPages << PAGE_SHIFT) - offPg0;
+                /* When the whole segment got pinned, trim the unused tail of the last page. */
+                if (cPages == cPagesLeft) {
+                    size_t offLastPg = (uPtrFrom + cbLeft) & PAGE_OFFSET_MASK;
+                    if (offLastPg)
+                        cbRet -= PAGE_SIZE - offLastPg;
+                }
+                Assert(cbRet <= cbLeft);
+                return cbRet;
+            }
+            /* Partial success: release what was pinned.  Errors pinned nothing. */
+            if (cPagesLocked > 0)
+                vbsf_unlock_user_pages(papPages, (size_t)cPagesLocked, false /*fSetDirty*/, false /*fLockPgHack*/);
+            return -EFAULT;
+        }
+        /* Empty segment, try the next one. */
+        iter->iov_offset = 0;
+        iter->iov++;
+        iter->nr_segs--;
+    }
+    AssertFailed();
+    return 0;
+}
+
+
+# undef iov_iter_truncate
+# define iov_iter_truncate(iter, cbNew) vbsf_iov_iter_truncate(iter, cbNew)
+/**
+ * Replacement for iov_iter_truncate(); does nothing since struct
+ * vbsf_iov_iter keeps no byte counter that could be lowered.
+ *
+ * @param iter The iterator (unused).
+ * @param cbNew The new size (unused).
+ */
+static void vbsf_iov_iter_truncate(struct vbsf_iov_iter *iter, size_t cbNew)
+{
+ /* we have no counter or stuff, so it's a no-op. */
+ RT_NOREF(iter, cbNew);
+}
+
+
+# undef iov_iter_revert
+# define iov_iter_revert(a_pIter, a_cbRewind) vbsf_iov_iter_revert(a_pIter, a_cbRewind)
+/**
+ * Replacement for iov_iter_revert(): moves the iterator back by
+ * @a cbRewind bytes.
+ *
+ * The caller must not rewind past the start of the original segment array;
+ * this is only checked by assertions.
+ *
+ * NOTE(review): unlike the other replacement helpers in this section this one
+ * is not declared static - confirm whether that is intentional.
+ *
+ * @param iter The iterator to rewind.
+ * @param cbRewind Number of bytes to go back.
+ */
+void vbsf_iov_iter_revert(struct vbsf_iov_iter *iter, size_t cbRewind)
+{
+ SFLOG2(("vbsf_iov_iter_revert: cbRewind=%#zx\n", cbRewind));
+ /* First undo what has been consumed of the current segment. */
+ if (iter->iov_offset > 0) {
+ if (cbRewind <= iter->iov_offset) {
+ iter->iov_offset -= cbRewind;
+ return;
+ }
+ cbRewind -= iter->iov_offset;
+ iter->iov_offset = 0;
+ }
+
+ /* Then walk backwards through the preceding segments. */
+ while (cbRewind > 0) {
+ struct iovec const *pIov = --iter->iov;
+ size_t const cbSeg = pIov->iov_len;
+ iter->nr_segs++;
+
+ Assert((uintptr_t)pIov >= (uintptr_t)iter->iov_org);
+ Assert(iter->nr_segs <= iter->nr_segs_org);
+
+ if (cbRewind <= cbSeg) {
+ iter->iov_offset = cbSeg - cbRewind;
+ break;
+ }
+ cbRewind -= cbSeg;
+ }
+}
+
+#endif /* 2.6.19 <= linux < 3.16.0 */
+#if RTLNX_VER_RANGE(3,16,0, 3,16,35)
+
+/**
+ * This is for implementing cMaxPage on 3.16 which doesn't have it.
+ *
+ * For iterators that are not ITER_BVEC, @a cbMax is lowered so that the
+ * request does not span more than @a cMaxPages pages before it is passed on
+ * to the kernel's four-argument iov_iter_get_pages().
+ *
+ * NOTE(review): the first-page offset is derived from iov_offset alone and
+ * not from iov_base + iov_offset - confirm this is what is wanted when
+ * iov_base is not page aligned.
+ *
+ * @returns Whatever the kernel's iov_iter_get_pages() returns.
+ * @param iter The iterator.
+ * @param papPages Where to return the pages.
+ * @param cbMax Maximum number of bytes to cover.
+ * @param cMaxPages Maximum number of pages to return.
+ * @param poffPg0 Where to return the offset into the first page.
+ */
+static ssize_t vbsf_iov_iter_get_pages_3_16(struct iov_iter *iter, struct page **papPages,
+ size_t cbMax, unsigned cMaxPages, size_t *poffPg0)
+{
+ if (!(iter->type & ITER_BVEC)) {
+ size_t const offPg0 = iter->iov_offset & PAGE_OFFSET_MASK;
+ size_t const cbMaxPages = ((size_t)cMaxPages << PAGE_SHIFT) - offPg0;
+ if (cbMax > cbMaxPages)
+ cbMax = cbMaxPages;
+ }
+ /* else: BVEC works a page at a time and shouldn't have much of a problem here. */
+ return iov_iter_get_pages(iter, papPages, cbMax, poffPg0);
+}
+/* From here on iov_iter_get_pages() takes the extra a_cMaxPages argument. */
+# undef iov_iter_get_pages
+# define iov_iter_get_pages(a_pIter, a_papPages, a_cbMax, a_cMaxPages, a_poffPg0) \
+ vbsf_iov_iter_get_pages_3_16(a_pIter, a_papPages, a_cbMax, a_cMaxPages, a_poffPg0)
+
+#endif /* 3.16.0-3.16.34 */
+#if RTLNX_VER_RANGE(2,6,19, 3,18,0)
+
+/**
+ * Replacement for copy_from_iter() on kernels that lack it: copies bytes from
+ * an iterator into a kernel buffer, advancing the iterator.
+ *
+ * @returns Number of bytes actually copied; less than @a cbToCopy if a copy
+ *          came up short (e.g. faulting user memory).
+ * @param   pbDst       The destination buffer.
+ * @param   cbToCopy    Number of bytes to copy; must not exceed what is left
+ *                      in @a pSrcIter (asserted).
+ * @param   pSrcIter    The source iterator.
+ */
+static size_t copy_from_iter(uint8_t *pbDst, size_t cbToCopy, struct iov_iter *pSrcIter)
+{
+    size_t const cbTotal = cbToCopy;
+    Assert(iov_iter_count(pSrcIter) >= cbToCopy);
+# if RTLNX_VER_MIN(3,16,0)
+    if (pSrcIter->type & ITER_BVEC) {
+        /* Page vectors: go via copy_page_from_iter, one destination page at a time. */
+        while (cbToCopy > 0) {
+            size_t const offPage    = (uintptr_t)pbDst & PAGE_OFFSET_MASK;
+            size_t const cbThisCopy = RT_MIN(PAGE_SIZE - offPage, cbToCopy);
+            struct page *pPage      = rtR0MemObjLinuxVirtToPage(pbDst);
+            size_t       cbCopied   = copy_page_from_iter(pPage, offPage, cbThisCopy, pSrcIter);
+            AssertStmt(cbCopied <= cbThisCopy, cbCopied = cbThisCopy);
+            pbDst    += cbCopied;
+            cbToCopy -= cbCopied;
+            /* Stop on a short copy.  (Compare against this chunk's size, not
+               against the remaining byte count.) */
+            if (cbCopied != cbThisCopy)
+                break;
+        }
+    } else
+# endif
+    {
+        /* I/O vectors: copy segment by segment, kernel or user memory. */
+        while (cbToCopy > 0) {
+            size_t cbThisCopy = iov_iter_single_seg_count(pSrcIter);
+            if (cbThisCopy > 0) {
+                if (cbThisCopy > cbToCopy)
+                    cbThisCopy = cbToCopy;
+                if (pSrcIter->type & ITER_KVEC)
+                    memcpy(pbDst, (void *)pSrcIter->iov->iov_base + pSrcIter->iov_offset, cbThisCopy);
+                else if (copy_from_user(pbDst, pSrcIter->iov->iov_base + pSrcIter->iov_offset, cbThisCopy) != 0)
+                    break;
+                pbDst    += cbThisCopy;
+                cbToCopy -= cbThisCopy;
+            }
+            /* Also steps over empty segments (cbThisCopy == 0). */
+            iov_iter_advance(pSrcIter, cbThisCopy);
+        }
+    }
+    return cbTotal - cbToCopy;
+}
+
+
+/**
+ * Replacement for copy_to_iter() on kernels that lack it: copies bytes from a
+ * kernel buffer into an iterator, advancing the iterator.
+ *
+ * @returns Number of bytes actually copied; less than @a cbToCopy if a copy
+ *          came up short (e.g. faulting user memory).
+ * @param   pbSrc       The source buffer.
+ * @param   cbToCopy    Number of bytes to copy; must not exceed what is left
+ *                      in @a pDstIter (asserted).
+ * @param   pDstIter    The destination iterator.
+ */
+static size_t copy_to_iter(uint8_t const *pbSrc, size_t cbToCopy, struct iov_iter *pDstIter)
+{
+    size_t const cbTotal = cbToCopy;
+    Assert(iov_iter_count(pDstIter) >= cbToCopy);
+# if RTLNX_VER_MIN(3,16,0)
+    if (pDstIter->type & ITER_BVEC) {
+        /* Page vectors: go via copy_page_to_iter, one source page at a time. */
+        while (cbToCopy > 0) {
+            size_t const offPage    = (uintptr_t)pbSrc & PAGE_OFFSET_MASK;
+            size_t const cbThisCopy = RT_MIN(PAGE_SIZE - offPage, cbToCopy);
+            struct page *pPage      = rtR0MemObjLinuxVirtToPage((void *)pbSrc);
+            size_t       cbCopied   = copy_page_to_iter(pPage, offPage, cbThisCopy, pDstIter);
+            AssertStmt(cbCopied <= cbThisCopy, cbCopied = cbThisCopy);
+            pbSrc    += cbCopied;
+            cbToCopy -= cbCopied;
+            /* Stop on a short copy.  (Compare against this chunk's size, not
+               against the remaining byte count.) */
+            if (cbCopied != cbThisCopy)
+                break;
+        }
+    } else
+# endif
+    {
+        /* I/O vectors: copy segment by segment, kernel or user memory. */
+        while (cbToCopy > 0) {
+            size_t cbThisCopy = iov_iter_single_seg_count(pDstIter);
+            if (cbThisCopy > 0) {
+                if (cbThisCopy > cbToCopy)
+                    cbThisCopy = cbToCopy;
+                if (pDstIter->type & ITER_KVEC)
+                    memcpy((void *)pDstIter->iov->iov_base + pDstIter->iov_offset, pbSrc, cbThisCopy);
+                else if (copy_to_user(pDstIter->iov->iov_base + pDstIter->iov_offset, pbSrc, cbThisCopy) != 0) {
+                    break;
+                }
+                pbSrc    += cbThisCopy;
+                cbToCopy -= cbThisCopy;
+            }
+            /* Also steps over empty segments (cbThisCopy == 0). */
+            iov_iter_advance(pDstIter, cbThisCopy);
+        }
+    }
+    return cbTotal - cbToCopy;
+}
+
+#endif /* 2.6.19 <= linux < 3.18.0 */
+
+
+
+/*********************************************************************************************************************************
+* Handle management *
+*********************************************************************************************************************************/
+
+/**
+ * Called when an inode is released to unlink any handles that, against
+ * expectations, are still associated with it.
+ *
+ * Every handle is taken off the inode's handle list and has its
+ * VBSF_HANDLE_F_ON_LIST flag cleared, so that a later
+ * vbsf_handle_release_slow() call does not try to unlink it a second time.
+ * The handles themselves are not released or freed here.
+ *
+ * @param   pInodeInfo  The inode which handles to drop.
+ */
+void vbsf_handle_drop_chain(struct vbsf_inode_info *pInodeInfo)
+{
+    struct vbsf_handle *pCur, *pNext;
+    unsigned long       fSavedFlags;
+    SFLOGFLOW(("vbsf_handle_drop_chain: %p\n", pInodeInfo));
+    spin_lock_irqsave(&g_SfHandleLock, fSavedFlags);
+
+    RTListForEachSafe(&pInodeInfo->HandleList, pCur, pNext, struct vbsf_handle, Entry) {
+        AssertMsg(   (pCur->fFlags & (VBSF_HANDLE_F_MAGIC_MASK | VBSF_HANDLE_F_ON_LIST))
+                  == (VBSF_HANDLE_F_MAGIC | VBSF_HANDLE_F_ON_LIST), ("%p %#x\n", pCur, pCur->fFlags));
+        /* The flag must mirror list membership: clear it as the node is unlinked. */
+        pCur->fFlags &= ~VBSF_HANDLE_F_ON_LIST;
+        RTListNodeRemove(&pCur->Entry);
+    }
+
+    spin_unlock_irqrestore(&g_SfHandleLock, fSavedFlags);
+}
+
+
+/**
+ * Locates a handle that has all of @a fFlagsSet set and none of
+ * @a fFlagsClear set.
+ *
+ * The inode's handle list is searched under g_SfHandleLock.
+ *
+ * @returns Pointer to handle on success (retained), use vbsf_handle_release() to
+ * release it. NULL if no suitable handle was found.
+ * @param pInodeInfo The inode info to search.
+ * @param fFlagsSet The flags that must be set.
+ * @param fFlagsClear The flags that must be clear.
+ */
+struct vbsf_handle *vbsf_handle_find(struct vbsf_inode_info *pInodeInfo, uint32_t fFlagsSet, uint32_t fFlagsClear)
+{
+ struct vbsf_handle *pCur;
+ unsigned long fSavedFlags;
+ spin_lock_irqsave(&g_SfHandleLock, fSavedFlags);
+
+ RTListForEach(&pInodeInfo->HandleList, pCur, struct vbsf_handle, Entry) {
+ AssertMsg( (pCur->fFlags & (VBSF_HANDLE_F_MAGIC_MASK | VBSF_HANDLE_F_ON_LIST))
+ == (VBSF_HANDLE_F_MAGIC | VBSF_HANDLE_F_ON_LIST), ("%p %#x\n", pCur, pCur->fFlags));
+ if ((pCur->fFlags & (fFlagsSet | fFlagsClear)) == fFlagsSet) {
+ /* Retain it; a result of 1 means the count was zero before we came along. */
+ uint32_t cRefs = ASMAtomicIncU32(&pCur->cRefs);
+ if (cRefs > 1) {
+ spin_unlock_irqrestore(&g_SfHandleLock, fSavedFlags);
+ SFLOGFLOW(("vbsf_handle_find: returns %p\n", pCur));
+ return pCur;
+ }
+ /* Oops, already being closed (safe as it's only ever increased here). */
+ ASMAtomicDecU32(&pCur->cRefs);
+ }
+ }
+
+ spin_unlock_irqrestore(&g_SfHandleLock, fSavedFlags);
+ SFLOGFLOW(("vbsf_handle_find: returns NULL!\n"));
+ return NULL;
+}
+
+
+/**
+ * Slow worker for vbsf_handle_release() that does the freeing.
+ *
+ * Unlinks the handle from its inode list if it is still flagged as being on
+ * it, closes the host side handle and frees the structure. A failure to
+ * close the host handle is only logged.
+ *
+ * @returns 0 (ref count).
+ * @param pHandle The handle to release.
+ * @param pSuperInfo The info structure for the shared folder associated with
+ * the handle.
+ * @param pszCaller The caller name (for logging failures).
+ */
+uint32_t vbsf_handle_release_slow(struct vbsf_handle *pHandle, struct vbsf_super_info *pSuperInfo, const char *pszCaller)
+{
+ int rc;
+ unsigned long fSavedFlags;
+
+ SFLOGFLOW(("vbsf_handle_release_slow: %p (%s)\n", pHandle, pszCaller));
+
+ /*
+ * Remove from the list.
+ */
+ spin_lock_irqsave(&g_SfHandleLock, fSavedFlags);
+
+ AssertMsg((pHandle->fFlags & VBSF_HANDLE_F_MAGIC_MASK) == VBSF_HANDLE_F_MAGIC, ("%p %#x\n", pHandle, pHandle->fFlags));
+ Assert(pHandle->pInodeInfo);
+ Assert(pHandle->pInodeInfo && pHandle->pInodeInfo->u32Magic == SF_INODE_INFO_MAGIC);
+
+ /* Only unlink when the flag says the node is on a list. */
+ if (pHandle->fFlags & VBSF_HANDLE_F_ON_LIST) {
+ pHandle->fFlags &= ~VBSF_HANDLE_F_ON_LIST;
+ RTListNodeRemove(&pHandle->Entry);
+ }
+
+ spin_unlock_irqrestore(&g_SfHandleLock, fSavedFlags);
+
+ /*
+ * Actually destroy it.
+ */
+ rc = VbglR0SfHostReqCloseSimple(pSuperInfo->map.root, pHandle->hHost);
+ if (RT_FAILURE(rc))
+ LogFunc(("Caller %s: VbglR0SfHostReqCloseSimple %#RX64 failed with rc=%Rrc\n", pszCaller, pHandle->hHost, rc));
+ /* Invalidate the structure before freeing it. */
+ pHandle->hHost = SHFL_HANDLE_NIL;
+ pHandle->fFlags = VBSF_HANDLE_F_MAGIC_DEAD;
+ kfree(pHandle);
+ return 0;
+}
+
+
+/**
+ * Appends a handle to a handle list.
+ *
+ * The handle must carry the valid magic and must not already be flagged as
+ * being on a list (asserted). The list is modified under g_SfHandleLock.
+ *
+ * @param pInodeInfo The inode to add it to.
+ * @param pHandle The handle to add.
+ */
+void vbsf_handle_append(struct vbsf_inode_info *pInodeInfo, struct vbsf_handle *pHandle)
+{
+#ifdef VBOX_STRICT
+ struct vbsf_handle *pCur;
+#endif
+ unsigned long fSavedFlags;
+
+ SFLOGFLOW(("vbsf_handle_append: %p (to %p)\n", pHandle, pInodeInfo));
+ AssertMsg((pHandle->fFlags & (VBSF_HANDLE_F_MAGIC_MASK | VBSF_HANDLE_F_ON_LIST)) == VBSF_HANDLE_F_MAGIC,
+ ("%p %#x\n", pHandle, pHandle->fFlags));
+ Assert(pInodeInfo->u32Magic == SF_INODE_INFO_MAGIC);
+
+ spin_lock_irqsave(&g_SfHandleLock, fSavedFlags);
+
+ /* Check again now that the lock is held. */
+ AssertMsg((pHandle->fFlags & (VBSF_HANDLE_F_MAGIC_MASK | VBSF_HANDLE_F_ON_LIST)) == VBSF_HANDLE_F_MAGIC,
+ ("%p %#x\n", pHandle, pHandle->fFlags));
+#ifdef VBOX_STRICT
+ /* Strict builds: verify the handle isn't already on the list and that the list is sane. */
+ RTListForEach(&pInodeInfo->HandleList, pCur, struct vbsf_handle, Entry) {
+ Assert(pCur != pHandle);
+ AssertMsg( (pCur->fFlags & (VBSF_HANDLE_F_MAGIC_MASK | VBSF_HANDLE_F_ON_LIST))
+ == (VBSF_HANDLE_F_MAGIC | VBSF_HANDLE_F_ON_LIST), ("%p %#x\n", pCur, pCur->fFlags));
+ }
+ /* Note: the back pointer is only set in strict builds. */
+ pHandle->pInodeInfo = pInodeInfo;
+#endif
+
+ pHandle->fFlags |= VBSF_HANDLE_F_ON_LIST;
+ RTListAppend(&pInodeInfo->HandleList, &pHandle->Entry);
+
+ spin_unlock_irqrestore(&g_SfHandleLock, fSavedFlags);
+}
+
+
+
+/*********************************************************************************************************************************
+* Misc *
+*********************************************************************************************************************************/
+
+#if RTLNX_VER_MAX(2,6,6)
+/**
+ * Any writable mappings?
+ *
+ * Substitute for kernels older than 2.6.6; the answer is derived from the
+ * address space's i_mmap_shared member, which is tested as a list on 2.5.6
+ * and later and as a plain pointer before that.
+ *
+ * @returns true if i_mmap_shared is non-empty / non-NULL, false otherwise.
+ * @param mapping The address space to check.
+ */
+DECLINLINE(bool) mapping_writably_mapped(struct address_space const *mapping)
+{
+# if RTLNX_VER_MIN(2,5,6)
+ return !list_empty(&mapping->i_mmap_shared);
+# else
+ return mapping->i_mmap_shared != NULL;
+# endif
+}
+#endif
+
+
+#if RTLNX_VER_MAX(2,5,12)
+/**
+ * Missing in 2.4.x, so just stub it for now.
+ *
+ * @returns Always false.
+ * @param page The page (ignored).
+ */
+DECLINLINE(bool) PageWriteback(struct page const *page)
+{
+ return false;
+}
+#endif
+
+
+/**
+ * Decides whether a read should be served via the page cache.
+ *
+ * O_DIRECT files and the 'none' cache mode never use it, the 'read' and
+ * 'read-write' modes always do.  In the remaining (strict) mode the page
+ * cache is only used when the file has cached pages and a writable memory
+ * mapping, i.e. when some of those pages may have been modified already.
+ *
+ * @returns true to read via the page cache, false to bypass it.
+ * @param   file        The file being read.
+ * @param   mapping     The address space of the inode, may be NULL.
+ * @param   pSuperInfo  The super block info supplying the cache mode.
+ */
+DECLINLINE(bool) vbsf_should_use_cached_read(struct file *file, struct address_space *mapping, struct vbsf_super_info *pSuperInfo)
+{
+    if (file->f_flags & O_DIRECT)
+        return false;
+
+    switch (pSuperInfo->enmCacheMode) {
+        case kVbsfCacheMode_None:
+            return false;
+
+        case kVbsfCacheMode_Read:
+        case kVbsfCacheMode_ReadWrite:
+            return true;
+
+        default:
+            break;
+    }
+
+    Assert(pSuperInfo->enmCacheMode == kVbsfCacheMode_Strict);
+    if (!mapping)
+        return false;
+    if (!(mapping->nrpages > 0))
+        return false;
+    return mapping_writably_mapped(mapping);
+}
+
+
+
+/*********************************************************************************************************************************
+* Pipe / splice stuff mainly for 2.6.17 >= linux < 2.6.31 (where no fallbacks were available) *
+*********************************************************************************************************************************/
+
+#if RTLNX_VER_RANGE(2,6,17, 3,16,0)
+
+# if RTLNX_VER_MAX(2,6,30)
+# define LOCK_PIPE(a_pPipe) do { if ((a_pPipe)->inode) mutex_lock(&(a_pPipe)->inode->i_mutex); } while (0)
+# define UNLOCK_PIPE(a_pPipe) do { if ((a_pPipe)->inode) mutex_unlock(&(a_pPipe)->inode->i_mutex); } while (0)
+# else
+# define LOCK_PIPE(a_pPipe) pipe_lock(a_pPipe)
+# define UNLOCK_PIPE(a_pPipe) pipe_unlock(a_pPipe)
+# endif
+
+
+/**
+ * Waits for the pipe buffer status to change.
+ *
+ * Called with the pipe locked; the lock is dropped while sleeping on the pipe
+ * wait queue (interruptibly) and taken again before returning.
+ *
+ * @param pPipe The pipe to wait on.
+ */
+static void vbsf_wait_pipe(struct pipe_inode_info *pPipe)
+{
+ DEFINE_WAIT(WaitStuff);
+# ifdef TASK_NONINTERACTIVE
+ prepare_to_wait(&pPipe->wait, &WaitStuff, TASK_INTERRUPTIBLE | TASK_NONINTERACTIVE);
+# else
+ prepare_to_wait(&pPipe->wait, &WaitStuff, TASK_INTERRUPTIBLE);
+# endif
+ /* Queue ourselves before unlocking so a wake-up in between is not missed. */
+ UNLOCK_PIPE(pPipe);
+
+ schedule();
+
+ finish_wait(&pPipe->wait, &WaitStuff);
+ LOCK_PIPE(pPipe);
+}
+
+
+/**
+ * Wakes up whoever is waiting on the pipe and sends SIGIO to the async
+ * subscribers on one side of it.
+ *
+ * @param pPipe The pipe.
+ * @param fReaders true to signal the async readers (POLL_IN), false to
+ * signal the async writers (POLL_OUT).
+ */
+static void vbsf_wake_up_pipe(struct pipe_inode_info *pPipe, bool fReaders)
+{
+ smp_mb();
+ if (waitqueue_active(&pPipe->wait))
+ wake_up_interruptible_sync(&pPipe->wait);
+ if (fReaders)
+ kill_fasync(&pPipe->fasync_readers, SIGIO, POLL_IN);
+ else
+ kill_fasync(&pPipe->fasync_writers, SIGIO, POLL_OUT);
+}
+
+#endif
+#if RTLNX_VER_RANGE(2,6,17, 2,6,31)
+
+/**
+ * Verify pipe buffer content (needed for page-cache to ensure idle page).
+ *
+ * Nothing needs checking for our buffers, so this always succeeds.
+ *
+ * @returns 0
+ * @param pPipe The pipe (unused).
+ * @param pPipeBuf The pipe buffer (unused).
+ */
+static int vbsf_pipe_buf_confirm(struct pipe_inode_info *pPipe, struct pipe_buffer *pPipeBuf)
+{
+ /*SFLOG3(("vbsf_pipe_buf_confirm: %p\n", pPipeBuf));*/
+ return 0;
+}
+
+
+/**
+ * Maps the buffer page.
+ *
+ * @returns Address of the mapping.
+ * @param pPipe The pipe (unused).
+ * @param pPipeBuf The pipe buffer whose page should be mapped.
+ * @param atomic Non-zero to use kmap_atomic(), in which case
+ * PIPE_BUF_FLAG_ATOMIC is set on the buffer so that
+ * vbsf_pipe_buf_unmap() knows how to undo it; zero to use
+ * kmap().
+ */
+static void *vbsf_pipe_buf_map(struct pipe_inode_info *pPipe, struct pipe_buffer *pPipeBuf, int atomic)
+{
+ void *pvRet;
+ if (!atomic)
+ pvRet = kmap(pPipeBuf->page);
+ else {
+ pPipeBuf->flags |= PIPE_BUF_FLAG_ATOMIC;
+ pvRet = kmap_atomic(pPipeBuf->page, KM_USER0);
+ }
+ /*SFLOG3(("vbsf_pipe_buf_map: %p -> %p\n", pPipeBuf, pvRet));*/
+ return pvRet;
+}
+
+
+/**
+ * Unmaps the buffer page.
+ *
+ * PIPE_BUF_FLAG_ATOMIC on the buffer selects between kunmap() and
+ * kunmap_atomic(); the flag is cleared in the latter case.
+ *
+ * @param pPipe The pipe (unused).
+ * @param pPipeBuf The pipe buffer whose page is mapped.
+ * @param pvMapping The mapping address returned by vbsf_pipe_buf_map().
+ */
+static void vbsf_pipe_buf_unmap(struct pipe_inode_info *pPipe, struct pipe_buffer *pPipeBuf, void *pvMapping)
+{
+ /*SFLOG3(("vbsf_pipe_buf_unmap: %p/%p\n", pPipeBuf, pvMapping)); */
+ if (!(pPipeBuf->flags & PIPE_BUF_FLAG_ATOMIC))
+ kunmap(pPipeBuf->page);
+ else {
+ pPipeBuf->flags &= ~PIPE_BUF_FLAG_ATOMIC;
+ kunmap_atomic(pvMapping, KM_USER0);
+ }
+}
+
+
+/**
+ * Gets a reference to the page.
+ *
+ * @param pPipe The pipe (unused).
+ * @param pPipeBuf The pipe buffer whose page should be retained.
+ */
+static void vbsf_pipe_buf_get(struct pipe_inode_info *pPipe, struct pipe_buffer *pPipeBuf)
+{
+ page_cache_get(pPipeBuf->page);
+ /*SFLOG3(("vbsf_pipe_buf_get: %p (return count=%d)\n", pPipeBuf, page_count(pPipeBuf->page)));*/
+}
+
+
+/**
+ * Release the buffer page (counter to vbsf_pipe_buf_get).
+ *
+ * @param pPipe The pipe (unused).
+ * @param pPipeBuf The pipe buffer whose page reference should be dropped.
+ */
+static void vbsf_pipe_buf_release(struct pipe_inode_info *pPipe, struct pipe_buffer *pPipeBuf)
+{
+ /*SFLOG3(("vbsf_pipe_buf_release: %p (incoming count=%d)\n", pPipeBuf, page_count(pPipeBuf->page)));*/
+ page_cache_release(pPipeBuf->page);
+}
+
+
+/**
+ * Attempt to steal the page.
+ *
+ * The page can only be taken over when the pipe buffer holds the sole
+ * reference to it; in that case it is returned locked.
+ *
+ * @returns 0 success, 1 on failure.
+ * @param   pPipe       The pipe (unused).
+ * @param   pPipeBuf    The pipe buffer whose page the caller wants.
+ */
+static int vbsf_pipe_buf_steal(struct pipe_inode_info *pPipe, struct pipe_buffer *pPipeBuf)
+{
+    if (page_count(pPipeBuf->page) != 1) {
+        /* Somebody else holds a reference too. */
+        SFLOG3(("vbsf_pipe_buf_steal: %p -> 1\n", pPipeBuf));
+        return 1;
+    }
+
+    lock_page(pPipeBuf->page);
+    SFLOG3(("vbsf_pipe_buf_steal: %p -> 0\n", pPipeBuf));
+    return 0;
+}
+
+
+/**
+ * Pipe buffer operations used by vbsf_feed_pages_to_pipe.
+ *
+ * The confirm callback is registered under the member name 'pin' on kernels
+ * before 2.6.23.
+ */
+static struct pipe_buf_operations vbsf_pipe_buf_ops = {
+ .can_merge = 0,
+# if RTLNX_VER_MIN(2,6,23)
+ .confirm = vbsf_pipe_buf_confirm,
+# else
+ .pin = vbsf_pipe_buf_confirm,
+# endif
+ .map = vbsf_pipe_buf_map,
+ .unmap = vbsf_pipe_buf_unmap,
+ .get = vbsf_pipe_buf_get,
+ .release = vbsf_pipe_buf_release,
+ .steal = vbsf_pipe_buf_steal,
+};
+
+
+/**
+ * Feeds the pages to the pipe.
+ *
+ * Pages given to the pipe are set to NULL in papPages.
+ *
+ * One pipe buffer is queued per page until @a cbActual bytes have been
+ * handed over. When the pipe is full the function sleeps, unless
+ * SPLICE_F_NONBLOCK is given or a signal is pending.
+ *
+ * @returns Number of bytes queued. If nothing could be queued: -EPIPE when
+ * there are no readers (SIGPIPE is sent too), -EAGAIN when
+ * non-blocking, -ERESTARTSYS when a signal is pending.
+ * @param pPipe The pipe to feed.
+ * @param papPages The pages holding the data.
+ * @param cPages Number of pages in @a papPages (not referenced by the
+ * code; @a cbActual governs how many are consumed).
+ * @param offPg0 Offset of the data in the first page.
+ * @param cbActual Number of bytes of data to queue.
+ * @param fFlags SPLICE_F_XXX flags.
+ */
+static ssize_t vbsf_feed_pages_to_pipe(struct pipe_inode_info *pPipe, struct page **papPages, size_t cPages, uint32_t offPg0,
+ uint32_t cbActual, unsigned fFlags)
+{
+ ssize_t cbRet = 0;
+ size_t iPage = 0;
+ bool fNeedWakeUp = false;
+
+ LOCK_PIPE(pPipe);
+ for (;;) {
+ if ( pPipe->readers > 0
+ && pPipe->nrbufs < PIPE_BUFFERS) {
+ /* There is a reader and a free slot: fill in the next buffer in the ring. */
+ struct pipe_buffer *pPipeBuf = &pPipe->bufs[(pPipe->curbuf + pPipe->nrbufs) % PIPE_BUFFERS];
+ uint32_t const cbThisPage = RT_MIN(cbActual, PAGE_SIZE - offPg0);
+ pPipeBuf->len = cbThisPage;
+ pPipeBuf->offset = offPg0;
+# if RTLNX_VER_MIN(2,6,23)
+ pPipeBuf->private = 0;
+# endif
+ pPipeBuf->ops = &vbsf_pipe_buf_ops;
+ pPipeBuf->flags = fFlags & SPLICE_F_GIFT ? PIPE_BUF_FLAG_GIFT : 0;
+ pPipeBuf->page = papPages[iPage];
+
+ /* The pipe owns the page now. */
+ papPages[iPage++] = NULL;
+ pPipe->nrbufs++;
+ fNeedWakeUp |= pPipe->inode != NULL;
+ /* Only the first page starts at a non-zero offset. */
+ offPg0 = 0;
+ cbRet += cbThisPage;
+
+ /* done? */
+ cbActual -= cbThisPage;
+ if (!cbActual)
+ break;
+ } else if (pPipe->readers == 0) {
+ SFLOGFLOW(("vbsf_feed_pages_to_pipe: no readers!\n"));
+ send_sig(SIGPIPE, current, 0);
+ if (cbRet == 0)
+ cbRet = -EPIPE;
+ break;
+ } else if (fFlags & SPLICE_F_NONBLOCK) {
+ if (cbRet == 0)
+ cbRet = -EAGAIN;
+ break;
+ } else if (signal_pending(current)) {
+ if (cbRet == 0)
+ cbRet = -ERESTARTSYS;
+ SFLOGFLOW(("vbsf_feed_pages_to_pipe: pending signal! (%zd)\n", cbRet));
+ break;
+ } else {
+ /* Pipe is full: let the readers know there is data, then wait for room. */
+ if (fNeedWakeUp) {
+ vbsf_wake_up_pipe(pPipe, true /*fReaders*/);
+ fNeedWakeUp = 0;
+ }
+ pPipe->waiting_writers++;
+ vbsf_wait_pipe(pPipe);
+ pPipe->waiting_writers--;
+ }
+ }
+ UNLOCK_PIPE(pPipe);
+
+ if (fNeedWakeUp)
+ vbsf_wake_up_pipe(pPipe, true /*fReaders*/);
+
+ return cbRet;
+}
+
+
+/**
+ * For splicing from a file to a pipe.
+ *
+ * When vbsf_should_use_cached_read() says so the work is left to
+ * generic_file_splice_read(). Otherwise up to PIPE_BUFFERS fresh pages are
+ * allocated, filled by a single host read request and handed to the pipe via
+ * vbsf_feed_pages_to_pipe().
+ *
+ * @returns Number of bytes spliced on success, negative errno on failure.
+ * @param file The file to read from.
+ * @param poffset The file offset to read at; advanced by the number of
+ * bytes spliced by the uncached path.
+ * @param pipe The pipe to feed.
+ * @param len Maximum number of bytes to splice.
+ * @param flags SPLICE_F_XXX flags.
+ */
+static ssize_t vbsf_splice_read(struct file *file, loff_t *poffset, struct pipe_inode_info *pipe, size_t len, unsigned int flags)
+{
+ struct inode *inode = VBSF_GET_F_DENTRY(file)->d_inode;
+ struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(inode->i_sb);
+ ssize_t cbRet;
+
+ SFLOGFLOW(("vbsf_splice_read: file=%p poffset=%p{%#RX64} pipe=%p len=%#zx flags=%#x\n", file, poffset, *poffset, pipe, len, flags));
+ if (vbsf_should_use_cached_read(file, inode->i_mapping, pSuperInfo)) {
+ cbRet = generic_file_splice_read(file, poffset, pipe, len, flags);
+ } else {
+ /*
+ * Create a read request.
+ */
+ loff_t offFile = *poffset;
+ /* Pages needed to cover [offFile, offFile + len), capped at what the pipe can hold. */
+ size_t cPages = RT_MIN(RT_ALIGN_Z((offFile & ~PAGE_CACHE_MASK) + len, PAGE_CACHE_SIZE) >> PAGE_CACHE_SHIFT,
+ PIPE_BUFFERS);
+ VBOXSFREADPGLSTREQ *pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFREADPGLSTREQ,
+ PgLst.aPages[cPages]));
+ if (pReq) {
+ /*
+ * Allocate pages.
+ */
+ struct page *apPages[PIPE_BUFFERS];
+ size_t i;
+ pReq->PgLst.offFirstPage = (uint16_t)offFile & (uint16_t)PAGE_OFFSET_MASK;
+ cbRet = 0;
+ for (i = 0; i < cPages; i++) {
+ struct page *pPage;
+ apPages[i] = pPage = alloc_page(GFP_USER);
+ if (pPage) {
+ pReq->PgLst.aPages[i] = page_to_phys(pPage);
+# ifdef VBOX_STRICT
+ ASMMemFill32(kmap(pPage), PAGE_SIZE, UINT32_C(0xdeadbeef));
+ kunmap(pPage);
+# endif
+ } else {
+ /* 'i' is left at the failed index, so the cleanup below frees only 0..i-1. */
+ cbRet = -ENOMEM;
+ break;
+ }
+ }
+ if (cbRet == 0) {
+ /*
+ * Do the reading.
+ */
+ uint32_t const cbToRead = RT_MIN((cPages << PAGE_SHIFT) - (offFile & PAGE_OFFSET_MASK), len);
+ struct vbsf_reg_info *sf_r = (struct vbsf_reg_info *)file->private_data;
+ int vrc = VbglR0SfHostReqReadPgLst(pSuperInfo->map.root, pReq, sf_r->Handle.hHost, offFile, cbToRead, cPages);
+ if (RT_SUCCESS(vrc)) {
+ /*
+ * Get the number of bytes read, jettison the request
+ * and, in case of EOF, any unnecessary pages.
+ */
+ uint32_t cbActual = pReq->Parms.cb32Read.u.value32;
+ AssertStmt(cbActual <= cbToRead, cbActual = cbToRead);
+ SFLOG2(("vbsf_splice_read: read -> %#x bytes @ %#RX64\n", cbActual, offFile));
+
+ VbglR0PhysHeapFree(pReq);
+ pReq = NULL;
+
+ /*
+ * Now, feed it to the pipe thingy.
+ * This will take ownership of the all pages no matter what happens.
+ */
+ cbRet = vbsf_feed_pages_to_pipe(pipe, apPages, cPages, offFile & PAGE_OFFSET_MASK, cbActual, flags);
+ if (cbRet > 0)
+ *poffset = offFile + cbRet;
+ } else {
+ cbRet = -RTErrConvertToErrno(vrc);
+ SFLOGFLOW(("vbsf_splice_read: Read failed: %Rrc -> %zd\n", vrc, cbRet));
+ }
+ /* Make the cleanup loop below look at every page slot. */
+ i = cPages;
+ }
+
+ /* Free the pages the pipe did not take (those entries are non-NULL). */
+ while (i-- > 0)
+ if (apPages[i])
+ __free_pages(apPages[i], 0);
+ if (pReq)
+ VbglR0PhysHeapFree(pReq);
+ } else {
+ cbRet = -ENOMEM;
+ }
+ }
+ SFLOGFLOW(("vbsf_splice_read: returns %zd (%#zx), *poffset=%#RX64\n", cbRet, cbRet, *poffset));
+ return cbRet;
+}
+
+#endif /* 2.6.17 <= LINUX_VERSION_CODE < 2.6.31 */
+#if RTLNX_VER_RANGE(2,6,17, 3,16,0)
+
+/**
+ * For splicing from a pipe to a file.
+ *
+ * Since we can combine buffers and request allocations, this should be faster
+ * than the default implementation.
+ */
+static ssize_t vbsf_splice_write(struct pipe_inode_info *pPipe, struct file *file, loff_t *poffset, size_t len, unsigned int flags)
+{
+ struct inode *inode = VBSF_GET_F_DENTRY(file)->d_inode;
+ struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(inode->i_sb);
+ ssize_t cbRet;
+
+ SFLOGFLOW(("vbsf_splice_write: pPipe=%p file=%p poffset=%p{%#RX64} len=%#zx flags=%#x\n", pPipe, file, poffset, *poffset, len, flags));
+ /** @todo later if (false) {
+ cbRet = generic_file_splice_write(pPipe, file, poffset, len, flags);
+ } else */ {
+ /*
+ * Prepare a write request.
+ */
+# ifdef PIPE_BUFFERS
+ uint32_t const cMaxPages = RT_MIN(PIPE_BUFFERS, RT_ALIGN_Z(len, PAGE_SIZE) >> PAGE_SHIFT);
+# else
+ uint32_t const cMaxPages = RT_MIN(RT_MAX(RT_MIN(pPipe->buffers, 256), PIPE_DEF_BUFFERS),
+ RT_ALIGN_Z(len, PAGE_SIZE) >> PAGE_SHIFT);
+# endif
+ VBOXSFWRITEPGLSTREQ *pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFREADPGLSTREQ,
+ PgLst.aPages[cMaxPages]));
+ if (pReq) {
+ /*
+ * Feed from the pipe.
+ */
+ struct vbsf_reg_info *sf_r = (struct vbsf_reg_info *)file->private_data;
+ struct address_space *mapping = inode->i_mapping;
+ loff_t offFile = *poffset;
+ bool fNeedWakeUp = false;
+ cbRet = 0;
+
+ LOCK_PIPE(pPipe);
+
+ for (;;) {
+ unsigned cBufs = pPipe->nrbufs;
+ /*SFLOG2(("vbsf_splice_write: nrbufs=%#x curbuf=%#x\n", cBufs, pPipe->curbuf));*/
+ if (cBufs) {
+ /*
+ * There is data available. Write it to the file.
+ */
+ int vrc;
+ struct pipe_buffer *pPipeBuf = &pPipe->bufs[pPipe->curbuf];
+ uint32_t cPagesToWrite = 1;
+ uint32_t cbToWrite = pPipeBuf->len;
+
+ Assert(pPipeBuf->offset < PAGE_SIZE);
+ Assert(pPipeBuf->offset + pPipeBuf->len <= PAGE_SIZE);
+
+ pReq->PgLst.offFirstPage = pPipeBuf->offset & PAGE_OFFSET;
+ pReq->PgLst.aPages[0] = page_to_phys(pPipeBuf->page);
+
+ /* Add any adjacent page buffers: */
+ while ( cPagesToWrite < cBufs
+ && cPagesToWrite < cMaxPages
+ && ((pReq->PgLst.offFirstPage + cbToWrite) & PAGE_OFFSET_MASK) == 0) {
+# ifdef PIPE_BUFFERS
+ struct pipe_buffer *pPipeBuf2 = &pPipe->bufs[(pPipe->curbuf + cPagesToWrite) % PIPE_BUFFERS];
+# else
+ struct pipe_buffer *pPipeBuf2 = &pPipe->bufs[(pPipe->curbuf + cPagesToWrite) % pPipe->buffers];
+# endif
+ Assert(pPipeBuf2->len <= PAGE_SIZE);
+ Assert(pPipeBuf2->offset < PAGE_SIZE);
+ if (pPipeBuf2->offset != 0)
+ break;
+ pReq->PgLst.aPages[cPagesToWrite] = page_to_phys(pPipeBuf2->page);
+ cbToWrite += pPipeBuf2->len;
+ cPagesToWrite += 1;
+ }
+
+ /* Check that we don't have signals pending before we issue the write, as
+ we'll only end up having to cancel the HGCM request 99% of the time: */
+ if (!signal_pending(current)) {
+ struct vbsf_inode_info *sf_i = VBSF_GET_INODE_INFO(inode);
+ vrc = VbglR0SfHostReqWritePgLst(pSuperInfo->map.root, pReq, sf_r->Handle.hHost, offFile,
+ cbToWrite, cPagesToWrite);
+ sf_i->ModificationTimeAtOurLastWrite = sf_i->ModificationTime;
+ } else
+ vrc = VERR_INTERRUPTED;
+ if (RT_SUCCESS(vrc)) {
+ /*
+ * Get the number of bytes actually written, update file position
+ * and return value, and advance the pipe buffer.
+ */
+ uint32_t cbActual = pReq->Parms.cb32Write.u.value32;
+ AssertStmt(cbActual <= cbToWrite, cbActual = cbToWrite);
+ SFLOG2(("vbsf_splice_write: write -> %#x bytes @ %#RX64\n", cbActual, offFile));
+
+ cbRet += cbActual;
+
+ while (cbActual > 0) {
+ uint32_t cbAdvance = RT_MIN(pPipeBuf->len, cbActual);
+
+ vbsf_reg_write_sync_page_cache(mapping, offFile, cbAdvance, NULL,
+ &pPipeBuf->page, pPipeBuf->offset, 1);
+
+ offFile += cbAdvance;
+ cbActual -= cbAdvance;
+ pPipeBuf->offset += cbAdvance;
+ pPipeBuf->len -= cbAdvance;
+
+ if (!pPipeBuf->len) {
+ struct pipe_buf_operations const *pOps = pPipeBuf->ops;
+ pPipeBuf->ops = NULL;
+ pOps->release(pPipe, pPipeBuf);
+
+# ifdef PIPE_BUFFERS
+ pPipe->curbuf = (pPipe->curbuf + 1) % PIPE_BUFFERS;
+# else
+ pPipe->curbuf = (pPipe->curbuf + 1) % pPipe->buffers;
+# endif
+ pPipe->nrbufs -= 1;
+ pPipeBuf = &pPipe->bufs[pPipe->curbuf];
+
+# if RTLNX_VER_MAX(2,6,30)
+ fNeedWakeUp |= pPipe->inode != NULL;
+# else
+ fNeedWakeUp = true;
+# endif
+ } else {
+ Assert(cbActual == 0);
+ break;
+ }
+ }
+
+ *poffset = offFile;
+ } else {
+ if (cbRet == 0)
+ cbRet = vrc == VERR_INTERRUPTED ? -ERESTARTSYS : -RTErrConvertToErrno(vrc);
+ SFLOGFLOW(("vbsf_splice_write: Write failed: %Rrc -> %zd (cbRet=%#zx)\n",
+ vrc, -RTErrConvertToErrno(vrc), cbRet));
+ break;
+ }
+ } else {
+ /*
+ * Wait for data to become available, if there is chance that'll happen.
+ */
+ /* Quit if there are no writers (think EOF): */
+ if (pPipe->writers == 0) {
+ SFLOGFLOW(("vbsf_splice_write: No buffers. No writers. The show is done!\n"));
+ break;
+ }
+
+ /* Quit if if we've written some and no writers waiting on the lock: */
+ if (cbRet > 0 && pPipe->waiting_writers == 0) {
+ SFLOGFLOW(("vbsf_splice_write: No waiting writers, returning what we've got.\n"));
+ break;
+ }
+
+ /* Quit with EAGAIN if non-blocking: */
+ if (flags & SPLICE_F_NONBLOCK) {
+ if (cbRet == 0)
+ cbRet = -EAGAIN;
+ break;
+ }
+
+ /* Quit if we've got pending signals: */
+ if (signal_pending(current)) {
+ if (cbRet == 0)
+ cbRet = -ERESTARTSYS;
+ SFLOGFLOW(("vbsf_splice_write: pending signal! (%zd)\n", cbRet));
+ break;
+ }
+
+ /* Wake up writers before we start waiting: */
+ if (fNeedWakeUp) {
+ vbsf_wake_up_pipe(pPipe, false /*fReaders*/);
+ fNeedWakeUp = false;
+ }
+ vbsf_wait_pipe(pPipe);
+ }
+ } /* feed loop */
+
+ if (fNeedWakeUp)
+ vbsf_wake_up_pipe(pPipe, false /*fReaders*/);
+
+ UNLOCK_PIPE(pPipe);
+
+ VbglR0PhysHeapFree(pReq);
+ } else {
+ cbRet = -ENOMEM;
+ }
+ }
+ SFLOGFLOW(("vbsf_splice_write: returns %zd (%#zx), *poffset=%#RX64\n", cbRet, cbRet, *poffset));
+ return cbRet;
+}
+
+#endif /* 2.6.17 <= LINUX_VERSION_CODE < 3.16.0 */
+
+#if RTLNX_VER_RANGE(2,5,30, 2,6,23)
+/**
+ * Our own sendfile implementation that does not go via the page cache like generic_file_sendfile() does, except
+ * when vbsf_should_use_cached_read() asks for it.  Returns RdDesc.written if non-zero, else RdDesc.error or -ENOMEM.
+ */
+static ssize_t vbsf_reg_sendfile(struct file *pFile, loff_t *poffFile, size_t cbToSend, read_actor_t pfnActor,
+# if RTLNX_VER_MIN(2,6,8)
+                                 void *pvUser
+# else
+                                 void __user *pvUser
+# endif
+                                 )
+{
+    struct inode *inode = VBSF_GET_F_DENTRY(pFile)->d_inode;
+    struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(inode->i_sb);
+    ssize_t cbRet;
+    SFLOGFLOW(("vbsf_reg_sendfile: pFile=%p poffFile=%p{%#RX64} cbToSend=%#zx pfnActor=%p pvUser=%p\n",
+               pFile, poffFile, poffFile ? *poffFile : 0, cbToSend, pfnActor, pvUser));
+    Assert(pSuperInfo);
+
+    /*
+     * Return immediately if asked to send nothing.
+     */
+    if (cbToSend == 0)
+        return 0;
+
+    /*
+     * Like for vbsf_reg_read() and vbsf_reg_read_iter(), we allow going via
+     * the page cache in some cases or configs.
+     */
+    if (vbsf_should_use_cached_read(pFile, inode->i_mapping, pSuperInfo)) {
+        cbRet = generic_file_sendfile(pFile, poffFile, cbToSend, pfnActor, pvUser);
+        SFLOGFLOW(("vbsf_reg_sendfile: returns %#zx *poffFile=%#RX64 [generic_file_sendfile]\n", cbRet, poffFile ? *poffFile : UINT64_MAX));
+    } else {
+        /*
+         * Allocate a request and a bunch of pages for reading from the file.
+         */
+        struct page *apPages[16];
+        loff_t offFile = poffFile ? *poffFile : 0;
+        size_t const cPages = cbToSend + ((size_t)offFile & PAGE_OFFSET_MASK) >= RT_ELEMENTS(apPages) * PAGE_SIZE
+                            ? RT_ELEMENTS(apPages)
+                            : RT_ALIGN_Z(cbToSend + ((size_t)offFile & PAGE_OFFSET_MASK), PAGE_SIZE) >> PAGE_SHIFT;
+        size_t iPage;
+        VBOXSFREADPGLSTREQ *pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFREADPGLSTREQ,
+                                                                                              PgLst.aPages[cPages]));
+        if (pReq) {
+            Assert(cPages > 0);
+            cbRet = 0;
+            for (iPage = 0; iPage < cPages; iPage++) {
+                struct page *pPage;
+                apPages[iPage] = pPage = alloc_page(GFP_USER);
+                if (pPage) {
+                    Assert(page_count(pPage) == 1);
+                    pReq->PgLst.aPages[iPage] = page_to_phys(pPage);
+                } else {
+                    while (iPage-- > 0) /* undo the allocations made so far */
+                        vbsf_put_page(apPages[iPage]);
+                    cbRet = -ENOMEM;
+                    break;
+                }
+            }
+            if (cbRet == 0) {
+                /*
+                 * Do the job.
+                 */
+                struct vbsf_reg_info *sf_r = (struct vbsf_reg_info *)pFile->private_data;
+                read_descriptor_t RdDesc;
+                RdDesc.count = cbToSend;
+# if RTLNX_VER_MIN(2,6,8)
+                RdDesc.arg.data = pvUser;
+# else
+                RdDesc.buf = pvUser;
+# endif
+                RdDesc.written = 0;
+                RdDesc.error = 0;
+
+                Assert(sf_r);
+                Assert((sf_r->Handle.fFlags & VBSF_HANDLE_F_MAGIC_MASK) == VBSF_HANDLE_F_MAGIC);
+
+                while (cbToSend > 0) {
+                    /*
+                     * Read another chunk.  For paranoid reasons, we keep data where the page cache
+                     * would keep it, i.e. page offset bits corresponds to the file offset bits.
+                     */
+                    uint32_t const offPg0 = (uint32_t)offFile & (uint32_t)PAGE_OFFSET_MASK;
+                    uint32_t const cbToRead = RT_MIN((cPages << PAGE_SHIFT) - offPg0, cbToSend);
+                    uint32_t const cPagesToRead = RT_ALIGN_Z(cbToRead + offPg0, PAGE_SIZE) >> PAGE_SHIFT;
+                    int vrc;
+                    pReq->PgLst.offFirstPage = (uint16_t)offPg0;
+                    if (!signal_pending(current))
+                        vrc = VbglR0SfHostReqReadPgLst(pSuperInfo->map.root, pReq, sf_r->Handle.hHost, offFile,
+                                                       cbToRead, cPagesToRead);
+                    else
+                        vrc = VERR_INTERRUPTED;
+                    if (RT_SUCCESS(vrc)) {
+                        /*
+                         * Pass what we read to the actor.
+                         */
+                        uint32_t off = offPg0;
+                        uint32_t cbActual = pReq->Parms.cb32Read.u.value32;
+                        bool const fIsEof = cbActual < cbToRead;
+                        AssertStmt(cbActual <= cbToRead, cbActual = cbToRead);
+                        SFLOG3(("vbsf_reg_sendfile: Read %#x bytes (offPg0=%#x), wanted %#x ...\n", cbActual, offPg0, cbToRead));
+
+                        iPage = 0;
+                        while (cbActual > 0) {
+                            uint32_t const cbPage = RT_MIN(cbActual, PAGE_SIZE - off);
+                            int const cbRetActor = pfnActor(&RdDesc, apPages[iPage], off, cbPage);
+                            Assert(cbRetActor >= 0); /* Returns zero on failure, with RdDesc.error holding the status code. */
+
+                            AssertMsg(iPage < cPages && iPage < cPagesToRead, ("iPage=%#x cPages=%#x cPagesToRead=%#x\n", iPage, cPages, cPagesToRead));
+
+                            offFile += cbRetActor;
+                            if ((uint32_t)cbRetActor == cbPage && RdDesc.count > 0) {
+                                cbActual -= cbPage;
+                                cbToSend -= cbPage;
+                                iPage++;
+                            } else {
+                                SFLOG3(("vbsf_reg_sendfile: cbRetActor=%#x (%d) cbPage=%#x RdDesc{count=%#lx error=%d} iPage=%#x/%#x/%#x cbToSend=%#zx\n",
+                                        cbRetActor, cbRetActor, cbPage, RdDesc.count, RdDesc.error, iPage, cPagesToRead, cPages, cbToSend));
+                                vrc = VERR_CALLBACK_RETURN;
+                                break;
+                            }
+                            off = 0; /* only the first page of a chunk starts at a non-zero offset */
+                        }
+
+                        /*
+                         * Are we done yet?
+                         */
+                        if (RT_FAILURE_NP(vrc) || cbToSend == 0 || RdDesc.error != 0 || fIsEof)
+                            break;
+
+                        /*
+                         * Replace pages held by the actor.
+                         */
+                        vrc = VINF_SUCCESS;
+                        for (iPage = 0; iPage < cPages; iPage++) {
+                            struct page *pPage = apPages[iPage];
+                            if (page_count(pPage) != 1) {
+                                struct page *pNewPage = alloc_page(GFP_USER);
+                                if (pNewPage) {
+                                    SFLOGFLOW(("vbsf_reg_sendfile: Replacing page #%x: %p -> %p\n", iPage, pPage, pNewPage));
+                                    vbsf_put_page(pPage);
+                                    apPages[iPage] = pNewPage;
+                                    pReq->PgLst.aPages[iPage] = page_to_phys(pNewPage); /* or the next read targets the old page */
+                                } else {
+                                    SFLOGFLOW(("vbsf_reg_sendfile: Failed to allocate a replacement page.\n"));
+                                    vrc = VERR_NO_MEMORY;
+                                    break;
+                                }
+                            }
+                        }
+                        if (RT_FAILURE(vrc))
+                            break; /* RdDesc.written should be non-zero, so don't bother with setting error. */
+                    } else {
+                        RdDesc.error = vrc == VERR_INTERRUPTED ? -ERESTARTSYS : -RTErrConvertToErrno(vrc);
+                        SFLOGFLOW(("vbsf_reg_sendfile: Read failed: %Rrc -> %zd (RdDesc.error=%#d)\n",
+                                   vrc, -RTErrConvertToErrno(vrc), RdDesc.error));
+                        break;
+                    }
+                }
+
+                /*
+                 * Free memory.
+                 */
+                for (iPage = 0; iPage < cPages; iPage++)
+                    vbsf_put_page(apPages[iPage]);
+
+                /*
+                 * Set the return values.
+                 */
+                if (RdDesc.written) {
+                    cbRet = RdDesc.written;
+                    if (poffFile)
+                        *poffFile = offFile;
+                } else {
+                    cbRet = RdDesc.error;
+                }
+            }
+            VbglR0PhysHeapFree(pReq);
+        } else {
+            cbRet = -ENOMEM;
+        }
+        SFLOGFLOW(("vbsf_reg_sendfile: returns %#zx offFile=%#RX64\n", cbRet, offFile));
+    }
+    return cbRet;
+}
+#endif /* 2.5.30 <= LINUX_VERSION_CODE < 2.6.23 */
+
+
+/*********************************************************************************************************************************
+* File operations on regular files *
+*********************************************************************************************************************************/
+
+/** Drops one page reference: page_cache_release() before 4.6.0, put_page() from then on. */
+DECLINLINE(void) vbsf_put_page(struct page *pPage)
+{
+#if RTLNX_VER_MAX(4,6,0)
+    page_cache_release(pPage);
+#else
+    put_page(pPage);
+#endif
+}
+
+
+/** Takes one page reference: page_cache_get() before 4.6.0, get_page() from then on. */
+DECLINLINE(void) vbsf_get_page(struct page *pPage)
+{
+#if RTLNX_VER_MAX(4,6,0)
+    page_cache_get(pPage);
+#else
+    get_page(pPage);
+#endif
+}
+
+
+/** Companion to vbsf_lock_user_pages(): optionally dirties, then releases the pages, last one first. */
+static void vbsf_unlock_user_pages(struct page **papPages, size_t cPages, bool fSetDirty, bool fLockPgHack)
+{
+    /* Pages locked via the kernel buffer hack are never marked dirty: */
+    bool const fDirty = fSetDirty && !fLockPgHack;
+    size_t idx;
+
+    for (idx = cPages; idx > 0; idx--) {
+        struct page *pCur = papPages[idx - 1];
+        Assert((ssize_t)(idx - 1) >= 0);
+        /* Reserved pages are released without being dirtied. */
+        if (fDirty && !PageReserved(pCur))
+            set_page_dirty(pCur);
+        vbsf_put_page(pCur);
+    }
+}
+
+
+/**
+ * Worker for vbsf_lock_user_pages_failed_check_kernel() and vbsf_iter_lock_pages(): probes @a cPages pages starting
+ * at @a pbStart, then references each page and stores it in @a papPages. Returns 0, or -EFAULT on a failed lookup.
+ */
+static int vbsf_lock_kernel_pages(uint8_t *pbStart, bool fWrite, size_t cPages, struct page **papPages)
+{
+ uintptr_t const uPtrFrom = (uintptr_t)pbStart;
+ uintptr_t const uPtrLast = (uPtrFrom & ~(uintptr_t)PAGE_OFFSET_MASK) + (cPages << PAGE_SHIFT) - 1; /* last byte of the last page */
+ uint8_t *pbPage = (uint8_t *)uPtrLast; /* the lookup loops below walk backwards from here */
+ size_t iPage = cPages;
+
+ /*
+ * Touch the pages first (paranoia^2): one byte per page, starting at pbStart.
+ */
+ if (fWrite) {
+ uint8_t volatile *pbProbe = (uint8_t volatile *)uPtrFrom;
+ while (iPage-- > 0) {
+ *pbProbe = *pbProbe; /* read the byte and write the same value back */
+ pbProbe += PAGE_SIZE;
+ }
+ } else {
+ uint8_t const *pbProbe = (uint8_t const *)uPtrFrom;
+ while (iPage-- > 0) {
+ ASMProbeReadByte(pbProbe);
+ pbProbe += PAGE_SIZE;
+ }
+ }
+
+ /*
+ * Get the pages, filling papPages from the last entry down to the first.
+ * Note! Fixes here probably applies to rtR0MemObjNativeLockKernel as well.
+ */
+ iPage = cPages;
+ if ( uPtrFrom >= (unsigned long)__va(0)
+ && uPtrLast < (unsigned long)high_memory) {
+ /* The physical page mapping area: */
+ while (iPage-- > 0) {
+ struct page *pPage = papPages[iPage] = virt_to_page(pbPage);
+ vbsf_get_page(pPage);
+ pbPage -= PAGE_SIZE;
+ }
+ } else {
+ /* This is vmalloc or some such thing, so go thru page tables: */
+ while (iPage-- > 0) {
+ struct page *pPage = rtR0MemObjLinuxVirtToPage(pbPage);
+ if (pPage) {
+ papPages[iPage] = pPage;
+ vbsf_get_page(pPage);
+ pbPage -= PAGE_SIZE;
+ } else {
+ while (++iPage < cPages) { /* release the entries above the failed index, i.e. those already referenced */
+ pPage = papPages[iPage];
+ vbsf_put_page(pPage);
+ }
+ return -EFAULT;
+ }
+ }
+ }
+ return 0;
+}
+
+
+/**
+ * Catches kernel_read() and kernel_write() calls and works around them.
+ *
+ * The file_operations::read and file_operations::write callbacks supposedly
+ * hand us the user buffers to read into and write out of. To allow the kernel
+ * to read and write without allocating buffers in userland, kernel_read()
+ * and kernel_write() increase the user space address limit before calling us
+ * so that copyin/copyout won't reject it. Our problem is that get_user_pages()
+ * works on the userspace address space structures and will not be fooled by an
+ * increased addr_limit.
+ *
+ * This code tries to detect this situation and lock the kernel buffer pages
+ * itself; on success *pfLockPgHack is set and 0 returned, else @a rcFailed.
+ */
+static int vbsf_lock_user_pages_failed_check_kernel(uintptr_t uPtrFrom, size_t cPages, bool fWrite, int rcFailed,
+ struct page **papPages, bool *pfLockPgHack)
+{
+ /*
+ * Check that this is valid user memory that is actually in the kernel range.
+ */
+#if RTLNX_VER_MIN(5,10,0)
+ if ( access_ok((void *)uPtrFrom, cPages << PAGE_SHIFT)
+ && uPtrFrom >= TASK_SIZE_MAX)
+#elif RTLNX_VER_MIN(5,0,0) || RTLNX_RHEL_MIN(8,1)
+ if ( access_ok((void *)uPtrFrom, cPages << PAGE_SHIFT)
+ && uPtrFrom >= USER_DS.seg)
+#else
+ if ( access_ok(fWrite ? VERIFY_WRITE : VERIFY_READ, (void *)uPtrFrom, cPages << PAGE_SHIFT)
+ && uPtrFrom >= USER_DS.seg)
+#endif
+ {
+ int rc = vbsf_lock_kernel_pages((uint8_t *)uPtrFrom, fWrite, cPages, papPages);
+ if (rc == 0) {
+ *pfLockPgHack = true; /* tells vbsf_unlock_user_pages() not to dirty these pages */
+ return 0;
+ }
+ }
+
+ return rcFailed; /* not a kernel buffer, or locking it failed: hand back the caller's status */
+}
+
+
+/** Wrapper around get_user_pages: returns 0 with all @a cPages pages referenced in @a papPages, else a negative errno. */
+DECLINLINE(int) vbsf_lock_user_pages(uintptr_t uPtrFrom, size_t cPages, bool fWrite, struct page **papPages, bool *pfLockPgHack)
+{
+# if RTLNX_VER_MIN(4,9,0) \
+ || (defined(CONFIG_SUSE_KERNEL) && RTLNX_VER_RANGE(4,4,73, 4,4,74) /** @todo Figure out when & what exactly. */) \
+ || (defined(CONFIG_SUSE_KERNEL) && RTLNX_VER_RANGE(4,4,75, 4,4,90) /** @todo Figure out when & what exactly. */) \
+ || (defined(CONFIG_SUSE_KERNEL) && RTLNX_VER_RANGE(4,4,92, 4,5,0) /** @todo Figure out when & what exactly. */)
+ ssize_t cPagesLocked = get_user_pages_unlocked(uPtrFrom, cPages, papPages,
+ fWrite ? FOLL_WRITE | FOLL_FORCE : FOLL_FORCE);
+# elif RTLNX_VER_MIN(4,6,0)
+ ssize_t cPagesLocked = get_user_pages_unlocked(uPtrFrom, cPages, fWrite, 1 /*force*/, papPages);
+# elif RTLNX_VER_RANGE(4,4,168, 4,5,0)
+ ssize_t cPagesLocked = get_user_pages_unlocked(current, current->mm, uPtrFrom, cPages, papPages,
+ fWrite ? FOLL_WRITE | FOLL_FORCE : FOLL_FORCE);
+# elif RTLNX_VER_MIN(4,0,0)
+ ssize_t cPagesLocked = get_user_pages_unlocked(current, current->mm, uPtrFrom, cPages, fWrite, 1 /*force*/, papPages);
+# else
+ struct task_struct *pTask = current;
+ ssize_t cPagesLocked;
+ down_read(&pTask->mm->mmap_sem);
+ cPagesLocked = get_user_pages(pTask, pTask->mm, uPtrFrom, cPages, fWrite, 1 /*force*/, papPages, NULL);
+ up_read(&pTask->mm->mmap_sem);
+# endif
+ *pfLockPgHack = false; /* only the kernel buffer fallback below sets this to true */
+ if (cPagesLocked == cPages)
+ return 0;
+
+ /*
+ * It failed. A negative count gets a second chance as a kernel buffer.
+ */
+ if (cPagesLocked < 0)
+ return vbsf_lock_user_pages_failed_check_kernel(uPtrFrom, cPages, fWrite, (int)cPagesLocked, papPages, pfLockPgHack);
+
+ vbsf_unlock_user_pages(papPages, cPagesLocked, false /*fSetDirty*/, false /*fLockPgHack*/); /* partial lock: release what we got */
+
+ /* We could use uPtrFrom + cPagesLocked to get the correct status here... */
+ return -EFAULT;
+}
+
+#if RTLNX_VER_MAX(5,10,0) /* No regular .read/.write for 5.10, only .read_iter/.write_iter or in-kernel reads/writes fail. */
+
+/**
+ * Read function used when accessing files that are memory mapped.
+ *
+ * We read from the page cache here to present a coherent picture of the
+ * file content. Returns what the generic read routine returns.
+ */
+static ssize_t vbsf_reg_read_mapped(struct file *file, char /*__user*/ *buf, size_t size, loff_t *off)
+{
+# if RTLNX_VER_MIN(3,16,0)
+ struct iovec iov = { .iov_base = buf, .iov_len = size };
+ struct iov_iter iter;
+ struct kiocb kiocb;
+ ssize_t cbRet;
+
+ init_sync_kiocb(&kiocb, file);
+ kiocb.ki_pos = *off;
+ iov_iter_init(&iter, READ, &iov, 1, size);
+
+ cbRet = generic_file_read_iter(&kiocb, &iter);
+
+ *off = kiocb.ki_pos; /* hand the position from the kiocb back to the caller */
+ return cbRet;
+
+# elif RTLNX_VER_MIN(2,6,19)
+ struct iovec iov = { .iov_base = buf, .iov_len = size };
+ struct kiocb kiocb;
+ ssize_t cbRet;
+
+ init_sync_kiocb(&kiocb, file);
+ kiocb.ki_pos = *off;
+
+ cbRet = generic_file_aio_read(&kiocb, &iov, 1, *off);
+ if (cbRet == -EIOCBQUEUED) /* request was queued: wait for it to complete */
+ cbRet = wait_on_sync_kiocb(&kiocb);
+
+ *off = kiocb.ki_pos; /* hand the position from the kiocb back to the caller */
+ return cbRet;
+
+# else /* 2.6.18 or earlier: */
+ return generic_file_read(file, buf, size, off);
+# endif
+}
+
+
+/**
+ * Fallback case of vbsf_reg_read() that locks the user buffers and lets the host
+ * write directly to them.  Returns the number of bytes read or a negative errno.
+ */
+static ssize_t vbsf_reg_read_locking(struct file *file, char /*__user*/ *buf, size_t size, loff_t *off,
+                                     struct vbsf_super_info *pSuperInfo, struct vbsf_reg_info *sf_r)
+{
+    /*
+     * Lock pages and execute the read, taking care not to pass the host
+     * more than it can handle in one go or more than we care to allocate
+     * page arrays for.  The latter limit is set at just short of 32KB due
+     * to how the physical heap works.
+     */
+    struct page *apPagesStack[16];
+    struct page **papPages = &apPagesStack[0];
+    struct page **papPagesFree = NULL;
+    VBOXSFREADPGLSTREQ *pReq;
+    loff_t offFile = *off;
+    ssize_t cbRet = -ENOMEM;
+    size_t cPages = (((uintptr_t)buf & PAGE_OFFSET_MASK) + size + PAGE_OFFSET_MASK) >> PAGE_SHIFT;
+    size_t cMaxPages = RT_MIN(RT_MAX(pSuperInfo->cMaxIoPages, 1), cPages);
+    bool fLockPgHack;
+
+    pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFREADPGLSTREQ, PgLst.aPages[cMaxPages]));
+    while (!pReq && cMaxPages > 4) { /* halve the request size until the allocation succeeds */
+        cMaxPages /= 2;
+        pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFREADPGLSTREQ, PgLst.aPages[cMaxPages]));
+    }
+    if (pReq && cMaxPages > RT_ELEMENTS(apPagesStack))
+        papPagesFree = papPages = kmalloc(cMaxPages * sizeof(papPages[0]), GFP_KERNEL); /* sized by element, not by sizeof(size_t) */
+    if (pReq && papPages) {
+        cbRet = 0;
+        for (;;) {
+            /*
+             * Figure out how much to process now and lock the user pages.
+             */
+            int rc;
+            size_t cbChunk = (uintptr_t)buf & PAGE_OFFSET_MASK;
+            pReq->PgLst.offFirstPage = (uint16_t)cbChunk;
+            cPages = RT_ALIGN_Z(cbChunk + size, PAGE_SIZE) >> PAGE_SHIFT;
+            if (cPages <= cMaxPages)
+                cbChunk = size;
+            else {
+                cPages = cMaxPages;
+                cbChunk = (cMaxPages << PAGE_SHIFT) - cbChunk;
+            }
+
+            rc = vbsf_lock_user_pages((uintptr_t)buf, cPages, true /*fWrite*/, papPages, &fLockPgHack);
+            if (rc == 0) {
+                size_t iPage = cPages;
+                while (iPage-- > 0)
+                    pReq->PgLst.aPages[iPage] = page_to_phys(papPages[iPage]);
+            } else {
+                cbRet = rc;
+                break;
+            }
+
+            /*
+             * Issue the request and unlock the pages.
+             */
+            rc = VbglR0SfHostReqReadPgLst(pSuperInfo->map.root, pReq, sf_r->Handle.hHost, offFile, cbChunk, cPages);
+
+            Assert(cPages <= cMaxPages);
+            vbsf_unlock_user_pages(papPages, cPages, true /*fSetDirty*/, fLockPgHack);
+
+            if (RT_SUCCESS(rc)) {
+                /*
+                 * Success, advance position and buffer.
+                 */
+                uint32_t cbActual = pReq->Parms.cb32Read.u.value32;
+                AssertStmt(cbActual <= cbChunk, cbActual = cbChunk);
+                cbRet += cbActual;
+                offFile += cbActual;
+                buf = (uint8_t *)buf + cbActual;
+                size -= cbActual;
+
+                /*
+                 * Are we done already?  If so commit the new file offset.
+                 */
+                if (!size || cbActual < cbChunk) {
+                    *off = offFile;
+                    break;
+                }
+            } else if (rc == VERR_NO_MEMORY && cMaxPages > 4) {
+                /*
+                 * The host probably doesn't have enough heap to handle the
+                 * request, reduce the page count and retry.
+                 */
+                cMaxPages /= 4;
+                Assert(cMaxPages > 0);
+            } else {
+                /*
+                 * If we've successfully read stuff, return it rather than
+                 * the error.  (Not sure if this is such a great idea...)
+                 */
+                if (cbRet > 0) {
+                    SFLOGFLOW(("vbsf_reg_read: read at %#RX64 -> %Rrc; got cbRet=%#zx already\n", offFile, rc, cbRet));
+                    *off = offFile;
+                } else {
+                    SFLOGFLOW(("vbsf_reg_read: read at %#RX64 -> %Rrc\n", offFile, rc));
+                    cbRet = -EPROTO;
+                }
+                break;
+            }
+        }
+    }
+    if (papPagesFree)
+        kfree(papPages);
+    if (pReq)
+        VbglR0PhysHeapFree(pReq);
+    SFLOGFLOW(("vbsf_reg_read: returns %zd (%#zx), *off=%RX64 [lock]\n", cbRet, cbRet, *off));
+    return cbRet;
+}
+
+
+/**
+ * Read from a regular file.
+ *
+ * @param file the file
+ * @param buf the buffer
+ * @param size length of the buffer
+ * @param off offset within the file (in/out).
+ * @returns the number of read bytes on success, negative Linux error code otherwise
+ */
+static ssize_t vbsf_reg_read(struct file *file, char /*__user*/ *buf, size_t size, loff_t *off)
+{
+ struct inode *inode = VBSF_GET_F_DENTRY(file)->d_inode;
+ struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(inode->i_sb);
+ struct vbsf_reg_info *sf_r = file->private_data;
+ struct address_space *mapping = inode->i_mapping;
+
+ SFLOGFLOW(("vbsf_reg_read: inode=%p file=%p buf=%p size=%#zx off=%#llx\n", inode, file, buf, size, *off));
+
+ if (!S_ISREG(inode->i_mode)) {
+ LogFunc(("read from non regular file %d\n", inode->i_mode));
+ return -EINVAL;
+ }
+
+ /** @todo XXX Check read permission according to inode->i_mode! */
+
+ if (!size)
+ return 0;
+
+ /*
+ * If there is a mapping and O_DIRECT isn't in effect, we must at least
+ * heed dirty pages in the mapping and read from them. For simplicity
+ * though, we just do page cache reading when there are writable
+ * mappings around with any kind of pages loaded.
+ */
+ if (vbsf_should_use_cached_read(file, mapping, pSuperInfo))
+ return vbsf_reg_read_mapped(file, buf, size, off);
+
+ /*
+ * For small requests, try to use an embedded buffer provided we get a heap block
+ * that does not cross page boundaries (see host code).
+ */
+ if (size <= PAGE_SIZE / 4 * 3 - RT_UOFFSETOF(VBOXSFREADEMBEDDEDREQ, abData[0]) /* see allocator */) {
+ uint32_t const cbReq = RT_UOFFSETOF(VBOXSFREADEMBEDDEDREQ, abData[0]) + size;
+ VBOXSFREADEMBEDDEDREQ *pReq = (VBOXSFREADEMBEDDEDREQ *)VbglR0PhysHeapAlloc(cbReq);
+ if (pReq) {
+ if ((PAGE_SIZE - ((uintptr_t)pReq & PAGE_OFFSET_MASK)) >= cbReq) { /* request fits in what is left of its page */
+ ssize_t cbRet;
+ int vrc = VbglR0SfHostReqReadEmbedded(pSuperInfo->map.root, pReq, sf_r->Handle.hHost, *off, (uint32_t)size);
+ if (RT_SUCCESS(vrc)) {
+ cbRet = pReq->Parms.cb32Read.u.value32;
+ AssertStmt(cbRet <= (ssize_t)size, cbRet = size);
+ if (copy_to_user(buf, pReq->abData, cbRet) == 0)
+ *off += cbRet;
+ else
+ cbRet = -EFAULT;
+ } else
+ cbRet = -EPROTO;
+ VbglR0PhysHeapFree(pReq);
+ SFLOGFLOW(("vbsf_reg_read: returns %zd (%#zx), *off=%RX64 [embed]\n", cbRet, cbRet, *off));
+ return cbRet;
+ }
+ VbglR0PhysHeapFree(pReq); /* block crosses a page boundary: fall through to the locking path */
+ }
+ }
+
+# if 0 /* Turns out this is slightly slower than locking the pages even for 4KB reads (4.19/amd64). */
+ /*
+ * For medium sized requests try to use a bounce buffer.
+ */
+ if (size <= _64K /** @todo make this configurable? */) {
+ void *pvBounce = kmalloc(size, GFP_KERNEL);
+ if (pvBounce) {
+ VBOXSFREADPGLSTREQ *pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq));
+ if (pReq) {
+ ssize_t cbRet;
+ int vrc = VbglR0SfHostReqReadContig(pSuperInfo->map.root, pReq, sf_r->Handle.hHost, *off,
+ (uint32_t)size, pvBounce, virt_to_phys(pvBounce));
+ if (RT_SUCCESS(vrc)) {
+ cbRet = pReq->Parms.cb32Read.u.value32;
+ AssertStmt(cbRet <= (ssize_t)size, cbRet = size);
+ if (copy_to_user(buf, pvBounce, cbRet) == 0)
+ *off += cbRet;
+ else
+ cbRet = -EFAULT;
+ } else
+ cbRet = -EPROTO;
+ VbglR0PhysHeapFree(pReq);
+ kfree(pvBounce);
+ SFLOGFLOW(("vbsf_reg_read: returns %zd (%#zx), *off=%RX64 [bounce]\n", cbRet, cbRet, *off));
+ return cbRet;
+ }
+ kfree(pvBounce);
+ }
+ }
+# endif
+
+ return vbsf_reg_read_locking(file, buf, size, off, pSuperInfo, sf_r);
+}
+
+#endif /* < 5.10.0 */
+
+/**
+ * Helper that synchronizes the page cache content with something we just wrote
+ * to the host. Source is either @a pbSrcBuf or, when that is NULL, @a papSrcPages.
+ */
+static void vbsf_reg_write_sync_page_cache(struct address_space *mapping, loff_t offFile, uint32_t cbRange,
+ uint8_t const *pbSrcBuf, struct page **papSrcPages,
+ uint32_t offSrcPage, size_t cSrcPages)
+{
+ Assert(offSrcPage < PAGE_SIZE);
+ if (mapping && mapping->nrpages > 0) {
+ /*
+ * Work the pages in the write range.
+ */
+ while (cbRange > 0) {
+ /*
+ * Lookup the page at offFile. We're fine if there aren't
+ * any there. We skip it if it's dirty or is being written
+ * back, at least for now.
+ */
+ size_t const offDstPage = offFile & PAGE_OFFSET_MASK;
+ size_t const cbToCopy = RT_MIN(PAGE_SIZE - offDstPage, cbRange);
+ pgoff_t const idxPage = offFile >> PAGE_SHIFT;
+ struct page *pDstPage = find_lock_page(mapping, idxPage);
+ if (pDstPage) {
+ if ( pDstPage->mapping == mapping /* ignore if re-purposed (paranoia) */
+ && pDstPage->index == idxPage
+ && !PageDirty(pDstPage) /* ignore if dirty */
+ && !PageWriteback(pDstPage) /* ignore if being written back */ ) {
+ /*
+ * Map the page and do the copying.
+ */
+ uint8_t *pbDst = (uint8_t *)kmap(pDstPage);
+ if (pbSrcBuf)
+ memcpy(&pbDst[offDstPage], pbSrcBuf, cbToCopy);
+ else {
+ uint32_t const cbSrc0 = PAGE_SIZE - offSrcPage; /* bytes left in the current source page */
+ uint8_t const *pbSrc = (uint8_t const *)kmap(papSrcPages[0]);
+ AssertMsg(cSrcPages >= 1, ("offFile=%#llx cbRange=%#zx cbToCopy=%#zx\n", offFile, cbRange, cbToCopy));
+ memcpy(&pbDst[offDstPage], &pbSrc[offSrcPage], RT_MIN(cbToCopy, cbSrc0));
+ kunmap(papSrcPages[0]);
+ if (cbToCopy > cbSrc0) { /* the remainder comes from the start of the next source page */
+ AssertMsg(cSrcPages >= 2, ("offFile=%#llx cbRange=%#zx cbToCopy=%#zx\n", offFile, cbRange, cbToCopy));
+ pbSrc = (uint8_t const *)kmap(papSrcPages[1]);
+ memcpy(&pbDst[offDstPage + cbSrc0], pbSrc, cbToCopy - cbSrc0);
+ kunmap(papSrcPages[1]);
+ }
+ }
+ kunmap(pDstPage);
+ flush_dcache_page(pDstPage);
+ if (cbToCopy == PAGE_SIZE) /* only a fully overwritten page is marked up to date */
+ SetPageUptodate(pDstPage);
+# if RTLNX_VER_MIN(2,4,10)
+ mark_page_accessed(pDstPage);
+# endif
+ } else
+ SFLOGFLOW(("vbsf_reg_write_sync_page_cache: Skipping page %p: mapping=%p (vs %p) writeback=%d offset=%#lx (vs%#lx)\n",
+ pDstPage, pDstPage->mapping, mapping, PageWriteback(pDstPage), pDstPage->index, idxPage));
+ unlock_page(pDstPage);
+ vbsf_put_page(pDstPage);
+ }
+
+ /*
+ * Advance source and file positions, also when the page was skipped.
+ */
+ if (pbSrcBuf)
+ pbSrcBuf += cbToCopy;
+ else
+ {
+ offSrcPage += cbToCopy;
+ Assert(offSrcPage < PAGE_SIZE * 2);
+ if (offSrcPage >= PAGE_SIZE) { /* crossed into the next source page */
+ offSrcPage &= PAGE_OFFSET_MASK;
+ papSrcPages++;
+# ifdef VBOX_STRICT
+ Assert(cSrcPages > 0);
+ cSrcPages--;
+# endif
+ }
+ }
+ offFile += cbToCopy;
+ cbRange -= cbToCopy;
+ }
+ }
+ RT_NOREF(cSrcPages);
+}
+
+#if RTLNX_VER_MAX(5,10,0) /* No regular .read/.write for 5.10, only .read_iter/.write_iter or in-kernel reads/writes fail. */
+
+/**
+ * Fallback case of vbsf_reg_write() that locks the user buffers and lets the host
+ * read directly from them.  Returns the number of bytes written or a negative errno.
+ */
+static ssize_t vbsf_reg_write_locking(struct file *file, const char /*__user*/ *buf, size_t size, loff_t *off, loff_t offFile,
+                                      struct inode *inode, struct vbsf_inode_info *sf_i,
+                                      struct vbsf_super_info *pSuperInfo, struct vbsf_reg_info *sf_r)
+{
+    /*
+     * Lock pages and execute the write, taking care not to pass the host
+     * more than it can handle in one go or more than we care to allocate
+     * page arrays for.  The latter limit is set at just short of 32KB due
+     * to how the physical heap works.
+     */
+    struct page *apPagesStack[16];
+    struct page **papPages = &apPagesStack[0];
+    struct page **papPagesFree = NULL;
+    VBOXSFWRITEPGLSTREQ *pReq;
+    ssize_t cbRet = -ENOMEM;
+    size_t cPages = (((uintptr_t)buf & PAGE_OFFSET_MASK) + size + PAGE_OFFSET_MASK) >> PAGE_SHIFT;
+    size_t cMaxPages = RT_MIN(RT_MAX(pSuperInfo->cMaxIoPages, 1), cPages);
+    bool fLockPgHack;
+
+    pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFWRITEPGLSTREQ, PgLst.aPages[cMaxPages]));
+    while (!pReq && cMaxPages > 4) { /* halve the request size until the allocation succeeds */
+        cMaxPages /= 2;
+        pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFWRITEPGLSTREQ, PgLst.aPages[cMaxPages]));
+    }
+    if (pReq && cMaxPages > RT_ELEMENTS(apPagesStack))
+        papPagesFree = papPages = kmalloc(cMaxPages * sizeof(papPages[0]), GFP_KERNEL); /* sized by element, not by sizeof(size_t) */
+    if (pReq && papPages) {
+        cbRet = 0;
+        for (;;) {
+            /*
+             * Figure out how much to process now and lock the user pages.
+             */
+            int rc;
+            size_t cbChunk = (uintptr_t)buf & PAGE_OFFSET_MASK;
+            pReq->PgLst.offFirstPage = (uint16_t)cbChunk;
+            cPages = RT_ALIGN_Z(cbChunk + size, PAGE_SIZE) >> PAGE_SHIFT;
+            if (cPages <= cMaxPages)
+                cbChunk = size;
+            else {
+                cPages = cMaxPages;
+                cbChunk = (cMaxPages << PAGE_SHIFT) - cbChunk;
+            }
+
+            rc = vbsf_lock_user_pages((uintptr_t)buf, cPages, false /*fWrite*/, papPages, &fLockPgHack);
+            if (rc == 0) {
+                size_t iPage = cPages;
+                while (iPage-- > 0)
+                    pReq->PgLst.aPages[iPage] = page_to_phys(papPages[iPage]);
+            } else {
+                cbRet = rc;
+                break;
+            }
+
+            /*
+             * Issue the request and unlock the pages.
+             */
+            rc = VbglR0SfHostReqWritePgLst(pSuperInfo->map.root, pReq, sf_r->Handle.hHost, offFile, cbChunk, cPages);
+            sf_i->ModificationTimeAtOurLastWrite = sf_i->ModificationTime;
+            if (RT_SUCCESS(rc)) {
+                /*
+                 * Success, advance position and buffer.
+                 */
+                uint32_t cbActual = pReq->Parms.cb32Write.u.value32;
+                AssertStmt(cbActual <= cbChunk, cbActual = cbChunk);
+
+                vbsf_reg_write_sync_page_cache(inode->i_mapping, offFile, cbActual, NULL /*pbKrnlBuf*/,
+                                               papPages, (uintptr_t)buf & PAGE_OFFSET_MASK, cPages);
+                Assert(cPages <= cMaxPages);
+                vbsf_unlock_user_pages(papPages, cPages, false /*fSetDirty*/, fLockPgHack);
+
+                cbRet += cbActual;
+                buf = (uint8_t *)buf + cbActual;
+                size -= cbActual;
+
+                offFile += cbActual;
+                if ((file->f_flags & O_APPEND) && (g_fSfFeatures & SHFL_FEATURE_WRITE_UPDATES_OFFSET))
+                    offFile = pReq->Parms.off64Write.u.value64;
+                if (offFile > i_size_read(inode))
+                    i_size_write(inode, offFile);
+
+                sf_i->force_restat = 1; /* mtime (and size) may have changed */
+
+                /*
+                 * Are we done already?  If so commit the new file offset.
+                 */
+                if (!size || cbActual < cbChunk) {
+                    *off = offFile;
+                    break;
+                }
+            } else {
+                vbsf_unlock_user_pages(papPages, cPages, false /*fSetDirty*/, fLockPgHack);
+                if (rc == VERR_NO_MEMORY && cMaxPages > 4) {
+                    /*
+                     * The host probably doesn't have enough heap to handle the
+                     * request, reduce the page count and retry.
+                     */
+                    cMaxPages /= 4;
+                    Assert(cMaxPages > 0);
+                } else {
+                    /*
+                     * If we've successfully written stuff, return it rather than
+                     * the error.  (Not sure if this is such a great idea...)
+                     */
+                    if (cbRet > 0) {
+                        SFLOGFLOW(("vbsf_reg_write: write at %#RX64 -> %Rrc; got cbRet=%#zx already\n", offFile, rc, cbRet));
+                        *off = offFile;
+                    } else {
+                        SFLOGFLOW(("vbsf_reg_write: write at %#RX64 -> %Rrc\n", offFile, rc));
+                        cbRet = -EPROTO;
+                    }
+                    break;
+                }
+            }
+        }
+    }
+    if (papPagesFree)
+        kfree(papPages);
+    if (pReq)
+        VbglR0PhysHeapFree(pReq);
+    SFLOGFLOW(("vbsf_reg_write: returns %zd (%#zx), *off=%RX64 [lock]\n", cbRet, cbRet, *off));
+    return cbRet;
+}
+
+
+/**
+ * Write to a regular file.
+ *
+ * Honors O_APPEND by starting at the current inode size.  When the file has
+ * writable mappings with cached pages, the target range is first passed to
+ * filemap_fdatawait_range() (kernels >= 2.6.32).  Small writes are sent to the
+ * host in an embedded-buffer request; everything else, including the case
+ * where the heap block for the embedded request crosses a page boundary, is
+ * handed to vbsf_reg_write_locking().
+ *
+ * @param file the file
+ * @param buf the buffer (read with copy_from_user() on the embedded path)
+ * @param size length of the buffer
+ * @param off offset within the file; set to the new position after a
+ * successful embedded write
+ * @returns the number of written bytes on success, Linux error code otherwise
+ * (-EINVAL if the inode is not a regular file, -EFAULT if the buffer
+ * cannot be copied, -EPROTO if the host request fails, or the status
+ * returned by filemap_fdatawait_range())
+ */
+static ssize_t vbsf_reg_write(struct file *file, const char *buf, size_t size, loff_t * off)
+{
+ struct inode *inode = VBSF_GET_F_DENTRY(file)->d_inode;
+ struct vbsf_inode_info *sf_i = VBSF_GET_INODE_INFO(inode);
+ struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(inode->i_sb);
+ struct vbsf_reg_info *sf_r = file->private_data;
+ struct address_space *mapping = inode->i_mapping;
+ loff_t pos;
+
+ SFLOGFLOW(("vbsf_reg_write: inode=%p file=%p buf=%p size=%#zx off=%#llx\n", inode, file, buf, size, *off));
+ Assert(sf_i);
+ Assert(pSuperInfo);
+ Assert(sf_r);
+ AssertReturn(S_ISREG(inode->i_mode), -EINVAL);
+
+ pos = *off;
+ if (file->f_flags & O_APPEND)
+ pos = i_size_read(inode); /* appending: start at the size currently recorded in the inode */
+
+ /** @todo XXX Check write permission according to inode->i_mode! */
+
+ if (!size) {
+ if (file->f_flags & O_APPEND) /** @todo check if this is the consensus behavior... */
+ *off = pos;
+ return 0;
+ }
+
+ /** @todo Implement the read-write caching mode. */
+
+ /*
+ * If there are active writable mappings, coordinate with any
+ * pending writes via those.
+ */
+ if ( mapping
+ && mapping->nrpages > 0
+ && mapping_writably_mapped(mapping)) {
+# if RTLNX_VER_MIN(2,6,32)
+ int err = filemap_fdatawait_range(mapping, pos, pos + size - 1);
+ if (err)
+ return err;
+# else
+ /** @todo ... */
+# endif
+ }
+
+ /*
+ * For small requests, try to use an embedded buffer provided we get a heap
+ * block that does not cross page boundaries (see host code).
+ */
+ if (size <= PAGE_SIZE / 4 * 3 - RT_UOFFSETOF(VBOXSFWRITEEMBEDDEDREQ, abData[0]) /* see allocator */) {
+ uint32_t const cbReq = RT_UOFFSETOF(VBOXSFWRITEEMBEDDEDREQ, abData[0]) + size;
+ VBOXSFWRITEEMBEDDEDREQ *pReq = (VBOXSFWRITEEMBEDDEDREQ *)VbglR0PhysHeapAlloc(cbReq);
+ if ( pReq
+ && (PAGE_SIZE - ((uintptr_t)pReq & PAGE_OFFSET_MASK)) >= cbReq) { /* the whole request fits in the rest of its page */
+ ssize_t cbRet;
+ if (copy_from_user(pReq->abData, buf, size) == 0) {
+ int vrc = VbglR0SfHostReqWriteEmbedded(pSuperInfo->map.root, pReq, sf_r->Handle.hHost,
+ pos, (uint32_t)size);
+ sf_i->ModificationTimeAtOurLastWrite = sf_i->ModificationTime;
+ if (RT_SUCCESS(vrc)) {
+ cbRet = pReq->Parms.cb32Write.u.value32;
+ AssertStmt(cbRet <= (ssize_t)size, cbRet = size);
+ vbsf_reg_write_sync_page_cache(mapping, pos, (uint32_t)cbRet, pReq->abData,
+ NULL /*papSrcPages*/, 0 /*offSrcPage0*/, 0 /*cSrcPages*/);
+ pos += cbRet;
+ if ((file->f_flags & O_APPEND) && (g_fSfFeatures & SHFL_FEATURE_WRITE_UPDATES_OFFSET))
+ pos = pReq->Parms.off64Write.u.value64; /* take the offset reported back in the request instead */
+ *off = pos;
+ if (pos > i_size_read(inode))
+ i_size_write(inode, pos);
+ } else
+ cbRet = -EPROTO;
+ sf_i->force_restat = 1; /* mtime (and size) may have changed */
+ } else
+ cbRet = -EFAULT;
+
+ VbglR0PhysHeapFree(pReq);
+ SFLOGFLOW(("vbsf_reg_write: returns %zd (%#zx), *off=%RX64 [embed]\n", cbRet, cbRet, *off));
+ return cbRet;
+ }
+ if (pReq) /* allocated, but the block crosses a page boundary: free it and fall through */
+ VbglR0PhysHeapFree(pReq);
+ }
+
+# if 0 /* Turns out this is slightly slower than locking the pages even for 4KB reads (4.19/amd64). */
+ /*
+ * For medium sized requests try use a bounce buffer.
+ */
+ if (size <= _64K /** @todo make this configurable? */) {
+ void *pvBounce = kmalloc(size, GFP_KERNEL);
+ if (pvBounce) {
+ if (copy_from_user(pvBounce, buf, size) == 0) {
+ VBOXSFWRITEPGLSTREQ *pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq));
+ if (pReq) {
+ ssize_t cbRet;
+ int vrc = VbglR0SfHostReqWriteContig(pSuperInfo->map.root, pReq, sf_r->handle, pos,
+ (uint32_t)size, pvBounce, virt_to_phys(pvBounce));
+ sf_i->ModificationTimeAtOurLastWrite = sf_i->ModificationTime;
+ if (RT_SUCCESS(vrc)) {
+ cbRet = pReq->Parms.cb32Write.u.value32;
+ AssertStmt(cbRet <= (ssize_t)size, cbRet = size);
+ vbsf_reg_write_sync_page_cache(mapping, pos, (uint32_t)cbRet, (uint8_t const *)pvBounce,
+ NULL /*papSrcPages*/, 0 /*offSrcPage0*/, 0 /*cSrcPages*/);
+ pos += cbRet;
+ *off = pos;
+ if (pos > i_size_read(inode))
+ i_size_write(inode, pos);
+ } else
+ cbRet = -EPROTO;
+ sf_i->force_restat = 1; /* mtime (and size) may have changed */
+ VbglR0PhysHeapFree(pReq);
+ kfree(pvBounce);
+ SFLOGFLOW(("vbsf_reg_write: returns %zd (%#zx), *off=%RX64 [bounce]\n", cbRet, cbRet, *off));
+ return cbRet;
+ }
+ kfree(pvBounce);
+ } else {
+ kfree(pvBounce);
+ SFLOGFLOW(("vbsf_reg_write: returns -EFAULT, *off=%RX64 [bounce]\n", *off));
+ return -EFAULT;
+ }
+ }
+ }
+# endif
+
+ return vbsf_reg_write_locking(file, buf, size, off, pos, inode, sf_i, pSuperInfo, sf_r);
+}
+
+#endif /* < 5.10.0 */
+#if RTLNX_VER_MIN(2,6,19)
+/* See kernel 6.0.0 change eba2d3d798295dc43cae8fade102f9d083a2a741. */
+# if RTLNX_VER_MIN(6,0,0)
+# define VBOX_IOV_GET_PAGES iov_iter_get_pages2
+# else
+# define VBOX_IOV_GET_PAGES iov_iter_get_pages
+# endif
+
+/**
+ * Companion to vbsf_iter_lock_pages(): releases the pages again.
+ *
+ * @param iter      The iterator the pages were locked from.
+ * @param papPages  The page array.
+ * @param cPages    Number of entries in @a papPages.
+ * @param fSetDirty Whether to mark the pages dirty before releasing them.
+ *                  Ignored unless the iterator is a user I/O vector.
+ */
+DECLINLINE(void) vbsf_iter_unlock_pages(struct iov_iter *iter, struct page **papPages, size_t cPages, bool fSetDirty)
+{
+    /* Kernel pages (KVECs, BVECs, PIPEs) are never marked dirty. */
+    bool const fMarkDirty = iter_is_iovec(iter) ? fSetDirty : false;
+    size_t     idxNext    = cPages;
+
+    /* Walk the array from the last entry down to the first. */
+    for (; idxNext > 0; idxNext--) {
+        struct page *pCur = papPages[idxNext - 1];
+        if (fMarkDirty && !PageReserved(pCur))
+            set_page_dirty(pCur);
+        vbsf_put_page(pCur);
+    }
+}
+
+
+/**
+ * Locks up to @a cMaxPages from the I/O vector iterator, advancing the
+ * iterator.
+ *
+ * The returned pages form one chunk in which only the first page may start
+ * at a non-zero offset.  A following segment that does not start on a page
+ * boundary ends the chunk; for non-KVEC iterators its first page is kept in
+ * @a pStash and handed out by the next call.  On failure all pages locked so
+ * far are released and the three output values are set to zero.
+ *
+ * @returns 0 on success, negative errno value on failure.
+ * @param iter The iterator to lock pages from.
+ * @param fWrite Whether to write (true) or read (false) lock the pages.
+ * @param pStash Where we stash peek results.
+ * @param cMaxPages The maximum number of pages to get.
+ * @param papPages Where to return the locked pages.
+ * @param pcPages Where to return the number of pages.
+ * @param poffPage0 Where to return the offset into the first page.
+ * @param pcbChunk Where to return the number of bytes covered.
+ */
+static int vbsf_iter_lock_pages(struct iov_iter *iter, bool fWrite, struct vbsf_iter_stash *pStash, size_t cMaxPages,
+ struct page **papPages, size_t *pcPages, size_t *poffPage0, size_t *pcbChunk)
+{
+ size_t cbChunk = 0;
+ size_t cPages = 0;
+ size_t offPage0 = 0;
+ int rc = 0;
+
+ Assert(iov_iter_count(iter) + pStash->cb > 0);
+ if (!(VBSF_GET_ITER_TYPE(iter) & ITER_KVEC)) {
+ /*
+ * Do we have a stashed page?
+ */
+ if (pStash->pPage) {
+ papPages[0] = pStash->pPage;
+ offPage0 = pStash->off;
+ cbChunk = pStash->cb;
+ cPages = 1;
+ pStash->pPage = NULL;
+ pStash->off = 0;
+ pStash->cb = 0;
+ if ( offPage0 + cbChunk < PAGE_SIZE
+ || iov_iter_count(iter) == 0) {
+ *poffPage0 = offPage0;
+ *pcbChunk = cbChunk;
+ *pcPages = cPages;
+ SFLOGFLOW(("vbsf_iter_lock_pages: returns %d - cPages=%#zx offPage0=%#zx cbChunk=%zx (stashed)\n",
+ rc, cPages, offPage0, cbChunk));
+ return 0;
+ }
+ cMaxPages -= 1;
+ SFLOG3(("vbsf_iter_lock_pages: Picked up stashed page: %#zx LB %#zx\n", offPage0, cbChunk));
+ } else {
+# if RTLNX_VER_MAX(4,11,0)
+ /*
+ * Copy out our starting point to assist rewinding.
+ */
+ pStash->offFromEnd = iov_iter_count(iter);
+ pStash->Copy = *iter;
+# endif
+ }
+
+ /*
+ * Get pages segment by segment.
+ */
+ do {
+ /*
+ * Make a special case of the first time thru here, since that's
+ * the most typical scenario.
+ */
+ ssize_t cbSegRet;
+ if (cPages == 0) {
+# if RTLNX_VER_MAX(3,19,0)
+ while (!iov_iter_single_seg_count(iter)) /* Old code didn't skip empty segments which caused EFAULTs. */
+ iov_iter_advance(iter, 0);
+# endif
+ cbSegRet = VBOX_IOV_GET_PAGES(iter, papPages, iov_iter_count(iter), cMaxPages, &offPage0);
+ if (cbSegRet > 0) {
+# if RTLNX_VER_MAX(6,0,0)
+ iov_iter_advance(iter, cbSegRet);
+#endif
+ cbChunk = (size_t)cbSegRet;
+ cPages = RT_ALIGN_Z(offPage0 + cbSegRet, PAGE_SIZE) >> PAGE_SHIFT;
+ cMaxPages -= cPages;
+ SFLOG3(("vbsf_iter_lock_pages: iov_iter_get_pages -> %#zx @ %#zx; %#zx pages [first]\n", cbSegRet, offPage0, cPages));
+ if ( cMaxPages == 0
+ || ((offPage0 + (size_t)cbSegRet) & PAGE_OFFSET_MASK))
+ break;
+ } else {
+ AssertStmt(cbSegRet < 0, cbSegRet = -EFAULT);
+ rc = (int)cbSegRet;
+ break;
+ }
+ } else {
+ /*
+ * Probe first page of new segment to check that we've got a zero offset and
+ * can continue on the current chunk. Stash the page if the offset isn't zero.
+ */
+ size_t offPgProbe;
+ size_t cbSeg = iov_iter_single_seg_count(iter);
+ while (!cbSeg) {
+ iov_iter_advance(iter, 0);
+ cbSeg = iov_iter_single_seg_count(iter);
+ }
+ cbSegRet = VBOX_IOV_GET_PAGES(iter, &papPages[cPages], iov_iter_count(iter), 1, &offPgProbe);
+ if (cbSegRet > 0) {
+# if RTLNX_VER_MAX(6,0,0)
+ iov_iter_advance(iter, cbSegRet); /** @todo maybe not do this if we stash the page? */
+#endif
+ Assert(offPgProbe + cbSegRet <= PAGE_SIZE);
+ if (offPgProbe == 0) {
+ cbChunk += cbSegRet;
+ cPages += 1;
+ cMaxPages -= 1;
+ SFLOG3(("vbsf_iter_lock_pages: iov_iter_get_pages(1) -> %#zx @ %#zx\n", cbSegRet, offPgProbe));
+ if ( cMaxPages == 0
+ || cbSegRet != PAGE_SIZE)
+ break;
+
+ /*
+ * Get the rest of the segment (if anything remaining).
+ */
+ cbSeg -= cbSegRet;
+ if (cbSeg > 0) {
+ cbSegRet = VBOX_IOV_GET_PAGES(iter, &papPages[cPages], iov_iter_count(iter), cMaxPages, &offPgProbe);
+ if (cbSegRet > 0) {
+ size_t const cPgRet = RT_ALIGN_Z((size_t)cbSegRet, PAGE_SIZE) >> PAGE_SHIFT;
+ Assert(offPgProbe == 0);
+# if RTLNX_VER_MAX(6,0,0)
+ iov_iter_advance(iter, cbSegRet);
+# endif
+ SFLOG3(("vbsf_iter_lock_pages: iov_iter_get_pages() -> %#zx; %#zx pages\n", cbSegRet, cPgRet));
+ cPages += cPgRet;
+ cMaxPages -= cPgRet;
+ cbChunk += cbSegRet;
+ if ( cMaxPages == 0
+ || ((size_t)cbSegRet & PAGE_OFFSET_MASK))
+ break;
+ } else {
+ AssertStmt(cbSegRet < 0, cbSegRet = -EFAULT);
+ rc = (int)cbSegRet;
+ break;
+ }
+ }
+ } else {
+ /* The segment didn't start at a page boundary, so stash it for
+ the next round: */
+ SFLOGFLOW(("vbsf_iter_lock_pages: iov_iter_get_pages(1) -> %#zx @ %#zx; stashed\n", cbSegRet, offPgProbe));
+ Assert(papPages[cPages]);
+ pStash->pPage = papPages[cPages];
+ pStash->off = offPgProbe;
+ pStash->cb = cbSegRet;
+ break;
+ }
+ } else {
+ AssertStmt(cbSegRet < 0, cbSegRet = -EFAULT);
+ rc = (int)cbSegRet;
+ break;
+ }
+ }
+ Assert(cMaxPages > 0);
+ } while (iov_iter_count(iter) > 0);
+
+ } else {
+ /*
+ * The silly iov_iter_get_pages_alloc() function doesn't handle KVECs,
+ * so everyone needs to do that by themselves.
+ *
+ * Note! Fixes here may apply to rtR0MemObjNativeLockKernel()
+ * and vbsf_lock_user_pages_failed_check_kernel() as well.
+ */
+# if RTLNX_VER_MAX(4,11,0)
+ pStash->offFromEnd = iov_iter_count(iter);
+ pStash->Copy = *iter;
+# endif
+ do {
+ uint8_t *pbBuf;
+ size_t offStart;
+ size_t cPgSeg;
+
+ size_t cbSeg = iov_iter_single_seg_count(iter);
+ while (!cbSeg) {
+ iov_iter_advance(iter, 0);
+ cbSeg = iov_iter_single_seg_count(iter);
+ }
+
+# if RTLNX_VER_MIN(3,19,0)
+ pbBuf = iter->kvec->iov_base + iter->iov_offset;
+# else
+ pbBuf = iter->iov->iov_base + iter->iov_offset;
+# endif
+ offStart = (uintptr_t)pbBuf & PAGE_OFFSET_MASK;
+ if (!cPages)
+ offPage0 = offStart;
+ else if (offStart)
+ break;
+
+ cPgSeg = RT_ALIGN_Z(offStart + cbSeg, PAGE_SIZE) >> PAGE_SHIFT; /* include the partial first page, as the non-KVEC path does */
+ if (cPgSeg > cMaxPages) {
+ cPgSeg = cMaxPages;
+ cbSeg = (cPgSeg << PAGE_SHIFT) - offStart;
+ }
+
+ rc = vbsf_lock_kernel_pages(pbBuf, fWrite, cPgSeg, &papPages[cPages]);
+ if (rc == 0) {
+ iov_iter_advance(iter, cbSeg);
+ cbChunk += cbSeg;
+ cPages += cPgSeg;
+ cMaxPages -= cPgSeg;
+ if ( cMaxPages == 0
+ || ((offStart + cbSeg) & PAGE_OFFSET_MASK) != 0)
+ break;
+ } else
+ break;
+ } while (iov_iter_count(iter) > 0);
+ }
+
+ /*
+ * Clean up if we failed; set return values.
+ */
+ if (rc == 0) {
+ /* likely */
+ } else {
+ if (cPages > 0)
+ vbsf_iter_unlock_pages(iter, papPages, cPages, false /*fSetDirty*/);
+ offPage0 = cbChunk = cPages = 0;
+ }
+ *poffPage0 = offPage0;
+ *pcbChunk = cbChunk;
+ *pcPages = cPages;
+ SFLOGFLOW(("vbsf_iter_lock_pages: returns %d - cPages=%#zx offPage0=%#zx cbChunk=%zx\n", rc, cPages, offPage0, cbChunk));
+ return rc;
+}
+
+
+/**
+ * Rewinds the I/O vector.
+ *
+ * Any page held in @a pStash is released first and the stash is cleared; the
+ * byte count it covered is added to the amount to rewind.
+ *
+ * @returns true if iov_iter_revert() was called (kernels >= 4.11 or < 3.16),
+ * false on the kernel versions in between, where rewinding is not
+ * implemented and the iterator is left as it is.
+ * @param iter The iterator to rewind.
+ * @param pStash The page locking stash to release and clear.
+ * @param cbToRewind Number of bytes to rewind, not counting stashed bytes.
+ * @param cbChunk Not used by this function.
+ */
+static bool vbsf_iter_rewind(struct iov_iter *iter, struct vbsf_iter_stash *pStash, size_t cbToRewind, size_t cbChunk)
+{
+ size_t cbExtra;
+ if (!pStash->pPage) {
+ cbExtra = 0;
+ } else {
+ cbExtra = pStash->cb; /* the stashed bytes also have to be given back to the iterator */
+ vbsf_put_page(pStash->pPage);
+ pStash->pPage = NULL;
+ pStash->cb = 0;
+ pStash->off = 0;
+ }
+
+# if RTLNX_VER_MIN(4,11,0) || RTLNX_VER_MAX(3,16,0)
+ iov_iter_revert(iter, cbToRewind + cbExtra);
+ return true;
+# else
+ /** @todo impl this */
+ return false;
+# endif
+}
+
+
+/**
+ * Cleans up the page locking stash.
+ *
+ * Does nothing unless a page is stashed; otherwise lets vbsf_iter_rewind()
+ * release it, with no additional bytes to rewind.
+ *
+ * @param iter   The iterator the stash belongs to.
+ * @param pStash The stash to clean up.
+ */
+DECLINLINE(void) vbsf_iter_cleanup_stash(struct iov_iter *iter, struct vbsf_iter_stash *pStash)
+{
+    if (!pStash->pPage)
+        return;
+    vbsf_iter_rewind(iter, pStash, 0, 0);
+}
+
+
+/**
+ * Calculates the longest span of pages we could transfer to the host in a
+ * single request.
+ *
+ * A span starts with any segment and continues across following segments for
+ * as long as the previous one ends on a page boundary and the next one starts
+ * on one.  For iterators that are neither user I/O vectors nor KVECs only a
+ * rough estimate is made.
+ *
+ * @returns Page count, non-zero.
+ * @param iter The I/O vector iterator to inspect.
+ */
+static size_t vbsf_iter_max_span_of_pages(struct iov_iter *iter)
+{
+ size_t cPages;
+# if RTLNX_VER_MIN(3,16,0)
+ if (iter_is_iovec(iter) || (VBSF_GET_ITER_TYPE(iter) & ITER_KVEC)) {
+# endif
+ const struct iovec *pCurIov = iter->iov;
+ size_t cLeft = iter->nr_segs;
+ size_t cPagesSpan = 0;
+
+ /* iovec and kvec are identical, except for the __user tagging of iov_base. */
+ AssertCompileMembersSameSizeAndOffset(struct iovec, iov_base, struct kvec, iov_base);
+ AssertCompileMembersSameSizeAndOffset(struct iovec, iov_len, struct kvec, iov_len);
+ AssertCompile(sizeof(struct iovec) == sizeof(struct kvec));
+
+ cPages = 1;
+ AssertReturn(cLeft > 0, cPages);
+
+ /* Special case: segment offset. */
+ if (iter->iov_offset > 0) {
+ if (iter->iov_offset < pCurIov->iov_len) {
+ size_t const cbSegLeft = pCurIov->iov_len - iter->iov_offset;
+ size_t const offPage0 = ((uintptr_t)pCurIov->iov_base + iter->iov_offset) & PAGE_OFFSET_MASK;
+ cPages = cPagesSpan = RT_ALIGN_Z(offPage0 + cbSegLeft, PAGE_SIZE) >> PAGE_SHIFT;
+ if ((offPage0 + cbSegLeft) & PAGE_OFFSET_MASK)
+ cPagesSpan = 0;
+ }
+ SFLOGFLOW(("vbsf_iter: seg[0]= %p LB %#zx\n", pCurIov->iov_base, pCurIov->iov_len));
+ pCurIov++;
+ cLeft--;
+ }
+
+ /* Full segments. */
+ while (cLeft-- > 0) {
+ if (pCurIov->iov_len > 0) {
+ size_t const offPage0 = (uintptr_t)pCurIov->iov_base & PAGE_OFFSET_MASK;
+ if (offPage0 == 0) {
+ if (!(pCurIov->iov_len & PAGE_OFFSET_MASK)) {
+ cPagesSpan += pCurIov->iov_len >> PAGE_SHIFT;
+ } else {
+ cPagesSpan += RT_ALIGN_Z(pCurIov->iov_len, PAGE_SIZE) >> PAGE_SHIFT;
+ if (cPagesSpan > cPages)
+ cPages = cPagesSpan;
+ cPagesSpan = 0;
+ }
+ } else {
+ /* A segment starting mid-page ends the running span and starts a new
+ one, whose page count must include the partial first page. */
+ size_t const cPagesSeg = RT_ALIGN_Z(offPage0 + pCurIov->iov_len, PAGE_SIZE) >> PAGE_SHIFT;
+ if (cPagesSpan > cPages)
+ cPages = cPagesSpan;
+ if (!((offPage0 + pCurIov->iov_len) & PAGE_OFFSET_MASK)) {
+ cPagesSpan = cPagesSeg; /* ends on a page boundary, so the new span may continue */
+ } else {
+ if (cPagesSeg > cPages)
+ cPages = cPagesSeg;
+ cPagesSpan = 0;
+ }
+ }
+ }
+ SFLOGFLOW(("vbsf_iter: seg[%u]= %p LB %#zx\n", iter->nr_segs - cLeft, pCurIov->iov_base, pCurIov->iov_len));
+ pCurIov++;
+ }
+ if (cPagesSpan > cPages)
+ cPages = cPagesSpan;
+# if RTLNX_VER_MIN(3,16,0)
+ } else {
+ /* Won't bother with accurate counts for the next two types, just make
+ some rough estimates (do pipes have segments?): */
+ size_t cSegs = VBSF_GET_ITER_TYPE(iter) & ITER_BVEC ? RT_MAX(1, iter->nr_segs) : 1;
+ cPages = (iov_iter_count(iter) + (PAGE_SIZE * 2 - 2) * cSegs) >> PAGE_SHIFT;
+ }
+# endif
+ SFLOGFLOW(("vbsf_iter_max_span_of_pages: returns %#zx\n", cPages));
+ return cPages;
+}
+
+
+/**
+ * Worker for vbsf_reg_read_iter() that deals with larger reads using page
+ * locking.
+ *
+ * @returns Number of bytes read on success, negative errno on failure:
+ * -ENOMEM if the request or the page array cannot be allocated, the
+ * vbsf_iter_lock_pages() status if locking fails, or -EPROTO if the
+ * host request fails before anything was read.
+ * @param kio The kernel I/O control block; ki_pos is advanced by the bytes read.
+ * @param iter The I/O vector iterator describing the buffer.
+ * @param cbToRead Number of bytes to read.
+ * @param pSuperInfo The super block info (mapping root and cMaxIoPages).
+ * @param sf_r The regular file info providing the host handle.
+ */
+static ssize_t vbsf_reg_read_iter_locking(struct kiocb *kio, struct iov_iter *iter, size_t cbToRead,
+ struct vbsf_super_info *pSuperInfo, struct vbsf_reg_info *sf_r)
+{
+ /*
+ * Estimate how many pages we may possibly submit in a single request so
+ * that we can allocate matching request buffer and page array.
+ */
+ struct page *apPagesStack[16];
+ struct page **papPages = &apPagesStack[0];
+ struct page **papPagesFree = NULL;
+ VBOXSFREADPGLSTREQ *pReq;
+ ssize_t cbRet = 0;
+ size_t cMaxPages = vbsf_iter_max_span_of_pages(iter);
+ cMaxPages = RT_MIN(RT_MAX(pSuperInfo->cMaxIoPages, 2), cMaxPages);
+
+ pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFREADPGLSTREQ, PgLst.aPages[cMaxPages]));
+ while (!pReq && cMaxPages > 4) { /* halve the request size until the allocation succeeds */
+ cMaxPages /= 2;
+ pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFREADPGLSTREQ, PgLst.aPages[cMaxPages]));
+ }
+ if (pReq && cMaxPages > RT_ELEMENTS(apPagesStack)) /* the stack array is too small: size the heap array by its element type */
+ papPagesFree = papPages = kmalloc(cMaxPages * sizeof(papPages[0]), GFP_KERNEL);
+ if (pReq && papPages) {
+
+ /*
+ * The read loop.
+ */
+ struct vbsf_iter_stash Stash = VBSF_ITER_STASH_INITIALIZER;
+ do {
+ /*
+ * Grab as many pages as we can. This means that if adjacent
+ * segments both start and end at a page boundary, we can
+ * do them both in the same transfer from the host.
+ */
+ size_t cPages = 0;
+ size_t cbChunk = 0;
+ size_t offPage0 = 0;
+ int rc = vbsf_iter_lock_pages(iter, true /*fWrite*/, &Stash, cMaxPages, papPages, &cPages, &offPage0, &cbChunk);
+ if (rc == 0) {
+ size_t iPage = cPages;
+ while (iPage-- > 0)
+ pReq->PgLst.aPages[iPage] = page_to_phys(papPages[iPage]);
+ pReq->PgLst.offFirstPage = (uint16_t)offPage0;
+ AssertStmt(cbChunk <= cbToRead, cbChunk = cbToRead);
+ } else {
+ cbRet = rc;
+ break;
+ }
+
+ /*
+ * Issue the request and unlock the pages.
+ */
+ rc = VbglR0SfHostReqReadPgLst(pSuperInfo->map.root, pReq, sf_r->Handle.hHost, kio->ki_pos, cbChunk, cPages);
+ SFLOGFLOW(("vbsf_reg_read_iter_locking: VbglR0SfHostReqReadPgLst -> %d (cbActual=%#x cbChunk=%#zx of %#zx cPages=%#zx offPage0=%#x\n",
+ rc, pReq->Parms.cb32Read.u.value32, cbChunk, cbToRead, cPages, offPage0));
+
+ vbsf_iter_unlock_pages(iter, papPages, cPages, true /*fSetDirty*/);
+
+ if (RT_SUCCESS(rc)) {
+ /*
+ * Success, advance position and buffer.
+ */
+ uint32_t cbActual = pReq->Parms.cb32Read.u.value32;
+ AssertStmt(cbActual <= cbChunk, cbActual = cbChunk);
+ cbRet += cbActual;
+ kio->ki_pos += cbActual;
+ cbToRead -= cbActual;
+
+ /*
+ * Are we done already?
+ */
+ if (!cbToRead)
+ break;
+ if (cbActual < cbChunk) { /* We ASSUME end-of-file here. */
+ if (vbsf_iter_rewind(iter, &Stash, cbChunk - cbActual, cbActual))
+ iov_iter_truncate(iter, 0);
+ break;
+ }
+ } else {
+ /*
+ * Try rewind the iter structure.
+ */
+ bool const fRewindOkay = vbsf_iter_rewind(iter, &Stash, cbChunk, cbChunk);
+ if (rc == VERR_NO_MEMORY && cMaxPages > 4 && fRewindOkay) {
+ /*
+ * The host probably doesn't have enough heap to handle the
+ * request, reduce the page count and retry.
+ */
+ cMaxPages /= 4;
+ Assert(cMaxPages > 0);
+ } else {
+ /*
+ * If we've successfully read stuff, return it rather than
+ * the error. (Not sure if this is such a great idea...)
+ */
+ if (cbRet <= 0)
+ cbRet = -EPROTO;
+ break;
+ }
+ }
+ } while (cbToRead > 0);
+
+ vbsf_iter_cleanup_stash(iter, &Stash);
+ }
+ else
+ cbRet = -ENOMEM;
+ if (papPagesFree)
+ kfree(papPages);
+ if (pReq)
+ VbglR0PhysHeapFree(pReq);
+ SFLOGFLOW(("vbsf_reg_read_iter_locking: returns %#zx (%zd)\n", cbRet, cbRet));
+ return cbRet;
+}
+
+
+/**
+ * Read into I/O vector iterator.
+ *
+ * Goes through the page cache (generic_file_read_iter() or
+ * generic_file_aio_read()) when vbsf_should_use_cached_read() says so,
+ * rejects asynchronous requests, serves small reads with an embedded-buffer
+ * host request and hands everything else to vbsf_reg_read_iter_locking().
+ *
+ * On kernels older than 3.16 this is compiled as vbsf_reg_aio_read(), which
+ * takes the iovec array, segment count and file offset instead of an
+ * iterator and wraps them in a local fake iterator.
+ *
+ * @returns Number of bytes read on success, negative errno on error
+ * (-EINVAL if the inode is not a regular file, -EOPNOTSUPP for
+ * async I/O, -EFAULT if copying to the iterator comes up short,
+ * -EPROTO if the embedded host request fails).
+ * @param kio The kernel I/O control block (or something like that).
+ * @param iter The I/O vector iterator describing the buffer.
+ */
+# if RTLNX_VER_MIN(3,16,0)
+static ssize_t vbsf_reg_read_iter(struct kiocb *kio, struct iov_iter *iter)
+# else
+static ssize_t vbsf_reg_aio_read(struct kiocb *kio, const struct iovec *iov, unsigned long cSegs, loff_t offFile)
+# endif
+{
+# if RTLNX_VER_MAX(3,16,0)
+ struct vbsf_iov_iter fake_iter = VBSF_IOV_ITER_INITIALIZER(cSegs, iov, 0 /*write*/);
+ struct vbsf_iov_iter *iter = &fake_iter;
+# endif
+ size_t cbToRead = iov_iter_count(iter);
+ struct inode *inode = VBSF_GET_F_DENTRY(kio->ki_filp)->d_inode;
+ struct address_space *mapping = inode->i_mapping;
+
+ struct vbsf_reg_info *sf_r = kio->ki_filp->private_data;
+ struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(inode->i_sb);
+
+ SFLOGFLOW(("vbsf_reg_read_iter: inode=%p file=%p size=%#zx off=%#llx type=%#x\n",
+ inode, kio->ki_filp, cbToRead, kio->ki_pos, VBSF_GET_ITER_TYPE(iter) ));
+ AssertReturn(S_ISREG(inode->i_mode), -EINVAL);
+
+ /*
+ * Do we have anything at all to do here?
+ */
+ if (!cbToRead)
+ return 0;
+
+ /*
+ * If there is a mapping and O_DIRECT isn't in effect, we must at least
+ * heed dirty pages in the mapping and read from them. For simplicity
+ * though, we just do page cache reading when there are writable
+ * mappings around with any kind of pages loaded.
+ */
+ if (vbsf_should_use_cached_read(kio->ki_filp, mapping, pSuperInfo)) {
+# if RTLNX_VER_MIN(3,16,0)
+ return generic_file_read_iter(kio, iter);
+# else
+ return generic_file_aio_read(kio, iov, cSegs, offFile);
+# endif
+ }
+
+ /*
+ * For now we reject async I/O requests.
+ */
+ if (!is_sync_kiocb(kio)) {
+ SFLOGFLOW(("vbsf_reg_read_iter: async I/O not yet supported\n")); /** @todo extend FsPerf with AIO tests. */
+ return -EOPNOTSUPP;
+ }
+
+ /*
+ * For small requests, try to use an embedded buffer provided we get a heap
+ * block that does not cross page boundaries (see host code).
+ */
+ if (cbToRead <= PAGE_SIZE / 4 * 3 - RT_UOFFSETOF(VBOXSFREADEMBEDDEDREQ, abData[0]) /* see allocator */) {
+ uint32_t const cbReq = RT_UOFFSETOF(VBOXSFREADEMBEDDEDREQ, abData[0]) + cbToRead;
+ VBOXSFREADEMBEDDEDREQ *pReq = (VBOXSFREADEMBEDDEDREQ *)VbglR0PhysHeapAlloc(cbReq);
+ if (pReq) {
+ if ((PAGE_SIZE - ((uintptr_t)pReq & PAGE_OFFSET_MASK)) >= cbReq) { /* the whole request fits in the rest of its page */
+ ssize_t cbRet;
+ int vrc = VbglR0SfHostReqReadEmbedded(pSuperInfo->map.root, pReq, sf_r->Handle.hHost,
+ kio->ki_pos, (uint32_t)cbToRead);
+ if (RT_SUCCESS(vrc)) {
+ cbRet = pReq->Parms.cb32Read.u.value32;
+ AssertStmt(cbRet <= (ssize_t)cbToRead, cbRet = cbToRead);
+ if (copy_to_iter(pReq->abData, cbRet, iter) == cbRet) {
+ kio->ki_pos += cbRet;
+ if (cbRet < cbToRead) /* short read: drop whatever is left in the iterator */
+ iov_iter_truncate(iter, 0);
+ } else
+ cbRet = -EFAULT;
+ } else
+ cbRet = -EPROTO;
+ VbglR0PhysHeapFree(pReq);
+ SFLOGFLOW(("vbsf_reg_read_iter: returns %#zx (%zd)\n", cbRet, cbRet));
+ return cbRet;
+ }
+ VbglR0PhysHeapFree(pReq); /* block crosses a page boundary: fall through to page locking */
+ }
+ }
+
+ /*
+ * Otherwise do the page locking thing.
+ */
+ return vbsf_reg_read_iter_locking(kio, iter, cbToRead, pSuperInfo, sf_r);
+}
+
+
+/**
+ * Worker for vbsf_reg_write_iter() that deals with larger writes using page
+ * locking.
+ *
+ * @returns Number of bytes written on success, negative errno on failure:
+ * -ENOMEM if the request or the page array cannot be allocated, the
+ * vbsf_iter_lock_pages() status if locking fails, or -EPROTO if the
+ * host request fails before anything was written.
+ * @param kio The kernel I/O control block; ki_pos is set to the new file offset.
+ * @param iter The I/O vector iterator describing the buffer.
+ * @param cbToWrite Number of bytes to write.
+ * @param offFile The file offset to start writing at.
+ * @param pSuperInfo The super block info (mapping root and cMaxIoPages).
+ * @param sf_r The regular file info providing the host handle.
+ * @param inode The inode; its size is raised when the write extends the file.
+ * @param sf_i The inode info (modification time tracking, force_restat).
+ * @param mapping The address space passed to vbsf_reg_write_sync_page_cache().
+ * @param fAppend Whether the write is an append; the offset reported back in
+ * the request is then used if the host feature flag is set.
+ */
+static ssize_t vbsf_reg_write_iter_locking(struct kiocb *kio, struct iov_iter *iter, size_t cbToWrite, loff_t offFile,
+ struct vbsf_super_info *pSuperInfo, struct vbsf_reg_info *sf_r, struct inode *inode,
+ struct vbsf_inode_info *sf_i, struct address_space *mapping, bool fAppend)
+{
+ /*
+ * Estimate how many pages we may possibly submit in a single request so
+ * that we can allocate matching request buffer and page array.
+ */
+ struct page *apPagesStack[16];
+ struct page **papPages = &apPagesStack[0];
+ struct page **papPagesFree = NULL;
+ VBOXSFWRITEPGLSTREQ *pReq;
+ ssize_t cbRet = 0;
+ size_t cMaxPages = vbsf_iter_max_span_of_pages(iter);
+ cMaxPages = RT_MIN(RT_MAX(pSuperInfo->cMaxIoPages, 2), cMaxPages);
+
+ pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFWRITEPGLSTREQ, PgLst.aPages[cMaxPages]));
+ while (!pReq && cMaxPages > 4) { /* halve the request size until the allocation succeeds */
+ cMaxPages /= 2;
+ pReq = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(RT_UOFFSETOF_DYN(VBOXSFWRITEPGLSTREQ, PgLst.aPages[cMaxPages]));
+ }
+ if (pReq && cMaxPages > RT_ELEMENTS(apPagesStack)) /* the stack array is too small: size the heap array by its element type */
+ papPagesFree = papPages = kmalloc(cMaxPages * sizeof(papPages[0]), GFP_KERNEL);
+ if (pReq && papPages) {
+
+ /*
+ * The write loop.
+ */
+ struct vbsf_iter_stash Stash = VBSF_ITER_STASH_INITIALIZER;
+ do {
+ /*
+ * Grab as many pages as we can. This means that if adjacent
+ * segments both start and end at a page boundary, we can
+ * do them both in the same transfer to the host.
+ */
+ size_t cPages = 0;
+ size_t cbChunk = 0;
+ size_t offPage0 = 0;
+ int rc = vbsf_iter_lock_pages(iter, false /*fWrite*/, &Stash, cMaxPages, papPages, &cPages, &offPage0, &cbChunk);
+ if (rc == 0) {
+ size_t iPage = cPages;
+ while (iPage-- > 0)
+ pReq->PgLst.aPages[iPage] = page_to_phys(papPages[iPage]);
+ pReq->PgLst.offFirstPage = (uint16_t)offPage0;
+ AssertStmt(cbChunk <= cbToWrite, cbChunk = cbToWrite);
+ } else {
+ cbRet = rc;
+ break;
+ }
+
+ /*
+ * Issue the request and unlock the pages.
+ */
+ rc = VbglR0SfHostReqWritePgLst(pSuperInfo->map.root, pReq, sf_r->Handle.hHost, offFile, cbChunk, cPages);
+ sf_i->ModificationTimeAtOurLastWrite = sf_i->ModificationTime;
+ SFLOGFLOW(("vbsf_reg_write_iter_locking: VbglR0SfHostReqWritePgLst -> %d (cbActual=%#x cbChunk=%#zx of %#zx cPages=%#zx offPage0=%#x\n",
+ rc, pReq->Parms.cb32Write.u.value32, cbChunk, cbToWrite, cPages, offPage0));
+ if (RT_SUCCESS(rc)) {
+ /*
+ * Success, advance position and buffer.
+ */
+ uint32_t cbActual = pReq->Parms.cb32Write.u.value32;
+ AssertStmt(cbActual <= cbChunk, cbActual = cbChunk);
+
+ vbsf_reg_write_sync_page_cache(mapping, offFile, cbActual, NULL /*pbSrcBuf*/, papPages, offPage0, cPages);
+ vbsf_iter_unlock_pages(iter, papPages, cPages, false /*fSetDirty*/);
+
+ cbRet += cbActual;
+ cbToWrite -= cbActual;
+
+ offFile += cbActual;
+ if (fAppend && (g_fSfFeatures & SHFL_FEATURE_WRITE_UPDATES_OFFSET))
+ offFile = pReq->Parms.off64Write.u.value64;
+ kio->ki_pos = offFile;
+ if (offFile > i_size_read(inode))
+ i_size_write(inode, offFile);
+
+ sf_i->force_restat = 1; /* mtime (and size) may have changed */
+
+ /*
+ * Are we done already?
+ */
+ if (!cbToWrite)
+ break;
+ if (cbActual < cbChunk) { /* We ASSUME end-of-file here. */
+ if (vbsf_iter_rewind(iter, &Stash, cbChunk - cbActual, cbActual))
+ iov_iter_truncate(iter, 0);
+ break;
+ }
+ } else {
+ /*
+ * Try rewind the iter structure.
+ */
+ bool fRewindOkay;
+ vbsf_iter_unlock_pages(iter, papPages, cPages, false /*fSetDirty*/);
+ fRewindOkay = vbsf_iter_rewind(iter, &Stash, cbChunk, cbChunk);
+ if (rc == VERR_NO_MEMORY && cMaxPages > 4 && fRewindOkay) {
+ /*
+ * The host probably doesn't have enough heap to handle the
+ * request, reduce the page count and retry.
+ */
+ cMaxPages /= 4;
+ Assert(cMaxPages > 0);
+ } else {
+ /*
+ * If we've successfully written stuff, return it rather than
+ * the error. (Not sure if this is such a great idea...)
+ */
+ if (cbRet <= 0)
+ cbRet = -EPROTO;
+ break;
+ }
+ }
+ } while (cbToWrite > 0);
+
+ vbsf_iter_cleanup_stash(iter, &Stash);
+ }
+ else
+ cbRet = -ENOMEM;
+ if (papPagesFree)
+ kfree(papPages);
+ if (pReq)
+ VbglR0PhysHeapFree(pReq);
+ SFLOGFLOW(("vbsf_reg_write_iter_locking: returns %#zx (%zd)\n", cbRet, cbRet));
+ return cbRet;
+}
+
+
+/**
+ * Write from I/O vector iterator.
+ *
+ * Forces the position to the current inode size for appending writes,
+ * rejects asynchronous requests, passes the target range to
+ * filemap_fdatawait_range() when the file has writable mappings with cached
+ * pages (kernels >= 2.6.32), sends small writes in an embedded-buffer host
+ * request and hands everything else to vbsf_reg_write_iter_locking().
+ *
+ * On kernels older than 3.16 this is compiled as vbsf_reg_aio_write(), which
+ * takes the iovec array, segment count and file offset instead of an
+ * iterator and wraps them in a local fake iterator.
+ *
+ * @returns Number of bytes written on success, negative errno on error
+ * (-EINVAL if the inode is not a regular file, -EOPNOTSUPP for
+ * async I/O, -EFAULT if copying from the iterator comes up short,
+ * -EPROTO if the embedded host request fails, or the status
+ * returned by filemap_fdatawait_range()).
+ * @param kio The kernel I/O control block (or something like that).
+ * @param iter The I/O vector iterator describing the buffer.
+ */
+# if RTLNX_VER_MIN(3,16,0)
+static ssize_t vbsf_reg_write_iter(struct kiocb *kio, struct iov_iter *iter)
+# else
+static ssize_t vbsf_reg_aio_write(struct kiocb *kio, const struct iovec *iov, unsigned long cSegs, loff_t offFile)
+# endif
+{
+# if RTLNX_VER_MAX(3,16,0)
+ struct vbsf_iov_iter fake_iter = VBSF_IOV_ITER_INITIALIZER(cSegs, iov, 1 /*write*/);
+ struct vbsf_iov_iter *iter = &fake_iter;
+# endif
+ size_t cbToWrite = iov_iter_count(iter);
+ struct inode *inode = VBSF_GET_F_DENTRY(kio->ki_filp)->d_inode;
+ struct vbsf_inode_info *sf_i = VBSF_GET_INODE_INFO(inode);
+ struct address_space *mapping = inode->i_mapping;
+
+ struct vbsf_reg_info *sf_r = kio->ki_filp->private_data;
+ struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(inode->i_sb);
+# if RTLNX_VER_MIN(3,16,0)
+ loff_t offFile = kio->ki_pos;
+# endif
+# if RTLNX_VER_MIN(4,1,0)
+ bool const fAppend = RT_BOOL(kio->ki_flags & IOCB_APPEND);
+# else
+ bool const fAppend = RT_BOOL(kio->ki_filp->f_flags & O_APPEND);
+# endif
+
+
+ SFLOGFLOW(("vbsf_reg_write_iter: inode=%p file=%p size=%#zx off=%#llx type=%#x\n",
+ inode, kio->ki_filp, cbToWrite, offFile, VBSF_GET_ITER_TYPE(iter) ));
+ AssertReturn(S_ISREG(inode->i_mode), -EINVAL);
+
+ /*
+ * Enforce APPEND flag (more later).
+ */
+ if (fAppend)
+ kio->ki_pos = offFile = i_size_read(inode);
+
+ /*
+ * Do we have anything at all to do here?
+ */
+ if (!cbToWrite)
+ return 0;
+
+ /** @todo Implement the read-write caching mode. */
+
+ /*
+ * For now we reject async I/O requests.
+ */
+ if (!is_sync_kiocb(kio)) {
+ SFLOGFLOW(("vbsf_reg_write_iter: async I/O not yet supported\n")); /** @todo extend FsPerf with AIO tests. */
+ return -EOPNOTSUPP;
+ }
+
+ /*
+ * If there are active writable mappings, coordinate with any
+ * pending writes via those.
+ */
+ if ( mapping
+ && mapping->nrpages > 0
+ && mapping_writably_mapped(mapping)) {
+# if RTLNX_VER_MIN(2,6,32)
+ int err = filemap_fdatawait_range(mapping, offFile, offFile + cbToWrite - 1);
+ if (err)
+ return err;
+# else
+ /** @todo ... */
+# endif
+ }
+
+ /*
+ * For small requests, try to use an embedded buffer provided we get a heap
+ * block that does not cross page boundaries (see host code).
+ */
+ if (cbToWrite <= PAGE_SIZE / 4 * 3 - RT_UOFFSETOF(VBOXSFWRITEEMBEDDEDREQ, abData[0]) /* see allocator */) {
+ uint32_t const cbReq = RT_UOFFSETOF(VBOXSFWRITEEMBEDDEDREQ, abData[0]) + cbToWrite;
+ VBOXSFWRITEEMBEDDEDREQ *pReq = (VBOXSFWRITEEMBEDDEDREQ *)VbglR0PhysHeapAlloc(cbReq);
+ if (pReq) {
+ if ((PAGE_SIZE - ((uintptr_t)pReq & PAGE_OFFSET_MASK)) >= cbReq) { /* the whole request fits in the rest of its page */
+ ssize_t cbRet;
+ if (copy_from_iter(pReq->abData, cbToWrite, iter) == cbToWrite) {
+ int vrc = VbglR0SfHostReqWriteEmbedded(pSuperInfo->map.root, pReq, sf_r->Handle.hHost,
+ offFile, (uint32_t)cbToWrite);
+ sf_i->ModificationTimeAtOurLastWrite = sf_i->ModificationTime;
+ if (RT_SUCCESS(vrc)) {
+ cbRet = pReq->Parms.cb32Write.u.value32;
+ AssertStmt(cbRet <= (ssize_t)cbToWrite, cbRet = cbToWrite);
+ vbsf_reg_write_sync_page_cache(mapping, offFile, (uint32_t)cbRet, pReq->abData,
+ NULL /*papSrcPages*/, 0 /*offSrcPage0*/, 0 /*cSrcPages*/);
+
+ offFile += cbRet;
+ if (fAppend && (g_fSfFeatures & SHFL_FEATURE_WRITE_UPDATES_OFFSET))
+ offFile = pReq->Parms.off64Write.u.value64;
+ kio->ki_pos = offFile;
+ if (offFile > i_size_read(inode))
+ i_size_write(inode, offFile);
+
+# if RTLNX_VER_MIN(4,11,0)
+ if ((size_t)cbRet < cbToWrite) /* short write: give the unwritten bytes back to the iterator */
+ iov_iter_revert(iter, cbToWrite - cbRet);
+# endif
+ } else
+ cbRet = -EPROTO;
+ sf_i->force_restat = 1; /* mtime (and size) may have changed */
+ } else
+ cbRet = -EFAULT;
+ VbglR0PhysHeapFree(pReq);
+ SFLOGFLOW(("vbsf_reg_write_iter: returns %#zx (%zd)\n", cbRet, cbRet));
+ return cbRet;
+ }
+ VbglR0PhysHeapFree(pReq); /* block crosses a page boundary: fall through to page locking */
+ }
+ }
+
+ /*
+ * Otherwise do the page locking thing.
+ */
+ return vbsf_reg_write_iter_locking(kio, iter, cbToWrite, offFile, pSuperInfo, sf_r, inode, sf_i, mapping, fAppend);
+}
+
+#endif /* >= 2.6.19 */
+
+/**
+ * Used by vbsf_reg_open() and vbsf_inode_atomic_open() to convert Linux
+ * open flags into shared folders create flags.
+ *
+ * The access mode and O_APPEND are also recorded in @a *pfHandle by OR-ing
+ * in the matching VBSF_HANDLE_F_XXX bits.
+ *
+ * @returns shared folders create flags.
+ * @param   fLnxOpen    The linux O_XXX flags to convert.
+ * @param   pfHandle    Pointer to vbsf_handle::fFlags.
+ * @param   pszCaller   Caller, for logging purposes.
+ */
+uint32_t vbsf_linux_oflags_to_vbox(unsigned fLnxOpen, uint32_t *pfHandle, const char *pszCaller)
+{
+    unsigned const fAccMode = fLnxOpen & O_ACCMODE;
+    unsigned const fTrunc   = fLnxOpen & O_TRUNC;
+    uint32_t       fCreate  = SHFL_CF_ACCESS_DENYNONE;
+
+    /*
+     * Disposition: what to do when the file is new and when it exists.
+     */
+    if (!(fLnxOpen & O_CREAT)) {
+        fCreate |= SHFL_CF_ACT_FAIL_IF_NEW;
+        if (fTrunc) {
+            Log(("%s: O_TRUNC set\n", pszCaller));
+            fCreate |= SHFL_CF_ACT_OVERWRITE_IF_EXISTS;
+        }
+    } else {
+        Log(("%s: O_CREAT set\n", pszCaller));
+        fCreate |= SHFL_CF_ACT_CREATE_IF_NEW;
+        if (fLnxOpen & O_EXCL) {
+            Log(("%s: O_EXCL set\n", pszCaller));
+            fCreate |= SHFL_CF_ACT_FAIL_IF_EXISTS;
+        } else if (fTrunc) {
+            Log(("%s: O_TRUNC set\n", pszCaller));
+            fCreate |= SHFL_CF_ACT_OVERWRITE_IF_EXISTS;
+        } else {
+            fCreate |= SHFL_CF_ACT_OPEN_IF_EXISTS;
+        }
+    }
+
+    /*
+     * Access mode; anything other than the three known modes is a bug.
+     */
+    if (fAccMode == O_RDONLY) {
+        fCreate   |= SHFL_CF_ACCESS_READ;
+        *pfHandle |= VBSF_HANDLE_F_READ;
+    } else if (fAccMode == O_WRONLY) {
+        fCreate   |= SHFL_CF_ACCESS_WRITE;
+        *pfHandle |= VBSF_HANDLE_F_WRITE;
+    } else if (fAccMode == O_RDWR) {
+        fCreate   |= SHFL_CF_ACCESS_READWRITE;
+        *pfHandle |= VBSF_HANDLE_F_READ | VBSF_HANDLE_F_WRITE;
+    } else {
+        BUG();
+    }
+
+    if (fLnxOpen & O_APPEND) {
+        Log(("%s: O_APPEND set\n", pszCaller));
+        fCreate   |= SHFL_CF_ACCESS_APPEND;
+        *pfHandle |= VBSF_HANDLE_F_APPEND;
+    }
+
+    /*
+     * Only directories?
+     */
+    if (fLnxOpen & O_DIRECTORY) {
+        Log(("%s: O_DIRECTORY set\n", pszCaller));
+        fCreate |= SHFL_CF_DIRECTORY;
+    }
+
+    return fCreate;
+}
+
+
+/**
+ * Open a regular file.
+ *
+ * If the inode still carries a host handle (left there by
+ * vbsf_create_worker()), that handle is adopted.  Otherwise the file is
+ * opened on the host via VbglR0SfHostReqCreate().
+ *
+ * On failure nothing is left attached to @a file or to the inode's handle
+ * list: the VFS does not invoke the release method for a file whose open
+ * method failed, so anything parked there would never be cleaned up.
+ *
+ * @param   inode       the inode
+ * @param   file        the file
+ * @returns 0 on success, Linux error code otherwise
+ */
+static int vbsf_reg_open(struct inode *inode, struct file *file)
+{
+    int rc, rc_linux = 0;
+    struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(inode->i_sb);
+    struct vbsf_inode_info *sf_i       = VBSF_GET_INODE_INFO(inode);
+    struct dentry          *dentry     = VBSF_GET_F_DENTRY(file);
+    struct vbsf_reg_info   *sf_r;
+    VBOXSFCREATEREQ        *pReq;
+
+    SFLOGFLOW(("vbsf_reg_open: inode=%p file=%p flags=%#x %s\n", inode, file, file->f_flags, sf_i ? sf_i->path->String.ach : NULL));
+    Assert(pSuperInfo);
+    Assert(sf_i);
+
+    sf_r = kmalloc(sizeof(*sf_r), GFP_KERNEL);
+    if (!sf_r) {
+        LogRelFunc(("could not allocate reg info\n"));
+        return -ENOMEM;
+    }
+
+    RTListInit(&sf_r->Handle.Entry);
+    sf_r->Handle.cRefs  = 1;
+    sf_r->Handle.fFlags = VBSF_HANDLE_F_FILE | VBSF_HANDLE_F_MAGIC;
+    sf_r->Handle.hHost  = SHFL_HANDLE_NIL;
+
+    /* Already open? */
+    if (sf_i->handle != SHFL_HANDLE_NIL) {
+        /*
+         * This inode was created with vbsf_create_worker().  Check the CreateFlags:
+         * O_CREAT, O_TRUNC: inherent true (file was just created).  Not sure
+         * about the access flags (SHFL_CF_ACCESS_*).
+         */
+        sf_i->force_restat = 1;
+        sf_r->Handle.hHost = sf_i->handle;
+        sf_i->handle = SHFL_HANDLE_NIL;
+        file->private_data = sf_r;
+
+        sf_r->Handle.fFlags |= VBSF_HANDLE_F_READ | VBSF_HANDLE_F_WRITE; /** @todo fix */
+        vbsf_handle_append(sf_i, &sf_r->Handle);
+        SFLOGFLOW(("vbsf_reg_open: returns 0 (#1) - sf_i=%p hHost=%#llx\n", sf_i, sf_r->Handle.hHost));
+        return 0;
+    }
+
+    pReq = (VBOXSFCREATEREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq) + sf_i->path->u16Size);
+    if (!pReq) {
+        kfree(sf_r);
+        LogRelFunc(("Failed to allocate a VBOXSFCREATEREQ buffer!\n"));
+        return -ENOMEM;
+    }
+    memcpy(&pReq->StrPath, sf_i->path, SHFLSTRING_HEADER_SIZE + sf_i->path->u16Size);
+    RT_ZERO(pReq->CreateParms);
+    pReq->CreateParms.Handle = SHFL_HANDLE_NIL;
+
+    /* We check the value of pReq->CreateParms.Handle afterwards to
+     * find out if the call succeeded or failed, as the API does not seem
+     * to cleanly distinguish error and informational messages.
+     *
+     * Furthermore, we must set pReq->CreateParms.Handle to SHFL_HANDLE_NIL
+     * to make the shared folders host service use our fMode parameter */
+
+    /* We ignore O_EXCL, as the Linux kernel seems to call create
+       beforehand itself, so O_EXCL should always fail. */
+    pReq->CreateParms.CreateFlags = vbsf_linux_oflags_to_vbox(file->f_flags & ~O_EXCL, &sf_r->Handle.fFlags, __FUNCTION__);
+    pReq->CreateParms.Info.Attr.fMode = inode->i_mode;
+    LogFunc(("vbsf_reg_open: calling VbglR0SfHostReqCreate, file %s, flags=%#x, %#x\n",
+             sf_i->path->String.utf8, file->f_flags, pReq->CreateParms.CreateFlags));
+    rc = VbglR0SfHostReqCreate(pSuperInfo->map.root, pReq);
+    if (RT_FAILURE(rc)) {
+        LogFunc(("VbglR0SfHostReqCreate failed flags=%d,%#x rc=%Rrc\n", file->f_flags, pReq->CreateParms.CreateFlags, rc));
+        kfree(sf_r);
+        VbglR0PhysHeapFree(pReq);
+        return -RTErrConvertToErrno(rc);
+    }
+
+    if (pReq->CreateParms.Handle != SHFL_HANDLE_NIL) {
+        vbsf_dentry_chain_increase_ttl(dentry);
+        vbsf_update_inode(inode, sf_i, &pReq->CreateParms.Info, pSuperInfo, false /*fInodeLocked*/, 0 /*fSetAttrs*/);
+        rc_linux = 0;
+    } else {
+        switch (pReq->CreateParms.Result) {
+            case SHFL_PATH_NOT_FOUND:
+                vbsf_dentry_invalidate_ttl(dentry);
+                rc_linux = -ENOENT;
+                break;
+            case SHFL_FILE_NOT_FOUND:
+                vbsf_dentry_invalidate_ttl(dentry);
+                /** @todo sf_dentry_increase_parent_ttl(file->f_dentry); if we can trust it. */
+                rc_linux = -ENOENT;
+                break;
+            case SHFL_FILE_EXISTS:
+                vbsf_dentry_chain_increase_ttl(dentry);
+                vbsf_update_inode(inode, sf_i, &pReq->CreateParms.Info, pSuperInfo, false /*fInodeLocked*/, 0 /*fSetAttrs*/);
+                rc_linux = -EEXIST;
+                break;
+            default:
+                /* NOTE(review): reported as success although the host
+                   returned no handle; kept as in the original code. */
+                vbsf_dentry_chain_increase_parent_ttl(dentry);
+                rc_linux = 0;
+                break;
+        }
+    }
+
+    if (rc_linux != 0) {
+        /* The open failed, so vbsf_reg_release() will never run for this
+           file.  Free everything here rather than leaking sf_r and leaving
+           an entry without a host handle on the inode's handle list. */
+        VbglR0PhysHeapFree(pReq);
+        kfree(sf_r);
+        SFLOGFLOW(("vbsf_reg_open: returns %d (#2) - sf_i=%p\n", rc_linux, sf_i));
+        return rc_linux;
+    }
+
+    sf_r->Handle.hHost = pReq->CreateParms.Handle;
+    file->private_data = sf_r;
+    vbsf_handle_append(sf_i, &sf_r->Handle);
+    VbglR0PhysHeapFree(pReq);
+    SFLOGFLOW(("vbsf_reg_open: returns 0 (#2) - sf_i=%p hHost=%#llx\n", sf_i, sf_r->Handle.hHost));
+    return rc_linux;
+}
+
+
+/**
+ * Close a regular file.
+ *
+ * Writes back any pages cached for the inode and then drops this file's
+ * reference to its handle structure.
+ *
+ * @param   inode       the inode
+ * @param   file        the file
+ * @returns 0 (this implementation does not report errors)
+ */
+static int vbsf_reg_release(struct inode *inode, struct file *file)
+{
+    struct vbsf_inode_info *pInodeInfo = VBSF_GET_INODE_INFO(inode);
+    struct vbsf_reg_info   *pRegInfo   = file->private_data;
+    struct vbsf_super_info *pSuperInfo;
+    struct address_space   *pMapping;
+
+    SFLOGFLOW(("vbsf_reg_release: inode=%p file=%p\n", inode, file));
+
+    /* Nothing attached to the file, nothing to do. */
+    if (!pRegInfo)
+        return 0;
+
+    pSuperInfo = VBSF_GET_SUPER_INFO(inode->i_sb);
+    pMapping   = inode->i_mapping;
+    Assert(pSuperInfo);
+
+    /* If we're closing the last handle for this inode, make sure to flush
+       the mapping or we'll end up in vbsf_writepage without a handle. */
+    if (pMapping && pMapping->nrpages > 0 /** @todo && last writable handle */) {
+#if RTLNX_VER_MIN(2,4,25)
+        if (filemap_fdatawrite(pMapping) != -EIO)
+#else
+        if (   filemap_fdatasync(pMapping) == 0
+            && fsync_inode_data_buffers(inode) == 0)
+#endif
+            filemap_fdatawait(inode->i_mapping);
+    }
+
+    /* Detach and release the reg info, closing the handle if we're the last user. */
+    file->private_data = NULL;
+    vbsf_handle_release(&pRegInfo->Handle, pSuperInfo, "vbsf_reg_release");
+
+    pInodeInfo->handle = SHFL_HANDLE_NIL;
+    return 0;
+}
+
+
+/**
+ * Seek wrapper that refreshes the inode from the host before handing any
+ * EOF-relative request to the generic/default seek function.
+ *
+ * The host may have extended the file behind our back; seeking relative to
+ * a stale size could make an appending caller overwrite existing data.  So
+ * SEEK_END (and SEEK_HOLE / SEEK_DATA where defined) always force a
+ * revalidation using this file's host handle first.
+ *
+ * @param   file        the file
+ * @param   off         the seek offset
+ * @param   whence      the seek method (SEEK_XXX)
+ * @returns The result of the generic/default seek function, or the non-zero
+ *          status of the revalidation if that failed.
+ */
+static loff_t vbsf_reg_llseek(struct file *file, loff_t off, int whence)
+{
+    int fEofRelative = whence == SEEK_END;
+#ifdef SEEK_HOLE
+    if (whence == SEEK_HOLE || whence == SEEK_DATA)
+        fEofRelative = 1;
+#endif
+
+    SFLOGFLOW(("vbsf_reg_llseek: file=%p off=%lld whence=%d\n", file, off, whence));
+
+    if (fEofRelative) {
+        struct vbsf_reg_info *pRegInfo = file->private_data;
+        int rcLnx = vbsf_inode_revalidate_with_handle(VBSF_GET_F_DENTRY(file), pRegInfo->Handle.hHost,
+                                                      true /*fForce*/, false /*fInodeLocked*/);
+        if (rcLnx != 0)
+            return rcLnx;
+    }
+
+#if RTLNX_VER_MIN(2,4,8)
+    return generic_file_llseek(file, off, whence);
+#else
+    return default_llseek(file, off, whence);
+#endif
+}
+
+
+/**
+ * Flush region of file - chiefly mmap/msync.
+ *
+ * We cannot use the noop_fsync / simple_sync_file here as that means
+ * msync(,,MS_SYNC) will return before the data hits the host, thereby
+ * causing coherency issues with O_DIRECT access to the same file as
+ * well as any host interaction with the file.
+ *
+ * The signature and the helper that is called depend on the kernel version;
+ * exactly one of the three variants below is compiled.
+ */
+#if RTLNX_VER_MIN(3,1,0) \
+ || (defined(CONFIG_SUSE_KERNEL) && RTLNX_VER_MIN(3,0,101) /** @todo figure when exactly */)
+/* Ranged variant: forwards to the kernel's generic fsync helper. */
+static int vbsf_reg_fsync(struct file *file, loff_t start, loff_t end, int datasync)
+{
+# if RTLNX_VER_MIN(3,16,0)
+ return __generic_file_fsync(file, start, end, datasync);
+# else
+ return generic_file_fsync(file, start, end, datasync);
+# endif
+}
+#elif RTLNX_VER_MIN(2,6,35)
+/* Whole-file variant without a dentry parameter. */
+static int vbsf_reg_fsync(struct file *file, int datasync)
+{
+ return generic_file_fsync(file, datasync);
+}
+#else /* < 2.6.35 */
+/* Oldest variant: the dentry is passed in separately. */
+static int vbsf_reg_fsync(struct file *file, struct dentry *dentry, int datasync)
+{
+# if RTLNX_VER_MIN(2,6,31)
+ return simple_fsync(file, dentry, datasync);
+# else
+ int rc;
+ struct inode *inode = dentry->d_inode;
+ AssertReturn(inode, -EINVAL);
+
+ /** @todo What about file_fsync()? (<= 2.5.11) */
+
+# if RTLNX_VER_MIN(2,5,12)
+ /* Sync the mapping's buffers first, then the inode itself if it is dirty
+ in a way that matters for this kind of sync. */
+ rc = sync_mapping_buffers(inode->i_mapping);
+ if ( rc == 0
+ && (inode->i_state & I_DIRTY)
+ && ((inode->i_state & I_DIRTY_DATASYNC) || !datasync)
+ ) {
+ struct writeback_control wbc = {
+ .sync_mode = WB_SYNC_ALL,
+ .nr_to_write = 0
+ };
+ rc = sync_inode(inode, &wbc);
+ }
+# else /* < 2.5.12 */
+ /** @todo
+ * Something is buggy here or in the 2.4.21-27.EL kernel I'm testing on.
+ *
+ * In theory we shouldn't need to do anything here, since msync will call
+ * writepage() on each dirty page and we write them out synchronously. So, the
+ * problem is elsewhere... Doesn't happen all the time either. Sigh.
+ */
+ rc = fsync_inode_buffers(inode);
+# if RTLNX_VER_MIN(2,4,10)
+ if (rc == 0 && datasync)
+ rc = fsync_inode_data_buffers(inode);
+# endif
+
+# endif /* < 2.5.12 */
+ return rc;
+# endif
+}
+#endif /* < 2.6.35 */
+
+
+#if RTLNX_VER_MIN(4,5,0)
+/**
+ * Copy a datablock from one file to another on the host side.
+ *
+ * @param   pFileSrc    The source file.
+ * @param   offSrc      Offset into the source file to start copying at.
+ * @param   pFileDst    The destination file.
+ * @param   offDst      Offset into the destination file to copy to.
+ * @param   cbRange     Number of bytes to copy.
+ * @param   fFlags      Flags from the caller.  Not forwarded; the host
+ *                      request is always issued with flags 0.
+ * @returns Number of bytes copied as reported by the host, or a negative
+ *          errno: -EOPNOTSUPP if the host does not offer or implement the
+ *          function, -ENOMEM if the request buffer cannot be allocated,
+ *          otherwise the converted host status.
+ */
+static ssize_t vbsf_reg_copy_file_range(struct file *pFileSrc, loff_t offSrc, struct file *pFileDst, loff_t offDst,
+                                        size_t cbRange, unsigned int fFlags)
+{
+    ssize_t cbRet;
+    if (g_uSfLastFunction >= SHFL_FN_COPY_FILE_PART) {
+        struct inode           *pInodeSrc     = pFileSrc->f_inode;
+        struct vbsf_inode_info *pInodeInfoSrc = VBSF_GET_INODE_INFO(pInodeSrc);
+        struct vbsf_super_info *pSuperInfoSrc = VBSF_GET_SUPER_INFO(pInodeSrc->i_sb);
+        struct vbsf_reg_info   *pFileInfoSrc  = (struct vbsf_reg_info *)pFileSrc->private_data;
+        /* Take the destination's inode from the destination file, so that the
+           validation and the destination root below really refer to it (the
+           two files need not live on the same shared folder mount). */
+        struct inode           *pInodeDst     = pFileDst->f_inode;
+        struct vbsf_inode_info *pInodeInfoDst = VBSF_GET_INODE_INFO(pInodeDst);
+        struct vbsf_super_info *pSuperInfoDst = VBSF_GET_SUPER_INFO(pInodeDst->i_sb);
+        struct vbsf_reg_info   *pFileInfoDst  = (struct vbsf_reg_info *)pFileDst->private_data;
+        VBOXSFCOPYFILEPARTREQ  *pReq;
+
+        /*
+         * Some extra validation.
+         */
+        AssertPtrReturn(pInodeInfoSrc, -EOPNOTSUPP);
+        Assert(pInodeInfoSrc->u32Magic == SF_INODE_INFO_MAGIC);
+        AssertPtrReturn(pInodeInfoDst, -EOPNOTSUPP);
+        Assert(pInodeInfoDst->u32Magic == SF_INODE_INFO_MAGIC);
+
+# if RTLNX_VER_MAX(4,11,0)
+        if (!S_ISREG(pInodeSrc->i_mode) || !S_ISREG(pInodeDst->i_mode))
+            return S_ISDIR(pInodeSrc->i_mode) || S_ISDIR(pInodeDst->i_mode) ? -EISDIR : -EINVAL;
+# endif
+
+        /*
+         * Allocate the request and issue it.
+         */
+        pReq = (VBOXSFCOPYFILEPARTREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq));
+        if (pReq) {
+            int vrc = VbglR0SfHostReqCopyFilePart(pSuperInfoSrc->map.root, pFileInfoSrc->Handle.hHost, offSrc,
+                                                  pSuperInfoDst->map.root, pFileInfoDst->Handle.hHost, offDst,
+                                                  cbRange, 0 /*fFlags*/, pReq);
+            if (RT_SUCCESS(vrc))
+                cbRet = pReq->Parms.cb64ToCopy.u.value64;
+            else if (vrc == VERR_NOT_IMPLEMENTED)
+                cbRet = -EOPNOTSUPP;
+            else
+                cbRet = -RTErrConvertToErrno(vrc);
+
+            VbglR0PhysHeapFree(pReq);
+        } else
+            cbRet = -ENOMEM;
+    } else {
+        cbRet = -EOPNOTSUPP;
+    }
+    SFLOGFLOW(("vbsf_reg_copy_file_range: returns %zd\n", cbRet));
+    return cbRet;
+}
+#endif /* > 4.5 */
+
+
+#ifdef SFLOG_ENABLED
+/*
+ * This is just for logging page faults and such.
+ */
+
+/** Pointer to the ops generic_file_mmap returns the first time it's called. */
+static struct vm_operations_struct const *g_pGenericFileVmOps = NULL;
+/** Merge of g_LoggingVmOpsTemplate and g_pGenericFileVmOps. */
+static struct vm_operations_struct g_LoggingVmOps;
+
+
+/* Generic page fault callback: */
+/* Logs the call, forwards it to the saved generic fault handler and logs the
+ result. The signature depends on the kernel version. */
+# if RTLNX_VER_MIN(4,11,0)
+static vm_fault_t vbsf_vmlog_fault(struct vm_fault *vmf)
+{
+ vm_fault_t rc;
+ SFLOGFLOW(("vbsf_vmlog_fault: vmf=%p flags=%#x addr=%p\n", vmf, vmf->flags, vmf->address));
+ rc = g_pGenericFileVmOps->fault(vmf);
+ SFLOGFLOW(("vbsf_vmlog_fault: returns %d\n", rc));
+ return rc;
+}
+# elif RTLNX_VER_MIN(2,6,23)
+static int vbsf_vmlog_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ int rc;
+ /* The faulting address member is named differently before 4.10. */
+# if RTLNX_VER_MIN(4,10,0)
+ SFLOGFLOW(("vbsf_vmlog_fault: vma=%p vmf=%p flags=%#x addr=%p\n", vma, vmf, vmf->flags, vmf->address));
+# else
+ SFLOGFLOW(("vbsf_vmlog_fault: vma=%p vmf=%p flags=%#x addr=%p\n", vma, vmf, vmf->flags, vmf->virtual_address));
+# endif
+ rc = g_pGenericFileVmOps->fault(vma, vmf);
+ SFLOGFLOW(("vbsf_vmlog_fault: returns %d\n", rc));
+ return rc;
+}
+# endif
+
+
+/* Special/generic page fault handler: */
+/* Logging wrapper around the saved generic nopage handler. No wrapper is
+ built for 2.6.26 and later; the ops template only sets .nopage on older
+ kernels. */
+# if RTLNX_VER_MIN(2,6,26)
+# elif RTLNX_VER_MIN(2,6,1)
+static struct page *vbsf_vmlog_nopage(struct vm_area_struct *vma, unsigned long address, int *type)
+{
+ struct page *page;
+ SFLOGFLOW(("vbsf_vmlog_nopage: vma=%p address=%p type=%p:{%#x}\n", vma, address, type, type ? *type : 0));
+ page = g_pGenericFileVmOps->nopage(vma, address, type);
+ SFLOGFLOW(("vbsf_vmlog_nopage: returns %p\n", page));
+ return page;
+}
+# else
+static struct page *vbsf_vmlog_nopage(struct vm_area_struct *vma, unsigned long address, int write_access_or_unused)
+{
+ struct page *page;
+ SFLOGFLOW(("vbsf_vmlog_nopage: vma=%p address=%p wau=%d\n", vma, address, write_access_or_unused));
+ page = g_pGenericFileVmOps->nopage(vma, address, write_access_or_unused);
+ SFLOGFLOW(("vbsf_vmlog_nopage: returns %p\n", page));
+ return page;
+}
+# endif /* < 2.6.26 */
+
+
+/* Special page fault callback for making something writable: */
+/* Logs the call, forwards it to the saved generic page_mkwrite handler and
+ logs the result. Three signatures exist depending on the kernel version. */
+# if RTLNX_VER_MIN(4,11,0)
+static vm_fault_t vbsf_vmlog_page_mkwrite(struct vm_fault *vmf)
+{
+ vm_fault_t rc;
+ SFLOGFLOW(("vbsf_vmlog_page_mkwrite: vmf=%p flags=%#x addr=%p\n", vmf, vmf->flags, vmf->address));
+ rc = g_pGenericFileVmOps->page_mkwrite(vmf);
+ SFLOGFLOW(("vbsf_vmlog_page_mkwrite: returns %d\n", rc));
+ return rc;
+}
+# elif RTLNX_VER_MIN(2,6,30)
+static int vbsf_vmlog_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ int rc;
+ /* The faulting address member is named differently before 4.10. */
+# if RTLNX_VER_MIN(4,10,0)
+ SFLOGFLOW(("vbsf_vmlog_page_mkwrite: vma=%p vmf=%p flags=%#x addr=%p\n", vma, vmf, vmf->flags, vmf->address));
+# else
+ SFLOGFLOW(("vbsf_vmlog_page_mkwrite: vma=%p vmf=%p flags=%#x addr=%p\n", vma, vmf, vmf->flags, vmf->virtual_address));
+# endif
+ rc = g_pGenericFileVmOps->page_mkwrite(vma, vmf);
+ SFLOGFLOW(("vbsf_vmlog_page_mkwrite: returns %d\n", rc));
+ return rc;
+}
+# elif RTLNX_VER_MIN(2,6,18)
+static int vbsf_vmlog_page_mkwrite(struct vm_area_struct *vma, struct page *page)
+{
+ int rc;
+ SFLOGFLOW(("vbsf_vmlog_page_mkwrite: vma=%p page=%p\n", vma, page));
+ rc = g_pGenericFileVmOps->page_mkwrite(vma, page);
+ SFLOGFLOW(("vbsf_vmlog_page_mkwrite: returns %d\n", rc));
+ return rc;
+}
+# endif
+
+
+/* Special page fault callback for mapping pages: */
+/* Logs the call and forwards it to the saved generic map_pages handler.
+ Only the 5.12+ variant has a return value; it is passed through but not
+ included in the log message. */
+# if RTLNX_VER_MIN(5,12,0)
+static vm_fault_t vbsf_vmlog_map_pages(struct vm_fault *vmf, pgoff_t start, pgoff_t end)
+{
+ vm_fault_t rc;
+ SFLOGFLOW(("vbsf_vmlog_map_pages: vmf=%p (flags=%#x addr=%p) start=%p end=%p\n", vmf, vmf->flags, vmf->address, start, end));
+ rc = g_pGenericFileVmOps->map_pages(vmf, start, end);
+ SFLOGFLOW(("vbsf_vmlog_map_pages: returns\n"));
+ return rc;
+}
+# elif RTLNX_VER_MIN(4,10,0)
+static void vbsf_vmlog_map_pages(struct vm_fault *vmf, pgoff_t start, pgoff_t end)
+{
+ SFLOGFLOW(("vbsf_vmlog_map_pages: vmf=%p (flags=%#x addr=%p) start=%p end=%p\n", vmf, vmf->flags, vmf->address, start, end));
+ g_pGenericFileVmOps->map_pages(vmf, start, end);
+ SFLOGFLOW(("vbsf_vmlog_map_pages: returns\n"));
+}
+# elif RTLNX_VER_MIN(4,8,0)
+static void vbsf_vmlog_map_pages(struct fault_env *fenv, pgoff_t start, pgoff_t end)
+{
+ SFLOGFLOW(("vbsf_vmlog_map_pages: fenv=%p (flags=%#x addr=%p) start=%p end=%p\n", fenv, fenv->flags, fenv->address, start, end));
+ g_pGenericFileVmOps->map_pages(fenv, start, end);
+ SFLOGFLOW(("vbsf_vmlog_map_pages: returns\n"));
+}
+# elif RTLNX_VER_MIN(3,15,0)
+static void vbsf_vmlog_map_pages(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ SFLOGFLOW(("vbsf_vmlog_map_pages: vma=%p vmf=%p (flags=%#x addr=%p)\n", vma, vmf, vmf->flags, vmf->virtual_address));
+ g_pGenericFileVmOps->map_pages(vma, vmf);
+ SFLOGFLOW(("vbsf_vmlog_map_pages: returns\n"));
+}
+# endif
+
+
+/**
+ * Overload template.
+ *
+ * Lists the logging wrappers available for the kernel version being built.
+ * vbsf_reg_mmap() merges this with the generic ops: a wrapper is only used
+ * where the generic table has a non-NULL entry in the same slot.
+ */
+static struct vm_operations_struct const g_LoggingVmOpsTemplate = {
+# if RTLNX_VER_MIN(2,6,23)
+ .fault = vbsf_vmlog_fault,
+# endif
+# if RTLNX_VER_MAX(2,6,26)
+ .nopage = vbsf_vmlog_nopage,
+# endif
+# if RTLNX_VER_MIN(2,6,18)
+ .page_mkwrite = vbsf_vmlog_page_mkwrite,
+# endif
+# if RTLNX_VER_MIN(3,15,0)
+ .map_pages = vbsf_vmlog_map_pages,
+# endif
+};
+
+/**
+ * file_operations::mmap wrapper for logging purposes.
+ *
+ * Lets generic_file_mmap() set up the mapping and then swaps the VMA's
+ * operations table for g_LoggingVmOps, which is assembled on first use from
+ * the generic table and g_LoggingVmOpsTemplate.
+ *
+ * @param   file        the file being mapped
+ * @param   vma         the virtual memory area describing the mapping
+ * @returns The status returned by generic_file_mmap().
+ */
+extern int vbsf_reg_mmap(struct file *file, struct vm_area_struct *vma)
+{
+    int rcLnx;
+
+    SFLOGFLOW(("vbsf_reg_mmap: file=%p vma=%p\n", file, vma));
+    rcLnx = generic_file_mmap(file, vma);
+    if (rcLnx != 0) {
+        SFLOGFLOW(("vbsf_reg_mmap: returns %d\n", rcLnx));
+        return rcLnx;
+    }
+
+    if (g_pGenericFileVmOps == NULL) {
+        /* Merge the ops and template the first time thru (there's a race here).
+           Both tables are walked as arrays of pointer-sized words; a template
+           entry is only taken where the generic table has an entry too. */
+        uintptr_t const    *pauGeneric  = (uintptr_t *)vma->vm_ops;
+        uintptr_t const    *pauTemplate = (uintptr_t *)&g_LoggingVmOpsTemplate;
+        uintptr_t volatile *pauMerged   = (uintptr_t *)&g_LoggingVmOps;
+        size_t const        cWords      = sizeof(g_LoggingVmOps) / sizeof(*pauMerged);
+        size_t              iWord;
+
+        for (iWord = 0; iWord < cWords; iWord++) {
+            if (pauTemplate[iWord] && pauGeneric[iWord])
+                pauMerged[iWord] = pauTemplate[iWord];
+            else
+                pauMerged[iWord] = pauGeneric[iWord];
+        }
+        g_pGenericFileVmOps = vma->vm_ops;
+        vma->vm_ops = &g_LoggingVmOps;
+    } else if (g_pGenericFileVmOps != vma->vm_ops) {
+        SFLOGFLOW(("vbsf_reg_mmap: Warning: vm_ops=%p, expected %p!\n", vma->vm_ops, g_pGenericFileVmOps));
+    } else {
+        vma->vm_ops = &g_LoggingVmOps;
+    }
+
+    SFLOGFLOW(("vbsf_reg_mmap: returns %d\n", rcLnx));
+    return rcLnx;
+}
+
+#endif /* SFLOG_ENABLED */
+
+
+/**
+ * File operations for regular files.
+ *
+ * Which members are set depends on the kernel version; see the conditionals
+ * on the individual entries.
+ *
+ * Note on splice_read/splice_write/sendfile:
+ * - Splice was introduced in 2.6.17. The generic_file_splice_read/write
+ * methods go thru the page cache, which is undesirable and is why we
+ * need to cook our own versions of the code as long as we cannot track
+ * host-side writes and correctly invalidate the guest page-cache.
+ * - Sendfile reimplemented using splice in 2.6.23.
+ * - The default_file_splice_read/write no-page-cache fallback functions,
+ * were introduced in 2.6.31. The write one works in page units.
+ * - Since linux 3.16 there is iter_file_splice_write that uses iter_write.
+ * - Since linux 4.9 the generic_file_splice_read function started using
+ * read_iter.
+ */
+struct file_operations vbsf_reg_fops = {
+ .open = vbsf_reg_open,
+#if RTLNX_VER_MAX(5,10,0) /* No regular .read/.write for 5.10, only .read_iter/.write_iter or in-kernel reads/writes fail. */
+ .read = vbsf_reg_read,
+ .write = vbsf_reg_write,
+#endif
+#if RTLNX_VER_MIN(3,16,0)
+ .read_iter = vbsf_reg_read_iter,
+ .write_iter = vbsf_reg_write_iter,
+#elif RTLNX_VER_MIN(2,6,19)
+ .aio_read = vbsf_reg_aio_read,
+ .aio_write = vbsf_reg_aio_write,
+#endif
+ .release = vbsf_reg_release,
+ /* With logging enabled, mmap goes through the wrapper that installs the
+ logging vm_ops. */
+#ifdef SFLOG_ENABLED
+ .mmap = vbsf_reg_mmap,
+#else
+ .mmap = generic_file_mmap,
+#endif
+#if RTLNX_VER_RANGE(2,6,17, 2,6,31)
+ .splice_read = vbsf_splice_read,
+#endif
+#if RTLNX_VER_MIN(3,16,0)
+ .splice_write = iter_file_splice_write,
+#elif RTLNX_VER_MIN(2,6,17)
+ .splice_write = vbsf_splice_write,
+#endif
+#if RTLNX_VER_RANGE(2,5,30, 2,6,23)
+ .sendfile = vbsf_reg_sendfile,
+#endif
+ .llseek = vbsf_reg_llseek,
+ .fsync = vbsf_reg_fsync,
+#if RTLNX_VER_MIN(4,5,0)
+ .copy_file_range = vbsf_reg_copy_file_range,
+#endif
+};
+
+
+/**
+ * Inodes operations for regular files.
+ *
+ * Attribute queries go through getattr from 2.5.18 on and through
+ * revalidate on older kernels; setattr is always provided.
+ */
+struct inode_operations vbsf_reg_iops = {
+#if RTLNX_VER_MIN(2,5,18)
+ .getattr = vbsf_inode_getattr,
+#else
+ .revalidate = vbsf_inode_revalidate,
+#endif
+ .setattr = vbsf_inode_setattr,
+};
+
+
+
+/*********************************************************************************************************************************
+* Address Space Operations on Regular Files (for mmap, sendfile, direct I/O) *
+*********************************************************************************************************************************/
+
+/**
+ * Used to read the content of a page into the page cache.
+ *
+ * Needed for mmap and reads+writes when the file is mmapped in a
+ * shared+writeable fashion.
+ *
+ * The page is read from the host in a single page-list request.  If the
+ * host returns less than a full page, the remainder is zero filled.  The
+ * page is unlocked on every return path.
+ *
+ * @param   file        the file the page belongs to
+ * @param   page        the (locked) page to fill; on 5.19+ kernels a folio
+ *                      is passed instead and its page is used
+ * @returns 0 on success, negative errno on failure (the page is then marked
+ *          with an error).
+ */
+#if RTLNX_VER_MIN(5,19,0)
+static int vbsf_read_folio(struct file *file, struct folio *folio)
+{
+    struct page *page = &folio->page;
+#else
+static int vbsf_readpage(struct file *file, struct page *page)
+{
+#endif
+    struct inode       *inode = VBSF_GET_F_DENTRY(file)->d_inode;
+    VBOXSFREADPGLSTREQ *pReq;
+    int                 err;
+
+    SFLOGFLOW(("vbsf_readpage: inode=%p file=%p page=%p off=%#llx\n", inode, file, page, (uint64_t)page->index << PAGE_SHIFT));
+    Assert(PageLocked(page));
+
+    /* Nothing to do if somebody already filled the page. */
+    if (PageUptodate(page)) {
+        unlock_page(page);
+        return 0;
+    }
+
+    if (is_bad_inode(inode)) {
+        err = -EIO;
+    } else if ((pReq = (VBOXSFREADPGLSTREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq))) == NULL) {
+        err = -ENOMEM;
+    } else {
+        struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(inode->i_sb);
+        struct vbsf_reg_info   *pRegInfo   = file->private_data;
+        uint32_t                cbActual;
+        int                     vrc;
+
+        /* A one entry page list covering the whole page. */
+        pReq->PgLst.offFirstPage = 0;
+        pReq->PgLst.aPages[0]    = page_to_phys(page);
+        vrc = VbglR0SfHostReqReadPgLst(pSuperInfo->map.root,
+                                       pReq,
+                                       pRegInfo->Handle.hHost,
+                                       (uint64_t)page->index << PAGE_SHIFT,
+                                       PAGE_SIZE,
+                                       1 /*cPages*/);
+
+        /* Pick up the byte count before the request buffer is freed. */
+        cbActual = pReq->Parms.cb32Read.u.value32;
+        AssertStmt(cbActual <= PAGE_SIZE, cbActual = PAGE_SIZE);
+        VbglR0PhysHeapFree(pReq);
+
+        if (RT_SUCCESS(vrc)) {
+            if (cbActual != PAGE_SIZE) {
+                /* Short read: zero whatever the host didn't fill in. */
+                uint8_t *pbPage = (uint8_t *)kmap(page);
+                RT_BZERO(&pbPage[cbActual], PAGE_SIZE - cbActual);
+                kunmap(page);
+                /** @todo truncate the inode file size? */
+            }
+
+            flush_dcache_page(page);
+            SetPageUptodate(page);
+            unlock_page(page);
+            return 0;
+        }
+        err = -RTErrConvertToErrno(vrc);
+    }
+
+    SetPageError(page);
+    unlock_page(page);
+    return err;
+}
+
+
+/**
+ * Used to write out the content of a dirty page cache page to the host file.
+ *
+ * Needed for mmap and writes when the file is mmapped in a shared+writeable
+ * fashion.
+ *
+ * A writable handle is looked up on the inode's handle list and the page is
+ * written with a single page-list request.  For the page containing the
+ * current end of file only the bytes up to the file size are written.  The
+ * page is unlocked on every return path.
+ *
+ * @param   page        the page to write out
+ * @param   wbc         writeback control (only on 2.5.52+; not used here)
+ * @returns 0 on success, -EIO if the write failed or no writable handle is
+ *          available, -ENOMEM if the request buffer cannot be allocated.
+ */
+#if RTLNX_VER_MIN(2,5,52)
+static int vbsf_writepage(struct page *page, struct writeback_control *wbc)
+#else
+static int vbsf_writepage(struct page *page)
+#endif
+{
+    struct address_space   *mapping = page->mapping;
+    struct inode           *inode   = mapping->host;
+    struct vbsf_inode_info *sf_i    = VBSF_GET_INODE_INFO(inode);
+    struct vbsf_handle     *pHandle = vbsf_handle_find(sf_i, VBSF_HANDLE_F_WRITE, VBSF_HANDLE_F_APPEND);
+    int                     err;
+
+    SFLOGFLOW(("vbsf_writepage: inode=%p page=%p off=%#llx pHandle=%p (%#llx)\n",
+               inode, page, (uint64_t)page->index << PAGE_SHIFT, pHandle, pHandle ? pHandle->hHost : 0));
+
+    if (pHandle) {
+        struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(inode->i_sb);
+        VBOXSFWRITEPGLSTREQ    *pReq       = (VBOXSFWRITEPGLSTREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq));
+        if (pReq) {
+            uint64_t const cbFile    = i_size_read(inode);
+            uint64_t const offInFile = (uint64_t)page->index << PAGE_SHIFT;
+            /* Full page, except for the page holding the end of the file. */
+            uint32_t const cbToWrite = page->index != (cbFile >> PAGE_SHIFT) ? PAGE_SIZE
+                                     : (uint32_t)cbFile & (uint32_t)PAGE_OFFSET_MASK;
+            int            vrc;
+
+            pReq->PgLst.offFirstPage = 0;
+            pReq->PgLst.aPages[0]    = page_to_phys(page);
+            vrc = VbglR0SfHostReqWritePgLst(pSuperInfo->map.root,
+                                            pReq,
+                                            pHandle->hHost,
+                                            offInFile,
+                                            cbToWrite,
+                                            1 /*cPages*/);
+            sf_i->ModificationTimeAtOurLastWrite = sf_i->ModificationTime;
+            /* A short write is treated as a failure.  The message must print
+               the 32-bit value, not the whole parameter structure. */
+            AssertMsgStmt(pReq->Parms.cb32Write.u.value32 == cbToWrite || RT_FAILURE(vrc), /* lazy bird */
+                          ("%#x vs %#x\n", pReq->Parms.cb32Write.u.value32, cbToWrite),
+                          vrc = VERR_WRITE_ERROR);
+            VbglR0PhysHeapFree(pReq);
+
+            if (RT_SUCCESS(vrc)) {
+                /* Update the inode if we've extended the file. */
+                /** @todo is this necessary given the cbToWrite calc above? */
+                uint64_t const offEndOfWrite = offInFile + cbToWrite;
+                if (   offEndOfWrite > cbFile
+                    && offEndOfWrite > i_size_read(inode))
+                    i_size_write(inode, offEndOfWrite);
+
+                /* Update and unlock the page. */
+                if (PageError(page))
+                    ClearPageError(page);
+                SetPageUptodate(page);
+                unlock_page(page);
+
+                vbsf_handle_release(pHandle, pSuperInfo, "vbsf_writepage");
+                return 0;
+            }
+
+            /*
+             * We failed.
+             */
+            err = -EIO;
+        } else
+            err = -ENOMEM;
+        vbsf_handle_release(pHandle, pSuperInfo, "vbsf_writepage");
+    } else {
+        /** @todo we could re-open the file here and deal with this... */
+        static uint64_t volatile s_cCalls = 0;
+        if (s_cCalls++ < 16)
+            printk("vbsf_writepage: no writable handle for %s..\n", sf_i->path->String.ach);
+        err = -EIO;
+    }
+    SetPageError(page);
+    unlock_page(page);
+    return err;
+}
+
+
+#if RTLNX_VER_MIN(2,6,24)
+/**
+ * Called when writing thru the page cache (which we shouldn't be doing).
+ *
+ * Emits a warning to the kernel log and the backdoor logger for the first
+ * 16 calls; later calls are silent.
+ *
+ * @param pos the file position of the write
+ * @param len the length of the write
+ * @param flags the flags passed to write_begin (0 where the kernel has none)
+ */
+static inline void vbsf_write_begin_warn(loff_t pos, unsigned len, unsigned flags)
+{
+ /** @todo r=bird: We shouldn't ever get here, should we? Because we don't use
+ * the page cache for any writes AFAIK. We could just as well use
+ * simple_write_begin & simple_write_end here if we think we really
+ * need to have non-NULL function pointers in the table... */
+ static uint64_t volatile s_cCalls = 0;
+ if (s_cCalls++ < 16) {
+ printk("vboxsf: Unexpected call to vbsf_write_begin(pos=%#llx len=%#x flags=%#x)! Please report.\n",
+ (unsigned long long)pos, len, flags);
+ RTLogBackdoorPrintf("vboxsf: Unexpected call to vbsf_write_begin(pos=%#llx len=%#x flags=%#x)! Please report.\n",
+ (unsigned long long)pos, len, flags);
+# ifdef WARN_ON
+ WARN_ON(1);
+# endif
+ }
+}
+
+/* address_space_operations::write_begin: warn, then defer to
+ simple_write_begin(). The 5.19+ variant has no flags parameter. */
+# if RTLNX_VER_MIN(5,19,0)
+int vbsf_write_begin(struct file *file, struct address_space *mapping, loff_t pos,
+ unsigned len, struct page **pagep, void **fsdata)
+{
+ vbsf_write_begin_warn(pos, len, 0);
+ return simple_write_begin(file, mapping, pos, len, pagep, fsdata);
+}
+# else
+int vbsf_write_begin(struct file *file, struct address_space *mapping, loff_t pos,
+ unsigned len, unsigned flags, struct page **pagep, void **fsdata)
+{
+ vbsf_write_begin_warn(pos, len, flags);
+ return simple_write_begin(file, mapping, pos, len, flags, pagep, fsdata);
+}
+# endif
+
+#endif /* KERNEL_VERSION >= 2.6.24 */
+
+#if RTLNX_VER_MIN(5,14,0)
+/**
+ * Companion to vbsf_write_begin (i.e. shouldn't be called).
+ *
+ * Complains about the first 16 calls and always fails the request.
+ *
+ * @returns -ENOTSUPP
+ */
+static int vbsf_write_end(struct file *file, struct address_space *mapping,
+                          loff_t pos, unsigned int len, unsigned int copied,
+                          struct page *page, void *fsdata)
+{
+    static uint64_t volatile s_cWarnings = 0;
+
+    /* Stay quiet once the warning budget is used up. */
+    if (s_cWarnings++ >= 16)
+        return -ENOTSUPP;
+
+    printk("vboxsf: Unexpected call to vbsf_write_end(pos=%#llx len=%#x)! Please report.\n",
+           (unsigned long long)pos, len);
+    RTLogBackdoorPrintf("vboxsf: Unexpected call to vbsf_write_end(pos=%#llx len=%#x)! Please report.\n",
+                        (unsigned long long)pos, len);
+# ifdef WARN_ON
+    WARN_ON(1);
+# endif
+    return -ENOTSUPP;
+}
+#endif /* KERNEL_VERSION >= 5.14.0 */
+
+
+#if RTLNX_VER_MIN(2,4,10)
+
+# ifdef VBOX_UEK
+# undef iov_iter /* HACK ALERT! Don't put anything needing vbsf_iov_iter after this fun! */
+# endif
+
+/**
+ * This is needed to make open accept O_DIRECT as well as dealing with direct
+ * I/O requests if we don't intercept them earlier.
+ *
+ * The prototype differs between kernel versions, hence the long list of
+ * alternative declarations. The shared body only traces the call and
+ * returns -EINVAL.
+ */
+# if RTLNX_VER_MIN(4, 7, 0) \
+ || (defined(CONFIG_SUSE_KERNEL) && RTLNX_VER_RANGE(4,4,73, 4,4,74) /** @todo Figure out when exactly. */) \
+ || (defined(CONFIG_SUSE_KERNEL) && RTLNX_VER_RANGE(4,4,75, 4,4,90) /** @todo Figure out when exactly. */) \
+ || (defined(CONFIG_SUSE_KERNEL) && RTLNX_VER_RANGE(4,4,92, 4,5,0) /** @todo Figure out when exactly. */)
+static ssize_t vbsf_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
+# elif RTLNX_VER_MIN(4, 1, 0)
+static ssize_t vbsf_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
+# elif RTLNX_VER_MIN(3, 16, 0) || defined(VBOX_UEK)
+static ssize_t vbsf_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
+# elif RTLNX_VER_MIN(2, 6, 6)
+static ssize_t vbsf_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs)
+# elif RTLNX_VER_MIN(2, 5, 55)
+static int vbsf_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs)
+# elif RTLNX_VER_MIN(2, 5, 41)
+static int vbsf_direct_IO(int rw, struct file *file, const struct iovec *iov, loff_t offset, unsigned long nr_segs)
+# elif RTLNX_VER_MIN(2, 5, 35)
+static int vbsf_direct_IO(int rw, struct inode *inode, const struct iovec *iov, loff_t offset, unsigned long nr_segs)
+# elif RTLNX_VER_MIN(2, 5, 26)
+static int vbsf_direct_IO(int rw, struct inode *inode, char *buf, loff_t offset, size_t count)
+# elif LINUX_VERSION_CODE == KERNEL_VERSION(2, 4, 21) && defined(I_NEW) /* RHEL3 Frankenkernel. */
+static int vbsf_direct_IO(int rw, struct file *file, struct kiobuf *buf, unsigned long whatever1, int whatever2)
+# else
+static int vbsf_direct_IO(int rw, struct inode *inode, struct kiobuf *buf, unsigned long whatever1, int whatever2)
+# endif
+{
+ TRACE();
+ return -EINVAL;
+}
+
+#endif
+
+/**
+ * Address space (for the page cache) operations for regular files.
+ *
+ * The member names and the helpers used for them vary with the kernel
+ * version; see the conditionals on the individual entries.
+ *
+ * @todo the FsPerf touch/flush (mmap) test fails on 4.4.0 (ubuntu 16.04 lts).
+ */
+struct address_space_operations vbsf_reg_aops = {
+ /* Page-in: the folio based callback replaces readpage from 5.19 on. */
+#if RTLNX_VER_MIN(5,19,0)
+ .read_folio = vbsf_read_folio,
+#else
+ .readpage = vbsf_readpage,
+#endif
+ .writepage = vbsf_writepage,
+ /** @todo Need .writepages if we want msync performance... */
+#if RTLNX_VER_MIN(5,18,0) || RTLNX_RHEL_RANGE(9,2, 9,99)
+ .dirty_folio = filemap_dirty_folio,
+#elif RTLNX_VER_MIN(2,5,12)
+ .set_page_dirty = __set_page_dirty_buffers,
+#endif
+ /* Page cache writes are not expected; from 2.6.24 on vbsf_write_begin
+ warns when called, and from 5.14 on vbsf_write_end does too. */
+#if RTLNX_VER_MIN(5,14,0)
+ .write_begin = vbsf_write_begin,
+ .write_end = vbsf_write_end,
+#elif RTLNX_VER_MIN(2,6,24)
+ .write_begin = vbsf_write_begin,
+ .write_end = simple_write_end,
+#elif RTLNX_VER_MIN(2,5,45)
+ .prepare_write = simple_prepare_write,
+ .commit_write = simple_commit_write,
+#endif
+#if RTLNX_VER_MIN(2,4,10)
+ .direct_IO = vbsf_direct_IO,
+#endif
+};
diff --git a/src/VBox/Additions/linux/sharedfolders/testcase/tstmmap.c b/src/VBox/Additions/linux/sharedfolders/testcase/tstmmap.c
new file mode 100644
index 00000000..468fe587
--- /dev/null
+++ b/src/VBox/Additions/linux/sharedfolders/testcase/tstmmap.c
@@ -0,0 +1,126 @@
+/* $Id: tstmmap.c $ */
+/** @file
+ * vboxsf - Simple writable mmap testcase.
+ */
+
+/*
+ * Copyright (C) 2019-2022 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+
+
+/**
+ * Writable mmap testcase.
+ *
+ * Creates a 64 KB file filled with 0xf6, maps it shared+writable, overwrites
+ * the mapping with 0xf7, msyncs, unmaps and closes it.  The file is then
+ * reopened and read back to check that every byte is 0xf7.
+ *
+ * @param   argc    Argument count.
+ * @param   argv    Argument vector; argv[1], if given, is the file to use
+ *                  instead of "tstmmap-file1".
+ * @returns 0 if the content verified, 1 on any error or mismatch.
+ */
+int main(int argc, char **argv)
+{
+    uint8_t     abBuf[4096];
+    int         fd;
+    size_t      cErrors = 0;
+    size_t      cbFile;
+    size_t      offFile = 0; /* Must start at zero for the verification loop. */
+    uint8_t    *pbMapping;
+    const char *pszFile = "tstmmap-file1";
+    if (argc > 1)
+        pszFile = argv[1];
+
+    fd = open(pszFile, O_CREAT | O_TRUNC | O_RDWR, 0660);
+    if (fd < 0)
+    {
+        fprintf(stderr, "error creating file: %s\n", pszFile);
+        return 1;
+    }
+
+    /* write 64 KB to the file: */
+    memset(abBuf, 0xf6, sizeof(abBuf));
+    for (cbFile = 0; cbFile < 0x10000; cbFile += sizeof(abBuf))
+        if (write(fd, abBuf, sizeof(abBuf)) != sizeof(abBuf))
+        {
+            fprintf(stderr, "error writing file: %s\n", pszFile);
+            return 1;
+        }
+    fsync(fd);
+
+    /* Map the file: */
+    pbMapping = (uint8_t *)mmap(NULL, cbFile, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+    if (pbMapping == (uint8_t *)MAP_FAILED)
+    {
+        fprintf(stderr, "error mapping file: %s\n", pszFile);
+        return 1;
+    }
+
+    /* Modify the mapping and sync it: */
+    memset(pbMapping, 0xf7, cbFile);
+    if (msync(pbMapping, cbFile, MS_SYNC) != 0)
+    {
+        fprintf(stderr, "error msync'ing file: %s\n", pszFile);
+        return 1;
+    }
+
+    /* Unmap and close it: */
+    if (munmap(pbMapping, cbFile) != 0)
+        fprintf(stderr, "error munmap'ing file: %s\n", pszFile);
+    close(fd);
+
+    /*
+     * Open it again and check the content.
+     */
+    fd = open(pszFile, O_RDWR, 0);
+    if (fd < 0)
+    {
+        fprintf(stderr, "error reopening file: %s\n", pszFile);
+        return 1;
+    }
+
+    while (offFile < cbFile && cErrors < 42)
+    {
+        size_t  offBuf;
+        ssize_t cbRead = read(fd, abBuf, sizeof(abBuf));
+        if (cbRead != (ssize_t)sizeof(abBuf))
+        {
+            fprintf(stderr, "error reading file: %zd, off %#zx (%s)\n", cbRead, offFile, pszFile);
+            return 1;
+        }
+
+        for (offBuf = 0; offBuf < sizeof(abBuf); offBuf++)
+            if (abBuf[offBuf] != 0xf7)
+            {
+                fprintf(stderr, "mismatch at %#zx: %#x, expected %#x\n", offFile + offBuf, abBuf[offBuf], 0xf7);
+                cErrors++;
+                if (cErrors > 42)
+                    break;
+            }
+
+        offFile += sizeof(abBuf);
+    }
+
+    close(fd);
+
+    return cErrors == 0 ? 0 : 1;
+}
+
diff --git a/src/VBox/Additions/linux/sharedfolders/utils.c b/src/VBox/Additions/linux/sharedfolders/utils.c
new file mode 100644
index 00000000..56618222
--- /dev/null
+++ b/src/VBox/Additions/linux/sharedfolders/utils.c
@@ -0,0 +1,1263 @@
+/* $Id: utils.c $ */
+/** @file
+ * vboxsf - VBox Linux Shared Folders VFS, utility functions.
+ *
+ * Utility functions (mainly conversion from/to VirtualBox/Linux data structures).
+ */
+
+/*
+ * Copyright (C) 2006-2022 Oracle and/or its affiliates.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "vfsmod.h"
+#include <iprt/asm.h>
+#include <iprt/err.h>
+#include <linux/vfs.h>
+
+
+/**
+ * Copies a UTF-8 name into @a name, converting it to the NLS encoding of the
+ * mount when an NLS table is configured.
+ *
+ * @returns 0 on success, negative errno on failure: -EINVAL if the UTF-8
+ *          input cannot be decoded, the negative uni2char() status if a code
+ *          point cannot be encoded, -ENAMETOOLONG if the plain copy does not
+ *          fit into the output buffer.
+ * @param   pSuperInfo      The super block info; its nls member selects the
+ *                          conversion (NULL means plain copy).
+ * @param   name            The output buffer.
+ * @param   name_bound_len  Size of the output buffer (asserted to be > 1).
+ * @param   utf8_name       The UTF-8 input.
+ * @param   utf8_len        Length of the input in bytes (asserted to contain
+ *                          no embedded terminator).
+ */
+int vbsf_nlscpy(struct vbsf_super_info *pSuperInfo, char *name, size_t name_bound_len,
+                const unsigned char *utf8_name, size_t utf8_len)
+{
+    const char *pchSrc;
+    size_t      cbSrcLeft;
+    char       *pchDst;
+    size_t      cbDstLeft;
+
+    Assert(name_bound_len > 1);
+    Assert(RTStrNLen(utf8_name, utf8_len) == utf8_len);
+
+    /* No NLS table: the name is passed on as is, terminator included. */
+    if (!pSuperInfo->nls) {
+        if (utf8_len + 1 > name_bound_len)
+            return -ENAMETOOLONG;
+        memcpy(name, utf8_name, utf8_len + 1);
+        return 0;
+    }
+
+    pchSrc    = utf8_name;
+    cbSrcLeft = utf8_len;
+    pchDst    = name;
+    cbDstLeft = name_bound_len - 1; /* keep one byte for the terminator */
+
+    /* Decode one code point at a time and re-encode it using the NLS table. */
+    while (cbSrcLeft) {
+        int cbOutEnc;
+#if RTLNX_VER_MIN(2,6,31)
+        unicode_t uni;
+        int cbInEnc = utf8_to_utf32(pchSrc, cbSrcLeft, &uni);
+#else
+        linux_wchar_t uni;
+        int cbInEnc = utf8_mbtowc(&uni, pchSrc, cbSrcLeft);
+#endif
+        if (cbInEnc < 0) {
+            SFLOG(("vbsf_nlscpy: utf8_to_utf32/utf8_mbtowc failed with %d on %x (pos %u in '%s'), in_bound_len=%u!\n",
+                   cbInEnc, *pchSrc, pchSrc - (const char *)utf8_name, (const char *)utf8_name, (unsigned)cbSrcLeft));
+            return -EINVAL;
+        }
+
+        cbOutEnc = pSuperInfo->nls->uni2char(uni, pchDst, cbDstLeft);
+        if (cbOutEnc < 0) {
+            SFLOG(("vbsf_nlscpy: nls->uni2char failed with %d on %#x (pos %u in '%s'), out_bound_len=%u\n",
+                   cbOutEnc, uni, pchSrc - (const char *)utf8_name, (const char *)utf8_name, (unsigned)cbDstLeft));
+            return cbOutEnc;
+        }
+
+        pchDst    += cbOutEnc;
+        cbDstLeft -= cbOutEnc;
+
+        pchSrc    += cbInEnc;
+        cbSrcLeft -= cbInEnc;
+    }
+
+    *pchDst = '\0';
+    return 0;
+}
+
+
+/**
+ * Converts the given NLS string to a host one (UTF-8 with SHFLSTRING header),
+ * kmalloc'ing the output buffer (use kfree on result).
+ *
+ * When the mount has an NLS table the input is converted code point by code
+ * point (one pass to size the result, a second pass to fill it in); without
+ * one the input is copied unchanged.
+ *
+ * @returns 0 on success, negative errno on failure: -EINVAL on conversion
+ *          errors, -ENOMEM if the allocation fails, -ENAMETOOLONG if the
+ *          result does not fit the 16-bit SHFLSTRING size fields.
+ * @param   pSuperInfo  The super block info; its nls member selects the
+ *                      conversion.
+ * @param   pszNls      The zero terminated input string.
+ * @param   ppString    Where to return the allocated string.  Always set;
+ *                      NULL on failure.
+ */
+int vbsf_nls_to_shflstring(struct vbsf_super_info *pSuperInfo, const char *pszNls, PSHFLSTRING *ppString)
+{
+    int          rc;
+    size_t const cchNls  = strlen(pszNls);
+    PSHFLSTRING  pString = NULL;
+
+    /* Make sure the caller never sees an uninitialized pointer, not even on
+       the early error returns in the length calculation below. */
+    *ppString = NULL;
+
+    if (pSuperInfo->nls) {
+        /*
+         * NLS -> UTF-8 w/ SHLF string header.
+         */
+        /* Calc length first: */
+        size_t cchUtf8 = 0;
+        size_t offNls  = 0;
+        while (offNls < cchNls) {
+            linux_wchar_t uc; /* Note! We renamed the type due to clashes. */
+            int const cbNlsCodepoint = pSuperInfo->nls->char2uni(&pszNls[offNls], cchNls - offNls, &uc);
+            if (cbNlsCodepoint >= 0) {
+                char achTmp[16];
+#if RTLNX_VER_MIN(2,6,31)
+                int cbUtf8Codepoint = utf32_to_utf8(uc, achTmp, sizeof(achTmp));
+#else
+                int cbUtf8Codepoint = utf8_wctomb(achTmp, uc, sizeof(achTmp));
+#endif
+                if (cbUtf8Codepoint > 0) {
+                    cchUtf8 += cbUtf8Codepoint;
+                    offNls  += cbNlsCodepoint;
+                } else {
+                    Log(("vbsf_nls_to_shflstring: utf32_to_utf8/utf8_wctomb(%#x) failed: %d\n", uc, cbUtf8Codepoint));
+                    return -EINVAL;
+                }
+            } else {
+                /* The precision argument of %.*Rhxs must be an int. */
+                Log(("vbsf_nls_to_shflstring: nls->char2uni(%.*Rhxs) failed: %d\n",
+                     (int)RT_MIN(8, cchNls - offNls), &pszNls[offNls], cbNlsCodepoint));
+                return -EINVAL;
+            }
+        }
+        if (cchUtf8 + 1 < _64K) {
+            /* Allocate: */
+            pString = (PSHFLSTRING)kmalloc(SHFLSTRING_HEADER_SIZE + cchUtf8 + 1, GFP_KERNEL);
+            if (pString) {
+                char *pchDst = pString->String.ach;
+                pString->u16Length = (uint16_t)cchUtf8;
+                pString->u16Size   = (uint16_t)(cchUtf8 + 1);
+
+                /* Do the conversion (cchUtf8 is counted down): */
+                rc     = 0;
+                offNls = 0;
+                while (offNls < cchNls) {
+                    linux_wchar_t uc; /* Note! We renamed the type due to clashes. */
+                    int const cbNlsCodepoint = pSuperInfo->nls->char2uni(&pszNls[offNls], cchNls - offNls, &uc);
+                    if (cbNlsCodepoint >= 0) {
+#if RTLNX_VER_MIN(2,6,31)
+                        int cbUtf8Codepoint = utf32_to_utf8(uc, pchDst, cchUtf8);
+#else
+                        int cbUtf8Codepoint = utf8_wctomb(pchDst, uc, cchUtf8);
+#endif
+                        if (cbUtf8Codepoint > 0) {
+                            AssertBreakStmt((size_t)cbUtf8Codepoint <= cchUtf8, rc = -EINVAL);
+                            cchUtf8 -= cbUtf8Codepoint;
+                            pchDst  += cbUtf8Codepoint;
+                            offNls  += cbNlsCodepoint;
+                        } else {
+                            Log(("vbsf_nls_to_shflstring: utf32_to_utf8/utf8_wctomb(%#x) failed! %d, cchUtf8=%zu\n",
+                                 uc, cbUtf8Codepoint, cchUtf8));
+                            rc = -EINVAL;
+                            break;
+                        }
+                    } else {
+                        Log(("vbsf_nls_to_shflstring: nls->char2uni(%.*Rhxs) failed! %d\n",
+                             (int)RT_MIN(8, cchNls - offNls), &pszNls[offNls], cbNlsCodepoint));
+                        rc = -EINVAL;
+                        break;
+                    }
+                }
+                if (rc == 0) {
+                    /*
+                     * Succeeded.  Just terminate the string and we're good.
+                     */
+                    Assert(pchDst - pString->String.ach == pString->u16Length);
+                    *pchDst = '\0';
+                } else {
+                    kfree(pString);
+                    pString = NULL;
+                }
+            } else {
+                Log(("vbsf_nls_to_shflstring: failed to allocate %u bytes\n",
+                     (unsigned)(SHFLSTRING_HEADER_SIZE + cchUtf8 + 1)));
+                rc = -ENOMEM;
+            }
+        } else {
+            Log(("vbsf_nls_to_shflstring: too long: %zu bytes (%zu nls bytes)\n", cchUtf8, cchNls));
+            rc = -ENAMETOOLONG;
+        }
+    } else {
+        /*
+         * UTF-8 -> UTF-8 w/ SHLF string header.
+         */
+        if (cchNls + 1 < _64K) {
+            pString = (PSHFLSTRING)kmalloc(SHFLSTRING_HEADER_SIZE + cchNls + 1, GFP_KERNEL);
+            if (pString) {
+                pString->u16Length = (uint16_t)cchNls;
+                pString->u16Size   = (uint16_t)(cchNls + 1);
+                memcpy(pString->String.ach, pszNls, cchNls);
+                pString->String.ach[cchNls] = '\0';
+                rc = 0;
+            } else {
+                Log(("vbsf_nls_to_shflstring: failed to allocate %u bytes\n",
+                     (unsigned)(SHFLSTRING_HEADER_SIZE + cchNls + 1)));
+                rc = -ENOMEM;
+            }
+        } else {
+            Log(("vbsf_nls_to_shflstring: too long: %zu bytes\n", cchNls));
+            rc = -ENAMETOOLONG;
+        }
+    }
+    *ppString = pString;
+    return rc;
+}
+
+
+/**
+ * Splits a signed nanosecond count into whole seconds, rounded towards
+ * negative infinity, and a non-negative nanosecond remainder.
+ *
+ * do_div() only works on unsigned 64-bit dividends, so the magnitude is
+ * divided and the sign is applied afterwards.  Feeding it a negative value
+ * directly (timestamps before 1970) would divide a huge unsigned number.
+ *
+ * @returns Number of whole seconds.
+ * @param   cNanoSecs   The nanoseconds relative to the unix epoch.
+ * @param   pcNanoRem   Where to return the remainder, 0..RT_NS_1SEC-1.
+ */
+DECLINLINE(int64_t) vbsf_nano_to_sec_floor(int64_t cNanoSecs, uint32_t *pcNanoRem)
+{
+    uint64_t uAbs  = cNanoSecs >= 0 ? (uint64_t)cNanoSecs : (uint64_t)0 - (uint64_t)cNanoSecs;
+    uint32_t uRem  = do_div(uAbs, RT_NS_1SEC);
+    int64_t  cSecs = (int64_t)uAbs;
+    if (cNanoSecs < 0) {
+        cSecs = -cSecs;
+        if (uRem) {
+            /* Borrow a second so that the remainder stays non-negative. */
+            cSecs -= 1;
+            uRem   = RT_NS_1SEC - uRem;
+        }
+    }
+    *pcNanoRem = uRem;
+    return cSecs;
+}
+
+
+/**
+ * Convert from VBox to linux time.
+ *
+ * @param   pLinuxDst   Where to store the linux time (time_t before 2.6.0,
+ *                      struct timespec before 4.18.0, struct timespec64
+ *                      otherwise).
+ * @param   pVBoxSrc    The VBox time to convert.
+ */
+#if RTLNX_VER_MAX(2,6,0)
+DECLINLINE(void) vbsf_time_to_linux(time_t *pLinuxDst, PCRTTIMESPEC pVBoxSrc)
+{
+    uint32_t cNanoRem;
+    *pLinuxDst = vbsf_nano_to_sec_floor(RTTimeSpecGetNano(pVBoxSrc), &cNanoRem);
+    NOREF(cNanoRem);
+}
+#else /* >= 2.6.0 */
+# if RTLNX_VER_MAX(4,18,0)
+DECLINLINE(void) vbsf_time_to_linux(struct timespec *pLinuxDst, PCRTTIMESPEC pVBoxSrc)
+# else
+DECLINLINE(void) vbsf_time_to_linux(struct timespec64 *pLinuxDst, PCRTTIMESPEC pVBoxSrc)
+# endif
+{
+    uint32_t cNanoRem;
+    pLinuxDst->tv_sec  = vbsf_nano_to_sec_floor(RTTimeSpecGetNano(pVBoxSrc), &cNanoRem);
+    pLinuxDst->tv_nsec = cNanoRem;
+}
+#endif /* >= 2.6.0 */
+
+
+/**
+ * Convert from linux to VBox time.
+ *
+ * @param   pVBoxDst    Where to store the VBox time (set in nanoseconds).
+ * @param   pLinuxSrc   The linux time (time_t before 2.6.0, struct timespec
+ *                      before 4.18.0, struct timespec64 otherwise).
+ */
+#if RTLNX_VER_MAX(2,6,0)
+DECLINLINE(void) vbsf_time_to_vbox(PRTTIMESPEC pVBoxDst, time_t *pLinuxSrc)
+{
+    /* Only whole seconds are available here. */
+    int64_t const cNanoSecs = RT_NS_1SEC_64 * *pLinuxSrc;
+    RTTimeSpecSetNano(pVBoxDst, cNanoSecs);
+}
+#else /* >= 2.6.0 */
+# if RTLNX_VER_MAX(4,18,0)
+DECLINLINE(void) vbsf_time_to_vbox(PRTTIMESPEC pVBoxDst, struct timespec const *pLinuxSrc)
+# else
+DECLINLINE(void) vbsf_time_to_vbox(PRTTIMESPEC pVBoxDst, struct timespec64 const *pLinuxSrc)
+# endif
+{
+    /* Seconds scaled to nanoseconds plus the sub-second part. */
+    int64_t const cNanoSecs = pLinuxSrc->tv_nsec + pLinuxSrc->tv_sec * (int64_t)RT_NS_1SEC;
+    RTTimeSpecSetNano(pVBoxDst, cNanoSecs);
+}
+#endif /* >= 2.6.0 */
+
+
+/**
+ * Converts VBox access permissions to Linux ones (mode & 0777).
+ *
+ * The permission bits use the same values on both sides (checked at compile
+ * time below), so this only masks out everything that is not a permission
+ * bit.
+ *
+ * @returns The Linux access permission bits.
+ * @param   fAttr   The VBox file mode.
+ * @note    Currently identical.
+ * @sa      sf_access_permissions_to_vbox
+ */
+DECLINLINE(int) sf_access_permissions_to_linux(uint32_t fAttr)
+{
+    int const fLnxPerms = fAttr & RTFS_UNIX_ALL_ACCESS_PERMS;
+
+    /* Compile time proof that no translation is needed: */
+    AssertCompile(RTFS_UNIX_IRUSR == S_IRUSR);
+    AssertCompile(RTFS_UNIX_IWUSR == S_IWUSR);
+    AssertCompile(RTFS_UNIX_IXUSR == S_IXUSR);
+    AssertCompile(RTFS_UNIX_IRGRP == S_IRGRP);
+    AssertCompile(RTFS_UNIX_IWGRP == S_IWGRP);
+    AssertCompile(RTFS_UNIX_IXGRP == S_IXGRP);
+    AssertCompile(RTFS_UNIX_IROTH == S_IROTH);
+    AssertCompile(RTFS_UNIX_IWOTH == S_IWOTH);
+    AssertCompile(RTFS_UNIX_IXOTH == S_IXOTH);
+
+    return fLnxPerms;
+}
+
+
+/**
+ * Produce the Linux mode mask, given VBox, mount options and file type.
+ *
+ * @returns The Linux mode: permission bits with @a fClearMask removed,
+ *          combined with @a fType.
+ * @param   fVBoxMode   The VBox file mode, used unless a fixed mode is given.
+ * @param   fFixedMode  Fixed permissions overriding the host ones (only the
+ *                      low 0777 bits are used); ~0 means not set.
+ * @param   fClearMask  Bits to clear from the permissions.
+ * @param   fType       The Linux file type bits to OR in.
+ */
+DECLINLINE(int) sf_file_mode_to_linux(uint32_t fVBoxMode, int fFixedMode, int fClearMask, int fType)
+{
+    int const fPerms = fFixedMode != ~0
+                     ? fFixedMode & 0777
+                     : sf_access_permissions_to_linux(fVBoxMode);
+    return (fPerms & ~fClearMask) | fType;
+}
+
+
+/**
+ * Initializes the @a inode attributes based on @a pObjInfo and @a pSuperInfo
+ * options.
+ *
+ * Selects mode and operation tables by object type (directory, symlink, and
+ * regular file for everything else), applies the uid/gid of the mount and
+ * copies size, block count and timestamps.  The TTL bookkeeping in @a sf_i
+ * is reset as well.
+ *
+ * @param   inode       The inode to initialize.
+ * @param   sf_i        Our inode info belonging to @a inode.
+ * @param   pObjInfo    The object info from the host.
+ * @param   pSuperInfo  The super block info with the mount options.
+ */
+void vbsf_init_inode(struct inode *inode, struct vbsf_inode_info *sf_i, PSHFLFSOBJINFO pObjInfo,
+                     struct vbsf_super_info *pSuperInfo)
+{
+    PCSHFLFSOBJATTR pObjAttr = &pObjInfo->Attr;
+
+    TRACE();
+
+    sf_i->ts_up_to_date = jiffies;
+    sf_i->force_restat  = 0;
+
+    /*
+     * Type specific bits: mode and operation tables.
+     */
+    if (RTFS_IS_DIRECTORY(pObjAttr->fMode)) {
+        inode->i_mode = sf_file_mode_to_linux(pObjAttr->fMode, pSuperInfo->dmode, pSuperInfo->dmask, S_IFDIR);
+        inode->i_op   = &vbsf_dir_iops;
+        inode->i_fop  = &vbsf_dir_fops;
+    } else if (RTFS_IS_SYMLINK(pObjAttr->fMode)) {
+        /** @todo r=bird: Aren't System V symlinks w/o any mode mask?  IIRC there is
+         *        no lchmod on Linux. */
+        inode->i_mode = sf_file_mode_to_linux(pObjAttr->fMode, pSuperInfo->fmode, pSuperInfo->fmask, S_IFLNK);
+        inode->i_op   = &vbsf_lnk_iops;
+    } else {
+        inode->i_mode = sf_file_mode_to_linux(pObjAttr->fMode, pSuperInfo->fmode, pSuperInfo->fmask, S_IFREG);
+        inode->i_op   = &vbsf_reg_iops;
+        inode->i_fop  = &vbsf_reg_fops;
+        inode->i_mapping->a_ops = &vbsf_reg_aops;
+#if RTLNX_VER_RANGE(2,5,17, 4,0,0)
+        inode->i_mapping->backing_dev_info = &pSuperInfo->bdi; /* This is needed for mmap. */
+#endif
+    }
+
+    /* Every object type gets a link count of one.
+       XXX: for directories this probably should be set to the number of
+            entries in the directory plus two (. ..) */
+    set_nlink(inode, 1);
+
+    /*
+     * Ownership comes from the mount options.
+     */
+#if RTLNX_VER_MIN(3,5,0)
+    inode->i_uid = make_kuid(current_user_ns(), pSuperInfo->uid);
+    inode->i_gid = make_kgid(current_user_ns(), pSuperInfo->gid);
+#else
+    inode->i_uid = pSuperInfo->uid;
+    inode->i_gid = pSuperInfo->gid;
+#endif
+
+    /*
+     * Sizes.
+     */
+    inode->i_size = pObjInfo->cbObject;
+#if RTLNX_VER_MAX(2,6,19) && !defined(KERNEL_FC6)
+    inode->i_blksize = 4096;
+#endif
+#if RTLNX_VER_MIN(2,4,11)
+    inode->i_blkbits = 12;
+#endif
+    /* i_blocks always in units of 512 bytes! */
+    inode->i_blocks = (pObjInfo->cbAllocated + 511) / 512;
+
+    /*
+     * Timestamps.
+     */
+    vbsf_time_to_linux(&inode->i_atime, &pObjInfo->AccessTime);
+    vbsf_time_to_linux(&inode->i_ctime, &pObjInfo->ChangeTime);
+    vbsf_time_to_linux(&inode->i_mtime, &pObjInfo->ModificationTime);
+    sf_i->BirthTime        = pObjInfo->BirthTime;
+    sf_i->ModificationTime = pObjInfo->ModificationTime;
+    RTTimeSpecSetSeconds(&sf_i->ModificationTimeAtOurLastWrite, 0);
+}
+
+
+/**
+ * Update the inode with new object info from the host.
+ *
+ * Called by the inode revalidation code (vbsf_inode_revalidate_worker() and
+ * vbsf_inode_revalidate_with_handle()) as well as by vbsf_inode_setattr().
+ *
+ * Recalculates the mode, refreshes sizes and timestamps, marks the inode
+ * info as up to date and, if the host modification time changed in a way
+ * that suggests a host side change, invalidates the page cache of regular
+ * files.  If the file type changed, the inode is reinitialized via
+ * vbsf_init_inode() instead.
+ *
+ * @param pInode The inode to update.
+ * @param pInodeInfo Our inode info belonging to @a pInode.
+ * @param pObjInfo The fresh object info from the host.
+ * @param pSuperInfo The super block info (mount options, cache mode).
+ * @param fInodeLocked Whether the caller already holds the inode lock.
+ * When false the lock is taken here on kernels >= 4.5.0.
+ * @param fSetAttrs The ATTR_XXX flags the caller is in the process of
+ * setting (0 when revalidating).  No page cache
+ * invalidation is done when ATTR_MTIME or ATTR_SIZE is
+ * included.
+ */
+void vbsf_update_inode(struct inode *pInode, struct vbsf_inode_info *pInodeInfo, PSHFLFSOBJINFO pObjInfo,
+ struct vbsf_super_info *pSuperInfo, bool fInodeLocked, unsigned fSetAttrs)
+{
+ PCSHFLFSOBJATTR pAttr = &pObjInfo->Attr;
+ int fMode;
+
+ TRACE();
+
+#if RTLNX_VER_MIN(4,5,0)
+ if (!fInodeLocked)
+ inode_lock(pInode);
+#endif
+
+ /*
+ * Calc new mode mask and update it if it changed.
+ */
+ if (RTFS_IS_DIRECTORY(pAttr->fMode))
+ fMode = sf_file_mode_to_linux(pAttr->fMode, pSuperInfo->dmode, pSuperInfo->dmask, S_IFDIR);
+ else if (RTFS_IS_SYMLINK(pAttr->fMode))
+ /** @todo r=bird: Aren't System V symlinks w/o any mode mask? IIRC there is
+ * no lchmod on Linux. */
+ fMode = sf_file_mode_to_linux(pAttr->fMode, pSuperInfo->fmode, pSuperInfo->fmask, S_IFLNK);
+ else
+ fMode = sf_file_mode_to_linux(pAttr->fMode, pSuperInfo->fmode, pSuperInfo->fmask, S_IFREG);
+
+ if (fMode == pInode->i_mode) {
+ /* likely */
+ } else {
+ /* Same file type: only the permission bits changed. */
+ if ((fMode & S_IFMT) == (pInode->i_mode & S_IFMT))
+ pInode->i_mode = fMode;
+ else {
+ /* The file type itself changed: start over with a full init and
+ leave, dropping the lock if we took it above. */
+ SFLOGFLOW(("vbsf_update_inode: Changed from %o to %o (%s)\n",
+ pInode->i_mode & S_IFMT, fMode & S_IFMT, pInodeInfo->path->String.ach));
+ /** @todo we probably need to be more drastic... */
+ vbsf_init_inode(pInode, pInodeInfo, pObjInfo, pSuperInfo);
+
+#if RTLNX_VER_MIN(4,5,0)
+ if (!fInodeLocked)
+ inode_unlock(pInode);
+#endif
+ return;
+ }
+ }
+
+ /*
+ * Update the sizes.
+ * Note! i_blocks is always in units of 512 bytes!
+ */
+ pInode->i_blocks = (pObjInfo->cbAllocated + 511) / 512;
+ i_size_write(pInode, pObjInfo->cbObject);
+
+ /*
+ * Update the timestamps.
+ */
+ vbsf_time_to_linux(&pInode->i_atime, &pObjInfo->AccessTime);
+ vbsf_time_to_linux(&pInode->i_ctime, &pObjInfo->ChangeTime);
+ vbsf_time_to_linux(&pInode->i_mtime, &pObjInfo->ModificationTime);
+ pInodeInfo->BirthTime = pObjInfo->BirthTime;
+
+ /*
+ * Mark it as up to date.
+ * Best to do this before we start with any expensive map invalidation.
+ */
+ pInodeInfo->ts_up_to_date = jiffies;
+ pInodeInfo->force_restat = 0;
+
+ /*
+ * If the modification time changed, we may have to invalidate the page
+ * cache pages associated with this inode if we suspect the change was
+ * made by the host. How suspicious we are depends on the cache mode.
+ *
+ * Note! The invalidate_inode_pages() call is pretty weak. It will _not_
+ * touch pages that are already mapped into an address space, but it
+ * will help if the file isn't currently mmap'ed or if we're in read
+ * or read/write caching mode.
+ */
+ if (!RTTimeSpecIsEqual(&pInodeInfo->ModificationTime, &pObjInfo->ModificationTime)) {
+ if (RTFS_IS_FILE(pAttr->fMode)) {
+ /* Only consider invalidating when the caller isn't itself changing
+ the modification time or the size. */
+ if (!(fSetAttrs & (ATTR_MTIME | ATTR_SIZE))) {
+ bool fInvalidate;
+ if (pSuperInfo->enmCacheMode == kVbsfCacheMode_None) {
+ fInvalidate = true; /* No-caching: always invalidate. */
+ } else {
+ if (RTTimeSpecIsEqual(&pInodeInfo->ModificationTimeAtOurLastWrite, &pInodeInfo->ModificationTime)) {
+ fInvalidate = false; /* Could be our write, so don't invalidate anything */
+ RTTimeSpecSetSeconds(&pInodeInfo->ModificationTimeAtOurLastWrite, 0);
+ } else {
+ /*RTLogBackdoorPrintf("vbsf_update_inode: Invalidating the mapping %s - %RU64 vs %RU64 vs %RU64 - %#x\n",
+ pInodeInfo->path->String.ach,
+ RTTimeSpecGetNano(&pInodeInfo->ModificationTimeAtOurLastWrite),
+ RTTimeSpecGetNano(&pInodeInfo->ModificationTime),
+ RTTimeSpecGetNano(&pObjInfo->ModificationTime), fSetAttrs);*/
+ fInvalidate = true; /* We haven't modified the file recently, so probably a host update. */
+ }
+ }
+ /* Remember the new host modification time before invalidating. */
+ pInodeInfo->ModificationTime = pObjInfo->ModificationTime;
+
+ if (fInvalidate) {
+ struct address_space *mapping = pInode->i_mapping;
+ if (mapping && mapping->nrpages > 0) {
+ SFLOGFLOW(("vbsf_update_inode: Invalidating the mapping %s (%#x)\n", pInodeInfo->path->String.ach, fSetAttrs));
+#if RTLNX_VER_MIN(2,6,34)
+ invalidate_mapping_pages(mapping, 0, ~(pgoff_t)0);
+#elif RTLNX_VER_MIN(2,5,41)
+ invalidate_inode_pages(mapping);
+#else
+ invalidate_inode_pages(pInode);
+#endif
+ }
+ }
+ } else {
+ /* The caller is setting mtime and/or size: just record the new
+ time and reset the last-write marker. */
+ RTTimeSpecSetSeconds(&pInodeInfo->ModificationTimeAtOurLastWrite, 0);
+ pInodeInfo->ModificationTime = pObjInfo->ModificationTime;
+ }
+ } else
+ pInodeInfo->ModificationTime = pObjInfo->ModificationTime;
+ }
+
+ /*
+ * Done.
+ */
+#if RTLNX_VER_MIN(4,5,0)
+ if (!fInodeLocked)
+ inode_unlock(pInode);
+#endif
+}
+
+
+/**
+ * Queries the object info for @a path from the host using a lookup-only
+ * create request.
+ *
+ * @returns 0 on success, -ENOENT if the object does not exist (or the host
+ *          rejects the name), -EPROTO on other host failures, -ENOMEM if the
+ *          request cannot be allocated.
+ * @param   caller      Name of the caller, for logging only.
+ * @param   pSuperInfo  The super block info identifying the mapping.
+ * @param   path        The path to query.
+ * @param   result      Where to return the object info on success.
+ * @param   ok_to_fail  When set, a missing object is not logged.
+ * @note    Currently only used for the root directory during (re-)mount.
+ */
+int vbsf_stat(const char *caller, struct vbsf_super_info *pSuperInfo, SHFLSTRING *path, PSHFLFSOBJINFO result, int ok_to_fail)
+{
+    int              rc;
+    VBOXSFCREATEREQ *pReq;
+    NOREF(caller);
+
+    TRACE();
+
+    pReq = (VBOXSFCREATEREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq) + path->u16Size);
+    if (!pReq)
+        return -ENOMEM;
+
+    /* Set up a pure lookup request for the path. */
+    RT_ZERO(*pReq);
+    memcpy(&pReq->StrPath, path, SHFLSTRING_HEADER_SIZE + path->u16Size);
+    pReq->CreateParms.Handle      = SHFL_HANDLE_NIL;
+    pReq->CreateParms.CreateFlags = SHFL_CF_LOOKUP | SHFL_CF_ACT_FAIL_IF_NEW;
+
+    LogFunc(("Calling VbglR0SfHostReqCreate on %s\n", path->String.utf8));
+    rc = VbglR0SfHostReqCreate(pSuperInfo->map.root, pReq);
+    if (RT_FAILURE(rc)) {
+        if (rc == VERR_INVALID_NAME)
+            rc = -ENOENT; /* this can happen for names like 'foo*' on a Windows host */
+        else {
+            LogFunc(("VbglR0SfHostReqCreate failed on %s: %Rrc (caller=%s)\n", path->String.utf8, rc, caller));
+            rc = -EPROTO;
+        }
+    } else if (pReq->CreateParms.Result == SHFL_FILE_EXISTS) {
+        *result = pReq->CreateParms.Info;
+        rc = 0;
+    } else {
+        if (!ok_to_fail)
+            LogFunc(("VbglR0SfHostReqCreate on %s: file does not exist: %d (caller=%s)\n",
+                     path->String.utf8, pReq->CreateParms.Result, caller));
+        rc = -ENOENT;
+    }
+
+    VbglR0PhysHeapFree(pReq);
+    return rc;
+}
+
+
+/**
+ * Revalidate an inode, inner worker.
+ *
+ * Does nothing if the cached inode info is still within its TTL (and neither
+ * @a fForced nor the force_restat flag is set).  Otherwise the object info
+ * is queried from the host, preferably through an already open handle and
+ * failing that by path, and the inode is updated via vbsf_update_inode().
+ *
+ * @returns 0 on success, negative errno on failure.
+ * @param   dentry          The dentry whose inode should be revalidated.
+ * @param   fForced         Whether to ignore the TTL and always query the
+ *                          host.
+ * @param   fInodeLocked    Whether the caller holds the inode lock (passed
+ *                          on to vbsf_update_inode()).
+ * @sa      sf_inode_revalidate()
+ */
+int vbsf_inode_revalidate_worker(struct dentry *dentry, bool fForced, bool fInodeLocked)
+{
+    int rc;
+    struct inode *pInode = dentry ? dentry->d_inode : NULL;
+    if (pInode) {
+        struct vbsf_inode_info *sf_i       = VBSF_GET_INODE_INFO(pInode);
+        struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(pInode->i_sb);
+        AssertReturn(sf_i, -EINVAL);
+        AssertReturn(pSuperInfo, -EINVAL);
+
+        /*
+         * Can we get away without any action here?
+         */
+        if (   !fForced
+            && !sf_i->force_restat
+            && jiffies - sf_i->ts_up_to_date < pSuperInfo->cJiffiesInodeTTL)
+            rc = 0;
+        else {
+            /*
+             * No, we have to query the file info from the host.
+             * Try get a handle we can query, any kind of handle will do here.
+             */
+            struct vbsf_handle *pHandle = vbsf_handle_find(sf_i, 0, 0);
+            if (pHandle) {
+                /* Query thru pHandle. */
+                VBOXSFOBJINFOREQ *pReq = (VBOXSFOBJINFOREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq));
+                if (pReq) {
+                    RT_ZERO(*pReq);
+                    rc = VbglR0SfHostReqQueryObjInfo(pSuperInfo->map.root, pReq, pHandle->hHost);
+                    if (RT_SUCCESS(rc)) {
+                        /*
+                         * Reset the TTL and copy the info over into the inode structure.
+                         */
+                        vbsf_update_inode(pInode, sf_i, &pReq->ObjInfo, pSuperInfo, fInodeLocked, 0 /*fSetAttrs*/);
+                        /* Success statuses need not be zero; callers test for
+                           rc == 0, so normalize like the path branch does. */
+                        rc = 0;
+                    } else if (rc == VERR_INVALID_HANDLE) {
+                        rc = -ENOENT; /* Restore.*/
+                    } else {
+                        LogFunc(("VbglR0SfHostReqQueryObjInfo failed on %#RX64: %Rrc\n", pHandle->hHost, rc));
+                        rc = -RTErrConvertToErrno(rc);
+                    }
+                    VbglR0PhysHeapFree(pReq);
+                } else
+                    rc = -ENOMEM;
+                vbsf_handle_release(pHandle, pSuperInfo, "vbsf_inode_revalidate_worker");
+
+            } else {
+                /* Query via path. */
+                SHFLSTRING      *pPath = sf_i->path;
+                VBOXSFCREATEREQ *pReq  = (VBOXSFCREATEREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq) + pPath->u16Size);
+                if (pReq) {
+                    RT_ZERO(*pReq);
+                    memcpy(&pReq->StrPath, pPath, SHFLSTRING_HEADER_SIZE + pPath->u16Size);
+                    pReq->CreateParms.Handle      = SHFL_HANDLE_NIL;
+                    pReq->CreateParms.CreateFlags = SHFL_CF_LOOKUP | SHFL_CF_ACT_FAIL_IF_NEW;
+
+                    rc = VbglR0SfHostReqCreate(pSuperInfo->map.root, pReq);
+                    if (RT_SUCCESS(rc)) {
+                        if (pReq->CreateParms.Result == SHFL_FILE_EXISTS) {
+                            /*
+                             * Reset the TTL and copy the info over into the inode structure.
+                             */
+                            vbsf_update_inode(pInode, sf_i, &pReq->CreateParms.Info, pSuperInfo, fInodeLocked, 0 /*fSetAttrs*/);
+                            rc = 0;
+                        } else {
+                            rc = -ENOENT;
+                        }
+                    } else if (rc == VERR_INVALID_NAME) {
+                        rc = -ENOENT; /* this can happen for names like 'foo*' on a Windows host */
+                    } else {
+                        LogFunc(("VbglR0SfHostReqCreate failed on %s: %Rrc\n", pPath->String.ach, rc));
+                        rc = -EPROTO;
+                    }
+                    VbglR0PhysHeapFree(pReq);
+                }
+                else
+                    rc = -ENOMEM;
+            }
+        }
+    } else {
+        LogFunc(("no dentry(%p) or inode(%p)\n", dentry, pInode));
+        rc = -EINVAL;
+    }
+    return rc;
+}
+
+
+#if RTLNX_VER_MAX(2,5,18)
+/**
+ * Revalidate an inode for 2.4.
+ *
+ * This is called in the stat(), lstat() and readlink() code paths.  In the stat
+ * cases the caller will use the result afterwards to produce the stat data.
+ *
+ * @returns Whatever vbsf_inode_revalidate_worker() returns.
+ * @param   dentry  The dentry to revalidate.
+ * @note    2.4.x has a getattr() inode operation too, but it is not used.
+ */
+int vbsf_inode_revalidate(struct dentry *dentry)
+{
+    /* 2.4.x does not have inode level locking, so the worker is told that the
+       inode is already locked.  The revalidation is never forced here. */
+    int const rc = vbsf_inode_revalidate_worker(dentry, false /*fForced*/, true /*fInodeLocked*/);
+    return rc;
+}
+#endif /* < 2.5.18 */
+
+
+/**
+ * Similar to sf_inode_revalidate, but uses associated host file handle as that
+ * is quite a bit faster.
+ *
+ * @returns 0 on success, negative errno on failure.
+ * @param   dentry          The dentry whose inode should be revalidated.
+ * @param   hHostFile       The host handle to query the object info with.
+ * @param   fForced         Whether to ignore the TTL and always query the
+ *                          host.
+ * @param   fInodeLocked    Whether the caller holds the inode lock (passed
+ *                          on to vbsf_update_inode()).
+ */
+int vbsf_inode_revalidate_with_handle(struct dentry *dentry, SHFLHANDLE hHostFile, bool fForced, bool fInodeLocked)
+{
+    int err;
+    struct inode *pInode = dentry ? dentry->d_inode : NULL;
+    if (!pInode) {
+        LogFunc(("no dentry(%p) or inode(%p)\n", dentry, pInode));
+        err = -EINVAL;
+    } else {
+        struct vbsf_inode_info *sf_i       = VBSF_GET_INODE_INFO(pInode);
+        struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(pInode->i_sb);
+        AssertReturn(sf_i, -EINVAL);
+        AssertReturn(pSuperInfo, -EINVAL);
+
+        /*
+         * Can we get away without any action here?
+         */
+        if (   !fForced
+            && !sf_i->force_restat
+            && jiffies - sf_i->ts_up_to_date < pSuperInfo->cJiffiesInodeTTL)
+            err = 0;
+        else {
+            /*
+             * No, we have to query the file info from the host.
+             */
+            VBOXSFOBJINFOREQ *pReq = (VBOXSFOBJINFOREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq));
+            if (pReq) {
+                RT_ZERO(*pReq);
+                err = VbglR0SfHostReqQueryObjInfo(pSuperInfo->map.root, pReq, hHostFile);
+                if (RT_SUCCESS(err)) {
+                    /*
+                     * Reset the TTL and copy the info over into the inode structure.
+                     */
+                    vbsf_update_inode(pInode, sf_i, &pReq->ObjInfo, pSuperInfo, fInodeLocked, 0 /*fSetAttrs*/);
+                    /* Success statuses need not be zero; hand a plain 0 back
+                       to the Linux side. */
+                    err = 0;
+                } else {
+                    LogFunc(("VbglR0SfHostReqQueryObjInfo failed on %#RX64: %Rrc\n", hHostFile, err));
+                    err = -RTErrConvertToErrno(err);
+                }
+                VbglR0PhysHeapFree(pReq);
+            } else
+                err = -ENOMEM;
+        }
+    }
+    return err;
+}
+
+
+/**
+ * Inode getattr operation (kernels >= 2.5.18).
+ *
+ * This is a proxy for the inode revalidation, which as a side effect updates
+ * the inode attributes of @a dentry.  The kstat is then derived from these
+ * attributes using generic_fillattr().
+ *
+ * On 4.11 and later the AT_STATX_SYNC_TYPE bits in the flags select between
+ * the default revalidation, a forced one and none at all, and the birth time
+ * is added to the result.  The preferred I/O block size is overridden for
+ * regular files and directories.
+ *
+ * @returns 0 on success, otherwise the status of the revalidation worker.
+ */
+#if RTLNX_VER_MIN(2,5,18)
+
+# if RTLNX_VER_MIN(5,12,0)
+int vbsf_inode_getattr(struct user_namespace *ns, const struct path *path,
+ struct kstat *kstat, u32 request_mask, unsigned int flags)
+# elif RTLNX_VER_MIN(4,11,0)
+int vbsf_inode_getattr(const struct path *path, struct kstat *kstat, u32 request_mask, unsigned int flags)
+# else
+int vbsf_inode_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *kstat)
+# endif
+{
+ int rc;
+# if RTLNX_VER_MIN(4,11,0)
+ /* From 4.11 on the dentry is passed via the path structure. */
+ struct dentry *dentry = path->dentry;
+# endif
+
+# if RTLNX_VER_MIN(4,11,0)
+ SFLOGFLOW(("vbsf_inode_getattr: dentry=%p request_mask=%#x flags=%#x\n", dentry, request_mask, flags));
+# else
+ SFLOGFLOW(("vbsf_inode_getattr: dentry=%p\n", dentry));
+# endif
+
+# if RTLNX_VER_MIN(4,11,0)
+ /*
+ * With the introduction of statx() userland can control whether we
+ * update the inode information or not.
+ */
+ switch (flags & AT_STATX_SYNC_TYPE) {
+ default:
+ rc = vbsf_inode_revalidate_worker(dentry, false /*fForced*/, false /*fInodeLocked*/);
+ break;
+
+ case AT_STATX_FORCE_SYNC:
+ rc = vbsf_inode_revalidate_worker(dentry, true /*fForced*/, false /*fInodeLocked*/);
+ break;
+
+ case AT_STATX_DONT_SYNC:
+ rc = 0;
+ break;
+ }
+# else
+ rc = vbsf_inode_revalidate_worker(dentry, false /*fForced*/, false /*fInodeLocked*/);
+# endif
+ if (rc == 0) {
+ /* Do generic filling in of info. */
+# if RTLNX_VER_MIN(5,12,0)
+ generic_fillattr(ns, dentry->d_inode, kstat);
+# else
+ generic_fillattr(dentry->d_inode, kstat);
+# endif
+
+ /* Add birth time. */
+# if RTLNX_VER_MIN(4,11,0)
+ if (dentry->d_inode) {
+ struct vbsf_inode_info *pInodeInfo = VBSF_GET_INODE_INFO(dentry->d_inode);
+ if (pInodeInfo) {
+ vbsf_time_to_linux(&kstat->btime, &pInodeInfo->BirthTime);
+ kstat->result_mask |= STATX_BTIME;
+ }
+ }
+# endif
+
+ /*
+ * FsPerf shows the following numbers for sequential file access against
+ * a tmpfs folder on an AMD 1950X host running debian buster/sid:
+ *
+ * block size = r128600 ----- r128755 -----
+ * reads reads writes
+ * 4096 KB = 2254 MB/s 4953 MB/s 3668 MB/s
+ * 2048 KB = 2368 MB/s 4908 MB/s 3541 MB/s
+ * 1024 KB = 2208 MB/s 4011 MB/s 3291 MB/s
+ * 512 KB = 1908 MB/s 3399 MB/s 2721 MB/s
+ * 256 KB = 1625 MB/s 2679 MB/s 2251 MB/s
+ * 128 KB = 1413 MB/s 1967 MB/s 1684 MB/s
+ * 64 KB = 1152 MB/s 1409 MB/s 1265 MB/s
+ * 32 KB = 726 MB/s 815 MB/s 783 MB/s
+ * 16 KB = 683 MB/s 475 MB/s
+ * 8 KB = 294 MB/s 286 MB/s
+ * 4 KB = 145 MB/s 156 MB/s 149 MB/s
+ *
+ */
+ /* Report 1 MB as block size for regular files and 16 KB for
+ directories; other file types keep the generic value. */
+ if (S_ISREG(kstat->mode))
+ kstat->blksize = _1M;
+ else if (S_ISDIR(kstat->mode))
+ /** @todo this may need more tuning after we rewrite the directory handling. */
+ kstat->blksize = _16K;
+ }
+ return rc;
+}
+#endif /* >= 2.5.18 */
+
+
+/**
+ * Modify inode attributes.
+ *
+ * Only mode, access/modification/change times and size are passed on to the
+ * host; other attributes are silently ignored.  The change is made through
+ * an already open handle when a suitable one exists, otherwise the object is
+ * opened on the host for the duration of the call.  The inode is refreshed
+ * from the object info the host returns.
+ *
+ * @returns 0 on success, negative errno on failure.
+ * @param   ns      The user namespace (5.12+ only), passed on to
+ *                  setattr_prepare().
+ * @param   dentry  The dentry of the object to modify.
+ * @param   iattr   The attributes to set.  ATTR_FORCE is added to ia_valid.
+ */
+#if RTLNX_VER_MIN(5,12,0)
+int vbsf_inode_setattr(struct user_namespace *ns, struct dentry *dentry, struct iattr *iattr)
+#else
+int vbsf_inode_setattr(struct dentry *dentry, struct iattr *iattr)
+#endif
+{
+    struct inode           *pInode     = dentry->d_inode;
+    struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(pInode->i_sb);
+    struct vbsf_inode_info *sf_i       = VBSF_GET_INODE_INFO(pInode);
+    int vrc;
+    int rc;
+
+    SFLOGFLOW(("vbsf_inode_setattr: dentry=%p inode=%p ia_valid=%#x %s\n",
+               dentry, pInode, iattr->ia_valid, sf_i ? sf_i->path->String.ach : NULL));
+    AssertReturn(sf_i, -EINVAL);
+
+    /*
+     * Do minimal attribute permission checks.  We set ATTR_FORCE since we cannot
+     * preserve ownership and such and would end up with EPERM here more often than
+     * we would like.  For instance it would cause 'cp' to complain about EPERM
+     * from futimes() when asked to preserve times, see ticketref:18569.
+     */
+    iattr->ia_valid |= ATTR_FORCE;
+#if (RTLNX_VER_RANGE(3,16,39, 3,17,0)) || RTLNX_VER_MIN(4,9,0) || (RTLNX_VER_RANGE(4,1,37, 4,2,0)) || RTLNX_UBUNTU_ABI_MIN(4,4,255,208)
+# if RTLNX_VER_MIN(5,12,0)
+    rc = setattr_prepare(ns, dentry, iattr);
+# else
+    rc = setattr_prepare(dentry, iattr);
+# endif
+#else
+    rc = inode_change_ok(pInode, iattr);
+#endif
+    if (rc == 0) {
+        /*
+         * Don't modify MTIME and CTIME for open(O_TRUNC) and ftruncate, those
+         * operations will set those timestamps automatically.  Saves a host call.
+         */
+        unsigned fAttrs = iattr->ia_valid;
+#if RTLNX_VER_MIN(2,6,15)
+        fAttrs &= ~ATTR_FILE;
+#endif
+        if (   fAttrs == (ATTR_SIZE | ATTR_MTIME | ATTR_CTIME)
+#if RTLNX_VER_MIN(2,6,24)
+            || (fAttrs & (ATTR_OPEN | ATTR_SIZE)) == (ATTR_OPEN | ATTR_SIZE)
+#endif
+           )
+            fAttrs &= ~(ATTR_MTIME | ATTR_CTIME);
+
+        /*
+         * We only implement a handful of attributes, so ignore any attempts
+         * at setting bits we don't support.
+         */
+        if (fAttrs & (ATTR_MODE | ATTR_ATIME | ATTR_MTIME | ATTR_CTIME | ATTR_SIZE)) {
+            /*
+             * Try find a handle which allows us to modify the attributes, otherwise
+             * open the file/dir/whatever.
+             */
+            union SetAttrReqs
+            {
+                VBOXSFCREATEREQ         Create;
+                VBOXSFOBJINFOREQ        Info;
+                VBOXSFSETFILESIZEREQ    SetSize;
+                VBOXSFCLOSEREQ          Close;
+            } *pReq;
+            size_t cbReq;
+            SHFLHANDLE hHostFile;
+            /** @todo ATTR_FILE (2.6.15+) could be helpful here if we like. */
+            struct vbsf_handle *pHandle = fAttrs & ATTR_SIZE
+                                        ? vbsf_handle_find(sf_i, VBSF_HANDLE_F_WRITE, 0)
+                                        : vbsf_handle_find(sf_i, 0, 0);
+            if (pHandle) {
+                hHostFile = pHandle->hHost;
+                cbReq = RT_MAX(sizeof(VBOXSFOBJINFOREQ), sizeof(VBOXSFSETFILESIZEREQ));
+                pReq = (union SetAttrReqs *)VbglR0PhysHeapAlloc(cbReq);
+                if (pReq) {
+                    /* likely */
+                } else
+                    rc = -ENOMEM;
+            } else {
+                /* No usable handle: open the object on the host ourselves.  The
+                   request buffer is sized so it can be reused for the later
+                   requests. */
+                hHostFile = SHFL_HANDLE_NIL;
+                cbReq = RT_MAX(sizeof(pReq->Info), sizeof(pReq->Create) + SHFLSTRING_HEADER_SIZE + sf_i->path->u16Size);
+                pReq = (union SetAttrReqs *)VbglR0PhysHeapAlloc(cbReq);
+                if (pReq) {
+                    RT_ZERO(pReq->Create.CreateParms);
+                    pReq->Create.CreateParms.Handle      = SHFL_HANDLE_NIL;
+                    pReq->Create.CreateParms.CreateFlags = SHFL_CF_ACT_OPEN_IF_EXISTS
+                                                         | SHFL_CF_ACT_FAIL_IF_NEW
+                                                         | SHFL_CF_ACCESS_ATTR_WRITE;
+                    if (fAttrs & ATTR_SIZE)
+                        pReq->Create.CreateParms.CreateFlags |= SHFL_CF_ACCESS_WRITE;
+                    memcpy(&pReq->Create.StrPath, sf_i->path, SHFLSTRING_HEADER_SIZE + sf_i->path->u16Size);
+                    vrc = VbglR0SfHostReqCreate(pSuperInfo->map.root, &pReq->Create);
+                    if (RT_SUCCESS(vrc)) {
+                        if (pReq->Create.CreateParms.Result == SHFL_FILE_EXISTS) {
+                            hHostFile = pReq->Create.CreateParms.Handle;
+                            Assert(hHostFile != SHFL_HANDLE_NIL);
+                            vbsf_dentry_chain_increase_ttl(dentry);
+                        } else {
+                            LogFunc(("file %s does not exist\n", sf_i->path->String.utf8));
+                            vbsf_dentry_invalidate_ttl(dentry);
+                            sf_i->force_restat = true;
+                            rc = -ENOENT;
+                        }
+                    } else {
+                        rc = -RTErrConvertToErrno(vrc);
+                        LogFunc(("VbglR0SfCreate(%s) failed vrc=%Rrc rc=%d\n", sf_i->path->String.ach, vrc, rc));
+                    }
+                } else
+                    rc = -ENOMEM;
+            }
+            if (rc == 0) {
+                /*
+                 * Set mode and/or timestamps.
+                 */
+                if (fAttrs & (ATTR_MODE | ATTR_ATIME | ATTR_MTIME | ATTR_CTIME)) {
+                    /* Fill in the attributes.  Start by setting all to zero
+                       since the host will ignore zeroed fields. */
+                    RT_ZERO(pReq->Info.ObjInfo);
+
+                    if (fAttrs & ATTR_MODE) {
+                        pReq->Info.ObjInfo.Attr.fMode = sf_access_permissions_to_vbox(iattr->ia_mode);
+                        /* The file type is a multi-bit field (S_IFMT), so it has to
+                           be compared as a whole: testing single S_IFxxx values with
+                           '&' makes regular files (0100000) match S_IFLNK (0120000). */
+                        if (S_ISDIR(iattr->ia_mode))
+                            pReq->Info.ObjInfo.Attr.fMode |= RTFS_TYPE_DIRECTORY;
+                        else if (S_ISLNK(iattr->ia_mode))
+                            pReq->Info.ObjInfo.Attr.fMode |= RTFS_TYPE_SYMLINK;
+                        else
+                            pReq->Info.ObjInfo.Attr.fMode |= RTFS_TYPE_FILE;
+                    }
+                    if (fAttrs & ATTR_ATIME)
+                        vbsf_time_to_vbox(&pReq->Info.ObjInfo.AccessTime, &iattr->ia_atime);
+                    if (fAttrs & ATTR_MTIME)
+                        vbsf_time_to_vbox(&pReq->Info.ObjInfo.ModificationTime, &iattr->ia_mtime);
+                    if (fAttrs & ATTR_CTIME)
+                        vbsf_time_to_vbox(&pReq->Info.ObjInfo.ChangeTime, &iattr->ia_ctime);
+
+                    /* Make the change. */
+                    vrc = VbglR0SfHostReqSetObjInfo(pSuperInfo->map.root, &pReq->Info, hHostFile);
+                    if (RT_SUCCESS(vrc)) {
+                        vbsf_update_inode(pInode, sf_i, &pReq->Info.ObjInfo, pSuperInfo, true /*fLocked*/, fAttrs);
+                    } else {
+                        rc = -RTErrConvertToErrno(vrc);
+                        LogFunc(("VbglR0SfHostReqSetObjInfo(%s) failed vrc=%Rrc rc=%d\n", sf_i->path->String.ach, vrc, rc));
+                    }
+                }
+
+                /*
+                 * Change the file size.
+                 * Note! Old API is more convenient here as it gives us up to date
+                 *       inode info back.
+                 */
+                if ((fAttrs & ATTR_SIZE) && rc == 0) {
+                    /*vrc = VbglR0SfHostReqSetFileSize(pSuperInfo->map.root, &pReq->SetSize, hHostFile, iattr->ia_size);
+                    if (RT_SUCCESS(vrc)) {
+                        i_size_write(pInode, iattr->ia_size);
+                    } else if (vrc == VERR_NOT_IMPLEMENTED)*/ {
+                        /* Fallback for pre 6.0 hosts: */
+                        RT_ZERO(pReq->Info.ObjInfo);
+                        pReq->Info.ObjInfo.cbObject = iattr->ia_size;
+                        vrc = VbglR0SfHostReqSetFileSizeOld(pSuperInfo->map.root, &pReq->Info, hHostFile);
+                        if (RT_SUCCESS(vrc))
+                            vbsf_update_inode(pInode, sf_i, &pReq->Info.ObjInfo, pSuperInfo, true /*fLocked*/, fAttrs);
+                    }
+                    if (RT_SUCCESS(vrc)) {
+                        /** @todo there is potentially more to be done here if there are mappings of
+                         *        the lovely file. */
+                    } else {
+                        rc = -RTErrConvertToErrno(vrc);
+                        LogFunc(("VbglR0SfHostReqSetFileSize(%s, %#llx) failed vrc=%Rrc rc=%d\n",
+                                 sf_i->path->String.ach, (unsigned long long)iattr->ia_size, vrc, rc));
+                    }
+                }
+
+                /*
+                 * Clean up.  The host handle is only ours to close if we opened
+                 * it above.
+                 */
+                if (!pHandle) {
+                    vrc = VbglR0SfHostReqClose(pSuperInfo->map.root, &pReq->Close, hHostFile);
+                    if (RT_FAILURE(vrc))
+                        LogFunc(("VbglR0SfHostReqClose(%s [%#llx]) failed vrc=%Rrc\n", sf_i->path->String.utf8, hHostFile, vrc));
+                }
+            }
+            if (pReq)
+                VbglR0PhysHeapFree(pReq);
+            if (pHandle)
+                vbsf_handle_release(pHandle, pSuperInfo, "vbsf_inode_setattr");
+        } else
+            SFLOGFLOW(("vbsf_inode_setattr: Nothing to do here: %#x (was %#x).\n", fAttrs, iattr->ia_valid));
+    }
+    return rc;
+}
+
+
+/**
+ * Builds the host path of a directory entry by joining the path of the
+ * parent directory (@a sf_i) and the entry name.
+ *
+ * For the root directory ("/") the result is just the entry name, otherwise
+ * it is "<parent>/<name>".
+ *
+ * @returns 0 on success, -ENAMETOOLONG if the result does not fit the 16-bit
+ *          SHFLSTRING size fields, -ENOMEM if the allocation fails.
+ * @param   caller  Name of the caller, for logging only.
+ * @param   sf_i    The inode info of the parent directory.
+ * @param   d_name  The entry name (UTF-8).
+ * @param   d_len   Length of the entry name in bytes, excluding terminator.
+ * @param   result  Where to return the kmalloc'ed string on success (the
+ *                  caller frees it with kfree).
+ */
+static int vbsf_make_path(const char *caller, struct vbsf_inode_info *sf_i,
+                          const char *d_name, size_t d_len, SHFLSTRING **result)
+{
+    size_t path_len, shflstring_len;
+    SHFLSTRING *tmp;
+    uint16_t p_len;
+    uint8_t *p_name;
+    uint8_t *dst;
+    int fRoot = 0;
+
+    TRACE();
+    p_len = sf_i->path->u16Length;
+    p_name = sf_i->path->String.utf8;
+
+    if (p_len == 1 && *p_name == '/') {
+        /* length of the name plus terminating zero */
+        path_len = d_len + 1;
+        fRoot = 1;
+    } else {
+        /* lengths of constituents plus terminating zero plus slash */
+        path_len = p_len + d_len + 2;
+    }
+
+    /* u16Length and u16Size are 16-bit fields, so the limit applies to both
+       cases alike. */
+    if (path_len > 0xffff) {
+        LogFunc(("path too long.  caller=%s, path_len=%zu\n",
+                 caller, path_len));
+        return -ENAMETOOLONG;
+    }
+
+    shflstring_len = offsetof(SHFLSTRING, String.utf8) + path_len;
+    tmp = kmalloc(shflstring_len, GFP_KERNEL);
+    if (!tmp) {
+        LogRelFunc(("kmalloc failed, caller=%s\n", caller));
+        return -ENOMEM;
+    }
+    tmp->u16Length = (uint16_t)(path_len - 1);
+    tmp->u16Size = (uint16_t)path_len;
+
+    dst = &tmp->String.utf8[0];
+    if (!fRoot) {
+        memcpy(dst, p_name, p_len);
+        dst[p_len] = '/';
+        dst += p_len + 1;
+    }
+    /* Terminate explicitly rather than relying on d_name[d_len] being zero. */
+    memcpy(dst, d_name, d_len);
+    dst[d_len] = '\0';
+
+    *result = tmp;
+    return 0;
+}
+
+
+/**
+ * Builds the host path (SHFLSTRING) for @a dentry inside the directory @a sf_i.
+ *
+ * The dentry name is encoded in the character set described by
+ * pSuperInfo->nls.  When such a table is set, the name is first converted to
+ * UTF-8 in a temporary PATH_MAX sized buffer; otherwise it is used as is.
+ * The (converted) name is then handed to vbsf_make_path(), which allocates
+ * the SHFLSTRING and fills it in.
+ *
+ * @returns 0 on success, negative errno on failure: -ENOMEM if the temporary
+ *          buffer cannot be allocated, -EINVAL if a character cannot be
+ *          converted, -ENAMETOOLONG if the converted name is too long, or
+ *          the status of vbsf_make_path().
+ * @param   pSuperInfo  The super block info (supplies the NLS table).
+ * @param   sf_i        Inode info of the parent directory.
+ * @param   dentry      The directory entry whose name is used.
+ * @param   result      Where to return the allocated path on success.
+ * @param   caller      Name of the calling function, for logging.
+ */
+int vbsf_path_from_dentry(struct vbsf_super_info *pSuperInfo, struct vbsf_inode_info *sf_i, struct dentry *dentry,
+                          SHFLSTRING **result, const char *caller)
+{
+    int err;
+    const char *d_name;
+    size_t d_len;
+    const char *name;
+    size_t len = 0;
+
+    TRACE();
+    d_name = dentry->d_name.name;
+    d_len  = dentry->d_name.len;
+
+    if (pSuperInfo->nls) {
+        size_t in_len = d_len;
+        size_t out_bound_len = PATH_MAX;
+        const char *in = d_name;
+        char *out = kmalloc(out_bound_len, GFP_KERNEL);
+
+        if (!out) {
+            LogRelFunc(("kmalloc failed, caller=%s\n", caller));
+            return -ENOMEM;
+        }
+        name = out;
+
+        /* Convert one character at a time until the input is used up.  A
+           character may consume more than one input byte, so the number of
+           iterations is not necessarily d_len. */
+        while (in_len > 0) {
+            /* We renamed the linux kernel wchar_t type to linux_wchar_t in
+               the-linux-kernel.h, as it conflicts with the C++ type of that name. */
+            linux_wchar_t uni;
+            int nb;
+
+            nb = pSuperInfo->nls->char2uni(in, in_len, &uni);
+            if (nb <= 0 || (size_t)nb > in_len) { /* also guards in_len against wrapping */
+                LogFunc(("nls->char2uni failed %x %zu\n",
+                         *in, in_len));
+                err = -EINVAL;
+                goto fail1;
+            }
+            in_len -= nb;
+            in += nb;
+
+#if RTLNX_VER_MIN(2,6,31)
+            nb = utf32_to_utf8(uni, out, out_bound_len);
+#else
+            nb = utf8_wctomb(out, uni, out_bound_len);
+#endif
+            if (nb < 0) {
+                LogFunc(("nls->uni2char failed %x %zu\n",
+                         uni, out_bound_len));
+                err = -EINVAL;
+                goto fail1;
+            }
+            out_bound_len -= nb;
+            out += nb;
+            len += nb;
+        }
+        /* Leave room for the terminator written below. */
+        if (len >= PATH_MAX - 1) {
+            err = -ENAMETOOLONG;
+            goto fail1;
+        }
+
+        LogFunc(("result(%zu) = %.*s\n", len, (int)len, name));
+        *out = 0;
+    } else {
+        name = d_name;
+        len = d_len;
+    }
+
+    err = vbsf_make_path(caller, sf_i, name, len, result);
+    if (name != d_name) /* only free the temporary conversion buffer */
+        kfree(name);
+
+    return err;
+
+ fail1:
+    kfree(name);
+    return err;
+}
+
+
+/**
+ * This is called during name resolution/lookup to check if the @a dentry in the
+ * cache is still valid.  The actual validation job is handled by
+ * vbsf_inode_revalidate_worker().
+ *
+ * Positive entries younger than the directory cache TTL are accepted as is,
+ * older ones are revalidated against the host.  Negative entries are rejected
+ * for create/rename-target lookups and otherwise accepted only while younger
+ * than the TTL.
+ *
+ * @returns 1 if the entry may be used, 0 if it is stale, -ECHILD when called
+ *          with LOOKUP_RCU set (kernels >= 2.6.38).
+ * @param   dentry  The directory entry to check.
+ *
+ * The second parameter carries the lookup flags; its type depends on the
+ * kernel version (plain flags, or a nameidata holding them on 2.6.0..3.5).
+ *
+ * @note Caller holds no relevant locks, just a dentry reference.
+ */
+#if RTLNX_VER_MIN(3,6,0)
+static int vbsf_dentry_revalidate(struct dentry *dentry, unsigned flags)
+#elif RTLNX_VER_MIN(2,6,0)
+static int vbsf_dentry_revalidate(struct dentry *dentry, struct nameidata *nd)
+#else
+static int vbsf_dentry_revalidate(struct dentry *dentry, int flags)
+#endif
+{
+#if RTLNX_VER_RANGE(2,6,0, 3,6,0)
+    int const flags = nd ? nd->flags : 0;
+#endif
+
+    int rc;
+
+    Assert(dentry);
+    SFLOGFLOW(("vbsf_dentry_revalidate: %p %#x %s\n", dentry, flags,
+               dentry->d_inode ? VBSF_GET_INODE_INFO(dentry->d_inode)->path->String.ach : "<negative>"));
+
+    /*
+     * See Documentation/filesystems/vfs.txt why we skip LOOKUP_RCU.
+     *
+     * Also recommended: https://lwn.net/Articles/649115/
+     *                   https://lwn.net/Articles/649729/
+     *                   https://lwn.net/Articles/650786/
+     *
+     */
+#if RTLNX_VER_MIN(2,6,38)
+    if (flags & LOOKUP_RCU) {
+        rc = -ECHILD;
+        SFLOGFLOW(("vbsf_dentry_revalidate: RCU -> -ECHILD\n"));
+    } else
+#endif
+    {
+        /*
+         * Do we have an inode or not?  If not it's probably a negative cache
+         * entry, otherwise most likely a positive one.
+         */
+        struct inode *pInode = dentry->d_inode;
+        if (pInode) {
+            /*
+             * Positive entry.
+             *
+             * Note! We're more aggressive here than other remote file systems,
+             *       current (4.19) CIFS will for instance revalidate the inode
+             *       and ignore the dentry timestamp for positive entries.
+             */
+            unsigned long const cJiffiesAge = jiffies - vbsf_dentry_get_update_jiffies(dentry);
+            struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(dentry->d_sb);
+            if (cJiffiesAge < pSuperInfo->cJiffiesDirCacheTTL) {
+                SFLOGFLOW(("vbsf_dentry_revalidate: age: %lu vs. TTL %lu -> 1\n", cJiffiesAge, pSuperInfo->cJiffiesDirCacheTTL));
+                rc = 1;
+            } else if (!vbsf_inode_revalidate_worker(dentry, true /*fForced*/, false /*fInodeLocked*/)) {
+                vbsf_dentry_set_update_jiffies(dentry, jiffies);
+                SFLOGFLOW(("vbsf_dentry_revalidate: age: %lu vs. TTL %lu -> reval -> 1\n", cJiffiesAge, pSuperInfo->cJiffiesDirCacheTTL));
+                rc = 1;
+            } else {
+                SFLOGFLOW(("vbsf_dentry_revalidate: age: %lu vs. TTL %lu -> reval -> 0\n", cJiffiesAge, pSuperInfo->cJiffiesDirCacheTTL));
+                rc = 0;
+            }
+        } else {
+            /*
+             * Negative entry.
+             *
+             * Invalidate dentries for open and renames here as we'll revalidate
+             * these when taking the actual action (also good for case preservation
+             * if we do case-insensitive mounts against windows + mac hosts at some
+             * later point).
+             */
+#if RTLNX_VER_MIN(2,6,28)
+            if (flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))
+#elif RTLNX_VER_MIN(2,5,75)
+            if (flags & LOOKUP_CREATE)
+#else
+            if (0)
+#endif
+            {
+                SFLOGFLOW(("vbsf_dentry_revalidate: negative: create or rename target -> 0\n"));
+                rc = 0;
+            } else {
+                /* Can we skip revalidation based on TTL?  The age is "now minus
+                   last update", exactly as for positive entries; with the
+                   operands the other way round the unsigned difference is huge
+                   and the TTL would never apply. */
+                unsigned long const cJiffiesAge = jiffies - vbsf_dentry_get_update_jiffies(dentry);
+                struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(dentry->d_sb);
+                if (cJiffiesAge < pSuperInfo->cJiffiesDirCacheTTL) {
+                    SFLOGFLOW(("vbsf_dentry_revalidate: negative: age: %lu vs. TTL %lu -> 1\n", cJiffiesAge, pSuperInfo->cJiffiesDirCacheTTL));
+                    rc = 1;
+                } else {
+                    /* We could revalidate it here, but we could instead just
+                       have the caller kick it out. */
+                    /** @todo stat the direntry and see if it exists now. */
+                    SFLOGFLOW(("vbsf_dentry_revalidate: negative: age: %lu vs. TTL %lu -> 0\n", cJiffiesAge, pSuperInfo->cJiffiesDirCacheTTL));
+                    rc = 0;
+                }
+            }
+        }
+    }
+    return rc;
+}
+
+#ifdef SFLOG_ENABLED
+
+/**
+ * Dentry delete hook, for logging purposes only.
+ *
+ * Logs the dentry pointer and always returns 0.  The parameter is const
+ * qualified on kernels >= 2.6.38.
+ */
+# if RTLNX_VER_MIN(2,6,38)
+static int vbsf_dentry_delete(const struct dentry *pDirEntry)
+# else
+static int vbsf_dentry_delete(struct dentry *pDirEntry)
+# endif
+{
+ SFLOGFLOW(("vbsf_dentry_delete: %p\n", pDirEntry));
+ return 0;
+}
+
+# if RTLNX_VER_MIN(4,8,0)
+/**
+ * Dentry init hook, for logging purposes only (kernels >= 4.8.0).
+ *
+ * Logs the dentry pointer and always returns 0.
+ */
+static int vbsf_dentry_init(struct dentry *pDirEntry)
+{
+ SFLOGFLOW(("vbsf_dentry_init: %p\n", pDirEntry));
+ return 0;
+}
+# endif
+
+#endif /* SFLOG_ENABLED */
+
+/**
+ * Directory entry operations.
+ *
+ * Only revalidation does real work; the delete and init hooks are installed
+ * solely when SFLOG_ENABLED is defined, and they do nothing but log.
+ *
+ * Since 2.6.38 this is used via the super_block::s_d_op member.
+ */
+struct dentry_operations vbsf_dentry_ops = {
+ .d_revalidate = vbsf_dentry_revalidate,
+#ifdef SFLOG_ENABLED
+ .d_delete = vbsf_dentry_delete,
+# if RTLNX_VER_MIN(4,8,0)
+ .d_init = vbsf_dentry_init,
+# endif
+#endif
+};
+
diff --git a/src/VBox/Additions/linux/sharedfolders/vbsfmount.c b/src/VBox/Additions/linux/sharedfolders/vbsfmount.c
new file mode 100644
index 00000000..bc557879
--- /dev/null
+++ b/src/VBox/Additions/linux/sharedfolders/vbsfmount.c
@@ -0,0 +1,113 @@
+/* $Id: vbsfmount.c $ */
+/** @file
+ * vbsfmount - Commonly used code to mount shared folders on Linux-based
+ * systems. Currently used by mount.vboxsf and VBoxService.
+ */
+
+/*
+ * Copyright (C) 2010-2022 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#ifndef _GNU_SOURCE
+# define _GNU_SOURCE
+#endif
+#include <assert.h>
+#include <ctype.h>
+#include <mntent.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/mount.h>
+
+#include "vbsfmount.h"
+
+
+/**
+ * Completes a mount by appending an entry for it to the mount table (MOUNTED).
+ *
+ * The option string written to the table is "ro" or "rw" (from MS_RDONLY),
+ * followed by ",nosuid" when MS_NOSUID is set, followed by "," and
+ * @a pszOpts when that is given and not empty.
+ *
+ * @returns 0 on success, 1 on allocation failure, 2 if the mount table could
+ *          not be opened for update, 3 if the entry could not be added.
+ * @param   pszSharedFolder  The shared folder name (written as mnt_fsname).
+ * @param   pszMountPoint    The mount point (written as mnt_dir).
+ * @param   fFlags           Mount flags; only MS_RDONLY and MS_NOSUID are
+ *                           looked at.
+ * @param   pszOpts          Additional option string, may be NULL or empty.
+ *
+ * @todo Use defines for return values!
+ */
+int vbsfmount_complete(const char *pszSharedFolder, const char *pszMountPoint,
+                       unsigned long fFlags, const char *pszOpts)
+{
+    /*
+     * Combine pszOpts and fFlags.  An empty pszOpts is treated like NULL so
+     * the option string never ends with a dangling comma.
+     */
+    const char  *pszAccess = fFlags & MS_RDONLY ? MNTOPT_RO : MNTOPT_RW;
+    const char  *pszNoSuid = fFlags & MS_NOSUID ? "," MNTOPT_NOSUID : "";
+    const char  *pszExtra  = pszOpts && *pszOpts ? pszOpts : NULL;
+    size_t const cbBuf     = strlen(pszAccess) + strlen(pszNoSuid)
+                           + (pszExtra ? 1 + strlen(pszExtra) : 0)
+                           + 1 /* terminator */;
+    char        *pszBuf    = (char *)malloc(cbBuf);
+    FILE        *pMTab;
+    int          rc;
+
+    if (!pszBuf)
+        return 1; /* allocation error */
+
+    snprintf(pszBuf, cbBuf, "%s%s%s%s", pszAccess, pszNoSuid,
+             pszExtra ? "," : "", pszExtra ? pszExtra : "");
+    assert(strlen(pszBuf) + 1 == cbBuf);
+
+    /*
+     * Open the mtab and update it:
+     */
+    pMTab = setmntent(MOUNTED, "a+");
+    if (pMTab)
+    {
+        struct mntent Entry;
+        Entry.mnt_fsname = (char *)pszSharedFolder;
+        Entry.mnt_dir    = (char *)pszMountPoint;
+        Entry.mnt_type   = "vboxsf";
+        Entry.mnt_opts   = pszBuf;
+        Entry.mnt_freq   = 0;
+        Entry.mnt_passno = 0;
+
+        if (!addmntent(pMTab, &Entry))
+            rc = 0; /* success. */
+        else
+            rc = 3; /* Could not add an entry to the mount table. */
+
+        endmntent(pMTab);
+    }
+    else
+        rc = 2; /* Could not open mount table for update. */
+
+    free(pszBuf);
+    return rc;
+}
+
diff --git a/src/VBox/Additions/linux/sharedfolders/vbsfmount.h b/src/VBox/Additions/linux/sharedfolders/vbsfmount.h
new file mode 100644
index 00000000..3eb19460
--- /dev/null
+++ b/src/VBox/Additions/linux/sharedfolders/vbsfmount.h
@@ -0,0 +1,142 @@
+/* $Id: vbsfmount.h $ */
+/** @file
+ * vboxsf - VBox Linux Shared Folders VFS, mount(2) parameter structure.
+ */
+
+/*
+ * Copyright (C) 2006-2022 Oracle and/or its affiliates.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef GA_INCLUDED_SRC_linux_sharedfolders_vbsfmount_h
+#define GA_INCLUDED_SRC_linux_sharedfolders_vbsfmount_h
+#ifndef RT_WITHOUT_PRAGMA_ONCE
+# pragma once
+#endif
+
+/* Linux constrains the size of data mount argument to PAGE_SIZE - 1. */
+#define MAX_MNTOPT_STR PAGE_SIZE
+/* Size of vbsf_mount_info_new::name (share name), terminator included. */
+#define MAX_HOST_NAME 256
+/* Size of vbsf_mount_info_new::nls_name (I/O charset name), terminator included. */
+#define MAX_NLS_NAME 32
+/* TTL in milliseconds for the kernel-default setting (ttl == -1). */
+#define VBSF_DEFAULT_TTL_MS 200
+
+/* The three signature bytes; presumably the expected content of
+ * vbsf_mount_info_new::signature - the check is not in this header. */
+#define VBSF_MOUNT_SIGNATURE_BYTE_0 '\377'
+#define VBSF_MOUNT_SIGNATURE_BYTE_1 '\376'
+#define VBSF_MOUNT_SIGNATURE_BYTE_2 '\375'
+
+/**
+ * VBox Linux Shared Folders VFS caching mode.
+ *
+ * Passed to the kernel module in vbsf_mount_info_new::enmCacheMode.
+ */
+enum vbsf_cache_mode {
+ /** Use the kernel module's default caching mode (kVbsfCacheMode_Strict). */
+ kVbsfCacheMode_Default = 0,
+ /** No caching, go to the host for everything. This will have some minor
+ * coherency issues for memory mapping with unsynced dirty pages. */
+ kVbsfCacheMode_None,
+ /** No caching, except for files with writable memory mappings.
+ * (Note to future: if we do oplock like stuff, it goes in here.) */
+ kVbsfCacheMode_Strict,
+ /** Use page cache for reads.
+ * This improves guest performance for read intensive jobs, like compiling
+ * and building. The flip side is that the guest may not see host modifications
+ * in a timely manner and possibly update files with out-of-date cache
+ * information, as there exists no protocol for the host to notify the guest
+ * about file modifications. */
+ kVbsfCacheMode_Read,
+ /** Use page cache for both reads and writes as far as that's possible.
+ * This is good for guest performance, but the price is that the guest may
+ * ignore host changes and the host may not see guest changes in a timely
+ * manner. */
+ kVbsfCacheMode_ReadWrite,
+ /** End of valid values (exclusive). */
+ kVbsfCacheMode_End,
+ /** Make sure the enum is sizeof(int32_t). */
+ kVbsfCacheMode_32BitHack = 0x7fffffff
+};
+
+/**
+ * VBox Linux Shared Folders VFS mount options.
+ *
+ * The structure has grown over time; the @a length member tells the kernel
+ * module how much of it the caller filled in.
+ */
+struct vbsf_mount_info_new {
+ /**
+ * The old version of the mount_info struct started with a
+ * char name[MAX_HOST_NAME] field, where name cannot be '\0'.
+ * So the new version of the mount_info struct starts with a
+ * nullchar field which is always 0 so that we can detect and
+ * reject the old structure being passed.
+ */
+ char nullchar;
+ /** Signature */
+ char signature[3];
+ /** Length of the whole structure */
+ int length;
+ /** Share name */
+ char name[MAX_HOST_NAME];
+ /** Name of an I/O charset */
+ char nls_name[MAX_NLS_NAME];
+ /** User ID for all entries, default 0=root */
+ int uid;
+ /** Group ID for all entries, default 0=root */
+ int gid;
+ /** Directory entry and inode time to live in milliseconds.
+ * -1 for kernel default, 0 to disable caching.
+ * @sa vbsf_mount_info_new::msDirCacheTTL, vbsf_mount_info_new::msInodeTTL */
+ int ttl;
+ /** Mode for directories if != -1. */
+ int dmode;
+ /** Mode for regular files if != -1. */
+ int fmode;
+ /** umask applied to directories */
+ int dmask;
+ /** umask applied to regular files */
+ int fmask;
+ /** Mount tag for VBoxService automounter.
+ * @since 6.0.0 */
+ char szTag[32];
+ /** Max pages to read & write at a time.
+ * @since 6.0.6 */
+ uint32_t cMaxIoPages;
+ /** The directory content buffer size. Set to 0 for kernel module default.
+ * Larger value reduces the number of host calls on large directories. */
+ uint32_t cbDirBuf;
+ /** The time to live for directory entries (in milliseconds). @a ttl is used
+ * if negative.
+ * @since 6.0.6 */
+ int32_t msDirCacheTTL;
+ /** The time to live for inode information (in milliseconds). @a ttl is used
+ * if negative.
+ * @since 6.0.6 */
+ int32_t msInodeTTL;
+ /** The cache and coherency mode.
+ * @since 6.0.6 */
+ enum vbsf_cache_mode enmCacheMode;
+};
+/* Size check, in member order: nullchar + signature and length (2*4), name,
+ * nls_name, uid through fmask (7*4), szTag (32), and cMaxIoPages through
+ * enmCacheMode (5*4). */
+#ifdef AssertCompileSize
+AssertCompileSize(struct vbsf_mount_info_new, 2*4 + MAX_HOST_NAME + MAX_NLS_NAME + 7*4 + 32 + 5*4);
+#endif
+
+/** Completes the mount operation by adding the new mount point to mtab if required.
+ *
+ * @returns 0 on success, non-zero on failure.
+ * @param   pszSharedFolder  The shared folder name.
+ * @param   pszMountPoint    The mount point.
+ * @param   fFlags           The mount flags (MS_XXX).
+ * @param   pszOpts          Additional mount options string, may be NULL.
+ */
+int vbsfmount_complete(const char *pszSharedFolder, const char *pszMountPoint,
+ unsigned long fFlags, const char *pszOpts);
+
+#endif /* !GA_INCLUDED_SRC_linux_sharedfolders_vbsfmount_h */
diff --git a/src/VBox/Additions/linux/sharedfolders/vfsmod.c b/src/VBox/Additions/linux/sharedfolders/vfsmod.c
new file mode 100644
index 00000000..ce670f76
--- /dev/null
+++ b/src/VBox/Additions/linux/sharedfolders/vfsmod.c
@@ -0,0 +1,1753 @@
+/* $Id: vfsmod.c $ */
+/** @file
+ * vboxsf - VBox Linux Shared Folders VFS, module init/term, super block management.
+ */
+
+/*
+ * Copyright (C) 2006-2022 Oracle and/or its affiliates.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * @note Anyone wishing to make changes here might wish to take a look at
+ * https://github.com/torvalds/linux/blob/master/Documentation/filesystems/vfs.txt
+ * which seems to be the closest there is to official documentation on
+ * writing filesystem drivers for Linux.
+ *
+ * See also: http://us1.samba.org/samba/ftp/cifs-cvs/ols2006-fs-tutorial-smf.odp
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#include "vfsmod.h"
+#include "version-generated.h"
+#include "revision-generated.h"
+#include "product-generated.h"
+#if RTLNX_VER_MIN(5,0,0) || RTLNX_RHEL_MIN(8,4)
+# include <uapi/linux/mount.h> /* for MS_REMOUNT */
+#elif RTLNX_VER_MAX(3,3,0)
+# include <linux/mount.h>
+#endif
+#include <linux/seq_file.h>
+#include <linux/vfs.h>
+#if RTLNX_VER_RANGE(2,5,62, 5,8,0)
+# include <linux/vermagic.h>
+#endif
+#include <VBox/err.h>
+#include <iprt/path.h>
+#if RTLNX_VER_MIN(5,1,0)
+# include <linux/fs_context.h>
+# include <linux/fs_parser.h>
+#elif RTLNX_VER_MIN(2,6,0)
+# include <linux/parser.h>
+#endif
+
+
+/*********************************************************************************************************************************
+* Defined Constants And Macros *
+*********************************************************************************************************************************/
+/* Default for vbsf_super_info::cMaxIoPages when the mount info gives no usable value. */
+#define VBSF_DEFAULT_MAX_IO_PAGES RT_MIN(_16K / sizeof(RTGCPHYS64) /* => 8MB buffer */, VMMDEV_MAX_HGCM_DATA_SIZE >> PAGE_SHIFT)
+/* Default for vbsf_super_info::cbDirBuf when the mount info gives no usable value. */
+#define VBSF_DEFAULT_DIR_BUF_SIZE _64K
+
+
+/*********************************************************************************************************************************
+* Global Variables *
+*********************************************************************************************************************************/
+/** The shared folders client handle (initialised outside the code shown here). */
+VBGLSFCLIENT g_SfClient;
+/** Host feature flags; zero until set elsewhere. */
+uint32_t g_fHostFeatures = 0;
+/** Last valid shared folders function number. */
+uint32_t g_uSfLastFunction = SHFL_FN_SET_FILE_SIZE;
+/** Shared folders features (SHFL_FEATURE_XXX). */
+uint64_t g_fSfFeatures = 0;
+
+/** Protects all the vbsf_inode_info::HandleList lists. */
+spinlock_t g_SfHandleLock;
+
+/** The 'follow_symlinks' module parameter.
+ * @todo Figure out how to do this for 2.4.x! */
+static int g_fFollowSymlinks = 0;
+
+/* Forward declaration; the definition follows later in this file. */
+static struct super_operations g_vbsf_super_ops;
+
+
+
+/**
+ * Copies options from the mount info structure into @a pSuperInfo.
+ *
+ * This is used both by vbsf_super_info_alloc_and_map_it() and
+ * vbsf_remount_fs().
+ */
+static void vbsf_super_info_copy_remount_options(struct vbsf_super_info *pSuperInfo, struct vbsf_mount_info_new *info)
+{
+ pSuperInfo->uid = info->uid;
+ pSuperInfo->gid = info->gid;
+
+ if ((unsigned)info->length >= RT_UOFFSETOF(struct vbsf_mount_info_new, szTag)) {
+ /* new fields */
+ pSuperInfo->dmode = info->dmode;
+ pSuperInfo->fmode = info->fmode;
+ pSuperInfo->dmask = info->dmask;
+ pSuperInfo->fmask = info->fmask;
+ } else {
+ pSuperInfo->dmode = ~0;
+ pSuperInfo->fmode = ~0;
+ }
+
+ if ((unsigned)info->length >= RT_UOFFSETOF(struct vbsf_mount_info_new, cMaxIoPages)) {
+ AssertCompile(sizeof(pSuperInfo->szTag) >= sizeof(info->szTag));
+ memcpy(pSuperInfo->szTag, info->szTag, sizeof(info->szTag));
+ pSuperInfo->szTag[sizeof(pSuperInfo->szTag) - 1] = '\0';
+ } else {
+ pSuperInfo->szTag[0] = '\0';
+ }
+
+ /* The max number of pages in an I/O request. This must take into
+ account that the physical heap generally grows in 64 KB chunks,
+ so we should not try push that limit. It also needs to take
+ into account that the host will allocate temporary heap buffers
+ for the I/O bytes we send/receive, so don't push the host heap
+ too hard as we'd have to retry with smaller requests when this
+ happens, which isn't too efficient. */
+ pSuperInfo->cMaxIoPages = VBSF_DEFAULT_MAX_IO_PAGES;
+ if ( (unsigned)info->length >= sizeof(struct vbsf_mount_info_new)
+ && info->cMaxIoPages > 0) {
+ if (info->cMaxIoPages <= VMMDEV_MAX_HGCM_DATA_SIZE >> PAGE_SHIFT)
+ pSuperInfo->cMaxIoPages = RT_MAX(info->cMaxIoPages, 2); /* read_iter/write_iter requires a minimum of 2. */
+ else
+ printk(KERN_WARNING "vboxsf: max I/O page count (%#x) is out of range, using default (%#x) instead.\n",
+ info->cMaxIoPages, pSuperInfo->cMaxIoPages);
+ }
+
+ pSuperInfo->cbDirBuf = VBSF_DEFAULT_DIR_BUF_SIZE;
+ if ( (unsigned)info->length >= RT_UOFFSETOF(struct vbsf_mount_info_new, cbDirBuf)
+ && info->cbDirBuf > 0) {
+ if (info->cbDirBuf <= _16M)
+ pSuperInfo->cbDirBuf = RT_ALIGN_32(info->cbDirBuf, PAGE_SIZE);
+ else
+ printk(KERN_WARNING "vboxsf: max directory buffer size (%#x) is out of range, using default (%#x) instead.\n",
+ info->cMaxIoPages, pSuperInfo->cMaxIoPages);
+ }
+
+ /*
+ * TTLs.
+ */
+ pSuperInfo->msTTL = info->ttl;
+ if (info->ttl > 0)
+ pSuperInfo->cJiffiesDirCacheTTL = msecs_to_jiffies(info->ttl);
+ else if (info->ttl == 0 || info->ttl != -1)
+ pSuperInfo->cJiffiesDirCacheTTL = pSuperInfo->msTTL = 0;
+ else
+ pSuperInfo->cJiffiesDirCacheTTL = msecs_to_jiffies(VBSF_DEFAULT_TTL_MS);
+ pSuperInfo->cJiffiesInodeTTL = pSuperInfo->cJiffiesDirCacheTTL;
+
+ pSuperInfo->msDirCacheTTL = -1;
+ if ( (unsigned)info->length >= RT_UOFFSETOF(struct vbsf_mount_info_new, msDirCacheTTL)
+ && info->msDirCacheTTL >= 0) {
+ if (info->msDirCacheTTL > 0) {
+ pSuperInfo->msDirCacheTTL = info->msDirCacheTTL;
+ pSuperInfo->cJiffiesDirCacheTTL = msecs_to_jiffies(info->msDirCacheTTL);
+ } else {
+ pSuperInfo->msDirCacheTTL = 0;
+ pSuperInfo->cJiffiesDirCacheTTL = 0;
+ }
+ }
+
+ pSuperInfo->msInodeTTL = -1;
+ if ( (unsigned)info->length >= RT_UOFFSETOF(struct vbsf_mount_info_new, msInodeTTL)
+ && info->msInodeTTL >= 0) {
+ if (info->msInodeTTL > 0) {
+ pSuperInfo->msInodeTTL = info->msInodeTTL;
+ pSuperInfo->cJiffiesInodeTTL = msecs_to_jiffies(info->msInodeTTL);
+ } else {
+ pSuperInfo->msInodeTTL = 0;
+ pSuperInfo->cJiffiesInodeTTL = 0;
+ }
+ }
+
+ /*
+ * Caching.
+ */
+ pSuperInfo->enmCacheMode = kVbsfCacheMode_Strict;
+ if ((unsigned)info->length >= RT_UOFFSETOF(struct vbsf_mount_info_new, enmCacheMode)) {
+ switch (info->enmCacheMode) {
+ case kVbsfCacheMode_Default:
+ case kVbsfCacheMode_Strict:
+ break;
+ case kVbsfCacheMode_None:
+ case kVbsfCacheMode_Read:
+ case kVbsfCacheMode_ReadWrite:
+ pSuperInfo->enmCacheMode = info->enmCacheMode;
+ break;
+ default:
+ printk(KERN_WARNING "vboxsf: cache mode (%#x) is out of range, using default instead.\n", info->enmCacheMode);
+ break;
+ }
+ }
+}
+
+/**
+ * Allocate the super info structure and try map the host share.
+ *
+ * Validates that the share and NLS names are zero terminated, loads the NLS
+ * table if one other than UTF-8 is requested (or configured as the kernel
+ * default), maps the folder on the host and finally copies the remaining
+ * options via vbsf_super_info_copy_remount_options().
+ *
+ * @returns 0 on success, negative errno on failure: -EINVAL for a bad name or
+ * an NLS table that cannot be loaded, -ENOMEM on allocation failure,
+ * -ENXIO if the host does not know the share, -EPROTO for any other
+ * host failure.
+ * @param info The mount info.
+ * @param sf_gp Where to return the new super info on success; set to NULL
+ * on failure.
+ */
+static int vbsf_super_info_alloc_and_map_it(struct vbsf_mount_info_new *info, struct vbsf_super_info **sf_gp)
+{
+ int rc;
+ SHFLSTRING *str_name;
+ size_t name_len, str_len;
+ struct vbsf_super_info *pSuperInfo;
+
+ TRACE();
+ *sf_gp = NULL; /* (old gcc maybe used initialized) */
+
+ /* Both strings come from the mount request, so do not trust their termination. */
+ name_len = RTStrNLen(info->name, sizeof(info->name));
+ if (name_len >= sizeof(info->name)) {
+ SFLOGRELBOTH(("vboxsf: Specified shared folder name is not zero terminated!\n"));
+ return -EINVAL;
+ }
+ if (RTStrNLen(info->nls_name, sizeof(info->nls_name)) >= sizeof(info->nls_name)) {
+ SFLOGRELBOTH(("vboxsf: Specified nls name is not zero terminated!\n"));
+ return -EINVAL;
+ }
+
+ /*
+ * Allocate memory.
+ */
+ str_len = offsetof(SHFLSTRING, String.utf8) + name_len + 1;
+ str_name = (PSHFLSTRING)kmalloc(str_len, GFP_KERNEL);
+ pSuperInfo = (struct vbsf_super_info *)kmalloc(sizeof(*pSuperInfo), GFP_KERNEL);
+ if (pSuperInfo && str_name) {
+ RT_ZERO(*pSuperInfo);
+
+ str_name->u16Length = name_len;
+ str_name->u16Size = name_len + 1;
+ memcpy(str_name->String.utf8, info->name, name_len + 1);
+
+ /*
+ * Init the NLS support, if needed.
+ */
+ rc = 0;
+#define _IS_UTF8(_str) (strcmp(_str, "utf8") == 0)
+#define _IS_EMPTY(_str) (strcmp(_str, "") == 0)
+
+ /* Check that the NLS charset is valid and does not point to the UTF8 table */
+ pSuperInfo->fNlsIsUtf8 = true;
+ if (info->nls_name[0]) {
+ if (_IS_UTF8(info->nls_name)) {
+ SFLOGFLOW(("vbsf_super_info_alloc_and_map_it: nls=utf8\n"));
+ pSuperInfo->nls = NULL;
+ } else {
+ pSuperInfo->fNlsIsUtf8 = false;
+ pSuperInfo->nls = load_nls(info->nls_name);
+ if (pSuperInfo->nls) {
+ SFLOGFLOW(("vbsf_super_info_alloc_and_map_it: nls=%s -> %p\n", info->nls_name, pSuperInfo->nls));
+ } else {
+ SFLOGRELBOTH(("vboxsf: Failed to load nls '%s'!\n", info->nls_name));
+ rc = -EINVAL;
+ }
+ }
+ } else {
+#ifdef CONFIG_NLS_DEFAULT
+ /* If no NLS charset is specified, try to load the default
+ * one unless it points to UTF8. */
+ if (!_IS_UTF8(CONFIG_NLS_DEFAULT)
+ && !_IS_EMPTY(CONFIG_NLS_DEFAULT)) {
+ pSuperInfo->fNlsIsUtf8 = false;
+ pSuperInfo->nls = load_nls_default();
+ SFLOGFLOW(("vbsf_super_info_alloc_and_map_it: CONFIG_NLS_DEFAULT=%s -> %p\n", CONFIG_NLS_DEFAULT, pSuperInfo->nls));
+ } else {
+ SFLOGFLOW(("vbsf_super_info_alloc_and_map_it: nls=utf8 (default %s)\n", CONFIG_NLS_DEFAULT));
+ pSuperInfo->nls = NULL;
+ }
+#else
+ SFLOGFLOW(("vbsf_super_info_alloc_and_map_it: nls=utf8 (no default)\n"));
+ pSuperInfo->nls = NULL;
+#endif
+ }
+#undef _IS_UTF8
+#undef _IS_EMPTY
+ if (rc == 0) {
+ /*
+ * Try mount it.
+ */
+ rc = VbglR0SfHostReqMapFolderWithContigSimple(str_name, virt_to_phys(str_name), RTPATH_DELIMITER,
+ true /*fCaseSensitive*/, &pSuperInfo->map.root);
+ if (RT_SUCCESS(rc)) {
+ /* The name was only needed for the map request. */
+ kfree(str_name);
+
+ /* The rest is shared with remount. */
+ vbsf_super_info_copy_remount_options(pSuperInfo, info);
+
+ *sf_gp = pSuperInfo;
+ return 0;
+ }
+
+ /*
+ * bail out:
+ */
+ if (rc == VERR_FILE_NOT_FOUND) {
+ LogRel(("vboxsf: SHFL_FN_MAP_FOLDER failed for '%s': share not found\n", info->name));
+ rc = -ENXIO;
+ } else {
+ LogRel(("vboxsf: SHFL_FN_MAP_FOLDER failed for '%s': %Rrc\n", info->name, rc));
+ rc = -EPROTO;
+ }
+ if (pSuperInfo->nls)
+ unload_nls(pSuperInfo->nls);
+ }
+ } else {
+ SFLOGRELBOTH(("vboxsf: Could not allocate memory for super info!\n"));
+ rc = -ENOMEM;
+ }
+ /* Common failure exit: release whichever allocations succeeded. */
+ if (str_name)
+ kfree(str_name);
+ if (pSuperInfo)
+ kfree(pSuperInfo);
+ return rc;
+}
+
+/**
+ * Unmaps the share on the host and frees the super info structure.
+ *
+ * A failing unmap request is only logged; the NLS table (if any) and the
+ * structure itself are released regardless.
+ *
+ * @param   pSuperInfo  The super info to destroy.  Invalid on return.
+ */
+static void vbsf_super_info_free(struct vbsf_super_info *pSuperInfo)
+{
+    int vrc;
+
+    TRACE();
+
+    /* Drop the host side mapping first. */
+    vrc = VbglR0SfHostReqUnmapFolderSimple(pSuperInfo->map.root);
+    if (RT_FAILURE(vrc)) {
+        LogFunc(("VbglR0SfHostReqUnmapFolderSimple failed rc=%Rrc\n", vrc));
+    }
+
+    /* Then the NLS table reference, when one was loaded. */
+    if (pSuperInfo->nls != NULL) {
+        unload_nls(pSuperInfo->nls);
+    }
+
+    kfree(pSuperInfo);
+}
+
+
+/**
+ * Initialize backing device related matters.
+ *
+ * On kernels >= 4.12 the backing device info (BDI) is set up by
+ * super_setup_bdi_name() and lives in the super block; on older kernels the
+ * one embedded in @a pSuperInfo is used and registered by hand. Readahead is
+ * disabled and the mapping capabilities are set from whichever BDI_CAP_XXX
+ * flags the kernel defines.
+ *
+ * @returns 0 on success, otherwise the status of the failing kernel call.
+ * @param sb The super block.
+ * @param pSuperInfo Our super block info.
+ */
+static int vbsf_init_backing_dev(struct super_block *sb, struct vbsf_super_info *pSuperInfo)
+{
+ int rc = 0;
+#if RTLNX_VER_MIN(2,6,0)
+ /* Each new shared folder map gets a new uint64_t identifier,
+ * allocated in sequence. We ASSUME the sequence will not wrap. */
+# if RTLNX_VER_MIN(2,6,26)
+ static uint64_t s_u64Sequence = 0;
+ uint64_t idSeqMine = ASMAtomicIncU64(&s_u64Sequence);
+# endif
+ struct backing_dev_info *bdi;
+
+ /* Remember the original BDI so vbsf_done_backing_dev() can restore it. */
+# if RTLNX_VER_RANGE(4,0,0, 4,2,0)
+ pSuperInfo->bdi_org = sb->s_bdi;
+# endif
+
+# if RTLNX_VER_MIN(4,12,0)
+ rc = super_setup_bdi_name(sb, "vboxsf-%llu", (unsigned long long)idSeqMine);
+ if (!rc)
+ bdi = sb->s_bdi;
+ else
+ return rc;
+# else
+ bdi = &pSuperInfo->bdi;
+# endif
+
+ bdi->ra_pages = 0; /* No readahead */
+
+ /* The capability mask is assembled from the flags this kernel defines. */
+# if RTLNX_VER_MIN(2,6,12)
+ bdi->capabilities = 0
+# ifdef BDI_CAP_MAP_DIRECT
+ | BDI_CAP_MAP_DIRECT /* MAP_SHARED */
+# endif
+# ifdef BDI_CAP_MAP_COPY
+ | BDI_CAP_MAP_COPY /* MAP_PRIVATE */
+# endif
+# ifdef BDI_CAP_READ_MAP
+ | BDI_CAP_READ_MAP /* can be mapped for reading */
+# endif
+# ifdef BDI_CAP_WRITE_MAP
+ | BDI_CAP_WRITE_MAP /* can be mapped for writing */
+# endif
+# ifdef BDI_CAP_EXEC_MAP
+ | BDI_CAP_EXEC_MAP /* can be mapped for execution */
+# endif
+# ifdef BDI_CAP_STRICTLIMIT
+# if RTLNX_VER_MIN(4,19,0) /* Trouble with 3.16.x/debian8. Process stops after dirty page throttling.
+ * Only tested successfully with 4.19. Maybe skip altogether? */
+ | BDI_CAP_STRICTLIMIT;
+# endif
+# endif
+ ;
+ /* Note: when BDI_CAP_STRICTLIMIT is included above, the semicolon on that
+ line already ends the assignment and the lone one is an empty statement. */
+# ifdef BDI_CAP_STRICTLIMIT
+ /* Smallest possible amount of dirty pages: 1% of RAM. We set this to
+ try to reduce the amount of data that's out of sync with the host side.
+ Besides, writepages isn't implemented, so flushing is extremely slow.
+ Note! Extremely slow linux 3.0.0 msync doesn't seem to be related to this setting. */
+ bdi_set_max_ratio(bdi, 1);
+# endif
+# endif /* >= 2.6.12 */
+
+ /* Older kernels: initialize and register the embedded BDI ourselves. */
+# if RTLNX_VER_RANGE(2,6,24, 4,12,0)
+ rc = bdi_init(&pSuperInfo->bdi);
+# if RTLNX_VER_MIN(2,6,26)
+ if (!rc)
+ rc = bdi_register(&pSuperInfo->bdi, NULL, "vboxsf-%llu", (unsigned long long)idSeqMine);
+# endif /* >= 2.6.26 */
+# endif /* 4.12.0 > version >= 2.6.24 */
+
+# if RTLNX_VER_RANGE(2,6,34, 4,12,0)
+ if (!rc)
+ sb->s_bdi = bdi;
+# endif
+
+#endif /* >= 2.6.0 */
+ return rc;
+}
+
+
+/**
+ * Undoes what vbsf_init_backing_dev did.
+ *
+ * Only kernels in the 2.6.24 to 4.12.0 range (end exclusive) need any work;
+ * on all others this function is empty.
+ *
+ * @param sb The super block.
+ * @param pSuperInfo Our super block info.
+ */
+static void vbsf_done_backing_dev(struct super_block *sb, struct vbsf_super_info *pSuperInfo)
+{
+#if RTLNX_VER_RANGE(2,6,24, 4,12,0)
+ bdi_destroy(&pSuperInfo->bdi); /* includes bdi_unregister() */
+
+ /* Paranoia: Make sb->s_bdi not point at pSuperInfo->bdi, in case someone
+ touches it after this point (we may screw up something). */
+# if RTLNX_VER_RANGE(4,0,0, 4,2,0)
+ sb->s_bdi = pSuperInfo->bdi_org; /* (noop_backing_dev_info is not exported) */
+# elif RTLNX_VER_RANGE(2,6,34, 4,10,0)
+ sb->s_bdi = &noop_backing_dev_info;
+# endif
+#endif
+}
+
+
+/**
+ * Creates the root inode and attaches it to the super block.
+ *
+ * Allocates the inode info and the "/" path string, stats the root of the
+ * share on the host, obtains inode number 1 from the kernel and makes it
+ * the root dentry of @a sb.
+ *
+ * @returns 0 on success, negative errno on failure.
+ * @param sb The super block.
+ * @param pSuperInfo Our super block info.
+ */
+static int vbsf_create_root_inode(struct super_block *sb, struct vbsf_super_info *pSuperInfo)
+{
+ SHFLFSOBJINFO fsinfo;
+ int rc;
+
+ /*
+ * Allocate and initialize the memory for our inode info structure.
+ */
+ struct vbsf_inode_info *sf_i = kmalloc(sizeof(*sf_i), GFP_KERNEL);
+ SHFLSTRING *path = kmalloc(sizeof(SHFLSTRING) + 1, GFP_KERNEL);
+ if (sf_i && path) {
+ sf_i->handle = SHFL_HANDLE_NIL;
+ sf_i->force_restat = false;
+ RTListInit(&sf_i->HandleList);
+#ifdef VBOX_STRICT
+ sf_i->u32Magic = SF_INODE_INFO_MAGIC;
+#endif
+ sf_i->path = path;
+
+ /* The root path is the single character "/" plus terminator. */
+ path->u16Length = 1;
+ path->u16Size = 2;
+ path->String.utf8[0] = '/';
+ path->String.utf8[1] = 0;
+
+ /*
+ * Stat the root directory (for inode info).
+ */
+ rc = vbsf_stat(__func__, pSuperInfo, sf_i->path, &fsinfo, 0);
+ if (rc == 0) {
+ /*
+ * Create the actual inode structure.
+ * Note! ls -la does display '.' and '..' entries with st_ino == 0, so root is #1.
+ */
+#if RTLNX_VER_MIN(2,4,25)
+ struct inode *iroot = iget_locked(sb, 1);
+#else
+ struct inode *iroot = iget(sb, 1);
+#endif
+ if (iroot) {
+ vbsf_init_inode(iroot, sf_i, &fsinfo, pSuperInfo);
+ VBSF_SET_INODE_INFO(iroot, sf_i);
+
+#if RTLNX_VER_MIN(2,4,25)
+ unlock_new_inode(iroot);
+#endif
+
+ /*
+ * Now make it a root inode.
+ */
+#if RTLNX_VER_MIN(3,4,0)
+ sb->s_root = d_make_root(iroot);
+#else
+ sb->s_root = d_alloc_root(iroot);
+#endif
+ if (sb->s_root) {
+
+ return 0;
+ }
+
+ SFLOGRELBOTH(("vboxsf: d_make_root failed!\n"));
+#if RTLNX_VER_MAX(3,4,0) /* d_make_root calls iput */
+ iput(iroot);
+#endif
+ /* iput() will call vbsf_evict_inode()/vbsf_clear_inode(). */
+ /* The inode owned sf_i and path from here on, so clear our pointers
+ to keep the cleanup code below from freeing them again. */
+ sf_i = NULL;
+ path = NULL;
+
+ rc = -ENOMEM;
+ } else {
+ SFLOGRELBOTH(("vboxsf: failed to allocate root inode!\n"));
+ rc = -ENOMEM;
+ }
+ } else
+ SFLOGRELBOTH(("vboxsf: could not stat root of share: %d\n", rc));
+ } else {
+ SFLOGRELBOTH(("vboxsf: Could not allocate memory for root inode info!\n"));
+ rc = -ENOMEM;
+ }
+ /* Failure exit: free whatever we still own. */
+ if (sf_i)
+ kfree(sf_i);
+ if (path)
+ kfree(path);
+ return rc;
+}
+
+
+#if RTLNX_VER_MAX(5,1,0)
+/**
+ * Loads the default option values into a mount info structure and, when
+ * given, copies in the shared folder name (truncating it to fit).
+ *
+ * @param   mount_info  The structure to set up.
+ * @param   sf_name     The shared folder name, NULL to leave the name alone.
+ */
+static void vbsf_init_mount_info(struct vbsf_mount_info_new *mount_info,
+                                 const char *sf_name)
+{
+    mount_info->length = sizeof(struct vbsf_mount_info_new);
+    mount_info->enmCacheMode = kVbsfCacheMode_Strict;
+    mount_info->dmode = mount_info->fmode = ~0U;
+    mount_info->ttl = mount_info->msDirCacheTTL = mount_info->msInodeTTL = -1;
+
+    if (!sf_name)
+        return;
+
+# if RTLNX_VER_MAX(2,5,69)
+    /* No strlcpy here, so terminate by hand. */
+    strncpy(mount_info->name, sf_name, sizeof(mount_info->name));
+    mount_info->name[sizeof(mount_info->name)-1] = 0;
+# else
+    strlcpy(mount_info->name, sf_name, sizeof(mount_info->name));
+# endif
+}
+#endif
+
+#if RTLNX_VER_RANGE(2,6,0, 5,1,0)
+/**
+ * The following section of code uses the Linux match_token() family of
+ * routines to parse string-based mount options.
+ *
+ * This enum holds the token values used in the vbsf_tokens pattern table;
+ * Opt_err is the token of the terminating table entry.
+ */
+enum {
+ Opt_iocharset, /* nls_name[] */
+ Opt_nls, /* alias for iocharset */
+ Opt_uid,
+ Opt_gid,
+ Opt_ttl,
+ Opt_dmode,
+ Opt_fmode,
+ Opt_dmask,
+ Opt_fmask,
+ Opt_umask,
+ Opt_maxiopages,
+ Opt_dirbuf,
+ Opt_dcachettl,
+ Opt_inodettl,
+ Opt_cachemode, /* enum vbsf_cache_mode */
+ Opt_tag,
+ Opt_err
+};
+
+/* Pattern table handed to match_token() by vbsf_parse_mount_options(): %s
+ captures a string, %u an unsigned number and %o an octal number. The
+ { Opt_err, NULL } entry terminates the table.
+ NOTE(review): presumably match_table_t could not be const-qualified before
+ 2.6.28 - confirm against the kernel headers. */
+# if RTLNX_VER_MAX(2,6,28)
+static match_table_t vbsf_tokens = {
+# else
+static const match_table_t vbsf_tokens = {
+# endif
+ { Opt_iocharset, "iocharset=%s" },
+ { Opt_nls, "nls=%s" },
+ { Opt_uid, "uid=%u" },
+ { Opt_gid, "gid=%u" },
+ { Opt_ttl, "ttl=%u" },
+ { Opt_dmode, "dmode=%o" },
+ { Opt_fmode, "fmode=%o" },
+ { Opt_dmask, "dmask=%o" },
+ { Opt_fmask, "fmask=%o" },
+ { Opt_umask, "umask=%o" },
+ { Opt_maxiopages, "maxiopages=%u" },
+ { Opt_dirbuf, "dirbuf=%u" },
+ { Opt_dcachettl, "dcachettl=%u" },
+ { Opt_inodettl, "inodettl=%u" },
+ { Opt_cachemode, "cache=%s" },
+ { Opt_tag, "tag=%s" }, /* private option for automounter */
+ { Opt_err, NULL }
+};
+
+/**
+ * Parses a comma separated mount option string using match_token().
+ *
+ * @returns 0 on success, -EINVAL if @a options is NULL, an option is not
+ *          recognised or a number is malformed, -ENOMEM if duplicating a
+ *          string argument fails.
+ * @param   options     The option string.  Modified in place by strsep().
+ * @param   mount_info  Where to store the parsed values.  Fields belonging to
+ *                      options that are not given are left untouched.
+ */
+static int vbsf_parse_mount_options(char *options,
+                                    struct vbsf_mount_info_new *mount_info)
+{
+    substring_t args[MAX_OPT_ARGS];
+    int option;
+    int token;
+    char *p;
+    char *iocharset;
+    char *cachemode;
+    char *tag;
+
+    if (!options)
+        return -EINVAL;
+
+    while ((p = strsep(&options, ",")) != NULL) {
+        /* Tolerate empty items such as in "a,,b". */
+        if (!*p)
+            continue;
+
+        token = match_token(p, vbsf_tokens, args);
+        switch (token) {
+        case Opt_iocharset:
+        case Opt_nls:
+            iocharset = match_strdup(&args[0]);
+            if (!iocharset) {
+                SFLOGRELBOTH(("vboxsf: Could not allocate memory for iocharset!\n"));
+                return -ENOMEM;
+            }
+            strlcpy(mount_info->nls_name, iocharset,
+                    sizeof(mount_info->nls_name));
+            kfree(iocharset);
+            break;
+        case Opt_uid:
+            if (match_int(&args[0], &option))
+                return -EINVAL;
+            mount_info->uid = option;
+            break;
+        case Opt_gid:
+            if (match_int(&args[0], &option))
+                return -EINVAL;
+            mount_info->gid = option;
+            break;
+        case Opt_ttl:
+            if (match_int(&args[0], &option))
+                return -EINVAL;
+            mount_info->ttl = option;
+            break;
+        case Opt_dmode:
+            if (match_octal(&args[0], &option))
+                return -EINVAL;
+            mount_info->dmode = option;
+            break;
+        case Opt_fmode:
+            if (match_octal(&args[0], &option))
+                return -EINVAL;
+            mount_info->fmode = option;
+            break;
+        case Opt_dmask:
+            if (match_octal(&args[0], &option))
+                return -EINVAL;
+            mount_info->dmask = option;
+            break;
+        case Opt_fmask:
+            if (match_octal(&args[0], &option))
+                return -EINVAL;
+            mount_info->fmask = option;
+            break;
+        case Opt_umask:
+            /* umask is shorthand for setting both masks at once. */
+            if (match_octal(&args[0], &option))
+                return -EINVAL;
+            mount_info->dmask = mount_info->fmask = option;
+            break;
+        case Opt_maxiopages:
+            if (match_int(&args[0], &option))
+                return -EINVAL;
+            mount_info->cMaxIoPages = option;
+            break;
+        case Opt_dirbuf:
+            if (match_int(&args[0], &option))
+                return -EINVAL;
+            mount_info->cbDirBuf = option;
+            break;
+        case Opt_dcachettl:
+            if (match_int(&args[0], &option))
+                return -EINVAL;
+            mount_info->msDirCacheTTL = option;
+            break;
+        case Opt_inodettl:
+            if (match_int(&args[0], &option))
+                return -EINVAL;
+            mount_info->msInodeTTL = option;
+            break;
+        case Opt_cachemode: {
+            cachemode = match_strdup(&args[0]);
+            if (!cachemode) {
+                SFLOGRELBOTH(("vboxsf: Could not allocate memory for cachemode!\n"));
+                return -ENOMEM;
+            }
+            /* An unknown mode only triggers a warning; the current value is kept. */
+            if (!strcmp(cachemode, "default") || !strcmp(cachemode, "strict"))
+                mount_info->enmCacheMode = kVbsfCacheMode_Strict;
+            else if (!strcmp(cachemode, "none"))
+                mount_info->enmCacheMode = kVbsfCacheMode_None;
+            else if (!strcmp(cachemode, "read"))
+                mount_info->enmCacheMode = kVbsfCacheMode_Read;
+            else if (!strcmp(cachemode, "readwrite"))
+                mount_info->enmCacheMode = kVbsfCacheMode_ReadWrite;
+            else
+                printk(KERN_WARNING "vboxsf: cache mode (%s) is out of range, using default instead.\n", cachemode);
+            kfree(cachemode);
+            break;
+        }
+        case Opt_tag:
+            tag = match_strdup(&args[0]);
+            if (!tag) {
+                SFLOGRELBOTH(("vboxsf: Could not allocate memory for automount tag!\n"));
+                return -ENOMEM;
+            }
+            strlcpy(mount_info->szTag, tag, sizeof(mount_info->szTag));
+            kfree(tag);
+            break;
+        default:
+            /* Terminate the message with a newline so it is emitted as a
+               complete log record of its own. */
+            printk(KERN_ERR "unrecognised mount option \"%s\"\n", p);
+            return -EINVAL;
+        }
+    }
+
+    return 0;
+}
+#endif /* 5.1.0 > version >= 2.6.0 */
+
+
+#if RTLNX_VER_MAX(2,6,0)
+/**
+ * Converts the value part of a numeric mount option.
+ *
+ * @returns 0 on success, -EINVAL if the value is missing, empty or followed
+ *          by characters that are not part of the number.
+ * @param   value       The text following the '=', NULL if there was no '='.
+ * @param   base        Number base passed on to simple_strtoul().
+ * @param   pResult     Where to return the converted value on success.
+ */
+static int vbsf_parse_mount_number(char *value, unsigned int base, unsigned long *pResult)
+{
+    char *end;
+
+    /* A bare "uid" (no '=') yields value == NULL, which must not reach
+       simple_strtoul(); an empty "uid=" is rejected as well. */
+    if (!value || !*value)
+        return -EINVAL;
+
+    *pResult = simple_strtoul(value, &end, base);
+    return *end ? -EINVAL : 0;
+}
+
+/**
+ * Linux kernel versions older than 2.6.0 don't have the match_token() routines
+ * so we parse the string-based mount options manually here.
+ *
+ * @returns 0 on success, -EINVAL if @a options is NULL, an option is not
+ *          recognised, or an option value is missing or malformed.
+ * @param   options     The option string ("key=val,key=val").  Modified in place.
+ * @param   mount_info  Where to store the parsed values.
+ */
+static int vbsf_parse_mount_options(char *options,
+                                    struct vbsf_mount_info_new *mount_info)
+{
+    unsigned long uValue;
+    char *value;
+    char *option;
+    int rc;
+
+    if (!options)
+        return -EINVAL;
+
+# if RTLNX_VER_MIN(2,3,9)
+    while ((option = strsep(&options, ",")) != NULL) {
+# else
+    for (option = strtok(options, ","); option; option = strtok(NULL, ",")) {
+# endif
+        if (!*option)
+            continue;
+
+        /* Split "key=value" in place; value stays NULL for a bare "key". */
+        value = strchr(option, '=');
+        if (value)
+            *value++ = '\0';
+
+        if (!strcmp(option, "iocharset") || !strcmp(option, "nls")) {
+            if (!value || !*value)
+                return -EINVAL;
+            strncpy(mount_info->nls_name, value, sizeof(mount_info->nls_name));
+            mount_info->nls_name[sizeof(mount_info->nls_name)-1] = 0;
+        } else if (!strcmp(option, "uid")) {
+            rc = vbsf_parse_mount_number(value, 0, &uValue);
+            if (rc)
+                return rc;
+            mount_info->uid = uValue;
+        } else if (!strcmp(option, "gid")) {
+            rc = vbsf_parse_mount_number(value, 0, &uValue);
+            if (rc)
+                return rc;
+            mount_info->gid = uValue;
+        } else if (!strcmp(option, "ttl")) {
+            rc = vbsf_parse_mount_number(value, 0, &uValue);
+            if (rc)
+                return rc;
+            mount_info->ttl = uValue;
+        } else if (!strcmp(option, "dmode")) {
+            rc = vbsf_parse_mount_number(value, 8, &uValue);
+            if (rc)
+                return rc;
+            mount_info->dmode = uValue;
+        } else if (!strcmp(option, "fmode")) {
+            rc = vbsf_parse_mount_number(value, 8, &uValue);
+            if (rc)
+                return rc;
+            mount_info->fmode = uValue;
+        } else if (!strcmp(option, "dmask")) {
+            rc = vbsf_parse_mount_number(value, 8, &uValue);
+            if (rc)
+                return rc;
+            mount_info->dmask = uValue;
+        } else if (!strcmp(option, "fmask")) {
+            rc = vbsf_parse_mount_number(value, 8, &uValue);
+            if (rc)
+                return rc;
+            mount_info->fmask = uValue;
+        } else if (!strcmp(option, "umask")) {
+            /* umask is shorthand for setting both masks at once. */
+            rc = vbsf_parse_mount_number(value, 8, &uValue);
+            if (rc)
+                return rc;
+            mount_info->dmask = mount_info->fmask = uValue;
+        } else if (!strcmp(option, "maxiopages")) {
+            rc = vbsf_parse_mount_number(value, 0, &uValue);
+            if (rc)
+                return rc;
+            mount_info->cMaxIoPages = uValue;
+        } else if (!strcmp(option, "dirbuf")) {
+            rc = vbsf_parse_mount_number(value, 0, &uValue);
+            if (rc)
+                return rc;
+            mount_info->cbDirBuf = uValue;
+        } else if (!strcmp(option, "dcachettl")) {
+            rc = vbsf_parse_mount_number(value, 0, &uValue);
+            if (rc)
+                return rc;
+            mount_info->msDirCacheTTL = uValue;
+        } else if (!strcmp(option, "inodettl")) {
+            rc = vbsf_parse_mount_number(value, 0, &uValue);
+            if (rc)
+                return rc;
+            mount_info->msInodeTTL = uValue;
+        } else if (!strcmp(option, "cache")) {
+            if (!value || !*value)
+                return -EINVAL;
+            /* An unknown mode only triggers a warning; the current value is kept. */
+            if (!strcmp(value, "default") || !strcmp(value, "strict"))
+                mount_info->enmCacheMode = kVbsfCacheMode_Strict;
+            else if (!strcmp(value, "none"))
+                mount_info->enmCacheMode = kVbsfCacheMode_None;
+            else if (!strcmp(value, "read"))
+                mount_info->enmCacheMode = kVbsfCacheMode_Read;
+            else if (!strcmp(value, "readwrite"))
+                mount_info->enmCacheMode = kVbsfCacheMode_ReadWrite;
+            else
+                printk(KERN_WARNING "vboxsf: cache mode (%s) is out of range, using default instead.\n", value);
+        } else if (!strcmp(option, "tag")) {
+            if (!value || !*value)
+                return -EINVAL;
+            strncpy(mount_info->szTag, value, sizeof(mount_info->szTag));
+            mount_info->szTag[sizeof(mount_info->szTag)-1] = 0;
+        } else if (!strcmp(option, "sf_name")) {
+            if (!value || !*value)
+                return -EINVAL;
+            strncpy(mount_info->name, value, sizeof(mount_info->name));
+            mount_info->name[sizeof(mount_info->name)-1] = 0;
+        } else {
+            /* Terminate the message with a newline so it is emitted as a
+               complete log record of its own. */
+            printk(KERN_ERR "unrecognised mount option \"%s\"\n", option);
+            return -EINVAL;
+        }
+    }
+
+    return 0;
+}
+#endif
+
+
+/**
+ * This is called by vbsf_read_super_24(), vbsf_read_super_26(), and
+ * vbsf_get_tree() when vfs mounts the fs and wants to read the super_block.
+ *
+ * Calls vbsf_super_info_alloc_and_map_it() to map the folder and allocate super
+ * information structure.
+ *
+ * Initializes @a sb, initializes root inode and dentry.
+ *
+ * Should respect @a flags.
+ *
+ * @returns 0 on success; otherwise the failure status of the step that failed
+ * (-EINVAL when there is no mount data, -ENOSYS for a remount request).
+ * @param sb The super block to initialize.
+ * @param fc Kernels >= 5.1.0: the filesystem context; the mount info is
+ * taken from fc->fs_private.
+ * @param data Kernels < 5.1.0: the mount info (struct vbsf_mount_info_new).
+ * @param flags Kernels < 5.1.0: the mount flags; MS_REMOUNT is rejected.
+ */
+#if RTLNX_VER_MIN(5,1,0)
+static int vbsf_read_super_aux(struct super_block *sb, struct fs_context *fc)
+#else
+static int vbsf_read_super_aux(struct super_block *sb, void *data, int flags)
+#endif
+{
+ int rc;
+ struct vbsf_super_info *pSuperInfo;
+
+ TRACE();
+#if RTLNX_VER_MAX(5,1,0)
+ if (!data) {
+ SFLOGRELBOTH(("vboxsf: No mount data. Is mount.vboxsf installed (typically in /sbin)?\n"));
+ return -EINVAL;
+ }
+
+ if (flags & MS_REMOUNT) {
+ SFLOGRELBOTH(("vboxsf: Remounting is not supported!\n"));
+ return -ENOSYS;
+ }
+#endif
+
+ /*
+ * Create our super info structure and map the shared folder.
+ */
+#if RTLNX_VER_MIN(5,1,0)
+ struct vbsf_mount_info_new *info = fc->fs_private;
+ rc = vbsf_super_info_alloc_and_map_it(info, &pSuperInfo);
+#else
+ rc = vbsf_super_info_alloc_and_map_it((struct vbsf_mount_info_new *)data, &pSuperInfo);
+#endif
+ if (rc == 0) {
+ /*
+ * Initialize the super block structure (must be done before
+ * root inode creation).
+ */
+ sb->s_magic = 0xface;
+ sb->s_blocksize = 1024;
+#if RTLNX_VER_MIN(2,4,3)
+ /* Required for seek/sendfile (see 'loff_t max' in fs/read_write.c / do_sendfile()). */
+# if defined MAX_LFS_FILESIZE
+ sb->s_maxbytes = MAX_LFS_FILESIZE;
+# elif BITS_PER_LONG == 32
+ sb->s_maxbytes = (loff_t)ULONG_MAX << PAGE_SHIFT;
+# else
+ sb->s_maxbytes = INT64_MAX;
+# endif
+#endif
+#if RTLNX_VER_MIN(2,6,11)
+ sb->s_time_gran = 1; /* This might be a little optimistic for windows hosts, where it should be 100. */
+#endif
+ sb->s_op = &g_vbsf_super_ops;
+#if RTLNX_VER_MIN(2,6,38)
+ sb->s_d_op = &vbsf_dentry_ops;
+#endif
+
+ /*
+ * Initialize the backing device. This is important for memory mapped
+ * files among other things.
+ */
+ rc = vbsf_init_backing_dev(sb, pSuperInfo);
+ if (rc == 0) {
+ /*
+ * Create the root inode and we're done.
+ */
+ rc = vbsf_create_root_inode(sb, pSuperInfo);
+ if (rc == 0) {
+ VBSF_SET_SUPER_INFO(sb, pSuperInfo);
+ SFLOGFLOW(("vbsf_read_super_aux: returns successfully\n"));
+ return 0;
+ }
+ /* Root inode creation failed: undo the backing device setup. */
+ vbsf_done_backing_dev(sb, pSuperInfo);
+ } else
+ SFLOGRELBOTH(("vboxsf: backing device information initialization failed: %d\n", rc));
+ /* Common failure path once the super info has been allocated. */
+ vbsf_super_info_free(pSuperInfo);
+ }
+ return rc;
+}
+
+
+/**
+ * Inode teardown callback, invoked when vfs is about to destroy @a inode.
+ *
+ * Releases the inode info structure attached to the inode, including its
+ * path string and handle chain.
+ */
+#if RTLNX_VER_MIN(2,6,36)
+static void vbsf_evict_inode(struct inode *inode)
+#else
+static void vbsf_clear_inode(struct inode *inode)
+#endif
+{
+    struct vbsf_inode_info *pInodeInfo;
+
+    TRACE();
+
+#if RTLNX_VER_MIN(2,6,36)
+    /* Get rid of the page cache contents and finish off the inode. */
+    truncate_inode_pages(&inode->i_data, 0);
+# if RTLNX_VER_MIN(3,5,0)
+    clear_inode(inode);
+# else
+    end_writeback(inode);
+# endif
+#endif
+
+    /* Nothing more to do if no inode info is attached. */
+    pInodeInfo = VBSF_GET_INODE_INFO(inode);
+    if (!pInodeInfo)
+        return;
+
+    /* Detach first, then free the path, the handles and the info itself. */
+    VBSF_SET_INODE_INFO(inode, NULL);
+
+    Assert(pInodeInfo->u32Magic == SF_INODE_INFO_MAGIC);
+    BUG_ON(!pInodeInfo->path);
+    kfree(pInodeInfo->path);
+    vbsf_handle_drop_chain(pInodeInfo);
+#ifdef VBOX_STRICT
+    pInodeInfo->u32Magic = SF_INODE_INFO_MAGIC_DEAD;
+#endif
+    kfree(pInodeInfo);
+}
+
+
+/* This is called by vfs when it wants to populate [inode] with data.
+ The only thing known about the inode at this point is its index, so
+ nothing can be done here; it is left to lookup and friends to fill in
+ [inode] properly. Only hooked up on kernels older than 2.6.25. */
+#if RTLNX_VER_MAX(2,6,25)
+static void vbsf_read_inode(struct inode *inode)
+{
+}
+#endif
+
+
+/* Unmount callback: vfs is finished with [sb].  Tears down the backing
+   device and hands [pSuperInfo] to [vbsf_super_info_free], which unmaps the
+   folder and frees the structure. */
+static void vbsf_put_super(struct super_block *sb)
+{
+    struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(sb);
+
+    BUG_ON(!pSuperInfo);
+    vbsf_done_backing_dev(sb, pSuperInfo);
+    vbsf_super_info_free(pSuperInfo);
+}
+
+
+/**
+ * Get file system statistics.
+ *
+ * Queries the volume information of the share root from the host and
+ * translates it into the kernel's statfs structure.
+ *
+ * @returns 0 on success, -ENOMEM if the request buffer cannot be allocated,
+ *          otherwise the host status converted to a negative errno.
+ * @param   dentry  Kernels >= 2.6.18: a dentry on the file system in question.
+ * @param   sb      Kernels < 2.6.18: the super block.
+ * @param   stat    Where to return the statistics.
+ */
+#if RTLNX_VER_MIN(2,6,18)
+static int vbsf_statfs(struct dentry *dentry, struct kstatfs *stat)
+#elif RTLNX_VER_MIN(2,5,73)
+static int vbsf_statfs(struct super_block *sb, struct kstatfs *stat)
+#else
+static int vbsf_statfs(struct super_block *sb, struct statfs *stat)
+#endif
+{
+#if RTLNX_VER_MIN(2,6,18)
+    struct super_block *sb = dentry->d_inode->i_sb;
+#endif
+    int rc;
+    VBOXSFVOLINFOREQ *pReq = (VBOXSFVOLINFOREQ *)VbglR0PhysHeapAlloc(sizeof(*pReq));
+    if (pReq) {
+        SHFLVOLINFO *pVolInfo = &pReq->VolInfo;
+        struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(sb);
+        rc = VbglR0SfHostReqQueryVolInfo(pSuperInfo->map.root, pReq, SHFL_HANDLE_ROOT);
+        if (RT_SUCCESS(rc)) {
+            /* The allocation unit comes from the host and is used as a
+               divisor below, so never let it be zero.  Fall back on the
+               super block's block size (or 1024 if that is unset too). */
+            unsigned long cbUnit = pVolInfo->ulBytesPerAllocationUnit;
+            if (!cbUnit)
+                cbUnit = sb->s_blocksize ? sb->s_blocksize : 1024;
+
+            stat->f_type = UINT32_C(0x786f4256); /* 'VBox' little endian */
+            stat->f_bsize = cbUnit;
+#if RTLNX_VER_MIN(2,5,73)
+            stat->f_frsize = cbUnit;
+#endif
+            stat->f_blocks = pVolInfo->ullTotalAllocationBytes / cbUnit;
+            stat->f_bfree = pVolInfo->ullAvailableAllocationBytes / cbUnit;
+            stat->f_bavail = pVolInfo->ullAvailableAllocationBytes / cbUnit;
+            stat->f_files = 1000;
+            stat->f_ffree = 1000000; /* don't return 0 here since the guest may think
+                                      * that it is not possible to create any more files */
+            stat->f_fsid.val[0] = 0;
+            stat->f_fsid.val[1] = 0;
+            stat->f_namelen = 255;
+#if RTLNX_VER_MIN(2,6,36)
+            stat->f_flags = 0; /* not valid */
+#endif
+            RT_ZERO(stat->f_spare);
+            rc = 0;
+        } else
+            rc = -RTErrConvertToErrno(rc);
+        VbglR0PhysHeapFree(pReq);
+    } else
+        rc = -ENOMEM;
+    return rc;
+}
+
+/**
+ * Applies new mount options to a mounted share and refreshes the root inode.
+ *
+ * @returns 0 on success.  -ENOSYS on kernels older than 2.4.23 or if the root
+ *          inode cannot be found, the option parser status if the option
+ *          string is bad, or the vbsf_stat() status if the root of the share
+ *          cannot be stat'ed.
+ * @param   sb      The super block of the mount.
+ * @param   info    Kernels >= 5.1.0: the new mount info.
+ * @param   flags   Kernels < 5.1.0: the mount flags (not used).
+ * @param   data    Kernels < 5.1.0: either a mount info structure or an
+ *                  option string.
+ */
+#if RTLNX_VER_MIN(5,1,0)
+static int vbsf_remount_fs(struct super_block *sb,
+                           struct vbsf_mount_info_new *info)
+#else
+static int vbsf_remount_fs(struct super_block *sb, int *flags, char *data)
+#endif
+{
+#if RTLNX_VER_MIN(2,4,23)
+    struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(sb);
+    struct vbsf_inode_info *sf_i;
+    struct inode *iroot;
+    SHFLFSOBJINFO fsinfo;
+    int err;
+    Assert(pSuperInfo);
+
+# if RTLNX_VER_MIN(5,1,0)
+    vbsf_super_info_copy_remount_options(pSuperInfo, info);
+# else
+    if (VBSF_IS_MOUNT_VBOXSF_DATA(data)) {
+        vbsf_super_info_copy_remount_options(pSuperInfo, (struct vbsf_mount_info_new *)data);
+    } else {
+        struct vbsf_mount_info_new mount_opts = { '\0' };
+        vbsf_init_mount_info(&mount_opts, NULL);
+        err = vbsf_parse_mount_options(data, &mount_opts);
+        if (err)
+            return err;
+        vbsf_super_info_copy_remount_options(pSuperInfo, &mount_opts);
+    }
+# endif
+
+    /* '.' and '..' entries are st_ino == 0 so root is #1 */
+    iroot = ilookup(sb, 1);
+    if (!iroot)
+        return -ENOSYS;
+
+    /* Re-stat the root and refresh its inode with the new options.  A failing
+       stat (the host side may have gone away) is an ordinary runtime error,
+       so report it to the caller rather than taking the kernel down.  Note
+       that the options copied above stay in effect in that case. */
+    sf_i = VBSF_GET_INODE_INFO(iroot);
+    err = vbsf_stat(__func__, pSuperInfo, sf_i->path, &fsinfo, 0);
+    if (err == 0)
+        vbsf_init_inode(iroot, sf_i, &fsinfo, pSuperInfo);
+    else
+        SFLOGRELBOTH(("vboxsf: could not stat root of share on remount: %d\n", err));
+    iput(iroot); /* drop the reference taken by ilookup() */
+    return err;
+#else /* < 2.4.23 */
+    return -ENOSYS;
+#endif /* < 2.4.23 */
+}
+
+
+/**
+ * Writes the active mount options to a seq_file.
+ *
+ * The VBoxService automounter depends on this to read back the 'szTag'
+ * option value it set on its mount.
+ *
+ * @returns 0 (always).
+ */
+#if RTLNX_VER_MAX(3,3,0)
+static int vbsf_show_options(struct seq_file *m, struct vfsmount *mnt)
+#else
+static int vbsf_show_options(struct seq_file *m, struct dentry *root)
+#endif
+{
+#if RTLNX_VER_MAX(3,3,0)
+    struct super_block *sb = mnt->mnt_sb;
+#else
+    struct super_block *sb = root->d_sb;
+#endif
+    struct vbsf_super_info *pSuperInfo = VBSF_GET_SUPER_INFO(sb);
+
+    /* Without super info there is nothing to report. */
+    if (!pSuperInfo)
+        return 0;
+
+    /*
+     * Performance related options (only shown when not at their default).
+     */
+    if (pSuperInfo->msTTL != -1)
+        seq_printf(m, ",ttl=%d", pSuperInfo->msTTL);
+    if (pSuperInfo->msDirCacheTTL >= 0)
+        seq_printf(m, ",dcachettl=%d", pSuperInfo->msDirCacheTTL);
+    if (pSuperInfo->msInodeTTL >= 0)
+        seq_printf(m, ",inodettl=%d", pSuperInfo->msInodeTTL);
+    if (pSuperInfo->cMaxIoPages != VBSF_DEFAULT_MAX_IO_PAGES)
+        seq_printf(m, ",maxiopages=%u", pSuperInfo->cMaxIoPages);
+    if (pSuperInfo->cbDirBuf != VBSF_DEFAULT_DIR_BUF_SIZE)
+        seq_printf(m, ",dirbuf=%u", pSuperInfo->cbDirBuf);
+
+    switch (pSuperInfo->enmCacheMode) {
+    case kVbsfCacheMode_None:
+        seq_puts(m, ",cache=none");
+        break;
+    case kVbsfCacheMode_Read:
+        seq_puts(m, ",cache=read");
+        break;
+    case kVbsfCacheMode_ReadWrite:
+        seq_puts(m, ",cache=readwrite");
+        break;
+    case kVbsfCacheMode_Strict:
+        /* The default mode, not shown. */
+        break;
+    default:
+        AssertFailed();
+        break;
+    }
+
+    /*
+     * Attributes and NLS.
+     */
+    seq_printf(m, ",iocharset=%s", pSuperInfo->nls ? pSuperInfo->nls->charset : "utf8");
+    seq_printf(m, ",uid=%u,gid=%u", pSuperInfo->uid, pSuperInfo->gid);
+    if (pSuperInfo->dmode != ~0)
+        seq_printf(m, ",dmode=0%o", pSuperInfo->dmode);
+    if (pSuperInfo->fmode != ~0)
+        seq_printf(m, ",fmode=0%o", pSuperInfo->fmode);
+    if (pSuperInfo->dmask != 0)
+        seq_printf(m, ",dmask=0%o", pSuperInfo->dmask);
+    if (pSuperInfo->fmask != 0)
+        seq_printf(m, ",fmask=0%o", pSuperInfo->fmask);
+
+    /*
+     * Misc.
+     */
+    if (pSuperInfo->szTag[0] != '\0') {
+        seq_puts(m, ",tag=");
+        seq_escape(m, pSuperInfo->szTag, " \t\n\\");
+    }
+    return 0;
+}
+
+
+/**
+ * Super block operations.
+ *
+ * Installed as sb->s_op by vbsf_read_super_aux(). Which members are set
+ * depends on the kernel version: the inode teardown hook is .clear_inode
+ * before 2.6.36 and .evict_inode from then on, .read_inode is only set before
+ * 2.6.25, and .remount_fs only before 5.1.0.
+ */
+static struct super_operations g_vbsf_super_ops = {
+#if RTLNX_VER_MAX(2,6,36)
+ .clear_inode = vbsf_clear_inode,
+#else
+ .evict_inode = vbsf_evict_inode,
+#endif
+#if RTLNX_VER_MAX(2,6,25)
+ .read_inode = vbsf_read_inode,
+#endif
+ .put_super = vbsf_put_super,
+ .statfs = vbsf_statfs,
+#if RTLNX_VER_MAX(5,1,0)
+ .remount_fs = vbsf_remount_fs,
+#endif
+ .show_options = vbsf_show_options
+};
+
+
+
+/*********************************************************************************************************************************
+* File system type related stuff. *
+*********************************************************************************************************************************/
+
+#if RTLNX_VER_RANGE(2,5,4, 5,1,0)
+
+/**
+ * Fill-super callback passed to mount_nodev() / get_sb_nodev().
+ *
+ * Thin wrapper around vbsf_read_super_aux() that logs a failure status.
+ *
+ * @returns The status of vbsf_read_super_aux().
+ */
+static int vbsf_read_super_26(struct super_block *sb, void *data, int flags)
+{
+    int rc;
+
+    TRACE();
+
+    rc = vbsf_read_super_aux(sb, data, flags);
+    if (rc != 0)
+        printk(KERN_DEBUG "vbsf_read_super_aux err=%d\n", rc);
+    return rc;
+}
+
+# if RTLNX_VER_MIN(2,6,39)
+/**
+ * Mount entry point for kernels 2.6.39 up to (excluding) 5.1.0.
+ *
+ * @a data is either a mount info structure, which is passed on as is, or an
+ * option string, which is parsed into a local mount info structure first.
+ *
+ * @returns The root dentry from mount_nodev(), or an ERR_PTR if the option
+ *          string cannot be parsed.
+ */
+static struct dentry *sf_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data)
+{
+    struct vbsf_mount_info_new mount_opts = { '\0' };
+    int rc;
+
+    TRACE();
+
+    /* Mount info structures go straight through. */
+    if (VBSF_IS_MOUNT_VBOXSF_DATA(data))
+        return mount_nodev(fs_type, flags, data, vbsf_read_super_26);
+
+    /* Option string: start from the defaults, with dev_name as folder name. */
+    vbsf_init_mount_info(&mount_opts, dev_name);
+    rc = vbsf_parse_mount_options(data, &mount_opts);
+    if (rc)
+        return ERR_PTR(rc);
+    return mount_nodev(fs_type, flags, &mount_opts, vbsf_read_super_26);
+}
+# elif RTLNX_VER_MIN(2,6,18)
+/**
+ * Mount entry point for kernels 2.6.18 up to (excluding) 2.6.39.
+ *
+ * @a data is either a mount info structure, which is passed on as is, or an
+ * option string, which is parsed into a local mount info structure first.
+ *
+ * @returns The status of get_sb_nodev(), or the option parser status if the
+ *          option string cannot be parsed.
+ */
+static int vbsf_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+{
+    struct vbsf_mount_info_new mount_opts = { '\0' };
+    int rc;
+
+    TRACE();
+
+    /* Mount info structures go straight through. */
+    if (VBSF_IS_MOUNT_VBOXSF_DATA(data))
+        return get_sb_nodev(fs_type, flags, data, vbsf_read_super_26, mnt);
+
+    /* Option string: start from the defaults, with dev_name as folder name. */
+    vbsf_init_mount_info(&mount_opts, dev_name);
+    rc = vbsf_parse_mount_options(data, &mount_opts);
+    if (rc)
+        return rc;
+    return get_sb_nodev(fs_type, flags, &mount_opts, vbsf_read_super_26,
+                        mnt);
+}
+# else /* 2.6.18 > version >= 2.5.4 */
+/**
+ * Mount entry point for kernels 2.5.4 up to (excluding) 2.6.18.
+ *
+ * @a data is either a mount info structure, which is passed on as is, or an
+ * option string, which is parsed into a local mount info structure first.
+ *
+ * @returns The super block from get_sb_nodev(), or an ERR_PTR if the option
+ *          string cannot be parsed.
+ */
+static struct super_block *vbsf_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data)
+{
+    struct vbsf_mount_info_new mount_opts = { '\0' };
+    int rc;
+
+    TRACE();
+
+    /* Mount info structures go straight through. */
+    if (VBSF_IS_MOUNT_VBOXSF_DATA(data))
+        return get_sb_nodev(fs_type, flags, data, vbsf_read_super_26);
+
+    /* Option string: start from the defaults, with dev_name as folder name. */
+    vbsf_init_mount_info(&mount_opts, dev_name);
+    rc = vbsf_parse_mount_options(data, &mount_opts);
+    if (rc)
+        return ERR_PTR(rc);
+    return get_sb_nodev(fs_type, flags, &mount_opts, vbsf_read_super_26);
+}
+# endif
+#endif /* 5.1.0 > version >= 2.5.4 */
+
+#if RTLNX_VER_MAX(2,5,4) /* < 2.5.4 */
+
+/**
+ * The read_super callback for kernels older than 2.5.4.
+ *
+ * @a data is either a mount info structure, which is passed on as is, or an
+ * option string, which is parsed into a local mount info structure first.
+ *
+ * @returns @a sb on success, NULL on any failure.
+ * @param   sb      The super block to fill in.
+ * @param   data    The mount data (see above).
+ * @param   flags   Passed on to vbsf_read_super_aux().
+ */
+static struct super_block *vbsf_read_super_24(struct super_block *sb, void *data, int flags)
+{
+    int err;
+
+    TRACE();
+
+    if (!VBSF_IS_MOUNT_VBOXSF_DATA(data)) {
+        struct vbsf_mount_info_new mount_opts = { '\0' };
+
+        vbsf_init_mount_info(&mount_opts, NULL);
+        err = vbsf_parse_mount_options(data, &mount_opts);
+        /* Signal failure with NULL, just like the vbsf_read_super_aux()
+           failure path below does.  An ERR_PTR value is not NULL and would
+           not be recognised as a failure by a caller testing for NULL.
+           NOTE(review): confirm against the 2.4 read_super caller. */
+        if (err)
+            return NULL;
+        err = vbsf_read_super_aux(sb, &mount_opts, flags);
+    } else {
+        err = vbsf_read_super_aux(sb, data, flags);
+    }
+    if (err) {
+        printk(KERN_DEBUG "vbsf_read_super_aux err=%d\n", err);
+        return NULL;
+    }
+
+    return sb;
+}
+
+static DECLARE_FSTYPE(g_vboxsf_fs_type, "vboxsf", vbsf_read_super_24, 0);
+
+#endif /* < 2.5.4 */
+
+#if RTLNX_VER_MIN(5,1,0)
+
+/**
+ * The following section of code uses the Linux filesystem mount API (also
+ * known as the "filesystem context API") to parse string-based mount options.
+ * The API is described here:
+ * https://www.kernel.org/doc/Documentation/filesystems/mount_api.txt
+ */
+/* Values of the "cache" mount option as seen by vbsf_parse_param(), which
+ translates them into kVbsfCacheMode_* values. */
+enum vbsf_cache_modes {
+ VBSF_CACHE_DEFAULT,
+ VBSF_CACHE_NONE,
+ VBSF_CACHE_STRICT,
+ VBSF_CACHE_READ,
+ VBSF_CACHE_RW
+};
+
+/* Name to value table for the "cache" option; referenced by the fsparam_enum()
+ entry of the parameter table on kernels >= 5.6.0. The empty entry
+ terminates the table. */
+static const struct constant_table vbsf_param_cache_mode[] = {
+ { "default", VBSF_CACHE_DEFAULT },
+ { "none", VBSF_CACHE_NONE },
+ { "strict", VBSF_CACHE_STRICT },
+ { "read", VBSF_CACHE_READ },
+ { "readwrite", VBSF_CACHE_RW },
+ {}
+};
+
+/* Option identifiers used in the fs_parameter_spec table below and switched
+ on in vbsf_parse_param(). */
+enum {
+ Opt_iocharset, /* nls_name[] */
+ Opt_nls, /* alias for iocharset */
+ Opt_uid,
+ Opt_gid,
+ Opt_ttl,
+ Opt_dmode,
+ Opt_fmode,
+ Opt_dmask,
+ Opt_fmask,
+ Opt_umask,
+ Opt_maxiopages,
+ Opt_dirbuf,
+ Opt_dcachettl,
+ Opt_inodettl,
+ Opt_cachemode, /* enum vbsf_cache_mode */
+ Opt_tag
+};
+
+/* The mount option specifications: option name, value type and identifier.
+ Before 5.6.0 this array is called vbsf_fs_specs and gets wrapped in a
+ struct fs_parameter_description further down; from 5.6.0 on the array
+ itself is vbsf_fs_parameters and the "cache" entry names its value table
+ directly. The empty entry terminates the table. */
+# if RTLNX_VER_MAX(5,6,0)
+static const struct fs_parameter_spec vbsf_fs_specs[] = {
+# else
+static const struct fs_parameter_spec vbsf_fs_parameters[] = {
+# endif
+ fsparam_string("iocharset", Opt_iocharset),
+ fsparam_string("nls", Opt_nls),
+ fsparam_u32 ("uid", Opt_uid),
+ fsparam_u32 ("gid", Opt_gid),
+ fsparam_u32 ("ttl", Opt_ttl),
+ fsparam_u32oct("dmode", Opt_dmode),
+ fsparam_u32oct("fmode", Opt_fmode),
+ fsparam_u32oct("dmask", Opt_dmask),
+ fsparam_u32oct("fmask", Opt_fmask),
+ fsparam_u32oct("umask", Opt_umask),
+ fsparam_u32 ("maxiopages", Opt_maxiopages),
+ fsparam_u32 ("dirbuf", Opt_dirbuf),
+ fsparam_u32 ("dcachettl", Opt_dcachettl),
+ fsparam_u32 ("inodettl", Opt_inodettl),
+# if RTLNX_VER_MAX(5,6,0)
+ fsparam_enum ("cache", Opt_cachemode),
+# else
+ fsparam_enum ("cache", Opt_cachemode, vbsf_param_cache_mode),
+# endif
+ fsparam_string("tag", Opt_tag),
+ {}
+};
+
+/* Kernels before 5.6.0 only: the values of the "cache" option are listed in a
+ separate enum table keyed by option identifier, and the spec and enum
+ tables are bundled in a description structure named vbsf_fs_parameters. */
+# if RTLNX_VER_MAX(5,6,0)
+static const struct fs_parameter_enum vbsf_fs_enums[] = {
+ { Opt_cachemode, "default", VBSF_CACHE_DEFAULT },
+ { Opt_cachemode, "none", VBSF_CACHE_NONE },
+ { Opt_cachemode, "strict", VBSF_CACHE_STRICT },
+ { Opt_cachemode, "read", VBSF_CACHE_READ },
+ { Opt_cachemode, "readwrite", VBSF_CACHE_RW },
+ {}
+};
+
+static const struct fs_parameter_description vbsf_fs_parameters = {
+ .name = "vboxsf",
+ .specs = vbsf_fs_specs,
+ .enums = vbsf_fs_enums
+};
+# endif
+
+/**
+ * Handles a single (string-based) mount option, i.e. one item of what was
+ * passed in as -o foo,bar=123,etc.
+ *
+ * @returns 0 on success, the negative fs_parse() status if the option cannot
+ *          be parsed, or the invalf() result for an out-of-range mode or mask.
+ * @param   fc      The filesystem context; fc->fs_private is the mount info
+ *                  being filled in.
+ * @param   param   The option to handle.
+ */
+static int vbsf_parse_param(struct fs_context *fc, struct fs_parameter *param)
+{
+    struct vbsf_mount_info_new *pMntInfo = fc->fs_private;
+    struct fs_parse_result Result;
+    int iOpt;
+
+# if RTLNX_VER_MAX(5,6,0)
+    iOpt = fs_parse(fc, &vbsf_fs_parameters, param, &Result);
+# else
+    iOpt = fs_parse(fc, vbsf_fs_parameters, param, &Result);
+# endif
+    if (iOpt < 0)
+        return iOpt;
+
+    switch (iOpt) {
+    /* String options. */
+    case Opt_iocharset:
+    case Opt_nls:
+        strlcpy(pMntInfo->nls_name, param->string, sizeof(pMntInfo->nls_name));
+        break;
+    case Opt_tag:
+        strlcpy(pMntInfo->szTag, param->string, sizeof(pMntInfo->szTag));
+        break;
+
+    /* Plain numbers. */
+    case Opt_uid:
+        pMntInfo->uid = Result.uint_32;
+        break;
+    case Opt_gid:
+        pMntInfo->gid = Result.uint_32;
+        break;
+    case Opt_ttl:
+        pMntInfo->ttl = Result.uint_32;
+        break;
+    case Opt_maxiopages:
+        pMntInfo->cMaxIoPages = Result.uint_32;
+        break;
+    case Opt_dirbuf:
+        pMntInfo->cbDirBuf = Result.uint_32;
+        break;
+    case Opt_dcachettl:
+        pMntInfo->msDirCacheTTL = Result.uint_32;
+        break;
+    case Opt_inodettl:
+        pMntInfo->msInodeTTL = Result.uint_32;
+        break;
+
+    /* Modes may only have bits within 0777 set. */
+    case Opt_dmode:
+        if (Result.uint_32 & ~0777)
+            return invalf(fc, "Invalid dmode specified: '%o'", Result.uint_32);
+        pMntInfo->dmode = Result.uint_32;
+        break;
+    case Opt_fmode:
+        if (Result.uint_32 & ~0777)
+            return invalf(fc, "Invalid fmode specified: '%o'", Result.uint_32);
+        pMntInfo->fmode = Result.uint_32;
+        break;
+
+    /* Masks may only have bits within 07777 set. */
+    case Opt_dmask:
+        if (Result.uint_32 & ~07777)
+            return invalf(fc, "Invalid dmask specified: '%o'", Result.uint_32);
+        pMntInfo->dmask = Result.uint_32;
+        break;
+    case Opt_fmask:
+        if (Result.uint_32 & ~07777)
+            return invalf(fc, "Invalid fmask specified: '%o'", Result.uint_32);
+        pMntInfo->fmask = Result.uint_32;
+        break;
+    case Opt_umask:
+        if (Result.uint_32 & ~07777)
+            return invalf(fc, "Invalid umask specified: '%o'", Result.uint_32);
+        pMntInfo->dmask = pMntInfo->fmask = Result.uint_32;
+        break;
+
+    /* The cache mode; an unknown value only triggers a warning. */
+    case Opt_cachemode:
+        switch (Result.uint_32) {
+        case VBSF_CACHE_DEFAULT:
+        case VBSF_CACHE_STRICT:
+            pMntInfo->enmCacheMode = kVbsfCacheMode_Strict;
+            break;
+        case VBSF_CACHE_NONE:
+            pMntInfo->enmCacheMode = kVbsfCacheMode_None;
+            break;
+        case VBSF_CACHE_READ:
+            pMntInfo->enmCacheMode = kVbsfCacheMode_Read;
+            break;
+        case VBSF_CACHE_RW:
+            pMntInfo->enmCacheMode = kVbsfCacheMode_ReadWrite;
+            break;
+        default:
+            printk(KERN_WARNING "vboxsf: cache mode (%u) is out of range, using default instead.\n", Result.uint_32);
+            break;
+        }
+        break;
+
+    default:
+        return invalf(fc, "Invalid mount option: '%s'", param->key);
+    }
+
+    return 0;
+}
+
+/**
+ * Takes in the mount options in either of the two forms they can come in:
+ * the page of data supplied by the mount.vboxsf utility, or a string in the
+ * format key[=val][,key[=val]]*.
+ *
+ * @returns 0 if there is no data or the data is a mount info structure,
+ *          otherwise the status of generic_parse_monolithic().
+ * @param   fc      The filesystem context; fc->fs_private receives the data.
+ * @param   data    The mount data, may be NULL.
+ */
+static int vbsf_parse_monolithic(struct fs_context *fc, void *data)
+{
+    if (!data)
+        return 0;
+
+    /* Option strings are split up and fed to vbsf_parse_param(). */
+    if (!VBSF_IS_MOUNT_VBOXSF_DATA(data))
+        return generic_parse_monolithic(fc, data);
+
+    /* A complete mount info structure simply replaces what we have. */
+    memcpy(fc->fs_private, data, sizeof(struct vbsf_mount_info_new));
+    return 0;
+}
+
+/**
+ * Releases the filesystem-specific part of the filesystem context, i.e. the
+ * mount info structure allocated by vbsf_init_fs_context().
+ */
+static void vbsf_free_ctx(struct fs_context *fc)
+{
+    /* kfree() accepts NULL, so no check is needed. */
+    kfree(fc->fs_private);
+    fc->fs_private = NULL;
+}
+
+/**
+ * Creates the super block and mountable root for the shared folder named by
+ * fc->source.  The super block is filled in by vbsf_read_super_aux(), which
+ * also maps the shared folder.
+ *
+ * @returns The status of the kernel's super block helper, or the invalf()
+ *          result if no shared folder name was given.
+ */
+static int vbsf_get_tree(struct fs_context *fc)
+{
+    struct vbsf_mount_info_new *pMntInfo = fc->fs_private;
+
+    if (fc->source) {
+        /* The folder name only becomes available after vbsf_init_fs_ctx()
+           has run, so it is copied into the mount info here. */
+        strlcpy(pMntInfo->name, fc->source, sizeof(pMntInfo->name));
+
+# if RTLNX_VER_MAX(5,3,0)
+        return vfs_get_super(fc, vfs_get_independent_super, vbsf_read_super_aux);
+# else
+        return get_tree_nodev(fc, vbsf_read_super_aux);
+# endif
+    }
+
+    SFLOGRELBOTH(("vboxsf: No shared folder specified\n"));
+    return invalf(fc, "vboxsf: No shared folder specified");
+}
+
+/**
+ * Applies the mount information held by the filesystem context to the
+ * existing super block.  Reached via '-o remount' (aka mount(2) with
+ * MS_REMOUNT); the fs_context counterpart of the .remount_fs super operation.
+ *
+ * @returns The status of vbsf_remount_fs().
+ */
+static int vbsf_reconfigure(struct fs_context *fc)
+{
+    struct super_block *pSuperBlock = fc->root->d_sb;
+    struct vbsf_mount_info_new *pMntInfo = fc->fs_private;
+
+    return vbsf_remount_fs(pSuperBlock, pMntInfo);
+}
+
+/* Filesystem context operations (kernels >= 5.1.0); installed as fc->ops by
+ vbsf_init_fs_context(). */
+static const struct fs_context_operations vbsf_context_ops = {
+ .parse_param = vbsf_parse_param,
+ .parse_monolithic = vbsf_parse_monolithic,
+ .free = vbsf_free_ctx,
+ .get_tree = vbsf_get_tree,
+ .reconfigure = vbsf_reconfigure
+};
+
+/**
+ * Prepares a filesystem context for vboxsf: allocates the mount info
+ * structure, loads the option defaults and installs the context operations.
+ *
+ * @returns 0 on success, -ENOMEM if the mount info cannot be allocated.
+ */
+static int vbsf_init_fs_context(struct fs_context *fc)
+{
+    struct vbsf_mount_info_new *pMntInfo = kzalloc(sizeof(*pMntInfo), GFP_KERNEL);
+
+    if (!pMntInfo) {
+        SFLOGRELBOTH(("vboxsf: Could not allocate memory for mount options\n"));
+        return -ENOMEM;
+    }
+
+    /* Defaults; everything not listed here stays zero. */
+    pMntInfo->length = sizeof(struct vbsf_mount_info_new);
+    pMntInfo->enmCacheMode = kVbsfCacheMode_Strict;
+    pMntInfo->dmode = pMntInfo->fmode = ~0U;
+    pMntInfo->ttl = pMntInfo->msDirCacheTTL = pMntInfo->msInodeTTL = -1;
+
+    fc->ops = &vbsf_context_ops;
+    fc->fs_private = pMntInfo;
+    return 0;
+}
+#endif /* >= 5.1.0 */
+
+
+#if RTLNX_VER_MIN(2,5,4)
+/**
+ * File system registration structure.
+ *
+ * Registered by init() and unregistered by fini(). The mount entry point
+ * depends on the kernel version: .init_fs_context plus .parameters from
+ * 5.1.0 on, .mount from 2.6.39 on, and .get_sb before that.
+ */
+static struct file_system_type g_vboxsf_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "vboxsf",
+# if RTLNX_VER_MIN(5,1,0)
+ .init_fs_context = vbsf_init_fs_context,
+# if RTLNX_VER_MAX(5,6,0)
+ .parameters = &vbsf_fs_parameters,
+# else
+ .parameters = vbsf_fs_parameters,
+# endif
+# elif RTLNX_VER_MIN(2,6,39)
+ .mount = sf_mount,
+# else
+ .get_sb = vbsf_get_sb,
+# endif
+ .kill_sb = kill_anon_super
+};
+#endif /* >= 2.5.4 */
+
+
+/*********************************************************************************************************************************
+* Module stuff *
+*********************************************************************************************************************************/
+
+/**
+ * Called on module initialization.
+ *
+ * Initializes the shared folder guest library, connects to the host service,
+ * queries the host features, configures the service (UTF-8 and, optionally,
+ * symlinks) and finally registers the file system. Each step is undone in
+ * reverse order if a later one fails.
+ *
+ * @returns 0 on success, negative errno on failure.
+ */
+static int __init init(void)
+{
+ int rc;
+ SFLOGFLOW(("vboxsf: init\n"));
+
+ /*
+ * Must be paranoid about the vbsf_mount_info_new size.
+ */
+ AssertCompile(sizeof(struct vbsf_mount_info_new) <= PAGE_SIZE);
+ if (sizeof(struct vbsf_mount_info_new) > PAGE_SIZE) {
+ printk(KERN_ERR
+ "vboxsf: Mount information structure is too large %lu\n"
+ "vboxsf: Must be less than or equal to %lu\n",
+ (unsigned long)sizeof(struct vbsf_mount_info_new),
+ (unsigned long)PAGE_SIZE);
+ return -EINVAL;
+ }
+
+ /*
+ * Initialize stuff.
+ */
+ spin_lock_init(&g_SfHandleLock);
+ rc = VbglR0SfInit();
+ if (RT_SUCCESS(rc)) {
+ /*
+ * Try connect to the shared folder HGCM service.
+ * It is possible it is not there.
+ */
+ rc = VbglR0SfConnect(&g_SfClient);
+ if (RT_SUCCESS(rc)) {
+ /*
+ * Query host HGCM features and afterwards (must be last) shared folder features.
+ */
+ rc = VbglR0QueryHostFeatures(&g_fHostFeatures);
+ if (RT_FAILURE(rc))
+ {
+ LogRel(("vboxsf: VbglR0QueryHostFeatures failed: rc=%Rrc (ignored)\n", rc));
+ g_fHostFeatures = 0;
+ }
+ VbglR0SfHostReqQueryFeaturesSimple(&g_fSfFeatures, &g_uSfLastFunction);
+ LogRel(("vboxsf: g_fHostFeatures=%#x g_fSfFeatures=%#RX64 g_uSfLastFunction=%u\n",
+ g_fHostFeatures, g_fSfFeatures, g_uSfLastFunction));
+
+ /*
+ * Tell the shared folder service about our expectations:
+ * - UTF-8 strings (rather than UTF-16)
+ * - Whether to return or follow (default) symbolic links.
+ */
+ rc = VbglR0SfHostReqSetUtf8Simple();
+ if (RT_SUCCESS(rc)) {
+ if (!g_fFollowSymlinks) {
+ /* A failure here is only warned about; rc is overwritten below. */
+ rc = VbglR0SfHostReqSetSymlinksSimple();
+ if (RT_FAILURE(rc))
+ printk(KERN_WARNING "vboxsf: Host unable to enable showing symlinks, rc=%d\n", rc);
+ }
+ /*
+ * Now that we're ready for action, try register the
+ * file system with the kernel.
+ */
+ rc = register_filesystem(&g_vboxsf_fs_type);
+ if (rc == 0) {
+ printk(KERN_INFO "vboxsf: Successfully loaded version " VBOX_VERSION_STRING " r" __stringify(VBOX_SVN_REV) "\n");
+#ifdef VERMAGIC_STRING
+ LogRel(("vboxsf: Successfully loaded version " VBOX_VERSION_STRING " r" __stringify(VBOX_SVN_REV) " on %s (LINUX_VERSION_CODE=%#x)\n",
+ VERMAGIC_STRING, LINUX_VERSION_CODE));
+#elif defined(UTS_RELEASE)
+ LogRel(("vboxsf: Successfully loaded version " VBOX_VERSION_STRING " r" __stringify(VBOX_SVN_REV) " on %s (LINUX_VERSION_CODE=%#x)\n",
+ UTS_RELEASE, LINUX_VERSION_CODE));
+#else
+ LogRel(("vboxsf: Successfully loaded version " VBOX_VERSION_STRING " r" __stringify(VBOX_SVN_REV) " (LINUX_VERSION_CODE=%#x)\n", LINUX_VERSION_CODE));
+#endif
+ return 0;
+ }
+
+ /*
+ * Failed. Bail out.
+ */
+ LogRel(("vboxsf: register_filesystem failed: rc=%d\n", rc));
+ } else {
+ LogRel(("vboxsf: VbglR0SfSetUtf8 failed, rc=%Rrc\n", rc));
+ rc = -EPROTO;
+ }
+ VbglR0SfDisconnect(&g_SfClient);
+ } else {
+ LogRel(("vboxsf: VbglR0SfConnect failed, rc=%Rrc\n", rc));
+ rc = rc == VERR_HGCM_SERVICE_NOT_FOUND ? -EHOSTDOWN : -ECONNREFUSED;
+ }
+ VbglR0SfTerm();
+ } else {
+ LogRel(("vboxsf: VbglR0SfInit failed, rc=%Rrc\n", rc));
+ rc = -EPROTO;
+ }
+ return rc;
+}
+
+
+/**
+ * Called on module finalization.
+ *
+ * Undoes init() in reverse order: unregisters the file system, disconnects
+ * from the shared folder service and terminates the guest library.
+ */
+static void __exit fini(void)
+{
+ SFLOGFLOW(("vboxsf: fini\n"));
+
+ unregister_filesystem(&g_vboxsf_fs_type);
+ VbglR0SfDisconnect(&g_SfClient);
+ VbglR0SfTerm();
+}
+
+
+/*
+ * Module parameters.
+ *
+ * follow_symlinks is backed by g_fFollowSymlinks, which init() checks to
+ * decide whether to ask the host to show symlinks instead of following them.
+ */
+#if RTLNX_VER_MIN(2,5,52)
+module_param_named(follow_symlinks, g_fFollowSymlinks, int, 0);
+MODULE_PARM_DESC(follow_symlinks,
+ "Let host resolve symlinks rather than showing them");
+#endif
+
+
+/*
+ * Module declaration related bits.
+ *
+ * MODULE_ALIAS_FS and MODULE_VERSION are only used when the kernel headers
+ * define them.
+ */
+module_init(init);
+module_exit(fini);
+
+MODULE_DESCRIPTION(VBOX_PRODUCT " VFS Module for Host File System Access");
+MODULE_AUTHOR(VBOX_VENDOR);
+MODULE_LICENSE("GPL and additional rights");
+#ifdef MODULE_ALIAS_FS
+MODULE_ALIAS_FS("vboxsf");
+#endif
+#ifdef MODULE_VERSION
+MODULE_VERSION(VBOX_VERSION_STRING " r" RT_XSTR(VBOX_SVN_REV));
+#endif
+
diff --git a/src/VBox/Additions/linux/sharedfolders/vfsmod.h b/src/VBox/Additions/linux/sharedfolders/vfsmod.h
new file mode 100644
index 00000000..f7d221da
--- /dev/null
+++ b/src/VBox/Additions/linux/sharedfolders/vfsmod.h
@@ -0,0 +1,478 @@
+/* $Id: vfsmod.h $ */
+/** @file
+ * vboxsf - Linux Shared Folders VFS, internal header.
+ */
+
+/*
+ * Copyright (C) 2006-2022 Oracle and/or its affiliates.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef GA_INCLUDED_SRC_linux_sharedfolders_vfsmod_h
+#define GA_INCLUDED_SRC_linux_sharedfolders_vfsmod_h
+#ifndef RT_WITHOUT_PRAGMA_ONCE
+# pragma once
+#endif
+
+#if 0 /* Enables strict checks. */
+# define RT_STRICT
+# define VBOX_STRICT
+#endif
+
+#define LOG_GROUP LOG_GROUP_SHARED_FOLDERS
+#include "the-linux-kernel.h"
+#include <iprt/list.h>
+#include <iprt/asm.h>
+#include <VBox/log.h>
+
+#if RTLNX_VER_MIN(2,6,0)
+# include <linux/backing-dev.h>
+#endif
+
+#include <VBox/VBoxGuestLibSharedFolders.h>
+#include <VBox/VBoxGuestLibSharedFoldersInline.h>
+#include <iprt/asm.h>
+#include "vbsfmount.h"
+
+
+/*
+ * Logging wrappers: the '#if 1' branch maps them onto the Log*() / LogRel() macros, the '#else' branch onto RTLogBackdoorPrintf (plus printk for SFLOGRELBOTH).
+ */
+#if 1
+# define TRACE() LogFunc(("tracepoint\n"))
+# define SFLOG(aArgs) Log(aArgs)
+# define SFLOGFLOW(aArgs) LogFlow(aArgs)
+# define SFLOG2(aArgs) Log2(aArgs)
+# define SFLOG3(aArgs) Log3(aArgs)
+# define SFLOGRELBOTH(aArgs) LogRel(aArgs)
+# ifdef LOG_ENABLED
+# define SFLOG_ENABLED 1
+# endif
+#else
+# define TRACE() RTLogBackdoorPrintf("%s: tracepoint\n", __FUNCTION__)
+# define SFLOG(aArgs) RTLogBackdoorPrintf aArgs
+# define SFLOGFLOW(aArgs) RTLogBackdoorPrintf aArgs
+# define SFLOG2(aArgs) RTLogBackdoorPrintf aArgs
+# define SFLOG3(aArgs) RTLogBackdoorPrintf aArgs
+# define SFLOG_ENABLED 1
+# define SFLOGRELBOTH(aArgs) do { RTLogBackdoorPrintf aArgs; printk aArgs; } while (0)
+#endif
+
+
+/*
+ * inode compatibility glue: for kernels before 2.6.0, i_size_read() / i_size_write() implemented as atomic 64-bit accesses of inode::i_size.
+ */
+#if RTLNX_VER_MAX(2,6,0)
+
+DECLINLINE(loff_t) i_size_read(struct inode *pInode)
+{
+ AssertCompile(sizeof(loff_t) == sizeof(uint64_t));
+ return ASMAtomicReadU64((uint64_t volatile *)&pInode->i_size);
+}
+
+DECLINLINE(void) i_size_write(struct inode *pInode, loff_t cbNew)
+{
+ AssertCompile(sizeof(pInode->i_size) == sizeof(uint64_t));
+ ASMAtomicWriteU64((uint64_t volatile *)&pInode->i_size, cbNew);
+}
+
+#endif /* < 2.6.0 */
+
+#if RTLNX_VER_MAX(3,2,0) && !RTLNX_RHEL_MIN(6, 10)
+DECLINLINE(void) set_nlink(struct inode *pInode, unsigned int cLinks)
+{
+ pInode->i_nlink = cLinks; /* Plain store; presumably these kernels have no native set_nlink() - confirm. */
+}
+#endif /* < 3.2.0 && !RTLNX_RHEL_MIN(6, 10) */
+
+
+/* global variables */
+extern VBGLSFCLIENT g_SfClient;
+extern spinlock_t g_SfHandleLock;
+extern uint32_t g_uSfLastFunction;
+extern uint64_t g_fSfFeatures;
+
+extern struct inode_operations vbsf_dir_iops;
+extern struct inode_operations vbsf_lnk_iops;
+extern struct inode_operations vbsf_reg_iops;
+extern struct file_operations vbsf_dir_fops;
+extern struct file_operations vbsf_reg_fops;
+extern struct dentry_operations vbsf_dentry_ops;
+extern struct address_space_operations vbsf_reg_aops;
+
+
+/**
+ * VBox specific per-mount (shared folder) information.
+ */
+struct vbsf_super_info {
+ VBGLSFMAP map; /**< The shared folder mapping; NOTE(review): presumably what identifies the folder to the host - confirm. */
+ struct nls_table *nls; /**< The NLS table (see fNlsIsUtf8). */
+ /** Set if the NLS table is UTF-8. */
+ bool fNlsIsUtf8;
+ int uid; /**< NOTE(review): uid thru fmask look like the ownership/mode/mask mount options - confirm against the option parsing. */
+ int gid;
+ int dmode;
+ int fmode;
+ int dmask;
+ int fmask;
+ /** Maximum number of pages to allow in an I/O buffer with the host.
+ * This applies to read and write operations. */
+ uint32_t cMaxIoPages;
+ /** The default directory buffer size. */
+ uint32_t cbDirBuf;
+ /** The time to live for directory entries in jiffies, zero if disabled. */
+ uint32_t cJiffiesDirCacheTTL;
+ /** The time to live for inode information in jiffies, zero if disabled. */
+ uint32_t cJiffiesInodeTTL;
+ /** The cache and coherency mode. */
+ enum vbsf_cache_mode enmCacheMode;
+ /** Mount tag for VBoxService automounter. @since 6.0 */
+ char szTag[32];
+#if RTLNX_VER_RANGE(2,6,0, 4,12,0)
+ /** The backing device info structure. */
+ struct backing_dev_info bdi;
+#endif
+ /** The mount option value for /proc/mounts. */
+ int32_t msTTL;
+ /** The time to live for directory entries in milliseconds, for /proc/mounts. */
+ int32_t msDirCacheTTL;
+ /** The time to live for inode information in milliseconds, for /proc/mounts. */
+ int32_t msInodeTTL;
+#if RTLNX_VER_RANGE(4,0,0, 4,2,0)
+ /** 4.0 and 4.1 are missing noop_backing_dev_info export, so take down the
+ * initial value so we can restore it in vbsf_done_backing_dev(). (paranoia) */
+ struct backing_dev_info *bdi_org;
+#endif
+};
+
+/* Following casts are here to prevent assignment of void * to
+ pointers of arbitrary type */
+#if RTLNX_VER_MAX(2,6,0)
+# define VBSF_GET_SUPER_INFO(sb) ((struct vbsf_super_info *)(sb)->u.generic_sbp)
+# define VBSF_SET_SUPER_INFO(sb, a_pSuperInfo) do { (sb)->u.generic_sbp = a_pSuperInfo; } while (0)
+#else
+# define VBSF_GET_SUPER_INFO(sb) ((struct vbsf_super_info *)(sb)->s_fs_info)
+# define VBSF_SET_SUPER_INFO(sb, a_pSuperInfo) do { (sb)->s_fs_info = a_pSuperInfo;} while (0)
+#endif
+
+
+/**
+ * For associating inodes with host handles.
+ *
+ * This is necessary for address_space_operations::vbsf_writepage and allows
+ * optimizing stat, lookups and other operations on open files and directories.
+ */
+struct vbsf_handle {
+ /** List entry (head vbsf_inode_info::HandleList). */
+ RTLISTNODE Entry;
+ /** Host file/whatever handle. */
+ SHFLHANDLE hHost;
+ /** VBSF_HANDLE_F_XXX */
+ uint32_t fFlags;
+ /** Reference counter.
+ * Close the handle and free the structure when it reaches zero. */
+ uint32_t volatile cRefs;
+#ifdef VBOX_STRICT
+ /** For strictness checks. */
+ struct vbsf_inode_info *pInodeInfo;
+#endif
+};
+
+/** @name VBSF_HANDLE_F_XXX - Handle summary flags (vbsf_handle::fFlags).
+ * @{ */
+#define VBSF_HANDLE_F_READ UINT32_C(0x00000001)
+#define VBSF_HANDLE_F_WRITE UINT32_C(0x00000002)
+#define VBSF_HANDLE_F_APPEND UINT32_C(0x00000004)
+#define VBSF_HANDLE_F_FILE UINT32_C(0x00000010)
+#define VBSF_HANDLE_F_DIR UINT32_C(0x00000020)
+#define VBSF_HANDLE_F_ON_LIST UINT32_C(0x00000080)
+#define VBSF_HANDLE_F_MAGIC_MASK UINT32_C(0xffffff00)
+#define VBSF_HANDLE_F_MAGIC UINT32_C(0x75030700) /**< Maurice Ravel (1875-03-07). */
+#define VBSF_HANDLE_F_MAGIC_DEAD UINT32_C(0x19371228)
+/** @} */
+
+
+/**
+ * VBox specific per-inode information.
+ */
+struct vbsf_inode_info {
+ /** Which file */
+ SHFLSTRING *path;
+ /** Some information was changed, update data on next revalidate */
+ bool force_restat;
+ /** The timestamp (jiffies) where the inode info was last updated. */
+ unsigned long ts_up_to_date;
+ /** The birth time. */
+ RTTIMESPEC BirthTime;
+
+ /** @name Host modification detection stats.
+ * @{ */
+ /** The raw modification time, for mapping invalidation purposes. */
+ RTTIMESPEC ModificationTime;
+ /** Copy of ModificationTime from the last time we wrote to the file. */
+ RTTIMESPEC ModificationTimeAtOurLastWrite;
+ /** @} */
+
+ /** handle valid if a file was created with vbsf_create_worker until it will
+ * be opened with vbsf_reg_open()
+ * @todo r=bird: figure this one out... */
+ SHFLHANDLE handle;
+
+ /** List of open handles (struct vbsf_handle), protected by g_SfHandleLock. */
+ RTLISTANCHOR HandleList;
+#ifdef VBOX_STRICT
+ uint32_t u32Magic;
+# define SF_INODE_INFO_MAGIC UINT32_C(0x18620822) /**< Claude Debussy (1862-08-22). */
+# define SF_INODE_INFO_MAGIC_DEAD UINT32_C(0x19180325)
+#endif
+};
+
+#if RTLNX_VER_MIN(2,6,19) || defined(KERNEL_FC6)
+/* FC6 kernel 2.6.18, vanilla kernel 2.6.19+: the per-inode info pointer lives in i_private. */
+# define VBSF_GET_INODE_INFO(i) ((struct vbsf_inode_info *) (i)->i_private)
+# define VBSF_SET_INODE_INFO(i, sf_i) ((i)->i_private = (sf_i))
+#else
+/* vanilla kernel up to 2.6.18: the per-inode info pointer lives in u.generic_ip. */
+# define VBSF_GET_INODE_INFO(i) ((struct vbsf_inode_info *) (i)->u.generic_ip)
+# define VBSF_SET_INODE_INFO(i, sf_i) ((i)->u.generic_ip = (sf_i))
+#endif
+
+extern void vbsf_init_inode(struct inode *inode, struct vbsf_inode_info *sf_i, PSHFLFSOBJINFO info,
+ struct vbsf_super_info *pSuperInfo);
+extern void vbsf_update_inode(struct inode *pInode, struct vbsf_inode_info *pInodeInfo, PSHFLFSOBJINFO pObjInfo,
+ struct vbsf_super_info *pSuperInfo, bool fInodeLocked, unsigned fSetAttrs);
+extern int vbsf_inode_revalidate_worker(struct dentry *dentry, bool fForced, bool fInodeLocked);
+extern int vbsf_inode_revalidate_with_handle(struct dentry *dentry, SHFLHANDLE hHostFile, bool fForced, bool fInodeLocked);
+#if RTLNX_VER_MIN(2,5,18)
+# if RTLNX_VER_MIN(5,12,0)
+extern int vbsf_inode_getattr(struct user_namespace *ns, const struct path *path,
+ struct kstat *kstat, u32 request_mask, unsigned int query_flags);
+# elif RTLNX_VER_MIN(4,11,0)
+extern int vbsf_inode_getattr(const struct path *path, struct kstat *kstat, u32 request_mask, unsigned int query_flags);
+# else
+extern int vbsf_inode_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *kstat);
+# endif
+#else /* < 2.5.18 */
+extern int vbsf_inode_revalidate(struct dentry *dentry);
+#endif /* < 2.5.18 */
+#if RTLNX_VER_MIN(5,12,0)
+extern int vbsf_inode_setattr(struct user_namespace *ns, struct dentry *dentry, struct iattr *iattr);
+#else
+extern int vbsf_inode_setattr(struct dentry *dentry, struct iattr *iattr);
+#endif
+
+
+extern void vbsf_handle_drop_chain(struct vbsf_inode_info *pInodeInfo);
+extern struct vbsf_handle *vbsf_handle_find(struct vbsf_inode_info *pInodeInfo, uint32_t fFlagsSet, uint32_t fFlagsClear);
+extern uint32_t vbsf_handle_release_slow(struct vbsf_handle *pHandle, struct vbsf_super_info *pSuperInfo,
+ const char *pszCaller);
+extern void vbsf_handle_append(struct vbsf_inode_info *pInodeInfo, struct vbsf_handle *pHandle);
+
+/**
+ * Drops one reference to a handle.
+ *
+ * @returns New reference count (vbsf_handle_release_slow() result if it hit zero).
+ * @param   pHandle     The handle to release.
+ * @param   pSuperInfo  The info structure for the shared folder associated
+ *                      with the handle (handed to the slow path).
+ * @param   pszCaller   The caller name (handed to the slow path).
+ */
+DECLINLINE(uint32_t) vbsf_handle_release(struct vbsf_handle *pHandle, struct vbsf_super_info *pSuperInfo, const char *pszCaller)
+{
+    uint32_t cRefsLeft;
+
+    Assert((pHandle->fFlags & VBSF_HANDLE_F_MAGIC_MASK) == VBSF_HANDLE_F_MAGIC);
+    Assert(pHandle->pInodeInfo);
+    Assert(pHandle->pInodeInfo && pHandle->pInodeInfo->u32Magic == SF_INODE_INFO_MAGIC);
+
+    cRefsLeft = ASMAtomicDecU32(&pHandle->cRefs);
+    Assert(cRefsLeft < _64M);
+    if (cRefsLeft == 0) /* last reference gone: let the slow path deal with it */
+        return vbsf_handle_release_slow(pHandle, pSuperInfo, pszCaller);
+    return cRefsLeft;
+}
+
+
+/**
+ * VBox specific information for a regular file.
+ */
+struct vbsf_reg_info {
+ /** Handle tracking structure.
+ * @note Must be first! */
+ struct vbsf_handle Handle;
+};
+
+uint32_t vbsf_linux_oflags_to_vbox(unsigned fLnxOpen, uint32_t *pfHandle, const char *pszCaller);
+
+
+/**
+ * VBox specific information for an open directory.
+ */
+struct vbsf_dir_info {
+ /** Handle tracking structure.
+ * @note Must be first! */
+ struct vbsf_handle Handle;
+ /** Semaphore protecting everything below. */
+ struct semaphore Lock;
+ /** A magic number (VBSF_DIR_INFO_MAGIC). */
+ uint32_t u32Magic;
+ /** Size of the buffer for directory entries. */
+ uint32_t cbBuf;
+ /** Buffer for directory entries on the physical heap. */
+ PSHFLDIRINFO pBuf;
+ /** Number of valid bytes in the buffer. */
+ uint32_t cbValid;
+ /** Number of entries left in the buffer. */
+ uint32_t cEntriesLeft;
+ /** The position of the next entry. Incremented by one for each entry. */
+ loff_t offPos;
+ /** The next entry. */
+ PSHFLDIRINFO pEntry;
+ /** Set if there are no more files. */
+ bool fNoMoreFiles;
+};
+
+/** Magic number for vbsf_dir_info::u32Magic (Robert Anson Heinlein). */
+#define VBSF_DIR_INFO_MAGIC UINT32_C(0x19070707)
+/** Value of vbsf_dir_info::u32Magic when freed. */
+#define VBSF_DIR_INFO_MAGIC_DEAD UINT32_C(0x19880508)
+
+
+/**
+ * Sets the update-jiffies value for a dentry.
+ *
+ * This is used together with vbsf_super_info::cJiffiesDirCacheTTL to reduce
+ * re-validation of dentry structures while walking.
+ *
+ * This used to be living in d_time, but since 4.9.0 that seems to have become
+ * unfashionable and d_fsdata is now used for this purpose. We do this all
+ * the way back, since d_time seems only to have been used by the file system
+ * specific code (at least going back to 2.4.0).
+ */
+DECLINLINE(void) vbsf_dentry_set_update_jiffies(struct dentry *pDirEntry, unsigned long uToSet)
+{
+ /*SFLOG3(("vbsf_dentry_set_update_jiffies: %p: %lx -> %#lx\n", pDirEntry, (unsigned long)pDirEntry->d_fsdata, uToSet));*/
+ pDirEntry->d_fsdata = (void *)uToSet;
+}
+
+/**
+ * Get the update-jiffies value for a dentry.
+ */
+DECLINLINE(unsigned long) vbsf_dentry_get_update_jiffies(struct dentry *pDirEntry)
+{
+ return (unsigned long)pDirEntry->d_fsdata;
+}
+
+/**
+ * Invalidates the update TTL for the given directory entry so that it is
+ * revalidated the next time it is used.
+ * @param pDirEntry The directory entry cache entry to invalidate.
+ */
+DECLINLINE(void) vbsf_dentry_invalidate_ttl(struct dentry *pDirEntry)
+{
+ vbsf_dentry_set_update_jiffies(pDirEntry, jiffies - INT32_MAX / 2); /* stamp it INT32_MAX / 2 jiffies into the past */
+}
+
+/**
+ * Increase the time-to-live of @a pDirEntry and its ancestors by stamping them with the current jiffies.
+ * @param pDirEntry The directory entry cache entry to stamp together with its ancestors;
+ * the walk up d_parent stops at the first IS_ROOT() dentry without stamping that one.
+ */
+DECLINLINE(void) vbsf_dentry_chain_increase_ttl(struct dentry *pDirEntry)
+{
+#ifdef VBOX_STRICT
+ struct super_block * const pSuper = pDirEntry->d_sb;
+#endif
+ unsigned long const uToSet = jiffies;
+ do {
+ Assert(pDirEntry->d_sb == pSuper);
+ vbsf_dentry_set_update_jiffies(pDirEntry, uToSet);
+ pDirEntry = pDirEntry->d_parent;
+ } while (!IS_ROOT(pDirEntry));
+}
+
+/**
+ * Increase the time-to-live of all ancestors (does nothing if d_parent is NULL).
+ * @param pDirEntry The directory entry cache entry whose ancestors
+ * we should increase the TTL for; the entry itself is left alone.
+ */
+DECLINLINE(void) vbsf_dentry_chain_increase_parent_ttl(struct dentry *pDirEntry)
+{
+ Assert(!pDirEntry->d_parent || pDirEntry->d_parent->d_sb == pDirEntry->d_sb);
+ pDirEntry = pDirEntry->d_parent;
+ if (pDirEntry)
+ vbsf_dentry_chain_increase_ttl(pDirEntry);
+}
+
+/** Macro for getting the dentry for a struct file pointer @a f (argument is parenthesized so any pointer expression works). */
+#if RTLNX_VER_MIN(4,6,0)
+# define VBSF_GET_F_DENTRY(f) file_dentry(f)
+#elif RTLNX_VER_MIN(2,6,20)
+# define VBSF_GET_F_DENTRY(f) ((f)->f_path.dentry)
+#else
+# define VBSF_GET_F_DENTRY(f) ((f)->f_dentry)
+#endif
+
+/**
+ * Checks if the 'data' argument passed in via mount(2) was supplied by the
+ * mount.vboxsf command line utility as a page of data containing the
+ * vbsf_mount_info_new structure (zero nullchar + three signature bytes). @note Evaluates @a data four times.
+ */
+#define VBSF_IS_MOUNT_VBOXSF_DATA(data) \
+ (((struct vbsf_mount_info_new *)(data))->nullchar == '\0' && \
+ ((struct vbsf_mount_info_new *)(data))->signature[0] == VBSF_MOUNT_SIGNATURE_BYTE_0 && \
+ ((struct vbsf_mount_info_new *)(data))->signature[1] == VBSF_MOUNT_SIGNATURE_BYTE_1 && \
+ ((struct vbsf_mount_info_new *)(data))->signature[2] == VBSF_MOUNT_SIGNATURE_BYTE_2)
+
+extern int vbsf_stat(const char *caller, struct vbsf_super_info *pSuperInfo, SHFLSTRING * path, PSHFLFSOBJINFO result,
+ int ok_to_fail);
+extern int vbsf_path_from_dentry(struct vbsf_super_info *pSuperInfo, struct vbsf_inode_info *sf_i, struct dentry *dentry,
+ SHFLSTRING ** result, const char *caller);
+extern int vbsf_nlscpy(struct vbsf_super_info *pSuperInfo, char *name, size_t name_bound_len,
+ const unsigned char *utf8_name, size_t utf8_len);
+extern int vbsf_nls_to_shflstring(struct vbsf_super_info *pSuperInfo, const char *pszNls, PSHFLSTRING *ppString);
+
+
+/**
+ * Converts Linux access permissions to VBox ones (mode & 0777).
+ * @returns @a fAttr masked with RTFS_UNIX_ALL_ACCESS_PERMS.
+ * @note Currently identical.
+ * @sa sf_access_permissions_to_linux
+ */
+DECLINLINE(uint32_t) sf_access_permissions_to_vbox(int fAttr)
+{
+ /* Access bits should be the same (checked at compile time for user, group and other): */
+ AssertCompile(RTFS_UNIX_IRUSR == S_IRUSR);
+ AssertCompile(RTFS_UNIX_IWUSR == S_IWUSR);
+ AssertCompile(RTFS_UNIX_IXUSR == S_IXUSR);
+ AssertCompile(RTFS_UNIX_IRGRP == S_IRGRP);
+ AssertCompile(RTFS_UNIX_IWGRP == S_IWGRP);
+ AssertCompile(RTFS_UNIX_IXGRP == S_IXGRP);
+ AssertCompile(RTFS_UNIX_IROTH == S_IROTH);
+ AssertCompile(RTFS_UNIX_IWOTH == S_IWOTH);
+ AssertCompile(RTFS_UNIX_IXOTH == S_IXOTH);
+
+ return fAttr & RTFS_UNIX_ALL_ACCESS_PERMS;
+}
+
+#endif /* !GA_INCLUDED_SRC_linux_sharedfolders_vfsmod_h */