From f215e02bf85f68d3a6106c2a1f4f7f063f819064 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Thu, 11 Apr 2024 10:17:27 +0200 Subject: Adding upstream version 7.0.14-dfsg. Signed-off-by: Daniel Baumann --- .../Additions/solaris/SharedFolders/vboxfs_vnode.c | 2500 ++++++++++++++++++++ 1 file changed, 2500 insertions(+) create mode 100644 src/VBox/Additions/solaris/SharedFolders/vboxfs_vnode.c (limited to 'src/VBox/Additions/solaris/SharedFolders/vboxfs_vnode.c') diff --git a/src/VBox/Additions/solaris/SharedFolders/vboxfs_vnode.c b/src/VBox/Additions/solaris/SharedFolders/vboxfs_vnode.c new file mode 100644 index 00000000..e639468c --- /dev/null +++ b/src/VBox/Additions/solaris/SharedFolders/vboxfs_vnode.c @@ -0,0 +1,2500 @@ +/* $Id: vboxfs_vnode.c $ */ +/** @file + * VirtualBox File System for Solaris Guests, vnode implementation. + * Portions contributed by: Ronald. + */ + +/* + * Copyright (C) 2009-2023 Oracle and/or its affiliates. + * + * This file is part of VirtualBox base platform packages, as + * available from https://www.virtualbox.org. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, in version 3 of the + * License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <https://www.gnu.org/licenses>. + * + * The contents of this file may alternatively be used under the terms + * of the Common Development and Distribution License Version 1.0 + * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included + * in the VirtualBox distribution, in which case the provisions of the + * CDDL are applicable instead of those of the GPL. 
+ * + * You may elect to license modified versions of this file under the + * terms and conditions of either the GPL or the CDDL or both. + * + * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0 + */ + +/* + * Shared Folder File System is used from Solaris when run as a guest operating + * system on VirtualBox, though is meant to be usable with any hypervisor that + * can provide similar functionality. The sffs code handles all the Solaris + * specific semantics and relies on a provider module to actually access + * directories, files, etc. The provider interfaces are described in + * "vboxfs_prov.h" and the module implementing them is shipped as part of the + * VirtualBox Guest Additions for Solaris. + * + * The shared folder file system is similar to a networked file system, + * but with some caveats. The sffs code caches minimal information and proxies + * out to the provider whenever possible. Here are some things that are + * handled in this code and not by the proxy: + * + * - a way to open ".." from any already open directory + * - st_ino numbers + * - detecting directory changes that happened on the host. + * + * The implementation builds a cache of information for every file/directory + * ever accessed in all mounted sffs filesystems using sf_node structures. + * + * This information for both open and closed files can become invalid if + * asynchronous changes are made on the host. Solaris should not panic() in + * this event, but some file system operations may return unexpected errors. + * Information for such directories or files while they have active vnodes + * is removed from the regular cache and stored in a "stale" bucket until + * the vnode becomes completely inactive. + * + * We support only read-only mmap (VBOXVFS_WITH_MMAP), i.e. MAP_SHARED or + * MAP_PRIVATE with PROT_READ. This data caching is not coherent with + * normal simultaneous read()/write() operations, nor is it coherent + * with data access on the host. 
Writable mmap(MAP_SHARED) access is not + * implemented, as guaranteeing any kind of coherency with concurrent + * activity on the host would be near impossible with the existing + * interfaces. + * + * A note about locking. sffs is not a high performance file system. + * No fine grained locking is done. The one sffs_lock protects just about + * everything. + */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if !defined(VBOX_VFS_SOLARIS_10U6) +# include +#endif +#include +#include +#include +#include +#include +#undef u /* /usr/include/sys/user.h:249:1 is where this is defined to (curproc->p_user). very cool. */ + +#include "vboxfs_prov.h" +#include "vboxfs_vnode.h" +#include "vboxfs_vfs.h" + +/* + * Solaris 11u1b10 Extended Policy putback CR 7121445 removes secpolicy_vnode_access from sys/policy.h + */ +#ifdef VBOX_VFS_EXTENDED_POLICY +int secpolicy_vnode_access(const cred_t *, vnode_t *, uid_t, mode_t); +#endif + +#define VBOXVFS_WITH_MMAP + +static struct vnodeops *sffs_ops = NULL; + +kmutex_t sffs_lock; +static avl_tree_t sfnodes; +static avl_tree_t stale_sfnodes; + +/* + * For now we'll use an I/O buffer that doesn't page fault for VirtualBox + * to transfer data into. + */ +char *sffs_buffer; + +/* + * sfnode_compare() is needed for AVL tree functionality. + * The nodes are sorted by mounted filesystem, then path. If the + * nodes are stale, the node pointer itself is used to force uniqueness. 
+ */ +static int +sfnode_compare(const void *a, const void *b) +{ + sfnode_t *x = (sfnode_t *)a; + sfnode_t *y = (sfnode_t *)b; + int diff; + + if (x->sf_is_stale) { + ASSERT(y->sf_is_stale); + diff = strcmp(x->sf_path, y->sf_path); + if (diff == 0) + diff = (uintptr_t)y - (uintptr_t)x; + } else { + ASSERT(!y->sf_is_stale); + diff = (uintptr_t)y->sf_sffs - (uintptr_t)x->sf_sffs; + if (diff == 0) + diff = strcmp(x->sf_path, y->sf_path); + } + if (diff < 0) + return (-1); + if (diff > 0) + return (1); + return (0); +} + +/* + * Construct a new pathname given an sfnode plus an optional tail component. + * This handles ".." and "." + */ +static char * +sfnode_construct_path(sfnode_t *node, char *tail) +{ + char *p; + + if (strcmp(tail, ".") == 0 || strcmp(tail, "..") == 0) + panic("construct path for %s", tail); + p = kmem_alloc(strlen(node->sf_path) + 1 + strlen(tail) + 1, KM_SLEEP); + strcpy(p, node->sf_path); + strcat(p, "/"); + strcat(p, tail); + return (p); +} + +/* + * Clears the (cached) directory listing for the node. + */ +static void +sfnode_clear_dir_list(sfnode_t *node) +{ + ASSERT(MUTEX_HELD(&sffs_lock)); + + while (node->sf_dir_list != NULL) { + sffs_dirents_t *next = node->sf_dir_list->sf_next; + kmem_free(node->sf_dir_list, SFFS_DIRENTS_SIZE); + node->sf_dir_list = next; + } +} + +/* + * Open the provider file associated with a vnode. Holding the file open is + * the only way we have of trying to have a vnode continue to refer to the + * same host file in the host in light of the possibility of host side renames. 
+ */ +static void +sfnode_open(sfnode_t *node, int flag) +{ + int error; + sfp_file_t *fp; + + if (node->sf_file != NULL) + return; + error = sfprov_open(node->sf_sffs->sf_handle, node->sf_path, &fp, flag); + if (error == 0) + { + node->sf_file = fp; + node->sf_flag = flag; + } + else + node->sf_flag = ~0; +} + +/* + * get a new vnode reference for an sfnode + */ +vnode_t * +sfnode_get_vnode(sfnode_t *node) +{ + vnode_t *vp; + + if (node->sf_vnode != NULL) { + VN_HOLD(node->sf_vnode); + } else { + vp = vn_alloc(KM_SLEEP); + LogFlowFunc((" %s gets vnode 0x%p\n", node->sf_path, vp)); + vp->v_type = node->sf_type; + vp->v_vfsp = node->sf_sffs->sf_vfsp; + vn_setops(vp, sffs_ops); + vp->v_flag = VNOSWAP; +#ifndef VBOXVFS_WITH_MMAP + vp->v_flag |= VNOMAP; +#endif + vn_exists(vp); + vp->v_data = node; + node->sf_vnode = vp; + } + return (node->sf_vnode); +} + +/* + * Allocate and initialize a new sfnode and assign it a vnode + */ +sfnode_t * +sfnode_make( + sffs_data_t *sffs, + char *path, + vtype_t type, + sfp_file_t *fp, + sfnode_t *parent, /* can be NULL for root */ + sffs_stat_t *stat, + uint64_t stat_time) +{ + sfnode_t *node; + avl_index_t where; + + ASSERT(MUTEX_HELD(&sffs_lock)); + ASSERT(path != NULL); + + /* + * build the sfnode + */ + LogFlowFunc(("sffs_make(%s)\n", path)); + node = kmem_alloc(sizeof (*node), KM_SLEEP); + node->sf_sffs = sffs; + VFS_HOLD(node->sf_sffs->sf_vfsp); + node->sf_path = path; + node->sf_ino = sffs->sf_ino++; + node->sf_type = type; + node->sf_is_stale = 0; /* never stale at creation */ + node->sf_file = fp; + node->sf_flag = ~0; + node->sf_vnode = NULL; /* do this before any sfnode_get_vnode() */ + node->sf_children = 0; + node->sf_parent = parent; + if (parent) + ++parent->sf_children; + node->sf_dir_list = NULL; + if (stat != NULL) { + node->sf_stat = *stat; + node->sf_stat_time = stat_time; + } else { + node->sf_stat_time = 0; + } + + /* + * add the new node to our cache + */ + if (avl_find(&sfnodes, node, &where) != NULL) + 
panic("sffs_create_sfnode(%s): duplicate sfnode_t", path); + avl_insert(&sfnodes, node, where); + return (node); +} + +/* + * destroy an sfnode + */ +static void +sfnode_destroy(sfnode_t *node) +{ + avl_index_t where; + avl_tree_t *tree; + sfnode_t *parent; +top: + parent = node->sf_parent; + ASSERT(MUTEX_HELD(&sffs_lock)); + ASSERT(node->sf_path != NULL); + LogFlowFunc(("sffs_destroy(%s)%s\n", node->sf_path, node->sf_is_stale ? " stale": "")); + if (node->sf_children != 0) + panic("sfnode_destroy(%s) has %d children", node->sf_path, node->sf_children); + if (node->sf_vnode != NULL) + panic("sfnode_destroy(%s) has active vnode", node->sf_path); + + if (node->sf_is_stale) + tree = &stale_sfnodes; + else + tree = &sfnodes; + if (avl_find(tree, node, &where) == NULL) + panic("sfnode_destroy(%s) not found", node->sf_path); + avl_remove(tree, node); + + VFS_RELE(node->sf_sffs->sf_vfsp); + sfnode_clear_dir_list(node); + kmem_free(node->sf_path, strlen(node->sf_path) + 1); + kmem_free(node, sizeof (*node)); + if (parent != NULL) { + sfnode_clear_dir_list(parent); + if (parent->sf_children == 0) + panic("sfnode_destroy parent (%s) has no child", parent->sf_path); + --parent->sf_children; + if (parent->sf_children == 0 && + parent->sf_is_stale && + parent->sf_vnode == NULL) { + node = parent; + goto top; + } + } +} + +/* + * Some sort of host operation on an sfnode has failed or it has been + * deleted. Mark this node and any children as stale, deleting knowledge + * about any which do not have active vnodes or children + * This also handle deleting an inactive node that was already stale. + */ +static void +sfnode_make_stale(sfnode_t *node) +{ + sfnode_t *n; + int len; + ASSERT(MUTEX_HELD(&sffs_lock)); + avl_index_t where; + + /* + * First deal with any children of a directory node. + * If a directory becomes stale, anything below it becomes stale too. 
+ */ + if (!node->sf_is_stale && node->sf_type == VDIR) { + len = strlen(node->sf_path); + + n = node; + while ((n = AVL_NEXT(&sfnodes, node)) != NULL) { + ASSERT(!n->sf_is_stale); + + /* + * quit when no longer seeing children of node + */ + if (n->sf_sffs != node->sf_sffs || + strncmp(node->sf_path, n->sf_path, len) != 0 || + n->sf_path[len] != '/') + break; + + /* + * Either mark the child as stale or destroy it + */ + if (n->sf_vnode == NULL && n->sf_children == 0) { + sfnode_destroy(n); + } else { + LogFlowFunc(("sffs_make_stale(%s) sub\n", n->sf_path)); + sfnode_clear_dir_list(n); + if (avl_find(&sfnodes, n, &where) == NULL) + panic("sfnode_make_stale(%s)" + " not in sfnodes", n->sf_path); + avl_remove(&sfnodes, n); + n->sf_is_stale = 1; + if (avl_find(&stale_sfnodes, n, &where) != NULL) + panic("sffs_make_stale(%s) duplicates", + n->sf_path); + avl_insert(&stale_sfnodes, n, where); + } + } + } + + /* + * Now deal with the given node. + */ + if (node->sf_vnode == NULL && node->sf_children == 0) { + sfnode_destroy(node); + } else if (!node->sf_is_stale) { + LogFlowFunc(("sffs_make_stale(%s)\n", node->sf_path)); + sfnode_clear_dir_list(node); + if (node->sf_parent) + sfnode_clear_dir_list(node->sf_parent); + if (avl_find(&sfnodes, node, &where) == NULL) + panic("sfnode_make_stale(%s) not in sfnodes", + node->sf_path); + avl_remove(&sfnodes, node); + node->sf_is_stale = 1; + if (avl_find(&stale_sfnodes, node, &where) != NULL) + panic("sffs_make_stale(%s) duplicates", node->sf_path); + avl_insert(&stale_sfnodes, node, where); + } +} + +static uint64_t +sfnode_cur_time_usec(void) +{ + clock_t now = drv_hztousec(ddi_get_lbolt()); + return now; +} + +static int +sfnode_stat_cached(sfnode_t *node) +{ + return (sfnode_cur_time_usec() - node->sf_stat_time) < + node->sf_sffs->sf_stat_ttl * 1000L; +} + +static void +sfnode_invalidate_stat_cache(sfnode_t *node) +{ + node->sf_stat_time = 0; +} + +static int +sfnode_update_stat_cache(sfnode_t *node) +{ + int error; + + 
error = sfprov_get_attr(node->sf_sffs->sf_handle, node->sf_path, + &node->sf_stat); + if (error == ENOENT) + sfnode_make_stale(node); + if (error == 0) + node->sf_stat_time = sfnode_cur_time_usec(); + + return (error); +} + +/* + * Rename a file or a directory + */ +static void +sfnode_rename(sfnode_t *node, sfnode_t *newparent, char *path) +{ + sfnode_t *n; + sfnode_t template; + avl_index_t where; + int len = strlen(path); + int old_len; + char *new_path; + char *tail; + ASSERT(MUTEX_HELD(&sffs_lock)); + + ASSERT(!node->sf_is_stale); + + /* + * Have to remove anything existing that had the new name. + */ + template.sf_sffs = node->sf_sffs; + template.sf_path = path; + template.sf_is_stale = 0; + n = avl_find(&sfnodes, &template, &where); + if (n != NULL) + sfnode_make_stale(n); + + /* + * Do the renaming, deal with any children of this node first. + */ + if (node->sf_type == VDIR) { + old_len = strlen(node->sf_path); + while ((n = AVL_NEXT(&sfnodes, node)) != NULL) { + + /* + * quit when no longer seeing children of node + */ + if (n->sf_sffs != node->sf_sffs || + strncmp(node->sf_path, n->sf_path, old_len) != 0 || + n->sf_path[old_len] != '/') + break; + + /* + * Rename the child: + * - build the new path name + * - unlink the AVL node + * - assign the new name + * - re-insert the AVL name + */ + ASSERT(strlen(n->sf_path) > old_len); + tail = n->sf_path + old_len; /* includes initial "/" */ + new_path = kmem_alloc(len + strlen(tail) + 1, + KM_SLEEP); + strcpy(new_path, path); + strcat(new_path, tail); + if (avl_find(&sfnodes, n, &where) == NULL) + panic("sfnode_rename(%s) not in sfnodes", + n->sf_path); + avl_remove(&sfnodes, n); + LogFlowFunc(("sfnode_rname(%s to %s) sub\n", n->sf_path, new_path)); + kmem_free(n->sf_path, strlen(n->sf_path) + 1); + n->sf_path = new_path; + if (avl_find(&sfnodes, n, &where) != NULL) + panic("sfnode_rename(%s) duplicates", + n->sf_path); + avl_insert(&sfnodes, n, where); + } + } + + /* + * Deal with the given node. 
+ */ + if (avl_find(&sfnodes, node, &where) == NULL) + panic("sfnode_rename(%s) not in sfnodes", node->sf_path); + avl_remove(&sfnodes, node); + LogFlowFunc(("sfnode_rname(%s to %s)\n", node->sf_path, path)); + kmem_free(node->sf_path, strlen(node->sf_path) + 1); + node->sf_path = path; + if (avl_find(&sfnodes, node, &where) != NULL) + panic("sfnode_rename(%s) duplicates", node->sf_path); + avl_insert(&sfnodes, node, where); + + /* + * change the parent + */ + if (node->sf_parent == NULL) + panic("sfnode_rename(%s) no parent", node->sf_path); + if (node->sf_parent->sf_children == 0) + panic("sfnode_rename(%s) parent has no child", node->sf_path); + sfnode_clear_dir_list(node->sf_parent); + sfnode_clear_dir_list(newparent); + --node->sf_parent->sf_children; + node->sf_parent = newparent; + ++newparent->sf_children; +} + +/* + * Look for a cached node, if not found either handle ".." or try looking + * via the provider. Create an entry in sfnodes if found but not cached yet. + * If the create flag is set, a file or directory is created. If the file + * already existed, an error is returned. + * Nodes returned from this routine always have a vnode with its ref count + * bumped by 1. + */ +static sfnode_t * +sfnode_lookup( + sfnode_t *dir, + char *name, + vtype_t create, + mode_t c_mode, + sffs_stat_t *stat, + uint64_t stat_time, + int *err) +{ + avl_index_t where; + sfnode_t template; + sfnode_t *node; + int error = 0; + int type; + char *fullpath; + sfp_file_t *fp; + sffs_stat_t tmp_stat; + + ASSERT(MUTEX_HELD(&sffs_lock)); + + if (err) + *err = error; + + /* + * handle referencing myself + */ + if (strcmp(name, "") == 0 || strcmp(name, ".") == 0) + return (dir); + + /* + * deal with parent + */ + if (strcmp(name, "..") == 0) + return (dir->sf_parent); + + /* + * Look for an existing node. 
+ */ + fullpath = sfnode_construct_path(dir, name); + template.sf_sffs = dir->sf_sffs; + template.sf_path = fullpath; + template.sf_is_stale = 0; + node = avl_find(&sfnodes, &template, &where); + if (node != NULL) { + kmem_free(fullpath, strlen(fullpath) + 1); + if (create != VNON) + return (NULL); + return (node); + } + + /* + * No entry for this path currently. + * Check if the file exists with the provider and get the type from + * there. + */ + if (create == VREG) { + type = VREG; + stat = &tmp_stat; + error = sfprov_create(dir->sf_sffs->sf_handle, fullpath, c_mode, + &fp, stat); + stat_time = sfnode_cur_time_usec(); + } else if (create == VDIR) { + type = VDIR; + stat = &tmp_stat; + error = sfprov_mkdir(dir->sf_sffs->sf_handle, fullpath, c_mode, + &fp, stat); + stat_time = sfnode_cur_time_usec(); + } else { + mode_t m; + fp = NULL; + type = VNON; + if (stat == NULL) { + stat = &tmp_stat; + error = sfprov_get_attr(dir->sf_sffs->sf_handle, + fullpath, stat); + stat_time = sfnode_cur_time_usec(); + } else { + error = 0; + } + m = stat->sf_mode; + if (error != 0) + error = ENOENT; + else if (S_ISDIR(m)) + type = VDIR; + else if (S_ISREG(m)) + type = VREG; + else if (S_ISLNK(m)) + type = VLNK; + } + + if (err) + *err = error; + + /* + * If no errors, make a new node and return it. + */ + if (error) { + kmem_free(fullpath, strlen(fullpath) + 1); + return (NULL); + } + node = sfnode_make(dir->sf_sffs, fullpath, type, fp, dir, stat, + stat_time); + return (node); +} + + +/* + * uid and gid in sffs determine owner and group for all files. + */ +static int +sfnode_access(sfnode_t *node, mode_t mode, cred_t *cr) +{ + sffs_data_t *sffs = node->sf_sffs; + mode_t m; + int shift = 0; + int error; + vnode_t *vp; + + ASSERT(MUTEX_HELD(&sffs_lock)); + + /* + * get the mode from the cache or provider + */ + if (sfnode_stat_cached(node)) + error = 0; + else + error = sfnode_update_stat_cache(node); + m = (error == 0) ? 
(node->sf_stat.sf_mode & MODEMASK) : 0; + + /* + * mask off the permissions based on uid/gid + */ + if (crgetuid(cr) != sffs->sf_handle->sf_uid) { + shift += 3; + if (groupmember(sffs->sf_handle->sf_gid, cr) == 0) + shift += 3; + } + mode &= ~(m << shift); + + if (mode == 0) { + error = 0; + } else { + /** @todo r=ramshankar: This can probably be optimized by holding static vnode + * templates for dir/file, as it only checks the type rather than + * fetching/allocating the real vnode. */ + vp = sfnode_get_vnode(node); + error = secpolicy_vnode_access(cr, vp, sffs->sf_handle->sf_uid, mode); + VN_RELE(vp); + } + return (error); +} + + +/* + * + * Everything below this point are the vnode operations used by Solaris VFS + */ +static int +sffs_readdir( + vnode_t *vp, + uio_t *uiop, + cred_t *cred, + int *eofp, + caller_context_t *ct, + int flag) +{ + sfnode_t *dir = VN2SFN(vp); + sfnode_t *node; + struct sffs_dirent *dirent = NULL; + sffs_dirents_t *cur_buf; + offset_t offset = 0; + offset_t orig_off = uiop->uio_loffset; + int dummy_eof; + int error = 0; + + if (uiop->uio_iovcnt != 1) + return (EINVAL); + + if (vp->v_type != VDIR) + return (ENOTDIR); + + if (eofp == NULL) + eofp = &dummy_eof; + *eofp = 0; + + if (uiop->uio_loffset >= MAXOFFSET_T) { + *eofp = 1; + return (0); + } + + /* + * Get the directory entry names from the host. This gets all + * entries. These are stored in a linked list of sffs_dirents_t + * buffers, each of which contains a list of dirent64_t's. + */ + mutex_enter(&sffs_lock); + + if (dir->sf_dir_list == NULL) { + error = sfprov_readdir(dir->sf_sffs->sf_handle, dir->sf_path, + &dir->sf_dir_list, flag); + if (error != 0) + goto done; + } + + /* + * Validate and skip to the desired offset. 
+ */ + cur_buf = dir->sf_dir_list; + offset = 0; + + while (cur_buf != NULL && + offset + cur_buf->sf_len <= uiop->uio_loffset) { + offset += cur_buf->sf_len; + cur_buf = cur_buf->sf_next; + } + + if (cur_buf == NULL && offset != uiop->uio_loffset) { + error = EINVAL; + goto done; + } + if (cur_buf != NULL && offset != uiop->uio_loffset) { + offset_t off = offset; + int step; + dirent = &cur_buf->sf_entries[0]; + + while (off < uiop->uio_loffset) { + if (dirent->sf_entry.d_off == uiop->uio_loffset) + break; + step = sizeof(sffs_stat_t) + dirent->sf_entry.d_reclen; + dirent = (struct sffs_dirent *) (((char *) dirent) + step); + off += step; + } + + if (off >= uiop->uio_loffset) { + error = EINVAL; + goto done; + } + } + + offset = uiop->uio_loffset - offset; + + /* + * Lookup each of the names, so that we have ino's, and copy to + * result buffer. + */ + while (cur_buf != NULL) { + if (offset >= cur_buf->sf_len) { + cur_buf = cur_buf->sf_next; + offset = 0; + continue; + } + + dirent = (struct sffs_dirent *) + (((char *) &cur_buf->sf_entries[0]) + offset); + if (dirent->sf_entry.d_reclen > uiop->uio_resid) + break; + + if (strcmp(dirent->sf_entry.d_name, ".") == 0) { + node = dir; + } else if (strcmp(dirent->sf_entry.d_name, "..") == 0) { + node = dir->sf_parent; + if (node == NULL) + node = dir; + } else { + node = sfnode_lookup(dir, dirent->sf_entry.d_name, VNON, + 0, &dirent->sf_stat, sfnode_cur_time_usec(), NULL); + if (node == NULL) + panic("sffs_readdir() lookup failed"); + } + dirent->sf_entry.d_ino = node->sf_ino; + + error = uiomove(&dirent->sf_entry, dirent->sf_entry.d_reclen, UIO_READ, uiop); + if (error != 0) + break; + + uiop->uio_loffset= dirent->sf_entry.d_off; + offset += sizeof(sffs_stat_t) + dirent->sf_entry.d_reclen; + } + if (error == 0 && cur_buf == NULL) + *eofp = 1; +done: + mutex_exit(&sffs_lock); + if (error != 0) + uiop->uio_loffset = orig_off; + return (error); +} + + +#if defined(VBOX_VFS_SOLARIS_10U6) +/* + * HERE JOE.. 
this may need more logic, need to look at other file systems + */ +static int +sffs_pathconf( + vnode_t *vp, + int cmd, + ulong_t *valp, + cred_t *cr) +{ + return (fs_pathconf(vp, cmd, valp, cr)); +} +#else +/* + * HERE JOE.. this may need more logic, need to look at other file systems + */ +static int +sffs_pathconf( + vnode_t *vp, + int cmd, + ulong_t *valp, + cred_t *cr, + caller_context_t *ct) +{ + return (fs_pathconf(vp, cmd, valp, cr, ct)); +} +#endif + +static int +sffs_getattr( + vnode_t *vp, + vattr_t *vap, + int flags, + cred_t *cred, + caller_context_t *ct) +{ + sfnode_t *node = VN2SFN(vp); + sffs_data_t *sffs = node->sf_sffs; + mode_t mode; + int error = 0; + + mutex_enter(&sffs_lock); + vap->va_type = vp->v_type; + vap->va_uid = sffs->sf_handle->sf_uid; + vap->va_gid = sffs->sf_handle->sf_gid; + vap->va_fsid = sffs->sf_vfsp->vfs_dev; + vap->va_nodeid = node->sf_ino; + vap->va_nlink = 1; + vap->va_rdev = sffs->sf_vfsp->vfs_dev; + vap->va_seq = 0; + + if (!sfnode_stat_cached(node)) { + error = sfnode_update_stat_cache(node); + if (error != 0) + goto done; + } + + vap->va_atime = node->sf_stat.sf_atime; + vap->va_mtime = node->sf_stat.sf_mtime; + vap->va_ctime = node->sf_stat.sf_ctime; + + mode = node->sf_stat.sf_mode; + vap->va_mode = mode & MODEMASK; + + vap->va_size = node->sf_stat.sf_size; + vap->va_blksize = 512; + vap->va_nblocks = (node->sf_stat.sf_alloc + 511) / 512; + +done: + mutex_exit(&sffs_lock); + return (error); +} + +static int +sffs_setattr( + vnode_t *vp, + vattr_t *vap, + int flags, + cred_t *cred, + caller_context_t *ct) +{ + sfnode_t *node = VN2SFN(vp); + int error; + mode_t mode; + + mode = vap->va_mode; + if (vp->v_type == VREG) + mode |= S_IFREG; + else if (vp->v_type == VDIR) + mode |= S_IFDIR; + else if (vp->v_type == VBLK) + mode |= S_IFBLK; + else if (vp->v_type == VCHR) + mode |= S_IFCHR; + else if (vp->v_type == VLNK) + mode |= S_IFLNK; + else if (vp->v_type == VFIFO) + mode |= S_IFIFO; + else if (vp->v_type == VSOCK) + mode 
|= S_IFSOCK; + + mutex_enter(&sffs_lock); + + sfnode_invalidate_stat_cache(node); + error = sfprov_set_attr(node->sf_sffs->sf_handle, node->sf_path, + vap->va_mask, mode, vap->va_atime, vap->va_mtime, vap->va_ctime); + if (error == ENOENT) + sfnode_make_stale(node); + + mutex_exit(&sffs_lock); + return (error); +} + +static int +sffs_space( + vnode_t *vp, + int cmd, + struct flock64 *bfp, + int flags, + offset_t off, + cred_t *cred, + caller_context_t *ct) +{ + sfnode_t *node = VN2SFN(vp); + int error; + + /* we only support changing the length of the file */ + if (bfp->l_whence != SEEK_SET || bfp->l_len != 0) + return ENOSYS; + + mutex_enter(&sffs_lock); + + sfnode_invalidate_stat_cache(node); + + error = sfprov_set_size(node->sf_sffs->sf_handle, node->sf_path, + bfp->l_start); + if (error == ENOENT) + sfnode_make_stale(node); + + mutex_exit(&sffs_lock); + return (error); +} + +/*ARGSUSED*/ +static int +sffs_read( + vnode_t *vp, + struct uio *uio, + int ioflag, + cred_t *cred, + caller_context_t *ct) +{ + sfnode_t *node = VN2SFN(vp); + int error = 0; + uint32_t bytes; + uint32_t done; + ulong_t offset; + ssize_t total; + + if (vp->v_type == VDIR) + return (EISDIR); + if (vp->v_type != VREG) + return (EINVAL); + if (uio->uio_loffset >= MAXOFFSET_T) + return (0); + if (uio->uio_loffset < 0) + return (EINVAL); + total = uio->uio_resid; + if (total == 0) + return (0); + + mutex_enter(&sffs_lock); + if (node->sf_file == NULL) { + ASSERT(node->sf_flag != ~0); + sfnode_open(node, node->sf_flag); + if (node->sf_file == NULL) + return (EBADF); + } + + do { + offset = uio->uio_offset; + done = bytes = MIN(PAGESIZE, uio->uio_resid); + error = sfprov_read(node->sf_file, sffs_buffer, offset, &done); + if (error == 0 && done > 0) + error = uiomove(sffs_buffer, done, UIO_READ, uio); + } while (error == 0 && uio->uio_resid > 0 && done > 0); + + mutex_exit(&sffs_lock); + + /* + * a partial read is never an error + */ + if (total != uio->uio_resid) + error = 0; + return (error); +} 
+ +/*ARGSUSED*/ +static int +sffs_write( + vnode_t *vp, + struct uio *uiop, + int ioflag, + cred_t *cred, + caller_context_t *ct) +{ + sfnode_t *node = VN2SFN(vp); + int error = 0; + uint32_t bytes; + uint32_t done; + ulong_t offset; + ssize_t total; + rlim64_t limit = uiop->uio_llimit; + + if (vp->v_type == VDIR) + return (EISDIR); + if (vp->v_type != VREG) + return (EINVAL); + + /* + * We have to hold this lock for a long time to keep + * multiple FAPPEND writes from intermixing + */ + mutex_enter(&sffs_lock); + if (node->sf_file == NULL) { + ASSERT(node->sf_flag != ~0); + sfnode_open(node, node->sf_flag); + if (node->sf_file == NULL) + return (EBADF); + } + + sfnode_invalidate_stat_cache(node); + + if (ioflag & FAPPEND) { + uint64_t endoffile; + + error = sfprov_get_size(node->sf_sffs->sf_handle, + node->sf_path, &endoffile); + if (error == ENOENT) + sfnode_make_stale(node); + if (error != 0) { + mutex_exit(&sffs_lock); + return (error); + } + uiop->uio_loffset = endoffile; + } + + if (vp->v_type != VREG || uiop->uio_loffset < 0) { + mutex_exit(&sffs_lock); + return (EINVAL); + } + if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T) + limit = MAXOFFSET_T; + + if (uiop->uio_loffset >= limit) { + mutex_exit(&sffs_lock); + return (EFBIG); + } + + if (uiop->uio_loffset >= MAXOFFSET_T) { + mutex_exit(&sffs_lock); + return (EFBIG); + } + + total = uiop->uio_resid; + if (total == 0) { + mutex_exit(&sffs_lock); + return (0); + } + + do { + offset = uiop->uio_offset; + bytes = MIN(PAGESIZE, uiop->uio_resid); + if (offset + bytes >= limit) { + if (offset >= limit) { + error = EFBIG; + break; + } + bytes = limit - offset; + } + error = uiomove(sffs_buffer, bytes, UIO_WRITE, uiop); + if (error != 0) + break; + done = bytes; + if (error == 0) + error = sfprov_write(node->sf_file, sffs_buffer, + offset, &done); + total -= done; + if (done != bytes) { + uiop->uio_resid += bytes - done; + break; + } + } while (error == 0 && uiop->uio_resid > 0 && done > 0); + + 
mutex_exit(&sffs_lock); + + /* + * A short write is never really an error. + */ + if (total != uiop->uio_resid) + error = 0; + return (error); +} + +/*ARGSUSED*/ +static int +sffs_access(vnode_t *vp, int mode, int flags, cred_t *cr, caller_context_t *ct) +{ + sfnode_t *node = VN2SFN(vp); + int error; + + mutex_enter(&sffs_lock); + error = sfnode_access(node, mode, cr); + mutex_exit(&sffs_lock); + return (error); +} + +/* + * Lookup an entry in a directory and create a new vnode if found. + */ +/* ARGSUSED3 */ +static int +sffs_lookup( + vnode_t *dvp, /* the directory vnode */ + char *name, /* the name of the file or directory */ + vnode_t **vpp, /* the vnode we found or NULL */ + struct pathname *pnp, + int flags, + vnode_t *rdir, + cred_t *cred, + caller_context_t *ct, + int *direntflags, + struct pathname *realpnp) +{ + int error; + sfnode_t *node; + + /* + * dvp must be a directory + */ + if (dvp->v_type != VDIR) + return (ENOTDIR); + + /* + * An empty component name or just "." means the directory itself. + * Don't do any further lookup or checking. + */ + if (strcmp(name, "") == 0 || strcmp(name, ".") == 0) { + VN_HOLD(dvp); + *vpp = dvp; + return (0); + } + + /* + * Check permission to look at this directory. We always allow "..". + */ + mutex_enter(&sffs_lock); + if (strcmp(name, "..") != 0) { + error = sfnode_access(VN2SFN(dvp), VEXEC, cred); + if (error) { + mutex_exit(&sffs_lock); + return (error); + } + } + + /* + * Lookup the node. + */ + node = sfnode_lookup(VN2SFN(dvp), name, VNON, 0, NULL, 0, NULL); + if (node != NULL) + *vpp = sfnode_get_vnode(node); + mutex_exit(&sffs_lock); + return ((node == NULL) ? 
ENOENT : 0); +} + +/*ARGSUSED*/ +static int +sffs_create( + vnode_t *dvp, + char *name, + struct vattr *vap, + vcexcl_t exclusive, + int mode, + vnode_t **vpp, + cred_t *cr, + int flag, + caller_context_t *ct, + vsecattr_t *vsecp) +{ + vnode_t *vp; + sfnode_t *node; + int error; + + ASSERT(name != NULL); + + /* + * this is used for regular files, not mkdir + */ + if (vap->va_type == VDIR) + return (EISDIR); + if (vap->va_type != VREG) + return (EINVAL); + + /* + * is this a pre-existing file? + */ + error = sffs_lookup(dvp, name, &vp, + NULL, 0, NULL, cr, ct, NULL, NULL); + if (error == ENOENT) + vp = NULL; + else if (error != 0) + return (error); + + /* + * Operation on a pre-existing file. + */ + if (vp != NULL) { + if (exclusive == EXCL) { + VN_RELE(vp); + return (EEXIST); + } + if (vp->v_type == VDIR && (mode & VWRITE) == VWRITE) { + VN_RELE(vp); + return (EISDIR); + } + + mutex_enter(&sffs_lock); + node = VN2SFN(vp); + error = sfnode_access(node, mode, cr); + if (error != 0) { + mutex_exit(&sffs_lock); + VN_RELE(vp); + return (error); + } + + sfnode_invalidate_stat_cache(VN2SFN(dvp)); + + /* + * handle truncating an existing file + */ + if (vp->v_type == VREG && (vap->va_mask & AT_SIZE) && + vap->va_size == 0) { + sfnode_open(node, flag | FTRUNC); + if (node->sf_path == NULL) { + mutex_exit(&sffs_lock); + VN_RELE(vp); + return (ENOENT); + } + } + mutex_exit(&sffs_lock); + *vpp = vp; + return (0); + } + + /* + * Create a new node. First check for a race creating it. + */ + mutex_enter(&sffs_lock); + node = sfnode_lookup(VN2SFN(dvp), name, VNON, 0, NULL, 0, NULL); + if (node != NULL) { + mutex_exit(&sffs_lock); + return (EEXIST); + } + + /* + * Doesn't exist yet and we have the lock, so create it. + */ + sfnode_invalidate_stat_cache(VN2SFN(dvp)); + int lookuperr; + node = sfnode_lookup(VN2SFN(dvp), name, VREG, + (vap->va_mask & AT_MODE) ? 
vap->va_mode : 0, NULL, 0, &lookuperr); + + if (node && node->sf_parent) + sfnode_clear_dir_list(node->sf_parent); + + mutex_exit(&sffs_lock); + if (node == NULL) + return (lookuperr); + *vpp = sfnode_get_vnode(node); + return (0); +} + +/*ARGSUSED*/ +static int +sffs_mkdir( + vnode_t *dvp, + char *nm, + vattr_t *va, + vnode_t **vpp, + cred_t *cred, + caller_context_t *ct, + int flags, + vsecattr_t *vsecp) +{ + sfnode_t *node; + vnode_t *vp; + int error; + + /* + * These should never happen + */ + ASSERT(nm != NULL); + ASSERT(strcmp(nm, "") != 0); + ASSERT(strcmp(nm, ".") != 0); + ASSERT(strcmp(nm, "..") != 0); + + /* + * Do an unlocked look up first + */ + error = sffs_lookup(dvp, nm, &vp, NULL, 0, NULL, cred, ct, NULL, NULL); + if (error == 0) { + VN_RELE(vp); + return (EEXIST); + } + if (error != ENOENT) + return (error); + + /* + * Must be able to write in current directory + */ + mutex_enter(&sffs_lock); + error = sfnode_access(VN2SFN(dvp), VWRITE, cred); + if (error) { + mutex_exit(&sffs_lock); + return (error); + } + + sfnode_invalidate_stat_cache(VN2SFN(dvp)); + int lookuperr = EACCES; + node = sfnode_lookup(VN2SFN(dvp), nm, VDIR, + (va->va_mode & AT_MODE) ? va->va_mode : 0, NULL, 0, &lookuperr); + + if (node && node->sf_parent) + sfnode_clear_dir_list(node->sf_parent); + + mutex_exit(&sffs_lock); + if (node == NULL) + return (lookuperr); + *vpp = sfnode_get_vnode(node); + return (0); +} + +/*ARGSUSED*/ +static int +sffs_rmdir( + struct vnode *dvp, + char *nm, + vnode_t *cdir, + cred_t *cred, + caller_context_t *ct, + int flags) +{ + sfnode_t *node; + vnode_t *vp; + int error; + + /* + * Return error when removing . and .. 
+ */ + if (strcmp(nm, ".") == 0 || strcmp(nm, "") == 0) + return (EINVAL); + if (strcmp(nm, "..") == 0) + return (EEXIST); + + error = sffs_lookup(dvp, nm, &vp, NULL, 0, NULL, cred, ct, NULL, NULL); + if (error) + return (error); + if (vp->v_type != VDIR) { + VN_RELE(vp); + return (ENOTDIR); + } + +#ifdef VBOXVFS_WITH_MMAP + if (vn_vfswlock(vp)) { + VN_RELE(vp); + return (EBUSY); + } +#endif + + if (vn_mountedvfs(vp)) { + VN_RELE(vp); + return (EBUSY); + } + + node = VN2SFN(vp); + + mutex_enter(&sffs_lock); + error = sfnode_access(VN2SFN(dvp), VEXEC | VWRITE, cred); + if (error) + goto done; + + /* + * If anything else is using this vnode, then fail the remove. + * Why? Windows hosts can't remove something that is open, + * so we have to sfprov_close() it first. + * There is no errno for this - since it's not a problem on UNIX, + * but EINVAL is the closest. + */ + if (node->sf_file != NULL) { + if (vp->v_count > 1) { + error = EINVAL; + goto done; + } + (void)sfprov_close(node->sf_file); + node->sf_file = NULL; + } + + /* + * Remove the directory on the host and mark the node as stale. + */ + sfnode_invalidate_stat_cache(VN2SFN(dvp)); + error = sfprov_rmdir(node->sf_sffs->sf_handle, node->sf_path); + if (error == ENOENT || error == 0) + sfnode_make_stale(node); + + if (node->sf_parent) + sfnode_clear_dir_list(node->sf_parent); +done: + mutex_exit(&sffs_lock); +#ifdef VBOXVFS_WITH_MMAP + vn_vfsunlock(vp); +#endif + VN_RELE(vp); + return (error); +} + + +#ifdef VBOXVFS_WITH_MMAP +static caddr_t +sffs_page_map( + page_t *ppage, + enum seg_rw segaccess) +{ + /* Use seg_kpm driver if possible (64-bit) */ + if (kpm_enable) + return (hat_kpm_mapin(ppage, NULL)); + ASSERT(segaccess == S_READ || segaccess == S_WRITE); + return (ppmapin(ppage, PROT_READ | ((segaccess == S_WRITE) ? 
PROT_WRITE : 0), (caddr_t)-1)); +} + + +static void +sffs_page_unmap( + page_t *ppage, + caddr_t addr) +{ + if (kpm_enable) + hat_kpm_mapout(ppage, NULL, addr); + else + ppmapout(addr); +} + + +/* + * Called when there's no page in the cache. This will create new page(s) and read + * the file data into it. + */ +static int +sffs_readpages( + vnode_t *dvp, + offset_t off, + page_t *pagelist[], + size_t pagelistsize, + struct seg *segp, + caddr_t addr, + enum seg_rw segaccess) +{ + ASSERT(MUTEX_HELD(&sffs_lock)); + + int error = 0; + u_offset_t io_off, total; + size_t io_len; + page_t *ppages; + page_t *pcur; + + sfnode_t *node = VN2SFN(dvp); + ASSERT(node); + ASSERT(node->sf_file); + + if (pagelistsize == PAGESIZE) + { + io_off = off; + io_len = PAGESIZE; + ppages = page_create_va(dvp, io_off, io_len, PG_WAIT | PG_EXCL, segp, addr); + } + else + ppages = pvn_read_kluster(dvp, off, segp, addr, &io_off, &io_len, off, pagelistsize, 0); + + /* If page already exists return success */ + if (!ppages) + { + *pagelist = NULL; + return (0); + } + + /* + * Map & read page-by-page. + */ + total = io_off + io_len; + pcur = ppages; + while (io_off < total) + { + ASSERT3U(io_off, ==, pcur->p_offset); + + caddr_t virtaddr = sffs_page_map(pcur, segaccess); + uint32_t bytes = PAGESIZE; + error = sfprov_read(node->sf_file, virtaddr, io_off, &bytes); + /* + * If we reuse pages without zero'ing them, one process can mmap() and read-past the length + * to read previously mmap'd contents (from possibly other processes). + */ + if (error == 0 && bytes < PAGESIZE) + memset(virtaddr + bytes, 0, PAGESIZE - bytes); + sffs_page_unmap(pcur, virtaddr); + if (error != 0) + { + cmn_err(CE_WARN, "sffs_readpages: sfprov_read() failed. error=%d bytes=%u\n", error, bytes); + /* Get rid of all kluster pages read & bail. */ + pvn_read_done(ppages, B_ERROR); + return (error); + } + pcur = pcur->p_next; + io_off += PAGESIZE; + } + + /* + * Fill in the pagelist from kluster at the requested offset. 
+ */ + pvn_plist_init(ppages, pagelist, pagelistsize, off, io_len, segaccess); + ASSERT(pagelist == NULL || (*pagelist)->p_offset == off); + return (0); +} + + +/*ARGSUSED*/ +static int +sffs_getpage( + vnode_t *dvp, + offset_t off, + size_t len, + uint_t *protp, + page_t *pagelist[], + size_t pagelistsize, + struct seg *segp, + caddr_t addr, + enum seg_rw segaccess, + cred_t *credp +#if !defined(VBOX_VFS_SOLARIS_10U6) + , caller_context_t *ct +#endif + ) +{ + int error = 0; + int is_recursive = 0; + page_t **pageliststart = pagelist; + sfnode_t *node = VN2SFN(dvp); + ASSERT(node); + ASSERT(node->sf_file); + + if (segaccess == S_WRITE) + return (ENOSYS); /* Will this ever happen? */ + + /* Don't bother about faultahead for now. */ + if (pagelist == NULL) + return (0); + + if (len > pagelistsize) + len = pagelistsize; + else + len = P2ROUNDUP(len, PAGESIZE); + ASSERT(pagelistsize >= len); + + if (protp) + *protp = PROT_ALL; + + /* + * The buffer passed to sffs_write may be mmap'd so we may get a + * pagefault there, in which case we'll end up here with this thread + * already owning the mutex. Mutexes aren't recursive. + */ + if (mutex_owner(&sffs_lock) == curthread) + is_recursive = 1; + else + mutex_enter(&sffs_lock); + + /* Don't map pages past end of the file. */ + if (off + len > node->sf_stat.sf_size + PAGEOFFSET) + { + if (!is_recursive) + mutex_exit(&sffs_lock); + return (EFAULT); + } + + while (len > 0) + { + /* + * Look for pages in the requested offset range, or create them if we can't find any. 
+ */ + if ((*pagelist = page_lookup(dvp, off, SE_SHARED)) != NULL) + *(pagelist + 1) = NULL; + else if ((error = sffs_readpages(dvp, off, pagelist, pagelistsize, segp, addr, segaccess)) != 0) + { + while (pagelist > pageliststart) + page_unlock(*--pagelist); + + *pagelist = NULL; + if (!is_recursive) + mutex_exit(&sffs_lock); + return (error); + } + + while (*pagelist) + { + ASSERT3U((*pagelist)->p_offset, ==, off); + off += PAGESIZE; + addr += PAGESIZE; + if (len > 0) + { + ASSERT3U(len, >=, PAGESIZE); + len -= PAGESIZE; + } + + ASSERT3U(pagelistsize, >=, PAGESIZE); + pagelistsize -= PAGESIZE; + pagelist++; + } + } + + /* + * Fill the page list array with any pages left in the cache. + */ + while ( pagelistsize > 0 + && (*pagelist++ = page_lookup_nowait(dvp, off, SE_SHARED))) + { + off += PAGESIZE; + pagelistsize -= PAGESIZE; + } + + *pagelist = NULL; + if (!is_recursive) + mutex_exit(&sffs_lock); + return (error); +} + + +/*ARGSUSED*/ +static int +sffs_putpage( + vnode_t *dvp, + offset_t off, + size_t len, + int flags, + cred_t *credp +#if !defined(VBOX_VFS_SOLARIS_10U6) + , caller_context_t *ct +#endif + ) +{ + /* + * We don't support PROT_WRITE mmaps. + */ + return (ENOSYS); +} + + +/*ARGSUSED*/ +static int +sffs_discardpage( + vnode_t *dvp, + page_t *ppage, + u_offset_t *poff, + size_t *plen, + int flags, + cred_t *pcred) +{ + /* + * This would not get invoked i.e. via pvn_vplist_dirty() since we don't support + * PROT_WRITE mmaps and therefore will not have dirty pages. + */ + pvn_write_done(ppage, B_INVAL | B_ERROR | B_FORCE); + return (0); +} + + +/*ARGSUSED*/ +static int +sffs_map( + vnode_t *dvp, + offset_t off, + struct as *asp, + caddr_t *addrp, + size_t len, + uchar_t prot, + uchar_t maxprot, + uint_t flags, + cred_t *credp +#if !defined(VBOX_VFS_SOLARIS_10U6) + , caller_context_t *ct +#endif + ) +{ + /* + * Invocation: mmap()->smmap_common()->VOP_MAP()->sffs_map(). 
Once the + * segment driver creates the new segment via segvn_create(), it'll + * invoke down the line VOP_ADDMAP()->sffs_addmap() + */ + int error = 0; + sfnode_t *node = VN2SFN(dvp); + ASSERT(node); + if ((flags & MAP_SHARED) && (prot & PROT_WRITE)) + return (ENOTSUP); + + if (off < 0 || len > MAXOFFSET_T - off) + return (ENXIO); + + if (dvp->v_type != VREG) + return (ENODEV); + + if (dvp->v_flag & VNOMAP) + return (ENOSYS); + + if (vn_has_mandatory_locks(dvp, node->sf_stat.sf_mode)) + return (EAGAIN); + + mutex_enter(&sffs_lock); + as_rangelock(asp); + +#if defined(VBOX_VFS_SOLARIS_10U6) + if ((flags & MAP_FIXED) == 0) + { + if (g_fVBoxVFS_SolOldAddrMap) + g_VBoxVFS_SolAddrMap.MapAddr.pfnSol_map_addr_old(addrp, len, off, 1, flags); + else + g_VBoxVFS_SolAddrMap.MapAddr.pfnSol_map_addr(addrp, len, off, flags); + if (*addrp == NULL) + error = ENOMEM; + } + else + as_unmap(asp, *addrp, len); /* User specified address, remove any previous mappings */ +#else + if (g_fVBoxVFS_SolOldAddrMap) + error = g_VBoxVFS_SolAddrMap.ChooseAddr.pfnSol_choose_addr_old(asp, addrp, len, off, 1, flags); + else + error = g_VBoxVFS_SolAddrMap.ChooseAddr.pfnSol_choose_addr(asp, addrp, len, off, flags); +#endif + + if (error) + { + as_rangeunlock(asp); + mutex_exit(&sffs_lock); + return (error); + } + + segvn_crargs_t vnodeargs; + memset(&vnodeargs, 0, sizeof(vnodeargs)); + vnodeargs.vp = dvp; + vnodeargs.cred = credp; + vnodeargs.offset = off; + vnodeargs.type = flags & MAP_TYPE; + vnodeargs.prot = prot; + vnodeargs.maxprot = maxprot; + vnodeargs.flags = flags & ~MAP_TYPE; + vnodeargs.amp = NULL; /* anon. 
mapping */ + vnodeargs.szc = 0; /* preferred page size code */ + vnodeargs.lgrp_mem_policy_flags = 0; + + error = as_map(asp, *addrp, len, segvn_create, &vnodeargs); + + as_rangeunlock(asp); + mutex_exit(&sffs_lock); + return (error); +} + + +/*ARGSUSED*/ +static int +sffs_addmap( + vnode_t *dvp, + offset_t off, + struct as *asp, + caddr_t addr, + size_t len, + uchar_t prot, + uchar_t maxprot, + uint_t flags, + cred_t *credp +#if !defined(VBOX_VFS_SOLARIS_10U6) + , caller_context_t *ct +#endif + ) +{ + if (dvp->v_flag & VNOMAP) + return (ENOSYS); + return (0); +} + + +/*ARGSUSED*/ +static int +sffs_delmap( + vnode_t *dvp, + offset_t off, + struct as *asp, + caddr_t addr, + size_t len, + uint_t prot, + uint_t maxprot, + uint_t flags, + cred_t *credp +#if !defined(VBOX_VFS_SOLARIS_10U6) + , caller_context_t *ct +#endif + ) +{ + if (dvp->v_flag & VNOMAP) + return (ENOSYS); + + return (0); +} +#endif /* VBOXVFS_WITH_MMAP */ + + +/*ARGSUSED*/ +static int +sffs_readlink( + vnode_t *vp, + uio_t *uiop, + cred_t *cred +#if !defined(VBOX_VFS_SOLARIS_10U6) + , + caller_context_t *ct +#endif + ) +{ + sfnode_t *node; + int error = 0; + char *target = NULL; + + if (uiop->uio_iovcnt != 1) + return (EINVAL); + + if (vp->v_type != VLNK) + return (EINVAL); + + mutex_enter(&sffs_lock); + node = VN2SFN(vp); + + target = kmem_alloc(MAXPATHLEN, KM_SLEEP); + + error = sfprov_readlink(node->sf_sffs->sf_handle, node->sf_path, target, + MAXPATHLEN); + if (error) + goto done; + + error = uiomove(target, strlen(target), UIO_READ, uiop); + +done: + mutex_exit(&sffs_lock); + if (target) + kmem_free(target, MAXPATHLEN); + return (error); +} + + +/*ARGSUSED*/ +static int +sffs_symlink( + vnode_t *dvp, + char *linkname, + vattr_t *vap, + char *target, + cred_t *cred +#if !defined(VBOX_VFS_SOLARIS_10U6) + , + caller_context_t *ct, + int flags +#endif + ) +{ + sfnode_t *dir; + sfnode_t *node; + sffs_stat_t stat; + int error = 0; + char *fullpath; + + /* + * These should never happen + */ + 
ASSERT(linkname != NULL); + ASSERT(strcmp(linkname, "") != 0); + ASSERT(strcmp(linkname, ".") != 0); + ASSERT(strcmp(linkname, "..") != 0); + + /* + * Basic checks. + */ + if (vap->va_type != VLNK) + return (EINVAL); + + mutex_enter(&sffs_lock); + + if (sfnode_lookup(VN2SFN(dvp), linkname, VNON, 0, NULL, 0, NULL) != + NULL) { + error = EEXIST; + goto done; + } + + dir = VN2SFN(dvp); + error = sfnode_access(dir, VWRITE, cred); + if (error) + goto done; + + /* + * Create symlink. Note that we ignore vap->va_mode because generally + * we can't change the attributes of the symlink itself. + */ + fullpath = sfnode_construct_path(dir, linkname); + error = sfprov_symlink(dir->sf_sffs->sf_handle, fullpath, target, + &stat); + kmem_free(fullpath, strlen(fullpath) + 1); + if (error) + goto done; + + node = sfnode_lookup(dir, linkname, VLNK, 0, &stat, + sfnode_cur_time_usec(), NULL); + + sfnode_invalidate_stat_cache(dir); + sfnode_clear_dir_list(dir); + +done: + mutex_exit(&sffs_lock); + return (error); +} + + +/*ARGSUSED*/ +static int +sffs_remove( + vnode_t *dvp, + char *name, + cred_t *cred, + caller_context_t *ct, + int flags) +{ + vnode_t *vp; + sfnode_t *node; + int error; + + /* + * These should never happen + */ + ASSERT(name != NULL); + ASSERT(strcmp(name, "..") != 0); + + error = sffs_lookup(dvp, name, &vp, + NULL, 0, NULL, cred, ct, NULL, NULL); + if (error) + return (error); + node = VN2SFN(vp); + + mutex_enter(&sffs_lock); + error = sfnode_access(VN2SFN(dvp), VEXEC | VWRITE, cred); + if (error) + goto done; + + /* + * If anything else is using this vnode, then fail the remove. + * Why? Windows hosts can't sfprov_remove() a file that is open, + * so we have to sfprov_close() it first. + * There is no errno for this - since it's not a problem on UNIX, + * but ETXTBSY is the closest. 
+ */ + if (node->sf_file != NULL) { + if (vp->v_count > 1) { + error = ETXTBSY; + goto done; + } + (void)sfprov_close(node->sf_file); + node->sf_file = NULL; + } + + /* + * Remove the file on the host and mark the node as stale. + */ + sfnode_invalidate_stat_cache(VN2SFN(dvp)); + + error = sfprov_remove(node->sf_sffs->sf_handle, node->sf_path, + node->sf_type == VLNK); + if (error == ENOENT || error == 0) + sfnode_make_stale(node); + + if (node->sf_parent) + sfnode_clear_dir_list(node->sf_parent); +done: + mutex_exit(&sffs_lock); + VN_RELE(vp); + return (error); +} + +/*ARGSUSED*/ +static int +sffs_rename( + vnode_t *old_dir, + char *old_nm, + vnode_t *new_dir, + char *new_nm, + cred_t *cred, + caller_context_t *ct, + int flags) +{ + char *newpath; + int error; + sfnode_t *node; + + if (strcmp(new_nm, "") == 0 || + strcmp(new_nm, ".") == 0 || + strcmp(new_nm, "..") == 0 || + strcmp(old_nm, "") == 0 || + strcmp(old_nm, ".") == 0 || + strcmp(old_nm, "..") == 0) + return (EINVAL); + + /* + * make sure we have permission to do the rename + */ + mutex_enter(&sffs_lock); + error = sfnode_access(VN2SFN(old_dir), VEXEC | VWRITE, cred); + if (error == 0 && new_dir != old_dir) + error = sfnode_access(VN2SFN(new_dir), VEXEC | VWRITE, cred); + if (error) + goto done; + + node = sfnode_lookup(VN2SFN(old_dir), old_nm, VNON, 0, NULL, 0, NULL); + if (node == NULL) { + error = ENOENT; + goto done; + } + + /* + * Rename the file on the host and in our caches. 
+ */ + sfnode_invalidate_stat_cache(node); + sfnode_invalidate_stat_cache(VN2SFN(old_dir)); + sfnode_invalidate_stat_cache(VN2SFN(new_dir)); + + newpath = sfnode_construct_path(VN2SFN(new_dir), new_nm); + error = sfprov_rename(node->sf_sffs->sf_handle, node->sf_path, newpath, + node->sf_type == VDIR); + if (error == 0) + sfnode_rename(node, VN2SFN(new_dir), newpath); + else { + kmem_free(newpath, strlen(newpath) + 1); + if (error == ENOENT) + sfnode_make_stale(node); + } +done: + mutex_exit(&sffs_lock); + return (error); +} + + +/*ARGSUSED*/ +static int +sffs_fsync(vnode_t *vp, int flag, cred_t *cr, caller_context_t *ct) +{ + sfnode_t *node; + int error; + + /* + * Ask the host to sync any data it may have cached for open files. + */ + mutex_enter(&sffs_lock); + node = VN2SFN(vp); + if (node->sf_file == NULL) + error = EBADF; + else if (node->sf_sffs->sf_fsync) + error = sfprov_fsync(node->sf_file); + else + error = 0; + mutex_exit(&sffs_lock); + return (error); +} + +/* + * This may be the last reference, possibly time to close the file and + * destroy the vnode. If the sfnode is stale, we'll destroy that too. + */ +/*ARGSUSED*/ +static void +#if defined(VBOX_VFS_SOLARIS_10U6) +sffs_inactive(vnode_t *vp, cred_t *cr) +#else +sffs_inactive(vnode_t *vp, cred_t *cr, caller_context_t *ct) +#endif +{ + sfnode_t *node; + + /* + * nothing to do if this isn't the last use + */ + mutex_enter(&sffs_lock); + node = VN2SFN(vp); + mutex_enter(&vp->v_lock); + if (vp->v_count > 1) { + --vp->v_count; + mutex_exit(&vp->v_lock); + mutex_exit(&sffs_lock); + return; + } + + if (vn_has_cached_data(vp)) { +#ifdef VBOXVFS_WITH_MMAP + /* We're fine with releasing the vnode lock here as we should be covered by the sffs_lock */ + mutex_exit(&vp->v_lock); + /* We won't have any dirty pages, this will just invalidate (destroy) the pages and move it to the cachelist. 
*/ + pvn_vplist_dirty(vp, 0 /* offset */, sffs_discardpage, B_INVAL, cr); + mutex_enter(&vp->v_lock); +#else + panic("sffs_inactive() found cached data"); +#endif + } + + /* + * destroy the vnode + */ + node->sf_vnode = NULL; + mutex_exit(&vp->v_lock); + vn_invalid(vp); + vn_free(vp); + LogFlowFunc((" %s vnode cleared\n", node->sf_path)); + + /* + * Close the sf_file for the node. + */ + if (node->sf_file != NULL) { + (void)sfprov_close(node->sf_file); + node->sf_file = NULL; + } + + /* + * Free the directory entries for the node. This should normally + * have been taken care of in sffs_close(), but better safe than + * sorry. + */ + sfnode_clear_dir_list(node); + + /* + * If the node is stale, we can also destroy it. + */ + if (node->sf_is_stale && node->sf_children == 0) + sfnode_destroy(node); + + mutex_exit(&sffs_lock); + return; +} + +/* + * All the work for this is really done in sffs_lookup(). + */ +/*ARGSUSED*/ +static int +sffs_open(vnode_t **vpp, int flag, cred_t *cr, caller_context_t *ct) +{ + sfnode_t *node; + int error = 0; + + mutex_enter(&sffs_lock); + + node = VN2SFN(*vpp); + sfnode_open(node, flag); + if (node->sf_file == NULL) + error = EINVAL; + mutex_exit(&sffs_lock); + + return (error); +} + +/* + * All the work for this is really done in inactive. + */ +/*ARGSUSED*/ +static int +sffs_close( + vnode_t *vp, + int flag, + int count, + offset_t offset, + cred_t *cr, + caller_context_t *ct) +{ + sfnode_t *node; + + mutex_enter(&sffs_lock); + node = VN2SFN(vp); + + /* + * Free the directory entries for the node. We do this on this call + * here because the directory node may not become inactive for a long + * time after the readdir is over. Case in point, if somebody cd's into + * the directory then it won't become inactive until they cd away again. + * In such a case we would end up with the directory listing not getting + * updated (i.e. the result of 'ls' always being the same) until they + * change the working directory. 
+ */ + sfnode_clear_dir_list(node); + + sfnode_invalidate_stat_cache(node); + + if (node->sf_file != NULL && vp->v_count <= 1) + { + (void)sfprov_close(node->sf_file); + node->sf_file = NULL; + } + + mutex_exit(&sffs_lock); + return (0); +} + +/* ARGSUSED */ +static int +sffs_seek(vnode_t *v, offset_t o, offset_t *no, caller_context_t *ct) +{ + if (*no < 0 || *no > MAXOFFSET_T) + return (EINVAL); + + if (v->v_type == VDIR) + { + sffs_dirents_t *cur_buf = VN2SFN(v)->sf_dir_list; + off_t offset = 0; + + if (cur_buf == NULL) + return (0); + + while (cur_buf != NULL) { + if (*no >= offset && *no <= offset + cur_buf->sf_len) + return (0); + offset += cur_buf->sf_len; + cur_buf = cur_buf->sf_next; + } + return (EINVAL); + } + return (0); +} + + + +/* + * By returning an error for this, we prevent anything in sffs from + * being re-exported by NFS + */ +/* ARGSUSED */ +static int +sffs_fid(vnode_t *vp, fid_t *fidp, caller_context_t *ct) +{ + return (ENOTSUP); +} + +/* + * vnode operations for regular files + */ +const fs_operation_def_t sffs_ops_template[] = { +#if defined(VBOX_VFS_SOLARIS_10U6) + VOPNAME_ACCESS, sffs_access, + VOPNAME_CLOSE, sffs_close, + VOPNAME_CREATE, sffs_create, + VOPNAME_FID, sffs_fid, + VOPNAME_FSYNC, sffs_fsync, + VOPNAME_GETATTR, sffs_getattr, + VOPNAME_INACTIVE, sffs_inactive, + VOPNAME_LOOKUP, sffs_lookup, + VOPNAME_MKDIR, sffs_mkdir, + VOPNAME_OPEN, sffs_open, + VOPNAME_PATHCONF, sffs_pathconf, + VOPNAME_READ, sffs_read, + VOPNAME_READDIR, sffs_readdir, + VOPNAME_READLINK, sffs_readlink, + VOPNAME_REMOVE, sffs_remove, + VOPNAME_RENAME, sffs_rename, + VOPNAME_RMDIR, sffs_rmdir, + VOPNAME_SEEK, sffs_seek, + VOPNAME_SETATTR, sffs_setattr, + VOPNAME_SPACE, sffs_space, + VOPNAME_SYMLINK, sffs_symlink, + VOPNAME_WRITE, sffs_write, + +# ifdef VBOXVFS_WITH_MMAP + VOPNAME_MAP, sffs_map, + VOPNAME_ADDMAP, sffs_addmap, + VOPNAME_DELMAP, sffs_delmap, + VOPNAME_GETPAGE, sffs_getpage, + VOPNAME_PUTPAGE, sffs_putpage, +# endif + + NULL, NULL +#else + 
VOPNAME_ACCESS, { .vop_access = sffs_access }, + VOPNAME_CLOSE, { .vop_close = sffs_close }, + VOPNAME_CREATE, { .vop_create = sffs_create }, + VOPNAME_FID, { .vop_fid = sffs_fid }, + VOPNAME_FSYNC, { .vop_fsync = sffs_fsync }, + VOPNAME_GETATTR, { .vop_getattr = sffs_getattr }, + VOPNAME_INACTIVE, { .vop_inactive = sffs_inactive }, + VOPNAME_LOOKUP, { .vop_lookup = sffs_lookup }, + VOPNAME_MKDIR, { .vop_mkdir = sffs_mkdir }, + VOPNAME_OPEN, { .vop_open = sffs_open }, + VOPNAME_PATHCONF, { .vop_pathconf = sffs_pathconf }, + VOPNAME_READ, { .vop_read = sffs_read }, + VOPNAME_READDIR, { .vop_readdir = sffs_readdir }, + VOPNAME_READLINK, { .vop_readlink = sffs_readlink }, + VOPNAME_REMOVE, { .vop_remove = sffs_remove }, + VOPNAME_RENAME, { .vop_rename = sffs_rename }, + VOPNAME_RMDIR, { .vop_rmdir = sffs_rmdir }, + VOPNAME_SEEK, { .vop_seek = sffs_seek }, + VOPNAME_SETATTR, { .vop_setattr = sffs_setattr }, + VOPNAME_SPACE, { .vop_space = sffs_space }, + VOPNAME_SYMLINK, { .vop_symlink = sffs_symlink }, + VOPNAME_WRITE, { .vop_write = sffs_write }, + +# ifdef VBOXVFS_WITH_MMAP + VOPNAME_MAP, { .vop_map = sffs_map }, + VOPNAME_ADDMAP, { .vop_addmap = sffs_addmap }, + VOPNAME_DELMAP, { .vop_delmap = sffs_delmap }, + VOPNAME_GETPAGE, { .vop_getpage = sffs_getpage }, + VOPNAME_PUTPAGE, { .vop_putpage = sffs_putpage }, +# endif + + NULL, NULL +#endif +}; + +/* + * Also, init and fini functions... 
+ */ +int +sffs_vnode_init(void) +{ + int err; + + err = vn_make_ops("sffs", sffs_ops_template, &sffs_ops); + if (err) + return (err); + + avl_create(&sfnodes, sfnode_compare, sizeof (sfnode_t), + offsetof(sfnode_t, sf_linkage)); + avl_create(&stale_sfnodes, sfnode_compare, sizeof (sfnode_t), + offsetof(sfnode_t, sf_linkage)); + + sffs_buffer = kmem_alloc(PAGESIZE, KM_SLEEP); + + return (0); +} + +void +sffs_vnode_fini(void) +{ + if (sffs_ops) + vn_freevnodeops(sffs_ops); + ASSERT(avl_first(&sfnodes) == NULL); + avl_destroy(&sfnodes); + if (sffs_buffer != NULL) { + kmem_free(sffs_buffer, PAGESIZE); + sffs_buffer = NULL; + } +} + +/* + * Utility at unmount to get all nodes in that mounted filesystem removed. + */ +int +sffs_purge(struct sffs_data *sffs) +{ + sfnode_t *node; + sfnode_t *prev; + + /* + * Check that no vnodes are active. + */ + if (sffs->sf_rootnode->v_count > 1) + return (-1); + for (node = avl_first(&sfnodes); node; + node = AVL_NEXT(&sfnodes, node)) { + if (node->sf_sffs == sffs && node->sf_vnode && + node->sf_vnode != sffs->sf_rootnode) + return (-1); + } + for (node = avl_first(&stale_sfnodes); node; + node = AVL_NEXT(&stale_sfnodes, node)) { + if (node->sf_sffs == sffs && node->sf_vnode && + node->sf_vnode != sffs->sf_rootnode) + return (-1); + } + + /* + * All clear to destroy all node information. Since there are no + * vnodes, the make stale will cause deletion. + */ + VN_RELE(sffs->sf_rootnode); + mutex_enter(&sffs_lock); + for (prev = NULL;;) { + if (prev == NULL) + node = avl_first(&sfnodes); + else + node = AVL_NEXT(&sfnodes, prev); + + if (node == NULL) + break; + + if (node->sf_sffs == sffs) { + if (node->sf_vnode != NULL) + panic("vboxfs: purge hit active vnode"); + sfnode_make_stale(node); + } else { + prev = node; + } + } + mutex_exit(&sffs_lock); + return (0); +} + +#if 0 +/* Debug helper functions */ +static void +sfnode_print(sfnode_t *node) +{ + Log(("0x%p", node)); + Log((" type=%s (%d)", + node->sf_type == VDIR ? 
"VDIR" : + node->sf_type == VNON ? "VNON" : + node->sf_type == VLNK ? "VLNK" : + node->sf_type == VREG ? "VREG" : "other", node->sf_type)); + Log((" ino=%d", (uint_t)node->sf_ino)); + Log((" path=%s", node->sf_path)); + Log((" parent=0x%p", node->sf_parent)); + if (node->sf_children) + Log((" children=%d", node->sf_children)); + if (node->sf_vnode) + Log((" vnode=0x%p", node->sf_vnode)); + Log(("%s\n", node->sf_is_stale ? " STALE" : "")); +} + +static void +sfnode_list(void) +{ + sfnode_t *n; + for (n = avl_first(&sfnodes); n != NULL; n = AVL_NEXT(&sfnodes, n)) + sfnode_print(n); + for (n = avl_first(&stale_sfnodes); n != NULL; + n = AVL_NEXT(&stale_sfnodes, n)) + sfnode_print(n); +} +#endif + -- cgit v1.2.3