summaryrefslogtreecommitdiffstats
path: root/src/VBox/HostDrivers/Support/linux/SUPDrv-linux.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/VBox/HostDrivers/Support/linux/SUPDrv-linux.c')
-rw-r--r--src/VBox/HostDrivers/Support/linux/SUPDrv-linux.c1783
1 files changed, 1783 insertions, 0 deletions
diff --git a/src/VBox/HostDrivers/Support/linux/SUPDrv-linux.c b/src/VBox/HostDrivers/Support/linux/SUPDrv-linux.c
new file mode 100644
index 00000000..ab36dd68
--- /dev/null
+++ b/src/VBox/HostDrivers/Support/linux/SUPDrv-linux.c
@@ -0,0 +1,1783 @@
+/* $Id: SUPDrv-linux.c $ */
+/** @file
+ * VBoxDrv - The VirtualBox Support Driver - Linux specifics.
+ */
+
+/*
+ * Copyright (C) 2006-2022 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP LOG_GROUP_SUP_DRV
+#include "../SUPDrvInternal.h"
+#include "the-linux-kernel.h"
+#include "version-generated.h"
+#include "product-generated.h"
+#include "revision-generated.h"
+
+#include <iprt/assert.h>
+#include <iprt/spinlock.h>
+#include <iprt/semaphore.h>
+#include <iprt/initterm.h>
+#include <iprt/process.h>
+#include <iprt/thread.h>
+#include <VBox/err.h>
+#include <iprt/mem.h>
+#include <VBox/log.h>
+#include <iprt/mp.h>
+
+/** @todo figure out the exact version number */
+#if RTLNX_VER_MIN(2,6,16)
+# include <iprt/power.h>
+# define VBOX_WITH_SUSPEND_NOTIFICATION
+#endif
+
+#include <linux/sched.h>
+#include <linux/miscdevice.h>
+#ifdef VBOX_WITH_SUSPEND_NOTIFICATION
+# include <linux/platform_device.h>
+#endif
+#if (RTLNX_VER_MIN(2,6,28)) && defined(SUPDRV_WITH_MSR_PROBER)
+# define SUPDRV_LINUX_HAS_SAFE_MSR_API
+# include <asm/msr.h>
+#endif
+
+#include <asm/desc.h>
+
+#include <iprt/asm-amd64-x86.h>
+
+
+/*********************************************************************************************************************************
+* Defined Constants And Macros *
+*********************************************************************************************************************************/
+/* check kernel version */
+# ifndef SUPDRV_AGNOSTIC
+# if RTLNX_VER_MAX(2,6,0)
+# error Unsupported kernel version!
+# endif
+# endif
+
+#ifdef CONFIG_X86_HIGH_ENTRY
+# error "CONFIG_X86_HIGH_ENTRY is not supported by VBoxDrv at this time."
+#endif
+
+/* We cannot include x86.h, so we copy the defines we need here: */
+#define X86_EFL_IF RT_BIT(9)
+#define X86_EFL_AC RT_BIT(18)
+#define X86_EFL_DF RT_BIT(10)
+#define X86_EFL_IOPL (RT_BIT(12) | RT_BIT(13))
+
+/* To include the version number of VirtualBox into kernel backtraces: */
+#define VBoxDrvLinuxVersion RT_CONCAT3(RT_CONCAT(VBOX_VERSION_MAJOR, _), \
+ RT_CONCAT(VBOX_VERSION_MINOR, _), \
+ VBOX_VERSION_BUILD)
+#define VBoxDrvLinuxIOCtl RT_CONCAT(VBoxDrvLinuxIOCtl_,VBoxDrvLinuxVersion)
+
+/* Once externally provided, this string will be printed into kernel log on
+ * module start together with the rest of versioning information. */
+#ifndef VBOX_EXTRA_VERSION_STRING
+# define VBOX_EXTRA_VERSION_STRING ""
+#endif
+
+
+/*********************************************************************************************************************************
+* Structures and Typedefs *
+*********************************************************************************************************************************/
+#if RTLNX_VER_MIN(5,0,0)
+/** Wrapper module list entry. */
+typedef struct SUPDRVLNXMODULE
+{
+ RTLISTNODE ListEntry;
+ struct module *pModule;
+} SUPDRVLNXMODULE;
+/** Pointer to a wrapper module list entry. */
+typedef SUPDRVLNXMODULE *PSUPDRVLNXMODULE;
+#endif
+
+
+/*********************************************************************************************************************************
+* Internal Functions *
+*********************************************************************************************************************************/
+static int __init VBoxDrvLinuxInit(void);
+static void __exit VBoxDrvLinuxUnload(void);
+static int VBoxDrvLinuxCreateSys(struct inode *pInode, struct file *pFilp);
+static int VBoxDrvLinuxCreateUsr(struct inode *pInode, struct file *pFilp);
+static int VBoxDrvLinuxClose(struct inode *pInode, struct file *pFilp);
+#ifdef HAVE_UNLOCKED_IOCTL
+static long VBoxDrvLinuxIOCtl(struct file *pFilp, unsigned int uCmd, unsigned long ulArg);
+#else
+static int VBoxDrvLinuxIOCtl(struct inode *pInode, struct file *pFilp, unsigned int uCmd, unsigned long ulArg);
+#endif
+static int VBoxDrvLinuxIOCtlSlow(struct file *pFilp, unsigned int uCmd, unsigned long ulArg, PSUPDRVSESSION pSession);
+static int VBoxDrvLinuxErr2LinuxErr(int);
+#ifdef VBOX_WITH_SUSPEND_NOTIFICATION
+static int VBoxDrvProbe(struct platform_device *pDev);
+# if RTLNX_VER_MIN(2,6,30)
+static int VBoxDrvSuspend(struct device *pDev);
+static int VBoxDrvResume(struct device *pDev);
+# else
+static int VBoxDrvSuspend(struct platform_device *pDev, pm_message_t State);
+static int VBoxDrvResume(struct platform_device *pDev);
+# endif
+static void VBoxDevRelease(struct device *pDev);
+#endif
+#if RTLNX_VER_MIN(5,0,0)
+static int supdrvLinuxLdrModuleNotifyCallback(struct notifier_block *pBlock,
+ unsigned long uModuleState, void *pvModule);
+#endif
+
+
+/*********************************************************************************************************************************
+* Global Variables *
+*********************************************************************************************************************************/
+/**
+ * Device extention & session data association structure.
+ */
+static SUPDRVDEVEXT g_DevExt;
+
+/** Module parameter.
+ * Not prefixed because the name is used by macros and the end of this file. */
+static int force_async_tsc = 0;
+
+/** The system device name. */
+#define DEVICE_NAME_SYS "vboxdrv"
+/** The user device name. */
+#define DEVICE_NAME_USR "vboxdrvu"
+
+/** The file_operations structure. */
+static struct file_operations gFileOpsVBoxDrvSys =
+{
+ owner: THIS_MODULE,
+ open: VBoxDrvLinuxCreateSys,
+ release: VBoxDrvLinuxClose,
+#ifdef HAVE_UNLOCKED_IOCTL
+ unlocked_ioctl: VBoxDrvLinuxIOCtl,
+#else
+ ioctl: VBoxDrvLinuxIOCtl,
+#endif
+};
+
+/** The file_operations structure. */
+static struct file_operations gFileOpsVBoxDrvUsr =
+{
+ owner: THIS_MODULE,
+ open: VBoxDrvLinuxCreateUsr,
+ release: VBoxDrvLinuxClose,
+#ifdef HAVE_UNLOCKED_IOCTL
+ unlocked_ioctl: VBoxDrvLinuxIOCtl,
+#else
+ ioctl: VBoxDrvLinuxIOCtl,
+#endif
+};
+
+/** The miscdevice structure for vboxdrv. */
+static struct miscdevice gMiscDeviceSys =
+{
+ minor: MISC_DYNAMIC_MINOR,
+ name: DEVICE_NAME_SYS,
+ fops: &gFileOpsVBoxDrvSys,
+# if RTLNX_VER_MAX(2,6,18)
+ devfs_name: DEVICE_NAME_SYS,
+# endif
+};
+/** The miscdevice structure for vboxdrvu. */
+static struct miscdevice gMiscDeviceUsr =
+{
+ minor: MISC_DYNAMIC_MINOR,
+ name: DEVICE_NAME_USR,
+ fops: &gFileOpsVBoxDrvUsr,
+# if RTLNX_VER_MAX(2,6,18)
+ devfs_name: DEVICE_NAME_USR,
+# endif
+};
+
+
+#ifdef VBOX_WITH_SUSPEND_NOTIFICATION
+
+# if RTLNX_VER_MIN(2,6,30)
+static struct dev_pm_ops gPlatformPMOps =
+{
+ .suspend = VBoxDrvSuspend, /* before entering deep sleep */
+ .resume = VBoxDrvResume, /* after wakeup from deep sleep */
+ .freeze = VBoxDrvSuspend, /* before creating hibernation image */
+ .restore = VBoxDrvResume, /* after waking up from hibernation */
+};
+# endif
+
+static struct platform_driver gPlatformDriver =
+{
+ .probe = VBoxDrvProbe,
+# if RTLNX_VER_MAX(2,6,30)
+ .suspend = VBoxDrvSuspend,
+ .resume = VBoxDrvResume,
+# endif
+ /** @todo .shutdown? */
+ .driver =
+ {
+ .name = "vboxdrv",
+# if RTLNX_VER_MIN(2,6,30)
+ .pm = &gPlatformPMOps,
+# endif
+ }
+};
+
+static struct platform_device gPlatformDevice =
+{
+ .name = "vboxdrv",
+ .dev =
+ {
+ .release = VBoxDevRelease
+ }
+};
+
+#endif /* VBOX_WITH_SUSPEND_NOTIFICATION */
+
+#if RTLNX_VER_MIN(5,0,0)
+/** Module load/unload notification registration record. */
+static struct notifier_block g_supdrvLinuxModuleNotifierBlock =
+{
+ .notifier_call = supdrvLinuxLdrModuleNotifyCallback,
+ .priority = 0
+};
+/** Spinlock protecting g_supdrvLinuxWrapperModuleList. */
+static spinlock_t g_supdrvLinuxWrapperModuleSpinlock;
+/** List of potential wrapper modules (PSUPDRVLNXMODULE). */
+static RTLISTANCHOR g_supdrvLinuxWrapperModuleList;
+#endif
+
+
+/** Get the kernel UID for the current process. */
+DECLINLINE(RTUID) vboxdrvLinuxKernUid(void)
+{
+#if RTLNX_VER_MIN(2,6,29)
+# if RTLNX_VER_MIN(3,5,0)
+ return __kuid_val(current->cred->uid);
+# else
+ return current->cred->uid;
+# endif
+#else
+ return current->uid;
+#endif
+}
+
+
+/** Get the kernel GID for the current process. */
+DECLINLINE(RTGID) vboxdrvLinuxKernGid(void)
+{
+#if RTLNX_VER_MIN(2,6,29)
+# if RTLNX_VER_MIN(3,5,0)
+ return __kgid_val(current->cred->gid);
+# else
+ return current->cred->gid;
+# endif
+#else
+ return current->gid;
+#endif
+}
+
+
+#ifdef VBOX_WITH_HARDENING
+/** Get the effective UID within the current user namespace. */
+DECLINLINE(RTUID) vboxdrvLinuxEuidInNs(void)
+{
+# if RTLNX_VER_MIN(2,6,29)
+# if RTLNX_VER_MIN(3,5,0)
+ return from_kuid(current_user_ns(), current->cred->euid);
+# else
+ return current->cred->euid;
+# endif
+# else
+ return current->euid;
+# endif
+}
+#endif
+
+
+/**
+ * Initialize module.
+ *
+ * @returns appropriate status code.
+ */
+static int __init VBoxDrvLinuxInit(void)
+{
+ int rc;
+
+#if RTLNX_VER_MIN(5,0,0)
+ spin_lock_init(&g_supdrvLinuxWrapperModuleSpinlock);
+ RTListInit(&g_supdrvLinuxWrapperModuleList);
+#endif
+
+ /*
+ * Check for synchronous/asynchronous TSC mode.
+ */
+ printk(KERN_DEBUG "vboxdrv: Found %u processor cores/threads\n", (unsigned)RTMpGetOnlineCount());
+ rc = misc_register(&gMiscDeviceSys);
+ if (rc)
+ {
+ printk(KERN_ERR "vboxdrv: Can't register system misc device! rc=%d\n", rc);
+ return rc;
+ }
+ rc = misc_register(&gMiscDeviceUsr);
+ if (rc)
+ {
+ printk(KERN_ERR "vboxdrv: Can't register user misc device! rc=%d\n", rc);
+ misc_deregister(&gMiscDeviceSys);
+ return rc;
+ }
+ if (!rc)
+ {
+ /*
+ * Initialize the runtime.
+ * On AMD64 we'll have to donate the high rwx memory block to the exec allocator.
+ */
+ rc = RTR0Init(0);
+ if (RT_SUCCESS(rc))
+ {
+ Log(("VBoxDrv::ModuleInit\n"));
+
+ /*
+ * Initialize the device extension.
+ */
+ rc = supdrvInitDevExt(&g_DevExt, sizeof(SUPDRVSESSION));
+ if (RT_SUCCESS(rc))
+ {
+#ifdef VBOX_WITH_SUSPEND_NOTIFICATION
+ rc = platform_driver_register(&gPlatformDriver);
+ if (rc == 0)
+ {
+ rc = platform_device_register(&gPlatformDevice);
+ if (rc == 0)
+#endif
+ {
+#if RTLNX_VER_MIN(5,0,0)
+ /*
+ * Register the module notifier.
+ */
+ int rc2 = register_module_notifier(&g_supdrvLinuxModuleNotifierBlock);
+ if (rc2)
+ printk(KERN_WARNING "vboxdrv: failed to register module notifier! rc2=%d\n", rc2);
+#endif
+
+
+ printk(KERN_INFO "vboxdrv: TSC mode is %s, tentative frequency %llu Hz\n",
+ SUPGetGIPModeName(g_DevExt.pGip), g_DevExt.pGip->u64CpuHz);
+ LogFlow(("VBoxDrv::ModuleInit returning %#x\n", rc));
+ printk(KERN_DEBUG "vboxdrv: Successfully loaded version "
+ VBOX_VERSION_STRING " r" RT_XSTR(VBOX_SVN_REV)
+ VBOX_EXTRA_VERSION_STRING
+ " (interface " RT_XSTR(SUPDRV_IOC_VERSION) ")\n");
+ return rc;
+ }
+#ifdef VBOX_WITH_SUSPEND_NOTIFICATION
+ else
+ platform_driver_unregister(&gPlatformDriver);
+ }
+#endif
+ }
+
+ rc = -EINVAL;
+ RTR0TermForced();
+ }
+ else
+ rc = -EINVAL;
+
+ /*
+ * Failed, cleanup and return the error code.
+ */
+ }
+ misc_deregister(&gMiscDeviceSys);
+ misc_deregister(&gMiscDeviceUsr);
+ Log(("VBoxDrv::ModuleInit returning %#x (minor:%d & %d)\n", rc, gMiscDeviceSys.minor, gMiscDeviceUsr.minor));
+ return rc;
+}
+
+
+/**
+ * Unload the module.
+ */
+static void __exit VBoxDrvLinuxUnload(void)
+{
+ Log(("VBoxDrvLinuxUnload\n"));
+
+#ifdef VBOX_WITH_SUSPEND_NOTIFICATION
+ platform_device_unregister(&gPlatformDevice);
+ platform_driver_unregister(&gPlatformDriver);
+#endif
+
+#if RTLNX_VER_MIN(5,0,0)
+ /*
+ * Kick the list of potential wrapper modules.
+ */
+ unregister_module_notifier(&g_supdrvLinuxModuleNotifierBlock);
+
+ spin_lock(&g_supdrvLinuxWrapperModuleSpinlock);
+ while (!RTListIsEmpty(&g_supdrvLinuxWrapperModuleList))
+ {
+ PSUPDRVLNXMODULE pCur = RTListRemoveFirst(&g_supdrvLinuxWrapperModuleList, SUPDRVLNXMODULE, ListEntry);
+ spin_unlock(&g_supdrvLinuxWrapperModuleSpinlock);
+
+ pCur->pModule = NULL;
+ RTMemFree(pCur);
+
+ spin_lock(&g_supdrvLinuxWrapperModuleSpinlock);
+ }
+ spin_unlock(&g_supdrvLinuxWrapperModuleSpinlock);
+#endif
+
+ /*
+ * I Don't think it's possible to unload a driver which processes have
+ * opened, at least we'll blindly assume that here.
+ */
+ misc_deregister(&gMiscDeviceUsr);
+ misc_deregister(&gMiscDeviceSys);
+
+ /*
+ * Destroy GIP, delete the device extension and terminate IPRT.
+ */
+ supdrvDeleteDevExt(&g_DevExt);
+ RTR0TermForced();
+}
+
+
+/**
+ * Common open code.
+ *
+ * @param pInode Pointer to inode info structure.
+ * @param pFilp Associated file pointer.
+ * @param fUnrestricted Indicates which device node which was opened.
+ */
+static int vboxdrvLinuxCreateCommon(struct inode *pInode, struct file *pFilp, bool fUnrestricted)
+{
+ int rc;
+ PSUPDRVSESSION pSession;
+ Log(("VBoxDrvLinuxCreate: pFilp=%p pid=%d/%d %s\n", pFilp, RTProcSelf(), current->pid, current->comm));
+
+#ifdef VBOX_WITH_HARDENING
+ /*
+ * Only root is allowed to access the unrestricted device, enforce it!
+ */
+ if ( fUnrestricted
+ && vboxdrvLinuxEuidInNs() != 0 /* root */ )
+ {
+ Log(("VBoxDrvLinuxCreate: euid=%d, expected 0 (root)\n", vboxdrvLinuxEuidInNs()));
+ return -EPERM;
+ }
+#endif /* VBOX_WITH_HARDENING */
+
+ /*
+ * Call common code for the rest.
+ */
+ rc = supdrvCreateSession(&g_DevExt, true /* fUser */, fUnrestricted, &pSession);
+ if (!rc)
+ {
+ pSession->Uid = vboxdrvLinuxKernUid();
+ pSession->Gid = vboxdrvLinuxKernGid();
+ }
+
+ pFilp->private_data = pSession;
+
+ Log(("VBoxDrvLinuxCreate: g_DevExt=%p pSession=%p rc=%d/%d (pid=%d/%d %s)\n",
+ &g_DevExt, pSession, rc, VBoxDrvLinuxErr2LinuxErr(rc),
+ RTProcSelf(), current->pid, current->comm));
+ return VBoxDrvLinuxErr2LinuxErr(rc);
+}
+
+
+/** /dev/vboxdrv. */
+static int VBoxDrvLinuxCreateSys(struct inode *pInode, struct file *pFilp)
+{
+ return vboxdrvLinuxCreateCommon(pInode, pFilp, true);
+}
+
+
+/** /dev/vboxdrvu. */
+static int VBoxDrvLinuxCreateUsr(struct inode *pInode, struct file *pFilp)
+{
+ return vboxdrvLinuxCreateCommon(pInode, pFilp, false);
+}
+
+
+/**
+ * Close device.
+ *
+ * @param pInode Pointer to inode info structure.
+ * @param pFilp Associated file pointer.
+ */
+static int VBoxDrvLinuxClose(struct inode *pInode, struct file *pFilp)
+{
+ Log(("VBoxDrvLinuxClose: pFilp=%p pSession=%p pid=%d/%d %s\n",
+ pFilp, pFilp->private_data, RTProcSelf(), current->pid, current->comm));
+ supdrvSessionRelease((PSUPDRVSESSION)pFilp->private_data);
+ pFilp->private_data = NULL;
+ return 0;
+}
+
+
+#ifdef VBOX_WITH_SUSPEND_NOTIFICATION
+/**
+ * Dummy device release function. We have to provide this function,
+ * otherwise the kernel will complain.
+ *
+ * @param pDev Pointer to the platform device.
+ */
+static void VBoxDevRelease(struct device *pDev)
+{
+}
+
+/**
+ * Dummy probe function.
+ *
+ * @param pDev Pointer to the platform device.
+ */
+static int VBoxDrvProbe(struct platform_device *pDev)
+{
+ return 0;
+}
+
+/**
+ * Suspend callback.
+ * @param pDev Pointer to the platform device.
+ * @param State Message type, see Documentation/power/devices.txt.
+ * Ignored.
+ */
+# if RTLNX_VER_MIN(2,6,30) && !defined(DOXYGEN_RUNNING)
+static int VBoxDrvSuspend(struct device *pDev)
+# else
+static int VBoxDrvSuspend(struct platform_device *pDev, pm_message_t State)
+# endif
+{
+ RTPowerSignalEvent(RTPOWEREVENT_SUSPEND);
+ return 0;
+}
+
+/**
+ * Resume callback.
+ *
+ * @param pDev Pointer to the platform device.
+ */
+# if RTLNX_VER_MIN(2,6,30)
+static int VBoxDrvResume(struct device *pDev)
+# else
+static int VBoxDrvResume(struct platform_device *pDev)
+# endif
+{
+ RTPowerSignalEvent(RTPOWEREVENT_RESUME);
+ return 0;
+}
+#endif /* VBOX_WITH_SUSPEND_NOTIFICATION */
+
+
+/**
+ * Device I/O Control entry point.
+ *
+ * @param pFilp Associated file pointer.
+ * @param uCmd The function specified to ioctl().
+ * @param ulArg The argument specified to ioctl().
+ */
+#if defined(HAVE_UNLOCKED_IOCTL) || defined(DOXYGEN_RUNNING)
+static long VBoxDrvLinuxIOCtl(struct file *pFilp, unsigned int uCmd, unsigned long ulArg)
+#else
+static int VBoxDrvLinuxIOCtl(struct inode *pInode, struct file *pFilp, unsigned int uCmd, unsigned long ulArg)
+#endif
+{
+ PSUPDRVSESSION pSession = (PSUPDRVSESSION)pFilp->private_data;
+ int rc;
+#ifndef VBOX_WITHOUT_EFLAGS_AC_SET_IN_VBOXDRV
+# if defined(VBOX_STRICT) || defined(VBOX_WITH_EFLAGS_AC_SET_IN_VBOXDRV)
+ RTCCUINTREG fSavedEfl;
+
+ /*
+ * Refuse all I/O control calls if we've ever detected EFLAGS.AC being cleared.
+ *
+ * This isn't a problem, as there is absolutely nothing in the kernel context that
+ * depend on user context triggering cleanups. That would be pretty wild, right?
+ */
+ if (RT_UNLIKELY(g_DevExt.cBadContextCalls > 0))
+ {
+ SUPR0Printf("VBoxDrvLinuxIOCtl: EFLAGS.AC=0 detected %u times, refusing all I/O controls!\n", g_DevExt.cBadContextCalls);
+ return ESPIPE;
+ }
+
+ fSavedEfl = ASMAddFlags(X86_EFL_AC);
+# else
+ stac();
+# endif
+#endif
+
+ /*
+ * Deal with the two high-speed IOCtl that takes it's arguments from
+ * the session and iCmd, and only returns a VBox status code.
+ */
+ AssertCompile(_IOC_NRSHIFT == 0 && _IOC_NRBITS == 8);
+#ifdef HAVE_UNLOCKED_IOCTL
+ if (RT_LIKELY( (unsigned int)(uCmd - SUP_IOCTL_FAST_DO_FIRST) < (unsigned int)32
+ && pSession->fUnrestricted))
+ rc = supdrvIOCtlFast(uCmd - SUP_IOCTL_FAST_DO_FIRST, ulArg, &g_DevExt, pSession);
+ else
+ rc = VBoxDrvLinuxIOCtlSlow(pFilp, uCmd, ulArg, pSession);
+#else /* !HAVE_UNLOCKED_IOCTL */
+ unlock_kernel();
+ if (RT_LIKELY( (unsigned int)(uCmd - SUP_IOCTL_FAST_DO_FIRST) < (unsigned int)32
+ && pSession->fUnrestricted))
+ rc = supdrvIOCtlFast(uCmd - SUP_IOCTL_FAST_DO_FIRST, ulArg, &g_DevExt, pSession);
+ else
+ rc = VBoxDrvLinuxIOCtlSlow(pFilp, uCmd, ulArg, pSession);
+ lock_kernel();
+#endif /* !HAVE_UNLOCKED_IOCTL */
+
+#ifndef VBOX_WITHOUT_EFLAGS_AC_SET_IN_VBOXDRV
+# if defined(VBOX_STRICT) || defined(VBOX_WITH_EFLAGS_AC_SET_IN_VBOXDRV)
+ /*
+ * Before we restore AC and the rest of EFLAGS, check if the IOCtl handler code
+ * accidentially modified it or some other important flag.
+ */
+ if (RT_UNLIKELY( (ASMGetFlags() & (X86_EFL_AC | X86_EFL_IF | X86_EFL_DF))
+ != ((fSavedEfl & (X86_EFL_AC | X86_EFL_IF | X86_EFL_DF)) | X86_EFL_AC) ))
+ {
+ char szTmp[48];
+ RTStrPrintf(szTmp, sizeof(szTmp), "uCmd=%#x: %#x->%#x!", _IOC_NR(uCmd), (uint32_t)fSavedEfl, (uint32_t)ASMGetFlags());
+ supdrvBadContext(&g_DevExt, "SUPDrv-linux.c", __LINE__, szTmp);
+ }
+ ASMSetFlags(fSavedEfl);
+# else
+ clac();
+# endif
+#endif
+ return rc;
+}
+
+
+/**
+ * Device I/O Control entry point.
+ *
+ * @param pFilp Associated file pointer.
+ * @param uCmd The function specified to ioctl().
+ * @param ulArg The argument specified to ioctl().
+ * @param pSession The session instance.
+ */
+static int VBoxDrvLinuxIOCtlSlow(struct file *pFilp, unsigned int uCmd, unsigned long ulArg, PSUPDRVSESSION pSession)
+{
+ int rc;
+ SUPREQHDR Hdr;
+ PSUPREQHDR pHdr;
+ uint32_t cbBuf;
+
+ Log6(("VBoxDrvLinuxIOCtl: pFilp=%p uCmd=%#x ulArg=%p pid=%d/%d\n", pFilp, uCmd, (void *)ulArg, RTProcSelf(), current->pid));
+
+ /*
+ * Read the header.
+ */
+ if (RT_FAILURE(RTR0MemUserCopyFrom(&Hdr, ulArg, sizeof(Hdr))))
+ {
+ Log(("VBoxDrvLinuxIOCtl: copy_from_user(,%#lx,) failed; uCmd=%#x\n", ulArg, uCmd));
+ return -EFAULT;
+ }
+ if (RT_UNLIKELY((Hdr.fFlags & SUPREQHDR_FLAGS_MAGIC_MASK) != SUPREQHDR_FLAGS_MAGIC))
+ {
+ Log(("VBoxDrvLinuxIOCtl: bad header magic %#x; uCmd=%#x\n", Hdr.fFlags & SUPREQHDR_FLAGS_MAGIC_MASK, uCmd));
+ return -EINVAL;
+ }
+
+ /*
+ * Buffer the request.
+ */
+ cbBuf = RT_MAX(Hdr.cbIn, Hdr.cbOut);
+ if (RT_UNLIKELY(cbBuf > _1M*16))
+ {
+ Log(("VBoxDrvLinuxIOCtl: too big cbBuf=%#x; uCmd=%#x\n", cbBuf, uCmd));
+ return -E2BIG;
+ }
+ if (RT_UNLIKELY(_IOC_SIZE(uCmd) ? cbBuf != _IOC_SIZE(uCmd) : Hdr.cbIn < sizeof(Hdr)))
+ {
+ Log(("VBoxDrvLinuxIOCtl: bad ioctl cbBuf=%#x _IOC_SIZE=%#x; uCmd=%#x\n", cbBuf, _IOC_SIZE(uCmd), uCmd));
+ return -EINVAL;
+ }
+ pHdr = RTMemAlloc(cbBuf);
+ if (RT_UNLIKELY(!pHdr))
+ {
+ OSDBGPRINT(("VBoxDrvLinuxIOCtl: failed to allocate buffer of %d bytes for uCmd=%#x\n", cbBuf, uCmd));
+ return -ENOMEM;
+ }
+ if (RT_FAILURE(RTR0MemUserCopyFrom(pHdr, ulArg, Hdr.cbIn)))
+ {
+ Log(("VBoxDrvLinuxIOCtl: copy_from_user(,%#lx, %#x) failed; uCmd=%#x\n", ulArg, Hdr.cbIn, uCmd));
+ RTMemFree(pHdr);
+ return -EFAULT;
+ }
+ if (Hdr.cbIn < cbBuf)
+ RT_BZERO((uint8_t *)pHdr + Hdr.cbIn, cbBuf - Hdr.cbIn);
+
+ /*
+ * Process the IOCtl.
+ */
+ rc = supdrvIOCtl(uCmd, &g_DevExt, pSession, pHdr, cbBuf);
+
+ /*
+ * Copy ioctl data and output buffer back to user space.
+ */
+ if (RT_LIKELY(!rc))
+ {
+ uint32_t cbOut = pHdr->cbOut;
+ if (RT_UNLIKELY(cbOut > cbBuf))
+ {
+ OSDBGPRINT(("VBoxDrvLinuxIOCtl: too much output! %#x > %#x; uCmd=%#x!\n", cbOut, cbBuf, uCmd));
+ cbOut = cbBuf;
+ }
+ if (RT_FAILURE(RTR0MemUserCopyTo(ulArg, pHdr, cbOut)))
+ {
+ /* this is really bad! */
+ OSDBGPRINT(("VBoxDrvLinuxIOCtl: copy_to_user(%#lx,,%#x); uCmd=%#x!\n", ulArg, cbOut, uCmd));
+ rc = -EFAULT;
+ }
+ }
+ else
+ {
+ Log(("VBoxDrvLinuxIOCtl: pFilp=%p uCmd=%#x ulArg=%p failed, rc=%d\n", pFilp, uCmd, (void *)ulArg, rc));
+ rc = -EINVAL;
+ }
+ RTMemFree(pHdr);
+
+ Log6(("VBoxDrvLinuxIOCtl: returns %d (pid=%d/%d)\n", rc, RTProcSelf(), current->pid));
+ return rc;
+}
+
+
+/**
+ * The SUPDRV IDC entry point.
+ *
+ * @returns VBox status code, see supdrvIDC.
+ * @param uReq The request code.
+ * @param pReq The request.
+ */
+int VBOXCALL SUPDrvLinuxIDC(uint32_t uReq, PSUPDRVIDCREQHDR pReq)
+{
+ PSUPDRVSESSION pSession;
+
+ /*
+ * Some quick validations.
+ */
+ if (RT_UNLIKELY(!RT_VALID_PTR(pReq)))
+ return VERR_INVALID_POINTER;
+
+ pSession = pReq->pSession;
+ if (pSession)
+ {
+ if (RT_UNLIKELY(!RT_VALID_PTR(pSession)))
+ return VERR_INVALID_PARAMETER;
+ if (RT_UNLIKELY(pSession->pDevExt != &g_DevExt))
+ return VERR_INVALID_PARAMETER;
+ }
+ else if (RT_UNLIKELY(uReq != SUPDRV_IDC_REQ_CONNECT))
+ return VERR_INVALID_PARAMETER;
+
+ /*
+ * Do the job.
+ */
+ return supdrvIDC(uReq, &g_DevExt, pSession, pReq);
+}
+EXPORT_SYMBOL(SUPDrvLinuxIDC);
+
+
+#if RTLNX_VER_MIN(5,0,0)
+
+/**
+ * Checks if the given module is one of our potential wrapper modules or not.
+ */
+static bool supdrvLinuxLdrIsPotentialWrapperModule(struct module const *pModule)
+{
+ if ( pModule
+ && strncmp(pModule->name, RT_STR_TUPLE("vbox_")) == 0)
+ return true;
+ return false;
+}
+
+/**
+ * Called when a kernel module changes state.
+ *
+ * We use this to listen for wrapper modules being loaded, since some evil
+ * bugger removed the find_module() export in 5.13.
+ */
+static int supdrvLinuxLdrModuleNotifyCallback(struct notifier_block *pBlock, unsigned long uModuleState, void *pvModule)
+{
+ struct module *pModule = (struct module *)pvModule;
+ switch (uModuleState)
+ {
+ case MODULE_STATE_UNFORMED: /* Setting up the module... */
+ break;
+
+ /*
+ * The module is about to have its ctors & init functions called.
+ *
+ * Add anything that looks like a wrapper module to our tracker list.
+ */
+ case MODULE_STATE_COMING:
+ if (supdrvLinuxLdrIsPotentialWrapperModule(pModule))
+ {
+ PSUPDRVLNXMODULE pTracker = (PSUPDRVLNXMODULE)RTMemAlloc(sizeof(*pTracker));
+ if (pTracker)
+ {
+ pTracker->pModule = pModule;
+ spin_lock(&g_supdrvLinuxWrapperModuleSpinlock);
+ RTListPrepend(&g_supdrvLinuxWrapperModuleList, &pTracker->ListEntry);
+ spin_unlock(&g_supdrvLinuxWrapperModuleSpinlock);
+ }
+ }
+ break;
+
+ case MODULE_STATE_LIVE:
+ break;
+
+ /*
+ * The module has been uninited and is going away.
+ *
+ * Remove the tracker entry for the module, if we have one.
+ */
+ case MODULE_STATE_GOING:
+ {
+ PSUPDRVLNXMODULE pCur;
+ spin_lock(&g_supdrvLinuxWrapperModuleSpinlock);
+ RTListForEach(&g_supdrvLinuxWrapperModuleList, pCur, SUPDRVLNXMODULE, ListEntry)
+ {
+ if (pCur->pModule == pModule)
+ {
+ RTListNodeRemove(&pCur->ListEntry);
+ spin_unlock(&g_supdrvLinuxWrapperModuleSpinlock);
+
+ pCur->pModule = NULL;
+ RTMemFree(pCur);
+
+ spin_lock(&g_supdrvLinuxWrapperModuleSpinlock); /* silly */
+ break;
+ }
+ }
+ spin_unlock(&g_supdrvLinuxWrapperModuleSpinlock);
+ break;
+ }
+ }
+ RT_NOREF(pBlock);
+ return NOTIFY_OK;
+}
+
+/**
+ * Replacement for find_module() that's no longer exported with 5.13.
+ */
+static struct module *supdrvLinuxLdrFindModule(const char *pszLnxModName)
+{
+ PSUPDRVLNXMODULE pCur;
+
+ spin_lock(&g_supdrvLinuxWrapperModuleSpinlock);
+ RTListForEach(&g_supdrvLinuxWrapperModuleList, pCur, SUPDRVLNXMODULE, ListEntry)
+ {
+ struct module * const pModule = pCur->pModule;
+ if ( pModule
+ && strcmp(pszLnxModName, pModule->name) == 0)
+ {
+ spin_unlock(&g_supdrvLinuxWrapperModuleSpinlock);
+ return pModule;
+ }
+ }
+ spin_unlock(&g_supdrvLinuxWrapperModuleSpinlock);
+ return NULL;
+}
+
+#endif /* >= 5.0.0 */
+
+
+/**
+ * Used by native wrapper modules, forwarding to supdrvLdrRegisterWrappedModule
+ * with device extension prepended to the argument list.
+ */
+SUPR0DECL(int) SUPDrvLinuxLdrRegisterWrappedModule(PCSUPLDRWRAPPEDMODULE pWrappedModInfo,
+ const char *pszLnxModName, void **phMod)
+{
+ AssertPtrReturn(pszLnxModName, VERR_INVALID_POINTER);
+ AssertReturn(*pszLnxModName, VERR_INVALID_NAME);
+
+ /* Locate the module structure for the caller so can later reference
+ and dereference it to prevent unloading while it is being used.
+
+ Before Linux v5.9 this could be done by address (__module_address()
+ or __module_text_address()), but someone (guess who) apparently on
+ a mission to make life miserable for out-of-tree modules or something,
+ decided it was only used by build-in code and unexported both of them.
+
+ I could find no init callouts getting a struct module pointer either,
+ nor any module name hint anywhere I could see. So, we're left with
+ hardcoding the module name via the compiler and pass it along to
+ SUPDrv so we can call find_module() here.
+
+ Sigh^2.
+
+ Update 5.13:
+ The find_module() and module_mutex symbols are no longer exported,
+ probably the doing of the same evil bugger mentioned above. So, we now
+ register a module notification callback and track the modules we're
+ interested in that way. */
+
+#if RTLNX_VER_MIN(5,0,0)
+ struct module *pLnxModule = supdrvLinuxLdrFindModule(pszLnxModName);
+ if (pLnxModule)
+ return supdrvLdrRegisterWrappedModule(&g_DevExt, pWrappedModInfo, pLnxModule, phMod);
+ printk("vboxdrv: supdrvLinuxLdrFindModule(%s) failed in SUPDrvLinuxLdrRegisterWrappedModule!\n", pszLnxModName);
+ return VERR_MODULE_NOT_FOUND;
+
+#elif RTLNX_VER_MIN(2,6,30)
+ if (mutex_lock_interruptible(&module_mutex) == 0)
+ {
+ struct module *pLnxModule = find_module(pszLnxModName);
+ mutex_unlock(&module_mutex);
+ if (pLnxModule)
+ return supdrvLdrRegisterWrappedModule(&g_DevExt, pWrappedModInfo, pLnxModule, phMod);
+ printk("vboxdrv: find_module(%s) failed in SUPDrvLinuxLdrRegisterWrappedModule!\n", pszLnxModName);
+ return VERR_MODULE_NOT_FOUND;
+ }
+ return VERR_INTERRUPTED;
+
+#else
+ printk("vboxdrv: wrapper modules are not supported on 2.6.29 and earlier. sorry.\n");
+ return VERR_NOT_SUPPORTED;
+#endif
+}
+EXPORT_SYMBOL(SUPDrvLinuxLdrRegisterWrappedModule);
+
+
+/**
+ * Used by native wrapper modules, forwarding to supdrvLdrDeregisterWrappedModule
+ * with device extension prepended to the argument list.
+ */
+SUPR0DECL(int) SUPDrvLinuxLdrDeregisterWrappedModule(PCSUPLDRWRAPPEDMODULE pWrappedModInfo, void **phMod)
+{
+ return supdrvLdrDeregisterWrappedModule(&g_DevExt, pWrappedModInfo, phMod);
+}
+EXPORT_SYMBOL(SUPDrvLinuxLdrDeregisterWrappedModule);
+
+
+RTCCUINTREG VBOXCALL supdrvOSChangeCR4(RTCCUINTREG fOrMask, RTCCUINTREG fAndMask)
+{
+#if RTLNX_VER_MIN(5,8,0)
+ unsigned long fSavedFlags;
+ local_irq_save(fSavedFlags);
+ RTCCUINTREG const uOld = cr4_read_shadow();
+ cr4_update_irqsoff(fOrMask, ~fAndMask); /* Same as this function, only it is not returning the old value. */
+ AssertMsg(cr4_read_shadow() == ((uOld & fAndMask) | fOrMask),
+ ("fOrMask=%#RTreg fAndMask=%#RTreg uOld=%#RTreg; new cr4=%#llx\n", fOrMask, fAndMask, uOld, cr4_read_shadow()));
+ local_irq_restore(fSavedFlags);
+#else
+# if RTLNX_VER_MIN(3,20,0)
+ RTCCUINTREG const uOld = this_cpu_read(cpu_tlbstate.cr4);
+# else
+ RTCCUINTREG const uOld = ASMGetCR4();
+# endif
+ RTCCUINTREG const uNew = (uOld & fAndMask) | fOrMask;
+ if (uNew != uOld)
+ {
+# if RTLNX_VER_MIN(3,20,0)
+ this_cpu_write(cpu_tlbstate.cr4, uNew);
+ __write_cr4(uNew);
+# else
+ ASMSetCR4(uNew);
+# endif
+ }
+#endif
+ return uOld;
+}
+
+
+void VBOXCALL supdrvOSCleanupSession(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession)
+{
+ NOREF(pDevExt);
+ NOREF(pSession);
+}
+
+
+void VBOXCALL supdrvOSSessionHashTabInserted(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, void *pvUser)
+{
+ NOREF(pDevExt); NOREF(pSession); NOREF(pvUser);
+}
+
+
+void VBOXCALL supdrvOSSessionHashTabRemoved(PSUPDRVDEVEXT pDevExt, PSUPDRVSESSION pSession, void *pvUser)
+{
+ NOREF(pDevExt); NOREF(pSession); NOREF(pvUser);
+}
+
+
+/**
+ * Initializes any OS specific object creator fields.
+ */
+void VBOXCALL supdrvOSObjInitCreator(PSUPDRVOBJ pObj, PSUPDRVSESSION pSession)
+{
+ NOREF(pObj);
+ NOREF(pSession);
+}
+
+
+/**
+ * Checks if the session can access the object.
+ *
+ * @returns true if a decision has been made.
+ * @returns false if the default access policy should be applied.
+ *
+ * @param pObj The object in question.
+ * @param pSession The session wanting to access the object.
+ * @param pszObjName The object name, can be NULL.
+ * @param prc Where to store the result when returning true.
+ */
+bool VBOXCALL supdrvOSObjCanAccess(PSUPDRVOBJ pObj, PSUPDRVSESSION pSession, const char *pszObjName, int *prc)
+{
+ NOREF(pObj);
+ NOREF(pSession);
+ NOREF(pszObjName);
+ NOREF(prc);
+ return false;
+}
+
+
+bool VBOXCALL supdrvOSGetForcedAsyncTscMode(PSUPDRVDEVEXT pDevExt)
+{
+ return force_async_tsc != 0;
+}
+
+
+bool VBOXCALL supdrvOSAreCpusOfflinedOnSuspend(void)
+{
+ return true;
+}
+
+
+bool VBOXCALL supdrvOSAreTscDeltasInSync(void)
+{
+ return false;
+}
+
+
+int VBOXCALL supdrvOSLdrOpen(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage, const char *pszFilename)
+{
+ NOREF(pDevExt); NOREF(pImage); NOREF(pszFilename);
+ return VERR_NOT_SUPPORTED;
+}
+
+
+int VBOXCALL supdrvOSLdrValidatePointer(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage, void *pv,
+ const uint8_t *pbImageBits, const char *pszSymbol)
+{
+ NOREF(pDevExt); NOREF(pImage); NOREF(pv); NOREF(pbImageBits); NOREF(pszSymbol);
+ return VERR_NOT_SUPPORTED;
+}
+
+
+int VBOXCALL supdrvOSLdrLoad(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage, const uint8_t *pbImageBits, PSUPLDRLOAD pReq)
+{
+ NOREF(pDevExt); NOREF(pImage); NOREF(pbImageBits); NOREF(pReq);
+ return VERR_NOT_SUPPORTED;
+}
+
+
+void VBOXCALL supdrvOSLdrUnload(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage)
+{
+ NOREF(pDevExt); NOREF(pImage);
+}
+
+
+/** @def VBOX_WITH_NON_PROD_HACK_FOR_PERF_STACKS
+ * A very crude hack for debugging using perf and dtrace.
+ *
+ * DO ABSOLUTELY NOT ENABLE IN PRODUCTION BUILDS! DEVELOPMENT ONLY!!
+ * DO ABSOLUTELY NOT ENABLE IN PRODUCTION BUILDS! DEVELOPMENT ONLY!!
+ * DO ABSOLUTELY NOT ENABLE IN PRODUCTION BUILDS! DEVELOPMENT ONLY!!
+ *
+ */
+#if 0 || defined(DOXYGEN_RUNNING)
+# define VBOX_WITH_NON_PROD_HACK_FOR_PERF_STACKS
+#endif
+
+#if defined(VBOX_WITH_NON_PROD_HACK_FOR_PERF_STACKS) && defined(CONFIG_MODULES_TREE_LOOKUP)
+/** Whether g_pfnModTreeInsert and g_pfnModTreeRemove have been initialized.
+ * @remarks can still be NULL after init. */
+static volatile bool g_fLookedForModTreeFunctions = false;
+static void (*g_pfnModTreeInsert)(struct mod_tree_node *) = NULL; /**< __mod_tree_insert */
+static void (*g_pfnModTreeRemove)(struct mod_tree_node *) = NULL; /**< __mod_tree_remove */
+#endif
+
+
+void VBOXCALL supdrvOSLdrNotifyOpened(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage, const char *pszFilename)
+{
+#ifdef VBOX_WITH_NON_PROD_HACK_FOR_PERF_STACKS /* Not for production use!! Debugging only! */
+ /*
+ * This trick stops working with 4.2 when CONFIG_MODULES_TREE_LOOKUP is
+ * defined. The module lookups are done via a tree structure and we
+ * cannot get at the root of it. :-(
+ */
+# ifdef CONFIG_KALLSYMS
+ size_t const cchName = strlen(pImage->szName);
+# endif
+ struct module *pMyMod, *pSelfMod, *pTestMod, *pTestModByName;
+ IPRT_LINUX_SAVE_EFL_AC();
+
+ pImage->pLnxModHack = NULL;
+
+# ifdef CONFIG_MODULES_TREE_LOOKUP
+ /*
+ * This is pretty naive, but works for 4.2 on arch linux. I don't think we
+ * can count on finding __mod_tree_remove in all kernel builds as it's not
+ * marked noinline like __mod_tree_insert.
+ */
+ if (!g_fLookedForModTreeFunctions)
+ {
+ unsigned long ulInsert = kallsyms_lookup_name("__mod_tree_insert");
+ unsigned long ulRemove = kallsyms_lookup_name("__mod_tree_remove");
+ if (!ulInsert || !ulRemove)
+ {
+ g_fLookedForModTreeFunctions = true;
+ printk(KERN_ERR "vboxdrv: failed to locate __mod_tree_insert and __mod_tree_remove.\n");
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return;
+ }
+ *(unsigned long *)&g_pfnModTreeInsert = ulInsert;
+ *(unsigned long *)&g_pfnModTreeRemove = ulRemove;
+ ASMCompilerBarrier();
+ g_fLookedForModTreeFunctions = true;
+ }
+ else if (!g_pfnModTreeInsert || !g_pfnModTreeRemove)
+ return;
+#endif
+
+ /*
+ * Make sure we've found our own module, otherwise we cannot access the linked list.
+ */
+ mutex_lock(&module_mutex);
+ pSelfMod = find_module("vboxdrv");
+ mutex_unlock(&module_mutex);
+ if (!pSelfMod)
+ {
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return;
+ }
+
+ /*
+ * Cook up a module structure for the image.
+ * We allocate symbol and string tables in the allocation and the module to keep things simple.
+ */
+# ifdef CONFIG_KALLSYMS
+ pMyMod = (struct module *)RTMemAllocZ(sizeof(*pMyMod)
+ + sizeof(Elf_Sym) * 3
+ + 1 + cchName * 2 + sizeof("_start") + sizeof("_end") + 4 );
+# else
+ pMyMod = (struct module *)RTMemAllocZ(sizeof(*pMyMod));
+# endif
+ if (pMyMod)
+ {
+ int rc = VINF_SUCCESS;
+# ifdef CONFIG_KALLSYMS
+ Elf_Sym *paSymbols = (Elf_Sym *)(pMyMod + 1);
+ char *pchStrTab = (char *)(paSymbols + 3);
+# endif
+
+ pMyMod->state = MODULE_STATE_LIVE;
+ INIT_LIST_HEAD(&pMyMod->list); /* just in case */
+
+ /* Perf only matches up files with a .ko extension (maybe .ko.gz),
+ so in order for this crap to work smoothly, we append .ko to the
+ module name and require the user to create symbolic links in
+ /lib/modules/`uname -r`:
+ for i in VMMR0.r0 VBoxDDR0.r0 VBoxDD2R0.r0; do
+ sudo ln -s /mnt/scratch/vbox/svn/trunk/out/linux.amd64/debug/bin/$i /lib/modules/`uname -r`/$i.ko;
+ done */
+ RTStrPrintf(pMyMod->name, sizeof(pMyMod->name), "%s", pImage->szName);
+
+ /* sysfs bits. */
+ INIT_LIST_HEAD(&pMyMod->mkobj.kobj.entry); /* rest of kobj is already zeroed, hopefully never accessed... */
+ pMyMod->mkobj.mod = pMyMod;
+ pMyMod->mkobj.drivers_dir = NULL;
+ pMyMod->mkobj.mp = NULL;
+ pMyMod->mkobj.kobj_completion = NULL;
+
+ pMyMod->modinfo_attrs = NULL; /* hopefully not accessed after setup. */
+ pMyMod->holders_dir = NULL; /* hopefully not accessed. */
+ pMyMod->version = "N/A";
+ pMyMod->srcversion = "N/A";
+
+ /* We export no symbols. */
+ pMyMod->num_syms = 0;
+ pMyMod->syms = NULL;
+ pMyMod->crcs = NULL;
+
+ pMyMod->num_gpl_syms = 0;
+ pMyMod->gpl_syms = NULL;
+ pMyMod->gpl_crcs = NULL;
+
+ pMyMod->num_gpl_future_syms = 0;
+ pMyMod->gpl_future_syms = NULL;
+ pMyMod->gpl_future_crcs = NULL;
+
+# if CONFIG_UNUSED_SYMBOLS
+ pMyMod->num_unused_syms = 0;
+ pMyMod->unused_syms = NULL;
+ pMyMod->unused_crcs = NULL;
+
+ pMyMod->num_unused_gpl_syms = 0;
+ pMyMod->unused_gpl_syms = NULL;
+ pMyMod->unused_gpl_crcs = NULL;
+# endif
+ /* No kernel parameters either. */
+ pMyMod->kp = NULL;
+ pMyMod->num_kp = 0;
+
+# ifdef CONFIG_MODULE_SIG
+ /* Pretend ok signature. */
+ pMyMod->sig_ok = true;
+# endif
+ /* No exception table. */
+ pMyMod->num_exentries = 0;
+ pMyMod->extable = NULL;
+
+ /* No init function */
+ pMyMod->init = NULL;
+ pMyMod->module_init = NULL;
+ pMyMod->init_size = 0;
+ pMyMod->init_ro_size = 0;
+ pMyMod->init_text_size = 0;
+
+ /* The module address and size. It's all text. */
+ pMyMod->module_core = pImage->pvImage;
+ pMyMod->core_size = pImage->cbImageBits;
+ pMyMod->core_text_size = pImage->cbImageBits;
+ pMyMod->core_ro_size = pImage->cbImageBits;
+
+#ifdef CONFIG_MODULES_TREE_LOOKUP
+ /* Fill in the self pointers for the tree nodes. */
+ pMyMod->mtn_core.mod = pMyMod;
+ pMyMod->mtn_init.mod = pMyMod;
+#endif
+ /* They invented the tained bit for us, didn't they? */
+ pMyMod->taints = 1;
+
+# ifdef CONFIG_GENERIC_BUGS
+ /* No BUGs in our modules. */
+ pMyMod->num_bugs = 0;
+ INIT_LIST_HEAD(&pMyMod->bug_list);
+ pMyMod->bug_table = NULL;
+# endif
+
+# ifdef CONFIG_KALLSYMS
+ /* The core stuff is documented as only used when loading. So just zero them. */
+ pMyMod->core_num_syms = 0;
+ pMyMod->core_symtab = NULL;
+ pMyMod->core_strtab = NULL;
+
+ /* Construct a symbol table with start and end symbols.
+ Note! We don't have our own symbol table at this point, image bit
+ are not uploaded yet! */
+ pMyMod->num_symtab = 3;
+ pMyMod->symtab = paSymbols;
+ pMyMod->strtab = pchStrTab;
+ RT_ZERO(paSymbols[0]);
+ pchStrTab[0] = '\0';
+ paSymbols[1].st_name = 1;
+ paSymbols[2].st_name = 2 + RTStrPrintf(&pchStrTab[paSymbols[1].st_name], cchName + sizeof("_start"),
+ "%s_start", pImage->szName);
+ RTStrPrintf(&pchStrTab[paSymbols[2].st_name], cchName + sizeof("_end"), "%s_end", pImage->szName);
+ paSymbols[1].st_info = 't';
+ paSymbols[2].st_info = 'b';
+ paSymbols[1].st_other = 0;
+ paSymbols[2].st_other = 0;
+ paSymbols[1].st_shndx = 0;
+ paSymbols[2].st_shndx = 0;
+ paSymbols[1].st_value = (uintptr_t)pImage->pvImage;
+ paSymbols[2].st_value = (uintptr_t)pImage->pvImage + pImage->cbImageBits - 1;
+ paSymbols[1].st_size = pImage->cbImageBits - 1;
+ paSymbols[2].st_size = 1;
+# endif
+ /* No arguments, but seems its always non-NULL so put empty string there. */
+ pMyMod->args = "";
+
+# ifdef CONFIG_SMP
+ /* No per CPU data. */
+ pMyMod->percpu = NULL;
+ pMyMod->percpu_size = 0;
+# endif
+# ifdef CONFIG_TRACEPOINTS
+ /* No tracepoints we like to share. */
+ pMyMod->num_tracepoints = 0;
+ pMyMod->tracepoints_ptrs = NULL;
+#endif
+# ifdef HAVE_JUMP_LABEL
+ /* No jump lable stuff either. */
+ pMyMod->jump_entries = NULL;
+ pMyMod->num_jump_entries = 0;
+# endif
+# ifdef CONFIG_TRACING
+ pMyMod->num_trace_bprintk_fmt = 0;
+ pMyMod->trace_bprintk_fmt_start = NULL;
+# endif
+# ifdef CONFIG_EVENT_TRACING
+ pMyMod->trace_events = NULL;
+ pMyMod->num_trace_events = 0;
+# endif
+# ifdef CONFIG_FTRACE_MCOUNT_RECORD
+ pMyMod->num_ftrace_callsites = 0;
+ pMyMod->ftrace_callsites = NULL;
+# endif
+# ifdef CONFIG_MODULE_UNLOAD
+ /* Dependency lists, not worth sharing */
+ INIT_LIST_HEAD(&pMyMod->source_list);
+ INIT_LIST_HEAD(&pMyMod->target_list);
+
+ /* Nobody waiting and no exit function. */
+# if RTLNX_VER_MAX(3,13,0)
+ pMyMod->waiter = NULL;
+# endif
+ pMyMod->exit = NULL;
+
+ /* References, very important as we must not allow the module
+ to be unloaded using rmmod. */
+# if RTLNX_VER_MIN(3,19,0)
+ atomic_set(&pMyMod->refcnt, 42);
+# else
+ pMyMod->refptr = alloc_percpu(struct module_ref);
+ if (pMyMod->refptr)
+ {
+ int iCpu;
+ for_each_possible_cpu(iCpu)
+ {
+ per_cpu_ptr(pMyMod->refptr, iCpu)->decs = 0;
+ per_cpu_ptr(pMyMod->refptr, iCpu)->incs = 1;
+ }
+ }
+ else
+ rc = VERR_NO_MEMORY;
+# endif
+# endif
+# ifdef CONFIG_CONSTRUCTORS
+ /* No constructors. */
+ pMyMod->ctors = NULL;
+ pMyMod->num_ctors = 0;
+# endif
+ if (RT_SUCCESS(rc))
+ {
+ bool fIsModText;
+
+ /*
+ * Add the module to the list.
+ */
+ mutex_lock(&module_mutex);
+ list_add_rcu(&pMyMod->list, &pSelfMod->list);
+ pImage->pLnxModHack = pMyMod;
+# ifdef CONFIG_MODULES_TREE_LOOKUP
+ g_pfnModTreeInsert(&pMyMod->mtn_core); /* __mod_tree_insert */
+# endif
+ mutex_unlock(&module_mutex);
+
+ /*
+ * Test it.
+ */
+ mutex_lock(&module_mutex);
+ pTestModByName = find_module(pMyMod->name);
+ pTestMod = __module_address((uintptr_t)pImage->pvImage + pImage->cbImageBits / 4);
+ fIsModText = __module_text_address((uintptr_t)pImage->pvImage + pImage->cbImageBits / 2);
+ mutex_unlock(&module_mutex);
+ if ( pTestMod == pMyMod
+ && pTestModByName == pMyMod
+ && fIsModText)
+ printk(KERN_ERR "vboxdrv: fake module works for '%s' (%#lx to %#lx)\n",
+ pMyMod->name, (unsigned long)paSymbols[1].st_value, (unsigned long)paSymbols[2].st_value);
+ else
+ printk(KERN_ERR "vboxdrv: failed to find fake module (pTestMod=%p, pTestModByName=%p, pMyMod=%p, fIsModText=%d)\n",
+ pTestMod, pTestModByName, pMyMod, fIsModText);
+ }
+ else
+ RTMemFree(pMyMod);
+ }
+
+ IPRT_LINUX_RESTORE_EFL_AC();
+#else
+ pImage->pLnxModHack = NULL;
+#endif
+ NOREF(pDevExt); NOREF(pImage);
+}
+
+
+void VBOXCALL supdrvOSLdrNotifyUnloaded(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage)
+{
+#ifdef VBOX_WITH_NON_PROD_HACK_FOR_PERF_STACKS /* Not for production use!! Debugging only! */
+ struct module *pMyMod = pImage->pLnxModHack;
+ pImage->pLnxModHack = NULL;
+ if (pMyMod)
+ {
+ /*
+ * Remove the fake module list entry and free it.
+ */
+ IPRT_LINUX_SAVE_EFL_AC();
+ mutex_lock(&module_mutex);
+ list_del_rcu(&pMyMod->list);
+# ifdef CONFIG_MODULES_TREE_LOOKUP
+ g_pfnModTreeRemove(&pMyMod->mtn_core);
+# endif
+ synchronize_sched();
+ mutex_unlock(&module_mutex);
+
+# if RTLNX_VER_MAX(3,19,0)
+ free_percpu(pMyMod->refptr);
+# endif
+ RTMemFree(pMyMod);
+ IPRT_LINUX_RESTORE_EFL_AC();
+ }
+
+#else
+ Assert(pImage->pLnxModHack == NULL);
+#endif
+ NOREF(pDevExt); NOREF(pImage);
+}
+
+
+int VBOXCALL supdrvOSLdrQuerySymbol(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage,
+ const char *pszSymbol, size_t cchSymbol, void **ppvSymbol)
+{
+#ifdef VBOX_WITH_NON_PROD_HACK_FOR_PERF_STACKS
+# error "implement me!"
+#endif
+ RT_NOREF(pDevExt, pImage, pszSymbol, cchSymbol, ppvSymbol);
+ return VERR_WRONG_ORDER;
+}
+
+
+void VBOXCALL supdrvOSLdrRetainWrapperModule(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage)
+{
+ struct module *pLnxMod = (struct module *)pImage->pvWrappedNative;
+ Assert(!pImage->fLnxWrapperRef);
+ AssertReturnVoid(pLnxMod);
+ pImage->fLnxWrapperRef = try_module_get(pLnxMod);
+ RT_NOREF(pDevExt);
+}
+
+
+void VBOXCALL supdrvOSLdrReleaseWrapperModule(PSUPDRVDEVEXT pDevExt, PSUPDRVLDRIMAGE pImage)
+{
+ if (pImage->fLnxWrapperRef)
+ {
+ struct module *pLnxMod = (struct module *)pImage->pvWrappedNative;
+ pImage->fLnxWrapperRef = false;
+ module_put(pLnxMod);
+ }
+ RT_NOREF(pDevExt);
+}
+
+
+#ifdef SUPDRV_WITH_MSR_PROBER
+
+int VBOXCALL supdrvOSMsrProberRead(uint32_t uMsr, RTCPUID idCpu, uint64_t *puValue)
+{
+# ifdef SUPDRV_LINUX_HAS_SAFE_MSR_API
+ uint32_t u32Low, u32High;
+ int rc;
+
+ IPRT_LINUX_SAVE_EFL_AC();
+ if (idCpu == NIL_RTCPUID)
+ rc = rdmsr_safe(uMsr, &u32Low, &u32High);
+ else if (RTMpIsCpuOnline(idCpu))
+ rc = rdmsr_safe_on_cpu(idCpu, uMsr, &u32Low, &u32High);
+ else
+ return VERR_CPU_OFFLINE;
+ IPRT_LINUX_RESTORE_EFL_AC();
+ if (rc == 0)
+ {
+ *puValue = RT_MAKE_U64(u32Low, u32High);
+ return VINF_SUCCESS;
+ }
+ return VERR_ACCESS_DENIED;
+# else
+ return VERR_NOT_SUPPORTED;
+# endif
+}
+
+
+int VBOXCALL supdrvOSMsrProberWrite(uint32_t uMsr, RTCPUID idCpu, uint64_t uValue)
+{
+# ifdef SUPDRV_LINUX_HAS_SAFE_MSR_API
+ int rc;
+
+ IPRT_LINUX_SAVE_EFL_AC();
+ if (idCpu == NIL_RTCPUID)
+ rc = wrmsr_safe(uMsr, RT_LODWORD(uValue), RT_HIDWORD(uValue));
+ else if (RTMpIsCpuOnline(idCpu))
+ rc = wrmsr_safe_on_cpu(idCpu, uMsr, RT_LODWORD(uValue), RT_HIDWORD(uValue));
+ else
+ return VERR_CPU_OFFLINE;
+ IPRT_LINUX_RESTORE_EFL_AC();
+
+ if (rc == 0)
+ return VINF_SUCCESS;
+ return VERR_ACCESS_DENIED;
+# else
+ return VERR_NOT_SUPPORTED;
+# endif
+}
+
+# ifdef SUPDRV_LINUX_HAS_SAFE_MSR_API
+/**
+ * Worker for supdrvOSMsrProberModify.
+ */
+static DECLCALLBACK(void) supdrvLnxMsrProberModifyOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
+{
+ PSUPMSRPROBER pReq = (PSUPMSRPROBER)pvUser1;
+ register uint32_t uMsr = pReq->u.In.uMsr;
+ bool const fFaster = pReq->u.In.enmOp == SUPMSRPROBEROP_MODIFY_FASTER;
+ uint64_t uBefore;
+ uint64_t uWritten;
+ uint64_t uAfter;
+ int rcBefore, rcWrite, rcAfter, rcRestore;
+ RTCCUINTREG fOldFlags;
+
+ /* Initialize result variables. */
+ uBefore = uWritten = uAfter = 0;
+ rcWrite = rcAfter = rcRestore = -EIO;
+
+ /*
+ * Do the job.
+ */
+ fOldFlags = ASMIntDisableFlags();
+ ASMCompilerBarrier(); /* paranoia */
+ if (!fFaster)
+ ASMWriteBackAndInvalidateCaches();
+
+ rcBefore = rdmsrl_safe(uMsr, &uBefore);
+ if (rcBefore >= 0)
+ {
+ register uint64_t uRestore = uBefore;
+ uWritten = uRestore;
+ uWritten &= pReq->u.In.uArgs.Modify.fAndMask;
+ uWritten |= pReq->u.In.uArgs.Modify.fOrMask;
+
+ rcWrite = wrmsr_safe(uMsr, RT_LODWORD(uWritten), RT_HIDWORD(uWritten));
+ rcAfter = rdmsrl_safe(uMsr, &uAfter);
+ rcRestore = wrmsr_safe(uMsr, RT_LODWORD(uRestore), RT_HIDWORD(uRestore));
+
+ if (!fFaster)
+ {
+ ASMWriteBackAndInvalidateCaches();
+ ASMReloadCR3();
+ ASMNopPause();
+ }
+ }
+
+ ASMCompilerBarrier(); /* paranoia */
+ ASMSetFlags(fOldFlags);
+
+ /*
+ * Write out the results.
+ */
+ pReq->u.Out.uResults.Modify.uBefore = uBefore;
+ pReq->u.Out.uResults.Modify.uWritten = uWritten;
+ pReq->u.Out.uResults.Modify.uAfter = uAfter;
+ pReq->u.Out.uResults.Modify.fBeforeGp = rcBefore != 0;
+ pReq->u.Out.uResults.Modify.fModifyGp = rcWrite != 0;
+ pReq->u.Out.uResults.Modify.fAfterGp = rcAfter != 0;
+ pReq->u.Out.uResults.Modify.fRestoreGp = rcRestore != 0;
+ RT_ZERO(pReq->u.Out.uResults.Modify.afReserved);
+}
+# endif
+
+
+int VBOXCALL supdrvOSMsrProberModify(RTCPUID idCpu, PSUPMSRPROBER pReq)
+{
+# ifdef SUPDRV_LINUX_HAS_SAFE_MSR_API
+ if (idCpu == NIL_RTCPUID)
+ {
+ supdrvLnxMsrProberModifyOnCpu(idCpu, pReq, NULL);
+ return VINF_SUCCESS;
+ }
+ return RTMpOnSpecific(idCpu, supdrvLnxMsrProberModifyOnCpu, pReq, NULL);
+# else
+ return VERR_NOT_SUPPORTED;
+# endif
+}
+
+#endif /* SUPDRV_WITH_MSR_PROBER */
+
+
+/**
+ * Converts a supdrv error code to an linux error code.
+ *
+ * @returns corresponding linux error code.
+ * @param rc IPRT status code.
+ */
+static int VBoxDrvLinuxErr2LinuxErr(int rc)
+{
+ switch (rc)
+ {
+ case VINF_SUCCESS: return 0;
+ case VERR_GENERAL_FAILURE: return -EACCES;
+ case VERR_INVALID_PARAMETER: return -EINVAL;
+ case VERR_INVALID_MAGIC: return -EILSEQ;
+ case VERR_INVALID_HANDLE: return -ENXIO;
+ case VERR_INVALID_POINTER: return -EFAULT;
+ case VERR_LOCK_FAILED: return -ENOLCK;
+ case VERR_ALREADY_LOADED: return -EEXIST;
+ case VERR_PERMISSION_DENIED: return -EPERM;
+ case VERR_VERSION_MISMATCH: return -ENOSYS;
+ case VERR_IDT_FAILED: return -1000;
+ }
+
+ return -EPERM;
+}
+
+
+SUPR0DECL(int) SUPR0HCPhysToVirt(RTHCPHYS HCPhys, void **ppv)
+{
+ AssertReturn(!(HCPhys & PAGE_OFFSET_MASK), VERR_INVALID_POINTER);
+ AssertReturn(HCPhys != NIL_RTHCPHYS, VERR_INVALID_POINTER);
+ /* Would've like to use valid_phys_addr_range for this test, but it isn't exported. */
+ AssertReturn((HCPhys | PAGE_OFFSET_MASK) < __pa(high_memory), VERR_INVALID_POINTER);
+ *ppv = phys_to_virt(HCPhys);
+ return VINF_SUCCESS;
+}
+SUPR0_EXPORT_SYMBOL(SUPR0HCPhysToVirt);
+
+
+RTDECL(int) SUPR0PrintfV(const char *pszFormat, va_list va)
+{
+ char szMsg[512];
+ IPRT_LINUX_SAVE_EFL_AC();
+
+ RTStrPrintfV(szMsg, sizeof(szMsg) - 1, pszFormat, va);
+ szMsg[sizeof(szMsg) - 1] = '\0';
+
+ printk("%s", szMsg);
+
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return 0;
+}
+SUPR0_EXPORT_SYMBOL(SUPR0PrintfV);
+
+
+SUPR0DECL(uint32_t) SUPR0GetKernelFeatures(void)
+{
+ uint32_t fFlags = 0;
+#ifdef CONFIG_PAX_KERNEXEC
+ fFlags |= SUPKERNELFEATURES_GDT_READ_ONLY;
+#endif
+#if RTLNX_VER_MIN(4,12,0)
+ fFlags |= SUPKERNELFEATURES_GDT_NEED_WRITABLE;
+#endif
+#if defined(VBOX_STRICT) || defined(VBOX_WITH_EFLAGS_AC_SET_IN_VBOXDRV)
+ fFlags |= SUPKERNELFEATURES_SMAP;
+#elif defined(CONFIG_X86_SMAP)
+ if (ASMGetCR4() & X86_CR4_SMAP)
+ fFlags |= SUPKERNELFEATURES_SMAP;
+#endif
+ return fFlags;
+}
+SUPR0_EXPORT_SYMBOL(SUPR0GetKernelFeatures);
+
+
+SUPR0DECL(bool) SUPR0FpuBegin(bool fCtxHook)
+{
+ RT_NOREF(fCtxHook);
+#if RTLNX_VER_MIN(4,19,0) /* Going back to 4.19.0 for better coverage, we
+ probably only need 5.17.7+ in the end. */
+ /*
+ * HACK ALERT!
+ *
+ * We'd like to use the old __kernel_fpu_begin() API which was removed in
+ * early 2019, because we typically run with preemption enabled and have an
+ * preemption hook installed which will call kernel_fpu_end() in case we're
+ * scheduled out after getting in here. The preemption hook is almost
+ * useless if we run with preemption disabled.
+ *
+ * For the case where the kernel does not have preemption hooks, we get here
+ * with preemption already disabled and one more count doesn't make any
+ * difference.
+ *
+ * So, after the kernel_fpu_begin() call we undo the implicit preempt_disable()
+ * call it does, so the preemption hook can do its work and the VBox user has
+ * a more responsive system.
+ *
+ * See @bugref{10209#c12} and onwards for more details.
+ */
+ Assert(fCtxHook || !RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ kernel_fpu_begin();
+# if 0 /* Always do it for now for better test coverage. */
+ if (fCtxHook)
+# endif
+ preempt_enable();
+ return false; /** @todo Not sure if we have license to use any extended state, or
+ * if we're limited to the SSE & x87 FPU. If it's the former,
+ * we should return @a true and the caller can skip
+ * saving+restoring the host state and save some time. */
+#else
+ return false;
+#endif
+}
+SUPR0_EXPORT_SYMBOL(SUPR0FpuBegin);
+
+
+SUPR0DECL(void) SUPR0FpuEnd(bool fCtxHook)
+{
+ RT_NOREF(fCtxHook);
+#if RTLNX_VER_MIN(4,19,0)
+ /* HACK ALERT! See SUPR0FpuBegin for an explanation of this. */
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+# if 0 /* Always do it for now for better test coverage. */
+ if (fCtxHook)
+# endif
+ preempt_disable();
+ kernel_fpu_end();
+#endif
+}
+SUPR0_EXPORT_SYMBOL(SUPR0FpuEnd);
+
+
+int VBOXCALL supdrvOSGetCurrentGdtRw(RTHCUINTPTR *pGdtRw)
+{
+#if RTLNX_VER_MIN(4,12,0)
+ *pGdtRw = (RTHCUINTPTR)get_current_gdt_rw();
+ return VINF_SUCCESS;
+#else
+ return VERR_NOT_IMPLEMENTED;
+#endif
+}
+
+
+module_init(VBoxDrvLinuxInit);
+module_exit(VBoxDrvLinuxUnload);
+
+MODULE_AUTHOR(VBOX_VENDOR);
+MODULE_DESCRIPTION(VBOX_PRODUCT " Support Driver");
+MODULE_LICENSE("GPL");
+#ifdef MODULE_VERSION
+MODULE_VERSION(VBOX_VERSION_STRING " r" RT_XSTR(VBOX_SVN_REV) " (" RT_XSTR(SUPDRV_IOC_VERSION) ")");
+#endif
+
+module_param(force_async_tsc, int, 0444);
+MODULE_PARM_DESC(force_async_tsc, "force the asynchronous TSC mode");
+