author    Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-11 08:17:27 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-11 08:17:27 +0000
commit    f215e02bf85f68d3a6106c2a1f4f7f063f819064 (patch)
tree      6bb5b92c046312c4e95ac2620b10ddf482d3fa8b /src/VBox/Runtime/r0drv/linux
parent    Initial commit. (diff)
Adding upstream version 7.0.14-dfsg. (upstream/7.0.14-dfsg)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/VBox/Runtime/r0drv/linux')
-rw-r--r--  src/VBox/Runtime/r0drv/linux/Makefile.kup  0
-rw-r--r--  src/VBox/Runtime/r0drv/linux/RTLogWriteDebugger-r0drv-linux.c  53
-rw-r--r--  src/VBox/Runtime/r0drv/linux/alloc-r0drv-linux.c  257
-rw-r--r--  src/VBox/Runtime/r0drv/linux/assert-r0drv-linux.c  84
-rw-r--r--  src/VBox/Runtime/r0drv/linux/initterm-r0drv-linux.c  138
-rw-r--r--  src/VBox/Runtime/r0drv/linux/memobj-r0drv-linux.c  2104
-rw-r--r--  src/VBox/Runtime/r0drv/linux/memuserkernel-r0drv-linux.c  191
-rw-r--r--  src/VBox/Runtime/r0drv/linux/mp-r0drv-linux.c  640
-rw-r--r--  src/VBox/Runtime/r0drv/linux/mpnotification-r0drv-linux.c  258
-rw-r--r--  src/VBox/Runtime/r0drv/linux/process-r0drv-linux.c  59
-rw-r--r--  src/VBox/Runtime/r0drv/linux/rtStrFormatKernelAddress-r0drv-linux.c  66
-rw-r--r--  src/VBox/Runtime/r0drv/linux/semevent-r0drv-linux.c  296
-rw-r--r--  src/VBox/Runtime/r0drv/linux/semeventmulti-r0drv-linux.c  361
-rw-r--r--  src/VBox/Runtime/r0drv/linux/semfastmutex-r0drv-linux.c  167
-rw-r--r--  src/VBox/Runtime/r0drv/linux/semmutex-r0drv-linux.c  431
-rw-r--r--  src/VBox/Runtime/r0drv/linux/spinlock-r0drv-linux.c  196
-rw-r--r--  src/VBox/Runtime/r0drv/linux/string.h  70
-rw-r--r--  src/VBox/Runtime/r0drv/linux/the-linux-kernel.h  494
-rw-r--r--  src/VBox/Runtime/r0drv/linux/thread-r0drv-linux.c  285
-rw-r--r--  src/VBox/Runtime/r0drv/linux/thread2-r0drv-linux.c  243
-rw-r--r--  src/VBox/Runtime/r0drv/linux/threadctxhooks-r0drv-linux.c  341
-rw-r--r--  src/VBox/Runtime/r0drv/linux/time-r0drv-linux.c  221
-rw-r--r--  src/VBox/Runtime/r0drv/linux/timer-r0drv-linux.c  1739
-rw-r--r--  src/VBox/Runtime/r0drv/linux/waitqueue-r0drv-linux.h  302
24 files changed, 8996 insertions, 0 deletions
diff --git a/src/VBox/Runtime/r0drv/linux/Makefile.kup b/src/VBox/Runtime/r0drv/linux/Makefile.kup
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/src/VBox/Runtime/r0drv/linux/Makefile.kup
diff --git a/src/VBox/Runtime/r0drv/linux/RTLogWriteDebugger-r0drv-linux.c b/src/VBox/Runtime/r0drv/linux/RTLogWriteDebugger-r0drv-linux.c
new file mode 100644
index 00000000..7de10556
--- /dev/null
+++ b/src/VBox/Runtime/r0drv/linux/RTLogWriteDebugger-r0drv-linux.c
@@ -0,0 +1,53 @@
+/* $Id: RTLogWriteDebugger-r0drv-linux.c $ */
+/** @file
+ * IPRT - Log To Debugger, Ring-0 Driver, Linux.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#include "the-linux-kernel.h"
+#include "internal/iprt.h"
+#include <iprt/log.h>
+
+
+RTDECL(void) RTLogWriteDebugger(const char *pch, size_t cb)
+{
+ IPRT_LINUX_SAVE_EFL_AC();
+ printk("%.*s", (int)cb, pch);
+ IPRT_LINUX_RESTORE_EFL_AC();
+}
+RT_EXPORT_SYMBOL(RTLogWriteDebugger);
+
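For orientation, a minimal ring-0 caller of the routine above might look like the sketch below. The helper name and message text are illustrative only and not part of this patch; the call simply forwards the bytes to printk as shown.

    /* Hypothetical caller sketch (assumes the same includes as the file above). */
    static void vboxExampleLogHello(void)
    {
        static const char s_szMsg[] = "vboxdrv: hello from ring-0\n";
        RTLogWriteDebugger(s_szMsg, sizeof(s_szMsg) - 1); /* length excludes the terminator */
    }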
diff --git a/src/VBox/Runtime/r0drv/linux/alloc-r0drv-linux.c b/src/VBox/Runtime/r0drv/linux/alloc-r0drv-linux.c
new file mode 100644
index 00000000..47410cca
--- /dev/null
+++ b/src/VBox/Runtime/r0drv/linux/alloc-r0drv-linux.c
@@ -0,0 +1,257 @@
+/* $Id: alloc-r0drv-linux.c $ */
+/** @file
+ * IPRT - Memory Allocation, Ring-0 Driver, Linux.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#include "the-linux-kernel.h"
+#include "internal/iprt.h"
+#include <iprt/mem.h>
+
+#include <iprt/assert.h>
+#include <iprt/errcore.h>
+#include "r0drv/alloc-r0drv.h"
+
+#include "internal/initterm.h"
+
+
+
+/**
+ * OS specific allocation function.
+ */
+DECLHIDDEN(int) rtR0MemAllocEx(size_t cb, uint32_t fFlags, PRTMEMHDR *ppHdr)
+{
+ PRTMEMHDR pHdr;
+ IPRT_LINUX_SAVE_EFL_AC();
+
+ /*
+ * Allocate.
+ */
+ if (
+#if 1 /* vmalloc has serious performance issues, avoid it. */
+ cb <= PAGE_SIZE*16 - sizeof(*pHdr)
+#else
+ cb <= PAGE_SIZE
+#endif
+ || (fFlags & RTMEMHDR_FLAG_ANY_CTX)
+ )
+ {
+ fFlags |= RTMEMHDR_FLAG_KMALLOC;
+ pHdr = kmalloc(cb + sizeof(*pHdr),
+ fFlags & RTMEMHDR_FLAG_ANY_CTX_ALLOC ? GFP_ATOMIC | __GFP_NOWARN : GFP_KERNEL | __GFP_NOWARN);
+ if (RT_UNLIKELY( !pHdr
+ && cb > PAGE_SIZE
+ && !(fFlags & RTMEMHDR_FLAG_ANY_CTX) ))
+ {
+ fFlags &= ~RTMEMHDR_FLAG_KMALLOC;
+ pHdr = vmalloc(cb + sizeof(*pHdr));
+ }
+ }
+ else
+ pHdr = vmalloc(cb + sizeof(*pHdr));
+ if (RT_LIKELY(pHdr))
+ {
+ /*
+ * Initialize.
+ */
+ pHdr->u32Magic = RTMEMHDR_MAGIC;
+ pHdr->fFlags = fFlags;
+ pHdr->cb = cb;
+ pHdr->cbReq = cb;
+
+ *ppHdr = pHdr;
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return VINF_SUCCESS;
+ }
+
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return VERR_NO_MEMORY;
+}
+
+
+/**
+ * OS specific free function.
+ */
+DECLHIDDEN(void) rtR0MemFree(PRTMEMHDR pHdr)
+{
+ IPRT_LINUX_SAVE_EFL_AC();
+
+ pHdr->u32Magic += 1;
+ if (pHdr->fFlags & RTMEMHDR_FLAG_KMALLOC)
+ kfree(pHdr);
+ else
+ vfree(pHdr);
+
+ IPRT_LINUX_RESTORE_EFL_AC();
+}
+
+
+
+/**
+ * Compute order. Some functions allocate 2^order pages.
+ *
+ * @returns order.
+ * @param cPages Number of pages.
+ */
+static int CalcPowerOf2Order(unsigned long cPages)
+{
+ int iOrder;
+ unsigned long cTmp;
+
+ for (iOrder = 0, cTmp = cPages; cTmp >>= 1; ++iOrder)
+ ;
+ if (cPages & ~(1 << iOrder))
+ ++iOrder;
+
+ return iOrder;
+}
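A few worked values (illustrative, not part of the patch) make the rounding behaviour of this helper clearer: the loop finds the index of the highest set bit, and the final test bumps the order when cPages is not an exact power of two.

    /* CalcPowerOf2Order(1) == 0  ->  1 page
       CalcPowerOf2Order(2) == 1  ->  2 pages
       CalcPowerOf2Order(3) == 2  ->  4 pages (rounded up)
       CalcPowerOf2Order(8) == 3  ->  8 pages */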
+
+
+/**
+ * Allocates physical contiguous memory (below 4GB).
+ * The allocation is page aligned and the content is undefined.
+ *
+ * @returns Pointer to the memory block. This is page aligned.
+ * @param pPhys Where to store the physical address.
+ * @param cb The allocation size in bytes. This is always
+ * rounded up to PAGE_SIZE.
+ */
+RTR0DECL(void *) RTMemContAlloc(PRTCCPHYS pPhys, size_t cb)
+{
+ int cOrder;
+ unsigned cPages;
+ struct page *paPages;
+ void *pvRet;
+ IPRT_LINUX_SAVE_EFL_AC();
+
+ /*
+ * validate input.
+ */
+ AssertPtr(pPhys);
+ Assert(cb > 0);
+
+ /*
+ * Allocate page pointer array.
+ */
+ cb = RT_ALIGN_Z(cb, PAGE_SIZE);
+ cPages = cb >> PAGE_SHIFT;
+ cOrder = CalcPowerOf2Order(cPages);
+#if (defined(RT_ARCH_AMD64) || defined(CONFIG_X86_PAE)) && defined(GFP_DMA32)
+ /* ZONE_DMA32: 0-4GB */
+ paPages = alloc_pages(GFP_DMA32 | __GFP_NOWARN, cOrder);
+ if (!paPages)
+#endif
+#ifdef RT_ARCH_AMD64
+ /* ZONE_DMA: 0-16MB */
+ paPages = alloc_pages(GFP_DMA | __GFP_NOWARN, cOrder);
+#else
+ /* ZONE_NORMAL: 0-896MB */
+ paPages = alloc_pages(GFP_USER | __GFP_NOWARN, cOrder);
+#endif
+ if (paPages)
+ {
+ /*
+ * Reserve the pages and mark them executable.
+ */
+ unsigned iPage;
+ for (iPage = 0; iPage < cPages; iPage++)
+ {
+ Assert(!PageHighMem(&paPages[iPage]));
+ if (iPage + 1 < cPages)
+ {
+ AssertMsg( (uintptr_t)phys_to_virt(page_to_phys(&paPages[iPage])) + PAGE_SIZE
+ == (uintptr_t)phys_to_virt(page_to_phys(&paPages[iPage + 1]))
+ && page_to_phys(&paPages[iPage]) + PAGE_SIZE
+ == page_to_phys(&paPages[iPage + 1]),
+ ("iPage=%i cPages=%u [0]=%#llx,%p [1]=%#llx,%p\n", iPage, cPages,
+ (long long)page_to_phys(&paPages[iPage]), phys_to_virt(page_to_phys(&paPages[iPage])),
+ (long long)page_to_phys(&paPages[iPage + 1]), phys_to_virt(page_to_phys(&paPages[iPage + 1])) ));
+ }
+
+ SetPageReserved(&paPages[iPage]);
+ }
+ *pPhys = page_to_phys(paPages);
+ pvRet = phys_to_virt(page_to_phys(paPages));
+ }
+ else
+ pvRet = NULL;
+
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return pvRet;
+}
+RT_EXPORT_SYMBOL(RTMemContAlloc);
+
+
+/**
+ * Frees memory allocated using RTMemContAlloc().
+ *
+ * @param pv Pointer to return from RTMemContAlloc().
+ * @param cb The cb parameter passed to RTMemContAlloc().
+ */
+RTR0DECL(void) RTMemContFree(void *pv, size_t cb)
+{
+ if (pv)
+ {
+ int cOrder;
+ unsigned cPages;
+ unsigned iPage;
+ struct page *paPages;
+ IPRT_LINUX_SAVE_EFL_AC();
+
+ /* validate */
+ AssertMsg(!((uintptr_t)pv & PAGE_OFFSET_MASK), ("pv=%p\n", pv));
+ Assert(cb > 0);
+
+ /* calc order and get pages */
+ cb = RT_ALIGN_Z(cb, PAGE_SIZE);
+ cPages = cb >> PAGE_SHIFT;
+ cOrder = CalcPowerOf2Order(cPages);
+ paPages = virt_to_page(pv);
+
+ /*
+ * Restore page attributes freeing the pages.
+ */
+ for (iPage = 0; iPage < cPages; iPage++)
+ {
+ ClearPageReserved(&paPages[iPage]);
+ }
+ __free_pages(paPages, cOrder);
+ IPRT_LINUX_RESTORE_EFL_AC();
+ }
+}
+RT_EXPORT_SYMBOL(RTMemContFree);
+
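A hedged sketch of how a driver could pair the two exports above; the buffer size and the DMA use of the returned physical address are assumptions for illustration, not part of this patch.

    RTCCPHYS PhysAddr;
    void *pvBuf = RTMemContAlloc(&PhysAddr, _64K); /* page aligned, physically contiguous, below 4GB */
    if (pvBuf)
    {
        /* ... hand PhysAddr to the device, access the buffer through pvBuf ... */
        RTMemContFree(pvBuf, _64K); /* same size as passed to RTMemContAlloc */
    }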
diff --git a/src/VBox/Runtime/r0drv/linux/assert-r0drv-linux.c b/src/VBox/Runtime/r0drv/linux/assert-r0drv-linux.c
new file mode 100644
index 00000000..a7968d06
--- /dev/null
+++ b/src/VBox/Runtime/r0drv/linux/assert-r0drv-linux.c
@@ -0,0 +1,84 @@
+/* $Id: assert-r0drv-linux.c $ */
+/** @file
+ * IPRT - Assertion Workers, Ring-0 Drivers, Linux.
+ */
+
+/*
+ * Copyright (C) 2007-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#include "the-linux-kernel.h"
+#include "internal/iprt.h"
+
+#include <iprt/assert.h>
+#include <iprt/log.h>
+#include <iprt/string.h>
+#include <iprt/stdarg.h>
+#include <iprt/asm.h>
+
+#include "internal/assert.h"
+
+
+DECLHIDDEN(void) rtR0AssertNativeMsg1(const char *pszExpr, unsigned uLine, const char *pszFile, const char *pszFunction)
+{
+ IPRT_LINUX_SAVE_EFL_AC();
+ printk(KERN_EMERG
+ "\r\n!!Assertion Failed!!\r\n"
+ "Expression: %s\r\n"
+ "Location : %s(%d) %s\r\n",
+ pszExpr, pszFile, uLine, pszFunction);
+ IPRT_LINUX_RESTORE_EFL_AC();
+}
+
+
+DECLHIDDEN(void) rtR0AssertNativeMsg2V(bool fInitial, const char *pszFormat, va_list va)
+{
+ char szMsg[256];
+ IPRT_LINUX_SAVE_EFL_AC();
+
+ RTStrPrintfV(szMsg, sizeof(szMsg) - 1, pszFormat, va);
+ szMsg[sizeof(szMsg) - 1] = '\0';
+ printk(KERN_EMERG "%s", szMsg);
+
+ NOREF(fInitial);
+ IPRT_LINUX_RESTORE_EFL_AC();
+}
+
+
+RTR0DECL(void) RTR0AssertPanicSystem(void)
+{
+ panic("%s%s", g_szRTAssertMsg1, g_szRTAssertMsg2);
+}
+RT_EXPORT_SYMBOL(RTR0AssertPanicSystem);
+
diff --git a/src/VBox/Runtime/r0drv/linux/initterm-r0drv-linux.c b/src/VBox/Runtime/r0drv/linux/initterm-r0drv-linux.c
new file mode 100644
index 00000000..cbdb324a
--- /dev/null
+++ b/src/VBox/Runtime/r0drv/linux/initterm-r0drv-linux.c
@@ -0,0 +1,138 @@
+/* $Id: initterm-r0drv-linux.c $ */
+/** @file
+ * IPRT - Initialization & Termination, R0 Driver, Linux.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#include "the-linux-kernel.h"
+#include "internal/iprt.h"
+#include <iprt/errcore.h>
+#include <iprt/assert.h>
+#include "internal/initterm.h"
+
+
+/*********************************************************************************************************************************
+* Global Variables *
+*********************************************************************************************************************************/
+/** The IPRT work queue. */
+#if RTLNX_VER_MIN(2,5,41)
+static struct workqueue_struct *g_prtR0LnxWorkQueue;
+#else
+static DECLARE_TASK_QUEUE(g_rtR0LnxWorkQueue);
+#endif
+
+
+/**
+ * Pushes an item onto the IPRT work queue.
+ *
+ * @param pWork The work item.
+ * @param pfnWorker The callback function. It will be called back
+ * with @a pWork as argument.
+ */
+DECLHIDDEN(void) rtR0LnxWorkqueuePush(RTR0LNXWORKQUEUEITEM *pWork, void (*pfnWorker)(RTR0LNXWORKQUEUEITEM *))
+{
+ IPRT_LINUX_SAVE_EFL_AC();
+
+#if RTLNX_VER_MIN(2,5,41)
+# if RTLNX_VER_MIN(2,6,20)
+ INIT_WORK(pWork, pfnWorker);
+# else
+ INIT_WORK(pWork, (void (*)(void *))pfnWorker, pWork);
+# endif
+ queue_work(g_prtR0LnxWorkQueue, pWork);
+#else
+ INIT_TQUEUE(pWork, (void (*)(void *))pfnWorker, pWork);
+ queue_task(pWork, &g_rtR0LnxWorkQueue);
+#endif
+
+ IPRT_LINUX_RESTORE_EFL_AC();
+}
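A minimal sketch of queuing work through this helper; the worker and item names are hypothetical, and it assumes RTR0LNXWORKQUEUEITEM is the work-item type defined in the-linux-kernel.h (a struct work_struct on recent kernels).

    static RTR0LNXWORKQUEUEITEM g_ExampleWorkItem; /* hypothetical item */

    static void exampleWorker(RTR0LNXWORKQUEUEITEM *pWork)
    {
        /* Runs on the iprt-VBoxWQueue worker thread; pWork == &g_ExampleWorkItem. */
    }

    static void exampleQueueIt(void)
    {
        rtR0LnxWorkqueuePush(&g_ExampleWorkItem, exampleWorker);
    }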
+
+
+/**
+ * Flushes all items in the IPRT work queue.
+ *
+ * @remarks This is mostly for 2.4.x compatibility. Must not be called from
+ *          atomic contexts or with unnecessary locks held.
+ */
+DECLHIDDEN(void) rtR0LnxWorkqueueFlush(void)
+{
+ IPRT_LINUX_SAVE_EFL_AC();
+
+#if RTLNX_VER_MIN(2,5,41)
+ flush_workqueue(g_prtR0LnxWorkQueue);
+#else
+ run_task_queue(&g_rtR0LnxWorkQueue);
+#endif
+
+ IPRT_LINUX_RESTORE_EFL_AC();
+}
+
+
+DECLHIDDEN(int) rtR0InitNative(void)
+{
+ int rc = VINF_SUCCESS;
+ IPRT_LINUX_SAVE_EFL_AC();
+
+#if RTLNX_VER_MIN(2,5,41)
+ #if RTLNX_VER_MIN(2,6,13)
+ g_prtR0LnxWorkQueue = create_workqueue("iprt-VBoxWQueue");
+ #else
+ g_prtR0LnxWorkQueue = create_workqueue("iprt-VBoxQ");
+ #endif
+ if (!g_prtR0LnxWorkQueue)
+ rc = VERR_NO_MEMORY;
+#endif
+
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return rc;
+}
+
+
+DECLHIDDEN(void) rtR0TermNative(void)
+{
+ IPRT_LINUX_SAVE_EFL_AC();
+
+ rtR0LnxWorkqueueFlush();
+#if RTLNX_VER_MIN(2,5,41)
+ destroy_workqueue(g_prtR0LnxWorkQueue);
+ g_prtR0LnxWorkQueue = NULL;
+#endif
+
+ IPRT_LINUX_RESTORE_EFL_AC();
+}
+
diff --git a/src/VBox/Runtime/r0drv/linux/memobj-r0drv-linux.c b/src/VBox/Runtime/r0drv/linux/memobj-r0drv-linux.c
new file mode 100644
index 00000000..8342fbf8
--- /dev/null
+++ b/src/VBox/Runtime/r0drv/linux/memobj-r0drv-linux.c
@@ -0,0 +1,2104 @@
+/* $Id: memobj-r0drv-linux.c $ */
+/** @file
+ * IPRT - Ring-0 Memory Objects, Linux.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#include "the-linux-kernel.h"
+
+#include <iprt/memobj.h>
+#include <iprt/assert.h>
+#include <iprt/err.h>
+#include <iprt/log.h>
+#include <iprt/mem.h>
+#include <iprt/process.h>
+#include <iprt/string.h>
+#include "internal/memobj.h"
+#include "internal/iprt.h"
+
+
+/*********************************************************************************************************************************
+* Defined Constants And Macros *
+*********************************************************************************************************************************/
+/* early 2.6 kernels */
+#ifndef PAGE_SHARED_EXEC
+# define PAGE_SHARED_EXEC PAGE_SHARED
+#endif
+#ifndef PAGE_READONLY_EXEC
+# define PAGE_READONLY_EXEC PAGE_READONLY
+#endif
+
+/** @def IPRT_USE_ALLOC_VM_AREA_FOR_EXEC
+ * Whether we use alloc_vm_area (3.2+) for executable memory.
+ * This is a must for 5.8+, but we enable it all the way back to 3.2.x for
+ * better W^X compliance (fExecutable flag). */
+#if RTLNX_VER_RANGE(3,2,0, 5,10,0) || defined(DOXYGEN_RUNNING)
+# define IPRT_USE_ALLOC_VM_AREA_FOR_EXEC
+#endif
+/** @def IPRT_USE_APPLY_TO_PAGE_RANGE_FOR_EXEC
+ * alloc_vm_area was removed with 5.10 so we have to resort to a different way
+ * to allocate executable memory.
+ * It would be possible to remove IPRT_USE_ALLOC_VM_AREA_FOR_EXEC and use
+ * this path exclusively for 3.2+, but there was no time to test that it really
+ * works on every supported kernel, so better play it safe for now.
+ */
+#if RTLNX_VER_MIN(5,10,0) || defined(DOXYGEN_RUNNING)
+# define IPRT_USE_APPLY_TO_PAGE_RANGE_FOR_EXEC
+#endif
+
+/*
+ * 2.6.29+ kernels don't work with remap_pfn_range() anymore because
+ * track_pfn_vma_new() is apparently not defined for non-RAM pages.
+ * It should be safe to use vm_insert_page() on older kernels as well.
+ */
+#if RTLNX_VER_MIN(2,6,23)
+# define VBOX_USE_INSERT_PAGE
+#endif
+#if defined(CONFIG_X86_PAE) \
+ && ( defined(HAVE_26_STYLE_REMAP_PAGE_RANGE) \
+ || RTLNX_VER_RANGE(2,6,0, 2,6,11) )
+# define VBOX_USE_PAE_HACK
+#endif
+
+/* gfp_t was introduced in 2.6.14, define it for earlier. */
+#if RTLNX_VER_MAX(2,6,14)
+# define gfp_t unsigned
+#endif
+
+/*
+ * Wrappers around mmap_lock/mmap_sem difference.
+ */
+#if RTLNX_VER_MIN(5,8,0)
+# define LNX_MM_DOWN_READ(a_pMm) down_read(&(a_pMm)->mmap_lock)
+# define LNX_MM_UP_READ(a_pMm) up_read(&(a_pMm)->mmap_lock)
+# define LNX_MM_DOWN_WRITE(a_pMm) down_write(&(a_pMm)->mmap_lock)
+# define LNX_MM_UP_WRITE(a_pMm) up_write(&(a_pMm)->mmap_lock)
+#else
+# define LNX_MM_DOWN_READ(a_pMm) down_read(&(a_pMm)->mmap_sem)
+# define LNX_MM_UP_READ(a_pMm) up_read(&(a_pMm)->mmap_sem)
+# define LNX_MM_DOWN_WRITE(a_pMm) down_write(&(a_pMm)->mmap_sem)
+# define LNX_MM_UP_WRITE(a_pMm) up_write(&(a_pMm)->mmap_sem)
+#endif
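A short sketch of how these wrappers are used around a task's address space (assuming a valid mm; the real call sites follow later in this file):

    struct mm_struct *pMm = current->mm;
    if (pMm)
    {
        LNX_MM_DOWN_READ(pMm);
        /* ... inspect VMAs or pin user pages ... */
        LNX_MM_UP_READ(pMm);
    }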
+
+
+/*********************************************************************************************************************************
+* Structures and Typedefs *
+*********************************************************************************************************************************/
+/**
+ * The Linux version of the memory object structure.
+ */
+typedef struct RTR0MEMOBJLNX
+{
+ /** The core structure. */
+ RTR0MEMOBJINTERNAL Core;
+ /** Set if the allocation is contiguous.
+ * This means it has to be given back as one chunk. */
+ bool fContiguous;
+ /** Set if executable allocation. */
+ bool fExecutable;
+ /** Set if we've vmap'ed the memory into ring-0. */
+ bool fMappedToRing0;
+ /** This is non-zero if large page allocation. */
+ uint8_t cLargePageOrder;
+#ifdef IPRT_USE_ALLOC_VM_AREA_FOR_EXEC
+ /** Return from alloc_vm_area() that we now need to use for executable
+ * memory. */
+ struct vm_struct *pArea;
+ /** PTE array that goes along with pArea (must be freed). */
+ pte_t **papPtesForArea;
+#endif
+ /** The pages in the apPages array. */
+ size_t cPages;
+ /** Array of struct page pointers. (variable size) */
+ struct page *apPages[1];
+} RTR0MEMOBJLNX;
+/** Pointer to the linux memory object. */
+typedef RTR0MEMOBJLNX *PRTR0MEMOBJLNX;
+
+
+static void rtR0MemObjLinuxFreePages(PRTR0MEMOBJLNX pMemLnx);
+
+
+/**
+ * Helper that converts from a RTR0PROCESS handle to a linux task.
+ *
+ * @returns The corresponding Linux task.
+ * @param R0Process IPRT ring-0 process handle.
+ */
+static struct task_struct *rtR0ProcessToLinuxTask(RTR0PROCESS R0Process)
+{
+ /** @todo fix rtR0ProcessToLinuxTask!! */
+ /** @todo many (all?) callers currently assume that we return 'current'! */
+ return R0Process == RTR0ProcHandleSelf() ? current : NULL;
+}
+
+
+/**
+ * Compute order. Some functions allocate 2^order pages.
+ *
+ * @returns order.
+ * @param cPages Number of pages.
+ */
+static int rtR0MemObjLinuxOrder(size_t cPages)
+{
+ int iOrder;
+ size_t cTmp;
+
+ for (iOrder = 0, cTmp = cPages; cTmp >>= 1; ++iOrder)
+ ;
+ if (cPages & ~((size_t)1 << iOrder))
+ ++iOrder;
+
+ return iOrder;
+}
+
+
+/**
+ * Converts from RTMEM_PROT_* to Linux PAGE_*.
+ *
+ * @returns Linux page protection constant.
+ * @param fProt The IPRT protection mask.
+ * @param fKernel Whether it applies to kernel or user space.
+ */
+static pgprot_t rtR0MemObjLinuxConvertProt(unsigned fProt, bool fKernel)
+{
+ switch (fProt)
+ {
+ default:
+ AssertMsgFailed(("%#x %d\n", fProt, fKernel)); RT_FALL_THRU();
+ case RTMEM_PROT_NONE:
+ return PAGE_NONE;
+
+ case RTMEM_PROT_READ:
+ return fKernel ? PAGE_KERNEL_RO : PAGE_READONLY;
+
+ case RTMEM_PROT_WRITE:
+ case RTMEM_PROT_WRITE | RTMEM_PROT_READ:
+ return fKernel ? PAGE_KERNEL : PAGE_SHARED;
+
+ case RTMEM_PROT_EXEC:
+ case RTMEM_PROT_EXEC | RTMEM_PROT_READ:
+#if defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)
+ if (fKernel)
+ {
+ pgprot_t fPg = MY_PAGE_KERNEL_EXEC;
+ pgprot_val(fPg) &= ~_PAGE_RW;
+ return fPg;
+ }
+ return PAGE_READONLY_EXEC;
+#else
+ return fKernel ? MY_PAGE_KERNEL_EXEC : PAGE_READONLY_EXEC;
+#endif
+
+ case RTMEM_PROT_WRITE | RTMEM_PROT_EXEC:
+ case RTMEM_PROT_WRITE | RTMEM_PROT_EXEC | RTMEM_PROT_READ:
+ return fKernel ? MY_PAGE_KERNEL_EXEC : PAGE_SHARED_EXEC;
+ }
+}
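A hedged sketch of how the conversion result is consumed; pMemLnx is assumed from the surrounding context and VM_MAP is assumed to be available (see the #ifdef further down in this file):

    /* Map a set of pages read/write into kernel space. */
    pgprot_t fPg = rtR0MemObjLinuxConvertProt(RTMEM_PROT_READ | RTMEM_PROT_WRITE, true /*fKernel*/); /* PAGE_KERNEL */
    void *pv = vmap(&pMemLnx->apPages[0], pMemLnx->cPages, VM_MAP, fPg);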
+
+
+/**
+ * Worker for rtR0MemObjNativeReserveUser and rtR0MemObjNativeMapUser that creates
+ * an empty user space mapping.
+ *
+ * We acquire the mmap_sem/mmap_lock of the task!
+ *
+ * @returns Pointer to the mapping.
+ * (void *)-1 on failure.
+ * @param R3PtrFixed (RTR3PTR)-1 if anywhere, otherwise a specific location.
+ * @param cb The size of the mapping.
+ * @param uAlignment The alignment of the mapping.
+ * @param pTask The Linux task to create this mapping in.
+ * @param fProt The RTMEM_PROT_* mask.
+ */
+static void *rtR0MemObjLinuxDoMmap(RTR3PTR R3PtrFixed, size_t cb, size_t uAlignment, struct task_struct *pTask, unsigned fProt)
+{
+ unsigned fLnxProt;
+ unsigned long ulAddr;
+
+ Assert(pTask == current); /* do_mmap */
+ RT_NOREF_PV(pTask);
+
+ /*
+ * Convert from IPRT protection to mman.h PROT_ and call do_mmap.
+ */
+ fProt &= (RTMEM_PROT_NONE | RTMEM_PROT_READ | RTMEM_PROT_WRITE | RTMEM_PROT_EXEC);
+ if (fProt == RTMEM_PROT_NONE)
+ fLnxProt = PROT_NONE;
+ else
+ {
+ fLnxProt = 0;
+ if (fProt & RTMEM_PROT_READ)
+ fLnxProt |= PROT_READ;
+ if (fProt & RTMEM_PROT_WRITE)
+ fLnxProt |= PROT_WRITE;
+ if (fProt & RTMEM_PROT_EXEC)
+ fLnxProt |= PROT_EXEC;
+ }
+
+ if (R3PtrFixed != (RTR3PTR)-1)
+ {
+#if RTLNX_VER_MIN(3,5,0)
+ ulAddr = vm_mmap(NULL, R3PtrFixed, cb, fLnxProt, MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, 0);
+#else
+ LNX_MM_DOWN_WRITE(pTask->mm);
+ ulAddr = do_mmap(NULL, R3PtrFixed, cb, fLnxProt, MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, 0);
+ LNX_MM_UP_WRITE(pTask->mm);
+#endif
+ }
+ else
+ {
+#if RTLNX_VER_MIN(3,5,0)
+ ulAddr = vm_mmap(NULL, 0, cb, fLnxProt, MAP_SHARED | MAP_ANONYMOUS, 0);
+#else
+ LNX_MM_DOWN_WRITE(pTask->mm);
+ ulAddr = do_mmap(NULL, 0, cb, fLnxProt, MAP_SHARED | MAP_ANONYMOUS, 0);
+ LNX_MM_UP_WRITE(pTask->mm);
+#endif
+ if ( !(ulAddr & ~PAGE_MASK)
+ && (ulAddr & (uAlignment - 1)))
+ {
+ /** @todo implement uAlignment properly... We'll probably need to make some dummy mappings to fill
+ * up alignment gaps. This is of course complicated by fragmentation (which we might have caused
+ * ourselves) and further by there being two mmap strategies (top / bottom). */
+ /* For now, just ignore uAlignment requirements... */
+ }
+ }
+
+
+ if (ulAddr & ~PAGE_MASK) /* ~PAGE_MASK == PAGE_OFFSET_MASK */
+ return (void *)-1;
+ return (void *)ulAddr;
+}
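A minimal sketch of reserving and later releasing user address space with this worker and its counterpart just below; the size is illustrative only:

    void *pv = rtR0MemObjLinuxDoMmap((RTR3PTR)-1, _1M, PAGE_SIZE, current, RTMEM_PROT_NONE);
    if (pv != (void *)-1)
    {
        /* ... later ... */
        rtR0MemObjLinuxDoMunmap(pv, _1M, current);
    }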
+
+
+/**
+ * Worker that destroys a user space mapping.
+ * Undoes what rtR0MemObjLinuxDoMmap did.
+ *
+ * We acquire the mmap_sem/mmap_lock of the task!
+ *
+ * @param pv The ring-3 mapping.
+ * @param cb The size of the mapping.
+ * @param pTask The Linux task to destroy this mapping in.
+ */
+static void rtR0MemObjLinuxDoMunmap(void *pv, size_t cb, struct task_struct *pTask)
+{
+#if RTLNX_VER_MIN(3,5,0)
+ Assert(pTask == current); RT_NOREF_PV(pTask);
+ vm_munmap((unsigned long)pv, cb);
+#elif defined(USE_RHEL4_MUNMAP)
+ LNX_MM_DOWN_WRITE(pTask->mm);
+ do_munmap(pTask->mm, (unsigned long)pv, cb, 0); /* should it be 1 or 0? */
+ LNX_MM_UP_WRITE(pTask->mm);
+#else
+ LNX_MM_DOWN_WRITE(pTask->mm);
+ do_munmap(pTask->mm, (unsigned long)pv, cb);
+ LNX_MM_UP_WRITE(pTask->mm);
+#endif
+}
+
+
+/**
+ * Internal worker that allocates physical pages and creates the memory object for them.
+ *
+ * @returns IPRT status code.
+ * @param ppMemLnx Where to store the memory object pointer.
+ * @param enmType The object type.
+ * @param cb The number of bytes to allocate.
+ * @param uAlignment The alignment of the physical memory.
+ * Only valid if fContiguous == true, ignored otherwise.
+ * @param fFlagsLnx The page allocation flags (GPFs).
+ * @param fContiguous Whether the allocation must be contiguous.
+ * @param fExecutable Whether the memory must be executable.
+ * @param rcNoMem What to return when we're out of pages.
+ * @param pszTag Allocation tag used for statistics and such.
+ */
+static int rtR0MemObjLinuxAllocPages(PRTR0MEMOBJLNX *ppMemLnx, RTR0MEMOBJTYPE enmType, size_t cb,
+ size_t uAlignment, gfp_t fFlagsLnx, bool fContiguous, bool fExecutable, int rcNoMem,
+ const char *pszTag)
+{
+ size_t iPage;
+ size_t const cPages = cb >> PAGE_SHIFT;
+ struct page *paPages;
+
+ /*
+ * Allocate a memory object structure that's large enough to contain
+ * the page pointer array.
+ */
+ PRTR0MEMOBJLNX pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(RT_UOFFSETOF_DYN(RTR0MEMOBJLNX, apPages[cPages]), enmType,
+ NULL, cb, pszTag);
+ if (!pMemLnx)
+ return VERR_NO_MEMORY;
+ pMemLnx->Core.fFlags |= RTR0MEMOBJ_FLAGS_UNINITIALIZED_AT_ALLOC;
+ pMemLnx->cPages = cPages;
+
+ if (cPages > 255)
+ {
+# ifdef __GFP_REPEAT
+ /* Try hard to allocate the memory, but the allocation attempt might fail. */
+ fFlagsLnx |= __GFP_REPEAT;
+# endif
+# ifdef __GFP_NOMEMALLOC
+ /* Introduced with Linux 2.6.12: Don't use emergency reserves */
+ fFlagsLnx |= __GFP_NOMEMALLOC;
+# endif
+ }
+
+ /*
+ * Allocate the pages.
+ * For small allocations we'll try contiguous first and then fall back on page by page.
+ */
+#if RTLNX_VER_MIN(2,4,22)
+ if ( fContiguous
+ || cb <= PAGE_SIZE * 2)
+ {
+# ifdef VBOX_USE_INSERT_PAGE
+ paPages = alloc_pages(fFlagsLnx | __GFP_COMP | __GFP_NOWARN, rtR0MemObjLinuxOrder(cPages));
+# else
+ paPages = alloc_pages(fFlagsLnx | __GFP_NOWARN, rtR0MemObjLinuxOrder(cPages));
+# endif
+ if (paPages)
+ {
+ fContiguous = true;
+ for (iPage = 0; iPage < cPages; iPage++)
+ pMemLnx->apPages[iPage] = &paPages[iPage];
+ }
+ else if (fContiguous)
+ {
+ rtR0MemObjDelete(&pMemLnx->Core);
+ return rcNoMem;
+ }
+ }
+
+ if (!fContiguous)
+ {
+ /** @todo Try use alloc_pages_bulk_array when available, it should be faster
+ * than a alloc_page loop. Put it in #ifdefs similar to
+ * IPRT_USE_APPLY_TO_PAGE_RANGE_FOR_EXEC. */
+ for (iPage = 0; iPage < cPages; iPage++)
+ {
+ pMemLnx->apPages[iPage] = alloc_page(fFlagsLnx | __GFP_NOWARN);
+ if (RT_UNLIKELY(!pMemLnx->apPages[iPage]))
+ {
+ while (iPage-- > 0)
+ __free_page(pMemLnx->apPages[iPage]);
+ rtR0MemObjDelete(&pMemLnx->Core);
+ return rcNoMem;
+ }
+ }
+ }
+
+#else /* < 2.4.22 */
+ /** @todo figure out why we didn't allocate page-by-page on 2.4.21 and older... */
+ paPages = alloc_pages(fFlagsLnx, rtR0MemObjLinuxOrder(cPages));
+ if (!paPages)
+ {
+ rtR0MemObjDelete(&pMemLnx->Core);
+ return rcNoMem;
+ }
+ for (iPage = 0; iPage < cPages; iPage++)
+ {
+ pMemLnx->apPages[iPage] = &paPages[iPage];
+ if (fExecutable)
+ MY_SET_PAGES_EXEC(pMemLnx->apPages[iPage], 1);
+ if (PageHighMem(pMemLnx->apPages[iPage]))
+ BUG();
+ }
+
+ fContiguous = true;
+#endif /* < 2.4.22 */
+ pMemLnx->fContiguous = fContiguous;
+ pMemLnx->fExecutable = fExecutable;
+
+#if RTLNX_VER_MAX(4,5,0)
+ /*
+ * Reserve the pages.
+ *
+ * Linux >= 4.5 with CONFIG_DEBUG_VM panics when setting PG_reserved on compound
+ * pages. According to Michal Hocko this shouldn't be necessary anyway because
+ * pages which are not on the LRU list are never evictable.
+ */
+ for (iPage = 0; iPage < cPages; iPage++)
+ SetPageReserved(pMemLnx->apPages[iPage]);
+#endif
+
+ /*
+ * Note that the physical address of memory allocated with alloc_pages(flags, order)
+ * is always 2^(PAGE_SHIFT+order)-aligned.
+ */
+ if ( fContiguous
+ && uAlignment > PAGE_SIZE)
+ {
+ /*
+ * Check for alignment constraints. The physical address of memory allocated with
+ * alloc_pages(flags, order) is always 2^(PAGE_SHIFT+order)-aligned.
+ */
+ if (RT_UNLIKELY(page_to_phys(pMemLnx->apPages[0]) & (uAlignment - 1)))
+ {
+ /*
+ * This should never happen!
+ */
+ printk("rtR0MemObjLinuxAllocPages(cb=0x%lx, uAlignment=0x%lx): alloc_pages(..., %d) returned physical memory at 0x%lx!\n",
+ (unsigned long)cb, (unsigned long)uAlignment, rtR0MemObjLinuxOrder(cPages), (unsigned long)page_to_phys(pMemLnx->apPages[0]));
+ rtR0MemObjLinuxFreePages(pMemLnx);
+ return rcNoMem;
+ }
+ }
+
+ *ppMemLnx = pMemLnx;
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Frees the physical pages allocated by the rtR0MemObjLinuxAllocPages() call.
+ *
+ * This method does NOT free the object.
+ *
+ * @param pMemLnx The object which physical pages should be freed.
+ */
+static void rtR0MemObjLinuxFreePages(PRTR0MEMOBJLNX pMemLnx)
+{
+ size_t iPage = pMemLnx->cPages;
+ if (iPage > 0)
+ {
+ /*
+ * Restore the page flags.
+ */
+ while (iPage-- > 0)
+ {
+#if RTLNX_VER_MAX(4,5,0)
+ /* See SetPageReserved() in rtR0MemObjLinuxAllocPages() */
+ ClearPageReserved(pMemLnx->apPages[iPage]);
+#endif
+#if RTLNX_VER_MAX(2,4,22)
+ if (pMemLnx->fExecutable)
+ MY_SET_PAGES_NOEXEC(pMemLnx->apPages[iPage], 1);
+#endif
+ }
+
+ /*
+ * Free the pages.
+ */
+#if RTLNX_VER_MIN(2,4,22)
+ if (!pMemLnx->fContiguous)
+ {
+ iPage = pMemLnx->cPages;
+ while (iPage-- > 0)
+ __free_page(pMemLnx->apPages[iPage]);
+ }
+ else
+#endif
+ __free_pages(pMemLnx->apPages[0], rtR0MemObjLinuxOrder(pMemLnx->cPages));
+
+ pMemLnx->cPages = 0;
+ }
+}
+
+
+#ifdef IPRT_USE_APPLY_TO_PAGE_RANGE_FOR_EXEC
+/**
+ * User data passed to the apply_to_page_range() callback.
+ */
+typedef struct LNXAPPLYPGRANGE
+{
+ /** Pointer to the memory object. */
+ PRTR0MEMOBJLNX pMemLnx;
+ /** The page protection flags to apply. */
+ pgprot_t fPg;
+} LNXAPPLYPGRANGE;
+/** Pointer to the user data. */
+typedef LNXAPPLYPGRANGE *PLNXAPPLYPGRANGE;
+/** Pointer to the const user data. */
+typedef const LNXAPPLYPGRANGE *PCLNXAPPLYPGRANGE;
+
+/**
+ * Callback called in apply_to_page_range().
+ *
+ * @returns Linux status code.
+ * @param pPte Pointer to the page table entry for the given address.
+ * @param uAddr The address to apply the new protection to.
+ * @param pvUser The opaque user data.
+ */
+static int rtR0MemObjLinuxApplyPageRange(pte_t *pPte, unsigned long uAddr, void *pvUser)
+{
+ PCLNXAPPLYPGRANGE pArgs = (PCLNXAPPLYPGRANGE)pvUser;
+ PRTR0MEMOBJLNX pMemLnx = pArgs->pMemLnx;
+ size_t idxPg = (uAddr - (unsigned long)pMemLnx->Core.pv) >> PAGE_SHIFT;
+
+ set_pte(pPte, mk_pte(pMemLnx->apPages[idxPg], pArgs->fPg));
+ return 0;
+}
+#endif
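This callback is meant to be handed to the kernel's apply_to_page_range(); the actual call site lives in the protection code later in this file, outside this excerpt. A hedged sketch, with pMemLnx and fPg assumed from the surrounding context:

    LNXAPPLYPGRANGE Args;
    int rcLnx;
    Args.pMemLnx = pMemLnx;
    Args.fPg = fPg;
    rcLnx = apply_to_page_range(current->active_mm, (unsigned long)pMemLnx->Core.pv,
                                pMemLnx->Core.cb, rtR0MemObjLinuxApplyPageRange, &Args);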
+
+
+/**
+ * Maps the allocation into ring-0.
+ *
+ * This will update the RTR0MEMOBJLNX::Core.pv and RTR0MEMOBJ::fMappedToRing0 members.
+ *
+ * Contiguous mappings that aren't in 'high' memory will already be mapped into kernel
+ * space, so we'll use that mapping if possible. If execute access is required, we'll
+ * play safe and do our own mapping.
+ *
+ * @returns IPRT status code.
+ * @param pMemLnx The linux memory object to map.
+ * @param fExecutable Whether execute access is required.
+ */
+static int rtR0MemObjLinuxVMap(PRTR0MEMOBJLNX pMemLnx, bool fExecutable)
+{
+ int rc = VINF_SUCCESS;
+
+ /*
+ * Choose mapping strategy.
+ */
+ bool fMustMap = fExecutable
+ || !pMemLnx->fContiguous;
+ if (!fMustMap)
+ {
+ size_t iPage = pMemLnx->cPages;
+ while (iPage-- > 0)
+ if (PageHighMem(pMemLnx->apPages[iPage]))
+ {
+ fMustMap = true;
+ break;
+ }
+ }
+
+ Assert(!pMemLnx->Core.pv);
+ Assert(!pMemLnx->fMappedToRing0);
+
+ if (fMustMap)
+ {
+ /*
+ * Use vmap - 2.4.22 and later.
+ */
+#if RTLNX_VER_MIN(2,4,22)
+ pgprot_t fPg;
+ pgprot_val(fPg) = _PAGE_PRESENT | _PAGE_RW;
+# ifdef _PAGE_NX
+ if (!fExecutable)
+ pgprot_val(fPg) |= _PAGE_NX;
+# endif
+
+# ifdef IPRT_USE_ALLOC_VM_AREA_FOR_EXEC
+ if (fExecutable)
+ {
+# if RTLNX_VER_MIN(3,2,51)
+ pte_t **papPtes = (pte_t **)kmalloc_array(pMemLnx->cPages, sizeof(papPtes[0]), GFP_KERNEL);
+# else
+ pte_t **papPtes = (pte_t **)kmalloc(pMemLnx->cPages * sizeof(papPtes[0]), GFP_KERNEL);
+# endif
+ if (papPtes)
+ {
+ pMemLnx->pArea = alloc_vm_area(pMemLnx->Core.cb, papPtes); /* Note! pArea->nr_pages is not set. */
+ if (pMemLnx->pArea)
+ {
+ size_t i;
+ Assert(pMemLnx->pArea->size >= pMemLnx->Core.cb); /* Note! includes guard page. */
+ Assert(pMemLnx->pArea->addr);
+# ifdef _PAGE_NX
+ pgprot_val(fPg) |= _PAGE_NX; /* Uses RTR0MemObjProtect to clear NX when memory ready, W^X fashion. */
+# endif
+ pMemLnx->papPtesForArea = papPtes;
+ for (i = 0; i < pMemLnx->cPages; i++)
+ *papPtes[i] = mk_pte(pMemLnx->apPages[i], fPg);
+ pMemLnx->Core.pv = pMemLnx->pArea->addr;
+ pMemLnx->fMappedToRing0 = true;
+ }
+ else
+ {
+ kfree(papPtes);
+ rc = VERR_MAP_FAILED;
+ }
+ }
+ else
+ rc = VERR_MAP_FAILED;
+ }
+ else
+# endif
+ {
+# if defined(IPRT_USE_APPLY_TO_PAGE_RANGE_FOR_EXEC)
+ if (fExecutable)
+ pgprot_val(fPg) |= _PAGE_NX; /* Uses RTR0MemObjProtect to clear NX when memory ready, W^X fashion. */
+# endif
+
+# ifdef VM_MAP
+ pMemLnx->Core.pv = vmap(&pMemLnx->apPages[0], pMemLnx->cPages, VM_MAP, fPg);
+# else
+ pMemLnx->Core.pv = vmap(&pMemLnx->apPages[0], pMemLnx->cPages, VM_ALLOC, fPg);
+# endif
+ if (pMemLnx->Core.pv)
+ pMemLnx->fMappedToRing0 = true;
+ else
+ rc = VERR_MAP_FAILED;
+ }
+#else /* < 2.4.22 */
+ rc = VERR_NOT_SUPPORTED;
+#endif
+ }
+ else
+ {
+ /*
+ * Use the kernel RAM mapping.
+ */
+ pMemLnx->Core.pv = phys_to_virt(page_to_phys(pMemLnx->apPages[0]));
+ Assert(pMemLnx->Core.pv);
+ }
+
+ return rc;
+}
+
+
+/**
+ * Undoes what rtR0MemObjLinuxVMap() did.
+ *
+ * @param pMemLnx The linux memory object.
+ */
+static void rtR0MemObjLinuxVUnmap(PRTR0MEMOBJLNX pMemLnx)
+{
+#if RTLNX_VER_MIN(2,4,22)
+# ifdef IPRT_USE_ALLOC_VM_AREA_FOR_EXEC
+ if (pMemLnx->pArea)
+ {
+# if 0
+ pte_t **papPtes = pMemLnx->papPtesForArea;
+ size_t i;
+ for (i = 0; i < pMemLnx->cPages; i++)
+ *papPtes[i] = 0;
+# endif
+ free_vm_area(pMemLnx->pArea);
+ kfree(pMemLnx->papPtesForArea);
+ pMemLnx->pArea = NULL;
+ pMemLnx->papPtesForArea = NULL;
+ }
+ else
+# endif
+ if (pMemLnx->fMappedToRing0)
+ {
+ Assert(pMemLnx->Core.pv);
+ vunmap(pMemLnx->Core.pv);
+ pMemLnx->fMappedToRing0 = false;
+ }
+#else /* < 2.4.22 */
+ Assert(!pMemLnx->fMappedToRing0);
+#endif
+ pMemLnx->Core.pv = NULL;
+}
+
+
+DECLHIDDEN(int) rtR0MemObjNativeFree(RTR0MEMOBJ pMem)
+{
+ IPRT_LINUX_SAVE_EFL_AC();
+ PRTR0MEMOBJLNX pMemLnx = (PRTR0MEMOBJLNX)pMem;
+
+ /*
+ * Release any memory that we've allocated or locked.
+ */
+ switch (pMemLnx->Core.enmType)
+ {
+ case RTR0MEMOBJTYPE_PAGE:
+ case RTR0MEMOBJTYPE_LOW:
+ case RTR0MEMOBJTYPE_CONT:
+ case RTR0MEMOBJTYPE_PHYS:
+ case RTR0MEMOBJTYPE_PHYS_NC:
+ rtR0MemObjLinuxVUnmap(pMemLnx);
+ rtR0MemObjLinuxFreePages(pMemLnx);
+ break;
+
+ case RTR0MEMOBJTYPE_LARGE_PAGE:
+ {
+ uint32_t const cLargePages = pMemLnx->Core.cb >> (pMemLnx->cLargePageOrder + PAGE_SHIFT);
+ uint32_t iLargePage;
+ for (iLargePage = 0; iLargePage < cLargePages; iLargePage++)
+ __free_pages(pMemLnx->apPages[iLargePage << pMemLnx->cLargePageOrder], pMemLnx->cLargePageOrder);
+ pMemLnx->cPages = 0;
+
+#ifdef IPRT_USE_ALLOC_VM_AREA_FOR_EXEC
+ Assert(!pMemLnx->pArea);
+ Assert(!pMemLnx->papPtesForArea);
+#endif
+ break;
+ }
+
+ case RTR0MEMOBJTYPE_LOCK:
+ if (pMemLnx->Core.u.Lock.R0Process != NIL_RTR0PROCESS)
+ {
+ struct task_struct *pTask = rtR0ProcessToLinuxTask(pMemLnx->Core.u.Lock.R0Process);
+ size_t iPage;
+ Assert(pTask);
+ if (pTask && pTask->mm)
+ LNX_MM_DOWN_READ(pTask->mm);
+
+ iPage = pMemLnx->cPages;
+ while (iPage-- > 0)
+ {
+ if (!PageReserved(pMemLnx->apPages[iPage]))
+ SetPageDirty(pMemLnx->apPages[iPage]);
+#if RTLNX_VER_MIN(4,6,0)
+ put_page(pMemLnx->apPages[iPage]);
+#else
+ page_cache_release(pMemLnx->apPages[iPage]);
+#endif
+ }
+
+ if (pTask && pTask->mm)
+ LNX_MM_UP_READ(pTask->mm);
+ }
+ /* else: kernel memory - nothing to do here. */
+ break;
+
+ case RTR0MEMOBJTYPE_RES_VIRT:
+ Assert(pMemLnx->Core.pv);
+ if (pMemLnx->Core.u.ResVirt.R0Process != NIL_RTR0PROCESS)
+ {
+ struct task_struct *pTask = rtR0ProcessToLinuxTask(pMemLnx->Core.u.Lock.R0Process);
+ Assert(pTask);
+ if (pTask && pTask->mm)
+ rtR0MemObjLinuxDoMunmap(pMemLnx->Core.pv, pMemLnx->Core.cb, pTask);
+ }
+ else
+ {
+ vunmap(pMemLnx->Core.pv);
+
+ Assert(pMemLnx->cPages == 1 && pMemLnx->apPages[0] != NULL);
+ __free_page(pMemLnx->apPages[0]);
+ pMemLnx->apPages[0] = NULL;
+ pMemLnx->cPages = 0;
+ }
+ pMemLnx->Core.pv = NULL;
+ break;
+
+ case RTR0MEMOBJTYPE_MAPPING:
+ Assert(pMemLnx->cPages == 0); Assert(pMemLnx->Core.pv);
+ if (pMemLnx->Core.u.ResVirt.R0Process != NIL_RTR0PROCESS)
+ {
+ struct task_struct *pTask = rtR0ProcessToLinuxTask(pMemLnx->Core.u.Lock.R0Process);
+ Assert(pTask);
+ if (pTask && pTask->mm)
+ rtR0MemObjLinuxDoMunmap(pMemLnx->Core.pv, pMemLnx->Core.cb, pTask);
+ }
+ else
+ vunmap(pMemLnx->Core.pv);
+ pMemLnx->Core.pv = NULL;
+ break;
+
+ default:
+ AssertMsgFailed(("enmType=%d\n", pMemLnx->Core.enmType));
+ return VERR_INTERNAL_ERROR;
+ }
+ IPRT_LINUX_RESTORE_EFL_ONLY_AC();
+ return VINF_SUCCESS;
+}
+
+
+DECLHIDDEN(int) rtR0MemObjNativeAllocPage(PPRTR0MEMOBJINTERNAL ppMem, size_t cb, bool fExecutable, const char *pszTag)
+{
+ IPRT_LINUX_SAVE_EFL_AC();
+ PRTR0MEMOBJLNX pMemLnx;
+ int rc;
+
+#if RTLNX_VER_MIN(2,4,22)
+ rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_PAGE, cb, PAGE_SIZE, GFP_HIGHUSER,
+ false /* non-contiguous */, fExecutable, VERR_NO_MEMORY, pszTag);
+#else
+ rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_PAGE, cb, PAGE_SIZE, GFP_USER,
+ false /* non-contiguous */, fExecutable, VERR_NO_MEMORY, pszTag);
+#endif
+ if (RT_SUCCESS(rc))
+ {
+ rc = rtR0MemObjLinuxVMap(pMemLnx, fExecutable);
+ if (RT_SUCCESS(rc))
+ {
+ *ppMem = &pMemLnx->Core;
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return rc;
+ }
+
+ rtR0MemObjLinuxFreePages(pMemLnx);
+ rtR0MemObjDelete(&pMemLnx->Core);
+ }
+
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return rc;
+}
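From a driver's point of view this native worker sits behind the generic IPRT ring-0 memory object API; a hedged sketch of that consumer side follows (signatures per iprt/memobj.h as best recalled, so verify against the header; the size is illustrative):

    RTR0MEMOBJ hMemObj;
    int rc = RTR0MemObjAllocPage(&hMemObj, 4 * PAGE_SIZE, false /*fExecutable*/);
    if (RT_SUCCESS(rc))
    {
        void *pv = RTR0MemObjAddress(hMemObj);
        /* ... use pv ... */
        RTR0MemObjFree(hMemObj, false /*fFreeMappings*/);
    }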
+
+
+DECLHIDDEN(int) rtR0MemObjNativeAllocLarge(PPRTR0MEMOBJINTERNAL ppMem, size_t cb, size_t cbLargePage, uint32_t fFlags,
+ const char *pszTag)
+{
+#ifdef GFP_TRANSHUGE
+ /*
+ * Allocate a memory object structure that's large enough to contain
+ * the page pointer array.
+ */
+# ifdef __GFP_MOVABLE
+ unsigned const fGfp = (GFP_TRANSHUGE | __GFP_ZERO) & ~__GFP_MOVABLE;
+# else
+ unsigned const fGfp = (GFP_TRANSHUGE | __GFP_ZERO);
+# endif
+ size_t const cPagesPerLarge = cbLargePage >> PAGE_SHIFT;
+ unsigned const cLargePageOrder = rtR0MemObjLinuxOrder(cPagesPerLarge);
+ size_t const cLargePages = cb >> (cLargePageOrder + PAGE_SHIFT);
+ size_t const cPages = cb >> PAGE_SHIFT;
+ PRTR0MEMOBJLNX pMemLnx;
+
+ Assert(RT_BIT_64(cLargePageOrder + PAGE_SHIFT) == cbLargePage);
+ pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(RT_UOFFSETOF_DYN(RTR0MEMOBJLNX, apPages[cPages]),
+ RTR0MEMOBJTYPE_LARGE_PAGE, NULL, cb, pszTag);
+ if (pMemLnx)
+ {
+ size_t iLargePage;
+
+ pMemLnx->Core.fFlags |= RTR0MEMOBJ_FLAGS_ZERO_AT_ALLOC;
+ pMemLnx->cLargePageOrder = cLargePageOrder;
+ pMemLnx->cPages = cPages;
+
+ /*
+ * Allocate the requested number of large pages.
+ */
+ for (iLargePage = 0; iLargePage < cLargePages; iLargePage++)
+ {
+ struct page *paPages = alloc_pages(fGfp, cLargePageOrder);
+ if (paPages)
+ {
+ size_t const iPageBase = iLargePage << cLargePageOrder;
+ size_t iPage = cPagesPerLarge;
+ while (iPage-- > 0)
+ pMemLnx->apPages[iPageBase + iPage] = &paPages[iPage];
+ }
+ else
+ {
+ /*Log(("rtR0MemObjNativeAllocLarge: cb=%#zx cPages=%#zx cLargePages=%#zx cLargePageOrder=%u cPagesPerLarge=%#zx iLargePage=%#zx -> failed!\n",
+ cb, cPages, cLargePages, cLargePageOrder, cPagesPerLarge, iLargePage, paPages));*/
+ while (iLargePage-- > 0)
+ __free_pages(pMemLnx->apPages[iLargePage << (cLargePageOrder - PAGE_SHIFT)], cLargePageOrder);
+ rtR0MemObjDelete(&pMemLnx->Core);
+ return VERR_NO_MEMORY;
+ }
+ }
+ *ppMem = &pMemLnx->Core;
+ return VINF_SUCCESS;
+ }
+ return VERR_NO_MEMORY;
+
+#else
+ /*
+ * We don't call rtR0MemObjFallbackAllocLarge here as it can be a really
+ * bad idea to trigger the swap daemon and whatnot. So, just fail.
+ */
+ RT_NOREF(ppMem, cb, cbLargePage, fFlags, pszTag);
+ return VERR_NOT_SUPPORTED;
+#endif
+}
+
+
+DECLHIDDEN(int) rtR0MemObjNativeAllocLow(PPRTR0MEMOBJINTERNAL ppMem, size_t cb, bool fExecutable, const char *pszTag)
+{
+ IPRT_LINUX_SAVE_EFL_AC();
+ PRTR0MEMOBJLNX pMemLnx;
+ int rc;
+
+ /* Try to avoid GFP_DMA. GFP_DMA32 was introduced with Linux 2.6.15. */
+#if (defined(RT_ARCH_AMD64) || defined(CONFIG_X86_PAE)) && defined(GFP_DMA32)
+ /* ZONE_DMA32: 0-4GB */
+ rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_LOW, cb, PAGE_SIZE, GFP_DMA32,
+ false /* non-contiguous */, fExecutable, VERR_NO_LOW_MEMORY, pszTag);
+ if (RT_FAILURE(rc))
+#endif
+#ifdef RT_ARCH_AMD64
+ /* ZONE_DMA: 0-16MB */
+ rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_LOW, cb, PAGE_SIZE, GFP_DMA,
+ false /* non-contiguous */, fExecutable, VERR_NO_LOW_MEMORY, pszTag);
+#else
+# ifdef CONFIG_X86_PAE
+# endif
+ /* ZONE_NORMAL: 0-896MB */
+ rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_LOW, cb, PAGE_SIZE, GFP_USER,
+ false /* non-contiguous */, fExecutable, VERR_NO_LOW_MEMORY, pszTag);
+#endif
+ if (RT_SUCCESS(rc))
+ {
+ rc = rtR0MemObjLinuxVMap(pMemLnx, fExecutable);
+ if (RT_SUCCESS(rc))
+ {
+ *ppMem = &pMemLnx->Core;
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return rc;
+ }
+
+ rtR0MemObjLinuxFreePages(pMemLnx);
+ rtR0MemObjDelete(&pMemLnx->Core);
+ }
+
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return rc;
+}
+
+
+DECLHIDDEN(int) rtR0MemObjNativeAllocCont(PPRTR0MEMOBJINTERNAL ppMem, size_t cb, bool fExecutable, const char *pszTag)
+{
+ IPRT_LINUX_SAVE_EFL_AC();
+ PRTR0MEMOBJLNX pMemLnx;
+ int rc;
+
+#if (defined(RT_ARCH_AMD64) || defined(CONFIG_X86_PAE)) && defined(GFP_DMA32)
+ /* ZONE_DMA32: 0-4GB */
+ rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_CONT, cb, PAGE_SIZE, GFP_DMA32,
+ true /* contiguous */, fExecutable, VERR_NO_CONT_MEMORY, pszTag);
+ if (RT_FAILURE(rc))
+#endif
+#ifdef RT_ARCH_AMD64
+ /* ZONE_DMA: 0-16MB */
+ rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_CONT, cb, PAGE_SIZE, GFP_DMA,
+ true /* contiguous */, fExecutable, VERR_NO_CONT_MEMORY, pszTag);
+#else
+ /* ZONE_NORMAL (32-bit hosts): 0-896MB */
+ rc = rtR0MemObjLinuxAllocPages(&pMemLnx, RTR0MEMOBJTYPE_CONT, cb, PAGE_SIZE, GFP_USER,
+ true /* contiguous */, fExecutable, VERR_NO_CONT_MEMORY, pszTag);
+#endif
+ if (RT_SUCCESS(rc))
+ {
+ rc = rtR0MemObjLinuxVMap(pMemLnx, fExecutable);
+ if (RT_SUCCESS(rc))
+ {
+#if defined(RT_STRICT) && (defined(RT_ARCH_AMD64) || defined(CONFIG_HIGHMEM64G))
+ size_t iPage = pMemLnx->cPages;
+ while (iPage-- > 0)
+ Assert(page_to_phys(pMemLnx->apPages[iPage]) < _4G);
+#endif
+ pMemLnx->Core.u.Cont.Phys = page_to_phys(pMemLnx->apPages[0]);
+ *ppMem = &pMemLnx->Core;
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return rc;
+ }
+
+ rtR0MemObjLinuxFreePages(pMemLnx);
+ rtR0MemObjDelete(&pMemLnx->Core);
+ }
+
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return rc;
+}
+
+
+/**
+ * Worker for rtR0MemObjLinuxAllocPhysSub that tries one allocation strategy.
+ *
+ * @returns IPRT status code.
+ * @param ppMem Where to store the memory object pointer on success.
+ * @param enmType The object type.
+ * @param cb The size of the allocation.
+ * @param uAlignment The alignment of the physical memory.
+ * Only valid for fContiguous == true, ignored otherwise.
+ * @param PhysHighest See rtR0MemObjNativeAllocPhys.
+ * @param pszTag Allocation tag used for statistics and such.
+ * @param fGfp The Linux GFP flags to use for the allocation.
+ */
+static int rtR0MemObjLinuxAllocPhysSub2(PPRTR0MEMOBJINTERNAL ppMem, RTR0MEMOBJTYPE enmType,
+ size_t cb, size_t uAlignment, RTHCPHYS PhysHighest, const char *pszTag, gfp_t fGfp)
+{
+ PRTR0MEMOBJLNX pMemLnx;
+ int rc = rtR0MemObjLinuxAllocPages(&pMemLnx, enmType, cb, uAlignment, fGfp,
+ enmType == RTR0MEMOBJTYPE_PHYS /* contiguous / non-contiguous */,
+ false /*fExecutable*/, VERR_NO_PHYS_MEMORY, pszTag);
+ if (RT_FAILURE(rc))
+ return rc;
+
+ /*
+ * Check the addresses if necessary. (Can be optimized a bit for PHYS.)
+ */
+ if (PhysHighest != NIL_RTHCPHYS)
+ {
+ size_t iPage = pMemLnx->cPages;
+ while (iPage-- > 0)
+ if (page_to_phys(pMemLnx->apPages[iPage]) > PhysHighest)
+ {
+ rtR0MemObjLinuxFreePages(pMemLnx);
+ rtR0MemObjDelete(&pMemLnx->Core);
+ return VERR_NO_MEMORY;
+ }
+ }
+
+ /*
+ * Complete the object.
+ */
+ if (enmType == RTR0MEMOBJTYPE_PHYS)
+ {
+ pMemLnx->Core.u.Phys.PhysBase = page_to_phys(pMemLnx->apPages[0]);
+ pMemLnx->Core.u.Phys.fAllocated = true;
+ }
+ *ppMem = &pMemLnx->Core;
+ return rc;
+}
+
+
+/**
+ * Worker for rtR0MemObjNativeAllocPhys and rtR0MemObjNativeAllocPhysNC.
+ *
+ * @returns IPRT status code.
+ * @param ppMem Where to store the memory object pointer on success.
+ * @param enmType The object type.
+ * @param cb The size of the allocation.
+ * @param uAlignment The alignment of the physical memory.
+ * Only valid for enmType == RTR0MEMOBJTYPE_PHYS, ignored otherwise.
+ * @param PhysHighest See rtR0MemObjNativeAllocPhys.
+ * @param pszTag Allocation tag used for statistics and such.
+ */
+static int rtR0MemObjLinuxAllocPhysSub(PPRTR0MEMOBJINTERNAL ppMem, RTR0MEMOBJTYPE enmType,
+ size_t cb, size_t uAlignment, RTHCPHYS PhysHighest, const char *pszTag)
+{
+ int rc;
+ IPRT_LINUX_SAVE_EFL_AC();
+
+ /*
+ * There are two clear cases and that's the <=16MB and anything-goes ones.
+ * When the physical address limit is somewhere in-between those two we'll
+ * just have to try, starting with HIGHUSER and working our way thru the
+ * different types, hoping we'll get lucky.
+ *
+ * We should probably move this physical address restriction logic up to
+ * the page alloc function as it would be more efficient there. But since
+ * we don't expect this to be a performance issue just yet it can wait.
+ */
+ if (PhysHighest == NIL_RTHCPHYS)
+ /* ZONE_HIGHMEM: the whole physical memory */
+ rc = rtR0MemObjLinuxAllocPhysSub2(ppMem, enmType, cb, uAlignment, PhysHighest, pszTag, GFP_HIGHUSER);
+ else if (PhysHighest <= _1M * 16)
+ /* ZONE_DMA: 0-16MB */
+ rc = rtR0MemObjLinuxAllocPhysSub2(ppMem, enmType, cb, uAlignment, PhysHighest, pszTag, GFP_DMA);
+ else
+ {
+ rc = VERR_NO_MEMORY;
+ if (RT_FAILURE(rc))
+ /* ZONE_HIGHMEM: the whole physical memory */
+ rc = rtR0MemObjLinuxAllocPhysSub2(ppMem, enmType, cb, uAlignment, PhysHighest, pszTag, GFP_HIGHUSER);
+ if (RT_FAILURE(rc))
+ /* ZONE_NORMAL: 0-896MB */
+ rc = rtR0MemObjLinuxAllocPhysSub2(ppMem, enmType, cb, uAlignment, PhysHighest, pszTag, GFP_USER);
+#ifdef GFP_DMA32
+ if (RT_FAILURE(rc))
+ /* ZONE_DMA32: 0-4GB */
+ rc = rtR0MemObjLinuxAllocPhysSub2(ppMem, enmType, cb, uAlignment, PhysHighest, pszTag, GFP_DMA32);
+#endif
+ if (RT_FAILURE(rc))
+ /* ZONE_DMA: 0-16MB */
+ rc = rtR0MemObjLinuxAllocPhysSub2(ppMem, enmType, cb, uAlignment, PhysHighest, pszTag, GFP_DMA);
+ }
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return rc;
+}
+
+
+/**
+ * Translates a kernel virtual address to a linux page structure by walking the
+ * page tables.
+ *
+ * @note We do assume that the page tables will not change as we are walking
+ * them. This assumption is rather forced by the fact that I could not
+ * immediately see any way of preventing this from happening. So, we
+ * take some extra care when accessing them.
+ *
+ * Because of this, we don't want to use this function on memory where
+ * attribute changes to nearby pages is likely to cause large pages to
+ * be used or split up. So, don't use this for the linear mapping of
+ * physical memory.
+ *
+ * @returns Pointer to the page structure or NULL if it could not be found.
+ * @param pv The kernel virtual address.
+ */
+RTDECL(struct page *) rtR0MemObjLinuxVirtToPage(void *pv)
+{
+ unsigned long ulAddr = (unsigned long)pv;
+ unsigned long pfn;
+ struct page *pPage;
+ pte_t *pEntry;
+ union
+ {
+ pgd_t Global;
+#if RTLNX_VER_MIN(4,12,0)
+ p4d_t Four;
+#endif
+#if RTLNX_VER_MIN(2,6,11)
+ pud_t Upper;
+#endif
+ pmd_t Middle;
+ pte_t Entry;
+ } u;
+
+ /* Should this happen in a situation this code will be called in? And if
+ * so, can it change under our feet? See also
+ * "Documentation/vm/active_mm.txt" in the kernel sources. */
+ if (RT_UNLIKELY(!current->active_mm))
+ return NULL;
+ u.Global = *pgd_offset(current->active_mm, ulAddr);
+ if (RT_UNLIKELY(pgd_none(u.Global)))
+ return NULL;
+#if RTLNX_VER_MIN(2,6,11)
+# if RTLNX_VER_MIN(4,12,0)
+ u.Four = *p4d_offset(&u.Global, ulAddr);
+ if (RT_UNLIKELY(p4d_none(u.Four)))
+ return NULL;
+ if (p4d_large(u.Four))
+ {
+ pPage = p4d_page(u.Four);
+ AssertReturn(pPage, NULL);
+ pfn = page_to_pfn(pPage); /* doing the safe way... */
+ AssertCompile(P4D_SHIFT - PAGE_SHIFT < 31);
+ pfn += (ulAddr >> PAGE_SHIFT) & ((UINT32_C(1) << (P4D_SHIFT - PAGE_SHIFT)) - 1);
+ return pfn_to_page(pfn);
+ }
+ u.Upper = *pud_offset(&u.Four, ulAddr);
+# else /* < 4.12 */
+ u.Upper = *pud_offset(&u.Global, ulAddr);
+# endif /* < 4.12 */
+ if (RT_UNLIKELY(pud_none(u.Upper)))
+ return NULL;
+# if RTLNX_VER_MIN(2,6,25)
+ if (pud_large(u.Upper))
+ {
+ pPage = pud_page(u.Upper);
+ AssertReturn(pPage, NULL);
+ pfn = page_to_pfn(pPage); /* doing the safe way... */
+ pfn += (ulAddr >> PAGE_SHIFT) & ((UINT32_C(1) << (PUD_SHIFT - PAGE_SHIFT)) - 1);
+ return pfn_to_page(pfn);
+ }
+# endif
+ u.Middle = *pmd_offset(&u.Upper, ulAddr);
+#else /* < 2.6.11 */
+ u.Middle = *pmd_offset(&u.Global, ulAddr);
+#endif /* < 2.6.11 */
+ if (RT_UNLIKELY(pmd_none(u.Middle)))
+ return NULL;
+#if RTLNX_VER_MIN(2,6,0)
+ if (pmd_large(u.Middle))
+ {
+ pPage = pmd_page(u.Middle);
+ AssertReturn(pPage, NULL);
+ pfn = page_to_pfn(pPage); /* doing the safe way... */
+ pfn += (ulAddr >> PAGE_SHIFT) & ((UINT32_C(1) << (PMD_SHIFT - PAGE_SHIFT)) - 1);
+ return pfn_to_page(pfn);
+ }
+#endif
+
+#if RTLNX_VER_MIN(6,5,0) || RTLNX_RHEL_RANGE(9,4, 9,99)
+ pEntry = __pte_map(&u.Middle, ulAddr);
+#elif RTLNX_VER_MIN(2,5,5) || defined(pte_offset_map) /* As usual, RHEL 3 had pte_offset_map earlier. */
+ pEntry = pte_offset_map(&u.Middle, ulAddr);
+#else
+ pEntry = pte_offset(&u.Middle, ulAddr);
+#endif
+ if (RT_UNLIKELY(!pEntry))
+ return NULL;
+ u.Entry = *pEntry;
+#if RTLNX_VER_MIN(2,5,5) || defined(pte_offset_map)
+ pte_unmap(pEntry);
+#endif
+
+ if (RT_UNLIKELY(!pte_present(u.Entry)))
+ return NULL;
+ return pte_page(u.Entry);
+}
+RT_EXPORT_SYMBOL(rtR0MemObjLinuxVirtToPage);
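A hedged sketch of resolving the physical address behind a kernel virtual address with this helper; pv is assumed to point into a vmap/vmalloc-style mapping, and PAGE_OFFSET_MASK is the page-offset mask used elsewhere in this file:

    struct page *pPage = rtR0MemObjLinuxVirtToPage(pv);
    if (pPage)
    {
        RTHCPHYS Phys = page_to_phys(pPage) | ((uintptr_t)pv & PAGE_OFFSET_MASK);
        /* ... */
    }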
+
+
+DECLHIDDEN(int) rtR0MemObjNativeAllocPhys(PPRTR0MEMOBJINTERNAL ppMem, size_t cb, RTHCPHYS PhysHighest, size_t uAlignment,
+ const char *pszTag)
+{
+ return rtR0MemObjLinuxAllocPhysSub(ppMem, RTR0MEMOBJTYPE_PHYS, cb, uAlignment, PhysHighest, pszTag);
+}
+
+
+DECLHIDDEN(int) rtR0MemObjNativeAllocPhysNC(PPRTR0MEMOBJINTERNAL ppMem, size_t cb, RTHCPHYS PhysHighest, const char *pszTag)
+{
+ return rtR0MemObjLinuxAllocPhysSub(ppMem, RTR0MEMOBJTYPE_PHYS_NC, cb, PAGE_SIZE, PhysHighest, pszTag);
+}
+
+
+DECLHIDDEN(int) rtR0MemObjNativeEnterPhys(PPRTR0MEMOBJINTERNAL ppMem, RTHCPHYS Phys, size_t cb, uint32_t uCachePolicy,
+ const char *pszTag)
+{
+ IPRT_LINUX_SAVE_EFL_AC();
+
+ /*
+ * All we need to do here is to validate that we can use
+ * ioremap on the specified address (32/64-bit dma_addr_t).
+ */
+ PRTR0MEMOBJLNX pMemLnx;
+ dma_addr_t PhysAddr = Phys;
+ AssertMsgReturn(PhysAddr == Phys, ("%#llx\n", (unsigned long long)Phys), VERR_ADDRESS_TOO_BIG);
+
+ pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(sizeof(*pMemLnx), RTR0MEMOBJTYPE_PHYS, NULL, cb, pszTag);
+ if (!pMemLnx)
+ {
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return VERR_NO_MEMORY;
+ }
+
+ pMemLnx->Core.u.Phys.PhysBase = PhysAddr;
+ pMemLnx->Core.u.Phys.fAllocated = false;
+ pMemLnx->Core.u.Phys.uCachePolicy = uCachePolicy;
+ Assert(!pMemLnx->cPages);
+ *ppMem = &pMemLnx->Core;
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return VINF_SUCCESS;
+}
+
+/* openSUSE Leap 42.3 detection :-/ */
+#if RTLNX_VER_RANGE(4,4,0, 4,6,0) && defined(FAULT_FLAG_REMOTE)
+# define GET_USER_PAGES_API KERNEL_VERSION(4, 10, 0) /* no typo! */
+#else
+# define GET_USER_PAGES_API LINUX_VERSION_CODE
+#endif
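+
+/* With the above, a 4.4 based openSUSE Leap 42.3 kernel that defines
+ * FAULT_FLAG_REMOTE (presumably because it backported the reworked
+ * get_user_pages() interface) gets GET_USER_PAGES_API = KERNEL_VERSION(4,10,0),
+ * so the 4.10-style calls below are picked even though LINUX_VERSION_CODE
+ * still reports 4.4.x. */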
+
+DECLHIDDEN(int) rtR0MemObjNativeLockUser(PPRTR0MEMOBJINTERNAL ppMem, RTR3PTR R3Ptr, size_t cb, uint32_t fAccess,
+ RTR0PROCESS R0Process, const char *pszTag)
+{
+ IPRT_LINUX_SAVE_EFL_AC();
+ const int cPages = cb >> PAGE_SHIFT;
+ struct task_struct *pTask = rtR0ProcessToLinuxTask(R0Process);
+# if GET_USER_PAGES_API < KERNEL_VERSION(6, 5, 0)
+ struct vm_area_struct **papVMAs;
+# endif
+ PRTR0MEMOBJLNX pMemLnx;
+ int rc = VERR_NO_MEMORY;
+ int const fWrite = fAccess & RTMEM_PROT_WRITE ? 1 : 0;
+
+ /*
+ * Check for valid task and size overflows.
+ */
+ if (!pTask)
+ return VERR_NOT_SUPPORTED;
+ if (((size_t)cPages << PAGE_SHIFT) != cb)
+ return VERR_OUT_OF_RANGE;
+
+ /*
+ * Allocate the memory object and a temporary buffer for the VMAs.
+ */
+ pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(RT_UOFFSETOF_DYN(RTR0MEMOBJLNX, apPages[cPages]), RTR0MEMOBJTYPE_LOCK,
+ (void *)R3Ptr, cb, pszTag);
+ if (!pMemLnx)
+ {
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return VERR_NO_MEMORY;
+ }
+
+# if GET_USER_PAGES_API < KERNEL_VERSION(6, 5, 0)
+ papVMAs = (struct vm_area_struct **)RTMemAlloc(sizeof(*papVMAs) * cPages);
+ if (papVMAs)
+ {
+# endif
+ LNX_MM_DOWN_READ(pTask->mm);
+
+ /*
+ * Get user pages.
+ */
+/** @todo r=bird: Should we not force read access too? */
+#if GET_USER_PAGES_API >= KERNEL_VERSION(4, 6, 0)
+ if (R0Process == RTR0ProcHandleSelf())
+ rc = get_user_pages(R3Ptr, /* Where from. */
+ cPages, /* How many pages. */
+# if GET_USER_PAGES_API >= KERNEL_VERSION(4, 9, 0)
+ fWrite ? FOLL_WRITE | /* Write to memory. */
+ FOLL_FORCE /* force write access. */
+ : 0, /* Write to memory. */
+# else
+ fWrite, /* Write to memory. */
+ fWrite, /* force write access. */
+# endif
+ &pMemLnx->apPages[0] /* Page array. */
+# if GET_USER_PAGES_API < KERNEL_VERSION(6, 5, 0)
+ , papVMAs /* vmas */
+# endif
+ );
+ /*
+ * Actually this should not happen at the moment because we only call
+ * this function for our own process.
+ */
+ else
+ rc = get_user_pages_remote(
+# if GET_USER_PAGES_API < KERNEL_VERSION(5, 9, 0)
+ pTask, /* Task for fault accounting. */
+# endif
+ pTask->mm, /* Whose pages. */
+ R3Ptr, /* Where from. */
+ cPages, /* How many pages. */
+# if GET_USER_PAGES_API >= KERNEL_VERSION(4, 9, 0)
+ fWrite ? FOLL_WRITE | /* Write to memory. */
+ FOLL_FORCE /* force write access. */
+ : 0, /* Write to memory. */
+# else
+ fWrite, /* Write to memory. */
+ fWrite, /* force write access. */
+# endif
+ &pMemLnx->apPages[0] /* Page array. */
+# if GET_USER_PAGES_API < KERNEL_VERSION(6, 5, 0)
+ , papVMAs /* vmas */
+# endif
+# if GET_USER_PAGES_API >= KERNEL_VERSION(4, 10, 0)
+ , NULL /* locked */
+# endif
+ );
+#else /* GET_USER_PAGES_API < KERNEL_VERSION(4, 6, 0) */
+ rc = get_user_pages(pTask, /* Task for fault accounting. */
+ pTask->mm, /* Whose pages. */
+ R3Ptr, /* Where from. */
+ cPages, /* How many pages. */
+/* The get_user_pages API change was back-ported to 4.4.168. */
+# if RTLNX_VER_RANGE(4,4,168, 4,5,0)
+ fWrite ? FOLL_WRITE | /* Write to memory. */
+ FOLL_FORCE /* force write access. */
+ : 0, /* Write to memory. */
+# else
+ fWrite, /* Write to memory. */
+ fWrite, /* force write access. */
+# endif
+ &pMemLnx->apPages[0] /* Page array. */
+# if GET_USER_PAGES_API < KERNEL_VERSION(6, 5, 0)
+ , papVMAs /* vmas */
+# endif
+ );
+#endif /* GET_USER_PAGES_API < KERNEL_VERSION(4, 6, 0) */
+ if (rc == cPages)
+ {
+ /*
+ * Flush dcache (required?), protect against fork and _really_ pin the page
+ * table entries. get_user_pages() will protect against swapping out the
+ * pages but it will NOT protect against removing page table entries. This
+ * can be achieved with
+ * - using mlock / mmap(..., MAP_LOCKED, ...) from userland. This requires
+ * an appropriate limit set up with setrlimit(..., RLIMIT_MEMLOCK, ...).
+ * Typical Linux distributions only permit a limited amount of locked
+ * pages by default (e.g. 32KB).
+ * - setting the PageReserved bit (as we do in rtR0MemObjLinuxAllocPages()),
+ * or by
+ * - setting the VM_LOCKED flag. This is the same as doing mlock() without
+ * a range check.
+ */
+ /** @todo The Linux fork() protection will require more work if this API
+ * is to be used for anything but locking VM pages. */
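+ /* Illustrative aside: the userland alternative mentioned above boils down to
+ * raising the limit with setrlimit(RLIMIT_MEMLOCK, ...) and then calling
+ * mlock(pv, cb); the code below instead sets VM_DONTCOPY | VM_LOCKED on the
+ * affected VMAs from ring-0. */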
+ while (rc-- > 0)
+ {
+ flush_dcache_page(pMemLnx->apPages[rc]);
+# if GET_USER_PAGES_API < KERNEL_VERSION(6, 5, 0)
+# if RTLNX_VER_MIN(6,3,0)
+ vm_flags_set(papVMAs[rc], VM_DONTCOPY | VM_LOCKED);
+# else
+ papVMAs[rc]->vm_flags |= VM_DONTCOPY | VM_LOCKED;
+# endif
+# endif
+ }
+
+ LNX_MM_UP_READ(pTask->mm);
+
+# if GET_USER_PAGES_API < KERNEL_VERSION(6, 5, 0)
+ RTMemFree(papVMAs);
+# endif
+
+ pMemLnx->Core.u.Lock.R0Process = R0Process;
+ pMemLnx->cPages = cPages;
+ Assert(!pMemLnx->fMappedToRing0);
+ *ppMem = &pMemLnx->Core;
+
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return VINF_SUCCESS;
+ }
+
+ /*
+ * Failed - we need to unlock any pages that we managed to lock.
+ */
+ while (rc-- > 0)
+ {
+ if (!PageReserved(pMemLnx->apPages[rc]))
+ SetPageDirty(pMemLnx->apPages[rc]);
+#if RTLNX_VER_MIN(4,6,0)
+ put_page(pMemLnx->apPages[rc]);
+#else
+ page_cache_release(pMemLnx->apPages[rc]);
+#endif
+ }
+
+ LNX_MM_UP_READ(pTask->mm);
+
+ rc = VERR_LOCK_FAILED;
+
+# if GET_USER_PAGES_API < KERNEL_VERSION(6, 5, 0)
+ RTMemFree(papVMAs);
+ }
+# endif
+
+ rtR0MemObjDelete(&pMemLnx->Core);
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return rc;
+}
+
+
+DECLHIDDEN(int) rtR0MemObjNativeLockKernel(PPRTR0MEMOBJINTERNAL ppMem, void *pv, size_t cb, uint32_t fAccess, const char *pszTag)
+{
+ IPRT_LINUX_SAVE_EFL_AC();
+ void *pvLast = (uint8_t *)pv + cb - 1;
+ size_t const cPages = cb >> PAGE_SHIFT;
+ PRTR0MEMOBJLNX pMemLnx;
+ bool fLinearMapping;
+ int rc;
+ uint8_t *pbPage;
+ size_t iPage;
+ NOREF(fAccess);
+
+ if ( !RTR0MemKernelIsValidAddr(pv)
+ || !RTR0MemKernelIsValidAddr(pv + cb))
+ return VERR_INVALID_PARAMETER;
+
+ /*
+ * The lower part of the kernel memory has a linear mapping between
+ * physical and virtual addresses. So we take a short cut here. This is
+ * assumed to be the cleanest way to handle those addresses (and the code
+ * is well tested, though the test for determining it is not very nice).
+ * If we ever decide it isn't we can still remove it.
+ */
+#if 0
+ fLinearMapping = (unsigned long)pvLast < VMALLOC_START;
+#else
+ fLinearMapping = (unsigned long)pv >= (unsigned long)__va(0)
+ && (unsigned long)pvLast < (unsigned long)high_memory;
+#endif
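+ /* For instance, a kmalloc'ed buffer lives in the linear mapping and passes
+ * the test above, while a vmalloc'ed buffer typically lies outside it (above
+ * high_memory) and is resolved page by page via rtR0MemObjLinuxVirtToPage()
+ * further down. */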
+
+ /*
+ * Allocate the memory object.
+ */
+ pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(RT_UOFFSETOF_DYN(RTR0MEMOBJLNX, apPages[cPages]), RTR0MEMOBJTYPE_LOCK,
+ pv, cb, pszTag);
+ if (!pMemLnx)
+ {
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return VERR_NO_MEMORY;
+ }
+
+ /*
+ * Gather the pages.
+ * We ASSUME all kernel pages are non-swappable and non-movable.
+ */
+ rc = VINF_SUCCESS;
+ pbPage = (uint8_t *)pvLast;
+ iPage = cPages;
+ if (!fLinearMapping)
+ {
+ while (iPage-- > 0)
+ {
+ struct page *pPage = rtR0MemObjLinuxVirtToPage(pbPage);
+ if (RT_UNLIKELY(!pPage))
+ {
+ rc = VERR_LOCK_FAILED;
+ break;
+ }
+ pMemLnx->apPages[iPage] = pPage;
+ pbPage -= PAGE_SIZE;
+ }
+ }
+ else
+ {
+ while (iPage-- > 0)
+ {
+ pMemLnx->apPages[iPage] = virt_to_page(pbPage);
+ pbPage -= PAGE_SIZE;
+ }
+ }
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * Complete the memory object and return.
+ */
+ pMemLnx->Core.u.Lock.R0Process = NIL_RTR0PROCESS;
+ pMemLnx->cPages = cPages;
+ Assert(!pMemLnx->fMappedToRing0);
+ *ppMem = &pMemLnx->Core;
+
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return VINF_SUCCESS;
+ }
+
+ rtR0MemObjDelete(&pMemLnx->Core);
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return rc;
+}
+
+
+DECLHIDDEN(int) rtR0MemObjNativeReserveKernel(PPRTR0MEMOBJINTERNAL ppMem, void *pvFixed, size_t cb, size_t uAlignment,
+ const char *pszTag)
+{
+#if RTLNX_VER_MIN(2,4,22)
+ IPRT_LINUX_SAVE_EFL_AC();
+ const size_t cPages = cb >> PAGE_SHIFT;
+ struct page *pDummyPage;
+ struct page **papPages;
+
+ /* check for unsupported stuff. */
+ AssertMsgReturn(pvFixed == (void *)-1, ("%p\n", pvFixed), VERR_NOT_SUPPORTED);
+ if (uAlignment > PAGE_SIZE)
+ return VERR_NOT_SUPPORTED;
+
+ /*
+ * Allocate a dummy page and create a page pointer array for vmap such that
+ * the dummy page is mapped all over the reserved area.
+ */
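+ /* Design note: this reserves a chunk of kernel virtual address space while
+ * committing only a single page of real memory; every PTE of the range maps
+ * the same dummy page read-only (PAGE_KERNEL_RO) until the reservation is
+ * later consumed and remapped. */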
+ pDummyPage = alloc_page(GFP_HIGHUSER | __GFP_NOWARN);
+ if (pDummyPage)
+ {
+ papPages = RTMemAlloc(sizeof(*papPages) * cPages);
+ if (papPages)
+ {
+ void *pv;
+ size_t iPage = cPages;
+ while (iPage-- > 0)
+ papPages[iPage] = pDummyPage;
+# ifdef VM_MAP
+ pv = vmap(papPages, cPages, VM_MAP, PAGE_KERNEL_RO);
+# else
+ pv = vmap(papPages, cPages, VM_ALLOC, PAGE_KERNEL_RO);
+# endif
+ RTMemFree(papPages);
+ if (pv)
+ {
+ PRTR0MEMOBJLNX pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(sizeof(*pMemLnx), RTR0MEMOBJTYPE_RES_VIRT, pv, cb, pszTag);
+ if (pMemLnx)
+ {
+ pMemLnx->Core.u.ResVirt.R0Process = NIL_RTR0PROCESS;
+ pMemLnx->cPages = 1;
+ pMemLnx->apPages[0] = pDummyPage;
+ *ppMem = &pMemLnx->Core;
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return VINF_SUCCESS;
+ }
+ vunmap(pv);
+ }
+ }
+ __free_page(pDummyPage);
+ }
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return VERR_NO_MEMORY;
+
+#else /* < 2.4.22 */
+ /*
+ * Could probably use ioremap here, but the caller is in a better position than us
+ * to select some safe physical memory.
+ */
+ return VERR_NOT_SUPPORTED;
+#endif
+}
+
+
+DECLHIDDEN(int) rtR0MemObjNativeReserveUser(PPRTR0MEMOBJINTERNAL ppMem, RTR3PTR R3PtrFixed, size_t cb, size_t uAlignment,
+ RTR0PROCESS R0Process, const char *pszTag)
+{
+ IPRT_LINUX_SAVE_EFL_AC();
+ PRTR0MEMOBJLNX pMemLnx;
+ void *pv;
+ struct task_struct *pTask = rtR0ProcessToLinuxTask(R0Process);
+ if (!pTask)
+ return VERR_NOT_SUPPORTED;
+
+ /*
+ * Check that the specified alignment is supported.
+ */
+ if (uAlignment > PAGE_SIZE)
+ return VERR_NOT_SUPPORTED;
+
+ /*
+ * Let rtR0MemObjLinuxDoMmap do the difficult bits.
+ */
+ pv = rtR0MemObjLinuxDoMmap(R3PtrFixed, cb, uAlignment, pTask, RTMEM_PROT_NONE);
+ if (pv == (void *)-1)
+ {
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return VERR_NO_MEMORY;
+ }
+
+ pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(sizeof(*pMemLnx), RTR0MEMOBJTYPE_RES_VIRT, pv, cb, pszTag);
+ if (!pMemLnx)
+ {
+ rtR0MemObjLinuxDoMunmap(pv, cb, pTask);
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return VERR_NO_MEMORY;
+ }
+
+ pMemLnx->Core.u.ResVirt.R0Process = R0Process;
+ *ppMem = &pMemLnx->Core;
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return VINF_SUCCESS;
+}
+
+
+DECLHIDDEN(int) rtR0MemObjNativeMapKernel(PPRTR0MEMOBJINTERNAL ppMem, RTR0MEMOBJ pMemToMap, void *pvFixed, size_t uAlignment,
+ unsigned fProt, size_t offSub, size_t cbSub, const char *pszTag)
+{
+ int rc = VERR_NO_MEMORY;
+ PRTR0MEMOBJLNX pMemLnxToMap = (PRTR0MEMOBJLNX)pMemToMap;
+ PRTR0MEMOBJLNX pMemLnx;
+ IPRT_LINUX_SAVE_EFL_AC();
+
+ /* Fail if requested to do something we can't. */
+ AssertMsgReturn(pvFixed == (void *)-1, ("%p\n", pvFixed), VERR_NOT_SUPPORTED);
+ if (uAlignment > PAGE_SIZE)
+ return VERR_NOT_SUPPORTED;
+
+ /*
+ * Create the IPRT memory object.
+ */
+ if (!cbSub)
+ cbSub = pMemLnxToMap->Core.cb - offSub;
+ pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(sizeof(*pMemLnx), RTR0MEMOBJTYPE_MAPPING, NULL, cbSub, pszTag);
+ if (pMemLnx)
+ {
+ if (pMemLnxToMap->cPages)
+ {
+#if RTLNX_VER_MIN(2,4,22)
+ /*
+ * Use vmap - 2.4.22 and later.
+ */
+ pgprot_t fPg = rtR0MemObjLinuxConvertProt(fProt, true /* kernel */);
+ /** @todo We don't really care too much for EXEC here... 5.8 always adds NX. */
+ Assert(((offSub + cbSub) >> PAGE_SHIFT) <= pMemLnxToMap->cPages);
+# ifdef VM_MAP
+ pMemLnx->Core.pv = vmap(&pMemLnxToMap->apPages[offSub >> PAGE_SHIFT], cbSub >> PAGE_SHIFT, VM_MAP, fPg);
+# else
+ pMemLnx->Core.pv = vmap(&pMemLnxToMap->apPages[offSub >> PAGE_SHIFT], cbSub >> PAGE_SHIFT, VM_ALLOC, fPg);
+# endif
+ if (pMemLnx->Core.pv)
+ {
+ pMemLnx->fMappedToRing0 = true;
+ rc = VINF_SUCCESS;
+ }
+ else
+ rc = VERR_MAP_FAILED;
+
+#else /* < 2.4.22 */
+ /*
+ * Only option here is to share mappings if possible and forget about fProt.
+ */
+ if (rtR0MemObjIsRing3(pMemToMap))
+ rc = VERR_NOT_SUPPORTED;
+ else
+ {
+ rc = VINF_SUCCESS;
+ if (!pMemLnxToMap->Core.pv)
+ rc = rtR0MemObjLinuxVMap(pMemLnxToMap, !!(fProt & RTMEM_PROT_EXEC));
+ if (RT_SUCCESS(rc))
+ {
+ Assert(pMemLnxToMap->Core.pv);
+ pMemLnx->Core.pv = (uint8_t *)pMemLnxToMap->Core.pv + offSub;
+ }
+ }
+#endif
+ }
+ else
+ {
+ /*
+ * MMIO / physical memory.
+ */
+ Assert(pMemLnxToMap->Core.enmType == RTR0MEMOBJTYPE_PHYS && !pMemLnxToMap->Core.u.Phys.fAllocated);
+#if RTLNX_VER_MIN(2,6,25)
+ /*
+ * ioremap() defaults to no caching since the 2.6 kernels.
+ * ioremap_nocache() was finally removed in 5.6-rc1.
+ */
+ pMemLnx->Core.pv = pMemLnxToMap->Core.u.Phys.uCachePolicy == RTMEM_CACHE_POLICY_MMIO
+ ? ioremap(pMemLnxToMap->Core.u.Phys.PhysBase + offSub, cbSub)
+ : ioremap_cache(pMemLnxToMap->Core.u.Phys.PhysBase + offSub, cbSub);
+#else /* KERNEL_VERSION < 2.6.25 */
+ pMemLnx->Core.pv = pMemLnxToMap->Core.u.Phys.uCachePolicy == RTMEM_CACHE_POLICY_MMIO
+ ? ioremap_nocache(pMemLnxToMap->Core.u.Phys.PhysBase + offSub, cbSub)
+ : ioremap(pMemLnxToMap->Core.u.Phys.PhysBase + offSub, cbSub);
+#endif /* KERNEL_VERSION < 2.6.25 */
+ if (pMemLnx->Core.pv)
+ {
+ /** @todo fix protection. */
+ rc = VINF_SUCCESS;
+ }
+ }
+ if (RT_SUCCESS(rc))
+ {
+ pMemLnx->Core.u.Mapping.R0Process = NIL_RTR0PROCESS;
+ *ppMem = &pMemLnx->Core;
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return VINF_SUCCESS;
+ }
+ rtR0MemObjDelete(&pMemLnx->Core);
+ }
+
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return rc;
+}
+
+
+#ifdef VBOX_USE_PAE_HACK
+/**
+ * Replace the PFN of a PTE with the address of the actual page.
+ *
+ * The caller maps a reserved dummy page at the address with the desired access
+ * and flags.
+ *
+ * This hack is required for older Linux kernels which don't provide
+ * remap_pfn_range().
+ *
+ * @returns 0 on success, -ENOMEM on failure.
+ * @param mm The memory context.
+ * @param ulAddr The mapping address.
+ * @param Phys The physical address of the page to map.
+ */
+static int rtR0MemObjLinuxFixPte(struct mm_struct *mm, unsigned long ulAddr, RTHCPHYS Phys)
+{
+ int rc = -ENOMEM;
+ pgd_t *pgd;
+
+ spin_lock(&mm->page_table_lock);
+
+ pgd = pgd_offset(mm, ulAddr);
+ if (!pgd_none(*pgd) && !pgd_bad(*pgd))
+ {
+ pmd_t *pmd = pmd_offset(pgd, ulAddr);
+ if (!pmd_none(*pmd))
+ {
+ pte_t *ptep = pte_offset_map(pmd, ulAddr);
+ if (ptep)
+ {
+ pte_t pte = *ptep;
+ pte.pte_high &= 0xfff00000;
+ pte.pte_high |= ((Phys >> 32) & 0x000fffff);
+ pte.pte_low &= 0x00000fff;
+ pte.pte_low |= (Phys & 0xfffff000);
+ set_pte(ptep, pte);
+ pte_unmap(ptep);
+ rc = 0;
+ }
+ }
+ }
+
+ spin_unlock(&mm->page_table_lock);
+ return rc;
+}
+#endif /* VBOX_USE_PAE_HACK */
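+
+/* Usage note: rtR0MemObjNativeMapUser() below first maps the reserved dummy
+ * page at the target address with remap_page_range() and then calls
+ * rtR0MemObjLinuxFixPte() to patch the PTE so that it points at the real
+ * (possibly >4GB) page, working around the missing remap_pfn_range(). */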
+
+
+DECLHIDDEN(int) rtR0MemObjNativeMapUser(PPRTR0MEMOBJINTERNAL ppMem, RTR0MEMOBJ pMemToMap, RTR3PTR R3PtrFixed, size_t uAlignment,
+ unsigned fProt, RTR0PROCESS R0Process, size_t offSub, size_t cbSub, const char *pszTag)
+{
+ struct task_struct *pTask = rtR0ProcessToLinuxTask(R0Process);
+ PRTR0MEMOBJLNX pMemLnxToMap = (PRTR0MEMOBJLNX)pMemToMap;
+ int rc = VERR_NO_MEMORY;
+ PRTR0MEMOBJLNX pMemLnx;
+#ifdef VBOX_USE_PAE_HACK
+ struct page *pDummyPage;
+ RTHCPHYS DummyPhys;
+#endif
+ IPRT_LINUX_SAVE_EFL_AC();
+
+ /*
+ * Check for restrictions.
+ */
+ if (!pTask)
+ return VERR_NOT_SUPPORTED;
+ if (uAlignment > PAGE_SIZE)
+ return VERR_NOT_SUPPORTED;
+
+#ifdef VBOX_USE_PAE_HACK
+ /*
+ * Allocate a dummy page for use when mapping the memory.
+ */
+ pDummyPage = alloc_page(GFP_USER | __GFP_NOWARN);
+ if (!pDummyPage)
+ {
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return VERR_NO_MEMORY;
+ }
+ SetPageReserved(pDummyPage);
+ DummyPhys = page_to_phys(pDummyPage);
+#endif
+
+ /*
+ * Create the IPRT memory object.
+ */
+ Assert(!offSub || cbSub);
+ if (cbSub == 0)
+ cbSub = pMemLnxToMap->Core.cb;
+ pMemLnx = (PRTR0MEMOBJLNX)rtR0MemObjNew(sizeof(*pMemLnx), RTR0MEMOBJTYPE_MAPPING, NULL, cbSub, pszTag);
+ if (pMemLnx)
+ {
+ /*
+ * Allocate user space mapping.
+ */
+ void *pv;
+ pv = rtR0MemObjLinuxDoMmap(R3PtrFixed, cbSub, uAlignment, pTask, fProt);
+ if (pv != (void *)-1)
+ {
+ /*
+ * Map page by page into the mmap area.
+ * This is generic, paranoid and not very efficient.
+ */
+ pgprot_t fPg = rtR0MemObjLinuxConvertProt(fProt, false /* user */);
+ unsigned long ulAddrCur = (unsigned long)pv;
+ const size_t cPages = (offSub + cbSub) >> PAGE_SHIFT;
+ size_t iPage;
+
+ LNX_MM_DOWN_WRITE(pTask->mm);
+
+ rc = VINF_SUCCESS;
+ if (pMemLnxToMap->cPages)
+ {
+ for (iPage = offSub >> PAGE_SHIFT; iPage < cPages; iPage++, ulAddrCur += PAGE_SIZE)
+ {
+#if RTLNX_VER_MAX(2,6,11)
+ RTHCPHYS Phys = page_to_phys(pMemLnxToMap->apPages[iPage]);
+#endif
+#if RTLNX_VER_MIN(2,6,0) || defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
+ struct vm_area_struct *vma = find_vma(pTask->mm, ulAddrCur); /* this is probably the same for all the pages... */
+ AssertBreakStmt(vma, rc = VERR_INTERNAL_ERROR);
+#endif
+#if RTLNX_VER_MAX(2,6,0) && defined(RT_ARCH_X86)
+ /* remap_page_range() limitation on x86 */
+ AssertBreakStmt(Phys < _4G, rc = VERR_NO_MEMORY);
+#endif
+
+#if defined(VBOX_USE_INSERT_PAGE) && RTLNX_VER_MIN(2,6,22)
+ rc = vm_insert_page(vma, ulAddrCur, pMemLnxToMap->apPages[iPage]);
+ /* These flags help make 100% sure some bad stuff won't happen (swap, core, ++).
+ * See remap_pfn_range() in mm/memory.c */
+
+#if RTLNX_VER_MIN(6,3,0)
+ vm_flags_set(vma, VM_DONTEXPAND | VM_DONTDUMP);
+#elif RTLNX_VER_MIN(3,7,0)
+ vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
+#else
+ vma->vm_flags |= VM_RESERVED;
+#endif
+#elif RTLNX_VER_MIN(2,6,11)
+ rc = remap_pfn_range(vma, ulAddrCur, page_to_pfn(pMemLnxToMap->apPages[iPage]), PAGE_SIZE, fPg);
+#elif defined(VBOX_USE_PAE_HACK)
+ rc = remap_page_range(vma, ulAddrCur, DummyPhys, PAGE_SIZE, fPg);
+ if (!rc)
+ rc = rtR0MemObjLinuxFixPte(pTask->mm, ulAddrCur, Phys);
+#elif RTLNX_VER_MIN(2,6,0) || defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
+ rc = remap_page_range(vma, ulAddrCur, Phys, PAGE_SIZE, fPg);
+#else /* 2.4 */
+ rc = remap_page_range(ulAddrCur, Phys, PAGE_SIZE, fPg);
+#endif
+ if (rc)
+ {
+ rc = VERR_NO_MEMORY;
+ break;
+ }
+ }
+ }
+ else
+ {
+ RTHCPHYS Phys;
+ if (pMemLnxToMap->Core.enmType == RTR0MEMOBJTYPE_PHYS)
+ Phys = pMemLnxToMap->Core.u.Phys.PhysBase;
+ else if (pMemLnxToMap->Core.enmType == RTR0MEMOBJTYPE_CONT)
+ Phys = pMemLnxToMap->Core.u.Cont.Phys;
+ else
+ {
+ AssertMsgFailed(("%d\n", pMemLnxToMap->Core.enmType));
+ Phys = NIL_RTHCPHYS;
+ }
+ if (Phys != NIL_RTHCPHYS)
+ {
+ for (iPage = offSub >> PAGE_SHIFT; iPage < cPages; iPage++, ulAddrCur += PAGE_SIZE, Phys += PAGE_SIZE)
+ {
+#if RTLNX_VER_MIN(2,6,0) || defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
+ struct vm_area_struct *vma = find_vma(pTask->mm, ulAddrCur); /* this is probably the same for all the pages... */
+ AssertBreakStmt(vma, rc = VERR_INTERNAL_ERROR);
+#endif
+#if RTLNX_VER_MAX(2,6,0) && defined(RT_ARCH_X86)
+ /* remap_page_range() limitation on x86 */
+ AssertBreakStmt(Phys < _4G, rc = VERR_NO_MEMORY);
+#endif
+
+#if RTLNX_VER_MIN(2,6,11)
+ rc = remap_pfn_range(vma, ulAddrCur, Phys, PAGE_SIZE, fPg);
+#elif defined(VBOX_USE_PAE_HACK)
+ rc = remap_page_range(vma, ulAddrCur, DummyPhys, PAGE_SIZE, fPg);
+ if (!rc)
+ rc = rtR0MemObjLinuxFixPte(pTask->mm, ulAddrCur, Phys);
+#elif RTLNX_VER_MIN(2,6,0) || defined(HAVE_26_STYLE_REMAP_PAGE_RANGE)
+ rc = remap_page_range(vma, ulAddrCur, Phys, PAGE_SIZE, fPg);
+#else /* 2.4 */
+ rc = remap_page_range(ulAddrCur, Phys, PAGE_SIZE, fPg);
+#endif
+ if (rc)
+ {
+ rc = VERR_NO_MEMORY;
+ break;
+ }
+ }
+ }
+ }
+
+#ifdef CONFIG_NUMA_BALANCING
+# if RTLNX_VER_MAX(3,13,0) && RTLNX_RHEL_MAX(7,0)
+# define VBOX_NUMA_HACK_OLD
+# endif
+ if (RT_SUCCESS(rc))
+ {
+ /** @todo Ugly hack! But right now we have no other means to
+ * disable automatic NUMA page balancing. */
+# ifdef RT_OS_X86
+# ifdef VBOX_NUMA_HACK_OLD
+ pTask->mm->numa_next_reset = jiffies + 0x7fffffffUL;
+# endif
+ pTask->mm->numa_next_scan = jiffies + 0x7fffffffUL;
+# else
+# ifdef VBOX_NUMA_HACK_OLD
+ pTask->mm->numa_next_reset = jiffies + 0x7fffffffffffffffUL;
+# endif
+ pTask->mm->numa_next_scan = jiffies + 0x7fffffffffffffffUL;
+# endif
+ }
+#endif /* CONFIG_NUMA_BALANCING */
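+ /* Background on the hack above: the NUMA balancing scanner periodically
+ * rewrites user PTEs to provoke hinting faults, which would undo the
+ * mappings just established. Pushing mm->numa_next_scan (and, on older
+ * kernels, numa_next_reset) far into the future effectively keeps the
+ * scanner away from this mm. */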
+
+ LNX_MM_UP_WRITE(pTask->mm);
+
+ if (RT_SUCCESS(rc))
+ {
+#ifdef VBOX_USE_PAE_HACK
+ __free_page(pDummyPage);
+#endif
+ pMemLnx->Core.pv = pv;
+ pMemLnx->Core.u.Mapping.R0Process = R0Process;
+ *ppMem = &pMemLnx->Core;
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return VINF_SUCCESS;
+ }
+
+ /*
+ * Bail out.
+ */
+ rtR0MemObjLinuxDoMunmap(pv, cbSub, pTask);
+ }
+ rtR0MemObjDelete(&pMemLnx->Core);
+ }
+#ifdef VBOX_USE_PAE_HACK
+ __free_page(pDummyPage);
+#endif
+
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return rc;
+}
+
+
+DECLHIDDEN(int) rtR0MemObjNativeProtect(PRTR0MEMOBJINTERNAL pMem, size_t offSub, size_t cbSub, uint32_t fProt)
+{
+# ifdef IPRT_USE_ALLOC_VM_AREA_FOR_EXEC
+ /*
+ * Currently only supported when we've got the PTE addresses from the kernel.
+ */
+ PRTR0MEMOBJLNX pMemLnx = (PRTR0MEMOBJLNX)pMem;
+ if (pMemLnx->pArea && pMemLnx->papPtesForArea)
+ {
+ pgprot_t const fPg = rtR0MemObjLinuxConvertProt(fProt, true /*fKernel*/);
+ size_t const cPages = (offSub + cbSub) >> PAGE_SHIFT;
+ pte_t **papPtes = pMemLnx->papPtesForArea;
+ size_t i;
+
+ for (i = offSub >> PAGE_SHIFT; i < cPages; i++)
+ {
+ set_pte(papPtes[i], mk_pte(pMemLnx->apPages[i], fPg));
+ }
+ preempt_disable();
+ __flush_tlb_all();
+ preempt_enable();
+ return VINF_SUCCESS;
+ }
+# elif defined(IPRT_USE_APPLY_TO_PAGE_RANGE_FOR_EXEC)
+ PRTR0MEMOBJLNX pMemLnx = (PRTR0MEMOBJLNX)pMem;
+ if ( pMemLnx->fExecutable
+ && pMemLnx->fMappedToRing0)
+ {
+ LNXAPPLYPGRANGE Args;
+ Args.pMemLnx = pMemLnx;
+ Args.fPg = rtR0MemObjLinuxConvertProt(fProt, true /*fKernel*/);
+ int rcLnx = apply_to_page_range(current->active_mm, (unsigned long)pMemLnx->Core.pv + offSub, cbSub,
+ rtR0MemObjLinuxApplyPageRange, (void *)&Args);
+ if (rcLnx)
+ return VERR_NOT_SUPPORTED;
+
+ return VINF_SUCCESS;
+ }
+# endif
+
+ NOREF(pMem);
+ NOREF(offSub);
+ NOREF(cbSub);
+ NOREF(fProt);
+ return VERR_NOT_SUPPORTED;
+}
+
+
+DECLHIDDEN(RTHCPHYS) rtR0MemObjNativeGetPagePhysAddr(PRTR0MEMOBJINTERNAL pMem, size_t iPage)
+{
+ PRTR0MEMOBJLNX pMemLnx = (PRTR0MEMOBJLNX)pMem;
+
+ if (pMemLnx->cPages)
+ return page_to_phys(pMemLnx->apPages[iPage]);
+
+ switch (pMemLnx->Core.enmType)
+ {
+ case RTR0MEMOBJTYPE_CONT:
+ return pMemLnx->Core.u.Cont.Phys + (iPage << PAGE_SHIFT);
+
+ case RTR0MEMOBJTYPE_PHYS:
+ return pMemLnx->Core.u.Phys.PhysBase + (iPage << PAGE_SHIFT);
+
+ /* the parent knows */
+ case RTR0MEMOBJTYPE_MAPPING:
+ return rtR0MemObjNativeGetPagePhysAddr(pMemLnx->Core.uRel.Child.pParent, iPage);
+
+ /* cPages > 0 */
+ case RTR0MEMOBJTYPE_LOW:
+ case RTR0MEMOBJTYPE_LOCK:
+ case RTR0MEMOBJTYPE_PHYS_NC:
+ case RTR0MEMOBJTYPE_PAGE:
+ case RTR0MEMOBJTYPE_LARGE_PAGE:
+ default:
+ AssertMsgFailed(("%d\n", pMemLnx->Core.enmType));
+ RT_FALL_THROUGH();
+
+ case RTR0MEMOBJTYPE_RES_VIRT:
+ return NIL_RTHCPHYS;
+ }
+}
+
diff --git a/src/VBox/Runtime/r0drv/linux/memuserkernel-r0drv-linux.c b/src/VBox/Runtime/r0drv/linux/memuserkernel-r0drv-linux.c
new file mode 100644
index 00000000..e526132d
--- /dev/null
+++ b/src/VBox/Runtime/r0drv/linux/memuserkernel-r0drv-linux.c
@@ -0,0 +1,191 @@
+/* $Id: memuserkernel-r0drv-linux.c $ */
+/** @file
+ * IPRT - User & Kernel Memory, Ring-0 Driver, Linux.
+ */
+
+/*
+ * Copyright (C) 2009-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#include "the-linux-kernel.h"
+#include "internal/iprt.h"
+
+#include <iprt/mem.h>
+#include <iprt/errcore.h>
+
+
+RTR0DECL(int) RTR0MemUserCopyFrom(void *pvDst, RTR3PTR R3PtrSrc, size_t cb)
+{
+ IPRT_LINUX_SAVE_EFL_AC();
+ if (RT_LIKELY(copy_from_user(pvDst, (void *)R3PtrSrc, cb) == 0))
+ {
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return VINF_SUCCESS;
+ }
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return VERR_ACCESS_DENIED;
+}
+RT_EXPORT_SYMBOL(RTR0MemUserCopyFrom);
+
+
+RTR0DECL(int) RTR0MemUserCopyTo(RTR3PTR R3PtrDst, void const *pvSrc, size_t cb)
+{
+ IPRT_LINUX_SAVE_EFL_AC();
+ if (RT_LIKELY(copy_to_user((void *)R3PtrDst, pvSrc, cb) == 0))
+ {
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return VINF_SUCCESS;
+ }
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return VERR_ACCESS_DENIED;
+}
+RT_EXPORT_SYMBOL(RTR0MemUserCopyTo);
+
+
+RTR0DECL(bool) RTR0MemUserIsValidAddr(RTR3PTR R3Ptr)
+{
+ IPRT_LINUX_SAVE_EFL_AC();
+#if RTLNX_VER_MIN(5,0,0) || RTLNX_RHEL_MIN(8,1)
+ bool fRc = access_ok((void *)R3Ptr, 1);
+#else
+ bool fRc = access_ok(VERIFY_READ, (void *)R3Ptr, 1);
+#endif
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return fRc;
+}
+RT_EXPORT_SYMBOL(RTR0MemUserIsValidAddr);
+
+
+RTR0DECL(bool) RTR0MemKernelIsValidAddr(void *pv)
+{
+ /* Couldn't find a straightforward way of doing this... */
+#if defined(RT_ARCH_X86) && defined(CONFIG_X86_HIGH_ENTRY)
+ return true; /* ?? */
+#elif defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)
+ return (uintptr_t)pv >= PAGE_OFFSET;
+#else
+# error "PORT ME"
+#if RTLNX_VER_MIN(5,0,0) || RTLNX_RHEL_MIN(8,1)
+ return !access_ok(pv, 1);
+#else
+ return !access_ok(VERIFY_READ, pv, 1);
+#endif
+#endif
+}
+RT_EXPORT_SYMBOL(RTR0MemKernelIsValidAddr);
+
+
+RTR0DECL(bool) RTR0MemAreKrnlAndUsrDifferent(void)
+{
+#if defined(RT_ARCH_X86) && defined(CONFIG_X86_HIGH_ENTRY) /* ?? */
+ return false;
+#else
+ return true;
+#endif
+}
+RT_EXPORT_SYMBOL(RTR0MemAreKrnlAndUsrDifferent);
+
+
+/**
+ * Treats both source and destination as unsafe buffers.
+ */
+static int rtR0MemKernelCopyLnxWorker(void *pvDst, void const *pvSrc, size_t cb)
+{
+#if RTLNX_VER_MIN(2,5,55)
+/* _ASM_EXTABLE was introduced in 2.6.25 from what I can tell. Using #ifndef
+ here since it has to be a macro and you never know what someone might have
+ backported to an earlier kernel release. */
+# ifndef _ASM_EXTABLE
+# if ARCH_BITS == 32
+# define _ASM_EXTABLE(a_Instr, a_Resume) \
+ ".section __ex_table,\"a\"\n" \
+ ".balign 4\n" \
+ ".long " #a_Instr "\n" \
+ ".long " #a_Resume "\n" \
+ ".previous\n"
+# else
+# define _ASM_EXTABLE(a_Instr, a_Resume) \
+ ".section __ex_table,\"a\"\n" \
+ ".balign 8\n" \
+ ".quad " #a_Instr "\n" \
+ ".quad " #a_Resume "\n" \
+ ".previous\n"
+# endif
+# endif /* !_ASM_EXTABLE */
+ int rc;
+ IPRT_LINUX_SAVE_EFL_AC(); /* paranoia */
+ if (!cb)
+ return VINF_SUCCESS;
+
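+ /* How the fault recovery works: if the 'rep; movsb' started at label 1
+ * faults, the exception table entry emitted by _ASM_EXTABLE(1b, 3b) makes
+ * the kernel resume at label 3, which loads VERR_ACCESS_DENIED into rc and
+ * jumps back to label 2, i.e. past the copy. Otherwise rc keeps its initial
+ * VINF_SUCCESS value. */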
+ __asm__ __volatile__ ("cld\n"
+ "1:\n\t"
+ "rep; movsb\n"
+ "2:\n\t"
+ ".section .fixup,\"ax\"\n"
+ "3:\n\t"
+ "movl %4, %0\n\t"
+ "jmp 2b\n\t"
+ ".previous\n"
+ _ASM_EXTABLE(1b, 3b)
+ : "=r" (rc),
+ "=D" (pvDst),
+ "=S" (pvSrc),
+ "=c" (cb)
+ : "i" (VERR_ACCESS_DENIED),
+ "0" (VINF_SUCCESS),
+ "1" (pvDst),
+ "2" (pvSrc),
+ "3" (cb)
+ : "memory");
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return rc;
+#else
+ return VERR_NOT_SUPPORTED;
+#endif
+}
+
+
+RTR0DECL(int) RTR0MemKernelCopyFrom(void *pvDst, void const *pvSrc, size_t cb)
+{
+ return rtR0MemKernelCopyLnxWorker(pvDst, pvSrc, cb);
+}
+RT_EXPORT_SYMBOL(RTR0MemKernelCopyFrom);
+
+
+RTR0DECL(int) RTR0MemKernelCopyTo(void *pvDst, void const *pvSrc, size_t cb)
+{
+ return rtR0MemKernelCopyLnxWorker(pvDst, pvSrc, cb);
+}
+RT_EXPORT_SYMBOL(RTR0MemKernelCopyTo);
+
diff --git a/src/VBox/Runtime/r0drv/linux/mp-r0drv-linux.c b/src/VBox/Runtime/r0drv/linux/mp-r0drv-linux.c
new file mode 100644
index 00000000..5ac05db5
--- /dev/null
+++ b/src/VBox/Runtime/r0drv/linux/mp-r0drv-linux.c
@@ -0,0 +1,640 @@
+/* $Id: mp-r0drv-linux.c $ */
+/** @file
+ * IPRT - Multiprocessor, Ring-0 Driver, Linux.
+ */
+
+/*
+ * Copyright (C) 2008-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#include "the-linux-kernel.h"
+#include "internal/iprt.h"
+
+#include <iprt/mp.h>
+#include <iprt/cpuset.h>
+#include <iprt/err.h>
+#include <iprt/asm.h>
+#include <iprt/thread.h>
+#include "r0drv/mp-r0drv.h"
+
+
+/*********************************************************************************************************************************
+* Defined Constants And Macros *
+*********************************************************************************************************************************/
+#if defined(nr_cpumask_bits) || RTLNX_VER_MIN(2,6,28)
+# define VBOX_NR_CPUMASK_BITS (nr_cpumask_bits) /* same as nr_cpu_ids */
+#else
+# define VBOX_NR_CPUMASK_BITS (NR_CPUS)
+#endif
+
+
+RTDECL(RTCPUID) RTMpCpuId(void)
+{
+ return smp_processor_id();
+}
+RT_EXPORT_SYMBOL(RTMpCpuId);
+
+
+RTDECL(int) RTMpCurSetIndex(void)
+{
+ return smp_processor_id();
+}
+RT_EXPORT_SYMBOL(RTMpCurSetIndex);
+
+
+RTDECL(int) RTMpCurSetIndexAndId(PRTCPUID pidCpu)
+{
+ return *pidCpu = smp_processor_id();
+}
+RT_EXPORT_SYMBOL(RTMpCurSetIndexAndId);
+
+
+RTDECL(int) RTMpCpuIdToSetIndex(RTCPUID idCpu)
+{
+ return idCpu < RTCPUSET_MAX_CPUS && idCpu < VBOX_NR_CPUMASK_BITS ? (int)idCpu : -1;
+}
+RT_EXPORT_SYMBOL(RTMpCpuIdToSetIndex);
+
+
+RTDECL(RTCPUID) RTMpCpuIdFromSetIndex(int iCpu)
+{
+ return (unsigned)iCpu < VBOX_NR_CPUMASK_BITS ? (RTCPUID)iCpu : NIL_RTCPUID;
+}
+RT_EXPORT_SYMBOL(RTMpCpuIdFromSetIndex);
+
+
+RTDECL(RTCPUID) RTMpGetMaxCpuId(void)
+{
+ return VBOX_NR_CPUMASK_BITS - 1;
+}
+RT_EXPORT_SYMBOL(RTMpGetMaxCpuId);
+
+
+RTDECL(bool) RTMpIsCpuPossible(RTCPUID idCpu)
+{
+#if defined(CONFIG_SMP)
+# if RTLNX_VER_MIN(2,6,2) || defined(cpu_possible)
+ return idCpu < VBOX_NR_CPUMASK_BITS && cpu_possible(idCpu);
+# else /* < 2.5.29 */
+ return idCpu < (RTCPUID)(smp_num_cpus);
+# endif
+#else
+ return idCpu == RTMpCpuId();
+#endif
+}
+RT_EXPORT_SYMBOL(RTMpIsCpuPossible);
+
+
+RTDECL(PRTCPUSET) RTMpGetSet(PRTCPUSET pSet)
+{
+ RTCPUID idCpu;
+
+ RTCpuSetEmpty(pSet);
+ idCpu = RTMpGetMaxCpuId();
+ do
+ {
+ if (RTMpIsCpuPossible(idCpu))
+ RTCpuSetAdd(pSet, idCpu);
+ } while (idCpu-- > 0);
+ return pSet;
+}
+RT_EXPORT_SYMBOL(RTMpGetSet);
+
+
+RTDECL(RTCPUID) RTMpGetCount(void)
+{
+#ifdef CONFIG_SMP
+# if RTLNX_VER_MIN(2,6,4) || defined(num_possible_cpus)
+ return num_possible_cpus();
+# elif RTLNX_VER_MAX(2,5,0)
+ return smp_num_cpus;
+# else
+ RTCPUSET Set;
+ RTMpGetSet(&Set);
+ return RTCpuSetCount(&Set);
+# endif
+#else
+ return 1;
+#endif
+}
+RT_EXPORT_SYMBOL(RTMpGetCount);
+
+
+RTDECL(bool) RTMpIsCpuOnline(RTCPUID idCpu)
+{
+#ifdef CONFIG_SMP
+# if RTLNX_VER_MIN(2,6,0) || defined(cpu_online)
+ return idCpu < VBOX_NR_CPUMASK_BITS && cpu_online(idCpu);
+# else /* 2.4: */
+ return idCpu < VBOX_NR_CPUMASK_BITS && cpu_online_map & RT_BIT_64(idCpu);
+# endif
+#else
+ return idCpu == RTMpCpuId();
+#endif
+}
+RT_EXPORT_SYMBOL(RTMpIsCpuOnline);
+
+
+RTDECL(PRTCPUSET) RTMpGetOnlineSet(PRTCPUSET pSet)
+{
+#ifdef CONFIG_SMP
+ RTCPUID idCpu;
+
+ RTCpuSetEmpty(pSet);
+ idCpu = RTMpGetMaxCpuId();
+ do
+ {
+ if (RTMpIsCpuOnline(idCpu))
+ RTCpuSetAdd(pSet, idCpu);
+ } while (idCpu-- > 0);
+#else
+ RTCpuSetEmpty(pSet);
+ RTCpuSetAdd(pSet, RTMpCpuId());
+#endif
+ return pSet;
+}
+RT_EXPORT_SYMBOL(RTMpGetOnlineSet);
+
+
+RTDECL(RTCPUID) RTMpGetOnlineCount(void)
+{
+#ifdef CONFIG_SMP
+# if RTLNX_VER_MIN(2,6,0) || defined(num_online_cpus)
+ return num_online_cpus();
+# else
+ RTCPUSET Set;
+ RTMpGetOnlineSet(&Set);
+ return RTCpuSetCount(&Set);
+# endif
+#else
+ return 1;
+#endif
+}
+RT_EXPORT_SYMBOL(RTMpGetOnlineCount);
+
+
+RTDECL(bool) RTMpIsCpuWorkPending(void)
+{
+ /** @todo (not used on non-Windows platforms yet). */
+ return false;
+}
+RT_EXPORT_SYMBOL(RTMpIsCpuWorkPending);
+
+
+/**
+ * Wrapper between the native linux per-cpu callbacks and PFNRTWORKER.
+ *
+ * @param pvInfo Pointer to the RTMPARGS package.
+ */
+static void rtmpLinuxWrapper(void *pvInfo)
+{
+ PRTMPARGS pArgs = (PRTMPARGS)pvInfo;
+ ASMAtomicIncU32(&pArgs->cHits);
+ pArgs->pfnWorker(RTMpCpuId(), pArgs->pvUser1, pArgs->pvUser2);
+}
+
+#ifdef CONFIG_SMP
+
+# if RTLNX_VER_MIN(2,6,27)
+/**
+ * Wrapper between the native linux per-cpu callbacks and PFNRTWORKER, doing
+ * the hit count increment after calling the worker.
+ *
+ * @param pvInfo Pointer to the RTMPARGS package.
+ */
+static void rtmpLinuxWrapperPostInc(void *pvInfo)
+{
+ PRTMPARGS pArgs = (PRTMPARGS)pvInfo;
+ pArgs->pfnWorker(RTMpCpuId(), pArgs->pvUser1, pArgs->pvUser2);
+ ASMAtomicIncU32(&pArgs->cHits);
+}
+# endif
+
+
+/**
+ * Wrapper between the native linux all-cpu callbacks and PFNRTWORKER.
+ *
+ * @param pvInfo Pointer to the RTMPARGS package.
+ */
+static void rtmpLinuxAllWrapper(void *pvInfo)
+{
+ PRTMPARGS pArgs = (PRTMPARGS)pvInfo;
+ PRTCPUSET pWorkerSet = pArgs->pWorkerSet;
+ RTCPUID idCpu = RTMpCpuId();
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ if (RTCpuSetIsMember(pWorkerSet, idCpu))
+ {
+ pArgs->pfnWorker(idCpu, pArgs->pvUser1, pArgs->pvUser2);
+ RTCpuSetDel(pWorkerSet, idCpu);
+ }
+}
+
+#endif /* CONFIG_SMP */
+
+RTDECL(int) RTMpOnAll(PFNRTMPWORKER pfnWorker, void *pvUser1, void *pvUser2)
+{
+ IPRT_LINUX_SAVE_EFL_AC();
+ RTMPARGS Args;
+ RTCPUSET OnlineSet;
+ RTCPUID idCpu;
+#ifdef CONFIG_SMP
+ uint32_t cLoops;
+#endif
+
+ RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;
+
+ Args.pfnWorker = pfnWorker;
+ Args.pvUser1 = pvUser1;
+ Args.pvUser2 = pvUser2;
+ Args.idCpu = NIL_RTCPUID;
+ Args.cHits = 0;
+
+ RTThreadPreemptDisable(&PreemptState);
+ RTMpGetOnlineSet(&OnlineSet);
+ Args.pWorkerSet = &OnlineSet;
+ idCpu = RTMpCpuId();
+
+#ifdef CONFIG_SMP
+ if (RTCpuSetCount(&OnlineSet) > 1)
+ {
+ /* Fire the function on all other CPUs without waiting for completion. */
+# if RTLNX_VER_MIN(5,3,0)
+ smp_call_function(rtmpLinuxAllWrapper, &Args, 0 /* wait */);
+# elif RTLNX_VER_MIN(2,6,27)
+ int rc = smp_call_function(rtmpLinuxAllWrapper, &Args, 0 /* wait */);
+ Assert(!rc); NOREF(rc);
+# else
+ int rc = smp_call_function(rtmpLinuxAllWrapper, &Args, 0 /* retry */, 0 /* wait */);
+ Assert(!rc); NOREF(rc);
+# endif
+ }
+#endif
+
+ /* Fire the function on this CPU. */
+ Args.pfnWorker(idCpu, Args.pvUser1, Args.pvUser2);
+ RTCpuSetDel(Args.pWorkerSet, idCpu);
+
+#ifdef CONFIG_SMP
+ /* Wait for all of them to finish. */
+ cLoops = 64000;
+ while (!RTCpuSetIsEmpty(Args.pWorkerSet))
+ {
+ /* Periodically check if any CPU in the wait set has gone offline, if so update the wait set. */
+ if (!cLoops--)
+ {
+ RTCPUSET OnlineSetNow;
+ RTMpGetOnlineSet(&OnlineSetNow);
+ RTCpuSetAnd(Args.pWorkerSet, &OnlineSetNow);
+
+ cLoops = 64000;
+ }
+
+ ASMNopPause();
+ }
+#endif
+
+ RTThreadPreemptRestore(&PreemptState);
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return VINF_SUCCESS;
+}
+RT_EXPORT_SYMBOL(RTMpOnAll);
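+
+/* Usage sketch (MyWorker being a hypothetical caller-supplied PFNRTMPWORKER):
+ * RTMpOnAll(MyWorker, pvUser1, pvUser2) invokes
+ * MyWorker(idCpu, pvUser1, pvUser2) on every online CPU, including the
+ * calling one, and only returns once the worker has run everywhere (or the
+ * remaining CPUs have gone offline). */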
+
+
+RTDECL(int) RTMpOnOthers(PFNRTMPWORKER pfnWorker, void *pvUser1, void *pvUser2)
+{
+#ifdef CONFIG_SMP
+ IPRT_LINUX_SAVE_EFL_AC();
+ RTMPARGS Args;
+
+ RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;
+ Args.pfnWorker = pfnWorker;
+ Args.pvUser1 = pvUser1;
+ Args.pvUser2 = pvUser2;
+ Args.idCpu = NIL_RTCPUID;
+ Args.cHits = 0;
+
+ RTThreadPreemptDisable(&PreemptState);
+# if RTLNX_VER_MIN(5,3,0)
+ smp_call_function(rtmpLinuxWrapper, &Args, 1 /* wait */);
+# elif RTLNX_VER_MIN(2,6,27)
+ int rc = smp_call_function(rtmpLinuxWrapper, &Args, 1 /* wait */);
+ Assert(rc == 0); NOREF(rc);
+# else /* older kernels */
+ int rc = smp_call_function(rtmpLinuxWrapper, &Args, 0 /* retry */, 1 /* wait */);
+ Assert(rc == 0); NOREF(rc);
+# endif /* older kernels */
+ RTThreadPreemptRestore(&PreemptState);
+
+ IPRT_LINUX_RESTORE_EFL_AC();
+#else
+ RT_NOREF(pfnWorker, pvUser1, pvUser2);
+#endif
+ return VINF_SUCCESS;
+}
+RT_EXPORT_SYMBOL(RTMpOnOthers);
+
+
+#if RTLNX_VER_MAX(2,6,27) && defined(CONFIG_SMP)
+/**
+ * Wrapper between the native linux per-cpu callbacks and PFNRTWORKER
+ * employed by RTMpOnPair on older kernels that lack smp_call_function_many.
+ *
+ * @param pvInfo Pointer to the RTMPARGS package.
+ */
+static void rtMpLinuxOnPairWrapper(void *pvInfo)
+{
+ PRTMPARGS pArgs = (PRTMPARGS)pvInfo;
+ RTCPUID idCpu = RTMpCpuId();
+
+ if ( idCpu == pArgs->idCpu
+ || idCpu == pArgs->idCpu2)
+ {
+ pArgs->pfnWorker(idCpu, pArgs->pvUser1, pArgs->pvUser2);
+ ASMAtomicIncU32(&pArgs->cHits);
+ }
+}
+#endif
+
+
+RTDECL(int) RTMpOnPair(RTCPUID idCpu1, RTCPUID idCpu2, uint32_t fFlags, PFNRTMPWORKER pfnWorker, void *pvUser1, void *pvUser2)
+{
+#ifdef CONFIG_SMP
+ IPRT_LINUX_SAVE_EFL_AC();
+ int rc;
+ RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;
+# if RTLNX_VER_MIN(2,6,28) /* 2.6.28 introduces CONFIG_CPUMASK_OFFSTACK */
+ cpumask_var_t DstCpuMask;
+# elif RTLNX_VER_MIN(2,6,27)
+ cpumask_t DstCpuMask;
+# endif
+
+ AssertReturn(idCpu1 != idCpu2, VERR_INVALID_PARAMETER);
+ AssertReturn(!(fFlags & RTMPON_F_VALID_MASK), VERR_INVALID_FLAGS);
+
+ /*
+ * Prepare the CPU mask before we disable preemption.
+ */
+# if RTLNX_VER_MIN(2,6,30)
+ if (!zalloc_cpumask_var(&DstCpuMask, GFP_KERNEL))
+ return VERR_NO_MEMORY;
+ cpumask_set_cpu(idCpu1, DstCpuMask);
+ cpumask_set_cpu(idCpu2, DstCpuMask);
+# elif RTLNX_VER_MIN(2,6,28)
+ if (!alloc_cpumask_var(&DstCpuMask, GFP_KERNEL))
+ return VERR_NO_MEMORY;
+ cpumask_clear(DstCpuMask);
+ cpumask_set_cpu(idCpu1, DstCpuMask);
+ cpumask_set_cpu(idCpu2, DstCpuMask);
+# elif RTLNX_VER_MIN(2,6,27)
+ cpus_clear(DstCpuMask);
+ cpu_set(idCpu1, DstCpuMask);
+ cpu_set(idCpu2, DstCpuMask);
+# endif
+
+ /*
+ * Check that both CPUs are online before doing the broadcast call.
+ */
+ RTThreadPreemptDisable(&PreemptState);
+ if ( RTMpIsCpuOnline(idCpu1)
+ && RTMpIsCpuOnline(idCpu2))
+ {
+ /*
+ * Use the smp_call_function variant taking a cpu mask where available,
+ * falling back on broadcast with filter. Slight snag: if one of the
+ * CPUs is the one we're running on, we must do the call and the
+ * post-call wait ourselves.
+ */
+ RTCPUID idCpuSelf = RTMpCpuId();
+ bool const fCallSelf = idCpuSelf == idCpu1 || idCpuSelf == idCpu2;
+ RTMPARGS Args;
+ Args.pfnWorker = pfnWorker;
+ Args.pvUser1 = pvUser1;
+ Args.pvUser2 = pvUser2;
+ Args.idCpu = idCpu1;
+ Args.idCpu2 = idCpu2;
+ Args.cHits = 0;
+
+# if RTLNX_VER_MIN(2,6,28)
+ smp_call_function_many(DstCpuMask, rtmpLinuxWrapperPostInc, &Args, !fCallSelf /* wait */);
+ rc = 0;
+# elif RTLNX_VER_MIN(2,6,27)
+ rc = smp_call_function_mask(DstCpuMask, rtmpLinuxWrapperPostInc, &Args, !fCallSelf /* wait */);
+# else /* older kernels */
+ rc = smp_call_function(rtMpLinuxOnPairWrapper, &Args, 0 /* retry */, !fCallSelf /* wait */);
+# endif /* older kernels */
+ Assert(rc == 0);
+
+ /* Call ourselves if necessary and wait for the other party to be done. */
+ if (fCallSelf)
+ {
+ uint32_t cLoops = 0;
+ rtmpLinuxWrapper(&Args);
+ while (ASMAtomicReadU32(&Args.cHits) < 2)
+ {
+ if ((cLoops & 0x1ff) == 0 && !RTMpIsCpuOnline(idCpuSelf == idCpu1 ? idCpu2 : idCpu1))
+ break;
+ cLoops++;
+ ASMNopPause();
+ }
+ }
+
+ Assert(Args.cHits <= 2);
+ if (Args.cHits == 2)
+ rc = VINF_SUCCESS;
+ else if (Args.cHits == 1)
+ rc = VERR_NOT_ALL_CPUS_SHOWED;
+ else if (Args.cHits == 0)
+ rc = VERR_CPU_OFFLINE;
+ else
+ rc = VERR_CPU_IPE_1;
+ }
+ /*
+ * A CPU must be present to be considered just offline.
+ */
+ else if ( RTMpIsCpuPresent(idCpu1)
+ && RTMpIsCpuPresent(idCpu2))
+ rc = VERR_CPU_OFFLINE;
+ else
+ rc = VERR_CPU_NOT_FOUND;
+
+ RTThreadPreemptRestore(&PreemptState);
+# if RTLNX_VER_MIN(2,6,28)
+ free_cpumask_var(DstCpuMask);
+# endif
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return rc;
+
+#else /* !CONFIG_SMP */
+ RT_NOREF(idCpu1, idCpu2, fFlags, pfnWorker, pvUser1, pvUser2);
+ return VERR_CPU_NOT_FOUND;
+#endif /* !CONFIG_SMP */
+}
+RT_EXPORT_SYMBOL(RTMpOnPair);
+
+
+RTDECL(bool) RTMpOnPairIsConcurrentExecSupported(void)
+{
+ return true;
+}
+RT_EXPORT_SYMBOL(RTMpOnPairIsConcurrentExecSupported);
+
+
+#if RTLNX_VER_MAX(2,6,19) && defined(CONFIG_SMP)
+/**
+ * Wrapper between the native linux per-cpu callbacks and PFNRTWORKER
+ * employed by RTMpOnSpecific on older kernels that lack smp_call_function_single.
+ *
+ * @param pvInfo Pointer to the RTMPARGS package.
+ */
+static void rtmpOnSpecificLinuxWrapper(void *pvInfo)
+{
+ PRTMPARGS pArgs = (PRTMPARGS)pvInfo;
+ RTCPUID idCpu = RTMpCpuId();
+
+ if (idCpu == pArgs->idCpu)
+ {
+ pArgs->pfnWorker(idCpu, pArgs->pvUser1, pArgs->pvUser2);
+ ASMAtomicIncU32(&pArgs->cHits);
+ }
+}
+#endif
+
+
+RTDECL(int) RTMpOnSpecific(RTCPUID idCpu, PFNRTMPWORKER pfnWorker, void *pvUser1, void *pvUser2)
+{
+ IPRT_LINUX_SAVE_EFL_AC();
+ int rc;
+ RTMPARGS Args;
+
+ RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;
+ Args.pfnWorker = pfnWorker;
+ Args.pvUser1 = pvUser1;
+ Args.pvUser2 = pvUser2;
+ Args.idCpu = idCpu;
+ Args.cHits = 0;
+
+ if (!RTMpIsCpuPossible(idCpu))
+ return VERR_CPU_NOT_FOUND;
+
+ RTThreadPreemptDisable(&PreemptState);
+ if (idCpu != RTMpCpuId())
+ {
+#ifdef CONFIG_SMP
+ if (RTMpIsCpuOnline(idCpu))
+ {
+# if RTLNX_VER_MIN(2,6,27)
+ rc = smp_call_function_single(idCpu, rtmpLinuxWrapper, &Args, 1 /* wait */);
+# elif RTLNX_VER_MIN(2,6,19)
+ rc = smp_call_function_single(idCpu, rtmpLinuxWrapper, &Args, 0 /* retry */, 1 /* wait */);
+# else /* older kernels */
+ rc = smp_call_function(rtmpOnSpecificLinuxWrapper, &Args, 0 /* retry */, 1 /* wait */);
+# endif /* older kernels */
+ Assert(rc == 0);
+ rc = Args.cHits ? VINF_SUCCESS : VERR_CPU_OFFLINE;
+ }
+ else
+#endif /* CONFIG_SMP */
+ rc = VERR_CPU_OFFLINE;
+ }
+ else
+ {
+ rtmpLinuxWrapper(&Args);
+ rc = VINF_SUCCESS;
+ }
+ RTThreadPreemptRestore(&PreemptState);
+
+ NOREF(rc);
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return rc;
+}
+RT_EXPORT_SYMBOL(RTMpOnSpecific);
+
+
+#if RTLNX_VER_MIN(2,6,19) && defined(CONFIG_SMP)
+/**
+ * Dummy callback used by RTMpPokeCpu.
+ *
+ * @param pvInfo Ignored.
+ */
+static void rtmpLinuxPokeCpuCallback(void *pvInfo)
+{
+ NOREF(pvInfo);
+}
+#endif
+
+
+RTDECL(int) RTMpPokeCpu(RTCPUID idCpu)
+{
+#if RTLNX_VER_MIN(2,6,19)
+ IPRT_LINUX_SAVE_EFL_AC();
+ int rc;
+ if (RTMpIsCpuPossible(idCpu))
+ {
+ if (RTMpIsCpuOnline(idCpu))
+ {
+# ifdef CONFIG_SMP
+# if RTLNX_VER_MIN(2,6,27)
+ rc = smp_call_function_single(idCpu, rtmpLinuxPokeCpuCallback, NULL, 0 /* wait */);
+# elif RTLNX_VER_MIN(2,6,19)
+ rc = smp_call_function_single(idCpu, rtmpLinuxPokeCpuCallback, NULL, 0 /* retry */, 0 /* wait */);
+# else /* older kernels */
+# error oops
+# endif /* older kernels */
+ Assert(rc == 0);
+# endif /* CONFIG_SMP */
+ rc = VINF_SUCCESS;
+ }
+ else
+ rc = VERR_CPU_OFFLINE;
+ }
+ else
+ rc = VERR_CPU_NOT_FOUND;
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return rc;
+
+#else /* older kernels */
+ /* no unicast here? */
+ return VERR_NOT_SUPPORTED;
+#endif /* older kernels */
+}
+RT_EXPORT_SYMBOL(RTMpPokeCpu);
+
+
+RTDECL(bool) RTMpOnAllIsConcurrentSafe(void)
+{
+ return true;
+}
+RT_EXPORT_SYMBOL(RTMpOnAllIsConcurrentSafe);
+
diff --git a/src/VBox/Runtime/r0drv/linux/mpnotification-r0drv-linux.c b/src/VBox/Runtime/r0drv/linux/mpnotification-r0drv-linux.c
new file mode 100644
index 00000000..99b8748e
--- /dev/null
+++ b/src/VBox/Runtime/r0drv/linux/mpnotification-r0drv-linux.c
@@ -0,0 +1,258 @@
+/* $Id: mpnotification-r0drv-linux.c $ */
+/** @file
+ * IPRT - Multiprocessor Event Notifications, Ring-0 Driver, Linux.
+ */
+
+/*
+ * Copyright (C) 2008-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#include "the-linux-kernel.h"
+#include "internal/iprt.h"
+
+#include <iprt/asm-amd64-x86.h>
+#include <iprt/errcore.h>
+#include <iprt/cpuset.h>
+#include <iprt/thread.h>
+#include "r0drv/mp-r0drv.h"
+
+#if RTLNX_VER_MIN(4,10,0)
+
+static enum cpuhp_state g_rtR0MpOnline;
+
+/*
+ * Linux 4.10 completely removed CPU notifiers. So let's switch to CPU hotplug
+ * notification.
+ */
+
+static int rtR0MpNotificationLinuxOnline(unsigned int cpu)
+{
+ RTCPUID idCpu = RTMpCpuIdFromSetIndex(cpu);
+ rtMpNotificationDoCallbacks(RTMPEVENT_ONLINE, idCpu);
+ return 0;
+}
+
+static int rtR0MpNotificationLinuxOffline(unsigned int cpu)
+{
+ RTCPUID idCpu = RTMpCpuIdFromSetIndex(cpu);
+ rtMpNotificationDoCallbacks(RTMPEVENT_OFFLINE, idCpu);
+ return 0;
+}
+
+DECLHIDDEN(int) rtR0MpNotificationNativeInit(void)
+{
+ int rc;
+ IPRT_LINUX_SAVE_EFL_AC();
+ rc = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "vboxdrv:online",
+ rtR0MpNotificationLinuxOnline, rtR0MpNotificationLinuxOffline);
+ IPRT_LINUX_RESTORE_EFL_AC();
+ /*
+ * cpuhp_setup_state_nocalls() returns a positive state number for
+ * CPUHP_AP_ONLINE_DYN or -ENOSPC if there is no free slot available
+ * (see cpuhp_reserve_state / definition of CPUHP_AP_ONLINE_DYN).
+ */
+ AssertMsgReturn(rc > 0, ("%d\n", rc), RTErrConvertFromErrno(rc));
+ g_rtR0MpOnline = rc;
+ return VINF_SUCCESS;
+}
+
+
+DECLHIDDEN(void) rtR0MpNotificationNativeTerm(void)
+{
+ IPRT_LINUX_SAVE_EFL_AC();
+ cpuhp_remove_state_nocalls(g_rtR0MpOnline);
+ IPRT_LINUX_RESTORE_EFL_AC();
+}
+
+#elif RTLNX_VER_MIN(2,5,71) && defined(CONFIG_SMP)
+
+static int rtMpNotificationLinuxCallback(struct notifier_block *pNotifierBlock, unsigned long ulNativeEvent, void *pvCpu);
+
+/**
+ * The notifier block we use for registering the callback.
+ */
+static struct notifier_block g_NotifierBlock =
+{
+ .notifier_call = rtMpNotificationLinuxCallback,
+ .next = NULL,
+ .priority = 0
+};
+
+# ifdef CPU_DOWN_FAILED
+/**
+ * The set of CPUs we've seen going offline recently.
+ */
+static RTCPUSET g_MpPendingOfflineSet;
+# endif
+
+
+/**
+ * The native callback.
+ *
+ * @returns NOTIFY_DONE.
+ * @param pNotifierBlock Pointer to g_NotifierBlock.
+ * @param ulNativeEvent The native event.
+ * @param pvCpu The cpu id cast into a pointer value.
+ *
+ * @remarks This can fire with preemption enabled and on any CPU.
+ */
+static int rtMpNotificationLinuxCallback(struct notifier_block *pNotifierBlock, unsigned long ulNativeEvent, void *pvCpu)
+{
+ bool fProcessEvent = false;
+ RTCPUID idCpu = (uintptr_t)pvCpu;
+ NOREF(pNotifierBlock);
+
+ /*
+ * Note that redhat/CentOS ported _some_ of the FROZEN macros
+ * back to their 2.6.18-92.1.10.el5 kernel but actually don't
+ * use them. Thus we have to test for both CPU_TASKS_FROZEN and
+ * the individual event variants.
+ */
+ switch (ulNativeEvent)
+ {
+ /*
+ * Pick up online events or failures to go offline.
+ * Ignore failure events for CPUs we didn't see go offline.
+ */
+# ifdef CPU_DOWN_FAILED
+ case CPU_DOWN_FAILED:
+# if defined(CPU_TASKS_FROZEN) && defined(CPU_DOWN_FAILED_FROZEN)
+ case CPU_DOWN_FAILED_FROZEN:
+# endif
+ if (!RTCpuSetIsMember(&g_MpPendingOfflineSet, idCpu))
+ break; /* fProcessEvent stays false */
+ /* fall thru */
+# endif
+ case CPU_ONLINE:
+# if defined(CPU_TASKS_FROZEN) && defined(CPU_ONLINE_FROZEN)
+ case CPU_ONLINE_FROZEN:
+# endif
+# ifdef CPU_DOWN_FAILED
+ RTCpuSetDel(&g_MpPendingOfflineSet, idCpu);
+# endif
+ fProcessEvent = true;
+ break;
+
+ /*
+ * Pick the earliest possible offline event.
+ * The only important thing here is that we get the event and that
+ * it's exactly one.
+ */
+# ifdef CPU_DOWN_PREPARE
+ case CPU_DOWN_PREPARE:
+# if defined(CPU_TASKS_FROZEN) && defined(CPU_DOWN_PREPARE_FROZEN)
+ case CPU_DOWN_PREPARE_FROZEN:
+# endif
+ fProcessEvent = true;
+# else
+ case CPU_DEAD:
+# if defined(CPU_TASKS_FROZEN) && defined(CPU_DEAD_FROZEN)
+ case CPU_DEAD_FROZEN:
+# endif
+ /* Don't process CPU_DEAD notifications. */
+# endif
+# ifdef CPU_DOWN_FAILED
+ RTCpuSetAdd(&g_MpPendingOfflineSet, idCpu);
+# endif
+ break;
+ }
+
+ if (!fProcessEvent)
+ return NOTIFY_DONE;
+
+ switch (ulNativeEvent)
+ {
+# ifdef CPU_DOWN_FAILED
+ case CPU_DOWN_FAILED:
+# if defined(CPU_TASKS_FROZEN) && defined(CPU_DOWN_FAILED_FROZEN)
+ case CPU_DOWN_FAILED_FROZEN:
+# endif
+# endif
+ case CPU_ONLINE:
+# if defined(CPU_TASKS_FROZEN) && defined(CPU_ONLINE_FROZEN)
+ case CPU_ONLINE_FROZEN:
+# endif
+ rtMpNotificationDoCallbacks(RTMPEVENT_ONLINE, idCpu);
+ break;
+
+# ifdef CPU_DOWN_PREPARE
+ case CPU_DOWN_PREPARE:
+# if defined(CPU_TASKS_FROZEN) && defined(CPU_DOWN_PREPARE_FROZEN)
+ case CPU_DOWN_PREPARE_FROZEN:
+# endif
+ rtMpNotificationDoCallbacks(RTMPEVENT_OFFLINE, idCpu);
+ break;
+# endif
+ }
+
+ return NOTIFY_DONE;
+}
+
+
+DECLHIDDEN(int) rtR0MpNotificationNativeInit(void)
+{
+ int rc;
+ IPRT_LINUX_SAVE_EFL_AC();
+
+# ifdef CPU_DOWN_FAILED
+ RTCpuSetEmpty(&g_MpPendingOfflineSet);
+# endif
+
+ rc = register_cpu_notifier(&g_NotifierBlock);
+ IPRT_LINUX_RESTORE_EFL_AC();
+ AssertMsgReturn(!rc, ("%d\n", rc), RTErrConvertFromErrno(rc));
+ return VINF_SUCCESS;
+}
+
+
+DECLHIDDEN(void) rtR0MpNotificationNativeTerm(void)
+{
+ IPRT_LINUX_SAVE_EFL_AC();
+ unregister_cpu_notifier(&g_NotifierBlock);
+ IPRT_LINUX_RESTORE_EFL_AC();
+}
+
+#else /* Not supported / Not needed */
+
+DECLHIDDEN(int) rtR0MpNotificationNativeInit(void)
+{
+ return VINF_SUCCESS;
+}
+
+DECLHIDDEN(void) rtR0MpNotificationNativeTerm(void)
+{
+}
+
+#endif /* Not supported / Not needed */
+
diff --git a/src/VBox/Runtime/r0drv/linux/process-r0drv-linux.c b/src/VBox/Runtime/r0drv/linux/process-r0drv-linux.c
new file mode 100644
index 00000000..3179a95e
--- /dev/null
+++ b/src/VBox/Runtime/r0drv/linux/process-r0drv-linux.c
@@ -0,0 +1,59 @@
+/* $Id: process-r0drv-linux.c $ */
+/** @file
+ * IPRT - Process, Ring-0 Driver, Linux.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#include "the-linux-kernel.h"
+#include "internal/iprt.h"
+
+#include <iprt/process.h>
+
+
+RTDECL(RTPROCESS) RTProcSelf(void)
+{
+ return (RTPROCESS)current->tgid;
+}
+RT_EXPORT_SYMBOL(RTProcSelf);
+
+
+RTR0DECL(RTR0PROCESS) RTR0ProcHandleSelf(void)
+{
+ return (RTR0PROCESS)current->tgid;
+}
+RT_EXPORT_SYMBOL(RTR0ProcHandleSelf);
+
diff --git a/src/VBox/Runtime/r0drv/linux/rtStrFormatKernelAddress-r0drv-linux.c b/src/VBox/Runtime/r0drv/linux/rtStrFormatKernelAddress-r0drv-linux.c
new file mode 100644
index 00000000..14cfc4f7
--- /dev/null
+++ b/src/VBox/Runtime/r0drv/linux/rtStrFormatKernelAddress-r0drv-linux.c
@@ -0,0 +1,66 @@
+/* $Id: rtStrFormatKernelAddress-r0drv-linux.c $ */
+/** @file
+ * IPRT - IPRT String Formatter, ring-0 addresses.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP RTLOGGROUP_STRING
+#include "the-linux-kernel.h"
+#include "internal/iprt.h"
+
+#include <iprt/assert.h>
+#include <iprt/string.h>
+
+#include "internal/string.h"
+
+
+DECLHIDDEN(size_t) rtStrFormatKernelAddress(char *pszBuf, size_t cbBuf, RTR0INTPTR uPtr, signed int cchWidth,
+ signed int cchPrecision, unsigned int fFlags)
+{
+#if !defined(DEBUG) && RTLNX_VER_MIN(2,6,38)
+ RT_NOREF(cchWidth, cchPrecision);
+ /* use the Linux kernel function which is able to handle "%pK" */
+ static const char s_szFmt[] = "0x%pK";
+ const char *pszFmt = s_szFmt;
+ if (!(fFlags & RTSTR_F_SPECIAL))
+ pszFmt += 2;
+ return scnprintf(pszBuf, cbBuf, pszFmt, uPtr);
+#else
+ Assert(cbBuf >= 64);
+ return RTStrFormatNumber(pszBuf, uPtr, 16, cchWidth, cchPrecision, fFlags);
+#endif
+}
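On non-debug builds against 2.6.38+ kernels the helper defers to scnprintf()'s "%pK" so the kernel's kptr_restrict policy decides whether the real address is revealed; debug or older builds fall back to RTStrFormatNumber. A hedged sketch of a direct call from within IPRT (the function is DECLHIDDEN, so it is only reachable inside the runtime; buffer size and pointer value are illustrative):

    char    szAddr[64];                     /* the fallback path asserts cbBuf >= 64 */
    void   *pvSomething = &szAddr[0];       /* any kernel address, purely illustrative */
    size_t  cch = rtStrFormatKernelAddress(szAddr, sizeof(szAddr),
                                           (RTR0INTPTR)(uintptr_t)pvSomething,
                                           0 /*cchWidth*/, 0 /*cchPrecision*/,
                                           RTSTR_F_SPECIAL /* request the "0x" prefix */);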
diff --git a/src/VBox/Runtime/r0drv/linux/semevent-r0drv-linux.c b/src/VBox/Runtime/r0drv/linux/semevent-r0drv-linux.c
new file mode 100644
index 00000000..0b1ca618
--- /dev/null
+++ b/src/VBox/Runtime/r0drv/linux/semevent-r0drv-linux.c
@@ -0,0 +1,296 @@
+/* $Id: semevent-r0drv-linux.c $ */
+/** @file
+ * IPRT - Single Release Event Semaphores, Ring-0 Driver, Linux.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define RTSEMEVENT_WITHOUT_REMAPPING
+#include "the-linux-kernel.h"
+#include "internal/iprt.h"
+#include <iprt/semaphore.h>
+
+#include <iprt/asm.h>
+#include <iprt/assert.h>
+#include <iprt/err.h>
+#include <iprt/lockvalidator.h>
+#include <iprt/mem.h>
+
+#include "waitqueue-r0drv-linux.h"
+#include "internal/magics.h"
+
+
+/*********************************************************************************************************************************
+* Structures and Typedefs *
+*********************************************************************************************************************************/
+/**
+ * Linux event semaphore.
+ */
+typedef struct RTSEMEVENTINTERNAL
+{
+ /** Magic value (RTSEMEVENT_MAGIC). */
+ uint32_t volatile u32Magic;
+ /** The object status - !0 when signaled and 0 when reset. */
+ uint32_t volatile fState;
+ /** Reference counter. */
+ uint32_t volatile cRefs;
+ /** The wait queue. */
+ wait_queue_head_t Head;
+} RTSEMEVENTINTERNAL, *PRTSEMEVENTINTERNAL;
+
+
+
+RTDECL(int) RTSemEventCreate(PRTSEMEVENT phEventSem)
+{
+ return RTSemEventCreateEx(phEventSem, 0 /*fFlags*/, NIL_RTLOCKVALCLASS, NULL);
+}
+
+
+RTDECL(int) RTSemEventCreateEx(PRTSEMEVENT phEventSem, uint32_t fFlags, RTLOCKVALCLASS hClass, const char *pszNameFmt, ...)
+{
+ PRTSEMEVENTINTERNAL pThis;
+ IPRT_LINUX_SAVE_EFL_AC();
+ RT_NOREF_PV(hClass); RT_NOREF_PV(pszNameFmt);
+
+ AssertReturn(!(fFlags & ~(RTSEMEVENT_FLAGS_NO_LOCK_VAL | RTSEMEVENT_FLAGS_BOOTSTRAP_HACK)), VERR_INVALID_PARAMETER);
+ Assert(!(fFlags & RTSEMEVENT_FLAGS_BOOTSTRAP_HACK) || (fFlags & RTSEMEVENT_FLAGS_NO_LOCK_VAL));
+
+ pThis = (PRTSEMEVENTINTERNAL)RTMemAlloc(sizeof(*pThis));
+ if (!pThis)
+ return VERR_NO_MEMORY;
+
+ pThis->u32Magic = RTSEMEVENT_MAGIC;
+ pThis->fState = 0;
+ pThis->cRefs = 1;
+ init_waitqueue_head(&pThis->Head);
+
+ *phEventSem = pThis;
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return VINF_SUCCESS;
+}
+RT_EXPORT_SYMBOL(RTSemEventCreate);
+
+
+/**
+ * Retains a reference to the event semaphore.
+ *
+ * @param pThis The event semaphore.
+ */
+DECLINLINE(void) rtR0SemEventLnxRetain(PRTSEMEVENTINTERNAL pThis)
+{
+ uint32_t cRefs = ASMAtomicIncU32(&pThis->cRefs);
+ Assert(cRefs < 100000); NOREF(cRefs);
+}
+
+
+/**
+ * Releases a reference to the event semaphore.
+ *
+ * @param pThis The event semaphore.
+ */
+DECLINLINE(void) rtR0SemEventLnxRelease(PRTSEMEVENTINTERNAL pThis)
+{
+ if (RT_UNLIKELY(ASMAtomicDecU32(&pThis->cRefs) == 0))
+ RTMemFree(pThis);
+}
+
+
+RTDECL(int) RTSemEventDestroy(RTSEMEVENT hEventSem)
+{
+ IPRT_LINUX_SAVE_EFL_AC();
+
+ /*
+ * Validate input.
+ */
+ PRTSEMEVENTINTERNAL pThis = hEventSem;
+ if (pThis == NIL_RTSEMEVENT)
+ return VINF_SUCCESS;
+ AssertMsgReturn(pThis->u32Magic == RTSEMEVENT_MAGIC, ("pThis->u32Magic=%RX32 pThis=%p\n", pThis->u32Magic, pThis), VERR_INVALID_HANDLE);
+ Assert(pThis->cRefs > 0);
+
+ /*
+ * Invalidate it and signal the object just in case.
+ */
+ ASMAtomicWriteU32(&pThis->u32Magic, ~RTSEMEVENT_MAGIC);
+ ASMAtomicWriteU32(&pThis->fState, 0);
+ Assert(!waitqueue_active(&pThis->Head));
+ wake_up_all(&pThis->Head);
+ rtR0SemEventLnxRelease(pThis);
+
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return VINF_SUCCESS;
+}
+RT_EXPORT_SYMBOL(RTSemEventDestroy);
+
+
+RTDECL(int) RTSemEventSignal(RTSEMEVENT hEventSem)
+{
+ IPRT_LINUX_SAVE_EFL_AC();
+
+ /*
+ * Validate input.
+ */
+ PRTSEMEVENTINTERNAL pThis = (PRTSEMEVENTINTERNAL)hEventSem;
+ AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+ AssertMsgReturn(pThis->u32Magic == RTSEMEVENT_MAGIC, ("pThis->u32Magic=%RX32 pThis=%p\n", pThis->u32Magic, pThis), VERR_INVALID_HANDLE);
+ rtR0SemEventLnxRetain(pThis);
+
+ /*
+ * Signal the event object.
+ */
+ ASMAtomicWriteU32(&pThis->fState, 1);
+ wake_up(&pThis->Head);
+
+ rtR0SemEventLnxRelease(pThis);
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return VINF_SUCCESS;
+}
+RT_EXPORT_SYMBOL(RTSemEventSignal);
+
+
+/**
+ * Worker for RTSemEventWaitEx and RTSemEventWaitExDebug.
+ *
+ * @returns VBox status code.
+ * @param pThis The event semaphore.
+ * @param fFlags See RTSemEventWaitEx.
+ * @param uTimeout See RTSemEventWaitEx.
+ * @param pSrcPos The source code position of the wait.
+ */
+static int rtR0SemEventLnxWait(PRTSEMEVENTINTERNAL pThis, uint32_t fFlags, uint64_t uTimeout,
+ PCRTLOCKVALSRCPOS pSrcPos)
+{
+ int rc;
+ RT_NOREF_PV(pSrcPos);
+
+ /*
+ * Validate the input.
+ */
+ AssertPtrReturn(pThis, VERR_INVALID_PARAMETER);
+ AssertMsgReturn(pThis->u32Magic == RTSEMEVENT_MAGIC, ("%p u32Magic=%RX32\n", pThis, pThis->u32Magic), VERR_INVALID_PARAMETER);
+ AssertReturn(RTSEMWAIT_FLAGS_ARE_VALID(fFlags), VERR_INVALID_PARAMETER);
+ rtR0SemEventLnxRetain(pThis);
+
+ /*
+ * Try grab the event without setting up the wait.
+ */
+ if ( 1 /** @todo check if there is someone waiting already - waitqueue_active, but then what do we do below? */
+ && ASMAtomicCmpXchgU32(&pThis->fState, 0, 1))
+ rc = VINF_SUCCESS;
+ else
+ {
+ /*
+ * We have to wait.
+ */
+ IPRT_LINUX_SAVE_EFL_AC();
+ RTR0SEMLNXWAIT Wait;
+ rc = rtR0SemLnxWaitInit(&Wait, fFlags, uTimeout, &pThis->Head);
+ if (RT_SUCCESS(rc))
+ {
+ IPRT_DEBUG_SEMS_STATE(pThis, 'E');
+ for (;;)
+ {
+ /* The destruction test. */
+ if (RT_UNLIKELY(pThis->u32Magic != RTSEMEVENT_MAGIC))
+ rc = VERR_SEM_DESTROYED;
+ else
+ {
+ rtR0SemLnxWaitPrepare(&Wait);
+
+ /* Check the exit conditions. */
+ if (RT_UNLIKELY(pThis->u32Magic != RTSEMEVENT_MAGIC))
+ rc = VERR_SEM_DESTROYED;
+ else if (ASMAtomicCmpXchgU32(&pThis->fState, 0, 1))
+ rc = VINF_SUCCESS;
+ else if (rtR0SemLnxWaitHasTimedOut(&Wait))
+ rc = VERR_TIMEOUT;
+ else if (rtR0SemLnxWaitWasInterrupted(&Wait))
+ rc = VERR_INTERRUPTED;
+ else
+ {
+ /* Do the wait and then recheck the conditions. */
+ rtR0SemLnxWaitDoIt(&Wait);
+ continue;
+ }
+ }
+ break;
+ }
+
+ rtR0SemLnxWaitDelete(&Wait);
+ IPRT_DEBUG_SEMS_STATE_RC(pThis, 'E', rc);
+ }
+ IPRT_LINUX_RESTORE_EFL_AC();
+ }
+
+ rtR0SemEventLnxRelease(pThis);
+ return rc;
+}
+
+
+RTDECL(int) RTSemEventWaitEx(RTSEMEVENT hEventSem, uint32_t fFlags, uint64_t uTimeout)
+{
+#ifndef RTSEMEVENT_STRICT
+ return rtR0SemEventLnxWait(hEventSem, fFlags, uTimeout, NULL);
+#else
+ RTLOCKVALSRCPOS SrcPos = RTLOCKVALSRCPOS_INIT_NORMAL_API();
+ return rtR0SemEventLnxWait(hEventSem, fFlags, uTimeout, &SrcPos);
+#endif
+}
+RT_EXPORT_SYMBOL(RTSemEventWaitEx);
+
+
+RTDECL(int) RTSemEventWaitExDebug(RTSEMEVENT hEventSem, uint32_t fFlags, uint64_t uTimeout,
+ RTHCUINTPTR uId, RT_SRC_POS_DECL)
+{
+ RTLOCKVALSRCPOS SrcPos = RTLOCKVALSRCPOS_INIT_DEBUG_API();
+ return rtR0SemEventLnxWait(hEventSem, fFlags, uTimeout, &SrcPos);
+}
+RT_EXPORT_SYMBOL(RTSemEventWaitExDebug);
+
+
+RTDECL(uint32_t) RTSemEventGetResolution(void)
+{
+ return rtR0SemLnxWaitGetResolution();
+}
+RT_EXPORT_SYMBOL(RTSemEventGetResolution);
+
+
+RTR0DECL(bool) RTSemEventIsSignalSafe(void)
+{
+ return true;
+}
+RT_EXPORT_SYMBOL(RTSemEventIsSignalSafe);
+
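A minimal usage sketch of the single-release API implemented above (ring-0 context assumed; the demo names and the 30 second timeout are illustrative). One RTSemEventSignal wakes at most one waiter:

    #include <iprt/semaphore.h>
    #include <iprt/errcore.h>

    static RTSEMEVENT g_hEvtDemo = NIL_RTSEMEVENT;

    static int vboxDemoInit(void)
    {
        return RTSemEventCreate(&g_hEvtDemo);
    }

    /* Consumer: blocks until poked, or returns VERR_TIMEOUT after 30 seconds. */
    static int vboxDemoWait(void)
    {
        return RTSemEventWait(g_hEvtDemo, 30000 /*ms*/);
    }

    /* Producer: wakes one waiter; see RTSemEventIsSignalSafe above for context rules. */
    static void vboxDemoPoke(void)
    {
        RTSemEventSignal(g_hEvtDemo);
    }

    static void vboxDemoTerm(void)
    {
        RTSemEventDestroy(g_hEvtDemo);      /* pending waiters get VERR_SEM_DESTROYED */
        g_hEvtDemo = NIL_RTSEMEVENT;
    }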
diff --git a/src/VBox/Runtime/r0drv/linux/semeventmulti-r0drv-linux.c b/src/VBox/Runtime/r0drv/linux/semeventmulti-r0drv-linux.c
new file mode 100644
index 00000000..fbc028a1
--- /dev/null
+++ b/src/VBox/Runtime/r0drv/linux/semeventmulti-r0drv-linux.c
@@ -0,0 +1,361 @@
+/* $Id: semeventmulti-r0drv-linux.c $ */
+/** @file
+ * IPRT - Multiple Release Event Semaphores, Ring-0 Driver, Linux.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define RTSEMEVENTMULTI_WITHOUT_REMAPPING
+#include "the-linux-kernel.h"
+#include "internal/iprt.h"
+#include <iprt/semaphore.h>
+
+#include <iprt/assert.h>
+#include <iprt/asm.h>
+#include <iprt/err.h>
+#include <iprt/mem.h>
+#include <iprt/lockvalidator.h>
+
+#include "waitqueue-r0drv-linux.h"
+#include "internal/magics.h"
+
+
+/*********************************************************************************************************************************
+* Defined Constants And Macros *
+*********************************************************************************************************************************/
+/** @name fStateAndGen values
+ * @{ */
+/** The state bit number. */
+#define RTSEMEVENTMULTILNX_STATE_BIT 0
+/** The state mask. */
+#define RTSEMEVENTMULTILNX_STATE_MASK RT_BIT_32(RTSEMEVENTMULTILNX_STATE_BIT)
+/** The generation mask. */
+#define RTSEMEVENTMULTILNX_GEN_MASK ~RTSEMEVENTMULTILNX_STATE_MASK
+/** The generation shift. */
+#define RTSEMEVENTMULTILNX_GEN_SHIFT 1
+/** The initial variable value. */
+#define RTSEMEVENTMULTILNX_STATE_GEN_INIT UINT32_C(0xfffffffc)
+/** @} */
+
+
+/*********************************************************************************************************************************
+* Structures and Typedefs *
+*********************************************************************************************************************************/
+/**
+ * Linux event semaphore.
+ */
+typedef struct RTSEMEVENTMULTIINTERNAL
+{
+ /** Magic value (RTSEMEVENTMULTI_MAGIC). */
+ uint32_t volatile u32Magic;
+ /** The object state bit and generation counter.
+ * The generation counter is incremented every time the object is
+ * signalled. */
+ uint32_t volatile fStateAndGen;
+ /** Reference counter. */
+ uint32_t volatile cRefs;
+ /** The wait queue. */
+ wait_queue_head_t Head;
+} RTSEMEVENTMULTIINTERNAL, *PRTSEMEVENTMULTIINTERNAL;
+
+
+
+
+
+RTDECL(int) RTSemEventMultiCreate(PRTSEMEVENTMULTI phEventMultiSem)
+{
+ return RTSemEventMultiCreateEx(phEventMultiSem, 0 /*fFlags*/, NIL_RTLOCKVALCLASS, NULL);
+}
+
+
+RTDECL(int) RTSemEventMultiCreateEx(PRTSEMEVENTMULTI phEventMultiSem, uint32_t fFlags, RTLOCKVALCLASS hClass,
+ const char *pszNameFmt, ...)
+{
+ PRTSEMEVENTMULTIINTERNAL pThis;
+ IPRT_LINUX_SAVE_EFL_AC();
+ RT_NOREF_PV(hClass); RT_NOREF_PV(pszNameFmt);
+
+ AssertReturn(!(fFlags & ~RTSEMEVENTMULTI_FLAGS_NO_LOCK_VAL), VERR_INVALID_PARAMETER);
+ pThis = (PRTSEMEVENTMULTIINTERNAL)RTMemAlloc(sizeof(*pThis));
+ if (pThis)
+ {
+ pThis->u32Magic = RTSEMEVENTMULTI_MAGIC;
+ pThis->fStateAndGen = RTSEMEVENTMULTILNX_STATE_GEN_INIT;
+ pThis->cRefs = 1;
+ init_waitqueue_head(&pThis->Head);
+
+ *phEventMultiSem = pThis;
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return VINF_SUCCESS;
+ }
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return VERR_NO_MEMORY;
+}
+RT_EXPORT_SYMBOL(RTSemEventMultiCreate);
+
+
+/**
+ * Retain a reference to the semaphore.
+ *
+ * @param pThis The semaphore.
+ */
+DECLINLINE(void) rtR0SemEventMultiLnxRetain(PRTSEMEVENTMULTIINTERNAL pThis)
+{
+ uint32_t cRefs = ASMAtomicIncU32(&pThis->cRefs);
+ NOREF(cRefs);
+ Assert(cRefs && cRefs < 100000);
+}
+
+
+/**
+ * Release a reference, destroy the thing if necessary.
+ *
+ * @param pThis The semaphore.
+ */
+DECLINLINE(void) rtR0SemEventMultiLnxRelease(PRTSEMEVENTMULTIINTERNAL pThis)
+{
+ if (RT_UNLIKELY(ASMAtomicDecU32(&pThis->cRefs) == 0))
+ {
+ Assert(pThis->u32Magic != RTSEMEVENTMULTI_MAGIC);
+ RTMemFree(pThis);
+ }
+}
+
+
+RTDECL(int) RTSemEventMultiDestroy(RTSEMEVENTMULTI hEventMultiSem)
+{
+ IPRT_LINUX_SAVE_EFL_AC();
+
+ /*
+ * Validate input.
+ */
+ PRTSEMEVENTMULTIINTERNAL pThis = (PRTSEMEVENTMULTIINTERNAL)hEventMultiSem;
+ if (pThis == NIL_RTSEMEVENTMULTI)
+ return VINF_SUCCESS;
+ AssertPtrReturn(pThis, VERR_INVALID_PARAMETER);
+ AssertMsgReturn(pThis->u32Magic == RTSEMEVENTMULTI_MAGIC, ("%p u32Magic=%RX32\n", pThis, pThis->u32Magic), VERR_INVALID_PARAMETER);
+ Assert(pThis->cRefs > 0);
+
+ /*
+ * Invalidate it and signal the object just in case.
+ */
+ ASMAtomicWriteU32(&pThis->u32Magic, ~RTSEMEVENTMULTI_MAGIC);
+ ASMAtomicAndU32(&pThis->fStateAndGen, RTSEMEVENTMULTILNX_GEN_MASK);
+ Assert(!waitqueue_active(&pThis->Head));
+ wake_up_all(&pThis->Head);
+ rtR0SemEventMultiLnxRelease(pThis);
+
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return VINF_SUCCESS;
+}
+RT_EXPORT_SYMBOL(RTSemEventMultiDestroy);
+
+
+RTDECL(int) RTSemEventMultiSignal(RTSEMEVENTMULTI hEventMultiSem)
+{
+ IPRT_LINUX_SAVE_EFL_AC();
+ uint32_t fNew;
+ uint32_t fOld;
+
+ /*
+ * Validate input.
+ */
+ PRTSEMEVENTMULTIINTERNAL pThis = (PRTSEMEVENTMULTIINTERNAL)hEventMultiSem;
+ if (!pThis)
+ return VERR_INVALID_PARAMETER;
+ AssertPtrReturn(pThis, VERR_INVALID_PARAMETER);
+ AssertMsgReturn(pThis->u32Magic == RTSEMEVENTMULTI_MAGIC, ("%p u32Magic=%RX32\n", pThis, pThis->u32Magic), VERR_INVALID_PARAMETER);
+ rtR0SemEventMultiLnxRetain(pThis);
+
+ /*
+ * Signal the event object. The compare-and-exchange loop below is paranoia
+ * for dealing with racing RTSemEventMultiSignal calls (which should probably
+ * be forbidden, but are relatively easy to handle).
+ */
+ do
+ {
+ fNew = fOld = ASMAtomicUoReadU32(&pThis->fStateAndGen);
+ fNew += 1 << RTSEMEVENTMULTILNX_GEN_SHIFT;
+ fNew |= RTSEMEVENTMULTILNX_STATE_MASK;
+ }
+ while (!ASMAtomicCmpXchgU32(&pThis->fStateAndGen, fNew, fOld));
+
+ wake_up_all(&pThis->Head);
+
+ rtR0SemEventMultiLnxRelease(pThis);
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return VINF_SUCCESS;
+}
+RT_EXPORT_SYMBOL(RTSemEventMultiSignal);
+
+
+RTDECL(int) RTSemEventMultiReset(RTSEMEVENTMULTI hEventMultiSem)
+{
+ /*
+ * Validate input.
+ */
+ PRTSEMEVENTMULTIINTERNAL pThis = (PRTSEMEVENTMULTIINTERNAL)hEventMultiSem;
+ if (!pThis)
+ return VERR_INVALID_PARAMETER;
+ AssertPtrReturn(pThis, VERR_INVALID_PARAMETER);
+ AssertMsgReturn(pThis->u32Magic == RTSEMEVENTMULTI_MAGIC, ("%p u32Magic=%RX32\n", pThis, pThis->u32Magic), VERR_INVALID_PARAMETER);
+ rtR0SemEventMultiLnxRetain(pThis);
+
+ /*
+ * Reset it.
+ */
+ ASMAtomicAndU32(&pThis->fStateAndGen, ~RTSEMEVENTMULTILNX_STATE_MASK);
+
+ rtR0SemEventMultiLnxRelease(pThis);
+ return VINF_SUCCESS;
+}
+RT_EXPORT_SYMBOL(RTSemEventMultiReset);
+
+
+/**
+ * Worker for RTSemEventMultiWaitEx and RTSemEventMultiWaitExDebug.
+ *
+ * @returns VBox status code.
+ * @param pThis The event semaphore.
+ * @param fFlags See RTSemEventMultiWaitEx.
+ * @param uTimeout See RTSemEventMultiWaitEx.
+ * @param pSrcPos The source code position of the wait.
+ */
+static int rtR0SemEventMultiLnxWait(PRTSEMEVENTMULTIINTERNAL pThis, uint32_t fFlags, uint64_t uTimeout,
+ PCRTLOCKVALSRCPOS pSrcPos)
+{
+ uint32_t fOrgStateAndGen;
+ int rc;
+ RT_NOREF_PV(pSrcPos);
+
+ /*
+ * Validate the input.
+ */
+ AssertPtrReturn(pThis, VERR_INVALID_PARAMETER);
+ AssertMsgReturn(pThis->u32Magic == RTSEMEVENTMULTI_MAGIC, ("%p u32Magic=%RX32\n", pThis, pThis->u32Magic), VERR_INVALID_PARAMETER);
+ AssertReturn(RTSEMWAIT_FLAGS_ARE_VALID(fFlags), VERR_INVALID_PARAMETER);
+ rtR0SemEventMultiLnxRetain(pThis);
+
+ /*
+ * Is the event already signalled or do we have to wait?
+ */
+ fOrgStateAndGen = ASMAtomicUoReadU32(&pThis->fStateAndGen);
+ if (fOrgStateAndGen & RTSEMEVENTMULTILNX_STATE_MASK)
+ rc = VINF_SUCCESS;
+ else
+ {
+ /*
+ * We have to wait.
+ */
+ RTR0SEMLNXWAIT Wait;
+ IPRT_LINUX_SAVE_EFL_AC();
+ rc = rtR0SemLnxWaitInit(&Wait, fFlags, uTimeout, &pThis->Head);
+ if (RT_SUCCESS(rc))
+ {
+ IPRT_DEBUG_SEMS_STATE(pThis, 'E');
+ for (;;)
+ {
+ /* The destruction test. */
+ if (RT_UNLIKELY(pThis->u32Magic != RTSEMEVENTMULTI_MAGIC))
+ rc = VERR_SEM_DESTROYED;
+ else
+ {
+ rtR0SemLnxWaitPrepare(&Wait);
+
+ /* Check the exit conditions. */
+ if (RT_UNLIKELY(pThis->u32Magic != RTSEMEVENTMULTI_MAGIC))
+ rc = VERR_SEM_DESTROYED;
+ else if (ASMAtomicUoReadU32(&pThis->fStateAndGen) != fOrgStateAndGen)
+ rc = VINF_SUCCESS;
+ else if (rtR0SemLnxWaitHasTimedOut(&Wait))
+ rc = VERR_TIMEOUT;
+ else if (rtR0SemLnxWaitWasInterrupted(&Wait))
+ rc = VERR_INTERRUPTED;
+ else
+ {
+ /* Do the wait and then recheck the conditions. */
+ rtR0SemLnxWaitDoIt(&Wait);
+ continue;
+ }
+ }
+ break;
+ }
+
+ rtR0SemLnxWaitDelete(&Wait);
+ IPRT_DEBUG_SEMS_STATE_RC(pThis, 'E', rc);
+ }
+ IPRT_LINUX_RESTORE_EFL_AC();
+ }
+
+ rtR0SemEventMultiLnxRelease(pThis);
+ return rc;
+}
+
+
+RTDECL(int) RTSemEventMultiWaitEx(RTSEMEVENTMULTI hEventMultiSem, uint32_t fFlags, uint64_t uTimeout)
+{
+#ifndef RTSEMEVENT_STRICT
+ return rtR0SemEventMultiLnxWait(hEventMultiSem, fFlags, uTimeout, NULL);
+#else
+ RTLOCKVALSRCPOS SrcPos = RTLOCKVALSRCPOS_INIT_NORMAL_API();
+ return rtR0SemEventMultiLnxWait(hEventMultiSem, fFlags, uTimeout, &SrcPos);
+#endif
+}
+RT_EXPORT_SYMBOL(RTSemEventMultiWaitEx);
+
+
+RTDECL(int) RTSemEventMultiWaitExDebug(RTSEMEVENTMULTI hEventMultiSem, uint32_t fFlags, uint64_t uTimeout,
+ RTHCUINTPTR uId, RT_SRC_POS_DECL)
+{
+ RTLOCKVALSRCPOS SrcPos = RTLOCKVALSRCPOS_INIT_DEBUG_API();
+ return rtR0SemEventMultiLnxWait(hEventMultiSem, fFlags, uTimeout, &SrcPos);
+}
+RT_EXPORT_SYMBOL(RTSemEventMultiWaitExDebug);
+
+
+RTDECL(uint32_t) RTSemEventMultiGetResolution(void)
+{
+ return rtR0SemLnxWaitGetResolution();
+}
+RT_EXPORT_SYMBOL(RTSemEventMultiGetResolution);
+
+
+RTR0DECL(bool) RTSemEventMultiIsSignalSafe(void)
+{
+ return true;
+}
+RT_EXPORT_SYMBOL(RTSemEventMultiIsSignalSafe);
+
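A short sketch of the multi-release semantics built on the state/generation word above: one Signal releases every waiter (the generation bump also catches waiters that raced with it), and the event stays signalled until Reset clears the state bit again. All names are illustrative:

    static RTSEMEVENTMULTI g_hEvtMultiDemo = NIL_RTSEMEVENTMULTI;   /* RTSemEventMultiCreate() */

    /* Any number of threads may park here; a single Signal releases them all. */
    static int vboxDemoWaitUntilReady(void)
    {
        return RTSemEventMultiWait(g_hEvtMultiDemo, RT_INDEFINITE_WAIT);
    }

    static void vboxDemoSetReady(bool fReady)
    {
        if (fReady)
            RTSemEventMultiSignal(g_hEvtMultiDemo);   /* bumps the generation and sets the state bit */
        else
            RTSemEventMultiReset(g_hEvtMultiDemo);    /* clears the state bit only */
    }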
diff --git a/src/VBox/Runtime/r0drv/linux/semfastmutex-r0drv-linux.c b/src/VBox/Runtime/r0drv/linux/semfastmutex-r0drv-linux.c
new file mode 100644
index 00000000..0e2dff35
--- /dev/null
+++ b/src/VBox/Runtime/r0drv/linux/semfastmutex-r0drv-linux.c
@@ -0,0 +1,167 @@
+/* $Id: semfastmutex-r0drv-linux.c $ */
+/** @file
+ * IPRT - Fast Mutex Semaphores, Ring-0 Driver, Linux.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#include "the-linux-kernel.h"
+#include "internal/iprt.h"
+#include <iprt/semaphore.h>
+#include <iprt/alloc.h>
+#include <iprt/assert.h>
+#include <iprt/asm.h>
+#include <iprt/errcore.h>
+#if defined(RT_STRICT) || defined(IPRT_DEBUG_SEMS)
+# include <iprt/thread.h>
+#endif
+
+#include "internal/magics.h"
+
+
+/*********************************************************************************************************************************
+* Structures and Typedefs *
+*********************************************************************************************************************************/
+/**
+ * Wrapper for the linux semaphore structure.
+ */
+typedef struct RTSEMFASTMUTEXINTERNAL
+{
+ /** Magic value (RTSEMFASTMUTEX_MAGIC). */
+ uint32_t u32Magic;
+ /** the linux semaphore. */
+ struct semaphore Semaphore;
+#if defined(RT_STRICT) || defined(IPRT_DEBUG_SEMS)
+ /** For check. */
+ RTNATIVETHREAD volatile Owner;
+#endif
+} RTSEMFASTMUTEXINTERNAL, *PRTSEMFASTMUTEXINTERNAL;
+
+
+RTDECL(int) RTSemFastMutexCreate(PRTSEMFASTMUTEX phFastMtx)
+{
+ IPRT_LINUX_SAVE_EFL_AC();
+
+ /*
+ * Allocate.
+ */
+ PRTSEMFASTMUTEXINTERNAL pThis;
+ pThis = (PRTSEMFASTMUTEXINTERNAL)RTMemAlloc(sizeof(*pThis));
+ if (!pThis)
+ return VERR_NO_MEMORY;
+
+ /*
+ * Initialize.
+ */
+ pThis->u32Magic = RTSEMFASTMUTEX_MAGIC;
+ sema_init(&pThis->Semaphore, 1);
+#if defined(RT_STRICT) || defined(IPRT_DEBUG_SEMS)
+ pThis->Owner = NIL_RTNATIVETHREAD;
+#endif
+
+ *phFastMtx = pThis;
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return VINF_SUCCESS;
+}
+RT_EXPORT_SYMBOL(RTSemFastMutexCreate);
+
+
+RTDECL(int) RTSemFastMutexDestroy(RTSEMFASTMUTEX hFastMtx)
+{
+ /*
+ * Validate.
+ */
+ PRTSEMFASTMUTEXINTERNAL pThis = hFastMtx;
+ if (pThis == NIL_RTSEMFASTMUTEX)
+ return VINF_SUCCESS;
+ AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+ AssertMsgReturn(pThis->u32Magic == RTSEMFASTMUTEX_MAGIC, ("u32Magic=%RX32 pThis=%p\n", pThis->u32Magic, pThis), VERR_INVALID_HANDLE);
+
+ ASMAtomicWriteU32(&pThis->u32Magic, RTSEMFASTMUTEX_MAGIC_DEAD);
+ RTMemFree(pThis);
+ return VINF_SUCCESS;
+}
+RT_EXPORT_SYMBOL(RTSemFastMutexDestroy);
+
+
+RTDECL(int) RTSemFastMutexRequest(RTSEMFASTMUTEX hFastMtx)
+{
+ IPRT_LINUX_SAVE_EFL_AC();
+
+ /*
+ * Validate.
+ */
+ PRTSEMFASTMUTEXINTERNAL pThis = hFastMtx;
+ AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+ AssertMsgReturn(pThis->u32Magic == RTSEMFASTMUTEX_MAGIC, ("u32Magic=%RX32 pThis=%p\n", pThis->u32Magic, pThis), VERR_INVALID_HANDLE);
+
+ IPRT_DEBUG_SEMS_STATE(pThis, 'd');
+ down(&pThis->Semaphore);
+#if defined(RT_STRICT) || defined(IPRT_DEBUG_SEMS)
+ IPRT_DEBUG_SEMS_STATE(pThis, 'o');
+ AssertRelease(pThis->Owner == NIL_RTNATIVETHREAD);
+ ASMAtomicUoWriteSize(&pThis->Owner, RTThreadNativeSelf());
+#endif
+
+ IPRT_LINUX_RESTORE_EFL_ONLY_AC();
+ return VINF_SUCCESS;
+}
+RT_EXPORT_SYMBOL(RTSemFastMutexRequest);
+
+
+RTDECL(int) RTSemFastMutexRelease(RTSEMFASTMUTEX hFastMtx)
+{
+ IPRT_LINUX_SAVE_EFL_AC();
+
+ /*
+ * Validate.
+ */
+ PRTSEMFASTMUTEXINTERNAL pThis = hFastMtx;
+ AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+ AssertMsgReturn(pThis->u32Magic == RTSEMFASTMUTEX_MAGIC, ("u32Magic=%RX32 pThis=%p\n", pThis->u32Magic, pThis), VERR_INVALID_HANDLE);
+
+#if defined(RT_STRICT) || defined(IPRT_DEBUG_SEMS)
+ AssertRelease(pThis->Owner == RTThreadNativeSelf());
+ ASMAtomicUoWriteSize(&pThis->Owner, NIL_RTNATIVETHREAD);
+#endif
+ up(&pThis->Semaphore);
+ IPRT_DEBUG_SEMS_STATE(pThis, 'u');
+
+ IPRT_LINUX_RESTORE_EFL_ONLY_AC();
+ return VINF_SUCCESS;
+}
+RT_EXPORT_SYMBOL(RTSemFastMutexRelease);
+
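The fast mutex is the plain acquire/release pattern on top of an uninterruptible Linux semaphore, so it must only be taken in preemptible, non-interrupt context. A hedged sketch (the protected counter and helper name are illustrative):

    static RTSEMFASTMUTEX g_hMtxFastDemo = NIL_RTSEMFASTMUTEX;   /* RTSemFastMutexCreate() */
    static uint32_t       g_cDemoUsers   = 0;

    static int vboxDemoAddUser(void)
    {
        int rc = RTSemFastMutexRequest(g_hMtxFastDemo);   /* uninterruptible down() */
        if (RT_SUCCESS(rc))
        {
            g_cDemoUsers++;                               /* the protected state */
            RTSemFastMutexRelease(g_hMtxFastDemo);        /* up() */
        }
        return rc;
    }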
diff --git a/src/VBox/Runtime/r0drv/linux/semmutex-r0drv-linux.c b/src/VBox/Runtime/r0drv/linux/semmutex-r0drv-linux.c
new file mode 100644
index 00000000..16fcacfe
--- /dev/null
+++ b/src/VBox/Runtime/r0drv/linux/semmutex-r0drv-linux.c
@@ -0,0 +1,431 @@
+/* $Id: semmutex-r0drv-linux.c $ */
+/** @file
+ * IPRT - Mutex Semaphores, Ring-0 Driver, Linux.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define RTSEMMUTEX_WITHOUT_REMAPPING
+#include "the-linux-kernel.h"
+#include "internal/iprt.h"
+#include <iprt/semaphore.h>
+
+#include <iprt/assert.h>
+#include <iprt/asm.h>
+#include <iprt/mem.h>
+#include <iprt/err.h>
+#include <iprt/list.h>
+
+#include "internal/magics.h"
+
+
+/*********************************************************************************************************************************
+* Structures and Typedefs *
+*********************************************************************************************************************************/
+typedef struct RTSEMMUTEXLNXWAITER
+{
+ /** The list entry. */
+ RTLISTNODE ListEntry;
+ /** The waiting task. */
+ struct task_struct *pTask;
+ /** Why did we wake up? */
+ enum
+ {
+ /** Wakeup to take the semaphore. */
+ RTSEMMUTEXLNXWAITER_WAKEUP,
+ /** Mutex is being destroyed. */
+ RTSEMMUTEXLNXWAITER_DESTROYED,
+ /** Some other reason. */
+ RTSEMMUTEXLNXWAITER_OTHER
+ } volatile enmReason;
+} RTSEMMUTEXLNXWAITER, *PRTSEMMUTEXLNXWAITER;
+
+/**
+ * Wrapper for the linux semaphore structure.
+ */
+typedef struct RTSEMMUTEXINTERNAL
+{
+ /** Magic value (RTSEMMUTEX_MAGIC). */
+ uint32_t u32Magic;
+ /** The number of recursions. */
+ uint32_t cRecursions;
+ /** The list of waiting threads. */
+ RTLISTANCHOR WaiterList;
+ /** The current owner, NULL if none. */
+ struct task_struct *pOwnerTask;
+ /** The number of references to this piece of memory. This is used to
+ * prevent it from being kicked from underneath us while waiting. */
+ uint32_t volatile cRefs;
+ /** The spinlock protecting the members and falling asleep. */
+ spinlock_t Spinlock;
+} RTSEMMUTEXINTERNAL, *PRTSEMMUTEXINTERNAL;
+
+
+RTDECL(int) RTSemMutexCreate(PRTSEMMUTEX phMtx)
+{
+ int rc = VINF_SUCCESS;
+ IPRT_LINUX_SAVE_EFL_AC();
+
+ /*
+ * Allocate.
+ */
+ PRTSEMMUTEXINTERNAL pThis;
+ pThis = (PRTSEMMUTEXINTERNAL)RTMemAlloc(sizeof(*pThis));
+ if (pThis)
+ {
+ /*
+ * Initialize.
+ */
+ pThis->u32Magic = RTSEMMUTEX_MAGIC;
+ pThis->cRecursions = 0;
+ pThis->pOwnerTask = NULL;
+ pThis->cRefs = 1;
+ RTListInit(&pThis->WaiterList);
+ spin_lock_init(&pThis->Spinlock);
+
+ *phMtx = pThis;
+ }
+ else
+ rc = VERR_NO_MEMORY;
+
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return rc;
+}
+RT_EXPORT_SYMBOL(RTSemMutexCreate);
+
+
+RTDECL(int) RTSemMutexDestroy(RTSEMMUTEX hMtx)
+{
+ PRTSEMMUTEXINTERNAL pThis = hMtx;
+ PRTSEMMUTEXLNXWAITER pCur;
+ unsigned long fSavedIrq;
+
+ /*
+ * Validate.
+ */
+ if (pThis == NIL_RTSEMMUTEX)
+ return VINF_SUCCESS;
+ AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+ AssertMsgReturn(pThis->u32Magic == RTSEMMUTEX_MAGIC, ("u32Magic=%RX32 pThis=%p\n", pThis->u32Magic, pThis), VERR_INVALID_HANDLE);
+
+ /*
+ * Kill it, kick waiters and release it.
+ */
+ AssertReturn(ASMAtomicCmpXchgU32(&pThis->u32Magic, RTSEMMUTEX_MAGIC_DEAD, RTSEMMUTEX_MAGIC), VERR_INVALID_HANDLE);
+
+ IPRT_LINUX_SAVE_EFL_AC();
+
+ spin_lock_irqsave(&pThis->Spinlock, fSavedIrq);
+ RTListForEach(&pThis->WaiterList, pCur, RTSEMMUTEXLNXWAITER, ListEntry)
+ {
+ pCur->enmReason = RTSEMMUTEXLNXWAITER_DESTROYED;
+ wake_up_process(pCur->pTask);
+ }
+
+ if (ASMAtomicDecU32(&pThis->cRefs) != 0)
+ spin_unlock_irqrestore(&pThis->Spinlock, fSavedIrq);
+ else
+ {
+ spin_unlock_irqrestore(&pThis->Spinlock, fSavedIrq);
+ RTMemFree(pThis);
+ }
+
+ IPRT_LINUX_RESTORE_EFL_AC();
+
+ return VINF_SUCCESS;
+}
+RT_EXPORT_SYMBOL(RTSemMutexDestroy);
+
+
+/**
+ * Worker for rtSemMutexLinuxRequest that handles the case where we go to sleep.
+ *
+ * @returns VINF_SUCCESS, VERR_INTERRUPTED, VERR_TIMEOUT or VERR_SEM_DESTROYED.
+ * Returns without owning the spinlock.
+ * @param pThis The mutex instance.
+ * @param cMillies The timeout.
+ * @param fInterruptible The wait type.
+ * @param fSavedIrq The saved IRQ flags.
+ */
+static int rtSemMutexLinuxRequestSleep(PRTSEMMUTEXINTERNAL pThis, RTMSINTERVAL cMillies,
+ bool fInterruptible, unsigned long fSavedIrq)
+{
+ struct task_struct *pSelf = current;
+ int rc = VERR_TIMEOUT;
+ long lTimeout = cMillies == RT_INDEFINITE_WAIT ? MAX_SCHEDULE_TIMEOUT : msecs_to_jiffies(cMillies);
+ RTSEMMUTEXLNXWAITER Waiter;
+
+ IPRT_DEBUG_SEMS_STATE(pThis, 'm');
+
+ /*
+ * Grab a reference to the mutex and add ourselves to the waiter list.
+ */
+ ASMAtomicIncU32(&pThis->cRefs);
+
+ Waiter.pTask = pSelf;
+ Waiter.enmReason = RTSEMMUTEXLNXWAITER_OTHER;
+ RTListAppend(&pThis->WaiterList, &Waiter.ListEntry);
+
+ /*
+ * Do the waiting.
+ */
+ for (;;)
+ {
+ /* Check signal and timeout conditions. */
+ if ( fInterruptible
+ && signal_pending(pSelf))
+ {
+ rc = VERR_INTERRUPTED;
+ break;
+ }
+
+ if (!lTimeout)
+ break;
+
+ /* Go to sleep. */
+ set_current_state(fInterruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
+ spin_unlock_irq(&pThis->Spinlock);
+
+ lTimeout = schedule_timeout(lTimeout);
+
+ spin_lock_irq(&pThis->Spinlock);
+ set_current_state(TASK_RUNNING);
+
+ /* Did someone wake us up? */
+ if (Waiter.enmReason == RTSEMMUTEXLNXWAITER_WAKEUP)
+ {
+ Assert(pThis->cRecursions == 0);
+ pThis->cRecursions = 1;
+ pThis->pOwnerTask = pSelf;
+ rc = VINF_SUCCESS;
+ break;
+ }
+
+ /* Is the mutex being destroyed? */
+ if (RT_UNLIKELY( Waiter.enmReason == RTSEMMUTEXLNXWAITER_DESTROYED
+ || pThis->u32Magic != RTSEMMUTEX_MAGIC))
+ {
+ rc = VERR_SEM_DESTROYED;
+ break;
+ }
+ }
+
+ /*
+ * Unlink ourselves from the waiter list, dereference the mutex and exit the
+ * lock. We might have to free the mutex if it was destroyed.
+ */
+ RTListNodeRemove(&Waiter.ListEntry);
+ IPRT_DEBUG_SEMS_STATE_RC(pThis, 'M', rc);
+
+ if (RT_LIKELY(ASMAtomicDecU32(&pThis->cRefs) != 0))
+ spin_unlock_irqrestore(&pThis->Spinlock, fSavedIrq);
+ else
+ {
+ Assert(RT_FAILURE_NP(rc));
+ spin_unlock_irqrestore(&pThis->Spinlock, fSavedIrq);
+ RTMemFree(pThis);
+ }
+ return rc;
+}
+
+
+/**
+ * Internal worker.
+ */
+DECLINLINE(int) rtSemMutexLinuxRequest(RTSEMMUTEX hMutexSem, RTMSINTERVAL cMillies, bool fInterruptible)
+{
+ PRTSEMMUTEXINTERNAL pThis = hMutexSem;
+ struct task_struct *pSelf = current;
+ unsigned long fSavedIrq;
+ int rc;
+ IPRT_LINUX_SAVE_EFL_AC();
+
+ /*
+ * Validate.
+ */
+ AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+ AssertMsgReturn(pThis->u32Magic == RTSEMMUTEX_MAGIC, ("u32Magic=%RX32 pThis=%p\n", pThis->u32Magic, pThis), VERR_INVALID_HANDLE);
+ Assert(pThis->cRefs >= 1);
+
+ /*
+ * Lock it and check if it's a recursion.
+ */
+ spin_lock_irqsave(&pThis->Spinlock, fSavedIrq);
+ if (pThis->pOwnerTask == pSelf)
+ {
+ pThis->cRecursions++;
+ Assert(pThis->cRecursions > 1);
+ Assert(pThis->cRecursions < 256);
+ rc = VINF_SUCCESS;
+ }
+ /*
+ * Not a recursion, maybe it's not owned by anyone then?
+ */
+ else if ( pThis->pOwnerTask == NULL
+ && RTListIsEmpty(&pThis->WaiterList))
+ {
+ Assert(pThis->cRecursions == 0);
+ pThis->cRecursions = 1;
+ pThis->pOwnerTask = pSelf;
+ rc = VINF_SUCCESS;
+ }
+ /*
+ * Was it a polling call?
+ */
+ else if (cMillies == 0)
+ rc = VERR_TIMEOUT;
+ /*
+ * No, so go to sleep.
+ */
+ else
+ {
+ rc = rtSemMutexLinuxRequestSleep(pThis, cMillies, fInterruptible, fSavedIrq);
+ IPRT_LINUX_RESTORE_EFL_ONLY_AC();
+ return rc;
+ }
+
+ IPRT_DEBUG_SEMS_STATE_RC(pThis, 'M', rc);
+ spin_unlock_irqrestore(&pThis->Spinlock, fSavedIrq);
+ IPRT_LINUX_RESTORE_EFL_ONLY_AC();
+ return rc;
+}
+
+
+RTDECL(int) RTSemMutexRequest(RTSEMMUTEX hMutexSem, RTMSINTERVAL cMillies)
+{
+ return rtSemMutexLinuxRequest(hMutexSem, cMillies, false /*fInterruptible*/);
+}
+RT_EXPORT_SYMBOL(RTSemMutexRequest);
+
+
+RTDECL(int) RTSemMutexRequestDebug(RTSEMMUTEX hMutexSem, RTMSINTERVAL cMillies, RTHCUINTPTR uId, RT_SRC_POS_DECL)
+{
+ RT_NOREF_PV(uId); RT_SRC_POS_NOREF();
+ return RTSemMutexRequest(hMutexSem, cMillies);
+}
+RT_EXPORT_SYMBOL(RTSemMutexRequestDebug);
+
+
+RTDECL(int) RTSemMutexRequestNoResume(RTSEMMUTEX hMutexSem, RTMSINTERVAL cMillies)
+{
+ return rtSemMutexLinuxRequest(hMutexSem, cMillies, true /*fInterruptible*/);
+}
+RT_EXPORT_SYMBOL(RTSemMutexRequestNoResume);
+
+
+RTDECL(int) RTSemMutexRequestNoResumeDebug(RTSEMMUTEX hMutexSem, RTMSINTERVAL cMillies, RTHCUINTPTR uId, RT_SRC_POS_DECL)
+{
+ RT_NOREF_PV(uId); RT_SRC_POS_NOREF();
+ return RTSemMutexRequestNoResume(hMutexSem, cMillies);
+}
+RT_EXPORT_SYMBOL(RTSemMutexRequestNoResumeDebug);
+
+
+RTDECL(int) RTSemMutexRelease(RTSEMMUTEX hMtx)
+{
+ PRTSEMMUTEXINTERNAL pThis = hMtx;
+ struct task_struct *pSelf = current;
+ unsigned long fSavedIrq;
+ int rc;
+ IPRT_LINUX_SAVE_EFL_AC();
+
+ /*
+ * Validate.
+ */
+ AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+ AssertMsgReturn(pThis->u32Magic == RTSEMMUTEX_MAGIC, ("u32Magic=%RX32 pThis=%p\n", pThis->u32Magic, pThis), VERR_INVALID_HANDLE);
+ Assert(pThis->cRefs >= 1);
+
+ /*
+ * Take the lock and release one recursion.
+ */
+ spin_lock_irqsave(&pThis->Spinlock, fSavedIrq);
+ if (pThis->pOwnerTask == pSelf)
+ {
+ Assert(pThis->cRecursions > 0);
+ if (--pThis->cRecursions == 0)
+ {
+ pThis->pOwnerTask = NULL;
+
+ /* anyone to wake up? */
+ if (!RTListIsEmpty(&pThis->WaiterList))
+ {
+ PRTSEMMUTEXLNXWAITER pWaiter = RTListGetFirst(&pThis->WaiterList, RTSEMMUTEXLNXWAITER, ListEntry);
+ pWaiter->enmReason = RTSEMMUTEXLNXWAITER_WAKEUP;
+ wake_up_process(pWaiter->pTask);
+ }
+ IPRT_DEBUG_SEMS_STATE(pThis, 'u');
+ }
+ rc = VINF_SUCCESS;
+ }
+ else
+ rc = VERR_NOT_OWNER;
+ spin_unlock_irqrestore(&pThis->Spinlock, fSavedIrq);
+
+ AssertRC(rc);
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return rc;
+}
+RT_EXPORT_SYMBOL(RTSemMutexRelease);
+
+
+RTDECL(bool) RTSemMutexIsOwned(RTSEMMUTEX hMutexSem)
+{
+ PRTSEMMUTEXINTERNAL pThis = hMutexSem;
+ unsigned long fSavedIrq;
+ bool fOwned;
+ IPRT_LINUX_SAVE_EFL_AC();
+
+ /*
+ * Validate.
+ */
+ AssertPtrReturn(pThis, false);
+ AssertMsgReturn(pThis->u32Magic == RTSEMMUTEX_MAGIC, ("u32Magic=%RX32 pThis=%p\n", pThis->u32Magic, pThis), false);
+ Assert(pThis->cRefs >= 1);
+
+ /*
+ * Take the lock and check whether anyone owns the mutex.
+ */
+ spin_lock_irqsave(&pThis->Spinlock, fSavedIrq);
+ fOwned = pThis->pOwnerTask != NULL;
+ spin_unlock_irqrestore(&pThis->Spinlock, fSavedIrq);
+
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return fOwned;
+
+}
+RT_EXPORT_SYMBOL(RTSemMutexIsOwned);
+
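The mutex above supports recursion by the owning task and pure polling via a zero timeout, as rtSemMutexLinuxRequest shows. A hedged sketch (the helper name and timeouts are illustrative):

    static RTSEMMUTEX g_hMtxDemo = NIL_RTSEMMUTEX;   /* RTSemMutexCreate() */

    static int vboxDemoLockedWork(void)
    {
        int rc = RTSemMutexRequest(g_hMtxDemo, 100 /*ms*/);   /* may return VERR_TIMEOUT */
        if (RT_SUCCESS(rc))
        {
            /* A second request by the owner is a recursion and succeeds at once. */
            rc = RTSemMutexRequest(g_hMtxDemo, 0 /*poll*/);
            if (RT_SUCCESS(rc))
                RTSemMutexRelease(g_hMtxDemo);               /* undo the recursion */
            RTSemMutexRelease(g_hMtxDemo);                   /* drop ownership, wake a waiter */
        }
        return rc;
    }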
diff --git a/src/VBox/Runtime/r0drv/linux/spinlock-r0drv-linux.c b/src/VBox/Runtime/r0drv/linux/spinlock-r0drv-linux.c
new file mode 100644
index 00000000..c5bdded2
--- /dev/null
+++ b/src/VBox/Runtime/r0drv/linux/spinlock-r0drv-linux.c
@@ -0,0 +1,196 @@
+/* $Id: spinlock-r0drv-linux.c $ */
+/** @file
+ * IPRT - Spinlocks, Ring-0 Driver, Linux.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#include "the-linux-kernel.h"
+#include "internal/iprt.h"
+#include <iprt/spinlock.h>
+
+#include <iprt/asm.h>
+#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
+# include <iprt/asm-amd64-x86.h>
+#endif
+#include <iprt/assert.h>
+#include <iprt/errcore.h>
+#include <iprt/mem.h>
+#include <iprt/mp.h>
+#include <iprt/thread.h>
+#include "internal/magics.h"
+
+
+/*********************************************************************************************************************************
+* Structures and Typedefs *
+*********************************************************************************************************************************/
+/**
+ * Wrapper for the spinlock_t structure.
+ */
+typedef struct RTSPINLOCKINTERNAL
+{
+ /** Spinlock magic value (RTSPINLOCK_MAGIC). */
+ uint32_t volatile u32Magic;
+ /** The spinlock creation flags. */
+ uint32_t fFlags;
+ /** The saved interrupt flag. */
+ unsigned long volatile fIntSaved;
+ /** The linux spinlock structure. */
+ spinlock_t Spinlock;
+#ifdef RT_MORE_STRICT
+ /** The idAssertCpu variable before acquiring the lock for asserting after
+ * releasing the spinlock. */
+ RTCPUID volatile idAssertCpu;
+ /** The CPU that owns the lock. */
+ RTCPUID volatile idCpuOwner;
+#endif
+} RTSPINLOCKINTERNAL, *PRTSPINLOCKINTERNAL;
+
+
+
+RTDECL(int) RTSpinlockCreate(PRTSPINLOCK pSpinlock, uint32_t fFlags, const char *pszName)
+{
+ IPRT_LINUX_SAVE_EFL_AC();
+ PRTSPINLOCKINTERNAL pThis;
+ AssertReturn(fFlags == RTSPINLOCK_FLAGS_INTERRUPT_SAFE || fFlags == RTSPINLOCK_FLAGS_INTERRUPT_UNSAFE, VERR_INVALID_PARAMETER);
+ RT_NOREF_PV(pszName);
+
+ /*
+ * Allocate.
+ */
+ Assert(sizeof(RTSPINLOCKINTERNAL) > sizeof(void *));
+ pThis = (PRTSPINLOCKINTERNAL)RTMemAlloc(sizeof(*pThis));
+ if (!pThis)
+ return VERR_NO_MEMORY;
+ /*
+ * Initialize and return.
+ */
+ pThis->u32Magic = RTSPINLOCK_MAGIC;
+ pThis->fFlags = fFlags;
+ pThis->fIntSaved = 0;
+#ifdef RT_MORE_STRICT
+ pThis->idCpuOwner = NIL_RTCPUID;
+ pThis->idAssertCpu = NIL_RTCPUID;
+#endif
+
+ spin_lock_init(&pThis->Spinlock);
+
+ *pSpinlock = pThis;
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return VINF_SUCCESS;
+}
+RT_EXPORT_SYMBOL(RTSpinlockCreate);
+
+
+RTDECL(int) RTSpinlockDestroy(RTSPINLOCK Spinlock)
+{
+ /*
+ * Validate input.
+ */
+ PRTSPINLOCKINTERNAL pThis = (PRTSPINLOCKINTERNAL)Spinlock;
+ if (!pThis)
+ return VERR_INVALID_PARAMETER;
+ if (pThis->u32Magic != RTSPINLOCK_MAGIC)
+ {
+ AssertMsgFailed(("Invalid spinlock %p magic=%#x\n", pThis, pThis->u32Magic));
+ return VERR_INVALID_PARAMETER;
+ }
+
+ ASMAtomicIncU32(&pThis->u32Magic);
+ RTMemFree(pThis);
+ return VINF_SUCCESS;
+}
+RT_EXPORT_SYMBOL(RTSpinlockDestroy);
+
+
+RTDECL(void) RTSpinlockAcquire(RTSPINLOCK Spinlock)
+{
+ PRTSPINLOCKINTERNAL pThis = (PRTSPINLOCKINTERNAL)Spinlock;
+ IPRT_LINUX_SAVE_EFL_AC();
+ RT_ASSERT_PREEMPT_CPUID_VAR();
+ AssertMsg(pThis && pThis->u32Magic == RTSPINLOCK_MAGIC,
+ ("pThis=%p u32Magic=%08x\n", pThis, pThis ? (int)pThis->u32Magic : 0));
+
+#ifdef CONFIG_PROVE_LOCKING
+ lockdep_off();
+#endif
+ if (pThis->fFlags & RTSPINLOCK_FLAGS_INTERRUPT_SAFE)
+ {
+ unsigned long fIntSaved;
+ spin_lock_irqsave(&pThis->Spinlock, fIntSaved);
+ pThis->fIntSaved = fIntSaved;
+ }
+ else
+ spin_lock(&pThis->Spinlock);
+#ifdef CONFIG_PROVE_LOCKING
+ lockdep_on();
+#endif
+
+ IPRT_LINUX_RESTORE_EFL_ONLY_AC();
+ RT_ASSERT_PREEMPT_CPUID_SPIN_ACQUIRED(pThis);
+}
+RT_EXPORT_SYMBOL(RTSpinlockAcquire);
+
+
+RTDECL(void) RTSpinlockRelease(RTSPINLOCK Spinlock)
+{
+ PRTSPINLOCKINTERNAL pThis = (PRTSPINLOCKINTERNAL)Spinlock;
+ IPRT_LINUX_SAVE_EFL_AC(); /* spin_unlock* may preempt and trash eflags.ac. */
+ RT_ASSERT_PREEMPT_CPUID_SPIN_RELEASE_VARS();
+ AssertMsg(pThis && pThis->u32Magic == RTSPINLOCK_MAGIC,
+ ("pThis=%p u32Magic=%08x\n", pThis, pThis ? (int)pThis->u32Magic : 0));
+ RT_ASSERT_PREEMPT_CPUID_SPIN_RELEASE(pThis);
+
+#ifdef CONFIG_PROVE_LOCKING
+ lockdep_off();
+#endif
+ if (pThis->fFlags & RTSPINLOCK_FLAGS_INTERRUPT_SAFE)
+ {
+ unsigned long fIntSaved = pThis->fIntSaved;
+ pThis->fIntSaved = 0;
+ spin_unlock_irqrestore(&pThis->Spinlock, fIntSaved);
+ }
+ else
+ spin_unlock(&pThis->Spinlock);
+#ifdef CONFIG_PROVE_LOCKING
+ lockdep_on();
+#endif
+
+ IPRT_LINUX_RESTORE_EFL_ONLY_AC();
+ RT_ASSERT_PREEMPT_CPUID();
+}
+RT_EXPORT_SYMBOL(RTSpinlockRelease);
+
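A sketch of the spinlock API above; with RTSPINLOCK_FLAGS_INTERRUPT_SAFE the acquire maps to spin_lock_irqsave(), so the lock may also be taken from interrupt context (the counter and names are illustrative):

    static RTSPINLOCK g_hSpinDemo  = NIL_RTSPINLOCK;
    static uint64_t   g_cDemoTicks = 0;

    static int vboxDemoInitSpin(void)
    {
        return RTSpinlockCreate(&g_hSpinDemo, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "demo");
    }

    static void vboxDemoTick(void)
    {
        RTSpinlockAcquire(g_hSpinDemo);   /* spin_lock_irqsave under the hood */
        g_cDemoTicks++;
        RTSpinlockRelease(g_hSpinDemo);   /* restores the saved interrupt flag */
    }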
diff --git a/src/VBox/Runtime/r0drv/linux/string.h b/src/VBox/Runtime/r0drv/linux/string.h
new file mode 100644
index 00000000..9e5a6941
--- /dev/null
+++ b/src/VBox/Runtime/r0drv/linux/string.h
@@ -0,0 +1,70 @@
+/* $Id: string.h $ */
+/** @file
+ * IPRT - wrapper for the linux kernel asm/string.h.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+#ifndef IPRT_INCLUDED_SRC_r0drv_linux_string_h
+#define IPRT_INCLUDED_SRC_r0drv_linux_string_h
+#ifndef RT_WITHOUT_PRAGMA_ONCE
+# pragma once
+#endif
+
+#include <iprt/cdefs.h>
+
+RT_C_DECLS_BEGIN
+#ifndef bool /* Linux 2.6.19 C++ nightmare */
+#define bool bool_type
+#define true true_type
+#define false false_type
+#define _Bool int
+#define bool_type_r0drv_string_h__
+#endif
+#include <linux/types.h>
+#include <linux/string.h>
+#ifdef bool_type_r0drv_string_h__
+#undef bool
+#undef true
+#undef false
+#undef bool_type_r0drv_string_h__
+#endif
+char *strpbrk(const char *pszStr, const char *pszChars)
+#if defined(__THROW)
+ __THROW
+#endif
+ ;
+
+RT_C_DECLS_END
+
+#endif /* !IPRT_INCLUDED_SRC_r0drv_linux_string_h */
+
diff --git a/src/VBox/Runtime/r0drv/linux/the-linux-kernel.h b/src/VBox/Runtime/r0drv/linux/the-linux-kernel.h
new file mode 100644
index 00000000..8e70f992
--- /dev/null
+++ b/src/VBox/Runtime/r0drv/linux/the-linux-kernel.h
@@ -0,0 +1,494 @@
+/* $Id: the-linux-kernel.h $ */
+/** @file
+ * IPRT - Include all necessary headers for the Linux kernel.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+#ifndef IPRT_INCLUDED_SRC_r0drv_linux_the_linux_kernel_h
+#define IPRT_INCLUDED_SRC_r0drv_linux_the_linux_kernel_h
+#ifndef RT_WITHOUT_PRAGMA_ONCE
+# pragma once
+#endif
+
+/*
+ * Include iprt/types.h to install the bool wrappers.
+ * Then use the linux bool type for all the stuff included here.
+ */
+#include <iprt/types.h>
+#define bool linux_bool
+
+#if RT_GNUC_PREREQ(4, 6)
+# pragma GCC diagnostic push
+#endif
+#if RT_GNUC_PREREQ(4, 2)
+# pragma GCC diagnostic ignored "-Wunused-parameter"
+# if !defined(__cplusplus) && RT_GNUC_PREREQ(4, 3)
+# pragma GCC diagnostic ignored "-Wold-style-declaration" /* 2.6.18-411.0.0.0.1.el5/build/include/asm/apic.h:110: warning: 'inline' is not at beginning of declaration [-Wold-style-declaration] */
+# endif
+#endif
+
+
+#include <iprt/linux/version.h>
+#if RTLNX_VER_MIN(2,6,33)
+# include <generated/autoconf.h>
+#else
+# ifndef AUTOCONF_INCLUDED
+# include <linux/autoconf.h>
+# endif
+#endif
+
+/* We only support 2.4 and 2.6 series kernels */
+#if RTLNX_VER_MAX(2,4,0)
+# error Sorry, we do not support 2.3 and earlier kernels.
+#endif
+#if RTLNX_VER_MIN(2,5,0) && RTLNX_VER_MAX(2,6,0)
+# error Sorry, we do not support 2.5 series kernels (might work though).
+#endif
+
+#if defined(CONFIG_MODVERSIONS) && !defined(MODVERSIONS)
+# define MODVERSIONS
+# if RTLNX_VER_MAX(2,5,71)
+# include <linux/modversions.h>
+# endif
+#endif
+#ifndef KBUILD_STR
+# if RTLNX_VER_MAX(2,6,16)
+# define KBUILD_STR(s) s
+# else
+# define KBUILD_STR(s) #s
+# endif
+#endif
+# if RTLNX_VER_MIN(3,3,0)
+# include <linux/kconfig.h> /* for macro IS_ENABLED */
+# endif
+#include <linux/string.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#if RTLNX_VER_MIN(2,6,27)
+# include <linux/semaphore.h>
+#else /* older kernels */
+# include <asm/semaphore.h>
+#endif /* older kernels */
+#include <linux/module.h>
+#if RTLNX_VER_MIN(2,6,0)
+# include <linux/moduleparam.h>
+#endif
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/fs.h>
+#if RTLNX_VER_MIN(2,6,0)
+# include <linux/namei.h>
+#endif
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/slab.h>
+#include <linux/time.h>
+#include <linux/sched.h>
+
+#if RTLNX_VER_RANGE(3,9,23, 3,9,31)
+# include <linux/splice.h>
+#endif
+
+#if RTLNX_VER_MIN(3,9,0)
+# include <linux/sched/rt.h>
+#endif
+#if RTLNX_VER_MIN(4,11,0)
+# include <linux/sched/signal.h>
+# include <linux/sched/types.h>
+#endif
+#if RTLNX_VER_MIN(2,6,7)
+# include <linux/jiffies.h>
+#endif
+#if RTLNX_VER_MIN(2,6,16)
+# include <linux/ktime.h>
+# include <linux/hrtimer.h>
+#endif
+#include <linux/wait.h>
+#if RTLNX_VER_MIN(2,5,71)
+# include <linux/cpu.h>
+# include <linux/notifier.h>
+#endif
+#if RTLNX_VER_MIN(5,1,0)
+# include <uapi/linux/mman.h>
+#endif
+/* For the basic additions module */
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/completion.h>
+#include <linux/compiler.h>
+#if RTLNX_VER_MIN(5,9,0) || RTLNX_SUSE_MAJ_PREREQ(15,3) /* linux/fs.h defined HAVE_UNLOCKED_IOCTL from 2.6.11 up to 5.9 (also 5.3.18-56 in SLES15-SP3), when it became an implicit assumption. */
+# define HAVE_UNLOCKED_IOCTL 1 /* We use this in a couple of places, so for now just define it for 5.9+ too. */
+#endif
+#if !defined(HAVE_UNLOCKED_IOCTL) && RTLNX_VER_MAX(2,6,38)
+# include <linux/smp_lock.h>
+#endif
+/* For the shared folders module */
+#include <linux/vmalloc.h>
+#define wchar_t linux_wchar_t
+#include <linux/nls.h>
+#undef wchar_t
+#include <asm/mman.h>
+#include <asm/io.h>
+#include <asm/uaccess.h>
+#include <asm/div64.h>
+
+/* For thread-context hooks. */
+#if RTLNX_VER_MIN(2,6,18) && defined(CONFIG_PREEMPT_NOTIFIERS)
+# include <linux/preempt.h>
+#endif
+
+/* for workqueue / task queues. */
+#if RTLNX_VER_MIN(2,5,41)
+# include <linux/workqueue.h>
+#else
+# include <linux/tqueue.h>
+#endif
+
+#if RTLNX_VER_MIN(2,6,4)
+# include <linux/kthread.h>
+#endif
+
+/* for cr4_init_shadow() / cpu_tlbstate. */
+#if RTLNX_VER_MIN(3,20,0)
+# include <asm/tlbflush.h>
+#endif
+
+/* for set_pages_x() */
+#if RTLNX_VER_MIN(4,12,0)
+# include <asm/set_memory.h>
+#endif
+
+/* for __flush_tlb_all() */
+#if RTLNX_VER_MIN(2,6,28) && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86))
+# include <asm/tlbflush.h>
+#endif
+
+/* for kernel_fpu_begin / kernel_fpu_end() */
+#if RTLNX_VER_MIN(4,2,0)
+# include <asm/fpu/api.h>
+#endif
+
+#if RTLNX_VER_MIN(3,7,0)
+# include <asm/smap.h>
+#else
+static inline void clac(void) { }
+static inline void stac(void) { }
+#endif
+
+#if RTLNX_VER_MAX(2,6,0)
+# ifndef page_to_pfn
+# define page_to_pfn(page) ((page) - mem_map)
+# endif
+#endif
+
+#ifndef DEFINE_WAIT
+# define DEFINE_WAIT(name) DECLARE_WAITQUEUE(name, current)
+#endif
+
+#ifndef __GFP_NOWARN
+# define __GFP_NOWARN 0
+#endif
+
+/*
+ * 2.4 / early 2.6 compatibility wrappers
+ */
+#if RTLNX_VER_MAX(2,6,7)
+
+# ifndef MAX_JIFFY_OFFSET
+# define MAX_JIFFY_OFFSET ((~0UL >> 1)-1)
+# endif
+
+# if RTLNX_VER_MAX(2,4,29) || RTLNX_VER_MIN(2,6,0)
+
+DECLINLINE(unsigned int) jiffies_to_msecs(unsigned long cJiffies)
+{
+# if HZ <= 1000 && !(1000 % HZ)
+ return (1000 / HZ) * cJiffies;
+# elif HZ > 1000 && !(HZ % 1000)
+ return (cJiffies + (HZ / 1000) - 1) / (HZ / 1000);
+# else
+ return (cJiffies * 1000) / HZ;
+# endif
+}
+
+DECLINLINE(unsigned long) msecs_to_jiffies(unsigned int cMillies)
+{
+# if HZ > 1000
+ if (cMillies > jiffies_to_msecs(MAX_JIFFY_OFFSET))
+ return MAX_JIFFY_OFFSET;
+# endif
+# if HZ <= 1000 && !(1000 % HZ)
+ return (cMillies + (1000 / HZ) - 1) / (1000 / HZ);
+# elif HZ > 1000 && !(HZ % 1000)
+ return cMillies * (HZ / 1000);
+# else
+ return (cMillies * HZ + 999) / 1000;
+# endif
+}
+
+# endif /* < 2.4.29 || >= 2.6.0 */
+
+#endif /* < 2.6.7 */
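+/* Worked example for the wrappers above (illustrative, assuming HZ=250, i.e. a
+ * 4ms tick): msecs_to_jiffies(3) = (3 + 4 - 1) / 4 = 1 jiffy, so a sub-tick
+ * timeout still rounds up rather than becoming zero, while jiffies_to_msecs(1)
+ * = (1000 / 250) * 1 = 4ms reports the full tick the wait may actually take. */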
+
+/*
+ * 2.4 compatibility wrappers
+ */
+#if RTLNX_VER_MAX(2,6,0)
+
+# define prepare_to_wait(q, wait, state) \
+ do { \
+ add_wait_queue(q, wait); \
+ set_current_state(state); \
+ } while (0)
+
+# define after_wait(wait) \
+ do { \
+ list_del_init(&(wait)->task_list); \
+ } while (0)
+
+# define finish_wait(q, wait) \
+ do { \
+ set_current_state(TASK_RUNNING); \
+ remove_wait_queue(q, wait); \
+ } while (0)
+
+#else /* >= 2.6.0 */
+
+# define after_wait(wait) do {} while (0)
+
+#endif /* >= 2.6.0 */
+
+/** @def TICK_NSEC
+ * The time between ticks in nsec */
+#ifndef TICK_NSEC
+# define TICK_NSEC (1000000000UL / HZ)
+#endif
+
+/*
+ * This sucks soooo badly on x86! Why don't they export __PAGE_KERNEL_EXEC so PAGE_KERNEL_EXEC would be usable?
+ */
+#if RTLNX_VER_MIN(2,6,8) && defined(RT_ARCH_AMD64)
+# define MY_PAGE_KERNEL_EXEC PAGE_KERNEL_EXEC
+#elif RTLNX_VER_MIN(2,6,8) && defined(PAGE_KERNEL_EXEC) && defined(CONFIG_X86_PAE)
+# ifdef __PAGE_KERNEL_EXEC
+ /* >= 2.6.27 */
+# define MY_PAGE_KERNEL_EXEC __pgprot(boot_cpu_has(X86_FEATURE_PGE) ? __PAGE_KERNEL_EXEC | _PAGE_GLOBAL : __PAGE_KERNEL_EXEC)
+# else
+# define MY_PAGE_KERNEL_EXEC __pgprot(boot_cpu_has(X86_FEATURE_PGE) ? _PAGE_KERNEL_EXEC | _PAGE_GLOBAL : _PAGE_KERNEL_EXEC)
+# endif
+#else
+# define MY_PAGE_KERNEL_EXEC PAGE_KERNEL
+#endif
+
+
+/*
+ * The redhat hack section.
+ * - The current hacks are for 2.4.21-15.EL only.
+ */
+#ifndef NO_REDHAT_HACKS
+/* accounting. */
+# if RTLNX_VER_MAX(2,6,0)
+# ifdef VM_ACCOUNT
+# define USE_RHEL4_MUNMAP
+# endif
+# endif
+
+/* backported remap_page_range. */
+# if RTLNX_VER_MAX(2,6,0)
+# include <asm/tlb.h>
+# ifdef tlb_vma /* probably not good enough... */
+# define HAVE_26_STYLE_REMAP_PAGE_RANGE 1
+# endif
+# endif
+
+# ifndef RT_ARCH_AMD64
+/* In 2.6.9-22.ELsmp we have to call change_page_attr() twice when changing
+ * the page attributes from PAGE_KERNEL to something else, because there appears
+ * to be a bug in one of the many patches that redhat applied.
+ * It should be safe to do this on less buggy linux kernels too. ;-)
+ */
+# define MY_CHANGE_PAGE_ATTR(pPages, cPages, prot) \
+ do { \
+ if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) \
+ change_page_attr(pPages, cPages, prot); \
+ change_page_attr(pPages, cPages, prot); \
+ } while (0)
+# endif /* !RT_ARCH_AMD64 */
+#endif /* !NO_REDHAT_HACKS */
+
+#ifndef MY_CHANGE_PAGE_ATTR
+# ifdef RT_ARCH_AMD64 /** @todo This is a cheap hack, but it'll get around that 'else BUG();' in __change_page_attr(). */
+# define MY_CHANGE_PAGE_ATTR(pPages, cPages, prot) \
+ do { \
+ change_page_attr(pPages, cPages, PAGE_KERNEL_NOCACHE); \
+ change_page_attr(pPages, cPages, prot); \
+ } while (0)
+# else
+# define MY_CHANGE_PAGE_ATTR(pPages, cPages, prot) change_page_attr(pPages, cPages, prot)
+# endif
+#endif
+
+#if RTLNX_VER_MIN(2,6,25)
+# if RTLNX_VER_MAX(5,4,0) /* The interface was removed, but we only need it for < 2.4.22, so who cares. */
+# define MY_SET_PAGES_EXEC(pPages, cPages) set_pages_x(pPages, cPages)
+# define MY_SET_PAGES_NOEXEC(pPages, cPages) set_pages_nx(pPages, cPages)
+# endif
+#else
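+/* Pre-2.6.25 fallback: only change the page attributes when the executable
+   kernel mapping actually differs from PAGE_KERNEL. */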
+# define MY_SET_PAGES_EXEC(pPages, cPages) \
+ do { \
+ if (pgprot_val(MY_PAGE_KERNEL_EXEC) != pgprot_val(PAGE_KERNEL)) \
+ MY_CHANGE_PAGE_ATTR(pPages, cPages, MY_PAGE_KERNEL_EXEC); \
+ } while (0)
+# define MY_SET_PAGES_NOEXEC(pPages, cPages) \
+ do { \
+ if (pgprot_val(MY_PAGE_KERNEL_EXEC) != pgprot_val(PAGE_KERNEL)) \
+ MY_CHANGE_PAGE_ATTR(pPages, cPages, PAGE_KERNEL); \
+ } while (0)
+#endif
+
+/** @def ONE_MSEC_IN_JIFFIES
+ * The number of jiffies that make up 1 millisecond. Must be at least 1! */
+#if HZ <= 1000
+# define ONE_MSEC_IN_JIFFIES 1
+#elif !(HZ % 1000)
+# define ONE_MSEC_IN_JIFFIES (HZ / 1000)
+#else
+# define ONE_MSEC_IN_JIFFIES ((HZ + 999) / 1000)
+# error "HZ is not a multiple of 1000, the GIP stuff won't work right!"
+#endif
+
+/*
+ * Stop using the linux bool type.
+ */
+#undef bool
+
+#if RT_GNUC_PREREQ(4, 6)
+# pragma GCC diagnostic pop
+#endif
+
+/*
+ * There are post-2.6.24 kernels (confusingly, with an unchanged version number)
+ * that eliminate macros which were marked as deprecated.
+ */
+#ifndef __attribute_used__
+#define __attribute_used__ __used
+#endif
+
+/**
+ * Hack for shortening pointers on linux so we can fit more information into the
+ * task_struct::comm field. This is used by the semaphore code but put here
+ * because we don't have any better place at the moment. Don't use outside IPRT, please.
+ */
+#ifdef RT_ARCH_AMD64
+# define IPRT_DEBUG_SEMS_ADDRESS(addr) ( ((long)(addr) & (long)~UINT64_C(0xfffffff000000000)) )
+#else
+# define IPRT_DEBUG_SEMS_ADDRESS(addr) ( (long)(addr) )
+#endif
+
+/**
+ * Puts semaphore info into the task_struct::comm field if IPRT_DEBUG_SEMS is
+ * defined.
+ */
+#ifdef IPRT_DEBUG_SEMS
+# define IPRT_DEBUG_SEMS_STATE(pThis, chState) \
+ snprintf(current->comm, sizeof(current->comm), "%c%lx", (chState), IPRT_DEBUG_SEMS_ADDRESS(pThis));
+#else
+# define IPRT_DEBUG_SEMS_STATE(pThis, chState) do { } while (0)
+#endif
+
+/**
+ * Puts semaphore info into the task_struct::comm field if IPRT_DEBUG_SEMS is
+ * defined.
+ */
+#ifdef IPRT_DEBUG_SEMS
+# define IPRT_DEBUG_SEMS_STATE_RC(pThis, chState, rc) \
+ snprintf(current->comm, sizeof(current->comm), "%c%lx:%d", (chState), IPRT_DEBUG_SEMS_ADDRESS(pThis), rc);
+#else
+# define IPRT_DEBUG_SEMS_STATE_RC(pThis, chState, rc) do { } while (0)
+#endif
+
+/** @name Macros for preserving EFLAGS.AC on 3.19+/amd64 (paranoia).
+ * The AMD64 switch_to macro in arch/x86/include/asm/switch_to.h stopped
+ * restoring flags.
+ * @{ */
+#if (defined(CONFIG_X86_SMAP) || defined(RT_STRICT) || defined(IPRT_WITH_EFLAGS_AC_PRESERVING)) \
+ && !defined(IPRT_WITHOUT_EFLAGS_AC_PRESERVING)
+# include <iprt/asm-amd64-x86.h>
+# define IPRT_X86_EFL_AC RT_BIT(18)
+# define IPRT_LINUX_SAVE_EFL_AC() RTCCUINTREG fSavedEfl = ASMGetFlags()
+# define IPRT_LINUX_RESTORE_EFL_AC() ASMSetFlags(fSavedEfl)
+# define IPRT_LINUX_RESTORE_EFL_ONLY_AC() ASMChangeFlags(~IPRT_X86_EFL_AC, fSavedEfl & IPRT_X86_EFL_AC)
+#else
+# define IPRT_LINUX_SAVE_EFL_AC() do { } while (0)
+# define IPRT_LINUX_RESTORE_EFL_AC() do { } while (0)
+# define IPRT_LINUX_RESTORE_EFL_ONLY_AC() do { } while (0)
+#endif
+/** @} */
+
+/*
+ * There are some conflicting defines in iprt/param.h, sort them out here.
+ */
+#ifndef IPRT_INCLUDED_param_h
+# undef PAGE_SIZE
+# undef PAGE_OFFSET_MASK
+# include <iprt/param.h>
+#endif
+
+/*
+ * Some global indicator macros.
+ */
+/** @def IPRT_LINUX_HAS_HRTIMER
+ * Whether the kernel supports high resolution timers (Linux kernel versions
+ * 2.6.28 and later, which provide hrtimer_add_expires_ns() and schedule_hrtimeout()). */
+#if RTLNX_VER_MIN(2,6,28) || defined(DOXYGEN_RUNNING)
+# define IPRT_LINUX_HAS_HRTIMER
+#endif
+
+/*
+ * Workqueue stuff, see initterm-r0drv-linux.c.
+ */
+#if RTLNX_VER_MIN(2,5,41)
+typedef struct work_struct RTR0LNXWORKQUEUEITEM;
+#else
+typedef struct tq_struct RTR0LNXWORKQUEUEITEM;
+#endif
+DECLHIDDEN(void) rtR0LnxWorkqueuePush(RTR0LNXWORKQUEUEITEM *pWork, void (*pfnWorker)(RTR0LNXWORKQUEUEITEM *));
+DECLHIDDEN(void) rtR0LnxWorkqueueFlush(void);
+
+/*
+ * Memory hacks from memobj-r0drv-linux.c that shared folders need.
+ */
+RTDECL(struct page *) rtR0MemObjLinuxVirtToPage(void *pv);
+
+#endif /* !IPRT_INCLUDED_SRC_r0drv_linux_the_linux_kernel_h */
diff --git a/src/VBox/Runtime/r0drv/linux/thread-r0drv-linux.c b/src/VBox/Runtime/r0drv/linux/thread-r0drv-linux.c
new file mode 100644
index 00000000..2724d091
--- /dev/null
+++ b/src/VBox/Runtime/r0drv/linux/thread-r0drv-linux.c
@@ -0,0 +1,285 @@
+/* $Id: thread-r0drv-linux.c $ */
+/** @file
+ * IPRT - Threads, Ring-0 Driver, Linux.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#include "the-linux-kernel.h"
+#include "internal/iprt.h"
+#include <iprt/thread.h>
+
+#include <iprt/asm.h>
+#if RTLNX_VER_MAX(2,5,28) || defined(CONFIG_X86_SMAP)
+# include <iprt/asm-amd64-x86.h>
+#endif
+#include <iprt/assert.h>
+#include <iprt/errcore.h>
+#include <iprt/mp.h>
+
+
+/*********************************************************************************************************************************
+* Defined Constants And Macros *
+*********************************************************************************************************************************/
+#if defined(CONFIG_PREEMPT_COUNT) /* since 3.1 */ \
+ || defined(CONFIG_PREEMPTION) /* since 5.3 */ \
+ || defined(CONFIG_PREEMPT) /* since 2.6.13 - preemption model choice; before that arch specific choice back to 2.5.45. */
+# define IPRT_LNX_HAVE_PREEMPTION
+#endif
+
+
+/*********************************************************************************************************************************
+* Global Variables *
+*********************************************************************************************************************************/
+#ifndef IPRT_LNX_HAVE_PREEMPTION
+/** Per-cpu preemption counters. */
+static int32_t volatile g_acPreemptDisabled[NR_CPUS];
+#endif
+
+
+RTDECL(RTNATIVETHREAD) RTThreadNativeSelf(void)
+{
+ return (RTNATIVETHREAD)current;
+}
+RT_EXPORT_SYMBOL(RTThreadNativeSelf);
+
+
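+/**
+ * Common worker for RTThreadSleep and RTThreadSleepNoLog.
+ *
+ * @returns VINF_SUCCESS if the whole interval elapsed, VERR_INTERRUPTED if the
+ *          sleep was cut short (e.g. by a pending signal).
+ * @param   cMillies    Number of milliseconds to sleep.
+ */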
+static int rtR0ThreadLnxSleepCommon(RTMSINTERVAL cMillies)
+{
+ IPRT_LINUX_SAVE_EFL_AC();
+ long cJiffies = msecs_to_jiffies(cMillies);
+ set_current_state(TASK_INTERRUPTIBLE);
+ cJiffies = schedule_timeout(cJiffies);
+ IPRT_LINUX_RESTORE_EFL_AC();
+ if (!cJiffies)
+ return VINF_SUCCESS;
+ return VERR_INTERRUPTED;
+}
+
+
+RTDECL(int) RTThreadSleep(RTMSINTERVAL cMillies)
+{
+ return rtR0ThreadLnxSleepCommon(cMillies);
+}
+RT_EXPORT_SYMBOL(RTThreadSleep);
+
+
+RTDECL(int) RTThreadSleepNoLog(RTMSINTERVAL cMillies)
+{
+ return rtR0ThreadLnxSleepCommon(cMillies);
+}
+RT_EXPORT_SYMBOL(RTThreadSleepNoLog);
+
+
+RTDECL(bool) RTThreadYield(void)
+{
+ IPRT_LINUX_SAVE_EFL_AC();
+#if RTLNX_VER_MIN(2,4,20)
+ yield();
+#else
+ /** @todo r=ramshankar: Can we use cond_resched() instead? */
+ set_current_state(TASK_RUNNING);
+ sys_sched_yield();
+ schedule();
+#endif
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return true;
+}
+RT_EXPORT_SYMBOL(RTThreadYield);
+
+
+RTDECL(bool) RTThreadPreemptIsEnabled(RTTHREAD hThread)
+{
+#ifdef IPRT_LNX_HAVE_PREEMPTION
+ Assert(hThread == NIL_RTTHREAD); RT_NOREF_PV(hThread);
+# ifdef preemptible
+ return preemptible();
+# else
+ return preempt_count() == 0 && !in_atomic() && !irqs_disabled();
+# endif
+#else
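+    /* Non-preemptive kernel: consult our own per-CPU disable count and the
+       atomic/interrupt context indicators. */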
+ int32_t c;
+
+ Assert(hThread == NIL_RTTHREAD);
+ c = g_acPreemptDisabled[smp_processor_id()];
+ AssertMsg(c >= 0 && c < 32, ("%d\n", c));
+ if (c != 0)
+ return false;
+# if RTLNX_VER_MIN(2,5,32)
+ if (in_atomic())
+ return false;
+# endif
+# if RTLNX_VER_MIN(2,5,28)
+ if (irqs_disabled())
+ return false;
+# else
+ if (!ASMIntAreEnabled())
+ return false;
+# endif
+ return true;
+#endif
+}
+RT_EXPORT_SYMBOL(RTThreadPreemptIsEnabled);
+
+
+RTDECL(bool) RTThreadPreemptIsPending(RTTHREAD hThread)
+{
+ Assert(hThread == NIL_RTTHREAD); RT_NOREF_PV(hThread);
+#if RTLNX_VER_MIN(2,5,4)
+ return !!test_tsk_thread_flag(current, TIF_NEED_RESCHED);
+
+#elif RTLNX_VER_MIN(2,4,20)
+ return !!need_resched();
+
+#elif RTLNX_VER_MIN(2,1,110)
+ return current->need_resched != 0;
+
+#else
+ return need_resched != 0;
+#endif
+}
+RT_EXPORT_SYMBOL(RTThreadPreemptIsPending);
+
+
+RTDECL(bool) RTThreadPreemptIsPendingTrusty(void)
+{
+ /* yes, RTThreadPreemptIsPending is reliable. */
+ return true;
+}
+RT_EXPORT_SYMBOL(RTThreadPreemptIsPendingTrusty);
+
+
+RTDECL(bool) RTThreadPreemptIsPossible(void)
+{
+#ifdef IPRT_LNX_HAVE_PREEMPTION
+ return true; /* Yes, kernel preemption is possible. */
+#else
+ return false; /* No kernel preemption (or just CONFIG_PREEMPT_VOLUNTARY). */
+#endif
+}
+RT_EXPORT_SYMBOL(RTThreadPreemptIsPossible);
+
+
+RTDECL(void) RTThreadPreemptDisable(PRTTHREADPREEMPTSTATE pState)
+{
+#ifdef IPRT_LNX_HAVE_PREEMPTION
+ AssertPtr(pState);
+ Assert(pState->u32Reserved == 0);
+ pState->u32Reserved = 42;
+ preempt_disable();
+ RT_ASSERT_PREEMPT_CPUID_DISABLE(pState);
+
+#else /* !IPRT_LNX_HAVE_PREEMPTION */
+ int32_t c;
+ AssertPtr(pState);
+ Assert(pState->u32Reserved == 0);
+
+ /* Do our own accounting. */
+ c = ASMAtomicIncS32(&g_acPreemptDisabled[smp_processor_id()]);
+ AssertMsg(c > 0 && c < 32, ("%d\n", c));
+ pState->u32Reserved = c;
+ RT_ASSERT_PREEMPT_CPUID_DISABLE(pState);
+#endif
+}
+RT_EXPORT_SYMBOL(RTThreadPreemptDisable);
+
+
+RTDECL(void) RTThreadPreemptRestore(PRTTHREADPREEMPTSTATE pState)
+{
+#ifdef IPRT_LNX_HAVE_PREEMPTION
+ IPRT_LINUX_SAVE_EFL_AC(); /* paranoia */
+ AssertPtr(pState);
+ Assert(pState->u32Reserved == 42);
+ RT_ASSERT_PREEMPT_CPUID_RESTORE(pState);
+ preempt_enable();
+ IPRT_LINUX_RESTORE_EFL_ONLY_AC(); /* paranoia */
+
+#else
+ int32_t volatile *pc;
+ AssertPtr(pState);
+ AssertMsg(pState->u32Reserved > 0 && pState->u32Reserved < 32, ("%d\n", pState->u32Reserved));
+ RT_ASSERT_PREEMPT_CPUID_RESTORE(pState);
+
+ /* Do our own accounting. */
+ pc = &g_acPreemptDisabled[smp_processor_id()];
+ AssertMsg(pState->u32Reserved == (uint32_t)*pc, ("u32Reserved=%d *pc=%d \n", pState->u32Reserved, *pc));
+ ASMAtomicUoWriteS32(pc, pState->u32Reserved - 1);
+#endif
+ pState->u32Reserved = 0;
+}
+RT_EXPORT_SYMBOL(RTThreadPreemptRestore);
+
+
+RTDECL(bool) RTThreadIsInInterrupt(RTTHREAD hThread)
+{
+ Assert(hThread == NIL_RTTHREAD); NOREF(hThread);
+
+ return in_interrupt() != 0;
+}
+RT_EXPORT_SYMBOL(RTThreadIsInInterrupt);
+
+
+RTDECL(int) RTThreadQueryTerminationStatus(RTTHREAD hThread)
+{
+ struct task_struct *pTask = current;
+ AssertReturn(hThread == NIL_RTTHREAD, VERR_NOT_SUPPORTED);
+
+ /* Check out pending signals. ASSUMES we can get away w/o locking
+ anything because we're only reading the data. */
+ if (sigismember(&pTask->pending.signal, SIGKILL))
+ return VINF_THREAD_IS_TERMINATING;
+
+#if RTLNX_VER_MIN(2,5,34)
+ /* Check the pending signals shared with other threads in
+ the same process/group. ASSUME since we're alive that
+ the signal_struct won't be freed while we're looking
+ at it here... */
+ {
+# if RTLNX_VER_MIN(2,5,60)
+ struct signal_struct *pSignal = current->signal;
+# else
+ struct signal_struct *pSignal = current->sig;
+# endif
+ if ( pSignal
+ && sigismember(&pSignal->shared_pending.signal, SIGKILL))
+ return VINF_THREAD_IS_TERMINATING;
+ }
+#endif
+
+ return VINF_SUCCESS;
+}
+RT_EXPORT_SYMBOL(RTThreadQueryTerminationStatus);
+
diff --git a/src/VBox/Runtime/r0drv/linux/thread2-r0drv-linux.c b/src/VBox/Runtime/r0drv/linux/thread2-r0drv-linux.c
new file mode 100644
index 00000000..16cebd7f
--- /dev/null
+++ b/src/VBox/Runtime/r0drv/linux/thread2-r0drv-linux.c
@@ -0,0 +1,243 @@
+/* $Id: thread2-r0drv-linux.c $ */
+/** @file
+ * IPRT - Threads (Part 2), Ring-0 Driver, Linux.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#include "the-linux-kernel.h"
+#include "internal/iprt.h"
+
+#include <iprt/assert.h>
+#include <iprt/thread.h>
+#include <iprt/errcore.h>
+#include "internal/thread.h"
+
+#if RTLNX_VER_MIN(4,11,0)
+ #include <uapi/linux/sched/types.h>
+#endif /* >= KERNEL_VERSION(4, 11, 0) */
+
+RTDECL(RTTHREAD) RTThreadSelf(void)
+{
+ return rtThreadGetByNative((RTNATIVETHREAD)current);
+}
+
+
+DECLHIDDEN(int) rtThreadNativeInit(void)
+{
+ return VINF_SUCCESS;
+}
+
+
+DECLHIDDEN(int) rtThreadNativeSetPriority(PRTTHREADINT pThread, RTTHREADTYPE enmType)
+{
+#if RTLNX_VER_MIN(2,5,2)
+ /*
+ * Assignments are partially based on g_aTypesLinuxFree but
+ * scaled up in the high priority end.
+ *
+ * 5.9.0 - :
+     *    The sched_set_normal interface does not really check the input,
+ * whereas sched_set_fifo & sched_set_fifo_low have fixed assignments.
+ * 2.6.11 - 5.9.0:
+     *    Use sched_setscheduler to try to effect FIFO scheduling
+ * for IO and TIMER threads, otherwise use set_user_nice.
+ * 2.5.2 - 5.9.0:
+ * Use set_user_nice to renice the thread.
+ */
+ int iNice = 0;
+# if RTLNX_VER_MAX(5,9,0)
+ int rc;
+# if RTLNX_VER_MIN(2,6,11)
+ int iSchedClass = SCHED_NORMAL;
+ struct sched_param Param = { .sched_priority = 0 };
+# endif
+# endif
+ switch (enmType)
+ {
+ case RTTHREADTYPE_INFREQUENT_POLLER:
+ iNice = +3;
+ break;
+
+ case RTTHREADTYPE_MAIN_HEAVY_WORKER:
+ iNice = +2;
+ break;
+
+ case RTTHREADTYPE_EMULATION:
+ iNice = +1;
+ break;
+
+ case RTTHREADTYPE_DEFAULT:
+ case RTTHREADTYPE_GUI:
+ case RTTHREADTYPE_MAIN_WORKER:
+ iNice = 0;
+ break;
+
+ case RTTHREADTYPE_VRDP_IO:
+ case RTTHREADTYPE_DEBUGGER:
+ iNice = -1;
+ break;
+
+ case RTTHREADTYPE_MSG_PUMP:
+ iNice = -2;
+ break;
+
+ case RTTHREADTYPE_IO:
+# if RTLNX_VER_MIN(5,9,0)
+ sched_set_fifo_low(current);
+ return VINF_SUCCESS;
+# else
+ iNice = -12;
+# if RTLNX_VER_MIN(2,6,11)
+ iSchedClass = SCHED_FIFO;
+ Param.sched_priority = 1; /* => prio=98; */
+# endif
+ break;
+# endif
+
+ case RTTHREADTYPE_TIMER:
+# if RTLNX_VER_MIN(5,9,0)
+ sched_set_fifo(current);
+ return VINF_SUCCESS;
+# else
+ iNice = -19;
+# if RTLNX_VER_MIN(2,6,11)
+ iSchedClass = SCHED_FIFO;
+ Param.sched_priority = MAX_RT_PRIO / 2; /* => prio=49 */
+# endif
+ break;
+# endif
+ default:
+ AssertMsgFailedReturn(("enmType=%d\n", enmType), VERR_INVALID_PARAMETER);
+ }
+
+# if RTLNX_VER_MIN(5,9,0)
+ /*
+ * We only get here for renice work.
+ */
+ sched_set_normal(current, iNice);
+
+# else /* < 5.9.0 */
+# if RTLNX_VER_MIN(2,6,11)
+ /*
+     * Try to set the scheduler parameters.
+     * Fall back on SCHED_NORMAL + nice if this fails for the FIFO policy.
+ */
+ rc = sched_setscheduler(current, iSchedClass, &Param);
+ if (rc)
+ {
+ Param.sched_priority = 0;
+ iSchedClass = SCHED_NORMAL;
+ rc = sched_setscheduler(current, iSchedClass, &Param);
+ }
+
+ /*
+ * Renice if using normal scheduling class.
+ */
+ if (iSchedClass == SCHED_NORMAL)
+# endif /* >= 2.6.11 */
+ set_user_nice(current, iNice);
+
+# endif /* < 5.9.0 */
+#else /* < 2.5.2 */
+ RT_NOREF_PV(enmType);
+#endif /* < 2.5.2 */
+ RT_NOREF_PV(pThread);
+ return VINF_SUCCESS;
+}
+
+
+DECLHIDDEN(int) rtThreadNativeAdopt(PRTTHREADINT pThread)
+{
+ RT_NOREF_PV(pThread);
+ return VERR_NOT_IMPLEMENTED;
+}
+
+
+DECLHIDDEN(void) rtThreadNativeWaitKludge(PRTTHREADINT pThread)
+{
+ /** @todo fix RTThreadWait/RTR0Term race on linux. */
+ RTThreadSleep(1); NOREF(pThread);
+}
+
+
+DECLHIDDEN(void) rtThreadNativeDestroy(PRTTHREADINT pThread)
+{
+ NOREF(pThread);
+}
+
+
+#if RTLNX_VER_MIN(2,6,4)
+/**
+ * Native kernel thread wrapper function.
+ *
+ * This will forward to rtThreadMain and do termination upon return.
+ *
+ * @param pvArg Pointer to the argument package.
+ */
+static int rtThreadNativeMain(void *pvArg)
+{
+ PRTTHREADINT pThread = (PRTTHREADINT)pvArg;
+
+ rtThreadMain(pThread, (RTNATIVETHREAD)current, &pThread->szName[0]);
+ return 0;
+}
+#endif
+
+
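+/**
+ * Creates and starts the native kernel thread via kthread_run(), which enters
+ * rtThreadNativeMain. Only available on 2.6.4 and later kernels.
+ */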
+DECLHIDDEN(int) rtThreadNativeCreate(PRTTHREADINT pThreadInt, PRTNATIVETHREAD pNativeThread)
+{
+#if RTLNX_VER_MIN(2,6,4)
+ struct task_struct *NativeThread;
+ IPRT_LINUX_SAVE_EFL_AC();
+
+ RT_ASSERT_PREEMPTIBLE();
+
+ NativeThread = kthread_run(rtThreadNativeMain, pThreadInt, "iprt-%s", pThreadInt->szName);
+
+ if (!IS_ERR(NativeThread))
+ {
+ *pNativeThread = (RTNATIVETHREAD)NativeThread;
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return VINF_SUCCESS;
+ }
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return VERR_GENERAL_FAILURE;
+#else
+ return VERR_NOT_IMPLEMENTED;
+#endif
+}
+
diff --git a/src/VBox/Runtime/r0drv/linux/threadctxhooks-r0drv-linux.c b/src/VBox/Runtime/r0drv/linux/threadctxhooks-r0drv-linux.c
new file mode 100644
index 00000000..a7b7d905
--- /dev/null
+++ b/src/VBox/Runtime/r0drv/linux/threadctxhooks-r0drv-linux.c
@@ -0,0 +1,341 @@
+/* $Id: threadctxhooks-r0drv-linux.c $ */
+/** @file
+ * IPRT - Thread Context Switching Hook, Ring-0 Driver, Linux.
+ */
+
+/*
+ * Copyright (C) 2013-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#include "the-linux-kernel.h"
+#include "internal/iprt.h"
+
+#include <iprt/mem.h>
+#include <iprt/assert.h>
+#include <iprt/thread.h>
+#include <iprt/errcore.h>
+#include <iprt/asm.h>
+#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
+# include <iprt/asm-amd64-x86.h>
+#endif
+#include "internal/thread.h"
+
+
+/*
+ * Linux kernel 2.6.23 introduced preemption notifiers, but RedHat backported
+ * them to its 2.6.18 kernels.
+ */
+#if RTLNX_VER_MIN(2,6,18) && defined(CONFIG_PREEMPT_NOTIFIERS)
+
+
+/*********************************************************************************************************************************
+* Structures and Typedefs *
+*********************************************************************************************************************************/
+/**
+ * The internal hook object for linux.
+ */
+typedef struct RTTHREADCTXHOOKINT
+{
+ /** Magic value (RTTHREADCTXHOOKINT_MAGIC). */
+ uint32_t volatile u32Magic;
+ /** The thread handle (owner) for which the hook is registered. */
+ RTNATIVETHREAD hOwner;
+ /** The preemption notifier object. */
+ struct preempt_notifier LnxPreemptNotifier;
+ /** Whether the hook is enabled or not. If enabled, the LnxPreemptNotifier
+ * is linked into the owning thread's list of preemption callouts. */
+ bool fEnabled;
+ /** Pointer to the user callback. */
+ PFNRTTHREADCTXHOOK pfnCallback;
+ /** User argument passed to the callback. */
+ void *pvUser;
+ /** The linux callbacks. */
+ struct preempt_ops PreemptOps;
+#if RTLNX_VER_MIN(3,1,19) && defined(RT_ARCH_AMD64)
+ /** Starting with 3.1.19, the linux kernel doesn't restore kernel RFLAGS during
+ * task switch, so we have to do that ourselves. (x86 code is not affected.) */
+ RTCCUINTREG fSavedRFlags;
+#endif
+} RTTHREADCTXHOOKINT;
+typedef RTTHREADCTXHOOKINT *PRTTHREADCTXHOOKINT;
+
+
+/**
+ * Hook function for the thread schedule out event.
+ *
+ * @param pPreemptNotifier Pointer to the preempt_notifier struct.
+ * @param pNext Pointer to the task that is being scheduled
+ * instead of the current thread.
+ *
+ * @remarks Called with the rq (runqueue) lock held and with preemption and
+ * interrupts disabled!
+ */
+static void rtThreadCtxHooksLnxSchedOut(struct preempt_notifier *pPreemptNotifier, struct task_struct *pNext)
+{
+ PRTTHREADCTXHOOKINT pThis = RT_FROM_MEMBER(pPreemptNotifier, RTTHREADCTXHOOKINT, LnxPreemptNotifier);
+#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
+ RTCCUINTREG fSavedEFlags = ASMGetFlags();
+ stac();
+#endif
+ RT_NOREF_PV(pNext);
+
+ AssertPtr(pThis);
+ AssertPtr(pThis->pfnCallback);
+ Assert(pThis->fEnabled);
+ Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+
+ pThis->pfnCallback(RTTHREADCTXEVENT_OUT, pThis->pvUser);
+
+#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
+ ASMSetFlags(fSavedEFlags);
+# if RTLNX_VER_MIN(3,1,19) && defined(RT_ARCH_AMD64)
+ pThis->fSavedRFlags = fSavedEFlags;
+# endif
+#endif
+}
+
+
+/**
+ * Hook function for the thread schedule in event.
+ *
+ * @param pPreemptNotifier Pointer to the preempt_notifier struct.
+ * @param iCpu The CPU this thread is being scheduled on.
+ *
+ * @remarks Called without holding the rq (runqueue) lock and with preemption
+ * enabled!
+ * @todo r=bird: Preemption is of course disabled when it is called.
+ */
+static void rtThreadCtxHooksLnxSchedIn(struct preempt_notifier *pPreemptNotifier, int iCpu)
+{
+ PRTTHREADCTXHOOKINT pThis = RT_FROM_MEMBER(pPreemptNotifier, RTTHREADCTXHOOKINT, LnxPreemptNotifier);
+#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
+ RTCCUINTREG fSavedEFlags = ASMGetFlags();
+ stac();
+#endif
+ RT_NOREF_PV(iCpu);
+
+ AssertPtr(pThis);
+ AssertPtr(pThis->pfnCallback);
+ Assert(pThis->fEnabled);
+
+ pThis->pfnCallback(RTTHREADCTXEVENT_IN, pThis->pvUser);
+
+#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
+# if RTLNX_VER_MIN(3,1,19) && defined(RT_ARCH_AMD64)
+ fSavedEFlags &= ~RT_BIT_64(18) /*X86_EFL_AC*/;
+ fSavedEFlags |= pThis->fSavedRFlags & RT_BIT_64(18) /*X86_EFL_AC*/;
+# endif
+ ASMSetFlags(fSavedEFlags);
+#endif
+}
+
+
+/**
+ * Worker function for RTThreadCtxHooks(Deregister|Release)().
+ *
+ * @param pThis Pointer to the internal thread-context object.
+ */
+DECLINLINE(void) rtThreadCtxHookDisable(PRTTHREADCTXHOOKINT pThis)
+{
+ Assert(pThis->PreemptOps.sched_out == rtThreadCtxHooksLnxSchedOut);
+ Assert(pThis->PreemptOps.sched_in == rtThreadCtxHooksLnxSchedIn);
+ preempt_disable();
+ preempt_notifier_unregister(&pThis->LnxPreemptNotifier);
+ pThis->fEnabled = false;
+ preempt_enable();
+}
+
+
+RTDECL(int) RTThreadCtxHookCreate(PRTTHREADCTXHOOK phCtxHook, uint32_t fFlags, PFNRTTHREADCTXHOOK pfnCallback, void *pvUser)
+{
+ IPRT_LINUX_SAVE_EFL_AC();
+
+ /*
+ * Validate input.
+ */
+ PRTTHREADCTXHOOKINT pThis;
+ Assert(RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ AssertPtrReturn(pfnCallback, VERR_INVALID_POINTER);
+ AssertReturn(fFlags == 0, VERR_INVALID_FLAGS);
+
+ /*
+ * Allocate and initialize a new hook. We don't register it yet, just
+ * create it.
+ */
+ pThis = (PRTTHREADCTXHOOKINT)RTMemAllocZ(sizeof(*pThis));
+ if (RT_UNLIKELY(!pThis))
+ {
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return VERR_NO_MEMORY;
+ }
+ pThis->u32Magic = RTTHREADCTXHOOKINT_MAGIC;
+ pThis->hOwner = RTThreadNativeSelf();
+ pThis->fEnabled = false;
+ pThis->pfnCallback = pfnCallback;
+ pThis->pvUser = pvUser;
+ preempt_notifier_init(&pThis->LnxPreemptNotifier, &pThis->PreemptOps);
+ pThis->PreemptOps.sched_out = rtThreadCtxHooksLnxSchedOut;
+ pThis->PreemptOps.sched_in = rtThreadCtxHooksLnxSchedIn;
+
+#if RTLNX_VER_MIN(4,2,0)
+ preempt_notifier_inc();
+#endif
+
+ *phCtxHook = pThis;
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return VINF_SUCCESS;
+}
+RT_EXPORT_SYMBOL(RTThreadCtxHookCreate);
+
+
+RTDECL(int ) RTThreadCtxHookDestroy(RTTHREADCTXHOOK hCtxHook)
+{
+ IPRT_LINUX_SAVE_EFL_AC();
+
+ /*
+ * Validate input.
+ */
+ PRTTHREADCTXHOOKINT pThis = hCtxHook;
+ if (pThis == NIL_RTTHREADCTXHOOK)
+ return VINF_SUCCESS;
+ AssertPtr(pThis);
+ AssertMsgReturn(pThis->u32Magic == RTTHREADCTXHOOKINT_MAGIC, ("pThis->u32Magic=%RX32 pThis=%p\n", pThis->u32Magic, pThis),
+ VERR_INVALID_HANDLE);
+ Assert(RTThreadPreemptIsEnabled(NIL_RTTHREAD));
+ Assert(!pThis->fEnabled || pThis->hOwner == RTThreadNativeSelf());
+
+ /*
+ * If there's still a registered thread-context hook, deregister it now before destroying the object.
+ */
+ if (pThis->fEnabled)
+ {
+ Assert(pThis->hOwner == RTThreadNativeSelf());
+ rtThreadCtxHookDisable(pThis);
+ Assert(!pThis->fEnabled); /* paranoia */
+ }
+
+#if RTLNX_VER_MIN(4,2,0)
+ preempt_notifier_dec();
+#endif
+
+ ASMAtomicWriteU32(&pThis->u32Magic, ~RTTHREADCTXHOOKINT_MAGIC);
+ RTMemFree(pThis);
+
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return VINF_SUCCESS;
+}
+RT_EXPORT_SYMBOL(RTThreadCtxHookDestroy);
+
+
+RTDECL(int) RTThreadCtxHookEnable(RTTHREADCTXHOOK hCtxHook)
+{
+ /*
+ * Validate input.
+ */
+ PRTTHREADCTXHOOKINT pThis = hCtxHook;
+ AssertPtr(pThis);
+ AssertMsgReturn(pThis->u32Magic == RTTHREADCTXHOOKINT_MAGIC, ("pThis->u32Magic=%RX32 pThis=%p\n", pThis->u32Magic, pThis),
+ VERR_INVALID_HANDLE);
+ Assert(pThis->hOwner == RTThreadNativeSelf());
+ Assert(!pThis->fEnabled);
+ if (!pThis->fEnabled)
+ {
+ IPRT_LINUX_SAVE_EFL_AC();
+ Assert(pThis->PreemptOps.sched_out == rtThreadCtxHooksLnxSchedOut);
+ Assert(pThis->PreemptOps.sched_in == rtThreadCtxHooksLnxSchedIn);
+
+ /*
+ * Register the callback.
+ */
+ preempt_disable();
+ pThis->fEnabled = true;
+ preempt_notifier_register(&pThis->LnxPreemptNotifier);
+ preempt_enable();
+
+ IPRT_LINUX_RESTORE_EFL_AC();
+ }
+
+ return VINF_SUCCESS;
+}
+RT_EXPORT_SYMBOL(RTThreadCtxHookEnable);
+
+
+RTDECL(int) RTThreadCtxHookDisable(RTTHREADCTXHOOK hCtxHook)
+{
+ /*
+ * Validate input.
+ */
+ PRTTHREADCTXHOOKINT pThis = hCtxHook;
+ if (pThis != NIL_RTTHREADCTXHOOK)
+ {
+ AssertPtr(pThis);
+ AssertMsgReturn(pThis->u32Magic == RTTHREADCTXHOOKINT_MAGIC, ("pThis->u32Magic=%RX32 pThis=%p\n", pThis->u32Magic, pThis),
+ VERR_INVALID_HANDLE);
+ Assert(pThis->hOwner == RTThreadNativeSelf());
+
+ /*
+ * Deregister the callback.
+ */
+ if (pThis->fEnabled)
+ {
+ IPRT_LINUX_SAVE_EFL_AC();
+ rtThreadCtxHookDisable(pThis);
+ IPRT_LINUX_RESTORE_EFL_AC();
+ }
+ }
+ return VINF_SUCCESS;
+}
+RT_EXPORT_SYMBOL(RTThreadCtxHookDisable);
+
+
+RTDECL(bool) RTThreadCtxHookIsEnabled(RTTHREADCTXHOOK hCtxHook)
+{
+ /*
+ * Validate input.
+ */
+ PRTTHREADCTXHOOKINT pThis = hCtxHook;
+ if (pThis == NIL_RTTHREADCTXHOOK)
+ return false;
+ AssertPtr(pThis);
+ AssertMsgReturn(pThis->u32Magic == RTTHREADCTXHOOKINT_MAGIC, ("pThis->u32Magic=%RX32 pThis=%p\n", pThis->u32Magic, pThis),
+ false);
+
+ return pThis->fEnabled;
+}
+RT_EXPORT_SYMBOL(RTThreadCtxHookIsEnabled);
+
+#else /* Not supported / Not needed */
+# include "../generic/threadctxhooks-r0drv-generic.cpp"
+#endif /* Not supported / Not needed */
+
diff --git a/src/VBox/Runtime/r0drv/linux/time-r0drv-linux.c b/src/VBox/Runtime/r0drv/linux/time-r0drv-linux.c
new file mode 100644
index 00000000..370afbf1
--- /dev/null
+++ b/src/VBox/Runtime/r0drv/linux/time-r0drv-linux.c
@@ -0,0 +1,221 @@
+/* $Id: time-r0drv-linux.c $ */
+/** @file
+ * IPRT - Time, Ring-0 Driver, Linux.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP RTLOGGROUP_TIME
+#include "the-linux-kernel.h"
+#include "internal/iprt.h"
+/* Make sure we have the setting functions we need for RTTimeNow: */
+#if RTLNX_VER_MAX(2,6,16)
+# define RTTIME_INCL_TIMEVAL
+#elif RTLNX_VER_MAX(3,17,0)
+# define RTTIME_INCL_TIMESPEC
+#endif
+#include <iprt/time.h>
+#include <iprt/asm.h>
+
+
+
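+/**
+ * Gets the monotonic system time in nanoseconds, using the best time source
+ * available on the running kernel version (see the version checks below).
+ */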
+DECLINLINE(uint64_t) rtTimeGetSystemNanoTS(void)
+{
+#if RTLNX_VER_MIN(5,6,0)
+ /*
+ * Starting with kernel version 5.6-rc3 only 64-bit time interfaces
+ * are allowed in the kernel.
+ */
+ uint64_t u64;
+ struct timespec64 Ts = { 0, 0 };
+
+ ktime_get_ts64(&Ts);
+ u64 = Ts.tv_sec * RT_NS_1SEC_64 + Ts.tv_nsec;
+ return u64;
+
+#elif RTLNX_VER_MIN(2,6,16) /* This must match timer-r0drv-linux.c! */
+ /*
+ * Use ktime_get_ts, this is also what clock_gettime(CLOCK_MONOTONIC,) is using.
+ */
+ uint64_t u64;
+ struct timespec Ts = { 0, 0 };
+ ktime_get_ts(&Ts);
+ u64 = Ts.tv_sec * RT_NS_1SEC_64 + Ts.tv_nsec;
+ return u64;
+
+#elif RTLNX_VER_MIN(2,5,60)
+ /*
+ * Seems there is no way of getting to the exact source of
+ * sys_clock_gettime(CLOCK_MONOTONIC, &ts) here, I think. But
+ * 64-bit jiffies adjusted for the initial value should be pretty
+ * much the same I hope.
+ */
+ uint64_t u64 = get_jiffies_64();
+# ifdef INITIAL_JIFFIES
+ u64 += INITIAL_JIFFIES;
+# endif
+ u64 *= TICK_NSEC;
+ return u64;
+
+#else /* < 2.5.60 */
+# if BITS_PER_LONG >= 64
+ /*
+ * This is the same as above, except that there is no get_jiffies_64()
+     * here and we rely on long, and therefore jiffies, being 64-bit instead.
+ */
+ uint64_t u64 = jiffies;
+# ifdef INITIAL_JIFFIES
+ u64 += INITIAL_JIFFIES;
+# endif
+ u64 *= TICK_NSEC;
+ return u64;
+
+# else /* 32 bit jiffies */
+ /*
+     * We'll have to try to track jiffy rollovers here or we'll be
+ * in trouble every time it flips.
+ *
+ * The high dword of the s_u64Last is the rollover count, the
+ * low dword is the previous jiffies. Updating is done by
+ * atomic compare & exchange of course.
+ */
+ static uint64_t volatile s_u64Last = 0;
+ uint64_t u64;
+
+ for (;;)
+ {
+ uint64_t u64NewLast;
+ int32_t iDelta;
+ uint32_t cRollovers;
+ uint32_t u32LastJiffies;
+
+ /* sample the values */
+ unsigned long ulNow = jiffies;
+ uint64_t u64Last = s_u64Last;
+ if (ulNow != jiffies)
+ continue; /* try again */
+# ifdef INITIAL_JIFFIES
+ ulNow += INITIAL_JIFFIES;
+# endif
+
+ u32LastJiffies = (uint32_t)u64Last;
+ cRollovers = u64Last >> 32;
+
+ /*
+ * Check for rollover and update the static last value.
+ *
+ * We have to make sure we update it successfully to rule out
+ * an underrun because of racing someone.
+ */
+ iDelta = ulNow - u32LastJiffies;
+ if (iDelta < 0)
+ {
+ cRollovers++;
+ u64NewLast = RT_MAKE_U64(ulNow, cRollovers);
+ if (!ASMAtomicCmpXchgU64(&s_u64Last, u64NewLast, u64Last))
+ continue; /* race, try again */
+ }
+ else
+ {
+ u64NewLast = RT_MAKE_U64(ulNow, cRollovers);
+ ASMAtomicCmpXchgU64(&s_u64Last, u64NewLast, u64Last);
+ }
+
+ /* calculate the return value */
+ u64 = ulNow;
+ u64 *= TICK_NSEC;
+ u64 += cRollovers * (_4G * TICK_NSEC);
+ break;
+ }
+
+ return u64;
+# endif /* 32 bit jiffies */
+#endif /* < 2.5.60 */
+}
+
+
+RTDECL(uint64_t) RTTimeNanoTS(void)
+{
+ return rtTimeGetSystemNanoTS();
+}
+RT_EXPORT_SYMBOL(RTTimeNanoTS);
+
+
+RTDECL(uint64_t) RTTimeMilliTS(void)
+{
+ return rtTimeGetSystemNanoTS() / RT_NS_1MS;
+}
+RT_EXPORT_SYMBOL(RTTimeMilliTS);
+
+
+RTDECL(uint64_t) RTTimeSystemNanoTS(void)
+{
+ return rtTimeGetSystemNanoTS();
+}
+RT_EXPORT_SYMBOL(RTTimeSystemNanoTS);
+
+
+RTDECL(uint64_t) RTTimeSystemMilliTS(void)
+{
+ return rtTimeGetSystemNanoTS() / RT_NS_1MS;
+}
+RT_EXPORT_SYMBOL(RTTimeSystemMilliTS);
+
+
+RTDECL(PRTTIMESPEC) RTTimeNow(PRTTIMESPEC pTime)
+{
+ IPRT_LINUX_SAVE_EFL_AC();
+#if RTLNX_VER_MIN(3,17,0)
+ struct timespec64 Ts;
+ ktime_get_real_ts64(&Ts); /* ktime_get_real_ts64 was added as a macro in 3.17, function since 4.18. */
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return RTTimeSpecSetTimespec64(pTime, &Ts);
+
+#elif RTLNX_VER_MIN(2,6,16)
+ struct timespec Ts;
+ ktime_get_real_ts(&Ts); /* ktime_get_real_ts was removed in Linux 4.20. */
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return RTTimeSpecSetTimespec(pTime, &Ts);
+
+#else /* < 2.6.16 */
+ struct timeval Tv;
+ do_gettimeofday(&Tv);
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return RTTimeSpecSetTimeval(pTime, &Tv);
+#endif
+}
+RT_EXPORT_SYMBOL(RTTimeNow);
+
diff --git a/src/VBox/Runtime/r0drv/linux/timer-r0drv-linux.c b/src/VBox/Runtime/r0drv/linux/timer-r0drv-linux.c
new file mode 100644
index 00000000..68886a8c
--- /dev/null
+++ b/src/VBox/Runtime/r0drv/linux/timer-r0drv-linux.c
@@ -0,0 +1,1739 @@
+/* $Id: timer-r0drv-linux.c $ */
+/** @file
+ * IPRT - Timers, Ring-0 Driver, Linux.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#include "the-linux-kernel.h"
+#include "internal/iprt.h"
+
+#include <iprt/timer.h>
+#include <iprt/time.h>
+#include <iprt/mp.h>
+#include <iprt/cpuset.h>
+#include <iprt/spinlock.h>
+#include <iprt/err.h>
+#include <iprt/asm.h>
+#include <iprt/assert.h>
+#include <iprt/alloc.h>
+
+#include "internal/magics.h"
+
+/** @def RTTIMER_LINUX_WITH_HRTIMER
+ * Whether to use high resolution timers. */
+#if !defined(RTTIMER_LINUX_WITH_HRTIMER) \
+ && defined(IPRT_LINUX_HAS_HRTIMER)
+# define RTTIMER_LINUX_WITH_HRTIMER
+#endif
+
+#if RTLNX_VER_MAX(2,6,31)
+# define mod_timer_pinned mod_timer
+# define HRTIMER_MODE_ABS_PINNED HRTIMER_MODE_ABS
+#endif
+
+
+/*********************************************************************************************************************************
+* Structures and Typedefs *
+*********************************************************************************************************************************/
+/**
+ * Timer state machine.
+ *
+ * This is used to try to handle the issues with MP events and
+ * timers that run on all CPUs. It's relatively nasty :-/
+ */
+typedef enum RTTIMERLNXSTATE
+{
+ /** Stopped. */
+ RTTIMERLNXSTATE_STOPPED = 0,
+ /** Transient state; next ACTIVE. */
+ RTTIMERLNXSTATE_STARTING,
+ /** Transient state; next ACTIVE. (not really necessary) */
+ RTTIMERLNXSTATE_MP_STARTING,
+ /** Active. */
+ RTTIMERLNXSTATE_ACTIVE,
+ /** Active and in callback; next ACTIVE, STOPPED or CALLBACK_DESTROYING. */
+ RTTIMERLNXSTATE_CALLBACK,
+ /** Stopped while in the callback; next STOPPED. */
+ RTTIMERLNXSTATE_CB_STOPPING,
+ /** Restarted while in the callback; next ACTIVE, STOPPED, DESTROYING. */
+ RTTIMERLNXSTATE_CB_RESTARTING,
+ /** The callback shall destroy the timer; next STOPPED. */
+ RTTIMERLNXSTATE_CB_DESTROYING,
+ /** Transient state; next STOPPED. */
+ RTTIMERLNXSTATE_STOPPING,
+ /** Transient state; next STOPPED. */
+ RTTIMERLNXSTATE_MP_STOPPING,
+ /** The usual 32-bit hack. */
+ RTTIMERLNXSTATE_32BIT_HACK = 0x7fffffff
+} RTTIMERLNXSTATE;
+
+
+/**
+ * A Linux sub-timer.
+ */
+typedef struct RTTIMERLNXSUBTIMER
+{
+ /** Timer specific data. */
+ union
+ {
+#if defined(RTTIMER_LINUX_WITH_HRTIMER)
+ /** High resolution timer. */
+ struct
+ {
+ /** The linux timer structure. */
+ struct hrtimer LnxTimer;
+ } Hr;
+#endif
+ /** Standard timer. */
+ struct
+ {
+ /** The linux timer structure. */
+ struct timer_list LnxTimer;
+ /** The start of the current run (ns).
+ * This is used to calculate when the timer ought to fire the next time. */
+ uint64_t u64NextTS;
+ /** When the timer was started. */
+ uint64_t nsStartTS;
+ /** The u64NextTS in jiffies. */
+ unsigned long ulNextJiffies;
+            /** Set when starting or changing the timer so that nsStartTS
+             *  and u64NextTS get reinitialized (eliminating some jitter). */
+ bool volatile fFirstAfterChg;
+ } Std;
+ } u;
+ /** The current tick number. */
+ uint64_t iTick;
+ /** Restart the single shot timer at this specific time.
+ * Used when a single shot timer is restarted from the callback. */
+ uint64_t volatile uNsRestartAt;
+ /** Pointer to the parent timer. */
+ PRTTIMER pParent;
+ /** The current sub-timer state. */
+ RTTIMERLNXSTATE volatile enmState;
+} RTTIMERLNXSUBTIMER;
+/** Pointer to a linux sub-timer. */
+typedef RTTIMERLNXSUBTIMER *PRTTIMERLNXSUBTIMER;
+
+
+/**
+ * The internal representation of a Linux timer handle.
+ */
+typedef struct RTTIMER
+{
+ /** Magic.
+ * This is RTTIMER_MAGIC, but changes to something else before the timer
+ * is destroyed to indicate clearly that thread should exit. */
+ uint32_t volatile u32Magic;
+ /** Spinlock synchronizing the fSuspended and MP event handling.
+ * This is NIL_RTSPINLOCK if cCpus == 1. */
+ RTSPINLOCK hSpinlock;
+ /** Flag indicating that the timer is suspended. */
+ bool volatile fSuspended;
+ /** Whether the timer must run on one specific CPU or not. */
+ bool fSpecificCpu;
+#ifdef CONFIG_SMP
+ /** Whether the timer must run on all CPUs or not. */
+ bool fAllCpus;
+#endif /* else: All -> specific on non-SMP kernels */
+ /** Whether it is a high resolution timer or a standard one. */
+ bool fHighRes;
+ /** The id of the CPU it must run on if fSpecificCpu is set. */
+ RTCPUID idCpu;
+ /** The number of CPUs this timer should run on. */
+ RTCPUID cCpus;
+ /** Callback. */
+ PFNRTTIMER pfnTimer;
+ /** User argument. */
+ void *pvUser;
+ /** The timer interval. 0 if one-shot. */
+ uint64_t volatile u64NanoInterval;
+ /** This is set to the number of jiffies between ticks if the interval is
+ * an exact number of jiffies. (Standard timers only.) */
+ unsigned long volatile cJiffies;
+ /** The change interval spinlock for standard timers only. */
+ spinlock_t ChgIntLock;
+ /** Workqueue item for delayed destruction. */
+ RTR0LNXWORKQUEUEITEM DtorWorkqueueItem;
+ /** Sub-timers.
+ * Normally there is just one, but for RTTIMER_FLAGS_CPU_ALL this will contain
+ * an entry for all possible cpus. In that case the index will be the same as
+ * for the RTCpuSet. */
+ RTTIMERLNXSUBTIMER aSubTimers[1];
+} RTTIMER;
+
+
+/**
+ * A rtTimerLinuxStartOnCpu argument package.
+ */
+typedef struct RTTIMERLINUXSTARTONCPUARGS
+{
+ /** The current time (RTTimeSystemNanoTS). */
+ uint64_t u64Now;
+ /** When to start firing (delta). */
+ uint64_t u64First;
+} RTTIMERLINUXSTARTONCPUARGS;
+/** Pointer to a rtTimerLinuxStartOnCpu argument package. */
+typedef RTTIMERLINUXSTARTONCPUARGS *PRTTIMERLINUXSTARTONCPUARGS;
+
+
+/*********************************************************************************************************************************
+* Internal Functions *
+*********************************************************************************************************************************/
+#ifdef CONFIG_SMP
+static DECLCALLBACK(void) rtTimerLinuxMpEvent(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvUser);
+#endif
+
+#if 0
+#define DEBUG_HACKING
+#include <iprt/string.h>
+#include <iprt/asm-amd64-x86.h>
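+/* Formats a message prefixed with the current CPU id and writes it to the
+   VBox backdoor logging port (0x504). */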
+static void myLogBackdoorPrintf(const char *pszFormat, ...)
+{
+ char szTmp[256];
+ va_list args;
+ size_t cb;
+
+ cb = RTStrPrintf(szTmp, sizeof(szTmp) - 10, "%d: ", RTMpCpuId());
+ va_start(args, pszFormat);
+ cb += RTStrPrintfV(&szTmp[cb], sizeof(szTmp) - cb, pszFormat, args);
+ va_end(args);
+
+ ASMOutStrU8(0x504, (uint8_t *)&szTmp[0], cb);
+}
+# define RTAssertMsg1Weak(pszExpr, uLine, pszFile, pszFunction) \
+ myLogBackdoorPrintf("\n!!Guest Assertion failed!!\n%s(%d) %s\n%s\n", uLine, pszFile, pszFunction, (pszExpr))
+# define RTAssertMsg2Weak myLogBackdoorPrintf
+# define RTTIMERLNX_LOG(a) myLogBackdoorPrintf a
+#else
+# define RTTIMERLNX_LOG(a) do { } while (0)
+#endif
+
+/**
+ * Sets the state.
+ */
+DECLINLINE(void) rtTimerLnxSetState(RTTIMERLNXSTATE volatile *penmState, RTTIMERLNXSTATE enmNewState)
+{
+#ifdef DEBUG_HACKING
+ RTTIMERLNX_LOG(("set %d -> %d\n", *penmState, enmNewState));
+#endif
+ ASMAtomicWriteU32((uint32_t volatile *)penmState, enmNewState);
+}
+
+
+/**
+ * Sets the state if it has a certain value.
+ *
+ * @return true if xchg was done.
+ * @return false if xchg wasn't done.
+ */
+#ifdef DEBUG_HACKING
+#define rtTimerLnxCmpXchgState(penmState, enmNewState, enmCurState) rtTimerLnxCmpXchgStateDebug(penmState, enmNewState, enmCurState, __LINE__)
+static bool rtTimerLnxCmpXchgStateDebug(RTTIMERLNXSTATE volatile *penmState, RTTIMERLNXSTATE enmNewState,
+ RTTIMERLNXSTATE enmCurState, uint32_t uLine)
+{
+ RTTIMERLNXSTATE enmOldState = enmCurState;
+ bool fRc = ASMAtomicCmpXchgExU32((uint32_t volatile *)penmState, enmNewState, enmCurState, (uint32_t *)&enmOldState);
+ RTTIMERLNX_LOG(("cxg %d -> %d - %d at %u\n", enmOldState, enmNewState, fRc, uLine));
+ return fRc;
+}
+#else
+DECLINLINE(bool) rtTimerLnxCmpXchgState(RTTIMERLNXSTATE volatile *penmState, RTTIMERLNXSTATE enmNewState,
+ RTTIMERLNXSTATE enmCurState)
+{
+ return ASMAtomicCmpXchgU32((uint32_t volatile *)penmState, enmNewState, enmCurState);
+}
+#endif
+
+
+/**
+ * Gets the state.
+ */
+DECLINLINE(RTTIMERLNXSTATE) rtTimerLnxGetState(RTTIMERLNXSTATE volatile *penmState)
+{
+ return (RTTIMERLNXSTATE)ASMAtomicUoReadU32((uint32_t volatile *)penmState);
+}
+
+#ifdef RTTIMER_LINUX_WITH_HRTIMER
+
+/**
+ * Converts a nano second time stamp to ktime_t.
+ *
+ * ASSUMES RTTimeSystemNanoTS() is implemented using ktime_get_ts().
+ *
+ * @returns ktime_t.
+ * @param cNanoSecs Nanoseconds.
+ */
+DECLINLINE(ktime_t) rtTimerLnxNanoToKt(uint64_t cNanoSecs)
+{
+ /* With some luck the compiler optimizes the division out of this... (Bet it doesn't.) */
+ return ktime_set(cNanoSecs / 1000000000, cNanoSecs % 1000000000);
+}
+
+/**
+ * Converts ktime_t to a nano second time stamp.
+ *
+ * ASSUMES RTTimeSystemNanoTS() is implemented using ktime_get_ts().
+ *
+ * @returns nano second time stamp.
+ * @param Kt ktime_t.
+ */
+DECLINLINE(uint64_t) rtTimerLnxKtToNano(ktime_t Kt)
+{
+ return ktime_to_ns(Kt);
+}
+
+#endif /* RTTIMER_LINUX_WITH_HRTIMER */
+
+/**
+ * Converts a nano second interval to jiffies.
+ *
+ * @returns Jiffies.
+ * @param cNanoSecs Nanoseconds.
+ */
+DECLINLINE(unsigned long) rtTimerLnxNanoToJiffies(uint64_t cNanoSecs)
+{
+ /* this can be made even better... */
+ if (cNanoSecs > (uint64_t)TICK_NSEC * MAX_JIFFY_OFFSET)
+ return MAX_JIFFY_OFFSET;
+# if ARCH_BITS == 32
+ if (RT_LIKELY(cNanoSecs <= UINT32_MAX))
+ return ((uint32_t)cNanoSecs + (TICK_NSEC-1)) / TICK_NSEC;
+# endif
+ return (cNanoSecs + (TICK_NSEC-1)) / TICK_NSEC;
+}
+
+
+/**
+ * Starts a sub-timer (RTTimerStart).
+ *
+ * @param pSubTimer The sub-timer to start.
+ * @param u64Now The current timestamp (RTTimeSystemNanoTS()).
+ * @param u64First The interval from u64Now to the first time the timer should fire.
+ * @param fPinned true = timer pinned to a specific CPU,
+ * false = timer can migrate between CPUs
+ * @param fHighRes Whether the user requested a high resolution timer or not.
+ */
+static void rtTimerLnxStartSubTimer(PRTTIMERLNXSUBTIMER pSubTimer, uint64_t u64Now, uint64_t u64First,
+ bool fPinned, bool fHighRes)
+{
+ /*
+ * Calc when it should start firing.
+ */
+ uint64_t u64NextTS = u64Now + u64First;
+ if (!fHighRes)
+ {
+ pSubTimer->u.Std.u64NextTS = u64NextTS;
+ pSubTimer->u.Std.nsStartTS = u64NextTS;
+ }
+ RTTIMERLNX_LOG(("startsubtimer %p\n", pSubTimer->pParent));
+
+ pSubTimer->iTick = 0;
+
+#ifdef RTTIMER_LINUX_WITH_HRTIMER
+ if (fHighRes)
+ hrtimer_start(&pSubTimer->u.Hr.LnxTimer, rtTimerLnxNanoToKt(u64NextTS),
+ fPinned ? HRTIMER_MODE_ABS_PINNED : HRTIMER_MODE_ABS);
+ else
+#endif
+ {
+ unsigned long cJiffies = !u64First ? 0 : rtTimerLnxNanoToJiffies(u64First);
+ pSubTimer->u.Std.ulNextJiffies = jiffies + cJiffies;
+ pSubTimer->u.Std.fFirstAfterChg = true;
+#ifdef CONFIG_SMP
+ if (fPinned)
+ {
+# if RTLNX_VER_MIN(4,8,0)
+ mod_timer(&pSubTimer->u.Std.LnxTimer, pSubTimer->u.Std.ulNextJiffies);
+# else
+ mod_timer_pinned(&pSubTimer->u.Std.LnxTimer, pSubTimer->u.Std.ulNextJiffies);
+# endif
+ }
+ else
+#endif
+ mod_timer(&pSubTimer->u.Std.LnxTimer, pSubTimer->u.Std.ulNextJiffies);
+ }
+
+ /* Be a bit careful here since we could be racing the callback. */
+ if (!rtTimerLnxCmpXchgState(&pSubTimer->enmState, RTTIMERLNXSTATE_ACTIVE, RTTIMERLNXSTATE_STARTING))
+ rtTimerLnxCmpXchgState(&pSubTimer->enmState, RTTIMERLNXSTATE_ACTIVE, RTTIMERLNXSTATE_MP_STARTING);
+}
+
+
+/**
+ * Stops a sub-timer (RTTimerStart and rtTimerLinuxMpEvent()).
+ *
+ * The caller has already changed the state, so we will not be in a callback
+ * situation with respect to the calling thread.
+ *
+ * @param pSubTimer The sub-timer.
+ * @param fHighRes Whether the user requested a high resolution timer or not.
+ */
+static void rtTimerLnxStopSubTimer(PRTTIMERLNXSUBTIMER pSubTimer, bool fHighRes)
+{
+ RTTIMERLNX_LOG(("stopsubtimer %p %d\n", pSubTimer->pParent, fHighRes));
+#ifdef RTTIMER_LINUX_WITH_HRTIMER
+ if (fHighRes)
+ {
+ /* There is no equivalent to del_timer in the hrtimer API,
+ hrtimer_cancel() == del_timer_sync(). Just like the WARN_ON in
+ del_timer_sync() asserts, waiting for a timer callback to complete
+ is deadlock prone, so don't do it. */
+ int rc = hrtimer_try_to_cancel(&pSubTimer->u.Hr.LnxTimer);
+ if (rc < 0)
+ {
+ hrtimer_start(&pSubTimer->u.Hr.LnxTimer, ktime_set(KTIME_SEC_MAX, 0), HRTIMER_MODE_ABS);
+ hrtimer_try_to_cancel(&pSubTimer->u.Hr.LnxTimer);
+ }
+ }
+ else
+#endif
+ del_timer(&pSubTimer->u.Std.LnxTimer);
+
+ rtTimerLnxSetState(&pSubTimer->enmState, RTTIMERLNXSTATE_STOPPED);
+}
+
+
+/**
+ * Used by RTTimerDestroy and rtTimerLnxCallbackDestroy to do the actual work.
+ *
+ * @param pTimer The timer in question.
+ */
+static void rtTimerLnxDestroyIt(PRTTIMER pTimer)
+{
+ RTSPINLOCK hSpinlock = pTimer->hSpinlock;
+ RTCPUID iCpu;
+ Assert(pTimer->fSuspended);
+ RTTIMERLNX_LOG(("destroyit %p\n", pTimer));
+
+ /*
+ * Remove the MP notifications first because it'll reduce the risk of
+ * us overtaking any MP event that might theoretically be racing us here.
+ */
+#ifdef CONFIG_SMP
+ if ( pTimer->cCpus > 1
+ && hSpinlock != NIL_RTSPINLOCK)
+ {
+ int rc = RTMpNotificationDeregister(rtTimerLinuxMpEvent, pTimer);
+ AssertRC(rc);
+ }
+#endif /* CONFIG_SMP */
+
+ /*
+ * Invalidate the handle.
+ */
+ ASMAtomicWriteU32(&pTimer->u32Magic, ~RTTIMER_MAGIC);
+
+ /*
+ * Make sure all timers have stopped executing since we're stopping them in
+ * an asynchronous manner up in rtTimerLnxStopSubTimer.
+ */
+ iCpu = pTimer->cCpus;
+ while (iCpu-- > 0)
+ {
+#ifdef RTTIMER_LINUX_WITH_HRTIMER
+ if (pTimer->fHighRes)
+ hrtimer_cancel(&pTimer->aSubTimers[iCpu].u.Hr.LnxTimer);
+ else
+#endif
+ del_timer_sync(&pTimer->aSubTimers[iCpu].u.Std.LnxTimer);
+ }
+
+ /*
+ * Finally, free the resources.
+ */
+ RTMemFreeEx(pTimer, RT_UOFFSETOF_DYN(RTTIMER, aSubTimers[pTimer->cCpus]));
+ if (hSpinlock != NIL_RTSPINLOCK)
+ RTSpinlockDestroy(hSpinlock);
+}
+
+
+/**
+ * Workqueue callback (no DECLCALLBACK!) for deferred destruction.
+ *
+ * @param pWork Pointer to the DtorWorkqueueItem member of our timer
+ * structure.
+ */
+static void rtTimerLnxDestroyDeferred(RTR0LNXWORKQUEUEITEM *pWork)
+{
+ PRTTIMER pTimer = RT_FROM_MEMBER(pWork, RTTIMER, DtorWorkqueueItem);
+ rtTimerLnxDestroyIt(pTimer);
+}
+
+
+/**
+ * Called when the timer was destroyed by the callback function.
+ *
+ * @param pTimer The timer.
+ * @param pSubTimer The sub-timer which we're handling, the state of this
+ *                      will be RTTIMERLNXSTATE_CB_DESTROYING.
+ */
+static void rtTimerLnxCallbackDestroy(PRTTIMER pTimer, PRTTIMERLNXSUBTIMER pSubTimer)
+{
+ /*
+ * If it's an omni timer, the last dude does the destroying.
+ */
+ if (pTimer->cCpus > 1)
+ {
+ uint32_t iCpu = pTimer->cCpus;
+ RTSpinlockAcquire(pTimer->hSpinlock);
+
+ Assert(pSubTimer->enmState == RTTIMERLNXSTATE_CB_DESTROYING);
+ rtTimerLnxSetState(&pSubTimer->enmState, RTTIMERLNXSTATE_STOPPED);
+
+ while (iCpu-- > 0)
+ if (rtTimerLnxGetState(&pTimer->aSubTimers[iCpu].enmState) != RTTIMERLNXSTATE_STOPPED)
+ {
+ RTSpinlockRelease(pTimer->hSpinlock);
+ return;
+ }
+
+ RTSpinlockRelease(pTimer->hSpinlock);
+ }
+
+ /*
+ * Destroying a timer from the callback is unsafe since the callout code
+ * might be touching the timer structure upon return (hrtimer does!). So,
+     * we have to defer the actual destruction to the IPRT workqueue.
+ */
+ rtR0LnxWorkqueuePush(&pTimer->DtorWorkqueueItem, rtTimerLnxDestroyDeferred);
+}
+
+
+#ifdef CONFIG_SMP
+/**
+ * Deal with a sub-timer that has migrated.
+ *
+ * @param pTimer The timer.
+ * @param pSubTimer The sub-timer.
+ */
+static void rtTimerLnxCallbackHandleMigration(PRTTIMER pTimer, PRTTIMERLNXSUBTIMER pSubTimer)
+{
+ RTTIMERLNXSTATE enmState;
+ if (pTimer->cCpus > 1)
+ RTSpinlockAcquire(pTimer->hSpinlock);
+
+ do
+ {
+ enmState = rtTimerLnxGetState(&pSubTimer->enmState);
+ switch (enmState)
+ {
+ case RTTIMERLNXSTATE_STOPPING:
+ case RTTIMERLNXSTATE_MP_STOPPING:
+ enmState = RTTIMERLNXSTATE_STOPPED;
+ RT_FALL_THRU();
+ case RTTIMERLNXSTATE_STOPPED:
+ break;
+
+ default:
+ AssertMsgFailed(("%d\n", enmState));
+ RT_FALL_THRU();
+ case RTTIMERLNXSTATE_STARTING:
+ case RTTIMERLNXSTATE_MP_STARTING:
+ case RTTIMERLNXSTATE_ACTIVE:
+ case RTTIMERLNXSTATE_CALLBACK:
+ case RTTIMERLNXSTATE_CB_STOPPING:
+ case RTTIMERLNXSTATE_CB_RESTARTING:
+ if (rtTimerLnxCmpXchgState(&pSubTimer->enmState, RTTIMERLNXSTATE_STOPPED, enmState))
+ enmState = RTTIMERLNXSTATE_STOPPED;
+ break;
+
+ case RTTIMERLNXSTATE_CB_DESTROYING:
+ {
+ if (pTimer->cCpus > 1)
+ RTSpinlockRelease(pTimer->hSpinlock);
+
+ rtTimerLnxCallbackDestroy(pTimer, pSubTimer);
+ return;
+ }
+ }
+ } while (enmState != RTTIMERLNXSTATE_STOPPED);
+
+ if (pTimer->cCpus > 1)
+ RTSpinlockRelease(pTimer->hSpinlock);
+}
+#endif /* CONFIG_SMP */
+
+
+/**
+ * The slow path of rtTimerLnxChangeToCallbackState.
+ *
+ * @returns true if changed successfully, false if not.
+ * @param pSubTimer The sub-timer.
+ */
+static bool rtTimerLnxChangeToCallbackStateSlow(PRTTIMERLNXSUBTIMER pSubTimer)
+{
+ for (;;)
+ {
+ RTTIMERLNXSTATE enmState = rtTimerLnxGetState(&pSubTimer->enmState);
+ switch (enmState)
+ {
+ case RTTIMERLNXSTATE_ACTIVE:
+ case RTTIMERLNXSTATE_STARTING:
+ case RTTIMERLNXSTATE_MP_STARTING:
+ if (rtTimerLnxCmpXchgState(&pSubTimer->enmState, RTTIMERLNXSTATE_CALLBACK, enmState))
+ return true;
+ break;
+
+ case RTTIMERLNXSTATE_CALLBACK:
+ case RTTIMERLNXSTATE_CB_STOPPING:
+ case RTTIMERLNXSTATE_CB_RESTARTING:
+ case RTTIMERLNXSTATE_CB_DESTROYING:
+ AssertMsgFailed(("%d\n", enmState)); RT_FALL_THRU();
+ default:
+ return false;
+ }
+ ASMNopPause();
+ }
+}
+
+
+/**
+ * Tries to change the sub-timer state to 'callback'.
+ *
+ * @returns true if changed successfully, false if not.
+ * @param pSubTimer The sub-timer.
+ */
+DECLINLINE(bool) rtTimerLnxChangeToCallbackState(PRTTIMERLNXSUBTIMER pSubTimer)
+{
+ if (RT_LIKELY(rtTimerLnxCmpXchgState(&pSubTimer->enmState, RTTIMERLNXSTATE_CALLBACK, RTTIMERLNXSTATE_ACTIVE)))
+ return true;
+ return rtTimerLnxChangeToCallbackStateSlow(pSubTimer);
+}
+
+
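+/*
+ * Note on the callback-related state transitions implemented below: a
+ * dispatch moves the sub-timer from ACTIVE (or one of the STARTING states)
+ * to CALLBACK, and on the way out it goes back to ACTIVE (interval timers)
+ * or to STOPPED (one-shot timers). A racing RTTimerStop, RTTimerStart or
+ * RTTimerDestroy turns CALLBACK into CB_STOPPING, CB_RESTARTING or
+ * CB_DESTROYING, which the callback resolves before returning.
+ */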
+#ifdef RTTIMER_LINUX_WITH_HRTIMER
+/**
+ * Timer callback function for high resolution timers.
+ *
+ * @returns HRTIMER_NORESTART or HRTIMER_RESTART depending on whether it's a
+ * one-shot or interval timer.
+ * @param pHrTimer Pointer to the sub-timer structure.
+ */
+static enum hrtimer_restart rtTimerLinuxHrCallback(struct hrtimer *pHrTimer)
+{
+ PRTTIMERLNXSUBTIMER pSubTimer = RT_FROM_MEMBER(pHrTimer, RTTIMERLNXSUBTIMER, u.Hr.LnxTimer);
+ PRTTIMER pTimer = pSubTimer->pParent;
+
+
+ RTTIMERLNX_LOG(("hrcallback %p\n", pTimer));
+ if (RT_UNLIKELY(!rtTimerLnxChangeToCallbackState(pSubTimer)))
+ return HRTIMER_NORESTART;
+
+#ifdef CONFIG_SMP
+ /*
+ * Check for unwanted migration.
+ */
+ if (pTimer->fAllCpus || pTimer->fSpecificCpu)
+ {
+ RTCPUID idCpu = RTMpCpuId();
+ if (RT_UNLIKELY( pTimer->fAllCpus
+ ? (RTCPUID)(pSubTimer - &pTimer->aSubTimers[0]) != idCpu
+ : pTimer->idCpu != idCpu))
+ {
+ rtTimerLnxCallbackHandleMigration(pTimer, pSubTimer);
+ return HRTIMER_NORESTART;
+ }
+ }
+#endif
+
+ if (pTimer->u64NanoInterval)
+ {
+ /*
+ * Periodic timer, run it and update the native timer afterwards so
+ * we can handle RTTimerStop and RTTimerChangeInterval from the
+ * callback as well as a racing control thread.
+ */
+ pTimer->pfnTimer(pTimer, pTimer->pvUser, ++pSubTimer->iTick);
+ hrtimer_add_expires_ns(&pSubTimer->u.Hr.LnxTimer, ASMAtomicReadU64(&pTimer->u64NanoInterval));
+ if (RT_LIKELY(rtTimerLnxCmpXchgState(&pSubTimer->enmState, RTTIMERLNXSTATE_ACTIVE, RTTIMERLNXSTATE_CALLBACK)))
+ return HRTIMER_RESTART;
+ }
+ else
+ {
+ /*
+ * One shot timer (no omni), stop it before dispatching it.
+ * Allow RTTimerStart as well as RTTimerDestroy to be called from
+ * the callback.
+ */
+ ASMAtomicWriteBool(&pTimer->fSuspended, true);
+ pTimer->pfnTimer(pTimer, pTimer->pvUser, ++pSubTimer->iTick);
+ if (RT_LIKELY(rtTimerLnxCmpXchgState(&pSubTimer->enmState, RTTIMERLNXSTATE_STOPPED, RTTIMERLNXSTATE_CALLBACK)))
+ return HRTIMER_NORESTART;
+ }
+
+ /*
+ * Some state change occurred while we were in the callback routine.
+ */
+ for (;;)
+ {
+ RTTIMERLNXSTATE enmState = rtTimerLnxGetState(&pSubTimer->enmState);
+ switch (enmState)
+ {
+ case RTTIMERLNXSTATE_CB_DESTROYING:
+ rtTimerLnxCallbackDestroy(pTimer, pSubTimer);
+ return HRTIMER_NORESTART;
+
+ case RTTIMERLNXSTATE_CB_STOPPING:
+ if (rtTimerLnxCmpXchgState(&pSubTimer->enmState, RTTIMERLNXSTATE_STOPPED, RTTIMERLNXSTATE_CB_STOPPING))
+ return HRTIMER_NORESTART;
+ break;
+
+ case RTTIMERLNXSTATE_CB_RESTARTING:
+ if (rtTimerLnxCmpXchgState(&pSubTimer->enmState, RTTIMERLNXSTATE_ACTIVE, RTTIMERLNXSTATE_CB_RESTARTING))
+ {
+ pSubTimer->iTick = 0;
+ hrtimer_set_expires(&pSubTimer->u.Hr.LnxTimer, rtTimerLnxNanoToKt(pSubTimer->uNsRestartAt));
+ return HRTIMER_RESTART;
+ }
+ break;
+
+ default:
+ AssertMsgFailed(("%d\n", enmState));
+ return HRTIMER_NORESTART;
+ }
+ ASMNopPause();
+ }
+}
+#endif /* RTTIMER_LINUX_WITH_HRTIMER */
+
+
+#if RTLNX_VER_MIN(4,15,0)
+/**
+ * Timer callback function for standard timers.
+ *
+ * @param pLnxTimer Pointer to the Linux timer structure.
+ */
+static void rtTimerLinuxStdCallback(struct timer_list *pLnxTimer)
+{
+ PRTTIMERLNXSUBTIMER pSubTimer = from_timer(pSubTimer, pLnxTimer, u.Std.LnxTimer);
+#else
+/**
+ * Timer callback function for standard timers.
+ *
+ * @param ulUser Address of the sub-timer structure.
+ */
+static void rtTimerLinuxStdCallback(unsigned long ulUser)
+{
+ PRTTIMERLNXSUBTIMER pSubTimer = (PRTTIMERLNXSUBTIMER)ulUser;
+#endif
+ PRTTIMER pTimer = pSubTimer->pParent;
+
+ RTTIMERLNX_LOG(("stdcallback %p\n", pTimer));
+ if (RT_UNLIKELY(!rtTimerLnxChangeToCallbackState(pSubTimer)))
+ return;
+
+#ifdef CONFIG_SMP
+ /*
+ * Check for unwanted migration.
+ */
+ if (pTimer->fAllCpus || pTimer->fSpecificCpu)
+ {
+ RTCPUID idCpu = RTMpCpuId();
+ if (RT_UNLIKELY( pTimer->fAllCpus
+ ? (RTCPUID)(pSubTimer - &pTimer->aSubTimers[0]) != idCpu
+ : pTimer->idCpu != idCpu))
+ {
+ rtTimerLnxCallbackHandleMigration(pTimer, pSubTimer);
+ return;
+ }
+ }
+#endif
+
+ if (pTimer->u64NanoInterval)
+ {
+ /*
+ * Interval timer, calculate the next timeout.
+ *
+ * The first time around, we'll re-adjust the u.Std.u64NextTS to
+ * try to prevent some jittering if we were started at a bad time.
+ */
+ const uint64_t iTick = ++pSubTimer->iTick;
+ unsigned long uCurJiffies = jiffies;
+ unsigned long ulNextJiffies;
+ uint64_t u64NanoInterval;
+ unsigned long cJiffies;
+ unsigned long flFlags;
+
+ spin_lock_irqsave(&pTimer->ChgIntLock, flFlags);
+ u64NanoInterval = pTimer->u64NanoInterval;
+ cJiffies = pTimer->cJiffies;
+ if (RT_UNLIKELY(pSubTimer->u.Std.fFirstAfterChg))
+ {
+ pSubTimer->u.Std.fFirstAfterChg = false;
+ pSubTimer->u.Std.u64NextTS = RTTimeSystemNanoTS();
+ pSubTimer->u.Std.nsStartTS = pSubTimer->u.Std.u64NextTS - u64NanoInterval * (iTick - 1);
+ pSubTimer->u.Std.ulNextJiffies = uCurJiffies = jiffies;
+ }
+ spin_unlock_irqrestore(&pTimer->ChgIntLock, flFlags);
+
+ pSubTimer->u.Std.u64NextTS += u64NanoInterval;
+ if (cJiffies)
+ {
+ ulNextJiffies = pSubTimer->u.Std.ulNextJiffies + cJiffies;
+ pSubTimer->u.Std.ulNextJiffies = ulNextJiffies;
+ if (time_after_eq(ulNextJiffies, uCurJiffies))
+ { /* likely */ }
+ else
+ {
+ unsigned long cJiffiesBehind = uCurJiffies - ulNextJiffies;
+ ulNextJiffies = uCurJiffies + cJiffies / 2;
+ if (cJiffiesBehind >= HZ / 4) /* Consider if we're lagging too far behind. Screw the u64NextTS member. */
+ pSubTimer->u.Std.ulNextJiffies = ulNextJiffies;
+ /*else: Don't update u.Std.ulNextJiffies so we can continue catching up in the next tick. */
+ }
+ }
+ else
+ {
+ const uint64_t u64NanoTS = RTTimeSystemNanoTS();
+ const int64_t cNsBehind = u64NanoTS - pSubTimer->u.Std.u64NextTS;
+ if (cNsBehind <= 0)
+ ulNextJiffies = uCurJiffies + rtTimerLnxNanoToJiffies(pSubTimer->u.Std.u64NextTS - u64NanoTS);
+ else if (u64NanoInterval >= RT_NS_1SEC_64 * 2 / HZ)
+ {
+ ulNextJiffies = uCurJiffies + rtTimerLnxNanoToJiffies(u64NanoInterval / 2);
+ if (cNsBehind >= RT_NS_1SEC_64 / HZ / 4) /* Consider if we're lagging too far behind. */
+ pSubTimer->u.Std.u64NextTS = u64NanoTS + u64NanoInterval / 2;
+ }
+ else
+ {
+ ulNextJiffies = uCurJiffies + 1;
+ if (cNsBehind >= RT_NS_1SEC_64 / HZ / 4) /* Consider if we're lagging too far behind. */
+ pSubTimer->u.Std.u64NextTS = u64NanoTS + RT_NS_1SEC_64 / HZ;
+ }
+ pSubTimer->u.Std.ulNextJiffies = ulNextJiffies;
+ }
+
+ /*
+ * Run the timer and re-arm it unless the state changed.
+ *
+ * We must re-arm it afterwards as we're not in a position to undo this
+ * operation if for instance someone stopped or destroyed us while we
+ * were in the callback. (Linux takes care of any races here.)
+ */
+ pTimer->pfnTimer(pTimer, pTimer->pvUser, iTick);
+ if (RT_LIKELY(rtTimerLnxCmpXchgState(&pSubTimer->enmState, RTTIMERLNXSTATE_ACTIVE, RTTIMERLNXSTATE_CALLBACK)))
+ {
+#ifdef CONFIG_SMP
+ if (pTimer->fSpecificCpu || pTimer->fAllCpus)
+ {
+# if RTLNX_VER_MIN(4,8,0)
+ mod_timer(&pSubTimer->u.Std.LnxTimer, ulNextJiffies);
+# else
+ mod_timer_pinned(&pSubTimer->u.Std.LnxTimer, ulNextJiffies);
+# endif
+ }
+ else
+#endif
+ mod_timer(&pSubTimer->u.Std.LnxTimer, ulNextJiffies);
+ return;
+ }
+ }
+ else
+ {
+ /*
+ * One shot timer, stop it before dispatching it.
+ * Allow RTTimerStart as well as RTTimerDestroy to be called from
+ * the callback.
+ */
+ ASMAtomicWriteBool(&pTimer->fSuspended, true);
+ pTimer->pfnTimer(pTimer, pTimer->pvUser, ++pSubTimer->iTick);
+ if (RT_LIKELY(rtTimerLnxCmpXchgState(&pSubTimer->enmState, RTTIMERLNXSTATE_STOPPED, RTTIMERLNXSTATE_CALLBACK)))
+ return;
+ }
+
+ /*
+ * Some state change occurred while we were in the callback routine.
+ */
+ for (;;)
+ {
+ RTTIMERLNXSTATE enmState = rtTimerLnxGetState(&pSubTimer->enmState);
+ switch (enmState)
+ {
+ case RTTIMERLNXSTATE_CB_DESTROYING:
+ rtTimerLnxCallbackDestroy(pTimer, pSubTimer);
+ return;
+
+ case RTTIMERLNXSTATE_CB_STOPPING:
+ if (rtTimerLnxCmpXchgState(&pSubTimer->enmState, RTTIMERLNXSTATE_STOPPED, RTTIMERLNXSTATE_CB_STOPPING))
+ return;
+ break;
+
+ case RTTIMERLNXSTATE_CB_RESTARTING:
+ if (rtTimerLnxCmpXchgState(&pSubTimer->enmState, RTTIMERLNXSTATE_ACTIVE, RTTIMERLNXSTATE_CB_RESTARTING))
+ {
+ uint64_t u64NanoTS;
+ uint64_t u64NextTS;
+ unsigned long flFlags;
+
+ spin_lock_irqsave(&pTimer->ChgIntLock, flFlags);
+ u64NextTS = pSubTimer->uNsRestartAt;
+ u64NanoTS = RTTimeSystemNanoTS();
+ pSubTimer->iTick = 0;
+ pSubTimer->u.Std.u64NextTS = u64NextTS;
+ pSubTimer->u.Std.fFirstAfterChg = true;
+ pSubTimer->u.Std.ulNextJiffies = u64NextTS > u64NanoTS
+ ? jiffies + rtTimerLnxNanoToJiffies(u64NextTS - u64NanoTS)
+ : jiffies;
+ spin_unlock_irqrestore(&pTimer->ChgIntLock, flFlags);
+
+#ifdef CONFIG_SMP
+ if (pTimer->fSpecificCpu || pTimer->fAllCpus)
+ {
+# if RTLNX_VER_MIN(4,8,0)
+ mod_timer(&pSubTimer->u.Std.LnxTimer, pSubTimer->u.Std.ulNextJiffies);
+# else
+ mod_timer_pinned(&pSubTimer->u.Std.LnxTimer, pSubTimer->u.Std.ulNextJiffies);
+# endif
+ }
+ else
+#endif
+ mod_timer(&pSubTimer->u.Std.LnxTimer, pSubTimer->u.Std.ulNextJiffies);
+ return;
+ }
+ break;
+
+ default:
+ AssertMsgFailed(("%d\n", enmState));
+ return;
+ }
+ ASMNopPause();
+ }
+}
+
+
+#ifdef CONFIG_SMP
+
+/**
+ * Per-cpu callback function (RTMpOnAll/RTMpOnSpecific).
+ *
+ * @param idCpu The current CPU.
+ * @param pvUser1 Pointer to the timer.
+ * @param pvUser2 Pointer to the argument structure.
+ */
+static DECLCALLBACK(void) rtTimerLnxStartAllOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
+{
+ PRTTIMERLINUXSTARTONCPUARGS pArgs = (PRTTIMERLINUXSTARTONCPUARGS)pvUser2;
+ PRTTIMER pTimer = (PRTTIMER)pvUser1;
+ Assert(idCpu < pTimer->cCpus);
+ rtTimerLnxStartSubTimer(&pTimer->aSubTimers[idCpu], pArgs->u64Now, pArgs->u64First, true /*fPinned*/, pTimer->fHighRes);
+}
+
+
+/**
+ * Worker for RTTimerStart() that takes care of the ugly bits.
+ *
+ * @returns RTTimerStart() return value.
+ * @param pTimer The timer.
+ * @param pArgs The argument structure.
+ */
+static int rtTimerLnxOmniStart(PRTTIMER pTimer, PRTTIMERLINUXSTARTONCPUARGS pArgs)
+{
+ RTCPUID iCpu;
+ RTCPUSET OnlineSet;
+ RTCPUSET OnlineSet2;
+ int rc2;
+
+ /*
+ * Prepare all the sub-timers for startup and then flag the timer as a
+ * whole as non-suspended. Make sure all of them are set up before we
+ * clear fSuspended, as the MP handler will be relying on this should
+ * something happen while we're looping.
+ */
+ RTSpinlockAcquire(pTimer->hSpinlock);
+
+ /* Just make it an omni timer restriction that no stop/start races are allowed. */
+ for (iCpu = 0; iCpu < pTimer->cCpus; iCpu++)
+ if (rtTimerLnxGetState(&pTimer->aSubTimers[iCpu].enmState) != RTTIMERLNXSTATE_STOPPED)
+ {
+ RTSpinlockRelease(pTimer->hSpinlock);
+ return VERR_TIMER_BUSY;
+ }
+
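+ /* Re-read the online CPU set until it is stable so that CPUs coming online
+ while we are setting up the sub-timer states are not missed. */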
+ do
+ {
+ RTMpGetOnlineSet(&OnlineSet);
+ for (iCpu = 0; iCpu < pTimer->cCpus; iCpu++)
+ {
+ Assert(pTimer->aSubTimers[iCpu].enmState != RTTIMERLNXSTATE_MP_STOPPING);
+ rtTimerLnxSetState(&pTimer->aSubTimers[iCpu].enmState,
+ RTCpuSetIsMember(&OnlineSet, iCpu)
+ ? RTTIMERLNXSTATE_STARTING
+ : RTTIMERLNXSTATE_STOPPED);
+ }
+ } while (!RTCpuSetIsEqual(&OnlineSet, RTMpGetOnlineSet(&OnlineSet2)));
+
+ ASMAtomicWriteBool(&pTimer->fSuspended, false);
+
+ RTSpinlockRelease(pTimer->hSpinlock);
+
+ /*
+ * Start them (can't find any exported function that allows me to
+ * do this without the cross calls).
+ */
+ pArgs->u64Now = RTTimeSystemNanoTS();
+ rc2 = RTMpOnAll(rtTimerLnxStartAllOnCpu, pTimer, pArgs);
+ AssertRC(rc2); /* screw this if it fails. */
+
+ /*
+ * Reset the sub-timers who didn't start up (ALL CPUs case).
+ */
+ RTSpinlockAcquire(pTimer->hSpinlock);
+
+ for (iCpu = 0; iCpu < pTimer->cCpus; iCpu++)
+ if (rtTimerLnxCmpXchgState(&pTimer->aSubTimers[iCpu].enmState, RTTIMERLNXSTATE_STOPPED, RTTIMERLNXSTATE_STARTING))
+ {
+ /** @todo very odd case for a rainy day. CPUs that temporarily went offline while
+ * we were between calls need to be nudged, as the MP handler will ignore events for
+ * them because of the STARTING state. This is an extremely unlikely case - not that
+ * that means anything in my experience... ;-) */
+ RTTIMERLNX_LOG(("what!? iCpu=%u -> didn't start\n", iCpu));
+ }
+
+ RTSpinlockRelease(pTimer->hSpinlock);
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Worker for RTTimerStop() that takes care of the ugly SMP bits.
+ *
+ * @returns true if there were any active callbacks, false if not.
+ * @param pTimer The timer (valid).
+ * @param fForDestroy Whether this is for RTTimerDestroy or not.
+ */
+static bool rtTimerLnxOmniStop(PRTTIMER pTimer, bool fForDestroy)
+{
+ bool fActiveCallbacks = false;
+ RTCPUID iCpu;
+ RTTIMERLNXSTATE enmState;
+
+
+ /*
+ * Mark the timer as suspended and flag all timers as stopping, except
+ * for those being stopped by an MP event.
+ */
+ RTSpinlockAcquire(pTimer->hSpinlock);
+
+ ASMAtomicWriteBool(&pTimer->fSuspended, true);
+ for (iCpu = 0; iCpu < pTimer->cCpus; iCpu++)
+ {
+ for (;;)
+ {
+ enmState = rtTimerLnxGetState(&pTimer->aSubTimers[iCpu].enmState);
+ if ( enmState == RTTIMERLNXSTATE_STOPPED
+ || enmState == RTTIMERLNXSTATE_MP_STOPPING)
+ break;
+ if ( enmState == RTTIMERLNXSTATE_CALLBACK
+ || enmState == RTTIMERLNXSTATE_CB_STOPPING
+ || enmState == RTTIMERLNXSTATE_CB_RESTARTING)
+ {
+ Assert(enmState != RTTIMERLNXSTATE_CB_STOPPING || fForDestroy);
+ if (rtTimerLnxCmpXchgState(&pTimer->aSubTimers[iCpu].enmState,
+ !fForDestroy ? RTTIMERLNXSTATE_CB_STOPPING : RTTIMERLNXSTATE_CB_DESTROYING,
+ enmState))
+ {
+ fActiveCallbacks = true;
+ break;
+ }
+ }
+ else
+ {
+ Assert(enmState == RTTIMERLNXSTATE_ACTIVE);
+ if (rtTimerLnxCmpXchgState(&pTimer->aSubTimers[iCpu].enmState, RTTIMERLNXSTATE_STOPPING, enmState))
+ break;
+ }
+ ASMNopPause();
+ }
+ }
+
+ RTSpinlockRelease(pTimer->hSpinlock);
+
+ /*
+ * Do the actual stopping. Fortunately, this doesn't require any IPIs.
+ * Unfortunately it cannot be done synchronously.
+ */
+ for (iCpu = 0; iCpu < pTimer->cCpus; iCpu++)
+ if (rtTimerLnxGetState(&pTimer->aSubTimers[iCpu].enmState) == RTTIMERLNXSTATE_STOPPING)
+ rtTimerLnxStopSubTimer(&pTimer->aSubTimers[iCpu], pTimer->fHighRes);
+
+ return fActiveCallbacks;
+}
+
+
+/**
+ * Per-cpu callback function (RTMpOnSpecific) used by rtTimerLinuxMpEvent()
+ * to start a sub-timer on a CPU that has just come online.
+ *
+ * @param idCpu The current CPU.
+ * @param pvUser1 Pointer to the timer.
+ * @param pvUser2 Pointer to the argument structure.
+ */
+static DECLCALLBACK(void) rtTimerLinuxMpStartOnCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
+{
+ PRTTIMERLINUXSTARTONCPUARGS pArgs = (PRTTIMERLINUXSTARTONCPUARGS)pvUser2;
+ PRTTIMER pTimer = (PRTTIMER)pvUser1;
+ RTSPINLOCK hSpinlock;
+ Assert(idCpu < pTimer->cCpus);
+
+ /*
+ * We have to be kind of careful here as we might be racing RTTimerStop
+ * (and/or RTTimerDestroy), thus the paranoia.
+ */
+ hSpinlock = pTimer->hSpinlock;
+ if ( hSpinlock != NIL_RTSPINLOCK
+ && pTimer->u32Magic == RTTIMER_MAGIC)
+ {
+ RTSpinlockAcquire(hSpinlock);
+
+ if ( !ASMAtomicUoReadBool(&pTimer->fSuspended)
+ && pTimer->u32Magic == RTTIMER_MAGIC)
+ {
+ /* We're sane and the timer is not suspended yet. */
+ PRTTIMERLNXSUBTIMER pSubTimer = &pTimer->aSubTimers[idCpu];
+ if (rtTimerLnxCmpXchgState(&pSubTimer->enmState, RTTIMERLNXSTATE_MP_STARTING, RTTIMERLNXSTATE_STOPPED))
+ rtTimerLnxStartSubTimer(pSubTimer, pArgs->u64Now, pArgs->u64First, true /*fPinned*/, pTimer->fHighRes);
+ }
+
+ RTSpinlockRelease(hSpinlock);
+ }
+}
+
+
+/**
+ * MP event notification callback.
+ *
+ * @param enmEvent The event.
+ * @param idCpu The cpu it applies to.
+ * @param pvUser The timer.
+ */
+static DECLCALLBACK(void) rtTimerLinuxMpEvent(RTMPEVENT enmEvent, RTCPUID idCpu, void *pvUser)
+{
+ PRTTIMER pTimer = (PRTTIMER)pvUser;
+ PRTTIMERLNXSUBTIMER pSubTimer = &pTimer->aSubTimers[idCpu];
+ RTSPINLOCK hSpinlock;
+
+ Assert(idCpu < pTimer->cCpus);
+
+ /*
+ * Some initial paranoia.
+ */
+ if (pTimer->u32Magic != RTTIMER_MAGIC)
+ return;
+ hSpinlock = pTimer->hSpinlock;
+ if (hSpinlock == NIL_RTSPINLOCK)
+ return;
+
+ RTSpinlockAcquire(hSpinlock);
+
+ /* Is it active? */
+ if ( !ASMAtomicUoReadBool(&pTimer->fSuspended)
+ && pTimer->u32Magic == RTTIMER_MAGIC)
+ {
+ switch (enmEvent)
+ {
+ /*
+ * Try to do it without leaving the spin lock, but if we have to, retake it
+ * when we're on the right cpu.
+ */
+ case RTMPEVENT_ONLINE:
+ if (rtTimerLnxCmpXchgState(&pSubTimer->enmState, RTTIMERLNXSTATE_MP_STARTING, RTTIMERLNXSTATE_STOPPED))
+ {
+ RTTIMERLINUXSTARTONCPUARGS Args;
+ Args.u64Now = RTTimeSystemNanoTS();
+ Args.u64First = 0;
+
+ if (RTMpCpuId() == idCpu)
+ rtTimerLnxStartSubTimer(pSubTimer, Args.u64Now, Args.u64First, true /*fPinned*/, pTimer->fHighRes);
+ else
+ {
+ rtTimerLnxSetState(&pSubTimer->enmState, RTTIMERLNXSTATE_STOPPED); /* we'll recheck it. */
+ RTSpinlockRelease(hSpinlock);
+
+ RTMpOnSpecific(idCpu, rtTimerLinuxMpStartOnCpu, pTimer, &Args);
+ return; /* we've left the spinlock */
+ }
+ }
+ break;
+
+ /*
+ * The CPU is (going) offline, make sure the sub-timer is stopped.
+ *
+ * Linux will migrate it to a different CPU, but we don't want this. The
+ * timer function is checking for this.
+ */
+ case RTMPEVENT_OFFLINE:
+ {
+ RTTIMERLNXSTATE enmState;
+ while ( (enmState = rtTimerLnxGetState(&pSubTimer->enmState)) == RTTIMERLNXSTATE_ACTIVE
+ || enmState == RTTIMERLNXSTATE_CALLBACK
+ || enmState == RTTIMERLNXSTATE_CB_RESTARTING)
+ {
+ if (enmState == RTTIMERLNXSTATE_ACTIVE)
+ {
+ if (rtTimerLnxCmpXchgState(&pSubTimer->enmState, RTTIMERLNXSTATE_MP_STOPPING, RTTIMERLNXSTATE_ACTIVE))
+ {
+ RTSpinlockRelease(hSpinlock);
+
+ rtTimerLnxStopSubTimer(pSubTimer, pTimer->fHighRes);
+ return; /* we've left the spinlock */
+ }
+ }
+ else if (rtTimerLnxCmpXchgState(&pSubTimer->enmState, RTTIMERLNXSTATE_CB_STOPPING, enmState))
+ break;
+
+ /* State not stable, try again. */
+ ASMNopPause();
+ }
+ break;
+ }
+ }
+ }
+
+ RTSpinlockRelease(hSpinlock);
+}
+
+#endif /* CONFIG_SMP */
+
+
+/**
+ * Callback function used by RTTimerStart via RTMpOnSpecific to start a timer
+ * running on a specific CPU.
+ *
+ * @param idCpu The current CPU.
+ * @param pvUser1 Pointer to the timer.
+ * @param pvUser2 Pointer to the argument structure.
+ */
+static DECLCALLBACK(void) rtTimerLnxStartOnSpecificCpu(RTCPUID idCpu, void *pvUser1, void *pvUser2)
+{
+ PRTTIMERLINUXSTARTONCPUARGS pArgs = (PRTTIMERLINUXSTARTONCPUARGS)pvUser2;
+ PRTTIMER pTimer = (PRTTIMER)pvUser1;
+ RT_NOREF_PV(idCpu);
+ rtTimerLnxStartSubTimer(&pTimer->aSubTimers[0], pArgs->u64Now, pArgs->u64First, true /*fPinned*/, pTimer->fHighRes);
+}
+
+
+RTDECL(int) RTTimerStart(PRTTIMER pTimer, uint64_t u64First)
+{
+ RTTIMERLINUXSTARTONCPUARGS Args;
+ int rc2;
+ IPRT_LINUX_SAVE_EFL_AC();
+
+ /*
+ * Validate.
+ */
+ AssertPtrReturn(pTimer, VERR_INVALID_HANDLE);
+ AssertReturn(pTimer->u32Magic == RTTIMER_MAGIC, VERR_INVALID_HANDLE);
+
+ if (!ASMAtomicUoReadBool(&pTimer->fSuspended))
+ return VERR_TIMER_ACTIVE;
+ RTTIMERLNX_LOG(("start %p cCpus=%d\n", pTimer, pTimer->cCpus));
+
+ Args.u64First = u64First;
+#ifdef CONFIG_SMP
+ /*
+ * Omni timer?
+ */
+ if (pTimer->fAllCpus)
+ {
+ rc2 = rtTimerLnxOmniStart(pTimer, &Args);
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return rc2;
+ }
+#endif
+
+ /*
+ * Simple timer - pretty straightforward if it wasn't for the restarting.
+ */
+ Args.u64Now = RTTimeSystemNanoTS();
+ ASMAtomicWriteU64(&pTimer->aSubTimers[0].uNsRestartAt, Args.u64Now + u64First);
+ for (;;)
+ {
+ RTTIMERLNXSTATE enmState = rtTimerLnxGetState(&pTimer->aSubTimers[0].enmState);
+ switch (enmState)
+ {
+ case RTTIMERLNXSTATE_STOPPED:
+ if (rtTimerLnxCmpXchgState(&pTimer->aSubTimers[0].enmState, RTTIMERLNXSTATE_STARTING, RTTIMERLNXSTATE_STOPPED))
+ {
+ ASMAtomicWriteBool(&pTimer->fSuspended, false);
+ if (!pTimer->fSpecificCpu)
+ rtTimerLnxStartSubTimer(&pTimer->aSubTimers[0], Args.u64Now, Args.u64First,
+ false /*fPinned*/, pTimer->fHighRes);
+ else
+ {
+ rc2 = RTMpOnSpecific(pTimer->idCpu, rtTimerLnxStartOnSpecificCpu, pTimer, &Args);
+ if (RT_FAILURE(rc2))
+ {
+ /* Suspend it, the cpu id is probably invalid or offline. */
+ ASMAtomicWriteBool(&pTimer->fSuspended, true);
+ rtTimerLnxSetState(&pTimer->aSubTimers[0].enmState, RTTIMERLNXSTATE_STOPPED);
+ return rc2;
+ }
+ }
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return VINF_SUCCESS;
+ }
+ break;
+
+ case RTTIMERLNXSTATE_CALLBACK:
+ case RTTIMERLNXSTATE_CB_STOPPING:
+ if (rtTimerLnxCmpXchgState(&pTimer->aSubTimers[0].enmState, RTTIMERLNXSTATE_CB_RESTARTING, enmState))
+ {
+ ASMAtomicWriteBool(&pTimer->fSuspended, false);
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return VINF_SUCCESS;
+ }
+ break;
+
+ default:
+ AssertMsgFailed(("%d\n", enmState));
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return VERR_INTERNAL_ERROR_4;
+ }
+ ASMNopPause();
+ }
+}
+RT_EXPORT_SYMBOL(RTTimerStart);
+
+
+/**
+ * Common worker for RTTimerStop and RTTimerDestroy.
+ *
+ * @returns true if there were any active callbacks, false if not.
+ * @param pTimer The timer to stop.
+ * @param fForDestroy Whether it's RTTimerDestroy calling or not.
+ */
+static bool rtTimerLnxStop(PRTTIMER pTimer, bool fForDestroy)
+{
+ RTTIMERLNX_LOG(("lnxstop %p %d\n", pTimer, fForDestroy));
+#ifdef CONFIG_SMP
+ /*
+ * Omni timer?
+ */
+ if (pTimer->fAllCpus)
+ return rtTimerLnxOmniStop(pTimer, fForDestroy);
+#endif
+
+ /*
+ * Simple timer.
+ */
+ ASMAtomicWriteBool(&pTimer->fSuspended, true);
+ for (;;)
+ {
+ RTTIMERLNXSTATE enmState = rtTimerLnxGetState(&pTimer->aSubTimers[0].enmState);
+ switch (enmState)
+ {
+ case RTTIMERLNXSTATE_ACTIVE:
+ if (rtTimerLnxCmpXchgState(&pTimer->aSubTimers[0].enmState, RTTIMERLNXSTATE_STOPPING, RTTIMERLNXSTATE_ACTIVE))
+ {
+ rtTimerLnxStopSubTimer(&pTimer->aSubTimers[0], pTimer->fHighRes);
+ return false;
+ }
+ break;
+
+ case RTTIMERLNXSTATE_CALLBACK:
+ case RTTIMERLNXSTATE_CB_RESTARTING:
+ case RTTIMERLNXSTATE_CB_STOPPING:
+ Assert(enmState != RTTIMERLNXSTATE_CB_STOPPING || fForDestroy);
+ if (rtTimerLnxCmpXchgState(&pTimer->aSubTimers[0].enmState,
+ !fForDestroy ? RTTIMERLNXSTATE_CB_STOPPING : RTTIMERLNXSTATE_CB_DESTROYING,
+ enmState))
+ return true;
+ break;
+
+ case RTTIMERLNXSTATE_STOPPED:
+ return false; /* Already stopped, so no active callback. */
+
+ case RTTIMERLNXSTATE_CB_DESTROYING:
+ AssertMsgFailed(("enmState=%d pTimer=%p\n", enmState, pTimer));
+ return true;
+
+ default:
+ case RTTIMERLNXSTATE_STARTING:
+ case RTTIMERLNXSTATE_MP_STARTING:
+ case RTTIMERLNXSTATE_STOPPING:
+ case RTTIMERLNXSTATE_MP_STOPPING:
+ AssertMsgFailed(("enmState=%d pTimer=%p\n", enmState, pTimer));
+ return false;
+ }
+
+ /* State not stable, try again. */
+ ASMNopPause();
+ }
+}
+
+
+RTDECL(int) RTTimerStop(PRTTIMER pTimer)
+{
+ /*
+ * Validate.
+ */
+ IPRT_LINUX_SAVE_EFL_AC();
+ AssertPtrReturn(pTimer, VERR_INVALID_HANDLE);
+ AssertReturn(pTimer->u32Magic == RTTIMER_MAGIC, VERR_INVALID_HANDLE);
+ RTTIMERLNX_LOG(("stop %p\n", pTimer));
+
+ if (ASMAtomicUoReadBool(&pTimer->fSuspended))
+ return VERR_TIMER_SUSPENDED;
+
+ rtTimerLnxStop(pTimer, false /*fForDestroy*/);
+
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return VINF_SUCCESS;
+}
+RT_EXPORT_SYMBOL(RTTimerStop);
+
+
+RTDECL(int) RTTimerChangeInterval(PRTTIMER pTimer, uint64_t u64NanoInterval)
+{
+ unsigned long cJiffies;
+ unsigned long flFlags;
+ IPRT_LINUX_SAVE_EFL_AC();
+
+ /*
+ * Validate.
+ */
+ AssertPtrReturn(pTimer, VERR_INVALID_HANDLE);
+ AssertReturn(pTimer->u32Magic == RTTIMER_MAGIC, VERR_INVALID_HANDLE);
+ AssertReturn(u64NanoInterval, VERR_INVALID_PARAMETER);
+ AssertReturn(u64NanoInterval < UINT64_MAX / 8, VERR_INVALID_PARAMETER);
+ AssertReturn(pTimer->u64NanoInterval, VERR_INVALID_STATE);
+ RTTIMERLNX_LOG(("change %p %llu\n", pTimer, u64NanoInterval));
+
+#ifdef RTTIMER_LINUX_WITH_HRTIMER
+ /*
+ * For the high resolution timers it is easy since we don't care so much
+ * about when it is applied to the sub-timers.
+ */
+ if (pTimer->fHighRes)
+ {
+ ASMAtomicWriteU64(&pTimer->u64NanoInterval, u64NanoInterval);
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return VINF_SUCCESS;
+ }
+#endif
+
+ /*
+ * Standard timers have a bit more complicated way of calculating
+ * their interval and such. So, forget omni timers for now.
+ */
+ if (pTimer->cCpus > 1)
+ return VERR_NOT_SUPPORTED;
+
+ cJiffies = u64NanoInterval / (RT_NS_1SEC / HZ);
+ if (cJiffies * (RT_NS_1SEC / HZ) != u64NanoInterval)
+ cJiffies = 0;
+
+ spin_lock_irqsave(&pTimer->ChgIntLock, flFlags);
+ pTimer->aSubTimers[0].u.Std.fFirstAfterChg = true;
+ pTimer->cJiffies = cJiffies;
+ ASMAtomicWriteU64(&pTimer->u64NanoInterval, u64NanoInterval);
+ spin_unlock_irqrestore(&pTimer->ChgIntLock, flFlags);
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return VINF_SUCCESS;
+}
+RT_EXPORT_SYMBOL(RTTimerChangeInterval);
+
+
+RTDECL(int) RTTimerDestroy(PRTTIMER pTimer)
+{
+ bool fCanDestroy;
+ IPRT_LINUX_SAVE_EFL_AC();
+
+ /*
+ * Validate. It's ok to pass NULL pointer.
+ */
+ if (pTimer == /*NIL_RTTIMER*/ NULL)
+ return VINF_SUCCESS;
+ AssertPtrReturn(pTimer, VERR_INVALID_HANDLE);
+ AssertReturn(pTimer->u32Magic == RTTIMER_MAGIC, VERR_INVALID_HANDLE);
+ RTTIMERLNX_LOG(("destroy %p\n", pTimer));
+/** @todo We should invalidate the magic here! */
+
+ /*
+ * Stop the timer if it's still active, then destroy it if we can.
+ */
+ if (!ASMAtomicUoReadBool(&pTimer->fSuspended))
+ fCanDestroy = rtTimerLnxStop(pTimer, true /*fForDestroy*/);
+ else
+ {
+ uint32_t iCpu = pTimer->cCpus;
+ if (pTimer->cCpus > 1)
+ RTSpinlockAcquire(pTimer->hSpinlock);
+
+ fCanDestroy = true;
+ while (iCpu-- > 0)
+ {
+ for (;;)
+ {
+ RTTIMERLNXSTATE enmState = rtTimerLnxGetState(&pTimer->aSubTimers[iCpu].enmState);
+ switch (enmState)
+ {
+ case RTTIMERLNXSTATE_CALLBACK:
+ case RTTIMERLNXSTATE_CB_RESTARTING:
+ case RTTIMERLNXSTATE_CB_STOPPING:
+ if (!rtTimerLnxCmpXchgState(&pTimer->aSubTimers[iCpu].enmState, RTTIMERLNXSTATE_CB_DESTROYING, enmState))
+ continue;
+ fCanDestroy = false;
+ break;
+
+ case RTTIMERLNXSTATE_CB_DESTROYING:
+ AssertMsgFailed(("%d\n", enmState));
+ fCanDestroy = false;
+ break;
+ default:
+ break;
+ }
+ break;
+ }
+ }
+
+ if (pTimer->cCpus > 1)
+ RTSpinlockRelease(pTimer->hSpinlock);
+ }
+
+ if (fCanDestroy)
+ {
+ /* For paranoid reasons, defer actually destroying the timer when
+ in atomic or interrupt context. */
+#if RTLNX_VER_MIN(2,5,32)
+ if (in_atomic() || in_interrupt())
+#else
+ if (in_interrupt())
+#endif
+ rtR0LnxWorkqueuePush(&pTimer->DtorWorkqueueItem, rtTimerLnxDestroyDeferred);
+ else
+ rtTimerLnxDestroyIt(pTimer);
+ }
+
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return VINF_SUCCESS;
+}
+RT_EXPORT_SYMBOL(RTTimerDestroy);
+
+
+RTDECL(int) RTTimerCreateEx(PRTTIMER *ppTimer, uint64_t u64NanoInterval, uint32_t fFlags, PFNRTTIMER pfnTimer, void *pvUser)
+{
+ PRTTIMER pTimer;
+ RTCPUID iCpu;
+ unsigned cCpus;
+ int rc;
+ IPRT_LINUX_SAVE_EFL_AC();
+
+ rtR0LnxWorkqueueFlush(); /* for 2.4 */
+ *ppTimer = NULL;
+
+ /*
+ * Validate flags.
+ */
+ if (!RTTIMER_FLAGS_ARE_VALID(fFlags))
+ {
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return VERR_INVALID_PARAMETER;
+ }
+ if ( (fFlags & RTTIMER_FLAGS_CPU_SPECIFIC)
+ && (fFlags & RTTIMER_FLAGS_CPU_ALL) != RTTIMER_FLAGS_CPU_ALL
+ && !RTMpIsCpuPossible(RTMpCpuIdFromSetIndex(fFlags & RTTIMER_FLAGS_CPU_MASK)))
+ {
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return VERR_CPU_NOT_FOUND;
+ }
+
+ /*
+ * Allocate the timer handle.
+ */
+ cCpus = 1;
+#ifdef CONFIG_SMP
+ if ((fFlags & RTTIMER_FLAGS_CPU_ALL) == RTTIMER_FLAGS_CPU_ALL)
+ {
+ cCpus = RTMpGetMaxCpuId() + 1;
+ Assert(cCpus <= RTCPUSET_MAX_CPUS); /* On linux we have a 1:1 relationship between cpuid and set index. */
+ AssertReturnStmt(u64NanoInterval, IPRT_LINUX_RESTORE_EFL_AC(), VERR_NOT_IMPLEMENTED); /* We don't implement single shot on all cpus, sorry. */
+ }
+#endif
+
+ rc = RTMemAllocEx(RT_UOFFSETOF_DYN(RTTIMER, aSubTimers[cCpus]), 0,
+ RTMEMALLOCEX_FLAGS_ZEROED | RTMEMALLOCEX_FLAGS_ANY_CTX_FREE, (void **)&pTimer);
+ if (RT_FAILURE(rc))
+ {
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return rc;
+ }
+
+ /*
+ * Initialize it.
+ */
+ pTimer->u32Magic = RTTIMER_MAGIC;
+ pTimer->hSpinlock = NIL_RTSPINLOCK;
+ pTimer->fSuspended = true;
+ pTimer->fHighRes = !!(fFlags & RTTIMER_FLAGS_HIGH_RES);
+#ifdef CONFIG_SMP
+ pTimer->fSpecificCpu = (fFlags & RTTIMER_FLAGS_CPU_SPECIFIC) && (fFlags & RTTIMER_FLAGS_CPU_ALL) != RTTIMER_FLAGS_CPU_ALL;
+ pTimer->fAllCpus = (fFlags & RTTIMER_FLAGS_CPU_ALL) == RTTIMER_FLAGS_CPU_ALL;
+ pTimer->idCpu = pTimer->fSpecificCpu
+ ? RTMpCpuIdFromSetIndex(fFlags & RTTIMER_FLAGS_CPU_MASK)
+ : NIL_RTCPUID;
+#else
+ pTimer->fSpecificCpu = !!(fFlags & RTTIMER_FLAGS_CPU_SPECIFIC);
+ pTimer->idCpu = RTMpCpuId();
+#endif
+ pTimer->cCpus = cCpus;
+ pTimer->pfnTimer = pfnTimer;
+ pTimer->pvUser = pvUser;
+ pTimer->u64NanoInterval = u64NanoInterval;
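+ /* Use a jiffies-based interval only when the requested interval is an exact
+ multiple of the tick length; otherwise cJiffies stays zero and the standard
+ timer callback falls back to the nanosecond bookkeeping. */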
+ pTimer->cJiffies = u64NanoInterval / (RT_NS_1SEC / HZ);
+ if (pTimer->cJiffies * (RT_NS_1SEC / HZ) != u64NanoInterval)
+ pTimer->cJiffies = 0;
+ spin_lock_init(&pTimer->ChgIntLock);
+
+ for (iCpu = 0; iCpu < cCpus; iCpu++)
+ {
+#ifdef RTTIMER_LINUX_WITH_HRTIMER
+ if (pTimer->fHighRes)
+ {
+ hrtimer_init(&pTimer->aSubTimers[iCpu].u.Hr.LnxTimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+ pTimer->aSubTimers[iCpu].u.Hr.LnxTimer.function = rtTimerLinuxHrCallback;
+ }
+ else
+#endif
+ {
+#if RTLNX_VER_MIN(4,15,0)
+ timer_setup(&pTimer->aSubTimers[iCpu].u.Std.LnxTimer, rtTimerLinuxStdCallback, TIMER_PINNED);
+#elif RTLNX_VER_MIN(4,8,0)
+ init_timer_pinned(&pTimer->aSubTimers[iCpu].u.Std.LnxTimer);
+#else
+ init_timer(&pTimer->aSubTimers[iCpu].u.Std.LnxTimer);
+#endif
+#if RTLNX_VER_MAX(4,15,0)
+ pTimer->aSubTimers[iCpu].u.Std.LnxTimer.data = (unsigned long)&pTimer->aSubTimers[iCpu];
+ pTimer->aSubTimers[iCpu].u.Std.LnxTimer.function = rtTimerLinuxStdCallback;
+#endif
+ pTimer->aSubTimers[iCpu].u.Std.LnxTimer.expires = jiffies;
+ pTimer->aSubTimers[iCpu].u.Std.u64NextTS = 0;
+ }
+ pTimer->aSubTimers[iCpu].iTick = 0;
+ pTimer->aSubTimers[iCpu].pParent = pTimer;
+ pTimer->aSubTimers[iCpu].enmState = RTTIMERLNXSTATE_STOPPED;
+ }
+
+#ifdef CONFIG_SMP
+ /*
+ * If this is running on ALL cpus, we'll have to register a callback
+ * for MP events (so timers can be started/stopped on cpus going
+ * online/offline). We also create the spinlock for synchronizing
+ * stop/start/mp-event.
+ */
+ if (cCpus > 1)
+ {
+ int rc = RTSpinlockCreate(&pTimer->hSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "RTTimerLnx");
+ if (RT_SUCCESS(rc))
+ rc = RTMpNotificationRegister(rtTimerLinuxMpEvent, pTimer);
+ else
+ pTimer->hSpinlock = NIL_RTSPINLOCK;
+ if (RT_FAILURE(rc))
+ {
+ RTTimerDestroy(pTimer);
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return rc;
+ }
+ }
+#endif /* CONFIG_SMP */
+
+ RTTIMERLNX_LOG(("create %p hires=%d fFlags=%#x cCpus=%u\n", pTimer, pTimer->fHighRes, fFlags, cCpus));
+ *ppTimer = pTimer;
+ IPRT_LINUX_RESTORE_EFL_AC();
+ return VINF_SUCCESS;
+}
+RT_EXPORT_SYMBOL(RTTimerCreateEx);
+
+
+RTDECL(uint32_t) RTTimerGetSystemGranularity(void)
+{
+#if 0 /** @todo Not sure if this is what we want or not... Add new API for
+ * querying the resolution of the high res timers? */
+ struct timespec Ts;
+ int rc;
+ IPRT_LINUX_SAVE_EFL_AC();
+ rc = hrtimer_get_res(CLOCK_MONOTONIC, &Ts);
+ IPRT_LINUX_RESTORE_EFL_AC();
+ if (!rc)
+ {
+ Assert(!Ts.tv_sec);
+ return Ts.tv_nsec;
+ }
+#endif
+ /* */
+#if RTLNX_VER_MAX(4,9,0) || RTLNX_VER_MIN(4,13,0)
+ /* On 4.9, 4.10 and 4.12 we've observed tstRTR0Timer failures of the omni timer tests
+ where we get about half of the ticks we want. The failing test is using this value
+ as interval. So, this is a very, very crude hack to try to make omni timers work
+ correctly without actually knowing what's going wrong... */
+ return RT_NS_1SEC * 2 / HZ; /* ns */
+#else
+ return RT_NS_1SEC / HZ; /* ns */
+#endif
+}
+RT_EXPORT_SYMBOL(RTTimerGetSystemGranularity);
+
+
+RTDECL(int) RTTimerRequestSystemGranularity(uint32_t u32Request, uint32_t *pu32Granted)
+{
+ RT_NOREF_PV(u32Request); RT_NOREF_PV(*pu32Granted);
+ return VERR_NOT_SUPPORTED;
+}
+RT_EXPORT_SYMBOL(RTTimerRequestSystemGranularity);
+
+
+RTDECL(int) RTTimerReleaseSystemGranularity(uint32_t u32Granted)
+{
+ RT_NOREF_PV(u32Granted);
+ return VERR_NOT_SUPPORTED;
+}
+RT_EXPORT_SYMBOL(RTTimerReleaseSystemGranularity);
+
+
+RTDECL(bool) RTTimerCanDoHighResolution(void)
+{
+#ifdef RTTIMER_LINUX_WITH_HRTIMER
+ return true;
+#else
+ return false;
+#endif
+}
+RT_EXPORT_SYMBOL(RTTimerCanDoHighResolution);
+
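+/*
+ * A minimal usage sketch of the RTTimer API implemented in this file. The
+ * callback name, the one millisecond interval and the error handling are
+ * illustrative assumptions only:
+ *
+ *   static DECLCALLBACK(void) myTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
+ *   {
+ *       RT_NOREF_PV(pTimer); RT_NOREF_PV(pvUser); RT_NOREF_PV(iTick);
+ *   }
+ *
+ *   PRTTIMER pTimer;
+ *   int rc = RTTimerCreateEx(&pTimer, UINT64_C(1000000), RTTIMER_FLAGS_HIGH_RES, myTimerCallback, NULL);
+ *   if (RT_SUCCESS(rc))
+ *   {
+ *       rc = RTTimerStart(pTimer, 0);
+ *       ...
+ *       RTTimerStop(pTimer);
+ *       RTTimerDestroy(pTimer);
+ *   }
+ */
+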
diff --git a/src/VBox/Runtime/r0drv/linux/waitqueue-r0drv-linux.h b/src/VBox/Runtime/r0drv/linux/waitqueue-r0drv-linux.h
new file mode 100644
index 00000000..0ef103fe
--- /dev/null
+++ b/src/VBox/Runtime/r0drv/linux/waitqueue-r0drv-linux.h
@@ -0,0 +1,302 @@
+/* $Id: waitqueue-r0drv-linux.h $ */
+/** @file
+ * IPRT - Linux Ring-0 Driver Helpers for Abstracting Wait Queues.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+#ifndef IPRT_INCLUDED_SRC_r0drv_linux_waitqueue_r0drv_linux_h
+#define IPRT_INCLUDED_SRC_r0drv_linux_waitqueue_r0drv_linux_h
+#ifndef RT_WITHOUT_PRAGMA_ONCE
+# pragma once
+#endif
+
+#include "the-linux-kernel.h"
+
+#include <iprt/asm-math.h>
+#include <iprt/err.h>
+#include <iprt/string.h>
+#include <iprt/time.h>
+
+/** The resolution (nanoseconds) specified when using
+ * schedule_hrtimeout_range. */
+#define RTR0SEMLNXWAIT_RESOLUTION 50000
+
+
+/**
+ * Kernel mode Linux wait state structure.
+ */
+typedef struct RTR0SEMLNXWAIT
+{
+ /** The wait queue entry. */
+#if RTLNX_VER_MIN(4,13,0) || RTLNX_SUSE_MAJ_PREREQ(12, 4) || RTLNX_SUSE_MAJ_PREREQ(15, 0)
+ wait_queue_entry_t WaitQE;
+#else
+ wait_queue_t WaitQE;
+#endif
+ /** The absolute timeout given as nanoseconds since the start of the
+ * monotonic clock. */
+ uint64_t uNsAbsTimeout;
+ /** The timeout in nanoseconds relative to the start of the wait. */
+ uint64_t cNsRelTimeout;
+ /** The native timeout value. */
+ union
+ {
+#ifdef IPRT_LINUX_HAS_HRTIMER
+ /** The timeout when fHighRes is true. Absolute, so no updating. */
+ ktime_t KtTimeout;
+#endif
+ /** The timeout when fHighRes is false. Updated after waiting. */
+ long lTimeout;
+ } u;
+ /** Set if we use high resolution timeouts. */
+ bool fHighRes;
+ /** Set if it's an indefinite wait. */
+ bool fIndefinite;
+ /** Set if we've already timed out.
+ * Set by rtR0SemLnxWaitDoIt and read by rtR0SemLnxWaitHasTimedOut. */
+ bool fTimedOut;
+ /** TASK_INTERRUPTIBLE or TASK_UNINTERRUPTIBLE. */
+ int iWaitState;
+ /** The wait queue. */
+ wait_queue_head_t *pWaitQueue;
+} RTR0SEMLNXWAIT;
+/** Pointer to a linux wait state. */
+typedef RTR0SEMLNXWAIT *PRTR0SEMLNXWAIT;
+
+
+/**
+ * Initializes a wait.
+ *
+ * The caller MUST check the wait condition BEFORE calling this function or the
+ * timeout logic will be flawed.
+ *
+ * @returns VINF_SUCCESS or VERR_TIMEOUT.
+ * @param pWait The wait structure.
+ * @param fFlags The wait flags.
+ * @param uTimeout The timeout.
+ * @param pWaitQueue The wait queue head.
+ */
+DECLINLINE(int) rtR0SemLnxWaitInit(PRTR0SEMLNXWAIT pWait, uint32_t fFlags, uint64_t uTimeout,
+ wait_queue_head_t *pWaitQueue)
+{
+ /*
+ * Process the flags and timeout.
+ */
+ if (!(fFlags & RTSEMWAIT_FLAGS_INDEFINITE))
+ {
+/** @todo optimize: millisecs -> nanosecs -> millisec -> jiffies */
+ if (fFlags & RTSEMWAIT_FLAGS_MILLISECS)
+ uTimeout = uTimeout < UINT64_MAX / RT_US_1SEC * RT_US_1SEC
+ ? uTimeout * RT_US_1SEC
+ : UINT64_MAX;
+ if (uTimeout == UINT64_MAX)
+ fFlags |= RTSEMWAIT_FLAGS_INDEFINITE;
+ else
+ {
+ uint64_t u64Now;
+ if (fFlags & RTSEMWAIT_FLAGS_RELATIVE)
+ {
+ if (uTimeout == 0)
+ return VERR_TIMEOUT;
+
+ u64Now = RTTimeSystemNanoTS();
+ pWait->cNsRelTimeout = uTimeout;
+ pWait->uNsAbsTimeout = u64Now + uTimeout;
+ if (pWait->uNsAbsTimeout < u64Now) /* overflow */
+ fFlags |= RTSEMWAIT_FLAGS_INDEFINITE;
+ }
+ else
+ {
+ u64Now = RTTimeSystemNanoTS();
+ if (u64Now >= uTimeout)
+ return VERR_TIMEOUT;
+
+ pWait->cNsRelTimeout = uTimeout - u64Now;
+ pWait->uNsAbsTimeout = uTimeout;
+ }
+ }
+ }
+
+ if (!(fFlags & RTSEMWAIT_FLAGS_INDEFINITE))
+ {
+ pWait->fIndefinite = false;
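+ /* When the kernel provides high resolution timers, use them for waits that
+ ask for nanosecond or absolute precision or that are shorter than roughly
+ four scheduler ticks; everything else (and all waits on kernels without
+ hrtimers) goes through the coarser jiffies path. */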
+#ifdef IPRT_LINUX_HAS_HRTIMER
+ if ( (fFlags & (RTSEMWAIT_FLAGS_NANOSECS | RTSEMWAIT_FLAGS_ABSOLUTE))
+ || pWait->cNsRelTimeout < RT_NS_1SEC / HZ * 4)
+ {
+ pWait->fHighRes = true;
+# if BITS_PER_LONG < 64
+ if ( KTIME_SEC_MAX <= LONG_MAX
+ && pWait->uNsAbsTimeout >= KTIME_SEC_MAX * RT_NS_1SEC_64 + (RT_NS_1SEC - 1))
+ fFlags |= RTSEMWAIT_FLAGS_INDEFINITE;
+ else
+# endif
+ pWait->u.KtTimeout = ns_to_ktime(pWait->uNsAbsTimeout);
+ }
+ else
+#endif
+ {
+ uint64_t cJiffies = ASMMultU64ByU32DivByU32(pWait->cNsRelTimeout, HZ, RT_NS_1SEC);
+ if (cJiffies >= MAX_JIFFY_OFFSET)
+ fFlags |= RTSEMWAIT_FLAGS_INDEFINITE;
+ else
+ {
+ pWait->u.lTimeout = (long)cJiffies;
+ pWait->fHighRes = false;
+ }
+ }
+ }
+
+ if (fFlags & RTSEMWAIT_FLAGS_INDEFINITE)
+ {
+ pWait->fIndefinite = true;
+ pWait->fHighRes = false;
+ pWait->uNsAbsTimeout = UINT64_MAX;
+ pWait->cNsRelTimeout = UINT64_MAX;
+ pWait->u.lTimeout = LONG_MAX;
+ }
+
+ pWait->fTimedOut = false;
+
+ /*
+ * Initialize the wait queue related bits.
+ */
+#if RTLNX_VER_MIN(2,5,39)
+ init_wait((&pWait->WaitQE));
+#else
+ RT_ZERO(pWait->WaitQE);
+ init_waitqueue_entry((&pWait->WaitQE), current);
+#endif
+ pWait->pWaitQueue = pWaitQueue;
+ pWait->iWaitState = fFlags & RTSEMWAIT_FLAGS_INTERRUPTIBLE
+ ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Prepares the next wait.
+ *
+ * This must be called before rtR0SemLnxWaitDoIt, and the caller should check
+ * the exit conditions in-between the two calls.
+ *
+ * @param pWait The wait structure.
+ */
+DECLINLINE(void) rtR0SemLnxWaitPrepare(PRTR0SEMLNXWAIT pWait)
+{
+ /* Make everything thru schedule*() atomic scheduling wise. (Is this correct?) */
+ prepare_to_wait(pWait->pWaitQueue, &pWait->WaitQE, pWait->iWaitState);
+}
+
+
+/**
+ * Do the actual wait.
+ *
+ * @param pWait The wait structure.
+ */
+DECLINLINE(void) rtR0SemLnxWaitDoIt(PRTR0SEMLNXWAIT pWait)
+{
+ if (pWait->fIndefinite)
+ schedule();
+#ifdef IPRT_LINUX_HAS_HRTIMER
+ else if (pWait->fHighRes)
+ {
+ int rc = schedule_hrtimeout_range(&pWait->u.KtTimeout, HRTIMER_MODE_ABS, RTR0SEMLNXWAIT_RESOLUTION);
+ if (!rc)
+ pWait->fTimedOut = true;
+ }
+#endif
+ else
+ {
+ pWait->u.lTimeout = schedule_timeout(pWait->u.lTimeout);
+ if (pWait->u.lTimeout <= 0)
+ pWait->fTimedOut = true;
+ }
+ after_wait((&pWait->WaitQE));
+}
+
+
+/**
+ * Checks if a linux wait was interrupted.
+ *
+ * @returns true / false
+ * @param pWait The wait structure.
+ * @remarks This shall be called before the first rtR0SemLnxWaitDoIt().
+ */
+DECLINLINE(bool) rtR0SemLnxWaitWasInterrupted(PRTR0SEMLNXWAIT pWait)
+{
+ return pWait->iWaitState == TASK_INTERRUPTIBLE
+ && signal_pending(current);
+}
+
+
+/**
+ * Checks if a linux wait has timed out.
+ *
+ * @returns true / false
+ * @param pWait The wait structure.
+ */
+DECLINLINE(bool) rtR0SemLnxWaitHasTimedOut(PRTR0SEMLNXWAIT pWait)
+{
+ return pWait->fTimedOut;
+}
+
+
+/**
+ * Deletes a linux wait.
+ *
+ * @param pWait The wait structure.
+ */
+DECLINLINE(void) rtR0SemLnxWaitDelete(PRTR0SEMLNXWAIT pWait)
+{
+ finish_wait(pWait->pWaitQueue, &pWait->WaitQE);
+}
+
+
+/**
+ * Gets the max resolution of the timeout machinery.
+ *
+ * @returns Resolution specified in nanoseconds.
+ */
+DECLINLINE(uint32_t) rtR0SemLnxWaitGetResolution(void)
+{
+#ifdef IPRT_LINUX_HAS_HRTIMER
+ return RTR0SEMLNXWAIT_RESOLUTION;
+#else
+ return RT_NS_1SEC / HZ; /* ns */
+#endif
+}
+
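+/*
+ * A minimal sketch of how these helpers are meant to be driven, assuming a
+ * caller-provided wait queue head (WaitQueueHead), a wait condition flag
+ * (fSignalled, already checked once before calling rtR0SemLnxWaitInit) and a
+ * millisecond timeout (cMsTimeout); all three names are illustrative only:
+ *
+ *   RTR0SEMLNXWAIT Wait;
+ *   int rc = rtR0SemLnxWaitInit(&Wait,
+ *                               RTSEMWAIT_FLAGS_RELATIVE | RTSEMWAIT_FLAGS_MILLISECS | RTSEMWAIT_FLAGS_INTERRUPTIBLE,
+ *                               cMsTimeout, &WaitQueueHead);
+ *   if (RT_SUCCESS(rc))
+ *   {
+ *       for (;;)
+ *       {
+ *           rtR0SemLnxWaitPrepare(&Wait);
+ *           if (fSignalled)
+ *               break;
+ *           if (rtR0SemLnxWaitWasInterrupted(&Wait))
+ *           {
+ *               rc = VERR_INTERRUPTED;
+ *               break;
+ *           }
+ *           rtR0SemLnxWaitDoIt(&Wait);
+ *           if (rtR0SemLnxWaitHasTimedOut(&Wait))
+ *           {
+ *               rc = VERR_TIMEOUT;
+ *               break;
+ *           }
+ *       }
+ *       rtR0SemLnxWaitDelete(&Wait);
+ *   }
+ */
+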
+#endif /* !IPRT_INCLUDED_SRC_r0drv_linux_waitqueue_r0drv_linux_h */
+