Adding upstream version 7.0.14-dfsg.upstream/7.0.14-dfsg

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-11 08:17:27 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-11 08:17:27 +0000
commit: f215e02bf85f68d3a6106c2a1f4f7f063f819064 (patch)
tree: 6bb5b92c046312c4e95ac2620b10ddf482d3fa8b /src/VBox/Runtime/r3/linux
parent: Initial commit. (diff)
download: virtualbox-f215e02bf85f68d3a6106c2a1f4f7f063f819064.tar.xz
virtualbox-f215e02bf85f68d3a6106c2a1f4f7f063f819064.zip
24 files changed, 7473 insertions, 0 deletions
diff --git a/src/VBox/Runtime/r3/linux/Makefile.kup b/src/VBox/Runtime/r3/linux/Makefile.kup
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/src/VBox/Runtime/r3/linux/Makefile.kup
diff --git a/src/VBox/Runtime/r3/linux/RTFileCopyPartEx-linux.cpp b/src/VBox/Runtime/r3/linux/RTFileCopyPartEx-linux.cpp
new file mode 100644
index 00000000..0b1d93d7
--- /dev/null
+++ b/src/VBox/Runtime/r3/linux/RTFileCopyPartEx-linux.cpp
@@ -0,0 +1,196 @@
+/* $Id: RTFileCopyPartEx-linux.cpp $ */
+/** @file
+ * IPRT - RTFileCopyPartEx, linux specific implementation.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+*   Header Files                                                                                                                 *
+*********************************************************************************************************************************/
+#include <iprt/file.h>
+#include "internal/iprt.h"
+
+#include <iprt/asm.h>
+#include <iprt/assert.h>
+#include <iprt/err.h>
+
+#include <errno.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+
+#ifndef __NR_copy_file_range
+# if defined(RT_ARCH_X86)
+#  define __NR_copy_file_range      377
+# elif defined(RT_ARCH_AMD64)
+#  define __NR_copy_file_range      326
+# endif
+#endif
+
+
+#ifndef __NR_copy_file_range
+# include "../../generic/RTFileCopyPartEx-generic.cpp"
+#else  /* __NR_copy_file_range - whole file */
+/* Include the generic code as a fallback since copy_file_range is rather new. */
+# define IPRT_FALLBACK_VERSION
+# include "../../generic/RTFileCopyPartEx-generic.cpp"
+# undef  IPRT_FALLBACK_VERSION
+
+
+/*********************************************************************************************************************************
+*   Global Variables                                                                                                             *
+*********************************************************************************************************************************/
+static int32_t volatile g_fCopyFileRangeSupported = -1; /**< Cached probe result: -1 = not probed yet, 0 = probe got ENOSYS, 1 = probe did not get ENOSYS. */
+
+
+/** Issues copy_file_range through syscall(), passing all arguments on unchanged. */
+DECLINLINE(loff_t) MyCopyFileRangeSysCall(int fdIn, loff_t *poffIn, int fdOut, loff_t *poffOut, size_t cbChunk, unsigned int fFlags)
+{
+    return syscall(__NR_copy_file_range, fdIn, poffIn, fdOut, poffOut, cbChunk, fFlags);
+}
+
+
+DECL_NO_INLINE(static, bool) HasCopyFileRangeSyscallSlow(void)
+{
+    /*
+     * Probe using invalid descriptors and record whether the outcome was
+     * anything other than ENOSYS; the answer is cached for later callers.
+     */
+    errno = 0;
+    MyCopyFileRangeSysCall(-1, NULL, -1, NULL, 4096, 0);
+    bool const fSupported = errno != ENOSYS;
+    ASMAtomicWriteS32(&g_fCopyFileRangeSupported, fSupported ? 1 : 0);
+    return fSupported;
+}
+
+DECLINLINE(bool) HasCopyFileRangeSyscall(void)
+{
+    /* -1 means the probe has not run yet; any other value is its cached answer
+       (1 when the probe did not fail with ENOSYS). */
+    int32_t const iCached = ASMAtomicUoReadS32(&g_fCopyFileRangeSupported);
+    return iCached == -1 ? HasCopyFileRangeSyscallSlow() : iCached == 1;
+}
+
+
+
+RTDECL(int) RTFileCopyPartPrep(PRTFILECOPYPARTBUFSTATE pBufState, uint64_t cbToCopy)
+{
+    /* Without the syscall the generic implementation prepares the state. */
+    if (!HasCopyFileRangeSyscall())
+        return rtFileCopyPartPrepFallback(pBufState, cbToCopy);
+    /* Syscall mode uses no buffer; -42 is the marker RTFileCopyPartEx tests for. */
+    pBufState->uMagic     = RTFILECOPYPARTBUFSTATE_MAGIC;
+    pBufState->iAllocType = -42;
+    pBufState->pbBuf      = NULL;
+    pBufState->cbBuf      = 0;
+    return VINF_SUCCESS;
+}
+
+
+RTDECL(void) RTFileCopyPartCleanup(PRTFILECOPYPARTBUFSTATE pBufState)
+{
+    rtFileCopyPartCleanupFallback(pBufState); /* The generic cleanup is used unconditionally. */
+}
+
+
+RTDECL(int) RTFileCopyPartEx(RTFILE hFileSrc, RTFOFF offSrc, RTFILE hFileDst, RTFOFF offDst, uint64_t cbToCopy,
+                             uint32_t fFlags, PRTFILECOPYPARTBUFSTATE pBufState, uint64_t *pcbCopied)
+{
+    /*
+     * Validate input.  A state without the -42 marker was not set up for the
+     * syscall by RTFileCopyPartPrep, so the generic code handles the request.
+     */
+    if (pcbCopied)
+        *pcbCopied = 0;
+    AssertReturn(pBufState->uMagic == RTFILECOPYPARTBUFSTATE_MAGIC, VERR_INVALID_FLAGS);
+    if (pBufState->iAllocType != -42)
+        return rtFileCopyPartExFallback(hFileSrc, offSrc, hFileDst, offDst, cbToCopy, fFlags, pBufState, pcbCopied);
+    AssertReturn(offSrc >= 0, VERR_NEGATIVE_SEEK);
+    AssertReturn(offDst >= 0, VERR_NEGATIVE_SEEK);
+    AssertReturn(!fFlags, VERR_INVALID_FLAGS);
+
+    /*
+     * If nothing to copy, return right away.
+     */
+    if (!cbToCopy)
+        return VINF_SUCCESS;
+
+    /*
+     * Do the copying, at most _1G bytes per syscall.  The source position is
+     * derived from offSrc and the destination position from offDst.
+     */
+    uint64_t cbCopied = 0;
+    int      rc       = VINF_SUCCESS;
+    do
+    {
+        size_t  cbThisCopy = (size_t)RT_MIN(cbToCopy - cbCopied, _1G);
+        loff_t  offThisSrc = offSrc + cbCopied;
+        loff_t  offThisDst = offDst + cbCopied;
+        ssize_t cbActual   = MyCopyFileRangeSysCall((int)RTFileToNative(hFileSrc), &offThisSrc,
+                                                    (int)RTFileToNative(hFileDst), &offThisDst,
+                                                    cbThisCopy, 0);
+        if (cbActual < 0)
+        {
+            rc = errno;
+            Assert(rc != 0);
+            rc = rc != 0 ? RTErrConvertFromErrno(rc) : VERR_READ_ERROR;
+            if (rc != VERR_NOT_SAME_DEVICE || cbCopied != 0)
+                break; /* Any other error, or one after partial progress, is reported as is. */
+
+            /* Fall back to generic implementation if the syscall refuses to handle the case. */
+            rc = rtFileCopyPartPrepFallback(pBufState, cbToCopy);
+            if (RT_SUCCESS(rc))
+                return rtFileCopyPartExFallback(hFileSrc, offSrc, hFileDst, offDst, cbToCopy, fFlags, pBufState, pcbCopied);
+            return rc;
+        }
+        Assert(offThisSrc == offSrc + (int64_t)cbCopied + cbActual);
+        Assert(offThisDst == offDst + (int64_t)cbCopied + cbActual);
+
+        if (cbActual == 0) /* Nothing copied: stop; VERR_EOF only if the caller cannot get the count. */
+        {
+            if (!pcbCopied)
+                rc = VERR_EOF;
+            break;
+        }
+
+        cbCopied += cbActual;
+    } while (cbCopied < cbToCopy);
+
+    if (pcbCopied)
+        *pcbCopied = cbCopied;
+
+    return rc;
+}
+
+#endif /* __NR_copy_file_range */
+
diff --git a/src/VBox/Runtime/r3/linux/RTFileQuerySectorSize-linux.cpp b/src/VBox/Runtime/r3/linux/RTFileQuerySectorSize-linux.cpp
new file mode 100644
index 00000000..ffd615fb
--- /dev/null
+++ b/src/VBox/Runtime/r3/linux/RTFileQuerySectorSize-linux.cpp
@@ -0,0 +1,88 @@
+/* $Id: RTFileQuerySectorSize-linux.cpp $ */
+/** @file
+ * IPRT - RTFileQuerySectorSize, Linux implementation.
+ */
+
+/*
+ * Copyright (C) 2017-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+*   Header Files                                                                                                                 *
+*********************************************************************************************************************************/
+#include "internal/iprt.h"
+#include <iprt/file.h>
+
+#include <iprt/assert.h>
+#include <iprt/errcore.h>
+
+#include <errno.h>
+#include <linux/fs.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+
+
+RTDECL(int) RTFileQuerySectorSize(RTFILE hFile, uint32_t *pcbSector)
+{
+    AssertPtrReturn(pcbSector, VERR_INVALID_PARAMETER);
+
+    /* The handle must be something fstat() accepts. */
+    int const   fdNative = (int)RTFileToNative(hFile);
+    struct stat StatBuf  = { 0 };
+    if (fstat(fdNative, &StatBuf) != 0)
+    {
+        int const rcStat = RTErrConvertFromErrno(errno);
+        AssertMsgFailed(("fstat failed: errno=%d / %Rrc\n", errno, rcStat));
+        return rcStat;
+    }
+
+    /* Anything that is not a block device is rejected. */
+    if (!S_ISBLK(StatBuf.st_mode))
+    {
+        AssertMsgFailed(("not a block device.\n"));
+        return VERR_INVALID_FUNCTION;
+    }
+
+    /*
+     * Query the logical block size with the BLKSSZGET ioctl.
+     */
+    int cbLogical = 0;
+    if (ioctl(fdNative, BLKSSZGET, &cbLogical) != 0)
+    {
+        int const rcIoctl = RTErrConvertFromErrno(errno);
+        AssertMsgFailed(("ioctl failed: errno=%d / %Rrc\n", errno, rcIoctl));
+        return rcIoctl;
+    }
+    AssertReturn(cbLogical > 0, VERR_INVALID_FUNCTION);
+    *pcbSector = cbLogical;
+    return VINF_SUCCESS;
+}
+
diff --git a/src/VBox/Runtime/r3/linux/RTFileSetAllocationSize-linux.cpp b/src/VBox/Runtime/r3/linux/RTFileSetAllocationSize-linux.cpp
new file mode 100644
index 00000000..f3acd7fe
--- /dev/null
+++ b/src/VBox/Runtime/r3/linux/RTFileSetAllocationSize-linux.cpp
@@ -0,0 +1,86 @@
+/* $Id: RTFileSetAllocationSize-linux.cpp $ */
+/** @file
+ * IPRT - RTFileSetAllocationSize, linux implementation.
+ */
+
+/*
+ * Copyright (C) 2016-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+*   Header Files                                                                                                                 *
+*********************************************************************************************************************************/
+#define LOG_GROUP RTLOGGROUP_FILE
+#include <iprt/file.h>
+#include "internal/iprt.h"
+
+#include <iprt/assert.h>
+#include <iprt/errcore.h>
+
+#include <dlfcn.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/fcntl.h>
+
+/**
+ * The Linux specific fallocate() method.
+ */
+typedef int (*PFNLNXFALLOCATE) (int iFd, int fMode, off_t offStart, off_t cb);
+/** Flag to specify that the file size should not be extended. */
+#define LNX_FALLOC_FL_KEEP_SIZE 1
+
+RTDECL(int) RTFileSetAllocationSize(RTFILE hFile, uint64_t cbSize, uint32_t fFlags)
+{
+    AssertReturn(hFile != NIL_RTFILE, VERR_INVALID_PARAMETER);
+    AssertReturn(!(fFlags & ~RTFILE_ALLOC_SIZE_F_VALID), VERR_INVALID_PARAMETER);
+    AssertMsgReturn(sizeof(off_t) >= sizeof(cbSize) ||  RT_HIDWORD(cbSize) == 0,
+                    ("64-bit filesize not supported! cbSize=%lld\n", cbSize),
+                    VERR_NOT_SUPPORTED);
+
+    /*
+     * fallocate64 is looked up at runtime; without it nothing can be done.
+     */
+    PFNLNXFALLOCATE const pfnFAllocate64 = (PFNLNXFALLOCATE)(uintptr_t)dlsym(RTLD_DEFAULT, "fallocate64");
+    if (!RT_VALID_PTR(pfnFAllocate64))
+        return VERR_NOT_SUPPORTED;
+
+    /* Allocate from offset 0 up to cbSize, keeping the file size when requested. */
+    int const fMode = (fFlags & RTFILE_ALLOC_SIZE_F_KEEP_SIZE) ? LNX_FALLOC_FL_KEEP_SIZE : 0;
+    if (pfnFAllocate64(RTFileToNative(hFile), fMode, 0, cbSize) == 0)
+        return VINF_SUCCESS;
+
+    /* Failure: EOPNOTSUPP gets an explicit status, any other errno value is
+       converted. */
+    if (errno == EOPNOTSUPP)
+        return VERR_NOT_SUPPORTED;
+    return RTErrConvertFromErrno(errno);
+}
+RT_EXPORT_SYMBOL(RTFileSetAllocationSize);
diff --git a/src/VBox/Runtime/r3/linux/RTProcIsRunningByName-linux.cpp b/src/VBox/Runtime/r3/linux/RTProcIsRunningByName-linux.cpp
new file mode 100644
index 00000000..6049fa1a
--- /dev/null
+++ b/src/VBox/Runtime/r3/linux/RTProcIsRunningByName-linux.cpp
@@ -0,0 +1,128 @@
+/* $Id: RTProcIsRunningByName-linux.cpp $ */
+/** @file
+ * IPRT - RTProcIsRunningByName, Linux implementation.
+ */
+
+/*
+ * Copyright (C) 2009-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+*   Header Files                                                                                                                 *
+*********************************************************************************************************************************/
+#define LOG_GROUP RTLOGGROUP_PROCESS
+#include <iprt/process.h>
+#include <iprt/string.h>
+#include <iprt/dir.h>
+#include <iprt/path.h>
+#include <iprt/stream.h>
+#include <iprt/param.h>
+#include <iprt/assert.h>
+
+#include <unistd.h>
+
+
+RTR3DECL(bool) RTProcIsRunningByName(const char *pszName)
+{
+    /*
+     * Nothing can match a NULL name.
+     */
+    if (!pszName)
+        return false;
+
+    /*
+     * A name containing a path is compared against the whole image path.
+     */
+    bool const fMatchFullPath = RTPathHavePath(pszName);
+
+    /*
+     * Walk the entries of /proc.
+     */
+    RTDIR hProcDir;
+    int rc = RTDirOpen(&hProcDir, "/proc");
+    AssertMsgRCReturn(rc, ("RTDirOpen on /proc failed: rc=%Rrc\n", rc), false);
+    if (RT_FAILURE(rc))
+        return false;
+
+    bool       fFound = false;
+    RTDIRENTRY Entry;
+    while (!fFound && RT_SUCCESS(RTDirRead(hProcDir, &Entry, NULL)))
+    {
+        /*
+         * Only numerically named directories (or entries of unknown type) are of interest.
+         */
+        if (   Entry.enmType != RTDIRENTRYTYPE_DIRECTORY
+            && Entry.enmType != RTDIRENTRYTYPE_UNKNOWN)
+            continue;
+        if (RTStrToUInt32(Entry.szName) == 0)
+            continue;
+
+        /*
+         * Prefer readlink on exe since it is faster and more reliable.  When
+         * that fails (access errors typically) use the first line of cmdline
+         * instead, which is unreliable as it might contain whatever the
+         * execv caller passes as argv[0].
+         */
+        char szPath[RTPATH_MAX];
+        RTStrPrintf(szPath, sizeof(szPath), "/proc/%s/exe", &Entry.szName[0]);
+        char szImage[RTPATH_MAX];
+        int const cchLink = readlink(szPath, szImage, sizeof(szImage) - 1);
+        if (   cchLink > 0
+            && (size_t)cchLink < sizeof(szImage))
+        {
+            szImage[cchLink] = '\0';
+            rc = VINF_SUCCESS;
+        }
+        else
+        {
+            RTStrPrintf(szPath, sizeof(szPath), "/proc/%s/cmdline", &Entry.szName[0]);
+            PRTSTREAM pStrm;
+            rc = RTStrmOpen(szPath, "r", &pStrm);
+            if (RT_SUCCESS(rc))
+            {
+                rc = RTStrmGetLine(pStrm, szImage, sizeof(szImage));
+                RTStrmClose(pStrm);
+            }
+        }
+        if (RT_FAILURE(rc))
+            continue;
+
+        /*
+         * Compare either the whole path or only its file name part.
+         */
+        char const *pszCandidate = fMatchFullPath ? szImage : RTPathFilename(szImage);
+        fFound = RTStrCmp(pszCandidate, pszName) == 0;
+    }
+
+    RTDirClose(hProcDir);
+    return fFound;
+}
+
diff --git a/src/VBox/Runtime/r3/linux/RTSystemFirmware-linux.cpp b/src/VBox/Runtime/r3/linux/RTSystemFirmware-linux.cpp
new file mode 100644
index 00000000..2d7b8986
--- /dev/null
+++ b/src/VBox/Runtime/r3/linux/RTSystemFirmware-linux.cpp
@@ -0,0 +1,115 @@
+/* $Id: RTSystemFirmware-linux.cpp $ */
+/** @file
+ * IPRT - System firmware information, linux.
+ */
+
+/*
+ * Copyright (C) 2019-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+*   Header Files                                                                                                                 *
+*********************************************************************************************************************************/
+#include "internal/iprt.h"
+#include <iprt/system.h>
+
+#include <iprt/err.h>
+#include <iprt/file.h>
+#include <iprt/string.h>
+#include <iprt/linux/sysfs.h>
+
+
+/*********************************************************************************************************************************
+*   Defined Constants And Macros                                                                                                 *
+*********************************************************************************************************************************/
+/** Defines the UEFI Globals UUID that is used here as filename suffix (case sensitive). */
+#define VBOX_UEFI_UUID_GLOBALS "8be4df61-93ca-11d2-aa0d-00e098032b8c"
+
+
+RTDECL(int) RTSystemQueryFirmwareType(PRTSYSFWTYPE penmFirmwareType)
+{
+    /* The firmware/efi/ node in sysfs is taken as the sign of UEFI firmware. */
+    if (RTLinuxSysFsExists("firmware/efi/"))
+    {
+        *penmFirmwareType = RTSYSFWTYPE_UEFI;
+        return VINF_SUCCESS;
+    }
+    /* Otherwise BIOS is reported only if the empty path (presumably the sysfs root) exists. */
+    bool const fHaveSysFs = RTLinuxSysFsExists("");
+    *penmFirmwareType = fHaveSysFs ? RTSYSFWTYPE_BIOS : RTSYSFWTYPE_INVALID;
+    return fHaveSysFs ? VINF_SUCCESS : VERR_NOT_SUPPORTED;
+}
+RT_EXPORT_SYMBOL(RTSystemQueryFirmwareType);
+
+
+RTDECL(int) RTSystemQueryFirmwareBoolean(RTSYSFWBOOL enmBoolean, bool *pfValue)
+{
+    *pfValue = false;
+
+    /*
+     * Map the property onto the base name of its variable file.
+     */
+    const char *pszVarBase;
+    if (enmBoolean == RTSYSFWBOOL_SECURE_BOOT)
+        pszVarBase = "firmware/efi/efivars/SecureBoot";
+    else
+    {
+        AssertReturn(enmBoolean > RTSYSFWBOOL_INVALID && enmBoolean < RTSYSFWBOOL_END, VERR_INVALID_PARAMETER);
+        return VERR_SYS_UNSUPPORTED_FIRMWARE_PROPERTY;
+    }
+
+    /*
+     * Open the variable file; open failures are translated right here.
+     */
+    RTFILE hVarFile;
+    int rc = RTLinuxSysFsOpen(&hVarFile, "%s-" VBOX_UEFI_UUID_GLOBALS, pszVarBase);
+    /** @todo try other suffixes if file-not-found. */
+    if (RT_FAILURE(rc))
+    {
+        if (rc == VERR_FILE_NOT_FOUND || rc == VERR_PATH_NOT_FOUND)
+            return VINF_SUCCESS; /* No such file: succeed with the value left false. */
+        return rc == VERR_PERMISSION_DENIED ? VERR_NOT_SUPPORTED : rc;
+    }
+
+    /*
+     * Read it.  The value is true when more than one byte was read and the
+     * last of those bytes is non-zero.
+     */
+    uint8_t abData[16];
+    size_t  cbData = 0;
+    rc = RTLinuxSysFsReadFile(hVarFile, abData, sizeof(abData), &cbData);
+    *pfValue = cbData > 1 && abData[cbData - 1] != 0;
+    RTFileClose(hVarFile);
+
+    return rc;
+}
+RT_EXPORT_SYMBOL(RTSystemQueryFirmwareBoolean);
+
diff --git a/src/VBox/Runtime/r3/linux/RTSystemQueryDmiString-linux.cpp b/src/VBox/Runtime/r3/linux/RTSystemQueryDmiString-linux.cpp
new file mode 100644
index 00000000..91cf6eb2
--- /dev/null
+++ b/src/VBox/Runtime/r3/linux/RTSystemQueryDmiString-linux.cpp
@@ -0,0 +1,96 @@
+/* $Id: RTSystemQueryDmiString-linux.cpp $ */
+/** @file
+ * IPRT - RTSystemQueryDmiString, linux ring-3.
+ */
+
+/*
+ * Copyright (C) 2010-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+*   Header Files                                                                                                                 *
+*********************************************************************************************************************************/
+#include <iprt/system.h>
+#include "internal/iprt.h"
+
+#include <iprt/err.h>
+#include <iprt/assert.h>
+#include <iprt/linux/sysfs.h>
+
+#include <errno.h>
+
+
+RTDECL(int) RTSystemQueryDmiString(RTSYSDMISTR enmString, char *pszBuf, size_t cbBuf)
+{
+    AssertPtrReturn(pszBuf, VERR_INVALID_POINTER);
+    AssertReturn(cbBuf > 0, VERR_INVALID_PARAMETER);
+    *pszBuf = '\0';
+    AssertReturn(enmString > RTSYSDMISTR_INVALID && enmString < RTSYSDMISTR_END, VERR_INVALID_PARAMETER);
+
+    /* Select the sysfs attribute file for the requested string. */
+    const char *pszAttr;
+    if (enmString == RTSYSDMISTR_PRODUCT_NAME)
+        pszAttr = "id/product_name";
+    else if (enmString == RTSYSDMISTR_PRODUCT_VERSION)
+        pszAttr = "id/product_version";
+    else if (enmString == RTSYSDMISTR_PRODUCT_UUID)
+        pszAttr = "id/product_uuid";
+    else if (enmString == RTSYSDMISTR_PRODUCT_SERIAL)
+        pszAttr = "id/product_serial";
+    else if (enmString == RTSYSDMISTR_MANUFACTURER)
+        pszAttr = "id/sys_vendor";
+    else
+        return VERR_NOT_SUPPORTED;
+
+    /* Read from the first location; unless that succeeded or overflowed the
+       buffer, try the second location. */
+    size_t cchRead = 0;
+    int rc = RTLinuxSysFsReadStrFile(pszBuf, cbBuf, &cchRead, "devices/virtual/dmi/%s", pszAttr);
+    if (RT_FAILURE(rc) && rc != VERR_BUFFER_OVERFLOW)
+        rc = RTLinuxSysFsReadStrFile(pszBuf, cbBuf, &cchRead, "class/dmi/%s", pszAttr);
+
+    /* Translate not-found style failures to VERR_NOT_SUPPORTED and permission
+       style failures to VERR_ACCESS_DENIED; other statuses pass through. */
+    if (RT_FAILURE(rc) && rc != VERR_BUFFER_OVERFLOW)
+    {
+        if (   rc == VERR_FILE_NOT_FOUND
+            || rc == VERR_PATH_NOT_FOUND
+            || rc == VERR_IS_A_DIRECTORY)
+            rc = VERR_NOT_SUPPORTED;
+        else if (   rc == VERR_PERMISSION_DENIED
+                 || rc == VERR_ACCESS_DENIED)
+            rc = VERR_ACCESS_DENIED;
+    }
+
+    return rc;
+}
+RT_EXPORT_SYMBOL(RTSystemQueryDmiString);
+
diff --git a/src/VBox/Runtime/r3/linux/RTSystemShutdown-linux.cpp b/src/VBox/Runtime/r3/linux/RTSystemShutdown-linux.cpp
new file mode 100644
index 00000000..fd198e30
--- /dev/null
+++ b/src/VBox/Runtime/r3/linux/RTSystemShutdown-linux.cpp
@@ -0,0 +1,111 @@
+/* $Id: RTSystemShutdown-linux.cpp $ */
+/** @file
+ * IPRT - RTSystemShutdown, linux implementation.
+ */
+
+/*
+ * Copyright (C) 2012-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+*   Header Files                                                                                                                 *
+*********************************************************************************************************************************/
+#include <iprt/system.h>
+#include "internal/iprt.h"
+
+#include <iprt/assert.h>
+#include <iprt/env.h>
+#include <iprt/err.h>
+#include <iprt/process.h>
+#include <iprt/string.h>
+
+
+RTDECL(int) RTSystemShutdown(RTMSINTERVAL cMsDelay, uint32_t fFlags, const char *pszLogMsg)
+{
+    AssertPtrReturn(pszLogMsg, VERR_INVALID_POINTER);
+    AssertReturn(!(fFlags & ~RTSYSTEM_SHUTDOWN_VALID_MASK), VERR_INVALID_PARAMETER);
+
+    /*
+     * Build the argument vector: program, action switches, time and message.
+     * The array is zeroed first, so the slot after the last argument stays NULL.
+     */
+    int         iNext = 0;
+    const char *apszArgs[6];
+    RT_BZERO(apszArgs, sizeof(apszArgs));
+
+    apszArgs[iNext++] = "/sbin/shutdown";
+
+    /* Translate the action; a value matching none of the cases adds no switch. */
+    uint32_t const fAction = fFlags & RTSYSTEM_SHUTDOWN_ACTION_MASK;
+    if (fAction == RTSYSTEM_SHUTDOWN_HALT)
+    {
+        apszArgs[iNext++] = "-h";
+        apszArgs[iNext++] = "-H";
+    }
+    else if (fAction == RTSYSTEM_SHUTDOWN_REBOOT)
+        apszArgs[iNext++] = "-r";
+    else if (   fAction == RTSYSTEM_SHUTDOWN_POWER_OFF
+             || fAction == RTSYSTEM_SHUTDOWN_POWER_OFF_HALT)
+    {
+        apszArgs[iNext++] = "-h";
+        apszArgs[iNext++] = "-P";
+    }
+
+    /*
+     * Delays below 500 ms become "now"; longer ones are passed as whole seconds.
+     * NOTE(review): shutdown(8) may read a bare number as minutes - confirm the unit.
+     */
+    char szWhen[80];
+    if (cMsDelay >= 500)
+        RTStrPrintf(szWhen, sizeof(szWhen), "%u", (unsigned)((cMsDelay + 499) / 1000));
+    else
+        strcpy(szWhen, "now");
+    apszArgs[iNext++] = szWhen;
+    apszArgs[iNext++] = pszLogMsg;
+
+    /*
+     * Start the shutdown process and wait for it to complete.
+     */
+    RTPROCESS hShutdownProc;
+    int rc = RTProcCreate(apszArgs[0], apszArgs, RTENV_DEFAULT, 0 /*fFlags*/, &hShutdownProc);
+    if (RT_SUCCESS(rc))
+    {
+        RTPROCSTATUS Status;
+        rc = RTProcWait(hShutdownProc, RTPROCWAIT_FLAGS_BLOCK, &Status);
+        if (   RT_SUCCESS(rc)
+            && (   Status.enmReason != RTPROCEXITREASON_NORMAL
+                || Status.iStatus   != 0))
+            rc = VERR_SYS_SHUTDOWN_FAILED;
+    }
+    return rc;
+}
+RT_EXPORT_SYMBOL(RTSystemShutdown);
+
diff --git a/src/VBox/Runtime/r3/linux/RTThreadGetNativeState-linux.cpp b/src/VBox/Runtime/r3/linux/RTThreadGetNativeState-linux.cpp
new file mode 100644
index 00000000..26c0afdb
--- /dev/null
+++ b/src/VBox/Runtime/r3/linux/RTThreadGetNativeState-linux.cpp
@@ -0,0 +1,121 @@
+/* $Id: RTThreadGetNativeState-linux.cpp $ */
+/** @file
+ * IPRT - RTThreadGetNativeState, linux implementation.
+ */
+
+/*
+ * Copyright (C) 2010-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+*   Header Files                                                                                                                 *
+*********************************************************************************************************************************/
+#define LOG_GROUP RTLOGGROUP_PROCESS
+#include <iprt/thread.h>
+#include "internal/iprt.h"
+
+#include <iprt/assert.h>
+#include <iprt/ctype.h>
+#include <iprt/errcore.h>
+#include <iprt/string.h>
+
+#include "internal/thread.h"
+
+#include <unistd.h>
+#include <sys/fcntl.h>
+
+
+/**
+ * Gets the native state of a thread by parsing /proc/self/task/TID/stat.
+ *
+ * @returns The native state; RTTHREADNATIVESTATE_INVALID if rtThreadGet() fails, _UNKNOWN if stat is unusable.
+ * @param   hThread     The thread handle.
+ */
+RTDECL(RTTHREADNATIVESTATE) RTThreadGetNativeState(RTTHREAD hThread)
+{
+    RTTHREADNATIVESTATE enmRet  = RTTHREADNATIVESTATE_INVALID;
+    PRTTHREADINT        pThread = rtThreadGet(hThread);
+    if (pThread)
+    {
+        enmRet = RTTHREADNATIVESTATE_UNKNOWN;
+
+        char szName[512];
+        RTStrPrintf(szName, sizeof(szName), "/proc/self/task/%u/stat", pThread->tid);
+        int fd = open(szName, O_RDONLY, 0);
+        if (fd >= 0)
+        {
+            ssize_t cch = read(fd, szName, sizeof(szName) - 1);
+            close(fd);
+            if (cch > 0)
+            {
+                szName[cch] = '\0';
+                /* The (comm name) may itself contain ") R " like sequences, so anchor on the
+                   LAST ')': only the state char and numeric fields follow it. */
+                const char *psz = &szName[cch];
+                while (psz > szName && *psz != ')')
+                    psz--;
+                if (   *psz == ')'
+                    && RT_C_IS_SPACE(psz[1])
+                    && RT_C_IS_ALPHA(psz[2])
+                    && RT_C_IS_SPACE(psz[3]))
+                {
+                    switch (psz[2])
+                    {
+                        case 'R':   /* running */
+                            enmRet = RTTHREADNATIVESTATE_RUNNING;
+                            break;
+                        case 'S':   /* sleeping */
+                        case 'D':   /* disk sleeping */
+                            enmRet = RTTHREADNATIVESTATE_BLOCKED;
+                            break;
+                        case 'T':   /* stopped */
+                        case 't':   /* tracing stop */
+                            enmRet = RTTHREADNATIVESTATE_SUSPENDED;
+                            break;
+                        case 'Z':   /* zombie */
+                        case 'X':   /* dead */
+                            enmRet = RTTHREADNATIVESTATE_TERMINATED;
+                            break;
+                        default:
+                            AssertMsgFailed(("state=%c\n", psz[2]));
+                            enmRet = RTTHREADNATIVESTATE_UNKNOWN;
+                            break;
+                    }
+                }
+                else
+                    AssertMsgFailed(("stat='%s'\n", szName));
+            }
+        }
+        rtThreadRelease(pThread);
+    }
+    return enmRet;
+}
+
diff --git a/src/VBox/Runtime/r3/linux/fileaio-linux.cpp b/src/VBox/Runtime/r3/linux/fileaio-linux.cpp
new file mode 100644
index 00000000..2f365a45
--- /dev/null
+++ b/src/VBox/Runtime/r3/linux/fileaio-linux.cpp
@@ -0,0 +1,847 @@
+/* $Id: fileaio-linux.cpp $ */
+/** @file
+ * IPRT - File async I/O, native implementation for the Linux host platform.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+/** @page pg_rtfileaio_linux     RTFile Async I/O - Linux Implementation Notes
+ * @internal
+ *
+ * Linux implements the kernel async I/O API through the io_* syscalls. They are
+ * not exposed in the glibc (the aio_* API uses userspace threads and blocking
+ * I/O operations to simulate async behavior). There is an external library
+ * called libaio which implements these syscalls but because we don't want to
+ * have another dependency and this library is not installed by default and the
+ * interface is really simple we use the kernel interface directly using wrapper
+ * functions.
+ *
+ * The interface has some limitations. The first one is that the file must be
+ * opened with O_DIRECT. This disables caching done by the kernel, which can be
+ * compensated for if the user of this API implements caching itself. The next
+ * limitation is that data buffers must be aligned on a 512 byte boundary or the
+ * request will fail.
+ */
+/** @todo r=bird: What's this about "must be opened with O_DIRECT"? An
+ *        explanation would be nice, esp. seeing what Linus is quoted saying
+ *        about it in the open man page... */
+
+
+/*********************************************************************************************************************************
+*   Header Files                                                                                                                 *
+*********************************************************************************************************************************/
+#define LOG_GROUP RTLOGGROUP_FILE
+#include <iprt/asm.h>
+#include <iprt/mem.h>
+#include <iprt/assert.h>
+#include <iprt/string.h>
+#include <iprt/err.h>
+#include <iprt/log.h>
+#include <iprt/thread.h>
+#include "internal/fileaio.h"
+
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <errno.h>
+
+#include <iprt/file.h>
+
+
+/*********************************************************************************************************************************
+*   Structures and Typedefs                                                                                                      *
+*********************************************************************************************************************************/
+/** The async I/O context handle, as passed to the io_* syscall wrappers below. */
+typedef unsigned long LNXKAIOCONTEXT;
+
+/**
+ * Supported commands for the iocbs (the value stored in LNXKAIOIOCB::u16IoOpCode).
+ */
+enum
+{
+    LNXKAIO_IOCB_CMD_READ   = 0,
+    LNXKAIO_IOCB_CMD_WRITE  = 1,
+    LNXKAIO_IOCB_CMD_FSYNC  = 2,
+    LNXKAIO_IOCB_CMD_FDSYNC = 3
+};
+
+/**
+ * The iocb structure of a request which is passed to the kernel.
+ *
+ * We redefined this here because the version in the header lacks padding
+ * for 32bit. Pointers are padded to 64 bits on every 32-bit target.
+ */
+typedef struct LNXKAIOIOCB
+{
+    /** Opaque pointer to data which is returned on an I/O event. */
+    void     *pvUser;
+#if ARCH_BITS == 32
+    uint32_t  u32Padding0;
+#endif
+    /** Contains the request number and is set by the kernel. */
+    uint32_t  u32Key;
+    /** Reserved. */
+    uint32_t  u32Reserved0;
+    /** The I/O opcode. */
+    uint16_t  u16IoOpCode;
+    /** Request priority. */
+    int16_t   i16Priority;
+    /** The file descriptor. */
+    uint32_t  uFileDesc;
+    /** The userspace pointer to the buffer containing/receiving the data. */
+    void     *pvBuf;
+#if ARCH_BITS == 32
+    uint32_t  u32Padding1;
+#endif
+    /** How many bytes to transfer. */
+#if ARCH_BITS == 32
+    uint32_t  cbTransfer;
+    uint32_t  u32Padding2;
+#elif ARCH_BITS == 64
+    uint64_t  cbTransfer;
+#else
+# error "Unknown architecture"
+#endif
+    /** At which offset to start the transfer. */
+    int64_t   off;
+    /** Reserved. */
+    uint64_t  u64Reserved1;
+    /** Flags */
+    uint32_t  fFlags;
+    /** Readiness signal file descriptor. */
+    uint32_t  u32ResFd;
+} LNXKAIOIOCB, *PLNXKAIOIOCB;
+
+/**
+ * I/O event structure to notify about completed requests.
+ * Redefined here too because of the padding.
+ */
+typedef struct LNXKAIOIOEVENT
+{
+    /** The pvUser field from the iocb. */
+    void         *pvUser;
+#if ARCH_BITS == 32
+    uint32_t      u32Padding0;
+#endif
+    /** Pointer to the LNXKAIOIOCB object this event is for. */
+    PLNXKAIOIOCB  pIoCB;
+#if ARCH_BITS == 32
+    uint32_t      u32Padding1;
+#endif
+    /** The result code of the operation. */
+#if ARCH_BITS == 32
+    int32_t       rc;
+    uint32_t      u32Padding2;
+#elif ARCH_BITS == 64
+    int64_t       rc;
+#else
+# error "Unknown architecture"
+#endif
+    /** Secondary result code. */
+#if ARCH_BITS == 32
+    int32_t       rc2;
+    uint32_t      u32Padding3;
+#elif ARCH_BITS == 64
+    int64_t       rc2;
+#else
+# error "Unknown architecture"
+#endif
+} LNXKAIOIOEVENT, *PLNXKAIOIOEVENT;
+
+
+/**
+ * Async I/O completion context state (what an RTFILEAIOCTX handle points to).
+ */
+typedef struct RTFILEAIOCTXINTERNAL
+{
+    /** Handle to the kernel async I/O context (from rtFileAsyncIoLinuxCreate). */
+    LNXKAIOCONTEXT      AioContext;
+    /** Maximum number of requests this context can handle. */
+    int                 cRequestsMax;
+    /** Current number of requests active on this context (updated atomically). */
+    volatile int32_t    cRequests;
+    /** Handle of the thread currently in RTFileAioCtxWait, NIL_RTTHREAD if none. */
+    volatile RTTHREAD   hThreadWait;
+    /** Set by RTFileAioCtxWakeup, cleared again by RTFileAioCtxWait. */
+    volatile bool       fWokenUp;
+    /** Flag whether the thread is currently waiting in the syscall. */
+    volatile bool       fWaiting;
+    /** Flags given during creation. */
+    uint32_t            fFlags;
+    /** Magic value (RTFILEAIOCTX_MAGIC). */
+    uint32_t            u32Magic;
+} RTFILEAIOCTXINTERNAL;
+/** Pointer to an internal context structure. */
+typedef RTFILEAIOCTXINTERNAL *PRTFILEAIOCTXINTERNAL;
+
+/**
+ * Async I/O request state (what an RTFILEAIOREQ handle points to).
+ */
+typedef struct RTFILEAIOREQINTERNAL
+{
+    /** The aio control block. This must be the FIRST element in the structure
+     *  because request pointers and iocb pointers are cast into one another. */
+    LNXKAIOIOCB           AioCB;
+    /** Current state the request is in. */
+    RTFILEAIOREQSTATE     enmState;
+    /** The kernel I/O context this request was submitted to. */
+    LNXKAIOCONTEXT        AioContext;
+    /** Return code the request completed with. */
+    int                   Rc;
+    /** Number of bytes actually transferred. */
+    size_t                cbTransfered;
+    /** Completion context we are assigned to, NULL while not submitted. */
+    PRTFILEAIOCTXINTERNAL pCtxInt;
+    /** Magic value  (RTFILEAIOREQ_MAGIC). */
+    uint32_t              u32Magic;
+} RTFILEAIOREQINTERNAL;
+/** Pointer to an internal request structure. */
+typedef RTFILEAIOREQINTERNAL *PRTFILEAIOREQINTERNAL;
+
+
+/*********************************************************************************************************************************
+*   Defined Constants And Macros                                                                                                 *
+*********************************************************************************************************************************/
+/** The max number of events to get in one call (sizes the event array in RTFileAioCtxWait). */
+#define AIO_MAXIMUM_REQUESTS_PER_CONTEXT 64
+
+
+/**
+ * Sets up a new kernel async I/O context (io_setup syscall).
+ *
+ * @returns VINF_SUCCESS, VERR_FILE_AIO_INSUFFICIENT_EVENTS on EAGAIN, else the converted errno.
+ */
+DECLINLINE(int) rtFileAsyncIoLinuxCreate(unsigned cEvents, LNXKAIOCONTEXT *pAioContext)
+{
+    int const rcSys = syscall(__NR_io_setup, cEvents, pAioContext);
+    if (RT_UNLIKELY(rcSys == -1))
+    {
+        int const iErrNo = errno;
+        return iErrNo == EAGAIN ? VERR_FILE_AIO_INSUFFICIENT_EVENTS : RTErrConvertFromErrno(iErrNo);
+    }
+
+    return VINF_SUCCESS;
+}
+
+/**
+ * Tears down a kernel async I/O context (io_destroy syscall).
+ *
+ * @returns VINF_SUCCESS or the converted errno.
+ */
+DECLINLINE(int) rtFileAsyncIoLinuxDestroy(LNXKAIOCONTEXT AioContext)
+{
+    int const rcSys = syscall(__NR_io_destroy, AioContext);
+    return RT_UNLIKELY(rcSys == -1)
+         ? RTErrConvertFromErrno(errno) : VINF_SUCCESS;
+}
+
+/**
+ * Hands an array of iocb pointers to the kernel (io_submit syscall).
+ *
+ * @returns VINF_SUCCESS or the converted errno; *pcSubmitted is only written on success.
+ */
+DECLINLINE(int) rtFileAsyncIoLinuxSubmit(LNXKAIOCONTEXT AioContext, long cReqs, LNXKAIOIOCB **ppIoCB, int *pcSubmitted)
+{
+    int const cAccepted = syscall(__NR_io_submit, AioContext, cReqs, ppIoCB);
+    if (RT_UNLIKELY(cAccepted == -1))
+        return RTErrConvertFromErrno(errno);
+    *pcSubmitted = cAccepted;
+    return VINF_SUCCESS;
+}
+
+/**
+ * Asks the kernel to cancel an I/O request (io_cancel syscall).
+ * @returns VINF_SUCCESS or the converted errno.
+ */
+DECLINLINE(int) rtFileAsyncIoLinuxCancel(LNXKAIOCONTEXT AioContext, PLNXKAIOIOCB pIoCB, PLNXKAIOIOEVENT pIoResult)
+{
+    int const rcSys = syscall(__NR_io_cancel, AioContext, pIoCB, pIoResult);
+    if (RT_UNLIKELY(rcSys == -1))
+        return RTErrConvertFromErrno(errno);
+    return VINF_SUCCESS;
+}
+
+/**
+ * Collects completion events from the kernel (io_getevents syscall).
+ *
+ * @returns Number of events fetched (zero or more), or a negative IPRT status code.
+ */
+DECLINLINE(int) rtFileAsyncIoLinuxGetEvents(LNXKAIOCONTEXT AioContext, long cReqsMin, long cReqs,
+                                            PLNXKAIOIOEVENT paIoResults, struct timespec *pTimeout)
+{
+    int const cEvents = syscall(__NR_io_getevents, AioContext, cReqsMin, cReqs, paIoResults, pTimeout);
+    return RT_UNLIKELY(cEvents == -1)
+         ? RTErrConvertFromErrno(errno)
+         : cEvents;
+}
+
+/**
+ * Probes for kernel async I/O support and reports the limits of this backend.
+ *
+ * @returns IPRT status code; a failure to set up or tear down the probe
+ *          context is passed up and leaves pAioLimits untouched.
+ * @param   pAioLimits  Where to store the limits on success.
+ */
+RTR3DECL(int) RTFileAioGetLimits(PRTFILEAIOLIMITS pAioLimits)
+{
+    AssertPtrReturn(pAioLimits, VERR_INVALID_POINTER);
+
+    /* Creating and destroying a throwaway context tells us whether the API is implemented. */
+    LNXKAIOCONTEXT hProbeCtx = 0;
+    int rc = rtFileAsyncIoLinuxCreate(1, &hProbeCtx);
+    if (RT_SUCCESS(rc))
+        rc = rtFileAsyncIoLinuxDestroy(hProbeCtx);
+    if (RT_FAILURE(rc))
+        return rc;
+
+    /* Supported - the buffer alignment is the only restriction to report. */
+    pAioLimits->cReqsOutstandingMax = RTFILEAIO_UNLIMITED_REQS;
+    pAioLimits->cbBufferAlignment   = 512;
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Allocates a zeroed request which starts out in the COMPLETED state.
+ *
+ * @returns VINF_SUCCESS, VERR_INVALID_POINTER or VERR_NO_MEMORY.
+ * @param   phReq       Where to return the new request handle.
+ */
+RTR3DECL(int) RTFileAioReqCreate(PRTFILEAIOREQ phReq)
+{
+    AssertPtrReturn(phReq, VERR_INVALID_POINTER);
+    PRTFILEAIOREQINTERNAL pNewReq = (PRTFILEAIOREQINTERNAL)RTMemAllocZ(sizeof(RTFILEAIOREQINTERNAL));
+    if (RT_UNLIKELY(!pNewReq))
+        return VERR_NO_MEMORY;
+    pNewReq->pCtxInt  = NULL;
+    pNewReq->u32Magic = RTFILEAIOREQ_MAGIC;
+    RTFILEAIOREQ_SET_STATE(pNewReq, COMPLETED);
+    *phReq = (RTFILEAIOREQ)pNewReq;
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Destroys a request; a NIL handle is quietly ignored.
+ *
+ * @returns VINF_SUCCESS, or VERR_FILE_AIO_IN_PROGRESS while the request is submitted.
+ * @param   hReq        The request handle.
+ */
+RTDECL(int) RTFileAioReqDestroy(RTFILEAIOREQ hReq)
+{
+    if (hReq == NIL_RTFILEAIOREQ)
+        return VINF_SUCCESS;
+    PRTFILEAIOREQINTERNAL pDoomed = hReq;
+    RTFILEAIOREQ_VALID_RETURN(pDoomed);
+    RTFILEAIOREQ_NOT_STATE_RETURN_RC(pDoomed, SUBMITTED, VERR_FILE_AIO_IN_PROGRESS);
+
+    ASMAtomicUoWriteU32(&pDoomed->u32Magic, ~RTFILEAIOREQ_MAGIC); /* trash the magic before freeing */
+    RTMemFree(pDoomed);
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Common worker that fills in the iocb of a request and marks it PREPARED.
+ *
+ * @returns VINF_SUCCESS, or VERR_FILE_AIO_IN_PROGRESS if the request is currently submitted.
+ * @param   hReq                The request handle.
+ * @param   hFile               The file to operate on.
+ * @param   uTransferDirection  The LNXKAIO_IOCB_CMD_XXX opcode.
+ * @param   off                 File offset of the transfer.
+ * @param   pvBuf               Data buffer (only asserted for non-FSYNC opcodes).
+ * @param   cbTransfer          Byte count (only asserted for non-FSYNC opcodes).
+ * @param   pvUser              Opaque user data stored in the iocb.
+ */
+DECLINLINE(int) rtFileAioReqPrepareTransfer(RTFILEAIOREQ hReq, RTFILE hFile, uint16_t uTransferDirection,
+                                            RTFOFF off, void *pvBuf, size_t cbTransfer, void *pvUser)
+{
+    PRTFILEAIOREQINTERNAL pReqInt = hReq;
+    RTFILEAIOREQ_VALID_RETURN(pReqInt);
+    RTFILEAIOREQ_NOT_STATE_RETURN_RC(pReqInt, SUBMITTED, VERR_FILE_AIO_IN_PROGRESS);
+    Assert(hFile != NIL_RTFILE);
+    if (uTransferDirection != LNXKAIO_IOCB_CMD_FSYNC)
+    {
+        AssertPtr(pvBuf);
+        Assert(off >= 0);
+        Assert(cbTransfer > 0);
+    }
+
+    /* Fill in the control block; the request is not tied to any context yet. */
+    PLNXKAIOIOCB pIoCB = &pReqInt->AioCB;
+    pIoCB->u16IoOpCode = uTransferDirection;
+    pIoCB->uFileDesc   = RTFileToNative(hFile);
+    pIoCB->off         = off;
+    pIoCB->cbTransfer  = cbTransfer;
+    pIoCB->pvBuf       = pvBuf;
+    pIoCB->pvUser      = pvUser;
+    pReqInt->pCtxInt   = NULL;
+    RTFILEAIOREQ_SET_STATE(pReqInt, PREPARED);
+    return VINF_SUCCESS;
+}
+
+
+/** Prepares a read request; thin wrapper passing LNXKAIO_IOCB_CMD_READ to rtFileAioReqPrepareTransfer(). */
+RTDECL(int) RTFileAioReqPrepareRead(RTFILEAIOREQ hReq, RTFILE hFile, RTFOFF off,
+                                    void *pvBuf, size_t cbRead, void *pvUser)
+{
+    return rtFileAioReqPrepareTransfer(hReq, hFile, LNXKAIO_IOCB_CMD_READ, off, pvBuf, cbRead, pvUser);
+}
+
+
+/** Prepares a write request; thin wrapper passing LNXKAIO_IOCB_CMD_WRITE to rtFileAioReqPrepareTransfer(). */
+RTDECL(int) RTFileAioReqPrepareWrite(RTFILEAIOREQ hReq, RTFILE hFile, RTFOFF off,
+                                     void const *pvBuf, size_t cbWrite, void *pvUser)
+{
+    return rtFileAioReqPrepareTransfer(hReq, hFile, LNXKAIO_IOCB_CMD_WRITE, off, (void *)pvBuf, cbWrite, pvUser);
+}
+
+
+/** Prepares a flush request (LNXKAIO_IOCB_CMD_FSYNC); no buffer, offset or size is involved. */
+RTDECL(int) RTFileAioReqPrepareFlush(RTFILEAIOREQ hReq, RTFILE hFile, void *pvUser)
+{
+    PRTFILEAIOREQINTERNAL pReqInt = hReq;
+    RTFILEAIOREQ_VALID_RETURN(pReqInt);
+    AssertReturn(hFile != NIL_RTFILE, VERR_INVALID_HANDLE);
+    RTFILEAIOREQ_NOT_STATE_RETURN_RC(pReqInt, SUBMITTED, VERR_FILE_AIO_IN_PROGRESS);
+
+    return rtFileAioReqPrepareTransfer(pReqInt, hFile, LNXKAIO_IOCB_CMD_FSYNC, 0, NULL, 0, pvUser);
+}
+
+
+/** Returns the user pointer stored in the request's iocb, NULL if the handle is invalid. */
+RTDECL(void *) RTFileAioReqGetUser(RTFILEAIOREQ hReq)
+{
+    PRTFILEAIOREQINTERNAL pReqInt = hReq;
+    RTFILEAIOREQ_VALID_RETURN_RC(pReqInt, NULL);
+    return pReqInt->AioCB.pvUser;
+}
+
+
+/**
+ * Attempts to cancel a submitted request.
+ *
+ * @returns VINF_SUCCESS, VERR_FILE_AIO_NOT_SUBMITTED, VERR_FILE_AIO_IN_PROGRESS
+ *          (the cancel attempt reported VERR_TRY_AGAIN) or another IPRT status.
+ * @param   hReq        The request handle.
+ */
+RTDECL(int) RTFileAioReqCancel(RTFILEAIOREQ hReq)
+{
+    PRTFILEAIOREQINTERNAL pReqInt = hReq;
+    RTFILEAIOREQ_VALID_RETURN(pReqInt);
+    RTFILEAIOREQ_STATE_RETURN_RC(pReqInt, SUBMITTED, VERR_FILE_AIO_NOT_SUBMITTED);
+
+    LNXKAIOIOEVENT CancelEvent;
+    int const rcCancel = rtFileAsyncIoLinuxCancel(pReqInt->AioContext, &pReqInt->AioCB, &CancelEvent);
+    if (RT_FAILURE(rcCancel))
+        return rcCancel == VERR_TRY_AGAIN ? VERR_FILE_AIO_IN_PROGRESS : rcCancel;
+
+    /* A canceled request never arrives at the completion port, so drop it from the count here. */
+    AssertMsg(RT_VALID_PTR(pReqInt->pCtxInt), ("Invalid state. Request was canceled but wasn't submitted\n"));
+    ASMAtomicDecS32(&pReqInt->pCtxInt->cRequests);
+    pReqInt->Rc = VERR_FILE_AIO_CANCELED;
+    RTFILEAIOREQ_SET_STATE(pReqInt, COMPLETED);
+    return VINF_SUCCESS;
+}
+
+
+/** Returns the completion status and, if it is a success, the byte count via the optional pcbTransfered. */
+RTDECL(int) RTFileAioReqGetRC(RTFILEAIOREQ hReq, size_t *pcbTransfered)
+{
+    PRTFILEAIOREQINTERNAL pReqInt = hReq;
+    RTFILEAIOREQ_VALID_RETURN(pReqInt);
+    AssertPtrNull(pcbTransfered);
+    RTFILEAIOREQ_NOT_STATE_RETURN_RC(pReqInt, SUBMITTED, VERR_FILE_AIO_IN_PROGRESS);
+    RTFILEAIOREQ_NOT_STATE_RETURN_RC(pReqInt, PREPARED, VERR_FILE_AIO_NOT_SUBMITTED);
+
+    int const rcReq = pReqInt->Rc;
+    if (pcbTransfered && RT_SUCCESS(rcReq))
+        *pcbTransfered = pReqInt->cbTransfered;
+    return rcReq;
+}
+
+
+/**
+ * Creates an async I/O completion context on top of a kernel AIO context.
+ * @returns IPRT status code; VERR_OUT_OF_RANGE for RTFILEAIO_UNLIMITED_REQS.
+ * @param   phAioCtx    Where to return the context handle on success.
+ * @param   cAioReqsMax Maximum number of requests, handed to the kernel.
+ * @param   fFlags      Creation flags (see RTFILEAIOCTX_FLAGS_VALID_MASK).
+ */
+RTDECL(int) RTFileAioCtxCreate(PRTFILEAIOCTX phAioCtx, uint32_t cAioReqsMax, uint32_t fFlags)
+{
+    AssertPtrReturn(phAioCtx, VERR_INVALID_POINTER);
+    AssertReturn(!(fFlags & ~RTFILEAIOCTX_FLAGS_VALID_MASK), VERR_INVALID_PARAMETER);
+    if (cAioReqsMax == RTFILEAIO_UNLIMITED_REQS) /* The kernel interface needs a maximum. */
+        return VERR_OUT_OF_RANGE;
+
+    PRTFILEAIOCTXINTERNAL pNewCtx = (PRTFILEAIOCTXINTERNAL)RTMemAllocZ(sizeof(RTFILEAIOCTXINTERNAL));
+    if (RT_UNLIKELY(!pNewCtx))
+        return VERR_NO_MEMORY;
+    int rc = rtFileAsyncIoLinuxCreate(cAioReqsMax, &pNewCtx->AioContext);
+    if (RT_FAILURE(rc))
+    {
+        RTMemFree(pNewCtx);
+        return rc;
+    }
+    pNewCtx->fWokenUp     = false;
+    pNewCtx->fWaiting     = false;
+    pNewCtx->hThreadWait  = NIL_RTTHREAD;
+    pNewCtx->cRequestsMax = cAioReqsMax;
+    pNewCtx->fFlags       = fFlags;
+    pNewCtx->u32Magic     = RTFILEAIOCTX_MAGIC;
+    *phAioCtx = (RTFILEAIOCTX)pNewCtx;
+    return rc;
+}
+
+
+/**
+ * Destroys a completion context; a NIL handle is quietly ignored.
+ * @returns VINF_SUCCESS, VERR_FILE_AIO_BUSY with requests outstanding, or the native destroy failure.
+ */
+RTDECL(int) RTFileAioCtxDestroy(RTFILEAIOCTX hAioCtx)
+{
+    if (hAioCtx == NIL_RTFILEAIOCTX)
+        return VINF_SUCCESS;
+    PRTFILEAIOCTXINTERNAL pCtxInt = hAioCtx;
+    RTFILEAIOCTX_VALID_RETURN(pCtxInt);
+    if (RT_UNLIKELY(pCtxInt->cRequests))
+        return VERR_FILE_AIO_BUSY;
+
+    /* The native context goes first; only then is the structure marked dead and freed. */
+    int const rcNative = rtFileAsyncIoLinuxDestroy(pCtxInt->AioContext);
+    if (RT_FAILURE(rcNative))
+        return rcNative;
+    ASMAtomicUoWriteU32(&pCtxInt->u32Magic, RTFILEAIOCTX_MAGIC_DEAD);
+    RTMemFree(pCtxInt);
+    return VINF_SUCCESS;
+}
+
+
+/** Gets the request maximum of a context; NIL means the global limit, an invalid handle yields 0. */
+RTDECL(uint32_t) RTFileAioCtxGetMaxReqCount(RTFILEAIOCTX hAioCtx)
+{
+    /** @todo r=bird: I'm a bit puzzled by the NIL return value since it
+     *        is completely useless in RTFileAioCtxCreate. */
+    if (hAioCtx == NIL_RTFILEAIOCTX)
+        return RTFILEAIO_UNLIMITED_REQS;
+
+    /* Zero is better than garbage when the handle is invalid. */
+    PRTFILEAIOCTXINTERNAL pCtxInt = hAioCtx;
+    RTFILEAIOCTX_VALID_RETURN_RC(pCtxInt, 0);
+    return pCtxInt->cRequestsMax;
+}
+
+/** Nothing to do for this backend: both parameters are ignored and VINF_SUCCESS is returned. */
+RTDECL(int) RTFileAioCtxAssociateWithFile(RTFILEAIOCTX hAioCtx, RTFILE hFile)
+{
+    NOREF(hAioCtx); NOREF(hFile);
+    return VINF_SUCCESS;
+}
+
+/**
+ * Submits a batch of prepared requests to the kernel through the given context.
+ *
+ * @returns IPRT status code; VERR_INVALID_HANDLE if a request handle is invalid
+ *          (nothing is submitted then), VERR_FILE_AIO_INSUFFICIENT_RESSOURCES if
+ *          the submission reports VERR_TRY_AGAIN.
+ * @param   hAioCtx     The context handle.
+ * @param   pahReqs     Array of request handles to submit.
+ * @param   cReqs       Number of entries in pahReqs, must not be zero.
+ */
+RTDECL(int) RTFileAioCtxSubmit(RTFILEAIOCTX hAioCtx, PRTFILEAIOREQ pahReqs, size_t cReqs)
+{
+    int rc = VINF_SUCCESS;
+
+    /* Parameter validation. */
+    PRTFILEAIOCTXINTERNAL pCtxInt = hAioCtx;
+    RTFILEAIOCTX_VALID_RETURN(pCtxInt);
+    AssertReturn(cReqs > 0,  VERR_INVALID_PARAMETER);
+    AssertPtrReturn(pahReqs, VERR_INVALID_POINTER);
+    size_t i = cReqs;
+    PRTFILEAIOREQINTERNAL pReqInt = NULL;
+
+    /* Validate the requests and associate them with the context, last one first. */
+    while (i-- > 0)
+    {
+        pReqInt = pahReqs[i];
+        if (RTFILEAIOREQ_IS_NOT_VALID(pReqInt))
+        {
+            /* Undo the entries above i that were already marked and stop.  Entry i
+               itself just failed validation and must not be written to. */
+            for (size_t iUndo = i + 1; iUndo < cReqs; iUndo++)
+            {
+                pReqInt = pahReqs[iUndo];
+                RTFILEAIOREQ_SET_STATE(pReqInt, PREPARED);
+                pReqInt->pCtxInt = NULL;
+            }
+            return VERR_INVALID_HANDLE;
+        }
+
+        pReqInt->AioContext = pCtxInt->AioContext;
+        pReqInt->pCtxInt    = pCtxInt;
+        RTFILEAIOREQ_SET_STATE(pReqInt, SUBMITTED);
+    }
+
+    do
+    {
+        /*
+         * We cast pahReqs to the Linux iocb structure to avoid copying the requests
+         * into a temporary array. This is possible because the iocb structure is
+         * the first element in the request structure (see RTFILEAIOREQINTERNAL).
+         */
+        int cReqsSubmitted = 0;
+        rc = rtFileAsyncIoLinuxSubmit(pCtxInt->AioContext, cReqs,
+                                      (PLNXKAIOIOCB *)pahReqs,
+                                      &cReqsSubmitted);
+        if (RT_FAILURE(rc))
+        {
+            /* Nothing more was accepted: put the remaining requests back into the
+               prepared state.  Unless resources were merely insufficient, the first
+               of them is at fault (invalid buffer alignment or bad file descriptor)
+               and is completed with the error status instead. */
+            i = cReqs;
+            while (i-- > 0)
+            {
+                /* Already validated. */
+                pReqInt = pahReqs[i];
+                pReqInt->pCtxInt    = NULL;
+                pReqInt->AioContext = 0;
+                RTFILEAIOREQ_SET_STATE(pReqInt, PREPARED);
+            }
+
+            if (rc == VERR_TRY_AGAIN)
+                return VERR_FILE_AIO_INSUFFICIENT_RESSOURCES;
+            else
+            {
+                /* The first request failed. */
+                pReqInt = pahReqs[0];
+                RTFILEAIOREQ_SET_STATE(pReqInt, COMPLETED);
+                pReqInt->Rc = rc;
+                pReqInt->cbTransfered = 0;
+                return rc;
+            }
+        }
+
+        /* Advance. */
+        cReqs   -= cReqsSubmitted;
+        pahReqs += cReqsSubmitted;
+        ASMAtomicAddS32(&pCtxInt->cRequests, cReqsSubmitted);
+    } while (cReqs);
+
+    return rc;
+}
+
+
+/**
+ * Waits for submitted requests to complete and collects their handles.
+ *
+ * @returns IPRT status code.  VERR_FILE_AIO_NO_REQUEST if nothing is pending
+ *          (unless the context allows waiting without pending requests),
+ *          VERR_TIMEOUT when cMillies elapsed, VERR_INTERRUPTED after a wakeup.
+ * @param   hAioCtx     The context handle.
+ * @param   cMinReqs    Minimum number of requests to wait for; 0 is treated as 1.
+ * @param   cMillies    Timeout in milliseconds or RT_INDEFINITE_WAIT.
+ * @param   pahReqs     Where to store the handles of the completed requests.
+ * @param   cReqs       Capacity of pahReqs; must be non-zero and >= cMinReqs.
+ * @param   pcReqs      Where to return the number of handles stored, always set.
+ */
+RTDECL(int) RTFileAioCtxWait(RTFILEAIOCTX hAioCtx, size_t cMinReqs, RTMSINTERVAL cMillies,
+                             PRTFILEAIOREQ pahReqs, size_t cReqs, uint32_t *pcReqs)
+{
+    /* Validate the parameters, making sure to always set pcReqs. */
+    AssertPtrReturn(pcReqs, VERR_INVALID_POINTER);
+    *pcReqs = 0; /* always set */
+    PRTFILEAIOCTXINTERNAL pCtxInt = hAioCtx;
+    RTFILEAIOCTX_VALID_RETURN(pCtxInt);
+    AssertPtrReturn(pahReqs, VERR_INVALID_POINTER);
+    AssertReturn(cReqs != 0, VERR_INVALID_PARAMETER);
+    AssertReturn(cReqs >= cMinReqs, VERR_OUT_OF_RANGE);
+
+    /* Can't wait if there are no requests around. */
+    if (   RT_UNLIKELY(ASMAtomicUoReadS32(&pCtxInt->cRequests) == 0)
+        && !(pCtxInt->fFlags & RTFILEAIOCTX_FLAGS_WAIT_WITHOUT_PENDING_REQUESTS))
+        return VERR_FILE_AIO_NO_REQUEST;
+
+    /* Convert the timeout if specified. */
+    struct timespec    *pTimeout = NULL;
+    struct timespec     Timeout = {0,0};
+    uint64_t            StartNanoTS = 0;
+    if (cMillies != RT_INDEFINITE_WAIT)
+    {
+        Timeout.tv_sec  = cMillies / 1000;
+        Timeout.tv_nsec = cMillies % 1000 * 1000000;
+        pTimeout = &Timeout;
+        StartNanoTS = RTTimeNanoTS();
+    }
+
+    /* Wait for at least one. */
+    if (!cMinReqs)
+        cMinReqs = 1;
+
+    /* For the wakeup call. */
+    Assert(pCtxInt->hThreadWait == NIL_RTTHREAD);
+    ASMAtomicWriteHandle(&pCtxInt->hThreadWait, RTThreadSelf());
+
+    /*
+     * Loop until we're woken up, hit an error (incl timeout), or
+     * have collected the desired number of requests.
+     */
+    int rc = VINF_SUCCESS;
+    int cRequestsCompleted = 0;
+    while (!pCtxInt->fWokenUp)
+    {
+        LNXKAIOIOEVENT  aPortEvents[AIO_MAXIMUM_REQUESTS_PER_CONTEXT];
+        int             cRequestsToWait = RT_MIN(cReqs, AIO_MAXIMUM_REQUESTS_PER_CONTEXT);
+        /* io_getevents rejects a minimum above the event count with EINVAL, so clamp
+           it to this batch; the loop below picks up whatever remains. */
+        long const      cMinToWait      = RT_MIN(cMinReqs, (size_t)cRequestsToWait);
+        ASMAtomicXchgBool(&pCtxInt->fWaiting, true);
+        rc = rtFileAsyncIoLinuxGetEvents(pCtxInt->AioContext, cMinToWait, cRequestsToWait, &aPortEvents[0], pTimeout);
+        ASMAtomicXchgBool(&pCtxInt->fWaiting, false);
+        if (RT_FAILURE(rc))
+            break;
+        uint32_t const cDone = rc;
+        rc = VINF_SUCCESS;
+
+        /* Process received events / requests. */
+        for (uint32_t i = 0; i < cDone; i++)
+        {
+            /* The iocb is the first element in our request structure, so the
+               iocb pointer can be cast directly to the request. */
+            PRTFILEAIOREQINTERNAL pReqInt = (PRTFILEAIOREQINTERNAL)aPortEvents[i].pIoCB;
+            AssertPtr(pReqInt);
+            Assert(pReqInt->u32Magic == RTFILEAIOREQ_MAGIC);
+
+            /** @todo aeichner: The rc field contains the result code
+             *  like you can find in errno for the normal read/write ops.
+             *  But there is a second field called rc2. I don't know the
+             *  purpose for it yet.
+             */
+            if (RT_UNLIKELY(aPortEvents[i].rc < 0))
+                pReqInt->Rc = RTErrConvertFromErrno(-aPortEvents[i].rc); /* Convert to positive value. */
+            else
+            {
+                pReqInt->Rc = VINF_SUCCESS;
+                pReqInt->cbTransfered = aPortEvents[i].rc;
+            }
+
+            /* Mark the request as finished. */
+            RTFILEAIOREQ_SET_STATE(pReqInt, COMPLETED);
+
+            pahReqs[cRequestsCompleted++] = (RTFILEAIOREQ)pReqInt;
+        }
+
+        /* Done yet? If not advance and try again. */
+        if (cDone >= cMinReqs)
+            break;
+        cMinReqs -= cDone;
+        cReqs    -= cDone;
+
+        if (cMillies != RT_INDEFINITE_WAIT)
+        {
+            /* The API doesn't return ETIMEDOUT, so we have to fix that ourselves. */
+            uint64_t NanoTS = RTTimeNanoTS();
+            uint64_t cMilliesElapsed = (NanoTS - StartNanoTS) / 1000000;
+            if (cMilliesElapsed >= cMillies)
+            {
+                rc = VERR_TIMEOUT;
+                break;
+            }
+
+            /* The syscall supposedly updates it, but we're paranoid. :-) */
+            Timeout.tv_sec  = (cMillies - (RTMSINTERVAL)cMilliesElapsed) / 1000;
+            Timeout.tv_nsec = (cMillies - (RTMSINTERVAL)cMilliesElapsed) % 1000 * 1000000;
+        }
+    }
+
+    /* Update the context state and set the return value. */
+    *pcReqs = cRequestsCompleted;
+    ASMAtomicSubS32(&pCtxInt->cRequests, cRequestsCompleted);
+    Assert(pCtxInt->hThreadWait == RTThreadSelf());
+    ASMAtomicWriteHandle(&pCtxInt->hThreadWait, NIL_RTTHREAD);
+
+    /* Clear the wakeup flag and set rc. */
+    if (    pCtxInt->fWokenUp
+        &&  RT_SUCCESS(rc))
+    {
+        ASMAtomicXchgBool(&pCtxInt->fWokenUp, false);
+        rc = VERR_INTERRUPTED;
+    }
+
+    return rc;
+}
+
+
+/**
+ * Wakes up a thread waiting in RTFileAioCtxWait() on the given context.
+ *
+ * @returns VINF_SUCCESS once the handle has passed validation.
+ * @param   hAioCtx     The context handle.
+ */
+RTDECL(int) RTFileAioCtxWakeup(RTFILEAIOCTX hAioCtx)
+{
+    PRTFILEAIOCTXINTERNAL pCtxInt = hAioCtx;
+    RTFILEAIOCTX_VALID_RETURN(pCtxInt);
+
+    /** @todo r=bird: Define the protocol for how to resume work after calling
+     *        this function. */
+
+    bool fWokenUp    = ASMAtomicXchgBool(&pCtxInt->fWokenUp, true);
+
+    /*
+     * Read the thread handle before the status flag.  If we read the handle
+     * after the flag we might end up with an invalid handle because the thread
+     * waiting in RTFileAioCtxWait() might get scheduled before we read the flag
+     * and return.  We can ensure that the handle is valid if fWaiting is true
+     * when reading the handle before the status flag.
+     */
+    RTTHREAD hThread;
+    ASMAtomicReadHandle(&pCtxInt->hThreadWait, &hThread);
+    bool fWaiting    = ASMAtomicReadBool(&pCtxInt->fWaiting);
+    if (    !fWokenUp
+        &&  fWaiting)
+    {
+        /*
+         * If a thread waits the handle must be valid.  It is possible that the thread
+         * returns from rtFileAsyncIoLinuxGetEvents() before the signal is sent.  This is
+         * no problem because we already set fWokenUp to true which will let the thread
+         * return VERR_INTERRUPTED and the next call to RTFileAioCtxWait() will not return
+         * VERR_INTERRUPTED because signals are not saved and will simply vanish if the
+         * destination thread can't receive it.
+         */
+        Assert(hThread != NIL_RTTHREAD);
+        RTThreadPoke(hThread);
+    }
+
+    return VINF_SUCCESS;
+}
+
diff --git a/src/VBox/Runtime/r3/linux/ioqueue-iouringfile-provider.cpp b/src/VBox/Runtime/r3/linux/ioqueue-iouringfile-provider.cpp
new file mode 100644
index 00000000..f6719664
--- /dev/null
+++ b/src/VBox/Runtime/r3/linux/ioqueue-iouringfile-provider.cpp
@@ -0,0 +1,940 @@
+/* $Id: ioqueue-iouringfile-provider.cpp $ */
+/** @file
+ * IPRT - I/O queue, Linux io_uring interface I/O file provider.
+ */
+
+/*
+ * Copyright (C) 2019-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+/** @page pg_rtioqueue_linux     RTIoQueue - Linux io_uring implementation notes
+ * @internal
+ *
+ * The io_uring interface is the most recent interface added to the Linux kernel
+ * to deliver fast and efficient I/O. It was first added with kernel version 5.1 and is
+ * thus not available on most systems as of writing this backend (July 2019).
+ * It supersedes the old async I/O interface and cleans up with some restrictions like
+ * having to disable caching for the file.
+ * The interface is centered around a submission and completion queue to queue multiple new
+ * requests for the kernel to process and get notified about completions to reduce the amount
+ * of context switches to an absolute minimum. It also offers advanced features like
+ * registering a fixed set of memory buffers for I/O upfront to reduce the processing overhead
+ * even more.
+ *
+ * The first implementation will only make use of the basic features and more advanced features
+ * will be added later.
+ * The adept developer probably noticed that the public IPRT I/O queue API resembles the io_uring
+ * interface in many aspects. This is not by accident but to reduce our own overhead as much as possible
+ * while still keeping a consistent platform independent API which allows efficient implementations on
+ * other hosts when they come up.
+ *
+ * The public kernel io_uring interface is completely defined in this file to avoid dragging in additional
+ * dependencies and to avoid compile problems on older hosts missing the interface just like it is done
+ * for the Linux RTFileAio* API. The necessary interface definitions and descriptions were retrieved from:
+ *     * http://kernel.dk/io_uring.pdf
+ *     * https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/include/uapi/linux/io_uring.h
+ */
+
+
+/*********************************************************************************************************************************
+*   Header Files                                                                                                                 *
+*********************************************************************************************************************************/
+#define LOG_GROUP RTLOGGROUP_IOQUEUE
+#include <iprt/ioqueue.h>
+
+#include <iprt/assertcompile.h>
+#include <iprt/asm.h>
+#include <iprt/errcore.h>
+#include <iprt/file.h>
+#include <iprt/log.h>
+#include <iprt/mem.h>
+#include <iprt/string.h>
+
+#include <errno.h>
+#include <unistd.h>
+#include <signal.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <sys/uio.h>
+
+#include "internal/ioqueue.h"
+
+
+/*********************************************************************************************************************************
+*   Defined Constants And Macros                                                                                                 *
+*********************************************************************************************************************************/
+
+/** The syscall number of io_uring_setup(). */
+#define LNX_IOURING_SYSCALL_SETUP     425
+/** The syscall number of io_uring_enter(). */
+#define LNX_IOURING_SYSCALL_ENTER     426
+/** The syscall number of io_uring_register(). */
+#define LNX_IOURING_SYSCALL_REGISTER  427
+/** eventfd2() syscall not associated with io_uring but used for kicking waiters.
+ * The number of this (older) syscall differs between architectures, so the
+ * value provided by <sys/syscall.h> is preferred; 290 is the x86-64 number and
+ * is only used as a fallback when the host headers do not define it. */
+#ifdef __NR_eventfd2
+# define LNX_SYSCALL_EVENTFD2         __NR_eventfd2
+#else
+# define LNX_SYSCALL_EVENTFD2         290
+#endif
+
+
+/*********************************************************************************************************************************
+*   Structures and Typedefs                                                                                                      *
+*********************************************************************************************************************************/
+
+/**
+ * Linux io_uring completion event.
+ *
+ * Entries of this type are read from the completion ring shared with the kernel.
+ */
+typedef struct LNXIOURINGCQE
+{
+    /** Opaque user data associated with the completed request. */
+    uint64_t                    u64User;
+    /** The status code of the request: a non-negative value is reported as the
+     * number of bytes transferred, a negative value is a negated errno value. */
+    int32_t                     rcLnx;
+    /** Some flags which are not used as of now. */
+    uint32_t                    fFlags;
+} LNXIOURINGCQE;
+AssertCompileSize(LNXIOURINGCQE, 16);
+/** Pointer to a Linux io_uring completion event. */
+typedef LNXIOURINGCQE *PLNXIOURINGCQE;
+/** Pointer to a constant linux io_uring completion event. */
+typedef const LNXIOURINGCQE *PCLNXIOURINGCQE;
+
+
+/**
+ * Linux io_uring submission queue entry.
+ *
+ * The layout is fixed at 64 bytes (see the compile time assertion below).
+ */
+typedef struct LNXIOURINGSQE
+{
+    /** The opcode for the request, see LNX_IOURING_OPC_*. */
+    uint8_t                     u8Opc;
+    /** Common flags for the request, see LNX_IOURING_SQE_F_*. */
+    uint8_t                     u8Flags;
+    /** Assigned I/O priority. */
+    uint16_t                    u16IoPrio;
+    /** The file descriptor the request is for. */
+    int32_t                     i32Fd;
+    /** The start offset into the file for the request. */
+    uint64_t                    u64OffStart;
+    /** Buffer pointer or pointer to io vector array depending on opcode. */
+    uint64_t                    u64AddrBufIoVec;
+    /** Size of the buffer in bytes or number of io vectors. */
+    uint32_t                    u32BufIoVecSz;
+    /** Opcode dependent data. */
+    union
+    {
+        /** Flags for read/write requests. */
+        uint32_t                u32KrnlRwFlags;
+        /** Flags for fsync() like requests. */
+        uint32_t                u32FsyncFlags;
+        /** Flags for poll() like requests. */
+        uint16_t                u16PollFlags;
+        /** Flags for sync_file_range() like requests. */
+        uint32_t                u32SyncFileRangeFlags;
+        /** Flags for requests requiring a msg structure. */
+        uint32_t                u32MsgFlags;
+    } uOpc;
+    /** Opaque user data associated with the request and returned during completion. */
+    uint64_t                    u64User;
+    /** Request type dependent data. */
+    union
+    {
+        /** Fixed buffer index if indicated by the request flags. */
+        uint16_t                u16FixedBufIdx;
+        /** Padding to align the structure to 64 bytes. */
+        uint64_t                au64Padding[3];
+    } uReq;
+} LNXIOURINGSQE;
+AssertCompileSize(LNXIOURINGSQE, 64);
+/** Pointer to a Linux io_uring submission queue entry. */
+typedef LNXIOURINGSQE *PLNXIOURINGSQE;
+/** Pointer to a constant Linux io_uring submission queue entry. */
+typedef const LNXIOURINGSQE *PCLNXIOURINGSQE;
+
+
+/**
+ * Linux io_uring SQ ring offsets, filled in as part of LNXIOURINGPARAMS.
+ *
+ * All members are byte offsets into the mapped SQ ring at which the
+ * respective item can be found.
+ */
+typedef struct LNXIOURINGSQ
+{
+    /** Offset of the head counter (not written by this code; presumably advanced
+     * by the kernel while it consumes requests - TODO confirm). */
+    uint32_t                    u32OffHead;
+    /** Offset of the tail counter, userspace publishes new requests by advancing it. */
+    uint32_t                    u32OffTail;
+    /** Offset of the mask for the head and tail counters to apply to retrieve the index. */
+    uint32_t                    u32OffRingMask;
+    /** Offset of the number of entries in the SQ ring. */
+    uint32_t                    u32OffRingEntries;
+    /** Offset of the flags set asynchronously by the kernel. */
+    uint32_t                    u32OffFlags;
+    /** Offset of the counter of dropped requests. */
+    uint32_t                    u32OffDroppedReqs;
+    /** Offset where to find the array of SQ entries. */
+    uint32_t                    u32OffArray;
+    /** Reserved. */
+    uint32_t                    u32Rsvd0;
+    /** Reserved. */
+    uint64_t                    u64Rsvd1;
+} LNXIOURINGSQ;
+AssertCompileSize(LNXIOURINGSQ, 40);
+/** Pointer to a Linux io_uring SQ ring header. */
+typedef LNXIOURINGSQ *PLNXIOURINGSQ;
+/** Pointer to a constant Linux io_uring SQ ring header. */
+typedef const LNXIOURINGSQ *PCLNXIOURINGSQ;
+
+
+/**
+ * Linux io_uring CQ ring offsets, filled in as part of LNXIOURINGPARAMS.
+ *
+ * All members are byte offsets into the mapped CQ ring at which the
+ * respective item can be found.
+ */
+typedef struct LNXIOURINGCQ
+{
+    /** Offset of the head counter, userspace reads completion events starting
+     * here and advances it after consuming them. */
+    uint32_t                    u32OffHead;
+    /** Offset of the tail counter (only read by this code; presumably advanced
+     * by the kernel when completion events happen - TODO confirm). */
+    uint32_t                    u32OffTail;
+    /** Offset of the mask for the head and tail counters to apply to retrieve the index. */
+    uint32_t                    u32OffRingMask;
+    /** Offset of the number of entries in the CQ ring. */
+    uint32_t                    u32OffRingEntries;
+    /** Offset of the counter of CQ overflows that happened. */
+    uint32_t                    u32OffOverflowCnt;
+    /** Offset where to find the array of completion queue entries. */
+    uint32_t                    u32OffCqes;
+    /** Reserved. */
+    uint64_t                    au64Rsvd0[2];
+} LNXIOURINGCQ;
+AssertCompileSize(LNXIOURINGCQ, 40);
+/** Pointer to a Linux io_uring CQ ring header. */
+typedef LNXIOURINGCQ *PLNXIOURINGCQ;
+/** Pointer to a constant Linux io_uring CQ ring header. */
+typedef const LNXIOURINGCQ *PCLNXIOURINGCQ;
+
+
+/**
+ * Linux io_uring parameters passed to io_uring_setup().
+ *
+ * The structure is handed to the kernel as is, so its size is asserted at
+ * compile time just like for the other kernel interface structures.
+ */
+typedef struct LNXIOURINGPARAMS
+{
+    /** Number of SQ entries requested, must be power of 2. */
+    uint32_t                    u32SqEntriesCnt;
+    /** Number of CQ entries requested, must be power of 2. */
+    uint32_t                    u32CqEntriesCnt;
+    /** Flags for the ring, see LNX_IOURING_SETUP_F_*. */
+    uint32_t                    u32Flags;
+    /** Affinity of the kernel side SQ polling thread if enabled. */
+    uint32_t                    u32SqPollCpu;
+    /** Milliseconds after the kernel side SQ polling thread goes to sleep
+     * if there are no requests to process. */
+    uint32_t                    u32SqPollIdleMs;
+    /** Reserved. */
+    uint32_t                    au32Rsvd0[5];
+    /** Offsets returned for the submission queue. */
+    LNXIOURINGSQ                SqOffsets;
+    /** Offsets returned for the completion queue. */
+    LNXIOURINGCQ                CqOffsets;
+} LNXIOURINGPARAMS;
+AssertCompileSize(LNXIOURINGPARAMS, 120);
+/** Pointer to Linux io_uring parameters. */
+typedef LNXIOURINGPARAMS *PLNXIOURINGPARAMS;
+/** Pointer to constant Linux io_uring parameters. */
+typedef const LNXIOURINGPARAMS *PCLNXIOURINGPARAMS;
+
+
+/** @name LNXIOURINGSQE::u8Opc defined opcodes.
+ * @{ */
+/** Opcode to profile the interface, does nothing. */
+#define LNX_IOURING_OPC_NOP             0
+/** preadv() like request. */
+#define LNX_IOURING_OPC_READV           1
+/** pwritev() like request. */
+#define LNX_IOURING_OPC_WRITEV          2
+/** fsync() like request. */
+#define LNX_IOURING_OPC_FSYNC           3
+/** Read request using a fixed preset buffer. */
+#define LNX_IOURING_OPC_READ_FIXED      4
+/** Write request using a fixed preset buffer. */
+#define LNX_IOURING_OPC_WRITE_FIXED     5
+/** Add file descriptor to pollset. */
+#define LNX_IOURING_OPC_POLL_ADD        6
+/** Remove file descriptor from pollset. */
+#define LNX_IOURING_OPC_POLL_REMOVE     7
+/** sync_file_range() like request. */
+#define LNX_IOURING_OPC_SYNC_FILE_RANGE 8
+/** sendmsg() like request. */
+#define LNX_IOURING_OPC_SENDMSG         9
+/** recvmsg() like request. */
+#define LNX_IOURING_OPC_RECVMSG         10
+/** @} */
+
+
+/** @name Additional flags for LNX_IOURING_OPC_FSYNC requests.
+ * @{ */
+/** Sync userdata as well instead of metadata only.
+ * @note (review) The name suggests fdatasync() like behavior, i.e. data plus
+ *       only the metadata required to retrieve it - confirm against the
+ *       kernel documentation before relying on the description above. */
+#define LNX_IOURING_OPC_FSYNC_DATASYNC  RT_BIT_32(0)
+/** @} */
+
+
+/** @name Flags for the LNX_IOURING_SYSCALL_SETUP syscall.
+ * @{ */
+/** The I/O context is polled. */
+#define LNX_IOURING_SETUP_F_IOPOLL      RT_BIT_32(0)
+/** The kernel should poll the submission queue. */
+#define LNX_IOURING_SETUP_F_SQPOLL      RT_BIT_32(1)
+/** Sets the CPU affinity of the kernel thread polling the submission queue. */
+#define LNX_IOURING_SETUP_F_SQAFF       RT_BIT_32(2)
+/** @} */
+
+
+/** @name Flags for LNXIOURINGSQE::u8Flags.
+ * @{ */
+/** The file descriptor was registered before use. */
+#define LNX_IOURING_SQE_F_FIXED_FILE    RT_BIT(0)
+/** Complete all active requests before issuing the request with the flag set. */
+#define LNX_IOURING_SQE_F_IO_DRAIN      RT_BIT(1)
+/** Links the request with the flag set to the next one. */
+#define LNX_IOURING_SQE_F_IO_LINK       RT_BIT(2)
+/** @} */
+
+
+/** @name Magic mmap offsets to map submission and completion queues.
+ * @{ */
+/** Used to map the submission queue. */
+#define LNX_IOURING_MMAP_OFF_SQ         UINT64_C(0)
+/** Used to map the completion queue. */
+#define LNX_IOURING_MMAP_OFF_CQ         UINT64_C(0x8000000)
+/** Used to map the submission queue entries array. */
+#define LNX_IOURING_MMAP_OFF_SQES       UINT64_C(0x10000000)
+/** @} */
+
+
+/** @name Flags used for the SQ ring structure.
+ * @{ */
+/** The kernel thread needs an io_uring_enter() wakeup to continue processing requests. */
+#define LNX_IOURING_SQ_RING_F_NEED_WAKEUP           RT_BIT_32(0)
+/** @} */
+
+
+/** @name Flags for the LNX_IOURING_SYSCALL_ENTER syscall.
+ * @{ */
+/** Retrieve completion events for the completion queue. */
+#define LNX_IOURING_ENTER_F_GETEVENTS               RT_BIT_32(0)
+/** Wakes the suspended kernel thread processing the requests. */
+#define LNX_IOURING_ENTER_F_SQ_WAKEUP               RT_BIT_32(1)
+/** @} */
+
+
+/** @name Opcodes for the LNX_IOURING_SYSCALL_REGISTER syscall.
+ * @{ */
+/** Register a fixed set of buffers. */
+#define LNX_IOURING_REGISTER_OPC_BUFFERS_REGISTER   0
+/** Unregisters a fixed set of buffers registered previously. */
+#define LNX_IOURING_REGISTER_OPC_BUFFERS_UNREGISTER 1
+/** Register a fixed set of files. */
+#define LNX_IOURING_REGISTER_OPC_FILES_REGISTER     2
+/** Unregisters a fixed set of files registered previously. */
+#define LNX_IOURING_REGISTER_OPC_FILES_UNREGISTER   3
+/** Register an eventfd associated with the I/O ring. */
+#define LNX_IOURING_REGISTER_OPC_EVENTFD_REGISTER   4
+/** Unregisters an eventfd registered previously. */
+#define LNX_IOURING_REGISTER_OPC_EVENTFD_UNREGISTER 5
+/** @} */
+
+
+/**
+ * SQ ring structure.
+ *
+ * Holds the pointers into and the cached properties of the mapped submission
+ * ring, set up once during queue initialization.
+ *
+ * @note Some members of this structure point to memory shared with the kernel,
+ *       hence the volatile keyword.
+ */
+typedef struct RTIOQUEUESQ
+{
+    /** Pointer to the head counter. */
+    volatile uint32_t           *pidxHead;
+    /** Pointer to the tail counter, updated when committing prepared requests. */
+    volatile uint32_t           *pidxTail;
+    /** Mask to apply for the counters to get to the index (read once during init). */
+    uint32_t                    fRingMask;
+    /** Number of entries in the ring (read once during init). */
+    uint32_t                    cEntries;
+    /** Pointer to the global flags. */
+    volatile uint32_t           *pfFlags;
+    /** Pointer to the indirection array used for indexing the real SQ entries. */
+    volatile uint32_t           *paidxSqes;
+} RTIOQUEUESQ;
+
+
+/**
+ * CQ ring structure.
+ *
+ * Holds the pointers into and the cached properties of the mapped completion
+ * ring, set up once during queue initialization.
+ *
+ * @note Some members of this structure point to memory shared with the kernel,
+ *       hence the volatile keyword.
+ */
+typedef struct RTIOQUEUECQ
+{
+    /** Pointer to the head counter, advanced after completion events were consumed. */
+    volatile uint32_t           *pidxHead;
+    /** Pointer to the tail counter. */
+    volatile uint32_t           *pidxTail;
+    /** Mask to apply for the counters to get to the index (read once during init). */
+    uint32_t                    fRingMask;
+    /** Number of entries in the ring (read once during init). */
+    uint32_t                    cEntries;
+    /** Pointer to the completion entry ring. */
+    volatile LNXIOURINGCQE      *paCqes;
+} RTIOQUEUECQ;
+
+
+/**
+ * Internal I/O queue provider instance data.
+ */
+typedef struct RTIOQUEUEPROVINT
+{
+    /** The io_uring file descriptor. */
+    int                         iFdIoCtx;
+    /** The eventfd file descriptor registered with the ring. */
+    int                         iFdEvt;
+    /** The submission queue. */
+    RTIOQUEUESQ                 Sq;
+    /** The currently uncommitted tail for the SQ. */
+    uint32_t                    idxSqTail;
+    /** Number of uncommitted SQEs. */
+    uint32_t                    cSqesToCommit;
+    /** The completion queue. */
+    RTIOQUEUECQ                 Cq;
+    /** Pointer to the mapped SQES entries. */
+    PLNXIOURINGSQE              paSqes;
+    /** Pointer to the iovec structure used for non S/G requests
+     * (array with one entry per SQ entry, indexed like the SQE array). */
+    struct iovec                *paIoVecs;
+    /** Pointer returned by mmap() for the SQ ring, used for unmapping. */
+    void                        *pvMMapSqRing;
+    /** Pointer returned by mmap() for the CQ ring, used for unmapping. */
+    void                        *pvMMapCqRing;
+    /** Pointer returned by mmap() for the SQ entries array, used for unmapping. */
+    void                        *pvMMapSqes;
+    /** Size of the mapped SQ ring, used for unmapping. */
+    size_t                      cbMMapSqRing;
+    /** Size of the mapped CQ ring, used for unmapping. */
+    size_t                      cbMMapCqRing;
+    /** Size of the mapped SQ entries array, used for unmapping. */
+    size_t                      cbMMapSqes;
+    /** Flag whether the waiter was woken up externally. */
+    volatile bool               fExtIntr;
+} RTIOQUEUEPROVINT;
+/** Pointer to the internal I/O queue provider instance data. */
+typedef RTIOQUEUEPROVINT *PRTIOQUEUEPROVINT;
+
+
+/*********************************************************************************************************************************
+*   Internal Functions                                                                                                           *
+*********************************************************************************************************************************/
+
+/**
+ * Thin wrapper around the io_uring_setup() syscall.
+ *
+ * @returns IPRT status code, the errno value is converted on failure.
+ * @param   cEntries            Number of entries for submission and completion queues.
+ * @param   pParams             The I/O ring parameters, passed on to the kernel which
+ *                              updates them on success.
+ * @param   piFdIoCtx           Where to store the file descriptor of the I/O ring on success,
+ *                              left untouched on failure.
+ */
+DECLINLINE(int) rtIoQueueLnxIoURingSetup(uint32_t cEntries, PLNXIOURINGPARAMS pParams, int32_t *piFdIoCtx)
+{
+    int const iFdRing = syscall(LNX_IOURING_SYSCALL_SETUP, cEntries, pParams);
+    if (RT_LIKELY(iFdRing != -1))
+    {
+        *piFdIoCtx = iFdRing;
+        return VINF_SUCCESS;
+    }
+
+    return RTErrConvertFromErrno(errno);
+}
+
+
+/**
+ * Thin wrapper around the io_uring_enter() syscall.
+ *
+ * The signal mask arguments of the syscall are always passed as NULL/0.
+ *
+ * @returns IPRT status code, the errno value is converted on failure.
+ * @param   iFdIoCtx            The I/O ring file descriptor.
+ * @param   cToSubmit           Maximum number of requests waiting for processing.
+ * @param   cMinComplete        Minimum number of completion events to accumulate before returning.
+ * @param   fFlags              Flags for io_uring_enter(), see LNX_IOURING_ENTER_F_*.
+ */
+DECLINLINE(int) rtIoQueueLnxIoURingEnter(int32_t iFdIoCtx, uint32_t cToSubmit, uint32_t cMinComplete,
+                                         uint32_t fFlags)
+{
+    int const rcSys = syscall(LNX_IOURING_SYSCALL_ENTER, iFdIoCtx, cToSubmit, cMinComplete, fFlags,
+                              NULL, 0);
+    if (RT_LIKELY(rcSys != -1))
+        return VINF_SUCCESS;
+
+    return RTErrConvertFromErrno(errno);
+}
+
+
+/**
+ * Thin wrapper around the io_uring_register() syscall.
+ *
+ * @returns IPRT status code, the errno value is converted on failure.
+ * @param   iFdIoCtx            The I/O ring file descriptor.
+ * @param   uOpc                Operation to perform, see LNX_IOURING_REGISTER_OPC_*.
+ * @param   pvArg               Opaque arguments.
+ * @param   cArgs               Number of arguments.
+ */
+DECLINLINE(int) rtIoQueueLnxIoURingRegister(int32_t iFdIoCtx, uint32_t uOpc, void *pvArg,
+                                            uint32_t cArgs)
+{
+    int const rcSys = syscall(LNX_IOURING_SYSCALL_REGISTER, iFdIoCtx, uOpc, pvArg, cArgs);
+    if (RT_LIKELY(rcSys != -1))
+        return VINF_SUCCESS;
+
+    return RTErrConvertFromErrno(errno);
+}
+
+
+/**
+ * Maps a part of the I/O ring into the process and converts the result to an
+ * IPRT status code.
+ *
+ * The mapping is always created shared, readable, writable and pre-populated.
+ *
+ * @returns IPRT status code, the errno value is converted on failure.
+ * @param   iFdIoCtx            The I/O ring file descriptor.
+ * @param   offMmap             The mmap() offset.
+ * @param   cbMmap              How much to map.
+ * @param   ppv                 Where to store the pointer to the mapping on success,
+ *                              left untouched on failure.
+ */
+DECLINLINE(int) rtIoQueueLnxIoURingMmap(int iFdIoCtx, off_t offMmap, size_t cbMmap, void **ppv)
+{
+    int const fProt  = PROT_READ | PROT_WRITE;
+    int const fFlags = MAP_SHARED | MAP_POPULATE;
+
+    void *pvMapping = mmap(0, cbMmap, fProt, fFlags, iFdIoCtx, offMmap);
+    if (pvMapping == MAP_FAILED)
+        return RTErrConvertFromErrno(errno);
+
+    *ppv = pvMapping;
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Thin wrapper around the eventfd2() syscall.
+ *
+ * @returns IPRT status code, the errno value is converted on failure.
+ * @param   uValInit            The initial value of the maintained counter.
+ * @param   fFlags              Flags controlling the eventfd behavior.
+ * @param   piFdEvt             Where to store the file descriptor of the eventfd object on success,
+ *                              left untouched on failure.
+ */
+DECLINLINE(int) rtIoQueueLnxEventfd2(uint32_t uValInit, uint32_t fFlags, int *piFdEvt)
+{
+    int const iFdNew = syscall(LNX_SYSCALL_EVENTFD2, uValInit, fFlags);
+    if (RT_LIKELY(iFdNew != -1))
+    {
+        *piFdEvt = iFdNew;
+        return VINF_SUCCESS;
+    }
+
+    return RTErrConvertFromErrno(errno);
+}
+
+
+/**
+ * Checks the completion event queue for pending events.
+ *
+ * Consumes at most @a cCEvt entries from the completion ring starting at the
+ * current head, converts them into IPRT completion events and publishes the
+ * new head afterwards.
+ *
+ * @param   pThis               The provider instance.
+ * @param   paCEvt              Pointer to the array of completion events.
+ * @param   cCEvt               Maximum number of completion events the array can hold.
+ * @param   pcCEvtSeen          Where to store the number of completion events processed.
+ */
+static void rtIoQueueLnxIoURingFileProvCqCheck(PRTIOQUEUEPROVINT pThis, PRTIOQUEUECEVT paCEvt,
+                                               uint32_t cCEvt, uint32_t *pcCEvtSeen)
+{
+    /* The fencing and atomic accesses are kind of overkill and probably not required (dev paranoia). */
+    ASMReadFence();
+    uint32_t idxCqHead = ASMAtomicReadU32(pThis->Cq.pidxHead);
+    uint32_t idxCqTail = ASMAtomicReadU32(pThis->Cq.pidxTail);
+    ASMReadFence();
+
+    uint32_t cCEvtSeen = 0;
+
+    while (   idxCqTail != idxCqHead
+           && cCEvtSeen < cCEvt)
+    {
+        /* Get the index. */
+        uint32_t idxCqe = idxCqHead & pThis->Cq.fRingMask;
+        volatile LNXIOURINGCQE *pCqe = &pThis->Cq.paCqes[idxCqe];
+
+        /* Read the status exactly once, the entry lives in memory shared with the kernel. */
+        int32_t const rcLnx = pCqe->rcLnx;
+
+        paCEvt->pvUser = (void *)(uintptr_t)pCqe->u64User;
+        if (rcLnx >= 0)
+        {
+            paCEvt->rcReq    = VINF_SUCCESS;
+            paCEvt->cbXfered = (size_t)rcLnx;
+        }
+        else
+        {
+            /* Negative values are negated errno codes; don't leave the transfer size uninitialized. */
+            paCEvt->rcReq    = RTErrConvertFromErrno(-rcLnx);
+            paCEvt->cbXfered = 0;
+        }
+
+#ifdef RT_STRICT /* poison */
+        memset((void *)pCqe, 0xff, sizeof(*pCqe));
+#endif
+
+        paCEvt++;
+        cCEvtSeen++;
+        idxCqHead++;
+    }
+
+    *pcCEvtSeen = cCEvtSeen;
+
+    /* Paranoia strikes again: publish the new head only after all entries were read. */
+    ASMWriteFence();
+    ASMAtomicWriteU32(pThis->Cq.pidxHead, idxCqHead);
+    ASMWriteFence();
+}
+
+
+/** @interface_method_impl{RTIOQUEUEPROVVTABLE,pfnIsSupported} */
+static DECLCALLBACK(bool) rtIoQueueLnxIoURingFileProv_IsSupported(void)
+{
+    /*
+     * Probe by creating a small throw-away I/O ring.
+     * The common code/public API already checked for the proper handle type.
+     */
+    LNXIOURINGPARAMS Params;
+    RT_ZERO(Params);
+
+    int iFdRing = 0;
+    if (RT_FAILURE(rtIoQueueLnxIoURingSetup(16, &Params, &iFdRing)))
+        return false;
+
+    /*
+     * The ring alone is not enough: an eventfd descriptor must be registrable as well
+     * in order to get notified about completion events while still being able to kick
+     * the waiter externally out of the wait.
+     */
+    bool fSupported = false;
+    int  iFdEvent   = 0;
+    if (RT_SUCCESS(rtIoQueueLnxEventfd2(0 /*uValInit*/, 0 /*fFlags*/, &iFdEvent)))
+    {
+        int rcReg = rtIoQueueLnxIoURingRegister(iFdRing, LNX_IOURING_REGISTER_OPC_EVENTFD_REGISTER,
+                                                &iFdEvent, 1 /*cArgs*/);
+        fSupported = RT_SUCCESS(rcReg);
+
+        int rcCloseEvt = close(iFdEvent); Assert(!rcCloseEvt); RT_NOREF(rcCloseEvt);
+    }
+
+    int rcCloseRing = close(iFdRing); Assert(!rcCloseRing); RT_NOREF(rcCloseRing);
+    return fSupported;
+}
+
+
+/** @interface_method_impl{RTIOQUEUEPROVVTABLE,pfnQueueInit}
+ *
+ * Creates the I/O ring, allocates the per SQ entry iovec array, registers an
+ * eventfd with the ring and maps the SQ ring, CQ ring and SQ entries array.
+ * On any failure everything acquired so far is released again and the status
+ * of the failing step is returned.  @a fFlags and @a cCqEntries are currently
+ * not used.
+ */
+static DECLCALLBACK(int) rtIoQueueLnxIoURingFileProv_QueueInit(RTIOQUEUEPROV hIoQueueProv, uint32_t fFlags,
+                                                               uint32_t cSqEntries, uint32_t cCqEntries)
+{
+    RT_NOREF(fFlags, cCqEntries);
+
+    PRTIOQUEUEPROVINT pThis = hIoQueueProv;
+    LNXIOURINGPARAMS Params;
+    RT_ZERO(Params);
+
+    pThis->cSqesToCommit = 0;
+    pThis->fExtIntr      = false;
+
+    int rc = rtIoQueueLnxIoURingSetup(cSqEntries, &Params, &pThis->iFdIoCtx);
+    if (RT_SUCCESS(rc))
+    {
+        /* Sizes of the regions to map, derived from the offsets and counts the kernel returned. */
+        pThis->cbMMapSqRing = Params.SqOffsets.u32OffArray + Params.u32SqEntriesCnt * sizeof(uint32_t);
+        pThis->cbMMapCqRing = Params.CqOffsets.u32OffCqes + Params.u32CqEntriesCnt * sizeof(LNXIOURINGCQE);
+        pThis->cbMMapSqes   = Params.u32SqEntriesCnt * sizeof(LNXIOURINGSQE);
+
+        pThis->paIoVecs = (struct iovec *)RTMemAllocZ(Params.u32SqEntriesCnt * sizeof(struct iovec));
+        if (RT_LIKELY(pThis->paIoVecs))
+        {
+            rc = rtIoQueueLnxEventfd2(0 /*uValInit*/, 0 /*fFlags*/, &pThis->iFdEvt);
+            if (RT_SUCCESS(rc))
+            {
+                rc = rtIoQueueLnxIoURingRegister(pThis->iFdIoCtx, LNX_IOURING_REGISTER_OPC_EVENTFD_REGISTER, &pThis->iFdEvt, 1 /*cArgs*/);
+                if (RT_SUCCESS(rc))
+                {
+                    /* Map the rings into userspace. */
+                    rc = rtIoQueueLnxIoURingMmap(pThis->iFdIoCtx, LNX_IOURING_MMAP_OFF_SQ, pThis->cbMMapSqRing, &pThis->pvMMapSqRing);
+                    if (RT_SUCCESS(rc))
+                    {
+                        rc = rtIoQueueLnxIoURingMmap(pThis->iFdIoCtx, LNX_IOURING_MMAP_OFF_CQ, pThis->cbMMapCqRing, &pThis->pvMMapCqRing);
+                        if (RT_SUCCESS(rc))
+                        {
+                            rc = rtIoQueueLnxIoURingMmap(pThis->iFdIoCtx, LNX_IOURING_MMAP_OFF_SQES, pThis->cbMMapSqes, &pThis->pvMMapSqes);
+                            if (RT_SUCCESS(rc))
+                            {
+                                /* Resolve the SQ ring members using the offsets returned by the kernel. */
+                                uint8_t *pbTmp = (uint8_t *)pThis->pvMMapSqRing;
+
+                                pThis->Sq.pidxHead  = (uint32_t *)(pbTmp + Params.SqOffsets.u32OffHead);
+                                pThis->Sq.pidxTail  = (uint32_t *)(pbTmp + Params.SqOffsets.u32OffTail);
+                                pThis->Sq.fRingMask = *(uint32_t *)(pbTmp + Params.SqOffsets.u32OffRingMask);
+                                pThis->Sq.cEntries  = *(uint32_t *)(pbTmp + Params.SqOffsets.u32OffRingEntries);
+                                pThis->Sq.pfFlags   = (uint32_t *)(pbTmp + Params.SqOffsets.u32OffFlags);
+                                pThis->Sq.paidxSqes = (uint32_t *)(pbTmp + Params.SqOffsets.u32OffArray);
+                                pThis->idxSqTail    = *pThis->Sq.pidxTail;
+
+                                pThis->paSqes       = (PLNXIOURINGSQE)pThis->pvMMapSqes;
+
+                                /* Same for the CQ ring. */
+                                pbTmp = (uint8_t *)pThis->pvMMapCqRing;
+
+                                pThis->Cq.pidxHead  = (uint32_t *)(pbTmp + Params.CqOffsets.u32OffHead);
+                                pThis->Cq.pidxTail  = (uint32_t *)(pbTmp + Params.CqOffsets.u32OffTail);
+                                pThis->Cq.fRingMask = *(uint32_t *)(pbTmp + Params.CqOffsets.u32OffRingMask);
+                                pThis->Cq.cEntries  = *(uint32_t *)(pbTmp + Params.CqOffsets.u32OffRingEntries);
+                                pThis->Cq.paCqes    = (PLNXIOURINGCQE)(pbTmp + Params.CqOffsets.u32OffCqes);
+                                return VINF_SUCCESS;
+                            }
+
+                            munmap(pThis->pvMMapCqRing, pThis->cbMMapCqRing);
+                        }
+
+                        munmap(pThis->pvMMapSqRing, pThis->cbMMapSqRing);
+                    }
+
+                    /* Use a separate status variable, rc must keep the status of the failed mapping. */
+                    int rc2 = rtIoQueueLnxIoURingRegister(pThis->iFdIoCtx, LNX_IOURING_REGISTER_OPC_EVENTFD_UNREGISTER, NULL, 0);
+                    AssertRC(rc2); RT_NOREF(rc2);
+                }
+
+                close(pThis->iFdEvt);
+            }
+
+            RTMemFree(pThis->paIoVecs);
+        }
+        else
+            rc = VERR_NO_MEMORY; /* Otherwise the successful setup status would be returned. */
+
+        int rcLnx = close(pThis->iFdIoCtx); Assert(!rcLnx); RT_NOREF(rcLnx);
+    }
+
+    return rc;
+}
+
+
+/** @interface_method_impl{RTIOQUEUEPROVVTABLE,pfnQueueDestroy}
+ *
+ * Unmaps the rings, unregisters and closes the eventfd, closes the I/O ring,
+ * frees the iovec array and finally clears the instance data.
+ */
+static DECLCALLBACK(void) rtIoQueueLnxIoURingFileProv_QueueDestroy(RTIOQUEUEPROV hIoQueueProv)
+{
+    PRTIOQUEUEPROVINT pThis = hIoQueueProv;
+
+    int rcLnx = munmap(pThis->pvMMapSqRing, pThis->cbMMapSqRing); Assert(!rcLnx); RT_NOREF(rcLnx);
+    rcLnx = munmap(pThis->pvMMapCqRing, pThis->cbMMapCqRing); Assert(!rcLnx); RT_NOREF(rcLnx);
+    rcLnx = munmap(pThis->pvMMapSqes, pThis->cbMMapSqes); Assert(!rcLnx); RT_NOREF(rcLnx);
+
+    int rc = rtIoQueueLnxIoURingRegister(pThis->iFdIoCtx, LNX_IOURING_REGISTER_OPC_EVENTFD_UNREGISTER, NULL, 0);
+    AssertRC(rc);
+
+    close(pThis->iFdEvt);
+    close(pThis->iFdIoCtx);
+    RTMemFree(pThis->paIoVecs);
+
+    /* Clear the instance data itself (not just the local pointer) so no stale
+       descriptors or mappings are left behind. */
+    RT_ZERO(*pThis);
+}
+
+
+/** @interface_method_impl{RTIOQUEUEPROVVTABLE,pfnHandleRegister} */
+static DECLCALLBACK(int) rtIoQueueLnxIoURingFileProv_HandleRegister(RTIOQUEUEPROV hIoQueueProv, PCRTHANDLE pHandle)
+{
+    /* Nothing to do for now, every handle is accepted as is. */
+    /** @todo Add support for fixed file sets later. */
+    RT_NOREF(hIoQueueProv);
+    RT_NOREF(pHandle);
+    return VINF_SUCCESS;
+}
+
+
+/** @interface_method_impl{RTIOQUEUEPROVVTABLE,pfnHandleDeregister} */
+static DECLCALLBACK(int) rtIoQueueLnxIoURingFileProv_HandleDeregister(RTIOQUEUEPROV hIoQueueProv, PCRTHANDLE pHandle)
+{
+    /* Nothing to undo as registering a handle does not do anything either. */
+    /** @todo Add support for fixed file sets later. */
+    RT_NOREF(hIoQueueProv);
+    RT_NOREF(pHandle);
+    return VINF_SUCCESS;
+}
+
+
+/** @interface_method_impl{RTIOQUEUEPROVVTABLE,pfnReqPrepare}
+ *
+ * Fills in the SQ entry at the current uncommitted tail and advances the
+ * uncommitted tail.  Nothing becomes visible to the kernel before the commit
+ * callback publishes the tail.  Reads and writes are submitted as vectored
+ * requests using the single iovec slot belonging to the SQ entry.
+ *
+ * @note This function does not check for a full submission ring.
+ *       NOTE(review): presumably the caller guarantees this - confirm.
+ */
+static DECLCALLBACK(int) rtIoQueueLnxIoURingFileProv_ReqPrepare(RTIOQUEUEPROV hIoQueueProv, PCRTHANDLE pHandle, RTIOQUEUEOP enmOp,
+                                                                uint64_t off, void *pvBuf, size_t cbBuf, uint32_t fReqFlags,
+                                                                void *pvUser)
+{
+    PRTIOQUEUEPROVINT pThis = hIoQueueProv;
+    RT_NOREF(fReqFlags);
+
+    uint32_t idx = pThis->idxSqTail & pThis->Sq.fRingMask;
+    PLNXIOURINGSQE pSqe = &pThis->paSqes[idx];
+    struct iovec *pIoVec = &pThis->paIoVecs[idx];
+
+    pIoVec->iov_base = pvBuf;
+    pIoVec->iov_len  = cbBuf;
+
+    pSqe->u8Flags         = 0;
+    pSqe->u16IoPrio       = 0;
+    pSqe->i32Fd           = (int32_t)RTFileToNative(pHandle->u.hFile);
+    pSqe->u64OffStart     = off;
+    pSqe->u64AddrBufIoVec = (uint64_t)(uintptr_t)pIoVec;
+    pSqe->u32BufIoVecSz   = 1;
+    pSqe->u64User         = (uint64_t)(uintptr_t)pvUser;
+
+    switch (enmOp)
+    {
+        case RTIOQUEUEOP_READ:
+            pSqe->u8Opc               = LNX_IOURING_OPC_READV;
+            pSqe->uOpc.u32KrnlRwFlags = 0;
+            break;
+        case RTIOQUEUEOP_WRITE:
+            pSqe->u8Opc               = LNX_IOURING_OPC_WRITEV;
+            pSqe->uOpc.u32KrnlRwFlags = 0;
+            break;
+        case RTIOQUEUEOP_SYNC:
+            pSqe->u8Opc              = LNX_IOURING_OPC_FSYNC;
+            pSqe->uOpc.u32FsyncFlags = 0;
+            /*
+             * A sync request carries no buffer: the kernel rejects fsync requests
+             * with a non-zero address and interprets offset and length as the range
+             * to sync.  Clear all three so the whole file is flushed.
+             */
+            pSqe->u64OffStart        = 0;
+            pSqe->u64AddrBufIoVec    = 0;
+            pSqe->u32BufIoVecSz      = 0;
+            break;
+        default:
+            /* The tail was not advanced yet, so the partially filled entry is never submitted. */
+            AssertMsgFailedReturn(("Invalid I/O queue operation: %d\n", enmOp),
+                                  VERR_INVALID_PARAMETER);
+    }
+
+    pThis->Sq.paidxSqes[idx] = idx;
+    pThis->idxSqTail++;
+    pThis->cSqesToCommit++;
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Hands all prepared submission queue entries over for processing.
+ *
+ * The locally tracked tail index is written to the location Sq.pidxTail points
+ * to, with a write fence before and after, and rtIoQueueLnxIoURingEnter() is then
+ * called with the number of entries prepared since the last successful commit.
+ * The pending counter is reported through @a pcReqsCommitted and reset only when
+ * that call succeeds; on failure it is left untouched.
+ *
+ * @interface_method_impl{RTIOQUEUEPROVVTABLE,pfnCommit}
+ */
+static DECLCALLBACK(int) rtIoQueueLnxIoURingFileProv_Commit(RTIOQUEUEPROV hIoQueueProv, uint32_t *pcReqsCommitted)
+{
+    PRTIOQUEUEPROVINT pThis = hIoQueueProv;
+
+    /* Fence so the entry contents written during preparation are ordered before the tail update. */
+    ASMWriteFence();
+    ASMAtomicWriteU32(pThis->Sq.pidxTail, pThis->idxSqTail);
+    ASMWriteFence();
+
+    /* NOTE(review): the third argument is presumably the minimum number of completions to wait for (0 = don't wait) - confirm. */
+    int rc = rtIoQueueLnxIoURingEnter(pThis->iFdIoCtx, pThis->cSqesToCommit, 0, 0 /*fFlags*/);
+    if (RT_SUCCESS(rc))
+    {
+        *pcReqsCommitted = pThis->cSqesToCommit;
+        pThis->cSqesToCommit = 0;
+    }
+
+    return rc;
+}
+
+
+/**
+ * Collects completion events, blocking on the event file descriptor until at
+ * least @a cMinWait events were gathered, the wait was interrupted through
+ * rtIoQueueLnxIoURingFileProv_EvtWaitWakeup(), or an error occurred.
+ *
+ * The number of events stored in @a paCEvt is always returned in @a pcCEvt,
+ * also when a failure status is returned.
+ *
+ * @interface_method_impl{RTIOQUEUEPROVVTABLE,pfnEvtWait}
+ */
+static DECLCALLBACK(int) rtIoQueueLnxIoURingFileProv_EvtWait(RTIOQUEUEPROV hIoQueueProv, PRTIOQUEUECEVT paCEvt, uint32_t cCEvt,
+                                                             uint32_t cMinWait, uint32_t *pcCEvt, uint32_t fFlags)
+{
+    PRTIOQUEUEPROVINT pThis = hIoQueueProv;
+    int rc = VINF_SUCCESS;
+    uint32_t cCEvtSeen = 0;
+
+    RT_NOREF(fFlags);
+
+    /*
+     * Check the completion queue first for any completed events which might save us a
+     * context switch later on.
+     */
+    rtIoQueueLnxIoURingFileProvCqCheck(pThis, paCEvt, cCEvt, &cCEvtSeen);
+
+    while (   cCEvtSeen < cMinWait
+           && RT_SUCCESS(rc))
+    {
+        /*
+         * We can employ a blocking read on the event file descriptor, it will return
+         * either when woken up externally or when there are completion events pending.
+         */
+        uint64_t uCnt = 0; /**< The counter value returned upon a successful read(). */
+        ssize_t rcLnx = read(pThis->iFdEvt, &uCnt, sizeof(uCnt));
+        if (rcLnx == sizeof(uCnt))
+        {
+            uint32_t cCEvtThisSeen = 0;
+            rtIoQueueLnxIoURingFileProvCqCheck(pThis, &paCEvt[cCEvtSeen], cCEvt - cCEvtSeen, &cCEvtThisSeen);
+            cCEvtSeen += cCEvtThisSeen;
+
+            /* Whether we got woken up externally. */
+            if (ASMAtomicXchgBool(&pThis->fExtIntr, false))
+                rc = VERR_INTERRUPTED;
+        }
+        else if (rcLnx == -1)
+            rc = RTErrConvertFromErrno(errno);
+        else
+        {
+            /*
+             * Short or zero-length read.  The assertion alone compiles to nothing in
+             * non-strict builds, which would leave rc successful and make this loop
+             * spin forever, so fail the wait explicitly.
+             */
+            AssertMsgFailed(("Unexpected read() -> 0\n"));
+            rc = VERR_INTERNAL_ERROR;
+        }
+    }
+
+    *pcCEvt = cCEvtSeen;
+    return rc;
+}
+
+
+/**
+ * Interrupts a waiter blocked in the event wait method by flagging an external
+ * interrupt and writing to the event file descriptor.
+ *
+ * When the interrupt flag is already set nothing is written and success is
+ * returned.
+ *
+ * @interface_method_impl{RTIOQUEUEPROVVTABLE,pfnEvtWaitWakeup}
+ */
+static DECLCALLBACK(int) rtIoQueueLnxIoURingFileProv_EvtWaitWakeup(RTIOQUEUEPROV hIoQueueProv)
+{
+    PRTIOQUEUEPROVINT pThis = hIoQueueProv;
+
+    /* Only the caller flipping the flag from false to true writes to the descriptor. */
+    if (ASMAtomicXchgBool(&pThis->fExtIntr, true))
+        return VINF_SUCCESS;
+
+    const uint64_t uIncrement = 1;
+    ssize_t cbWritten = write(pThis->iFdEvt, &uIncrement, sizeof(uIncrement));
+
+    Assert(cbWritten == -1 || cbWritten == sizeof(uIncrement));
+    return cbWritten == -1 ? RTErrConvertFromErrno(errno) : VINF_SUCCESS;
+}
+
+
+/**
+ * Async file I/O queue provider virtual method table for the Linux io_uring
+ * based provider ("LnxIoURingFile").
+ *
+ * The provider handles file handles only and does not supply a
+ * scatter/gather prepare method (pfnReqPrepareSg is NULL).
+ */
+RT_DECL_DATA_CONST(RTIOQUEUEPROVVTABLE const) g_RTIoQueueLnxIoURingProv =
+{
+    /** uVersion */
+    RTIOQUEUEPROVVTABLE_VERSION,
+    /** pszId */
+    "LnxIoURingFile",
+    /** cbIoQueueProv */
+    sizeof(RTIOQUEUEPROVINT),
+    /** enmHnd */
+    RTHANDLETYPE_FILE,
+    /** fFlags */
+    0,
+    /** pfnIsSupported */
+    rtIoQueueLnxIoURingFileProv_IsSupported,
+    /** pfnQueueInit  */
+    rtIoQueueLnxIoURingFileProv_QueueInit,
+    /** pfnQueueDestroy */
+    rtIoQueueLnxIoURingFileProv_QueueDestroy,
+    /** pfnHandleRegister */
+    rtIoQueueLnxIoURingFileProv_HandleRegister,
+    /** pfnHandleDeregister */
+    rtIoQueueLnxIoURingFileProv_HandleDeregister,
+    /** pfnReqPrepare */
+    rtIoQueueLnxIoURingFileProv_ReqPrepare,
+    /** pfnReqPrepareSg - not provided. */
+    NULL,
+    /** pfnCommit */
+    rtIoQueueLnxIoURingFileProv_Commit,
+    /** pfnEvtWait */
+    rtIoQueueLnxIoURingFileProv_EvtWait,
+    /** pfnEvtWaitWakeup */
+    rtIoQueueLnxIoURingFileProv_EvtWaitWakeup,
+    /** uEndMarker */
+    RTIOQUEUEPROVVTABLE_VERSION
+};
+
diff --git a/src/VBox/Runtime/r3/linux/krnlmod-linux.cpp b/src/VBox/Runtime/r3/linux/krnlmod-linux.cpp
new file mode 100644
index 00000000..6d81d530
--- /dev/null
+++ b/src/VBox/Runtime/r3/linux/krnlmod-linux.cpp
@@ -0,0 +1,358 @@
+/* $Id: krnlmod-linux.cpp $ */
+/** @file
+ * IPRT - Kernel module, Linux.
+ */
+
+/*
+ * Copyright (C) 2017-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+*   Header Files                                                                                                                 *
+*********************************************************************************************************************************/
+#define LOG_GROUP RTLOGGROUP_SYSTEM
+#include <iprt/krnlmod.h>
+#include <iprt/linux/sysfs.h>
+#include <iprt/asm.h>
+#include <iprt/assert.h>
+#include <iprt/dir.h>
+#include <iprt/err.h>
+#include <iprt/mem.h>
+#include <iprt/string.h>
+#include <iprt/types.h>
+
+
+/**
+ * Internal kernel module information record.
+ *
+ * The record is allocated with room for the complete module name appended to
+ * it and is reference counted via RTKrnlModInfoRetain / RTKrnlModInfoRelease.
+ */
+typedef struct RTKRNLMODINFOINT
+{
+    /** Reference counter of this record (not of the kernel module). */
+    volatile uint32_t   cRefs;
+    /** Reference count of the kernel module, read from the "refcnt" sysfs file. */
+    uint32_t            cRefKrnlMod;
+    /** Load address of the kernel module, read from the "sections/.text" sysfs file. */
+    RTR0UINTPTR         uLoadAddr;
+    /** Size of the kernel module, read from the "coresize" sysfs file. */
+    size_t              cbKrnlMod;
+    /** Size of the name in characters including the zero terminator. */
+    size_t              cchName;
+    /** Module name including the zero terminator - variable in size. */
+    char                achName[1];
+} RTKRNLMODINFOINT;
+/** Pointer to the internal kernel module information record. */
+typedef RTKRNLMODINFOINT *PRTKRNLMODINFOINT;
+/** Pointer to a const internal kernel module information record. */
+typedef const RTKRNLMODINFOINT *PCRTKRNLMODINFOINT;
+
+
+
+/**
+ * Frees a kernel module information record.
+ *
+ * The record is a single allocation (the name is stored inline), so releasing
+ * the record itself is all that is needed.
+ *
+ * @param   pThis            The record to free.
+ */
+static void rtKrnlModInfoDestroy(PRTKRNLMODINFOINT pThis)
+{
+    /* Header and inline name live in the same heap block. */
+    RTMemFree(pThis);
+}
+
+
+/**
+ * Reads an integer from a file below the sysfs directory of a module, using a
+ * default value when the file does not exist.
+ *
+ * @returns IPRT status code; VINF_SUCCESS when the default was used.
+ * @param   uBase            The number base handed to RTLinuxSysFsReadIntFile.
+ * @param   pi64             Where to store the value.
+ * @param   i64Def           The value to store when the file is missing.
+ * @param   pszName          The kernel module name.
+ * @param   pszPath          The file path relative to the module directory.
+ */
+static int rtKrnlModLinuxReadIntFileDef(unsigned uBase, int64_t *pi64, int64_t i64Def,
+                                        const char *pszName, const char *pszPath)
+{
+    int rc = RTLinuxSysFsReadIntFile(uBase, pi64, "module/%s/%s", pszName, pszPath);
+    if (rc != VERR_FILE_NOT_FOUND)
+        return rc;
+
+    /* A missing file is not an error here, hand out the default instead. */
+    *pi64 = i64Def;
+    return VINF_SUCCESS;
+}
+
+/**
+ * Creates a new kernel module information record for the given module.
+ *
+ * The module reference count, size and load address are read from the
+ * "refcnt", "coresize" and "sections/.text" files below the module's sysfs
+ * directory; a missing file yields 0 for the respective value.  Any other read
+ * failure aborts the creation and is returned to the caller.
+ *
+ * @returns IPRT status code.
+ * @retval  VERR_NO_MEMORY if the record could not be allocated.
+ * @param   pszName          The kernel module name.
+ * @param   phKrnlModInfo    Where to store the handle to the kernel module information record
+ *                           on success.
+ */
+static int rtKrnlModLinuxInfoCreate(const char *pszName, PRTKRNLMODINFO phKrnlModInfo)
+{
+    size_t cchName = strlen(pszName) + 1;
+    PRTKRNLMODINFOINT pThis = (PRTKRNLMODINFOINT)RTMemAllocZ(RT_UOFFSETOF_DYN(RTKRNLMODINFOINT, achName[cchName]));
+    if (RT_UNLIKELY(!pThis))
+        return VERR_NO_MEMORY;
+
+    memcpy(&pThis->achName[0], pszName, cchName);
+    pThis->cchName = cchName;
+    pThis->cRefs   = 1;
+
+    /*
+     * Read the attributes one after the other and stop at the first failure so
+     * that an earlier error is not overwritten by a later successful read.
+     */
+    int64_t iTmp = 0;
+    int rc = rtKrnlModLinuxReadIntFileDef(10, &iTmp, 0, pszName, "refcnt");
+    if (RT_SUCCESS(rc))
+    {
+        pThis->cRefKrnlMod = (uint32_t)iTmp;
+        rc = rtKrnlModLinuxReadIntFileDef(10, &iTmp, 0, pszName, "coresize");
+    }
+    if (RT_SUCCESS(rc))
+    {
+        pThis->cbKrnlMod = iTmp;
+        rc = rtKrnlModLinuxReadIntFileDef(16, &iTmp, 0, pszName, "sections/.text");
+    }
+
+    if (RT_SUCCESS(rc))
+    {
+        pThis->uLoadAddr = iTmp;
+        *phKrnlModInfo = pThis;
+    }
+    else
+        RTMemFree(pThis);
+
+    return rc;
+}
+
+
+/**
+ * Queries whether a kernel module with the given name is loaded, i.e. whether
+ * it has a directory below "module/" in sysfs.
+ *
+ * @returns IPRT status code.
+ * @retval  VERR_INVALID_POINTER if one of the pointers is invalid.
+ * @param   pszName          The kernel module name.
+ * @param   pfLoaded         Where to store the result; only written when
+ *                           VINF_SUCCESS is returned.
+ */
+RTDECL(int) RTKrnlModQueryLoaded(const char *pszName, bool *pfLoaded)
+{
+    AssertPtrReturn(pszName, VERR_INVALID_POINTER);
+    AssertPtrReturn(pfLoaded, VERR_INVALID_POINTER);
+
+    /*
+     * Use the status code returning variant: RTLinuxSysFsExists yields a boolean,
+     * which must not be compared against VINF_SUCCESS / VERR_FILE_NOT_FOUND.
+     */
+    int rc = RTLinuxSysFsExistsEx("module/%s", pszName);
+    if (rc == VINF_SUCCESS)
+        *pfLoaded = true;
+    else if (rc == VERR_FILE_NOT_FOUND)
+    {
+        *pfLoaded = false;
+        rc = VINF_SUCCESS;
+    }
+
+    return rc;
+}
+
+
+/**
+ * Creates an information record for the named kernel module if it is loaded.
+ *
+ * @returns IPRT status code.
+ * @retval  VERR_INVALID_POINTER if one of the pointers is invalid.
+ * @retval  VERR_NOT_FOUND if there is no sysfs directory for the module.
+ * @param   pszName          The kernel module name.
+ * @param   phKrnlModInfo    Where to store the handle to the record on success.
+ */
+RTDECL(int) RTKrnlModLoadedQueryInfo(const char *pszName, PRTKRNLMODINFO phKrnlModInfo)
+{
+    AssertPtrReturn(pszName, VERR_INVALID_POINTER);
+    AssertPtrReturn(phKrnlModInfo, VERR_INVALID_POINTER);
+
+    /*
+     * Use the status code returning variant: RTLinuxSysFsExists yields a boolean,
+     * which must not be compared against VINF_SUCCESS / VERR_FILE_NOT_FOUND.
+     */
+    int rc = RTLinuxSysFsExistsEx("module/%s", pszName);
+    if (rc == VINF_SUCCESS)
+        rc = rtKrnlModLinuxInfoCreate(pszName, phKrnlModInfo);
+    else if (rc == VERR_FILE_NOT_FOUND)
+        rc = VERR_NOT_FOUND;
+
+    return rc;
+}
+
+
+/**
+ * Counts the entries of /sys/module, not counting the standard dot links.
+ *
+ * @returns Number of entries found; 0 when the directory cannot be opened.
+ */
+RTDECL(uint32_t) RTKrnlModLoadedGetCount(void)
+{
+    RTDIR hDir = NULL;
+    if (RT_FAILURE(RTDirOpen(&hDir, "/sys/module")))
+        return 0;
+
+    /* Every directory entry except "." and ".." counts; stop at the first read failure. */
+    uint32_t cEntries = 0;
+    RTDIRENTRY Entry;
+    for (int rc = RTDirRead(hDir, &Entry, NULL); RT_SUCCESS(rc); rc = RTDirRead(hDir, &Entry, NULL))
+        if (!RTDirEntryIsStdDotLink(&Entry))
+            cEntries++;
+
+    RTDirClose(hDir);
+    return cEntries;
+}
+
+
+/**
+ * Creates information records for all entries found in /sys/module.
+ *
+ * @returns IPRT status code.
+ * @retval  VERR_INVALID_POINTER if @a pahKrnlModInfo is invalid while
+ *          @a cEntriesMax is non-zero.
+ * @retval  VERR_BUFFER_OVERFLOW if the array is too small; the number of
+ *          entries found is then stored in @a pcEntries.
+ * @param   pahKrnlModInfo   Array receiving the record handles.  On failure all
+ *                           records created so far are released again.
+ * @param   cEntriesMax      Number of entries the array can hold.
+ * @param   pcEntries        Where to store the number of handles returned on
+ *                           success.  Optional.
+ */
+RTDECL(int) RTKrnlModLoadedQueryInfoAll(PRTKRNLMODINFO pahKrnlModInfo, uint32_t cEntriesMax,
+                                        uint32_t *pcEntries)
+{
+    if (cEntriesMax > 0)
+        AssertPtrReturn(pahKrnlModInfo, VERR_INVALID_POINTER);
+
+    uint32_t cKmodsLoaded = RTKrnlModLoadedGetCount();
+    if (cEntriesMax < cKmodsLoaded)
+    {
+        /* Check the pointer itself, not the value it points to. */
+        if (pcEntries)
+            *pcEntries = cKmodsLoaded;
+        return VERR_BUFFER_OVERFLOW;
+    }
+
+    RTDIR hDir = NULL;
+    int rc = RTDirOpen(&hDir, "/sys/module");
+    if (RT_SUCCESS(rc))
+    {
+        uint32_t idxKrnlModInfo = 0;
+        RTDIRENTRY DirEnt;
+
+        rc = RTDirRead(hDir, &DirEnt, NULL);
+        while (RT_SUCCESS(rc))
+        {
+            if (!RTDirEntryIsStdDotLink(&DirEnt))
+            {
+                /*
+                 * The directory may have gained entries since it was counted above,
+                 * so never write beyond the end of the caller's array.
+                 */
+                if (idxKrnlModInfo < cEntriesMax)
+                {
+                    rc = rtKrnlModLinuxInfoCreate(DirEnt.szName, &pahKrnlModInfo[idxKrnlModInfo]);
+                    if (RT_SUCCESS(rc))
+                        idxKrnlModInfo++;
+                }
+                else
+                    rc = VERR_BUFFER_OVERFLOW;
+            }
+
+            if (RT_SUCCESS(rc))
+                rc = RTDirRead(hDir, &DirEnt, NULL);
+        }
+
+        if (rc == VERR_NO_MORE_FILES)
+        {
+            rc = VINF_SUCCESS;
+            /* Report what was actually stored, the directory may have shrunk too. */
+            cKmodsLoaded = idxKrnlModInfo;
+        }
+        else if (RT_FAILURE(rc))
+        {
+            /* Rollback */
+            while (idxKrnlModInfo-- > 0)
+                RTKrnlModInfoRelease(pahKrnlModInfo[idxKrnlModInfo]);
+        }
+
+        RTDirClose(hDir);
+
+        /* Give the caller a fresh size hint when the array turned out to be too small. */
+        if (rc == VERR_BUFFER_OVERFLOW)
+            cKmodsLoaded = RTKrnlModLoadedGetCount();
+
+        if (pcEntries)
+            *pcEntries = cKmodsLoaded;
+    }
+
+    return rc;
+}
+
+
+/**
+ * Adds a reference to the given kernel module information record.
+ *
+ * @returns The new reference count, UINT32_MAX if the handle is invalid.
+ * @param   hKrnlModInfo     The record handle.
+ */
+RTDECL(uint32_t) RTKrnlModInfoRetain(RTKRNLMODINFO hKrnlModInfo)
+{
+    PRTKRNLMODINFOINT pInfo = hKrnlModInfo;
+    AssertPtrReturn(pInfo, UINT32_MAX);
+
+    uint32_t const cRefsNew = ASMAtomicIncU32(&pInfo->cRefs);
+    AssertMsg(cRefsNew > 1 && cRefsNew < _1M, ("%#x %p\n", cRefsNew, pInfo));
+    return cRefsNew;
+}
+
+
+/**
+ * Drops a reference to the given kernel module information record, destroying
+ * the record when the count reaches zero.
+ *
+ * @returns The new reference count; 0 for a NULL handle, UINT32_MAX if the
+ *          handle is otherwise invalid.
+ * @param   hKrnlModInfo     The record handle, NULL is quietly accepted.
+ */
+RTDECL(uint32_t) RTKrnlModInfoRelease(RTKRNLMODINFO hKrnlModInfo)
+{
+    PRTKRNLMODINFOINT pInfo = hKrnlModInfo;
+    if (pInfo == NULL)
+        return 0;
+    AssertPtrReturn(pInfo, UINT32_MAX);
+
+    uint32_t const cRefsLeft = ASMAtomicDecU32(&pInfo->cRefs);
+    AssertMsg(cRefsLeft < _1M, ("%#x %p\n", cRefsLeft, pInfo));
+    if (!cRefsLeft)
+        rtKrnlModInfoDestroy(pInfo);
+    return cRefsLeft;
+}
+
+
+/**
+ * Returns the kernel module reference count stored in the record.
+ *
+ * @returns The stored module reference count, 0 if the handle is invalid.
+ * @param   hKrnlModInfo     The record handle.
+ */
+RTDECL(uint32_t) RTKrnlModInfoGetRefCnt(RTKRNLMODINFO hKrnlModInfo)
+{
+    PRTKRNLMODINFOINT pInfo = hKrnlModInfo;
+    AssertPtrReturn(pInfo, 0);
+
+    uint32_t const cModRefs = pInfo->cRefKrnlMod;
+    return cModRefs;
+}
+
+
+/**
+ * Returns the module name stored in the record.
+ *
+ * @returns Pointer to the name kept inside the record, NULL if the handle is
+ *          invalid.
+ * @param   hKrnlModInfo     The record handle.
+ */
+RTDECL(const char *) RTKrnlModInfoGetName(RTKRNLMODINFO hKrnlModInfo)
+{
+    PRTKRNLMODINFOINT pInfo = hKrnlModInfo;
+    AssertPtrReturn(pInfo, NULL);
+
+    const char *pszName = &pInfo->achName[0];
+    return pszName;
+}
+
+
+/**
+ * Returns the file path of the module; this implementation has none to offer.
+ *
+ * @returns Always NULL.
+ * @param   hKrnlModInfo     The record handle.
+ */
+RTDECL(const char *) RTKrnlModInfoGetFilePath(RTKRNLMODINFO hKrnlModInfo)
+{
+    PRTKRNLMODINFOINT pInfo = hKrnlModInfo;
+    AssertPtrReturn(pInfo, NULL);
+
+    /* No file path is recorded for the module. */
+    return NULL;
+}
+
+
+/**
+ * Returns the module size stored in the record.
+ *
+ * @returns The stored size, 0 if the handle is invalid.
+ * @param   hKrnlModInfo     The record handle.
+ */
+RTDECL(size_t) RTKrnlModInfoGetSize(RTKRNLMODINFO hKrnlModInfo)
+{
+    PRTKRNLMODINFOINT pInfo = hKrnlModInfo;
+    AssertPtrReturn(pInfo, 0);
+
+    size_t const cbModule = pInfo->cbKrnlMod;
+    return cbModule;
+}
+
+
+/**
+ * Returns the module load address stored in the record.
+ *
+ * @returns The stored load address, 0 if the handle is invalid.
+ * @param   hKrnlModInfo     The record handle.
+ */
+RTDECL(RTR0UINTPTR) RTKrnlModInfoGetLoadAddr(RTKRNLMODINFO hKrnlModInfo)
+{
+    PRTKRNLMODINFOINT pInfo = hKrnlModInfo;
+    AssertPtrReturn(pInfo, 0);
+
+    RTR0UINTPTR const uAddr = pInfo->uLoadAddr;
+    return uAddr;
+}
+
+
+/**
+ * Queries a module referencing the given one; not implemented here.
+ *
+ * @returns VERR_NOT_IMPLEMENTED.
+ * @param   hKrnlModInfo     The record handle (unused).
+ * @param   idx              The reference index (unused).
+ * @param   phKrnlModInfoRef Where a handle would be stored (unused).
+ */
+RTDECL(int) RTKrnlModInfoQueryRefModInfo(RTKRNLMODINFO hKrnlModInfo, uint32_t idx,
+                                         PRTKRNLMODINFO phKrnlModInfoRef)
+{
+    RT_NOREF(hKrnlModInfo, idx, phKrnlModInfoRef);
+    return VERR_NOT_IMPLEMENTED;
+}
+
+
+/**
+ * Loads a kernel module by name; not supported by this implementation.
+ *
+ * @returns VERR_NOT_SUPPORTED, or VERR_INVALID_PARAMETER if @a pszName is not
+ *          a valid pointer.
+ * @param   pszName          The kernel module name.
+ */
+RTDECL(int) RTKrnlModLoadByName(const char *pszName)
+{
+    AssertPtrReturn(pszName, VERR_INVALID_PARAMETER);
+
+    return VERR_NOT_SUPPORTED;
+}
+
+
+/**
+ * Loads a kernel module by file path; not supported by this implementation.
+ *
+ * @returns VERR_NOT_SUPPORTED, or VERR_INVALID_PARAMETER if @a pszPath is not
+ *          a valid pointer.
+ * @param   pszPath          The path to the kernel module.
+ */
+RTDECL(int) RTKrnlModLoadByPath(const char *pszPath)
+{
+    AssertPtrReturn(pszPath, VERR_INVALID_PARAMETER);
+
+    return VERR_NOT_SUPPORTED;
+}
+
+
+/**
+ * Unloads a kernel module by name; not supported by this implementation.
+ *
+ * @returns VERR_NOT_SUPPORTED, or VERR_INVALID_PARAMETER if @a pszName is not
+ *          a valid pointer.
+ * @param   pszName          The kernel module name.
+ */
+RTDECL(int) RTKrnlModUnloadByName(const char *pszName)
+{
+    AssertPtrReturn(pszName, VERR_INVALID_PARAMETER);
+
+    return VERR_NOT_SUPPORTED;
+}
diff --git a/src/VBox/Runtime/r3/linux/mp-linux.cpp b/src/VBox/Runtime/r3/linux/mp-linux.cpp
new file mode 100644
index 00000000..935dfd22
--- /dev/null
+++ b/src/VBox/Runtime/r3/linux/mp-linux.cpp
@@ -0,0 +1,328 @@
+/* $Id: mp-linux.cpp $ */
+/** @file
+ * IPRT - Multiprocessor, Linux.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+*   Header Files                                                                                                                 *
+*********************************************************************************************************************************/
+#define LOG_GROUP RTLOGGROUP_SYSTEM
+#include <stdio.h>
+#include <errno.h>
+
+#include <iprt/mp.h>
+#include "internal/iprt.h"
+
+#include <iprt/alloca.h>
+#include <iprt/cpuset.h>
+#include <iprt/assert.h>
+#include <iprt/string.h>
+#include <iprt/linux/sysfs.h>
+
+
+/**
+ * Internal worker that determines the max possible CPU count.
+ *
+ * Probes the sysfs cpuN directories for every index below RTCPUSET_MAX_CPUS and
+ * takes the highest one found plus one (at least 1).  The result is cached in a
+ * static variable after the first call.
+ *
+ * @returns Max cpus.
+ */
+static RTCPUID rtMpLinuxMaxCpus(void)
+{
+#if 0 /* this doesn't do the right thing :-/ */
+    int cMax = sysconf(_SC_NPROCESSORS_CONF);
+    Assert(cMax >= 1);
+    return cMax;
+#else
+    static uint32_t s_cMax = 0;
+    if (s_cMax)
+        return s_cMax;
+
+    /* First call: walk all possible indexes, the last hit determines the count. */
+    int cCpus = 1;
+    unsigned iCpu = 0;
+    while (iCpu < RTCPUSET_MAX_CPUS)
+    {
+        if (RTLinuxSysFsExists("devices/system/cpu/cpu%d", iCpu))
+            cCpus = iCpu + 1;
+        iCpu++;
+    }
+    ASMAtomicUoWriteU32((uint32_t volatile *)&s_cMax, cCpus);
+    return cCpus;
+#endif
+}
+
+/**
+ * Internal worker that picks the processor speed in MHz from /proc/cpuinfo.
+ *
+ * The file is scanned line by line: "processor" lines select the CPU the
+ * following lines belong to, and the first "cpu MHz" line seen for the wanted
+ * CPU supplies the value (integer part only).
+ *
+ * @returns CPU frequency, 0 if the file cannot be opened or no matching entry
+ *          is found.
+ * @param   idCpu            The CPU to get the frequency for.
+ */
+static uint32_t rtMpLinuxGetFrequency(RTCPUID idCpu)
+{
+    FILE *pFile = fopen("/proc/cpuinfo", "r");
+    if (!pFile)
+        return 0;
+
+    char sz[256];
+    RTCPUID idCpuFound = NIL_RTCPUID;
+    uint32_t Frequency = 0;
+    while (fgets(sz, sizeof(sz), pFile))
+    {
+        /*
+         * The key must be followed directly by a blank or the colon.  Index by the
+         * key length rather than a fixed offset, the strncmp match guarantees that
+         * this character lies within the string (it is the terminator at worst).
+         */
+        char *psz;
+        if (   !strncmp(sz, RT_STR_TUPLE("processor"))
+            && (   sz[sizeof("processor") - 1] == ' '
+                || sz[sizeof("processor") - 1] == '\t'
+                || sz[sizeof("processor") - 1] == ':')
+            && (psz = strchr(sz, ':')))
+        {
+            /* Skip the colon and any blanks after it without running past the terminator. */
+            do
+                psz++;
+            while (*psz == ' ' || *psz == '\t');
+
+            int64_t iCpu;
+            int rc = RTStrToInt64Ex(psz, NULL, 0, &iCpu);
+            if (RT_SUCCESS(rc))
+                idCpuFound = iCpu;
+        }
+        else if (   idCpu == idCpuFound
+                 && !strncmp(sz, RT_STR_TUPLE("cpu MHz"))
+                 && (   sz[sizeof("cpu MHz") - 1] == ' '
+                     || sz[sizeof("cpu MHz") - 1] == '\t'
+                     || sz[sizeof("cpu MHz") - 1] == ':')
+                 && (psz = strchr(sz, ':')))
+        {
+            do
+                psz++;
+            while (*psz == ' ' || *psz == '\t');
+
+            int64_t v;
+            int rc = RTStrToInt64Ex(psz, &psz, 0, &v);
+            if (RT_SUCCESS(rc))
+            {
+                Frequency = v;
+                break;
+            }
+        }
+    }
+    fclose(pFile);
+    return Frequency;
+}
+
+
+/** @todo RTMpCpuId(). */
+
+/**
+ * Converts a CPU identifier to a CPU set index; the two are the same number here.
+ *
+ * @returns The set index, -1 if @a idCpu is not below the max CPU count.
+ * @param   idCpu            The CPU identifier.
+ */
+RTDECL(int) RTMpCpuIdToSetIndex(RTCPUID idCpu)
+{
+    if (idCpu < rtMpLinuxMaxCpus())
+        return (int)idCpu;
+    return -1;
+}
+
+
+/**
+ * Converts a CPU set index to a CPU identifier; the two are the same number here.
+ *
+ * @returns The CPU identifier, NIL_RTCPUID if @a iCpu is negative or not below
+ *          the max CPU count.
+ * @param   iCpu             The CPU set index.
+ */
+RTDECL(RTCPUID) RTMpCpuIdFromSetIndex(int iCpu)
+{
+    /* The unsigned cast makes negative indexes fail the range check as well. */
+    if ((unsigned)iCpu < rtMpLinuxMaxCpus())
+        return (RTCPUID)iCpu;
+    return NIL_RTCPUID;
+}
+
+
+/**
+ * Returns the highest possible CPU identifier, i.e. the max CPU count minus one.
+ *
+ * @returns The max CPU identifier.
+ */
+RTDECL(RTCPUID) RTMpGetMaxCpuId(void)
+{
+    RTCPUID const cCpusMax = rtMpLinuxMaxCpus();
+    return cCpusMax - 1;
+}
+
+
+/**
+ * Checks whether the given CPU is online according to its sysfs "online" file.
+ *
+ * When that file cannot be read but the CPU directory exists, the CPU is
+ * assumed to be online.
+ *
+ * @returns true if online, false if offline or the CPU does not exist.
+ * @param   idCpu            The CPU identifier.
+ */
+RTDECL(bool) RTMpIsCpuOnline(RTCPUID idCpu)
+{
+    /** @todo check if there is a simpler interface than this... */
+    int64_t i = 0;
+    int rc = RTLinuxSysFsReadIntFile(0, &i, "devices/system/cpu/cpu%d/online", (int)idCpu);
+    if (    RT_FAILURE(rc)
+        &&  RTLinuxSysFsExists("devices/system/cpu/cpu%d", (int)idCpu))
+    {
+        /** @todo Assert(!RTLinuxSysFsExists("devices/system/cpu/cpu%d/online",
+         *               (int)idCpu));
+         * Unfortunately, the online file wasn't always world readable (centos
+         * 2.6.18-164). */
+        i = 1;
+        rc = VINF_SUCCESS;
+    }
+
+    /* i is 64-bit wide, so it needs the matching IPRT format type rather than %d. */
+    AssertMsg(i == 0 || i == -1 || i == 1, ("i=%RI64\n", i));
+    return RT_SUCCESS(rc) && i != 0;
+}
+
+
+/**
+ * Checks whether the given CPU is possible, i.e. whether it has a directory in
+ * sysfs.
+ *
+ * @returns true if the sysfs directory for the CPU exists, false otherwise.
+ * @param   idCpu            The CPU identifier.
+ */
+RTDECL(bool) RTMpIsCpuPossible(RTCPUID idCpu)
+{
+    /** @todo check this up with hotplugging! */
+    bool const fHasSysFsDir = RTLinuxSysFsExists("devices/system/cpu/cpu%d", (int)idCpu);
+    return fHasSysFsDir;
+}
+
+
+/**
+ * Fills the given set with all possible CPUs.
+ *
+ * @returns @a pSet.
+ * @param   pSet             The set to fill; it is emptied first.
+ */
+RTDECL(PRTCPUSET) RTMpGetSet(PRTCPUSET pSet)
+{
+    RTCpuSetEmpty(pSet);
+
+    RTCPUID const cCpusMax = rtMpLinuxMaxCpus();
+    RTCPUID idCpu = 0;
+    while (idCpu < cCpusMax)
+    {
+        if (RTMpIsCpuPossible(idCpu))
+            RTCpuSetAdd(pSet, idCpu);
+        idCpu++;
+    }
+    return pSet;
+}
+
+
+/**
+ * Returns the number of possible CPUs.
+ *
+ * @returns Number of members in the set produced by RTMpGetSet.
+ */
+RTDECL(RTCPUID) RTMpGetCount(void)
+{
+    RTCPUSET PossibleSet;
+    return RTCpuSetCount(RTMpGetSet(&PossibleSet));
+}
+
+
+/**
+ * Counts the distinct (core id, physical package id) pairs among all possible
+ * CPUs, using the sysfs topology files.
+ *
+ * CPUs whose topology files cannot be read are left out of the count.
+ *
+ * @returns Number of distinct cores found.
+ */
+RTDECL(RTCPUID) RTMpGetCoreCount(void)
+{
+    RTCPUID const cMax      = rtMpLinuxMaxCpus();
+    uint32_t     *paidCores = (uint32_t *)alloca(sizeof(paidCores[0]) * (cMax + 1));
+    uint32_t     *paidPckgs = (uint32_t *)alloca(sizeof(paidPckgs[0]) * (cMax + 1));
+    uint32_t      cCores    = 0;
+    for (RTCPUID idCpu = 0; idCpu < cMax; idCpu++)
+    {
+        if (!RTMpIsCpuPossible(idCpu))
+            continue;
+
+        int64_t idCore = 0;
+        int64_t idPckg = 0;
+        int rc = RTLinuxSysFsReadIntFile(0, &idCore, "devices/system/cpu/cpu%d/topology/core_id", (int)idCpu);
+        if (RT_SUCCESS(rc))
+            rc = RTLinuxSysFsReadIntFile(0, &idPckg, "devices/system/cpu/cpu%d/topology/physical_package_id", (int)idCpu);
+        if (RT_FAILURE(rc))
+            continue;
+
+        /* Look for the pair among the ones recorded so far. */
+        uint32_t const uCore = (uint32_t)idCore;
+        uint32_t const uPckg = (uint32_t)idPckg;
+        bool fKnown = false;
+        for (uint32_t i = 0; i < cCores && !fKnown; i++)
+            fKnown = paidCores[i] == uCore && paidPckgs[i] == uPckg;
+
+        if (!fKnown)
+        {
+            paidCores[cCores] = uCore;
+            paidPckgs[cCores] = uPckg;
+            cCores++;
+        }
+    }
+    Assert(cCores > 0);
+    return cCores;
+}
+
+
+/**
+ * Fills the given set with all online CPUs.
+ *
+ * @returns @a pSet.
+ * @param   pSet             The set to fill; it is emptied first.
+ */
+RTDECL(PRTCPUSET) RTMpGetOnlineSet(PRTCPUSET pSet)
+{
+    RTCpuSetEmpty(pSet);
+
+    RTCPUID const cCpusMax = rtMpLinuxMaxCpus();
+    RTCPUID idCpu = 0;
+    while (idCpu < cCpusMax)
+    {
+        if (RTMpIsCpuOnline(idCpu))
+            RTCpuSetAdd(pSet, idCpu);
+        idCpu++;
+    }
+    return pSet;
+}
+
+
+/**
+ * Returns the number of online CPUs.
+ *
+ * @returns Number of members in the set produced by RTMpGetOnlineSet.
+ */
+RTDECL(RTCPUID) RTMpGetOnlineCount(void)
+{
+    RTCPUSET OnlineSet;
+    return RTCpuSetCount(RTMpGetOnlineSet(&OnlineSet));
+}
+
+
+/**
+ * Counts the distinct (core id, physical package id) pairs among all online
+ * CPUs, using the sysfs topology files.
+ *
+ * CPUs whose topology files cannot be read are left out of the count.
+ *
+ * @returns Number of distinct online cores found.
+ */
+RTDECL(RTCPUID) RTMpGetOnlineCoreCount(void)
+{
+    RTCPUID     cMax      = rtMpLinuxMaxCpus();
+    uint32_t   *paidCores = (uint32_t *)alloca(sizeof(paidCores[0]) * (cMax + 1));
+    uint32_t   *paidPckgs = (uint32_t *)alloca(sizeof(paidPckgs[0]) * (cMax + 1));
+    uint32_t    cCores    = 0;
+    for (RTCPUID idCpu = 0; idCpu < cMax; idCpu++)
+    {
+        if (RTMpIsCpuOnline(idCpu))
+        {
+            int64_t idCore = 0;
+            int64_t idPckg = 0;
+
+            int rc = RTLinuxSysFsReadIntFile(0, &idCore, "devices/system/cpu/cpu%d/topology/core_id", (int)idCpu);
+            if (RT_SUCCESS(rc))
+                rc = RTLinuxSysFsReadIntFile(0, &idPckg, "devices/system/cpu/cpu%d/topology/physical_package_id", (int)idCpu);
+
+            if (RT_SUCCESS(rc))
+            {
+                uint32_t i;
+
+                /*
+                 * Compare in the 32-bit form the IDs are stored in (as RTMpGetCoreCount
+                 * does).  Comparing the stored uint32_t against the int64_t directly
+                 * never matches for a negative ID and counts such a core once per CPU.
+                 */
+                for (i = 0; i < cCores; i++)
+                    if (   paidCores[i] == (uint32_t)idCore
+                        && paidPckgs[i] == (uint32_t)idPckg)
+                        break;
+                if (i >= cCores)
+                {
+                    paidCores[cCores] = (uint32_t)idCore;
+                    paidPckgs[cCores] = (uint32_t)idPckg;
+                    cCores++;
+                }
+            }
+        }
+    }
+    Assert(cCores > 0);
+    return cCores;
+}
+
+
+
+/**
+ * Returns the current frequency of the given CPU in MHz.
+ *
+ * The value is taken from the cpufreq "cpuinfo_cur_freq" sysfs file (kHz) and
+ * rounded up to MHz; when that file cannot be read, /proc/cpuinfo is used.
+ *
+ * @returns The frequency in MHz, 0 if it could not be determined.
+ * @param   idCpu            The CPU identifier.
+ */
+RTDECL(uint32_t) RTMpGetCurFrequency(RTCPUID idCpu)
+{
+    int64_t cKiloHertz = 0;
+    if (RT_FAILURE(RTLinuxSysFsReadIntFile(0, &cKiloHertz, "devices/system/cpu/cpu%d/cpufreq/cpuinfo_cur_freq", (int)idCpu)))
+    {
+        /*
+         * The file may be just unreadable - in that case use plan B, i.e.
+         * /proc/cpuinfo to get the data we want. The assumption is that if
+         * cpuinfo_cur_freq doesn't exist then the speed won't change, and
+         * thus cur == max. If it does exist then cpuinfo contains the
+         * current frequency.
+         */
+        cKiloHertz = rtMpLinuxGetFrequency(idCpu) * 1000;
+    }
+
+    /* Round up to whole MHz. */
+    return (cKiloHertz + 999) / 1000;
+}
+
+
+/**
+ * Returns the maximum frequency of the given CPU in MHz.
+ *
+ * The value is taken from the cpufreq "cpuinfo_max_freq" sysfs file (kHz) and
+ * rounded up to MHz.  /proc/cpuinfo is consulted only when that file does not
+ * exist at all; if it exists but cannot be read, 0 is returned.
+ *
+ * @returns The frequency in MHz, 0 if it could not be determined.
+ * @param   idCpu            The CPU identifier.
+ */
+RTDECL(uint32_t) RTMpGetMaxFrequency(RTCPUID idCpu)
+{
+    int64_t cKiloHertz = 0;
+    if (RT_FAILURE(RTLinuxSysFsReadIntFile(0, &cKiloHertz, "devices/system/cpu/cpu%d/cpufreq/cpuinfo_max_freq", (int)idCpu)))
+    {
+        /*
+         * Check if the file isn't there - if it is there, then /proc/cpuinfo
+         * would provide current frequency information, which is wrong.
+         */
+        bool const fFileExists = RTLinuxSysFsExists("devices/system/cpu/cpu%d/cpufreq/cpuinfo_max_freq", (int)idCpu);
+        cKiloHertz = fFileExists ? 0 : rtMpLinuxGetFrequency(idCpu) * 1000;
+    }
+
+    /* Round up to whole MHz. */
+    return (cKiloHertz + 999) / 1000;
+}
diff --git a/src/VBox/Runtime/r3/linux/rtProcInitExePath-linux.cpp b/src/VBox/Runtime/r3/linux/rtProcInitExePath-linux.cpp
new file mode 100644
index 00000000..bd3edc12
--- /dev/null
+++ b/src/VBox/Runtime/r3/linux/rtProcInitExePath-linux.cpp
@@ -0,0 +1,79 @@
+/* $Id: rtProcInitExePath-linux.cpp $ */
+/** @file
+ * IPRT - rtProcInitExePath, Linux.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+*   Header Files                                                                                                                 *
+*********************************************************************************************************************************/
+#define LOG_GROUP RTLOGGROUP_PROCESS
+#include <unistd.h>
+#include <errno.h>
+
+#include <iprt/string.h>
+#include <iprt/assert.h>
+#include <iprt/errcore.h>
+#include <iprt/path.h>
+#include "internal/process.h"
+#include "internal/path.h"
+
+
+/**
+ * Determines the path of the running executable by reading the /proc/self/exe
+ * link and converting the result via rtPathFromNative().
+ *
+ * @returns IPRT status code.
+ * @param   pszPath          Buffer receiving the zero terminated path.
+ * @param   cchPath          Size of the buffer.
+ */
+DECLHIDDEN(int) rtProcInitExePath(char *pszPath, size_t cchPath)
+{
+    /* Read the link straight into the caller's buffer, leaving room for the terminator. */
+    int cchLink = readlink("/proc/self/exe", pszPath, cchPath - 1);
+    if (cchLink <= 0 || (size_t)cchLink > cchPath - 1)
+    {
+        int err = errno;
+        int rc = RTErrConvertFromErrno(err);
+        AssertMsgFailed(("rc=%Rrc err=%d cchLink=%d\n", rc, err, cchLink));
+        return rc;
+    }
+
+    /* readlink() does not terminate the string. */
+    pszPath[cchLink] = '\0';
+
+    /* Convert from the native encoding; copy back only if a new string was produced. */
+    char const *pszConverted;
+    int rc = rtPathFromNative(&pszConverted, pszPath, NULL);
+    AssertMsgRCReturn(rc, ("rc=%Rrc pszLink=\"%s\"\nhex: %.*Rhxs\n", rc, pszPath, cchLink, pszPath), rc);
+    if (pszConverted != pszPath)
+    {
+        rc = RTStrCopy(pszPath, cchPath, pszConverted);
+        rtPathFreeIprt(pszConverted, pszPath);
+    }
+    return rc;
+}
+
diff --git a/src/VBox/Runtime/r3/linux/sched-linux.cpp b/src/VBox/Runtime/r3/linux/sched-linux.cpp
new file mode 100644
index 00000000..0b7370e0
--- /dev/null
+++ b/src/VBox/Runtime/r3/linux/sched-linux.cpp
@@ -0,0 +1,707 @@
+/* $Id: sched-linux.cpp $ */
+/** @file
+ * IPRT - Scheduling, POSIX.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+/*
+ * !WARNING!
+ *
+ * When talking about lowering and raising priority, we do *NOT* refer to
+ * the common direction priority values takes on unix systems (lower means
+ * higher). So, when we raise the priority of a linux thread the nice
+ * value will decrease, and when we lower the priority the nice value
+ * will increase. Confusing, right?
+ *
+ * !WARNING!
+ */
+
+
+
+/** @def THREAD_LOGGING
+ * Be very careful with enabling this, it may cause deadlocks when combined
+ * with the 'thread' logging prefix.
+ */
+#ifdef DOXYGEN_RUNNING
+# define THREAD_LOGGING
+#endif
+
+
+/*********************************************************************************************************************************
+*   Header Files                                                                                                                 *
+*********************************************************************************************************************************/
+#define LOG_GROUP RTLOGGROUP_THREAD
+#include <errno.h>
+#include <pthread.h>
+#include <limits.h>
+#include <sched.h>
+#include <unistd.h>
+#include <sys/resource.h>
+
+#include <iprt/thread.h>
+#include <iprt/process.h>
+#include <iprt/semaphore.h>
+#include <iprt/string.h>
+#include <iprt/assert.h>
+#include <iprt/log.h>
+#include <iprt/errcore.h>
+#include "internal/sched.h"
+#include "internal/thread.h"
+
+
+/*********************************************************************************************************************************
+*   Structures and Typedefs                                                                                                      *
+*********************************************************************************************************************************/
+
+/** One row of a per-thread-type scheduling table; the tables are indexed by RTTHREADTYPE.
+ * @internal */
+typedef struct PROCPRIORITYTYPE
+{
+    /** The thread type this row is for; duplicates the array index for sanity checking. */
+    RTTHREADTYPE    enmType;
+    /** The thread priority or nice delta - depends on which priority type. */
+    int             iPriority;
+} PROCPRIORITYTYPE;
+
+
+/**
+ * Configuration of one process priority: a base nice value plus a per-thread-type table.
+ * @internal
+ */
+typedef struct
+{
+    /** The process priority this configuration implements. */
+    RTPROCPRIORITY  enmPriority;
+    /** The name of this priority. */
+    const char     *pszName;
+    /** The process nice value. */
+    int             iNice;
+    /** The delta added to the per-type iPriority to get the value handed to setpriority(). */
+    int             iDelta;
+    /** Per-thread-type scheduler attributes, indexed by RTTHREADTYPE. */
+    const PROCPRIORITYTYPE *paTypes;
+} PROCPRIORITY;
+
+
+/**
+ * Saved priority settings, as captured by rtSchedNativeSave.
+ * @internal
+ */
+typedef struct
+{
+    /** Nice value from getpriority(PRIO_PROCESS, 0). */
+    int                 iPriority;
+    /** Process level: parameters from sched_getparam(0). */
+    struct sched_param  SchedParam;
+    /** Process level: policy from sched_getscheduler(0). */
+    int                 iPolicy;
+    /** pthread level: parameters from pthread_getschedparam(pthread_self()). */
+    struct sched_param  PthreadSchedParam;
+    /** pthread level: policy from pthread_getschedparam(pthread_self()). */
+    int                 iPthreadPolicy;
+} SAVEDPRIORITY, *PSAVEDPRIORITY;
+
+
+/**
+ * Priority pair handed to the validator thread (rtSchedNativeValidatorThread).
+ * @internal
+ */
+typedef struct
+{
+    /** The current thread priority to assume first; INT_MAX means this step is skipped. */
+    int                 iCurrent;
+    /** The thread priority to try set afterwards. */
+    int                 iNew;
+} VALIDATORPRIORITYPAIR, *PVALIDATORPRIORITYPAIR;
+
+
+/*********************************************************************************************************************************
+*   Global Variables                                                                                                             *
+*********************************************************************************************************************************/
+/**
+ * Per-thread-type nice deltas for a process which is not restricted to only
+ * lowering the priority (negative deltas raise it, see the warning above).
+ */
+static const PROCPRIORITYTYPE g_aTypesLinuxFree[RTTHREADTYPE_END] =
+{
+    { RTTHREADTYPE_INVALID,                 -999999999 },
+    { RTTHREADTYPE_INFREQUENT_POLLER,       +3 },
+    { RTTHREADTYPE_MAIN_HEAVY_WORKER,       +2 },
+    { RTTHREADTYPE_EMULATION,               +1 },
+    { RTTHREADTYPE_DEFAULT,                  0 },
+    { RTTHREADTYPE_GUI,                      0 },
+    { RTTHREADTYPE_MAIN_WORKER,              0 },
+    { RTTHREADTYPE_VRDP_IO,                 -1 },
+    { RTTHREADTYPE_DEBUGGER,                -1 },
+    { RTTHREADTYPE_MSG_PUMP,                -2 },
+    { RTTHREADTYPE_IO,                      -3 },
+    { RTTHREADTYPE_TIMER,                   -4 }
+};
+
+/**
+ * Per-thread-type nice deltas for a process in which we are restricted and can only lower the priority (no negative deltas).
+ */
+static const PROCPRIORITYTYPE g_aTypesLinuxRestricted[RTTHREADTYPE_END] =
+{
+    { RTTHREADTYPE_INVALID,                 -999999999 },
+    { RTTHREADTYPE_INFREQUENT_POLLER,       +3 },
+    { RTTHREADTYPE_MAIN_HEAVY_WORKER,       +2 },
+    { RTTHREADTYPE_EMULATION,               +1 },
+    { RTTHREADTYPE_DEFAULT,                  0 },
+    { RTTHREADTYPE_GUI,                      0 },
+    { RTTHREADTYPE_MAIN_WORKER,              0 },
+    { RTTHREADTYPE_VRDP_IO,                  0 },
+    { RTTHREADTYPE_DEBUGGER,                 0 },
+    { RTTHREADTYPE_MSG_PUMP,                 0 },
+    { RTTHREADTYPE_IO,                       0 },
+    { RTTHREADTYPE_TIMER,                    0 }
+};
+
+/**
+ * All thread types get the same priority (every delta is zero).
+ *
+ * This is typically chosen when we find that we can't raise the priority
+ * to the process default of a thread created by a low priority thread.
+ */
+static const PROCPRIORITYTYPE g_aTypesLinuxFlat[RTTHREADTYPE_END] =
+{
+    { RTTHREADTYPE_INVALID,                 -999999999 },
+    { RTTHREADTYPE_INFREQUENT_POLLER,        0 },
+    { RTTHREADTYPE_MAIN_HEAVY_WORKER,        0 },
+    { RTTHREADTYPE_EMULATION,                0 },
+    { RTTHREADTYPE_DEFAULT,                  0 },
+    { RTTHREADTYPE_GUI,                      0 },
+    { RTTHREADTYPE_MAIN_WORKER,              0 },
+    { RTTHREADTYPE_VRDP_IO,                  0 },
+    { RTTHREADTYPE_DEBUGGER,                 0 },
+    { RTTHREADTYPE_MSG_PUMP,                 0 },
+    { RTTHREADTYPE_IO,                       0 },
+    { RTTHREADTYPE_TIMER,                    0 }
+};
+
+/**
+ * Candidate configurations (priority, name, iNice, iDelta, type table); rtProcNativeSetPriority uses the first match that validates.
+ */
+static const PROCPRIORITY   g_aUnixConfigs[] =
+{
+    { RTPROCPRIORITY_FLAT,      "Flat",      0,   0, g_aTypesLinuxFlat },
+    { RTPROCPRIORITY_LOW,       "Low",       9,   9, g_aTypesLinuxFree },
+    { RTPROCPRIORITY_LOW,       "Low",       9,   9, g_aTypesLinuxFlat },
+    { RTPROCPRIORITY_LOW,       "Low",      15,  15, g_aTypesLinuxFree },
+    { RTPROCPRIORITY_LOW,       "Low",      15,  15, g_aTypesLinuxFlat },
+    { RTPROCPRIORITY_LOW,       "Low",      17,  17, g_aTypesLinuxFree },
+    { RTPROCPRIORITY_LOW,       "Low",      17,  17, g_aTypesLinuxFlat },
+    { RTPROCPRIORITY_LOW,       "Low",      19,  19, g_aTypesLinuxFlat },
+    { RTPROCPRIORITY_LOW,       "Low",       9,   9, g_aTypesLinuxRestricted },
+    { RTPROCPRIORITY_LOW,       "Low",      15,  15, g_aTypesLinuxRestricted },
+    { RTPROCPRIORITY_LOW,       "Low",      17,  17, g_aTypesLinuxRestricted },
+    { RTPROCPRIORITY_NORMAL,    "Normal",    0,   0, g_aTypesLinuxFree },
+    { RTPROCPRIORITY_NORMAL,    "Normal",    0,   0, g_aTypesLinuxRestricted },
+    { RTPROCPRIORITY_NORMAL,    "Normal",    0,   0, g_aTypesLinuxFlat },
+    { RTPROCPRIORITY_HIGH,      "High",     -9,  -9, g_aTypesLinuxFree },
+    { RTPROCPRIORITY_HIGH,      "High",     -7,  -7, g_aTypesLinuxFree },
+    { RTPROCPRIORITY_HIGH,      "High",     -5,  -5, g_aTypesLinuxFree },
+    { RTPROCPRIORITY_HIGH,      "High",     -3,  -3, g_aTypesLinuxFree },
+    { RTPROCPRIORITY_HIGH,      "High",     -1,  -1, g_aTypesLinuxFree },
+    { RTPROCPRIORITY_HIGH,      "High",     -9,  -9, g_aTypesLinuxRestricted },
+    { RTPROCPRIORITY_HIGH,      "High",     -7,  -7, g_aTypesLinuxRestricted },
+    { RTPROCPRIORITY_HIGH,      "High",     -5,  -5, g_aTypesLinuxRestricted },
+    { RTPROCPRIORITY_HIGH,      "High",     -3,  -3, g_aTypesLinuxRestricted },
+    { RTPROCPRIORITY_HIGH,      "High",     -1,  -1, g_aTypesLinuxRestricted },
+    { RTPROCPRIORITY_HIGH,      "High",     -9,  -9, g_aTypesLinuxFlat },
+    { RTPROCPRIORITY_HIGH,      "High",     -7,  -7, g_aTypesLinuxFlat },
+    { RTPROCPRIORITY_HIGH,      "High",     -5,  -5, g_aTypesLinuxFlat },
+    { RTPROCPRIORITY_HIGH,      "High",     -3,  -3, g_aTypesLinuxFlat },
+    { RTPROCPRIORITY_HIGH,      "High",     -1,  -1, g_aTypesLinuxFlat }
+};
+
+/**
+ * The dynamic default priority configuration.
+ *
+ * This will be recalculated at runtime depending on what the
+ * system allows us to do and what the current priority is.
+ */
+static PROCPRIORITY g_aDefaultPriority =
+{
+    RTPROCPRIORITY_LOW, "Default", 0, 0, g_aTypesLinuxRestricted
+};
+
+/** Pointer to the current priority configuration. */
+static const PROCPRIORITY *g_pProcessPriority = &g_aDefaultPriority;
+
+/** Set if we can raise the priority of a thread beyond the default.
+ *
+ * It might mean we have the CAP_SYS_NICE capability or that the
+ * process's RLIMIT_NICE is higher than the priority of the thread
+ * calculating the defaults.
+ */
+static bool g_fCanRaisePriority = false;
+
+/** Set if we can restore the priority after having temporarily lowered or raised it. */
+static bool g_fCanRestorePriority = false;
+
+/** Set if we can NOT raise the priority to the process default in a thread
+ * created by a thread running below the process default.
+ */
+static bool g_fScrewedUpMaxPriorityLimitInheritance = true;
+
+/** The highest priority (lowest nice value) we can set. */
+static int  g_iMaxPriority = 0;
+
+/** The lowest priority (highest nice value) we can set. */
+static int  g_iMinPriority = 19;
+
+/** Set when we've successfully determined the capabilities of the process and kernel. */
+static bool g_fInitialized = false;
+
+
+
+/*********************************************************************************************************************************
+*   Internal Functions                                                                                                           *
+*********************************************************************************************************************************/
+
+
+/**
+ * Saves all the scheduling attributes we can think of: nice value plus the sched_* and pthread parameters and policies.
+ */
+static void rtSchedNativeSave(PSAVEDPRIORITY pSave)
+{
+    memset(pSave, 0xff, sizeof(*pSave)); /* all fields start out as 0xff bytes */
+
+    errno = 0; /* -1 is a legitimate getpriority() result, so failure is detected via errno */
+    pSave->iPriority = getpriority(PRIO_PROCESS, 0 /* current process */);
+    Assert(errno == 0);
+
+    errno = 0;
+    sched_getparam(0 /* current process */, &pSave->SchedParam);
+    Assert(errno == 0);
+
+    errno = 0;
+    pSave->iPolicy = sched_getscheduler(0 /* current process */);
+    Assert(errno == 0);
+
+    int rc = pthread_getschedparam(pthread_self(), &pSave->iPthreadPolicy, &pSave->PthreadSchedParam);
+    Assert(rc == 0); NOREF(rc);
+}
+
+
+/**
+ * Restores the scheduling attributes captured by rtSchedNativeSave (best effort, all results are ignored).
+ * Most of this won't work right, but anyway...
+ */
+static void rtSchedNativeRestore(PSAVEDPRIORITY pSave)
+{
+    setpriority(PRIO_PROCESS, 0, pSave->iPriority);
+    sched_setscheduler(0, pSave->iPolicy, &pSave->SchedParam);
+    sched_setparam(0, &pSave->SchedParam);
+    pthread_setschedparam(pthread_self(), pSave->iPthreadPolicy, &pSave->PthreadSchedParam);
+}
+
+
+/**
+ * Creates the pthread; runs on the priority proxy thread when that was
+ * requested and is available, otherwise rtSchedRunThread() calls it directly.
+ */
+static DECLCALLBACK(int) rtSchedRunThreadCallback(pthread_t *pThread, void *(*pfnThread)(void *pvArg), void *pvArg)
+{
+    int const iErrNo = pthread_create(pThread, NULL, pfnThread, pvArg); /* returns the error number directly */
+    if (iErrNo != 0)
+        return RTErrConvertFromErrno(iErrNo);
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Spawns a raw pthread running @a pfnThread and blocks until it has finished.
+ *
+ * RTThreadCreate is off limits here since we're already owner of the RW lock.
+ *
+ * @returns IPRT status on creation/join failure, else the thread's return value cast to int.
+ * @param   pfnThread           The thread function.
+ * @param   pvArg               The argument for @a pfnThread.
+ * @param   fUsePriorityProxy   Create the thread via the priority proxy, if compiled in and startable.
+ */
+static int rtSchedRunThread(void *(*pfnThread)(void *pvArg), void *pvArg, bool fUsePriorityProxy)
+{
+    pthread_t hWorker;
+    int       rc;
+#ifdef RTTHREAD_POSIX_WITH_CREATE_PRIORITY_PROXY
+    if (   fUsePriorityProxy
+        && rtThreadPosixPriorityProxyStart())
+        rc = rtThreadPosixPriorityProxyCall(NULL, (PFNRT)rtSchedRunThreadCallback, 3, &hWorker, pfnThread, pvArg);
+    else
+        rc = rtSchedRunThreadCallback(&hWorker, pfnThread, pvArg);
+#else
+    RT_NOREF(fUsePriorityProxy);
+    rc = rtSchedRunThreadCallback(&hWorker, pfnThread, pvArg);
+#endif
+    if (RT_FAILURE(rc))
+        return rc;
+
+    /* Join it, retrying for as long as the call reports EINTR. */
+    void *pvResult = (void *)-1;
+    int   iErrNo;
+    do
+        iErrNo = pthread_join(hWorker, &pvResult);
+    while (iErrNo == EINTR);
+    if (iErrNo)
+        return RTErrConvertFromErrno(iErrNo);
+    return (int)(uintptr_t)pvResult;
+}
+
+
+static void rtSchedDumpPriority(void)
+{ /* Logs the capability globals and the per-type priorities of the current configuration; empty unless THREAD_LOGGING is defined. */
+#ifdef THREAD_LOGGING
+    Log(("Priority: g_fCanRaisePriority=%RTbool g_fCanRestorePriority=%RTbool g_fScrewedUpMaxPriorityLimitInheritance=%RTbool\n",
+         g_fCanRaisePriority, g_fCanRestorePriority, g_fScrewedUpMaxPriorityLimitInheritance));
+    Log(("Priority: g_iMaxPriority=%d g_iMinPriority=%d\n", g_iMaxPriority, g_iMinPriority));
+    Log(("Priority: enmPriority=%d \"%s\" iNice=%d iDelta=%d\n",
+         g_pProcessPriority->enmPriority,
+         g_pProcessPriority->pszName,
+         g_pProcessPriority->iNice,
+         g_pProcessPriority->iDelta));
+    Log(("Priority:  %2d INFREQUENT_POLLER = %d\n", RTTHREADTYPE_INFREQUENT_POLLER, g_pProcessPriority->paTypes[RTTHREADTYPE_INFREQUENT_POLLER].iPriority));
+    Log(("Priority:  %2d MAIN_HEAVY_WORKER = %d\n", RTTHREADTYPE_MAIN_HEAVY_WORKER, g_pProcessPriority->paTypes[RTTHREADTYPE_MAIN_HEAVY_WORKER].iPriority));
+    Log(("Priority:  %2d EMULATION         = %d\n", RTTHREADTYPE_EMULATION        , g_pProcessPriority->paTypes[RTTHREADTYPE_EMULATION        ].iPriority));
+    Log(("Priority:  %2d DEFAULT           = %d\n", RTTHREADTYPE_DEFAULT          , g_pProcessPriority->paTypes[RTTHREADTYPE_DEFAULT          ].iPriority));
+    Log(("Priority:  %2d GUI               = %d\n", RTTHREADTYPE_GUI              , g_pProcessPriority->paTypes[RTTHREADTYPE_GUI              ].iPriority));
+    Log(("Priority:  %2d MAIN_WORKER       = %d\n", RTTHREADTYPE_MAIN_WORKER      , g_pProcessPriority->paTypes[RTTHREADTYPE_MAIN_WORKER      ].iPriority));
+    Log(("Priority:  %2d VRDP_IO           = %d\n", RTTHREADTYPE_VRDP_IO          , g_pProcessPriority->paTypes[RTTHREADTYPE_VRDP_IO          ].iPriority));
+    Log(("Priority:  %2d DEBUGGER          = %d\n", RTTHREADTYPE_DEBUGGER         , g_pProcessPriority->paTypes[RTTHREADTYPE_DEBUGGER         ].iPriority));
+    Log(("Priority:  %2d MSG_PUMP          = %d\n", RTTHREADTYPE_MSG_PUMP         , g_pProcessPriority->paTypes[RTTHREADTYPE_MSG_PUMP         ].iPriority));
+    Log(("Priority:  %2d IO                = %d\n", RTTHREADTYPE_IO               , g_pProcessPriority->paTypes[RTTHREADTYPE_IO               ].iPriority));
+    Log(("Priority:  %2d TIMER             = %d\n", RTTHREADTYPE_TIMER            , g_pProcessPriority->paTypes[RTTHREADTYPE_TIMER            ].iPriority));
+#endif /* THREAD_LOGGING */
+}
+
+
+/**
+ * Sub-prober thread: checks whether a thread created by a parent running at a
+ * low priority is still able to change its own priority.
+ *
+ * @returns zero on success, non-zero ((void *)-1) on failure.
+ * @param   pvUser  The priority of the parent before it was lowered (cast to int).
+ */
+static void *rtSchedNativeSubProberThread(void *pvUser)
+{
+    int const iParentPrio = (int)(intptr_t)pvUser;
+    int const iInherited  = getpriority(PRIO_PROCESS, 0);
+    Assert(iInherited == g_iMinPriority);
+
+    /* Two attempts in a row; the second isn't made if the first one fails. */
+    bool const fBothOk = !setpriority(PRIO_PROCESS, 0, iInherited + 1)
+                      && !setpriority(PRIO_PROCESS, 0, iParentPrio);
+    return fBothOk ? (void *)0 : (void *)-1;
+}
+
+
+/**
+ * The prober thread: works out g_iMaxPriority, g_iMinPriority, g_fCanRaisePriority,
+ * g_fCanRestorePriority and g_fScrewedUpMaxPriorityLimitInheritance by trial and error.
+ * It runs as a separate thread as we don't want to mess with the priority of the calling thread.
+ *
+ * @remark  This is pretty presumptive stuff, but if it works on Linux and FreeBSD it does what I want.
+ */
+static void *rtSchedNativeProberThread(void *pvUser)
+{
+    NOREF(pvUser);
+    SAVEDPRIORITY SavedPriority;
+    rtSchedNativeSave(&SavedPriority);
+
+    /*
+     * Check if we can get higher priority (typically only root can do this).
+     * (Won't work right if our priority is -19 to start with, but what the heck.)
+     *
+     * We assume that the priority range is -19 to 19. Should probably find the right
+     * define for this.
+     */
+    int iStart = getpriority(PRIO_PROCESS, 0);
+    int i = iStart;
+    while (i-- > -20) /* step the nice value down one at a time until setpriority() refuses */
+        if (setpriority(PRIO_PROCESS, 0, i))
+            break;
+    g_iMaxPriority = getpriority(PRIO_PROCESS, 0);
+    g_fCanRaisePriority = g_iMaxPriority < iStart;
+    g_fCanRestorePriority = setpriority(PRIO_PROCESS, 0, iStart) == 0;
+
+    /*
+     * Check if we can temporarily lower the thread priority.
+     * Again, we assume we're not at the extreme end of the priority scale.
+     */
+    iStart = getpriority(PRIO_PROCESS, 0);
+    i = iStart;
+    while (i++ < 19) /* step the nice value up one at a time until setpriority() refuses */
+        if (setpriority(PRIO_PROCESS, 0, i))
+            break;
+    g_iMinPriority = getpriority(PRIO_PROCESS, 0);
+    if (    setpriority(PRIO_PROCESS, 0, iStart)
+        ||  getpriority(PRIO_PROCESS, 0) != iStart)
+        g_fCanRestorePriority = false;
+    if (g_iMinPriority == g_iMaxPriority)
+        g_fCanRestorePriority = g_fCanRaisePriority = false;
+
+    /*
+     * Check what happens to child threads which are created while the
+     * parent is running at a lowered priority.
+     */
+    iStart = getpriority(PRIO_PROCESS, 0);
+    g_fScrewedUpMaxPriorityLimitInheritance = true;
+    if (    g_fCanRestorePriority
+        &&  !setpriority(PRIO_PROCESS, 0, g_iMinPriority)
+        &&  iStart != g_iMinPriority)
+    {
+        if (rtSchedRunThread(rtSchedNativeSubProberThread, (void *)(intptr_t)iStart, false /*fUsePriorityProxy*/) == 0)
+            g_fScrewedUpMaxPriorityLimitInheritance = false;
+    }
+
+    /* done */
+    rtSchedNativeRestore(&SavedPriority);
+    return (void *)VINF_SUCCESS;
+}
+
+
+/**
+ * Works out the scheduling properties of the default process priority, on
+ * the assumption that the calling thread is of type @a enmType.
+ *
+ * @returns iprt status code.
+ * @param   enmType     The thread type to be assumed for the current thread.
+ */
+DECLHIDDEN(int) rtSchedNativeCalcDefaultPriority(RTTHREADTYPE enmType)
+{
+    Assert(enmType > RTTHREADTYPE_INVALID && enmType < RTTHREADTYPE_END);
+
+    /*
+     * One-time probing of what this process may do, done on a separate thread.
+     */
+    if (!g_fInitialized)
+    {
+        int const iNiceBefore = getpriority(PRIO_PROCESS, 0);
+#ifdef RLIMIT_RTPRIO
+        /** @todo */
+#endif
+        int const rcProbe = rtSchedRunThread(rtSchedNativeProberThread, NULL, false /*fUsePriorityProxy*/);
+        if (RT_FAILURE(rcProbe))
+            return rcProbe;
+        Assert(getpriority(PRIO_PROCESS, 0) == iNiceBefore); NOREF(iNiceBefore);
+        g_fInitialized = true;
+    }
+
+    /*
+     * Choose the type table.  The flat one is the fallback; the other two
+     * require restorable priorities and sane limit inheritance, and which
+     * of them it is depends on whether we can raise the priority.
+     */
+    const PROCPRIORITYTYPE *paChosen = &g_aTypesLinuxFlat[0];
+    if (g_fCanRestorePriority && !g_fScrewedUpMaxPriorityLimitInheritance)
+        paChosen = g_fCanRaisePriority ? &g_aTypesLinuxFree[0] : &g_aTypesLinuxRestricted[0];
+    g_aDefaultPriority.paTypes = paChosen;
+    Assert(enmType == paChosen[enmType].enmType);
+
+    /* Subtract this thread's delta from its current nice value to get the process level value. */
+    int const iNiceNow = getpriority(PRIO_PROCESS, 0 /* current process */);
+    g_aDefaultPriority.iNice  = iNiceNow - paChosen[enmType].iPriority;
+    g_aDefaultPriority.iDelta = g_aDefaultPriority.iNice;
+
+    rtSchedDumpPriority();
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Validator thread: checks that a thread at priority iCurrent can switch to iNew.
+ * (Runs separately as we don't want to mess with the priority of the calling thread.)
+ *
+ * @returns IPRT status code cast to a pointer.
+ * @param   pvUser  Pointer to the VALIDATORPRIORITYPAIR to test.
+ */
+static void *rtSchedNativeValidatorThread(void *pvUser)
+{
+    VALIDATORPRIORITYPAIR const * const pPair = (PVALIDATORPRIORITYPAIR)pvUser;
+    int                                 rc    = VINF_SUCCESS;
+    SAVEDPRIORITY                       Saved;
+    rtSchedNativeSave(&Saved);
+
+    /* Step 1: assume the current priority, unless the caller passed INT_MAX (no threads of this type). */
+    if (   pPair->iCurrent != INT_MAX
+        && setpriority(PRIO_PROCESS, 0, pPair->iCurrent) != 0)
+        rc = RTErrConvertFromErrno(errno);
+
+    /* Step 2: attempt the switch to the new priority, provided step 1 went fine. */
+    if (RT_SUCCESS(rc))
+    {
+        if (setpriority(PRIO_PROCESS, 0, pPair->iNew) != 0)
+            rc = RTErrConvertFromErrno(errno);
+    }
+
+    /* Put everything back before reporting. */
+    rtSchedNativeRestore(&Saved);
+    return (void *)(intptr_t)rc;
+}
+
+
+/**
+ * Checks that a suggested priority scheme can actually be applied.
+ *
+ * Each thread type is tested in turn, from the highest type value downwards,
+ * by running rtSchedNativeValidatorThread on the (current, new) priority pair.
+ *
+ * @returns iprt status code.
+ * @param   pCfg                The priority scheme to validate.
+ * @param   fHavePriorityProxy  Set if we've got a priority proxy thread,
+ *                              otherwise clear.
+ */
+static int rtSchedNativeCheckThreadTypes(const PROCPRIORITY *pCfg, bool fHavePriorityProxy)
+{
+    for (int iType = RTTHREADTYPE_END - 1; iType > RTTHREADTYPE_INVALID; iType--)
+    {
+        /* Only assume a current priority if threads of this type exist; INT_MAX makes the validator skip it. */
+        VALIDATORPRIORITYPAIR Pair;
+        Pair.iNew     = pCfg->paTypes[iType].iPriority + pCfg->iDelta;
+        Pair.iCurrent = g_acRTThreadTypeStats[iType] != 0
+                      ? g_pProcessPriority->paTypes[iType].iPriority + g_pProcessPriority->iDelta
+                      : INT_MAX;
+
+#ifdef RT_STRICT
+        int const iNiceBefore = getpriority(PRIO_PROCESS, 0);
+#endif
+        int const rc = rtSchedRunThread(rtSchedNativeValidatorThread, &Pair, fHavePriorityProxy /*fUsePriorityProxy*/);
+        Assert(getpriority(PRIO_PROCESS, 0) == iNiceBefore);
+        if (RT_FAILURE(rc))
+            return rc;
+    }
+
+    return VINF_SUCCESS;
+}
+
+
+DECLHIDDEN(int) rtProcNativeSetPriority(RTPROCPRIORITY enmPriority)
+{
+    Assert(enmPriority > RTPROCPRIORITY_INVALID && enmPriority < RTPROCPRIORITY_LAST);
+
+    /*
+     * Get the proxy creation thread going (if compiled in) so that our
+     * initial priority isn't 'lost' should it be lowered.
+     */
+#ifdef RTTHREAD_POSIX_WITH_CREATE_PRIORITY_PROXY
+    bool const fProxy = rtThreadPosixPriorityProxyStart();
+#else
+    bool const fProxy = false;
+#endif
+
+    int rc;
+    if (enmPriority != RTPROCPRIORITY_DEFAULT)
+    {
+        /*
+         * Use the first table entry for this priority that validates.  When none
+         * does, a failure other than VERR_ACCESS_DENIED sticks once it was seen.
+         */
+        rc = VERR_NOT_FOUND;
+        for (unsigned iCfg = 0; iCfg < RT_ELEMENTS(g_aUnixConfigs); iCfg++)
+        {
+            const PROCPRIORITY *pCandidate = &g_aUnixConfigs[iCfg];
+            if (pCandidate->enmPriority != enmPriority)
+                continue;
+            int const rcCheck = rtSchedNativeCheckThreadTypes(pCandidate, fProxy);
+            if (RT_SUCCESS(rcCheck))
+            {
+                g_pProcessPriority = pCandidate;
+                rc = VINF_SUCCESS;
+                break;
+            }
+            if (rc == VERR_NOT_FOUND || rc == VERR_ACCESS_DENIED)
+                rc = rcCheck;
+        }
+    }
+    else
+    {
+        /* The default: raising existing threads again may be impossible if we've lowered the priority since start. */
+        rc = rtSchedNativeCheckThreadTypes(&g_aDefaultPriority, fProxy);
+        if (RT_SUCCESS(rc))
+            g_pProcessPriority = &g_aDefaultPriority;
+    }
+
+#ifdef THREAD_LOGGING
+    LogFlow(("rtProcNativeSetPriority: returns %Rrc enmPriority=%d\n", rc, enmPriority));
+    rtSchedDumpPriority();
+#endif
+    return rc;
+}
+
+
+/**
+ * Applies @a iPriority to @a pThread; called on the priority proxy thread if it's running, else directly by rtThreadNativeSetPriority.
+ * @returns VINF_SUCCESS always; a setpriority() failure only triggers an assertion (non-fatal for now).
+ */
+static DECLCALLBACK(int) rtThreadLinuxSetPriorityCallback(PRTTHREADINT pThread, int iPriority)
+{
+    if (!setpriority(PRIO_PROCESS, pThread->tid, iPriority))
+    {
+        AssertMsg(iPriority == getpriority(PRIO_PROCESS, pThread->tid),
+                  ("iPriority=%d getpriority()=%d\n", iPriority, getpriority(PRIO_PROCESS, pThread->tid)));
+#ifdef THREAD_LOGGING /* the thread type isn't passed to this callback, so it cannot be logged here */
+        Log(("rtThreadNativeSetPriority: Thread=%p iPriority=%d pid=%d tid=%d\n",
+             pThread->Core.Key, iPriority, getpid(), pThread->tid));
+#endif
+        return VINF_SUCCESS;
+    }
+    AssertMsgFailed(("setpriority(,, %d) -> errno=%d rc=%Rrc\n", iPriority, errno, RTErrConvertFromErrno(errno)));
+    return VINF_SUCCESS; //non-fatal for now.
+}
+
+
+DECLHIDDEN(int) rtThreadNativeSetPriority(PRTTHREADINT pThread, RTTHREADTYPE enmType)
+{
+    /* Paranoia. */
+    Assert(enmType > RTTHREADTYPE_INVALID && enmType < RTTHREADTYPE_END);
+    Assert(enmType == g_pProcessPriority->paTypes[enmType].enmType);
+
+    /* Alien threads have a zero thread ID; using that would risk modifying our own priority, so bail. */
+    if (pThread->tid == 0)
+        return VINF_SUCCESS;
+
+    /*
+     * Work out the nice value for this thread type and apply it, preferably
+     * by way of the priority proxy thread.
+     */
+    const PROCPRIORITY * const pCfg = g_pProcessPriority;
+    int const iNice = pCfg->paTypes[enmType].iPriority + pCfg->iDelta;
+#ifdef RTTHREAD_POSIX_WITH_CREATE_PRIORITY_PROXY
+    bool const fViaProxy = rtThreadPosixPriorityProxyStart();
+    if (fViaProxy)
+        return rtThreadPosixPriorityProxyCall(pThread, (PFNRT)rtThreadLinuxSetPriorityCallback, 2, pThread, iNice);
+#endif
+    return rtThreadLinuxSetPriorityCallback(pThread, iNice);
+}
+
diff --git a/src/VBox/Runtime/r3/linux/semevent-linux.cpp b/src/VBox/Runtime/r3/linux/semevent-linux.cpp
new file mode 100644
index 00000000..c3a973c1
--- /dev/null
+++ b/src/VBox/Runtime/r3/linux/semevent-linux.cpp
@@ -0,0 +1,607 @@
+/* $Id: semevent-linux.cpp $ */
+/** @file
+ * IPRT - Event Semaphore, Linux (2.6.0 and later).
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+#include <features.h>
+#if __GLIBC_PREREQ(2,6) && !defined(IPRT_WITH_FUTEX_BASED_SEMS)
+
+/*
+ * glibc 2.6 fixed a serious bug in the mutex implementation. We wrote this
+ * linux specific event semaphores code in order to work around the bug. We
+ * will fall back on the pthread-based implementation if glibc is known to
+ * contain the bug fix.
+ *
+ * The external reference to epoll_pwait is a hack which prevents that we link
+ * against glibc < 2.6.
+ */
+# include "../posix/semevent-posix.cpp"
+__asm__ (".global epoll_pwait");
+
+#else /* glibc < 2.6 */
+
+
+/*********************************************************************************************************************************
+*   Header Files                                                                                                                 *
+*********************************************************************************************************************************/
+#include <iprt/semaphore.h>
+#include "internal/iprt.h"
+
+#include <iprt/asm.h>
+#include <iprt/assert.h>
+#include <iprt/err.h>
+#include <iprt/lockvalidator.h>
+#include <iprt/mem.h>
+#include <iprt/time.h>
+#include "internal/magics.h"
+#include "internal/mem.h"
+#include "internal/strict.h"
+
+#include <errno.h>
+#include <limits.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <sys/syscall.h>
+
+#include "semwait-linux.h"
+
+
+/*********************************************************************************************************************************
+*   Structures and Typedefs                                                                                                      *
+*********************************************************************************************************************************/
+/**
+ * Linux (single wakeup) event semaphore.
+ *
+ * Allocated by RTSemEventCreateEx() and freed by RTSemEventDestroy().
+ */
+struct RTSEMEVENTINTERNAL
+{
+    /** Magic value.
+     * RTSEMEVENT_MAGIC while the semaphore is alive; RTSemEventDestroy()
+     * replaces it with RTSEMEVENT_MAGIC | 0x80000000 before freeing. */
+    intptr_t volatile   iMagic;
+    /** The futex state variable.
+     * 0 means not signalled.
+     * 1 means signalled. */
+    uint32_t volatile   fSignalled;
+    /** The number of waiting threads.
+     * Incremented/decremented by the wait functions; RTSemEventDestroy()
+     * swaps in INT32_MIN / 2. */
+    int32_t volatile    cWaiters;
+#ifdef RTSEMEVENT_STRICT
+    /** Signallers. */
+    RTLOCKVALRECSHRD    Signallers;
+    /** Indicates that lock validation should be performed.
+     * Set by RTSemEventSetSignaller() and RTSemEventAddSignaller(). */
+    bool volatile       fEverHadSignallers;
+#endif
+    /** The creation flags (RTSEMEVENT_FLAGS_XXX), also consulted on destruction
+     * to pick the matching free function. */
+    uint32_t            fFlags;
+};
+
+
+/*********************************************************************************************************************************
+*   Global Variables                                                                                                             *
+*********************************************************************************************************************************/
+/** Whether we can use FUTEX_WAIT_BITSET.
+ * Starts out as -1 and is handed to rtSemLinuxCheckForFutexWaitBitSet() every
+ * time a semaphore is created.  NOTE(review): -1 presumably means "not probed
+ * yet" - confirm against semwait-linux.h. */
+static int volatile g_fCanUseWaitBitSet = -1;
+
+
+
+
+/**
+ * Creates an event semaphore with default settings.
+ *
+ * Thin wrapper around RTSemEventCreateEx() passing no flags, no lock validator
+ * class and no name format.
+ *
+ * @returns Whatever RTSemEventCreateEx() returns.
+ * @param   phEventSem  Where to store the new semaphore handle.
+ */
+RTDECL(int)  RTSemEventCreate(PRTSEMEVENT phEventSem)
+{
+    int const rc = RTSemEventCreateEx(phEventSem, 0 /*fFlags*/, NIL_RTLOCKVALCLASS, NULL);
+    return rc;
+}
+
+
+/**
+ * Creates an event semaphore.
+ *
+ * @returns VINF_SUCCESS on success, VERR_INVALID_PARAMETER if unknown flags
+ *          are passed, VERR_NO_MEMORY if the allocation fails.
+ * @param   phEventSem  Where to store the handle on success.
+ * @param   fFlags      RTSEMEVENT_FLAGS_NO_LOCK_VAL and/or
+ *                      RTSEMEVENT_FLAGS_BOOTSTRAP_HACK.
+ * @param   hClass      Lock validator class (RTSEMEVENT_STRICT builds only).
+ * @param   pszNameFmt  Optional name format string for the lock validator
+ *                      record (RTSEMEVENT_STRICT builds only).
+ * @param   ...         Format arguments.
+ */
+RTDECL(int)  RTSemEventCreateEx(PRTSEMEVENT phEventSem, uint32_t fFlags, RTLOCKVALCLASS hClass, const char *pszNameFmt, ...)
+{
+    AssertReturn(!(fFlags & ~(RTSEMEVENT_FLAGS_NO_LOCK_VAL | RTSEMEVENT_FLAGS_BOOTSTRAP_HACK)), VERR_INVALID_PARAMETER);
+    Assert(!(fFlags & RTSEMEVENT_FLAGS_BOOTSTRAP_HACK) || (fFlags & RTSEMEVENT_FLAGS_NO_LOCK_VAL));
+
+    /* Make sure we know whether FUTEX_WAIT_BITSET works. */
+    rtSemLinuxCheckForFutexWaitBitSet(&g_fCanUseWaitBitSet);
+#if defined(DEBUG_bird) && !defined(IN_GUEST)
+    Assert(g_fCanUseWaitBitSet == true);
+#endif
+
+    /* Allocate the handle; bootstrap-hack semaphores use the base allocator. */
+    struct RTSEMEVENTINTERNAL * const pThis = (fFlags & RTSEMEVENT_FLAGS_BOOTSTRAP_HACK)
+                                            ? (struct RTSEMEVENTINTERNAL *)rtMemBaseAlloc(sizeof(struct RTSEMEVENTINTERNAL))
+                                            : (struct RTSEMEVENTINTERNAL *)RTMemAlloc(sizeof(struct RTSEMEVENTINTERNAL));
+    if (!pThis)
+        return  VERR_NO_MEMORY;
+
+    /* Initialize it: alive, nobody waiting, not signalled. */
+    pThis->iMagic     = RTSEMEVENT_MAGIC;
+    pThis->cWaiters   = 0;
+    pThis->fSignalled = 0;
+    pThis->fFlags     = fFlags;
+#ifdef RTSEMEVENT_STRICT
+    if (pszNameFmt)
+    {
+        /* Caller supplied a name. */
+        va_list va;
+        va_start(va, pszNameFmt);
+        RTLockValidatorRecSharedInitV(&pThis->Signallers, hClass, RTLOCKVAL_SUB_CLASS_ANY, pThis,
+                                      true /*fSignaller*/, !(fFlags & RTSEMEVENT_FLAGS_NO_LOCK_VAL),
+                                      pszNameFmt, va);
+        va_end(va);
+    }
+    else
+    {
+        /* Anonymous semaphore: number them sequentially. */
+        static uint32_t volatile s_iSemEventAnon = 0;
+        RTLockValidatorRecSharedInit(&pThis->Signallers, hClass, RTLOCKVAL_SUB_CLASS_ANY, pThis,
+                                     true /*fSignaller*/, !(fFlags & RTSEMEVENT_FLAGS_NO_LOCK_VAL),
+                                     "RTSemEvent-%u", ASMAtomicIncU32(&s_iSemEventAnon) - 1);
+    }
+    pThis->fEverHadSignallers = false;
+#else
+    RT_NOREF(hClass, pszNameFmt);
+#endif
+
+    *phEventSem = pThis;
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Destroys an event semaphore.
+ *
+ * @returns VINF_SUCCESS on success or when the handle is NIL_RTSEMEVENT,
+ *          VERR_INVALID_HANDLE if the pointer or its magic is invalid.
+ * @param   hEventSem   The semaphore to destroy.  NIL_RTSEMEVENT is quietly
+ *                      ignored.
+ */
+RTDECL(int)  RTSemEventDestroy(RTSEMEVENT hEventSem)
+{
+    /*
+     * Validate input.
+     */
+    struct RTSEMEVENTINTERNAL *pThis = hEventSem;
+    if (pThis == NIL_RTSEMEVENT)
+        return VINF_SUCCESS;
+    AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+    AssertReturn(pThis->iMagic == RTSEMEVENT_MAGIC, VERR_INVALID_HANDLE);
+
+    /*
+     * Invalidate the semaphore and wake up anyone waiting on it.
+     *
+     * The magic is changed first so that woken waiters find a dead semaphore
+     * (they return VERR_SEM_DESTROYED).  The waiter count is then swapped for
+     * a large negative value; only if there were waiters do we wake them all
+     * and sleep for 1000 microseconds before freeing the memory.
+     * NOTE(review): the sleep presumably gives the woken threads time to leave
+     * the structure; it is not a hard guarantee.
+     */
+    ASMAtomicXchgSize(&pThis->iMagic, RTSEMEVENT_MAGIC | UINT32_C(0x80000000));
+    if (ASMAtomicXchgS32(&pThis->cWaiters, INT32_MIN / 2) > 0)
+    {
+        sys_futex(&pThis->fSignalled, FUTEX_WAKE, INT_MAX, NULL, NULL, 0);
+        usleep(1000);
+    }
+
+    /*
+     * Free the semaphore memory and be gone.
+     * The free function must match the allocator chosen at creation time.
+     */
+#ifdef RTSEMEVENT_STRICT
+    RTLockValidatorRecSharedDelete(&pThis->Signallers);
+#endif
+    if (!(pThis->fFlags & RTSEMEVENT_FLAGS_BOOTSTRAP_HACK))
+        RTMemFree(pThis);
+    else
+        rtMemBaseFree(pThis);
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Signals an event semaphore, waking up at most one waiter.
+ *
+ * @returns VINF_SUCCESS on success, VERR_INVALID_HANDLE if the handle is bad,
+ *          a lock validator status in RTSEMEVENT_STRICT builds, and if the
+ *          futex wake-up fails either VERR_SEM_DESTROYED (magic changed) or
+ *          VERR_INVALID_PARAMETER.
+ * @param   hEventSem   The semaphore to signal.
+ */
+RTDECL(int)  RTSemEventSignal(RTSEMEVENT hEventSem)
+{
+    /*
+     * Validate input.
+     */
+    struct RTSEMEVENTINTERNAL *pThis = hEventSem;
+    AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+    AssertReturn(pThis->iMagic == RTSEMEVENT_MAGIC, VERR_INVALID_HANDLE);
+
+#ifdef RTSEMEVENT_STRICT
+    /* Only validate the signaller once signallers have been registered. */
+    if (pThis->fEverHadSignallers)
+    {
+        int rc9 = RTLockValidatorRecSharedCheckSignaller(&pThis->Signallers, NIL_RTTHREAD);
+        if (RT_FAILURE(rc9))
+            return rc9;
+    }
+#endif
+
+    /* Set the signalled state first, then look for waiters; no syscall is
+       needed when nobody is waiting. */
+    ASMAtomicWriteU32(&pThis->fSignalled, 1);
+    if (ASMAtomicReadS32(&pThis->cWaiters) < 1)
+        return VINF_SUCCESS;
+
+    /* somebody is waiting, try wake up one of them. */
+    long cWoken = sys_futex(&pThis->fSignalled, FUTEX_WAKE, 1, NULL, NULL, 0);
+    if (RT_LIKELY(cWoken >= 0))
+        return VINF_SUCCESS;
+
+    /* The wake-up failed; report destruction if that is what happened. */
+    if (RT_UNLIKELY(pThis->iMagic != RTSEMEVENT_MAGIC))
+        return VERR_SEM_DESTROYED;
+
+    return VERR_INVALID_PARAMETER;
+}
+
+
+/**
+ * Performs an indefinite wait on the event.
+ *
+ * @returns VINF_SUCCESS when the signal was consumed, VERR_SEM_DESTROYED if
+ *          the magic changed while waiting, VERR_INTERRUPTED on EINTR when
+ *          RTSEMWAIT_FLAGS_NORESUME is set, VERR_TIMEOUT if the futex call
+ *          reports a timeout, a lock validator status in RTSEMEVENT_STRICT
+ *          builds, or a converted errno for unexpected futex failures.
+ * @param   pThis       The semaphore.
+ * @param   fFlags      RTSEMWAIT_FLAGS_XXX; only RTSEMWAIT_FLAGS_NORESUME is
+ *                      examined here.
+ * @param   pSrcPos     Source position for the lock validator (only used in
+ *                      RTSEMEVENT_STRICT builds).
+ */
+static int rtSemEventLinuxWaitIndefinite(struct RTSEMEVENTINTERNAL *pThis, uint32_t fFlags, PCRTLOCKVALSRCPOS pSrcPos)
+{
+    RT_NOREF_PV(pSrcPos);
+
+    /*
+     * Quickly check whether it's signaled and there are no other waiters.
+     */
+    int32_t const cWaiters = ASMAtomicIncS32(&pThis->cWaiters);
+    if (   cWaiters == 1
+        && ASMAtomicCmpXchgU32(&pThis->fSignalled, 0, 1))
+    {
+        ASMAtomicDecS32(&pThis->cWaiters);
+        return VINF_SUCCESS;
+    }
+
+    /*
+     * The wait loop.
+     */
+#ifdef RTSEMEVENT_STRICT
+    RTTHREAD hThreadSelf = !(pThis->fFlags & RTSEMEVENT_FLAGS_BOOTSTRAP_HACK)
+                         ? RTThreadSelfAutoAdopt()
+                         : RTThreadSelf();
+#else
+    RTTHREAD hThreadSelf = RTThreadSelf();
+#endif
+    int rc = VINF_SUCCESS;
+    for (;;)
+    {
+#ifdef RTSEMEVENT_STRICT
+        if (pThis->fEverHadSignallers)
+        {
+            rc = RTLockValidatorRecSharedCheckBlocking(&pThis->Signallers, hThreadSelf, pSrcPos, false,
+                                                       RT_INDEFINITE_WAIT, RTTHREADSTATE_EVENT, true);
+            if (RT_FAILURE(rc))
+                break;
+        }
+#endif
+        /* Sleep for as long as the futex word is zero (not signalled). */
+        RTThreadBlocking(hThreadSelf, RTTHREADSTATE_EVENT, true);
+        long lrc = sys_futex(&pThis->fSignalled, FUTEX_WAIT, 0, NULL /*pTimeout*/, NULL, 0);
+        RTThreadUnblocked(hThreadSelf, RTTHREADSTATE_EVENT);
+        if (RT_UNLIKELY(pThis->iMagic != RTSEMEVENT_MAGIC))
+        {
+            rc = VERR_SEM_DESTROYED;
+            break;
+        }
+
+        if (RT_LIKELY(lrc == 0 || lrc == -EWOULDBLOCK))
+        {
+            /* successful wakeup or fSignalled > 0 in the meantime */
+            if (ASMAtomicCmpXchgU32(&pThis->fSignalled, 0, 1))
+                break;
+            /* Somebody else consumed the signal first; wait again. */
+        }
+        else if (lrc == -ETIMEDOUT)
+        {
+            rc = VERR_TIMEOUT;
+            break;
+        }
+        else if (lrc == -EINTR)
+        {
+            if (fFlags & RTSEMWAIT_FLAGS_NORESUME)
+            {
+                rc = VERR_INTERRUPTED;
+                break;
+            }
+        }
+        else
+        {
+            /* this shouldn't happen! */
+            AssertMsgFailed(("rc=%ld errno=%d\n", lrc, errno));
+            /* lrc holds a negated errno value (see the comparisons above), so
+               flip the sign before converting it to an IPRT status code. */
+            rc = RTErrConvertFromErrno((int)-lrc);
+            break;
+        }
+    }
+
+    ASMAtomicDecS32(&pThis->cWaiters);
+    return rc;
+}
+
+
+/**
+ * Handle polling (timeout already expired at the time of the call).
+ *
+ * @returns VINF_SUCCESS if the signal was consumed, VERR_TIMEOUT otherwise.
+ * @param   pThis               The semaphore.
+ */
+static int rtSemEventLinuxWaitPoll(struct RTSEMEVENTINTERNAL *pThis)
+{
+    /*
+     * What we do here isn't quite fair to anyone else waiting on it, however
+     * it might not be as bad as all that for callers making repeated poll calls
+     * because they cannot block, as that would be a virtual wait but without the
+     * chance of a permanent queue position.   So, I hope we can live with this.
+     */
+    return ASMAtomicCmpXchgU32(&pThis->fSignalled, 0, 1) ? VINF_SUCCESS : VERR_TIMEOUT;
+}
+
+
+/**
+ * Performs a timed wait on the event.
+ *
+ * The timeout is first converted by rtSemLinuxCalcDeadline(); a result of zero
+ * is handled as a poll and UINT64_MAX as an indefinite wait.
+ *
+ * @returns VINF_SUCCESS when the signal was consumed, VERR_TIMEOUT when the
+ *          deadline passed, VERR_SEM_DESTROYED if the magic changed while
+ *          waiting, VERR_INTERRUPTED on EINTR when RTSEMWAIT_FLAGS_NORESUME is
+ *          set, a lock validator status in RTSEMEVENT_STRICT builds, or a
+ *          converted errno for unexpected futex failures.
+ * @param   pThis       The semaphore.
+ * @param   fFlags      RTSEMWAIT_FLAGS_XXX.
+ * @param   uTimeout    The timeout, interpreted according to @a fFlags.
+ * @param   pSrcPos     Source position for the lock validator (only used in
+ *                      RTSEMEVENT_STRICT builds).
+ */
+static int rtSemEventLinuxWaitTimed(struct RTSEMEVENTINTERNAL *pThis, uint32_t fFlags,
+                                    uint64_t uTimeout, PCRTLOCKVALSRCPOS pSrcPos)
+{
+    RT_NOREF_PV(pSrcPos);
+
+    /*
+     * Convert the timeout value.
+     */
+    struct timespec TsTimeout;
+    int             iWaitOp;
+    uint32_t        uWaitVal3;
+    uint64_t        nsAbsTimeout = uTimeout; /* (older gcc maybe used uninitialized) */
+    uTimeout = rtSemLinuxCalcDeadline(fFlags, uTimeout, g_fCanUseWaitBitSet, &TsTimeout, &iWaitOp, &uWaitVal3, &nsAbsTimeout);
+    if (uTimeout == 0)
+        return rtSemEventLinuxWaitPoll(pThis);
+    if (uTimeout == UINT64_MAX)
+        return rtSemEventLinuxWaitIndefinite(pThis, fFlags, pSrcPos);
+
+    /*
+     * Quickly check whether it's signaled and there are no other waiters.
+     */
+    int32_t const cWaiters = ASMAtomicIncS32(&pThis->cWaiters);
+    if (   cWaiters == 1
+        && ASMAtomicCmpXchgU32(&pThis->fSignalled, 0, 1))
+    {
+        ASMAtomicDecS32(&pThis->cWaiters);
+        return VINF_SUCCESS;
+    }
+
+    /*
+     * The wait loop.
+     */
+#ifdef RTSEMEVENT_STRICT
+    RTTHREAD hThreadSelf = !(pThis->fFlags & RTSEMEVENT_FLAGS_BOOTSTRAP_HACK)
+                         ? RTThreadSelfAutoAdopt()
+                         : RTThreadSelf();
+#else
+    RTTHREAD hThreadSelf = RTThreadSelf();
+#endif
+    int rc = VINF_SUCCESS;
+    for (;;)
+    {
+#ifdef RTSEMEVENT_STRICT
+        if (pThis->fEverHadSignallers)
+        {
+            rc = RTLockValidatorRecSharedCheckBlocking(&pThis->Signallers, hThreadSelf, pSrcPos, false,
+                                                       iWaitOp == FUTEX_WAIT ? uTimeout / RT_NS_1MS : RT_MS_1HOUR /*whatever*/,
+                                                       RTTHREADSTATE_EVENT, true);
+            if (RT_FAILURE(rc))
+                break;
+        }
+#endif
+        /* Sleep for as long as the futex word is zero (not signalled). */
+        RTThreadBlocking(hThreadSelf, RTTHREADSTATE_EVENT, true);
+        long lrc = sys_futex(&pThis->fSignalled, iWaitOp, 0, &TsTimeout, NULL, uWaitVal3);
+        RTThreadUnblocked(hThreadSelf, RTTHREADSTATE_EVENT);
+        if (RT_UNLIKELY(pThis->iMagic != RTSEMEVENT_MAGIC))
+        {
+            rc = VERR_SEM_DESTROYED;
+            break;
+        }
+
+        if (RT_LIKELY(lrc == 0 || lrc == -EWOULDBLOCK))
+        {
+            /* successful wakeup or fSignalled > 0 in the meantime */
+            if (ASMAtomicCmpXchgU32(&pThis->fSignalled, 0, 1))
+                break;
+            /* Somebody else consumed the signal first; wait again. */
+        }
+        else if (lrc == -ETIMEDOUT)
+        {
+#ifdef RT_STRICT
+            uint64_t const uNow = RTTimeNanoTS();
+            AssertMsg(uNow >= nsAbsTimeout || nsAbsTimeout - uNow < RT_NS_1MS,
+                      ("%#RX64 - %#RX64 => %#RX64 (%RI64)\n", nsAbsTimeout, uNow, nsAbsTimeout - uNow, nsAbsTimeout - uNow));
+#endif
+            rc = VERR_TIMEOUT;
+            break;
+        }
+        else if (lrc == -EINTR)
+        {
+            if (fFlags & RTSEMWAIT_FLAGS_NORESUME)
+            {
+                rc = VERR_INTERRUPTED;
+                break;
+            }
+        }
+        else
+        {
+            /* this shouldn't happen! */
+            AssertMsgFailed(("rc=%ld errno=%d\n", lrc, errno));
+            /* lrc holds a negated errno value (see the comparisons above), so
+               flip the sign before converting it to an IPRT status code. */
+            rc = RTErrConvertFromErrno((int)-lrc);
+            break;
+        }
+
+        /* adjust the relative timeout */
+        if (iWaitOp == FUTEX_WAIT)
+        {
+            int64_t i64Diff = nsAbsTimeout - RTTimeSystemNanoTS();
+            if (i64Diff < 1000)
+            {
+                /* Less than a microsecond left (or already overdue). */
+                rc = VERR_TIMEOUT;
+                break;
+            }
+            TsTimeout.tv_sec  = (uint64_t)i64Diff / RT_NS_1SEC;
+            TsTimeout.tv_nsec = (uint64_t)i64Diff % RT_NS_1SEC;
+        }
+    }
+
+    ASMAtomicDecS32(&pThis->cWaiters);
+    return rc;
+}
+
+
+/**
+ * Internal wait worker function.
+ *
+ * Validates the handle and the wait flags, then hands over to the indefinite
+ * or the timed wait implementation.
+ *
+ * @returns VERR_INVALID_HANDLE or VERR_INVALID_PARAMETER on bad input,
+ *          otherwise the status of the selected wait function.
+ * @param   hEventSem   The semaphore to wait on.
+ * @param   fFlags      RTSEMWAIT_FLAGS_XXX.
+ * @param   uTimeout    The timeout (ignored for indefinite waits).
+ * @param   pSrcPos     Source position for the lock validator, can be NULL.
+ */
+DECLINLINE(int) rtSemEventLinuxWait(RTSEMEVENT hEventSem, uint32_t fFlags, uint64_t uTimeout, PCRTLOCKVALSRCPOS pSrcPos)
+{
+    /* Input validation. */
+    struct RTSEMEVENTINTERNAL *pThis = hEventSem;
+    AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+    AssertReturn(pThis->iMagic == RTSEMEVENT_MAGIC, VERR_INVALID_HANDLE);
+    AssertReturn(RTSEMWAIT_FLAGS_ARE_VALID(fFlags), VERR_INVALID_PARAMETER);
+#ifdef RT_STRICT
+    uint32_t const fSignalled = pThis->fSignalled;
+    Assert(fSignalled == false || fSignalled == true);
+#endif
+
+    /* Dispatch on the kind of wait requested. */
+    if (!(fFlags & RTSEMWAIT_FLAGS_INDEFINITE))
+        return rtSemEventLinuxWaitTimed(pThis, fFlags, uTimeout, pSrcPos);
+    return rtSemEventLinuxWaitIndefinite(pThis, fFlags, pSrcPos);
+}
+
+
+/**
+ * Waits on an event semaphore, resuming the wait if interrupted.
+ *
+ * @returns Status of rtSemEventLinuxWait(); never VERR_INTERRUPTED (asserted).
+ * @param   hEventSem   The semaphore to wait on.
+ * @param   cMillies    Timeout in milliseconds, or RT_INDEFINITE_WAIT.
+ */
+RTDECL(int) RTSemEventWait(RTSEMEVENT hEventSem, RTMSINTERVAL cMillies)
+{
+    /* Strict builds pass the caller position on to the lock validator. */
+#ifdef RTSEMEVENT_STRICT
+    RTLOCKVALSRCPOS         SrcPos  = RTLOCKVALSRCPOS_INIT_NORMAL_API();
+    PCRTLOCKVALSRCPOS const pSrcPos = &SrcPos;
+#else
+    PCRTLOCKVALSRCPOS const pSrcPos = NULL;
+#endif
+
+    int rc;
+    if (cMillies != RT_INDEFINITE_WAIT)
+        rc = rtSemEventLinuxWait(hEventSem, RTSEMWAIT_FLAGS_RESUME | RTSEMWAIT_FLAGS_RELATIVE | RTSEMWAIT_FLAGS_MILLISECS,
+                                 cMillies, pSrcPos);
+    else
+        rc = rtSemEventLinuxWait(hEventSem, RTSEMWAIT_FLAGS_RESUME | RTSEMWAIT_FLAGS_INDEFINITE, 0, pSrcPos);
+    Assert(rc != VERR_INTERRUPTED);
+    return rc;
+}
+
+
+/**
+ * Waits on an event semaphore without resuming the wait when interrupted.
+ *
+ * @returns Status of rtSemEventLinuxWait().  Since RTSEMWAIT_FLAGS_NORESUME is
+ *          passed, VERR_INTERRUPTED is a regular return value here.
+ * @param   hEventSem   The semaphore to wait on.
+ * @param   cMillies    Timeout in milliseconds, or RT_INDEFINITE_WAIT.
+ */
+RTDECL(int)  RTSemEventWaitNoResume(RTSEMEVENT hEventSem, RTMSINTERVAL cMillies)
+{
+    int rc;
+#ifndef RTSEMEVENT_STRICT
+    if (cMillies == RT_INDEFINITE_WAIT)
+        rc = rtSemEventLinuxWait(hEventSem, RTSEMWAIT_FLAGS_NORESUME | RTSEMWAIT_FLAGS_INDEFINITE, 0, NULL);
+    else
+        rc = rtSemEventLinuxWait(hEventSem, RTSEMWAIT_FLAGS_NORESUME | RTSEMWAIT_FLAGS_RELATIVE | RTSEMWAIT_FLAGS_MILLISECS,
+                                 cMillies, NULL);
+#else
+    RTLOCKVALSRCPOS SrcPos = RTLOCKVALSRCPOS_INIT_NORMAL_API();
+    if (cMillies == RT_INDEFINITE_WAIT)
+        rc = rtSemEventLinuxWait(hEventSem, RTSEMWAIT_FLAGS_NORESUME | RTSEMWAIT_FLAGS_INDEFINITE, 0, &SrcPos);
+    else
+        rc = rtSemEventLinuxWait(hEventSem, RTSEMWAIT_FLAGS_NORESUME | RTSEMWAIT_FLAGS_RELATIVE | RTSEMWAIT_FLAGS_MILLISECS,
+                                 cMillies, &SrcPos);
+#endif
+    /* No assertion on VERR_INTERRUPTED: the wait functions return it on EINTR
+       when RTSEMWAIT_FLAGS_NORESUME is set, so it is expected here. */
+    return rc;
+}
+
+
+/**
+ * Waits on an event semaphore with caller supplied wait flags.
+ *
+ * @returns Status of rtSemEventLinuxWait().
+ * @param   hEventSem   The semaphore to wait on.
+ * @param   fFlags      RTSEMWAIT_FLAGS_XXX.
+ * @param   uTimeout    The timeout, interpreted according to @a fFlags.
+ */
+RTDECL(int)  RTSemEventWaitEx(RTSEMEVENT hEventSem, uint32_t fFlags, uint64_t uTimeout)
+{
+    /* Strict builds pass the caller position on to the lock validator. */
+#ifdef RTSEMEVENT_STRICT
+    RTLOCKVALSRCPOS SrcPos = RTLOCKVALSRCPOS_INIT_NORMAL_API();
+    return rtSemEventLinuxWait(hEventSem, fFlags, uTimeout, &SrcPos);
+#else
+    return rtSemEventLinuxWait(hEventSem, fFlags, uTimeout, NULL);
+#endif
+}
+
+
+/**
+ * Debug variant of RTSemEventWaitEx() taking an explicit source position.
+ *
+ * @returns Status of rtSemEventLinuxWait().
+ * @param   hEventSem   The semaphore to wait on.
+ * @param   fFlags      RTSEMWAIT_FLAGS_XXX.
+ * @param   uTimeout    The timeout, interpreted according to @a fFlags.
+ * @param   uId         Caller supplied ID for the source position record.
+ * @param   SRC_POS     The caller's source position (RT_SRC_POS_DECL).
+ */
+RTDECL(int)  RTSemEventWaitExDebug(RTSEMEVENT hEventSem, uint32_t fFlags, uint64_t uTimeout,
+                                   RTHCUINTPTR uId, RT_SRC_POS_DECL)
+{
+    RTLOCKVALSRCPOS SrcPos = RTLOCKVALSRCPOS_INIT_DEBUG_API();
+    int const       rc     = rtSemEventLinuxWait(hEventSem, fFlags, uTimeout, &SrcPos);
+    return rc;
+}
+
+
+/**
+ * Gets the timeout resolution reported for event semaphores.
+ *
+ * @returns Always 1 (see the todo in the body about what the kernel really does).
+ */
+RTDECL(uint32_t) RTSemEventGetResolution(void)
+{
+    /** @todo we have 1ns parameter resolution, but need to verify that this is what
+     *        the kernel actually will use when setting the timer.  Most likely
+     *        it's rounded a little, but hopefully not to a multiple of HZ. */
+    uint32_t const cNsResolution = 1;
+    return cNsResolution;
+}
+
+
+/**
+ * Makes @a hThread the sole registered signaller of the semaphore.
+ *
+ * Does nothing unless RTSEMEVENT_STRICT is defined.
+ *
+ * @param   hEventSem   The semaphore.
+ * @param   hThread     The thread to register as signaller.
+ */
+RTDECL(void) RTSemEventSetSignaller(RTSEMEVENT hEventSem, RTTHREAD hThread)
+{
+#ifndef RTSEMEVENT_STRICT
+    RT_NOREF(hEventSem, hThread);
+#else
+    struct RTSEMEVENTINTERNAL *pThis = hEventSem;
+    AssertPtrReturnVoid(pThis);
+    AssertReturnVoid(pThis->iMagic == RTSEMEVENT_MAGIC);
+
+    /* From now on signal and wait calls perform lock validation. */
+    ASMAtomicWriteBool(&pThis->fEverHadSignallers, true);
+    RTLockValidatorRecSharedResetOwner(&pThis->Signallers, hThread, NULL);
+#endif
+}
+
+
+/**
+ * Adds @a hThread to the registered signallers of the semaphore.
+ *
+ * Does nothing unless RTSEMEVENT_STRICT is defined.
+ *
+ * @param   hEventSem   The semaphore.
+ * @param   hThread     The thread to add as signaller.
+ */
+RTDECL(void) RTSemEventAddSignaller(RTSEMEVENT hEventSem, RTTHREAD hThread)
+{
+#ifndef RTSEMEVENT_STRICT
+    RT_NOREF(hEventSem, hThread);
+#else
+    struct RTSEMEVENTINTERNAL *pThis = hEventSem;
+    AssertPtrReturnVoid(pThis);
+    AssertReturnVoid(pThis->iMagic == RTSEMEVENT_MAGIC);
+
+    /* From now on signal and wait calls perform lock validation. */
+    ASMAtomicWriteBool(&pThis->fEverHadSignallers, true);
+    RTLockValidatorRecSharedAddOwner(&pThis->Signallers, hThread, NULL);
+#endif
+}
+
+
+/**
+ * Removes @a hThread from the registered signallers of the semaphore.
+ *
+ * Does nothing unless RTSEMEVENT_STRICT is defined.
+ *
+ * @param   hEventSem   The semaphore.
+ * @param   hThread     The thread to remove.
+ */
+RTDECL(void) RTSemEventRemoveSignaller(RTSEMEVENT hEventSem, RTTHREAD hThread)
+{
+#ifndef RTSEMEVENT_STRICT
+    RT_NOREF(hEventSem, hThread);
+#else
+    struct RTSEMEVENTINTERNAL *pThis = hEventSem;
+    AssertPtrReturnVoid(pThis);
+    AssertReturnVoid(pThis->iMagic == RTSEMEVENT_MAGIC);
+
+    RTLockValidatorRecSharedRemoveOwner(&pThis->Signallers, hThread);
+#endif
+}
+
+#endif /* glibc < 2.6 || IPRT_WITH_FUTEX_BASED_SEMS */
+
diff --git a/src/VBox/Runtime/r3/linux/semeventmulti-linux.cpp b/src/VBox/Runtime/r3/linux/semeventmulti-linux.cpp
new file mode 100644
index 00000000..87554838
--- /dev/null
+++ b/src/VBox/Runtime/r3/linux/semeventmulti-linux.cpp
@@ -0,0 +1,600 @@
+/* $Id: semeventmulti-linux.cpp $ */
+/** @file
+ * IPRT - Multiple Release Event Semaphore, Linux (2.6.x+).
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+#include <features.h>
+#if __GLIBC_PREREQ(2,6) && !defined(IPRT_WITH_FUTEX_BASED_SEMS)
+
+/*
+ * glibc 2.6 fixed a serious bug in the mutex implementation. We wrote this
+ * linux specific event semaphores code in order to work around the bug. As it
+ * turns out, this code seems to have an unresolved issue (@bugref{2599}), so we'll
+ * fall back on the pthread based implementation if glibc is known to contain
+ * the bug fix.
+ *
+ * The external reference to epoll_pwait is a hack which prevents that we link
+ * against glibc < 2.6.
+ */
+#include "../posix/semeventmulti-posix.cpp"
+__asm__ (".global epoll_pwait");
+
+#else /* glibc < 2.6 */
+
+
+/*********************************************************************************************************************************
+*   Header Files                                                                                                                 *
+*********************************************************************************************************************************/
+#include <iprt/semaphore.h>
+#include "internal/iprt.h"
+
+#include <iprt/assert.h>
+#include <iprt/asm.h>
+#include <iprt/err.h>
+#include <iprt/lockvalidator.h>
+#include <iprt/mem.h>
+#include <iprt/time.h>
+#include "internal/magics.h"
+#include "internal/strict.h"
+
+
+#include <errno.h>
+#include <limits.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <sys/syscall.h>
+
+#include "semwait-linux.h"
+
+
+/*********************************************************************************************************************************
+*   Structures and Typedefs                                                                                                      *
+*********************************************************************************************************************************/
+/**
+ * Linux multiple wakeup event semaphore.
+ *
+ * Allocated by RTSemEventMultiCreateEx() and freed by RTSemEventMultiDestroy().
+ */
+struct RTSEMEVENTMULTIINTERNAL
+{
+    /** Magic value.
+     * RTSEMEVENTMULTI_MAGIC while alive; RTSemEventMultiDestroy() writes
+     * RTSEMEVENTMULTI_MAGIC + 1 before freeing. */
+    uint32_t volatile   u32Magic;
+    /** The futex state variable, see RTSEMEVENTMULTI_LNX_XXX. */
+    uint32_t volatile   uState;
+#ifdef RT_STRICT
+    /** Increased on every signalling call.
+     * Only used for assertions in the wait code. */
+    uint32_t volatile   uSignalSerialNo;
+#endif
+#ifdef RTSEMEVENTMULTI_STRICT
+    /** Signallers. */
+    RTLOCKVALRECSHRD    Signallers;
+    /** Indicates that lock validation should be performed. */
+    bool volatile       fEverHadSignallers;
+#endif
+};
+
+
+/*********************************************************************************************************************************
+*   Defined Constants And Macros                                                                                                 *
+*********************************************************************************************************************************/
+/** @name RTSEMEVENTMULTI_LNX_XXX - state
+ * Values of RTSEMEVENTMULTIINTERNAL::uState (the futex word).
+ * @{ */
+/** Not signalled; this is the state a new semaphore starts in. */
+#define RTSEMEVENTMULTI_LNX_NOT_SIGNALED            UINT32_C(0x00000000)
+/** Not signalled, and threads are or have been waiting; waiters switch to
+ *  this state before sleeping on the futex. */
+#define RTSEMEVENTMULTI_LNX_NOT_SIGNALED_WAITERS    UINT32_C(0x00000001)
+/** Signalled. */
+#define RTSEMEVENTMULTI_LNX_SIGNALED                UINT32_C(0x00000003)
+/** @} */
+
+/** Asserts that @a a_uState is one of the three RTSEMEVENTMULTI_LNX_XXX values.
+ * @note The argument is evaluated several times, so pass a plain variable.
+ *       The value is a 32-bit integer and is therefore formatted as hex. */
+#define ASSERT_VALID_STATE(a_uState) \
+    AssertMsg(   (a_uState) == RTSEMEVENTMULTI_LNX_NOT_SIGNALED \
+              || (a_uState) == RTSEMEVENTMULTI_LNX_NOT_SIGNALED_WAITERS \
+              || (a_uState) == RTSEMEVENTMULTI_LNX_SIGNALED, \
+              (#a_uState "=%#x\n", (a_uState)))
+
+
+/*********************************************************************************************************************************
+*   Global Variables                                                                                                             *
+*********************************************************************************************************************************/
+/** Whether we can use FUTEX_WAIT_BITSET.
+ * Starts out as -1 and is handed to rtSemLinuxCheckForFutexWaitBitSet() every
+ * time a semaphore is created.  NOTE(review): -1 presumably means "not probed
+ * yet" - confirm against semwait-linux.h. */
+static int volatile g_fCanUseWaitBitSet = -1;
+
+
+/**
+ * Creates a multiple release event semaphore with default settings.
+ *
+ * Thin wrapper around RTSemEventMultiCreateEx() passing no flags, no lock
+ * validator class and no name format.
+ *
+ * @returns Whatever RTSemEventMultiCreateEx() returns.
+ * @param   phEventMultiSem     Where to store the new semaphore handle.
+ */
+RTDECL(int)  RTSemEventMultiCreate(PRTSEMEVENTMULTI phEventMultiSem)
+{
+    int const rc = RTSemEventMultiCreateEx(phEventMultiSem, 0 /*fFlags*/, NIL_RTLOCKVALCLASS, NULL);
+    return rc;
+}
+
+
+/**
+ * Creates a multiple release event semaphore.
+ *
+ * @returns VINF_SUCCESS on success, VERR_INVALID_PARAMETER if unknown flags
+ *          are passed, VERR_NO_MEMORY if the allocation fails.
+ * @param   phEventMultiSem     Where to store the handle on success.
+ * @param   fFlags              RTSEMEVENTMULTI_FLAGS_NO_LOCK_VAL or zero.
+ * @param   hClass              Lock validator class (RTSEMEVENTMULTI_STRICT
+ *                              builds only).
+ * @param   pszNameFmt          Optional name format string for the lock
+ *                              validator record (strict builds only).
+ * @param   ...                 Format arguments.
+ */
+RTDECL(int)  RTSemEventMultiCreateEx(PRTSEMEVENTMULTI phEventMultiSem, uint32_t fFlags, RTLOCKVALCLASS hClass,
+                                     const char *pszNameFmt, ...)
+{
+    AssertReturn(!(fFlags & ~RTSEMEVENTMULTI_FLAGS_NO_LOCK_VAL), VERR_INVALID_PARAMETER);
+
+    /* Make sure we know whether FUTEX_WAIT_BITSET works. */
+    rtSemLinuxCheckForFutexWaitBitSet(&g_fCanUseWaitBitSet);
+#if defined(DEBUG_bird) && !defined(IN_GUEST)
+    Assert(g_fCanUseWaitBitSet == true);
+#endif
+
+    /* Allocate the semaphore handle. */
+    struct RTSEMEVENTMULTIINTERNAL *pThis = (struct RTSEMEVENTMULTIINTERNAL *)RTMemAlloc(sizeof(struct RTSEMEVENTMULTIINTERNAL));
+    if (!pThis)
+        return  VERR_NO_MEMORY;
+
+    /* Initialize it: alive and not signalled. */
+    pThis->u32Magic        = RTSEMEVENTMULTI_MAGIC;
+    pThis->uState          = RTSEMEVENTMULTI_LNX_NOT_SIGNALED;
+#ifdef RT_STRICT
+    pThis->uSignalSerialNo = 0;
+#endif
+#ifdef RTSEMEVENTMULTI_STRICT
+    if (pszNameFmt)
+    {
+        /* Caller supplied a name. */
+        va_list va;
+        va_start(va, pszNameFmt);
+        RTLockValidatorRecSharedInitV(&pThis->Signallers, hClass, RTLOCKVAL_SUB_CLASS_ANY, pThis,
+                                      true /*fSignaller*/, !(fFlags & RTSEMEVENTMULTI_FLAGS_NO_LOCK_VAL),
+                                      pszNameFmt, va);
+        va_end(va);
+    }
+    else
+    {
+        /* Anonymous semaphore: number them sequentially. */
+        static uint32_t volatile s_iSemEventMultiAnon = 0;
+        RTLockValidatorRecSharedInit(&pThis->Signallers, hClass, RTLOCKVAL_SUB_CLASS_ANY, pThis,
+                                     true /*fSignaller*/, !(fFlags & RTSEMEVENTMULTI_FLAGS_NO_LOCK_VAL),
+                                     "RTSemEventMulti-%u", ASMAtomicIncU32(&s_iSemEventMultiAnon) - 1);
+    }
+    pThis->fEverHadSignallers = false;
+#else
+    RT_NOREF(hClass, pszNameFmt);
+#endif
+
+    *phEventMultiSem = pThis;
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Destroys a multiple release event semaphore.
+ *
+ * @returns VINF_SUCCESS on success or when the handle is NIL_RTSEMEVENTMULTI,
+ *          VERR_INVALID_HANDLE if the pointer or its magic is invalid.
+ * @param   hEventMultiSem  The semaphore to destroy.  NIL_RTSEMEVENTMULTI is
+ *                          quietly ignored.
+ */
+RTDECL(int)  RTSemEventMultiDestroy(RTSEMEVENTMULTI hEventMultiSem)
+{
+    /*
+     * Validate input.
+     */
+    struct RTSEMEVENTMULTIINTERNAL *pThis = hEventMultiSem;
+    if (pThis == NIL_RTSEMEVENTMULTI)
+        return VINF_SUCCESS;
+    AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+    AssertReturn(pThis->u32Magic == RTSEMEVENTMULTI_MAGIC, VERR_INVALID_HANDLE);
+
+    /*
+     * Invalidate the semaphore and wake up anyone waiting on it.
+     *
+     * The magic is changed first so that woken waiters find a dead semaphore.
+     * The state is then forced to signalled; only if the previous state says
+     * there were waiters do we wake them all and sleep for 1000 microseconds
+     * before freeing the memory.
+     * NOTE(review): the sleep presumably gives the woken threads time to leave
+     * the structure; it is not a hard guarantee.
+     */
+    ASMAtomicWriteU32(&pThis->u32Magic, RTSEMEVENTMULTI_MAGIC + 1);
+    if (ASMAtomicXchgU32(&pThis->uState, RTSEMEVENTMULTI_LNX_SIGNALED) == RTSEMEVENTMULTI_LNX_NOT_SIGNALED_WAITERS)
+    {
+        sys_futex(&pThis->uState, FUTEX_WAKE, INT_MAX, NULL, NULL, 0);
+        usleep(1000);
+    }
+
+    /*
+     * Free the semaphore memory and be gone.
+     */
+#ifdef RTSEMEVENTMULTI_STRICT
+    RTLockValidatorRecSharedDelete(&pThis->Signallers);
+#endif
+    RTMemFree(pThis);
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Signals a multiple release event semaphore, waking up all waiters.
+ *
+ * @returns VINF_SUCCESS on success, VERR_INVALID_HANDLE if the handle is bad,
+ *          or a lock validator status in RTSEMEVENTMULTI_STRICT builds.
+ * @param   hEventMultiSem  The semaphore to signal.
+ */
+RTDECL(int)  RTSemEventMultiSignal(RTSEMEVENTMULTI hEventMultiSem)
+{
+    /*
+     * Validate input.
+     */
+    struct RTSEMEVENTMULTIINTERNAL *pThis = hEventMultiSem;
+    AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+    AssertReturn(pThis->u32Magic == RTSEMEVENTMULTI_MAGIC, VERR_INVALID_HANDLE);
+
+#ifdef RTSEMEVENTMULTI_STRICT
+    /* Only validate the signaller once signallers have been registered. */
+    if (pThis->fEverHadSignallers)
+    {
+        int rc9 = RTLockValidatorRecSharedCheckSignaller(&pThis->Signallers, NIL_RTTHREAD);
+        if (RT_FAILURE(rc9))
+            return rc9;
+    }
+#endif
+
+    /*
+     * Signal it.
+     * The wake-up syscall is only made when the previous state indicates that
+     * there are (or have been) waiters.
+     */
+#ifdef RT_STRICT
+    ASMAtomicIncU32(&pThis->uSignalSerialNo);
+#endif
+    uint32_t uOld = ASMAtomicXchgU32(&pThis->uState, RTSEMEVENTMULTI_LNX_SIGNALED);
+    if (uOld == RTSEMEVENTMULTI_LNX_NOT_SIGNALED_WAITERS)
+    {
+        /* wake up sleeping threads. */
+        long cWoken = sys_futex(&pThis->uState, FUTEX_WAKE, INT_MAX, NULL, NULL, 0);
+        AssertMsg(cWoken >= 0, ("%ld\n", cWoken)); NOREF(cWoken);
+    }
+    ASSERT_VALID_STATE(uOld);
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Resets a multiple release event semaphore to the not signalled state.
+ *
+ * @returns VINF_SUCCESS, or VERR_INVALID_HANDLE if the handle is bad.
+ * @param   hEventMultiSem  The semaphore to reset.
+ */
+RTDECL(int)  RTSemEventMultiReset(RTSEMEVENTMULTI hEventMultiSem)
+{
+    /* Input validation. */
+    struct RTSEMEVENTMULTIINTERNAL *pThis = hEventMultiSem;
+    AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+    AssertReturn(pThis->u32Magic == RTSEMEVENTMULTI_MAGIC, VERR_INVALID_HANDLE);
+#ifdef RT_STRICT
+    uint32_t const uState = pThis->uState;
+    ASSERT_VALID_STATE(uState);
+#endif
+
+    /* Only a signalled semaphore changes state; any other state is left as is. */
+    uint32_t const uNewState = RTSEMEVENTMULTI_LNX_NOT_SIGNALED;
+    uint32_t const uOldState = RTSEMEVENTMULTI_LNX_SIGNALED;
+    ASMAtomicCmpXchgU32(&pThis->uState, uNewState, uOldState);
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Performs an indefinite wait on the event.
+ *
+ * @returns VINF_SUCCESS once the semaphore is observed signaled or the futex
+ *          wait returns 0.
+ * @retval  VERR_SEM_DESTROYED if the magic no longer matches after waking up.
+ * @retval  VERR_INTERRUPTED if the wait was interrupted (-EINTR) and
+ *          RTSEMWAIT_FLAGS_NORESUME is set in @a fFlags.
+ * @retval  Lock validator status if the blocking check fails (strict builds).
+ * @param   pThis       The semaphore.
+ * @param   fFlags      RTSEMWAIT_FLAGS_XXX; only RTSEMWAIT_FLAGS_NORESUME is
+ *                      examined here.
+ * @param   pSrcPos     Source position for the lock validator, only used when
+ *                      RTSEMEVENTMULTI_STRICT is defined.
+ */
+static int rtSemEventMultiLinuxWaitIndefinite(struct RTSEMEVENTMULTIINTERNAL *pThis, uint32_t fFlags, PCRTLOCKVALSRCPOS pSrcPos)
+{
+    RT_NOREF(pSrcPos);
+
+    /*
+     * Quickly check whether it's signaled.
+     */
+    uint32_t uState = ASMAtomicUoReadU32(&pThis->uState);
+    if (uState == RTSEMEVENTMULTI_LNX_SIGNALED)
+        return VINF_SUCCESS;
+    ASSERT_VALID_STATE(uState);
+
+    /*
+     * The wait loop.
+     */
+#ifdef RTSEMEVENTMULTI_STRICT
+    RTTHREAD hThreadSelf = RTThreadSelfAutoAdopt();
+#else
+    RTTHREAD hThreadSelf = RTThreadSelf();
+#endif
+    for (unsigned i = 0;; i++)
+    {
+        /*
+         * Start waiting. We only account for there being or having been
+         * threads waiting on the semaphore to keep things simple.
+         */
+        uState = ASMAtomicUoReadU32(&pThis->uState);
+        if (   uState == RTSEMEVENTMULTI_LNX_NOT_SIGNALED_WAITERS
+            || (   uState == RTSEMEVENTMULTI_LNX_NOT_SIGNALED
+                && ASMAtomicCmpXchgU32(&pThis->uState, RTSEMEVENTMULTI_LNX_NOT_SIGNALED_WAITERS,
+                                       RTSEMEVENTMULTI_LNX_NOT_SIGNALED)))
+        {
+#ifdef RTSEMEVENTMULTI_STRICT
+            if (pThis->fEverHadSignallers)
+            {
+                int rc9 = RTLockValidatorRecSharedCheckBlocking(&pThis->Signallers, hThreadSelf, pSrcPos, false,
+                                                                RT_INDEFINITE_WAIT, RTTHREADSTATE_EVENT_MULTI, true);
+                if (RT_FAILURE(rc9))
+                    return rc9;
+            }
+#endif
+#ifdef RT_STRICT
+            uint32_t const uPrevSignalSerialNo = ASMAtomicReadU32(&pThis->uSignalSerialNo);
+#endif
+            RTThreadBlocking(hThreadSelf, RTTHREADSTATE_EVENT_MULTI, true);
+            /* The 1 is the value the futex word must still hold for the kernel
+               to put us to sleep (presumably RTSEMEVENTMULTI_LNX_NOT_SIGNALED_WAITERS). */
+            long rc = sys_futex(&pThis->uState, FUTEX_WAIT, 1, NULL /*pTimeout*/, NULL, 0);
+            RTThreadUnblocked(hThreadSelf, RTTHREADSTATE_EVENT_MULTI);
+
+            /* Check that the structure is still alive before continuing. */
+            if (RT_LIKELY(pThis->u32Magic == RTSEMEVENTMULTI_MAGIC))
+            { /*likely*/ }
+            else
+                return VERR_SEM_DESTROYED;
+
+            /*
+             * Return if success.
+             */
+            if (rc == 0)
+            {
+                Assert(uPrevSignalSerialNo != ASMAtomicReadU32(&pThis->uSignalSerialNo));
+                return VINF_SUCCESS;
+            }
+
+            /*
+             * Act on the wakeup code.
+             */
+            if (rc == -EWOULDBLOCK)
+                /* retry, the value changed. */;
+            else if (rc == -EINTR)
+            {
+                if (fFlags & RTSEMWAIT_FLAGS_NORESUME)
+                    return VERR_INTERRUPTED;
+            }
+            else
+            {
+                /* this shouldn't happen! */
+                AssertMsgFailed(("rc=%ld errno=%d\n", rc, errno));
+                /* rc is a negated errno value at this point (see the comparisons
+                   above), so flip the sign before converting it. */
+                return RTErrConvertFromErrno((int)-rc);
+            }
+        }
+        else if (uState == RTSEMEVENTMULTI_LNX_SIGNALED)
+            return VINF_SUCCESS;
+        else
+            ASSERT_VALID_STATE(uState);
+    }
+}
+
+
+/**
+ * Polls the semaphore, i.e. checks its state once without ever blocking.
+ *
+ * Used when the timeout has already expired at the time of the call.
+ *
+ * @returns VINF_SUCCESS if the semaphore is currently signaled, otherwise
+ *          VERR_TIMEOUT.
+ * @param   pThis               The semaphore.
+ */
+static int rtSemEventMultiLinuxWaitPoll(struct RTSEMEVENTMULTIINTERNAL *pThis)
+{
+    return ASMAtomicUoReadU32(&pThis->uState) == RTSEMEVENTMULTI_LNX_SIGNALED
+         ? VINF_SUCCESS
+         : VERR_TIMEOUT;
+}
+
+
+/**
+ * Performs a timed wait on the event.
+ *
+ * Degenerates into a poll when the calculated timeout is zero and into an
+ * indefinite wait when rtSemLinuxCalcDeadline returns UINT64_MAX.
+ *
+ * @returns VINF_SUCCESS once the semaphore is observed signaled or the futex
+ *          wait returns 0.
+ * @retval  VERR_TIMEOUT if the futex wait timed out or less than a
+ *          microsecond remains when re-arming a relative wait.
+ * @retval  VERR_SEM_DESTROYED if the magic no longer matches after waking up.
+ * @retval  VERR_INTERRUPTED if the wait was interrupted (-EINTR) and
+ *          RTSEMWAIT_FLAGS_NORESUME is set in @a fFlags.
+ * @retval  Lock validator status if the blocking check fails (strict builds).
+ * @param   pThis       The semaphore.
+ * @param   fFlags      RTSEMWAIT_FLAGS_XXX.
+ * @param   uTimeout    The timeout, interpreted according to @a fFlags.
+ * @param   pSrcPos     Source position for the lock validator, only used when
+ *                      RTSEMEVENTMULTI_STRICT is defined.
+ */
+static int rtSemEventMultiLinuxWaitTimed(struct RTSEMEVENTMULTIINTERNAL *pThis, uint32_t fFlags, uint64_t uTimeout,
+                                         PCRTLOCKVALSRCPOS pSrcPos)
+{
+    RT_NOREF(pSrcPos);
+
+    /*
+     * Quickly check whether it's signaled.
+     */
+    uint32_t uState = ASMAtomicUoReadU32(&pThis->uState);
+    if (uState == RTSEMEVENTMULTI_LNX_SIGNALED)
+        return VINF_SUCCESS;
+    ASSERT_VALID_STATE(uState);
+
+    /*
+     * Convert the timeout value.
+     */
+    struct timespec TsTimeout;
+    int             iWaitOp;
+    uint32_t        uWaitVal3;
+    uint64_t        nsAbsTimeout = uTimeout; /* (older gcc maybe used uninitialized) */
+    uTimeout = rtSemLinuxCalcDeadline(fFlags, uTimeout, g_fCanUseWaitBitSet, &TsTimeout, &iWaitOp, &uWaitVal3, &nsAbsTimeout);
+    if (uTimeout == 0)
+        return rtSemEventMultiLinuxWaitPoll(pThis);
+    if (uTimeout == UINT64_MAX)
+        return rtSemEventMultiLinuxWaitIndefinite(pThis, fFlags, pSrcPos);
+
+    /*
+     * The wait loop.
+     */
+#ifdef RTSEMEVENTMULTI_STRICT
+    RTTHREAD hThreadSelf = RTThreadSelfAutoAdopt();
+#else
+    RTTHREAD hThreadSelf = RTThreadSelf();
+#endif
+    for (unsigned i = 0;; i++)
+    {
+        /*
+         * Start waiting. We only account for there being or having been
+         * threads waiting on the semaphore to keep things simple.
+         */
+        uState = ASMAtomicUoReadU32(&pThis->uState);
+        if (   uState == RTSEMEVENTMULTI_LNX_NOT_SIGNALED_WAITERS
+            || (   uState == RTSEMEVENTMULTI_LNX_NOT_SIGNALED
+                && ASMAtomicCmpXchgU32(&pThis->uState, RTSEMEVENTMULTI_LNX_NOT_SIGNALED_WAITERS,
+                                       RTSEMEVENTMULTI_LNX_NOT_SIGNALED)))
+        {
+#ifdef RTSEMEVENTMULTI_STRICT
+            if (pThis->fEverHadSignallers)
+            {
+                int rc9 = RTLockValidatorRecSharedCheckBlocking(&pThis->Signallers, hThreadSelf, pSrcPos, false,
+                                                                uTimeout / UINT32_C(1000000), RTTHREADSTATE_EVENT_MULTI, true);
+                if (RT_FAILURE(rc9))
+                    return rc9;
+            }
+#endif
+#ifdef RT_STRICT
+            uint32_t const uPrevSignalSerialNo = ASMAtomicReadU32(&pThis->uSignalSerialNo);
+#endif
+            RTThreadBlocking(hThreadSelf, RTTHREADSTATE_EVENT_MULTI, true);
+            /* The 1 is the value the futex word must still hold for the kernel
+               to put us to sleep (presumably RTSEMEVENTMULTI_LNX_NOT_SIGNALED_WAITERS). */
+            long rc = sys_futex(&pThis->uState, iWaitOp, 1, &TsTimeout, NULL, uWaitVal3);
+            RTThreadUnblocked(hThreadSelf, RTTHREADSTATE_EVENT_MULTI);
+
+            /* Check that the structure is still alive before continuing. */
+            if (RT_LIKELY(pThis->u32Magic == RTSEMEVENTMULTI_MAGIC))
+            { /*likely*/ }
+            else
+                return VERR_SEM_DESTROYED;
+
+            /*
+             * Return if success.
+             */
+            if (rc == 0)
+            {
+                Assert(uPrevSignalSerialNo != ASMAtomicReadU32(&pThis->uSignalSerialNo));
+                return VINF_SUCCESS;
+            }
+
+            /*
+             * Act on the wakeup code.
+             */
+            if (rc == -ETIMEDOUT)
+            {
+                /** @todo something is broken here. shows up every now and again in the ata
+                 *        code. Should try to run the timeout against RTTimeMilliTS to
+                 *        check that it's doing the right thing... */
+#ifdef RT_STRICT
+                uint64_t const uNow = RTTimeNanoTS();
+                AssertMsg(uNow >= nsAbsTimeout || nsAbsTimeout - uNow < RT_NS_1MS,
+                          ("%#RX64 - %#RX64 => %#RX64 (%RI64)\n", nsAbsTimeout, uNow, nsAbsTimeout - uNow, nsAbsTimeout - uNow));
+#endif
+                return VERR_TIMEOUT;
+            }
+            if (rc == -EWOULDBLOCK)
+            {
+                /* retry, the value changed. */;
+            }
+            else if (rc == -EINTR)
+            {
+                if (fFlags & RTSEMWAIT_FLAGS_NORESUME)
+                    return VERR_INTERRUPTED;
+            }
+            else
+            {
+                /* this shouldn't happen! */
+                AssertMsgFailed(("rc=%ld errno=%d\n", rc, errno));
+                /* rc is a negated errno value at this point (see the comparisons
+                   above), so flip the sign before converting it. */
+                return RTErrConvertFromErrno((int)-rc);
+            }
+        }
+        else if (uState == RTSEMEVENTMULTI_LNX_SIGNALED)
+            return VINF_SUCCESS;
+        else
+            ASSERT_VALID_STATE(uState);
+
+        /* adjust the relative timeout if relative */
+        if (iWaitOp == FUTEX_WAIT)
+        {
+            /* FUTEX_WAIT takes a relative timespec, so work out what is left
+               of the original interval before going back to sleep. */
+            int64_t i64Diff = nsAbsTimeout - RTTimeSystemNanoTS();
+            if (i64Diff < 1000)
+                return VERR_TIMEOUT;
+            TsTimeout.tv_sec  = (uint64_t)i64Diff / RT_NS_1SEC;
+            TsTimeout.tv_nsec = (uint64_t)i64Diff % RT_NS_1SEC;
+        }
+    }
+}
+
+/**
+ * Common worker for the wait APIs: validates the arguments and dispatches to
+ * the indefinite or the timed wait routine depending on @a fFlags.
+ *
+ * @returns VERR_INVALID_HANDLE or VERR_INVALID_PARAMETER on bad input,
+ *          otherwise whatever the selected wait routine returns.
+ */
+DECLINLINE(int) rtSemEventLnxMultiWait(RTSEMEVENTMULTI hEventSem, uint32_t fFlags, uint64_t uTimeout, PCRTLOCKVALSRCPOS pSrcPos)
+{
+    /* Input validation: handle pointer, magic and flag combination. */
+    struct RTSEMEVENTMULTIINTERNAL *pThis = hEventSem;
+    AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+    AssertReturn(pThis->u32Magic == RTSEMEVENTMULTI_MAGIC, VERR_INVALID_HANDLE);
+    AssertReturn(RTSEMWAIT_FLAGS_ARE_VALID(fFlags), VERR_INVALID_PARAMETER);
+
+    /* Pick the worker. */
+    if (!(fFlags & RTSEMWAIT_FLAGS_INDEFINITE))
+        return rtSemEventMultiLinuxWaitTimed(pThis, fFlags, uTimeout, pSrcPos);
+    return rtSemEventMultiLinuxWaitIndefinite(pThis, fFlags, pSrcPos);
+}
+
+
+#undef RTSemEventMultiWaitEx
+RTDECL(int)  RTSemEventMultiWaitEx(RTSEMEVENTMULTI hEventMultiSem, uint32_t fFlags, uint64_t uTimeout)
+{
+    /* Keyed on RTSEMEVENTMULTI_STRICT, the same macro that guards the lock
+       validator calls in the wait workers of this file, so that a source
+       position is supplied exactly when those calls are compiled in. */
+#ifndef RTSEMEVENTMULTI_STRICT
+    return rtSemEventLnxMultiWait(hEventMultiSem, fFlags, uTimeout, NULL);
+#else
+    RTLOCKVALSRCPOS SrcPos = RTLOCKVALSRCPOS_INIT_NORMAL_API();
+    return rtSemEventLnxMultiWait(hEventMultiSem, fFlags, uTimeout, &SrcPos);
+#endif
+}
+
+
+RTDECL(int)  RTSemEventMultiWaitExDebug(RTSEMEVENTMULTI hEventMultiSem, uint32_t fFlags, uint64_t uTimeout,
+                                        RTHCUINTPTR uId, RT_SRC_POS_DECL)
+{
+    /* Debug flavour: always hands a source position record to the worker. */
+    RTLOCKVALSRCPOS SrcPos = RTLOCKVALSRCPOS_INIT_DEBUG_API();
+    int const rcWait = rtSemEventLnxMultiWait(hEventMultiSem, fFlags, uTimeout, &SrcPos);
+    return rcWait;
+}
+
+
+RTDECL(void) RTSemEventMultiSetSignaller(RTSEMEVENTMULTI hEventMultiSem, RTTHREAD hThread)
+{
+#ifndef RTSEMEVENTMULTI_STRICT
+    /* Nothing to do without the lock validator. */
+    RT_NOREF(hEventMultiSem, hThread);
+#else
+    struct RTSEMEVENTMULTIINTERNAL *pSem = hEventMultiSem;
+    AssertPtrReturnVoid(pSem);
+    AssertReturnVoid(pSem->u32Magic == RTSEMEVENTMULTI_MAGIC);
+
+    /* Remember that signallers were registered and reset the owner record to hThread. */
+    ASMAtomicWriteBool(&pSem->fEverHadSignallers, true);
+    RTLockValidatorRecSharedResetOwner(&pSem->Signallers, hThread, NULL);
+#endif
+}
+
+
+RTDECL(void) RTSemEventMultiAddSignaller(RTSEMEVENTMULTI hEventMultiSem, RTTHREAD hThread)
+{
+#ifndef RTSEMEVENTMULTI_STRICT
+    /* Nothing to do without the lock validator. */
+    RT_NOREF(hEventMultiSem, hThread);
+#else
+    struct RTSEMEVENTMULTIINTERNAL *pSem = hEventMultiSem;
+    AssertPtrReturnVoid(pSem);
+    AssertReturnVoid(pSem->u32Magic == RTSEMEVENTMULTI_MAGIC);
+
+    /* Remember that signallers were registered and add hThread to the owner record. */
+    ASMAtomicWriteBool(&pSem->fEverHadSignallers, true);
+    RTLockValidatorRecSharedAddOwner(&pSem->Signallers, hThread, NULL);
+#endif
+}
+
+
+RTDECL(void) RTSemEventMultiRemoveSignaller(RTSEMEVENTMULTI hEventMultiSem, RTTHREAD hThread)
+{
+#ifndef RTSEMEVENTMULTI_STRICT
+    /* Nothing to do without the lock validator. */
+    RT_NOREF(hEventMultiSem, hThread);
+#else
+    struct RTSEMEVENTMULTIINTERNAL *pSem = hEventMultiSem;
+    AssertPtrReturnVoid(pSem);
+    AssertReturnVoid(pSem->u32Magic == RTSEMEVENTMULTI_MAGIC);
+
+    /* Drop hThread from the owner record. */
+    RTLockValidatorRecSharedRemoveOwner(&pSem->Signallers, hThread);
+#endif
+}
+
+#endif /* glibc < 2.6 || IPRT_WITH_FUTEX_BASED_SEMS */
+
diff --git a/src/VBox/Runtime/r3/linux/semmutex-linux.cpp b/src/VBox/Runtime/r3/linux/semmutex-linux.cpp
new file mode 100644
index 00000000..09cd866f
--- /dev/null
+++ b/src/VBox/Runtime/r3/linux/semmutex-linux.cpp
@@ -0,0 +1,475 @@
+/* $Id: semmutex-linux.cpp $ */
+/** @file
+ * IPRT - Mutex Semaphore, Linux  (2.6.x+).
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+*   Header Files                                                                                                                 *
+*********************************************************************************************************************************/
+#include <iprt/semaphore.h>
+#include "internal/iprt.h"
+
+#include <iprt/alloc.h>
+#include <iprt/asm.h>
+#include <iprt/assert.h>
+#include <iprt/err.h>
+#include <iprt/lockvalidator.h>
+#include <iprt/thread.h>
+#include <iprt/time.h>
+#include "internal/magics.h"
+#include "internal/strict.h"
+
+#include <errno.h>
+#include <limits.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <sys/syscall.h>
+#if 0 /* With 2.6.17 futex.h has become C++ unfriendly. */
+# include <linux/futex.h>
+#else
+# define FUTEX_WAIT 0
+# define FUTEX_WAKE 1
+#endif
+
+
+/*********************************************************************************************************************************
+*   Structures and Typedefs                                                                                                      *
+*********************************************************************************************************************************/
+/**
+ * Linux internal representation of a Mutex semaphore.
+ *
+ * The mutex is recursive: the owning thread may request it again, which only
+ * bumps cNestings.
+ */
+struct RTSEMMUTEXINTERNAL
+{
+    /** The futex state variable.
+     * 0 means unlocked.
+     * 1 means locked, no waiters.
+     * 2 means locked, one or more waiters.
+     */
+    int32_t volatile    iState;
+    /** Nesting count; 0 when the mutex is not owned, 1 after the first
+     *  successful request. */
+    uint32_t volatile   cNestings;
+    /** The owner of the mutex; set to (pthread_t)~0 when there is no owner. */
+    pthread_t volatile  Owner;
+    /** Magic value (RTSEMMUTEX_MAGIC). */
+    uint32_t  volatile  u32Magic;
+#ifdef RTSEMMUTEX_STRICT
+    /** Lock validator record associated with this mutex. */
+    RTLOCKVALRECEXCL    ValidatorRec;
+#endif
+};
+
+
+
+/**
+ * Thin wrapper around the raw futex system call.
+ *
+ * @returns The non-negative syscall result on success, otherwise the negated
+ *          errno value (e.g. -ETIMEDOUT).
+ */
+static long sys_futex(int32_t volatile *uaddr, int op, int val, struct timespec *utime, int32_t *uaddr2, int val3)
+{
+    errno = 0;
+    long const lResult = syscall(__NR_futex, uaddr, op, val, utime, uaddr2, val3);
+    if (lResult >= 0)
+        return lResult;
+
+    /* Failure: syscall() reports -1 and leaves the reason in errno. */
+    Assert(lResult == -1);
+    return -errno;
+}
+
+
+#undef RTSemMutexCreate
+RTDECL(int)  RTSemMutexCreate(PRTSEMMUTEX phMutexSem)
+{
+    /* Default creation: no flags, no lock validator class, anonymous name. */
+    int const rcCreate = RTSemMutexCreateEx(phMutexSem, 0 /*fFlags*/, NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE, NULL);
+    return rcCreate;
+}
+
+
+RTDECL(int) RTSemMutexCreateEx(PRTSEMMUTEX phMutexSem, uint32_t fFlags,
+                               RTLOCKVALCLASS hClass, uint32_t uSubClass, const char *pszNameFmt, ...)
+{
+    /* RTSEMMUTEX_FLAGS_NO_LOCK_VAL is the only flag accepted. */
+    AssertReturn(!(fFlags & ~RTSEMMUTEX_FLAGS_NO_LOCK_VAL), VERR_INVALID_PARAMETER);
+
+    /*
+     * Allocate the instance, bailing out early if we're out of memory.
+     */
+    struct RTSEMMUTEXINTERNAL *pThis = (struct RTSEMMUTEXINTERNAL *)RTMemAlloc(sizeof(struct RTSEMMUTEXINTERNAL));
+    if (!pThis)
+        return VERR_NO_MEMORY;
+
+    /*
+     * Initialize it as unlocked and unowned.
+     */
+    pThis->u32Magic     = RTSEMMUTEX_MAGIC;
+    pThis->iState       = 0;
+    pThis->Owner        = (pthread_t)~0;
+    pThis->cNestings    = 0;
+#ifdef RTSEMMUTEX_STRICT
+    if (pszNameFmt)
+    {
+        /* Caller supplied a name format string. */
+        va_list va;
+        va_start(va, pszNameFmt);
+        RTLockValidatorRecExclInitV(&pThis->ValidatorRec, hClass, uSubClass, pThis,
+                                    !(fFlags & RTSEMMUTEX_FLAGS_NO_LOCK_VAL), pszNameFmt, va);
+        va_end(va);
+    }
+    else
+    {
+        /* Anonymous mutex: generate a name from a running counter. */
+        static uint32_t volatile s_iMutexAnon = 0;
+        RTLockValidatorRecExclInit(&pThis->ValidatorRec, hClass, uSubClass, pThis,
+                                   !(fFlags & RTSEMMUTEX_FLAGS_NO_LOCK_VAL),
+                                   "RTSemMutex-%u", ASMAtomicIncU32(&s_iMutexAnon) - 1);
+    }
+#else
+    RT_NOREF(hClass, uSubClass, pszNameFmt);
+#endif
+
+    *phMutexSem = pThis;
+    return VINF_SUCCESS;
+}
+
+
+/*
+ * Destroys a mutex semaphore.
+ *
+ * Returns VINF_SUCCESS for NIL_RTSEMMUTEX and after a successful destruction,
+ * VERR_INVALID_HANDLE if the handle is not a valid pointer or its magic does
+ * not match RTSEMMUTEX_MAGIC.
+ */
+RTDECL(int)  RTSemMutexDestroy(RTSEMMUTEX hMutexSem)
+{
+    /*
+     * Validate input.
+     */
+    if (hMutexSem == NIL_RTSEMMUTEX)
+        return VINF_SUCCESS;
+    struct RTSEMMUTEXINTERNAL *pThis = hMutexSem;
+    AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+    AssertMsgReturn(pThis->u32Magic == RTSEMMUTEX_MAGIC,
+                    ("hMutexSem=%p u32Magic=%#x\n", pThis, pThis->u32Magic),
+                    VERR_INVALID_HANDLE);
+
+    /*
+     * Invalidate the semaphore and wake up anyone waiting on it.
+     */
+    ASMAtomicWriteU32(&pThis->u32Magic, RTSEMMUTEX_MAGIC_DEAD);
+    /* A previous state above zero means the mutex was locked, so there may be sleepers. */
+    if (ASMAtomicXchgS32(&pThis->iState, 0) > 0)
+    {
+        sys_futex(&pThis->iState, FUTEX_WAKE, INT_MAX, NULL, NULL, 0);
+        /* Presumably gives the woken threads a chance to see the dead magic
+           before the memory is freed below - best effort only. */
+        usleep(1000);
+    }
+    pThis->Owner        = (pthread_t)~0;
+    pThis->cNestings    = 0;
+#ifdef RTSEMMUTEX_STRICT
+    RTLockValidatorRecExclDelete(&pThis->ValidatorRec);
+#endif
+
+    /*
+     * Free the semaphore memory and be gone.
+     */
+    RTMemFree(pThis);
+    return VINF_SUCCESS;
+}
+
+
+RTDECL(uint32_t) RTSemMutexSetSubClass(RTSEMMUTEX hMutexSem, uint32_t uSubClass)
+{
+#ifndef RTSEMMUTEX_STRICT
+    /* No lock validator record to update in non-strict builds. */
+    RT_NOREF(hMutexSem, uSubClass);
+    return RTLOCKVAL_SUB_CLASS_INVALID;
+#else
+    /* Validate the handle before touching the validator record. */
+    RTSEMMUTEXINTERNAL *pMutex = hMutexSem;
+    AssertPtrReturn(pMutex, RTLOCKVAL_SUB_CLASS_INVALID);
+    AssertReturn(pMutex->u32Magic == RTSEMMUTEX_MAGIC, RTLOCKVAL_SUB_CLASS_INVALID);
+
+    return RTLockValidatorRecExclSetSubClass(&pMutex->ValidatorRec, uSubClass);
+#endif
+}
+
+
+/**
+ * Worker for the RTSemMutexRequest* APIs: acquires the mutex, recursively if
+ * the caller already owns it, optionally giving up after a timeout.
+ *
+ * @returns VINF_SUCCESS when the calling thread owns the mutex on return.
+ * @retval  VERR_INVALID_HANDLE if the handle is not a valid pointer or the
+ *          magic doesn't match.
+ * @retval  VERR_TIMEOUT if the mutex could not be acquired within @a cMillies.
+ * @retval  VERR_INTERRUPTED if the futex wait was interrupted (-EINTR) and
+ *          @a fAutoResume is false.
+ * @retval  VERR_SEM_DESTROYED if the magic changed while waiting.
+ * @retval  Lock validator status on validation failures (strict builds).
+ * @param   hMutexSem       The mutex handle.
+ * @param   cMillies        Timeout in milliseconds, RT_INDEFINITE_WAIT for no
+ *                          timeout.
+ * @param   fAutoResume     Whether to restart the wait after -EINTR.
+ * @param   pSrcPos         Source position for the lock validator, only used
+ *                          when RTSEMMUTEX_STRICT is defined.
+ */
+DECL_FORCE_INLINE(int) rtSemMutexRequest(RTSEMMUTEX hMutexSem, RTMSINTERVAL cMillies, bool fAutoResume, PCRTLOCKVALSRCPOS pSrcPos)
+{
+    RT_NOREF(pSrcPos);
+
+    /*
+     * Validate input.
+     */
+    struct RTSEMMUTEXINTERNAL *pThis = hMutexSem;
+    AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+    AssertReturn(pThis->u32Magic == RTSEMMUTEX_MAGIC, VERR_INVALID_HANDLE);
+
+    /*
+     * Check if nested request.
+     */
+    pthread_t Self = pthread_self();
+    if (    pThis->Owner == Self
+        &&  pThis->cNestings > 0)
+    {
+#ifdef RTSEMMUTEX_STRICT
+        int rc9 = RTLockValidatorRecExclRecursion(&pThis->ValidatorRec, pSrcPos);
+        if (RT_FAILURE(rc9))
+            return rc9;
+#endif
+        ASMAtomicIncU32(&pThis->cNestings);
+        return VINF_SUCCESS;
+    }
+
+#ifdef RTSEMMUTEX_STRICT
+    RTTHREAD hThreadSelf = RTThreadSelfAutoAdopt();
+    if (cMillies)
+    {
+        int rc9 = RTLockValidatorRecExclCheckOrder(&pThis->ValidatorRec, hThreadSelf, pSrcPos, cMillies);
+        if (RT_FAILURE(rc9))
+            return rc9;
+    }
+#else
+    RTTHREAD hThreadSelf = RTThreadSelf();
+#endif
+
+    /*
+     * Convert timeout value.
+     */
+    struct timespec ts;
+    struct timespec *pTimeout = NULL;
+    uint64_t u64End = 0; /* shut up gcc */
+    if (cMillies != RT_INDEFINITE_WAIT)
+    {
+        ts.tv_sec  = cMillies / 1000;
+        ts.tv_nsec = (cMillies % 1000) * UINT32_C(1000000);
+        u64End = RTTimeSystemNanoTS() + cMillies * UINT64_C(1000000);
+        pTimeout = &ts;
+    }
+
+    /*
+     * Lock the mutex.
+     * Optimize for the uncontended case (makes 1-2 ns difference).
+     */
+    if (RT_UNLIKELY(!ASMAtomicCmpXchgS32(&pThis->iState, 1, 0)))
+    {
+        for (;;)
+        {
+            int32_t iOld = ASMAtomicXchgS32(&pThis->iState, 2);
+
+            /*
+             * Was the lock released in the meantime? This is unlikely (but possible)
+             */
+            if (RT_UNLIKELY(iOld == 0))
+                break;
+
+            /*
+             * Go to sleep.
+             */
+            /* NOTE(review): this condition is false for indefinite waits
+               (pTimeout == NULL), so the blocking check/notification is
+               skipped for them - confirm whether that is intended. */
+            if (pTimeout && ( pTimeout->tv_sec || pTimeout->tv_nsec ))
+            {
+#ifdef RTSEMMUTEX_STRICT
+                int rc9 = RTLockValidatorRecExclCheckBlocking(&pThis->ValidatorRec, hThreadSelf, pSrcPos, true,
+                                                              cMillies, RTTHREADSTATE_MUTEX, true);
+                if (RT_FAILURE(rc9))
+                    return rc9;
+#else
+                RTThreadBlocking(hThreadSelf, RTTHREADSTATE_MUTEX, true);
+#endif
+            }
+
+            long rc = sys_futex(&pThis->iState, FUTEX_WAIT, 2, pTimeout, NULL, 0);
+
+            RTThreadUnblocked(hThreadSelf, RTTHREADSTATE_MUTEX);
+            if (RT_UNLIKELY(pThis->u32Magic != RTSEMMUTEX_MAGIC))
+                return VERR_SEM_DESTROYED;
+
+            /*
+             * Act on the wakeup code.
+             */
+            if (rc == -ETIMEDOUT)
+            {
+                Assert(pTimeout);
+                return VERR_TIMEOUT;
+            }
+            if (rc == 0)
+                /* we'll leave the loop now unless another thread is faster */;
+            else if (rc == -EWOULDBLOCK)
+                /* retry with new value. */;
+            else if (rc == -EINTR)
+            {
+                if (!fAutoResume)
+                    return VERR_INTERRUPTED;
+            }
+            else
+            {
+                /* this shouldn't happen! */
+                AssertMsgFailed(("rc=%ld errno=%d\n", rc, errno));
+                /* sys_futex returns the negated errno, so flip the sign
+                   before converting it to an IPRT status code. */
+                return RTErrConvertFromErrno((int)-rc);
+            }
+
+            /* adjust the relative timeout */
+            if (pTimeout)
+            {
+                int64_t i64Diff = u64End - RTTimeSystemNanoTS();
+                /* Out of time: return directly. Leaving the loop here would
+                   fall through to the owner assignment below and report
+                   success without actually holding the lock. */
+                if (i64Diff < 1000)
+                    return VERR_TIMEOUT;
+                ts.tv_sec  = (uint64_t)i64Diff / UINT32_C(1000000000);
+                ts.tv_nsec = (uint64_t)i64Diff % UINT32_C(1000000000);
+            }
+        }
+
+        /*
+         * When leaving this loop, iState is set to 2. This means that we gained the
+         * lock and there are _possibly_ some waiters. We don't know exactly as another
+         * thread might entered this loop at nearly the same time. Therefore we will
+         * call futex_wakeup once too often (if _no_ other thread entered this loop).
+         * The key problem is the simple futex_wait test for x != y (iState != 2) in
+         * our case).
+         */
+    }
+
+    /*
+     * Set the owner and nesting.
+     */
+    pThis->Owner = Self;
+    ASMAtomicWriteU32(&pThis->cNestings, 1);
+#ifdef RTSEMMUTEX_STRICT
+    RTLockValidatorRecExclSetOwner(&pThis->ValidatorRec, hThreadSelf, pSrcPos, true);
+#endif
+    return VINF_SUCCESS;
+}
+
+
+#undef RTSemMutexRequest
+RTDECL(int) RTSemMutexRequest(RTSEMMUTEX hMutexSem, RTMSINTERVAL cMillies)
+{
+    /* Auto-resuming request; the worker gets a source position only in strict builds. */
+#ifdef RTSEMMUTEX_STRICT
+    RTLOCKVALSRCPOS SrcPos = RTLOCKVALSRCPOS_INIT_NORMAL_API();
+    int rc = rtSemMutexRequest(hMutexSem, cMillies, true, &SrcPos);
+#else
+    int rc = rtSemMutexRequest(hMutexSem, cMillies, true, NULL);
+#endif
+    /* Interruptions are resumed by the worker and must never surface here. */
+    Assert(rc != VERR_INTERRUPTED);
+    return rc;
+}
+
+
+RTDECL(int) RTSemMutexRequestDebug(RTSEMMUTEX hMutexSem, RTMSINTERVAL cMillies, RTHCUINTPTR uId, RT_SRC_POS_DECL)
+{
+    /* Debug flavour of the auto-resuming request: always passes a source position. */
+    RTLOCKVALSRCPOS SrcPos = RTLOCKVALSRCPOS_INIT_DEBUG_API();
+    int const rcRequest = rtSemMutexRequest(hMutexSem, cMillies, true, &SrcPos);
+    /* Interruptions are resumed by the worker and must never surface here. */
+    Assert(rcRequest != VERR_INTERRUPTED);
+    return rcRequest;
+}
+
+
+#undef RTSemMutexRequestNoResume
+RTDECL(int) RTSemMutexRequestNoResume(RTSEMMUTEX hMutexSem, RTMSINTERVAL cMillies)
+{
+    /* Non-resuming request: the worker is called with fAutoResume = false. */
+#ifdef RTSEMMUTEX_STRICT
+    RTLOCKVALSRCPOS SrcPos = RTLOCKVALSRCPOS_INIT_NORMAL_API();
+    return rtSemMutexRequest(hMutexSem, cMillies, false, &SrcPos);
+#else
+    return rtSemMutexRequest(hMutexSem, cMillies, false, NULL);
+#endif
+}
+
+
+RTDECL(int) RTSemMutexRequestNoResumeDebug(RTSEMMUTEX hMutexSem, RTMSINTERVAL cMillies, RTHCUINTPTR uId, RT_SRC_POS_DECL)
+{
+    /* Debug flavour of the non-resuming request: always passes a source position. */
+    RTLOCKVALSRCPOS SrcPos = RTLOCKVALSRCPOS_INIT_DEBUG_API();
+    int const rcRequest = rtSemMutexRequest(hMutexSem, cMillies, false, &SrcPos);
+    return rcRequest;
+}
+
+
+/*
+ * Releases one level of ownership of the mutex.
+ *
+ * Returns VINF_SUCCESS on success, VERR_INVALID_HANDLE for a bad handle,
+ * VERR_NOT_OWNER if the calling thread is not the owner (or the mutex is not
+ * held at all), or a lock validator status in strict builds.
+ */
+RTDECL(int) RTSemMutexRelease(RTSEMMUTEX hMutexSem)
+{
+    /*
+     * Validate input.
+     */
+    struct RTSEMMUTEXINTERNAL *pThis = hMutexSem;
+    AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+    AssertReturn(pThis->u32Magic == RTSEMMUTEX_MAGIC, VERR_INVALID_HANDLE);
+
+#ifdef RTSEMMUTEX_STRICT
+    /* Note: the validator is consulted before the ownership check below. */
+    int rc9 = RTLockValidatorRecExclReleaseOwner(&pThis->ValidatorRec, pThis->cNestings == 1);
+    if (RT_FAILURE(rc9))
+        return rc9;
+#endif
+
+    /*
+     * Check if nested.
+     */
+    pthread_t Self = pthread_self();
+    if (RT_UNLIKELY(    pThis->Owner != Self
+                    ||  pThis->cNestings == 0))
+    {
+        /* NOTE(review): %08x is used for the pthread_t values here - confirm
+           the format specifiers match on the targets this is built for. */
+        AssertMsgFailed(("Not owner of mutex %p!! Self=%08x Owner=%08x cNestings=%d\n",
+                         pThis, Self, pThis->Owner, pThis->cNestings));
+        return VERR_NOT_OWNER;
+    }
+
+    /*
+     * If nested we'll just pop a nesting.
+     */
+    if (pThis->cNestings > 1)
+    {
+        ASMAtomicDecU32(&pThis->cNestings);
+        return VINF_SUCCESS;
+    }
+
+    /*
+     * Clear the state. (cNestings == 1)
+     */
+    pThis->Owner = (pthread_t)~0;
+    ASMAtomicWriteU32(&pThis->cNestings, 0);
+
+    /*
+     * Release the mutex.
+     */
+    /* Decrementing 1 (locked, no waiters) yields 0 and we are done; any other
+       result means the state was 2 (locked, possibly waiters). */
+    int32_t iNew = ASMAtomicDecS32(&pThis->iState);
+    if (RT_UNLIKELY(iNew != 0))
+    {
+        /* somebody is waiting, try wake up one of them. */
+        ASMAtomicXchgS32(&pThis->iState, 0);
+        (void)sys_futex(&pThis->iState, FUTEX_WAKE, 1, NULL, NULL, 0);
+    }
+    return VINF_SUCCESS;
+}
+
+
+RTDECL(bool) RTSemMutexIsOwned(RTSEMMUTEX hMutexSem)
+{
+    /* Invalid handles are reported as "not owned". */
+    RTSEMMUTEXINTERNAL *pMutex = hMutexSem;
+    AssertPtrReturn(pMutex, false);
+    AssertReturn(pMutex->u32Magic == RTSEMMUTEX_MAGIC, false);
+
+    /* (pthread_t)~0 is the "no owner" marker; anything else means some thread holds it. */
+    bool const fOwned = pMutex->Owner != (pthread_t)~0;
+    return fOwned;
+}
+
diff --git a/src/VBox/Runtime/r3/linux/semwait-linux.h b/src/VBox/Runtime/r3/linux/semwait-linux.h
new file mode 100644
index 00000000..0f533845
--- /dev/null
+++ b/src/VBox/Runtime/r3/linux/semwait-linux.h
@@ -0,0 +1,233 @@
+/* $Id: semwait-linux.h $ */
+/** @file
+ * IPRT - Common semaphore wait code, Linux.
+ */
+
+/*
+ * Copyright (C) 2021-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+#ifndef IPRT_INCLUDED_SRC_r3_linux_semwait_linux_h
+#define IPRT_INCLUDED_SRC_r3_linux_semwait_linux_h
+#ifndef RT_WITHOUT_PRAGMA_ONCE
+# pragma once
+#endif
+
+
+/* With 2.6.17 futex.h has become C++ unfriendly, so define the bits we need. */
+#define FUTEX_WAIT 0
+#define FUTEX_WAKE 1
+#define FUTEX_WAIT_BITSET 9 /**< @since 2.6.25 - uses absolute timeout. */
+
+
+/**
+ * Thin wrapper around the raw futex system call.
+ *
+ * @returns The non-negative syscall result on success, otherwise the negated
+ *          errno value (e.g. -ETIMEDOUT).
+ */
+DECLINLINE(long) sys_futex(uint32_t volatile *uaddr, int op, int val, struct timespec *utime, int32_t *uaddr2, int val3)
+{
+    errno = 0;
+    long const lResult = syscall(__NR_futex, uaddr, op, val, utime, uaddr2, val3);
+    if (lResult >= 0)
+        return lResult;
+
+    /* Failure: syscall() reports -1 and leaves the reason in errno. */
+    Assert(lResult == -1);
+    return -errno;
+}
+
+
+/**
+ * Probes whether the kernel supports FUTEX_WAIT_BITSET and records the result.
+ *
+ * The probe waits on a local variable using an expected value that does not
+ * match its contents, so the call comes straight back: -EAGAIN is taken to
+ * mean the operation is supported, -ENOSYS that it is not.
+ *
+ * @param   pfCanUseWaitBitSet  Where to store the result (true / false).
+ */
+DECL_NO_INLINE(static, void) rtSemLinuxCheckForFutexWaitBitSetSlow(int volatile *pfCanUseWaitBitSet)
+{
+    uint32_t uTestVar = UINT32_MAX;
+    long rc = sys_futex(&uTestVar, FUTEX_WAIT_BITSET, UINT32_C(0xf0f0f0f0), NULL, NULL, UINT32_MAX);
+    *pfCanUseWaitBitSet = rc == -EAGAIN;
+    /* rc is a long, so format it with %ld. */
+    AssertMsg(rc == -ENOSYS || rc == -EAGAIN, ("%ld\n", rc));
+}
+
+
+/**
+ * Makes sure the FUTEX_WAIT_BITSET availability flag has been determined.
+ *
+ * A value of -1 means "not probed yet" and triggers the slow path.
+ */
+DECLINLINE(void) rtSemLinuxCheckForFutexWaitBitSet(int volatile *pfCanUseWaitBitSet)
+{
+    if (*pfCanUseWaitBitSet == -1)
+        rtSemLinuxCheckForFutexWaitBitSetSlow(pfCanUseWaitBitSet);
+}
+
+
+/**
+ * Converts a extended wait timeout specification to an timespec and
+ * corresponding futex operation, as well as an approximate relative nanosecond
+ * interval.
+ *
+ * @note    This does not check for RTSEMWAIT_FLAGS_INDEFINITE, caller should've
+ *          done that already.
+ *
+ * @returns The relative wait in nanoseconds.  0 for a poll call, UINT64_MAX for
+ *          an effectively indefinite wait.  The output parameters are only
+ *          fully set when neither of these two special values is returned.
+ * @param   fFlags              RTSEMWAIT_FLAGS_XXX.
+ * @param   uTimeout            The timeout.
+ * @param   fCanUseWaitBitSet   Whether we can use FUTEX_WAIT_BITSET or not.
+ * @param   pDeadline           Where to return the deadline.
+ * @param   piWaitOp            Where to return the FUTEX wait operation number.
+ * @param   puWaitVal3          Where to return the FUTEX wait value 3.
+ * @param   pnsAbsTimeout       Where to return the absolute timeout in case of
+ *                              a resuming relative call (i.e. FUTEX_WAIT).
+ */
+DECL_FORCE_INLINE(uint64_t)
+rtSemLinuxCalcDeadline(uint32_t fFlags, uint64_t uTimeout, int fCanUseWaitBitSet,
+                       struct timespec *pDeadline, int *piWaitOp, uint32_t *puWaitVal3, uint64_t *pnsAbsTimeout)
+{
+    Assert(!(fFlags & RTSEMWAIT_FLAGS_INDEFINITE));
+
+    if (fFlags & RTSEMWAIT_FLAGS_RELATIVE)
+    {
+        Assert(!(fFlags & RTSEMWAIT_FLAGS_ABSOLUTE));
+
+        /*
+         * Polling call?
+         */
+        if (uTimeout == 0)
+            return 0;
+
+        /*
+         * We use FUTEX_WAIT here as it takes a relative timespec.
+         *
+         * Note! For non-resuming waits, we can skip calculating the absolute
+         *       time ASSUMING it is only needed for timeout adjustments
+         *       after an -EINTR return.
+         */
+        if (fFlags & RTSEMWAIT_FLAGS_MILLISECS)
+        {
+            if (   sizeof(pDeadline->tv_sec) >= sizeof(uint64_t)
+                || uTimeout < (uint64_t)UINT32_MAX * RT_MS_1SEC)
+            {
+                pDeadline->tv_sec  = uTimeout / RT_MS_1SEC;
+                /* Sub-second milliseconds converted to nanoseconds (max 999 000 000). */
+                pDeadline->tv_nsec = (uTimeout % RT_MS_1SEC) * RT_NS_1MS;
+                uTimeout *= RT_NS_1MS;
+            }
+            else
+                return UINT64_MAX;
+        }
+        else
+        {
+            Assert(fFlags & RTSEMWAIT_FLAGS_NANOSECS);
+            if (   sizeof(pDeadline->tv_sec) >= sizeof(uint64_t)
+                || uTimeout < (uint64_t)UINT32_MAX * RT_NS_1SEC)
+            {
+                pDeadline->tv_sec  = uTimeout / RT_NS_1SEC;
+                pDeadline->tv_nsec = uTimeout % RT_NS_1SEC;
+            }
+            else
+                return UINT64_MAX;
+        }
+
+        /* NOTE(review): in RT_STRICT builds a non-resuming wait stores the
+           relative interval here rather than an absolute time - confirm this
+           is what callers using *pnsAbsTimeout for re-arming expect. */
+#ifdef RT_STRICT
+        if (!(fFlags & RTSEMWAIT_FLAGS_RESUME))
+            *pnsAbsTimeout = uTimeout;
+        else
+#endif
+            *pnsAbsTimeout = RTTimeNanoTS() + uTimeout; /* Note! only relevant for relative waits (FUTEX_WAIT). */
+    }
+    else
+    {
+        /* Absolute deadline: */
+        Assert(fFlags & RTSEMWAIT_FLAGS_ABSOLUTE);
+        if (fCanUseWaitBitSet == true)
+        {
+            /*
+             * Use FUTEX_WAIT_BITSET as it takes an absolute deadline.
+             */
+            if (fFlags & RTSEMWAIT_FLAGS_MILLISECS)
+            {
+                if (   sizeof(pDeadline->tv_sec) >= sizeof(uint64_t)
+                    || uTimeout < (uint64_t)UINT32_MAX * RT_MS_1SEC)
+                {
+                    pDeadline->tv_sec  = uTimeout / RT_MS_1SEC;
+                    /* Sub-second milliseconds converted to nanoseconds (max 999 000 000). */
+                    pDeadline->tv_nsec = (uTimeout % RT_MS_1SEC) * RT_NS_1MS;
+                }
+                else
+                    return UINT64_MAX;
+            }
+            else
+            {
+                Assert(fFlags & RTSEMWAIT_FLAGS_NANOSECS);
+                if (   sizeof(pDeadline->tv_sec) >= sizeof(uint64_t)
+                    || uTimeout < (uint64_t)UINT32_MAX * RT_NS_1SEC)
+                {
+                    pDeadline->tv_sec  = uTimeout / RT_NS_1SEC;
+                    pDeadline->tv_nsec = uTimeout % RT_NS_1SEC;
+                }
+                else
+                    return UINT64_MAX;
+            }
+            *pnsAbsTimeout = uTimeout;
+            *piWaitOp      = FUTEX_WAIT_BITSET;
+            *puWaitVal3    = UINT32_MAX;
+            return RT_MS_1SEC; /* Whatever non-zero; Whole point is not calling RTTimeNanoTS() in this path. */
+        }
+
+        /*
+         * FUTEX_WAIT_BITSET is not available, so use FUTEX_WAIT with a
+         * relative timeout.
+         */
+        if (fFlags & RTSEMWAIT_FLAGS_MILLISECS)
+        {
+            /* Guard the multiplication against 64-bit overflow. */
+            if (uTimeout < UINT64_MAX / RT_NS_1MS)
+                uTimeout *= RT_NS_1MS;
+            else
+                return UINT64_MAX;
+        }
+
+        uint64_t const u64Now = RTTimeNanoTS();
+        if (u64Now < uTimeout)
+        {
+            *pnsAbsTimeout = uTimeout;
+            uTimeout      -= u64Now;
+        }
+        else
+            return 0; /* The deadline has already passed: poll. */
+
+        if (   sizeof(pDeadline->tv_sec) >= sizeof(uint64_t)
+            || uTimeout < (uint64_t)UINT32_MAX * RT_NS_1SEC)
+        {
+            pDeadline->tv_sec  = uTimeout / RT_NS_1SEC;
+            pDeadline->tv_nsec = uTimeout % RT_NS_1SEC;
+        }
+        else
+            return UINT64_MAX;
+    }
+
+    *piWaitOp   = FUTEX_WAIT;
+    *puWaitVal3 = 0;
+    return uTimeout;
+}
+
+#endif /* !IPRT_INCLUDED_SRC_r3_linux_semwait_linux_h */
+
diff --git a/src/VBox/Runtime/r3/linux/sysfs.cpp b/src/VBox/Runtime/r3/linux/sysfs.cpp
new file mode 100644
index 00000000..6324fe00
--- /dev/null
+++ b/src/VBox/Runtime/r3/linux/sysfs.cpp
@@ -0,0 +1,736 @@
+/* $Id: sysfs.cpp $ */
+/** @file
+ * IPRT - Linux sysfs access.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+*   Header Files                                                                                                                 *
+*********************************************************************************************************************************/
+#define LOG_GROUP RTLOGGROUP_SYSTEM
+#include <iprt/assert.h>
+#include <iprt/dir.h>
+#include <iprt/err.h>
+#include <iprt/file.h>
+#include <iprt/fs.h>
+#include <iprt/param.h>
+#include <iprt/path.h>
+#include <iprt/string.h>
+#include <iprt/symlink.h>
+
+#include <iprt/linux/sysfs.h>
+
+#include <unistd.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include <sys/fcntl.h>
+#include <sys/sysmacros.h>
+#include <errno.h>
+
+
+
+/**
+ * Formats a path into @a pszBuf and, if the result does not start with '/',
+ * inserts @a pszPrefix in front of it.
+ *
+ * @returns VINF_SUCCESS, VERR_INVALID_PARAMETER or VERR_BUFFER_OVERFLOW.
+ * @param   pszBuf     Where to write the path.  Must hold more than
+ *                     strlen(@a pszPrefix) + 1 characters.
+ * @param   cchBuf     The size of the buffer pointed to by @a pszBuf.
+ * @param   pszPrefix  The prefix to prepend if the path is relative.  Must end
+ *                     in '/'.
+ * @param   pszFormat  The name format, either absolute or relative to the
+ *                     prefix specified by @a pszPrefix.
+ * @param   va         The format args.
+ */
+static int rtLinuxConstructPathV(char *pszBuf, size_t cchBuf,
+                                 const char *pszPrefix,
+                                 const char *pszFormat, va_list va)
+{
+    size_t const cchPrefix = strlen(pszPrefix);
+    AssertReturn(pszPrefix[cchPrefix - 1] == '/', VERR_INVALID_PARAMETER); /* NOTE(review): indexes out of bounds if the prefix is empty. */
+    AssertReturn(cchBuf > cchPrefix + 1, VERR_INVALID_PARAMETER);
+
+    ssize_t cch = RTStrPrintf2V(pszBuf, cchBuf, pszFormat, va);
+    AssertReturn(cch >= 0, VERR_BUFFER_OVERFLOW); /* A negative length is reported as an overflow. */
+
+    if (*pszBuf != '/')
+    {
+        AssertReturn(cchBuf >= (size_t)cch + cchPrefix + 1, VERR_BUFFER_OVERFLOW); /* prefix + name + terminator */
+        memmove(pszBuf + cchPrefix, pszBuf, (size_t)cch + 1); /* Shift the name, terminator included, to make room. */
+        memcpy(pszBuf, pszPrefix, cchPrefix); /* The prefix goes in without its terminator. */
+    }
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Variadic front-end for rtLinuxConstructPathV: formats a path into @a pszBuf
+ * and prepends @a pszPrefix when the formatted path is relative.
+ *
+ * @returns IPRT status code.
+ * @param   pszBuf     Where to write the path.  Must hold more than
+ *                     strlen(@a pszPrefix) + 1 characters.
+ * @param   cchBuf     The size of the buffer pointed to by @a pszBuf.
+ * @param   pszPrefix  The prefix to prepend if the path is relative.  Must end
+ *                     in '/'.
+ * @param   pszFormat  The name format, absolute or relative to @a pszPrefix.
+ * @param   ...        The format args.
+ */
+DECLINLINE(int) rtLinuxConstructPath(char *pszBuf, size_t cchBuf,
+                                     const char *pszPrefix,
+                                     const char *pszFormat, ...)
+{
+    va_list args;
+    va_start(args, pszFormat);
+    int const rcRet = rtLinuxConstructPathV(pszBuf, cchBuf, pszPrefix, pszFormat, args);
+    va_end(args);
+    return rcRet;
+}
+
+
+/**
+ * Constructs the path of a sysfs file from the format parameters passed,
+ * prepending "/sys/" if the path is relative.
+ *
+ * @returns IPRT status code.
+ * @param   pszBuf     Where to write the path.  Must be larger than
+ *                     sizeof("/sys/") characters (asserted by the worker).
+ * @param   cchBuf     The size of the buffer pointed to by @a pszBuf.
+ * @param   pszFormat  The name format, either absolute or relative to "/sys/".
+ * @param   va         The format args.
+ */
+DECLINLINE(int) rtLinuxSysFsConstructPath(char *pszBuf, size_t cchBuf, const char *pszFormat, va_list va)
+{
+    return rtLinuxConstructPathV(pszBuf, cchBuf, "/sys/", pszFormat, va);
+}
+
+
+RTDECL(int) RTLinuxConstructPathV(char *pszPath, size_t cbPath, const char *pszFormat, va_list va)
+{
+    return rtLinuxSysFsConstructPath(pszPath, cbPath, pszFormat, va); /* Public entry point; relative paths end up under "/sys/". */
+}
+
+
+RTDECL(int) RTLinuxConstructPath(char *pszPath, size_t cbPath, const char *pszFormat, ...)
+{
+    va_list args; /* Handed unchanged to the "/sys/"-prefixing worker. */
+    va_start(args, pszFormat);
+    int const rcRet = rtLinuxSysFsConstructPath(pszPath, cbPath, pszFormat, args);
+    va_end(args);
+    return rcRet;
+}
+
+
+RTDECL(int) RTLinuxSysFsExistsExV(const char *pszFormat, va_list va)
+{
+    int const iErrnoOnEntry = errno; /* Restored before returning, so the probe is invisible to callers. */
+
+    /*
+     * Build the sysfs path, then probe it with stat().
+     */
+    char szPath[RTPATH_MAX];
+    int rcRet = rtLinuxSysFsConstructPath(szPath, sizeof(szPath), pszFormat, va);
+    if (RT_SUCCESS(rcRet))
+    {
+        struct stat StatBuf;
+        if (stat(szPath, &StatBuf) != 0)
+            rcRet = RTErrConvertFromErrno(errno);
+        /* The stat data itself is not used, only whether the call worked. */
+    }
+
+    errno = iErrnoOnEntry;
+    return rcRet;
+}
+
+
+RTDECL(bool) RTLinuxSysFsExistsV(const char *pszFormat, va_list va)
+{
+    return RT_SUCCESS(RTLinuxSysFsExistsExV(pszFormat, va)); /* Collapses the status code into a plain yes/no. */
+}
+
+
+RTDECL(int) RTLinuxSysFsExistsEx(const char *pszFormat, ...)
+{
+    va_list args; /* Forwarded to the va_list variant, which does the actual check. */
+    va_start(args, pszFormat);
+    int const rcRet = RTLinuxSysFsExistsExV(pszFormat, args);
+    va_end(args);
+    return rcRet;
+}
+
+
+RTDECL(bool) RTLinuxSysFsExists(const char *pszFormat, ...)
+{
+    va_list args; /* Forwarded to the va_list variant, which does the actual check. */
+    va_start(args, pszFormat);
+    bool const fExists = RTLinuxSysFsExistsV(pszFormat, args);
+    va_end(args);
+    return fExists;
+}
+
+
+RTDECL(int) RTLinuxSysFsOpenV(PRTFILE phFile, const char *pszFormat, va_list va)
+{
+    /*
+     * Resolve the sysfs path first; open it read-only when that worked.
+     */
+    char szPath[RTPATH_MAX];
+    int const rcPath = rtLinuxSysFsConstructPath(szPath, sizeof(szPath), pszFormat, va);
+    if (RT_FAILURE(rcPath))
+        return rcPath;
+    return RTFileOpen(phFile, szPath, RTFILE_O_OPEN | RTFILE_O_READ | RTFILE_O_DENY_NONE);
+}
+
+
+RTDECL(int) RTLinuxSysFsOpenExV(PRTFILE phFile, uint64_t fOpen, const char *pszFormat, va_list va)
+{
+    /*
+     * Resolve the sysfs path first; open it with the caller's flags when that worked.
+     */
+    char szPath[RTPATH_MAX];
+    int const rcPath = rtLinuxSysFsConstructPath(szPath, sizeof(szPath), pszFormat, va);
+    if (RT_FAILURE(rcPath))
+        return rcPath;
+    return RTFileOpen(phFile, szPath, fOpen);
+}
+
+
+RTDECL(int) RTLinuxSysFsOpen(PRTFILE phFile, const char *pszFormat, ...)
+{
+    va_list args; /* Forwarded to the va_list variant, which opens the file read-only. */
+    va_start(args, pszFormat);
+    int const rcOpen = RTLinuxSysFsOpenV(phFile, pszFormat, args);
+    va_end(args);
+    return rcOpen;
+}
+
+
+RTDECL(int) RTLinuxSysFsOpenEx(PRTFILE phFile, uint64_t fOpen, const char *pszFormat, ...)
+{
+    va_list args; /* Forwarded to the va_list variant together with the open flags. */
+    va_start(args, pszFormat);
+    int const rcOpen = RTLinuxSysFsOpenExV(phFile, fOpen, pszFormat, args);
+    va_end(args);
+    return rcOpen;
+}
+
+
+RTDECL(int) RTLinuxSysFsReadStr(RTFILE hFile, char *pszBuf, size_t cchBuf, size_t *pcchRead)
+{
+    Assert(cchBuf > 1); /* not mandatory */
+    /* Reads up to cchBuf bytes from hFile into pszBuf and zero-terminates the result. */
+    int rc;
+    size_t cchRead;
+    rc = RTFileRead(hFile, pszBuf, cchBuf, &cchRead);
+    if (RT_SUCCESS(rc))
+    {
+        /*
+         * ASSUME that if we've read less than we asked for, we've reached the
+         * end of the file.  Otherwise, we've been given a buffer too small for
+         * the entire remainder of the file.
+         */
+        if (cchRead < cchBuf)
+            pszBuf[cchRead] = '\0';
+        else if (cchBuf)
+        {
+            rc = RTFileSeek(hFile, -1, RTFILE_SEEK_CURRENT, NULL); /* Step back one byte: the last byte read is overwritten by the terminator below. */
+            if (RT_SUCCESS(rc))
+                rc = VERR_BUFFER_OVERFLOW;
+            cchRead = cchBuf - 1;
+            pszBuf[cchRead] = '\0';
+        }
+        else
+            rc = VERR_BUFFER_OVERFLOW; /* Zero-sized buffer: not even a terminator can be stored. */
+    }
+    else
+    {
+        if (cchBuf > 0)
+            *pszBuf = '\0'; /* Hand back an empty string when the read failed. */
+        cchRead = 0;
+    }
+
+    if (pcchRead)
+        *pcchRead = cchRead; /* Optional output; the count excludes the terminator. */
+    return rc;
+}
+
+
+RTDECL(int) RTLinuxSysFsWriteStr(RTFILE hFile, const char *pszBuf, size_t cchBuf, size_t *pcchWritten)
+{
+    /* A zero length means "write the whole string", terminator included. */
+    size_t const cbToWrite = cchBuf ? cchBuf : strlen(pszBuf) + 1;
+    return RTFileWrite(hFile, pszBuf, cbToWrite, pcchWritten);
+}
+
+
+RTDECL(int) RTLinuxSysFsReadFile(RTFILE hFile, void *pvBuf, size_t cbBuf, size_t *pcbRead)
+{
+    int    rc;
+    size_t cbRead = 0;
+    /* Reads up to cbBuf bytes; VERR_BUFFER_OVERFLOW is returned when more data follows a completely filled buffer. */
+    rc = RTFileRead(hFile, pvBuf, cbBuf, &cbRead);
+    if (RT_SUCCESS(rc))
+    {
+        if (pcbRead)
+            *pcbRead = cbRead; /* Only written when the read itself succeeded. */
+        if (cbRead < cbBuf)
+            rc = VINF_SUCCESS; /* A short read is taken as having reached the end of the file. */
+        else
+        {
+            /* Check for EOF */
+            uint64_t offCur = 0;
+            uint8_t bRead;
+            rc = RTFileSeek(hFile, 0, RTFILE_SEEK_CURRENT, &offCur); /* Remember the position so the probe below can be undone. */
+            if (RT_SUCCESS(rc))
+            {
+                int rc2 = RTFileRead(hFile, &bRead, 1, NULL); /* Probe for one more byte. */
+                if (RT_SUCCESS(rc2))
+                {
+                    rc = VERR_BUFFER_OVERFLOW;
+
+                    rc2 = RTFileSeek(hFile, offCur, RTFILE_SEEK_BEGIN, NULL); /* Un-read the probe byte. */
+                    if (RT_FAILURE(rc2))
+                        rc = rc2;
+                }
+                else if (rc2 != VERR_EOF)
+                    rc = rc2; /* On VERR_EOF the buffer was exactly large enough and rc stays successful. */
+            }
+        }
+    }
+
+    return rc;
+}
+
+
+RTDECL(int) RTLinuxSysFsWriteFile(RTFILE hFile, void *pvBuf, size_t cbBuf, size_t *pcbWritten)
+{
+    return RTFileWrite(hFile, pvBuf, cbBuf, pcbWritten); /* Plain pass-through; no sysfs-specific handling is added. */
+}
+
+
+RTDECL(int) RTLinuxSysFsReadIntFileV(unsigned uBase, int64_t *pi64, const char *pszFormat, va_list va)
+{
+    RTFILE hFile;
+    /* Opens the sysfs file named by pszFormat/va, parses its content as a base-uBase integer and stores it in *pi64. */
+    AssertPtrReturn(pi64, VERR_INVALID_POINTER);
+
+    int rc = RTLinuxSysFsOpenV(&hFile, pszFormat, va);
+    if (RT_SUCCESS(rc))
+    {
+        char szNum[128];
+        size_t cchNum;
+        rc = RTLinuxSysFsReadStr(hFile, szNum, sizeof(szNum), &cchNum);
+        if (RT_SUCCESS(rc))
+        {
+            if (cchNum > 0)
+            {
+                int64_t i64Ret = -1;
+                rc = RTStrToInt64Ex(szNum, NULL, uBase, &i64Ret);
+                if (RT_SUCCESS(rc))
+                    *pi64 = i64Ret; /* *pi64 is left untouched on every failure path. */
+            }
+            else
+                rc = VERR_INVALID_PARAMETER; /* Nothing was read from the file. */
+        }
+
+        RTFileClose(hFile);
+    }
+
+    return rc;
+}
+
+
+RTDECL(int) RTLinuxSysFsReadIntFile(unsigned uBase, int64_t *pi64, const char *pszFormat, ...)
+{
+    va_list args; /* Forwarded to the va_list variant, which opens, reads and parses the file. */
+    va_start(args, pszFormat);
+    int const rcRead = RTLinuxSysFsReadIntFileV(uBase, pi64, pszFormat, args);
+    va_end(args);
+    return rcRead;
+}
+
+
+RTDECL(int) RTLinuxSysFsWriteU8FileV(unsigned uBase, uint8_t u8, const char *pszFormat, va_list va)
+{
+    return RTLinuxSysFsWriteU64FileV(uBase, u8, pszFormat, va); /* u8 is widened to 64 bits; the U64 worker does the formatting and writing. */
+}
+
+
+RTDECL(int) RTLinuxSysFsWriteU8File(unsigned uBase, uint8_t u8, const char *pszFormat, ...)
+{
+    va_list args; /* Goes straight to the 64-bit worker; u8 is widened on the way. */
+    va_start(args, pszFormat);
+    int const rcWrite = RTLinuxSysFsWriteU64FileV(uBase, u8, pszFormat, args);
+    va_end(args);
+    return rcWrite;
+}
+
+
+RTDECL(int) RTLinuxSysFsWriteU16FileV(unsigned uBase, uint16_t u16, const char *pszFormat, va_list va)
+{
+    return RTLinuxSysFsWriteU64FileV(uBase, u16, pszFormat, va); /* u16 is widened to 64 bits; the U64 worker does the formatting and writing. */
+}
+
+
+RTDECL(int) RTLinuxSysFsWriteU16File(unsigned uBase, uint16_t u16, const char *pszFormat, ...)
+{
+    va_list args; /* Goes straight to the 64-bit worker; u16 is widened on the way. */
+    va_start(args, pszFormat);
+    int const rcWrite = RTLinuxSysFsWriteU64FileV(uBase, u16, pszFormat, args);
+    va_end(args);
+    return rcWrite;
+}
+
+
+RTDECL(int) RTLinuxSysFsWriteU32FileV(unsigned uBase, uint32_t u32, const char *pszFormat, va_list va)
+{
+    return RTLinuxSysFsWriteU64FileV(uBase, u32, pszFormat, va); /* u32 is widened to 64 bits; the U64 worker does the formatting and writing. */
+}
+
+
+RTDECL(int) RTLinuxSysFsWriteU32File(unsigned uBase, uint32_t u32, const char *pszFormat, ...)
+{
+    va_list args; /* Goes straight to the 64-bit worker; u32 is widened on the way. */
+    va_start(args, pszFormat);
+    int const rcWrite = RTLinuxSysFsWriteU64FileV(uBase, u32, pszFormat, args);
+    va_end(args);
+    return rcWrite;
+}
+
+
+RTDECL(int) RTLinuxSysFsWriteU64FileV(unsigned uBase, uint64_t u64, const char *pszFormat, va_list va)
+{
+    RTFILE hFile;
+    /* Formats u64 in base 8, 10 or 16 and writes the resulting text to the sysfs file named by pszFormat/va. */
+    const char *pszFmt = NULL;
+    switch (uBase)
+    {
+        case 8:
+            pszFmt = "%#llo";
+            break;
+        case 10:
+            pszFmt = "%llu";
+            break;
+        case 16:
+            pszFmt = "%#llx";
+            break;
+        default:
+            return VERR_INVALID_PARAMETER; /* Unsupported base; nothing has been opened yet. */
+    }
+
+    int rc = RTLinuxSysFsOpenExV(&hFile, RTFILE_O_OPEN | RTFILE_O_WRITE | RTFILE_O_DENY_NONE, pszFormat, va);
+    if (RT_SUCCESS(rc))
+    {
+        char szNum[128];
+        size_t cchNum = RTStrPrintf(szNum, sizeof(szNum), pszFmt, u64);
+        if (cchNum > 0)
+        {
+            size_t cbWritten = 0;
+            rc = RTLinuxSysFsWriteStr(hFile, &szNum[0], cchNum, &cbWritten); /* Non-zero length: exactly cchNum bytes are requested. */
+            if (   RT_SUCCESS(rc)
+                && cbWritten != cchNum)
+                rc = VERR_BUFFER_OVERFLOW; /* A short write is reported as an overflow. */
+        }
+        else
+            rc = VERR_INVALID_PARAMETER;
+
+        RTFileClose(hFile);
+    }
+
+    return rc;
+}
+
+
+RTDECL(int) RTLinuxSysFsWriteU64File(unsigned uBase, uint32_t u64, const char *pszFormat, ...)
+{
+    va_list va;                 /* NOTE(review): u64 is declared uint32_t here, so wider values are */
+    va_start(va, pszFormat);    /* truncated before reaching the 64-bit worker - check the prototype. */
+    int rc = RTLinuxSysFsWriteU64FileV(uBase, u64, pszFormat, va);
+    va_end(va);
+    return rc;
+}
+
+
+RTDECL(int) RTLinuxSysFsReadDevNumFileV(dev_t *pDevNum, const char *pszFormat, va_list va)
+{
+    RTFILE hFile;
+    /* Reads a decimal "major:minor" pair from the sysfs file and stores makedev(major, minor) in *pDevNum. */
+    AssertPtrReturn(pDevNum, VERR_INVALID_POINTER);
+
+    int rc = RTLinuxSysFsOpenV(&hFile, pszFormat, va);
+    if (RT_SUCCESS(rc))
+    {
+        size_t cchNum = 0;
+        char szNum[128];
+        rc = RTLinuxSysFsReadStr(hFile, szNum, sizeof(szNum), &cchNum);
+        if (RT_SUCCESS(rc))
+        {
+            if (cchNum > 0)
+            {
+                uint32_t u32Maj = 0;
+                uint32_t u32Min = 0;
+                char *pszNext = NULL;
+                rc = RTStrToUInt32Ex(szNum, &pszNext, 10, &u32Maj);
+                if (RT_FAILURE(rc) || (rc != VWRN_TRAILING_CHARS) || (*pszNext != ':')) /* The major number must be followed directly by ':'. */
+                    rc = VERR_INVALID_PARAMETER;
+                else
+                {
+                    rc = RTStrToUInt32Ex(pszNext + 1, NULL, 10, &u32Min);
+                    if (   rc != VINF_SUCCESS
+                        && rc != VWRN_TRAILING_CHARS
+                        && rc != VWRN_TRAILING_SPACES)
+                        rc = VERR_INVALID_PARAMETER;
+                    else
+                        *pDevNum = makedev(u32Maj, u32Min); /* rc may still hold one of the VWRN_* values accepted above. */
+                }
+            }
+            else
+                rc = VERR_INVALID_PARAMETER; /* Nothing was read from the file. */
+        }
+
+        RTFileClose(hFile);
+    }
+
+    return rc;
+}
+
+
+RTDECL(int) RTLinuxSysFsReadDevNumFile(dev_t *pDevNum, const char *pszFormat, ...)
+{
+    va_list args; /* Forwarded to the va_list variant, which reads and parses the file. */
+    va_start(args, pszFormat);
+    int const rcRead = RTLinuxSysFsReadDevNumFileV(pDevNum, pszFormat, args);
+    va_end(args);
+    return rcRead;
+}
+
+
+RTDECL(int) RTLinuxSysFsReadStrFileV(char *pszBuf, size_t cchBuf, size_t *pcchRead, const char *pszFormat, va_list va)
+{
+    RTFILE hFile;
+    /* Reads up to cchBuf bytes from the sysfs file into pszBuf and cuts the string at the first newline, if any. */
+    AssertPtrReturn(pszBuf, VERR_INVALID_POINTER);
+
+    int rc = RTLinuxSysFsOpenV(&hFile, pszFormat, va);
+    if (RT_SUCCESS(rc))
+    {
+        /*
+         * Note! We cannot use RTLinuxSysFsReadStr here as it has different
+         *       semantics wrt to newline characters.  It is not known why
+         *       the semantics has to differ... Michael, any clues?
+         */
+        size_t cchRead;
+        rc = RTFileRead(hFile, pszBuf, cchBuf, &cchRead);
+        if (RT_SUCCESS(rc))
+        {
+            char *pchNewLine = (char *)memchr(pszBuf, '\n', cchRead);
+            if (pchNewLine)
+            {
+                *pchNewLine = '\0'; /* The newline doubles as the terminator position. */
+                cchRead = pchNewLine - pszBuf;
+            }
+            else if (cchRead < cchBuf)
+                pszBuf[cchRead] = '\0';
+            else
+            {
+                if (cchBuf)
+                {
+                    cchRead = cchBuf - 1; /* Buffer completely filled: the last byte is sacrificed for the terminator. */
+                    pszBuf[cchRead] = '\0';
+                }
+                else
+                    cchRead = 0;
+                rc = VERR_BUFFER_OVERFLOW;
+            }
+        }
+        else
+            cchRead = 0; /* NOTE(review): pszBuf is not terminated on this path, unlike the open-failure path below. */
+
+        RTFileClose(hFile);
+
+        if (pcchRead)
+            *pcchRead = cchRead;
+    }
+    else
+    {
+        if (cchBuf)
+            *pszBuf = '\0'; /* Open failed: return an empty string. */
+        if (pcchRead)
+            *pcchRead = 0;
+    }
+    return rc;
+}
+
+
+RTDECL(int) RTLinuxSysFsReadStrFile(char *pszBuf, size_t cchBuf, size_t *pcchRead, const char *pszFormat, ...)
+{
+    va_list args; /* Forwarded to the va_list variant, which opens and reads the file. */
+    va_start(args, pszFormat);
+    int const rcRead = RTLinuxSysFsReadStrFileV(pszBuf, cchBuf, pcchRead, pszFormat, args);
+    va_end(args);
+    return rcRead;
+}
+
+
+RTDECL(int) RTLinuxSysFsWriteStrFileV(const char *pszBuf, size_t cchBuf, size_t *pcchWritten, const char *pszFormat, va_list va)
+{
+    AssertPtrReturn(pszBuf, VERR_INVALID_POINTER);
+
+    /* Open the target for writing; bail out early if that fails. */
+    RTFILE hSysFsFile;
+    int const rcOpen = RTLinuxSysFsOpenExV(&hSysFsFile, RTFILE_O_OPEN | RTFILE_O_WRITE | RTFILE_O_DENY_NONE, pszFormat, va);
+    if (RT_FAILURE(rcOpen))
+        return rcOpen;
+
+    int const rcWrite = RTLinuxSysFsWriteStr(hSysFsFile, pszBuf, cchBuf, pcchWritten);
+    RTFileClose(hSysFsFile);
+    return rcWrite;
+}
+
+
+RTDECL(int) RTLinuxSysFsWriteStrFile(const char *pszBuf, size_t cchBuf, size_t *pcchWritten, const char *pszFormat, ...)
+{
+    va_list args; /* Forwarded to the va_list variant, which opens and writes the file. */
+    va_start(args, pszFormat);
+    int const rcWrite = RTLinuxSysFsWriteStrFileV(pszBuf, cchBuf, pcchWritten, pszFormat, args);
+    va_end(args);
+    return rcWrite;
+}
+
+RTDECL(int) RTLinuxSysFsGetLinkDestV(char *pszBuf, size_t cchBuf, size_t *pchBuf, const char *pszFormat, va_list va)
+{
+    AssertReturn(cchBuf >= 2, VERR_INVALID_PARAMETER);
+    /* Reads the symlink named by pszFormat/va and returns the file name component of its target in pszBuf. */
+    /*
+     * Construct the filename and read the link.
+     */
+    char szFilename[RTPATH_MAX];
+    int rc = rtLinuxSysFsConstructPath(szFilename, sizeof(szFilename), pszFormat, va);
+    if (RT_SUCCESS(rc))
+    {
+        char szLink[RTPATH_MAX];
+        rc = RTSymlinkRead(szFilename, szLink, sizeof(szLink), 0);
+        if (RT_SUCCESS(rc))
+        {
+            /*
+             * Extract the file name component and copy it into the return buffer.
+             */
+            size_t cchName;
+            const char *pszName = RTPathFilename(szLink);
+            if (pszName)
+            {
+                cchName = strlen(pszName);
+                if (cchName < cchBuf)
+                    memcpy(pszBuf, pszName, cchName + 1); /* +1 copies the terminator too. */
+                else
+                    rc = VERR_BUFFER_OVERFLOW; /* pszBuf is left untouched; *pchBuf still receives the name length below. */
+            }
+            else
+            {
+                *pszBuf = '\0'; /* No file name component in the link target: return an empty string. */
+                cchName = 0;
+            }
+
+            if (pchBuf)
+                *pchBuf = cchName; /* Only set once the link has been read successfully. */
+        }
+    }
+
+    return rc;
+}
+
+
+RTDECL(int) RTLinuxSysFsGetLinkDest(char *pszBuf, size_t cchBuf, size_t *pchBuf, const char *pszFormat, ...)
+{
+    va_list args; /* Forwarded to the va_list variant, which reads the link. */
+    va_start(args, pszFormat);
+    int const rcLink = RTLinuxSysFsGetLinkDestV(pszBuf, cchBuf, pchBuf, pszFormat, args);
+    va_end(args);
+    return rcLink;
+}
+
+
+RTDECL(int) RTLinuxCheckDevicePathV(dev_t DevNum, RTFMODE fMode, char *pszBuf,
+                                    size_t cchBuf, const char *pszPattern,
+                                    va_list va)
+{
+    AssertReturn(cchBuf >= 2, VERR_INVALID_PARAMETER);
+    AssertReturn(   fMode == RTFS_TYPE_DEV_CHAR
+                 || fMode == RTFS_TYPE_DEV_BLOCK,
+                 VERR_INVALID_PARAMETER);
+    AssertPtrReturn(pszPattern, VERR_INVALID_PARAMETER);
+
+    /*
+     * Construct the filename (relative patterns go under "/dev/") and query its attributes.
+     */
+    char szFilename[RTPATH_MAX];
+    int rc = rtLinuxConstructPathV(szFilename, sizeof(szFilename), "/dev/",
+                                   pszPattern, va);
+    if (RT_SUCCESS(rc))
+    {
+        RTFSOBJINFO Info;
+        rc = RTPathQueryInfo(szFilename, &Info, RTFSOBJATTRADD_UNIX);
+        if (   rc == VERR_PATH_NOT_FOUND
+            || (   RT_SUCCESS(rc)
+                && (   Info.Attr.u.Unix.Device != DevNum
+                    || (Info.Attr.fMode & RTFS_TYPE_MASK) != fMode)))
+            rc = VERR_FILE_NOT_FOUND; /* Missing path, or a node whose device number or type differs from the request. */
+
+        if (RT_SUCCESS(rc))
+        {
+            size_t cchPath = strlen(szFilename);
+            if (cchPath < cchBuf)
+                memcpy(pszBuf, szFilename, cchPath + 1); /* +1 copies the terminator too. */
+            else
+                rc = VERR_BUFFER_OVERFLOW; /* pszBuf is left untouched. */
+        }
+    }
+
+    return rc;
+}
+
+
+RTDECL(int) RTLinuxCheckDevicePath(dev_t DevNum, RTFMODE fMode, char *pszBuf,
+                                   size_t cchBuf, const char *pszPattern,
+                                   ...)
+{
+    va_list args; /* Forwarded to the va_list variant, which does the actual check. */
+    va_start(args, pszPattern);
+    int const rcCheck = RTLinuxCheckDevicePathV(DevNum, fMode, pszBuf, cchBuf,
+                                                pszPattern, args);
+    va_end(args);
+    return rcCheck;
+}
+
diff --git a/src/VBox/Runtime/r3/linux/systemmem-linux.cpp b/src/VBox/Runtime/r3/linux/systemmem-linux.cpp
new file mode 100644
index 00000000..11764ab7
--- /dev/null
+++ b/src/VBox/Runtime/r3/linux/systemmem-linux.cpp
@@ -0,0 +1,119 @@
+/* $Id: systemmem-linux.cpp $ */
+/** @file
+ * IPRT - RTSystemQueryTotalRam, Linux ring-3.
+ */
+
+/*
+ * Copyright (C) 2012-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+*   Header Files                                                                                                                 *
+*********************************************************************************************************************************/
+#include <iprt/system.h>
+#include "internal/iprt.h"
+
+#include <iprt/errcore.h>
+#include <iprt/assert.h>
+#include <iprt/string.h>
+
+#include <stdio.h>
+#include <errno.h>
+
+/* Satisfy compiler warning */
+#define __EXPORTED_HEADERS__
+#include <sys/sysinfo.h>
+#undef __EXPORTED_HEADERS__
+
+
+RTDECL(int) RTSystemQueryTotalRam(uint64_t *pcb)
+{
+    AssertPtrReturn(pcb, VERR_INVALID_POINTER);
+
+    struct sysinfo SysInfo;
+    if (sysinfo(&SysInfo) != 0)
+        return RTErrConvertFromErrno(errno);
+
+    /* Widen totalram to 64 bits before scaling it by mem_unit so the
+       product is computed in 64-bit arithmetic. */
+    *pcb = (uint64_t)SysInfo.totalram * SysInfo.mem_unit;
+    return VINF_SUCCESS;
+}
+
+
+RTDECL(int) RTSystemQueryAvailableRam(uint64_t *pcb)
+{
+    AssertPtrReturn(pcb, VERR_INVALID_POINTER);
+    /* Preferred source: MemFree + Buffers + Cached as listed in /proc/meminfo. */
+    FILE *pFile = fopen("/proc/meminfo", "r");
+    if (pFile)
+    {
+        int rc = VERR_NOT_FOUND; /* Stays this way until one of the fields below has been seen. */
+        uint64_t cbTotal = 0;    /* Parsed, but not part of the result. */
+        uint64_t cbFree = 0;
+        uint64_t cbBuffers = 0;
+        uint64_t cbCached = 0;
+        char sz[256];
+        while (fgets(sz, sizeof(sz), pFile))
+        {
+            if (!strncmp(sz, RT_STR_TUPLE("MemTotal:"))) /* sizeof() - 1: start right after the colon, never past the terminator. */
+                rc = RTStrToUInt64Ex(RTStrStripL(&sz[sizeof("MemTotal:") - 1]), NULL, 0, &cbTotal);
+            else if (!strncmp(sz, RT_STR_TUPLE("MemFree:")))
+                rc = RTStrToUInt64Ex(RTStrStripL(&sz[sizeof("MemFree:") - 1]), NULL, 0, &cbFree);
+            else if (!strncmp(sz, RT_STR_TUPLE("Buffers:")))
+                rc = RTStrToUInt64Ex(RTStrStripL(&sz[sizeof("Buffers:") - 1]), NULL, 0, &cbBuffers);
+            else if (!strncmp(sz, RT_STR_TUPLE("Cached:")))
+                rc = RTStrToUInt64Ex(RTStrStripL(&sz[sizeof("Cached:") - 1]), NULL, 0, &cbCached);
+            if (RT_FAILURE(rc) && rc != VERR_NOT_FOUND) /* Only a parse error aborts; lines before the first known field are skipped. */
+                break;
+        }
+        fclose(pFile);
+        if (RT_SUCCESS(rc))
+        {
+            *pcb = (cbFree + cbBuffers + cbCached) * _1K; /* The parsed values are treated as KiB counts. */
+            return VINF_SUCCESS;
+        }
+    }
+    /*
+     * Fallback (e.g. /proc not mapped) to sysinfo. Less accurate because there
+     * is no information about the cached memory. 'Cached:' from above is only
+     * accessible through proc :-(
+     */
+    struct sysinfo info;
+    int rc = sysinfo(&info);
+    if (rc == 0)
+    {
+        *pcb = ((uint64_t)info.freeram + info.bufferram) * info.mem_unit;
+        return VINF_SUCCESS;
+    }
+    return RTErrConvertFromErrno(errno);
+}
+
diff --git a/src/VBox/Runtime/r3/linux/thread-affinity-linux.cpp b/src/VBox/Runtime/r3/linux/thread-affinity-linux.cpp
new file mode 100644
index 00000000..2726e716
--- /dev/null
+++ b/src/VBox/Runtime/r3/linux/thread-affinity-linux.cpp
@@ -0,0 +1,105 @@
+/* $Id: thread-affinity-linux.cpp $ */
+/** @file
+ * IPRT - Thread Affinity, Linux ring-3 implementation.
+ */
+
+/*
+ * Copyright (C) 2011-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+*   Header Files                                                                                                                 *
+*********************************************************************************************************************************/
+#ifndef _GNU_SOURCE
+# define _GNU_SOURCE
+#endif
+#include <features.h>
+#if __GLIBC_PREREQ(2,4)
+
+#include <sched.h>
+#include <unistd.h>
+#include <errno.h>
+#include <pthread.h>
+
+#include <iprt/thread.h>
+#include "internal/iprt.h"
+
+#include <iprt/assert.h>
+#include <iprt/cpuset.h>
+#include <iprt/err.h>
+#include <iprt/mp.h>
+
+
+
+RTR3DECL(int) RTThreadSetAffinity(PCRTCPUSET pCpuSet)
+{
+    /* Convert the IPRT set into a Linux cpu_set_t; a NULL set selects every CPU. */
+    cpu_set_t LnxCpuSet;
+    CPU_ZERO(&LnxCpuSet);
+    if (!pCpuSet)
+        for (unsigned iCpu = 0; iCpu < CPU_SETSIZE; iCpu++)
+            CPU_SET(iCpu, &LnxCpuSet);
+    else
+        for (unsigned iCpu = 0; iCpu < RT_MIN(CPU_SETSIZE, RTCPUSET_MAX_CPUS); iCpu++)
+            if (RTCpuSetIsMemberByIndex(pCpuSet, iCpu))
+                CPU_SET(iCpu, &LnxCpuSet);
+
+    int rc = pthread_setaffinity_np(pthread_self(), sizeof(LnxCpuSet), &LnxCpuSet);
+    if (!rc)
+        return VINF_SUCCESS;
+    /* pthread functions return the error number directly; errno is not set, so use rc. */
+    if (rc == ENOENT)
+        return VERR_CPU_NOT_FOUND;
+    return RTErrConvertFromErrno(rc);
+}
+
+
+RTR3DECL(int) RTThreadGetAffinity(PRTCPUSET pCpuSet)
+{
+    cpu_set_t LnxCpuSet;
+    int rc = pthread_getaffinity_np(pthread_self(), sizeof(LnxCpuSet), &LnxCpuSet);
+    if (rc != 0)
+        return RTErrConvertFromErrno(rc); /* pthread functions return the error number directly; errno is not set. */
+
+    /* Convert the Linux cpu_set_t into the IPRT set, limited to what both can represent. */
+    RTCpuSetEmpty(pCpuSet);
+    for (unsigned iCpu = 0; iCpu < RT_MIN(CPU_SETSIZE, RTCPUSET_MAX_CPUS); iCpu++)
+        if (CPU_ISSET(iCpu, &LnxCpuSet))
+            RTCpuSetAddByIndex(pCpuSet, iCpu);
+
+    return VINF_SUCCESS;
+}
+
+#else
+# include "../../generic/RTThreadGetAffinity-stub-generic.cpp"
+# include "../../generic/RTThreadSetAffinity-stub-generic.cpp"
+#endif
+
diff --git a/src/VBox/Runtime/r3/linux/time-linux.cpp b/src/VBox/Runtime/r3/linux/time-linux.cpp
new file mode 100644
index 00000000..6ceac2de
--- /dev/null
+++ b/src/VBox/Runtime/r3/linux/time-linux.cpp
@@ -0,0 +1,169 @@
+/* $Id: time-linux.cpp $ */
+/** @file
+ * IPRT - Time, Linux.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+*   Header Files                                                                                                                 *
+*********************************************************************************************************************************/
+#define LOG_GROUP RTLOGGROUP_TIME
+#define RTTIME_INCL_TIMEVAL
+#include <sys/time.h>
+#include <time.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#ifndef __NR_clock_gettime
+# define __NR_timer_create      259
+# define __NR_clock_gettime     (__NR_timer_create+6)
+#endif
+
+#include <iprt/time.h>
+#include "internal/time.h"
+
+
+/** Invokes the clock_gettime system call directly instead of going through the libc wrapper.
+ * @returns The syscall() result when it is non-negative, otherwise -1. */
+DECLINLINE(int) sys_clock_gettime(clockid_t id,  struct timespec *ts)
+{
+    int const iResult = syscall(__NR_clock_gettime, id, ts);
+    return iResult >= 0 ? iResult : -1;
+}
+
+
+/**
+ * Reads the monotonic clock, working out on first use which source to rely on.
+ *
+ * The outcome of that probe is cached in a static selector:
+ *   - -1: not probed yet,
+ *   -  0: the standard clock_gettime(),
+ *   -  1: the clock_gettime system call issued through sys_clock_gettime(),
+ *   - -2: neither of the two worked.
+ *
+ * @returns 0 on success; -1 when no monotonic source is usable.  Once a source
+ *          has been selected, its own return value is passed through as is.
+ * @param   ts      Where to store the current monotonic time.
+ * @note    Always fails when CLOCK_MONOTONIC is not defined at compile time.
+ */
+DECLINLINE(int) mono_clock(struct timespec *ts)
+{
+    static int s_iSource = -1;
+
+    /* Sample the selector once so all the decisions below see the same value. */
+    int const iSource = s_iSource;
+
+#ifdef CLOCK_MONOTONIC
+    /* A previous call settled on the standard clock_gettime(). */
+    if (iSource == 0)
+        return clock_gettime(CLOCK_MONOTONIC, ts);
+
+    /* A previous call settled on the system call variant. */
+    if (iSource == 1)
+        return sys_clock_gettime(CLOCK_MONOTONIC, ts);
+#endif /* CLOCK_MONOTONIC */
+
+    /* Any value other than "not probed yet" means an earlier probe gave up. */
+    if (iSource != -1)
+        return -1;
+
+#ifdef CLOCK_MONOTONIC
+    /* First choice: the standard clock_gettime(). */
+    if (clock_gettime(CLOCK_MONOTONIC, ts) == 0)
+    {
+        s_iSource = 0;
+        return 0;
+    }
+
+    /* Second choice: the clock_gettime system call. */
+    if (sys_clock_gettime(CLOCK_MONOTONIC, ts) == 0)
+    {
+        s_iSource = 1;
+        return 0;
+    }
+#endif /* CLOCK_MONOTONIC */
+
+    /* Nothing worked (or CLOCK_MONOTONIC is not defined): give up for good. */
+    s_iSource = -2;
+    return -1;
+}
+
+
+/** Returns the system time in nanoseconds: from mono_clock() while that works, else from gettimeofday(). */
+DECLINLINE(uint64_t) rtTimeGetSystemNanoTS(void)
+{
+    /* Cleared for good the first time mono_clock() reports a failure. */
+    static bool s_fUseMono = true;
+    if (s_fUseMono)
+    {
+        struct timespec MonoNow;
+        if (mono_clock(&MonoNow) == 0)
+            return (uint64_t)MonoNow.tv_sec * RT_NS_1SEC_64 + MonoNow.tv_nsec;
+        s_fUseMono = false;
+    }
+
+    /* Fall back on gettimeofday(), scaling its microseconds up to nanoseconds. */
+    struct timeval WallNow;
+    gettimeofday(&WallNow, NULL);
+    uint64_t const cNsFraction = (uint64_t)(WallNow.tv_usec * RT_NS_1US);
+    return (uint64_t)WallNow.tv_sec * RT_NS_1SEC_64 + cNsFraction;
+}
+
+
+/**
+ * Gets the current system timestamp in nanoseconds.
+ *
+ * This differs from RTTimeNanoTS in that it will use system APIs and not do any
+ * resolution or performance optimizations.
+ *
+ * @returns Nanosecond timestamp as produced by rtTimeGetSystemNanoTS().
+ */
+RTDECL(uint64_t) RTTimeSystemNanoTS(void)
+{
+    return rtTimeGetSystemNanoTS();
+}
+
+
+/**
+ * Gets the current system timestamp in milliseconds.
+ *
+ * This differs from RTTimeMilliTS in that it will use system APIs and not do any
+ * resolution or performance optimizations.
+ *
+ * @returns Millisecond timestamp (the system nanosecond timestamp divided by RT_NS_1MS).
+ */
+RTDECL(uint64_t) RTTimeSystemMilliTS(void)
+{
+    return rtTimeGetSystemNanoTS() / RT_NS_1MS;
+}
+
diff --git a/src/VBox/Runtime/r3/linux/tpm-linux.cpp b/src/VBox/Runtime/r3/linux/tpm-linux.cpp
new file mode 100644
index 00000000..4851eabc
--- /dev/null
+++ b/src/VBox/Runtime/r3/linux/tpm-linux.cpp
@@ -0,0 +1,229 @@
+/* $Id: tpm-linux.cpp $ */
+/** @file
+ * IPRT - Trusted Platform Module (TPM) access, Linux variant.
+ */
+
+/*
+ * Copyright (C) 2021-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+*   Header Files                                                                                                                 *
+*********************************************************************************************************************************/
+#define LOG_GROUP RTLOGGROUP_DEFAULT
+#include <iprt/tpm.h>
+
+#include <iprt/assertcompile.h>
+#include <iprt/asm.h>
+#include <iprt/err.h>
+#include <iprt/file.h>
+#include <iprt/log.h>
+#include <iprt/mem.h>
+#include <iprt/string.h>
+#include <iprt/linux/sysfs.h>
+
+
+/*********************************************************************************************************************************
+*   Defined Constants And Macros                                                                                                 *
+*********************************************************************************************************************************/
+
+
+/*********************************************************************************************************************************
+*   Structures and Typedefs                                                                                                      *
+*********************************************************************************************************************************/
+
+/**
+ * Internal TPM instance data, this is what an RTTPM handle refers to.
+ */
+typedef struct RTTPMINT
+{
+    /** Handle to the /dev/tpmX device, used for sending requests and reading responses. */
+    RTFILE                      hTpmDev;
+    /** Handle to the sysfs cancel file, NIL_RTFILE if none was found (cancelling is then not supported). */
+    RTFILE                      hTpmCancel;
+    /** The TPM version deduced from sysfs when opening, RTTPMVERSION_UNKNOWN if it could not be determined. */
+    RTTPMVERSION                enmTpmVers;
+    /** Flag whether a request is currently being executed, RTTpmReqCancel only acts while it is set. */
+    volatile bool               fReqExec;
+} RTTPMINT;
+/** Pointer to the internal TPM instance data. */
+typedef RTTPMINT *PRTTPMINT;
+
+
+/*********************************************************************************************************************************
+*   Internal Functions                                                                                                           *
+*********************************************************************************************************************************/
+
+/**
+ * Opens /dev/tpmX together with its optional sysfs cancel file and creates a TPM handle.
+ * @returns IPRT status code, VERR_NO_MEMORY if the instance data cannot be allocated.
+ * @param   phTpm       Where to store the TPM handle on success.
+ * @param   idTpm       The TPM to open, RTTPM_ID_DEFAULT is mapped to TPM 0.
+ */
+RTDECL(int) RTTpmOpen(PRTTPM phTpm, uint32_t idTpm)
+{
+    AssertPtrReturn(phTpm, VERR_INVALID_POINTER);
+    if (idTpm == RTTPM_ID_DEFAULT)
+        idTpm = 0;
+
+    PRTTPMINT pThis = (PRTTPMINT)RTMemAllocZ(sizeof(*pThis));
+    if (!pThis)
+        return VERR_NO_MEMORY;
+    pThis->hTpmDev    = NIL_RTFILE;
+    pThis->hTpmCancel = NIL_RTFILE;
+    pThis->enmTpmVers = RTTPMVERSION_UNKNOWN;
+    pThis->fReqExec   = false;
+
+    int rc = RTFileOpenF(&pThis->hTpmDev, RTFILE_O_OPEN | RTFILE_O_READWRITE | RTFILE_O_DENY_NONE,
+                         "/dev/tpm%u", idTpm);
+    if (!RT_SUCCESS(rc))
+    {
+        RTMemFree(pThis);
+        return rc;
+    }
+
+    /* Open the sysfs cancel file (class tpm first, then class misc); a missing file is tolerated, other errors are fatal. */
+    rc = RTFileOpenF(&pThis->hTpmCancel, RTFILE_O_OPEN | RTFILE_O_WRITE | RTFILE_O_DENY_NONE,
+                     "/sys/class/tpm/tpm%u/device/cancel", idTpm);
+    if (rc == VERR_FILE_NOT_FOUND)
+        rc = RTFileOpenF(&pThis->hTpmCancel, RTFILE_O_OPEN | RTFILE_O_WRITE | RTFILE_O_DENY_NONE,
+                         "/sys/class/misc/tpm%u/device/cancel", idTpm);
+    if (!RT_SUCCESS(rc) && rc != VERR_FILE_NOT_FOUND)
+    {
+        RTFileClose(pThis->hTpmDev);
+        pThis->hTpmDev = NIL_RTFILE;
+        RTMemFree(pThis);
+        return rc;
+    }
+
+    /* Try to figure out the TPM version; the version stays unknown if this fails, the open still succeeds. */
+    int64_t iVersion = 0;
+    rc = RTLinuxSysFsReadIntFile(10 /*uBase*/, &iVersion, "/sys/class/tpm/tpm%u/tpm_version_major", idTpm);
+    if (rc == VERR_FILE_NOT_FOUND)
+        rc = RTLinuxSysFsReadIntFile(10 /*uBase*/, &iVersion, "/sys/class/misc/tpm%u/tpm_version_major", idTpm);
+    if (RT_SUCCESS(rc) && iVersion == 1)
+        pThis->enmTpmVers = RTTPMVERSION_1_2;
+    else if (RT_SUCCESS(rc) && iVersion == 2)
+        pThis->enmTpmVers = RTTPMVERSION_2_0;
+
+    *phTpm = pThis;
+    return VINF_SUCCESS;
+}
+
+
+/** Closes the file handles of a TPM instance and frees it.  @returns VINF_SUCCESS, VERR_INVALID_HANDLE on a bad handle. */
+RTDECL(int) RTTpmClose(RTTPM hTpm)
+{
+    PRTTPMINT pThis = hTpm;
+    AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+
+    /* The device handle is closed unconditionally, the cancel handle only when one was opened. */
+    RTFileClose(pThis->hTpmDev);
+    pThis->hTpmDev = NIL_RTFILE;
+    if (pThis->hTpmCancel != NIL_RTFILE)
+        RTFileClose(pThis->hTpmCancel);
+    pThis->hTpmCancel = NIL_RTFILE;
+    RTMemFree(pThis);
+    return VINF_SUCCESS;
+}
+
+
+/** @returns The TPM version stored in the instance data, RTTPMVERSION_INVALID on a bad handle. */
+RTDECL(RTTPMVERSION) RTTpmGetVersion(RTTPM hTpm)
+{
+    PRTTPMINT const pThis = hTpm;
+    AssertPtrReturn(pThis, RTTPMVERSION_INVALID);
+    return pThis->enmTpmVers;
+}
+
+
+RTDECL(uint32_t) RTTpmGetLocalityMax(RTTPM hTpm)
+{
+    RT_NOREF(hTpm);
+    return 0; /* On Linux only TPM locality 0 is supported, so the handle is not consulted and the maximum is 0. */
+}
+
+
+/** Cancels the request in flight, if any, through the sysfs cancel file.  @returns IPRT status code, VERR_NOT_SUPPORTED without a cancel file. */
+RTDECL(int) RTTpmReqCancel(RTTPM hTpm)
+{
+    PRTTPMINT pThis = hTpm;
+    AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+
+    if (pThis->hTpmCancel == NIL_RTFILE)
+        return VERR_NOT_SUPPORTED;
+
+    /* Nothing to cancel unless a request is marked as being executed. */
+    if (!ASMAtomicReadBool(&pThis->fReqExec))
+        return VINF_SUCCESS;
+
+    uint8_t bCancel = '-';
+    return RTFileWrite(pThis->hTpmCancel, &bCancel, sizeof(bCancel), NULL /*pcbWritten*/);
+}
+
+
+/**
+ * Executes one TPM request: a single blocking write followed by a single read.
+ * @returns IPRT status code, VERR_BUFFER_OVERFLOW if the response is incomplete.
+ * @param   hTpm        The TPM handle.
+ * @param   bLoc        The locality, only 0 is accepted.
+ * @param   pvReq       The request to send (@a cbReq bytes, must not be zero).
+ * @param   pvResp      Where to store the response (@a cbRespMax bytes, must not be zero).
+ * @param   pcbResp     Where to return the response size on success, optional.
+ */
+RTDECL(int) RTTpmReqExec(RTTPM hTpm, uint8_t bLoc, const void *pvReq, size_t cbReq,
+                         void *pvResp, size_t cbRespMax, size_t *pcbResp)
+{
+    PRTTPMINT pThis = hTpm;
+    AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+    AssertPtrReturn(pvReq, VERR_INVALID_POINTER);
+    AssertPtrReturn(pvResp, VERR_INVALID_POINTER);
+    AssertReturn(cbReq && cbRespMax, VERR_INVALID_PARAMETER);
+    AssertReturn(bLoc == 0, VERR_NOT_SUPPORTED); /** @todo There doesn't seem to be a way to use a different locality. */
+
+    /* fReqExec tells RTTpmReqCancel that a request is in flight; it is cleared on every path, a failed write included. */
+    size_t cbResp = 0;
+    ASMAtomicXchgBool(&pThis->fReqExec, true);
+    int rc = RTFileWrite(pThis->hTpmDev, pvReq, cbReq, NULL /*pcbWritten*/);
+    if (RT_SUCCESS(rc))
+        rc = RTFileRead(pThis->hTpmDev, pvResp, cbRespMax, &cbResp);
+    ASMAtomicXchgBool(&pThis->fReqExec, false);
+    if (RT_SUCCESS(rc))
+    {
+        /* Only accept a response that holds a full header whose announced size matches what was read. */
+        if (cbResp < sizeof(TPMRESPHDR) || RTTpmRespGetSz((PCTPMRESPHDR)pvResp) != cbResp)
+            rc = VERR_BUFFER_OVERFLOW;
+        else if (pcbResp)
+            *pcbResp = cbResp;
+    }
+    return rc;
+}
+
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-11 08:17:27 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-11 08:17:27 +0000
commit	f215e02bf85f68d3a6106c2a1f4f7f063f819064 (patch)
tree	6bb5b92c046312c4e95ac2620b10ddf482d3fa8b /src/VBox/Runtime/r3/linux
parent	Initial commit. (diff)
download	virtualbox-f215e02bf85f68d3a6106c2a1f4f7f063f819064.tar.xz virtualbox-f215e02bf85f68d3a6106c2a1f4f7f063f819064.zip