summaryrefslogtreecommitdiffstats
path: root/src/VBox/Runtime/r3/linux
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-06 03:01:46 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-06 03:01:46 +0000
commitf8fe689a81f906d1b91bb3220acde2a4ecb14c5b (patch)
tree26484e9d7e2c67806c2d1760196ff01aaa858e8c /src/VBox/Runtime/r3/linux
parentInitial commit. (diff)
downloadvirtualbox-f8fe689a81f906d1b91bb3220acde2a4ecb14c5b.tar.xz
virtualbox-f8fe689a81f906d1b91bb3220acde2a4ecb14c5b.zip
Adding upstream version 6.0.4-dfsg.upstream/6.0.4-dfsgupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r--src/VBox/Runtime/r3/linux/Makefile.kup0
-rw-r--r--src/VBox/Runtime/r3/linux/RTFileSetAllocationSize-linux.cpp77
-rw-r--r--src/VBox/Runtime/r3/linux/RTProcIsRunningByName-linux.cpp118
-rw-r--r--src/VBox/Runtime/r3/linux/RTSystemQueryDmiString-linux.cpp86
-rw-r--r--src/VBox/Runtime/r3/linux/RTSystemShutdown-linux.cpp101
-rw-r--r--src/VBox/Runtime/r3/linux/RTThreadGetNativeState-linux.cpp111
-rw-r--r--src/VBox/Runtime/r3/linux/fileaio-linux.cpp838
-rw-r--r--src/VBox/Runtime/r3/linux/krnlmod-linux.cpp324
-rw-r--r--src/VBox/Runtime/r3/linux/mp-linux.cpp318
-rw-r--r--src/VBox/Runtime/r3/linux/rtProcInitExePath-linux.cpp69
-rw-r--r--src/VBox/Runtime/r3/linux/sched-linux.cpp614
-rw-r--r--src/VBox/Runtime/r3/linux/semevent-linux.cpp417
-rw-r--r--src/VBox/Runtime/r3/linux/semeventmulti-linux.cpp453
-rw-r--r--src/VBox/Runtime/r3/linux/semmutex-linux.cpp465
-rw-r--r--src/VBox/Runtime/r3/linux/sysfs.cpp714
-rw-r--r--src/VBox/Runtime/r3/linux/systemmem-linux.cpp109
-rw-r--r--src/VBox/Runtime/r3/linux/thread-affinity-linux.cpp95
-rw-r--r--src/VBox/Runtime/r3/linux/time-linux.cpp159
18 files changed, 5068 insertions, 0 deletions
diff --git a/src/VBox/Runtime/r3/linux/Makefile.kup b/src/VBox/Runtime/r3/linux/Makefile.kup
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/src/VBox/Runtime/r3/linux/Makefile.kup
diff --git a/src/VBox/Runtime/r3/linux/RTFileSetAllocationSize-linux.cpp b/src/VBox/Runtime/r3/linux/RTFileSetAllocationSize-linux.cpp
new file mode 100644
index 00000000..637dca3d
--- /dev/null
+++ b/src/VBox/Runtime/r3/linux/RTFileSetAllocationSize-linux.cpp
@@ -0,0 +1,77 @@
+/* $Id: RTFileSetAllocationSize-linux.cpp $ */
+/** @file
+ * IPRT - RTFileSetAllocationSize, linux implementation.
+ */
+
+/*
+ * Copyright (C) 2016-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
+ * VirtualBox OSE distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP RTLOGGROUP_FILE
+#include <iprt/file.h>
+#include "internal/iprt.h"
+
+#include <iprt/assert.h>
+#include <iprt/errcore.h>
+
+#include <dlfcn.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/fcntl.h>
+
+/**
+ * Signature of the Linux specific fallocate() method.
+ *
+ * RTFileSetAllocationSize looks the function up at runtime with dlsym() under
+ * the name "fallocate64" and calls it through a pointer of this type.
+ *
+ * @returns 0 on success; any other value is treated as failure by the caller,
+ *          which then inspects errno.
+ * @param   iFd         The native file descriptor.
+ * @param   fMode       Mode flags, either 0 or LNX_FALLOC_FL_KEEP_SIZE.
+ * @param   offStart    Start offset of the range to allocate.
+ * @param   cb          Size of the range to allocate.
+ */
+typedef int (*PFNLNXFALLOCATE) (int iFd, int fMode, off_t offStart, off_t cb);
+/** Flag to specify that the file size should not be extended.
+ * Passed as fMode when the caller sets RTFILE_ALLOC_SIZE_F_KEEP_SIZE. */
+#define LNX_FALLOC_FL_KEEP_SIZE 1
+
+/**
+ * Sets the allocation size of a file using the Linux fallocate64() function.
+ *
+ * The function is resolved at runtime with dlsym(), so the call degrades to
+ * VERR_NOT_SUPPORTED when the symbol cannot be found.
+ *
+ * @returns VINF_SUCCESS on success.
+ * @retval  VERR_INVALID_PARAMETER if the handle is nil or fFlags has unknown bits.
+ * @retval  VERR_NOT_SUPPORTED if off_t cannot hold cbSize, if fallocate64 is
+ *          not available or if it fails with EOPNOTSUPP.
+ * @returns Other IPRT status codes converted from errno.
+ * @param   hFile       The file handle.
+ * @param   cbSize      The allocation size; the range always starts at offset 0.
+ * @param   fFlags      RTFILE_ALLOC_SIZE_F_XXX flags.
+ */
+RTDECL(int) RTFileSetAllocationSize(RTFILE hFile, uint64_t cbSize, uint32_t fFlags)
+{
+    AssertReturn(hFile != NIL_RTFILE, VERR_INVALID_PARAMETER);
+    AssertReturn(!(fFlags & ~RTFILE_ALLOC_SIZE_F_VALID), VERR_INVALID_PARAMETER);
+    /* cbSize is unsigned, so it is formatted with %RU64 instead of the signed %lld. */
+    AssertMsgReturn(sizeof(off_t) >= sizeof(cbSize) || RT_HIDWORD(cbSize) == 0,
+                    ("64-bit filesize not supported! cbSize=%RU64\n", cbSize),
+                    VERR_NOT_SUPPORTED);
+
+    /* Look the function up dynamically; without it there is nothing we can do. */
+    PFNLNXFALLOCATE pfnLnxFAllocate = (PFNLNXFALLOCATE)(uintptr_t)dlsym(RTLD_DEFAULT, "fallocate64");
+    if (!VALID_PTR(pfnLnxFAllocate))
+        return VERR_NOT_SUPPORTED;
+
+    int const fLnxFlags = (fFlags & RTFILE_ALLOC_SIZE_F_KEEP_SIZE) ? LNX_FALLOC_FL_KEEP_SIZE : 0;
+    if (pfnLnxFAllocate(RTFileToNative(hFile), fLnxFlags, 0, cbSize) == 0)
+        return VINF_SUCCESS;
+
+    /* Capture errno once, right after the failing call, before converting it. */
+    int const iErrNo = errno;
+    if (iErrNo == EOPNOTSUPP)
+        return VERR_NOT_SUPPORTED;
+    return RTErrConvertFromErrno(iErrNo);
+}
+RT_EXPORT_SYMBOL(RTFileSetAllocationSize);
diff --git a/src/VBox/Runtime/r3/linux/RTProcIsRunningByName-linux.cpp b/src/VBox/Runtime/r3/linux/RTProcIsRunningByName-linux.cpp
new file mode 100644
index 00000000..4e8b3735
--- /dev/null
+++ b/src/VBox/Runtime/r3/linux/RTProcIsRunningByName-linux.cpp
@@ -0,0 +1,118 @@
+/* $Id: RTProcIsRunningByName-linux.cpp $ */
+/** @file
+ * IPRT - RTProcIsRunningByName, Linux implementation.
+ */
+
+/*
+ * Copyright (C) 2009-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
+ * VirtualBox OSE distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP RTLOGGROUP_PROCESS
+#include <iprt/process.h>
+#include <iprt/string.h>
+#include <iprt/dir.h>
+#include <iprt/path.h>
+#include <iprt/stream.h>
+#include <iprt/param.h>
+#include <iprt/assert.h>
+
+#include <unistd.h>
+
+
+/**
+ * Checks whether a process with the given name shows up in /proc.
+ *
+ * If pszName contains a path, the full executable path is compared, otherwise
+ * only the file name part.
+ *
+ * @returns true if a matching process was found, false if not, if pszName is
+ *          NULL or if /proc could not be opened.
+ * @param   pszName     The process name to look for, with or without path.
+ */
+RTR3DECL(bool) RTProcIsRunningByName(const char *pszName)
+{
+    /* No name, no match. */
+    if (!pszName)
+        return false;
+
+    bool const fWithPath = RTPathHavePath(pszName);
+
+    /* Open /proc for enumeration. */
+    RTDIR hDir;
+    int rc = RTDirOpen(&hDir, "/proc");
+    AssertMsgRCReturn(rc, ("RTDirOpen on /proc failed: rc=%Rrc\n", rc), false);
+
+    bool fFound = false;
+    RTDIRENTRY DirEntry;
+    while (RT_SUCCESS(RTDirRead(hDir, &DirEntry, NULL)))
+    {
+        /* Skip everything that is not a (possible) directory with a numeric name. */
+        if (   DirEntry.enmType != RTDIRENTRYTYPE_DIRECTORY
+            && DirEntry.enmType != RTDIRENTRYTYPE_UNKNOWN)
+            continue;
+        if (RTStrToUInt32(DirEntry.szName) == 0)
+            continue;
+
+        /*
+         * Resolving the exe link is the faster and more reliable source.
+         * When that fails (typically access errors), the first line of
+         * cmdline is used instead, even though it only holds whatever the
+         * execv caller passed as argv[0].
+         */
+        char szName[RTPATH_MAX];
+        char szExe[RTPATH_MAX];
+        RTStrPrintf(szName, sizeof(szName), "/proc/%s/exe", &DirEntry.szName[0]);
+        int cchLink = readlink(szName, szExe, sizeof(szExe) - 1);
+        if (   cchLink > 0
+            && (size_t)cchLink < sizeof(szExe))
+        {
+            szExe[cchLink] = '\0';
+            rc = VINF_SUCCESS;
+        }
+        else
+        {
+            RTStrPrintf(szName, sizeof(szName), "/proc/%s/cmdline", &DirEntry.szName[0]);
+            PRTSTREAM pStream;
+            rc = RTStrmOpen(szName, "r", &pStream);
+            if (RT_SUCCESS(rc))
+            {
+                rc = RTStrmGetLine(pStream, szExe, sizeof(szExe));
+                RTStrmClose(pStream);
+            }
+        }
+        if (RT_FAILURE(rc))
+            continue;
+
+        /* Compare either the whole path or just the file name part. */
+        char const *pszProcName = fWithPath ? szExe : RTPathFilename(szExe);
+        if (RTStrCmp(pszProcName, pszName) == 0)
+        {
+            fFound = true;
+            break;
+        }
+    }
+
+    RTDirClose(hDir);
+    return fFound;
+}
+
diff --git a/src/VBox/Runtime/r3/linux/RTSystemQueryDmiString-linux.cpp b/src/VBox/Runtime/r3/linux/RTSystemQueryDmiString-linux.cpp
new file mode 100644
index 00000000..dd15a58f
--- /dev/null
+++ b/src/VBox/Runtime/r3/linux/RTSystemQueryDmiString-linux.cpp
@@ -0,0 +1,86 @@
+/* $Id: RTSystemQueryDmiString-linux.cpp $ */
+/** @file
+ * IPRT - RTSystemQueryDmiString, linux ring-3.
+ */
+
+/*
+ * Copyright (C) 2010-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
+ * VirtualBox OSE distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#include <iprt/system.h>
+#include "internal/iprt.h"
+
+#include <iprt/err.h>
+#include <iprt/assert.h>
+#include <iprt/linux/sysfs.h>
+
+#include <errno.h>
+
+
+/**
+ * Queries a DMI string by reading the matching sysfs file.
+ *
+ * The file is looked for below "devices/virtual/dmi/" first and below
+ * "class/dmi/" second.
+ *
+ * @returns IPRT status code of the sysfs read on success or buffer overflow.
+ * @retval  VERR_INVALID_POINTER if pszBuf is not a valid pointer.
+ * @retval  VERR_INVALID_PARAMETER if cbBuf is zero or enmString is out of range.
+ * @retval  VERR_NOT_SUPPORTED if the string is not handled here or the sysfs
+ *          file was not found.
+ * @retval  VERR_ACCESS_DENIED if reading the sysfs file was not permitted.
+ * @param   enmString   The string to query.
+ * @param   pszBuf      Where to store the string; set to empty up front.
+ * @param   cbBuf       Size of the buffer.
+ */
+RTDECL(int) RTSystemQueryDmiString(RTSYSDMISTR enmString, char *pszBuf, size_t cbBuf)
+{
+    AssertPtrReturn(pszBuf, VERR_INVALID_POINTER);
+    AssertReturn(cbBuf > 0, VERR_INVALID_PARAMETER);
+    *pszBuf = '\0';
+    AssertReturn(enmString > RTSYSDMISTR_INVALID && enmString < RTSYSDMISTR_END, VERR_INVALID_PARAMETER);
+
+    /* Translate the string identifier into the sysfs file name. */
+    const char *pszSysFsName;
+    switch (enmString)
+    {
+        case RTSYSDMISTR_PRODUCT_NAME:
+            pszSysFsName = "id/product_name";
+            break;
+        case RTSYSDMISTR_PRODUCT_VERSION:
+            pszSysFsName = "id/product_version";
+            break;
+        case RTSYSDMISTR_PRODUCT_UUID:
+            pszSysFsName = "id/product_uuid";
+            break;
+        case RTSYSDMISTR_PRODUCT_SERIAL:
+            pszSysFsName = "id/product_serial";
+            break;
+        case RTSYSDMISTR_MANUFACTURER:
+            pszSysFsName = "id/sys_vendor";
+            break;
+        default:
+            return VERR_NOT_SUPPORTED;
+    }
+
+    /* First location; success and buffer overflow are both final answers. */
+    size_t cbRead = 0;
+    int rc = RTLinuxSysFsReadStrFile(pszBuf, cbBuf, &cbRead, "devices/virtual/dmi/%s", pszSysFsName);
+    if (RT_SUCCESS(rc) || rc == VERR_BUFFER_OVERFLOW)
+        return rc;
+
+    /* Second location, same rule. */
+    rc = RTLinuxSysFsReadStrFile(pszBuf, cbBuf, &cbRead, "class/dmi/%s", pszSysFsName);
+    if (RT_SUCCESS(rc) || rc == VERR_BUFFER_OVERFLOW)
+        return rc;
+
+    /* Fold the remaining failures into the status codes callers get to see. */
+    if (   rc == VERR_FILE_NOT_FOUND
+        || rc == VERR_PATH_NOT_FOUND
+        || rc == VERR_IS_A_DIRECTORY)
+        return VERR_NOT_SUPPORTED;
+    if (   rc == VERR_PERMISSION_DENIED
+        || rc == VERR_ACCESS_DENIED)
+        return VERR_ACCESS_DENIED;
+    return rc;
+}
+RT_EXPORT_SYMBOL(RTSystemQueryDmiString);
+
diff --git a/src/VBox/Runtime/r3/linux/RTSystemShutdown-linux.cpp b/src/VBox/Runtime/r3/linux/RTSystemShutdown-linux.cpp
new file mode 100644
index 00000000..f180d102
--- /dev/null
+++ b/src/VBox/Runtime/r3/linux/RTSystemShutdown-linux.cpp
@@ -0,0 +1,101 @@
+/* $Id: RTSystemShutdown-linux.cpp $ */
+/** @file
+ * IPRT - RTSystemShutdown, linux implementation.
+ */
+
+/*
+ * Copyright (C) 2012-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
+ * VirtualBox OSE distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#include <iprt/system.h>
+#include "internal/iprt.h"
+
+#include <iprt/assert.h>
+#include <iprt/env.h>
+#include <iprt/err.h>
+#include <iprt/process.h>
+#include <iprt/string.h>
+
+
+/**
+ * Shuts down, reboots or powers off the system by running /sbin/shutdown.
+ *
+ * The function blocks until the shutdown command has exited.
+ *
+ * @returns VINF_SUCCESS if the command ran and exited normally with status 0.
+ * @retval  VERR_INVALID_POINTER if pszLogMsg is not a valid pointer.
+ * @retval  VERR_INVALID_PARAMETER if fFlags contains unknown bits.
+ * @retval  VERR_SYS_SHUTDOWN_FAILED if the command did not exit normally or
+ *          returned a non-zero status.
+ * @returns Other IPRT status codes from creating or waiting for the process.
+ * @param   cMsDelay    Delay in milliseconds. Values below 500 become "now",
+ *                      everything else is passed on divided by 1000.
+ * @param   fFlags      RTSYSTEM_SHUTDOWN_XXX flags selecting the action.
+ * @param   pszLogMsg   Message handed to the shutdown command as last argument.
+ */
+RTDECL(int) RTSystemShutdown(RTMSINTERVAL cMsDelay, uint32_t fFlags, const char *pszLogMsg)
+{
+    AssertPtrReturn(pszLogMsg, VERR_INVALID_POINTER);
+    AssertReturn(!(fFlags & ~RTSYSTEM_SHUTDOWN_VALID_MASK), VERR_INVALID_PARAMETER);
+
+    /*
+     * Assemble the argument vector.  At most five entries get used, which
+     * leaves the sixth as the NULL terminator thanks to the zeroing.
+     */
+    int         iArg = 0;
+    const char *apszArgs[6];
+
+    RT_BZERO(apszArgs, sizeof(apszArgs));
+
+    apszArgs[iArg++] = "/sbin/shutdown";
+    switch (fFlags & RTSYSTEM_SHUTDOWN_ACTION_MASK)
+    {
+        case RTSYSTEM_SHUTDOWN_HALT:
+            apszArgs[iArg++] = "-h";
+            apszArgs[iArg++] = "-H";
+            break;
+        case RTSYSTEM_SHUTDOWN_REBOOT:
+            apszArgs[iArg++] = "-r";
+            break;
+        case RTSYSTEM_SHUTDOWN_POWER_OFF:
+        case RTSYSTEM_SHUTDOWN_POWER_OFF_HALT:
+            apszArgs[iArg++] = "-h";
+            apszArgs[iArg++] = "-P";
+            break;
+    }
+
+    /*
+     * The time argument.  The sum is computed in 64 bits: assuming
+     * RTMSINTERVAL is a 32-bit type (TODO confirm), adding 499 to a delay
+     * close to the type's maximum would otherwise wrap around and yield a
+     * near-zero delay.
+     */
+    char szWhen[80];
+    if (cMsDelay < 500)
+        strcpy(szWhen, "now");
+    else
+        RTStrPrintf(szWhen, sizeof(szWhen), "%u", (unsigned)(((uint64_t)cMsDelay + 499) / 1000));
+    apszArgs[iArg++] = szWhen;
+
+    apszArgs[iArg++] = pszLogMsg;
+
+
+    /*
+     * Start the shutdown process and wait for it to complete.
+     */
+    RTPROCESS hProc;
+    int rc = RTProcCreate(apszArgs[0], apszArgs, RTENV_DEFAULT, 0 /*fFlags*/, &hProc);
+    if (RT_FAILURE(rc))
+        return rc;
+
+    RTPROCSTATUS ProcStatus;
+    rc = RTProcWait(hProc, RTPROCWAIT_FLAGS_BLOCK, &ProcStatus);
+    if (RT_SUCCESS(rc))
+    {
+        /* Anything but a clean exit with status 0 counts as a failed shutdown. */
+        if (   ProcStatus.enmReason != RTPROCEXITREASON_NORMAL
+            || ProcStatus.iStatus   != 0)
+            rc = VERR_SYS_SHUTDOWN_FAILED;
+    }
+
+    return rc;
+}
+RT_EXPORT_SYMBOL(RTSystemShutdown);
+
diff --git a/src/VBox/Runtime/r3/linux/RTThreadGetNativeState-linux.cpp b/src/VBox/Runtime/r3/linux/RTThreadGetNativeState-linux.cpp
new file mode 100644
index 00000000..066fa966
--- /dev/null
+++ b/src/VBox/Runtime/r3/linux/RTThreadGetNativeState-linux.cpp
@@ -0,0 +1,111 @@
+/* $Id: RTThreadGetNativeState-linux.cpp $ */
+/** @file
+ * IPRT - RTThreadGetNativeState, linux implementation.
+ */
+
+/*
+ * Copyright (C) 2010-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
+ * VirtualBox OSE distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP RTLOGGROUP_PROCESS
+#include <iprt/thread.h>
+#include "internal/iprt.h"
+
+#include <iprt/assert.h>
+#include <iprt/ctype.h>
+#include <iprt/errcore.h>
+#include <iprt/string.h>
+
+#include "internal/thread.h"
+
+#include <unistd.h>
+#include <sys/fcntl.h>
+
+
+/**
+ * Gets the native state of a thread from /proc/self/task/<tid>/stat.
+ *
+ * @returns The native thread state.
+ * @retval  RTTHREADNATIVESTATE_INVALID if the thread handle could not be
+ *          resolved.
+ * @retval  RTTHREADNATIVESTATE_UNKNOWN if the stat file could not be opened,
+ *          read or parsed, or if the state character is not known.
+ * @param   hThread     The thread handle.
+ */
+RTDECL(RTTHREADNATIVESTATE) RTThreadGetNativeState(RTTHREAD hThread)
+{
+    RTTHREADNATIVESTATE enmRet = RTTHREADNATIVESTATE_INVALID;
+    PRTTHREADINT pThread = rtThreadGet(hThread);
+    if (pThread)
+    {
+        enmRet = RTTHREADNATIVESTATE_UNKNOWN;
+
+        /* The buffer holds the file name first and the file content afterwards. */
+        char szName[512];
+        RTStrPrintf(szName, sizeof(szName), "/proc/self/task/%u/stat", pThread->tid);
+        int fd = open(szName, O_RDONLY, 0);
+        if (fd >= 0)
+        {
+            ssize_t cch = read(fd, szName, sizeof(szName) - 1);
+            close(fd);
+            if (cch > 0)
+            {
+                szName[cch] = '\0';
+
+                /*
+                 * Skip the pid and the (comm name) and stop at the ')' that is
+                 * followed by " <letter> ", i.e. the status char.  The checks
+                 * short-circuit at the terminator, so nothing beyond it is read.
+                 */
+                const char *psz = szName;
+                while (   *psz
+                       && (   *psz != ')'
+                           || !RT_C_IS_SPACE(psz[1])
+                           || !RT_C_IS_ALPHA(psz[2])
+                           || !RT_C_IS_SPACE(psz[3])
+                          )
+                      )
+                    psz++;
+                if (*psz == ')')
+                {
+                    switch (psz[2])
+                    {
+                        case 'R': /* running */
+                            enmRet = RTTHREADNATIVESTATE_RUNNING;
+                            break;
+
+                        case 'S': /* sleeping */
+                        case 'D': /* disk sleeping */
+                            enmRet = RTTHREADNATIVESTATE_BLOCKED;
+                            break;
+
+                        case 'T': /* stopped (or tracing stop on older kernels) */
+                        case 't': /* tracing stop, reported separately by newer kernels */
+                            enmRet = RTTHREADNATIVESTATE_SUSPENDED;
+                            break;
+
+                        case 'Z': /* zombie */
+                        case 'X': /* dead */
+                            enmRet = RTTHREADNATIVESTATE_TERMINATED;
+                            break;
+
+                        default:
+                            AssertMsgFailed(("state=%c\n", psz[2]));
+                            enmRet = RTTHREADNATIVESTATE_UNKNOWN;
+                            break;
+                    }
+                }
+                else
+                    AssertMsgFailed(("stat='%s'\n", szName));
+            }
+        }
+        rtThreadRelease(pThread);
+    }
+    return enmRet;
+}
+
diff --git a/src/VBox/Runtime/r3/linux/fileaio-linux.cpp b/src/VBox/Runtime/r3/linux/fileaio-linux.cpp
new file mode 100644
index 00000000..9df7f96c
--- /dev/null
+++ b/src/VBox/Runtime/r3/linux/fileaio-linux.cpp
@@ -0,0 +1,838 @@
+/* $Id: fileaio-linux.cpp $ */
+/** @file
+ * IPRT - File async I/O, native implementation for the Linux host platform.
+ */
+
+/*
+ * Copyright (C) 2006-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
+ * VirtualBox OSE distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ */
+
+/** @page pg_rtfileaio_linux RTFile Async I/O - Linux Implementation Notes
+ * @internal
+ *
+ * Linux implements the kernel async I/O API through the io_* syscalls. They are
+ * not exposed in the glibc (the aio_* API uses userspace threads and blocking
+ * I/O operations to simulate async behavior). There is an external library
+ * called libaio which wraps these syscalls, but it is not installed by
+ * default and we do not want to add another dependency. Because the
+ * interface is really simple, we use the kernel interface directly through
+ * wrapper functions.
+ *
+ * The interface has some limitations. The first one is that the file must be
+ * opened with O_DIRECT. This disables caching done by the kernel which can be
+ * compensated if the user of this API implements caching itself. The next
+ * limitation is that data buffers must be aligned at a 512 byte boundary or the
+ * request will fail.
+ */
+/** @todo r=bird: What's this about "must be opened with O_DIRECT"? An
+ * explanation would be nice, esp. seeing what Linus is quoted saying
+ * about it in the open man page... */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP RTLOGGROUP_FILE
+#include <iprt/asm.h>
+#include <iprt/mem.h>
+#include <iprt/assert.h>
+#include <iprt/string.h>
+#include <iprt/err.h>
+#include <iprt/log.h>
+#include <iprt/thread.h>
+#include "internal/fileaio.h"
+
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <errno.h>
+
+#include <iprt/file.h>
+
+
+/*********************************************************************************************************************************
+* Structures and Typedefs *
+*********************************************************************************************************************************/
+/** The async I/O context handle.
+ * Filled in by the io_setup syscall and passed by value to the other io_*
+ * syscall wrappers in this file. */
+typedef unsigned long LNXKAIOCONTEXT;
+
+/**
+ * Supported commands for the iocbs.
+ *
+ * The value is stored in LNXKAIOIOCB::u16IoOpCode when a request is prepared.
+ */
+enum
+{
+ /** Read request, used by RTFileAioReqPrepareRead. */
+ LNXKAIO_IOCB_CMD_READ = 0,
+ /** Write request, used by RTFileAioReqPrepareWrite. */
+ LNXKAIO_IOCB_CMD_WRITE = 1,
+ /** Flush request, used by RTFileAioReqPrepareFlush. */
+ LNXKAIO_IOCB_CMD_FSYNC = 2,
+ /** Data sync request. NOTE(review): no user is visible in this part of the file. */
+ LNXKAIO_IOCB_CMD_FDSYNC = 3
+};
+
+/**
+ * The iocb structure of a request which is passed to the kernel.
+ *
+ * We redefined this here because the version in the header lacks padding
+ * for 32bit.
+ *
+ * On RT_ARCH_X86 an explicit 32-bit padding member follows each pointer and
+ * the 32-bit transfer size; on RT_ARCH_AMD64 no padding members are declared.
+ * Any other architecture is rejected at compile time.  The member order must
+ * not be changed as the structure is handed to the kernel as is.
+ */
+typedef struct LNXKAIOIOCB
+{
+ /** Opaque pointer to data which is returned on an I/O event. */
+ void *pvUser;
+#ifdef RT_ARCH_X86
+ /** Padding following the 32-bit pointer. */
+ uint32_t u32Padding0;
+#endif
+ /** Contains the request number and is set by the kernel. */
+ uint32_t u32Key;
+ /** Reserved. */
+ uint32_t u32Reserved0;
+ /** The I/O opcode (one of the LNXKAIO_IOCB_CMD_XXX values). */
+ uint16_t u16IoOpCode;
+ /** Request priority. */
+ int16_t i16Priority;
+ /** The file descriptor (set from RTFileToNative when preparing a request). */
+ uint32_t uFileDesc;
+ /** The userspace pointer to the buffer containing/receiving the data. */
+ void *pvBuf;
+#ifdef RT_ARCH_X86
+ /** Padding following the 32-bit pointer. */
+ uint32_t u32Padding1;
+#endif
+ /** How many bytes to transfer. */
+#ifdef RT_ARCH_X86
+ uint32_t cbTransfer;
+ uint32_t u32Padding2;
+#elif defined(RT_ARCH_AMD64)
+ uint64_t cbTransfer;
+#else
+# error "Unknown architecture"
+#endif
+ /** At which offset to start the transfer. */
+ int64_t off;
+ /** Reserved. */
+ uint64_t u64Reserved1;
+ /** Flags */
+ uint32_t fFlags;
+ /** Readiness signal file descriptor. */
+ uint32_t u32ResFd;
+} LNXKAIOIOCB, *PLNXKAIOIOCB;
+
+/**
+ * I/O event structure to notify about completed requests.
+ * Redefined here too because of the padding.
+ *
+ * As with LNXKAIOIOCB, explicit padding members are only declared for
+ * RT_ARCH_X86 and unknown architectures are rejected at compile time.
+ */
+typedef struct LNXKAIOIOEVENT
+{
+ /** The pvUser field from the iocb. */
+ void *pvUser;
+#ifdef RT_ARCH_X86
+ /** Padding following the 32-bit pointer. */
+ uint32_t u32Padding0;
+#endif
+ /** The LNXKAIOIOCB object this event is for.
+ * NOTE(review): declared as a pointer to PLNXKAIOIOCB, i.e. a pointer to a
+ * pointer, although the description talks about the iocb itself - confirm
+ * which one is intended before dereferencing. */
+ PLNXKAIOIOCB *pIoCB;
+#ifdef RT_ARCH_X86
+ /** Padding following the 32-bit pointer. */
+ uint32_t u32Padding1;
+#endif
+ /** The result code of the operation. */
+#ifdef RT_ARCH_X86
+ int32_t rc;
+ uint32_t u32Padding2;
+#elif defined(RT_ARCH_AMD64)
+ int64_t rc;
+#else
+# error "Unknown architecture"
+#endif
+ /** Secondary result code. */
+#ifdef RT_ARCH_X86
+ int32_t rc2;
+ uint32_t u32Padding3;
+#elif defined(RT_ARCH_AMD64)
+ int64_t rc2;
+#else
+# error "Unknown architecture"
+#endif
+} LNXKAIOIOEVENT, *PLNXKAIOIOEVENT;
+
+
+/**
+ * Async I/O completion context state.
+ *
+ * Instances are allocated zero-initialized by RTFileAioCtxCreate.
+ */
+typedef struct RTFILEAIOCTXINTERNAL
+{
+ /** Handle to the async I/O context (filled in by io_setup). */
+ LNXKAIOCONTEXT AioContext;
+ /** Maximum number of requests this context can handle. */
+ int cRequestsMax;
+ /** Current number of requests active on this context.
+ * Decremented atomically when a request is canceled successfully. */
+ volatile int32_t cRequests;
+ /** The ID of the thread which is currently waiting for requests. */
+ volatile RTTHREAD hThreadWait;
+ /** Flag whether the thread was woken up. */
+ volatile bool fWokenUp;
+ /** Flag whether the thread is currently waiting in the syscall. */
+ volatile bool fWaiting;
+ /** Flags given during creation. */
+ uint32_t fFlags;
+ /** Magic value (RTFILEAIOCTX_MAGIC). */
+ uint32_t u32Magic;
+} RTFILEAIOCTXINTERNAL;
+/** Pointer to an internal context structure. */
+typedef RTFILEAIOCTXINTERNAL *PRTFILEAIOCTXINTERNAL;
+
+/**
+ * Async I/O request state.
+ *
+ * Instances are allocated zero-initialized by RTFileAioReqCreate and freed by
+ * RTFileAioReqDestroy.
+ */
+typedef struct RTFILEAIOREQINTERNAL
+{
+ /** The aio control block. This must be the FIRST element in
+ * the structure! (see notes below)
+ * Its address is what gets passed to the io_cancel syscall. */
+ LNXKAIOIOCB AioCB;
+ /** Current state the request is in. */
+ RTFILEAIOREQSTATE enmState;
+ /** The I/O context this request is associated with. */
+ LNXKAIOCONTEXT AioContext;
+ /** Return code the request completed with.
+ * Set to VERR_FILE_AIO_CANCELED when the request is canceled. */
+ int Rc;
+ /** Number of bytes actually transferred. */
+ size_t cbTransfered;
+ /** Completion context we are assigned to.
+ * Reset to NULL whenever the request is (re-)prepared. */
+ PRTFILEAIOCTXINTERNAL pCtxInt;
+ /** Magic value (RTFILEAIOREQ_MAGIC). */
+ uint32_t u32Magic;
+} RTFILEAIOREQINTERNAL;
+/** Pointer to an internal request structure. */
+typedef RTFILEAIOREQINTERNAL *PRTFILEAIOREQINTERNAL;
+
+
+/*********************************************************************************************************************************
+* Defined Constants And Macros *
+*********************************************************************************************************************************/
+/** The max number of events to get in one call.
+ * NOTE(review): no use of this constant is visible in this part of the file;
+ * presumably it sizes the event array of the wait function - confirm. */
+#define AIO_MAXIMUM_REQUESTS_PER_CONTEXT 64
+
+
+/**
+ * Creates a new async I/O context via the io_setup syscall.
+ *
+ * @returns VINF_SUCCESS on success.
+ * @retval  VERR_FILE_AIO_INSUFFICIENT_EVENTS if the syscall fails with EAGAIN.
+ * @returns Other IPRT status codes converted from errno.
+ * @param   cEvents      The number of events handed to io_setup.
+ * @param   pAioContext  Where to store the context handle.
+ */
+DECLINLINE(int) rtFileAsyncIoLinuxCreate(unsigned cEvents, LNXKAIOCONTEXT *pAioContext)
+{
+    int rcLnx = syscall(__NR_io_setup, cEvents, pAioContext);
+    if (RT_UNLIKELY(rcLnx == -1))
+        return errno == EAGAIN
+             ? VERR_FILE_AIO_INSUFFICIENT_EVENTS
+             : RTErrConvertFromErrno(errno);
+
+    return VINF_SUCCESS;
+}
+
+/**
+ * Destroys an async I/O context via the io_destroy syscall.
+ *
+ * @returns VINF_SUCCESS on success, otherwise the IPRT status code converted
+ *          from errno.
+ * @param   AioContext  The context handle to destroy.
+ */
+DECLINLINE(int) rtFileAsyncIoLinuxDestroy(LNXKAIOCONTEXT AioContext)
+{
+    int rcLnx = syscall(__NR_io_destroy, AioContext);
+    if (RT_UNLIKELY(rcLnx == -1))
+    {
+        int rcIprt = RTErrConvertFromErrno(errno);
+        return rcIprt;
+    }
+
+    return VINF_SUCCESS;
+}
+
+/**
+ * Submits an array of I/O requests to the kernel via the io_submit syscall.
+ *
+ * @returns VINF_SUCCESS on success, otherwise the IPRT status code converted
+ *          from errno.
+ * @param   AioContext   The context handle.
+ * @param   cReqs        Number of entries in ppIoCB.
+ * @param   ppIoCB       Array of pointers to the control blocks to submit.
+ * @param   pcSubmitted  Where to store the syscall return value on success;
+ *                       left untouched on failure.
+ */
+DECLINLINE(int) rtFileAsyncIoLinuxSubmit(LNXKAIOCONTEXT AioContext, long cReqs, LNXKAIOIOCB **ppIoCB, int *pcSubmitted)
+{
+    int cAccepted = syscall(__NR_io_submit, AioContext, cReqs, ppIoCB);
+    if (RT_UNLIKELY(cAccepted == -1))
+    {
+        int rcIprt = RTErrConvertFromErrno(errno);
+        return rcIprt;
+    }
+
+    *pcSubmitted = cAccepted;
+    return VINF_SUCCESS;
+}
+
+/**
+ * Cancels an I/O request via the io_cancel syscall.
+ *
+ * @returns VINF_SUCCESS on success, otherwise the IPRT status code converted
+ *          from errno.
+ * @param   AioContext  The context handle the request was submitted to.
+ * @param   pIoCB       The control block of the request to cancel.
+ * @param   pIoResult   Event structure handed to the syscall.
+ */
+DECLINLINE(int) rtFileAsyncIoLinuxCancel(LNXKAIOCONTEXT AioContext, PLNXKAIOIOCB pIoCB, PLNXKAIOIOEVENT pIoResult)
+{
+    int rcLnx = syscall(__NR_io_cancel, AioContext, pIoCB, pIoResult);
+    if (RT_UNLIKELY(rcLnx == -1))
+    {
+        int rcIprt = RTErrConvertFromErrno(errno);
+        return rcIprt;
+    }
+
+    return VINF_SUCCESS;
+}
+
+/**
+ * Waits for I/O events via the io_getevents syscall.
+ *
+ * @returns Number of events (natural number w/ 0), IPRT error code (negative).
+ * @param   AioContext   The context handle.
+ * @param   cReqsMin     Minimum number of events handed to the syscall.
+ * @param   cReqs        Maximum number of events handed to the syscall.
+ * @param   paIoResults  Array receiving the events.
+ * @param   pTimeout     Timeout handed to the syscall.
+ */
+DECLINLINE(int) rtFileAsyncIoLinuxGetEvents(LNXKAIOCONTEXT AioContext, long cReqsMin, long cReqs,
+                                            PLNXKAIOIOEVENT paIoResults, struct timespec *pTimeout)
+{
+    int cEvents = syscall(__NR_io_getevents, AioContext, cReqsMin, cReqs, paIoResults, pTimeout);
+    if (RT_UNLIKELY(cEvents == -1))
+    {
+        int rcIprt = RTErrConvertFromErrno(errno);
+        return rcIprt;
+    }
+
+    return cEvents;
+}
+
+/**
+ * Queries the async I/O limits of the host.
+ *
+ * Whether the API is implemented at all is probed by creating and destroying
+ * a one-event context.
+ *
+ * @returns VINF_SUCCESS on success, VERR_INVALID_POINTER for a bad pointer,
+ *          otherwise the status of the failed create or destroy call.
+ * @param   pAioLimits  Where to store the limits; only written on success.
+ */
+RTR3DECL(int) RTFileAioGetLimits(PRTFILEAIOLIMITS pAioLimits)
+{
+    AssertPtrReturn(pAioLimits, VERR_INVALID_POINTER);
+
+    /* Probe for support by setting up a throw-away context. */
+    LNXKAIOCONTEXT hProbeCtx = 0;
+    int rc = rtFileAsyncIoLinuxCreate(1, &hProbeCtx);
+    if (RT_SUCCESS(rc))
+        rc = rtFileAsyncIoLinuxDestroy(hProbeCtx);
+    if (RT_FAILURE(rc))
+        return rc;
+
+    /* Supported - the buffer alignment is the only restriction reported. */
+    pAioLimits->cbBufferAlignment   = 512;
+    pAioLimits->cReqsOutstandingMax = RTFILEAIO_UNLIMITED_REQS;
+
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Creates a new async I/O request handle.
+ *
+ * The new request starts out in the COMPLETED state and is not assigned to
+ * any context.
+ *
+ * @returns VINF_SUCCESS on success, VERR_INVALID_POINTER for a bad pointer,
+ *          VERR_NO_MEMORY if the allocation failed.
+ * @param   phReq       Where to store the request handle.
+ */
+RTR3DECL(int) RTFileAioReqCreate(PRTFILEAIOREQ phReq)
+{
+    AssertPtrReturn(phReq, VERR_INVALID_POINTER);
+
+    /* Get zeroed memory for the request. */
+    PRTFILEAIOREQINTERNAL pNewReq = (PRTFILEAIOREQINTERNAL)RTMemAllocZ(sizeof(RTFILEAIOREQINTERNAL));
+    if (RT_UNLIKELY(pNewReq == NULL))
+        return VERR_NO_MEMORY;
+
+    /* Initialize it and hand it out. */
+    pNewReq->u32Magic = RTFILEAIOREQ_MAGIC;
+    pNewReq->pCtxInt  = NULL;
+    RTFILEAIOREQ_SET_STATE(pNewReq, COMPLETED);
+
+    *phReq = (RTFILEAIOREQ)pNewReq;
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Destroys an async I/O request handle.
+ *
+ * @returns VINF_SUCCESS on success or if the handle is nil,
+ *          VERR_FILE_AIO_IN_PROGRESS if the request is still submitted, or
+ *          whatever the handle validation macro returns for a bad handle.
+ * @param   hReq        The request handle; NIL_RTFILEAIOREQ is ignored.
+ */
+RTDECL(int) RTFileAioReqDestroy(RTFILEAIOREQ hReq)
+{
+    /* Nil handles are quietly accepted. */
+    if (hReq == NIL_RTFILEAIOREQ)
+        return VINF_SUCCESS;
+
+    /* The handle must be valid and the request must not be in flight. */
+    PRTFILEAIOREQINTERNAL pDeadReq = hReq;
+    RTFILEAIOREQ_VALID_RETURN(pDeadReq);
+    RTFILEAIOREQ_NOT_STATE_RETURN_RC(pDeadReq, SUBMITTED, VERR_FILE_AIO_IN_PROGRESS);
+
+    /* Invalidate the magic before releasing the memory. */
+    ASMAtomicUoWriteU32(&pDeadReq->u32Magic, ~RTFILEAIOREQ_MAGIC);
+    RTMemFree(pDeadReq);
+
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Common worker which fills in the control block of a request and moves the
+ * request to the PREPARED state.
+ *
+ * @returns VINF_SUCCESS on success, VERR_FILE_AIO_IN_PROGRESS if the request
+ *          is currently submitted, or whatever the handle validation macro
+ *          returns for a bad handle.
+ * @param   hReq                The request handle.
+ * @param   hFile               The file the request operates on.
+ * @param   uTransferDirection  The LNXKAIO_IOCB_CMD_XXX opcode.
+ * @param   off                 Offset at which the transfer starts.
+ * @param   pvBuf               The data buffer.
+ * @param   cbTransfer          Number of bytes to transfer.
+ * @param   pvUser              Opaque user data stored with the request.
+ */
+DECLINLINE(int) rtFileAioReqPrepareTransfer(RTFILEAIOREQ hReq, RTFILE hFile,
+                                            uint16_t uTransferDirection,
+                                            RTFOFF off, void *pvBuf, size_t cbTransfer,
+                                            void *pvUser)
+{
+    /* The handle must be valid and the request must not be in flight. */
+    PRTFILEAIOREQINTERNAL pReqInt = hReq;
+    RTFILEAIOREQ_VALID_RETURN(pReqInt);
+    RTFILEAIOREQ_NOT_STATE_RETURN_RC(pReqInt, SUBMITTED, VERR_FILE_AIO_IN_PROGRESS);
+    Assert(hFile != NIL_RTFILE);
+
+    /* Everything but a flush needs a buffer, a sane offset and a size. */
+    if (uTransferDirection != LNXKAIO_IOCB_CMD_FSYNC)
+    {
+        AssertPtr(pvBuf);
+        Assert(off >= 0);
+        Assert(cbTransfer > 0);
+    }
+
+    /* Fill in the control block. */
+    LNXKAIOIOCB *pIoCB = &pReqInt->AioCB;
+    pIoCB->pvUser      = pvUser;
+    pIoCB->pvBuf       = pvBuf;
+    pIoCB->cbTransfer  = cbTransfer;
+    pIoCB->off         = off;
+    pIoCB->uFileDesc   = RTFileToNative(hFile);
+    pIoCB->u16IoOpCode = uTransferDirection;
+
+    /* The request is not bound to a context until it gets submitted. */
+    pReqInt->pCtxInt = NULL;
+    RTFILEAIOREQ_SET_STATE(pReqInt, PREPARED);
+
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Prepares a request for an asynchronous read.
+ *
+ * @returns Status code of rtFileAioReqPrepareTransfer.
+ * @param   hReq        The request handle.
+ * @param   hFile       The file to read from.
+ * @param   off         Offset to start reading at.
+ * @param   pvBuf       Where to store the data.
+ * @param   cbRead      Number of bytes to read.
+ * @param   pvUser      Opaque user data stored with the request.
+ */
+RTDECL(int) RTFileAioReqPrepareRead(RTFILEAIOREQ hReq, RTFILE hFile, RTFOFF off,
+                                    void *pvBuf, size_t cbRead, void *pvUser)
+{
+    int rc = rtFileAioReqPrepareTransfer(hReq, hFile, LNXKAIO_IOCB_CMD_READ,
+                                         off, pvBuf, cbRead, pvUser);
+    return rc;
+}
+
+
+/**
+ * Prepares a request for an asynchronous write.
+ *
+ * @returns Status code of rtFileAioReqPrepareTransfer.
+ * @param   hReq        The request handle.
+ * @param   hFile       The file to write to.
+ * @param   off         Offset to start writing at.
+ * @param   pvBuf       The data to write.
+ * @param   cbWrite     Number of bytes to write.
+ * @param   pvUser      Opaque user data stored with the request.
+ */
+RTDECL(int) RTFileAioReqPrepareWrite(RTFILEAIOREQ hReq, RTFILE hFile, RTFOFF off,
+                                     void const *pvBuf, size_t cbWrite, void *pvUser)
+{
+    /* The common worker takes a non-const buffer pointer. */
+    void *pvSrc = (void *)pvBuf;
+    int rc = rtFileAioReqPrepareTransfer(hReq, hFile, LNXKAIO_IOCB_CMD_WRITE,
+                                         off, pvSrc, cbWrite, pvUser);
+    return rc;
+}
+
+
+/**
+ * Prepares a request for an asynchronous flush.
+ *
+ * @returns Status code of rtFileAioReqPrepareTransfer on success,
+ *          VERR_INVALID_HANDLE if hFile is nil, VERR_FILE_AIO_IN_PROGRESS if
+ *          the request is currently submitted.
+ * @param   hReq        The request handle.
+ * @param   hFile       The file to flush.
+ * @param   pvUser      Opaque user data stored with the request.
+ */
+RTDECL(int) RTFileAioReqPrepareFlush(RTFILEAIOREQ hReq, RTFILE hFile, void *pvUser)
+{
+    PRTFILEAIOREQINTERNAL pFlushReq = hReq;
+    RTFILEAIOREQ_VALID_RETURN(pFlushReq);
+    AssertReturn(hFile != NIL_RTFILE, VERR_INVALID_HANDLE);
+    RTFILEAIOREQ_NOT_STATE_RETURN_RC(pFlushReq, SUBMITTED, VERR_FILE_AIO_IN_PROGRESS);
+
+    /* A flush carries neither an offset nor a buffer. */
+    int rc = rtFileAioReqPrepareTransfer(pFlushReq, hFile, LNXKAIO_IOCB_CMD_FSYNC,
+                                         0, NULL, 0, pvUser);
+    return rc;
+}
+
+
+/**
+ * Returns the user data stored with a request.
+ *
+ * @returns The pvUser value of the control block, NULL if the handle is
+ *          rejected by the validation macro.
+ * @param   hReq        The request handle.
+ */
+RTDECL(void *) RTFileAioReqGetUser(RTFILEAIOREQ hReq)
+{
+    PRTFILEAIOREQINTERNAL pReqInt = hReq;
+    RTFILEAIOREQ_VALID_RETURN_RC(pReqInt, NULL);
+
+    void *pvUser = pReqInt->AioCB.pvUser;
+    return pvUser;
+}
+
+
+/**
+ * Tries to cancel a submitted request.
+ *
+ * @returns VINF_SUCCESS if the request was cancelled; it is then marked
+ *          COMPLETED with VERR_FILE_AIO_CANCELED as its status.
+ * @retval  VERR_FILE_AIO_IN_PROGRESS if the cancel worker returned
+ *          VERR_TRY_AGAIN.
+ * @returns Any other failure status of rtFileAsyncIoLinuxCancel() unchanged.
+ *          VERR_FILE_AIO_NOT_SUBMITTED is the status handed to the state
+ *          check at the top.
+ * @param   hReq  The request handle.
+ */
+RTDECL(int) RTFileAioReqCancel(RTFILEAIOREQ hReq)
+{
+    PRTFILEAIOREQINTERNAL pReqInt = hReq;
+    RTFILEAIOREQ_VALID_RETURN(pReqInt);
+    RTFILEAIOREQ_STATE_RETURN_RC(pReqInt, SUBMITTED, VERR_FILE_AIO_NOT_SUBMITTED);
+
+    LNXKAIOIOEVENT AioEvent;
+    int const rcCancel = rtFileAsyncIoLinuxCancel(pReqInt->AioContext, &pReqInt->AioCB, &AioEvent);
+    if (!RT_SUCCESS(rcCancel))
+        return rcCancel == VERR_TRY_AGAIN ? VERR_FILE_AIO_IN_PROGRESS : rcCancel;
+
+    /*
+     * The request will never arrive at the completion port, so the
+     * outstanding request count of the context is decremented here.
+     */
+    AssertMsg(VALID_PTR(pReqInt->pCtxInt),
+              ("Invalid state. Request was canceled but wasn't submitted\n"));
+    ASMAtomicDecS32(&pReqInt->pCtxInt->cRequests);
+
+    pReqInt->Rc = VERR_FILE_AIO_CANCELED;
+    RTFILEAIOREQ_SET_STATE(pReqInt, COMPLETED);
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Returns the status stored in a request and optionally the transfer size.
+ *
+ * @returns The status stored in the request.  VERR_FILE_AIO_IN_PROGRESS and
+ *          VERR_FILE_AIO_NOT_SUBMITTED are the statuses handed to the checks
+ *          for the SUBMITTED and PREPARED states respectively.
+ * @param   hReq           The request handle.
+ * @param   pcbTransfered  Where to store the number of bytes transferred.
+ *                         Optional; only written when the stored status
+ *                         indicates success.
+ */
+RTDECL(int) RTFileAioReqGetRC(RTFILEAIOREQ hReq, size_t *pcbTransfered)
+{
+    PRTFILEAIOREQINTERNAL pReqInt = hReq;
+    RTFILEAIOREQ_VALID_RETURN(pReqInt);
+    AssertPtrNull(pcbTransfered);
+    RTFILEAIOREQ_NOT_STATE_RETURN_RC(pReqInt, SUBMITTED, VERR_FILE_AIO_IN_PROGRESS);
+    RTFILEAIOREQ_NOT_STATE_RETURN_RC(pReqInt, PREPARED, VERR_FILE_AIO_NOT_SUBMITTED);
+
+    if (pcbTransfered)
+    {
+        if (RT_SUCCESS(pReqInt->Rc))
+            *pcbTransfered = pReqInt->cbTransfered;
+    }
+
+    return pReqInt->Rc;
+}
+
+
+/**
+ * Creates an async I/O completion context.
+ *
+ * @returns IPRT status code.
+ * @retval  VERR_INVALID_POINTER if phAioCtx is not a valid pointer.
+ * @retval  VERR_INVALID_PARAMETER if fFlags has bits outside
+ *          RTFILEAIOCTX_FLAGS_VALID_MASK.
+ * @retval  VERR_OUT_OF_RANGE if cAioReqsMax is RTFILEAIO_UNLIMITED_REQS.
+ * @retval  VERR_NO_MEMORY if the context structure cannot be allocated.
+ * @param   phAioCtx     Where to store the context handle on success.
+ * @param   cAioReqsMax  Maximum number of requests the context can handle.
+ * @param   fFlags       RTFILEAIOCTX_FLAGS_XXX.
+ */
+RTDECL(int) RTFileAioCtxCreate(PRTFILEAIOCTX phAioCtx, uint32_t cAioReqsMax,
+                               uint32_t fFlags)
+{
+    AssertPtrReturn(phAioCtx, VERR_INVALID_POINTER);
+    AssertReturn(!(fFlags & ~RTFILEAIOCTX_FLAGS_VALID_MASK), VERR_INVALID_PARAMETER);
+
+    /* The kernel interface needs a maximum. */
+    if (cAioReqsMax == RTFILEAIO_UNLIMITED_REQS)
+        return VERR_OUT_OF_RANGE;
+
+    PRTFILEAIOCTXINTERNAL pCtxInt = (PRTFILEAIOCTXINTERNAL)RTMemAllocZ(sizeof(RTFILEAIOCTXINTERNAL));
+    if (RT_UNLIKELY(!pCtxInt))
+        return VERR_NO_MEMORY;
+
+    /* Create the native context; without it the structure is of no use. */
+    int rc = rtFileAsyncIoLinuxCreate(cAioReqsMax, &pCtxInt->AioContext);
+    if (!RT_SUCCESS(rc))
+    {
+        RTMemFree(pCtxInt);
+        return rc;
+    }
+
+    pCtxInt->fWokenUp     = false;
+    pCtxInt->fWaiting     = false;
+    pCtxInt->hThreadWait  = NIL_RTTHREAD;
+    pCtxInt->cRequestsMax = cAioReqsMax;
+    pCtxInt->fFlags       = fFlags;
+    pCtxInt->u32Magic     = RTFILEAIOCTX_MAGIC;
+    *phAioCtx = (RTFILEAIOCTX)pCtxInt;
+    return rc;
+}
+
+/**
+ * Destroys an async I/O completion context.
+ *
+ * @returns VINF_SUCCESS on success or when hAioCtx is NIL_RTFILEAIOCTX.
+ * @retval  VERR_FILE_AIO_BUSY if the context still has outstanding requests.
+ * @returns The failure status of rtFileAsyncIoLinuxDestroy(), in which case
+ *          the context structure is left untouched.
+ * @param   hAioCtx  The context handle; NIL is quietly ignored.
+ */
+RTDECL(int) RTFileAioCtxDestroy(RTFILEAIOCTX hAioCtx)
+{
+    /* Validate the handle and ignore nil. */
+    if (hAioCtx == NIL_RTFILEAIOCTX)
+        return VINF_SUCCESS;
+    PRTFILEAIOCTXINTERNAL pCtxInt = hAioCtx;
+    RTFILEAIOCTX_VALID_RETURN(pCtxInt);
+
+    /* Cannot destroy a busy context. */
+    if (RT_UNLIKELY(pCtxInt->cRequests))
+        return VERR_FILE_AIO_BUSY;
+
+    /* The native bit first, then mark it as dead and free it. */
+    int rc = rtFileAsyncIoLinuxDestroy(pCtxInt->AioContext);
+    if (RT_FAILURE(rc))
+        return rc;
+    ASMAtomicUoWriteU32(&pCtxInt->u32Magic, RTFILEAIOCTX_MAGIC_DEAD);
+    RTMemFree(pCtxInt);
+
+    return VINF_SUCCESS;
+}
+
+/**
+ * Returns the maximum number of requests a context can handle.
+ *
+ * @returns The cRequestsMax value the context was created with,
+ *          RTFILEAIO_UNLIMITED_REQS for NIL_RTFILEAIOCTX, or 0 as the value
+ *          handed to the handle validation macro for an invalid handle.
+ * @param   hAioCtx  The context handle or NIL_RTFILEAIOCTX.
+ */
+RTDECL(uint32_t) RTFileAioCtxGetMaxReqCount(RTFILEAIOCTX hAioCtx)
+{
+    /* Nil means global here. */
+    if (hAioCtx == NIL_RTFILEAIOCTX)
+        return RTFILEAIO_UNLIMITED_REQS; /** @todo r=bird: I'm a bit puzzled by this return value since it
+ * is completely useless in RTFileAioCtxCreate. */
+
+    /* Return 0 if the handle is invalid, it's better than garbage I think... */
+    PRTFILEAIOCTXINTERNAL pCtxInt = hAioCtx;
+    RTFILEAIOCTX_VALID_RETURN_RC(pCtxInt, 0);
+
+    return pCtxInt->cRequestsMax;
+}
+
+/**
+ * Associates a file with a completion context.
+ *
+ * This implementation has nothing to do here; both arguments are ignored.
+ *
+ * @returns VINF_SUCCESS, always.
+ * @param   hAioCtx  The context handle (unused).
+ * @param   hFile    The file handle (unused).
+ */
+RTDECL(int) RTFileAioCtxAssociateWithFile(RTFILEAIOCTX hAioCtx, RTFILE hFile)
+{
+    NOREF(hAioCtx);
+    NOREF(hFile);
+    return VINF_SUCCESS;
+}
+
+/**
+ * Submits a batch of requests to the kernel via the given context.
+ *
+ * All requests are first associated with the context and marked SUBMITTED.
+ * They are then handed to the kernel, in several rounds if the kernel accepts
+ * only part of the batch at a time.
+ *
+ * @returns VINF_SUCCESS when all requests were handed to the kernel.
+ * @retval  VERR_INVALID_HANDLE if one of the request handles is invalid.  The
+ *          requests that were already marked are put back into the PREPARED
+ *          state; the invalid one is not touched.
+ * @retval  VERR_FILE_AIO_INSUFFICIENT_RESSOURCES if the submit worker returned
+ *          VERR_TRY_AGAIN.  The requests not yet accepted are PREPARED again.
+ * @returns Any other failure status of rtFileAsyncIoLinuxSubmit().  The first
+ *          request not yet accepted is then marked COMPLETED with that status
+ *          and the ones following it are PREPARED again.
+ * @param   hAioCtx  The context handle.
+ * @param   pahReqs  Array of request handles to submit.
+ * @param   cReqs    Number of entries in pahReqs; must be greater than 0.
+ */
+RTDECL(int) RTFileAioCtxSubmit(RTFILEAIOCTX hAioCtx, PRTFILEAIOREQ pahReqs, size_t cReqs)
+{
+    int rc = VINF_SUCCESS;
+
+    /*
+     * Parameter validation.
+     */
+    PRTFILEAIOCTXINTERNAL pCtxInt = hAioCtx;
+    RTFILEAIOCTX_VALID_RETURN(pCtxInt);
+    AssertReturn(cReqs > 0, VERR_INVALID_PARAMETER);
+    AssertPtrReturn(pahReqs, VERR_INVALID_POINTER);
+    size_t i = cReqs; /* size_t like cReqs, so large counts are not truncated. */
+    PRTFILEAIOREQINTERNAL pReqInt = NULL;
+
+    /*
+     * Validate requests and associate with the context.
+     */
+    while (i-- > 0)
+    {
+        pReqInt = pahReqs[i];
+        if (RTFILEAIOREQ_IS_NOT_VALID(pReqInt))
+        {
+            /*
+             * Undo everything and stop submitting.  Only the entries above
+             * index i were modified; entry i is the invalid handle itself and
+             * must not be written to, hence the pre-decrement.
+             */
+            size_t iUndo = cReqs;
+            while (--iUndo > i)
+            {
+                pReqInt = pahReqs[iUndo];
+                RTFILEAIOREQ_SET_STATE(pReqInt, PREPARED);
+                pReqInt->pCtxInt = NULL;
+                pReqInt->AioContext = 0;
+            }
+            return VERR_INVALID_HANDLE;
+        }
+
+        pReqInt->AioContext = pCtxInt->AioContext;
+        pReqInt->pCtxInt = pCtxInt;
+        RTFILEAIOREQ_SET_STATE(pReqInt, SUBMITTED);
+    }
+
+    do
+    {
+        /*
+         * We cast pahReqs to the Linux iocb structure to avoid copying the requests
+         * into a temporary array. This is possible because the iocb structure is
+         * the first element in the request structure (see PRTFILEAIOREQINTERNAL).
+         */
+        int cReqsSubmitted = 0;
+        rc = rtFileAsyncIoLinuxSubmit(pCtxInt->AioContext, cReqs,
+                                      (PLNXKAIOIOCB *)pahReqs,
+                                      &cReqsSubmitted);
+        if (RT_FAILURE(rc))
+        {
+            /*
+             * We encountered an error.
+             * This means that the first IoCB
+             * is not correctly initialized
+             * (invalid buffer alignment or bad file descriptor).
+             * Revert every request into the prepared state except
+             * the first one which will switch to completed.
+             * Another reason could be insufficient resources.
+             *
+             * pahReqs and cReqs only cover the requests which have not been
+             * accepted in an earlier round of this loop.
+             */
+            i = cReqs;
+            while (i-- > 0)
+            {
+                /* Already validated. */
+                pReqInt = pahReqs[i];
+                pReqInt->pCtxInt = NULL;
+                pReqInt->AioContext = 0;
+                RTFILEAIOREQ_SET_STATE(pReqInt, PREPARED);
+            }
+
+            if (rc == VERR_TRY_AGAIN)
+                return VERR_FILE_AIO_INSUFFICIENT_RESSOURCES;
+            else
+            {
+                /* The first request failed. */
+                pReqInt = pahReqs[0];
+                RTFILEAIOREQ_SET_STATE(pReqInt, COMPLETED);
+                pReqInt->Rc = rc;
+                pReqInt->cbTransfered = 0;
+                return rc;
+            }
+        }
+
+        /* Advance. */
+        cReqs -= cReqsSubmitted;
+        pahReqs += cReqsSubmitted;
+        ASMAtomicAddS32(&pCtxInt->cRequests, cReqsSubmitted);
+
+    } while (cReqs);
+
+    return rc;
+}
+
+/**
+ * Waits for requests on the context to complete and collects their handles.
+ *
+ * @returns VINF_SUCCESS when the wanted number of requests was collected.
+ * @retval  VERR_TIMEOUT if cMillies elapsed before enough requests completed.
+ * @retval  VERR_INTERRUPTED if the wakeup flag is found set while the status
+ *          is otherwise successful; the flag is cleared in that case.
+ * @retval  VERR_FILE_AIO_NO_REQUEST if no request is outstanding and the
+ *          context was created without
+ *          RTFILEAIOCTX_FLAGS_WAIT_WITHOUT_PENDING_REQUESTS.
+ * @returns The failure status of rtFileAsyncIoLinuxGetEvents() otherwise.
+ * @param   hAioCtx   The context handle.
+ * @param   cMinReqs  Minimum number of completed requests to wait for; 0 is
+ *                    treated as 1.
+ * @param   cMillies  Timeout in milliseconds or RT_INDEFINITE_WAIT.
+ * @param   pahReqs   Array receiving the handles of the completed requests.
+ * @param   cReqs     Number of entries in pahReqs; must not be 0 nor smaller
+ *                    than cMinReqs.
+ * @param   pcReqs    Where to return the number of handles stored in pahReqs;
+ *                    set to 0 before the remaining parameters are validated.
+ */
+RTDECL(int) RTFileAioCtxWait(RTFILEAIOCTX hAioCtx, size_t cMinReqs, RTMSINTERVAL cMillies,
+ PRTFILEAIOREQ pahReqs, size_t cReqs, uint32_t *pcReqs)
+{
+    /*
+     * Validate the parameters, making sure to always set pcReqs.
+     */
+    AssertPtrReturn(pcReqs, VERR_INVALID_POINTER);
+    *pcReqs = 0; /* always set */
+    PRTFILEAIOCTXINTERNAL pCtxInt = hAioCtx;
+    RTFILEAIOCTX_VALID_RETURN(pCtxInt);
+    AssertPtrReturn(pahReqs, VERR_INVALID_POINTER);
+    AssertReturn(cReqs != 0, VERR_INVALID_PARAMETER);
+    AssertReturn(cReqs >= cMinReqs, VERR_OUT_OF_RANGE);
+
+    /*
+     * Can't wait if there are no requests around.
+     */
+    if ( RT_UNLIKELY(ASMAtomicUoReadS32(&pCtxInt->cRequests) == 0)
+        && !(pCtxInt->fFlags & RTFILEAIOCTX_FLAGS_WAIT_WITHOUT_PENDING_REQUESTS))
+        return VERR_FILE_AIO_NO_REQUEST;
+
+    /*
+     * Convert the timeout if specified.
+     */
+    struct timespec *pTimeout = NULL;
+    struct timespec Timeout = {0,0};
+    uint64_t StartNanoTS = 0;
+    if (cMillies != RT_INDEFINITE_WAIT)
+    {
+        Timeout.tv_sec = cMillies / 1000;
+        Timeout.tv_nsec = cMillies % 1000 * 1000000;
+        pTimeout = &Timeout;
+        StartNanoTS = RTTimeNanoTS();
+    }
+
+    /* Wait for at least one. */
+    if (!cMinReqs)
+        cMinReqs = 1;
+
+    /* For the wakeup call. */
+    Assert(pCtxInt->hThreadWait == NIL_RTTHREAD);
+    ASMAtomicWriteHandle(&pCtxInt->hThreadWait, RTThreadSelf());
+
+    /*
+     * Loop until we're woken up, hit an error (incl timeout), or
+     * have collected the desired number of requests.
+     */
+    int rc = VINF_SUCCESS;
+    int cRequestsCompleted = 0;
+    while (!pCtxInt->fWokenUp)
+    {
+        LNXKAIOIOEVENT aPortEvents[AIO_MAXIMUM_REQUESTS_PER_CONTEXT];
+        int cRequestsToWait = RT_MIN(cReqs, AIO_MAXIMUM_REQUESTS_PER_CONTEXT);
+        ASMAtomicXchgBool(&pCtxInt->fWaiting, true);
+        rc = rtFileAsyncIoLinuxGetEvents(pCtxInt->AioContext, cMinReqs, cRequestsToWait, &aPortEvents[0], pTimeout);
+        ASMAtomicXchgBool(&pCtxInt->fWaiting, false);
+        if (RT_FAILURE(rc))
+            break;
+        uint32_t const cDone = rc; /* A successful status is used as the number of events returned. */
+        rc = VINF_SUCCESS;
+
+        /*
+         * Process received events / requests.
+         */
+        for (uint32_t i = 0; i < cDone; i++)
+        {
+            /*
+             * The iocb is the first element in our request structure.
+             * So we can safely cast it directly to the handle (see above)
+             */
+            PRTFILEAIOREQINTERNAL pReqInt = (PRTFILEAIOREQINTERNAL)aPortEvents[i].pIoCB;
+            AssertPtr(pReqInt);
+            Assert(pReqInt->u32Magic == RTFILEAIOREQ_MAGIC);
+
+            /** @todo aeichner: The rc field contains the result code
+             * like you can find in errno for the normal read/write ops.
+             * But there is a second field called rc2. I don't know the
+             * purpose for it yet.
+             */
+            if (RT_UNLIKELY(aPortEvents[i].rc < 0))
+                pReqInt->Rc = RTErrConvertFromErrno(-aPortEvents[i].rc); /* Convert to positive value. */
+            else
+            {
+                pReqInt->Rc = VINF_SUCCESS;
+                pReqInt->cbTransfered = aPortEvents[i].rc;
+            }
+
+            /* Mark the request as finished. */
+            RTFILEAIOREQ_SET_STATE(pReqInt, COMPLETED);
+
+            pahReqs[cRequestsCompleted++] = (RTFILEAIOREQ)pReqInt;
+        }
+
+        /*
+         * Done Yet? If not advance and try again.
+         */
+        if (cDone >= cMinReqs)
+            break;
+        cMinReqs -= cDone;
+        cReqs -= cDone;
+
+        if (cMillies != RT_INDEFINITE_WAIT)
+        {
+            /* The API doesn't return ETIMEDOUT, so we have to fix that ourselves. */
+            uint64_t NanoTS = RTTimeNanoTS();
+            uint64_t cMilliesElapsed = (NanoTS - StartNanoTS) / 1000000;
+            if (cMilliesElapsed >= cMillies)
+            {
+                rc = VERR_TIMEOUT;
+                break;
+            }
+
+            /* The syscall supposedly updates it, but we're paranoid. :-) */
+            Timeout.tv_sec = (cMillies - (RTMSINTERVAL)cMilliesElapsed) / 1000;
+            Timeout.tv_nsec = (cMillies - (RTMSINTERVAL)cMilliesElapsed) % 1000 * 1000000;
+        }
+    }
+
+    /*
+     * Update the context state and set the return value.
+     */
+    *pcReqs = cRequestsCompleted;
+    ASMAtomicSubS32(&pCtxInt->cRequests, cRequestsCompleted);
+    Assert(pCtxInt->hThreadWait == RTThreadSelf());
+    ASMAtomicWriteHandle(&pCtxInt->hThreadWait, NIL_RTTHREAD);
+
+    /*
+     * Clear the wakeup flag and set rc.
+     */
+    if ( pCtxInt->fWokenUp
+        && RT_SUCCESS(rc))
+    {
+        ASMAtomicXchgBool(&pCtxInt->fWokenUp, false);
+        rc = VERR_INTERRUPTED;
+    }
+
+    return rc;
+}
+
+/**
+ * Wakes up a thread waiting in RTFileAioCtxWait() on the given context.
+ *
+ * Sets the wakeup flag of the context and pokes the waiting thread if the
+ * flag was not already set and a thread is currently flagged as waiting.
+ *
+ * @returns VINF_SUCCESS once the handle has passed validation.
+ * @param   hAioCtx  The context handle.
+ */
+RTDECL(int) RTFileAioCtxWakeup(RTFILEAIOCTX hAioCtx)
+{
+    PRTFILEAIOCTXINTERNAL pCtxInt = hAioCtx;
+    RTFILEAIOCTX_VALID_RETURN(pCtxInt);
+
+    /** @todo r=bird: Define the protocol for how to resume work after calling
+     * this function. */
+
+    bool fWokenUp = ASMAtomicXchgBool(&pCtxInt->fWokenUp, true);
+
+    /*
+     * Read the thread handle before the status flag.
+     * If we read the handle after the flag we might
+     * end up with an invalid handle because the thread
+     * waiting in RTFileAioCtxWait() might get scheduled
+     * before we read the flag and return.
+     * We can ensure that the handle is valid if fWaiting is true
+     * when reading the handle before the status flag.
+     */
+    RTTHREAD hThread;
+    ASMAtomicReadHandle(&pCtxInt->hThreadWait, &hThread);
+    bool fWaiting = ASMAtomicReadBool(&pCtxInt->fWaiting);
+    if ( !fWokenUp
+        && fWaiting)
+    {
+        /*
+         * If a thread waits the handle must be valid.
+         * It is possible that the thread returns from
+         * rtFileAsyncIoLinuxGetEvents() before the signal
+         * is sent.
+         * This is no problem because we already set fWokenUp
+         * to true which will let the thread return VERR_INTERRUPTED
+         * and the next call to RTFileAioCtxWait() will not
+         * return VERR_INTERRUPTED because signals are not saved
+         * and will simply vanish if the destination thread can't
+         * receive it.
+         */
+        Assert(hThread != NIL_RTTHREAD);
+        RTThreadPoke(hThread);
+    }
+
+    return VINF_SUCCESS;
+}
+
diff --git a/src/VBox/Runtime/r3/linux/krnlmod-linux.cpp b/src/VBox/Runtime/r3/linux/krnlmod-linux.cpp
new file mode 100644
index 00000000..f5f55647
--- /dev/null
+++ b/src/VBox/Runtime/r3/linux/krnlmod-linux.cpp
@@ -0,0 +1,324 @@
+/* $Id: krnlmod-linux.cpp $ */
+/** @file
+ * IPRT - Kernel module, Linux.
+ */
+
+/*
+ * Copyright (C) 2017-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
+ * VirtualBox OSE distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP RTLOGGROUP_SYSTEM
+#include <iprt/krnlmod.h>
+#include <iprt/linux/sysfs.h>
+#include <iprt/asm.h>
+#include <iprt/assert.h>
+#include <iprt/dir.h>
+#include <iprt/err.h>
+#include <iprt/mem.h>
+#include <iprt/string.h>
+#include <iprt/types.h>
+
+
+/**
+ * Internal kernel module information record state.
+ */
+typedef struct RTKRNLMODINFOINT
+{
+    /** Reference counter of this record (see RTKrnlModInfoRetain and RTKrnlModInfoRelease). */
+    volatile uint32_t cRefs;
+    /** Reference count for the kernel module (read from the module's sysfs "refcnt" file). */
+    uint32_t cRefKrnlMod;
+    /** Load address of the kernel module (read from the sysfs "sections/.text" file). */
+    RTR0UINTPTR uLoadAddr;
+    /** Size of the kernel module (read from the sysfs "coresize" file). */
+    size_t cbKrnlMod;
+    /** Size of the name in characters including the zero terminator. */
+    size_t cchName;
+    /** Module name - variable in size, the record is allocated with room for cchName characters. */
+    char achName[1];
+} RTKRNLMODINFOINT;
+/** Pointer to the internal kernel module information record. */
+typedef RTKRNLMODINFOINT *PRTKRNLMODINFOINT;
+/** Pointer to a const internal kernel module information record. */
+typedef const RTKRNLMODINFOINT *PCRTKRNLMODINFOINT;
+
+
+
+/**
+ * Destroy the given kernel module information record.
+ *
+ * The record is a single allocation, so freeing it with RTMemFree() is all
+ * there is to do.  Called by RTKrnlModInfoRelease() when the reference count
+ * reaches zero.
+ *
+ * @returns nothing.
+ * @param   pThis  The record to destroy.
+ */
+static void rtKrnlModInfoDestroy(PRTKRNLMODINFOINT pThis)
+{
+    RTMemFree(pThis);
+}
+
+
+/**
+ * Reads an integer from a file below the sysfs directory of a module, using
+ * a default value if the file does not exist.
+ *
+ * @returns Status of RTLinuxSysFsReadIntFile(), with VERR_FILE_NOT_FOUND
+ *          turned into VINF_SUCCESS.
+ * @param   uBase    The number base passed on to RTLinuxSysFsReadIntFile().
+ * @param   pi64     Where to store the value.
+ * @param   i64Def   The value to store if the file is not found.
+ * @param   pszName  The kernel module name.
+ * @param   pszPath  The file path relative to the module's sysfs directory.
+ */
+static int rtKrnlModLinuxReadIntFileDef(unsigned uBase, int64_t *pi64, int64_t i64Def,
+                                        const char *pszName, const char *pszPath)
+{
+    int const rcRead = RTLinuxSysFsReadIntFile(uBase, pi64, "module/%s/%s", pszName, pszPath);
+    if (rcRead != VERR_FILE_NOT_FOUND)
+        return rcRead;
+
+    /* A missing file is not an error, the caller gets the default instead. */
+    *pi64 = i64Def;
+    return VINF_SUCCESS;
+}
+
+/**
+ * Creates a new kernel module information record for the given module.
+ *
+ * The record starts out with one reference and is filled from the module's
+ * "refcnt", "coresize" and "sections/.text" sysfs files.  Only the status of
+ * the last of the three reads decides whether the call succeeds.
+ *
+ * @returns IPRT status code.
+ * @retval  VERR_NO_MEMORY if the record cannot be allocated.
+ * @param   pszName        The kernel module name.
+ * @param   phKrnlModInfo  Where to store the handle to the kernel module information record
+ *                         on success.
+ */
+static int rtKrnlModLinuxInfoCreate(const char *pszName, PRTKRNLMODINFO phKrnlModInfo)
+{
+    size_t cbName = strlen(pszName) + 1;
+    PRTKRNLMODINFOINT pInfo = (PRTKRNLMODINFOINT)RTMemAllocZ(RT_UOFFSETOF_DYN(RTKRNLMODINFOINT, achName[cbName]));
+    if (!pInfo)
+        return VERR_NO_MEMORY;
+
+    pInfo->cRefs   = 1;
+    pInfo->cchName = cbName;
+    memcpy(pInfo->achName, pszName, cbName);
+
+    int64_t i64Value = 0;
+    int rc = rtKrnlModLinuxReadIntFileDef(10, &i64Value, 0, pszName, "refcnt");
+    if (RT_SUCCESS(rc))
+        pInfo->cRefKrnlMod = (uint32_t)i64Value;
+
+    rc = rtKrnlModLinuxReadIntFileDef(10, &i64Value, 0, pszName, "coresize");
+    if (RT_SUCCESS(rc))
+        pInfo->cbKrnlMod = i64Value;
+
+    rc = rtKrnlModLinuxReadIntFileDef(16, &i64Value, 0, pszName, "sections/.text");
+    if (RT_SUCCESS(rc))
+    {
+        pInfo->uLoadAddr = i64Value;
+        *phKrnlModInfo = pInfo;
+    }
+    else
+        RTMemFree(pInfo);
+
+    return rc;
+}
+
+
+/**
+ * Checks whether the given kernel module is loaded by looking for its
+ * directory below "module/" in sysfs.
+ *
+ * @returns VINF_SUCCESS when the question could be answered, otherwise the
+ *          failure status of the sysfs lookup (in which case *pfLoaded is not
+ *          modified).
+ * @retval  VERR_INVALID_POINTER if pszName or pfLoaded is not a valid pointer.
+ * @param   pszName   The kernel module name.
+ * @param   pfLoaded  Where to store whether the module is loaded.
+ */
+RTDECL(int) RTKrnlModQueryLoaded(const char *pszName, bool *pfLoaded)
+{
+    AssertPtrReturn(pszName, VERR_INVALID_POINTER);
+    AssertPtrReturn(pfLoaded, VERR_INVALID_POINTER);
+
+    /*
+     * Use the status code returning variant here.  RTLinuxSysFsExists() yields
+     * a boolean, which must not be compared against VINF_SUCCESS and
+     * VERR_FILE_NOT_FOUND ("exists" would match neither and "missing" would
+     * compare equal to VINF_SUCCESS).
+     */
+    int rc = RTLinuxSysFsExistsEx("module/%s", pszName);
+    if (rc == VINF_SUCCESS)
+        *pfLoaded = true;
+    else if (rc == VERR_FILE_NOT_FOUND)
+    {
+        *pfLoaded = false;
+        rc = VINF_SUCCESS;
+    }
+
+    return rc;
+}
+
+
+/**
+ * Creates an information record for a loaded kernel module.
+ *
+ * @returns IPRT status code.
+ * @retval  VERR_NOT_FOUND if the module has no directory below "module/" in
+ *          sysfs, i.e. it is not loaded.
+ * @retval  VERR_INVALID_POINTER if pszName or phKrnlModInfo is not a valid
+ *          pointer.
+ * @param   pszName        The kernel module name.
+ * @param   phKrnlModInfo  Where to store the handle to the information record
+ *                         on success; release it with RTKrnlModInfoRelease().
+ */
+RTDECL(int) RTKrnlModLoadedQueryInfo(const char *pszName, PRTKRNLMODINFO phKrnlModInfo)
+{
+    AssertPtrReturn(pszName, VERR_INVALID_POINTER);
+    AssertPtrReturn(phKrnlModInfo, VERR_INVALID_POINTER);
+
+    /*
+     * Use the status code returning variant here.  RTLinuxSysFsExists() yields
+     * a boolean, which must not be compared against IPRT status codes.
+     */
+    int rc = RTLinuxSysFsExistsEx("module/%s", pszName);
+    if (rc == VINF_SUCCESS)
+        rc = rtKrnlModLinuxInfoCreate(pszName, phKrnlModInfo);
+    else if (rc == VERR_FILE_NOT_FOUND)
+        rc = VERR_NOT_FOUND;
+
+    return rc;
+}
+
+
+/**
+ * Counts the entries of /sys/module, not counting the standard dot links.
+ *
+ * @returns Number of entries found; 0 if the directory cannot be opened.
+ */
+RTDECL(uint32_t) RTKrnlModLoadedGetCount(void)
+{
+    RTDIR hDir = NULL;
+    int rc = RTDirOpen(&hDir, "/sys/module");
+    if (!RT_SUCCESS(rc))
+        return 0;
+
+    uint32_t   cModules = 0;
+    RTDIRENTRY Entry;
+    for (rc = RTDirRead(hDir, &Entry, NULL); RT_SUCCESS(rc); rc = RTDirRead(hDir, &Entry, NULL))
+    {
+        if (RTDirEntryIsStdDotLink(&Entry))
+            continue;
+        cModules++;
+    }
+
+    RTDirClose(hDir);
+    return cModules;
+}
+
+
+/**
+ * Creates information records for all entries found below /sys/module.
+ *
+ * @returns IPRT status code.
+ * @retval  VERR_BUFFER_OVERFLOW if the array is too small.  *pcEntries then
+ *          holds the number of modules counted before enumerating.
+ * @retval  VERR_INVALID_PARAMETER if pahKrnlModInfo is invalid while
+ *          cEntriesMax is not 0.
+ * @param   pahKrnlModInfo  Array receiving the record handles.  On failure all
+ *                          records created by this call are released again.
+ * @param   cEntriesMax     Number of entries in pahKrnlModInfo.
+ * @param   pcEntries       Where to store the number of handles returned on
+ *                          success, or the number of modules counted up front
+ *                          on failure.  Optional.
+ */
+RTDECL(int) RTKrnlModLoadedQueryInfoAll(PRTKRNLMODINFO pahKrnlModInfo, uint32_t cEntriesMax,
+                                        uint32_t *pcEntries)
+{
+    AssertReturn(VALID_PTR(pahKrnlModInfo) || cEntriesMax == 0, VERR_INVALID_PARAMETER);
+
+    uint32_t cKmodsLoaded = RTKrnlModLoadedGetCount();
+    if (cEntriesMax < cKmodsLoaded)
+    {
+        /* Test the pointer, not the value it points to. */
+        if (pcEntries)
+            *pcEntries = cKmodsLoaded;
+        return VERR_BUFFER_OVERFLOW;
+    }
+
+    RTDIR hDir = NULL;
+    int rc = RTDirOpen(&hDir, "/sys/module");
+    if (RT_SUCCESS(rc))
+    {
+        uint32_t idxKrnlModInfo = 0;
+        RTDIRENTRY DirEnt;
+
+        rc = RTDirRead(hDir, &DirEnt, NULL);
+        while (RT_SUCCESS(rc))
+        {
+            if (!RTDirEntryIsStdDotLink(&DirEnt))
+            {
+                /*
+                 * The directory may have grown since it was counted above, so
+                 * never write beyond the end of the caller's array.
+                 */
+                if (idxKrnlModInfo < cEntriesMax)
+                {
+                    rc = rtKrnlModLinuxInfoCreate(DirEnt.szName, &pahKrnlModInfo[idxKrnlModInfo]);
+                    if (RT_SUCCESS(rc))
+                        idxKrnlModInfo++;
+                }
+                else
+                    rc = VERR_BUFFER_OVERFLOW;
+            }
+
+            if (RT_SUCCESS(rc))
+                rc = RTDirRead(hDir, &DirEnt, NULL);
+        }
+
+        if (rc == VERR_NO_MORE_FILES)
+        {
+            rc = VINF_SUCCESS;
+            /* Report what was actually stored in the array. */
+            if (pcEntries)
+                *pcEntries = idxKrnlModInfo;
+        }
+        else
+        {
+            /* Rollback */
+            while (idxKrnlModInfo-- > 0)
+                RTKrnlModInfoRelease(pahKrnlModInfo[idxKrnlModInfo]);
+
+            if (pcEntries)
+                *pcEntries = cKmodsLoaded;
+        }
+
+        RTDirClose(hDir);
+    }
+
+    return rc;
+}
+
+
+/**
+ * Retains a reference to the given kernel module information record.
+ *
+ * @returns The new reference count; UINT32_MAX for an invalid handle pointer.
+ * @param   hKrnlModInfo  The information record handle.
+ */
+RTDECL(uint32_t) RTKrnlModInfoRetain(RTKRNLMODINFO hKrnlModInfo)
+{
+    PRTKRNLMODINFOINT pInfo = hKrnlModInfo;
+    AssertPtrReturn(pInfo, UINT32_MAX);
+
+    uint32_t const cRefsNow = ASMAtomicIncU32(&pInfo->cRefs);
+    AssertMsg(cRefsNow > 1 && cRefsNow < _1M, ("%#x %p\n", cRefsNow, pInfo));
+    return cRefsNow;
+}
+
+
+/**
+ * Releases a reference to the given kernel module information record,
+ * destroying the record when the last reference is gone.
+ *
+ * @returns The new reference count; 0 for a NULL handle and UINT32_MAX for
+ *          an otherwise invalid handle pointer.
+ * @param   hKrnlModInfo  The information record handle; NULL is ignored.
+ */
+RTDECL(uint32_t) RTKrnlModInfoRelease(RTKRNLMODINFO hKrnlModInfo)
+{
+    PRTKRNLMODINFOINT pInfo = hKrnlModInfo;
+    if (pInfo == NULL)
+        return 0;
+    AssertPtrReturn(pInfo, UINT32_MAX);
+
+    uint32_t const cRefsLeft = ASMAtomicDecU32(&pInfo->cRefs);
+    AssertMsg(cRefsLeft < _1M, ("%#x %p\n", cRefsLeft, pInfo));
+    if (!cRefsLeft)
+        rtKrnlModInfoDestroy(pInfo);
+    return cRefsLeft;
+}
+
+
+/**
+ * Returns the reference count of the kernel module as stored in the record.
+ *
+ * @returns The stored module reference count; 0 for an invalid handle pointer.
+ * @param   hKrnlModInfo  The information record handle.
+ */
+RTDECL(uint32_t) RTKrnlModInfoGetRefCnt(RTKRNLMODINFO hKrnlModInfo)
+{
+    PRTKRNLMODINFOINT pInfo = hKrnlModInfo;
+    AssertPtrReturn(pInfo, 0);
+
+    uint32_t const cModRefs = pInfo->cRefKrnlMod;
+    return cModRefs;
+}
+
+
+/**
+ * Returns the module name stored in the record.
+ *
+ * @returns Pointer to the name inside the record; NULL for an invalid handle
+ *          pointer.
+ * @param   hKrnlModInfo  The information record handle.
+ */
+RTDECL(const char *) RTKrnlModInfoGetName(RTKRNLMODINFO hKrnlModInfo)
+{
+    PRTKRNLMODINFOINT pInfo = hKrnlModInfo;
+    AssertPtrReturn(pInfo, NULL);
+
+    const char *pszName = pInfo->achName;
+    return pszName;
+}
+
+
+/**
+ * Returns the file path of the kernel module.
+ *
+ * This implementation has no path information and always returns NULL.
+ *
+ * @returns NULL.
+ * @param   hKrnlModInfo  The information record handle (only validated).
+ */
+RTDECL(const char *) RTKrnlModInfoGetFilePath(RTKRNLMODINFO hKrnlModInfo)
+{
+    PRTKRNLMODINFOINT pInfo = hKrnlModInfo;
+    AssertPtrReturn(pInfo, NULL);
+
+    const char *pszNoPath = NULL;
+    return pszNoPath;
+}
+
+
+/**
+ * Returns the size of the kernel module as stored in the record.
+ *
+ * @returns The stored module size; 0 for an invalid handle pointer.
+ * @param   hKrnlModInfo  The information record handle.
+ */
+RTDECL(size_t) RTKrnlModInfoGetSize(RTKRNLMODINFO hKrnlModInfo)
+{
+    PRTKRNLMODINFOINT pInfo = hKrnlModInfo;
+    AssertPtrReturn(pInfo, 0);
+
+    size_t const cbModule = pInfo->cbKrnlMod;
+    return cbModule;
+}
+
+
+/**
+ * Returns the load address of the kernel module as stored in the record.
+ *
+ * @returns The stored load address; 0 for an invalid handle pointer.
+ * @param   hKrnlModInfo  The information record handle.
+ */
+RTDECL(RTR0UINTPTR) RTKrnlModInfoGetLoadAddr(RTKRNLMODINFO hKrnlModInfo)
+{
+    PRTKRNLMODINFOINT pInfo = hKrnlModInfo;
+    AssertPtrReturn(pInfo, 0);
+
+    RTR0UINTPTR const uAddr = pInfo->uLoadAddr;
+    return uAddr;
+}
+
+/**
+ * Queries the information record of a module referencing the given one.
+ *
+ * Not implemented here; all arguments are ignored.
+ *
+ * @returns VERR_NOT_IMPLEMENTED, always.
+ * @param   hKrnlModInfo      The information record handle (unused).
+ * @param   idx               Index of the referencing module (unused).
+ * @param   phKrnlModInfoRef  Where the handle would be returned (unused).
+ */
+RTDECL(int) RTKrnlModInfoQueryRefModInfo(RTKRNLMODINFO hKrnlModInfo, uint32_t idx,
+ PRTKRNLMODINFO phKrnlModInfoRef)
+{
+    RT_NOREF3(hKrnlModInfo, idx, phKrnlModInfoRef);
+    return VERR_NOT_IMPLEMENTED;
+}
diff --git a/src/VBox/Runtime/r3/linux/mp-linux.cpp b/src/VBox/Runtime/r3/linux/mp-linux.cpp
new file mode 100644
index 00000000..3622b2a8
--- /dev/null
+++ b/src/VBox/Runtime/r3/linux/mp-linux.cpp
@@ -0,0 +1,318 @@
+/* $Id: mp-linux.cpp $ */
+/** @file
+ * IPRT - Multiprocessor, Linux.
+ */
+
+/*
+ * Copyright (C) 2006-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
+ * VirtualBox OSE distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP RTLOGGROUP_SYSTEM
+#include <stdio.h>
+#include <errno.h>
+
+#include <iprt/mp.h>
+#include "internal/iprt.h"
+
+#include <iprt/alloca.h>
+#include <iprt/cpuset.h>
+#include <iprt/assert.h>
+#include <iprt/string.h>
+#include <iprt/linux/sysfs.h>
+
+
+/**
+ * Internal worker that determines the max possible CPU count.
+ *
+ * The count is one more than the highest index below RTCPUSET_MAX_CPUS for
+ * which a "devices/system/cpu/cpuN" sysfs entry exists, and never less
+ * than 1.  It is determined on the first call and cached in a static variable.
+ *
+ * @returns Max cpus.
+ */
+static RTCPUID rtMpLinuxMaxCpus(void)
+{
+#if 0 /* this doesn't do the right thing :-/ */
+    int cMax = sysconf(_SC_NPROCESSORS_CONF);
+    Assert(cMax >= 1);
+    return cMax;
+#else
+    static uint32_t s_cMax = 0; /* 0 means not determined yet. */
+    if (!s_cMax)
+    {
+        int cMax = 1;
+        for (unsigned iCpu = 0; iCpu < RTCPUSET_MAX_CPUS; iCpu++)
+            if (RTLinuxSysFsExists("devices/system/cpu/cpu%d", iCpu))
+                cMax = iCpu + 1; /* Keeps the highest existing index + 1, gaps are tolerated. */
+        ASMAtomicUoWriteU32((uint32_t volatile *)&s_cMax, cMax);
+        return cMax;
+    }
+    return s_cMax;
+#endif
+}
+
+/**
+ * Internal worker that picks the processor speed in MHz from /proc/cpuinfo.
+ *
+ * Scans the file line by line, remembering the number of the most recent
+ * "processor" line, and takes the value of the first "cpu MHz" line seen
+ * while that number equals idCpu.
+ *
+ * @returns CPU frequency; 0 if /proc/cpuinfo cannot be opened or no matching
+ *          "cpu MHz" value could be converted.
+ * @param   idCpu  The CPU to look up.
+ */
+static uint32_t rtMpLinuxGetFrequency(RTCPUID idCpu)
+{
+    FILE *pFile = fopen("/proc/cpuinfo", "r");
+    if (!pFile)
+        return 0;
+
+    char sz[256];
+    RTCPUID idCpuFound = NIL_RTCPUID;
+    uint32_t Frequency = 0;
+    while (fgets(sz, sizeof(sz), pFile))
+    {
+        char *psz;
+        if ( !strncmp(sz, RT_STR_TUPLE("processor"))
+            && (sz[10] == ' ' || sz[10] == '\t' || sz[10] == ':')
+            && (psz = strchr(sz, ':')))
+        {
+            psz += 2; /* Skip the ':' and the character following it. */
+            int64_t iCpu;
+            int rc = RTStrToInt64Ex(psz, NULL, 0, &iCpu);
+            if (RT_SUCCESS(rc))
+                idCpuFound = iCpu;
+        }
+        else if ( idCpu == idCpuFound
+                 && !strncmp(sz, RT_STR_TUPLE("cpu MHz"))
+                 && (sz[10] == ' ' || sz[10] == '\t' || sz[10] == ':') /* NOTE(review): same index as for "processor" although this keyword is shorter - confirm intended. */
+                 && (psz = strchr(sz, ':')))
+        {
+            psz += 2; /* Skip the ':' and the character following it. */
+            int64_t v;
+            int rc = RTStrToInt64Ex(psz, &psz, 0, &v);
+            if (RT_SUCCESS(rc))
+            {
+                Frequency = v;
+                break;
+            }
+        }
+    }
+    fclose(pFile);
+    return Frequency;
+}
+
+
+/** @todo RTmpCpuId(). */
+
+/**
+ * Converts a CPU identifier to a CPU set index.
+ *
+ * @returns idCpu as the index if it is below the max CPU count, otherwise -1.
+ * @param   idCpu  The CPU identifier.
+ */
+RTDECL(int) RTMpCpuIdToSetIndex(RTCPUID idCpu)
+{
+    if (idCpu < rtMpLinuxMaxCpus())
+        return (int)idCpu;
+    return -1;
+}
+
+
+/**
+ * Converts a CPU set index to a CPU identifier.
+ *
+ * @returns iCpu as the identifier if it is below the max CPU count (negative
+ *          values fail the unsigned comparison), otherwise NIL_RTCPUID.
+ * @param   iCpu  The CPU set index.
+ */
+RTDECL(RTCPUID) RTMpCpuIdFromSetIndex(int iCpu)
+{
+    if ((unsigned)iCpu < rtMpLinuxMaxCpus())
+        return iCpu;
+    return NIL_RTCPUID;
+}
+
+
+/**
+ * Returns the highest possible CPU identifier.
+ *
+ * @returns The max CPU count determined by rtMpLinuxMaxCpus() minus one.
+ */
+RTDECL(RTCPUID) RTMpGetMaxCpuId(void)
+{
+    RTCPUID const cCpusMax = rtMpLinuxMaxCpus();
+    return cCpusMax - 1;
+}
+
+
+/**
+ * Checks whether the given CPU is online.
+ *
+ * Reads the "online" sysfs file of the CPU.  If that fails but the sysfs
+ * directory of the CPU exists, the CPU is taken to be online.
+ *
+ * @returns true if the CPU is considered online, false otherwise.
+ * @param   idCpu  The CPU identifier.
+ */
+RTDECL(bool) RTMpIsCpuOnline(RTCPUID idCpu)
+{
+    /** @todo check if there is a simpler interface than this... */
+    int64_t i = 0;
+    int rc = RTLinuxSysFsReadIntFile(0, &i, "devices/system/cpu/cpu%d/online", (int)idCpu);
+    if (   RT_FAILURE(rc)
+        && RTLinuxSysFsExists("devices/system/cpu/cpu%d", (int)idCpu))
+    {
+        /** @todo Assert(!RTLinuxSysFsExists("devices/system/cpu/cpu%d/online",
+         *               (int)idCpu));
+         * Unfortunately, the online file wasn't always world readable (centos
+         * 2.6.18-164). */
+        i = 1;
+        rc = VINF_SUCCESS;
+    }
+
+    /* i is 64 bits wide, so it needs a 64-bit format type rather than %d. */
+    AssertMsg(i == 0 || i == -1 || i == 1, ("i=%RI64\n", i));
+    return RT_SUCCESS(rc) && i != 0;
+}
+
+
+/**
+ * Checks whether the given CPU is possible, i.e. whether it has a
+ * "devices/system/cpu/cpuN" directory in sysfs.
+ *
+ * @returns true if the sysfs entry exists, false if not.
+ * @param   idCpu  The CPU identifier.
+ */
+RTDECL(bool) RTMpIsCpuPossible(RTCPUID idCpu)
+{
+    /** @todo check this up with hotplugging! */
+    bool const fPresent = RTLinuxSysFsExists("devices/system/cpu/cpu%d", (int)idCpu);
+    return fPresent;
+}
+
+
+/**
+ * Fills the given set with all possible CPUs.
+ *
+ * @returns pSet.
+ * @param   pSet  The set to fill; it is emptied first.
+ */
+RTDECL(PRTCPUSET) RTMpGetSet(PRTCPUSET pSet)
+{
+    RTCpuSetEmpty(pSet);
+
+    RTCPUID const cCpusMax = rtMpLinuxMaxCpus();
+    RTCPUID       idCpu    = 0;
+    while (idCpu < cCpusMax)
+    {
+        if (RTMpIsCpuPossible(idCpu))
+            RTCpuSetAdd(pSet, idCpu);
+        idCpu++;
+    }
+    return pSet;
+}
+
+
+/**
+ * Returns the number of possible CPUs.
+ *
+ * @returns Number of members in the set produced by RTMpGetSet().
+ */
+RTDECL(RTCPUID) RTMpGetCount(void)
+{
+    RTCPUSET PossibleSet;
+    PRTCPUSET pSet = RTMpGetSet(&PossibleSet);
+    NOREF(pSet);
+    return RTCpuSetCount(&PossibleSet);
+}
+
+
+/**
+ * Returns the number of distinct cores among the possible CPUs.
+ *
+ * A core is identified by the pair of values read from the "topology/core_id"
+ * and "topology/physical_package_id" sysfs files of a CPU.  CPUs for which
+ * either file cannot be read are not counted.
+ *
+ * @returns Number of distinct (core, package) pairs found.
+ */
+RTDECL(RTCPUID) RTMpGetCoreCount(void)
+{
+    RTCPUID const cCpusMax    = rtMpLinuxMaxCpus();
+    uint32_t     *pauCoreIds  = (uint32_t *)alloca(sizeof(pauCoreIds[0]) * (cCpusMax + 1));
+    uint32_t     *pauPckgIds  = (uint32_t *)alloca(sizeof(pauPckgIds[0]) * (cCpusMax + 1));
+    uint32_t      cUnique     = 0;
+
+    for (RTCPUID idCpu = 0; idCpu < cCpusMax; idCpu++)
+    {
+        if (!RTMpIsCpuPossible(idCpu))
+            continue;
+
+        int64_t idCore = 0;
+        int64_t idPckg = 0;
+        int rc = RTLinuxSysFsReadIntFile(0, &idCore, "devices/system/cpu/cpu%d/topology/core_id", (int)idCpu);
+        if (RT_SUCCESS(rc))
+            rc = RTLinuxSysFsReadIntFile(0, &idPckg, "devices/system/cpu/cpu%d/topology/physical_package_id", (int)idCpu);
+        if (!RT_SUCCESS(rc))
+            continue;
+
+        /* Look for a pair recorded earlier; the IDs are compared as 32-bit values. */
+        uint32_t const uCore = (uint32_t)idCore;
+        uint32_t const uPckg = (uint32_t)idPckg;
+        bool           fSeen = false;
+        for (uint32_t iSeen = 0; iSeen < cUnique && !fSeen; iSeen++)
+            fSeen = pauCoreIds[iSeen] == uCore
+                 && pauPckgIds[iSeen] == uPckg;
+
+        if (!fSeen)
+        {
+            pauCoreIds[cUnique] = uCore;
+            pauPckgIds[cUnique] = uPckg;
+            cUnique++;
+        }
+    }
+
+    Assert(cUnique > 0);
+    return cUnique;
+}
+
+
+/**
+ * Fills the given set with all online CPUs.
+ *
+ * @returns pSet.
+ * @param   pSet  The set to fill; it is emptied first.
+ */
+RTDECL(PRTCPUSET) RTMpGetOnlineSet(PRTCPUSET pSet)
+{
+    RTCpuSetEmpty(pSet);
+
+    RTCPUID const cCpusMax = rtMpLinuxMaxCpus();
+    RTCPUID       idCpu    = 0;
+    while (idCpu < cCpusMax)
+    {
+        if (RTMpIsCpuOnline(idCpu))
+            RTCpuSetAdd(pSet, idCpu);
+        idCpu++;
+    }
+    return pSet;
+}
+
+
+/**
+ * Returns the number of online CPUs.
+ *
+ * @returns Number of members in the set produced by RTMpGetOnlineSet().
+ */
+RTDECL(RTCPUID) RTMpGetOnlineCount(void)
+{
+    RTCPUSET OnlineSet;
+    PRTCPUSET pSet = RTMpGetOnlineSet(&OnlineSet);
+    NOREF(pSet);
+    return RTCpuSetCount(&OnlineSet);
+}
+
+
+/**
+ * Returns the number of distinct cores among the online CPUs.
+ *
+ * A core is identified by the pair of values read from the "topology/core_id"
+ * and "topology/physical_package_id" sysfs files of a CPU.  CPUs for which
+ * either file cannot be read are not counted.
+ *
+ * @returns Number of distinct (core, package) pairs found.
+ */
+RTDECL(RTCPUID) RTMpGetOnlineCoreCount(void)
+{
+    RTCPUID cMax = rtMpLinuxMaxCpus();
+    uint32_t *paidCores = (uint32_t *)alloca(sizeof(paidCores[0]) * (cMax + 1));
+    uint32_t *paidPckgs = (uint32_t *)alloca(sizeof(paidPckgs[0]) * (cMax + 1));
+    uint32_t cCores = 0;
+    for (RTCPUID idCpu = 0; idCpu < cMax; idCpu++)
+    {
+        if (RTMpIsCpuOnline(idCpu))
+        {
+            int64_t idCore = 0;
+            int64_t idPckg = 0;
+
+            int rc = RTLinuxSysFsReadIntFile(0, &idCore, "devices/system/cpu/cpu%d/topology/core_id", (int)idCpu);
+            if (RT_SUCCESS(rc))
+                rc = RTLinuxSysFsReadIntFile(0, &idPckg, "devices/system/cpu/cpu%d/topology/physical_package_id", (int)idCpu);
+
+            if (RT_SUCCESS(rc))
+            {
+                uint32_t i;
+
+                /*
+                 * Compare as 32-bit values, exactly like they are stored (and
+                 * like RTMpGetCoreCount does).  Comparing the stored uint32_t
+                 * against the raw int64_t would never match a negative ID
+                 * such as -1, counting every such CPU as a core of its own.
+                 */
+                for (i = 0; i < cCores; i++)
+                    if (   paidCores[i] == (uint32_t)idCore
+                        && paidPckgs[i] == (uint32_t)idPckg)
+                        break;
+                if (i >= cCores)
+                {
+                    paidCores[cCores] = (uint32_t)idCore;
+                    paidPckgs[cCores] = (uint32_t)idPckg;
+                    cCores++;
+                }
+            }
+        }
+    }
+    Assert(cCores > 0);
+    return cCores;
+}
+
+
+
+/**
+ * Returns the current frequency of the given CPU in MHz.
+ *
+ * The value comes from the "cpufreq/cpuinfo_cur_freq" sysfs file (kHz) and,
+ * if that cannot be read, from /proc/cpuinfo.
+ *
+ * @returns The frequency in MHz, rounded up from kHz.
+ * @param   idCpu  The CPU identifier.
+ */
+RTDECL(uint32_t) RTMpGetCurFrequency(RTCPUID idCpu)
+{
+    int64_t cKiloHertz = 0;
+    if (RT_FAILURE(RTLinuxSysFsReadIntFile(0, &cKiloHertz, "devices/system/cpu/cpu%d/cpufreq/cpuinfo_cur_freq", (int)idCpu)))
+    {
+        /*
+         * The file may be just unreadable - in that case use plan B, i.e.
+         * /proc/cpuinfo to get the data we want. The assumption is that if
+         * cpuinfo_cur_freq doesn't exist then the speed won't change, and
+         * thus cur == max. If it does exist then cpuinfo contains the
+         * current frequency.
+         */
+        cKiloHertz = rtMpLinuxGetFrequency(idCpu) * 1000;
+    }
+
+    return (cKiloHertz + 999) / 1000;
+}
+
+
+/**
+ * Returns the maximum frequency of the given CPU in MHz.
+ *
+ * The value comes from the "cpufreq/cpuinfo_max_freq" sysfs file (kHz).  If
+ * reading fails, /proc/cpuinfo is consulted only when the sysfs file does not
+ * exist; otherwise 0 is reported.
+ *
+ * @returns The frequency in MHz, rounded up from kHz.
+ * @param   idCpu  The CPU identifier.
+ */
+RTDECL(uint32_t) RTMpGetMaxFrequency(RTCPUID idCpu)
+{
+    int64_t cKiloHertz = 0;
+    if (RT_FAILURE(RTLinuxSysFsReadIntFile(0, &cKiloHertz, "devices/system/cpu/cpu%d/cpufreq/cpuinfo_max_freq", (int)idCpu)))
+    {
+        /*
+         * Check if the file isn't there - if it is there, then /proc/cpuinfo
+         * would provide current frequency information, which is wrong.
+         */
+        bool const fHaveFile = RTLinuxSysFsExists("devices/system/cpu/cpu%d/cpufreq/cpuinfo_max_freq", (int)idCpu);
+        if (fHaveFile)
+            cKiloHertz = 0;
+        else
+            cKiloHertz = rtMpLinuxGetFrequency(idCpu) * 1000;
+    }
+
+    return (cKiloHertz + 999) / 1000;
+}
diff --git a/src/VBox/Runtime/r3/linux/rtProcInitExePath-linux.cpp b/src/VBox/Runtime/r3/linux/rtProcInitExePath-linux.cpp
new file mode 100644
index 00000000..dd3a7a4f
--- /dev/null
+++ b/src/VBox/Runtime/r3/linux/rtProcInitExePath-linux.cpp
@@ -0,0 +1,69 @@
+/* $Id: rtProcInitExePath-linux.cpp $ */
+/** @file
+ * IPRT - rtProcInitName, Linux.
+ */
+
+/*
+ * Copyright (C) 2006-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
+ * VirtualBox OSE distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP RTLOGGROUP_PROCESS
+#include <unistd.h>
+#include <errno.h>
+
+#include <iprt/string.h>
+#include <iprt/assert.h>
+#include <iprt/errcore.h>
+#include <iprt/path.h>
+#include "internal/process.h"
+#include "internal/path.h"
+
+/**
+ * Determines the path of the running executable from the /proc/self/exe link.
+ *
+ * @returns IPRT status code: the status of the path conversion or copy when
+ *          the link could be read, otherwise errno converted with
+ *          RTErrConvertFromErrno().
+ * @param   pszPath  Buffer receiving the zero terminated path.
+ * @param   cchPath  Size of the buffer; one byte is kept for the terminator.
+ */
+DECLHIDDEN(int) rtProcInitExePath(char *pszPath, size_t cchPath)
+{
+    /*
+     * Read the /proc/self/exe link, convert to native and return it.
+     *
+     * NOTE(review): readlink() is limited to cchPath - 1 bytes, so the upper
+     * bound test below looks like it cannot detect a truncated link - confirm
+     * that callers always pass a sufficiently large buffer.
+     */
+    int cchLink = readlink("/proc/self/exe", pszPath, cchPath - 1);
+    if (cchLink > 0 && (size_t)cchLink <= cchPath - 1)
+    {
+        pszPath[cchLink] = '\0'; /* readlink() does not terminate the string. */
+
+        char const *pszTmp;
+        int rc = rtPathFromNative(&pszTmp, pszPath, NULL);
+        AssertMsgRCReturn(rc, ("rc=%Rrc pszLink=\"%s\"\nhex: %.*Rhxs\n", rc, pszPath, cchLink, pszPath), rc);
+        if (pszTmp != pszPath) /* The conversion returned a different string, copy it back. */
+        {
+            rc = RTStrCopy(pszPath, cchPath, pszTmp);
+            rtPathFreeIprt(pszTmp, pszPath);
+        }
+        return rc;
+    }
+
+    int err = errno;
+    int rc = RTErrConvertFromErrno(err);
+    AssertMsgFailed(("rc=%Rrc err=%d cchLink=%d\n", rc, err, cchLink));
+    return rc;
+}
+
diff --git a/src/VBox/Runtime/r3/linux/sched-linux.cpp b/src/VBox/Runtime/r3/linux/sched-linux.cpp
new file mode 100644
index 00000000..d792d07d
--- /dev/null
+++ b/src/VBox/Runtime/r3/linux/sched-linux.cpp
@@ -0,0 +1,614 @@
+/* $Id: sched-linux.cpp $ */
+/** @file
+ * IPRT - Scheduling, POSIX.
+ */
+
+/*
+ * Copyright (C) 2006-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
+ * VirtualBox OSE distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ */
+
+/*
+ * !WARNING!
+ *
+ * When talking about lowering and raising priority, we do *NOT* refer to
+ * the common direction priority values take on unix systems (lower means
+ * higher). So, when we raise the priority of a linux thread the nice
+ * value will decrease, and when we lower the priority the nice value
+ * will increase. Confusing, right?
+ *
+ * !WARNING!
+ */
+
+
+
+/** @def THREAD_LOGGING
+ * Be very careful with enabling this, it may cause deadlocks when combined
+ * with the 'thread' logging prefix.
+ */
+#ifdef DOXYGEN_RUNNING
+# define THREAD_LOGGING
+#endif
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP RTLOGGROUP_THREAD
+#include <errno.h>
+#include <pthread.h>
+#include <sched.h>
+#include <unistd.h>
+#include <sys/resource.h>
+
+#include <iprt/thread.h>
+#include <iprt/process.h>
+#include <iprt/semaphore.h>
+#include <iprt/string.h>
+#include <iprt/assert.h>
+#include <iprt/log.h>
+#include <iprt/err.h>
+#include "internal/sched.h"
+#include "internal/thread.h"
+
+
+/*********************************************************************************************************************************
+* Structures and Typedefs *
+*********************************************************************************************************************************/
+
+/** Scheduler attributes for one thread type.  The priority tables below are
+ * arrays of these, indexed by RTTHREADTYPE.
+ * @internal */
+typedef struct PROCPRIORITYTYPE
+{
+ /** The thread type this entry is for.  It duplicates the array index so
+ * the code can assert that a table is indexed correctly. */
+ RTTHREADTYPE enmType;
+ /** The nice value delta for the thread type.  It is added to
+ * PROCPRIORITY::iDelta and the sum is handed to setpriority(), so a
+ * positive value means a lower priority. */
+ int iPriority;
+} PROCPRIORITYTYPE;
+
+
+/**
+ * Configuration of one process priority: a base nice value plus the table of
+ * per thread type deltas to apply on top of it.
+ * @internal
+ */
+typedef struct
+{
+ /** The process priority this configuration implements. */
+ RTPROCPRIORITY enmPriority;
+ /** The name of this priority (used when dumping the configuration to the log). */
+ const char *pszName;
+ /** The process nice value. */
+ int iNice;
+ /** The delta added to PROCPRIORITYTYPE::iPriority before calling setpriority(). */
+ int iDelta;
+ /** Array of scheduler attributes, one entry per thread type (indexed by RTTHREADTYPE). */
+ const PROCPRIORITYTYPE *paTypes;
+} PROCPRIORITY;
+
+
+/**
+ * Saved priority settings, filled in by rtSchedNativeSave() and consumed by
+ * rtSchedNativeRestore().
+ * @internal
+ */
+typedef struct
+{
+ /** The nice value returned by getpriority(PRIO_PROCESS, 0). */
+ int iPriority;
+ /** Process level: the parameters returned by sched_getparam(0). */
+ struct sched_param SchedParam;
+ /** Process level: the policy returned by sched_getscheduler(0). */
+ int iPolicy;
+ /** pthread level: the parameters returned by pthread_getschedparam(). */
+ struct sched_param PthreadSchedParam;
+ /** pthread level: the policy returned by pthread_getschedparam(). */
+ int iPthreadPolicy;
+} SAVEDPRIORITY, *PSAVEDPRIORITY;
+
+
+/*********************************************************************************************************************************
+* Global Variables *
+*********************************************************************************************************************************/
+/**
+ * Deltas for a process in which we are not restricted
+ * to only be lowering the priority.
+ *
+ * Indexed by RTTHREADTYPE.  The values are nice value deltas, so the negative
+ * entries give a thread type a higher priority than RTTHREADTYPE_DEFAULT and
+ * the positive entries a lower one.
+ */
+static const PROCPRIORITYTYPE g_aTypesLinuxFree[RTTHREADTYPE_END] =
+{
+ { RTTHREADTYPE_INVALID, -999999999 },
+ { RTTHREADTYPE_INFREQUENT_POLLER, +3 },
+ { RTTHREADTYPE_MAIN_HEAVY_WORKER, +2 },
+ { RTTHREADTYPE_EMULATION, +1 },
+ { RTTHREADTYPE_DEFAULT, 0 },
+ { RTTHREADTYPE_GUI, 0 },
+ { RTTHREADTYPE_MAIN_WORKER, 0 },
+ { RTTHREADTYPE_VRDP_IO, -1 },
+ { RTTHREADTYPE_DEBUGGER, -1 },
+ { RTTHREADTYPE_MSG_PUMP, -2 },
+ { RTTHREADTYPE_IO, -3 },
+ { RTTHREADTYPE_TIMER, -4 }
+};
+
+/**
+ * Deltas for a process in which we are restricted and can only lower the priority.
+ *
+ * Indexed by RTTHREADTYPE.  The positive (priority lowering) deltas are the
+ * same as in g_aTypesLinuxFree, while every thread type that has a negative
+ * delta there gets 0 here.
+ */
+static const PROCPRIORITYTYPE g_aTypesLinuxRestricted[RTTHREADTYPE_END] =
+{
+ { RTTHREADTYPE_INVALID, -999999999 },
+ { RTTHREADTYPE_INFREQUENT_POLLER, +3 },
+ { RTTHREADTYPE_MAIN_HEAVY_WORKER, +2 },
+ { RTTHREADTYPE_EMULATION, +1 },
+ { RTTHREADTYPE_DEFAULT, 0 },
+ { RTTHREADTYPE_GUI, 0 },
+ { RTTHREADTYPE_MAIN_WORKER, 0 },
+ { RTTHREADTYPE_VRDP_IO, 0 },
+ { RTTHREADTYPE_DEBUGGER, 0 },
+ { RTTHREADTYPE_MSG_PUMP, 0 },
+ { RTTHREADTYPE_IO, 0 },
+ { RTTHREADTYPE_TIMER, 0 }
+};
+
+/**
+ * All threads have the same priority.
+ *
+ * This is typically chosen when we find that we can't raise the priority
+ * to the process default of a thread created by a low priority thread.
+ *
+ * Indexed by RTTHREADTYPE; every valid thread type has a delta of 0.
+ */
+static const PROCPRIORITYTYPE g_aTypesLinuxFlat[RTTHREADTYPE_END] =
+{
+ { RTTHREADTYPE_INVALID, -999999999 },
+ { RTTHREADTYPE_INFREQUENT_POLLER, 0 },
+ { RTTHREADTYPE_MAIN_HEAVY_WORKER, 0 },
+ { RTTHREADTYPE_EMULATION, 0 },
+ { RTTHREADTYPE_DEFAULT, 0 },
+ { RTTHREADTYPE_GUI, 0 },
+ { RTTHREADTYPE_MAIN_WORKER, 0 },
+ { RTTHREADTYPE_VRDP_IO, 0 },
+ { RTTHREADTYPE_DEBUGGER, 0 },
+ { RTTHREADTYPE_MSG_PUMP, 0 },
+ { RTTHREADTYPE_IO, 0 },
+ { RTTHREADTYPE_TIMER, 0 }
+};
+
+/**
+ * Process and thread level priority, full access at thread level.
+ *
+ * These are the candidate configurations for the process priorities.  The
+ * order matters: rtProcNativeSetPriority() walks the array from the top and
+ * selects the first entry with a matching enmPriority which the validator
+ * thread manages to apply for all thread types.
+ *
+ * The initializer columns are: enmPriority, pszName, iNice, iDelta, paTypes.
+ */
+static const PROCPRIORITY g_aUnixConfigs[] =
+{
+ { RTPROCPRIORITY_FLAT, "Flat", 0, 0, g_aTypesLinuxFlat },
+ { RTPROCPRIORITY_LOW, "Low", 9, 9, g_aTypesLinuxFree },
+ { RTPROCPRIORITY_LOW, "Low", 9, 9, g_aTypesLinuxFlat },
+ { RTPROCPRIORITY_LOW, "Low", 15, 15, g_aTypesLinuxFree },
+ { RTPROCPRIORITY_LOW, "Low", 15, 15, g_aTypesLinuxFlat },
+ { RTPROCPRIORITY_LOW, "Low", 17, 17, g_aTypesLinuxFree },
+ { RTPROCPRIORITY_LOW, "Low", 17, 17, g_aTypesLinuxFlat },
+ { RTPROCPRIORITY_LOW, "Low", 19, 19, g_aTypesLinuxFlat },
+ { RTPROCPRIORITY_LOW, "Low", 9, 9, g_aTypesLinuxRestricted },
+ { RTPROCPRIORITY_LOW, "Low", 15, 15, g_aTypesLinuxRestricted },
+ { RTPROCPRIORITY_LOW, "Low", 17, 17, g_aTypesLinuxRestricted },
+ { RTPROCPRIORITY_NORMAL, "Normal", 0, 0, g_aTypesLinuxFree },
+ { RTPROCPRIORITY_NORMAL, "Normal", 0, 0, g_aTypesLinuxRestricted },
+ { RTPROCPRIORITY_NORMAL, "Normal", 0, 0, g_aTypesLinuxFlat },
+ { RTPROCPRIORITY_HIGH, "High", -9, -9, g_aTypesLinuxFree },
+ { RTPROCPRIORITY_HIGH, "High", -7, -7, g_aTypesLinuxFree },
+ { RTPROCPRIORITY_HIGH, "High", -5, -5, g_aTypesLinuxFree },
+ { RTPROCPRIORITY_HIGH, "High", -3, -3, g_aTypesLinuxFree },
+ { RTPROCPRIORITY_HIGH, "High", -1, -1, g_aTypesLinuxFree },
+ { RTPROCPRIORITY_HIGH, "High", -9, -9, g_aTypesLinuxRestricted },
+ { RTPROCPRIORITY_HIGH, "High", -7, -7, g_aTypesLinuxRestricted },
+ { RTPROCPRIORITY_HIGH, "High", -5, -5, g_aTypesLinuxRestricted },
+ { RTPROCPRIORITY_HIGH, "High", -3, -3, g_aTypesLinuxRestricted },
+ { RTPROCPRIORITY_HIGH, "High", -1, -1, g_aTypesLinuxRestricted },
+ { RTPROCPRIORITY_HIGH, "High", -9, -9, g_aTypesLinuxFlat },
+ { RTPROCPRIORITY_HIGH, "High", -7, -7, g_aTypesLinuxFlat },
+ { RTPROCPRIORITY_HIGH, "High", -5, -5, g_aTypesLinuxFlat },
+ { RTPROCPRIORITY_HIGH, "High", -3, -3, g_aTypesLinuxFlat },
+ { RTPROCPRIORITY_HIGH, "High", -1, -1, g_aTypesLinuxFlat }
+};
+
+/**
+ * The dynamic default priority configuration.
+ *
+ * This will be recalculated at runtime depending on what the
+ * system allows us to do and what the current priority is:
+ * rtSchedNativeCalcDefaultPriority() updates paTypes, iNice and iDelta.
+ */
+static PROCPRIORITY g_aDefaultPriority =
+{
+ RTPROCPRIORITY_LOW, "Default", 0, 0, g_aTypesLinuxRestricted
+};
+
+/** Pointer to the current priority configuration.
+ * Changed by rtProcNativeSetPriority(). */
+static const PROCPRIORITY *g_pProcessPriority = &g_aDefaultPriority;
+
+/** Set if we can raise the priority of a thread beyond the default.
+ *
+ * It might mean we have the CAP_SYS_NICE capability or that the
+ * process's RLIMIT_NICE is higher than the priority of the thread
+ * calculating the defaults.
+ */
+static bool g_fCanRaisePriority = false;
+
+/** Set if we can restore the priority after having temporarily lowered or raised it. */
+static bool g_fCanRestorePriority = false;
+
+/** Set if we can NOT raise the priority to the process default in a thread
+ * created by a thread running below the process default.
+ */
+static bool g_fScrewedUpMaxPriorityLimitInheritance = true;
+
+/** The highest priority we can set, i.e. the lowest nice value the prober
+ * thread managed to apply. */
+static int g_iMaxPriority = 0;
+
+/** The lowest priority we can set, i.e. the highest nice value the prober
+ * thread managed to apply. */
+static int g_iMinPriority = 19;
+
+/** Set when we've successfully determined the capabilities of the process and kernel. */
+static bool g_fInitialized = false;
+
+
+
+/*********************************************************************************************************************************
+* Internal Functions *
+*********************************************************************************************************************************/
+
+
+/**
+ * Takes a snapshot of the scheduling attributes of the calling thread and
+ * process: the nice value, the process level scheduling parameters and
+ * policy, and the pthread level policy and parameters.
+ *
+ * @param   pSave   Where to store the attributes.
+ */
+static void rtSchedNativeSave(PSAVEDPRIORITY pSave)
+{
+    /* Start out with every byte of the structure set to 0xff. */
+    memset(pSave, 0xff, sizeof(*pSave));
+
+    /* The nice value.  errno is cleared first and checked afterwards. */
+    errno = 0;
+    int const iNice = getpriority(PRIO_PROCESS, 0 /* current process */);
+    pSave->iPriority = iNice;
+    Assert(errno == 0);
+
+    /* Process level scheduling parameters. */
+    errno = 0;
+    sched_getparam(0 /* current process */, &pSave->SchedParam);
+    Assert(errno == 0);
+
+    /* Process level scheduling policy. */
+    errno = 0;
+    int const iPolicy = sched_getscheduler(0 /* current process */);
+    pSave->iPolicy = iPolicy;
+    Assert(errno == 0);
+
+    /* pthread level policy and parameters. */
+    int const rcPthread = pthread_getschedparam(pthread_self(), &pSave->iPthreadPolicy, &pSave->PthreadSchedParam);
+    Assert(rcPthread == 0); NOREF(rcPthread);
+}
+
+
+/**
+ * Restores scheduling attributes.
+ * Most of this won't work right, but anyway...
+ *
+ * This is best effort only: the return values of all four calls are ignored
+ * and nothing is reported back to the caller.
+ *
+ * @param pSave The attributes to restore, as stored by rtSchedNativeSave().
+ */
+static void rtSchedNativeRestore(PSAVEDPRIORITY pSave)
+{
+ setpriority(PRIO_PROCESS, 0, pSave->iPriority);
+ sched_setscheduler(0, pSave->iPolicy, &pSave->SchedParam);
+ sched_setparam(0, &pSave->SchedParam);
+ pthread_setschedparam(pthread_self(), pSave->iPthreadPolicy, &pSave->PthreadSchedParam);
+}
+
+
+/**
+ * Runs the given function on a freshly created pthread and waits for it to
+ * complete.
+ *
+ * We cannot use RTThreadCreate since we're already owner of the RW lock.
+ *
+ * @returns The return value of the thread function cast to int, or the IPRT
+ *          status converted from the pthread error code if creating or
+ *          joining the thread fails.
+ * @param   pfnThread   The thread function.
+ * @param   pvArg       The argument passed on to the thread function.
+ */
+static int rtSchedRunThread(void *(*pfnThread)(void *pvArg), void *pvArg)
+{
+    /*
+     * Spawn the thread, bailing out right away if that fails.
+     */
+    pthread_t hNative;
+    int iErr = pthread_create(&hNative, NULL, pfnThread, pvArg);
+    if (iErr)
+        return RTErrConvertFromErrno(iErr);
+
+    /*
+     * Join it, retrying for as long as the join reports EINTR.
+     */
+    void *pvThreadRet = (void *)-1;
+    do
+        iErr = pthread_join(hNative, &pvThreadRet);
+    while (iErr == EINTR);
+
+    return iErr ? RTErrConvertFromErrno(iErr) : (int)(uintptr_t)pvThreadRet;
+}
+
+
+/**
+ * Dumps the probed capabilities and the current priority configuration
+ * (including the delta of every thread type) to the log.
+ *
+ * Does nothing unless THREAD_LOGGING is defined.
+ */
+static void rtSchedDumpPriority(void)
+{
+#ifdef THREAD_LOGGING
+ Log(("Priority: g_fCanRaisePriority=%RTbool g_fCanRestorePriority=%RTbool g_fScrewedUpMaxPriorityLimitInheritance=%RTbool\n",
+ g_fCanRaisePriority, g_fCanRestorePriority, g_fScrewedUpMaxPriorityLimitInheritance));
+ Log(("Priority: g_iMaxPriority=%d g_iMinPriority=%d\n", g_iMaxPriority, g_iMinPriority));
+ Log(("Priority: enmPriority=%d \"%s\" iNice=%d iDelta=%d\n",
+ g_pProcessPriority->enmPriority,
+ g_pProcessPriority->pszName,
+ g_pProcessPriority->iNice,
+ g_pProcessPriority->iDelta));
+ Log(("Priority: %2d INFREQUENT_POLLER = %d\n", RTTHREADTYPE_INFREQUENT_POLLER, g_pProcessPriority->paTypes[RTTHREADTYPE_INFREQUENT_POLLER].iPriority));
+ Log(("Priority: %2d MAIN_HEAVY_WORKER = %d\n", RTTHREADTYPE_MAIN_HEAVY_WORKER, g_pProcessPriority->paTypes[RTTHREADTYPE_MAIN_HEAVY_WORKER].iPriority));
+ Log(("Priority: %2d EMULATION = %d\n", RTTHREADTYPE_EMULATION , g_pProcessPriority->paTypes[RTTHREADTYPE_EMULATION ].iPriority));
+ Log(("Priority: %2d DEFAULT = %d\n", RTTHREADTYPE_DEFAULT , g_pProcessPriority->paTypes[RTTHREADTYPE_DEFAULT ].iPriority));
+ Log(("Priority: %2d GUI = %d\n", RTTHREADTYPE_GUI , g_pProcessPriority->paTypes[RTTHREADTYPE_GUI ].iPriority));
+ Log(("Priority: %2d MAIN_WORKER = %d\n", RTTHREADTYPE_MAIN_WORKER , g_pProcessPriority->paTypes[RTTHREADTYPE_MAIN_WORKER ].iPriority));
+ Log(("Priority: %2d VRDP_IO = %d\n", RTTHREADTYPE_VRDP_IO , g_pProcessPriority->paTypes[RTTHREADTYPE_VRDP_IO ].iPriority));
+ Log(("Priority: %2d DEBUGGER = %d\n", RTTHREADTYPE_DEBUGGER , g_pProcessPriority->paTypes[RTTHREADTYPE_DEBUGGER ].iPriority));
+ Log(("Priority: %2d MSG_PUMP = %d\n", RTTHREADTYPE_MSG_PUMP , g_pProcessPriority->paTypes[RTTHREADTYPE_MSG_PUMP ].iPriority));
+ Log(("Priority: %2d IO = %d\n", RTTHREADTYPE_IO , g_pProcessPriority->paTypes[RTTHREADTYPE_IO ].iPriority));
+ Log(("Priority: %2d TIMER = %d\n", RTTHREADTYPE_TIMER , g_pProcessPriority->paTypes[RTTHREADTYPE_TIMER ].iPriority));
+#endif
+}
+
+
+/**
+ * Sub-prober thread: checks whether a thread which was created by a low
+ * priority thread is able to change its priority back to that of the parent
+ * before it was lowered.
+ *
+ * @returns (void *)0 on success, (void *)-1 if one of the setpriority() calls
+ *          fails.
+ * @param   pvUser      The priority of the parent before it was lowered (cast to int).
+ */
+static void *rtSchedNativeSubProberThread(void *pvUser)
+{
+    int const iCurNice = getpriority(PRIO_PROCESS, 0);
+    Assert(iCurNice == g_iMinPriority);
+
+    /* Both adjustments must work; the second isn't attempted if the first fails. */
+    bool const fOk = setpriority(PRIO_PROCESS, 0, iCurNice + 1) == 0
+                  && setpriority(PRIO_PROCESS, 0, (int)(intptr_t)pvUser) == 0;
+    return fOk ? (void *)0 : (void *)-1;
+}
+
+
+/**
+ * The prober thread.
+ * We don't want to mess with the priority of the calling thread.
+ *
+ * The findings are stored in g_iMaxPriority, g_iMinPriority,
+ * g_fCanRaisePriority, g_fCanRestorePriority and
+ * g_fScrewedUpMaxPriorityLimitInheritance.
+ *
+ * @returns VINF_SUCCESS cast to a pointer.
+ * @param pvUser Not used.
+ *
+ * @remark This is pretty presumptive stuff, but if it works on Linux and
+ * FreeBSD it does what I want.
+ */
+static void *rtSchedNativeProberThread(void *pvUser)
+{
+ NOREF(pvUser);
+ SAVEDPRIORITY SavedPriority;
+ rtSchedNativeSave(&SavedPriority);
+
+ /*
+ * Check if we can get higher priority (typically only root can do this).
+ * (Won't work right if our priority is -19 to start with, but what the heck.)
+ *
+ * We assume that the priority range is -19 to 19. Should probably find the right
+ * define for this.
+ *
+ * Note that the loop steps the nice value down one at a time until
+ * setpriority() fails or -20 has been tried.
+ */
+ int iStart = getpriority(PRIO_PROCESS, 0);
+ int i = iStart;
+ while (i-- > -20)
+ if (setpriority(PRIO_PROCESS, 0, i))
+ break;
+ g_iMaxPriority = getpriority(PRIO_PROCESS, 0);
+ g_fCanRaisePriority = g_iMaxPriority < iStart;
+ g_fCanRestorePriority = setpriority(PRIO_PROCESS, 0, iStart) == 0;
+
+ /*
+ * Check if we can temporarily lower the thread priority.
+ * Again, we assume we're not at the extreme end of the priority scale.
+ *
+ * The loop steps the nice value up one at a time until setpriority()
+ * fails or 19 has been tried.
+ */
+ iStart = getpriority(PRIO_PROCESS, 0);
+ i = iStart;
+ while (i++ < 19)
+ if (setpriority(PRIO_PROCESS, 0, i))
+ break;
+ g_iMinPriority = getpriority(PRIO_PROCESS, 0);
+ if ( setpriority(PRIO_PROCESS, 0, iStart)
+ || getpriority(PRIO_PROCESS, 0) != iStart)
+ g_fCanRestorePriority = false;
+ if (g_iMinPriority == g_iMaxPriority)
+ g_fCanRestorePriority = g_fCanRaisePriority = false;
+
+ /*
+ * Check what happens to child threads when the parent lowers the
+ * priority when it's being created.
+ *
+ * The inheritance is only considered sane if the sub-prober thread,
+ * started while we are running at g_iMinPriority, succeeds.
+ */
+ iStart = getpriority(PRIO_PROCESS, 0);
+ g_fScrewedUpMaxPriorityLimitInheritance = true;
+ if ( g_fCanRestorePriority
+ && !setpriority(PRIO_PROCESS, 0, g_iMinPriority)
+ && iStart != g_iMinPriority)
+ {
+ if (rtSchedRunThread(rtSchedNativeSubProberThread, (void *)(intptr_t)iStart) == 0)
+ g_fScrewedUpMaxPriorityLimitInheritance = false;
+ }
+
+ /* done */
+ rtSchedNativeRestore(&SavedPriority);
+ return (void *)VINF_SUCCESS;
+}
+
+
+/**
+ * Calculate the scheduling properties for all the threads in the default
+ * process priority, assuming the current thread has the type enmType.
+ *
+ * The first successful call runs the prober thread to find out what this
+ * process is allowed to do; later calls reuse those findings.
+ *
+ * @returns iprt status code.
+ * @param   enmType     The thread type to be assumed for the current thread.
+ */
+DECLHIDDEN(int) rtSchedNativeCalcDefaultPriority(RTTHREADTYPE enmType)
+{
+    Assert(enmType > RTTHREADTYPE_INVALID && enmType < RTTHREADTYPE_END);
+
+    /*
+     * First figure out what we're allowed to do in this process.
+     */
+    if (!g_fInitialized)
+    {
+        int const iNiceBefore = getpriority(PRIO_PROCESS, 0);
+#ifdef RLIMIT_RTPRIO
+        /** @todo */
+#endif
+        int const rcProbe = rtSchedRunThread(rtSchedNativeProberThread, NULL);
+        if (RT_FAILURE(rcProbe))
+            return rcProbe;
+        Assert(getpriority(PRIO_PROCESS, 0) == iNiceBefore); NOREF(iNiceBefore);
+        g_fInitialized = true;
+    }
+
+    /*
+     * Select the priority type table.  We go flat unless the priority can be
+     * restored and the inheritance isn't screwed up; in that case the ability
+     * to raise the priority decides between the free and restricted tables.
+     */
+    const PROCPRIORITYTYPE *paTypes = &g_aTypesLinuxFlat[0];
+    if (g_fCanRestorePriority && !g_fScrewedUpMaxPriorityLimitInheritance)
+        paTypes = g_fCanRaisePriority ? &g_aTypesLinuxFree[0] : &g_aTypesLinuxRestricted[0];
+    g_aDefaultPriority.paTypes = paTypes;
+    Assert(enmType == paTypes[enmType].enmType);
+
+    /*
+     * Update the default process priority structure: subtracting the delta
+     * of the assumed thread type from the current nice value gives the base.
+     */
+    int const iNiceNow = getpriority(PRIO_PROCESS, 0 /* current process */);
+    int const iBase    = iNiceNow - paTypes[enmType].iPriority;
+    g_aDefaultPriority.iNice  = iBase;
+    g_aDefaultPriority.iDelta = iBase;
+
+    rtSchedDumpPriority();
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * The process priority validator thread.
+ * (We don't want to mess with the priority of the calling thread.)
+ *
+ * Tries to apply the nice value of every thread type in the given
+ * configuration to itself and stops at the first one which fails.
+ *
+ * @returns IPRT status code cast to a pointer.
+ * @param   pvUser      Pointer to the PROCPRIORITY configuration to validate.
+ */
+static void *rtSchedNativeValidatorThread(void *pvUser)
+{
+    const PROCPRIORITY * const pCandidate = (const PROCPRIORITY *)pvUser;
+
+    SAVEDPRIORITY Saved;
+    rtSchedNativeSave(&Saved);
+
+    /*
+     * Walk the thread types from the last one down to the first valid one.
+     */
+    int rcRet = VINF_SUCCESS;
+    for (int iType = RTTHREADTYPE_END - 1; iType > RTTHREADTYPE_INVALID; iType--)
+    {
+        int const iNice = pCandidate->paTypes[iType].iPriority + pCandidate->iDelta;
+        if (setpriority(PRIO_PROCESS, 0, iNice) != 0)
+        {
+            rcRet = RTErrConvertFromErrno(errno);
+            break;
+        }
+    }
+
+    /* Put things back the way we found them (as far as possible). */
+    rtSchedNativeRestore(&Saved);
+    return (void *)(intptr_t)rcRet;
+}
+
+
+/**
+ * Validates and sets the process priority.
+ *
+ * This will check that all rtThreadNativeSetPriority() calls will succeed
+ * for all the thread types when applied to the current thread.
+ *
+ * @returns iprt status code.  VERR_FILE_NOT_FOUND if g_aUnixConfigs has no
+ *          entry for @a enmPriority, otherwise the status of the first
+ *          candidate when none of the candidates could be applied.
+ * @param   enmPriority     The priority to validate and set.
+ */
+DECLHIDDEN(int) rtProcNativeSetPriority(RTPROCPRIORITY enmPriority)
+{
+    Assert(enmPriority > RTPROCPRIORITY_INVALID && enmPriority < RTPROCPRIORITY_LAST);
+
+    int rc;
+    if (enmPriority != RTPROCPRIORITY_DEFAULT)
+    {
+        /*
+         * Find a configuration which matches and can be applied.
+         */
+        rc = VERR_FILE_NOT_FOUND;
+        for (unsigned iCfg = 0; iCfg < RT_ELEMENTS(g_aUnixConfigs); iCfg++)
+        {
+            const PROCPRIORITY *pCfg = &g_aUnixConfigs[iCfg];
+            if (pCfg->enmPriority != enmPriority)
+                continue;
+
+            int const iNiceBefore = getpriority(PRIO_PROCESS, 0);
+            int const rcValidate  = rtSchedRunThread(rtSchedNativeValidatorThread, (void *)pCfg);
+            Assert(getpriority(PRIO_PROCESS, 0) == iNiceBefore); NOREF(iNiceBefore);
+            if (RT_SUCCESS(rcValidate))
+            {
+                g_pProcessPriority = pCfg;
+                rc = VINF_SUCCESS;
+                break;
+            }
+
+            /* Hang on to the first failure in case no candidate works out. */
+            if (rc == VERR_FILE_NOT_FOUND)
+                rc = rcValidate;
+        }
+    }
+    else
+    {
+        /* The default configuration needs no validation. */
+        g_pProcessPriority = &g_aDefaultPriority;
+        rc = VINF_SUCCESS;
+    }
+
+#ifdef THREAD_LOGGING
+    LogFlow(("rtProcNativeSetPriority: returns %Rrc enmPriority=%d\n", rc, enmPriority));
+    rtSchedDumpPriority();
+#endif
+    return rc;
+}
+
+
+/**
+ * Sets the priority of the thread according to the thread type
+ * and current process priority.
+ *
+ * The RTTHREADINT::enmType member has not yet been updated and will be updated by
+ * the caller on a successful return.
+ *
+ * @returns iprt status code.  A setpriority() failure is asserted but is
+ *          currently treated as non-fatal, so VINF_SUCCESS is returned in
+ *          that case too.
+ * @param   pThread     The thread in question.
+ * @param   enmType     The thread type.
+ */
+DECLHIDDEN(int) rtThreadNativeSetPriority(PRTTHREADINT pThread, RTTHREADTYPE enmType)
+{
+    /* sanity */
+    Assert(enmType > RTTHREADTYPE_INVALID && enmType < RTTHREADTYPE_END);
+    Assert(enmType == g_pProcessPriority->paTypes[enmType].enmType);
+    Assert((pthread_t)pThread->Core.Key == pthread_self()); RT_NOREF_PV(pThread);
+
+    /*
+     * The nice value is the delta of the thread type plus the delta of the
+     * current process priority configuration.
+     */
+    int rc = VINF_SUCCESS;
+    int const iNice = g_pProcessPriority->paTypes[enmType].iPriority + g_pProcessPriority->iDelta;
+    if (setpriority(PRIO_PROCESS, 0, iNice) != 0)
+    {
+        rc = RTErrConvertFromErrno(errno);
+        AssertMsgFailed(("setpriority(,, %d) -> errno=%d rc=%Rrc\n", iNice, errno, rc));
+        rc = VINF_SUCCESS; //non-fatal for now.
+    }
+    else
+    {
+        AssertMsg(iNice == getpriority(PRIO_PROCESS, 0), ("iPriority=%d getpriority()=%d\n", iNice, getpriority(PRIO_PROCESS, 0)));
+#ifdef THREAD_LOGGING
+        Log(("rtThreadNativeSetPriority: Thread=%p enmType=%d iPriority=%d pid=%d\n", pThread->Core.Key, enmType, iNice, getpid()));
+#endif
+    }
+
+    return rc;
+}
+
diff --git a/src/VBox/Runtime/r3/linux/semevent-linux.cpp b/src/VBox/Runtime/r3/linux/semevent-linux.cpp
new file mode 100644
index 00000000..deeb2d1e
--- /dev/null
+++ b/src/VBox/Runtime/r3/linux/semevent-linux.cpp
@@ -0,0 +1,417 @@
+/* $Id: semevent-linux.cpp $ */
+/** @file
+ * IPRT - Event Semaphore, Linux (2.6.x+).
+ */
+
+/*
+ * Copyright (C) 2006-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
+ * VirtualBox OSE distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ */
+
+#include <features.h>
+#if __GLIBC_PREREQ(2,6) && !defined(IPRT_WITH_FUTEX_BASED_SEMS)
+
+/*
+ * glibc 2.6 fixed a serious bug in the mutex implementation. We wrote this
+ * linux specific event semaphores code in order to work around the bug. We
+ * will fall back on the pthread-based implementation if glibc is known to
+ * contain the bug fix.
+ *
+ * The external reference to epoll_pwait is a hack which prevents that we link
+ * against glibc < 2.6.
+ */
+#include "../posix/semevent-posix.cpp"
+__asm__ (".global epoll_pwait");
+
+#else /* glibc < 2.6 || IPRT_WITH_FUTEX_BASED_SEMS */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#include <iprt/semaphore.h>
+#include "internal/iprt.h"
+
+#include <iprt/asm.h>
+#include <iprt/assert.h>
+#include <iprt/err.h>
+#include <iprt/lockvalidator.h>
+#include <iprt/mem.h>
+#include <iprt/time.h>
+#include "internal/magics.h"
+#include "internal/mem.h"
+#include "internal/strict.h"
+
+#include <errno.h>
+#include <limits.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <sys/syscall.h>
+#if 0 /* With 2.6.17 futex.h has become C++ unfriendly. */
+# include <linux/futex.h>
+#else
+# define FUTEX_WAIT 0
+# define FUTEX_WAKE 1
+#endif
+
+
+/*********************************************************************************************************************************
+* Structures and Typedefs *
+*********************************************************************************************************************************/
+/**
+ * Linux (single wakeup) event semaphore.
+ */
+struct RTSEMEVENTINTERNAL
+{
+ /** Magic value.
+ * RTSEMEVENT_MAGIC while the semaphore is valid; RTSemEventDestroy ORs
+ * 0x80000000 into it before waking up the waiters. */
+ intptr_t volatile iMagic;
+ /** The futex state variable.
+ * 0 means not signalled.
+ * 1 means signalled. */
+ uint32_t volatile fSignalled;
+ /** The number of waiting threads.
+ * RTSemEventDestroy replaces it with INT32_MIN / 2. */
+ int32_t volatile cWaiters;
+#ifdef RTSEMEVENT_STRICT
+ /** Signallers. */
+ RTLOCKVALRECSHRD Signallers;
+ /** Indicates that lock validation should be performed.
+ * Set by RTSemEventSetSignaller and RTSemEventAddSignaller. */
+ bool volatile fEverHadSignallers;
+#endif
+ /** The creation flags (RTSEMEVENT_FLAGS_XXX).
+ * RTSEMEVENT_FLAGS_BOOTSTRAP_HACK decides which allocator is used for
+ * allocating and freeing the structure. */
+ uint32_t fFlags;
+};
+
+
+/**
+ * Wrapper for the futex syscall.
+ *
+ * The arguments are passed to syscall(__NR_futex, ...) unmodified.
+ *
+ * @returns The syscall return value when it is non-negative, otherwise the
+ *          negated errno value.
+ */
+static long sys_futex(uint32_t volatile *uaddr, int op, int val, struct timespec *utime, int32_t *uaddr2, int val3)
+{
+    errno = 0;
+    long const lRet = syscall(__NR_futex, uaddr, op, val, utime, uaddr2, val3);
+    if (lRet >= 0)
+        return lRet;
+
+    /* Failure: hand the errno value back to the caller as a negative number. */
+    Assert(lRet == -1);
+    return -errno;
+}
+
+
+
+/**
+ * Creates an event semaphore with no flags, no lock validator class and no
+ * name format string.
+ *
+ * @returns The status code of RTSemEventCreateEx.
+ * @param   phEventSem  Where to store the handle of the new semaphore.
+ */
+RTDECL(int) RTSemEventCreate(PRTSEMEVENT phEventSem)
+{
+    int const rcCreate = RTSemEventCreateEx(phEventSem, 0 /*fFlags*/, NIL_RTLOCKVALCLASS, NULL);
+    return rcCreate;
+}
+
+
+/**
+ * Creates an event semaphore, extended version.
+ *
+ * @returns VINF_SUCCESS on success, VERR_INVALID_PARAMETER if @a fFlags
+ *          contains unknown bits, VERR_NO_MEMORY if the allocation fails.
+ * @param   phEventSem  Where to store the handle of the new semaphore.
+ * @param   fFlags      RTSEMEVENT_FLAGS_NO_LOCK_VAL and/or
+ *                      RTSEMEVENT_FLAGS_BOOTSTRAP_HACK.
+ * @param   hClass      The lock validator class (only used in strict builds).
+ * @param   pszNameFmt  Name format string for the lock validator, NULL for
+ *                      an automatically numbered name (only used in strict
+ *                      builds).
+ * @param   ...         Format string arguments.
+ */
+RTDECL(int) RTSemEventCreateEx(PRTSEMEVENT phEventSem, uint32_t fFlags, RTLOCKVALCLASS hClass, const char *pszNameFmt, ...)
+{
+    AssertReturn(!(fFlags & ~(RTSEMEVENT_FLAGS_NO_LOCK_VAL | RTSEMEVENT_FLAGS_BOOTSTRAP_HACK)), VERR_INVALID_PARAMETER);
+    Assert(!(fFlags & RTSEMEVENT_FLAGS_BOOTSTRAP_HACK) || (fFlags & RTSEMEVENT_FLAGS_NO_LOCK_VAL));
+
+    /*
+     * Allocate the semaphore; the bootstrap hack uses the base allocator.
+     */
+    struct RTSEMEVENTINTERNAL *pSem;
+    if (fFlags & RTSEMEVENT_FLAGS_BOOTSTRAP_HACK)
+        pSem = (struct RTSEMEVENTINTERNAL *)rtMemBaseAlloc(sizeof(struct RTSEMEVENTINTERNAL));
+    else
+        pSem = (struct RTSEMEVENTINTERNAL *)RTMemAlloc(sizeof(struct RTSEMEVENTINTERNAL));
+    if (!pSem)
+        return VERR_NO_MEMORY;
+
+    /*
+     * Initialize it: not signalled and nobody waiting.
+     */
+    pSem->iMagic     = RTSEMEVENT_MAGIC;
+    pSem->cWaiters   = 0;
+    pSem->fSignalled = 0;
+    pSem->fFlags     = fFlags;
+#ifdef RTSEMEVENT_STRICT
+    if (pszNameFmt)
+    {
+        va_list va;
+        va_start(va, pszNameFmt);
+        RTLockValidatorRecSharedInitV(&pSem->Signallers, hClass, RTLOCKVAL_SUB_CLASS_ANY, pSem,
+                                      true /*fSignaller*/, !(fFlags & RTSEMEVENT_FLAGS_NO_LOCK_VAL),
+                                      pszNameFmt, va);
+        va_end(va);
+    }
+    else
+    {
+        static uint32_t volatile s_iSemEventAnon = 0;
+        RTLockValidatorRecSharedInit(&pSem->Signallers, hClass, RTLOCKVAL_SUB_CLASS_ANY, pSem,
+                                     true /*fSignaller*/, !(fFlags & RTSEMEVENT_FLAGS_NO_LOCK_VAL),
+                                     "RTSemEvent-%u", ASMAtomicIncU32(&s_iSemEventAnon) - 1);
+    }
+    pSem->fEverHadSignallers = false;
+#else
+    RT_NOREF(hClass, pszNameFmt);
+#endif
+
+    *phEventSem = pSem;
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Destroys an event semaphore, waking up any threads waiting on it first.
+ *
+ * @returns VINF_SUCCESS on success (also for NIL_RTSEMEVENT),
+ *          VERR_INVALID_HANDLE if the handle is bad.
+ * @param   hEventSem   The semaphore to destroy.  NIL_RTSEMEVENT is quietly
+ *                      ignored.
+ */
+RTDECL(int) RTSemEventDestroy(RTSEMEVENT hEventSem)
+{
+    /*
+     * Validate input.
+     */
+    struct RTSEMEVENTINTERNAL *pSem = hEventSem;
+    if (pSem == NIL_RTSEMEVENT)
+        return VINF_SUCCESS;
+    AssertPtrReturn(pSem, VERR_INVALID_HANDLE);
+    AssertReturn(pSem->iMagic == RTSEMEVENT_MAGIC, VERR_INVALID_HANDLE);
+
+    /*
+     * Invalidate the semaphore and wake up anyone waiting on it, giving the
+     * waiters a millisecond before the memory is freed.
+     */
+    ASMAtomicXchgSize(&pSem->iMagic, RTSEMEVENT_MAGIC | UINT32_C(0x80000000));
+    int32_t const cOldWaiters = ASMAtomicXchgS32(&pSem->cWaiters, INT32_MIN / 2);
+    if (cOldWaiters > 0)
+    {
+        sys_futex(&pSem->fSignalled, FUTEX_WAKE, INT_MAX, NULL, NULL, 0);
+        usleep(1000);
+    }
+
+    /*
+     * Free the semaphore memory with the allocator it was created with.
+     */
+#ifdef RTSEMEVENT_STRICT
+    RTLockValidatorRecSharedDelete(&pSem->Signallers);
+#endif
+    if (pSem->fFlags & RTSEMEVENT_FLAGS_BOOTSTRAP_HACK)
+        rtMemBaseFree(pSem);
+    else
+        RTMemFree(pSem);
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Signals an event semaphore, waking up one waiter if there are any.
+ *
+ * @returns VINF_SUCCESS on success, VERR_INVALID_HANDLE if the handle is bad,
+ *          VERR_SEM_DESTROYED or VERR_INVALID_PARAMETER if the futex wake-up
+ *          call fails.  Strict builds may also return a lock validator
+ *          status.
+ * @param   hEventSem   The semaphore to signal.
+ */
+RTDECL(int) RTSemEventSignal(RTSEMEVENT hEventSem)
+{
+    /*
+     * Validate input.
+     */
+    struct RTSEMEVENTINTERNAL *pSem = hEventSem;
+    AssertPtrReturn(pSem, VERR_INVALID_HANDLE);
+    AssertReturn(pSem->iMagic == RTSEMEVENT_MAGIC, VERR_INVALID_HANDLE);
+
+#ifdef RTSEMEVENT_STRICT
+    if (pSem->fEverHadSignallers)
+    {
+        int rc9 = RTLockValidatorRecSharedCheckSignaller(&pSem->Signallers, NIL_RTTHREAD);
+        if (RT_FAILURE(rc9))
+            return rc9;
+    }
+#endif
+
+    /*
+     * Set the state; the syscall is only needed when somebody is waiting.
+     */
+    ASMAtomicWriteU32(&pSem->fSignalled, 1);
+    if (ASMAtomicReadS32(&pSem->cWaiters) >= 1)
+    {
+        /* somebody is waiting, try wake up one of them. */
+        long const cWoken = sys_futex(&pSem->fSignalled, FUTEX_WAKE, 1, NULL, NULL, 0);
+        if (RT_UNLIKELY(cWoken < 0))
+        {
+            if (RT_UNLIKELY(pSem->iMagic != RTSEMEVENT_MAGIC))
+                return VERR_SEM_DESTROYED;
+            return VERR_INVALID_PARAMETER;
+        }
+    }
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Worker for RTSemEventWait and RTSemEventWaitNoResume.
+ *
+ * @returns VINF_SUCCESS if the signalled state was consumed, VERR_TIMEOUT if
+ *          the timeout expired, VERR_INTERRUPTED if the wait was interrupted
+ *          and @a fAutoResume is false, VERR_SEM_DESTROYED if the semaphore
+ *          was destroyed while waiting, VERR_INVALID_HANDLE if the handle is
+ *          bad.  Unexpected futex errors are converted to IPRT status codes.
+ * @param   hEventSem       The semaphore to wait on.
+ * @param   cMillies        The timeout in milliseconds, RT_INDEFINITE_WAIT
+ *                          for no timeout.  Zero means polling.
+ * @param   fAutoResume     Whether to go on waiting when the futex call
+ *                          returns EINTR.
+ */
+static int rtSemEventWait(RTSEMEVENT hEventSem, RTMSINTERVAL cMillies, bool fAutoResume)
+{
+#ifdef RTSEMEVENT_STRICT
+    PCRTLOCKVALSRCPOS pSrcPos = NULL;
+#endif
+
+    /*
+     * Validate input.
+     */
+    struct RTSEMEVENTINTERNAL *pThis = hEventSem;
+    AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+    AssertReturn(pThis->iMagic == RTSEMEVENT_MAGIC, VERR_INVALID_HANDLE);
+
+    /*
+     * Quickly check whether it's signaled.
+     */
+    /** @todo this isn't fair if someone is already waiting on it.  They should
+     *        have the first go at it!
+     *  (ASMAtomicReadS32(&pThis->cWaiters) == 0 || !cMillies) && ... */
+    if (ASMAtomicCmpXchgU32(&pThis->fSignalled, 0, 1))
+        return VINF_SUCCESS;
+
+    /*
+     * Convert the timeout value.  The futex timeout is relative, so the
+     * absolute end time is recorded for recalculating it after each wakeup.
+     */
+    struct timespec ts;
+    struct timespec *pTimeout = NULL;
+    uint64_t u64End = 0; /* shut up gcc */
+    if (cMillies != RT_INDEFINITE_WAIT)
+    {
+        if (!cMillies)
+            return VERR_TIMEOUT;
+        ts.tv_sec  = cMillies / 1000;
+        ts.tv_nsec = (cMillies % 1000) * UINT32_C(1000000);
+        u64End = RTTimeSystemNanoTS() + cMillies * UINT64_C(1000000);
+        pTimeout = &ts;
+    }
+
+    ASMAtomicIncS32(&pThis->cWaiters);
+
+    /*
+     * The wait loop.
+     */
+#ifdef RTSEMEVENT_STRICT
+    RTTHREAD hThreadSelf = !(pThis->fFlags & RTSEMEVENT_FLAGS_BOOTSTRAP_HACK)
+                         ? RTThreadSelfAutoAdopt()
+                         : RTThreadSelf();
+#else
+    RTTHREAD hThreadSelf = RTThreadSelf();
+#endif
+    int rc = VINF_SUCCESS;
+    for (;;)
+    {
+#ifdef RTSEMEVENT_STRICT
+        if (pThis->fEverHadSignallers)
+        {
+            rc = RTLockValidatorRecSharedCheckBlocking(&pThis->Signallers, hThreadSelf, pSrcPos, false,
+                                                       cMillies, RTTHREADSTATE_EVENT, true);
+            if (RT_FAILURE(rc))
+                break;
+        }
+#endif
+        RTThreadBlocking(hThreadSelf, RTTHREADSTATE_EVENT, true);
+        long lrc = sys_futex(&pThis->fSignalled, FUTEX_WAIT, 0, pTimeout, NULL, 0);
+        RTThreadUnblocked(hThreadSelf, RTTHREADSTATE_EVENT);
+        if (RT_UNLIKELY(pThis->iMagic != RTSEMEVENT_MAGIC))
+        {
+            rc = VERR_SEM_DESTROYED;
+            break;
+        }
+
+        if (RT_LIKELY(lrc == 0 || lrc == -EWOULDBLOCK))
+        {
+            /* successful wakeup or fSignalled > 0 in the meantime */
+            if (ASMAtomicCmpXchgU32(&pThis->fSignalled, 0, 1))
+                break;
+        }
+        else if (lrc == -ETIMEDOUT)
+        {
+            rc = VERR_TIMEOUT;
+            break;
+        }
+        else if (lrc == -EINTR)
+        {
+            if (!fAutoResume)
+            {
+                rc = VERR_INTERRUPTED;
+                break;
+            }
+        }
+        else
+        {
+            /* this shouldn't happen! */
+            AssertMsgFailed(("rc=%ld errno=%d\n", lrc, errno));
+            /* sys_futex returns the negated errno value, while the
+               conversion function takes the errno value itself. */
+            rc = RTErrConvertFromErrno((int)-lrc);
+            break;
+        }
+
+        /* adjust the relative timeout */
+        if (pTimeout)
+        {
+            int64_t i64Diff = u64End - RTTimeSystemNanoTS();
+            if (i64Diff < 1000)
+            {
+                rc = VERR_TIMEOUT;
+                break;
+            }
+            ts.tv_sec  = (uint64_t)i64Diff / UINT32_C(1000000000);
+            ts.tv_nsec = (uint64_t)i64Diff % UINT32_C(1000000000);
+        }
+    }
+
+    ASMAtomicDecS32(&pThis->cWaiters);
+    return rc;
+}
+
+
+/**
+ * Waits on an event semaphore, resuming the wait if it gets interrupted.
+ *
+ * @returns The status code of rtSemEventWait.
+ * @param   hEventSem   The semaphore to wait on.
+ * @param   cMillies    The timeout in milliseconds, RT_INDEFINITE_WAIT for
+ *                      no timeout.
+ */
+RTDECL(int) RTSemEventWait(RTSEMEVENT hEventSem, RTMSINTERVAL cMillies)
+{
+    int const rcWait = rtSemEventWait(hEventSem, cMillies, true /*fAutoResume*/);
+    Assert(rcWait != VERR_INTERRUPTED);
+    Assert(rcWait != VERR_TIMEOUT || cMillies != RT_INDEFINITE_WAIT);
+    return rcWait;
+}
+
+
+/**
+ * Waits on an event semaphore without resuming the wait if it gets
+ * interrupted.
+ *
+ * @returns The status code of rtSemEventWait.
+ * @param   hEventSem   The semaphore to wait on.
+ * @param   cMillies    The timeout in milliseconds, RT_INDEFINITE_WAIT for
+ *                      no timeout.
+ */
+RTDECL(int) RTSemEventWaitNoResume(RTSEMEVENT hEventSem, RTMSINTERVAL cMillies)
+{
+    int const rcWait = rtSemEventWait(hEventSem, cMillies, false /*fAutoResume*/);
+    return rcWait;
+}
+
+
+/**
+ * Resets the signaller record of the semaphore to the given thread and
+ * enables signaller checking.  Does nothing unless RTSEMEVENT_STRICT is
+ * defined.
+ *
+ * @param   hEventSem   The event semaphore.
+ * @param   hThread     The thread handed to the lock validator as owner.
+ */
+RTDECL(void) RTSemEventSetSignaller(RTSEMEVENT hEventSem, RTTHREAD hThread)
+{
+#ifndef RTSEMEVENT_STRICT
+    RT_NOREF(hEventSem, hThread);
+#else
+    struct RTSEMEVENTINTERNAL *pSem = hEventSem;
+    AssertPtrReturnVoid(pSem);
+    AssertReturnVoid(pSem->iMagic == RTSEMEVENT_MAGIC);
+
+    ASMAtomicWriteBool(&pSem->fEverHadSignallers, true);
+    RTLockValidatorRecSharedResetOwner(&pSem->Signallers, hThread, NULL);
+#endif
+}
+
+
+/**
+ * Adds the given thread to the signaller record of the semaphore and enables
+ * signaller checking.  Does nothing unless RTSEMEVENT_STRICT is defined.
+ *
+ * @param   hEventSem   The event semaphore.
+ * @param   hThread     The thread handed to the lock validator as owner.
+ */
+RTDECL(void) RTSemEventAddSignaller(RTSEMEVENT hEventSem, RTTHREAD hThread)
+{
+#ifndef RTSEMEVENT_STRICT
+    RT_NOREF(hEventSem, hThread);
+#else
+    struct RTSEMEVENTINTERNAL *pSem = hEventSem;
+    AssertPtrReturnVoid(pSem);
+    AssertReturnVoid(pSem->iMagic == RTSEMEVENT_MAGIC);
+
+    ASMAtomicWriteBool(&pSem->fEverHadSignallers, true);
+    RTLockValidatorRecSharedAddOwner(&pSem->Signallers, hThread, NULL);
+#endif
+}
+
+
+/**
+ * Removes the given thread from the signaller record of the semaphore.
+ * Does nothing unless RTSEMEVENT_STRICT is defined.
+ *
+ * @param   hEventSem   The event semaphore.
+ * @param   hThread     The thread to remove from the record.
+ */
+RTDECL(void) RTSemEventRemoveSignaller(RTSEMEVENT hEventSem, RTTHREAD hThread)
+{
+#ifndef RTSEMEVENT_STRICT
+    RT_NOREF(hEventSem, hThread);
+#else
+    struct RTSEMEVENTINTERNAL *pSem = hEventSem;
+    AssertPtrReturnVoid(pSem);
+    AssertReturnVoid(pSem->iMagic == RTSEMEVENT_MAGIC);
+
+    RTLockValidatorRecSharedRemoveOwner(&pSem->Signallers, hThread);
+#endif
+}
+
+#endif /* glibc < 2.6 || IPRT_WITH_FUTEX_BASED_SEMS */
+
diff --git a/src/VBox/Runtime/r3/linux/semeventmulti-linux.cpp b/src/VBox/Runtime/r3/linux/semeventmulti-linux.cpp
new file mode 100644
index 00000000..9e049069
--- /dev/null
+++ b/src/VBox/Runtime/r3/linux/semeventmulti-linux.cpp
@@ -0,0 +1,453 @@
+/* $Id: semeventmulti-linux.cpp $ */
+/** @file
+ * IPRT - Multiple Release Event Semaphore, Linux (2.6.x+).
+ */
+
+/*
+ * Copyright (C) 2006-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
+ * VirtualBox OSE distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ */
+
+
+#include <features.h>
+#if __GLIBC_PREREQ(2,6) && !defined(IPRT_WITH_FUTEX_BASED_SEMS)
+
+/*
+ * glibc 2.6 fixed a serious bug in the mutex implementation. We wrote this
+ * linux specific event semaphores code in order to work around the bug. As it
+ * turns out, this code seems to have an unresolved issue (@bugref{2599}), so we'll
+ * fall back on the pthread based implementation if glibc is known to contain
+ * the bug fix.
+ *
+ * The external reference to epoll_pwait is a hack which prevents us from
+ * linking against glibc < 2.6.
+ */
+#include "../posix/semeventmulti-posix.cpp"
+__asm__ (".global epoll_pwait");
+
+#else /* glibc < 2.6 || IPRT_WITH_FUTEX_BASED_SEMS */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#include <iprt/semaphore.h>
+#include "internal/iprt.h"
+
+#include <iprt/assert.h>
+#include <iprt/asm.h>
+#include <iprt/err.h>
+#include <iprt/lockvalidator.h>
+#include <iprt/mem.h>
+#include <iprt/time.h>
+#include "internal/magics.h"
+#include "internal/strict.h"
+
+
+#include <errno.h>
+#include <limits.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <sys/syscall.h>
+#if 0 /* With 2.6.17 futex.h has become C++ unfriendly. */
+# include <linux/futex.h>
+#else
+# define FUTEX_WAIT 0
+# define FUTEX_WAKE 1
+#endif
+
+
+/*********************************************************************************************************************************
+* Structures and Typedefs *
+*********************************************************************************************************************************/
+/**
+ * Linux multiple wakeup event semaphore.
+ *
+ * Instances are allocated by RTSemEventMultiCreateEx and freed by
+ * RTSemEventMultiDestroy.  The iState member is the word passed to the futex
+ * syscall by the signal, destroy and wait code in this file.
+ */
+struct RTSEMEVENTMULTIINTERNAL
+{
+ /** Magic value.
+ * RTSEMEVENTMULTI_MAGIC while the semaphore is alive; RTSemEventMultiDestroy
+ * writes RTSEMEVENTMULTI_MAGIC + 1 before freeing the structure. */
+ uint32_t volatile u32Magic;
+ /** The futex state variable.
+ * -1 means signaled.
+ * 0 means not signaled, no waiters.
+ * 1 means not signaled and that someone is waiting.
+ */
+ int32_t volatile iState;
+#ifdef RTSEMEVENTMULTI_STRICT
+ /** Signallers. */
+ RTLOCKVALRECSHRD Signallers;
+ /** Indicates that lock validation should be performed.
+ * Set by RTSemEventMultiSetSignaller / RTSemEventMultiAddSignaller. */
+ bool volatile fEverHadSignallers;
+#endif
+};
+
+
+/**
+ * Thin wrapper around the raw futex system call.
+ *
+ * All six arguments are forwarded untouched to syscall(__NR_futex, ...).
+ *
+ * @returns The non-negative syscall result on success, -errno on failure.
+ */
+static long sys_futex(int32_t volatile *uaddr, int op, int val, struct timespec *utime, int32_t *uaddr2, int val3)
+{
+    errno = 0;
+    long rc = syscall(__NR_futex, uaddr, op, val, utime, uaddr2, val3);
+    if (rc >= 0)
+        return rc;
+
+    /* Failure: syscall() is expected to hand back -1 with the reason in errno. */
+    Assert(rc == -1);
+    return -errno;
+}
+
+
+/**
+ * Creates a multiple release event semaphore with default settings.
+ *
+ * Forwards to RTSemEventMultiCreateEx with no flags, NIL_RTLOCKVALCLASS and
+ * no name format string.
+ *
+ * @returns The status code from RTSemEventMultiCreateEx.
+ * @param phEventMultiSem Where RTSemEventMultiCreateEx stores the new handle.
+ */
+RTDECL(int) RTSemEventMultiCreate(PRTSEMEVENTMULTI phEventMultiSem)
+{
+ return RTSemEventMultiCreateEx(phEventMultiSem, 0 /*fFlags*/, NIL_RTLOCKVALCLASS, NULL);
+}
+
+
+/**
+ * Creates a multiple release event semaphore in the not-signalled state.
+ *
+ * @returns VINF_SUCCESS on success, VERR_NO_MEMORY if the allocation fails,
+ *          VERR_INVALID_PARAMETER (asserted) if unknown flag bits are set.
+ * @param   phEventMultiSem Where to store the new handle on success.
+ * @param   fFlags          0 or RTSEMEVENTMULTI_FLAGS_NO_LOCK_VAL.
+ * @param   hClass          Lock validator class; only used in
+ *                          RTSEMEVENTMULTI_STRICT builds.
+ * @param   pszNameFmt      Name format string for the lock validator record;
+ *                          only used in RTSEMEVENTMULTI_STRICT builds.  NULL
+ *                          selects an automatic "RTSemEventMulti-<n>" name.
+ * @param   ...             Arguments for the format string.
+ */
+RTDECL(int) RTSemEventMultiCreateEx(PRTSEMEVENTMULTI phEventMultiSem, uint32_t fFlags, RTLOCKVALCLASS hClass,
+                                    const char *pszNameFmt, ...)
+{
+    AssertReturn(!(fFlags & ~RTSEMEVENTMULTI_FLAGS_NO_LOCK_VAL), VERR_INVALID_PARAMETER);
+
+    /*
+     * Allocate the semaphore; bail out early if that fails.
+     */
+    struct RTSEMEVENTMULTIINTERNAL *pThis = (struct RTSEMEVENTMULTIINTERNAL *)RTMemAlloc(sizeof(*pThis));
+    if (!pThis)
+        return VERR_NO_MEMORY;
+
+    pThis->u32Magic = RTSEMEVENTMULTI_MAGIC;
+    pThis->iState   = 0; /* not signalled, no waiters */
+#ifdef RTSEMEVENTMULTI_STRICT
+    bool const fLockValEnabled = !(fFlags & RTSEMEVENTMULTI_FLAGS_NO_LOCK_VAL);
+    if (pszNameFmt)
+    {
+        va_list va;
+        va_start(va, pszNameFmt);
+        RTLockValidatorRecSharedInitV(&pThis->Signallers, hClass, RTLOCKVAL_SUB_CLASS_ANY, pThis,
+                                      true /*fSignaller*/, fLockValEnabled,
+                                      pszNameFmt, va);
+        va_end(va);
+    }
+    else
+    {
+        /* Anonymous semaphores get a running number as their name. */
+        static uint32_t volatile s_iSemEventMultiAnon = 0;
+        RTLockValidatorRecSharedInit(&pThis->Signallers, hClass, RTLOCKVAL_SUB_CLASS_ANY, pThis,
+                                     true /*fSignaller*/, fLockValEnabled,
+                                     "RTSemEventMulti-%u", ASMAtomicIncU32(&s_iSemEventMultiAnon) - 1);
+    }
+    pThis->fEverHadSignallers = false;
+#else
+    RT_NOREF(hClass, pszNameFmt);
+#endif
+
+    *phEventMultiSem = pThis;
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Destroys a multiple release event semaphore.
+ *
+ * @returns VINF_SUCCESS on success or when passed NIL_RTSEMEVENTMULTI,
+ * VERR_INVALID_HANDLE (asserted) if the pointer or magic is bad.
+ * @param hEventMultiSem The semaphore to destroy; NIL is quietly accepted.
+ */
+RTDECL(int) RTSemEventMultiDestroy(RTSEMEVENTMULTI hEventMultiSem)
+{
+ /*
+ * Validate input.
+ */
+ struct RTSEMEVENTMULTIINTERNAL *pThis = hEventMultiSem;
+ if (pThis == NIL_RTSEMEVENTMULTI)
+ return VINF_SUCCESS;
+ AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+ AssertReturn(pThis->u32Magic == RTSEMEVENTMULTI_MAGIC, VERR_INVALID_HANDLE);
+
+ /*
+ * Invalidate the semaphore and wake up anyone waiting on it.
+ * The magic is changed first so that the other entry points in this file
+ * fail their magic check; the state is then forced to -1 (signaled) and
+ * the futex is only woken if the old state was 1 (someone waiting).
+ */
+ ASMAtomicWriteU32(&pThis->u32Magic, RTSEMEVENTMULTI_MAGIC + 1);
+ if (ASMAtomicXchgS32(&pThis->iState, -1) == 1)
+ {
+ sys_futex(&pThis->iState, FUTEX_WAKE, INT_MAX, NULL, NULL, 0);
+ /* NOTE(review): presumably a grace period so that woken waiters can
+ * get out before the memory is freed below - confirm; it is not a
+ * hard guarantee. */
+ usleep(1000);
+ }
+
+ /*
+ * Free the semaphore memory and be gone.
+ */
+#ifdef RTSEMEVENTMULTI_STRICT
+ RTLockValidatorRecSharedDelete(&pThis->Signallers);
+#endif
+ RTMemFree(pThis);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Signals a multiple release event semaphore, releasing all waiters.
+ *
+ * The semaphore stays signaled (iState == -1) until RTSemEventMultiReset.
+ *
+ * @returns VINF_SUCCESS on success, VERR_INVALID_HANDLE (asserted) on a bad
+ * handle, or the failure status of the lock validator signaller
+ * check in RTSEMEVENTMULTI_STRICT builds.
+ * @param hEventMultiSem The semaphore to signal.
+ */
+RTDECL(int) RTSemEventMultiSignal(RTSEMEVENTMULTI hEventMultiSem)
+{
+ /*
+ * Validate input.
+ */
+ struct RTSEMEVENTMULTIINTERNAL *pThis = hEventMultiSem;
+ AssertReturn(VALID_PTR(pThis) && pThis->u32Magic == RTSEMEVENTMULTI_MAGIC,
+ VERR_INVALID_HANDLE);
+
+#ifdef RTSEMEVENTMULTI_STRICT
+ /* The signaller check is only done once a signaller has been registered. */
+ if (pThis->fEverHadSignallers)
+ {
+ int rc9 = RTLockValidatorRecSharedCheckSignaller(&pThis->Signallers, NIL_RTTHREAD);
+ if (RT_FAILURE(rc9))
+ return rc9;
+ }
+#endif
+
+
+ /*
+ * Signal it.
+ * The futex wake syscall is skipped unless the previous state says that
+ * somebody is (or has been) waiting.
+ */
+ int32_t iOld = ASMAtomicXchgS32(&pThis->iState, -1);
+ if (iOld > 0)
+ {
+ /* wake up sleeping threads. */
+ long cWoken = sys_futex(&pThis->iState, FUTEX_WAKE, INT_MAX, NULL, NULL, 0);
+ AssertMsg(cWoken >= 0, ("%ld\n", cWoken)); NOREF(cWoken);
+ }
+ Assert(iOld == 0 || iOld == -1 || iOld == 1);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Resets a multiple release event semaphore to the not-signaled state.
+ *
+ * @returns VINF_SUCCESS, or VERR_INVALID_HANDLE (asserted) on a bad handle.
+ * @param hEventMultiSem The semaphore to reset.
+ */
+RTDECL(int) RTSemEventMultiReset(RTSEMEVENTMULTI hEventMultiSem)
+{
+ /*
+ * Validate input.
+ */
+ struct RTSEMEVENTMULTIINTERNAL *pThis = hEventMultiSem;
+ AssertReturn(VALID_PTR(pThis) && pThis->u32Magic == RTSEMEVENTMULTI_MAGIC,
+ VERR_INVALID_HANDLE);
+#ifdef RT_STRICT
+ /* Sanity check only: the state must be one of the three documented values. */
+ int32_t i = pThis->iState;
+ Assert(i == 0 || i == -1 || i == 1);
+#endif
+
+ /*
+ * Reset it.
+ * Only a signaled semaphore (-1) is changed to 0; the other states are
+ * already "not signaled" and are left as they are.
+ */
+ ASMAtomicCmpXchgS32(&pThis->iState, 0, -1);
+ return VINF_SUCCESS;
+}
+
+
+
+/**
+ * Common worker for RTSemEventMultiWaitEx and RTSemEventMultiWaitExDebug.
+ *
+ * @returns IPRT status code.
+ * @retval  VINF_SUCCESS if the semaphore is or becomes signalled.
+ * @retval  VERR_TIMEOUT if the timeout expires first; also returned at once
+ *          for a zero timeout on a semaphore that is not signalled.
+ * @retval  VERR_INTERRUPTED if the futex wait is interrupted and
+ *          RTSEMWAIT_FLAGS_NORESUME is set.
+ * @retval  VERR_SEM_DESTROYED if the magic changed while waiting.
+ * @retval  VERR_INVALID_HANDLE / VERR_INVALID_PARAMETER on bad input.
+ *
+ * @param   pThis       The semaphore.
+ * @param   fFlags      RTSEMWAIT_FLAGS_XXX.
+ * @param   uTimeout    The timeout: milliseconds with
+ *                      RTSEMWAIT_FLAGS_MILLISECS, nanoseconds otherwise;
+ *                      relative with RTSEMWAIT_FLAGS_RELATIVE, otherwise an
+ *                      absolute RTTimeSystemNanoTS() deadline.  UINT64_MAX
+ *                      means wait forever.  Not looked at when
+ *                      RTSEMWAIT_FLAGS_INDEFINITE is set.
+ * @param   pSrcPos     Caller position for the lock validator; only used in
+ *                      RTSEMEVENTMULTI_STRICT builds.
+ */
+DECLINLINE(int) rtSemEventLnxMultiWait(struct RTSEMEVENTMULTIINTERNAL *pThis, uint32_t fFlags, uint64_t uTimeout,
+                                       PCRTLOCKVALSRCPOS pSrcPos)
+{
+    RT_NOREF(pSrcPos);
+
+    /*
+     * Validate input.
+     */
+    AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+    AssertReturn(pThis->u32Magic == RTSEMEVENTMULTI_MAGIC, VERR_INVALID_HANDLE);
+    AssertReturn(RTSEMWAIT_FLAGS_ARE_VALID(fFlags), VERR_INVALID_PARAMETER);
+
+    /*
+     * Quickly check whether it's signaled.
+     */
+    int32_t iCur = ASMAtomicUoReadS32(&pThis->iState);
+    Assert(iCur == 0 || iCur == -1 || iCur == 1);
+    if (iCur == -1)
+        return VINF_SUCCESS;
+
+    /*
+     * Check and convert the timeout value.
+     */
+    struct timespec ts;
+    struct timespec *pTimeout = NULL;
+    uint64_t u64Deadline = 0; /* shut up gcc */
+    if (!(fFlags & RTSEMWAIT_FLAGS_INDEFINITE))
+    {
+        /* If the timeout is zero, then we're done. */
+        if (!uTimeout)
+            return VERR_TIMEOUT;
+
+        /* Convert it to a deadline + interval timespec.  Millisecond values
+           too large to be expressed in nanoseconds are treated as an
+           indefinite wait instead of being allowed to wrap around. */
+        if (fFlags & RTSEMWAIT_FLAGS_MILLISECS)
+            uTimeout = uTimeout < UINT64_MAX / UINT32_C(1000000)
+                     ? uTimeout * UINT32_C(1000000)
+                     : UINT64_MAX;
+        if (uTimeout != UINT64_MAX) /* unofficial way of indicating an indefinite wait */
+        {
+            if (fFlags & RTSEMWAIT_FLAGS_RELATIVE)
+                u64Deadline = RTTimeSystemNanoTS() + uTimeout;
+            else
+            {
+                uint64_t u64Now = RTTimeSystemNanoTS();
+                if (uTimeout <= u64Now)
+                    return VERR_TIMEOUT;
+                u64Deadline = uTimeout;
+                uTimeout   -= u64Now;
+            }
+            /* Intervals that do not fit into the timespec leave pTimeout
+               NULL, i.e. they become an indefinite wait. */
+            if (   sizeof(ts.tv_sec) >= sizeof(uint64_t)
+                || uTimeout <= UINT64_C(1000000000) * UINT32_MAX)
+            {
+                ts.tv_nsec = uTimeout % UINT32_C(1000000000);
+                ts.tv_sec  = uTimeout / UINT32_C(1000000000);
+                pTimeout = &ts;
+            }
+        }
+    }
+
+    /*
+     * The wait loop.
+     */
+#ifdef RTSEMEVENTMULTI_STRICT
+    RTTHREAD hThreadSelf = RTThreadSelfAutoAdopt();
+#else
+    RTTHREAD hThreadSelf = RTThreadSelf();
+#endif
+    for (;;)
+    {
+        /*
+         * Start waiting. We only account for there being or having been
+         * threads waiting on the semaphore to keep things simple.
+         */
+        iCur = ASMAtomicUoReadS32(&pThis->iState);
+        Assert(iCur == 0 || iCur == -1 || iCur == 1);
+        if (   iCur == 1
+            || ASMAtomicCmpXchgS32(&pThis->iState, 1, 0))
+        {
+            /* adjust the relative timeout */
+            if (pTimeout)
+            {
+                int64_t i64Diff = u64Deadline - RTTimeSystemNanoTS();
+                if (i64Diff < 1000)
+                    return VERR_TIMEOUT;
+                ts.tv_sec  = (uint64_t)i64Diff / UINT32_C(1000000000);
+                ts.tv_nsec = (uint64_t)i64Diff % UINT32_C(1000000000);
+            }
+#ifdef RTSEMEVENTMULTI_STRICT
+            if (pThis->fEverHadSignallers)
+            {
+                int rc9 = RTLockValidatorRecSharedCheckBlocking(&pThis->Signallers, hThreadSelf, pSrcPos, false,
+                                                                uTimeout / UINT32_C(1000000), RTTHREADSTATE_EVENT_MULTI, true);
+                if (RT_FAILURE(rc9))
+                    return rc9;
+            }
+#endif
+            RTThreadBlocking(hThreadSelf, RTTHREADSTATE_EVENT_MULTI, true);
+            long rc = sys_futex(&pThis->iState, FUTEX_WAIT, 1, pTimeout, NULL, 0);
+            RTThreadUnblocked(hThreadSelf, RTTHREADSTATE_EVENT_MULTI);
+            if (RT_UNLIKELY(pThis->u32Magic != RTSEMEVENTMULTI_MAGIC))
+                return VERR_SEM_DESTROYED;
+            if (rc == 0)
+                return VINF_SUCCESS;
+
+            /*
+             * Act on the wakeup code.
+             */
+            if (rc == -ETIMEDOUT)
+            {
+/** @todo something is broken here. shows up every now and again in the ata
+ *        code. Should try to run the timeout against RTTimeMilliTS to
+ *        check that it's doing the right thing... */
+                Assert(pTimeout);
+                return VERR_TIMEOUT;
+            }
+            if (rc == -EWOULDBLOCK)
+                /* retry, the value changed. */;
+            else if (rc == -EINTR)
+            {
+                if (fFlags & RTSEMWAIT_FLAGS_NORESUME)
+                    return VERR_INTERRUPTED;
+            }
+            else
+            {
+                /* this shouldn't happen! */
+                AssertMsgFailed(("rc=%ld errno=%d\n", rc, errno));
+                /* sys_futex returns -errno, the converter wants the positive errno. */
+                return RTErrConvertFromErrno((int)-rc);
+            }
+        }
+        else if (iCur == -1)
+            return VINF_SUCCESS;
+    }
+}
+
+
+#undef RTSemEventMultiWaitEx
+/**
+ * Waits on a multiple release event semaphore.
+ *
+ * The source position is only captured when RTSEMEVENTMULTI_STRICT is
+ * defined, which is the same switch under which rtSemEventLnxMultiWait
+ * actually passes it on to the lock validator.
+ *
+ * @returns The status code from rtSemEventLnxMultiWait.
+ * @param   hEventMultiSem  The semaphore to wait on.
+ * @param   fFlags          RTSEMWAIT_FLAGS_XXX.
+ * @param   uTimeout        The timeout, interpreted according to fFlags.
+ */
+RTDECL(int) RTSemEventMultiWaitEx(RTSEMEVENTMULTI hEventMultiSem, uint32_t fFlags, uint64_t uTimeout)
+{
+#ifndef RTSEMEVENTMULTI_STRICT
+    return rtSemEventLnxMultiWait(hEventMultiSem, fFlags, uTimeout, NULL);
+#else
+    RTLOCKVALSRCPOS SrcPos = RTLOCKVALSRCPOS_INIT_NORMAL_API();
+    return rtSemEventLnxMultiWait(hEventMultiSem, fFlags, uTimeout, &SrcPos);
+#endif
+}
+
+
+/**
+ * Debug variant of RTSemEventMultiWaitEx which always hands a source position
+ * record to rtSemEventLnxMultiWait.
+ *
+ * @returns The status code from rtSemEventLnxMultiWait.
+ * @param hEventMultiSem The semaphore to wait on.
+ * @param fFlags RTSEMWAIT_FLAGS_XXX.
+ * @param uTimeout The timeout, interpreted according to fFlags.
+ * @param uId Not referenced by name in this body; presumably picked up by
+ * RTLOCKVALSRCPOS_INIT_DEBUG_API() together with the RT_SRC_POS_DECL
+ * arguments - confirm against iprt/lockvalidator.h.
+ */
+RTDECL(int) RTSemEventMultiWaitExDebug(RTSEMEVENTMULTI hEventMultiSem, uint32_t fFlags, uint64_t uTimeout,
+ RTHCUINTPTR uId, RT_SRC_POS_DECL)
+{
+ RTLOCKVALSRCPOS SrcPos = RTLOCKVALSRCPOS_INIT_DEBUG_API();
+ return rtSemEventLnxMultiWait(hEventMultiSem, fFlags, uTimeout, &SrcPos);
+}
+
+
+/**
+ * Resets the signaller record of a multiple release event semaphore to a
+ * single thread and switches on the lock validator checks for it.
+ *
+ * Only does something in RTSEMEVENTMULTI_STRICT builds; otherwise both
+ * arguments are ignored.
+ *
+ * @param   hEventMultiSem  The semaphore handle.
+ * @param   hThread         The thread handed to
+ *                          RTLockValidatorRecSharedResetOwner().
+ */
+RTDECL(void) RTSemEventMultiSetSignaller(RTSEMEVENTMULTI hEventMultiSem, RTTHREAD hThread)
+{
+#ifndef RTSEMEVENTMULTI_STRICT
+    RT_NOREF(hEventMultiSem, hThread);
+#else
+    struct RTSEMEVENTMULTIINTERNAL *pThis = hEventMultiSem;
+    AssertPtrReturnVoid(pThis);
+    AssertReturnVoid(pThis->u32Magic == RTSEMEVENTMULTI_MAGIC);
+
+    /* The signal and wait paths only consult the validator once this is set. */
+    ASMAtomicWriteBool(&pThis->fEverHadSignallers, true);
+    RTLockValidatorRecSharedResetOwner(&pThis->Signallers, hThread, NULL);
+#endif
+}
+
+
+/**
+ * Adds a thread to the signaller record of a multiple release event
+ * semaphore and switches on the lock validator checks for it.
+ *
+ * Only does something in RTSEMEVENTMULTI_STRICT builds; otherwise both
+ * arguments are ignored.
+ *
+ * @param   hEventMultiSem  The semaphore handle.
+ * @param   hThread         The thread handed to
+ *                          RTLockValidatorRecSharedAddOwner().
+ */
+RTDECL(void) RTSemEventMultiAddSignaller(RTSEMEVENTMULTI hEventMultiSem, RTTHREAD hThread)
+{
+#ifndef RTSEMEVENTMULTI_STRICT
+    RT_NOREF(hEventMultiSem, hThread);
+#else
+    struct RTSEMEVENTMULTIINTERNAL *pThis = hEventMultiSem;
+    AssertPtrReturnVoid(pThis);
+    AssertReturnVoid(pThis->u32Magic == RTSEMEVENTMULTI_MAGIC);
+
+    /* The signal and wait paths only consult the validator once this is set. */
+    ASMAtomicWriteBool(&pThis->fEverHadSignallers, true);
+    RTLockValidatorRecSharedAddOwner(&pThis->Signallers, hThread, NULL);
+#endif
+}
+
+
+/**
+ * Removes a thread from the signaller record of a multiple release event
+ * semaphore.
+ *
+ * Only does something in RTSEMEVENTMULTI_STRICT builds; otherwise both
+ * arguments are ignored.  fEverHadSignallers is left untouched.
+ *
+ * @param   hEventMultiSem  The semaphore handle.
+ * @param   hThread         The thread handed to
+ *                          RTLockValidatorRecSharedRemoveOwner().
+ */
+RTDECL(void) RTSemEventMultiRemoveSignaller(RTSEMEVENTMULTI hEventMultiSem, RTTHREAD hThread)
+{
+#ifndef RTSEMEVENTMULTI_STRICT
+    RT_NOREF(hEventMultiSem, hThread);
+#else
+    struct RTSEMEVENTMULTIINTERNAL *pThis = hEventMultiSem;
+    AssertPtrReturnVoid(pThis);
+    AssertReturnVoid(pThis->u32Magic == RTSEMEVENTMULTI_MAGIC);
+
+    RTLockValidatorRecSharedRemoveOwner(&pThis->Signallers, hThread);
+#endif
+}
+
+#endif /* glibc < 2.6 || IPRT_WITH_FUTEX_BASED_SEMS */
+
diff --git a/src/VBox/Runtime/r3/linux/semmutex-linux.cpp b/src/VBox/Runtime/r3/linux/semmutex-linux.cpp
new file mode 100644
index 00000000..69254b94
--- /dev/null
+++ b/src/VBox/Runtime/r3/linux/semmutex-linux.cpp
@@ -0,0 +1,465 @@
+/* $Id: semmutex-linux.cpp $ */
+/** @file
+ * IPRT - Mutex Semaphore, Linux (2.6.x+).
+ */
+
+/*
+ * Copyright (C) 2006-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
+ * VirtualBox OSE distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#include <iprt/semaphore.h>
+#include "internal/iprt.h"
+
+#include <iprt/alloc.h>
+#include <iprt/asm.h>
+#include <iprt/assert.h>
+#include <iprt/err.h>
+#include <iprt/lockvalidator.h>
+#include <iprt/thread.h>
+#include <iprt/time.h>
+#include "internal/magics.h"
+#include "internal/strict.h"
+
+#include <errno.h>
+#include <limits.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <sys/syscall.h>
+#if 0 /* With 2.6.17 futex.h has become C++ unfriendly. */
+# include <linux/futex.h>
+#else
+# define FUTEX_WAIT 0
+# define FUTEX_WAKE 1
+#endif
+
+
+/*********************************************************************************************************************************
+* Structures and Typedefs *
+*********************************************************************************************************************************/
+/**
+ * Linux internal representation of a Mutex semaphore.
+ *
+ * Instances are allocated by RTSemMutexCreateEx and freed by
+ * RTSemMutexDestroy.  The iState member is the word passed to the futex
+ * syscall by the request, release and destroy code in this file.
+ */
+struct RTSEMMUTEXINTERNAL
+{
+ /** The futex state variable.
+ * 0 means unlocked.
+ * 1 means locked, no waiters.
+ * 2 means locked, one or more waiters.
+ */
+ int32_t volatile iState;
+ /** Nesting count.
+ * 0 while nobody owns the mutex, 1 after the first successful request and
+ * incremented for each nested request by the owner. */
+ uint32_t volatile cNestings;
+ /** The owner of the mutex.
+ * (pthread_t)~0 is stored while there is no owner. */
+ pthread_t volatile Owner;
+ /** Magic value (RTSEMMUTEX_MAGIC).
+ * RTSemMutexDestroy replaces it with RTSEMMUTEX_MAGIC_DEAD. */
+ uint32_t volatile u32Magic;
+#ifdef RTSEMMUTEX_STRICT
+ /** Lock validator record associated with this mutex. */
+ RTLOCKVALRECEXCL ValidatorRec;
+#endif
+};
+
+
+
+/**
+ * Thin wrapper around the raw futex system call.
+ *
+ * All six arguments are forwarded untouched to syscall(__NR_futex, ...).
+ *
+ * @returns The non-negative syscall result on success, -errno on failure.
+ */
+static long sys_futex(int32_t volatile *uaddr, int op, int val, struct timespec *utime, int32_t *uaddr2, int val3)
+{
+    errno = 0;
+    long rc = syscall(__NR_futex, uaddr, op, val, utime, uaddr2, val3);
+    if (rc >= 0)
+        return rc;
+
+    /* Failure: syscall() is expected to hand back -1 with the reason in errno. */
+    Assert(rc == -1);
+    return -errno;
+}
+
+
+#undef RTSemMutexCreate
+/**
+ * Creates a mutex semaphore with default settings.
+ *
+ * Forwards to RTSemMutexCreateEx with no flags, NIL_RTLOCKVALCLASS,
+ * RTLOCKVAL_SUB_CLASS_NONE and no name format string.
+ *
+ * @returns The status code from RTSemMutexCreateEx.
+ * @param phMutexSem Where RTSemMutexCreateEx stores the new handle.
+ */
+RTDECL(int) RTSemMutexCreate(PRTSEMMUTEX phMutexSem)
+{
+ return RTSemMutexCreateEx(phMutexSem, 0 /*fFlags*/, NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE, NULL);
+}
+
+
+/**
+ * Creates a mutex semaphore in the unlocked state.
+ *
+ * @returns VINF_SUCCESS on success, VERR_NO_MEMORY if the allocation fails,
+ *          VERR_INVALID_PARAMETER (asserted) if unknown flag bits are set.
+ * @param   phMutexSem  Where to store the new handle on success.
+ * @param   fFlags      0 or RTSEMMUTEX_FLAGS_NO_LOCK_VAL.
+ * @param   hClass      Lock validator class; only used in RTSEMMUTEX_STRICT
+ *                      builds.
+ * @param   uSubClass   Lock validator sub-class; only used in
+ *                      RTSEMMUTEX_STRICT builds.
+ * @param   pszNameFmt  Name format string for the lock validator record; only
+ *                      used in RTSEMMUTEX_STRICT builds.  NULL selects an
+ *                      automatic "RTSemMutex-<n>" name.
+ * @param   ...         Arguments for the format string.
+ */
+RTDECL(int) RTSemMutexCreateEx(PRTSEMMUTEX phMutexSem, uint32_t fFlags,
+                               RTLOCKVALCLASS hClass, uint32_t uSubClass, const char *pszNameFmt, ...)
+{
+    AssertReturn(!(fFlags & ~RTSEMMUTEX_FLAGS_NO_LOCK_VAL), VERR_INVALID_PARAMETER);
+
+    /*
+     * Allocate the semaphore; bail out early if that fails.
+     */
+    struct RTSEMMUTEXINTERNAL *pThis = (struct RTSEMMUTEXINTERNAL *)RTMemAlloc(sizeof(*pThis));
+    if (!pThis)
+        return VERR_NO_MEMORY;
+
+    pThis->u32Magic  = RTSEMMUTEX_MAGIC;
+    pThis->iState    = 0;               /* unlocked */
+    pThis->Owner     = (pthread_t)~0;   /* nobody */
+    pThis->cNestings = 0;
+#ifdef RTSEMMUTEX_STRICT
+    bool const fLockValEnabled = !(fFlags & RTSEMMUTEX_FLAGS_NO_LOCK_VAL);
+    if (pszNameFmt)
+    {
+        va_list va;
+        va_start(va, pszNameFmt);
+        RTLockValidatorRecExclInitV(&pThis->ValidatorRec, hClass, uSubClass, pThis,
+                                    fLockValEnabled, pszNameFmt, va);
+        va_end(va);
+    }
+    else
+    {
+        /* Anonymous mutexes get a running number as their name. */
+        static uint32_t volatile s_iMutexAnon = 0;
+        RTLockValidatorRecExclInit(&pThis->ValidatorRec, hClass, uSubClass, pThis,
+                                   fLockValEnabled,
+                                   "RTSemMutex-%u", ASMAtomicIncU32(&s_iMutexAnon) - 1);
+    }
+#else
+    RT_NOREF(hClass, uSubClass, pszNameFmt);
+#endif
+
+    *phMutexSem = pThis;
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Destroys a mutex semaphore.
+ *
+ * @returns VINF_SUCCESS on success or when passed NIL_RTSEMMUTEX,
+ * VERR_INVALID_HANDLE (asserted) if the pointer or magic is bad.
+ * @param hMutexSem The mutex to destroy; NIL is quietly accepted.
+ */
+RTDECL(int) RTSemMutexDestroy(RTSEMMUTEX hMutexSem)
+{
+ /*
+ * Validate input.
+ */
+ if (hMutexSem == NIL_RTSEMMUTEX)
+ return VINF_SUCCESS;
+ struct RTSEMMUTEXINTERNAL *pThis = hMutexSem;
+ AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+ AssertMsgReturn(pThis->u32Magic == RTSEMMUTEX_MAGIC,
+ ("hMutexSem=%p u32Magic=%#x\n", pThis, pThis->u32Magic),
+ VERR_INVALID_HANDLE);
+
+ /*
+ * Invalidate the semaphore and wake up anyone waiting on it.
+ * The magic is changed first so that woken waiters in rtSemMutexRequest
+ * see it and return VERR_SEM_DESTROYED; the futex is only woken if the
+ * mutex was locked (old state 1 or 2).
+ */
+ ASMAtomicWriteU32(&pThis->u32Magic, RTSEMMUTEX_MAGIC_DEAD);
+ if (ASMAtomicXchgS32(&pThis->iState, 0) > 0)
+ {
+ sys_futex(&pThis->iState, FUTEX_WAKE, INT_MAX, NULL, NULL, 0);
+ /* NOTE(review): presumably a grace period so that woken waiters can
+ * get out before the memory is freed below - confirm; it is not a
+ * hard guarantee. */
+ usleep(1000);
+ }
+ pThis->Owner = (pthread_t)~0;
+ pThis->cNestings = 0;
+#ifdef RTSEMMUTEX_STRICT
+ RTLockValidatorRecExclDelete(&pThis->ValidatorRec);
+#endif
+
+ /*
+ * Free the semaphore memory and be gone.
+ */
+ RTMemFree(pThis);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Changes the lock validator sub-class of a mutex semaphore.
+ *
+ * @returns What RTLockValidatorRecExclSetSubClass() returns in
+ *          RTSEMMUTEX_STRICT builds; RTLOCKVAL_SUB_CLASS_INVALID on a bad
+ *          handle (asserted) and always in non-strict builds.
+ * @param   hMutexSem   The mutex handle.
+ * @param   uSubClass   The new sub-class.
+ */
+RTDECL(uint32_t) RTSemMutexSetSubClass(RTSEMMUTEX hMutexSem, uint32_t uSubClass)
+{
+#ifndef RTSEMMUTEX_STRICT
+    RT_NOREF(hMutexSem, uSubClass);
+    return RTLOCKVAL_SUB_CLASS_INVALID;
+#else
+    /*
+     * Validate the handle, then let the lock validator do the work.
+     */
+    RTSEMMUTEXINTERNAL *pThis = hMutexSem;
+    AssertPtrReturn(pThis, RTLOCKVAL_SUB_CLASS_INVALID);
+    AssertReturn(pThis->u32Magic == RTSEMMUTEX_MAGIC, RTLOCKVAL_SUB_CLASS_INVALID);
+
+    return RTLockValidatorRecExclSetSubClass(&pThis->ValidatorRec, uSubClass);
+#endif
+}
+
+
+/**
+ * Common worker for the RTSemMutexRequest* entry points.
+ *
+ * @returns IPRT status code.
+ * @retval  VINF_SUCCESS if the mutex was acquired (or re-entered by its
+ *          current owner).
+ * @retval  VERR_TIMEOUT if the mutex could not be acquired within cMillies.
+ * @retval  VERR_INTERRUPTED if the futex wait was interrupted and
+ *          fAutoResume is false.
+ * @retval  VERR_SEM_DESTROYED if the magic changed while waiting.
+ * @retval  VERR_INVALID_HANDLE on a bad handle (asserted).
+ *
+ * @param   hMutexSem   The mutex to acquire.
+ * @param   cMillies    Timeout in milliseconds, RT_INDEFINITE_WAIT for none.
+ * @param   fAutoResume Whether to keep waiting after an interrupted futex
+ *                      wait (true) or to return VERR_INTERRUPTED (false).
+ * @param   pSrcPos     Caller position for the lock validator; only used in
+ *                      RTSEMMUTEX_STRICT builds.
+ */
+DECL_FORCE_INLINE(int) rtSemMutexRequest(RTSEMMUTEX hMutexSem, RTMSINTERVAL cMillies, bool fAutoResume, PCRTLOCKVALSRCPOS pSrcPos)
+{
+    RT_NOREF(pSrcPos);
+
+    /*
+     * Validate input.
+     */
+    struct RTSEMMUTEXINTERNAL *pThis = hMutexSem;
+    AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+    AssertReturn(pThis->u32Magic == RTSEMMUTEX_MAGIC, VERR_INVALID_HANDLE);
+
+    /*
+     * Check if nested request.
+     */
+    pthread_t Self = pthread_self();
+    if (    pThis->Owner == Self
+        &&  pThis->cNestings > 0)
+    {
+#ifdef RTSEMMUTEX_STRICT
+        int rc9 = RTLockValidatorRecExclRecursion(&pThis->ValidatorRec, pSrcPos);
+        if (RT_FAILURE(rc9))
+            return rc9;
+#endif
+        ASMAtomicIncU32(&pThis->cNestings);
+        return VINF_SUCCESS;
+    }
+
+#ifdef RTSEMMUTEX_STRICT
+    RTTHREAD hThreadSelf = RTThreadSelfAutoAdopt();
+    if (cMillies)
+    {
+        int rc9 = RTLockValidatorRecExclCheckOrder(&pThis->ValidatorRec, hThreadSelf, pSrcPos, cMillies);
+        if (RT_FAILURE(rc9))
+            return rc9;
+    }
+#else
+    RTTHREAD hThreadSelf = RTThreadSelf();
+#endif
+
+    /*
+     * Convert timeout value.
+     */
+    struct timespec ts;
+    struct timespec *pTimeout = NULL;
+    uint64_t u64End = 0; /* shut up gcc */
+    if (cMillies != RT_INDEFINITE_WAIT)
+    {
+        ts.tv_sec  = cMillies / 1000;
+        ts.tv_nsec = (cMillies % 1000) * UINT32_C(1000000);
+        u64End = RTTimeSystemNanoTS() + cMillies * UINT64_C(1000000);
+        pTimeout = &ts;
+    }
+
+    /*
+     * Lock the mutex.
+     * Optimize for the uncontended case (makes 1-2 ns difference).
+     */
+    if (RT_UNLIKELY(!ASMAtomicCmpXchgS32(&pThis->iState, 1, 0)))
+    {
+        for (;;)
+        {
+            int32_t iOld = ASMAtomicXchgS32(&pThis->iState, 2);
+
+            /*
+             * Was the lock released in the meantime? This is unlikely (but possible)
+             */
+            if (RT_UNLIKELY(iOld == 0))
+                break;
+
+            /*
+             * Go to sleep.
+             */
+            /* NOTE(review): pTimeout is NULL for indefinite waits, so the
+               blocking bookkeeping below is skipped for them as well as for
+               zero timeouts - confirm that this is intended. */
+            if (pTimeout && ( pTimeout->tv_sec || pTimeout->tv_nsec ))
+            {
+#ifdef RTSEMMUTEX_STRICT
+                int rc9 = RTLockValidatorRecExclCheckBlocking(&pThis->ValidatorRec, hThreadSelf, pSrcPos, true,
+                                                              cMillies, RTTHREADSTATE_MUTEX, true);
+                if (RT_FAILURE(rc9))
+                    return rc9;
+#else
+                RTThreadBlocking(hThreadSelf, RTTHREADSTATE_MUTEX, true);
+#endif
+            }
+
+            long rc = sys_futex(&pThis->iState, FUTEX_WAIT, 2, pTimeout, NULL, 0);
+
+            RTThreadUnblocked(hThreadSelf, RTTHREADSTATE_MUTEX);
+            if (RT_UNLIKELY(pThis->u32Magic != RTSEMMUTEX_MAGIC))
+                return VERR_SEM_DESTROYED;
+
+            /*
+             * Act on the wakeup code.
+             */
+            if (rc == -ETIMEDOUT)
+            {
+                Assert(pTimeout);
+                return VERR_TIMEOUT;
+            }
+            if (rc == 0)
+                /* we'll leave the loop now unless another thread is faster */;
+            else if (rc == -EWOULDBLOCK)
+                /* retry with new value. */;
+            else if (rc == -EINTR)
+            {
+                if (!fAutoResume)
+                    return VERR_INTERRUPTED;
+            }
+            else
+            {
+                /* this shouldn't happen! */
+                AssertMsgFailed(("rc=%ld errno=%d\n", rc, errno));
+                /* sys_futex returns -errno, the converter wants the positive errno. */
+                return RTErrConvertFromErrno((int)-rc);
+            }
+
+            /* adjust the relative timeout */
+            if (pTimeout)
+            {
+                int64_t i64Diff = u64End - RTTimeSystemNanoTS();
+                if (i64Diff < 1000)
+                {
+                    /*
+                     * Out of time.  We may just have consumed the wake-up of
+                     * a releasing thread, so make one last attempt at taking
+                     * the mutex rather than leaving it unlocked with other
+                     * waiters still asleep.  If that fails we must return the
+                     * timeout status here; falling out of the loop would make
+                     * us claim ownership below without holding the mutex.
+                     */
+                    if (ASMAtomicXchgS32(&pThis->iState, 2) == 0)
+                        break;
+                    return VERR_TIMEOUT;
+                }
+                ts.tv_sec  = (uint64_t)i64Diff / UINT32_C(1000000000);
+                ts.tv_nsec = (uint64_t)i64Diff % UINT32_C(1000000000);
+            }
+        }
+
+        /*
+         * When leaving this loop, iState is set to 2. This means that we gained the
+         * lock and there are _possibly_ some waiters. We don't know exactly as another
+         * thread might entered this loop at nearly the same time. Therefore we will
+         * call futex_wakeup once too often (if _no_ other thread entered this loop).
+         * The key problem is the simple futex_wait test for x != y (iState != 2) in
+         * our case).
+         */
+    }
+
+    /*
+     * Set the owner and nesting.
+     */
+    pThis->Owner = Self;
+    ASMAtomicWriteU32(&pThis->cNestings, 1);
+#ifdef RTSEMMUTEX_STRICT
+    RTLockValidatorRecExclSetOwner(&pThis->ValidatorRec, hThreadSelf, pSrcPos, true);
+#endif
+    return VINF_SUCCESS;
+}
+
+
+#undef RTSemMutexRequest
+/**
+ * Requests ownership of a mutex semaphore, resuming the wait if the futex
+ * wait gets interrupted.
+ *
+ * @returns The status code from rtSemMutexRequest.
+ * @param   hMutexSem   The mutex to acquire.
+ * @param   cMillies    Timeout in milliseconds, RT_INDEFINITE_WAIT for none.
+ */
+RTDECL(int) RTSemMutexRequest(RTSEMMUTEX hMutexSem, RTMSINTERVAL cMillies)
+{
+#ifdef RTSEMMUTEX_STRICT
+    /* Strict builds record the caller position for the lock validator. */
+    RTLOCKVALSRCPOS SrcPos = RTLOCKVALSRCPOS_INIT_NORMAL_API();
+    int rc = rtSemMutexRequest(hMutexSem, cMillies, true, &SrcPos);
+#else
+    int rc = rtSemMutexRequest(hMutexSem, cMillies, true, NULL);
+#endif
+    Assert(rc != VERR_INTERRUPTED);
+    return rc;
+}
+
+
+/**
+ * Debug variant of RTSemMutexRequest which always hands a source position
+ * record to rtSemMutexRequest.
+ *
+ * @returns The status code from rtSemMutexRequest.
+ * @param hMutexSem The mutex to acquire.
+ * @param cMillies Timeout in milliseconds, RT_INDEFINITE_WAIT for none.
+ * @param uId Not referenced by name in this body; presumably picked up by
+ * RTLOCKVALSRCPOS_INIT_DEBUG_API() together with the RT_SRC_POS_DECL
+ * arguments - confirm against iprt/lockvalidator.h.
+ */
+RTDECL(int) RTSemMutexRequestDebug(RTSEMMUTEX hMutexSem, RTMSINTERVAL cMillies, RTHCUINTPTR uId, RT_SRC_POS_DECL)
+{
+ RTLOCKVALSRCPOS SrcPos = RTLOCKVALSRCPOS_INIT_DEBUG_API();
+ int rc = rtSemMutexRequest(hMutexSem, cMillies, true, &SrcPos);
+ /* With auto-resume requested the worker never returns VERR_INTERRUPTED. */
+ Assert(rc != VERR_INTERRUPTED);
+ return rc;
+}
+
+
+#undef RTSemMutexRequestNoResume
+/**
+ * Requests ownership of a mutex semaphore without resuming the wait when the
+ * futex wait gets interrupted.
+ *
+ * @returns The status code from rtSemMutexRequest, which may be
+ *          VERR_INTERRUPTED.
+ * @param   hMutexSem   The mutex to acquire.
+ * @param   cMillies    Timeout in milliseconds, RT_INDEFINITE_WAIT for none.
+ */
+RTDECL(int) RTSemMutexRequestNoResume(RTSEMMUTEX hMutexSem, RTMSINTERVAL cMillies)
+{
+#ifdef RTSEMMUTEX_STRICT
+    /* Strict builds record the caller position for the lock validator. */
+    RTLOCKVALSRCPOS SrcPos = RTLOCKVALSRCPOS_INIT_NORMAL_API();
+    return rtSemMutexRequest(hMutexSem, cMillies, false, &SrcPos);
+#else
+    return rtSemMutexRequest(hMutexSem, cMillies, false, NULL);
+#endif
+}
+
+
+/**
+ * Debug variant of RTSemMutexRequestNoResume which always hands a source
+ * position record to rtSemMutexRequest.
+ *
+ * @returns The status code from rtSemMutexRequest, which may be
+ * VERR_INTERRUPTED.
+ * @param hMutexSem The mutex to acquire.
+ * @param cMillies Timeout in milliseconds, RT_INDEFINITE_WAIT for none.
+ * @param uId Not referenced by name in this body; presumably picked up by
+ * RTLOCKVALSRCPOS_INIT_DEBUG_API() together with the RT_SRC_POS_DECL
+ * arguments - confirm against iprt/lockvalidator.h.
+ */
+RTDECL(int) RTSemMutexRequestNoResumeDebug(RTSEMMUTEX hMutexSem, RTMSINTERVAL cMillies, RTHCUINTPTR uId, RT_SRC_POS_DECL)
+{
+ RTLOCKVALSRCPOS SrcPos = RTLOCKVALSRCPOS_INIT_DEBUG_API();
+ return rtSemMutexRequest(hMutexSem, cMillies, false, &SrcPos);
+}
+
+
+/**
+ * Releases one level of ownership of a mutex semaphore.
+ *
+ * @returns VINF_SUCCESS on success, VERR_NOT_OWNER if the calling thread does
+ * not own the mutex, VERR_INVALID_HANDLE (asserted) on a bad handle,
+ * or the failure status of the lock validator in RTSEMMUTEX_STRICT
+ * builds.
+ * @param hMutexSem The mutex to release.
+ */
+RTDECL(int) RTSemMutexRelease(RTSEMMUTEX hMutexSem)
+{
+ /*
+ * Validate input.
+ */
+ struct RTSEMMUTEXINTERNAL *pThis = hMutexSem;
+ AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+ AssertReturn(pThis->u32Magic == RTSEMMUTEX_MAGIC, VERR_INVALID_HANDLE);
+
+#ifdef RTSEMMUTEX_STRICT
+ /* Note that the validator is told about the release before the owner
+ * check below has been made. */
+ int rc9 = RTLockValidatorRecExclReleaseOwner(&pThis->ValidatorRec, pThis->cNestings == 1);
+ if (RT_FAILURE(rc9))
+ return rc9;
+#endif
+
+ /*
+ * Check if nested.
+ */
+ pthread_t Self = pthread_self();
+ if (RT_UNLIKELY( pThis->Owner != Self
+ || pThis->cNestings == 0))
+ {
+ AssertMsgFailed(("Not owner of mutex %p!! Self=%08x Owner=%08x cNestings=%d\n",
+ pThis, Self, pThis->Owner, pThis->cNestings));
+ return VERR_NOT_OWNER;
+ }
+
+ /*
+ * If nested we'll just pop a nesting.
+ */
+ if (pThis->cNestings > 1)
+ {
+ ASMAtomicDecU32(&pThis->cNestings);
+ return VINF_SUCCESS;
+ }
+
+ /*
+ * Clear the state. (cNestings == 1)
+ */
+ pThis->Owner = (pthread_t)~0;
+ ASMAtomicWriteU32(&pThis->cNestings, 0);
+
+ /*
+ * Release the mutex.
+ * Decrementing a state of 1 (locked, no waiters) yields 0 and we are done.
+ * Any other result means the state was 2 (waiters possible), in which
+ * case the state is forced to 0 and one waiter is woken.
+ */
+ int32_t iNew = ASMAtomicDecS32(&pThis->iState);
+ if (RT_UNLIKELY(iNew != 0))
+ {
+ /* somebody is waiting, try wake up one of them. */
+ ASMAtomicXchgS32(&pThis->iState, 0);
+ (void)sys_futex(&pThis->iState, FUTEX_WAKE, 1, NULL, NULL, 0);
+ }
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Checks whether a mutex semaphore currently has an owner (any thread, not
+ * necessarily the caller).
+ *
+ * @returns true if the Owner field holds something other than the "nobody"
+ *          marker, false if not or if the handle is bad (asserted).
+ * @param   hMutexSem   The mutex to query.
+ */
+RTDECL(bool) RTSemMutexIsOwned(RTSEMMUTEX hMutexSem)
+{
+    /*
+     * Validate the handle first.
+     */
+    RTSEMMUTEXINTERNAL *pThis = hMutexSem;
+    AssertPtrReturn(pThis, false);
+    AssertReturn(pThis->u32Magic == RTSEMMUTEX_MAGIC, false);
+
+    /* (pthread_t)~0 is what create, release and destroy store for "nobody". */
+    pthread_t const NoOwner = (pthread_t)~0;
+    return !(pThis->Owner == NoOwner);
+}
+
diff --git a/src/VBox/Runtime/r3/linux/sysfs.cpp b/src/VBox/Runtime/r3/linux/sysfs.cpp
new file mode 100644
index 00000000..aeaa2726
--- /dev/null
+++ b/src/VBox/Runtime/r3/linux/sysfs.cpp
@@ -0,0 +1,714 @@
+/* $Id: sysfs.cpp $ */
+/** @file
+ * IPRT - Linux sysfs access.
+ */
+
+/*
+ * Copyright (C) 2006-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
+ * VirtualBox OSE distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP RTLOGGROUP_SYSTEM
+#include <iprt/assert.h>
+#include <iprt/dir.h>
+#include <iprt/err.h>
+#include <iprt/file.h>
+#include <iprt/fs.h>
+#include <iprt/param.h>
+#include <iprt/path.h>
+#include <iprt/string.h>
+#include <iprt/symlink.h>
+
+#include <iprt/linux/sysfs.h>
+
+#include <unistd.h>
+#include <stdio.h>
+#include <sys/sysctl.h>
+#include <sys/stat.h>
+#include <sys/fcntl.h>
+#include <sys/sysmacros.h>
+#include <errno.h>
+
+
+
+/**
+ * Constructs the path of a file from the format parameters passed, prepending
+ * a prefix if the formatted path is relative (does not start with '/').
+ *
+ * @returns IPRT status code.
+ * @retval  VERR_INVALID_PARAMETER if @a pszPrefix is empty or does not end
+ *          with '/', or if @a cchBuf is not larger than the prefix plus one.
+ * @retval  VERR_BUFFER_OVERFLOW if the prefix and the formatted name do not
+ *          fit into the buffer together.
+ * @param   pszBuf     Where to write the path.  Must be larger than
+ *                     strlen(@a pszPrefix) + 1 characters.
+ * @param   cchBuf     The size of the buffer pointed to by @a pszBuf.
+ * @param   pszPrefix  The prefix to prepend if the path is relative.  Must
+ *                     be non-empty and end in '/'.
+ * @param   pszFormat  The name format, either absolute or relative to the
+ *                     prefix specified by @a pszPrefix.
+ * @param   va         The format args.
+ */
+static int rtLinuxConstructPathV(char *pszBuf, size_t cchBuf,
+                                 const char *pszPrefix,
+                                 const char *pszFormat, va_list va)
+{
+    size_t const cchPrefix = strlen(pszPrefix);
+    /* Reject an empty prefix first so the '/' test never reads pszPrefix[-1]. */
+    AssertReturn(cchPrefix > 0, VERR_INVALID_PARAMETER);
+    AssertReturn(pszPrefix[cchPrefix - 1] == '/', VERR_INVALID_PARAMETER);
+    AssertReturn(cchBuf > cchPrefix + 1, VERR_INVALID_PARAMETER);
+
+    /** @todo While RTStrPrintfV prevents overflows, it doesn't make it easy to
+     *        check for truncations.  RTPath should provide some formatters and
+     *        joiners which can take over this rather common task that is
+     *        performed here. */
+    size_t const cch = RTStrPrintfV(pszBuf, cchBuf, pszFormat, va);
+    if (*pszBuf != '/')
+    {
+        /* Relative name: shift it (terminator included) and put the prefix in front. */
+        AssertReturn(cchBuf >= cch + cchPrefix + 1, VERR_BUFFER_OVERFLOW);
+        memmove(pszBuf + cchPrefix, pszBuf, cch + 1);
+        memcpy(pszBuf, pszPrefix, cchPrefix);
+    }
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Varargs front end for rtLinuxConstructPathV.
+ *
+ * @returns IPRT status code from rtLinuxConstructPathV.
+ * @param   pszBuf     Where to write the path.
+ * @param   cchBuf     The size of the buffer pointed to by @a pszBuf.
+ * @param   pszPrefix  The prefix to prepend if the path is relative.  Must
+ *                     end in '/'.
+ * @param   pszFormat  The name format, either absolute or relative to
+ *                     @a pszPrefix.
+ * @param   ...        The format args.
+ */
+DECLINLINE(int) rtLinuxConstructPath(char *pszBuf, size_t cchBuf,
+                                     const char *pszPrefix,
+                                     const char *pszFormat, ...)
+{
+    va_list vaArgs;
+    va_start(vaArgs, pszFormat);
+    int const rcConstruct = rtLinuxConstructPathV(pszBuf, cchBuf, pszPrefix, pszFormat, vaArgs);
+    va_end(vaArgs);
+    return rcConstruct;
+}
+
+
+/**
+ * Builds a sysfs path: formats the name and prepends "/sys/" when the result
+ * is relative.
+ *
+ * @returns IPRT status code from rtLinuxConstructPathV.
+ * @param   pszBuf     Where to write the path.  Must be larger than
+ *                     sizeof("/sys/") characters.
+ * @param   cchBuf     The size of the buffer pointed to by @a pszBuf.
+ * @param   pszFormat  The name format, either absolute or relative to "/sys/".
+ * @param   va         The format args.
+ */
+DECLINLINE(int) rtLinuxSysFsConstructPath(char *pszBuf, size_t cchBuf, const char *pszFormat, va_list va)
+{
+    int const rcConstruct = rtLinuxConstructPathV(pszBuf, cchBuf, "/sys/", pszFormat, va);
+    return rcConstruct;
+}
+
+
+/**
+ * Checks whether a sysfs object exists by calling stat() on it.
+ *
+ * The caller's errno is saved on entry and restored before returning.
+ *
+ * @returns VINF_SUCCESS if stat() succeeds, the converted errno if it fails,
+ *          or the path construction failure.
+ * @param   pszFormat  The name format, either absolute or relative to "/sys/".
+ * @param   va         The format args.
+ */
+RTDECL(int) RTLinuxSysFsExistsExV(const char *pszFormat, va_list va)
+{
+    int const iErrnoOnEntry = errno;
+
+    char szPath[RTPATH_MAX];
+    int rc = rtLinuxSysFsConstructPath(szPath, sizeof(szPath), pszFormat, va);
+    if (RT_SUCCESS(rc))
+    {
+        struct stat StatBuf;
+        if (stat(szPath, &StatBuf) != 0)
+            rc = RTErrConvertFromErrno(errno);
+    }
+
+    errno = iErrnoOnEntry;
+    return rc;
+}
+
+
+/**
+ * Boolean flavour of RTLinuxSysFsExistsExV.
+ *
+ * @returns true if RTLinuxSysFsExistsExV returns a success status, false
+ *          otherwise.
+ * @param   pszFormat  The name format, either absolute or relative to "/sys/".
+ * @param   va         The format args.
+ */
+RTDECL(bool) RTLinuxSysFsExistsV(const char *pszFormat, va_list va)
+{
+    int const rcExists = RTLinuxSysFsExistsExV(pszFormat, va);
+    return RT_SUCCESS(rcExists);
+}
+
+
+/**
+ * Varargs front end for RTLinuxSysFsExistsExV.
+ *
+ * @returns IPRT status code from RTLinuxSysFsExistsExV.
+ * @param   pszFormat  The name format, either absolute or relative to "/sys/".
+ * @param   ...        The format args.
+ */
+RTDECL(int) RTLinuxSysFsExistsEx(const char *pszFormat, ...)
+{
+    va_list vaArgs;
+    va_start(vaArgs, pszFormat);
+    int const rcExists = RTLinuxSysFsExistsExV(pszFormat, vaArgs);
+    va_end(vaArgs);
+    return rcExists;
+}
+
+
+/**
+ * Varargs front end for RTLinuxSysFsExistsV.
+ *
+ * @returns true if the object exists according to RTLinuxSysFsExistsV.
+ * @param   pszFormat  The name format, either absolute or relative to "/sys/".
+ * @param   ...        The format args.
+ */
+RTDECL(bool) RTLinuxSysFsExists(const char *pszFormat, ...)
+{
+    va_list vaArgs;
+    va_start(vaArgs, pszFormat);
+    bool const fExists = RTLinuxSysFsExistsV(pszFormat, vaArgs);
+    va_end(vaArgs);
+    return fExists;
+}
+
+
+/**
+ * Opens a sysfs file for reading (RTFILE_O_OPEN | RTFILE_O_READ |
+ * RTFILE_O_DENY_NONE).
+ *
+ * @returns IPRT status code from path construction or RTFileOpen.
+ * @param   phFile     Where to return the file handle.
+ * @param   pszFormat  The name format, either absolute or relative to "/sys/".
+ * @param   va         The format args.
+ */
+RTDECL(int) RTLinuxSysFsOpenV(PRTFILE phFile, const char *pszFormat, va_list va)
+{
+    char szPath[RTPATH_MAX];
+    int const rcPath = rtLinuxSysFsConstructPath(szPath, sizeof(szPath), pszFormat, va);
+    if (RT_FAILURE(rcPath))
+        return rcPath;
+    return RTFileOpen(phFile, szPath, RTFILE_O_OPEN | RTFILE_O_READ | RTFILE_O_DENY_NONE);
+}
+
+
+/**
+ * Opens a sysfs file with caller supplied open flags.
+ *
+ * @returns IPRT status code from path construction or RTFileOpen.
+ * @param   phFile     Where to return the file handle.
+ * @param   fOpen      Open flags, passed to RTFileOpen unchanged.
+ * @param   pszFormat  The name format, either absolute or relative to "/sys/".
+ * @param   va         The format args.
+ */
+RTDECL(int) RTLinuxSysFsOpenExV(PRTFILE phFile, uint64_t fOpen, const char *pszFormat, va_list va)
+{
+    char szPath[RTPATH_MAX];
+    int const rcPath = rtLinuxSysFsConstructPath(szPath, sizeof(szPath), pszFormat, va);
+    if (RT_FAILURE(rcPath))
+        return rcPath;
+    return RTFileOpen(phFile, szPath, fOpen);
+}
+
+
+/**
+ * Varargs front end for RTLinuxSysFsOpenV.
+ *
+ * @returns IPRT status code from RTLinuxSysFsOpenV.
+ * @param   phFile     Where to return the file handle.
+ * @param   pszFormat  The name format, either absolute or relative to "/sys/".
+ * @param   ...        The format args.
+ */
+RTDECL(int) RTLinuxSysFsOpen(PRTFILE phFile, const char *pszFormat, ...)
+{
+    va_list vaArgs;
+    va_start(vaArgs, pszFormat);
+    int const rcOpen = RTLinuxSysFsOpenV(phFile, pszFormat, vaArgs);
+    va_end(vaArgs);
+    return rcOpen;
+}
+
+
+/**
+ * Varargs front end for RTLinuxSysFsOpenExV.
+ *
+ * @returns IPRT status code from RTLinuxSysFsOpenExV.
+ * @param   phFile     Where to return the file handle.
+ * @param   fOpen      Open flags, passed on unchanged.
+ * @param   pszFormat  The name format, either absolute or relative to "/sys/".
+ * @param   ...        The format args.
+ */
+RTDECL(int) RTLinuxSysFsOpenEx(PRTFILE phFile, uint64_t fOpen, const char *pszFormat, ...)
+{
+    va_list vaArgs;
+    va_start(vaArgs, pszFormat);
+    int const rcOpen = RTLinuxSysFsOpenExV(phFile, fOpen, pszFormat, vaArgs);
+    va_end(vaArgs);
+    return rcOpen;
+}
+
+
+/**
+ * Reads up to @a cchBuf bytes from an open file into @a pszBuf and zero
+ * terminates the result.
+ *
+ * @returns IPRT status code.  VERR_BUFFER_OVERFLOW if the read filled the
+ *          whole buffer; the string is then cut to cchBuf - 1 characters and
+ *          the file position is moved back by one byte.  If that seek fails
+ *          its status is returned instead.
+ * @param   hFile     The file to read from.
+ * @param   pszBuf    Where to store the string.
+ * @param   cchBuf    Size of the buffer pointed to by @a pszBuf.
+ * @param   pcchRead  Where to store the string length (terminator excluded).
+ *                    Optional.
+ */
+RTDECL(int) RTLinuxSysFsReadStr(RTFILE hFile, char *pszBuf, size_t cchBuf, size_t *pcchRead)
+{
+ Assert(cchBuf > 1); /* not mandatory */
+
+ int rc;
+ size_t cchRead;
+ rc = RTFileRead(hFile, pszBuf, cchBuf, &cchRead);
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * ASSUME that if we've read less than we asked for, we've reached the
+ * end of the file. Otherwise, we've been given a buffer too small for
+ * the entire remainder of the file.
+ */
+ if (cchRead < cchBuf)
+ pszBuf[cchRead] = '\0';
+ else if (cchBuf)
+ {
+ /* Step back over the byte that is about to be overwritten by the terminator. */
+ rc = RTFileSeek(hFile, -1, RTFILE_SEEK_CURRENT, NULL);
+ if (RT_SUCCESS(rc))
+ rc = VERR_BUFFER_OVERFLOW;
+ cchRead = cchBuf - 1;
+ pszBuf[cchRead] = '\0';
+ }
+ else
+ rc = VERR_BUFFER_OVERFLOW; /* Zero sized buffer: nothing can be stored. */
+ }
+ else
+ {
+ /* Read failure: hand back an empty string when there is room for one. */
+ if (cchBuf > 0)
+ *pszBuf = '\0';
+ cchRead = 0;
+ }
+
+ if (pcchRead)
+ *pcchRead = cchRead;
+ return rc;
+}
+
+
+/**
+ * Writes a string to an open file.
+ *
+ * @returns IPRT status code from RTFileWrite.
+ * @param   hFile        The file to write to.
+ * @param   pszBuf       The string to write.
+ * @param   cchBuf       Number of bytes to write.  When zero, the whole string
+ *                       including its terminator is written.
+ * @param   pcchWritten  Passed to RTFileWrite unchanged.
+ */
+RTDECL(int) RTLinuxSysFsWriteStr(RTFILE hFile, const char *pszBuf, size_t cchBuf, size_t *pcchWritten)
+{
+    size_t const cbToWrite = cchBuf ? cchBuf : strlen(pszBuf) + 1 /* terminator */;
+    return RTFileWrite(hFile, pszBuf, cbToWrite, pcchWritten);
+}
+
+
+/**
+ * Reads up to @a cbBuf bytes from an open file and reports whether more data
+ * remains.
+ *
+ * When the buffer was filled completely, one extra byte is read to probe for
+ * end-of-file; if that succeeds the file position is restored and
+ * VERR_BUFFER_OVERFLOW is returned.
+ *
+ * @returns IPRT status code.
+ * @param   hFile    The file to read from.
+ * @param   pvBuf    Where to store the data.
+ * @param   cbBuf    Size of the buffer pointed to by @a pvBuf.
+ * @param   pcbRead  Where to store the number of bytes read.  Optional, and
+ *                   only written when the initial read succeeds.
+ */
+RTDECL(int) RTLinuxSysFsReadFile(RTFILE hFile, void *pvBuf, size_t cbBuf, size_t *pcbRead)
+{
+ int rc;
+ size_t cbRead = 0;
+
+ rc = RTFileRead(hFile, pvBuf, cbBuf, &cbRead);
+ if (RT_SUCCESS(rc))
+ {
+ if (pcbRead)
+ *pcbRead = cbRead;
+ if (cbRead < cbBuf)
+ rc = VINF_SUCCESS;
+ else
+ {
+ /* Check for EOF */
+ uint64_t offCur = 0;
+ uint8_t bRead;
+ rc = RTFileSeek(hFile, 0, RTFILE_SEEK_CURRENT, &offCur);
+ if (RT_SUCCESS(rc))
+ {
+ int rc2 = RTFileRead(hFile, &bRead, 1, NULL);
+ if (RT_SUCCESS(rc2))
+ {
+ /* There is more data than the buffer holds; undo the probe read. */
+ rc = VERR_BUFFER_OVERFLOW;
+
+ rc2 = RTFileSeek(hFile, offCur, RTFILE_SEEK_BEGIN, NULL);
+ if (RT_FAILURE(rc2))
+ rc = rc2;
+ }
+ else if (rc2 != VERR_EOF)
+ rc = rc2;
+ /* VERR_EOF from the probe leaves rc at the seek's success status. */
+ }
+ }
+ }
+
+ return rc;
+}
+
+
+/**
+ * Writes a buffer to an open file; a plain pass-through to RTFileWrite.
+ *
+ * @returns IPRT status code from RTFileWrite.
+ * @param   hFile       The file to write to.
+ * @param   pvBuf       The data to write.
+ * @param   cbBuf       Number of bytes to write.
+ * @param   pcbWritten  Passed to RTFileWrite unchanged.
+ */
+RTDECL(int) RTLinuxSysFsWriteFile(RTFILE hFile, void *pvBuf, size_t cbBuf, size_t *pcbWritten)
+{
+    int const rcWrite = RTFileWrite(hFile, pvBuf, cbBuf, pcbWritten);
+    return rcWrite;
+}
+
+
+/**
+ * Reads a signed integer from a sysfs file.
+ *
+ * @returns IPRT status code; the status of RTStrToInt64Ex is returned as is,
+ *          and VERR_INVALID_PARAMETER if the file yielded an empty string.
+ * @param   uBase      The number base, passed to RTStrToInt64Ex.
+ * @param   pi64       Where to store the value.  Only written on success.
+ * @param   pszFormat  The name format, either absolute or relative to "/sys/".
+ * @param   va         The format args.
+ */
+RTDECL(int) RTLinuxSysFsReadIntFileV(unsigned uBase, int64_t *pi64, const char *pszFormat, va_list va)
+{
+    AssertPtrReturn(pi64, VERR_INVALID_POINTER);
+
+    RTFILE hFile;
+    int rc = RTLinuxSysFsOpenV(&hFile, pszFormat, va);
+    if (RT_FAILURE(rc))
+        return rc;
+
+    char   szNum[128];
+    size_t cchNum;
+    rc = RTLinuxSysFsReadStr(hFile, szNum, sizeof(szNum), &cchNum);
+    if (RT_SUCCESS(rc))
+    {
+        if (cchNum == 0)
+            rc = VERR_INVALID_PARAMETER;
+        else
+        {
+            int64_t i64Value = -1;
+            rc = RTStrToInt64Ex(szNum, NULL, uBase, &i64Value);
+            if (RT_SUCCESS(rc))
+                *pi64 = i64Value;
+        }
+    }
+
+    RTFileClose(hFile);
+    return rc;
+}
+
+
+/**
+ * Varargs front end for RTLinuxSysFsReadIntFileV.
+ *
+ * @returns IPRT status code from RTLinuxSysFsReadIntFileV.
+ * @param   uBase      The number base.
+ * @param   pi64       Where to store the value.
+ * @param   pszFormat  The name format, either absolute or relative to "/sys/".
+ * @param   ...        The format args.
+ */
+RTDECL(int) RTLinuxSysFsReadIntFile(unsigned uBase, int64_t *pi64, const char *pszFormat, ...)
+{
+    va_list vaArgs;
+    va_start(vaArgs, pszFormat);
+    int const rcRead = RTLinuxSysFsReadIntFileV(uBase, pi64, pszFormat, vaArgs);
+    va_end(vaArgs);
+    return rcRead;
+}
+
+
+/**
+ * Writes an 8-bit unsigned value to a sysfs file by widening it and calling
+ * RTLinuxSysFsWriteU64FileV.
+ *
+ * @returns IPRT status code from RTLinuxSysFsWriteU64FileV.
+ * @param   uBase      The number base (see RTLinuxSysFsWriteU64FileV).
+ * @param   u8         The value to write.
+ * @param   pszFormat  The name format, either absolute or relative to "/sys/".
+ * @param   va         The format args.
+ */
+RTDECL(int) RTLinuxSysFsWriteU8FileV(unsigned uBase, uint8_t u8, const char *pszFormat, va_list va)
+{
+    uint64_t const uWidened = u8;
+    return RTLinuxSysFsWriteU64FileV(uBase, uWidened, pszFormat, va);
+}
+
+
+/**
+ * Varargs variant for writing an 8-bit unsigned value; calls
+ * RTLinuxSysFsWriteU64FileV directly.
+ *
+ * @returns IPRT status code from RTLinuxSysFsWriteU64FileV.
+ * @param   uBase      The number base.
+ * @param   u8         The value to write.
+ * @param   pszFormat  The name format, either absolute or relative to "/sys/".
+ * @param   ...        The format args.
+ */
+RTDECL(int) RTLinuxSysFsWriteU8File(unsigned uBase, uint8_t u8, const char *pszFormat, ...)
+{
+    va_list vaArgs;
+    va_start(vaArgs, pszFormat);
+    int const rcWrite = RTLinuxSysFsWriteU64FileV(uBase, u8, pszFormat, vaArgs);
+    va_end(vaArgs);
+    return rcWrite;
+}
+
+
+/**
+ * Writes a 16-bit unsigned value to a sysfs file by widening it and calling
+ * RTLinuxSysFsWriteU64FileV.
+ *
+ * @returns IPRT status code from RTLinuxSysFsWriteU64FileV.
+ * @param   uBase      The number base (see RTLinuxSysFsWriteU64FileV).
+ * @param   u16        The value to write.
+ * @param   pszFormat  The name format, either absolute or relative to "/sys/".
+ * @param   va         The format args.
+ */
+RTDECL(int) RTLinuxSysFsWriteU16FileV(unsigned uBase, uint16_t u16, const char *pszFormat, va_list va)
+{
+    uint64_t const uWidened = u16;
+    return RTLinuxSysFsWriteU64FileV(uBase, uWidened, pszFormat, va);
+}
+
+
+/**
+ * Varargs variant for writing a 16-bit unsigned value; calls
+ * RTLinuxSysFsWriteU64FileV directly.
+ *
+ * @returns IPRT status code from RTLinuxSysFsWriteU64FileV.
+ * @param   uBase      The number base.
+ * @param   u16        The value to write.
+ * @param   pszFormat  The name format, either absolute or relative to "/sys/".
+ * @param   ...        The format args.
+ */
+RTDECL(int) RTLinuxSysFsWriteU16File(unsigned uBase, uint16_t u16, const char *pszFormat, ...)
+{
+    va_list vaArgs;
+    va_start(vaArgs, pszFormat);
+    int const rcWrite = RTLinuxSysFsWriteU64FileV(uBase, u16, pszFormat, vaArgs);
+    va_end(vaArgs);
+    return rcWrite;
+}
+
+
+/**
+ * Writes a 32-bit unsigned value to a sysfs file by widening it and calling
+ * RTLinuxSysFsWriteU64FileV.
+ *
+ * @returns IPRT status code from RTLinuxSysFsWriteU64FileV.
+ * @param   uBase      The number base (see RTLinuxSysFsWriteU64FileV).
+ * @param   u32        The value to write.
+ * @param   pszFormat  The name format, either absolute or relative to "/sys/".
+ * @param   va         The format args.
+ */
+RTDECL(int) RTLinuxSysFsWriteU32FileV(unsigned uBase, uint32_t u32, const char *pszFormat, va_list va)
+{
+    uint64_t const uWidened = u32;
+    return RTLinuxSysFsWriteU64FileV(uBase, uWidened, pszFormat, va);
+}
+
+
+/**
+ * Varargs variant for writing a 32-bit unsigned value; calls
+ * RTLinuxSysFsWriteU64FileV directly.
+ *
+ * @returns IPRT status code from RTLinuxSysFsWriteU64FileV.
+ * @param   uBase      The number base.
+ * @param   u32        The value to write.
+ * @param   pszFormat  The name format, either absolute or relative to "/sys/".
+ * @param   ...        The format args.
+ */
+RTDECL(int) RTLinuxSysFsWriteU32File(unsigned uBase, uint32_t u32, const char *pszFormat, ...)
+{
+    va_list vaArgs;
+    va_start(vaArgs, pszFormat);
+    int const rcWrite = RTLinuxSysFsWriteU64FileV(uBase, u32, pszFormat, vaArgs);
+    va_end(vaArgs);
+    return rcWrite;
+}
+
+
+/**
+ * Formats a 64-bit unsigned value and writes it to a sysfs file.
+ *
+ * @returns IPRT status code.  VERR_INVALID_PARAMETER for an unsupported base
+ *          or if formatting produced nothing; VERR_BUFFER_OVERFLOW if fewer
+ *          bytes than formatted were written.
+ * @param   uBase      The number base: 8, 10 or 16.  Octal and hex output
+ *                     uses the '#' format flag.
+ * @param   u64        The value to write.
+ * @param   pszFormat  The name format, either absolute or relative to "/sys/".
+ * @param   va         The format args.
+ */
+RTDECL(int) RTLinuxSysFsWriteU64FileV(unsigned uBase, uint64_t u64, const char *pszFormat, va_list va)
+{
+    /* Pick the number format before touching the file. */
+    const char *pszNumFmt;
+    if (uBase == 8)
+        pszNumFmt = "%#llo";
+    else if (uBase == 10)
+        pszNumFmt = "%llu";
+    else if (uBase == 16)
+        pszNumFmt = "%#llx";
+    else
+        return VERR_INVALID_PARAMETER;
+
+    RTFILE hFile;
+    int rc = RTLinuxSysFsOpenExV(&hFile, RTFILE_O_OPEN | RTFILE_O_WRITE | RTFILE_O_DENY_NONE, pszFormat, va);
+    if (RT_FAILURE(rc))
+        return rc;
+
+    char szNum[128];
+    size_t const cchNum = RTStrPrintf(szNum, sizeof(szNum), pszNumFmt, u64);
+    if (cchNum == 0)
+        rc = VERR_INVALID_PARAMETER;
+    else
+    {
+        /* The terminator is not written: cchNum is passed as explicit length. */
+        size_t cbWritten = 0;
+        rc = RTLinuxSysFsWriteStr(hFile, &szNum[0], cchNum, &cbWritten);
+        if (RT_SUCCESS(rc) && cbWritten != cchNum)
+            rc = VERR_BUFFER_OVERFLOW;
+    }
+
+    RTFileClose(hFile);
+    return rc;
+}
+
+
+/**
+ * Varargs front end for RTLinuxSysFsWriteU64FileV.
+ *
+ * NOTE(review): the value parameter is named u64 but declared uint32_t, so
+ * nothing wider than 32 bits can be passed through this entry point.
+ * Presumably uint64_t was intended; the prototype (not visible here) would
+ * have to change in lockstep - confirm before touching the signature.
+ *
+ * @returns IPRT status code from RTLinuxSysFsWriteU64FileV.
+ * @param   uBase      The number base.
+ * @param   u64        The value to write.
+ * @param   pszFormat  The name format, either absolute or relative to "/sys/".
+ * @param   ...        The format args.
+ */
+RTDECL(int) RTLinuxSysFsWriteU64File(unsigned uBase, uint32_t u64, const char *pszFormat, ...)
+{
+ va_list va;
+ va_start(va, pszFormat);
+ int rc = RTLinuxSysFsWriteU64FileV(uBase, u64, pszFormat, va);
+ va_end(va);
+ return rc;
+}
+
+
+/**
+ * Reads a device number in "major:minor" decimal notation from a sysfs file.
+ *
+ * @returns IPRT status code.  VERR_INVALID_PARAMETER if the file is empty or
+ *          its content does not parse as major ':' minor.
+ * @param   pDevNum    Where to store the device number built with makedev().
+ *                     Only written when both numbers parse.
+ * @param   pszFormat  The name format, either absolute or relative to "/sys/".
+ * @param   va         The format args.
+ */
+RTDECL(int) RTLinuxSysFsReadDevNumFileV(dev_t *pDevNum, const char *pszFormat, va_list va)
+{
+    AssertPtrReturn(pDevNum, VERR_INVALID_POINTER);
+
+    RTFILE hFile;
+    int rc = RTLinuxSysFsOpenV(&hFile, pszFormat, va);
+    if (RT_FAILURE(rc))
+        return rc;
+
+    size_t cchNum = 0;
+    char   szNum[128];
+    rc = RTLinuxSysFsReadStr(hFile, szNum, sizeof(szNum), &cchNum);
+    if (RT_SUCCESS(rc))
+    {
+        if (cchNum == 0)
+            rc = VERR_INVALID_PARAMETER;
+        else
+        {
+            uint32_t uMajor        = 0;
+            uint32_t uMinor        = 0;
+            char    *pszAfterMajor = NULL;
+            rc = RTStrToUInt32Ex(szNum, &pszAfterMajor, 10, &uMajor);
+            /* The major number must be followed by something, and that something must be ':'. */
+            if (   RT_SUCCESS(rc)
+                && rc == VWRN_TRAILING_CHARS
+                && *pszAfterMajor == ':')
+            {
+                rc = RTStrToUInt32Ex(pszAfterMajor + 1, NULL, 10, &uMinor);
+                if (   rc == VINF_SUCCESS
+                    || rc == VWRN_TRAILING_CHARS
+                    || rc == VWRN_TRAILING_SPACES)
+                    *pDevNum = makedev(uMajor, uMinor);
+                else
+                    rc = VERR_INVALID_PARAMETER;
+            }
+            else
+                rc = VERR_INVALID_PARAMETER;
+        }
+    }
+
+    RTFileClose(hFile);
+    return rc;
+}
+
+
+/**
+ * Varargs front end for RTLinuxSysFsReadDevNumFileV.
+ *
+ * @returns IPRT status code from RTLinuxSysFsReadDevNumFileV.
+ * @param   pDevNum    Where to store the device number.
+ * @param   pszFormat  The name format, either absolute or relative to "/sys/".
+ * @param   ...        The format args.
+ */
+RTDECL(int) RTLinuxSysFsReadDevNumFile(dev_t *pDevNum, const char *pszFormat, ...)
+{
+    va_list vaArgs;
+    va_start(vaArgs, pszFormat);
+    int const rcRead = RTLinuxSysFsReadDevNumFileV(pDevNum, pszFormat, vaArgs);
+    va_end(vaArgs);
+    return rcRead;
+}
+
+
+/**
+ * Opens a sysfs file and reads its first line (or as much as fits) as a zero
+ * terminated string.
+ *
+ * The string is cut at the first newline found in the data read.  If there is
+ * no newline and the read filled the whole buffer, the string is truncated to
+ * cchBuf - 1 characters and VERR_BUFFER_OVERFLOW is returned.
+ *
+ * @returns IPRT status code.
+ * @param   pszBuf     Where to store the string.
+ * @param   cchBuf     Size of the buffer pointed to by @a pszBuf.
+ * @param   pcchRead   Where to store the string length.  Optional; set to 0
+ *                     on open or read failure.
+ * @param   pszFormat  The name format, either absolute or relative to "/sys/".
+ * @param   va         The format args.
+ */
+RTDECL(int) RTLinuxSysFsReadStrFileV(char *pszBuf, size_t cchBuf, size_t *pcchRead, const char *pszFormat, va_list va)
+{
+ RTFILE hFile;
+
+ AssertPtrReturn(pszBuf, VERR_INVALID_POINTER);
+
+ int rc = RTLinuxSysFsOpenV(&hFile, pszFormat, va);
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * Note! We cannot use RTLinuxSysFsReadStr here as it has different
+ * semantics wrt to newline characters. It is not known why
+ * the semantics has to differ... Michael, any clues?
+ */
+ size_t cchRead;
+ rc = RTFileRead(hFile, pszBuf, cchBuf, &cchRead);
+ if (RT_SUCCESS(rc))
+ {
+ char *pchNewLine = (char *)memchr(pszBuf, '\n', cchRead);
+ if (pchNewLine)
+ {
+ /* Terminate at the newline; anything after it is dropped. */
+ *pchNewLine = '\0';
+ cchRead = pchNewLine - pszBuf;
+ }
+ else if (cchRead < cchBuf)
+ pszBuf[cchRead] = '\0';
+ else
+ {
+ /* Buffer filled without a newline: truncate and report overflow. */
+ if (cchBuf)
+ {
+ cchRead = cchBuf - 1;
+ pszBuf[cchRead] = '\0';
+ }
+ else
+ cchRead = 0;
+ rc = VERR_BUFFER_OVERFLOW;
+ }
+ }
+ else
+ cchRead = 0; /* Note: the buffer is not terminated on this path. */
+
+ RTFileClose(hFile);
+
+ if (pcchRead)
+ *pcchRead = cchRead;
+ }
+ else
+ {
+ /* Open failure: return an empty string if the buffer can hold one. */
+ if (cchBuf)
+ *pszBuf = '\0';
+ if (pcchRead)
+ *pcchRead = 0;
+ }
+ return rc;
+}
+
+
+/**
+ * Varargs front end for RTLinuxSysFsReadStrFileV.
+ *
+ * @returns IPRT status code from RTLinuxSysFsReadStrFileV.
+ * @param   pszBuf     Where to store the string.
+ * @param   cchBuf     Size of the buffer pointed to by @a pszBuf.
+ * @param   pcchRead   Where to store the string length.  Optional.
+ * @param   pszFormat  The name format, either absolute or relative to "/sys/".
+ * @param   ...        The format args.
+ */
+RTDECL(int) RTLinuxSysFsReadStrFile(char *pszBuf, size_t cchBuf, size_t *pcchRead, const char *pszFormat, ...)
+{
+    va_list vaArgs;
+    va_start(vaArgs, pszFormat);
+    int const rcRead = RTLinuxSysFsReadStrFileV(pszBuf, cchBuf, pcchRead, pszFormat, vaArgs);
+    va_end(vaArgs);
+    return rcRead;
+}
+
+
+/**
+ * Opens a sysfs file for writing and writes a string to it.
+ *
+ * @returns IPRT status code from the open or from RTLinuxSysFsWriteStr.
+ * @param   pszBuf       The string to write.
+ * @param   cchBuf       Number of bytes to write; see RTLinuxSysFsWriteStr
+ *                       for the meaning of zero.
+ * @param   pcchWritten  Passed to RTLinuxSysFsWriteStr unchanged.
+ * @param   pszFormat    The name format, either absolute or relative to
+ *                       "/sys/".
+ * @param   va           The format args.
+ */
+RTDECL(int) RTLinuxSysFsWriteStrFileV(const char *pszBuf, size_t cchBuf, size_t *pcchWritten, const char *pszFormat, va_list va)
+{
+    AssertPtrReturn(pszBuf, VERR_INVALID_POINTER);
+
+    RTFILE hFile;
+    int rc = RTLinuxSysFsOpenExV(&hFile, RTFILE_O_OPEN | RTFILE_O_WRITE | RTFILE_O_DENY_NONE, pszFormat, va);
+    if (RT_FAILURE(rc))
+        return rc;
+
+    rc = RTLinuxSysFsWriteStr(hFile, pszBuf, cchBuf, pcchWritten);
+    RTFileClose(hFile);
+    return rc;
+}
+
+
+/**
+ * Varargs front end for RTLinuxSysFsWriteStrFileV.
+ *
+ * @returns IPRT status code from RTLinuxSysFsWriteStrFileV.
+ * @param   pszBuf       The string to write.
+ * @param   cchBuf       Number of bytes to write.
+ * @param   pcchWritten  Passed on unchanged.
+ * @param   pszFormat    The name format, either absolute or relative to
+ *                       "/sys/".
+ * @param   ...          The format args.
+ */
+RTDECL(int) RTLinuxSysFsWriteStrFile(const char *pszBuf, size_t cchBuf, size_t *pcchWritten, const char *pszFormat, ...)
+{
+    va_list vaArgs;
+    va_start(vaArgs, pszFormat);
+    int const rcWrite = RTLinuxSysFsWriteStrFileV(pszBuf, cchBuf, pcchWritten, pszFormat, vaArgs);
+    va_end(vaArgs);
+    return rcWrite;
+}
+
+/**
+ * Reads a sysfs symbolic link and returns the last path component of its
+ * target.
+ *
+ * @returns IPRT status code.  VERR_BUFFER_OVERFLOW if the name does not fit
+ *          into @a pszBuf (the buffer is left untouched in that case).
+ * @param   pszBuf     Where to store the name.  Set to an empty string when
+ *                     the link target has no filename component.
+ * @param   cchBuf     Size of the buffer; must be at least 2.
+ * @param   pchBuf     Where to store the name length.  Optional; only written
+ *                     when the link could be read.
+ * @param   pszFormat  The name format, either absolute or relative to "/sys/".
+ * @param   va         The format args.
+ */
+RTDECL(int) RTLinuxSysFsGetLinkDestV(char *pszBuf, size_t cchBuf, size_t *pchBuf, const char *pszFormat, va_list va)
+{
+    AssertReturn(cchBuf >= 2, VERR_INVALID_PARAMETER);
+
+    /* Build the path of the link. */
+    char szFilename[RTPATH_MAX];
+    int rc = rtLinuxSysFsConstructPath(szFilename, sizeof(szFilename), pszFormat, va);
+    if (RT_FAILURE(rc))
+        return rc;
+
+    /* Resolve it. */
+    char szLink[RTPATH_MAX];
+    rc = RTSymlinkRead(szFilename, szLink, sizeof(szLink), 0);
+    if (RT_FAILURE(rc))
+        return rc;
+
+    /* Hand back the filename component of the target. */
+    size_t      cchName = 0;
+    const char *pszName = RTPathFilename(szLink);
+    if (!pszName)
+        *pszBuf = '\0';
+    else
+    {
+        cchName = strlen(pszName);
+        if (cchName >= cchBuf)
+            rc = VERR_BUFFER_OVERFLOW;
+        else
+            memcpy(pszBuf, pszName, cchName + 1);
+    }
+
+    if (pchBuf)
+        *pchBuf = cchName;
+    return rc;
+}
+
+
+/**
+ * Varargs front end for RTLinuxSysFsGetLinkDestV.
+ *
+ * @returns IPRT status code from RTLinuxSysFsGetLinkDestV.
+ * @param   pszBuf     Where to store the name.
+ * @param   cchBuf     Size of the buffer pointed to by @a pszBuf.
+ * @param   pchBuf     Where to store the name length.  Optional.
+ * @param   pszFormat  The name format, either absolute or relative to "/sys/".
+ * @param   ...        The format args.
+ */
+RTDECL(int) RTLinuxSysFsGetLinkDest(char *pszBuf, size_t cchBuf, size_t *pchBuf, const char *pszFormat, ...)
+{
+    va_list vaArgs;
+    va_start(vaArgs, pszFormat);
+    int const rcLink = RTLinuxSysFsGetLinkDestV(pszBuf, cchBuf, pchBuf, pszFormat, vaArgs);
+    va_end(vaArgs);
+    return rcLink;
+}
+
+
+/**
+ * Checks whether a device node with the given number and type exists at the
+ * path produced from @a pszPattern, and returns that path.
+ *
+ * @returns IPRT status code.  VERR_FILE_NOT_FOUND if the path does not exist
+ *          or refers to a different device number or type;
+ *          VERR_BUFFER_OVERFLOW if the path does not fit into @a pszBuf.
+ * @param   DevNum      The device number to look for.
+ * @param   fMode       RTFS_TYPE_DEV_CHAR or RTFS_TYPE_DEV_BLOCK.
+ * @param   pszBuf      Where to store the path on success.
+ * @param   cchBuf      Size of the buffer; must be at least 2.
+ * @param   pszPattern  The path format, either absolute or relative to
+ *                      "/dev/".
+ * @param   va          The format args.
+ */
+RTDECL(int) RTLinuxCheckDevicePathV(dev_t DevNum, RTFMODE fMode, char *pszBuf,
+                                    size_t cchBuf, const char *pszPattern,
+                                    va_list va)
+{
+    AssertReturn(cchBuf >= 2, VERR_INVALID_PARAMETER);
+    AssertReturn(   fMode == RTFS_TYPE_DEV_CHAR
+                 || fMode == RTFS_TYPE_DEV_BLOCK,
+                 VERR_INVALID_PARAMETER);
+    AssertPtrReturn(pszPattern, VERR_INVALID_PARAMETER);
+
+    /* Construct the candidate path. */
+    char szFilename[RTPATH_MAX];
+    int rc = rtLinuxConstructPathV(szFilename, sizeof(szFilename), "/dev/",
+                                   pszPattern, va);
+    if (RT_FAILURE(rc))
+        return rc;
+
+    /* Query the object and compare device number and type. */
+    RTFSOBJINFO Info;
+    rc = RTPathQueryInfo(szFilename, &Info, RTFSOBJATTRADD_UNIX);
+    if (rc == VERR_PATH_NOT_FOUND)
+        rc = VERR_FILE_NOT_FOUND;
+    else if (   RT_SUCCESS(rc)
+             && (   Info.Attr.u.Unix.Device != DevNum
+                 || (Info.Attr.fMode & RTFS_TYPE_MASK) != fMode))
+        rc = VERR_FILE_NOT_FOUND;
+    if (RT_FAILURE(rc))
+        return rc;
+
+    /* It matches, return the path. */
+    size_t const cchPath = strlen(szFilename);
+    if (cchPath >= cchBuf)
+        return VERR_BUFFER_OVERFLOW;
+    memcpy(pszBuf, szFilename, cchPath + 1);
+    return rc;
+}
+
+
+/**
+ * Varargs front end for RTLinuxCheckDevicePathV.
+ *
+ * @returns IPRT status code from RTLinuxCheckDevicePathV.
+ * @param   DevNum      The device number to look for.
+ * @param   fMode       The device type.
+ * @param   pszBuf      Where to store the path on success.
+ * @param   cchBuf      Size of the buffer pointed to by @a pszBuf.
+ * @param   pszPattern  The path format, either absolute or relative to
+ *                      "/dev/".
+ * @param   ...         The format args.
+ */
+RTDECL(int) RTLinuxCheckDevicePath(dev_t DevNum, RTFMODE fMode, char *pszBuf,
+                                   size_t cchBuf, const char *pszPattern,
+                                   ...)
+{
+    va_list vaArgs;
+    va_start(vaArgs, pszPattern);
+    int const rcCheck = RTLinuxCheckDevicePathV(DevNum, fMode, pszBuf, cchBuf,
+                                                pszPattern, vaArgs);
+    va_end(vaArgs);
+    return rcCheck;
+}
+
diff --git a/src/VBox/Runtime/r3/linux/systemmem-linux.cpp b/src/VBox/Runtime/r3/linux/systemmem-linux.cpp
new file mode 100644
index 00000000..93bd0c0a
--- /dev/null
+++ b/src/VBox/Runtime/r3/linux/systemmem-linux.cpp
@@ -0,0 +1,109 @@
+/* $Id: systemmem-linux.cpp $ */
+/** @file
+ * IPRT - RTSystemQueryTotalRam, Linux ring-3.
+ */
+
+/*
+ * Copyright (C) 2012-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
+ * VirtualBox OSE distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#include <iprt/system.h>
+#include "internal/iprt.h"
+
+#include <iprt/errcore.h>
+#include <iprt/assert.h>
+#include <iprt/string.h>
+
+#include <stdio.h>
+#include <errno.h>
+
+/* Satisfy compiler warning */
+#define __EXPORTED_HEADERS__
+#include <sys/sysinfo.h>
+#undef __EXPORTED_HEADERS__
+
+
+/**
+ * Queries the total amount of RAM via sysinfo().
+ *
+ * @returns VINF_SUCCESS, or the converted errno if sysinfo() fails.
+ * @param   pcb  Where to store totalram * mem_unit.
+ */
+RTDECL(int) RTSystemQueryTotalRam(uint64_t *pcb)
+{
+    AssertPtrReturn(pcb, VERR_INVALID_POINTER);
+
+    struct sysinfo info;
+    if (sysinfo(&info) != 0)
+        return RTErrConvertFromErrno(errno);
+
+    /* Widen before multiplying so the product is computed in 64 bits. */
+    *pcb = (uint64_t)info.totalram * info.mem_unit;
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Queries the amount of available RAM.
+ *
+ * The primary source is /proc/meminfo, from which the MemFree, Buffers and
+ * Cached values are summed and multiplied by _1K.  If the file cannot be
+ * opened, none of the fields is found or one of them fails to parse, the
+ * function falls back to sysinfo().
+ *
+ * @returns VINF_SUCCESS, or the converted errno if the sysinfo() fallback
+ *          fails too.
+ * @param   pcb  Where to store the number of available bytes.
+ */
+RTDECL(int) RTSystemQueryAvailableRam(uint64_t *pcb)
+{
+    AssertPtrReturn(pcb, VERR_INVALID_POINTER);
+
+    FILE *pFile = fopen("/proc/meminfo", "r");
+    if (pFile)
+    {
+        int      rc        = VINF_SUCCESS;
+        bool     fFoundAny = false;
+        uint64_t cbFree    = 0;
+        uint64_t cbBuffers = 0;
+        uint64_t cbCached  = 0;
+        char     sz[256];
+        while (fgets(sz, sizeof(sz), pFile))
+        {
+            /* Work out which field this line holds; lines we do not care
+               about are skipped instead of affecting the status. */
+            uint64_t *pcbField;
+            size_t    offValue;
+            if (!strncmp(sz, RT_STR_TUPLE("MemFree:")))
+            {
+                pcbField = &cbFree;
+                offValue = sizeof("MemFree:") - 1;
+            }
+            else if (!strncmp(sz, RT_STR_TUPLE("Buffers:")))
+            {
+                pcbField = &cbBuffers;
+                offValue = sizeof("Buffers:") - 1;
+            }
+            else if (!strncmp(sz, RT_STR_TUPLE("Cached:")))
+            {
+                pcbField = &cbCached;
+                offValue = sizeof("Cached:") - 1;
+            }
+            else
+                continue;
+
+            /* The value starts right after the colon; RTStrStripL skips the padding. */
+            rc = RTStrToUInt64Ex(RTStrStripL(&sz[offValue]), NULL, 0, pcbField);
+            if (RT_FAILURE(rc))
+                break;
+            fFoundAny = true;
+        }
+        fclose(pFile);
+        if (RT_SUCCESS(rc) && fFoundAny)
+        {
+            *pcb = (cbFree + cbBuffers + cbCached) * _1K;
+            return VINF_SUCCESS;
+        }
+    }
+    /*
+     * Fallback (e.g. /proc not mapped) to sysinfo. Less accurate because there
+     * is no information about the cached memory. 'Cached:' from above is only
+     * accessible through proc :-(
+     */
+    struct sysinfo info;
+    int rc = sysinfo(&info);
+    if (rc == 0)
+    {
+        *pcb = ((uint64_t)info.freeram + info.bufferram) * info.mem_unit;
+        return VINF_SUCCESS;
+    }
+    return RTErrConvertFromErrno(errno);
+}
+
diff --git a/src/VBox/Runtime/r3/linux/thread-affinity-linux.cpp b/src/VBox/Runtime/r3/linux/thread-affinity-linux.cpp
new file mode 100644
index 00000000..7b5cdf51
--- /dev/null
+++ b/src/VBox/Runtime/r3/linux/thread-affinity-linux.cpp
@@ -0,0 +1,95 @@
+/* $Id: thread-affinity-linux.cpp $ */
+/** @file
+ * IPRT - Thread Affinity, Linux ring-3 implementation.
+ */
+
+/*
+ * Copyright (C) 2011-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
+ * VirtualBox OSE distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#ifndef _GNU_SOURCE
+# define _GNU_SOURCE
+#endif
+#include <features.h>
+#if __GLIBC_PREREQ(2,4)
+
+#include <sched.h>
+#include <unistd.h>
+#include <errno.h>
+#include <pthread.h>
+
+#include <iprt/thread.h>
+#include "internal/iprt.h"
+
+#include <iprt/assert.h>
+#include <iprt/cpuset.h>
+#include <iprt/err.h>
+#include <iprt/mp.h>
+
+
+
+/**
+ * Sets the CPU affinity mask of the calling thread.
+ *
+ * @returns VINF_SUCCESS on success, VERR_CPU_NOT_FOUND if
+ *          pthread_setaffinity_np reports ENOENT, otherwise the converted
+ *          error number.
+ * @param   pCpuSet  The set of CPUs the thread may run on.  NULL selects
+ *                   every CPU index below CPU_SETSIZE.
+ */
+RTR3DECL(int) RTThreadSetAffinity(PCRTCPUSET pCpuSet)
+{
+    /* convert */
+    cpu_set_t LnxCpuSet;
+    CPU_ZERO(&LnxCpuSet);
+    if (!pCpuSet)
+        for (unsigned iCpu = 0; iCpu < CPU_SETSIZE; iCpu++)
+            CPU_SET(iCpu, &LnxCpuSet);
+    else
+        for (unsigned iCpu = 0; iCpu < RT_MIN(CPU_SETSIZE, RTCPUSET_MAX_CPUS); iCpu++)
+            if (RTCpuSetIsMemberByIndex(pCpuSet, iCpu))
+                CPU_SET(iCpu, &LnxCpuSet);
+
+    /* pthread_setaffinity_np returns the error number itself and leaves errno
+       alone, so the return value is what must be inspected and converted. */
+    int rc = pthread_setaffinity_np(pthread_self(), sizeof(LnxCpuSet), &LnxCpuSet);
+    if (!rc)
+        return VINF_SUCCESS;
+    if (rc == ENOENT)
+        return VERR_CPU_NOT_FOUND;
+    return RTErrConvertFromErrno(rc);
+}
+
+
+/**
+ * Gets the CPU affinity mask of the calling thread.
+ *
+ * @returns VINF_SUCCESS on success, otherwise the converted error number
+ *          returned by pthread_getaffinity_np.
+ * @param   pCpuSet  Where to return the set.  Only CPU indexes below
+ *                   RT_MIN(CPU_SETSIZE, RTCPUSET_MAX_CPUS) are transferred.
+ */
+RTR3DECL(int) RTThreadGetAffinity(PRTCPUSET pCpuSet)
+{
+    /* pthread_getaffinity_np returns the error number itself and leaves errno
+       alone, so convert the return value rather than errno. */
+    cpu_set_t LnxCpuSet;
+    int rc = pthread_getaffinity_np(pthread_self(), sizeof(LnxCpuSet), &LnxCpuSet);
+    if (rc != 0)
+        return RTErrConvertFromErrno(rc);
+
+    /* convert */
+    RTCpuSetEmpty(pCpuSet);
+    for (unsigned iCpu = 0; iCpu < RT_MIN(CPU_SETSIZE, RTCPUSET_MAX_CPUS); iCpu++)
+        if (CPU_ISSET(iCpu, &LnxCpuSet))
+            RTCpuSetAddByIndex(pCpuSet, iCpu);
+
+    return VINF_SUCCESS;
+}
+
+#else
+# include "../../generic/RTThreadGetAffinity-stub-generic.cpp"
+# include "../../generic/RTThreadSetAffinity-stub-generic.cpp"
+#endif
+
diff --git a/src/VBox/Runtime/r3/linux/time-linux.cpp b/src/VBox/Runtime/r3/linux/time-linux.cpp
new file mode 100644
index 00000000..fdc062ef
--- /dev/null
+++ b/src/VBox/Runtime/r3/linux/time-linux.cpp
@@ -0,0 +1,159 @@
+/* $Id: time-linux.cpp $ */
+/** @file
+ * IPRT - Time, POSIX.
+ */
+
+/*
+ * Copyright (C) 2006-2019 Oracle Corporation
+ *
+ * This file is part of VirtualBox Open Source Edition (OSE), as
+ * available from http://www.virtualbox.org. This file is free software;
+ * you can redistribute it and/or modify it under the terms of the GNU
+ * General Public License (GPL) as published by the Free Software
+ * Foundation, in version 2 as it comes in the "COPYING" file of the
+ * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
+ * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL) only, as it comes in the "COPYING.CDDL" file of the
+ * VirtualBox OSE distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP RTLOGGROUP_TIME
+#define RTTIME_INCL_TIMEVAL
+#include <sys/time.h>
+#include <time.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#ifndef __NR_clock_gettime
+# define __NR_timer_create 259
+# define __NR_clock_gettime (__NR_timer_create+6)
+#endif
+
+#include <iprt/time.h>
+#include "internal/time.h"
+
+
+/**
+ * Invokes the clock_gettime system call directly through syscall().
+ *
+ * @returns The non-negative syscall result, or -1 for any negative result.
+ * @param   id  The clock to query.
+ * @param   ts  Where to store the time.
+ */
+DECLINLINE(int) sys_clock_gettime(clockid_t id, struct timespec *ts)
+{
+    int const rcSys = syscall(__NR_clock_gettime, id, ts);
+    return rcSys >= 0 ? rcSys : -1;
+}
+
+
+/**
+ * Wrapper around various monotone time sources.
+ *
+ * The first call probes which source works and remembers the result in a
+ * function-local static: 0 = clock_gettime(), 1 = the raw syscall,
+ * -2 = nothing works, -1 = not probed yet.
+ *
+ * @returns 0 on success, -1 if no monotonic source is available; once a
+ *          source has been selected its own return value is passed through.
+ * @param   ts  Where to store the time.
+ */
+DECLINLINE(int) mono_clock(struct timespec *ts)
+{
+ /* Plain static updated without any locking or atomics in this function. */
+ static int iWorking = -1;
+ switch (iWorking)
+ {
+#ifdef CLOCK_MONOTONIC
+ /*
+ * Standard clock_gettime()
+ */
+ case 0:
+ return clock_gettime(CLOCK_MONOTONIC, ts);
+
+ /*
+ * Syscall clock_gettime().
+ */
+ case 1:
+ return sys_clock_gettime(CLOCK_MONOTONIC, ts);
+
+#endif /* CLOCK_MONOTONIC */
+
+
+ /*
+ * Figure out what's working.
+ */
+ case -1:
+ {
+#ifdef CLOCK_MONOTONIC
+ /*
+ * Real-Time API.
+ */
+ int rc = clock_gettime(CLOCK_MONOTONIC, ts);
+ if (!rc)
+ {
+ iWorking = 0;
+ return 0;
+ }
+
+ /* Second attempt: bypass libc and issue the syscall directly. */
+ rc = sys_clock_gettime(CLOCK_MONOTONIC, ts);
+ if (!rc)
+ {
+ iWorking = 1;
+ return 0;
+ }
+#endif /* CLOCK_MONOTONIC */
+
+ /* give up */
+ iWorking = -2;
+ break;
+ }
+ }
+ /* Reached for the give-up state (-2), which has no case label above. */
+ return -1;
+}
+
+
+/**
+ * Gets a nanosecond timestamp, preferring the monotonic clock and falling
+ * back to gettimeofday() once mono_clock() has failed.
+ *
+ * @returns Nanosecond timestamp.
+ */
+DECLINLINE(uint64_t) rtTimeGetSystemNanoTS(void)
+{
+    /* Try the monotonic clock until it fails for the first time. */
+    static bool fMonoClock = true;
+    if (fMonoClock)
+    {
+        struct timespec ts;
+        if (mono_clock(&ts) == 0)
+            return (uint64_t)ts.tv_sec * RT_NS_1SEC_64
+                 + ts.tv_nsec;
+        fMonoClock = false;
+    }
+
+    /* Wall clock fallback; the gettimeofday() result is not checked. */
+    struct timeval tv;
+    gettimeofday(&tv, NULL);
+    uint64_t const cNsWholeSecs = (uint64_t)tv.tv_sec * RT_NS_1SEC_64;
+    uint64_t const cNsFraction  = (uint64_t)(tv.tv_usec * RT_NS_1US);
+    return cNsWholeSecs + cNsFraction;
+}
+
+
+/**
+ * Gets the current nanosecond timestamp.
+ *
+ * Unlike RTTimeNanoTS this goes straight to the system APIs without any
+ * resolution or performance optimizations.
+ *
+ * @returns nanosecond timestamp.
+ */
+RTDECL(uint64_t) RTTimeSystemNanoTS(void)
+{
+    uint64_t const u64NanoTS = rtTimeGetSystemNanoTS();
+    return u64NanoTS;
+}
+
+
+/**
+ * Gets the current millisecond timestamp.
+ *
+ * Like RTTimeSystemNanoTS this uses the system APIs directly, without any
+ * resolution or performance optimizations; the nanosecond value is simply
+ * divided by RT_NS_1MS.
+ *
+ * @returns millisecond timestamp.
+ */
+RTDECL(uint64_t) RTTimeSystemMilliTS(void)
+{
+    uint64_t const u64NanoTS = rtTimeGetSystemNanoTS();
+    return u64NanoTS / RT_NS_1MS;
+}
+