summaryrefslogtreecommitdiffstats
path: root/src/VBox/Runtime/r3/posix
diff options
context:
space:
mode:
Diffstat (limited to 'src/VBox/Runtime/r3/posix')
-rw-r--r--src/VBox/Runtime/r3/posix/Makefile.kup0
-rw-r--r--src/VBox/Runtime/r3/posix/RTFileQueryFsSizes-posix.cpp78
-rw-r--r--src/VBox/Runtime/r3/posix/RTFileSetAllocationSize-posix.cpp87
-rw-r--r--src/VBox/Runtime/r3/posix/RTHandleGetStandard-posix.cpp144
-rw-r--r--src/VBox/Runtime/r3/posix/RTMemProtect-posix.cpp105
-rw-r--r--src/VBox/Runtime/r3/posix/RTMpGetCount-posix.cpp89
-rw-r--r--src/VBox/Runtime/r3/posix/RTPathUserDocuments-posix.cpp63
-rw-r--r--src/VBox/Runtime/r3/posix/RTPathUserHome-posix.cpp173
-rw-r--r--src/VBox/Runtime/r3/posix/RTSystemQueryOSInfo-posix.cpp100
-rw-r--r--src/VBox/Runtime/r3/posix/RTSystemQueryTotalRam-posix.cpp61
-rw-r--r--src/VBox/Runtime/r3/posix/RTTimeNow-posix.cpp61
-rw-r--r--src/VBox/Runtime/r3/posix/RTTimeSet-posix.cpp60
-rw-r--r--src/VBox/Runtime/r3/posix/RTTimeZoneGetCurrent-posix.cpp262
-rw-r--r--src/VBox/Runtime/r3/posix/allocex-r3-posix.cpp120
-rw-r--r--src/VBox/Runtime/r3/posix/dir-posix.cpp733
-rw-r--r--src/VBox/Runtime/r3/posix/env-posix.cpp179
-rw-r--r--src/VBox/Runtime/r3/posix/errvars-posix.cpp88
-rw-r--r--src/VBox/Runtime/r3/posix/fileaio-posix.cpp1071
-rw-r--r--src/VBox/Runtime/r3/posix/fileio-at-posix.cpp107
-rw-r--r--src/VBox/Runtime/r3/posix/fileio-posix.cpp934
-rw-r--r--src/VBox/Runtime/r3/posix/fileio-sg-at-posix.cpp298
-rw-r--r--src/VBox/Runtime/r3/posix/fileio-sg-posix.cpp260
-rw-r--r--src/VBox/Runtime/r3/posix/fileio2-posix.cpp210
-rw-r--r--src/VBox/Runtime/r3/posix/filelock-posix.cpp148
-rw-r--r--src/VBox/Runtime/r3/posix/fs-posix.cpp346
-rw-r--r--src/VBox/Runtime/r3/posix/fs2-posix.cpp165
-rw-r--r--src/VBox/Runtime/r3/posix/fs3-posix.cpp94
-rw-r--r--src/VBox/Runtime/r3/posix/ldrNative-posix.cpp207
-rw-r--r--src/VBox/Runtime/r3/posix/localipc-posix.cpp1172
-rw-r--r--src/VBox/Runtime/r3/posix/path-posix.cpp418
-rw-r--r--src/VBox/Runtime/r3/posix/path2-posix.cpp316
-rw-r--r--src/VBox/Runtime/r3/posix/pathhost-posix.cpp294
-rw-r--r--src/VBox/Runtime/r3/posix/pipe-posix.cpp754
-rw-r--r--src/VBox/Runtime/r3/posix/process-creation-posix.cpp2408
-rw-r--r--src/VBox/Runtime/r3/posix/process-posix.cpp279
-rw-r--r--src/VBox/Runtime/r3/posix/rand-posix.cpp148
-rw-r--r--src/VBox/Runtime/r3/posix/rtmempage-exec-mmap-heap-posix.cpp797
-rw-r--r--src/VBox/Runtime/r3/posix/rtmempage-exec-mmap-posix.cpp182
-rw-r--r--src/VBox/Runtime/r3/posix/sched-posix.cpp849
-rw-r--r--src/VBox/Runtime/r3/posix/semevent-posix.cpp654
-rw-r--r--src/VBox/Runtime/r3/posix/semeventmulti-posix.cpp613
-rw-r--r--src/VBox/Runtime/r3/posix/semmutex-posix.cpp467
-rw-r--r--src/VBox/Runtime/r3/posix/semrw-posix.cpp741
-rw-r--r--src/VBox/Runtime/r3/posix/semwait.h162
-rw-r--r--src/VBox/Runtime/r3/posix/serialport-posix.cpp1269
-rw-r--r--src/VBox/Runtime/r3/posix/shmem-posix.cpp419
-rw-r--r--src/VBox/Runtime/r3/posix/symlink-posix.cpp247
-rw-r--r--src/VBox/Runtime/r3/posix/thread-posix.cpp780
-rw-r--r--src/VBox/Runtime/r3/posix/thread2-posix.cpp133
-rw-r--r--src/VBox/Runtime/r3/posix/time-posix.cpp99
-rw-r--r--src/VBox/Runtime/r3/posix/timelocal-posix.cpp215
-rw-r--r--src/VBox/Runtime/r3/posix/timer-posix.cpp847
-rw-r--r--src/VBox/Runtime/r3/posix/tls-posix.cpp119
-rw-r--r--src/VBox/Runtime/r3/posix/utf8-posix.cpp709
54 files changed, 21334 insertions, 0 deletions
diff --git a/src/VBox/Runtime/r3/posix/Makefile.kup b/src/VBox/Runtime/r3/posix/Makefile.kup
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/src/VBox/Runtime/r3/posix/Makefile.kup
diff --git a/src/VBox/Runtime/r3/posix/RTFileQueryFsSizes-posix.cpp b/src/VBox/Runtime/r3/posix/RTFileQueryFsSizes-posix.cpp
new file mode 100644
index 00000000..886c3844
--- /dev/null
+++ b/src/VBox/Runtime/r3/posix/RTFileQueryFsSizes-posix.cpp
@@ -0,0 +1,78 @@
+/* $Id: RTFileQueryFsSizes-posix.cpp $ */
+/** @file
+ * IPRT - File I/O, RTFileQueryFsSizes, POSIX.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP RTLOGGROUP_FILE
+
+#include <errno.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <sys/statvfs.h>
+
+#include <iprt/file.h>
+#include <iprt/assert.h>
+#include <iprt/errcore.h>
+#include <iprt/log.h>
+#include <iprt/string.h>
+
+
+/**
+ * Queries size information about the file system an open file lives on.
+ *
+ * All values are derived from a single fstatvfs() call on the native
+ * descriptor of @a hFile.  Every output parameter is optional.
+ *
+ * @returns VINF_SUCCESS on success, otherwise the IPRT status code converted
+ *          from the errno value left behind by fstatvfs().
+ * @param   hFile       The file handle to query.
+ * @param   pcbTotal    Where to store f_blocks * f_frsize.  May be NULL.
+ * @param   pcbFree     Where to store f_bavail * f_frsize.  May be NULL.
+ * @param   pcbBlock    Where to store f_frsize.  May be NULL.
+ * @param   pcbSector   Where to store the sector size, which is always
+ *                      reported as 512.  May be NULL.
+ */
+RTR3DECL(int) RTFileQueryFsSizes(RTFILE hFile, PRTFOFF pcbTotal, RTFOFF *pcbFree,
+                                 uint32_t *pcbBlock, uint32_t *pcbSector)
+{
+    struct statvfs FsInfo;
+    RT_ZERO(FsInfo);
+    int const rcNative = fstatvfs(RTFileToNative(hFile), &FsInfo);
+    if (rcNative != 0)
+        return RTErrConvertFromErrno(errno);
+
+    /* Byte counts are the block counts scaled by the fragment size. */
+    if (pcbTotal != NULL)
+        *pcbTotal = (RTFOFF)FsInfo.f_blocks * FsInfo.f_frsize;
+    if (pcbFree != NULL)
+        *pcbFree = (RTFOFF)FsInfo.f_bavail * FsInfo.f_frsize;
+    if (pcbBlock != NULL)
+        *pcbBlock = FsInfo.f_frsize;
+
+    /* The sector size is not queried; a fixed value of 512 is reported. */
+    if (pcbSector != NULL)
+        *pcbSector = 512;
+
+    return VINF_SUCCESS;
+}
+
diff --git a/src/VBox/Runtime/r3/posix/RTFileSetAllocationSize-posix.cpp b/src/VBox/Runtime/r3/posix/RTFileSetAllocationSize-posix.cpp
new file mode 100644
index 00000000..534754ac
--- /dev/null
+++ b/src/VBox/Runtime/r3/posix/RTFileSetAllocationSize-posix.cpp
@@ -0,0 +1,87 @@
+/* $Id: RTFileSetAllocationSize-posix.cpp $ */
+/** @file
+ * IPRT - RTFileSetAllocationSize, POSIX implementation.
+ */
+
+/*
+ * Copyright (C) 2016-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP RTLOGGROUP_FILE
+#include <iprt/file.h>
+#include "internal/iprt.h"
+
+#include <iprt/assert.h>
+#include <iprt/errcore.h>
+
+#include <dlfcn.h>
+#include <errno.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+/**
+ * The posix_fallocate() method.
+ */
+typedef int (*PFNPOSIXFALLOCATE) (int iFd, off_t offStart, off_t cb);
+
+/**
+ * Allocates storage for the first @a cbSize bytes of a file by calling
+ * posix_fallocate(), which is looked up at runtime with dlsym().
+ *
+ * @returns VINF_SUCCESS on success.
+ * @retval  VERR_INVALID_PARAMETER if @a hFile is NIL_RTFILE or @a fFlags
+ *          contains bits outside RTFILE_ALLOC_SIZE_F_VALID.
+ * @retval  VERR_NOT_SUPPORTED if @a cbSize does not fit into off_t, if
+ *          RTFILE_ALLOC_SIZE_F_KEEP_SIZE is requested, if posix_fallocate()
+ *          cannot be resolved, or if it fails with EOPNOTSUPP.
+ * @returns Other IPRT status codes converted from the error number returned
+ *          by posix_fallocate().
+ * @param   hFile       The file to allocate space for.
+ * @param   cbSize      The number of bytes, counted from offset 0, to allocate.
+ * @param   fFlags      RTFILE_ALLOC_SIZE_F_XXX.
+ */
+RTDECL(int) RTFileSetAllocationSize(RTFILE hFile, uint64_t cbSize, uint32_t fFlags)
+{
+    AssertReturn(hFile != NIL_RTFILE, VERR_INVALID_PARAMETER);
+    AssertReturn(!(fFlags & ~RTFILE_ALLOC_SIZE_F_VALID), VERR_INVALID_PARAMETER);
+    AssertMsgReturn(sizeof(off_t) >= sizeof(cbSize) || RT_HIDWORD(cbSize) == 0,
+                    ("64-bit filesize not supported! cbSize=%lld\n", cbSize),
+                    VERR_NOT_SUPPORTED);
+
+    if (fFlags & RTFILE_ALLOC_SIZE_F_KEEP_SIZE)
+        return VERR_NOT_SUPPORTED;
+
+    /* Resolve the function dynamically so a missing symbol becomes a status code. */
+    PFNPOSIXFALLOCATE pfnPosixFAllocate = (PFNPOSIXFALLOCATE)(uintptr_t)dlsym(RTLD_DEFAULT, "posix_fallocate");
+    if (!RT_VALID_PTR(pfnPosixFAllocate))
+        return VERR_NOT_SUPPORTED;
+
+    /*
+     * posix_fallocate() returns the error number directly and does not set
+     * errno, so the return value is what must be inspected and converted.
+     */
+    int const rcPosix = pfnPosixFAllocate(RTFileToNative(hFile), 0, cbSize);
+    if (rcPosix == 0)
+        return VINF_SUCCESS;
+    if (rcPosix == EOPNOTSUPP)
+        return VERR_NOT_SUPPORTED;
+    return RTErrConvertFromErrno(rcPosix);
+}
+RT_EXPORT_SYMBOL(RTFileSetAllocationSize);
diff --git a/src/VBox/Runtime/r3/posix/RTHandleGetStandard-posix.cpp b/src/VBox/Runtime/r3/posix/RTHandleGetStandard-posix.cpp
new file mode 100644
index 00000000..30e54331
--- /dev/null
+++ b/src/VBox/Runtime/r3/posix/RTHandleGetStandard-posix.cpp
@@ -0,0 +1,144 @@
+/* $Id: RTHandleGetStandard-posix.cpp $ */
+/** @file
+ * IPRT - RTHandleGetStandard, POSIX.
+ */
+
+/*
+ * Copyright (C) 2012-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/ioctl.h>
+#include <fcntl.h>
+#ifdef _MSC_VER
+# include <io.h>
+#else
+# include <unistd.h>
+#endif
+
+#include "internal/iprt.h"
+#include <iprt/handle.h>
+
+#include <iprt/file.h>
+#include <iprt/pipe.h>
+#include <iprt/assert.h>
+#include <iprt/errcore.h>
+#include <iprt/log.h>
+
+#include "internal/socket.h"
+
+
+
+/**
+ * Wraps one of the standard descriptors (0, 1 or 2) in an IPRT handle.
+ *
+ * The descriptor is examined with fstat() to choose between a file, pipe or
+ * socket handle, and the matching IPRT "from native" function is then used
+ * to create the handle.
+ *
+ * @returns IPRT status code.  VERR_INVALID_POINTER if @a ph is invalid,
+ *          VERR_INVALID_PARAMETER for an unknown @a enmStdHandle, or a status
+ *          converted from errno when fstat() or fcntl() fails.
+ * @param   enmStdHandle    Which standard handle to get.
+ * @param   fLeaveOpen      Forwarded to the pipe (as RTPIPE_N_LEAVE_OPEN) and
+ *                          socket wrappers; not used for files yet.
+ * @param   ph              Where to return the handle.  Only written on
+ *                          success.
+ */
+RTDECL(int) RTHandleGetStandard(RTHANDLESTD enmStdHandle, bool fLeaveOpen, PRTHANDLE ph)
+{
+    /*
+     * Check the output pointer and map the enum onto a descriptor number.
+     */
+    AssertPtrReturn(ph, VERR_INVALID_POINTER);
+    int fdNative;
+    if (enmStdHandle == RTHANDLESTD_INPUT)
+        fdNative = 0;
+    else if (enmStdHandle == RTHANDLESTD_OUTPUT)
+        fdNative = 1;
+    else if (enmStdHandle == RTHANDLESTD_ERROR)
+        fdNative = 2;
+    else
+        AssertFailedReturn(VERR_INVALID_PARAMETER);
+
+    /*
+     * Make sure the descriptor is usable and collect what is needed to
+     * classify it.
+     */
+    struct stat FdStat;
+    if (fstat(fdNative, &FdStat) == -1)
+        return RTErrConvertFromErrno(errno);
+
+    int const fFdFlags = fcntl(fdNative, F_GETFD, 0);
+    if (fFdFlags == -1)
+        return RTErrConvertFromErrno(errno);
+    bool const fInherit = (fFdFlags & FD_CLOEXEC) == 0;
+
+    /*
+     * Pick the IPRT handle type the descriptor best maps on to.
+     */
+    RTHANDLE Handle;
+    if (S_ISREG(FdStat.st_mode))
+        Handle.enmType = RTHANDLETYPE_FILE;
+    else if (   S_ISFIFO(FdStat.st_mode)
+             || (FdStat.st_mode == 0 && FdStat.st_nlink == 0 /*see bugs on bsd manpage*/))
+        Handle.enmType = RTHANDLETYPE_PIPE;
+    else if (S_ISSOCK(FdStat.st_mode))
+    {
+        /** @todo check if it's really a socket... IIRC some OSes report
+         *        anonymous pipes as sockets. */
+        Handle.enmType = RTHANDLETYPE_SOCKET;
+    }
+#if 0 /** @todo re-enable this when the VFS pipe has been coded up. */
+    else if (isatty(fdNative))
+        Handle.enmType = RTHANDLETYPE_PIPE;
+#endif
+    else
+        Handle.enmType = RTHANDLETYPE_FILE; /* Anything else is treated as a file. */
+
+    /*
+     * Create the IPRT handle of the chosen type.
+     */
+    int rc;
+    if (Handle.enmType == RTHANDLETYPE_FILE)
+    {
+        /** @todo fLeaveOpen */
+        rc = RTFileFromNative(&Handle.u.hFile, fdNative);
+    }
+    else if (Handle.enmType == RTHANDLETYPE_PIPE)
+    {
+        /* Standard input is the read end, the other two are write ends. */
+        rc = RTPipeFromNative(&Handle.u.hPipe, fdNative,
+                                (enmStdHandle == RTHANDLESTD_INPUT ? RTPIPE_N_READ : RTPIPE_N_WRITE)
+                              | (fInherit ? RTPIPE_N_INHERIT : 0)
+                              | (fLeaveOpen ? RTPIPE_N_LEAVE_OPEN : 0));
+    }
+    else if (Handle.enmType == RTHANDLETYPE_SOCKET)
+        rc = rtSocketCreateForNative(&Handle.u.hSocket, fdNative, fLeaveOpen);
+    else /* shut up gcc */
+        return VERR_INTERNAL_ERROR;
+
+    if (RT_SUCCESS(rc))
+        *ph = Handle;
+    return rc;
+}
+
diff --git a/src/VBox/Runtime/r3/posix/RTMemProtect-posix.cpp b/src/VBox/Runtime/r3/posix/RTMemProtect-posix.cpp
new file mode 100644
index 00000000..41400a87
--- /dev/null
+++ b/src/VBox/Runtime/r3/posix/RTMemProtect-posix.cpp
@@ -0,0 +1,105 @@
+/* $Id: RTMemProtect-posix.cpp $ */
+/** @file
+ * IPRT - Memory Allocation, POSIX.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#include <iprt/alloc.h>
+#include <iprt/assert.h>
+#include <iprt/param.h>
+#include <iprt/errcore.h>
+#include <iprt/string.h>
+
+#include <errno.h>
+#include <sys/mman.h>
+
+
+/**
+ * Changes the protection of a memory range using mprotect().
+ *
+ * The start address is rounded down to a page boundary and the length is
+ * increased by the same amount before the call.
+ *
+ * @returns VINF_SUCCESS on success, VERR_INVALID_PARAMETER if @a cb is zero
+ *          or @a fProtect contains unknown bits, otherwise the IPRT status
+ *          code converted from the errno value left by mprotect().
+ * @param   pv          Start of the range; need not be page aligned.
+ * @param   cb          Size of the range in bytes.  Must not be zero.
+ * @param   fProtect    Combination of RTMEM_PROT_NONE, RTMEM_PROT_READ,
+ *                      RTMEM_PROT_WRITE and RTMEM_PROT_EXEC.
+ */
+RTDECL(int) RTMemProtect(void *pv, size_t cb, unsigned fProtect) RT_NO_THROW_DEF
+{
+    /*
+     * Reject empty ranges and unknown protection bits.
+     */
+    if (!cb)
+    {
+        AssertMsgFailed(("!cb\n"));
+        return VERR_INVALID_PARAMETER;
+    }
+    unsigned const fKnownProt = RTMEM_PROT_NONE | RTMEM_PROT_READ | RTMEM_PROT_WRITE | RTMEM_PROT_EXEC;
+    if ((fProtect & ~fKnownProt) != 0)
+    {
+        AssertMsgFailed(("fProtect=%#x\n", fProtect));
+        return VERR_INVALID_PARAMETER;
+    }
+
+    /*
+     * Translate the IPRT flags to the native ones.  When the two sets have
+     * identical values the translation is a plain assignment.
+     */
+    int fNativeProt;
+#if    RTMEM_PROT_NONE  == PROT_NONE \
+    && RTMEM_PROT_READ  == PROT_READ \
+    && RTMEM_PROT_WRITE == PROT_WRITE \
+    && RTMEM_PROT_EXEC  == PROT_EXEC
+    fNativeProt = fProtect;
+#else
+    Assert(!RTMEM_PROT_NONE);
+    fNativeProt = fProtect ? 0 : PROT_NONE;
+    if (fProtect & RTMEM_PROT_READ)
+        fNativeProt |= PROT_READ;
+    if (fProtect & RTMEM_PROT_WRITE)
+        fNativeProt |= PROT_WRITE;
+    if (fProtect & RTMEM_PROT_EXEC)
+        fNativeProt |= PROT_EXEC;
+#endif
+
+    /*
+     * Page align the start address, growing the size by the offset that was
+     * cut off.
+     */
+    uintptr_t const offInPage = (uintptr_t)pv & PAGE_OFFSET_MASK;
+    cb += offInPage;
+    pv  = (void *)((uintptr_t)pv - offInPage);
+
+    /*
+     * Apply the new protection.
+     */
+    if (mprotect(pv, cb, fNativeProt) == 0)
+        return VINF_SUCCESS;
+    return RTErrConvertFromErrno(errno);
+}
diff --git a/src/VBox/Runtime/r3/posix/RTMpGetCount-posix.cpp b/src/VBox/Runtime/r3/posix/RTMpGetCount-posix.cpp
new file mode 100644
index 00000000..f48d6f98
--- /dev/null
+++ b/src/VBox/Runtime/r3/posix/RTMpGetCount-posix.cpp
@@ -0,0 +1,89 @@
+/* $Id: RTMpGetCount-posix.cpp $ */
+/** @file
+ * IPRT - RTMpGetCount, POSIX.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#include <iprt/mp.h>
+#include <iprt/assert.h>
+
+#include <unistd.h>
+#if !defined(RT_OS_SOLARIS)
+# include <sys/sysctl.h>
+#endif
+
+
+/**
+ * Gets the CPU count.
+ *
+ * Uses the largest value reported by whichever of the _SC_NPROCESSORS_MAX,
+ * _SC_NPROCESSORS_CONF and _SC_NPROCESSORS_ONLN sysconf() queries are
+ * available at compile time.  If that yields nothing positive, the
+ * CTL_HW/HW_NCPU sysctl is tried where available.
+ *
+ * @returns The CPU count; 1 if none of the methods produced a usable value.
+ */
+RTDECL(RTCPUID) RTMpGetCount(void)
+{
+    /*
+     * The sysconf way (linux and others).
+     */
+#if defined(_SC_NPROCESSORS_MAX) || defined(_SC_NPROCESSORS_CONF) || defined(_SC_NPROCESSORS_ONLN)
+    int cBest = -1;
+# ifdef _SC_NPROCESSORS_MAX
+    int const cSysMax = sysconf(_SC_NPROCESSORS_MAX);
+    if (cSysMax > cBest)
+        cBest = cSysMax;
+# endif
+# ifdef _SC_NPROCESSORS_CONF
+    int const cSysConf = sysconf(_SC_NPROCESSORS_CONF);
+    if (cSysConf > cBest)
+        cBest = cSysConf;
+# endif
+# ifdef _SC_NPROCESSORS_ONLN
+    int const cSysOnline = sysconf(_SC_NPROCESSORS_ONLN);
+    if (cSysOnline > cBest)
+        cBest = cSysOnline;
+# endif
+    Assert(cBest > 0);
+    if (cBest > 0)
+        return cBest;
+#endif
+
+    /*
+     * The BSD 4.4 way.
+     */
+#if defined(CTL_HW) && defined(HW_NCPU)
+    int    aiName[2]   = { CTL_HW, HW_NCPU };
+    int    cSysctlCpus = -1;
+    size_t cbValue     = sizeof(cSysctlCpus);
+    if (   sysctl(aiName, RT_ELEMENTS(aiName), &cSysctlCpus, &cbValue, NULL, 0) != -1
+        && cSysctlCpus >= 1)
+        return cSysctlCpus;
+#endif
+
+    /* Nothing worked; report a single CPU. */
+    return 1;
+}
+
diff --git a/src/VBox/Runtime/r3/posix/RTPathUserDocuments-posix.cpp b/src/VBox/Runtime/r3/posix/RTPathUserDocuments-posix.cpp
new file mode 100644
index 00000000..ee8c8036
--- /dev/null
+++ b/src/VBox/Runtime/r3/posix/RTPathUserDocuments-posix.cpp
@@ -0,0 +1,63 @@
+/* $Id: RTPathUserDocuments-posix.cpp $ */
+/** @file
+ * IPRT - RTPathUserDocuments, posix ring-3.
+ */
+
+/*
+ * Copyright (C) 2011-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#include <iprt/path.h>
+#include <iprt/errcore.h>
+#include <iprt/assert.h>
+
+/**
+ * Gets the user's documents directory, formed by appending "Documents" to
+ * the path returned by RTPathUserHome().
+ *
+ * @returns IPRT status code from RTPathUserHome() or RTPathAppend();
+ *          VERR_INVALID_POINTER / VERR_INVALID_PARAMETER on bad input.
+ * @param   pszPath     Buffer receiving the path.  Set to an empty string
+ *                      if appending "Documents" fails.
+ * @param   cchPath     Size of the buffer.  Must not be zero.
+ */
+RTDECL(int) RTPathUserDocuments(char *pszPath, size_t cchPath)
+{
+    /* Input validation. */
+    AssertPtrReturn(pszPath, VERR_INVALID_POINTER);
+    AssertReturn(cchPath, VERR_INVALID_PARAMETER);
+
+    /* Start with the home directory, then tack on the sub-directory name. */
+    int rc = RTPathUserHome(pszPath, cchPath);
+    if (RT_SUCCESS(rc))
+    {
+        rc = RTPathAppend(pszPath, cchPath, "Documents");
+        if (RT_FAILURE(rc))
+            pszPath[0] = '\0';
+    }
+    return rc;
+}
+
diff --git a/src/VBox/Runtime/r3/posix/RTPathUserHome-posix.cpp b/src/VBox/Runtime/r3/posix/RTPathUserHome-posix.cpp
new file mode 100644
index 00000000..daa30ea1
--- /dev/null
+++ b/src/VBox/Runtime/r3/posix/RTPathUserHome-posix.cpp
@@ -0,0 +1,173 @@
+/* $Id: RTPathUserHome-posix.cpp $ */
+/** @file
+ * IPRT - Path Manipulation, POSIX.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP RTLOGGROUP_PATH
+#include <stdlib.h>
+#include <limits.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <pwd.h>
+
+#include <iprt/path.h>
+#include <iprt/env.h>
+#include <iprt/assert.h>
+#include <iprt/string.h>
+#include <iprt/err.h>
+#include <iprt/log.h>
+#include "internal/path.h"
+#include "internal/fs.h"
+
+
+#ifndef RT_OS_L4
+/**
+ * Worker for RTPathUserHome that obtains the home directory from the
+ * password database entry of a user via getpwuid_r().
+ *
+ * @returns IPRT status code.  VERR_PATH_NOT_FOUND if there is no entry for
+ *          @a uid, or if the entry's directory is empty, cannot be stat'ed
+ *          or is not a directory.
+ * @param   pszPath     The path buffer.
+ * @param   cchPath     The size of the buffer.
+ * @param   uid         The User ID to query the home directory of.
+ */
+static int rtPathUserHomeByPasswd(char *pszPath, size_t cchPath, uid_t uid)
+{
+    /*
+     * getpwuid_r() carves the strings it returns out of the caller supplied
+     * buffer.  A fixed 5KB buffer is used and ASSUMED to be large enough,
+     * rather than asking sysconf() for a size, since that does not work
+     * everywhere.
+     */
+    char           achStrings[5120];
+    struct passwd  PwEntry;
+    struct passwd *pPwResult;
+    memset(&PwEntry, 0, sizeof(PwEntry));
+    int const iErr = getpwuid_r(uid, &PwEntry, &achStrings[0], sizeof(achStrings), &pPwResult);
+    if (iErr != 0)
+        return RTErrConvertFromErrno(iErr);
+    if (pPwResult == NULL) /* uid not found in /etc/passwd */
+        return VERR_PATH_NOT_FOUND;
+
+    /*
+     * The directory must be non-empty and must refer to an existing directory.
+     */
+    const char *pszHomeDir = pPwResult->pw_dir;
+    if (pszHomeDir == NULL || *pszHomeDir == '\0')
+        return VERR_PATH_NOT_FOUND;
+
+    struct stat DirStat;
+    if (stat(pszHomeDir, &DirStat) != 0)
+        return VERR_PATH_NOT_FOUND;
+    if (!S_ISDIR(DirStat.st_mode))
+        return VERR_PATH_NOT_FOUND;
+
+    /*
+     * Convert it to UTF-8 and copy it to the return buffer.
+     */
+    return rtPathFromNativeCopy(pszPath, cchPath, pszHomeDir, NULL);
+}
+#endif
+
+
+/**
+ * Worker for RTPathUserHome that takes the home directory from the HOME
+ * environment variable.
+ *
+ * @returns IPRT status code.  VERR_PATH_NOT_FOUND if HOME is not set, cannot
+ *          be stat'ed or is not a directory.
+ * @param   pszPath     The path buffer.
+ * @param   cchPath     The size of the buffer.
+ */
+static int rtPathUserHomeByEnv(char *pszPath, size_t cchPath)
+{
+    /*
+     * Fetch the HOME variable; without it there is nothing to return.
+     */
+    const char *pszHomeEnv = RTEnvGet("HOME"); /** @todo Codeset confusion in RTEnv. */
+    if (pszHomeEnv == NULL)
+        return VERR_PATH_NOT_FOUND;
+
+    /*
+     * Validate its existence: it must refer to a directory.
+     */
+    struct stat DirStat;
+    if (stat(pszHomeEnv, &DirStat) != 0)
+        return VERR_PATH_NOT_FOUND;
+    if (!S_ISDIR(DirStat.st_mode))
+        return VERR_PATH_NOT_FOUND;
+
+    return rtPathFromNativeCopy(pszPath, cchPath, pszHomeEnv, NULL);
+}
+
+
+/**
+ * Gets the home directory of the current user.
+ *
+ * When the effective user ID is 0 the password database is consulted first
+ * and the HOME environment variable second; for all other users the order
+ * is reversed.  The second method is only tried when the first one fails
+ * with something other than VERR_BUFFER_OVERFLOW.  With RT_OS_L4 defined
+ * only the HOME environment variable is used.
+ *
+ * @returns IPRT status code from the worker that ran last.
+ * @param   pszPath     Buffer receiving the path.
+ * @param   cchPath     Size of the buffer.
+ */
+RTDECL(int) RTPathUserHome(char *pszPath, size_t cchPath)
+{
+    int rc;
+#ifndef RT_OS_L4
+    /*
+     * We make an exception for the root user and use the system call
+     * getpwuid_r to determine their initial home path instead of
+     * reading it from the $HOME variable.  This is because the $HOME
+     * variable does not get changed by sudo (and possibly su and others)
+     * which can cause root-owned files to appear in user's home folders.
+     */
+    uid_t uid = geteuid();
+    if (!uid)
+        rc = rtPathUserHomeByPasswd(pszPath, cchPath, uid);
+    else
+        rc = rtPathUserHomeByEnv(pszPath, cchPath);
+
+    /*
+     * On failure, retry using the alternative method.
+     * (Should perhaps restrict the retry cases a bit more here...)
+     */
+    if (   RT_FAILURE(rc)
+        && rc != VERR_BUFFER_OVERFLOW)
+    {
+        if (!uid)
+            rc = rtPathUserHomeByEnv(pszPath, cchPath);
+        else
+            rc = rtPathUserHomeByPasswd(pszPath, cchPath, uid);
+    }
+#else  /* RT_OS_L4 */
+    rc = rtPathUserHomeByEnv(pszPath, cchPath);
+#endif /* RT_OS_L4 */
+
+    /* cchPath is a size_t, so it is formatted with %zu rather than %u. */
+    LogFlow(("RTPathUserHome(%p:{%s}, %zu): returns %Rrc\n", pszPath,
+             RT_SUCCESS(rc) ? pszPath : "<failed>", cchPath, rc));
+    return rc;
+}
+
diff --git a/src/VBox/Runtime/r3/posix/RTSystemQueryOSInfo-posix.cpp b/src/VBox/Runtime/r3/posix/RTSystemQueryOSInfo-posix.cpp
new file mode 100644
index 00000000..6dc2f975
--- /dev/null
+++ b/src/VBox/Runtime/r3/posix/RTSystemQueryOSInfo-posix.cpp
@@ -0,0 +1,100 @@
+/* $Id: RTSystemQueryOSInfo-posix.cpp $ */
+/** @file
+ * IPRT - RTSystemQueryOSInfo, POSIX implementation.
+ */
+
+/*
+ * Copyright (C) 2008-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#include <iprt/system.h>
+#include <iprt/assert.h>
+#include <iprt/string.h>
+#include <iprt/errcore.h>
+
+#include <errno.h>
+#include <sys/utsname.h>
+
+
+/**
+ * Queries a piece of information about the operating system via uname().
+ *
+ * The product, release and version items are answered from the sysname,
+ * release and version members of struct utsname respectively.  Every other
+ * item, including the service pack, is not supported.
+ *
+ * @returns VINF_SUCCESS on success.
+ * @retval  VERR_INVALID_PARAMETER if @a enmInfo is out of range.
+ * @retval  VERR_INVALID_POINTER if @a pszInfo is invalid.
+ * @retval  VERR_BUFFER_OVERFLOW if @a cchInfo is zero or too small; in the
+ *          latter case the buffer holds a truncated, terminated string.
+ * @retval  VERR_NOT_SUPPORTED for unsupported items; the buffer is set to an
+ *          empty string.
+ * @returns Status converted from errno if uname() fails.
+ * @param   enmInfo     The item to query.
+ * @param   pszInfo     Buffer receiving the string.
+ * @param   cchInfo     Size of the buffer.
+ */
+RTDECL(int) RTSystemQueryOSInfo(RTSYSOSINFO enmInfo, char *pszInfo, size_t cchInfo)
+{
+    /*
+     * Quick validation.
+     */
+    AssertReturn(enmInfo > RTSYSOSINFO_INVALID && enmInfo < RTSYSOSINFO_END, VERR_INVALID_PARAMETER);
+    AssertPtrReturn(pszInfo, VERR_INVALID_POINTER);
+    if (cchInfo == 0)
+        return VERR_BUFFER_OVERFLOW;
+
+    /*
+     * Only the three items uname() can answer are supported.
+     */
+    if (   enmInfo != RTSYSOSINFO_PRODUCT
+        && enmInfo != RTSYSOSINFO_RELEASE
+        && enmInfo != RTSYSOSINFO_VERSION)
+    {
+        *pszInfo = '\0';
+        return VERR_NOT_SUPPORTED;
+    }
+
+    struct utsname UnameData;
+    if (uname(&UnameData) < 0)
+        return RTErrConvertFromErrno(errno);
+
+    const char *pszValue;
+    if (enmInfo == RTSYSOSINFO_PRODUCT)
+        pszValue = UnameData.sysname;
+    else if (enmInfo == RTSYSOSINFO_RELEASE)
+        pszValue = UnameData.release;
+    else
+        pszValue = UnameData.version;
+
+    /*
+     * Copy the string out, truncating and reporting overflow if it does not
+     * fit together with its terminator.
+     */
+    size_t const cchValue = strlen(pszValue);
+    if (cchValue >= cchInfo)
+    {
+        memcpy(pszInfo, pszValue, cchInfo - 1);
+        pszInfo[cchInfo - 1] = '\0';
+        return VERR_BUFFER_OVERFLOW;
+    }
+    memcpy(pszInfo, pszValue, cchValue + 1);
+    return VINF_SUCCESS;
+}
+
diff --git a/src/VBox/Runtime/r3/posix/RTSystemQueryTotalRam-posix.cpp b/src/VBox/Runtime/r3/posix/RTSystemQueryTotalRam-posix.cpp
new file mode 100644
index 00000000..151a12f5
--- /dev/null
+++ b/src/VBox/Runtime/r3/posix/RTSystemQueryTotalRam-posix.cpp
@@ -0,0 +1,61 @@
+/* $Id: RTSystemQueryTotalRam-posix.cpp $ */
+/** @file
+ * IPRT - RTSystemQueryTotalRam, POSIX ring-3.
+ */
+
+/*
+ * Copyright (C) 2010-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#include <iprt/system.h>
+#include "internal/iprt.h"
+
+#include <iprt/errcore.h>
+#include <iprt/assert.h>
+#include <iprt/string.h>
+
+
+
+
+/**
+ * Stub for RTSystemQueryTotalRam.
+ *
+ * Nothing is queried here: once the pointer has been validated the call
+ * fails without writing to @a pcb.
+ *
+ * @returns VERR_INVALID_POINTER if @a pcb is not a valid pointer, otherwise
+ *          VERR_NOT_IMPLEMENTED.
+ * @param   pcb     Output pointer; only validated, never written to.
+ */
+RTDECL(int) RTSystemQueryTotalRam(uint64_t *pcb)
+{
+ AssertPtrReturn(pcb, VERR_INVALID_POINTER);
+ return VERR_NOT_IMPLEMENTED;
+}
+
+/**
+ * Stub for RTSystemQueryAvailableRam.
+ *
+ * Nothing is queried here: once the pointer has been validated the call
+ * fails without writing to @a pcb.
+ *
+ * @returns VERR_INVALID_POINTER if @a pcb is not a valid pointer, otherwise
+ *          VERR_NOT_IMPLEMENTED.
+ * @param   pcb     Output pointer; only validated, never written to.
+ */
+RTDECL(int) RTSystemQueryAvailableRam(uint64_t *pcb)
+{
+ AssertPtrReturn(pcb, VERR_INVALID_POINTER);
+ return VERR_NOT_IMPLEMENTED;
+}
diff --git a/src/VBox/Runtime/r3/posix/RTTimeNow-posix.cpp b/src/VBox/Runtime/r3/posix/RTTimeNow-posix.cpp
new file mode 100644
index 00000000..b00394c8
--- /dev/null
+++ b/src/VBox/Runtime/r3/posix/RTTimeNow-posix.cpp
@@ -0,0 +1,61 @@
+/* $Id: RTTimeNow-posix.cpp $ */
+/** @file
+ * IPRT - RTTimeNow, POSIX.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP RTLOGGROUP_TIME
+#define RTTIME_INCL_TIMEVAL
+#include <sys/time.h>
+#include <time.h>
+
+#include <iprt/time.h>
+
+
+/**
+ * Gets the current system time.
+ *
+ * The time is read with gettimeofday() and handed to RTTimeSpecSetTimeval().
+ * The return value of gettimeofday() is not checked.
+ *
+ * @returns pTime.
+ * @param pTime Where to store the time.
+ */
+RTDECL(PRTTIMESPEC) RTTimeNow(PRTTIMESPEC pTime)
+{
+ struct timeval tv;
+ gettimeofday(&tv, NULL);
+ return RTTimeSpecSetTimeval(pTime, &tv);
+}
+
diff --git a/src/VBox/Runtime/r3/posix/RTTimeSet-posix.cpp b/src/VBox/Runtime/r3/posix/RTTimeSet-posix.cpp
new file mode 100644
index 00000000..8d848b50
--- /dev/null
+++ b/src/VBox/Runtime/r3/posix/RTTimeSet-posix.cpp
@@ -0,0 +1,60 @@
+/* $Id: RTTimeSet-posix.cpp $ */
+/** @file
+ * IPRT - RTTimeSet, POSIX.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP RTLOGGROUP_TIME
+#define RTTIME_INCL_TIMEVAL
+#include <sys/time.h>
+#include <time.h>
+#include <errno.h>
+
+#include <iprt/time.h>
+#include "internal/iprt.h"
+
+#include <iprt/errcore.h>
+
+
+/**
+ * Sets the system time using settimeofday().
+ *
+ * @returns VINF_SUCCESS on success, otherwise the errno of the failed
+ *          settimeofday() call converted to an IPRT status code.
+ * @param   pTime   The new system time.
+ */
+RTDECL(int) RTTimeSet(PCRTTIMESPEC pTime)
+{
+    struct timeval TimeVal;
+    int const iRet = settimeofday(RTTimeSpecGetTimeval(pTime, &TimeVal), NULL);
+    if (iRet != 0)
+        return RTErrConvertFromErrno(errno);
+    return VINF_SUCCESS;
+}
+
diff --git a/src/VBox/Runtime/r3/posix/RTTimeZoneGetCurrent-posix.cpp b/src/VBox/Runtime/r3/posix/RTTimeZoneGetCurrent-posix.cpp
new file mode 100644
index 00000000..6d0d84d0
--- /dev/null
+++ b/src/VBox/Runtime/r3/posix/RTTimeZoneGetCurrent-posix.cpp
@@ -0,0 +1,262 @@
+/* $Id: RTTimeZoneGetCurrent-posix.cpp $ */
+/** @file
+ * IPRT - RTTimeZoneGetCurrent, POSIX.
+ */
+
+/*
+ * Copyright (C) 2020-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#include <iprt/time.h>
+#include "internal/iprt.h"
+
+#include <iprt/env.h>
+#include <iprt/file.h>
+#include <iprt/path.h>
+#include <iprt/string.h>
+#include <iprt/err.h>
+#include <iprt/errcore.h>
+#include <iprt/types.h>
+#include <iprt/symlink.h>
+#include <iprt/stream.h>
+
+#if defined(RT_OS_DARWIN) || defined(RT_OS_SOLARIS)
+# include <tzfile.h>
+#else
+# define TZDIR "/usr/share/zoneinfo"
+# define TZ_MAGIC "TZif"
+#endif
+
+
+/*********************************************************************************************************************************
+* Defined Constants And Macros *
+*********************************************************************************************************************************/
+#define PATH_LOCALTIME "/etc/localtime"
+#if defined(RT_OS_FREEBSD)
+# define PATH_TIMEZONE "/var/db/zoneinfo"
+#else
+# define PATH_TIMEZONE "/etc/timezone"
+#endif
+#define PATH_SYSCONFIG_CLOCK "/etc/sysconfig/clock"
+
+
+/**
+ * Checks whether a name refers to a time zone database file, i.e. a file
+ * below TZDIR which can be opened and starts with TZ_MAGIC.
+ *
+ * @returns VINF_SUCCESS if the file exists and carries the magic.
+ * @returns VERR_INVALID_PARAMETER if the name is NULL, empty or starts with a
+ *          slash.
+ * @returns VERR_INVALID_MAGIC if the file could be read but does not start
+ *          with TZ_MAGIC.
+ * @returns Failure status of RTStrValidateEncoding, RTPathJoin, RTFileOpen or
+ *          RTFileRead otherwise.
+ * @param   pszTimeZone     The time zone database file relative to
+ *                          <tzfile.h>:TZDIR (normally /usr/share/zoneinfo),
+ *                          e.g. Europe/London, or Etc/UTC, or UTC.
+ *
+ * @note    File format is documented in RFC-8536.
+ */
+static int rtIsValidTimeZoneFile(const char *pszTimeZone)
+{
+    /* Only non-empty names relative to TZDIR are acceptable. */
+    if (!pszTimeZone || !*pszTimeZone || *pszTimeZone == '/')
+        return VERR_INVALID_PARAMETER;
+
+    int rc = RTStrValidateEncoding(pszTimeZone);
+    if (RT_FAILURE(rc))
+        return rc;
+
+    /* Build the full path of the candidate file. */
+    char szZoneFile[RTPATH_MAX];
+    rc = RTPathJoin(szZoneFile, sizeof(szZoneFile), TZDIR, pszTimeZone);
+    if (RT_FAILURE(rc))
+        return rc;
+
+    RTFILE hZoneFile = NIL_RTFILE;
+    rc = RTFileOpen(&hZoneFile, szZoneFile, RTFILE_O_READ | RTFILE_O_OPEN | RTFILE_O_DENY_WRITE);
+    if (RT_FAILURE(rc))
+        return rc;
+
+    /* Read the start of the file; the handle is not needed after that. */
+    char achHeader[sizeof(TZ_MAGIC)];
+    rc = RTFileRead(hZoneFile, achHeader, sizeof(achHeader), NULL);
+    RTFileClose(hZoneFile);
+    if (RT_FAILURE(rc))
+        return rc;
+
+    return memcmp(achHeader, RT_STR_TUPLE(TZ_MAGIC)) == 0 ? VINF_SUCCESS : VERR_INVALID_MAGIC;
+}
+
+
+/**
+ * Return the system time zone.
+ *
+ * The following sources are tried in order, and a candidate is only accepted
+ * when rtIsValidTimeZoneFile() succeeds for it:
+ * -# the TZ environment variable (a leading colon is dropped),
+ * -# the target of the PATH_LOCALTIME symlink relative to TZDIR,
+ * -# the first line of PATH_TIMEZONE,
+ * -# the ZONE= entry in PATH_SYSCONFIG_CLOCK.
+ *
+ * @returns IPRT status code. When no source yields a valid time zone, the
+ * status of the last attempted step is returned.
+ * @param pszName The buffer to return the time zone in. Also used as the
+ * buffer for the TZ lookup, so it may be modified even when
+ * the function fails.
+ * @param cbName The size of the pszName buffer.
+ */
+RTDECL(int) RTTimeZoneGetCurrent(char *pszName, size_t cbName)
+{
+ int rc = RTEnvGetEx(RTENV_DEFAULT, "TZ", pszName, cbName, NULL);
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * $TZ can have two different formats and one of them doesn't specify
+ * a time zone database file under <tzfile.h>:TZDIR but since all
+ * current callers of this routine expect a time zone filename we do
+ * the validation check here so that if it is invalid then we fall back
+ * to the other mechanisms to return the system's current time zone.
+ */
+ if (*pszName == ':') /* POSIX allows $TZ to begin with a colon (:) so we allow for that here */
+ memmove(pszName, pszName + 1, strlen(pszName)); /* strlen() of the original covers the rest plus its terminator. */
+ /** @todo this isn't perfect for absolute paths... Should probably try treat
+ * it like /etc/localtime. */
+ rc = rtIsValidTimeZoneFile(pszName);
+ if (RT_SUCCESS(rc))
+ return rc;
+ }
+ else if (rc != VERR_ENV_VAR_NOT_FOUND)
+ return rc;
+
+ /*
+ * /etc/localtime is a symbolic link to the system time zone on many OSes
+ * including Solaris, macOS, Ubuntu, RH/OEL 6 and later, Arch Linux, NetBSD,
+ * and etc. We extract the time zone pathname relative to TZDIR defined in
+ * <tzfile.h> which is normally /usr/share/zoneinfo.
+ *
+ * N.B. Some OSes have /etc/localtime as a regular file instead of a
+ * symlink and while we could trawl through all the files under TZDIR
+ * looking for a match we instead fallback to other popular mechanisms of
+ * specifying the system-wide time zone for the sake of simplicity.
+ */
+ char szBuf[RTPATH_MAX];
+ const char *pszPath = PATH_LOCALTIME;
+ if (RTSymlinkExists(pszPath))
+ {
+ /* the contents of the symlink may contain '..' or other links */
+ char szLinkPathReal[RTPATH_MAX];
+ rc = RTPathReal(pszPath, szLinkPathReal, sizeof(szLinkPathReal));
+ if (RT_SUCCESS(rc))
+ {
+ rc = RTPathReal(TZDIR, szBuf, sizeof(szBuf));
+ AssertRC(rc);
+ if (RT_SUCCESS(rc))
+ {
+ Assert(RTPathStartsWith(szLinkPathReal, szBuf));
+ if (RTPathStartsWith(szLinkPathReal, szBuf))
+ {
+ /* <tzfile.h>:TZDIR doesn't include a trailing slash */
+ /* NOTE(review): this skips one character past the TZDIR prefix; presumably the link
+ never resolves to TZDIR itself, in which case the index would be past the terminator - confirm. */
+ const char *pszTimeZone = &szLinkPathReal[strlen(szBuf) + 1];
+ rc = rtIsValidTimeZoneFile(pszTimeZone);
+ if (RT_SUCCESS(rc))
+ return RTStrCopy(pszName, cbName, pszTimeZone);
+ }
+ }
+ }
+ }
+
+ /*
+ * /etc/timezone is a regular file consisting of a single line containing
+ * the time zone (e.g. Europe/London or Etc/UTC or etc.) and is used by a
+ * variety of Linux distros such as Ubuntu, Gentoo, Debian, and etc.
+ * The equivalent on FreeBSD is /var/db/zoneinfo.
+ */
+ pszPath = PATH_TIMEZONE;
+ if (RTFileExists(pszPath))
+ {
+ RTFILE hFile = NIL_RTFILE;
+ rc = RTFileOpen(&hFile, PATH_TIMEZONE, RTFILE_O_READ | RTFILE_O_OPEN | RTFILE_O_DENY_WRITE);
+ if (RT_SUCCESS(rc))
+ {
+ size_t cbRead = 0;
+ rc = RTFileRead(hFile, szBuf, sizeof(szBuf), &cbRead);
+ RTFileClose(hFile);
+ if (RT_SUCCESS(rc) && cbRead > 0)
+ {
+ /* Get the first line and strip it. */
+ szBuf[RT_MIN(sizeof(szBuf) - 1, cbRead)] = '\0'; /* The data read is not terminated; do it here, clamped to the buffer. */
+ size_t const offNewLine = RTStrOffCharOrTerm(szBuf, '\n');
+ szBuf[offNewLine] = '\0';
+ const char *pszTimeZone = RTStrStrip(szBuf);
+
+ rc = rtIsValidTimeZoneFile(pszTimeZone);
+ if (RT_SUCCESS(rc))
+ return RTStrCopy(pszName, cbName, pszTimeZone);
+ }
+ }
+ }
+
+ /*
+ * Older versions of RedHat / OEL don't have /etc/localtime as a symlink or
+ * /etc/timezone but instead have /etc/sysconfig/clock which contains a line
+ * of the syntax ZONE=Europe/London or ZONE="Europe/London" amongst other entries.
+ */
+ pszPath = PATH_SYSCONFIG_CLOCK;
+ if (RTFileExists(pszPath))
+ {
+ PRTSTREAM pStrm;
+ rc = RTStrmOpen(pszPath, "r", &pStrm);
+ if (RT_SUCCESS(rc))
+ {
+ /* The loop ends when RTStrmGetLine fails; that status is what gets returned below. */
+ while (RT_SUCCESS(rc = RTStrmGetLine(pStrm, szBuf, sizeof(szBuf))))
+ {
+ static char const s_szVarEq[] = "ZONE=";
+ char *pszStart = RTStrStrip(szBuf);
+ /* NOTE(review): memcmp compares the full prefix length even if the stripped line is shorter - confirm this is acceptable. */
+ if (memcmp(pszStart, RT_STR_TUPLE(s_szVarEq)) == 0)
+ {
+ char *pszTimeZone = &pszStart[sizeof(s_szVarEq) - 1];
+
+ /* Drop any quoting before using the value, assuming it is plain stuff: */
+ if (*pszTimeZone == '\"' || *pszTimeZone == '\'')
+ {
+ pszTimeZone++;
+ size_t const cchTimeZone = strlen(pszTimeZone);
+ if (cchTimeZone && (pszTimeZone[cchTimeZone - 1] == '"' || pszTimeZone[cchTimeZone - 1] == '\''))
+ pszTimeZone[cchTimeZone - 1] = '\0';
+ }
+
+ rc = rtIsValidTimeZoneFile(pszTimeZone);
+ if (RT_SUCCESS(rc))
+ {
+ RTStrmClose(pStrm);
+ return RTStrCopy(pszName, cbName, pszTimeZone);
+ }
+ }
+ }
+ RTStrmClose(pStrm);
+ }
+ }
+
+ return rc;
+}
+
diff --git a/src/VBox/Runtime/r3/posix/allocex-r3-posix.cpp b/src/VBox/Runtime/r3/posix/allocex-r3-posix.cpp
new file mode 100644
index 00000000..b8f1a1ad
--- /dev/null
+++ b/src/VBox/Runtime/r3/posix/allocex-r3-posix.cpp
@@ -0,0 +1,120 @@
+/* $Id: allocex-r3-posix.cpp $ */
+/** @file
+ * IPRT - Memory Allocation, Extended Alloc Workers, posix.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define RTMEM_NO_WRAP_TO_EF_APIS
+#include <iprt/mem.h>
+#include "internal/iprt.h"
+
+#include <iprt/assert.h>
+#include <iprt/errcore.h>
+#include <iprt/string.h>
+#include "../allocex.h"
+
+#include <sys/mman.h>
+
+
+/**
+ * Allocates anonymous memory that lies below the 64KB boundary.
+ *
+ * mmap() is called with address hints starting at 0x1000 and advancing in
+ * 4KB steps; a mapping is only accepted if the kernel placed it at or below
+ * the last acceptable start address.
+ *
+ * @returns VINF_SUCCESS and the mapping in @a *ppv on success.
+ * @returns VERR_NO_MEMORY if no suitable mapping could be obtained or the
+ *          request cannot fit into the range at all.
+ * @param   cbAlloc     Number of bytes to map, must be less than 64KB.
+ * @param   fFlags      RTMEMALLOCEX_FLAGS_XXX; only RTMEMALLOCEX_FLAGS_EXEC is
+ *                      looked at here (adds PROT_EXEC).
+ * @param   ppv         Where to return the address of the mapping.
+ */
+DECLHIDDEN(int) rtMemAllocEx16BitReach(size_t cbAlloc, uint32_t fFlags, void **ppv)
+{
+    AssertReturn(cbAlloc < _64K, VERR_NO_MEMORY);
+
+    /*
+     * Try with every possible address hint since the possible range is very limited.
+     */
+    int       fProt = PROT_READ | PROT_WRITE | (fFlags & RTMEMALLOCEX_FLAGS_EXEC ? PROT_EXEC : 0);
+    uintptr_t uAddr = 0x1000;
+
+    /* A request larger than the space between the first hint and 64KB would
+       make the subtraction below wrap around, turning the range check on the
+       returned address into a no-op. */
+    if (cbAlloc > _64K - uAddr)
+        return VERR_NO_MEMORY;
+
+    uintptr_t uAddrLast = _64K - uAddr - cbAlloc;
+    while (uAddr <= uAddrLast)
+    {
+        void *pv = mmap((void *)uAddr, cbAlloc, fProt, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+        if (pv != MAP_FAILED) /* mmap() signals failure with MAP_FAILED, not NULL. */
+        {
+            if (pv != NULL && (uintptr_t)pv <= uAddrLast)
+            {
+                *ppv = pv;
+                return VINF_SUCCESS;
+            }
+
+            /* The hint was not honoured and the mapping is unusable; give it back. */
+            munmap(pv, cbAlloc);
+        }
+        uAddr += _4K;
+    }
+
+    return VERR_NO_MEMORY;
+}
+
+
+/**
+ * Allocates anonymous memory that lies below the 4GB boundary.
+ *
+ * On 32-bit hosts any mapping qualifies.  On 64-bit Linux the MAP_32BIT flag
+ * is used when the headers define it.  Other configurations are not
+ * supported.
+ *
+ * @returns VINF_SUCCESS and the mapping in @a *ppv on success.
+ * @returns VERR_NO_MEMORY if mmap() fails on a 32-bit host.
+ * @returns VERR_NOT_SUPPORTED in all other cases, including a failed
+ *          MAP_32BIT attempt on Linux.
+ * @param   cbAlloc     Number of bytes to map.
+ * @param   fFlags      RTMEMALLOCEX_FLAGS_XXX; only RTMEMALLOCEX_FLAGS_EXEC is
+ *                      looked at here (adds PROT_EXEC).
+ * @param   ppv         Where to return the address of the mapping.
+ */
+DECLHIDDEN(int) rtMemAllocEx32BitReach(size_t cbAlloc, uint32_t fFlags, void **ppv)
+{
+    int fProt = PROT_READ | PROT_WRITE | (fFlags & RTMEMALLOCEX_FLAGS_EXEC ? PROT_EXEC : 0);
+#if ARCH_BITS == 32
+    void *pv = mmap(NULL, cbAlloc, fProt, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+    if (pv != MAP_FAILED) /* mmap() signals failure with MAP_FAILED, not NULL. */
+    {
+        *ppv = pv;
+        return VINF_SUCCESS;
+    }
+    return VERR_NO_MEMORY;
+
+#elif defined(RT_OS_LINUX)
+# ifdef MAP_32BIT
+    void *pv = mmap(NULL, cbAlloc, fProt, MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0);
+    if (pv != MAP_FAILED) /* mmap() signals failure with MAP_FAILED, not NULL. */
+    {
+        *ppv = pv;
+        return VINF_SUCCESS;
+    }
+# endif
+
+    /** @todo On linux, we need an accurate hint. Since I don't need this branch of
+     *        the code right now, I won't bother starting to parse
+     *        /proc/curproc/mmap right now... */
+#else
+#endif
+    return VERR_NOT_SUPPORTED;
+}
+
+
+/**
+ * Releases a mapping by passing it to munmap().
+ *
+ * @param pv Start of the mapping to release.
+ * @param cb Number of bytes to unmap.
+ * @param fFlags Not used by this implementation.
+ */
+DECLHIDDEN(void) rtMemFreeExYyBitReach(void *pv, size_t cb, uint32_t fFlags)
+{
+ RT_NOREF_PV(fFlags);
+ munmap(pv, cb); /* The result is not checked. */
+}
+
diff --git a/src/VBox/Runtime/r3/posix/dir-posix.cpp b/src/VBox/Runtime/r3/posix/dir-posix.cpp
new file mode 100644
index 00000000..558085ab
--- /dev/null
+++ b/src/VBox/Runtime/r3/posix/dir-posix.cpp
@@ -0,0 +1,733 @@
+/* $Id: dir-posix.cpp $ */
+/** @file
+ * IPRT - Directory manipulation, POSIX.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP RTLOGGROUP_DIR
+#include <errno.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <dirent.h>
+#include <dlfcn.h>
+#include <stdio.h>
+
+#include <iprt/dir.h>
+#include "internal/iprt.h"
+
+#include <iprt/alloca.h>
+#include <iprt/asm.h>
+#include <iprt/assert.h>
+#include <iprt/err.h>
+#include <iprt/log.h>
+#include <iprt/mem.h>
+#include <iprt/param.h>
+#include <iprt/path.h>
+#include <iprt/string.h>
+#include "internal/dir.h"
+#include "internal/fs.h"
+#include "internal/path.h"
+
+#if !defined(RT_OS_SOLARIS) && !defined(RT_OS_HAIKU)
+# define HAVE_DIRENT_D_TYPE 1
+#endif
+
+
+/**
+ * Checks whether a path refers to an existing directory.
+ *
+ * @returns true if stat() succeeds on the native path and reports a
+ *          directory, false otherwise (including when the path cannot be
+ *          converted to the native encoding).
+ * @param   pszPath     The path to check.
+ */
+RTDECL(bool) RTDirExists(const char *pszPath)
+{
+    bool        fExists = false;
+    char const *pszNative;
+    if (RT_SUCCESS(rtPathToNative(&pszNative, pszPath, NULL)))
+    {
+        struct stat StatBuf;
+        if (   stat(pszNative, &StatBuf) == 0
+            && S_ISDIR(StatBuf.st_mode))
+            fExists = true;
+
+        rtPathFreeNative(pszNative, pszPath);
+    }
+
+    LogFlow(("RTDirExists(%p={%s}): returns %RTbool\n", pszPath, pszPath, fExists));
+    return fExists;
+}
+
+
+/**
+ * Creates a directory using mkdir().
+ *
+ * @returns VINF_SUCCESS on success.
+ * @returns VERR_INVALID_FMODE if the normalized mode has invalid permission
+ *          bits.
+ * @returns VERR_ALREADY_EXISTS if mkdir() failed with ENOSYS or EACCES but the
+ *          path can be stat()'ed.
+ * @returns The converted errno of mkdir() or the failure of the native path
+ *          conversion otherwise.
+ * @param   pszPath     Path of the directory to create.
+ * @param   fMode       The mode of the new directory; normalized before use.
+ * @param   fCreate     RTDIRCREATE_FLAGS_XXX.  With
+ *                      RTDIRCREATE_FLAGS_IGNORE_UMASK the permission bits are
+ *                      corrected with chmod() after creation if they differ.
+ */
+RTDECL(int) RTDirCreate(const char *pszPath, RTFMODE fMode, uint32_t fCreate)
+{
+    RT_NOREF_PV(fCreate);
+
+    int rc;
+    fMode = rtFsModeNormalize(fMode, pszPath, 0, RTFS_TYPE_DIRECTORY);
+    if (rtFsModeIsValidPermissions(fMode))
+    {
+        char const *pszNativePath;
+        rc = rtPathToNative(&pszNativePath, pszPath, NULL);
+        if (RT_SUCCESS(rc))
+        {
+            struct stat st;
+            if (mkdir(pszNativePath, fMode & RTFS_UNIX_MASK) == 0)
+            {
+                /* If requested, we try to make sure the permission bits are set
+                   correctly. For now, we'll just ignore errors here. */
+                if (fCreate & RTDIRCREATE_FLAGS_IGNORE_UMASK)
+                {
+                    if (   stat(pszNativePath, &st)
+                        || (st.st_mode & 07777) != (fMode & 07777) )
+                        chmod(pszNativePath, fMode & RTFS_UNIX_MASK);
+                }
+                rc = VINF_SUCCESS;
+            }
+            else
+            {
+                rc = errno;
+                /*
+                 * Solaris mkdir returns ENOSYS on autofs directories, and also
+                 * did this apparently for NFS mount points in some Nevada
+                 * development builds. It also returned EACCES when it should
+                 * have returned EEXIST, which actually is within the POSIX
+                 * spec (not that I like this interpretation, but it seems
+                 * valid). Check ourselves.
+                 */
+                if (   rc == ENOSYS
+                    || rc == EACCES)
+                {
+                    rc = RTErrConvertFromErrno(rc);
+                    if (!stat(pszNativePath, &st))
+                        rc = VERR_ALREADY_EXISTS;
+                }
+                else
+                    rc = RTErrConvertFromErrno(rc);
+            }
+
+            /* Release the native path only when the conversion succeeded, like
+               the other functions in this file do. */
+            rtPathFreeNative(pszNativePath, pszPath);
+        }
+    }
+    else
+    {
+        AssertMsgFailed(("Invalid file mode! %RTfmode\n", fMode));
+        rc = VERR_INVALID_FMODE;
+    }
+    LogFlow(("RTDirCreate(%p={%s}, %RTfmode): returns %Rrc\n", pszPath, pszPath, fMode, rc));
+    return rc;
+}
+
+
+/**
+ * Removes a directory using rmdir().
+ *
+ * @returns IPRT status code: the status of the native path conversion if
+ * rmdir() succeeds or the conversion fails, VERR_DIR_NOT_EMPTY for
+ * EEXIST, VERR_NOT_A_DIRECTORY or VERR_PATH_NOT_FOUND for ENOTDIR
+ * (see the body), otherwise the converted errno.
+ * @param pszPath Path of the directory to remove.
+ */
+RTDECL(int) RTDirRemove(const char *pszPath)
+{
+ char const *pszNativePath;
+ int rc = rtPathToNative(&pszNativePath, pszPath, NULL);
+ if (RT_SUCCESS(rc))
+ {
+ if (rmdir(pszNativePath))
+ {
+ rc = errno;
+ if (rc == EEXIST) /* Solaris returns this, the rest have ENOTEMPTY. */
+ rc = VERR_DIR_NOT_EMPTY;
+ else if (rc != ENOTDIR)
+ rc = RTErrConvertFromErrno(rc);
+ else
+ {
+ /*
+ * This may be a valid path-not-found or it may be a non-directory in
+ * the final component. FsPerf want us to distinguish between the two,
+ * and trailing slash shouldn't matter because it doesn't on windows...
+ */
+ char *pszFree = NULL;
+ const char *pszStat = pszNativePath;
+ size_t cch = strlen(pszNativePath);
+ if (cch > 2 && RTPATH_IS_SLASH(pszNativePath[cch - 1]))
+ {
+ /* cch bytes suffice: the copy omits the terminator and the last slash is overwritten with one below. */
+ pszFree = (char *)RTMemTmpAlloc(cch);
+ if (pszFree)
+ {
+ memcpy(pszFree, pszNativePath, cch);
+ do
+ pszFree[--cch] = '\0';
+ while (cch > 2 && RTPATH_IS_SLASH(pszFree[cch - 1]));
+ pszStat = pszFree;
+ }
+ /* If the allocation fails, the unmodified native path is stat()'ed instead. */
+ }
+
+ struct stat st;
+ if (!stat(pszStat, &st) && !S_ISDIR(st.st_mode))
+ rc = VERR_NOT_A_DIRECTORY;
+ else
+ rc = VERR_PATH_NOT_FOUND;
+
+ if (pszFree)
+ RTMemTmpFree(pszFree);
+ }
+ }
+
+ rtPathFreeNative(pszNativePath, pszPath);
+ }
+
+ LogFlow(("RTDirRemove(%p={%s}): returns %Rrc\n", pszPath, pszPath, rc));
+ return rc;
+}
+
+
+/**
+ * Flushes a directory to disk by opening it and calling fsync() on it.
+ *
+ * @returns VINF_SUCCESS on success.
+ * @returns VERR_NOT_SUPPORTED if fsync() fails with EROFS or EINVAL.
+ * @returns The converted errno of the failing open() or fsync() otherwise.
+ * @param   pszPath     The directory to flush; passed to open() as is.
+ */
+RTDECL(int) RTDirFlush(const char *pszPath)
+{
+    /*
+     * Linux: The fsync() man page hints at this being required for ensuring
+     * consistency between directory and file in case of a crash.
+     *
+     * Solaris: No mention is made of directories on the fsync man page.
+     * While rename+fsync will do what we want on ZFS, the code needs more
+     * careful studying wrt whether the directory entry of a new file is
+     * implicitly synced when the file is synced (it's very likely for ZFS).
+     *
+     * FreeBSD: The FFS fsync code seems to flush the directory entry as well
+     * in some cases.  Don't know exactly what's up with rename, but from the
+     * look of things fsync(dir) should work.
+     */
+#ifdef O_DIRECTORY
+    int const fOpen = O_RDONLY | O_DIRECTORY;
+#else
+    int const fOpen = O_RDONLY;
+#endif
+    int const fdDir = open(pszPath, fOpen, 0);
+    if (fdDir < 0)
+        return RTErrConvertFromErrno(errno);
+
+    int rc = VINF_SUCCESS;
+    if (fsync(fdDir) != 0)
+    {
+        /* Linux fsync(2) man page documents both errors as an indication
+         * that the file descriptor can't be flushed (seen EINVAL for usual
+         * directories on CIFS).  BSD (OS X) fsync(2) documents only the
+         * latter, and Solaris fsync(3C) pretends there is no problem. */
+        int const iErr = errno;
+        rc = iErr == EROFS || iErr == EINVAL
+           ? VERR_NOT_SUPPORTED
+           : RTErrConvertFromErrno(iErr);
+    }
+    close(fdDir);
+    return rc;
+}
+
+
+/**
+ * Calculates the size of the RTDIRINTERNAL structure needed for a directory,
+ * making room for the longest file name in its Data.d_name member.
+ *
+ * @returns Size in bytes, at least sizeof(RTDIRINTERNAL) and rounded up to a
+ *          multiple of 8.
+ * @param   pszPath     The path passed to pathconf() for _PC_NAME_MAX.
+ */
+size_t rtDirNativeGetStructSize(const char *pszPath)
+{
+    /* Start with what pathconf() reports and raise it to the compile time
+       limits where those are known (plain paranoia, but it doesn't hurt). */
+    long cchMaxName = pathconf(pszPath, _PC_NAME_MAX);
+# ifdef NAME_MAX
+    cchMaxName = RT_MAX(cchMaxName, NAME_MAX);
+# endif
+# ifdef _XOPEN_NAME_MAX
+    cchMaxName = RT_MAX(cchMaxName, _XOPEN_NAME_MAX);
+# endif
+
+    /* Room for the name plus terminator, but never less than the plain structure. */
+    size_t cbNeeded = RT_UOFFSETOF_DYN(RTDIRINTERNAL, Data.d_name[cchMaxName + 1]);
+    cbNeeded = RT_MAX(cbNeeded, sizeof(RTDIRINTERNAL));
+
+    return RT_ALIGN_Z(cbNeeded, 8);
+}
+
+
+/**
+ * Opens the native directory stream for an IPRT directory instance and stores
+ * it in pDir->pDir.
+ *
+ * Without RTDIR_F_NO_FOLLOW, or when pDir->fDirSlash is set, opendir() is used
+ * directly. Otherwise the final path component must not be a symbolic link:
+ * where available this is done with open(O_NOFOLLOW | O_DIRECTORY) followed by
+ * fdopendir(), else with lstat() followed by opendir().
+ *
+ * @returns IPRT status code. VERR_IS_A_SYMLINK when a symbolic link was
+ * detected in the no-follow case.
+ * @param pDir The directory instance; pszPath, cchPath, fFlags and
+ * fDirSlash are read, pDir and fDataUnread are written.
+ * @param hRelativeDir Not used by this implementation.
+ * @param pvNativeRelative Not used by this implementation.
+ */
+int rtDirNativeOpen(PRTDIRINTERNAL pDir, uintptr_t hRelativeDir, void *pvNativeRelative)
+{
+ NOREF(hRelativeDir);
+ NOREF(pvNativeRelative);
+
+ /*
+ * Convert to a native path and try opendir.
+ */
+ char *pszSlash = NULL;
+ char const *pszNativePath;
+ int rc;
+ if ( !(pDir->fFlags & RTDIR_F_NO_FOLLOW)
+ || pDir->fDirSlash
+ || pDir->cchPath <= 1)
+ rc = rtPathToNative(&pszNativePath, pDir->pszPath, NULL);
+ else
+ {
+ /* Temporarily cut off the last character of the path (presumably a trailing slash, given
+ it is restored with RTPATH_SLASH below) for the conversion. */
+ pszSlash = (char *)&pDir->pszPath[pDir->cchPath - 1];
+ *pszSlash = '\0';
+ rc = rtPathToNative(&pszNativePath, pDir->pszPath, NULL);
+ }
+ if (RT_SUCCESS(rc))
+ {
+ if ( !(pDir->fFlags & RTDIR_F_NO_FOLLOW)
+ || pDir->fDirSlash)
+ pDir->pDir = opendir(pszNativePath);
+ else
+ {
+ /*
+ * If we can get fdopendir() and have both O_NOFOLLOW and O_DIRECTORY,
+ * we will use open() to safely open the directory without following
+ * symlinks in the final component, and then use fdopendir to get a DIR
+ * from the file descriptor.
+ *
+ * If we cannot get that, we will use lstat() + opendir() as a fallback.
+ *
+ * We ASSUME that support for the O_NOFOLLOW and O_DIRECTORY flags is
+ * older than fdopendir().
+ */
+#if defined(O_NOFOLLOW) && defined(O_DIRECTORY)
+ /* Need to resolve fdopendir dynamically. */
+ typedef DIR * (*PFNFDOPENDIR)(int);
+ static PFNFDOPENDIR s_pfnFdOpenDir = NULL;
+ static bool volatile s_fInitalized = false;
+
+ /* The dlsym() lookup is done the first time through and the result is kept in the statics. */
+ PFNFDOPENDIR pfnFdOpenDir = s_pfnFdOpenDir;
+ ASMCompilerBarrier();
+ if (s_fInitalized)
+ { /* likely */ }
+ else
+ {
+ pfnFdOpenDir = (PFNFDOPENDIR)(uintptr_t)dlsym(RTLD_DEFAULT, "fdopendir");
+ s_pfnFdOpenDir = pfnFdOpenDir;
+ ASMAtomicWriteBool(&s_fInitalized, true);
+ }
+
+ if (pfnFdOpenDir)
+ {
+ int fd = open(pszNativePath, O_RDONLY | O_DIRECTORY | O_NOFOLLOW, 0);
+ if (fd >= 0)
+ {
+ pDir->pDir = pfnFdOpenDir(fd);
+ if (RT_UNLIKELY(!pDir->pDir))
+ {
+ rc = RTErrConvertFromErrno(errno);
+ close(fd);
+ }
+ }
+ else
+ {
+ /* WSL returns ELOOP here, but we take no chances that O_NOFOLLOW
+ takes precedence over O_DIRECTORY everywhere. */
+ int iErr = errno;
+ if (iErr == ELOOP || iErr == ENOTDIR)
+ {
+ struct stat St;
+ if ( lstat(pszNativePath, &St) == 0
+ && S_ISLNK(St.st_mode))
+ rc = VERR_IS_A_SYMLINK;
+ else
+ rc = RTErrConvertFromErrno(iErr);
+ }
+ /* For any other errno, rc is still a success status here and is set from errno further down. */
+ }
+ }
+ else
+#endif
+ {
+ /* Fallback. This contains a race condition. */
+ struct stat St;
+ if ( lstat(pszNativePath, &St) != 0
+ || !S_ISLNK(St.st_mode))
+ pDir->pDir = opendir(pszNativePath);
+ else
+ rc = VERR_IS_A_SYMLINK;
+ }
+ }
+ if (pDir->pDir)
+ {
+ /*
+ * Init data (allocated as all zeros).
+ */
+ pDir->fDataUnread = false; /* spelling it out */
+ }
+ else if (RT_SUCCESS_NP(rc)) /* No stream and no status assigned yet: take it from errno. */
+ rc = RTErrConvertFromErrno(errno);
+
+ rtPathFreeNative(pszNativePath, pDir->pszPath);
+ }
+ /* Put back the character removed from the path above. */
+ if (pszSlash)
+ *pszSlash = RTPATH_SLASH;
+ return rc;
+}
+
+
+/**
+ * Closes a directory handle and frees the instance data.
+ *
+ * @returns VINF_SUCCESS on success.
+ * @returns VERR_INVALID_PARAMETER if the handle is NULL or its magic is wrong.
+ * @returns The converted errno if closedir() fails (the instance is freed in
+ *          that case too).
+ * @param   hDir    The directory handle to close.
+ */
+RTDECL(int) RTDirClose(RTDIR hDir)
+{
+    PRTDIRINTERNAL pThis = hDir;
+
+    /*
+     * Reject NULL and anything not carrying the live magic.
+     */
+    if (!pThis)
+        return VERR_INVALID_PARAMETER;
+    if (pThis->u32Magic != RTDIR_MAGIC)
+    {
+        AssertMsgFailed(("Invalid pDir=%p\n", pThis));
+        return VERR_INVALID_PARAMETER;
+    }
+
+    /*
+     * Invalidate the handle, close the native stream and free the instance.
+     */
+    pThis->u32Magic = RTDIR_MAGIC_DEAD;
+
+    int rc;
+    if (closedir(pThis->pDir) == 0)
+        rc = VINF_SUCCESS;
+    else
+    {
+        rc = RTErrConvertFromErrno(errno);
+        AssertMsgFailed(("closedir(%p) -> errno=%d (%Rrc)\n", pThis->pDir, errno, rc));
+    }
+
+    RTMemFree(pThis);
+    return rc;
+}
+
+
+/**
+ * Ensure that there is unread data in the buffer
+ * and that there is a converted filename hanging around.
+ *
+ * Entries are read with readdir_r() into pDir->Data and their names converted
+ * into pDir->pszName until one passes pDir->pfnFilter (if set). On success
+ * pDir->fDataUnread is true.
+ *
+ * @returns IPRT status code. VERR_NO_MORE_FILES when readdir_r() reports the
+ * end of the directory.
+ * @param pDir the open directory. Fully validated.
+ */
+static int rtDirReadMore(PRTDIRINTERNAL pDir)
+{
+ /** @todo try avoid the rematching on buffer overflow errors. */
+ for (;;)
+ {
+ /*
+ * Fetch data?
+ */
+ if (!pDir->fDataUnread)
+ {
+ struct dirent *pResult = NULL;
+ /* Silence the deprecation warning for readdir_r() on GCC 4.6 and later. */
+#if RT_GNUC_PREREQ(4, 6)
+# pragma GCC diagnostic push
+# pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+#endif
+ int rc = readdir_r(pDir->pDir, &pDir->Data, &pResult);
+#if RT_GNUC_PREREQ(4, 6)
+# pragma GCC diagnostic pop
+#endif
+ if (rc) /* The return value itself is treated as the errno value. */
+ {
+ rc = RTErrConvertFromErrno(rc);
+ /** @todo Consider translating ENOENT (The current
+ * position of the directory stream is invalid)
+ * differently. */
+ AssertMsg(rc == VERR_FILE_NOT_FOUND, ("%Rrc\n", rc));
+ return rc;
+ }
+ if (!pResult)
+ return VERR_NO_MORE_FILES;
+ }
+
+ /*
+ * Convert the filename to UTF-8.
+ */
+ if (!pDir->pszName)
+ {
+ int rc = rtPathFromNative(&pDir->pszName, pDir->Data.d_name, pDir->pszPath);
+ if (RT_FAILURE(rc))
+ {
+ pDir->pszName = NULL;
+ return rc;
+ }
+ pDir->cchName = strlen(pDir->pszName);
+ }
+ if ( !pDir->pfnFilter
+ || pDir->pfnFilter(pDir, pDir->pszName))
+ break;
+ /* Filtered out: drop the converted name and fetch the next entry. */
+ rtPathFreeIprt(pDir->pszName, pDir->Data.d_name);
+ pDir->pszName = NULL;
+ pDir->fDataUnread = false;
+ }
+
+ pDir->fDataUnread = true;
+ return VINF_SUCCESS;
+}
+
+
+#ifdef HAVE_DIRENT_D_TYPE
+/**
+ * Maps a dirent d_type value onto the matching IPRT directory entry type.
+ *
+ * @returns IPRT directory entry type; RTDIRENTRYTYPE_UNKNOWN for DT_UNKNOWN
+ *          and (after asserting) for any value not listed below.
+ * @param   iType   The d_type value taken from the dirent structure.
+ */
+static RTDIRENTRYTYPE rtDirType(int iType)
+{
+    RTDIRENTRYTYPE enmResult;
+    switch (iType)
+    {
+        case DT_UNKNOWN:
+            enmResult = RTDIRENTRYTYPE_UNKNOWN;
+            break;
+        case DT_FIFO:
+            enmResult = RTDIRENTRYTYPE_FIFO;
+            break;
+        case DT_CHR:
+            enmResult = RTDIRENTRYTYPE_DEV_CHAR;
+            break;
+        case DT_DIR:
+            enmResult = RTDIRENTRYTYPE_DIRECTORY;
+            break;
+        case DT_BLK:
+            enmResult = RTDIRENTRYTYPE_DEV_BLOCK;
+            break;
+        case DT_REG:
+            enmResult = RTDIRENTRYTYPE_FILE;
+            break;
+        case DT_LNK:
+            enmResult = RTDIRENTRYTYPE_SYMLINK;
+            break;
+        case DT_SOCK:
+            enmResult = RTDIRENTRYTYPE_SOCKET;
+            break;
+        case DT_WHT:
+            enmResult = RTDIRENTRYTYPE_WHITEOUT;
+            break;
+        default:
+            /* Unexpected value: complain in assertion-enabled builds, report unknown. */
+            AssertMsgFailed(("iType=%d\n", iType));
+            enmResult = RTDIRENTRYTYPE_UNKNOWN;
+            break;
+    }
+    return enmResult;
+}
+#endif /* HAVE_DIRENT_D_TYPE */
+
+
+/**
+ * Reads the next entry of an open directory.
+ *
+ * @returns VINF_SUCCESS when an entry was copied to @a pDirEntry,
+ *          VERR_BUFFER_OVERFLOW when the caller's buffer is too small (the
+ *          entry stays cached and is offered again by the next call),
+ *          VERR_INVALID_PARAMETER / VERR_INVALID_POINTER on bad input, or
+ *          whatever rtDirReadMore() fails with (e.g. VERR_NO_MORE_FILES).
+ * @param   hDir            The directory handle.
+ * @param   pDirEntry       Where to store the entry.
+ * @param   pcbDirEntry     Optional.  On input the size of the @a pDirEntry
+ *                          buffer (sizeof(*pDirEntry) is assumed when NULL);
+ *                          once an entry is available it receives the size
+ *                          required for that entry.
+ */
+RTDECL(int) RTDirRead(RTDIR hDir, PRTDIRENTRY pDirEntry, size_t *pcbDirEntry)
+{
+    PRTDIRINTERNAL pDir = hDir;
+
+    /*
+     * Validate and digest input.
+     */
+    if (!rtDirValidHandle(pDir))
+        return VERR_INVALID_PARAMETER;
+    AssertPtrReturn(pDirEntry, VERR_INVALID_POINTER);
+
+    size_t cbDirEntry = sizeof(*pDirEntry);
+    if (pcbDirEntry)
+    {
+        AssertPtrReturn(pcbDirEntry, VERR_INVALID_POINTER);
+        cbDirEntry = *pcbDirEntry;
+        /* The message reports the same RTDIRENTRY minimum the check uses, formatted as size_t. */
+        AssertMsgReturn(cbDirEntry >= RT_UOFFSETOF(RTDIRENTRY, szName[2]),
+                        ("Invalid *pcbDirEntry=%zu (min %zu)\n", *pcbDirEntry, RT_UOFFSETOF(RTDIRENTRY, szName[2])),
+                        VERR_INVALID_PARAMETER);
+    }
+
+    /*
+     * Fetch more data if necessary and/or convert the name.
+     */
+    int rc = rtDirReadMore(pDir);
+    if (RT_SUCCESS(rc))
+    {
+        /*
+         * Check if we've got enough space to return the data.
+         */
+        const char  *pszName    = pDir->pszName;
+        const size_t cchName    = pDir->cchName;
+        const size_t cbRequired = RT_UOFFSETOF(RTDIRENTRY, szName[1]) + cchName;
+        if (pcbDirEntry)
+            *pcbDirEntry = cbRequired;
+        if (cbRequired <= cbDirEntry)
+        {
+            /*
+             * Setup the returned data.
+             */
+            pDirEntry->INodeId = pDir->Data.d_ino; /* may need #ifdefing later */
+#ifdef HAVE_DIRENT_D_TYPE
+            pDirEntry->enmType = rtDirType(pDir->Data.d_type);
+#else
+            pDirEntry->enmType = RTDIRENTRYTYPE_UNKNOWN;
+#endif
+            pDirEntry->cbName  = (uint16_t)cchName;
+            Assert(pDirEntry->cbName == cchName);
+            memcpy(pDirEntry->szName, pszName, cchName + 1); /* includes the terminator */
+
+            /* free cached data */
+            pDir->fDataUnread = false;
+            rtPathFreeIprt(pDir->pszName, pDir->Data.d_name);
+            pDir->pszName = NULL;
+        }
+        else
+            rc = VERR_BUFFER_OVERFLOW; /* entry and name stay cached for the retry */
+    }
+
+    LogFlow(("RTDirRead(%p:{%s}, %p:{%s}, %p:{%zu}): returns %Rrc\n",
+             pDir, pDir->pszPath, pDirEntry, RT_SUCCESS(rc) ? pDirEntry->szName : "<failed>",
+             pcbDirEntry, pcbDirEntry ? *pcbDirEntry : (size_t)0, rc));
+    return rc;
+}
+
+
+/**
+ * Fills an object info structure with placeholder values.
+ *
+ * Used when the entry cannot be stat'ed: sizes and timestamps are zeroed and
+ * only the mode reflects what is known, namely the directory entry type.
+ *
+ * @param   pInfo       The structure to fill.
+ * @param   enmType     The directory entry type used to pick the mode flags.
+ */
+static void rtDirSetDummyInfo(PRTFSOBJINFO pInfo, RTDIRENTRYTYPE enmType)
+{
+    /* Attributes first: wipe them, then state that no additional data is present. */
+    memset(&pInfo->Attr, 0, sizeof(pInfo->Attr));
+    pInfo->Attr.enmAdditional = RTFSOBJATTRADD_NOTHING;
+
+    /* No size information. */
+    pInfo->cbObject    = 0;
+    pInfo->cbAllocated = 0;
+
+    /* All timestamps are set to zero nanoseconds. */
+    RTTimeSpecSetNano(&pInfo->BirthTime, 0);
+    RTTimeSpecSetNano(&pInfo->ChangeTime, 0);
+    RTTimeSpecSetNano(&pInfo->ModificationTime, 0);
+    RTTimeSpecSetNano(&pInfo->AccessTime, 0);
+
+    /* Derive the mode from the entry type; anything unrecognised is a plain "normal" object. */
+    switch (enmType)
+    {
+        case RTDIRENTRYTYPE_FIFO:
+            pInfo->Attr.fMode = RTFS_DOS_NT_NORMAL | RTFS_TYPE_FIFO;
+            break;
+        case RTDIRENTRYTYPE_DEV_CHAR:
+            pInfo->Attr.fMode = RTFS_DOS_NT_NORMAL | RTFS_TYPE_DEV_CHAR;
+            break;
+        case RTDIRENTRYTYPE_DIRECTORY:
+            pInfo->Attr.fMode = RTFS_DOS_DIRECTORY | RTFS_TYPE_DIRECTORY;
+            break;
+        case RTDIRENTRYTYPE_DEV_BLOCK:
+            pInfo->Attr.fMode = RTFS_DOS_NT_NORMAL | RTFS_TYPE_DEV_BLOCK;
+            break;
+        case RTDIRENTRYTYPE_FILE:
+            pInfo->Attr.fMode = RTFS_DOS_NT_NORMAL | RTFS_TYPE_FILE;
+            break;
+        case RTDIRENTRYTYPE_SYMLINK:
+            pInfo->Attr.fMode = RTFS_DOS_NT_NORMAL | RTFS_TYPE_SYMLINK;
+            break;
+        case RTDIRENTRYTYPE_SOCKET:
+            pInfo->Attr.fMode = RTFS_DOS_NT_NORMAL | RTFS_TYPE_SOCKET;
+            break;
+        case RTDIRENTRYTYPE_WHITEOUT:
+            pInfo->Attr.fMode = RTFS_DOS_NT_NORMAL | RTFS_TYPE_WHITEOUT;
+            break;
+        case RTDIRENTRYTYPE_UNKNOWN:
+        default:
+            pInfo->Attr.fMode = RTFS_DOS_NT_NORMAL;
+            break;
+    }
+}
+
+
+/**
+ * Reads the next entry of an open directory together with object info.
+ *
+ * @returns VINF_SUCCESS when the entry and its info were returned,
+ *          VWRN_NO_DIRENT_INFO when the entry was returned but querying its
+ *          info failed (placeholder info is supplied instead),
+ *          VERR_BUFFER_OVERFLOW when the caller's buffer is too small (the
+ *          entry stays cached for the next call), VERR_INVALID_PARAMETER /
+ *          VERR_INVALID_POINTER on bad input, or the rtDirReadMore() failure.
+ * @param   hDir                    The directory handle.
+ * @param   pDirEntry               Where to store the entry.
+ * @param   pcbDirEntry             Optional.  On input the size of the buffer
+ *                                  (sizeof(*pDirEntry) is assumed when NULL);
+ *                                  once an entry is available it receives the
+ *                                  size required for that entry.
+ * @param   enmAdditionalAttribs    Which additional attributes to request;
+ *                                  passed on to RTPathQueryInfoEx().
+ * @param   fFlags                  RTPATH_F_XXX flags; passed on to
+ *                                  RTPathQueryInfoEx().
+ */
+RTDECL(int) RTDirReadEx(RTDIR hDir, PRTDIRENTRYEX pDirEntry, size_t *pcbDirEntry,
+                        RTFSOBJATTRADD enmAdditionalAttribs, uint32_t fFlags)
+{
+    PRTDIRINTERNAL pDir = hDir;
+
+    /*
+     * Validate and digest input.
+     */
+    if (!rtDirValidHandle(pDir))
+        return VERR_INVALID_PARAMETER;
+    AssertPtrReturn(pDirEntry, VERR_INVALID_POINTER);
+    /* The enum value is formatted as an integer; it is not a pointer. */
+    AssertMsgReturn(   enmAdditionalAttribs >= RTFSOBJATTRADD_NOTHING
+                    && enmAdditionalAttribs <= RTFSOBJATTRADD_LAST,
+                    ("Invalid enmAdditionalAttribs=%d\n", enmAdditionalAttribs),
+                    VERR_INVALID_PARAMETER);
+    AssertMsgReturn(RTPATH_F_IS_VALID(fFlags, 0), ("%#x\n", fFlags), VERR_INVALID_PARAMETER);
+    size_t cbDirEntry = sizeof(*pDirEntry);
+    if (pcbDirEntry)
+    {
+        AssertPtrReturn(pcbDirEntry, VERR_INVALID_POINTER);
+        cbDirEntry = *pcbDirEntry;
+        AssertMsgReturn(cbDirEntry >= RT_UOFFSETOF(RTDIRENTRYEX, szName[2]),
+                        ("Invalid *pcbDirEntry=%zu (min %zu)\n", *pcbDirEntry, RT_UOFFSETOF(RTDIRENTRYEX, szName[2])),
+                        VERR_INVALID_PARAMETER);
+    }
+
+    /*
+     * Fetch more data if necessary and/or convert the name.
+     */
+    int rc = rtDirReadMore(pDir);
+    if (RT_SUCCESS(rc))
+    {
+        /*
+         * Check if we've got enough space to return the data.
+         */
+        const char  *pszName    = pDir->pszName;
+        const size_t cchName    = pDir->cchName;
+        const size_t cbRequired = RT_UOFFSETOF(RTDIRENTRYEX, szName[1]) + cchName;
+        if (pcbDirEntry)
+            *pcbDirEntry = cbRequired;
+        if (cbRequired <= cbDirEntry)
+        {
+            /*
+             * Setup the returned data.
+             */
+            pDirEntry->cwcShortName    = 0;
+            pDirEntry->wszShortName[0] = 0;
+            pDirEntry->cbName          = (uint16_t)cchName;
+            Assert(pDirEntry->cbName == cchName);
+            memcpy(pDirEntry->szName, pszName, cchName + 1); /* includes the terminator */
+
+            /* get the info data: query "<directory path><entry name>" */
+            size_t cch = cchName + pDir->cchPath + 1;
+            char *pszNamePath = (char *)alloca(cch);
+            if (pszNamePath)
+            {
+                memcpy(pszNamePath, pDir->pszPath, pDir->cchPath);
+                memcpy(pszNamePath + pDir->cchPath, pszName, cchName + 1);
+                rc = RTPathQueryInfoEx(pszNamePath, &pDirEntry->Info, enmAdditionalAttribs, fFlags);
+            }
+            else
+                rc = VERR_NO_MEMORY;
+            if (RT_FAILURE(rc))
+            {
+                /* The entry itself is still valid; hand back placeholder info with a warning status. */
+#ifdef HAVE_DIRENT_D_TYPE
+                rtDirSetDummyInfo(&pDirEntry->Info, rtDirType(pDir->Data.d_type));
+#else
+                rtDirSetDummyInfo(&pDirEntry->Info, RTDIRENTRYTYPE_UNKNOWN);
+#endif
+                rc = VWRN_NO_DIRENT_INFO;
+            }
+
+            /* free cached data */
+            pDir->fDataUnread = false;
+            rtPathFreeIprt(pDir->pszName, pDir->Data.d_name);
+            pDir->pszName = NULL;
+        }
+        else
+            rc = VERR_BUFFER_OVERFLOW; /* entry and name stay cached for the retry */
+    }
+
+    return rc;
+}
+
+
+/**
+ * Resets the read position of an open directory to its first entry.
+ *
+ * @returns VINF_SUCCESS, or VERR_INVALID_PARAMETER for an invalid handle.
+ * @param   hDir    The directory handle.
+ */
+RTDECL(int) RTDirRewind(RTDIR hDir)
+{
+    PRTDIRINTERNAL pDir = hDir;
+
+    /*
+     * Validate and digest input.
+     */
+    if (!rtDirValidHandle(pDir))
+        return VERR_INVALID_PARAMETER;
+
+    /*
+     * Do the rewinding.
+     */
+    /** @todo OS/2 does not rescan the directory as it should. */
+    rewinddir(pDir->pDir);
+    pDir->fDataUnread = false;
+
+    /*
+     * Drop a converted name that is still cached (left behind when a read
+     * returned VERR_BUFFER_OVERFLOW).  rtDirReadMore() only converts a name
+     * when none is cached, so a stale one would otherwise be attached to the
+     * first entry read after the rewind.
+     */
+    if (pDir->pszName)
+    {
+        rtPathFreeIprt(pDir->pszName, pDir->Data.d_name);
+        pDir->pszName = NULL;
+    }
+
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Renames a directory.
+ *
+ * @returns IPRT status code from rtPathPosixRename(), or
+ *          VERR_INVALID_POINTER / VERR_INVALID_PARAMETER on bad input.
+ * @param   pszSrc      The current path; must be a non-empty string.
+ * @param   pszDst      The new path; must be a non-empty string.
+ * @param   fRename     Rename flags; only RTPATHRENAME_FLAGS_REPLACE is
+ *                      accepted.
+ */
+RTDECL(int) RTDirRename(const char *pszSrc, const char *pszDst, unsigned fRename)
+{
+    /* Both paths must be valid pointers to non-empty strings and no unknown flag may be set. */
+    AssertPtrReturn(pszSrc, VERR_INVALID_POINTER);
+    AssertPtrReturn(pszDst, VERR_INVALID_POINTER);
+    AssertMsgReturn(*pszSrc, ("%p\n", pszSrc), VERR_INVALID_PARAMETER);
+    AssertMsgReturn(*pszDst, ("%p\n", pszDst), VERR_INVALID_PARAMETER);
+    AssertMsgReturn((fRename & ~RTPATHRENAME_FLAGS_REPLACE) == 0, ("%#x\n", fRename), VERR_INVALID_PARAMETER);
+
+    /* The shared rename worker does the job; the type argument restricts it to directories. */
+    int const rc = rtPathPosixRename(pszSrc, pszDst, fRename, RTFS_TYPE_DIRECTORY);
+
+    LogFlow(("RTDirRename(%p:{%s}, %p:{%s}): returns %Rrc\n",
+             pszSrc, pszSrc, pszDst, pszDst, rc));
+    return rc;
+}
+
diff --git a/src/VBox/Runtime/r3/posix/env-posix.cpp b/src/VBox/Runtime/r3/posix/env-posix.cpp
new file mode 100644
index 00000000..60a9112e
--- /dev/null
+++ b/src/VBox/Runtime/r3/posix/env-posix.cpp
@@ -0,0 +1,179 @@
+/* $Id: env-posix.cpp $ */
+/** @file
+ * IPRT - Environment, Posix.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#ifdef RT_OS_DARWIN
+/* pick the correct prototype for unsetenv. */
+# define _POSIX_C_SOURCE 1
+#endif
+#include <iprt/env.h>
+
+#include <iprt/alloca.h>
+#include <iprt/assert.h>
+#if defined(DEBUG) && defined(RT_OS_LINUX)
+# include <iprt/asm.h>
+#endif
+#include <iprt/err.h>
+#include <iprt/string.h>
+
+#include <stdlib.h>
+#include <errno.h>
+
+#include "internal/alignmentchecks.h"
+
+
+/**
+ * Checks whether a variable exists in the process environment.
+ *
+ * @returns true if RTEnvGetBad() finds the variable, false if it does not or
+ *          if the name contains an equal sign.
+ * @param   pszVar  The variable name.
+ */
+RTDECL(bool) RTEnvExistsBad(const char *pszVar)
+{
+    /* A name containing '=' is rejected up front. */
+    AssertReturn(strchr(pszVar, '=') == NULL, false);
+
+    const char * const pszValue = RTEnvGetBad(pszVar);
+    return pszValue != NULL;
+}
+
+
+/**
+ * Checks whether a variable exists in the process environment.
+ *
+ * Plain forwarder to RTEnvExistsBad().
+ *
+ * @returns What RTEnvExistsBad() returns for @a pszVar.
+ * @param   pszVar  The variable name.
+ */
+RTDECL(bool) RTEnvExist(const char *pszVar)
+{
+    bool const fExists = RTEnvExistsBad(pszVar);
+    return fExists;
+}
+
+
+/**
+ * Looks up a variable in the process environment using getenv().
+ *
+ * @returns The pointer returned by getenv(), or NULL if the name contains an
+ *          equal sign.
+ * @param   pszVar  The variable name.
+ */
+RTDECL(const char *) RTEnvGetBad(const char *pszVar)
+{
+    const char *pszRet;
+
+    AssertReturn(strchr(pszVar, '=') == NULL, NULL);
+
+    /* Alignment checks are switched off around the libc call (glibc causes trouble). */
+    IPRT_ALIGNMENT_CHECKS_DISABLE();
+    pszRet = getenv(pszVar);
+    IPRT_ALIGNMENT_CHECKS_ENABLE();
+
+    return pszRet;
+}
+
+
+/**
+ * Looks up a variable in the process environment.
+ *
+ * Plain forwarder to RTEnvGetBad().
+ *
+ * @returns What RTEnvGetBad() returns for @a pszVar.
+ * @param   pszVar  The variable name.
+ */
+RTDECL(const char *) RTEnvGet(const char *pszVar)
+{
+    const char * const pszValue = RTEnvGetBad(pszVar);
+    return pszValue;
+}
+
+
+/**
+ * Hands a "VAR=VALUE" string to putenv().
+ *
+ * @returns VINF_SUCCESS when putenv() succeeds, otherwise the IPRT status
+ *          converted from errno.
+ * @param   pszVarEqualValue    The string passed to putenv().
+ *                              NOTE(review): POSIX putenv() makes the string
+ *                              itself part of the environment instead of
+ *                              copying it, so it presumably has to stay valid
+ *                              and unmodified afterwards - confirm for the
+ *                              target platforms.
+ */
+RTDECL(int) RTEnvPutBad(const char *pszVarEqualValue)
+{
+    /** @todo putenv is a source of memory leaks. Deal with this on a per system basis. */
+    if (!putenv((char *)pszVarEqualValue))
+        return VINF_SUCCESS; /* same status symbol as the other functions in this file */
+    return RTErrConvertFromErrno(errno);
+}
+
+
+/**
+ * Puts a "VAR=VALUE" string into the process environment.
+ *
+ * Plain forwarder to RTEnvPutBad().
+ *
+ * @returns What RTEnvPutBad() returns.
+ * @param   pszVarEqualValue    The string handed on to RTEnvPutBad().
+ */
+RTDECL(int) RTEnvPut(const char *pszVarEqualValue)
+{
+    int const rc = RTEnvPutBad(pszVarEqualValue);
+    return rc;
+}
+
+
+/**
+ * Sets a variable in the process environment, replacing any existing value.
+ *
+ * @returns VINF_SUCCESS on success, VERR_ENV_INVALID_VAR_NAME if the name
+ *          contains an equal sign, otherwise the IPRT status converted from
+ *          errno.
+ * @param   pszVar      The variable name.
+ * @param   pszValue    The value to assign.
+ */
+RTDECL(int) RTEnvSetBad(const char *pszVar, const char *pszValue)
+{
+    AssertMsgReturn(strchr(pszVar, '=') == NULL, ("'%s'\n", pszVar), VERR_ENV_INVALID_VAR_NAME);
+
+#if defined(_MSC_VER)
+    /* make a local copy and feed it to putenv. */
+    const size_t cchVar   = strlen(pszVar);
+    const size_t cchValue = strlen(pszValue);
+    /* var + '=' + value + terminator, plus one byte for the blank used for empty values. */
+    char *pszTmp = (char *)alloca(cchVar + cchValue + 2 + !*pszValue);
+    memcpy(pszTmp, pszVar, cchVar);
+    pszTmp[cchVar] = '=';
+    if (*pszValue)
+        memcpy(pszTmp + cchVar + 1, pszValue, cchValue + 1);
+    else
+    {
+        pszTmp[cchVar + 1] = ' '; /* wrong, but putenv will remove it otherwise. */
+        pszTmp[cchVar + 2] = '\0';
+    }
+
+    if (!putenv(pszTmp))
+        return VINF_SUCCESS; /* matches the status symbol used by the setenv() branch */
+    return RTErrConvertFromErrno(errno);
+
+#else
+    /* Third argument 1: overwrite an existing value. */
+    if (!setenv(pszVar, pszValue, 1))
+        return VINF_SUCCESS;
+    return RTErrConvertFromErrno(errno);
+#endif
+}
+
+
+/**
+ * Sets a variable in the process environment.
+ *
+ * Plain forwarder to RTEnvSetBad().
+ *
+ * @returns What RTEnvSetBad() returns.
+ * @param   pszVar      The variable name.
+ * @param   pszValue    The value to assign.
+ */
+RTDECL(int) RTEnvSet(const char *pszVar, const char *pszValue)
+{
+    int const rc = RTEnvSetBad(pszVar, pszValue);
+    return rc;
+}
+
+/**
+ * Removes a variable from the process environment.
+ *
+ * @returns VINF_SUCCESS when the variable was removed,
+ *          VINF_ENV_VAR_NOT_FOUND when it did not exist in the first place,
+ *          VERR_ENV_INVALID_VAR_NAME if the name contains an equal sign,
+ *          otherwise the IPRT status converted from errno.
+ * @param   pszVar  The variable name.
+ */
+RTDECL(int) RTEnvUnsetBad(const char *pszVar)
+{
+    AssertReturn(strchr(pszVar, '=') == NULL, VERR_ENV_INVALID_VAR_NAME);
+
+    /* Nothing to do when the variable is not there; this is reported separately. */
+    if (!RTEnvExist(pszVar))
+        return VINF_ENV_VAR_NOT_FOUND;
+
+#ifdef RT_OS_WINDOWS
+    /* Use putenv(var=) since Windows does not have unsetenv(). */
+    size_t const cchName   = strlen(pszVar);
+    char        *pszAssign = (char *)alloca(cchName + 2);
+    memcpy(pszAssign, pszVar, cchName);
+    pszAssign[cchName]     = '=';
+    pszAssign[cchName + 1] = '\0';
+    int const rcCrt = putenv(pszAssign);
+#else
+    /* This is the preferred function as putenv() like used above does neither work on Solaris nor on Darwin. */
+    int const rcCrt = unsetenv((char*)pszVar);
+#endif
+
+    if (rcCrt == 0)
+        return VINF_SUCCESS;
+    return RTErrConvertFromErrno(errno);
+}
+
+/**
+ * Removes a variable from the process environment.
+ *
+ * Plain forwarder to RTEnvUnsetBad().
+ *
+ * @returns What RTEnvUnsetBad() returns.
+ * @param   pszVar  The variable name.
+ */
+RTDECL(int) RTEnvUnset(const char *pszVar)
+{
+    int const rc = RTEnvUnsetBad(pszVar);
+    return rc;
+}
+
diff --git a/src/VBox/Runtime/r3/posix/errvars-posix.cpp b/src/VBox/Runtime/r3/posix/errvars-posix.cpp
new file mode 100644
index 00000000..b6a98709
--- /dev/null
+++ b/src/VBox/Runtime/r3/posix/errvars-posix.cpp
@@ -0,0 +1,88 @@
+/* $Id: errvars-posix.cpp $ */
+/** @file
+ * IPRT - Save and Restore Error Variables, POSIX Ring-3.
+ */
+
+/*
+ * Copyright (C) 2011-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#include <netdb.h>
+#include <errno.h>
+
+#include <iprt/errcore.h>
+#include "internal/iprt.h"
+
+#include <iprt/assert.h>
+#include "internal/magics.h"
+
+
+
+/**
+ * Captures the current errno and h_errno values.
+ *
+ * Slot 0 receives the magic marker, slot 1 errno and slot 2 h_errno.
+ *
+ * @returns @a pVars.
+ * @param   pVars   The structure to store the values in.
+ */
+RTDECL(PRTERRVARS) RTErrVarsSave(PRTERRVARS pVars)
+{
+    /* Marker used by the other functions to recognise a saved structure. */
+    pVars->ai32Vars[0] = RTERRVARS_MAGIC;
+
+    /* The error variables themselves: errno first, then h_errno. */
+    pVars->ai32Vars[1] = errno;
+    pVars->ai32Vars[2] = h_errno;
+
+    return pVars;
+}
+
+
+/**
+ * Writes the values captured by RTErrVarsSave() back to h_errno and errno.
+ *
+ * Does nothing (beyond asserting) when slot 0 does not hold the magic marker.
+ *
+ * @param pVars The saved values.
+ */
+RTDECL(void) RTErrVarsRestore(PCRTERRVARS pVars)
+{
+ AssertReturnVoid(pVars->ai32Vars[0] == RTERRVARS_MAGIC);
+ /* NOTE(review): errno is written last, presumably so that nothing done
+ * while assigning h_errno can disturb the restored errno - confirm before
+ * reordering. */
+ h_errno = pVars->ai32Vars[2];
+ errno = pVars->ai32Vars[1];
+}
+
+
+/**
+ * Compares two sets of saved error variables.
+ *
+ * @returns true when the marker, errno and h_errno slots all match, false
+ *          otherwise.
+ * @param   pVars1  The first set.
+ * @param   pVars2  The second set.
+ */
+RTDECL(bool) RTErrVarsAreEqual(PCRTERRVARS pVars1, PCRTERRVARS pVars2)
+{
+    Assert(pVars1->ai32Vars[0] == RTERRVARS_MAGIC);
+    Assert(pVars2->ai32Vars[0] == RTERRVARS_MAGIC);
+
+    /* Compare slot by slot in index order and bail out at the first difference. */
+    if (pVars1->ai32Vars[0] != pVars2->ai32Vars[0])
+        return false;
+    if (pVars1->ai32Vars[1] != pVars2->ai32Vars[1])
+        return false;
+    if (pVars1->ai32Vars[2] != pVars2->ai32Vars[2])
+        return false;
+    return true;
+}
+
+
+/**
+ * Checks whether errno or h_errno differ from the saved values.
+ *
+ * @returns true when the marker is wrong or when errno or h_errno no longer
+ *          equal the saved values, false when everything still matches.
+ * @param   pVars   The saved values.
+ */
+RTDECL(bool) RTErrVarsHaveChanged(PCRTERRVARS pVars)
+{
+    Assert(pVars->ai32Vars[0] == RTERRVARS_MAGIC);
+
+    /* A structure without the marker counts as changed. */
+    if (pVars->ai32Vars[0] != RTERRVARS_MAGIC)
+        return true;
+    /* errno is looked at before h_errno. */
+    if (pVars->ai32Vars[1] != errno)
+        return true;
+    if (pVars->ai32Vars[2] != h_errno)
+        return true;
+    return false;
+}
+
diff --git a/src/VBox/Runtime/r3/posix/fileaio-posix.cpp b/src/VBox/Runtime/r3/posix/fileaio-posix.cpp
new file mode 100644
index 00000000..befa49fb
--- /dev/null
+++ b/src/VBox/Runtime/r3/posix/fileaio-posix.cpp
@@ -0,0 +1,1071 @@
+/* $Id: fileaio-posix.cpp $ */
+/** @file
+ * IPRT - File async I/O, native implementation for POSIX compliant host platforms.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP RTLOGGROUP_DIR
+#include <iprt/asm.h>
+#include <iprt/file.h>
+#include <iprt/mem.h>
+#include <iprt/assert.h>
+#include <iprt/string.h>
+#include <iprt/err.h>
+#include <iprt/log.h>
+#include <iprt/thread.h>
+#include <iprt/semaphore.h>
+#include "internal/fileaio.h"
+
+#if defined(RT_OS_DARWIN) || defined(RT_OS_FREEBSD)
+# include <sys/types.h>
+# include <sys/sysctl.h> /* for sysctlbyname */
+#endif
+#if defined(RT_OS_FREEBSD)
+# include <fcntl.h> /* O_SYNC */
+#endif
+#include <aio.h>
+#include <errno.h>
+#include <time.h>
+
+/*
+ * Linux does not define this value.
+ * Just define it with really big
+ * value.
+ */
+#ifndef AIO_LISTIO_MAX
+# define AIO_LISTIO_MAX UINT32_MAX
+#endif
+
+#if 0 /* Only used for debugging */
+# undef AIO_LISTIO_MAX
+# define AIO_LISTIO_MAX 16
+#endif
+
+/** Invalid entry in the waiting array. */
+#define RTFILEAIOCTX_WAIT_ENTRY_INVALID (~0U)
+
+/** No-op replacement for rtFileAioCtxDump for non debug builds */
+#ifndef LOG_ENABLED
+# define rtFileAioCtxDump(pCtxInt) do {} while (0)
+#endif
+
+
+/*********************************************************************************************************************************
+* Structures and Typedefs *
+*********************************************************************************************************************************/
+/**
+ * Async I/O request state.
+ *
+ * One instance is allocated per request handle by RTFileAioReqCreate().
+ */
+typedef struct RTFILEAIOREQINTERNAL
+{
+ /** The aio control block. FIRST ELEMENT! */
+ struct aiocb AioCB;
+ /** Next element in the chain (new-request chains and the context's wait list). */
+ struct RTFILEAIOREQINTERNAL *pNext;
+ /** Previous element in the chain. */
+ struct RTFILEAIOREQINTERNAL *pPrev;
+ /** Current state the request is in. */
+ RTFILEAIOREQSTATE enmState;
+ /** Flag whether this is a flush request. */
+ bool fFlush;
+ /** Flag indicating if the request was canceled. */
+ volatile bool fCanceled;
+ /** Opaque user data, returned by RTFileAioReqGetUser(). */
+ void *pvUser;
+ /** Number of bytes actually transferred. */
+ size_t cbTransfered;
+ /** Status code of the request, returned by RTFileAioReqGetRC(). */
+ int Rc;
+ /** Completion context we are assigned to. */
+ struct RTFILEAIOCTXINTERNAL *pCtxInt;
+ /** Index of the request in the context's array of requests being waited on,
+ * RTFILEAIOCTX_WAIT_ENTRY_INVALID if it is not in that array. */
+ unsigned iWaitingList;
+ /** Magic value (RTFILEAIOREQ_MAGIC). */
+ uint32_t u32Magic;
+} RTFILEAIOREQINTERNAL, *PRTFILEAIOREQINTERNAL;
+
+/**
+ * Async I/O completion context state.
+ *
+ * The structure ends in a variable sized array; RTFileAioCtxCreate() allocates
+ * it together with room for the additional array entries.
+ */
+typedef struct RTFILEAIOCTXINTERNAL
+{
+ /** Current number of requests active on this context. */
+ volatile int32_t cRequests;
+ /** Maximum number of requests this context can handle. */
+ uint32_t cMaxRequests;
+ /** The ID of the thread which is currently waiting for requests. */
+ volatile RTTHREAD hThreadWait;
+ /** Flag whether the thread was woken up. */
+ volatile bool fWokenUp;
+ /** Flag whether the thread is currently waiting in the syscall. */
+ volatile bool fWaiting;
+ /** Flags given during creation. */
+ uint32_t fFlags;
+ /** Magic value (RTFILEAIOCTX_MAGIC). */
+ uint32_t u32Magic;
+ /** Flag whether the thread was woken up due to an internal event. */
+ volatile bool fWokenUpInternal;
+ /** List of new requests which needs to be inserted into apReqs by the
+ * waiting thread. Each slot holds the head of a chain linked via pNext. */
+ volatile PRTFILEAIOREQINTERNAL apReqsNewHead[5];
+ /** Special entry for requests which are canceled. Because only one
+ * request can be canceled at a time and the thread canceling the request
+ * has to wait we need only one entry. */
+ volatile PRTFILEAIOREQINTERNAL pReqToCancel;
+ /** Event semaphore the canceling thread is waiting for completion of
+ * the operation. */
+ RTSEMEVENT SemEventCancel;
+ /** Head of submitted elements waiting to get into the array. */
+ PRTFILEAIOREQINTERNAL pReqsWaitHead;
+ /** Tail of submitted elements waiting to get into the array. */
+ PRTFILEAIOREQINTERNAL pReqsWaitTail;
+ /** Maximum number of elements in the waiting array. */
+ unsigned cReqsWaitMax;
+ /** First free slot in the waiting list; the entries below it are in use. */
+ unsigned iFirstFree;
+ /** List of requests we are currently waiting on.
+ * Size depends on cMaxRequests and AIO_LISTIO_MAX. */
+ volatile PRTFILEAIOREQINTERNAL apReqs[1];
+} RTFILEAIOCTXINTERNAL, *PRTFILEAIOCTXINTERNAL;
+
+/**
+ * Internal worker for waking up the waiting thread.
+ *
+ * Pokes the thread recorded in hThreadWait, but only if the context's
+ * fWaiting flag is set.
+ *
+ * @param pCtxInt The context whose waiter should be woken up.
+ */
+static void rtFileAioCtxWakeup(PRTFILEAIOCTXINTERNAL pCtxInt)
+{
+ /*
+ * Read the thread handle before the status flag.
+ * If we read the handle after the flag we might
+ * end up with an invalid handle because the thread
+ * waiting in RTFileAioCtxWait() might get scheduled
+ * before we read the flag and returns.
+ * We can ensure that the handle is valid if fWaiting is true
+ * when reading the handle before the status flag.
+ */
+ RTTHREAD hThread;
+ ASMAtomicReadHandle(&pCtxInt->hThreadWait, &hThread);
+ bool fWaiting = ASMAtomicReadBool(&pCtxInt->fWaiting);
+ if (fWaiting)
+ {
+ /*
+ * If a thread waits the handle must be valid.
+ * It is possible that the thread returns from
+ * aio_suspend() before the signal is sent.
+ * This is no problem because fWokenUp was already set
+ * to true (not by this function; presumably by the caller),
+ * which will let the thread return VERR_INTERRUPTED
+ * and the next call to RTFileAioCtxWait() will not
+ * return VERR_INTERRUPTED because signals are not saved
+ * and will simply vanish if the destination thread can't
+ * receive it.
+ */
+ Assert(hThread != NIL_RTTHREAD);
+ RTThreadPoke(hThread);
+ }
+}
+
+/**
+ * Internal worker processing events and inserting new requests into the waiting list.
+ *
+ * If the internal wake-up flag was set, the chains of newly submitted
+ * requests are moved into the array of requests being waited on (overflow
+ * goes onto the wait list) and a pending cancel request is taken out again.
+ * Otherwise the external wake-up flag is consumed.
+ *
+ * @returns VINF_SUCCESS, or VERR_INTERRUPTED if the internal flag was clear
+ * and the external wake-up flag was set.
+ * @param pCtxInt The context to process.
+ */
+static int rtFileAioCtxProcessEvents(PRTFILEAIOCTXINTERNAL pCtxInt)
+{
+ int rc = VINF_SUCCESS;
+
+ /* Process new requests first. The flag is fetched and cleared in one atomic step. */
+ bool fWokenUp = ASMAtomicXchgBool(&pCtxInt->fWokenUpInternal, false);
+ if (fWokenUp)
+ {
+ for (unsigned iSlot = 0; iSlot < RT_ELEMENTS(pCtxInt->apReqsNewHead); iSlot++)
+ {
+ /* Take the whole chain out of the slot, leaving the slot empty. */
+ PRTFILEAIOREQINTERNAL pReqHead = ASMAtomicXchgPtrT(&pCtxInt->apReqsNewHead[iSlot], NULL, PRTFILEAIOREQINTERNAL);
+
+ /* Fill free array entries for as long as there is room and the chain is not exhausted. */
+ while ( (pCtxInt->iFirstFree < pCtxInt->cReqsWaitMax)
+ && pReqHead)
+ {
+ RTFIELAIOREQ_ASSERT_STATE(pReqHead, SUBMITTED);
+ pCtxInt->apReqs[pCtxInt->iFirstFree] = pReqHead;
+ pReqHead->iWaitingList = pCtxInt->iFirstFree;
+ pReqHead = pReqHead->pNext;
+
+ /* Clear pointer to next and previous element just for safety. */
+ pCtxInt->apReqs[pCtxInt->iFirstFree]->pNext = NULL;
+ pCtxInt->apReqs[pCtxInt->iFirstFree]->pPrev = NULL;
+ pCtxInt->iFirstFree++;
+
+ Assert( (pCtxInt->iFirstFree <= pCtxInt->cMaxRequests)
+ && (pCtxInt->iFirstFree <= pCtxInt->cReqsWaitMax));
+ }
+
+ /* Append the rest to the wait list. */
+ if (pReqHead)
+ {
+ RTFIELAIOREQ_ASSERT_STATE(pReqHead, SUBMITTED);
+ if (!pCtxInt->pReqsWaitHead)
+ {
+ /* Empty wait list: the remainder becomes the list. */
+ Assert(!pCtxInt->pReqsWaitTail);
+ pCtxInt->pReqsWaitHead = pReqHead;
+ pReqHead->pPrev = NULL;
+ }
+ else
+ {
+ AssertPtr(pCtxInt->pReqsWaitTail);
+
+ pCtxInt->pReqsWaitTail->pNext = pReqHead;
+ pReqHead->pPrev = pCtxInt->pReqsWaitTail;
+ }
+
+ /* Update tail: walk to the last element of the appended chain. */
+ while (pReqHead->pNext)
+ {
+ RTFIELAIOREQ_ASSERT_STATE(pReqHead->pNext, SUBMITTED);
+ pReqHead = pReqHead->pNext;
+ }
+
+ pCtxInt->pReqsWaitTail = pReqHead;
+ pCtxInt->pReqsWaitTail->pNext = NULL;
+ }
+ }
+
+ /* Check if a request needs to be canceled. */
+ PRTFILEAIOREQINTERNAL pReqToCancel = ASMAtomicReadPtrT(&pCtxInt->pReqToCancel, PRTFILEAIOREQINTERNAL);
+ if (pReqToCancel)
+ {
+ /* The request can be in the array waiting for completion or still in the list because it is full. */
+ if (pReqToCancel->iWaitingList != RTFILEAIOCTX_WAIT_ENTRY_INVALID)
+ {
+ /* Put it out of the waiting list: the last used array entry is moved
+ * into its slot and that entry's index is updated to match. */
+ pCtxInt->apReqs[pReqToCancel->iWaitingList] = pCtxInt->apReqs[--pCtxInt->iFirstFree];
+ pCtxInt->apReqs[pReqToCancel->iWaitingList]->iWaitingList = pReqToCancel->iWaitingList;
+ }
+ else
+ {
+ /* Unlink from the waiting list. */
+ PRTFILEAIOREQINTERNAL pPrev = pReqToCancel->pPrev;
+ PRTFILEAIOREQINTERNAL pNext = pReqToCancel->pNext;
+
+ if (pNext)
+ pNext->pPrev = pPrev;
+ else
+ {
+ /* We canceled the tail. */
+ pCtxInt->pReqsWaitTail = pPrev;
+ }
+
+ if (pPrev)
+ pPrev->pNext = pNext;
+ else
+ {
+ /* We canceled the head. */
+ pCtxInt->pReqsWaitHead = pNext;
+ }
+ }
+
+ /* One request less on this context; then acknowledge to the thread
+ * blocked on the semaphore in RTFileAioReqCancel(). */
+ ASMAtomicDecS32(&pCtxInt->cRequests);
+ AssertMsg(pCtxInt->cRequests >= 0, ("Canceled request not which is not in this context\n"));
+ RTSemEventSignal(pCtxInt->SemEventCancel);
+ }
+ }
+ else
+ {
+ /* No internal event: consume an external wake-up, if there was one. */
+ if (ASMAtomicXchgBool(&pCtxInt->fWokenUp, false))
+ rc = VERR_INTERRUPTED;
+ }
+
+ return rc;
+}
+
+/**
+ * Queries the async I/O limits of the host.
+ *
+ * On Darwin and FreeBSD the maximum number of outstanding requests is read
+ * via sysctlbyname(); everywhere else it is reported as unlimited. The
+ * buffer alignment is reported as 0 in all cases.
+ *
+ * @returns VINF_SUCCESS on success, VERR_INVALID_POINTER for a bad pointer,
+ * VERR_NOT_SUPPORTED on FreeBSD when the sysctl value does not
+ * exist, otherwise the IPRT status converted from errno.
+ * @param pAioLimits Where to store the limits.
+ */
+RTR3DECL(int) RTFileAioGetLimits(PRTFILEAIOLIMITS pAioLimits)
+{
+ int rcBSD = 0; /* only used by the Darwin and FreeBSD branches */
+ AssertPtrReturn(pAioLimits, VERR_INVALID_POINTER);
+
+#if defined(RT_OS_DARWIN)
+ int cReqsOutstandingMax = 0;
+ size_t cbParameter = sizeof(int);
+
+ rcBSD = sysctlbyname("kern.aioprocmax", /* name */
+ &cReqsOutstandingMax, /* Where to store the current value. */
+ &cbParameter, /* Size of the memory pointed to. */
+ NULL, /* New value: none, we only read. */
+ 0); /* Size of the new value. */
+ if (rcBSD == -1)
+ return RTErrConvertFromErrno(errno);
+
+ pAioLimits->cReqsOutstandingMax = cReqsOutstandingMax;
+ pAioLimits->cbBufferAlignment = 0;
+#elif defined(RT_OS_FREEBSD)
+ /*
+ * The AIO API is implemented in a kernel module which is not
+ * loaded by default.
+ * If it is loaded there are additional sysctl parameters.
+ */
+ int cReqsOutstandingMax = 0;
+ size_t cbParameter = sizeof(int);
+
+ rcBSD = sysctlbyname("vfs.aio.max_aio_per_proc", /* name */
+ &cReqsOutstandingMax, /* Where to store the current value. */
+ &cbParameter, /* Size of the memory pointed to. */
+ NULL, /* New value: none, we only read. */
+ 0); /* Size of the new value. */
+ if (rcBSD == -1)
+ {
+ /* ENOENT means the value is unknown thus the module is not loaded. */
+ if (errno == ENOENT)
+ return VERR_NOT_SUPPORTED;
+ else
+ return RTErrConvertFromErrno(errno);
+ }
+
+ pAioLimits->cReqsOutstandingMax = cReqsOutstandingMax;
+ pAioLimits->cbBufferAlignment = 0;
+#else
+ /* No limit is queried on other hosts. */
+ pAioLimits->cReqsOutstandingMax = RTFILEAIO_UNLIMITED_REQS;
+ pAioLimits->cbBufferAlignment = 0;
+#endif
+
+ return VINF_SUCCESS;
+}
+
+/**
+ * Allocates a new async I/O request handle.
+ *
+ * The request starts out zero-initialised, not bound to any context, not in
+ * the waiting array and in the COMPLETED state.
+ *
+ * @returns VINF_SUCCESS, VERR_INVALID_POINTER or VERR_NO_MEMORY.
+ * @param   phReq   Where to return the new request handle.
+ */
+RTR3DECL(int) RTFileAioReqCreate(PRTFILEAIOREQ phReq)
+{
+    AssertPtrReturn(phReq, VERR_INVALID_POINTER);
+
+    PRTFILEAIOREQINTERNAL pReqInt = (PRTFILEAIOREQINTERNAL)RTMemAllocZ(sizeof(*pReqInt));
+    if (RT_UNLIKELY(!pReqInt))
+        return VERR_NO_MEMORY;
+
+    /* Only the fields that must not stay zero need explicit values. */
+    pReqInt->u32Magic     = RTFILEAIOREQ_MAGIC;
+    pReqInt->iWaitingList = RTFILEAIOCTX_WAIT_ENTRY_INVALID;
+    pReqInt->pCtxInt      = NULL;
+    RTFILEAIOREQ_SET_STATE(pReqInt, COMPLETED);
+
+    *phReq = (RTFILEAIOREQ)pReqInt;
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Destroys an async I/O request handle.
+ *
+ * @returns VINF_SUCCESS (also for NIL_RTFILEAIOREQ), or
+ *          VERR_FILE_AIO_IN_PROGRESS if the request is in the SUBMITTED
+ *          state; the handle validation macro may return as well.
+ * @param   hReq    The request handle; NIL is quietly ignored.
+ */
+RTDECL(int) RTFileAioReqDestroy(RTFILEAIOREQ hReq)
+{
+    /* Destroying the nil handle is a no-op. */
+    if (hReq == NIL_RTFILEAIOREQ)
+        return VINF_SUCCESS;
+
+    /* Anything else has to be a valid request that is not submitted at the moment. */
+    PRTFILEAIOREQINTERNAL pReqInt = hReq;
+    RTFILEAIOREQ_VALID_RETURN(pReqInt);
+    RTFILEAIOREQ_NOT_STATE_RETURN_RC(pReqInt, SUBMITTED, VERR_FILE_AIO_IN_PROGRESS);
+
+    /* Invalidate the magic before releasing the memory. */
+    ASMAtomicUoWriteU32(&pReqInt->u32Magic, ~RTFILEAIOREQ_MAGIC);
+    RTMemFree(pReqInt);
+
+    return VINF_SUCCESS;
+}
+
+/**
+ * Worker that sets up a request for a read or write transfer.
+ *
+ * @returns VINF_SUCCESS, or VERR_FILE_AIO_IN_PROGRESS if the request is in
+ *          the SUBMITTED state; the handle validation macro may return too.
+ * @param   hReq                The request handle.
+ * @param   hFile               The file to transfer from or to.
+ * @param   uTransferDirection  Stored as the aio_lio_opcode of the control
+ *                              block (the callers pass LIO_READ / LIO_WRITE).
+ * @param   off                 The file offset of the transfer.
+ * @param   pvBuf               The data buffer.
+ * @param   cbTransfer          The number of bytes to transfer.
+ * @param   pvUser              Opaque user data stored with the request.
+ */
+DECLINLINE(int) rtFileAioReqPrepareTransfer(RTFILEAIOREQ hReq, RTFILE hFile,
+                                            unsigned uTransferDirection,
+                                            RTFOFF off, void *pvBuf, size_t cbTransfer,
+                                            void *pvUser)
+{
+    /* The request must be valid and must not be in flight. */
+    PRTFILEAIOREQINTERNAL pReqInt = hReq;
+    RTFILEAIOREQ_VALID_RETURN(pReqInt);
+    RTFILEAIOREQ_NOT_STATE_RETURN_RC(pReqInt, SUBMITTED, VERR_FILE_AIO_IN_PROGRESS);
+    Assert(hFile != NIL_RTFILE);
+    AssertPtr(pvBuf);
+    Assert(off >= 0);
+    Assert(cbTransfer > 0);
+
+    /* Start from a clean control block and describe the transfer. */
+    memset(&pReqInt->AioCB, 0, sizeof(pReqInt->AioCB));
+    pReqInt->AioCB.aio_fildes     = RTFileToNative(hFile);
+    pReqInt->AioCB.aio_lio_opcode = uTransferDirection;
+    pReqInt->AioCB.aio_buf        = pvBuf;
+    pReqInt->AioCB.aio_nbytes     = cbTransfer;
+    pReqInt->AioCB.aio_offset     = off;
+
+    /* Bookkeeping: not a flush, no context yet, result pending. */
+    pReqInt->fFlush  = false;
+    pReqInt->pCtxInt = NULL;
+    pReqInt->pvUser  = pvUser;
+    pReqInt->Rc      = VERR_FILE_AIO_IN_PROGRESS;
+    RTFILEAIOREQ_SET_STATE(pReqInt, PREPARED);
+
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Prepares a request for reading from a file.
+ *
+ * @returns What rtFileAioReqPrepareTransfer() returns.
+ * @param   hReq    The request handle.
+ * @param   hFile   The file to read from.
+ * @param   off     The file offset to read at.
+ * @param   pvBuf   Where to store the data.
+ * @param   cbRead  The number of bytes to read.
+ * @param   pvUser  Opaque user data stored with the request.
+ */
+RTDECL(int) RTFileAioReqPrepareRead(RTFILEAIOREQ hReq, RTFILE hFile, RTFOFF off,
+                                    void *pvBuf, size_t cbRead, void *pvUser)
+{
+    /* A read is a transfer with the LIO_READ opcode. */
+    int const rc = rtFileAioReqPrepareTransfer(hReq, hFile, LIO_READ, off, pvBuf, cbRead, pvUser);
+    return rc;
+}
+
+
+/**
+ * Prepares a request for writing to a file.
+ *
+ * @returns What rtFileAioReqPrepareTransfer() returns.
+ * @param   hReq    The request handle.
+ * @param   hFile   The file to write to.
+ * @param   off     The file offset to write at.
+ * @param   pvBuf   The data to write.
+ * @param   cbWrite The number of bytes to write.
+ * @param   pvUser  Opaque user data stored with the request.
+ */
+RTDECL(int) RTFileAioReqPrepareWrite(RTFILEAIOREQ hReq, RTFILE hFile, RTFOFF off,
+                                     void const *pvBuf, size_t cbWrite, void *pvUser)
+{
+    /* A write is a transfer with the LIO_WRITE opcode; the worker takes a non-const buffer pointer. */
+    int const rc = rtFileAioReqPrepareTransfer(hReq, hFile, LIO_WRITE, off, (void *)pvBuf, cbWrite, pvUser);
+    return rc;
+}
+
+
+/**
+ * Prepares a request for flushing a file.
+ *
+ * @returns VINF_SUCCESS, or VERR_FILE_AIO_IN_PROGRESS if the request is in
+ *          the SUBMITTED state; the handle validation macro may return too.
+ * @param   hReq    The request handle.
+ * @param   hFile   The file to flush.
+ * @param   pvUser  Opaque user data stored with the request.
+ */
+RTDECL(int) RTFileAioReqPrepareFlush(RTFILEAIOREQ hReq, RTFILE hFile, void *pvUser)
+{
+    PRTFILEAIOREQINTERNAL pReqInt = (PRTFILEAIOREQINTERNAL)hReq;
+
+    /* The request must be valid and must not be in flight. */
+    RTFILEAIOREQ_VALID_RETURN(pReqInt);
+    RTFILEAIOREQ_NOT_STATE_RETURN_RC(pReqInt, SUBMITTED, VERR_FILE_AIO_IN_PROGRESS);
+    Assert(hFile != NIL_RTFILE);
+
+    /* A flush carries no data: only the file descriptor is meaningful. */
+    pReqInt->AioCB.aio_fildes = RTFileToNative(hFile);
+    pReqInt->AioCB.aio_buf    = NULL;
+    pReqInt->AioCB.aio_nbytes = 0;
+    pReqInt->AioCB.aio_offset = 0;
+
+    /* Bookkeeping: flag it as a flush, result pending. */
+    pReqInt->fFlush = true;
+    pReqInt->pvUser = pvUser;
+    pReqInt->Rc     = VERR_FILE_AIO_IN_PROGRESS;
+    RTFILEAIOREQ_SET_STATE(pReqInt, PREPARED);
+
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Returns the opaque user data stored with a request.
+ *
+ * @returns The user data pointer; the validation macro is given NULL as the
+ *          value to return for an invalid handle.
+ * @param   hReq    The request handle.
+ */
+RTDECL(void *) RTFileAioReqGetUser(RTFILEAIOREQ hReq)
+{
+    PRTFILEAIOREQINTERNAL pReqInt = hReq;
+    RTFILEAIOREQ_VALID_RETURN_RC(pReqInt, NULL);
+
+    void * const pvUser = pReqInt->pvUser;
+    return pvUser;
+}
+
+
+/**
+ * Tries to cancel a submitted request.
+ *
+ * When aio_cancel() reports the request as canceled, the request is handed
+ * to the context's waiting thread for removal and this function blocks until
+ * that is acknowledged through the context's cancel semaphore.
+ *
+ * @returns VINF_SUCCESS if the request was canceled,
+ * VERR_FILE_AIO_NOT_SUBMITTED if it is not in the SUBMITTED state,
+ * VERR_FILE_AIO_COMPLETED for AIO_ALLDONE,
+ * VERR_FILE_AIO_IN_PROGRESS for AIO_NOTCANCELED, otherwise the IPRT
+ * status converted from errno.
+ * @param hReq The request handle.
+ */
+RTDECL(int) RTFileAioReqCancel(RTFILEAIOREQ hReq)
+{
+ PRTFILEAIOREQINTERNAL pReqInt = hReq;
+ RTFILEAIOREQ_VALID_RETURN(pReqInt);
+ RTFILEAIOREQ_STATE_RETURN_RC(pReqInt, SUBMITTED, VERR_FILE_AIO_NOT_SUBMITTED);
+
+ /* Flag the request as canceled before asking the system to cancel it. */
+ ASMAtomicXchgBool(&pReqInt->fCanceled, true);
+
+ int rcPosix = aio_cancel(pReqInt->AioCB.aio_fildes, &pReqInt->AioCB);
+
+ if (rcPosix == AIO_CANCELED)
+ {
+ PRTFILEAIOCTXINTERNAL pCtxInt = pReqInt->pCtxInt;
+ /*
+ * Notify the waiting thread that the request was canceled.
+ */
+ AssertMsg(RT_VALID_PTR(pCtxInt), ("Invalid state. Request was canceled but wasn't submitted\n"));
+
+ /* Publish the request in the context's single cancel slot, then wake the waiter. */
+ Assert(!pCtxInt->pReqToCancel);
+ ASMAtomicWritePtr(&pCtxInt->pReqToCancel, pReqInt);
+ rtFileAioCtxWakeup(pCtxInt);
+
+ /* Wait for acknowledge. */
+ int rc = RTSemEventWait(pCtxInt->SemEventCancel, RT_INDEFINITE_WAIT);
+ AssertRC(rc);
+
+ /* Clear the cancel slot again and complete the request with the canceled status. */
+ ASMAtomicWriteNullPtr(&pCtxInt->pReqToCancel);
+ pReqInt->Rc = VERR_FILE_AIO_CANCELED;
+ RTFILEAIOREQ_SET_STATE(pReqInt, COMPLETED);
+ return VINF_SUCCESS;
+ }
+ else if (rcPosix == AIO_ALLDONE)
+ return VERR_FILE_AIO_COMPLETED;
+ else if (rcPosix == AIO_NOTCANCELED)
+ return VERR_FILE_AIO_IN_PROGRESS;
+ else
+ return RTErrConvertFromErrno(errno);
+}
+
+
+/**
+ * Returns the status code of a request and optionally the transfer size.
+ *
+ * @returns The Rc member of the request.  The state check macros may return
+ *          earlier (presumably VERR_FILE_AIO_IN_PROGRESS for a SUBMITTED and
+ *          VERR_FILE_AIO_NOT_SUBMITTED for a PREPARED request - confirm
+ *          against the macro definitions).
+ * @param   hReq            The request handle.
+ * @param   pcbTransfered   Where to store cbTransfered.  Optional; only
+ *                          written when the request status is a success.
+ */
+RTDECL(int) RTFileAioReqGetRC(RTFILEAIOREQ hReq, size_t *pcbTransfered)
+{
+    PRTFILEAIOREQINTERNAL pReqInt = hReq;
+    RTFILEAIOREQ_VALID_RETURN(pReqInt);
+    RTFILEAIOREQ_NOT_STATE_RETURN_RC(pReqInt, SUBMITTED, VERR_FILE_AIO_IN_PROGRESS);
+    RTFILEAIOREQ_NOT_STATE_RETURN_RC(pReqInt, PREPARED, VERR_FILE_AIO_NOT_SUBMITTED);
+    AssertPtrNull(pcbTransfered);
+
+    /* The byte count is only handed out for successful requests. */
+    if (RT_SUCCESS(pReqInt->Rc))
+    {
+        if (pcbTransfered)
+            *pcbTransfered = pReqInt->cbTransfered;
+    }
+
+    return pReqInt->Rc;
+}
+
+
+/**
+ * Creates an async I/O context.
+ *
+ * Allocates the zeroed context structure followed by one request pointer per
+ * wait slot (the smaller of cAioReqsMax and AIO_LISTIO_MAX) and creates the
+ * event semaphore stored in SemEventCancel.
+ *
+ * @returns VINF_SUCCESS on success.
+ * @returns VERR_INVALID_POINTER / VERR_INVALID_PARAMETER from the parameter
+ *          assertions.
+ * @returns VERR_OUT_OF_RANGE if cAioReqsMax is RTFILEAIO_UNLIMITED_REQS.
+ * @returns VERR_NO_MEMORY if the allocation fails.
+ * @returns The RTSemEventCreate() status if the semaphore cannot be created.
+ * @param   phAioCtx        Where to return the context handle on success.
+ * @param   cAioReqsMax     Maximum number of requests for the context.
+ * @param   fFlags          Flags, checked against RTFILEAIOCTX_FLAGS_VALID_MASK.
+ */
+RTDECL(int) RTFileAioCtxCreate(PRTFILEAIOCTX phAioCtx, uint32_t cAioReqsMax,
+                               uint32_t fFlags)
+{
+    AssertPtrReturn(phAioCtx, VERR_INVALID_POINTER);
+    AssertReturn(!(fFlags & ~RTFILEAIOCTX_FLAGS_VALID_MASK), VERR_INVALID_PARAMETER);
+
+    /* An unlimited request count is rejected here. */
+    if (cAioReqsMax == RTFILEAIO_UNLIMITED_REQS)
+        return VERR_OUT_OF_RANGE;
+
+    /* The wait array never needs more entries than AIO_LISTIO_MAX. */
+    unsigned cReqsWaitMax = RT_MIN(cAioReqsMax, AIO_LISTIO_MAX);
+
+    PRTFILEAIOCTXINTERNAL pCtxInt;
+    pCtxInt = (PRTFILEAIOCTXINTERNAL)RTMemAllocZ(  sizeof(RTFILEAIOCTXINTERNAL)
+                                                 + cReqsWaitMax * sizeof(PRTFILEAIOREQINTERNAL));
+    if (RT_UNLIKELY(!pCtxInt))
+        return VERR_NO_MEMORY;
+
+    /* Create the event semaphore; without it the context is useless, so undo the allocation. */
+    int rc = RTSemEventCreate(&pCtxInt->SemEventCancel);
+    if (RT_SUCCESS(rc))
+    {
+        pCtxInt->u32Magic     = RTFILEAIOCTX_MAGIC;
+        pCtxInt->cMaxRequests = cAioReqsMax;
+        pCtxInt->cReqsWaitMax = cReqsWaitMax;
+        pCtxInt->fFlags       = fFlags;
+        *phAioCtx = (RTFILEAIOCTX)pCtxInt;
+        return VINF_SUCCESS;
+    }
+
+    RTMemFree(pCtxInt);
+    return rc;
+}
+
+
+/**
+ * Destroys an async I/O context.
+ *
+ * @returns VINF_SUCCESS after the semaphore and the context memory were freed.
+ * @returns VERR_INVALID_HANDLE if the handle is not a valid pointer.
+ * @returns VERR_FILE_AIO_BUSY if the context's request counter is not zero.
+ * @param   hAioCtx     The context handle.
+ */
+RTDECL(int) RTFileAioCtxDestroy(RTFILEAIOCTX hAioCtx)
+{
+    PRTFILEAIOCTXINTERNAL pCtxInt = hAioCtx;
+
+    AssertPtrReturn(pCtxInt, VERR_INVALID_HANDLE);
+
+    /* Only tear the context down when no request is accounted to it. */
+    if (!RT_UNLIKELY(pCtxInt->cRequests))
+    {
+        RTSemEventDestroy(pCtxInt->SemEventCancel);
+        RTMemFree(pCtxInt);
+        return VINF_SUCCESS;
+    }
+
+    return VERR_FILE_AIO_BUSY;
+}
+
+
+/**
+ * Returns the request limit of an async I/O context.
+ *
+ * @returns RTFILEAIO_UNLIMITED_REQS if hAioCtx is NIL_RTFILEAIOCTX, otherwise
+ *          the cMaxRequests value stored in the context.
+ * @param   hAioCtx     The context handle or NIL_RTFILEAIOCTX.
+ */
+RTDECL(uint32_t) RTFileAioCtxGetMaxReqCount(RTFILEAIOCTX hAioCtx)
+{
+    if (hAioCtx != NIL_RTFILEAIOCTX)
+    {
+        PRTFILEAIOCTXINTERNAL pCtxInt = hAioCtx;
+        return pCtxInt->cMaxRequests;
+    }
+
+    return RTFILEAIO_UNLIMITED_REQS;
+}
+
+/**
+ * Associates a file with an async I/O context.
+ *
+ * This implementation ignores both arguments and does nothing.
+ *
+ * @returns VINF_SUCCESS, always.
+ * @param   hAioCtx     The context handle (unused).
+ * @param   hFile       The file handle (unused).
+ */
+RTDECL(int) RTFileAioCtxAssociateWithFile(RTFILEAIOCTX hAioCtx, RTFILE hFile)
+{
+    NOREF(hAioCtx);
+    NOREF(hFile);
+
+    return VINF_SUCCESS;
+}
+
+#ifdef LOG_ENABLED
+/**
+ * Dumps the state of an async I/O context to the flow log.
+ *
+ * Only compiled when LOG_ENABLED is defined.
+ *
+ * @param   pCtxInt     The context to dump.
+ */
+static void rtFileAioCtxDump(PRTFILEAIOCTXINTERNAL pCtxInt)
+{
+    /* Counters, waiter thread and wakeup flags. */
+    LogFlow(("cRequests=%d\n", pCtxInt->cRequests));
+    LogFlow(("cMaxRequests=%u\n", pCtxInt->cMaxRequests));
+    LogFlow(("hThreadWait=%#p\n", pCtxInt->hThreadWait));
+    LogFlow(("fWokenUp=%RTbool\n", pCtxInt->fWokenUp));
+    LogFlow(("fWaiting=%RTbool\n", pCtxInt->fWaiting));
+    LogFlow(("fWokenUpInternal=%RTbool\n", pCtxInt->fWokenUpInternal));
+
+    /* The slots used for handing over newly submitted requests. */
+    unsigned iSlot = 0;
+    while (iSlot < RT_ELEMENTS(pCtxInt->apReqsNewHead))
+    {
+        LogFlow(("apReqsNewHead[%u]=%#p\n", iSlot, pCtxInt->apReqsNewHead[iSlot]));
+        iSlot++;
+    }
+
+    /* Cancellation slot and the overflow list. */
+    LogFlow(("pReqToCancel=%#p\n", pCtxInt->pReqToCancel));
+    LogFlow(("pReqsWaitHead=%#p\n", pCtxInt->pReqsWaitHead));
+    LogFlow(("pReqsWaitTail=%#p\n", pCtxInt->pReqsWaitTail));
+
+    /* The wait array handed to aio_suspend(). */
+    LogFlow(("cReqsWaitMax=%u\n", pCtxInt->cReqsWaitMax));
+    LogFlow(("iFirstFree=%u\n", pCtxInt->iFirstFree));
+    unsigned iReq = 0;
+    while (iReq < pCtxInt->cReqsWaitMax)
+    {
+        LogFlow(("apReqs[%u]=%#p\n", iReq, pCtxInt->apReqs[iReq]));
+        iReq++;
+    }
+}
+#endif
+
+/**
+ * Submits a set of prepared requests to an async I/O context.
+ *
+ * Regular requests are handed to lio_listio() in batches of at most
+ * AIO_LISTIO_MAX entries.  A flush request ends the current batch and is
+ * submitted on its own with aio_fsync().  Every successfully submitted request
+ * is linked into a list which is finally published through one of the
+ * apReqsNewHead slots so the thread in RTFileAioCtxWait() can pick it up.
+ *
+ * @returns VINF_SUCCESS if all requests were submitted.
+ * @returns VERR_INVALID_HANDLE if the context handle or one of the request
+ *          handles is invalid.  Requests of the current batch are put back
+ *          into the PREPARED state and are not submitted.
+ * @returns VERR_INVALID_PARAMETER if cReqs is zero.
+ * @returns VERR_INVALID_POINTER if pahReqs is not a valid pointer.
+ * @returns VERR_FILE_AIO_LIMIT_EXCEEDED if the context's request limit would
+ *          be exceeded.
+ * @returns VERR_FILE_AIO_INSUFFICIENT_RESSOURCES if the system reported EAGAIN.
+ * @returns Other converted errno values from lio_listio() / aio_fsync().
+ * @param   hAioCtx     The context handle.
+ * @param   pahReqs     Array of request handles to submit.
+ * @param   cReqs       Number of entries in pahReqs.
+ */
+RTDECL(int) RTFileAioCtxSubmit(RTFILEAIOCTX hAioCtx, PRTFILEAIOREQ pahReqs, size_t cReqs)
+{
+    int rc = VINF_SUCCESS;
+    PRTFILEAIOCTXINTERNAL pCtxInt = hAioCtx;
+
+    /* Parameter checks (status codes match those used by RTFileAioCtxWait). */
+    AssertPtrReturn(pCtxInt, VERR_INVALID_HANDLE);
+    AssertReturn(cReqs != 0, VERR_INVALID_PARAMETER);
+    AssertPtrReturn(pahReqs, VERR_INVALID_POINTER);
+
+    rtFileAioCtxDump(pCtxInt);
+
+    /* Check that we don't exceed the limit */
+    if (ASMAtomicUoReadS32(&pCtxInt->cRequests) + cReqs > pCtxInt->cMaxRequests)
+        return VERR_FILE_AIO_LIMIT_EXCEEDED;
+
+    /* Head of the list of requests submitted by this call. */
+    PRTFILEAIOREQINTERNAL pHead = NULL;
+
+    do
+    {
+        int rcPosix = 0;
+        size_t cReqsSubmit = 0;
+        size_t i = 0;
+        PRTFILEAIOREQINTERNAL pReqInt;
+
+        /* Collect a batch: stops at AIO_LISTIO_MAX, at a flush request or at an invalid handle. */
+        while (   (i < cReqs)
+               && (i < AIO_LISTIO_MAX))
+        {
+            pReqInt = pahReqs[i];
+            if (RTFILEAIOREQ_IS_NOT_VALID(pReqInt))
+            {
+                /* Undo everything and stop submitting. */
+                for (size_t iUndo = 0; iUndo < i; iUndo++)
+                {
+                    pReqInt = pahReqs[iUndo];
+                    RTFILEAIOREQ_SET_STATE(pReqInt, PREPARED);
+                    pReqInt->pCtxInt = NULL;
+
+                    /* Unlink from the list again. */
+                    PRTFILEAIOREQINTERNAL pNext, pPrev;
+                    pNext = pReqInt->pNext;
+                    pPrev = pReqInt->pPrev;
+                    if (pNext)
+                        pNext->pPrev = pPrev;
+                    if (pPrev)
+                        pPrev->pNext = pNext;
+                    else
+                        pHead = pNext;
+                }
+
+                /* The batch was rolled back, so nothing of it may reach lio_listio() below. */
+                cReqsSubmit = 0;
+                rc = VERR_INVALID_HANDLE;
+                break;
+            }
+
+            pReqInt->pCtxInt = pCtxInt;
+
+            /* Flush requests cannot go through lio_listio(); handled further down. */
+            if (pReqInt->fFlush)
+                break;
+
+            /* Link them together. */
+            pReqInt->pNext = pHead;
+            if (pHead)
+                pHead->pPrev = pReqInt;
+            pReqInt->pPrev = NULL;
+            pHead = pReqInt;
+            RTFILEAIOREQ_SET_STATE(pReqInt, SUBMITTED);
+
+            cReqsSubmit++;
+            i++;
+        }
+
+        if (cReqsSubmit)
+        {
+            /* The request handles are passed as aiocb pointers here. */
+            rcPosix = lio_listio(LIO_NOWAIT, (struct aiocb **)pahReqs, cReqsSubmit, NULL);
+            if (RT_UNLIKELY(rcPosix < 0))
+            {
+                size_t cReqsSubmitted = cReqsSubmit;
+
+                if (errno == EAGAIN)
+                    rc = VERR_FILE_AIO_INSUFFICIENT_RESSOURCES;
+                else
+                    rc = RTErrConvertFromErrno(errno);
+
+                /* Check which ones were not submitted. */
+                for (i = 0; i < cReqsSubmit; i++)
+                {
+                    pReqInt = pahReqs[i];
+
+                    rcPosix = aio_error(&pReqInt->AioCB);
+
+                    if ((rcPosix != EINPROGRESS) && (rcPosix != 0))
+                    {
+                        cReqsSubmitted--;
+
+#if defined(RT_OS_DARWIN) || defined(RT_OS_FREEBSD)
+                        if (errno == EINVAL)
+#else
+                        if (rcPosix == EINVAL)
+#endif
+                        {
+                            /* Was not submitted. */
+                            RTFILEAIOREQ_SET_STATE(pReqInt, PREPARED);
+                        }
+                        else
+                        {
+                            /* An error occurred. */
+                            RTFILEAIOREQ_SET_STATE(pReqInt, COMPLETED);
+
+                            /*
+                             * Looks like Apple and glibc interpret the standard in different ways.
+                             * glibc returns the error code which would be in errno but Apple returns
+                             * -1 and sets errno to the appropriate value
+                             */
+#if defined(RT_OS_DARWIN) || defined(RT_OS_FREEBSD)
+                            Assert(rcPosix == -1);
+                            pReqInt->Rc = RTErrConvertFromErrno(errno);
+#elif defined(RT_OS_LINUX)
+                            pReqInt->Rc = RTErrConvertFromErrno(rcPosix);
+#endif
+                            pReqInt->cbTransfered = 0;
+                        }
+                        /* Unlink from the list. */
+                        PRTFILEAIOREQINTERNAL pNext, pPrev;
+                        pNext = pReqInt->pNext;
+                        pPrev = pReqInt->pPrev;
+                        if (pNext)
+                            pNext->pPrev = pPrev;
+                        if (pPrev)
+                            pPrev->pNext = pNext;
+                        else
+                            pHead = pNext;
+
+                        pReqInt->pNext = NULL;
+                        pReqInt->pPrev = NULL;
+                    }
+                }
+                /* Account only for the requests which are still in flight. */
+                ASMAtomicAddS32(&pCtxInt->cRequests, cReqsSubmitted);
+                AssertMsg(pCtxInt->cRequests >= 0, ("Adding requests resulted in overflow\n"));
+                break;
+            }
+
+            ASMAtomicAddS32(&pCtxInt->cRequests, cReqsSubmit);
+            AssertMsg(pCtxInt->cRequests >= 0, ("Adding requests resulted in overflow\n"));
+            cReqs   -= cReqsSubmit;
+            pahReqs += cReqsSubmit;
+        }
+
+        /*
+         * Check if we have a flush request now.
+         * If not we hit the AIO_LISTIO_MAX limit
+         * and will continue submitting requests
+         * above.
+         */
+        if (cReqs && RT_SUCCESS_NP(rc))
+        {
+            pReqInt = pahReqs[0];
+
+            if (pReqInt->fFlush)
+            {
+                /*
+                 * lio_listio does not work with flush requests so
+                 * we have to use aio_fsync directly.
+                 */
+                rcPosix = aio_fsync(O_SYNC, &pReqInt->AioCB);
+                if (RT_UNLIKELY(rcPosix < 0))
+                {
+                    if (errno == EAGAIN)
+                    {
+                        rc = VERR_FILE_AIO_INSUFFICIENT_RESSOURCES;
+                        RTFILEAIOREQ_SET_STATE(pReqInt, PREPARED);
+                    }
+                    else
+                    {
+                        rc = RTErrConvertFromErrno(errno);
+                        RTFILEAIOREQ_SET_STATE(pReqInt, COMPLETED);
+                        pReqInt->Rc = rc;
+                    }
+                    pReqInt->cbTransfered = 0;
+                    break;
+                }
+
+                /* Link them together. */
+                pReqInt->pNext = pHead;
+                if (pHead)
+                    pHead->pPrev = pReqInt;
+                pReqInt->pPrev = NULL;
+                pHead = pReqInt;
+                RTFILEAIOREQ_SET_STATE(pReqInt, SUBMITTED);
+
+                ASMAtomicIncS32(&pCtxInt->cRequests);
+                AssertMsg(pCtxInt->cRequests >= 0, ("Adding requests resulted in overflow\n"));
+                cReqs--;
+                pahReqs++;
+            }
+        }
+    } while (   cReqs
+             && RT_SUCCESS_NP(rc));
+
+    if (pHead)
+    {
+        /*
+         * Forward successfully submitted requests to the thread waiting for requests.
+         * We search for a free slot first and if we don't find one
+         * we will grab the first one and append our list to the existing entries.
+         */
+        unsigned iSlot = 0;
+        while (   (iSlot < RT_ELEMENTS(pCtxInt->apReqsNewHead))
+               && !ASMAtomicCmpXchgPtr(&pCtxInt->apReqsNewHead[iSlot], pHead, NULL))
+            iSlot++;
+
+        if (iSlot == RT_ELEMENTS(pCtxInt->apReqsNewHead))
+        {
+            /* Nothing found. */
+            PRTFILEAIOREQINTERNAL pOldHead = ASMAtomicXchgPtrT(&pCtxInt->apReqsNewHead[0], NULL, PRTFILEAIOREQINTERNAL);
+
+            /* Find the end of the current head and link the old list to the current. */
+            PRTFILEAIOREQINTERNAL pTail = pHead;
+            while (pTail->pNext)
+                pTail = pTail->pNext;
+
+            pTail->pNext = pOldHead;
+
+            ASMAtomicWritePtr(&pCtxInt->apReqsNewHead[0], pHead);
+        }
+
+        /* Set the internal wakeup flag and wakeup the thread if possible. */
+        bool fWokenUp = ASMAtomicXchgBool(&pCtxInt->fWokenUpInternal, true);
+        if (!fWokenUp)
+            rtFileAioCtxWakeup(pCtxInt);
+    }
+
+    rtFileAioCtxDump(pCtxInt);
+
+    return rc;
+}
+
+
+/**
+ * Waits for requests of an async I/O context to complete.
+ *
+ * The calling thread registers itself in hThreadWait, pulls newly submitted
+ * requests into the wait array via rtFileAioCtxProcessEvents() and then
+ * blocks in aio_suspend() until enough requests have finished, an error
+ * occurs or the timeout expires.  Completed requests are removed from the
+ * wait array, marked COMPLETED and returned in pahReqs.
+ *
+ * @returns VINF_SUCCESS once at least cMinReqs (or one, if cMinReqs is zero)
+ *          requests completed.
+ * @returns VERR_INVALID_HANDLE / VERR_INVALID_POINTER / VERR_INVALID_PARAMETER /
+ *          VERR_OUT_OF_RANGE from the parameter checks.
+ * @returns VERR_FILE_AIO_NO_REQUEST if no request is pending and the context
+ *          was not created with RTFILEAIOCTX_FLAGS_WAIT_WITHOUT_PENDING_REQUESTS.
+ * @returns The status of rtFileAioCtxProcessEvents() or the converted errno of
+ *          a failed aio_suspend() call (this includes the timeout case).
+ * @param   hAioCtx     The context handle.
+ * @param   cMinReqs    Minimum number of requests to wait for; zero is
+ *                      treated as one.
+ * @param   cMillies    Timeout in milliseconds or RT_INDEFINITE_WAIT.
+ * @param   pahReqs     Where to store the handles of the completed requests.
+ * @param   cReqs       Number of entries pahReqs can hold.
+ * @param   pcReqs      Where to store the number of completed requests.
+ */
+RTDECL(int) RTFileAioCtxWait(RTFILEAIOCTX hAioCtx, size_t cMinReqs, RTMSINTERVAL cMillies,
+                             PRTFILEAIOREQ pahReqs, size_t cReqs, uint32_t *pcReqs)
+{
+    int rc = VINF_SUCCESS;
+    int cRequestsCompleted = 0;
+    PRTFILEAIOCTXINTERNAL pCtxInt = (PRTFILEAIOCTXINTERNAL)hAioCtx;
+    struct timespec Timeout;
+    struct timespec *pTimeout = NULL;
+    uint64_t StartNanoTS = 0;
+
+    LogFlowFunc(("hAioCtx=%#p cMinReqs=%zu cMillies=%u pahReqs=%#p cReqs=%zu pcbReqs=%#p\n",
+                 hAioCtx, cMinReqs, cMillies, pahReqs, cReqs, pcReqs));
+
+    /* Check parameters. */
+    AssertPtrReturn(pCtxInt, VERR_INVALID_HANDLE);
+    AssertPtrReturn(pcReqs, VERR_INVALID_POINTER);
+    AssertPtrReturn(pahReqs, VERR_INVALID_POINTER);
+    AssertReturn(cReqs != 0, VERR_INVALID_PARAMETER);
+    AssertReturn(cReqs >= cMinReqs, VERR_OUT_OF_RANGE);
+
+    rtFileAioCtxDump(pCtxInt);
+
+    int32_t cRequestsWaiting = ASMAtomicReadS32(&pCtxInt->cRequests);
+
+    if (   RT_UNLIKELY(cRequestsWaiting <= 0)
+        && !(pCtxInt->fFlags & RTFILEAIOCTX_FLAGS_WAIT_WITHOUT_PENDING_REQUESTS))
+        return VERR_FILE_AIO_NO_REQUEST;
+
+    if (RT_UNLIKELY(cMinReqs > (uint32_t)cRequestsWaiting))
+        return VERR_INVALID_PARAMETER;
+
+    if (cMillies != RT_INDEFINITE_WAIT)
+    {
+        Timeout.tv_sec  = cMillies / 1000;
+        Timeout.tv_nsec = (cMillies % 1000) * 1000000;
+        pTimeout = &Timeout;
+        StartNanoTS = RTTimeNanoTS();
+    }
+
+    /* Wait for at least one. */
+    if (!cMinReqs)
+        cMinReqs = 1;
+
+    /* For the wakeup call. */
+    Assert(pCtxInt->hThreadWait == NIL_RTTHREAD);
+    ASMAtomicWriteHandle(&pCtxInt->hThreadWait, RTThreadSelf());
+
+    /* Update the waiting list once before we enter the loop. */
+    rc = rtFileAioCtxProcessEvents(pCtxInt);
+
+    while (   cMinReqs
+           && RT_SUCCESS_NP(rc))
+    {
+#ifdef RT_STRICT
+        if (RT_UNLIKELY(!pCtxInt->iFirstFree))
+        {
+            for (unsigned i = 0; i < pCtxInt->cReqsWaitMax; i++)
+                RTAssertMsg2Weak("wait[%d] = %#p\n", i, pCtxInt->apReqs[i]);
+
+            AssertMsgFailed(("No request to wait for. pReqsWaitHead=%#p pReqsWaitTail=%#p\n",
+                             pCtxInt->pReqsWaitHead, pCtxInt->pReqsWaitTail));
+        }
+#endif
+
+        LogFlow(("Waiting for %d requests to complete\n", pCtxInt->iFirstFree));
+        rtFileAioCtxDump(pCtxInt);
+
+        /* fWaiting brackets the time spent inside aio_suspend(). */
+        ASMAtomicXchgBool(&pCtxInt->fWaiting, true);
+        int rcPosix = aio_suspend((const struct aiocb * const *)pCtxInt->apReqs,
+                                  pCtxInt->iFirstFree, pTimeout);
+        ASMAtomicXchgBool(&pCtxInt->fWaiting, false);
+        if (rcPosix < 0)
+        {
+            /* Save errno right away; the logging below could clobber it. */
+            int const iErr = errno;
+            LogFlow(("aio_suspend failed %d nent=%u\n", iErr, pCtxInt->iFirstFree));
+            /* Check that this is an external wakeup event. */
+            if (iErr == EINTR)
+                rc = rtFileAioCtxProcessEvents(pCtxInt);
+            else
+                rc = RTErrConvertFromErrno(iErr);
+        }
+        else
+        {
+            /* Requests finished. */
+            unsigned iReqCurr = 0;
+            unsigned cDone = 0;
+
+            /* Remove completed requests from the waiting list. */
+            while (   (iReqCurr < pCtxInt->iFirstFree)
+                   && (cDone < cReqs))
+            {
+                PRTFILEAIOREQINTERNAL pReq = pCtxInt->apReqs[iReqCurr];
+                int rcReq = aio_error(&pReq->AioCB);
+
+                if (rcReq != EINPROGRESS)
+                {
+                    /* Completed store the return code. */
+                    if (rcReq == 0)
+                    {
+                        pReq->Rc = VINF_SUCCESS;
+                        /* Call aio_return() to free resources. */
+                        pReq->cbTransfered = aio_return(&pReq->AioCB);
+                    }
+                    else
+                    {
+#if defined(RT_OS_DARWIN) || defined(RT_OS_FREEBSD)
+                        pReq->Rc = RTErrConvertFromErrno(errno);
+#else
+                        pReq->Rc = RTErrConvertFromErrno(rcReq);
+#endif
+                    }
+
+                    /* Mark the request as finished. */
+                    RTFILEAIOREQ_SET_STATE(pReq, COMPLETED);
+                    cDone++;
+
+                    /* If there are other entries waiting put the head into the now free entry. */
+                    if (pCtxInt->pReqsWaitHead)
+                    {
+                        PRTFILEAIOREQINTERNAL pReqInsert = pCtxInt->pReqsWaitHead;
+
+                        pCtxInt->pReqsWaitHead = pReqInsert->pNext;
+                        if (!pCtxInt->pReqsWaitHead)
+                        {
+                            /* List is empty now. Clear tail too. */
+                            pCtxInt->pReqsWaitTail = NULL;
+                        }
+
+                        pReqInsert->iWaitingList = pReq->iWaitingList;
+                        pCtxInt->apReqs[pReqInsert->iWaitingList] = pReqInsert;
+                        iReqCurr++;
+                    }
+                    else
+                    {
+                        /*
+                         * Move the last entry into the current position to avoid holes
+                         * but only if it is not the last element already.
+                         */
+                        if (pReq->iWaitingList < pCtxInt->iFirstFree - 1)
+                        {
+                            pCtxInt->apReqs[pReq->iWaitingList] = pCtxInt->apReqs[--pCtxInt->iFirstFree];
+                            pCtxInt->apReqs[pReq->iWaitingList]->iWaitingList = pReq->iWaitingList;
+                        }
+                        else
+                            pCtxInt->iFirstFree--;
+
+                        pCtxInt->apReqs[pCtxInt->iFirstFree] = NULL;
+                    }
+
+                    /* Put the request into the completed list. */
+                    pahReqs[cRequestsCompleted++] = pReq;
+                    pReq->iWaitingList = RTFILEAIOCTX_WAIT_ENTRY_INVALID;
+                }
+                else
+                    iReqCurr++;
+            }
+
+            AssertMsg((cDone <= cReqs), ("Overflow cReqs=%zu cMinReqs=%zu cDone=%u\n",
+                                         cReqs, cMinReqs, cDone));
+            cReqs -= cDone;
+            cMinReqs = RT_MAX(cMinReqs, cDone) - cDone;
+            ASMAtomicSubS32(&pCtxInt->cRequests, cDone);
+
+            AssertMsg(pCtxInt->cRequests >= 0, ("Finished more requests than currently active\n"));
+
+            if (!cMinReqs)
+                break;
+
+            if (cMillies != RT_INDEFINITE_WAIT)
+            {
+                /*
+                 * Recalculate the timeout from the full budget and the time
+                 * elapsed since the start, using the same clock as the start
+                 * stamp.  An exhausted budget is clamped to zero so the next
+                 * aio_suspend() call only polls and reports the timeout itself.
+                 */
+                uint64_t const cNsTotal   = (uint64_t)cMillies * UINT64_C(1000000);
+                uint64_t const cNsElapsed = RTTimeNanoTS() - StartNanoTS;
+                uint64_t const cNsLeft    = cNsElapsed < cNsTotal ? cNsTotal - cNsElapsed : 0;
+                Timeout.tv_sec  = (time_t)(cNsLeft / UINT64_C(1000000000));
+                Timeout.tv_nsec = (long)(cNsLeft % UINT64_C(1000000000));
+            }
+
+            /* Check for new elements. */
+            rc = rtFileAioCtxProcessEvents(pCtxInt);
+        }
+    }
+
+    /* Report what was collected and unregister the waiting thread again. */
+    *pcReqs = cRequestsCompleted;
+    Assert(pCtxInt->hThreadWait == RTThreadSelf());
+    ASMAtomicWriteHandle(&pCtxInt->hThreadWait, NIL_RTTHREAD);
+
+    rtFileAioCtxDump(pCtxInt);
+
+    return rc;
+}
+
+
+/**
+ * Wakes up the thread waiting on an async I/O context.
+ *
+ * Sets the fWokenUp flag and calls rtFileAioCtxWakeup() only if the flag was
+ * not already set.
+ *
+ * @returns VINF_SUCCESS; the handle validation macro may return earlier.
+ * @param   hAioCtx     The context handle.
+ */
+RTDECL(int) RTFileAioCtxWakeup(RTFILEAIOCTX hAioCtx)
+{
+    PRTFILEAIOCTXINTERNAL pCtxInt = hAioCtx;
+    RTFILEAIOCTX_VALID_RETURN(pCtxInt);
+
+    /** @todo r=bird: Define the protocol for how to resume work after calling
+     *        this function. */
+
+    /* Only the caller that flips the flag from false to true pokes the waiter. */
+    bool const fAlreadyWokenUp = ASMAtomicXchgBool(&pCtxInt->fWokenUp, true);
+    if (fAlreadyWokenUp)
+        return VINF_SUCCESS;
+
+    rtFileAioCtxWakeup(pCtxInt);
+    return VINF_SUCCESS;
+}
+
diff --git a/src/VBox/Runtime/r3/posix/fileio-at-posix.cpp b/src/VBox/Runtime/r3/posix/fileio-at-posix.cpp
new file mode 100644
index 00000000..72d3cc05
--- /dev/null
+++ b/src/VBox/Runtime/r3/posix/fileio-at-posix.cpp
@@ -0,0 +1,107 @@
+/* $Id: fileio-at-posix.cpp $ */
+/** @file
+ * IPRT - File I/O, RTFileReadAt and RTFileWriteAt, posix.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#include <errno.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "internal/iprt.h"
+#include <iprt/file.h>
+
+#include <iprt/err.h>
+#include <iprt/log.h>
+
+
+
+/**
+ * Reads from a file at a given offset using pread().
+ *
+ * @returns VINF_SUCCESS on success.
+ * @returns VERR_EOF if pcbRead is NULL and the end of the file is hit before
+ *          cbToRead bytes were read.
+ * @returns The converted errno value if pread() fails.
+ * @param   hFile       The file handle.
+ * @param   off         The file offset to read from.
+ * @param   pvBuf       Where to store the data.
+ * @param   cbToRead    Number of bytes to read.
+ * @param   pcbRead     Where to return the number of bytes read by the first
+ *                      pread() call.  If NULL the function keeps reading
+ *                      until cbToRead bytes have been read.
+ */
+RTDECL(int) RTFileReadAt(RTFILE hFile, RTFOFF off, void *pvBuf, size_t cbToRead, size_t *pcbRead)
+{
+    ssize_t cbRead = pread(RTFileToNative(hFile), pvBuf, cbToRead, off);
+    if (cbRead < 0)
+        return RTErrConvertFromErrno(errno);
+
+    /* Caller can handle a partial read: report what we got and be done. */
+    if (pcbRead)
+    {
+        *pcbRead = cbRead;
+        return VINF_SUCCESS;
+    }
+
+    /* Caller expects everything to be read: keep going until the buffer is full. */
+    while ((ssize_t)cbToRead > cbRead)
+    {
+        ssize_t cbReadPart = pread(RTFileToNative(hFile), (char*)pvBuf + cbRead, cbToRead - cbRead, off + cbRead);
+        if (cbReadPart == 0)
+            return VERR_EOF;
+        if (cbReadPart < 0)
+            return RTErrConvertFromErrno(errno);
+        cbRead += cbReadPart;
+    }
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Writes to a file at a given offset using pwrite().
+ *
+ * @returns VINF_SUCCESS on success.
+ * @returns VERR_TRY_AGAIN if pcbWritten is NULL and a follow-up pwrite() call
+ *          makes no progress (returns zero).
+ * @returns The converted errno value if pwrite() fails.
+ * @param   hFile       The file handle.
+ * @param   off         The file offset to write at.
+ * @param   pvBuf       The data to write.
+ * @param   cbToWrite   Number of bytes to write.
+ * @param   pcbWritten  Where to return the number of bytes written by the
+ *                      first pwrite() call.  If NULL the function keeps
+ *                      writing until cbToWrite bytes have been written.
+ */
+RTDECL(int) RTFileWriteAt(RTFILE hFile, RTFOFF off, const void *pvBuf, size_t cbToWrite, size_t *pcbWritten)
+{
+    ssize_t cbWritten = pwrite(RTFileToNative(hFile), pvBuf, cbToWrite, off);
+    if (cbWritten >= 0)
+    {
+        if (pcbWritten)
+            /* caller can handle partial write. */
+            *pcbWritten = cbWritten;
+        else
+        {
+            /* Caller expects everything to be written. */
+            while ((ssize_t)cbToWrite > cbWritten)
+            {
+                ssize_t cbWrittenPart = pwrite(RTFileToNative(hFile), (const char *)pvBuf + cbWritten, cbToWrite - cbWritten,
+                                               off + cbWritten);
+                /* A zero return means no progress; bail out instead of spinning forever. */
+                if (cbWrittenPart <= 0)
+                    return cbWrittenPart < 0 ? RTErrConvertFromErrno(errno) : VERR_TRY_AGAIN;
+                cbWritten += cbWrittenPart;
+            }
+        }
+        return VINF_SUCCESS;
+    }
+    return RTErrConvertFromErrno(errno);
+}
+
diff --git a/src/VBox/Runtime/r3/posix/fileio-posix.cpp b/src/VBox/Runtime/r3/posix/fileio-posix.cpp
new file mode 100644
index 00000000..cd7d238f
--- /dev/null
+++ b/src/VBox/Runtime/r3/posix/fileio-posix.cpp
@@ -0,0 +1,934 @@
+/* $Id: fileio-posix.cpp $ */
+/** @file
+ * IPRT - File I/O, POSIX, Part 1.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP RTLOGGROUP_FILE
+
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/ioctl.h>
+#include <fcntl.h>
+#ifdef _MSC_VER
+# include <io.h>
+# include <stdio.h>
+#else
+# include <unistd.h>
+# include <sys/time.h>
+#endif
+#ifdef RT_OS_LINUX
+# include <sys/file.h>
+#endif
+#if defined(RT_OS_OS2) && (!defined(__INNOTEK_LIBC__) || __INNOTEK_LIBC__ < 0x006)
+# include <io.h>
+#endif
+#if defined(RT_OS_DARWIN) || defined(RT_OS_FREEBSD)
+# include <sys/disk.h>
+#endif
+#ifdef RT_OS_SOLARIS
+# include <stropts.h>
+# include <sys/dkio.h>
+# include <sys/vtoc.h>
+#endif /* RT_OS_SOLARIS */
+
+#include <iprt/file.h>
+#include <iprt/path.h>
+#include <iprt/assert.h>
+#include <iprt/string.h>
+#include <iprt/err.h>
+#include <iprt/log.h>
+#include <iprt/thread.h>
+#include "internal/file.h"
+#include "internal/fs.h"
+#include "internal/path.h"
+
+
+
+/*********************************************************************************************************************************
+* Defined Constants And Macros *
+*********************************************************************************************************************************/
+/** Default file permissions for newly created files. */
+#if defined(S_IRUSR) && defined(S_IWUSR)
+# define RT_FILE_PERMISSION (S_IRUSR | S_IWUSR)
+#else
+# define RT_FILE_PERMISSION (00600)
+#endif
+
+
+/*********************************************************************************************************************************
+* Global Variables *
+*********************************************************************************************************************************/
+#ifdef O_CLOEXEC
+static int volatile g_fHave_O_CLOEXEC = 0; /* {-1,0,1}; since Linux 2.6.23 */
+#endif
+
+
+
+/**
+ * Checks whether a path refers to an existing regular file.
+ *
+ * @returns true if stat() succeeds on the native path and reports a regular
+ *          file, false otherwise (including path conversion failures).
+ * @param   pszPath     The path to check.
+ */
+RTDECL(bool) RTFileExists(const char *pszPath)
+{
+    bool fRc = false;
+    char const *pszNativePath;
+    if (RT_SUCCESS(rtPathToNative(&pszNativePath, pszPath, NULL)))
+    {
+        /* stat() follows the path; only a regular file counts as existing. */
+        struct stat s;
+        if (stat(pszNativePath, &s) == 0)
+            fRc = S_ISREG(s.st_mode) != 0;
+
+        rtPathFreeNative(pszNativePath, pszPath);
+    }
+
+    LogFlow(("RTFileExists(%p={%s}): returns %RTbool\n", pszPath, pszPath, fRc));
+    return fRc;
+}
+
+
+#ifdef O_CLOEXEC
+/**
+ * Worker for RTFileOpenEx that detects whether the kernel supports O_CLOEXEC.
+ *
+ * Opens /dev/null with O_CLOEXEC and checks whether FD_CLOEXEC ends up set on
+ * the descriptor.  The verdict is cached in g_fHave_O_CLOEXEC.
+ *
+ * @returns 1 if O_CLOEXEC works, -1 if it does not (or the probe failed).
+ */
+static int rtFileOpenExDetectCloExecSupport(void)
+{
+    int fHave_O_CLOEXEC = -1;
+
+    int fd = open("/dev/null", O_RDONLY | O_CLOEXEC, 0);
+    if (fd < 0)
+    {
+        /* The only failure expected here is EINVAL. */
+        AssertMsg(errno == EINVAL, ("%d\n", errno));
+    }
+    else
+    {
+        int fFlags = fcntl(fd, F_GETFD, 0);
+        if (fFlags > 0 && (fFlags & FD_CLOEXEC))
+            fHave_O_CLOEXEC = 1;
+        close(fd);
+    }
+
+    /* Cache the verdict in the global before returning it. */
+    g_fHave_O_CLOEXEC = fHave_O_CLOEXEC;
+    return fHave_O_CLOEXEC;
+}
+#endif
+
+
+/**
+ * Opens a file.
+ *
+ * Convenience wrapper around RTFileOpenEx() for callers that do not care
+ * which action was taken.
+ *
+ * @returns Whatever RTFileOpenEx() returns.
+ * @param   pFile           Where to return the file handle.
+ * @param   pszFilename     The path of the file to open.
+ * @param   fOpen           The open flags, passed on unchanged.
+ */
+RTR3DECL(int) RTFileOpen(PRTFILE pFile, const char *pszFilename, uint64_t fOpen)
+{
+    /* NULL: the caller is not interested in the action taken. */
+    int const rc = RTFileOpenEx(pszFilename, fOpen, pFile, NULL);
+    return rc;
+}
+
+
+/**
+ * Opens a file, optionally reporting which action was taken.
+ *
+ * Translates the RTFILE_O_XXX flags into open() flags, opens the file and
+ * then applies the post-open steps (temporary file deletion, close-on-exec,
+ * direct I/O and the optional sharing emulation).
+ *
+ * @returns VINF_SUCCESS on success, with *phFile set to the new handle.
+ * @returns VERR_INVALID_POINTER if phFile or pszFilename is invalid.
+ * @returns VERR_INVALID_FLAGS / VERR_NOT_SUPPORTED for unsupported flag
+ *          combinations.
+ * @returns The status of rtFileRecalcAndValidateFlags() or rtPathToNative()
+ *          if they fail.
+ * @returns The converted errno value if opening the file or one of the
+ *          post-open steps fails; the descriptor is closed in the latter case.
+ * @param   pszFilename         The path of the file to open.
+ * @param   fOpen               The open flags (RTFILE_O_XXX).
+ * @param   phFile              Where to return the handle.  Set to NIL_RTFILE
+ *                              up front.
+ * @param   penmActionTaken     Where to return the action taken.  Optional.
+ *                              Set to RTFILEACTION_INVALID up front.
+ */
+RTDECL(int) RTFileOpenEx(const char *pszFilename, uint64_t fOpen, PRTFILE phFile, PRTFILEACTION penmActionTaken)
+{
+    /*
+     * Validate input.
+     */
+    AssertPtrReturn(phFile, VERR_INVALID_POINTER);
+    *phFile = NIL_RTFILE;
+    if (penmActionTaken)
+        *penmActionTaken = RTFILEACTION_INVALID;
+    AssertPtrReturn(pszFilename, VERR_INVALID_POINTER);
+
+    /*
+     * Merge forced open flags and validate them.
+     */
+    int rc = rtFileRecalcAndValidateFlags(&fOpen);
+    if (RT_FAILURE(rc))
+        return rc;
+#ifndef O_NONBLOCK
+    AssertReturn(!(fOpen & RTFILE_O_NON_BLOCK), VERR_INVALID_FLAGS);
+#endif
+#if defined(RT_OS_OS2) /* Cannot delete open files on OS/2. */
+    AssertReturn(!(fOpen & RTFILE_O_TEMP_AUTO_DELETE), VERR_NOT_SUPPORTED);
+#endif
+
+    /*
+     * Calculate open mode flags.
+     */
+    int fOpenMode = 0;
+#ifdef O_BINARY
+    fOpenMode |= O_BINARY; /* (pc) */
+#endif
+#ifdef O_LARGEFILE
+    fOpenMode |= O_LARGEFILE; /* (linux, solaris) */
+#endif
+#ifdef O_NOINHERIT
+    if (!(fOpen & RTFILE_O_INHERIT))
+        fOpenMode |= O_NOINHERIT;
+#endif
+#ifdef O_CLOEXEC
+    /* Probe O_CLOEXEC support on first use (0 = not probed yet). */
+    int fHave_O_CLOEXEC = g_fHave_O_CLOEXEC;
+    if (   !(fOpen & RTFILE_O_INHERIT)
+        && (   fHave_O_CLOEXEC > 0
+            || (   fHave_O_CLOEXEC == 0
+                && (fHave_O_CLOEXEC = rtFileOpenExDetectCloExecSupport()) > 0)))
+        fOpenMode |= O_CLOEXEC;
+#endif
+#ifdef O_NONBLOCK
+    if (fOpen & RTFILE_O_NON_BLOCK)
+        fOpenMode |= O_NONBLOCK;
+#endif
+#ifdef O_SYNC
+    if (fOpen & RTFILE_O_WRITE_THROUGH)
+        fOpenMode |= O_SYNC;
+#endif
+#if defined(O_DIRECT) && defined(RT_OS_LINUX)
+    /* O_DIRECT is mandatory to get async I/O working on Linux. */
+    if (fOpen & RTFILE_O_ASYNC_IO)
+        fOpenMode |= O_DIRECT;
+#endif
+#if defined(O_DIRECT) && (defined(RT_OS_LINUX) || defined(RT_OS_FREEBSD) || defined(RT_OS_NETBSD))
+    /* Disable the kernel cache. */
+    if (fOpen & RTFILE_O_NO_CACHE)
+        fOpenMode |= O_DIRECT;
+#endif
+
+    /* create/truncate file */
+    switch (fOpen & RTFILE_O_ACTION_MASK)
+    {
+        case RTFILE_O_OPEN:             break;
+        case RTFILE_O_OPEN_CREATE:      fOpenMode |= O_CREAT; break;
+        case RTFILE_O_CREATE:           fOpenMode |= O_CREAT | O_EXCL; break;
+        case RTFILE_O_CREATE_REPLACE:   fOpenMode |= O_CREAT | O_TRUNC; break; /** @todo replacing needs fixing, this is *not* a 1:1 mapping! */
+        default:
+            AssertMsgFailed(("fOpen=%#llx\n", fOpen));
+            fOpen = (fOpen & ~RTFILE_O_ACTION_MASK) | RTFILE_O_OPEN;
+            break;
+
+    }
+    if (   (fOpen & RTFILE_O_TRUNCATE)
+        && (fOpen & RTFILE_O_ACTION_MASK) != RTFILE_O_CREATE)
+        fOpenMode |= O_TRUNC;
+
+    switch (fOpen & RTFILE_O_ACCESS_MASK)
+    {
+        case RTFILE_O_READ:
+            fOpenMode |= O_RDONLY; /* RTFILE_O_APPEND is ignored. */
+            break;
+        case RTFILE_O_WRITE:
+            fOpenMode |= fOpen & RTFILE_O_APPEND ? O_APPEND | O_WRONLY : O_WRONLY;
+            break;
+        case RTFILE_O_READWRITE:
+            fOpenMode |= fOpen & RTFILE_O_APPEND ? O_APPEND | O_RDWR   : O_RDWR;
+            break;
+        default:
+            AssertMsgFailedReturn(("RTFileOpen received an invalid RW value, fOpen=%#llx\n", fOpen), VERR_INVALID_FLAGS);
+    }
+
+    /* File mode. */
+    int fMode = (fOpen & RTFILE_O_CREATE_MODE_MASK)
+              ? (fOpen & RTFILE_O_CREATE_MODE_MASK) >> RTFILE_O_CREATE_MODE_SHIFT
+              : RT_FILE_PERMISSION;
+
+    /** @todo sharing? */
+
+    /*
+     * Open/create the file.
+     */
+    char const *pszNativeFilename;
+    rc = rtPathToNative(&pszNativeFilename, pszFilename, NULL);
+    if (RT_FAILURE(rc))
+        return (rc);
+
+    int fh;
+    int iErr;
+    if (!penmActionTaken)
+    {
+        fh = open(pszNativeFilename, fOpenMode, fMode);
+        iErr = errno;
+    }
+    else
+    {
+        /* We need to know exactly which action was taken by open, Windows &
+           OS/2 style.  Can be tedious and subject to races: */
+        switch (fOpen & RTFILE_O_ACTION_MASK)
+        {
+            case RTFILE_O_OPEN:
+                Assert(!(fOpenMode & O_CREAT));
+                Assert(!(fOpenMode & O_EXCL));
+                fh = open(pszNativeFilename, fOpenMode, fMode);
+                iErr = errno;
+                if (fh >= 0)
+                    *penmActionTaken = fOpenMode & O_TRUNC ? RTFILEACTION_TRUNCATED : RTFILEACTION_OPENED;
+                break;
+
+            case RTFILE_O_CREATE:
+                Assert(fOpenMode & O_CREAT);
+                Assert(fOpenMode & O_EXCL);
+                fh = open(pszNativeFilename, fOpenMode, fMode);
+                iErr = errno;
+                if (fh >= 0)
+                    *penmActionTaken = RTFILEACTION_CREATED;
+                else if (iErr == EEXIST)
+                    *penmActionTaken = RTFILEACTION_ALREADY_EXISTS;
+                break;
+
+            case RTFILE_O_OPEN_CREATE:
+            case RTFILE_O_CREATE_REPLACE:
+            {
+                Assert(fOpenMode & O_CREAT);
+                Assert(!(fOpenMode & O_EXCL));
+                int iTries = 64;
+                while (iTries-- > 0)
+                {
+                    /* Yield the CPU if we've raced too long. */
+                    if (iTries < 4)
+                        RTThreadSleep(2 - (iTries & 1));
+
+                    /* Try exclusive creation first: */
+                    fh = open(pszNativeFilename, fOpenMode | O_EXCL, fMode);
+                    iErr = errno;
+                    if (fh >= 0)
+                    {
+                        *penmActionTaken = RTFILEACTION_CREATED;
+                        break;
+                    }
+                    if (iErr != EEXIST)
+                        break;
+
+                    /* If the file exists, try open it: */
+                    fh = open(pszNativeFilename, fOpenMode & ~O_CREAT, fMode);
+                    iErr = errno;
+                    if (fh >= 0)
+                    {
+                        if ((fOpen & RTFILE_O_ACTION_MASK) == RTFILE_O_OPEN_CREATE)
+                            *penmActionTaken = fOpenMode & O_TRUNC ? RTFILEACTION_TRUNCATED : RTFILEACTION_OPENED;
+                        else
+                            *penmActionTaken = RTFILEACTION_REPLACED;
+                        break;
+                    }
+                    if (iErr != ENOENT)
+                        break;
+                }
+                Assert(iTries >= 0);
+                if (iTries < 0)
+                {
+                    /* Thanks for the race, but we need to get on with things. */
+                    fh = open(pszNativeFilename, fOpenMode, fMode);
+                    iErr = errno;
+                    if (fh >= 0)
+                        *penmActionTaken = RTFILEACTION_OPENED;
+                }
+                break;
+            }
+
+            default:
+                AssertMsgFailed(("fOpen=%#llx fOpenMode=%#x\n", fOpen, fOpenMode));
+                iErr = EINVAL;
+                fh = -1;
+                break;
+        }
+    }
+
+    if (fh >= 0)
+    {
+        iErr = 0;
+
+        /*
+         * If temporary file, delete it.  This must happen while the native
+         * path is still valid, i.e. before rtPathFreeNative() below.
+         */
+        if (fOpen & RTFILE_O_TEMP_AUTO_DELETE)
+        {
+            /** @todo Use funlinkat/funlink or similar here when available!  Or better,
+             *        use O_TMPFILE, only that may require fallback as not supported by
+             *        all file system on linux. */
+            if (unlink(pszNativeFilename) != 0)
+                iErr = errno;
+            Assert(iErr == 0);
+        }
+    }
+
+    /* iErr has been captured on every path above, so the path can go now. */
+    rtPathFreeNative(pszNativeFilename, pszFilename);
+
+    if (fh >= 0)
+    {
+        /*
+         * Mark the file handle close on exec, unless inherit is specified.
+         * Skipped if an earlier step already failed so that error is kept.
+         */
+        if (   iErr == 0
+            && !(fOpen & RTFILE_O_INHERIT)
+#ifdef O_NOINHERIT
+            && !(fOpenMode & O_NOINHERIT) /* Take care since it might be a zero value dummy. */
+#endif
+#ifdef O_CLOEXEC
+            && fHave_O_CLOEXEC <= 0
+#endif
+            )
+            iErr = fcntl(fh, F_SETFD, FD_CLOEXEC) >= 0 ? 0 : errno;
+
+        /*
+         * Switch direct I/O on now if requested and required.
+         */
+#if defined(RT_OS_DARWIN) \
+ || (defined(RT_OS_SOLARIS) && !defined(IN_GUEST))
+        if (iErr == 0 && (fOpen & RTFILE_O_NO_CACHE))
+        {
+# if defined(RT_OS_DARWIN)
+            iErr = fcntl(fh, F_NOCACHE, 1)        >= 0 ? 0 : errno;
+# else
+            iErr = directio(fh, DIRECTIO_ON)      >= 0 ? 0 : errno;
+# endif
+        }
+#endif
+
+        /*
+         * Implement / emulate file sharing.
+         *
+         * We need another mode which allows skipping this stuff completely
+         * and do things the UNIX way. So for the present this is just a debug
+         * aid that can be enabled by developers too lazy to test on Windows.
+         */
+#if 0 && defined(RT_OS_LINUX)
+        if (iErr == 0)
+        {
+            /* This approach doesn't work because only knfsd checks for these
+               buggers. :-( */
+            int iLockOp;
+            switch (fOpen & RTFILE_O_DENY_MASK)
+            {
+                default:
+                    AssertFailed();
+                case RTFILE_O_DENY_NONE:
+                case RTFILE_O_DENY_NOT_DELETE:
+                    iLockOp = LOCK_MAND | LOCK_READ | LOCK_WRITE;
+                    break;
+                case RTFILE_O_DENY_READ:
+                case RTFILE_O_DENY_READ | RTFILE_O_DENY_NOT_DELETE:
+                    iLockOp = LOCK_MAND | LOCK_WRITE;
+                    break;
+                case RTFILE_O_DENY_WRITE:
+                case RTFILE_O_DENY_WRITE | RTFILE_O_DENY_NOT_DELETE:
+                    iLockOp = LOCK_MAND | LOCK_READ;
+                    break;
+                case RTFILE_O_DENY_WRITE | RTFILE_O_DENY_READ:
+                case RTFILE_O_DENY_WRITE | RTFILE_O_DENY_READ | RTFILE_O_DENY_NOT_DELETE:
+                    iLockOp = LOCK_MAND;
+                    break;
+            }
+            iErr = flock(fh, iLockOp | LOCK_NB);
+            if (iErr != 0)
+                iErr = errno == EAGAIN ? ETXTBSY : 0;
+        }
+#endif /* 0 && RT_OS_LINUX */
+#if defined(DEBUG_bird) && !defined(RT_OS_SOLARIS)
+        if (iErr == 0)
+        {
+            /* This emulation is incomplete but useful. */
+            switch (fOpen & RTFILE_O_DENY_MASK)
+            {
+                default:
+                    AssertFailed();
+                case RTFILE_O_DENY_NONE:
+                case RTFILE_O_DENY_NOT_DELETE:
+                case RTFILE_O_DENY_READ:
+                case RTFILE_O_DENY_READ | RTFILE_O_DENY_NOT_DELETE:
+                    break;
+                case RTFILE_O_DENY_WRITE:
+                case RTFILE_O_DENY_WRITE | RTFILE_O_DENY_NOT_DELETE:
+                case RTFILE_O_DENY_WRITE | RTFILE_O_DENY_READ:
+                case RTFILE_O_DENY_WRITE | RTFILE_O_DENY_READ | RTFILE_O_DENY_NOT_DELETE:
+                    if (fOpen & RTFILE_O_WRITE)
+                    {
+                        iErr = flock(fh, LOCK_EX | LOCK_NB);
+                        if (iErr != 0)
+                            iErr = errno == EAGAIN ? ETXTBSY : 0;
+                    }
+                    break;
+            }
+        }
+#endif
+#ifdef RT_OS_SOLARIS
+        /** @todo Use fshare_t and associates, it's a perfect match. see sys/fcntl.h */
+#endif
+
+        /*
+         * We're done.
+         */
+        if (iErr == 0)
+        {
+            *phFile = (RTFILE)(uintptr_t)fh;
+            Assert((intptr_t)*phFile == fh);
+            LogFlow(("RTFileOpen(%p:{%RTfile}, %p:{%s}, %#llx): returns %Rrc\n",
+                     phFile, *phFile, pszFilename, pszFilename, fOpen, rc));
+            return VINF_SUCCESS;
+        }
+
+        /* A post-open step failed: do not leak the descriptor. */
+        close(fh);
+    }
+    return RTErrConvertFromErrno(iErr);
+}
+
+/**
+ * Opens "/dev/null" through RTFileOpen() with the given access mode.
+ *
+ * @returns VERR_INVALID_PARAMETER (after asserting) unless fAccess is exactly
+ *          RTFILE_O_READ, RTFILE_O_WRITE or RTFILE_O_READWRITE; otherwise the
+ *          status of the RTFileOpen() call.
+ * @param   phFile   Handed to RTFileOpen() to receive the handle.
+ * @param   fAccess  The access mode; RTFILE_O_DENY_NONE and RTFILE_O_OPEN are
+ *                   OR'ed in before opening.
+ */
+RTR3DECL(int) RTFileOpenBitBucket(PRTFILE phFile, uint64_t fAccess)
+{
+ AssertReturn( fAccess == RTFILE_O_READ
+ || fAccess == RTFILE_O_WRITE
+ || fAccess == RTFILE_O_READWRITE,
+ VERR_INVALID_PARAMETER);
+ return RTFileOpen(phFile, "/dev/null", fAccess | RTFILE_O_DENY_NONE | RTFILE_O_OPEN);
+}
+
+
+/**
+ * Closes a file handle.
+ *
+ * @returns VINF_SUCCESS when hFile is NIL_RTFILE or close() succeeds,
+ *          otherwise the IPRT status converted from errno.
+ * @param   hFile   The handle to close; NIL_RTFILE is quietly accepted.
+ */
+RTR3DECL(int) RTFileClose(RTFILE hFile)
+{
+    int rc = VINF_SUCCESS;
+    if (hFile != NIL_RTFILE)
+    {
+        int const iRet = close(RTFileToNative(hFile));
+        if (iRet != 0)
+            rc = RTErrConvertFromErrno(errno);
+    }
+    return rc;
+}
+
+
+/**
+ * Wraps a native file descriptor value in an IPRT file handle.
+ *
+ * @returns VINF_SUCCESS, or VERR_INVALID_HANDLE (after asserting) when
+ *          uNative is negative.
+ * @param   pFile    Where to store the handle; set to NIL_RTFILE on failure.
+ * @param   uNative  The native value to convert.
+ */
+RTR3DECL(int) RTFileFromNative(PRTFILE pFile, RTHCINTPTR uNative)
+{
+    AssertCompile(sizeof(uNative) == sizeof(*pFile));
+
+    /* Non-negative values are taken over unchanged. */
+    if (uNative >= 0)
+    {
+        *pFile = (RTFILE)uNative;
+        return VINF_SUCCESS;
+    }
+
+    AssertMsgFailed(("%p\n", uNative));
+    *pFile = NIL_RTFILE;
+    return VERR_INVALID_HANDLE;
+}
+
+/**
+ * Converts an IPRT file handle to the native value used with the POSIX calls
+ * in this file.
+ *
+ * @returns The handle cast to intptr_t; -1 (after asserting) for NIL_RTFILE.
+ * @param   hFile   The file handle to convert.
+ */
+RTR3DECL(RTHCINTPTR) RTFileToNative(RTFILE hFile)
+{
+ AssertReturn(hFile != NIL_RTFILE, -1);
+ return (intptr_t)hFile;
+}
+
+
+/**
+ * Returns the file handle for one of the standard streams.
+ *
+ * @returns The descriptor (0, 1 or 2) as an RTFILE, or NIL_RTFILE when the
+ *          selector is unknown (asserted) or fstat() fails on the descriptor.
+ * @param   enmStdHandle    Which standard handle to return.
+ */
+RTFILE rtFileGetStandard(RTHANDLESTD enmStdHandle)
+{
+    int fdStd;
+    if (enmStdHandle == RTHANDLESTD_INPUT)
+        fdStd = 0;
+    else if (enmStdHandle == RTHANDLESTD_OUTPUT)
+        fdStd = 1;
+    else if (enmStdHandle == RTHANDLESTD_ERROR)
+        fdStd = 2;
+    else
+    {
+        AssertFailedReturn(NIL_RTFILE);
+    }
+
+    /* Only hand the descriptor out if fstat() accepts it. */
+    struct stat StatBuf;
+    if (fstat(fdStd, &StatBuf) == -1)
+        return NIL_RTFILE;
+    return (RTFILE)(intptr_t)fdStd;
+}
+
+
+/**
+ * Deletes a file by unlinking its path.
+ *
+ * @returns The failure status of rtPathToNative(), the status converted from
+ *          errno when unlink() fails, otherwise the successful status of the
+ *          path conversion.
+ * @param   pszFilename     The path of the file to delete.
+ */
+RTR3DECL(int) RTFileDelete(const char *pszFilename)
+{
+    char const *pszNative;
+    int rc = rtPathToNative(&pszNative, pszFilename, NULL);
+    if (RT_FAILURE(rc))
+        return rc;
+
+    /* errno is picked up before the native path is released. */
+    int const iRet = unlink(pszNative);
+    if (iRet != 0)
+        rc = RTErrConvertFromErrno(errno);
+    rtPathFreeNative(pszNative, pszFilename);
+    return rc;
+}
+
+/**
+ * Changes the current file position using lseek().
+ *
+ * @returns VINF_SUCCESS on success, VERR_INVALID_PARAMETER for a uMethod above
+ *          RTFILE_SEEK_END, VERR_NOT_SUPPORTED when off_t is narrower than
+ *          offSeek and the offset has bits set above the low 32, otherwise
+ *          the status converted from the errno of the failed lseek().
+ * @param   hFile       The file handle.
+ * @param   offSeek     The offset, interpreted according to uMethod.
+ * @param   uMethod     The seek method; used as index into the SEEK_SET /
+ *                      SEEK_CUR / SEEK_END translation table below.
+ * @param   poffActual  Where to return the resulting position.  Optional.
+ */
+RTR3DECL(int) RTFileSeek(RTFILE hFile, int64_t offSeek, unsigned uMethod, uint64_t *poffActual)
+{
+ static const unsigned aSeekRecode[] =
+ {
+ SEEK_SET,
+ SEEK_CUR,
+ SEEK_END,
+ };
+
+ /*
+ * Validate input.
+ */
+ if (uMethod > RTFILE_SEEK_END)
+ {
+ AssertMsgFailed(("Invalid uMethod=%d\n", uMethod));
+ return VERR_INVALID_PARAMETER;
+ }
+
+ /* Check that the offset is within off_t range; only relevant when off_t is narrower than 64 bits. */
+ if ( sizeof(off_t) < sizeof(offSeek)
+ && ( (offSeek > 0 && (unsigned)(offSeek >> 32) != 0)
+ || (offSeek < 0 && (unsigned)(-offSeek >> 32) != 0))) /* NOTE(review): -offSeek is not representable for INT64_MIN - confirm callers never pass it. */
+ {
+ AssertMsgFailed(("64-bit search not supported\n"));
+ return VERR_NOT_SUPPORTED;
+ }
+
+ off_t offCurrent = lseek(RTFileToNative(hFile), (off_t)offSeek, aSeekRecode[uMethod]);
+ if (offCurrent != ~0) /* ~0 is -1, i.e. any other value is treated as success */
+ {
+ if (poffActual)
+ *poffActual = (uint64_t)offCurrent;
+ return VINF_SUCCESS;
+ }
+ return RTErrConvertFromErrno(errno);
+}
+
+/**
+ * Reads bytes from a file at the current position using read().
+ *
+ * @returns VINF_SUCCESS on success; VERR_EOF when pcbRead is NULL and a
+ *          follow-up read() returns zero bytes before cbToRead bytes were
+ *          read; otherwise the status converted from errno.
+ * @param   hFile       The file handle.
+ * @param   pvBuf       Where to put the bytes read.
+ * @param   cbToRead    Number of bytes to read; zero succeeds immediately.
+ * @param   pcbRead     Where to return the number of bytes read.  When NULL
+ *                      the function keeps reading until cbToRead bytes are in.
+ */
+RTR3DECL(int) RTFileRead(RTFILE hFile, void *pvBuf, size_t cbToRead, size_t *pcbRead)
+{
+ if (cbToRead <= 0)
+ {
+ if (pcbRead)
+ *pcbRead = 0;
+ return VINF_SUCCESS;
+ }
+
+ /*
+ * Attempt read.
+ */
+ ssize_t cbRead = read(RTFileToNative(hFile), pvBuf, cbToRead);
+ if (cbRead >= 0)
+ {
+ if (pcbRead)
+ /* The caller can handle a partial read, so one read() call is enough. */
+ *pcbRead = cbRead;
+ else
+ {
+ /* The caller expects everything to be read, so keep going until done. */
+ while ((ssize_t)cbToRead > cbRead)
+ {
+ ssize_t cbReadPart = read(RTFileToNative(hFile), (char*)pvBuf + cbRead, cbToRead - cbRead);
+ if (cbReadPart <= 0)
+ {
+ if (cbReadPart == 0) /* no more data although more was requested */
+ return VERR_EOF;
+ return RTErrConvertFromErrno(errno);
+ }
+ cbRead += cbReadPart;
+ }
+ }
+ return VINF_SUCCESS;
+ }
+
+ return RTErrConvertFromErrno(errno);
+}
+
+
+/**
+ * Writes bytes to a file at the current position using write().
+ *
+ * @returns VINF_SUCCESS on success; VERR_TRY_AGAIN when pcbWritten is NULL
+ *          and a follow-up write() makes no progress; otherwise the status
+ *          converted from errno.
+ * @param   hFile       The file handle.
+ * @param   pvBuf       The bytes to write.
+ * @param   cbToWrite   Number of bytes to write; zero succeeds immediately.
+ * @param   pcbWritten  Where to return the number of bytes written.  When
+ *                      NULL the function keeps writing until everything is
+ *                      out or an error occurs.
+ */
+RTR3DECL(int) RTFileWrite(RTFILE hFile, const void *pvBuf, size_t cbToWrite, size_t *pcbWritten)
+{
+    if (cbToWrite <= 0)
+    {
+        /* Report the zero-length write explicitly, the same way RTFileRead
+           reports zero-length reads, so the caller never sees a stale count. */
+        if (pcbWritten)
+            *pcbWritten = 0;
+        return VINF_SUCCESS;
+    }
+
+    /*
+     * Attempt write.
+     */
+    ssize_t cbWritten = write(RTFileToNative(hFile), pvBuf, cbToWrite);
+    if (cbWritten >= 0)
+    {
+        if (pcbWritten)
+            /* The caller can handle a partial write. */
+            *pcbWritten = cbWritten;
+        else
+        {
+            /* The caller expects everything to be written. */
+            while ((ssize_t)cbToWrite > cbWritten)
+            {
+                ssize_t cbWrittenPart = write(RTFileToNative(hFile), (const char *)pvBuf + cbWritten, cbToWrite - cbWritten);
+                if (cbWrittenPart <= 0)
+                    return cbWrittenPart < 0 ? RTErrConvertFromErrno(errno) : VERR_TRY_AGAIN;
+                cbWritten += cbWrittenPart;
+            }
+        }
+        return VINF_SUCCESS;
+    }
+    return RTErrConvertFromErrno(errno);
+}
+
+/**
+ * Sets the size of a file using ftruncate(), or chsize() on the toolchains
+ * selected by the preprocessor condition below.
+ *
+ * @returns VINF_SUCCESS on success, VERR_NOT_SUPPORTED (after asserting) when
+ *          off_t is narrower than cbSize and cbSize needs more than 32 bits,
+ *          otherwise the status converted from errno.
+ * @param   hFile   The file handle.
+ * @param   cbSize  The new file size in bytes.
+ */
+RTR3DECL(int) RTFileSetSize(RTFILE hFile, uint64_t cbSize)
+{
+ /*
+ * Validate offset.
+ */
+ if ( sizeof(off_t) < sizeof(cbSize)
+ && (cbSize >> 32) != 0)
+ {
+ AssertMsgFailed(("64-bit filesize not supported! cbSize=%lld\n", cbSize));
+ return VERR_NOT_SUPPORTED;
+ }
+
+#if defined(_MSC_VER) || (defined(RT_OS_OS2) && (!defined(__INNOTEK_LIBC__) || __INNOTEK_LIBC__ < 0x006))
+ if (chsize(RTFileToNative(hFile), (off_t)cbSize) == 0)
+#else
+ /* This relies on a non-standard feature of FreeBSD, Linux, and OS/2
+ * LIBC v0.6 and higher. (SuS doesn't define ftruncate() and size bigger
+ * than the file.)
+ */
+ if (ftruncate(RTFileToNative(hFile), (off_t)cbSize) == 0)
+#endif
+ return VINF_SUCCESS; /* shared success branch of whichever 'if' was compiled in above */
+ return RTErrConvertFromErrno(errno);
+}
+
+/**
+ * Queries the size of a file, with a per-OS fallback for device nodes that
+ * fstat() reports as zero sized.
+ *
+ * fstat() is asked first.  Its st_size is returned unless it is zero and the
+ * node type matches the per-OS device check, in which case the size is
+ * determined by an I/O control query (Darwin, Solaris, FreeBSD) or by seeking
+ * to the end of the file (everything else).
+ *
+ * @returns VINF_SUCCESS on success; the status of the position restoring seek
+ *          on the generic path; otherwise the status converted from errno.
+ * @param   hFile       The file handle.
+ * @param   pcbSize     Where to return the size in bytes.  Written as soon as
+ *                      fstat() succeeds and possibly updated by the fallback.
+ */
+RTR3DECL(int) RTFileQuerySize(RTFILE hFile, uint64_t *pcbSize)
+{
+ /*
+ * Ask fstat() first.
+ */
+ struct stat st;
+ if (!fstat(RTFileToNative(hFile), &st))
+ {
+ *pcbSize = st.st_size;
+ if ( st.st_size != 0
+#if defined(RT_OS_SOLARIS) || defined(RT_OS_DARWIN)
+ || (!S_ISBLK(st.st_mode) && !S_ISCHR(st.st_mode))
+#elif defined(RT_OS_FREEBSD) || defined(RT_OS_NETBSD) || defined(RT_OS_DARWIN)
+ || !S_ISCHR(st.st_mode)
+#else
+ || !S_ISBLK(st.st_mode)
+#endif
+ )
+ return VINF_SUCCESS;
+
+ /*
+ * It could be a block device. Try to determine the size by I/O control
+ * query or seek.
+ */
+#ifdef RT_OS_DARWIN
+ uint64_t cBlocks;
+ if (!ioctl(RTFileToNative(hFile), DKIOCGETBLOCKCOUNT, &cBlocks))
+ {
+ uint32_t cbBlock;
+ if (!ioctl(RTFileToNative(hFile), DKIOCGETBLOCKSIZE, &cbBlock))
+ {
+ *pcbSize = cBlocks * cbBlock;
+ return VINF_SUCCESS;
+ }
+ }
+
+ /* Always fail block devices. Character devices don't all need to be
+ /dev/rdisk* nodes, they should return ENOTTY but /dev/null returns ENODEV
+ and we include EINVAL just in case. */
+ if (!S_ISBLK(st.st_mode) && (errno == ENOTTY || errno == ENODEV || errno == EINVAL))
+ return VINF_SUCCESS;
+
+#elif defined(RT_OS_SOLARIS)
+ struct dk_minfo MediaInfo;
+ if (!ioctl(RTFileToNative(hFile), DKIOCGMEDIAINFO, &MediaInfo))
+ {
+ *pcbSize = MediaInfo.dki_capacity * MediaInfo.dki_lbsize;
+ return VINF_SUCCESS;
+ }
+ /* might not be a block device. */
+ if (errno == EINVAL || errno == ENOTTY)
+ return VINF_SUCCESS;
+
+#elif defined(RT_OS_FREEBSD)
+ off_t cbMedia = 0;
+ if (!ioctl(RTFileToNative(hFile), DIOCGMEDIASIZE, &cbMedia))
+ {
+ *pcbSize = cbMedia;
+ return VINF_SUCCESS;
+ }
+ /* might not be a block device. */
+ if (errno == EINVAL || errno == ENOTTY)
+ return VINF_SUCCESS;
+
+#else
+ /* PORTME! Avoid this path when possible. */
+ uint64_t offSaved = UINT64_MAX;
+ int rc = RTFileSeek(hFile, 0, RTFILE_SEEK_CURRENT, &offSaved);
+ if (RT_SUCCESS(rc))
+ {
+ rc = RTFileSeek(hFile, 0, RTFILE_SEEK_END, pcbSize); /* the end position is the size */
+ int rc2 = RTFileSeek(hFile, offSaved, RTFILE_SEEK_BEGIN, NULL); /* always restore the position */
+ if (RT_SUCCESS(rc))
+ return rc2;
+ }
+#endif
+ }
+ return RTErrConvertFromErrno(errno); /* NOTE(review): on the generic path a failed RTFileSeek status is dropped in favour of errno - confirm this is intended. */
+}
+
+/**
+ * Determines the maximum file size by probing which offsets can be seeked to.
+ *
+ * The current position is saved, a coarse loop shrinks the probe offset from
+ * INT64_MAX by eight bits per step until a seek succeeds, a binary search
+ * then narrows the range, and finally the saved position is restored.
+ *
+ * @returns The failure status of the initial position query;
+ *          VERR_NOT_IMPLEMENTED when seeking to INT64_MAX succeeds (or the
+ *          failure status of restoring the position in that case); otherwise
+ *          the status of the final position restoring seek.
+ * @param   hFile   The file handle.
+ * @param   pcbMax  Where to return the maximum size found.  Optional.
+ */
+RTR3DECL(int) RTFileQueryMaxSizeEx(RTFILE hFile, PRTFOFF pcbMax)
+{
+ /*
+ * Save the current location
+ */
+ uint64_t offOld = UINT64_MAX;
+ int rc = RTFileSeek(hFile, 0, RTFILE_SEEK_CURRENT, &offOld);
+ if (RT_FAILURE(rc))
+ return rc;
+
+ uint64_t offLow = 0;
+ uint64_t offHigh = INT64_MAX; /* we don't need bigger files */
+ /** @todo Unfortunately this does not work for certain file system types,
+ * for instance cifs mounts. Even worse, statvfs.f_fsid returns 0 for such
+ * file systems. */
+
+ /*
+ * Quickly guess the order of magnitude for offHigh and offLow.
+ *
+ * NOTE(review): when every probe in this loop fails, offHigh is left at
+ * the last shifted value, which was never probed, rather than at
+ * offHighPrev - confirm this upper bound is intended.
+ */
+ {
+ uint64_t offHighPrev = offHigh;
+ while (offHigh >= INT32_MAX)
+ {
+ rc = RTFileSeek(hFile, offHigh, RTFILE_SEEK_BEGIN, NULL);
+ if (RT_SUCCESS(rc))
+ {
+ offLow = offHigh; /* largest offset known to work */
+ offHigh = offHighPrev; /* smallest offset known to fail */
+ break;
+ }
+ else
+ {
+ offHighPrev = offHigh;
+ offHigh >>= 8;
+ }
+ }
+ }
+
+ /*
+ * Sanity: if the seek to the initial offHigh (INT64_MAX) works, then
+ * this algorithm cannot possibly work. Declare defeat.
+ */
+ if (offLow == offHigh)
+ {
+ rc = RTFileSeek(hFile, offOld, RTFILE_SEEK_BEGIN, NULL);
+ if (RT_SUCCESS(rc))
+ rc = VERR_NOT_IMPLEMENTED;
+
+ return rc;
+ }
+
+ /*
+ * Perform a binary search for the max file size.
+ */
+ while (offLow <= offHigh)
+ {
+ uint64_t offMid = offLow + (offHigh - offLow) / 2;
+ rc = RTFileSeek(hFile, offMid, RTFILE_SEEK_BEGIN, NULL);
+ if (RT_FAILURE(rc))
+ offHigh = offMid - 1;
+ else
+ offLow = offMid + 1;
+ }
+
+ if (pcbMax)
+ *pcbMax = RT_MIN(offLow, offHigh);
+ return RTFileSeek(hFile, offOld, RTFILE_SEEK_BEGIN, NULL);
+}
+
+
+/**
+ * Checks whether a file handle is usable.
+ *
+ * @returns true when hFile is not NIL_RTFILE and fcntl(F_GETFD) on it yields
+ *          a non-negative result, false otherwise.
+ * @param   hFile   The file handle to check.
+ */
+RTR3DECL(bool) RTFileIsValid(RTFILE hFile)
+{
+    if (hFile == NIL_RTFILE)
+        return false;
+
+    /* A negative F_GETFD result means the query failed. */
+    int const fFdFlags = fcntl(RTFileToNative(hFile), F_GETFD);
+    return fFdFlags >= 0;
+}
+
+
+/**
+ * Flushes a file with fsync().
+ *
+ * @returns VINF_SUCCESS on success, VINF_NOT_SUPPORTED when fsync() fails
+ *          with EINVAL, otherwise the status converted from errno.
+ * @param   hFile   The file handle.
+ */
+RTR3DECL(int) RTFileFlush(RTFILE hFile)
+{
+    int const iRet = fsync(RTFileToNative(hFile));
+    if (iRet == 0)
+        return VINF_SUCCESS;
+
+    /* EINVAL is what pseudo ttys and other odd handles return, so it is
+       reported as an informational status rather than a failure. */
+    int const iErr = errno;
+    return iErr == EINVAL ? VINF_NOT_SUPPORTED : RTErrConvertFromErrno(iErr);
+}
+
+
+/**
+ * Issues an ioctl() request on a file handle.
+ *
+ * @returns VINF_SUCCESS when ioctl() returns a non-negative value, otherwise
+ *          the status converted from errno.
+ * @param   hFile       The file handle.
+ * @param   ulRequest   The request code passed to ioctl().
+ * @param   pvData      The request argument passed to ioctl().
+ * @param   cbData      Not used by this implementation.
+ * @param   piRet       Where to return the raw ioctl() result.  Optional.
+ */
+RTR3DECL(int) RTFileIoCtl(RTFILE hFile, unsigned long ulRequest, void *pvData, unsigned cbData, int *piRet)
+{
+    NOREF(cbData);
+
+    int const iRet = ioctl(RTFileToNative(hFile), ulRequest, pvData);
+    if (piRet)
+        *piRet = iRet;
+    if (iRet < 0)
+        return RTErrConvertFromErrno(errno);
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Changes the mode of an open file using fchmod().
+ *
+ * @returns VINF_SUCCESS on success, VERR_INVALID_PARAMETER when the
+ *          normalized mode is rejected by rtFsModeIsValid(), otherwise the
+ *          status converted from errno.
+ * @param   hFile   The file handle.
+ * @param   fMode   The new mode; normalized as a file mode first, then masked
+ *                  with RTFS_UNIX_MASK for the fchmod() call.
+ */
+RTR3DECL(int) RTFileSetMode(RTFILE hFile, RTFMODE fMode)
+{
+    /* Normalize first; everything below works on the normalized value. */
+    fMode = rtFsModeNormalize(fMode, NULL, 0, RTFS_TYPE_FILE);
+    if (!rtFsModeIsValid(fMode))
+        return VERR_INVALID_PARAMETER;
+
+    if (fchmod(RTFileToNative(hFile), fMode & RTFS_UNIX_MASK) == 0)
+        return VINF_SUCCESS;
+
+    int rc = RTErrConvertFromErrno(errno);
+    Log(("RTFileSetMode(%RTfile,%RTfmode): returns %Rrc\n", hFile, fMode, rc));
+    return rc;
+}
+
+
+/**
+ * Changes the owner and/or group of an open file using fchown().
+ *
+ * @returns VINF_SUCCESS on success, VERR_INVALID_PARAMETER (after asserting)
+ *          when an ID does not survive the conversion to uid_t / gid_t,
+ *          otherwise the status converted from errno.
+ * @param   hFile   The file handle.
+ * @param   uid     The new owner; NIL_RTUID is passed to fchown() as
+ *                  (uid_t)-1.
+ * @param   gid     The new group; NIL_RTGID is passed to fchown() as
+ *                  (gid_t)-1.
+ */
+RTDECL(int) RTFileSetOwner(RTFILE hFile, uint32_t uid, uint32_t gid)
+{
+    uid_t uidNative = (uid_t)-1;
+    if (uid != NIL_RTUID)
+        uidNative = (uid_t)uid;
+    AssertReturn(uid == uidNative, VERR_INVALID_PARAMETER);
+
+    gid_t gidNative = (gid_t)-1;
+    if (gid != NIL_RTGID)
+        gidNative = (gid_t)gid;
+    AssertReturn(gid == gidNative, VERR_INVALID_PARAMETER);
+
+    int const iRet = fchown(RTFileToNative(hFile), uidNative, gidNative);
+    if (iRet != 0)
+        return RTErrConvertFromErrno(errno);
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Renames a file; the actual work is done by rtPathPosixRename() with the
+ * object type fixed to RTFS_TYPE_FILE.
+ *
+ * @returns VERR_INVALID_POINTER / VERR_INVALID_PARAMETER (after asserting) for
+ *          bad pointers, empty strings or flags other than
+ *          RTPATHRENAME_FLAGS_REPLACE; otherwise the status of
+ *          rtPathPosixRename().
+ * @param   pszSrc      The current path of the file.
+ * @param   pszDst      The new path of the file.
+ * @param   fRename     Rename flags; only RTPATHRENAME_FLAGS_REPLACE is
+ *                      accepted.
+ */
+RTR3DECL(int) RTFileRename(const char *pszSrc, const char *pszDst, unsigned fRename)
+{
+    /*
+     * Validate input.
+     */
+    AssertPtrReturn(pszSrc, VERR_INVALID_POINTER);
+    AssertPtrReturn(pszDst, VERR_INVALID_POINTER);
+    AssertMsgReturn(*pszSrc, ("%p\n", pszSrc), VERR_INVALID_PARAMETER);
+    AssertMsgReturn(*pszDst, ("%p\n", pszDst), VERR_INVALID_PARAMETER);
+    AssertMsgReturn(!(fRename & ~RTPATHRENAME_FLAGS_REPLACE), ("%#x\n", fRename), VERR_INVALID_PARAMETER);
+
+    /*
+     * Take common cause with RTPathRename.
+     */
+    int rc = rtPathPosixRename(pszSrc, pszDst, fRename, RTFS_TYPE_FILE);
+
+    /* Log under this function's own name so the flow log is not misleading. */
+    LogFlow(("RTFileRename(%p:{%s}, %p:{%s}, %#x): returns %Rrc\n",
+             pszSrc, pszSrc, pszDst, pszDst, fRename, rc));
+    return rc;
+}
+
diff --git a/src/VBox/Runtime/r3/posix/fileio-sg-at-posix.cpp b/src/VBox/Runtime/r3/posix/fileio-sg-at-posix.cpp
new file mode 100644
index 00000000..5fdea41a
--- /dev/null
+++ b/src/VBox/Runtime/r3/posix/fileio-sg-at-posix.cpp
@@ -0,0 +1,298 @@
+/* $Id: fileio-sg-at-posix.cpp $ */
+/** @file
+ * IPRT - File I/O, RTFileSgReadAt & RTFileSgWriteAt, posixy.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+/*
+ * Determine whether we've got preadv and pwritev.
+ */
+#include <iprt/cdefs.h>
+#ifdef RT_OS_LINUX
+/* Linux has these since glibc 2.10 and Linux 2.6.30: */
+# include <features.h>
+# ifdef __GLIBC_PREREQ
+# if __GLIBC_PREREQ(2,10)
+# define HAVE_PREADV_AND_PWRITEV 1
+#else
+# endif
+# endif
+
+#elif defined(RT_OS_FREEBSD)
+/* FreeBSD has these since 6.0: */
+# include <osreldate.h>
+# ifdef __FreeBSD_version
+# if __FreeBSD_version >= 600000
+# define HAVE_PREADV_AND_PWRITEV 1
+# endif
+# endif
+
+#endif
+
+#ifndef HAVE_PREADV_AND_PWRITEV
+
+# include "../../generic/fileio-sg-at-generic.cpp"
+
+#else /* HAVE_PREADV_AND_PWRITEV - rest of the file */
+
+# include <errno.h>
+# include <sys/types.h>
+# include <sys/uio.h>
+# include <unistd.h>
+# include <limits.h>
+# if defined(RT_OS_DARWIN) || defined(RT_OS_FREEBSD) || defined(RT_OS_NETBSD) || defined(RT_OS_OPENBSD)
+# include <sys/syslimits.h>
+# endif
+
+# include "internal/iprt.h"
+# include <iprt/file.h>
+
+# include <iprt/assert.h>
+# include <iprt/err.h>
+# include <iprt/log.h>
+
+# ifndef UIO_MAXIOV
+# ifdef IOV_MAX
+# define UIO_MAXIOV IOV_MAX
+# else
+# error "UIO_MAXIOV and IOV_MAX are undefined"
+# endif
+# endif
+
+
+/* These assumptions simplify things a lot here. */
+AssertCompileMembersSameSizeAndOffset(struct iovec, iov_base, RTSGSEG, pvSeg);
+AssertCompileMembersSameSizeAndOffset(struct iovec, iov_len, RTSGSEG, cbSeg);
+
+/**
+ * Reads from a file at a given offset into a scatter/gather buffer.
+ *
+ * When cbToRead equals what is left in pSgBuf, the segment array is handed
+ * directly to preadv() for as long as the buffer is positioned at the start
+ * of a segment.  Otherwise, and for whatever remains, the data is read
+ * segment by segment with RTFileReadAt().
+ *
+ * @returns IPRT status code.  VERR_INVALID_PARAMETER (after asserting) when
+ *          pSgBuf has fewer than cbToRead bytes left, VERR_EOF when preadv()
+ *          returns zero bytes and pcbRead is NULL.
+ * @param   hFile       The file handle.
+ * @param   off         The file offset to start reading at.
+ * @param   pSgBuf      The S/G buffer to read into; advanced by what is read.
+ * @param   cbToRead    Number of bytes to read; zero only seeks to off.
+ * @param   pcbRead     Where to return the number of bytes read.  Optional;
+ *                      when given, a partial read is reported as success.
+ */
+RTDECL(int) RTFileSgReadAt(RTFILE hFile, RTFOFF off, PRTSGBUF pSgBuf, size_t cbToRead, size_t *pcbRead)
+{
+ /*
+ * Make sure we set pcbRead.
+ */
+ if (pcbRead)
+ *pcbRead = 0;
+
+ /*
+ * Special case: Zero read == seek.
+ */
+ if (cbToRead == 0)
+ return RTFileSeek(hFile, off, RTFILE_SEEK_BEGIN, NULL);
+
+ /*
+ * We can use the segment array directly if we're at the start of the
+ * current S/G segment and cbToRead matches the remainder exactly.
+ */
+ size_t cbTotalRead = 0;
+
+ size_t const cbSgBufLeft = RTSgBufCalcLengthLeft(pSgBuf);
+ AssertMsgReturn(cbSgBufLeft >= cbToRead, ("%#zx vs %#zx\n", cbSgBufLeft, cbToRead), VERR_INVALID_PARAMETER);
+
+ if (cbToRead == cbSgBufLeft)
+ while (RTSgBufIsAtStartOfSegment(pSgBuf))
+ {
+ size_t const cSegsLeft = pSgBuf->cSegs - pSgBuf->idxSeg;
+ ssize_t cbThisRead = preadv(RTFileToNative(hFile), (const struct iovec *)&pSgBuf->paSegs[pSgBuf->idxSeg],
+ RT_MIN(cSegsLeft, UIO_MAXIOV), off); /* at most UIO_MAXIOV segments per call */
+ if (cbThisRead >= 0)
+ {
+ AssertStmt((size_t)cbThisRead <= cbToRead, cbThisRead = cbToRead); /* clamp should more than requested be reported */
+
+ RTSgBufAdvance(pSgBuf, cbThisRead);
+ cbTotalRead += cbThisRead;
+ cbToRead -= cbThisRead;
+ if (cbToRead == 0)
+ {
+ if (pcbRead)
+ *pcbRead = cbTotalRead;
+ return VINF_SUCCESS;
+ }
+
+ /* Short read: report it if the caller accepts partial reads and either
+ all remaining segments were submitted or nothing at all was read. */
+ if ( pcbRead
+ && ( cSegsLeft <= UIO_MAXIOV
+ || cbThisRead == 0 /* typically EOF */ ))
+ {
+ *pcbRead = cbTotalRead;
+ return VINF_SUCCESS;
+ }
+ if (cbThisRead == 0)
+ return VERR_EOF;
+
+ off += cbThisRead;
+ }
+ else if (cbTotalRead > 0 && pcbRead) /* an error after some progress is reported as a partial read */
+ {
+ *pcbRead = cbTotalRead;
+ return VINF_SUCCESS;
+ }
+ else
+ return RTErrConvertFromErrno(errno);
+ }
+
+ /*
+ * Unaligned start or not reading the whole buffer. For reasons of
+ * simplicity, we work the input segment by segment like the generic code.
+ */
+ int rc = VINF_SUCCESS;
+ while (cbToRead > 0)
+ {
+ size_t cbSeg;
+ void *pvSeg = RTSgBufGetCurrentSegment(pSgBuf, cbToRead, &cbSeg);
+ size_t cbThisRead = cbSeg;
+ rc = RTFileReadAt(hFile, off, pvSeg, cbSeg, pcbRead ? &cbThisRead : NULL);
+ if (RT_SUCCESS(rc))
+ {
+ RTSgBufAdvance(pSgBuf, cbThisRead);
+ cbTotalRead += cbThisRead;
+ }
+ else
+ break;
+ if ((size_t)cbThisRead < cbSeg)
+ {
+ AssertStmt(pcbRead, rc = VERR_INTERNAL_ERROR_2); /* a short count is only expected when partial reads are allowed */
+ break;
+ }
+
+ Assert(cbSeg == cbThisRead);
+ cbToRead -= cbSeg;
+ off += cbSeg;
+ }
+ if (pcbRead)
+ *pcbRead = cbTotalRead;
+ return rc;
+}
+
+/**
+ * Writes a scatter/gather buffer to a file at a given offset.
+ *
+ * When cbToWrite equals what is left in pSgBuf, the segment array is handed
+ * directly to pwritev() for as long as the buffer is positioned at the start
+ * of a segment.  Otherwise, and for whatever remains, the data is written
+ * segment by segment with RTFileWriteAt().
+ *
+ * @returns IPRT status code.  VERR_INVALID_PARAMETER (after asserting) when
+ *          pSgBuf has fewer than cbToWrite bytes left, VERR_TRY_AGAIN when
+ *          pwritev() writes zero bytes and pcbWritten is NULL.
+ * @param   hFile       The file handle.
+ * @param   off         The file offset to start writing at.
+ * @param   pSgBuf      The S/G buffer to write from; advanced by what is
+ *                      written.
+ * @param   cbToWrite   Number of bytes to write; zero only seeks to off.
+ * @param   pcbWritten  Where to return the number of bytes written.
+ *                      Optional; when given, a partial write is reported as
+ *                      success.
+ */
+RTDECL(int) RTFileSgWriteAt(RTFILE hFile, RTFOFF off, PRTSGBUF pSgBuf, size_t cbToWrite, size_t *pcbWritten)
+{
+ /*
+ * Make sure we set pcbWritten.
+ */
+ if (pcbWritten)
+ *pcbWritten = 0;
+
+ /*
+ * Special case: Zero write == seek.
+ */
+ if (cbToWrite == 0)
+ return RTFileSeek(hFile, off, RTFILE_SEEK_BEGIN, NULL);
+
+ /*
+ * We can use the segment array directly if we're at the start of the
+ * current S/G segment and cbToWrite matches the remainder exactly.
+ */
+ size_t cbTotalWritten = 0;
+
+ size_t const cbSgBufLeft = RTSgBufCalcLengthLeft(pSgBuf);
+ AssertMsgReturn(cbSgBufLeft >= cbToWrite, ("%#zx vs %#zx\n", cbSgBufLeft, cbToWrite), VERR_INVALID_PARAMETER);
+
+ if (cbToWrite == cbSgBufLeft)
+ while (RTSgBufIsAtStartOfSegment(pSgBuf))
+ {
+ size_t const cSegsLeft = pSgBuf->cSegs - pSgBuf->idxSeg;
+ ssize_t cbThisWritten = pwritev(RTFileToNative(hFile), (const struct iovec *)&pSgBuf->paSegs[pSgBuf->idxSeg],
+ RT_MIN(cSegsLeft, UIO_MAXIOV), off); /* at most UIO_MAXIOV segments per call */
+ if (cbThisWritten >= 0)
+ {
+ AssertStmt((size_t)cbThisWritten <= cbToWrite, cbThisWritten = cbToWrite); /* clamp should more than requested be reported */
+
+ RTSgBufAdvance(pSgBuf, cbThisWritten);
+ cbTotalWritten += cbThisWritten;
+ cbToWrite -= cbThisWritten;
+ if (cbToWrite == 0)
+ {
+ if (pcbWritten)
+ *pcbWritten = cbTotalWritten;
+ return VINF_SUCCESS;
+ }
+
+ /* Short write: report it if the caller accepts partial writes and either
+ all remaining segments were submitted or nothing at all was written. */
+ if ( pcbWritten
+ && ( cSegsLeft <= UIO_MAXIOV
+ || cbThisWritten == 0 /* non-file, full buffer/whatever */ ))
+ {
+ *pcbWritten = cbTotalWritten;
+ return VINF_SUCCESS;
+ }
+ if (cbThisWritten == 0)
+ return VERR_TRY_AGAIN;
+
+ off += cbThisWritten;
+ }
+ else if (cbTotalWritten > 0 && pcbWritten) /* an error after some progress is reported as a partial write */
+ {
+ *pcbWritten = cbTotalWritten;
+ return VINF_SUCCESS;
+ }
+ else
+ return RTErrConvertFromErrno(errno);
+ }
+
+ /*
+ * Unaligned start or not writing the whole buffer. For reasons of
+ * simplicity, we work the input segment by segment like the generic code.
+ */
+ int rc = VINF_SUCCESS;
+ while (cbToWrite > 0)
+ {
+ size_t cbSeg;
+ void *pvSeg = RTSgBufGetCurrentSegment(pSgBuf, cbToWrite, &cbSeg);
+ size_t cbThisWritten = cbSeg;
+ rc = RTFileWriteAt(hFile, off, pvSeg, cbSeg, pcbWritten ? &cbThisWritten : NULL);
+ if (RT_SUCCESS(rc))
+ {
+ RTSgBufAdvance(pSgBuf, cbThisWritten);
+ cbTotalWritten += cbThisWritten;
+ }
+ else
+ break;
+ if ((size_t)cbThisWritten < cbSeg)
+ {
+ AssertStmt(pcbWritten, rc = VERR_INTERNAL_ERROR_2); /* a short count is only expected when partial writes are allowed */
+ break;
+ }
+
+ Assert(cbSeg == cbThisWritten);
+ cbToWrite -= cbSeg;
+ off += cbSeg;
+ }
+ if (pcbWritten)
+ *pcbWritten = cbTotalWritten;
+ return rc;
+}
+
+#endif /* HAVE_PREADV_AND_PWRITEV */
+
diff --git a/src/VBox/Runtime/r3/posix/fileio-sg-posix.cpp b/src/VBox/Runtime/r3/posix/fileio-sg-posix.cpp
new file mode 100644
index 00000000..c8c95786
--- /dev/null
+++ b/src/VBox/Runtime/r3/posix/fileio-sg-posix.cpp
@@ -0,0 +1,260 @@
+/* $Id: fileio-sg-posix.cpp $ */
+/** @file
+ * IPRT - File I/O, RTFileSgRead & RTFileSgWrite, posixy.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#include <iprt/cdefs.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <unistd.h>
+#include <limits.h>
+#if defined(RT_OS_DARWIN) || defined(RT_OS_FREEBSD) || defined(RT_OS_NETBSD) || defined(RT_OS_OPENBSD)
+# include <sys/syslimits.h>
+#endif
+
+#include "internal/iprt.h"
+#include <iprt/file.h>
+
+#include <iprt/assert.h>
+#include <iprt/err.h>
+#include <iprt/log.h>
+
+#ifndef UIO_MAXIOV
+# ifdef IOV_MAX
+# define UIO_MAXIOV IOV_MAX
+# else
+# error "UIO_MAXIOV and IOV_MAX are undefined"
+# endif
+#endif
+
+
+/* These assumptions simplify things a lot here. */
+AssertCompileMembersSameSizeAndOffset(struct iovec, iov_base, RTSGSEG, pvSeg);
+AssertCompileMembersSameSizeAndOffset(struct iovec, iov_len, RTSGSEG, cbSeg);
+
+/**
+ * Reads from a file at the current position into a scatter/gather buffer.
+ *
+ * When cbToRead equals what is left in pSgBuf, the segment array is handed
+ * directly to readv() for as long as the buffer is positioned at the start of
+ * a segment.  Otherwise, and for whatever remains, the data is read segment
+ * by segment with RTFileRead().
+ *
+ * @returns IPRT status code.  VERR_INVALID_PARAMETER (after asserting) when
+ *          pSgBuf has fewer than cbToRead bytes left, VERR_EOF when readv()
+ *          returns zero bytes and pcbRead is NULL.
+ * @param   hFile       The file handle.
+ * @param   pSgBuf      The S/G buffer to read into; advanced by what is read.
+ * @param   cbToRead    Number of bytes to read; zero is a no-op.
+ * @param   pcbRead     Where to return the number of bytes read.  Optional;
+ *                      when given, a partial read is reported as success.
+ */
+RTDECL(int) RTFileSgRead(RTFILE hFile, PRTSGBUF pSgBuf, size_t cbToRead, size_t *pcbRead)
+{
+ /*
+ * Make sure we set pcbRead.
+ */
+ if (pcbRead)
+ *pcbRead = 0;
+
+ /*
+ * Special case: Zero read == nop.
+ */
+ if (cbToRead == 0)
+ return VINF_SUCCESS;
+
+ /*
+ * We can use the segment array directly if we're at the start of the
+ * current S/G segment and cbToRead matches the remainder exactly.
+ */
+ size_t cbTotalRead = 0;
+
+ size_t const cbSgBufLeft = RTSgBufCalcLengthLeft(pSgBuf);
+ AssertMsgReturn(cbSgBufLeft >= cbToRead, ("%#zx vs %#zx\n", cbSgBufLeft, cbToRead), VERR_INVALID_PARAMETER);
+
+ if (cbToRead == cbSgBufLeft)
+ while (RTSgBufIsAtStartOfSegment(pSgBuf))
+ {
+ size_t const cSegsLeft = pSgBuf->cSegs - pSgBuf->idxSeg;
+ ssize_t cbThisRead = readv(RTFileToNative(hFile), (const struct iovec *)&pSgBuf->paSegs[pSgBuf->idxSeg],
+ RT_MIN(cSegsLeft, UIO_MAXIOV)); /* at most UIO_MAXIOV segments per call */
+ if (cbThisRead >= 0)
+ {
+ AssertStmt((size_t)cbThisRead <= cbToRead, cbThisRead = cbToRead); /* clamp should more than requested be reported */
+
+ RTSgBufAdvance(pSgBuf, cbThisRead);
+ cbTotalRead += cbThisRead;
+ cbToRead -= cbThisRead;
+ if (cbToRead == 0)
+ {
+ if (pcbRead)
+ *pcbRead = cbTotalRead;
+ return VINF_SUCCESS;
+ }
+
+ /* Short read: report it if the caller accepts partial reads and either
+ all remaining segments were submitted or nothing at all was read. */
+ if ( pcbRead
+ && ( cSegsLeft <= UIO_MAXIOV
+ || cbThisRead == 0 /* typically EOF */ ))
+ {
+ *pcbRead = cbTotalRead;
+ return VINF_SUCCESS;
+ }
+ if (cbThisRead == 0)
+ return VERR_EOF;
+ }
+ else if (cbTotalRead > 0 && pcbRead) /* an error after some progress is reported as a partial read */
+ {
+ *pcbRead = cbTotalRead;
+ return VINF_SUCCESS;
+ }
+ else
+ return RTErrConvertFromErrno(errno);
+ }
+
+ /*
+ * Unaligned start or not reading the whole buffer. For reasons of
+ * simplicity, we work the input segment by segment like the generic code.
+ */
+ int rc = VINF_SUCCESS;
+ while (cbToRead > 0)
+ {
+ size_t cbSeg;
+ void *pvSeg = RTSgBufGetCurrentSegment(pSgBuf, cbToRead, &cbSeg);
+ size_t cbThisRead = cbSeg;
+ rc = RTFileRead(hFile, pvSeg, cbSeg, pcbRead ? &cbThisRead : NULL);
+ if (RT_SUCCESS(rc))
+ {
+ RTSgBufAdvance(pSgBuf, cbThisRead);
+ cbTotalRead += cbThisRead;
+ }
+ else
+ break;
+ if ((size_t)cbThisRead < cbSeg)
+ {
+ AssertStmt(pcbRead, rc = VERR_INTERNAL_ERROR_2); /* a short count is only expected when partial reads are allowed */
+ break;
+ }
+
+ Assert(cbSeg == cbThisRead);
+ cbToRead -= cbSeg;
+ }
+ if (pcbRead)
+ *pcbRead = cbTotalRead;
+ return rc;
+}
+
+/**
+ * Writes a scatter/gather buffer to a file at the current position.
+ *
+ * When cbToWrite equals what is left in pSgBuf, the segment array is handed
+ * directly to writev() for as long as the buffer is positioned at the start
+ * of a segment.  Otherwise, and for whatever remains, the data is written
+ * segment by segment with RTFileWrite().
+ *
+ * @returns IPRT status code.  VERR_INVALID_PARAMETER (after asserting) when
+ *          pSgBuf has fewer than cbToWrite bytes left, VERR_TRY_AGAIN when
+ *          writev() writes zero bytes and pcbWritten is NULL.
+ * @param   hFile       The file handle.
+ * @param   pSgBuf      The S/G buffer to write from; advanced by what is
+ *                      written.
+ * @param   cbToWrite   Number of bytes to write; zero is a no-op.
+ * @param   pcbWritten  Where to return the number of bytes written.
+ *                      Optional; when given, a partial write is reported as
+ *                      success.
+ */
+RTDECL(int) RTFileSgWrite(RTFILE hFile, PRTSGBUF pSgBuf, size_t cbToWrite, size_t *pcbWritten)
+{
+ /*
+ * Make sure we set pcbWritten.
+ */
+ if (pcbWritten)
+ *pcbWritten = 0;
+
+ /*
+ * Special case: Zero write == nop.
+ */
+ if (cbToWrite == 0)
+ return VINF_SUCCESS;
+
+ /*
+ * We can use the segment array directly if we're at the start of the
+ * current S/G segment and cbToWrite matches the remainder exactly.
+ */
+ size_t cbTotalWritten = 0;
+
+ size_t const cbSgBufLeft = RTSgBufCalcLengthLeft(pSgBuf);
+ AssertMsgReturn(cbSgBufLeft >= cbToWrite, ("%#zx vs %#zx\n", cbSgBufLeft, cbToWrite), VERR_INVALID_PARAMETER);
+
+ if (cbToWrite == cbSgBufLeft)
+ while (RTSgBufIsAtStartOfSegment(pSgBuf))
+ {
+ size_t const cSegsLeft = pSgBuf->cSegs - pSgBuf->idxSeg;
+ ssize_t cbThisWritten = writev(RTFileToNative(hFile), (const struct iovec *)&pSgBuf->paSegs[pSgBuf->idxSeg],
+ RT_MIN(cSegsLeft, UIO_MAXIOV)); /* at most UIO_MAXIOV segments per call */
+ if (cbThisWritten >= 0)
+ {
+ AssertStmt((size_t)cbThisWritten <= cbToWrite, cbThisWritten = cbToWrite); /* clamp should more than requested be reported */
+
+ RTSgBufAdvance(pSgBuf, cbThisWritten);
+ cbTotalWritten += cbThisWritten;
+ cbToWrite -= cbThisWritten;
+ if (cbToWrite == 0)
+ {
+ if (pcbWritten)
+ *pcbWritten = cbTotalWritten;
+ return VINF_SUCCESS;
+ }
+
+ /* Short write: report it if the caller accepts partial writes and either
+ all remaining segments were submitted or nothing at all was written. */
+ if ( pcbWritten
+ && ( cSegsLeft <= UIO_MAXIOV
+ || cbThisWritten == 0 /* non-file, full buffer/whatever */ ))
+ {
+ *pcbWritten = cbTotalWritten;
+ return VINF_SUCCESS;
+ }
+ if (cbThisWritten == 0)
+ return VERR_TRY_AGAIN;
+ }
+ else if (cbTotalWritten > 0 && pcbWritten) /* an error after some progress is reported as a partial write */
+ {
+ *pcbWritten = cbTotalWritten;
+ return VINF_SUCCESS;
+ }
+ else
+ return RTErrConvertFromErrno(errno);
+ }
+
+ /*
+ * Unaligned start or not writing the whole buffer. For reasons of
+ * simplicity, we work the input segment by segment like the generic code.
+ */
+ int rc = VINF_SUCCESS;
+ while (cbToWrite > 0)
+ {
+ size_t cbSeg;
+ void *pvSeg = RTSgBufGetCurrentSegment(pSgBuf, cbToWrite, &cbSeg);
+ size_t cbThisWritten = cbSeg;
+ rc = RTFileWrite(hFile, pvSeg, cbSeg, pcbWritten ? &cbThisWritten : NULL);
+ if (RT_SUCCESS(rc))
+ {
+ RTSgBufAdvance(pSgBuf, cbThisWritten);
+ cbTotalWritten += cbThisWritten;
+ }
+ else
+ break;
+ if ((size_t)cbThisWritten < cbSeg)
+ {
+ AssertStmt(pcbWritten, rc = VERR_INTERNAL_ERROR_2); /* a short count is only expected when partial writes are allowed */
+ break;
+ }
+
+ Assert(cbSeg == cbThisWritten);
+ cbToWrite -= cbSeg;
+ }
+ if (pcbWritten)
+ *pcbWritten = cbTotalWritten;
+ return rc;
+}
+
diff --git a/src/VBox/Runtime/r3/posix/fileio2-posix.cpp b/src/VBox/Runtime/r3/posix/fileio2-posix.cpp
new file mode 100644
index 00000000..4879f51e
--- /dev/null
+++ b/src/VBox/Runtime/r3/posix/fileio2-posix.cpp
@@ -0,0 +1,210 @@
+/* $Id: fileio2-posix.cpp $ */
+/** @file
+ * IPRT - File I/O, POSIX, Part 2.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP RTLOGGROUP_FILE
+
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#ifdef _MSC_VER
+# include <io.h>
+# include <stdio.h>
+#else
+# include <unistd.h>
+# include <sys/time.h>
+#endif
+#ifdef RT_OS_LINUX
+# include <sys/file.h>
+#endif
+#if defined(RT_OS_OS2) && (!defined(__INNOTEK_LIBC__) || __INNOTEK_LIBC__ < 0x006)
+# include <io.h>
+#endif
+
+#ifdef RT_OS_SOLARIS
+# define futimes(filedes, timeval) futimesat(filedes, NULL, timeval)
+#endif
+
+#ifdef RT_OS_HAIKU
+# define USE_FUTIMENS
+#endif
+
+#include <iprt/file.h>
+#include <iprt/path.h>
+#include <iprt/assert.h>
+#include <iprt/string.h>
+#include <iprt/errcore.h>
+#include <iprt/log.h>
+#include "internal/file.h"
+#include "internal/fs.h"
+#include "internal/path.h"
+
+
+
+/**
+ * Queries information about an open file using fstat().
+ *
+ * @returns VINF_SUCCESS on success; VERR_INVALID_PARAMETER (after asserting)
+ *          for a NIL handle, a bad pObjInfo pointer or an out-of-range
+ *          enmAdditionalAttribs; the status converted from errno when fstat()
+ *          fails.
+ * @param   hFile                   The file handle.
+ * @param   pObjInfo                Where to return the object information.
+ * @param   enmAdditionalAttribs    Which additional attribute set to fill in.
+ */
+RTR3DECL(int) RTFileQueryInfo(RTFILE hFile, PRTFSOBJINFO pObjInfo, RTFSOBJATTRADD enmAdditionalAttribs)
+{
+    /*
+     * Validate input.
+     */
+    AssertReturn(hFile != NIL_RTFILE, VERR_INVALID_PARAMETER);
+    AssertPtrReturn(pObjInfo, VERR_INVALID_PARAMETER);
+    if (   enmAdditionalAttribs < RTFSOBJATTRADD_NOTHING
+        || enmAdditionalAttribs > RTFSOBJATTRADD_LAST)
+    {
+        /* The value is an enum, so format it as an integer like the Log
+           statements below do, not as a pointer. */
+        AssertMsgFailed(("Invalid enmAdditionalAttribs=%d\n", enmAdditionalAttribs));
+        return VERR_INVALID_PARAMETER;
+    }
+
+    /*
+     * Query file info.
+     */
+    struct stat Stat;
+    if (fstat(RTFileToNative(hFile), &Stat))
+    {
+        int rc = RTErrConvertFromErrno(errno);
+        Log(("RTFileQueryInfo(%RTfile,,%d): returns %Rrc\n", hFile, enmAdditionalAttribs, rc));
+        return rc;
+    }
+
+    /*
+     * Setup the returned data.
+     */
+    rtFsConvertStatToObjInfo(pObjInfo, &Stat, NULL, 0);
+
+    /*
+     * Requested attributes (we cannot provide anything actually).
+     */
+    switch (enmAdditionalAttribs)
+    {
+        case RTFSOBJATTRADD_NOTHING:
+        case RTFSOBJATTRADD_UNIX:
+            /* done */
+            break;
+
+        case RTFSOBJATTRADD_UNIX_OWNER:
+            rtFsObjInfoAttrSetUnixOwner(pObjInfo, Stat.st_uid);
+            break;
+
+        case RTFSOBJATTRADD_UNIX_GROUP:
+            rtFsObjInfoAttrSetUnixGroup(pObjInfo, Stat.st_gid);
+            break;
+
+        case RTFSOBJATTRADD_EASIZE:
+            /* The EA size is always reported as zero here. */
+            pObjInfo->Attr.enmAdditional = RTFSOBJATTRADD_EASIZE;
+            pObjInfo->Attr.u.EASize.cb   = 0;
+            break;
+
+        default:
+            AssertMsgFailed(("Impossible!\n"));
+            return VERR_INTERNAL_ERROR;
+    }
+
+    LogFlow(("RTFileQueryInfo(%RTfile,,%d): returns VINF_SUCCESS\n", hFile, enmAdditionalAttribs));
+    return VINF_SUCCESS;
+}
+
+/**
+ * Sets the access and/or modification time of an open file.
+ *
+ * Uses futimens() when USE_FUTIMENS is defined and futimes() otherwise.  When
+ * only one of the two times is given, the other one is fetched with
+ * RTFileQueryInfo() so that both can be passed to the system call.
+ *
+ * @returns VINF_SUCCESS on success (also when neither time is given), the
+ *          failure status of RTFileQueryInfo(), or the status converted from
+ *          errno when the system call fails.
+ * @param   hFile               The file handle.
+ * @param   pAccessTime         The new access time.  Optional.
+ * @param   pModificationTime   The new modification time.  Optional.
+ * @param   pChangeTime         Ignored by this implementation.
+ * @param   pBirthTime          Ignored by this implementation.
+ */
+RTR3DECL(int) RTFileSetTimes(RTFILE hFile, PCRTTIMESPEC pAccessTime, PCRTTIMESPEC pModificationTime,
+ PCRTTIMESPEC pChangeTime, PCRTTIMESPEC pBirthTime)
+{
+ NOREF(pChangeTime); NOREF(pBirthTime);
+
+ /*
+ * We can only set AccessTime and ModificationTime, so if neither
+ * are specified we can return immediately.
+ */
+ if (!pAccessTime && !pModificationTime)
+ return VINF_SUCCESS;
+
+#ifdef USE_FUTIMENS
+ struct timespec aTimespecs[2];
+ if (pAccessTime && pModificationTime)
+ { /* NOTE(review): the memcpy calls in this branch copy sizeof(struct timespec)
+ bytes straight out of RTTIMESPEC objects, whereas the futimes() branch
+ converts with RTTimeSpecGetTimeval() - confirm the two types really are
+ layout compatible. */
+ memcpy(&aTimespecs[0], pAccessTime, sizeof(struct timespec));
+ memcpy(&aTimespecs[1], pModificationTime, sizeof(struct timespec));
+ }
+ else
+ {
+ RTFSOBJINFO ObjInfo;
+ int rc = RTFileQueryInfo(hFile, &ObjInfo, RTFSOBJATTRADD_UNIX);
+ if (RT_FAILURE(rc))
+ return rc;
+ memcpy(&aTimespecs[0], pAccessTime ? pAccessTime : &ObjInfo.AccessTime, sizeof(struct timespec));
+ memcpy(&aTimespecs[1], pModificationTime ? pModificationTime : &ObjInfo.ModificationTime, sizeof(struct timespec));
+ }
+
+ if (futimens(RTFileToNative(hFile), aTimespecs))
+ {
+ int rc = RTErrConvertFromErrno(errno);
+ Log(("RTFileSetTimes(%RTfile,%p,%p,,): returns %Rrc\n", hFile, pAccessTime, pModificationTime, rc));
+ return rc;
+ }
+#else
+ /*
+ * Convert the input to timeval, getting the missing one if necessary,
+ * and call the API which does the change.
+ */
+ struct timeval aTimevals[2]; /* [0] = access time, [1] = modification time */
+ if (pAccessTime && pModificationTime)
+ {
+ RTTimeSpecGetTimeval(pAccessTime, &aTimevals[0]);
+ RTTimeSpecGetTimeval(pModificationTime, &aTimevals[1]);
+ }
+ else
+ {
+ RTFSOBJINFO ObjInfo;
+ int rc = RTFileQueryInfo(hFile, &ObjInfo, RTFSOBJATTRADD_UNIX);
+ if (RT_FAILURE(rc))
+ return rc;
+ RTTimeSpecGetTimeval(pAccessTime ? pAccessTime : &ObjInfo.AccessTime, &aTimevals[0]);
+ RTTimeSpecGetTimeval(pModificationTime ? pModificationTime : &ObjInfo.ModificationTime, &aTimevals[1]);
+ }
+
+ /* XXX this falls back to utimes("/proc/self/fd/...",...) for older kernels/glibcs and this
+ * will not work for hardened builds where this directory is owned by root.root and mode 0500 */
+ if (futimes(RTFileToNative(hFile), aTimevals))
+ {
+ int rc = RTErrConvertFromErrno(errno);
+ Log(("RTFileSetTimes(%RTfile,%p,%p,,): returns %Rrc\n", hFile, pAccessTime, pModificationTime, rc));
+ return rc;
+ }
+#endif
+ return VINF_SUCCESS;
+}
+
diff --git a/src/VBox/Runtime/r3/posix/filelock-posix.cpp b/src/VBox/Runtime/r3/posix/filelock-posix.cpp
new file mode 100644
index 00000000..3a0e230f
--- /dev/null
+++ b/src/VBox/Runtime/r3/posix/filelock-posix.cpp
@@ -0,0 +1,148 @@
+/* $Id: filelock-posix.cpp $ */
+/** @file
+ * IPRT - File Locking, POSIX.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP RTLOGGROUP_FILE
+
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/ioctl.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/time.h>
+
+#include <iprt/file.h>
+#include <iprt/assert.h>
+#include <iprt/string.h>
+#include <iprt/err.h>
+#include <iprt/log.h>
+#include "internal/file.h"
+#include "internal/fs.h"
+
+
+
+
+/**
+ * Takes a read or write lock on a byte range of a file using fcntl().
+ *
+ * @returns VINF_SUCCESS on success, VERR_INVALID_PARAMETER for unknown flag
+ *          bits, VERR_NOT_SUPPORTED when the range cannot be expressed in
+ *          off_t, VERR_FILE_LOCK_VIOLATION when fcntl fails with EAGAIN or
+ *          EACCES, otherwise the converted errno.
+ * @param   hFile       The file handle.
+ * @param   fLock       RTFILE_LOCK_XXX flags (write vs. read, wait vs. no wait).
+ * @param   offLock     Start offset of the range; asserted to be non-negative.
+ * @param   cbLock      Length of the range in bytes.
+ */
+RTR3DECL(int) RTFileLock(RTFILE hFile, unsigned fLock, int64_t offLock, uint64_t cbLock)
+{
+    Assert(offLock >= 0);
+
+    /* Reject flag bits outside the lock mask. */
+    if (fLock & ~RTFILE_LOCK_MASK)
+    {
+        AssertMsgFailed(("Invalid fLock=%08X\n", fLock));
+        return VERR_INVALID_PARAMETER;
+    }
+
+    /*
+     * When off_t is narrower than the 64-bit inputs, the offset, the length
+     * and their sum must all fit into the lower 32 bits.
+     */
+    if (sizeof(off_t) < sizeof(cbLock))
+    {
+        if (   (offLock >> 32) != 0
+            || (cbLock >> 32) != 0
+            || ((offLock + cbLock) >> 32) != 0)
+        {
+            AssertMsgFailed(("64-bit file i/o not supported! offLock=%lld cbLock=%lld\n", offLock, cbLock));
+            return VERR_NOT_SUPPORTED;
+        }
+    }
+
+    /* Describe the range and the kind of lock wanted. */
+    struct flock LockReq;
+    Assert(RTFILE_LOCK_WRITE);
+    if (fLock & RTFILE_LOCK_WRITE)
+        LockReq.l_type = F_WRLCK;
+    else
+        LockReq.l_type = F_RDLCK;
+    LockReq.l_whence = SEEK_SET;
+    LockReq.l_start  = (off_t)offLock;
+    LockReq.l_len    = (off_t)cbLock;
+    LockReq.l_pid    = 0;
+
+    /* Pick the waiting or the non-waiting variant of the request. */
+    Assert(RTFILE_LOCK_WAIT);
+    int const iCmd = (fLock & RTFILE_LOCK_WAIT) ? F_SETLKW : F_SETLK;
+    if (fcntl(RTFileToNative(hFile), iCmd, &LockReq) < 0)
+    {
+        int const iSavedErrno = errno;
+        if (iSavedErrno == EAGAIN || iSavedErrno == EACCES)
+            return VERR_FILE_LOCK_VIOLATION;
+        return RTErrConvertFromErrno(iSavedErrno);
+    }
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Changes the lock on a byte range by simply requesting the lock again
+ * through RTFileLock() with the new flags.
+ *
+ * @returns Whatever RTFileLock() returns.
+ * @param   hFile       The file handle.
+ * @param   fLock       RTFILE_LOCK_XXX flags for the new lock.
+ * @param   offLock     Start offset of the range.
+ * @param   cbLock      Length of the range in bytes.
+ */
+RTR3DECL(int) RTFileChangeLock(RTFILE hFile, unsigned fLock, int64_t offLock, uint64_t cbLock)
+{
+    /** @todo VERR_FILE_NOT_LOCKED is never returned for now. */
+    int const rc = RTFileLock(hFile, fLock, offLock, cbLock);
+    return rc;
+}
+
+
+/**
+ * Releases a lock on a byte range of a file using fcntl(F_SETLK, F_UNLCK).
+ *
+ * @returns VINF_SUCCESS on success, VERR_NOT_SUPPORTED when the range cannot
+ *          be expressed in off_t, VERR_FILE_LOCK_VIOLATION when fcntl fails
+ *          with EAGAIN or EACCES, otherwise the converted errno.
+ * @param   hFile       The file handle.
+ * @param   offLock     Start offset of the range; asserted to be non-negative.
+ * @param   cbLock      Length of the range in bytes.
+ */
+RTR3DECL(int) RTFileUnlock(RTFILE hFile, int64_t offLock, uint64_t cbLock)
+{
+    Assert(offLock >= 0);
+
+    /*
+     * When off_t is narrower than the 64-bit inputs, the offset, the length
+     * and their sum must all fit into the lower 32 bits.
+     */
+    if (sizeof(off_t) < sizeof(cbLock))
+    {
+        if (   (offLock >> 32) != 0
+            || (cbLock >> 32) != 0
+            || ((offLock + cbLock) >> 32) != 0)
+        {
+            AssertMsgFailed(("64-bit file i/o not supported! offLock=%lld cbLock=%lld\n", offLock, cbLock));
+            return VERR_NOT_SUPPORTED;
+        }
+    }
+
+    /* Describe the range to unlock. */
+    struct flock UnlockReq;
+    UnlockReq.l_type   = F_UNLCK;
+    UnlockReq.l_whence = SEEK_SET;
+    UnlockReq.l_start  = (off_t)offLock;
+    UnlockReq.l_len    = (off_t)cbLock;
+    UnlockReq.l_pid    = 0;
+
+    if (fcntl(RTFileToNative(hFile), F_SETLK, &UnlockReq) < 0)
+    {
+        /** @todo check error codes for non existing lock. */
+        int const iSavedErrno = errno;
+        if (iSavedErrno == EAGAIN || iSavedErrno == EACCES)
+            return VERR_FILE_LOCK_VIOLATION;
+        return RTErrConvertFromErrno(iSavedErrno);
+    }
+    return VINF_SUCCESS;
+}
+
diff --git a/src/VBox/Runtime/r3/posix/fs-posix.cpp b/src/VBox/Runtime/r3/posix/fs-posix.cpp
new file mode 100644
index 00000000..04b3289a
--- /dev/null
+++ b/src/VBox/Runtime/r3/posix/fs-posix.cpp
@@ -0,0 +1,346 @@
+/* $Id: fs-posix.cpp $ */
+/** @file
+ * IPRT - File System, POSIX.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP RTLOGGROUP_FS
+#include <sys/statvfs.h>
+#include <errno.h>
+#include <stdio.h>
+#ifdef RT_OS_LINUX
+# include <mntent.h>
+#endif
+#if defined(RT_OS_DARWIN) || defined(RT_OS_FREEBSD)
+# include <sys/mount.h>
+#endif
+
+#include <iprt/fs.h>
+#include "internal/iprt.h"
+
+#include <iprt/assert.h>
+#include <iprt/errcore.h>
+#include <iprt/log.h>
+#include <iprt/string.h>
+#include "internal/fs.h"
+#include "internal/path.h"
+
+
+
+/**
+ * Queries total size, available space, block size and sector size of the
+ * file system a path lives on, based on statvfs().
+ *
+ * @returns IPRT status code (converted errno when statvfs fails).
+ * @param   pszFsPath   Path on the file system to query; must not be empty.
+ * @param   pcbTotal    Where to return f_blocks * f_frsize.  Optional.
+ * @param   pcbFree     Where to return f_bavail * f_frsize.  Optional.
+ * @param   pcbBlock    Where to return f_frsize.  Optional.
+ * @param   pcbSector   Where to return the sector size; always reported as
+ *                      512 since it is not queried.  Optional.
+ */
+RTR3DECL(int) RTFsQuerySizes(const char *pszFsPath, RTFOFF *pcbTotal, RTFOFF *pcbFree,
+                             uint32_t *pcbBlock, uint32_t *pcbSector)
+{
+    /* Input validation. */
+    AssertPtrReturn(pszFsPath, VERR_INVALID_POINTER);
+    AssertReturn(*pszFsPath != '\0', VERR_INVALID_PARAMETER);
+
+    /* Convert the path to the native code set and ask the system. */
+    char const *pszNativeFsPath;
+    int rc = rtPathToNative(&pszNativeFsPath, pszFsPath, NULL);
+    if (RT_SUCCESS(rc))
+    {
+        /** @todo I'm not quite sure if statvfs was properly specified by SuS, I have to check my own
+         * implementation and FreeBSD before this can eventually be promoted to posix. */
+        struct statvfs StatVFS;
+        RT_ZERO(StatVFS);
+        if (statvfs(pszNativeFsPath, &StatVFS) != 0)
+            rc = RTErrConvertFromErrno(errno);
+        else
+        {
+            /* Hand back only the values the caller asked for. */
+            if (pcbTotal)
+                *pcbTotal = (RTFOFF)StatVFS.f_blocks * StatVFS.f_frsize;
+            if (pcbFree)
+                *pcbFree = (RTFOFF)StatVFS.f_bavail * StatVFS.f_frsize;
+            if (pcbBlock)
+                *pcbBlock = StatVFS.f_frsize;
+            /* no idea how to get the sector... */
+            if (pcbSector)
+                *pcbSector = 512;
+        }
+        rtPathFreeNative(pszNativeFsPath, pszFsPath);
+    }
+
+    LogFlow(("RTFsQuerySizes(%p:{%s}, %p:{%RTfoff}, %p:{%RTfoff}, %p:{%RX32}, %p:{%RX32}): returns %Rrc\n",
+             pszFsPath, pszFsPath, pcbTotal, pcbTotal ? *pcbTotal : 0, pcbFree, pcbFree ? *pcbFree : 0,
+             pcbBlock, pcbBlock ? *pcbBlock : 0, pcbSector, pcbSector ? *pcbSector : 0, rc));
+    return rc;
+}
+
+
+/**
+ * Queries a "serial number" for the file system a path lives on.
+ *
+ * The value returned is the st_dev device id from stat(), truncated to
+ * 32 bits.
+ *
+ * @returns IPRT status code (converted errno when stat fails).
+ * @param   pszFsPath   Path on the file system to query; must not be empty.
+ * @param   pu32Serial  Where to return the device id.
+ */
+RTR3DECL(int) RTFsQuerySerial(const char *pszFsPath, uint32_t *pu32Serial)
+{
+    /* Input validation. */
+    AssertPtrReturn(pszFsPath, VERR_INVALID_POINTER);
+    AssertReturn(*pszFsPath != '\0', VERR_INVALID_PARAMETER);
+    AssertPtrReturn(pu32Serial, VERR_INVALID_POINTER);
+
+    /* Convert the path to the native code set and stat it. */
+    char const *pszNativeFsPath;
+    int rc = rtPathToNative(&pszNativeFsPath, pszFsPath, NULL);
+    if (RT_SUCCESS(rc))
+    {
+        struct stat Stat;
+        if (stat(pszNativeFsPath, &Stat) != 0)
+            rc = RTErrConvertFromErrno(errno);
+        else if (pu32Serial)
+            *pu32Serial = (uint32_t)Stat.st_dev;
+        rtPathFreeNative(pszNativeFsPath, pszFsPath);
+    }
+    LogFlow(("RTFsQuerySerial(%p:{%s}, %p:{%RX32}: returns %Rrc\n",
+             pszFsPath, pszFsPath, pu32Serial, pu32Serial ? *pu32Serial : 0, rc));
+    return rc;
+}
+
+
+/**
+ * Queries file system properties for the file system a path lives on.
+ *
+ * Only the maximum component length and the read-only flag come from
+ * statvfs(); the remaining members are filled with fixed values.
+ *
+ * @returns IPRT status code (converted errno when statvfs fails).
+ * @param   pszFsPath    Path on the file system to query; must not be empty.
+ * @param   pProperties  Where to return the properties.
+ */
+RTR3DECL(int) RTFsQueryProperties(const char *pszFsPath, PRTFSPROPERTIES pProperties)
+{
+    /* Input validation. */
+    AssertPtrReturn(pszFsPath, VERR_INVALID_POINTER);
+    AssertReturn(*pszFsPath != '\0', VERR_INVALID_PARAMETER);
+    AssertPtrReturn(pProperties, VERR_INVALID_POINTER);
+
+    /* Convert the path to the native code set and ask the system. */
+    char const *pszNativeFsPath;
+    int rc = rtPathToNative(&pszNativeFsPath, pszFsPath, NULL);
+    if (RT_SUCCESS(rc))
+    {
+        struct statvfs StatVFS;
+        RT_ZERO(StatVFS);
+        if (statvfs(pszNativeFsPath, &StatVFS) != 0)
+            rc = RTErrConvertFromErrno(errno);
+        else
+        {
+            /* Case sensitivity is a compile-time decision based on the host OS. */
+#if defined(RT_OS_OS2) || defined(RT_OS_WINDOWS)
+            bool const fCaseSensitive = false;
+#else
+            bool const fCaseSensitive = true;
+#endif
+            pProperties->cbMaxComponent   = StatVFS.f_namemax;
+            pProperties->fCaseSensitive   = fCaseSensitive;
+            pProperties->fCompressed      = false;
+            pProperties->fFileCompression = false;
+            pProperties->fReadOnly        = (StatVFS.f_flag & ST_RDONLY) != 0;
+            pProperties->fRemote          = false;
+            pProperties->fSupportsUnicode = true;
+        }
+        rtPathFreeNative(pszNativeFsPath, pszFsPath);
+    }
+
+    LogFlow(("RTFsQueryProperties(%p:{%s}, %p:{.cbMaxComponent=%u, .fReadOnly=%RTbool}): returns %Rrc\n",
+             pszFsPath, pszFsPath, pProperties, pProperties->cbMaxComponent, pProperties->fReadOnly, rc));
+    return rc;
+}
+
+
+/**
+ * Tells whether the file system is case sensitive.
+ *
+ * The answer is decided at compile time from the host OS; the path is not
+ * examined.
+ *
+ * @returns false on OS/2 and Windows builds, true otherwise.
+ * @param   pszFsPath   Ignored.
+ */
+RTR3DECL(bool) RTFsIsCaseSensitive(const char *pszFsPath)
+{
+    RT_NOREF_PV(pszFsPath);
+#if defined(RT_OS_OS2) || defined(RT_OS_WINDOWS)
+    bool const fCaseSensitive = false;
+#else
+    bool const fCaseSensitive = true;
+#endif
+    return fCaseSensitive;
+}
+
+
+/**
+ * Queries the type of the file system that a path resides on.
+ *
+ * @returns IPRT status code.  VERR_INVALID_POINTER if either pointer is
+ *          invalid, VERR_INVALID_PARAMETER if the path is empty, a converted
+ *          errno if stat() (or statfs() on Darwin/FreeBSD) fails.
+ * @param   pszFsPath   Path to an object on the file system; must not be empty.
+ * @param   penmType    Where to return the type.  It is set to
+ *                      RTFSTYPE_UNKNOWN up front and only overwritten when a
+ *                      known type name is matched.
+ */
+RTR3DECL(int) RTFsQueryType(const char *pszFsPath, PRTFSTYPE penmType)
+{
+    /*
+     * Validate the output pointer before the first write through it, then
+     * give it its default so it is defined on every later return path.
+     */
+    AssertPtrReturn(penmType, VERR_INVALID_POINTER);
+    *penmType = RTFSTYPE_UNKNOWN;
+
+    /*
+     * Validate input.
+     */
+    AssertPtrReturn(pszFsPath, VERR_INVALID_POINTER);
+    AssertReturn(*pszFsPath, VERR_INVALID_PARAMETER);
+
+    /*
+     * Convert the path and stat it; the device id is what ties the path to
+     * a mount entry in the Linux code below.
+     */
+    char const *pszNativeFsPath;
+    int rc = rtPathToNative(&pszNativeFsPath, pszFsPath, NULL);
+    if (RT_SUCCESS(rc))
+    {
+        struct stat Stat;
+        if (!stat(pszNativeFsPath, &Stat))
+        {
+#if defined(RT_OS_LINUX)
+            /* Walk the mount table looking for a mount point on the same device. */
+            FILE *mounted = setmntent("/proc/mounts", "r");
+            if (!mounted)
+                mounted = setmntent("/etc/mtab", "r");
+            if (mounted)
+            {
+                char            szBuf[1024];
+                struct stat     mntStat;
+                struct mntent   mntEnt;
+                while (getmntent_r(mounted, &mntEnt, szBuf, sizeof(szBuf)))
+                {
+                    if (!stat(mntEnt.mnt_dir, &mntStat))
+                    {
+                        if (mntStat.st_dev == Stat.st_dev)
+                        {
+                            if (!strcmp("ext4", mntEnt.mnt_type))
+                                *penmType = RTFSTYPE_EXT4;
+                            else if (!strcmp("ext3", mntEnt.mnt_type))
+                                *penmType = RTFSTYPE_EXT3;
+                            else if (!strcmp("ext2", mntEnt.mnt_type))
+                                *penmType = RTFSTYPE_EXT2;
+                            else if (!strcmp("jfs", mntEnt.mnt_type))
+                                *penmType = RTFSTYPE_JFS;
+                            else if (!strcmp("xfs", mntEnt.mnt_type))
+                                *penmType = RTFSTYPE_XFS;
+                            else if (!strcmp("btrfs", mntEnt.mnt_type))
+                                *penmType = RTFSTYPE_BTRFS;
+                            else if (   !strcmp("vfat", mntEnt.mnt_type)
+                                     || !strcmp("msdos", mntEnt.mnt_type))
+                                *penmType = RTFSTYPE_FAT;
+                            else if (!strcmp("ntfs", mntEnt.mnt_type))
+                                *penmType = RTFSTYPE_NTFS;
+                            else if (!strcmp("hpfs", mntEnt.mnt_type))
+                                *penmType = RTFSTYPE_HPFS;
+                            else if (!strcmp("ufs", mntEnt.mnt_type))
+                                *penmType = RTFSTYPE_UFS;
+                            else if (!strcmp("tmpfs", mntEnt.mnt_type))
+                                *penmType = RTFSTYPE_TMPFS;
+                            else if (!strcmp("hfsplus", mntEnt.mnt_type))
+                                *penmType = RTFSTYPE_HFS;
+                            else if (!strcmp("udf", mntEnt.mnt_type))
+                                *penmType = RTFSTYPE_UDF;
+                            else if (!strcmp("iso9660", mntEnt.mnt_type))
+                                *penmType = RTFSTYPE_ISO9660;
+                            else if (!strcmp("smbfs", mntEnt.mnt_type))
+                                *penmType = RTFSTYPE_SMBFS;
+                            else if (!strcmp("cifs", mntEnt.mnt_type))
+                                *penmType = RTFSTYPE_CIFS;
+                            else if (!strcmp("nfs", mntEnt.mnt_type))
+                                *penmType = RTFSTYPE_NFS;
+                            else if (!strcmp("nfs4", mntEnt.mnt_type))
+                                *penmType = RTFSTYPE_NFS;
+                            else if (!strcmp("ocfs2", mntEnt.mnt_type))
+                                *penmType = RTFSTYPE_OCFS2;
+                            else if (!strcmp("sysfs", mntEnt.mnt_type))
+                                *penmType = RTFSTYPE_SYSFS;
+                            else if (!strcmp("proc", mntEnt.mnt_type))
+                                *penmType = RTFSTYPE_PROC;
+                            else if (   !strcmp("fuse", mntEnt.mnt_type)
+                                     || !strncmp("fuse.", mntEnt.mnt_type, 5)
+                                     || !strcmp("fuseblk", mntEnt.mnt_type))
+                                *penmType = RTFSTYPE_FUSE;
+                            else
+                            {
+                                /* sometimes there are more than one entry for the same partition */
+                                continue;
+                            }
+                            /* Known type found, stop scanning. */
+                            break;
+                        }
+                    }
+                }
+                endmntent(mounted);
+            }
+
+#elif defined(RT_OS_SOLARIS)
+            /*
+             * Home directories are normally loopback mounted in Solaris 11 (st_fstype=="lofs")
+             * so statvfs(2) is needed to get the underlying file system information.
+             */
+            struct statvfs statvfsBuf;
+            if (!statvfs(pszNativeFsPath, &statvfsBuf))
+            {
+                if (!strcmp("zfs", statvfsBuf.f_basetype))
+                    *penmType = RTFSTYPE_ZFS;
+                else if (!strcmp("ufs", statvfsBuf.f_basetype))
+                    *penmType = RTFSTYPE_UFS;
+                else if (!strcmp("nfs", statvfsBuf.f_basetype))
+                    *penmType = RTFSTYPE_NFS;
+            }
+
+#elif defined(RT_OS_DARWIN) || defined(RT_OS_FREEBSD)
+            struct statfs statfsBuf;
+            if (!statfs(pszNativeFsPath, &statfsBuf))
+            {
+                if (!strcmp("hfs", statfsBuf.f_fstypename))
+                    *penmType = RTFSTYPE_HFS;
+                else if (!strcmp("apfs", statfsBuf.f_fstypename)) /** @todo verify apfs signature. */
+                    *penmType = RTFSTYPE_APFS;
+                else if (   !strcmp("fat", statfsBuf.f_fstypename)
+                         || !strcmp("msdos", statfsBuf.f_fstypename))
+                    *penmType = RTFSTYPE_FAT;
+                else if (!strcmp("ntfs", statfsBuf.f_fstypename))
+                    *penmType = RTFSTYPE_NTFS;
+                else if (!strcmp("autofs", statfsBuf.f_fstypename))
+                    *penmType = RTFSTYPE_AUTOFS;
+                else if (!strcmp("devfs", statfsBuf.f_fstypename))
+                    *penmType = RTFSTYPE_DEVFS;
+                else if (!strcmp("nfs", statfsBuf.f_fstypename))
+                    *penmType = RTFSTYPE_NFS;
+                else if (!strcmp("ufs", statfsBuf.f_fstypename))
+                    *penmType = RTFSTYPE_UFS;
+                else if (!strcmp("zfs", statfsBuf.f_fstypename))
+                    *penmType = RTFSTYPE_ZFS;
+            }
+            else
+                rc = RTErrConvertFromErrno(errno);
+#endif
+        }
+        else
+            rc = RTErrConvertFromErrno(errno);
+        rtPathFreeNative(pszNativeFsPath, pszFsPath);
+    }
+
+    return rc;
+}
diff --git a/src/VBox/Runtime/r3/posix/fs2-posix.cpp b/src/VBox/Runtime/r3/posix/fs2-posix.cpp
new file mode 100644
index 00000000..adbdecfe
--- /dev/null
+++ b/src/VBox/Runtime/r3/posix/fs2-posix.cpp
@@ -0,0 +1,165 @@
+/* $Id: fs2-posix.cpp $ */
+/** @file
+ * IPRT - File System Helpers, POSIX, Part 2.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define RTTIME_INCL_TIMESPEC
+#include <sys/time.h>
+#include <sys/param.h>
+#ifndef DEV_BSIZE
+# include <sys/stat.h>
+# if defined(RT_OS_HAIKU) && !defined(S_BLKSIZE)
+# define S_BLKSIZE 512
+# endif
+# define DEV_BSIZE S_BLKSIZE /** @todo bird: add DEV_BSIZE to sys/param.h on OS/2. */
+#endif
+
+#include <iprt/fs.h>
+#include "internal/iprt.h"
+
+#include <iprt/assert.h>
+#include <iprt/time.h>
+#include "internal/fs.h"
+
+
+/**
+ * Internal worker function which sets up RTFSOBJINFO based on a UNIX stat struct.
+ *
+ * Fills in the sizes, the four timestamps, the file mode and the
+ * RTFSOBJATTRADD_UNIX additional attributes.  Which stat members supply the
+ * timestamps depends on the HAVE_STAT_XXX build defines; when there is no
+ * birth time member, the change time is used for it.
+ *
+ * @param   pObjInfo        The file system object info structure to setup.
+ * @param   pStat           The stat structure to use.
+ * @param   pszName         The filename which this applies to (exe/hidden check).
+ * @param   cbName          The length of that filename. (optional, set 0)
+ */
+void rtFsConvertStatToObjInfo(PRTFSOBJINFO pObjInfo, const struct stat *pStat, const char *pszName, unsigned cbName)
+{
+    pObjInfo->cbObject    = pStat->st_size;
+    /* Widen to the destination type before multiplying so that a narrow
+       st_blocks type cannot overflow for large files. */
+    pObjInfo->cbAllocated = pStat->st_blocks;
+    pObjInfo->cbAllocated *= DEV_BSIZE;
+
+#ifdef HAVE_STAT_NSEC
+    RTTimeSpecAddNano(RTTimeSpecSetSeconds(&pObjInfo->AccessTime,       pStat->st_atime),     pStat->st_atimensec);
+    RTTimeSpecAddNano(RTTimeSpecSetSeconds(&pObjInfo->ModificationTime, pStat->st_mtime),     pStat->st_mtimensec);
+    RTTimeSpecAddNano(RTTimeSpecSetSeconds(&pObjInfo->ChangeTime,       pStat->st_ctime),     pStat->st_ctimensec);
+# ifdef HAVE_STAT_BIRTHTIME
+    RTTimeSpecAddNano(RTTimeSpecSetSeconds(&pObjInfo->BirthTime,        pStat->st_birthtime), pStat->st_birthtimensec);
+# endif
+
+#elif defined(HAVE_STAT_TIMESPEC_BRIEF)
+    RTTimeSpecSetTimespec(&pObjInfo->AccessTime,       &pStat->st_atim);
+    RTTimeSpecSetTimespec(&pObjInfo->ModificationTime, &pStat->st_mtim);
+    RTTimeSpecSetTimespec(&pObjInfo->ChangeTime,       &pStat->st_ctim);
+# ifdef HAVE_STAT_BIRTHTIME
+    RTTimeSpecSetTimespec(&pObjInfo->BirthTime,        &pStat->st_birthtim);
+# endif
+
+#elif defined(HAVE_STAT_TIMESPEC)
+    /* Pass the address of each timespec member, same as the _BRIEF variant above. */
+    RTTimeSpecSetTimespec(&pObjInfo->AccessTime,       &pStat->st_atimespec);
+    RTTimeSpecSetTimespec(&pObjInfo->ModificationTime, &pStat->st_mtimespec);
+    RTTimeSpecSetTimespec(&pObjInfo->ChangeTime,       &pStat->st_ctimespec);
+# ifdef HAVE_STAT_BIRTHTIME
+    RTTimeSpecSetTimespec(&pObjInfo->BirthTime,        &pStat->st_birthtimespec);
+# endif
+
+#else /* just the normal stuff */
+    RTTimeSpecSetSeconds(&pObjInfo->AccessTime,       pStat->st_atime);
+    RTTimeSpecSetSeconds(&pObjInfo->ModificationTime, pStat->st_mtime);
+    RTTimeSpecSetSeconds(&pObjInfo->ChangeTime,       pStat->st_ctime);
+# ifdef HAVE_STAT_BIRTHTIME
+    RTTimeSpecSetSeconds(&pObjInfo->BirthTime,        pStat->st_birthtime);
+# endif
+#endif
+#ifndef HAVE_STAT_BIRTHTIME
+    /* No birth time available from stat, use the change time instead. */
+    pObjInfo->BirthTime = pObjInfo->ChangeTime;
+#endif
+
+    /* the file mode */
+    RTFMODE fMode = pStat->st_mode & RTFS_UNIX_MASK;
+    /* The mode bits are taken over unconverted, so check that the IPRT
+       constants have the same values as the host's. */
+    Assert(RTFS_UNIX_ISUID == S_ISUID);
+    Assert(RTFS_UNIX_ISGID == S_ISGID);
+#ifdef S_ISTXT
+    Assert(RTFS_UNIX_ISTXT == S_ISTXT);
+#elif defined(S_ISVTX)
+    Assert(RTFS_UNIX_ISTXT == S_ISVTX);
+#else
+#error "S_ISVTX / S_ISTXT isn't defined"
+#endif
+    Assert(RTFS_UNIX_IRWXU == S_IRWXU);
+    Assert(RTFS_UNIX_IRUSR == S_IRUSR);
+    Assert(RTFS_UNIX_IWUSR == S_IWUSR);
+    Assert(RTFS_UNIX_IXUSR == S_IXUSR);
+    Assert(RTFS_UNIX_IRWXG == S_IRWXG);
+    Assert(RTFS_UNIX_IRGRP == S_IRGRP);
+    Assert(RTFS_UNIX_IWGRP == S_IWGRP);
+    Assert(RTFS_UNIX_IXGRP == S_IXGRP);
+    Assert(RTFS_UNIX_IRWXO == S_IRWXO);
+    Assert(RTFS_UNIX_IROTH == S_IROTH);
+    Assert(RTFS_UNIX_IWOTH == S_IWOTH);
+    Assert(RTFS_UNIX_IXOTH == S_IXOTH);
+    Assert(RTFS_TYPE_FIFO == S_IFIFO);
+    Assert(RTFS_TYPE_DEV_CHAR == S_IFCHR);
+    Assert(RTFS_TYPE_DIRECTORY == S_IFDIR);
+    Assert(RTFS_TYPE_DEV_BLOCK == S_IFBLK);
+    Assert(RTFS_TYPE_FILE == S_IFREG);
+    Assert(RTFS_TYPE_SYMLINK == S_IFLNK);
+    Assert(RTFS_TYPE_SOCKET == S_IFSOCK);
+#ifdef S_IFWHT
+    Assert(RTFS_TYPE_WHITEOUT == S_IFWHT);
+#endif
+    Assert(RTFS_TYPE_MASK == S_IFMT);
+
+    pObjInfo->Attr.fMode  = rtFsModeFromUnix(fMode, pszName, cbName, 0);
+
+    /* additional unix attribs */
+    pObjInfo->Attr.enmAdditional          = RTFSOBJATTRADD_UNIX;
+    pObjInfo->Attr.u.Unix.uid             = pStat->st_uid;
+    pObjInfo->Attr.u.Unix.gid             = pStat->st_gid;
+    pObjInfo->Attr.u.Unix.cHardlinks      = pStat->st_nlink;
+    pObjInfo->Attr.u.Unix.INodeIdDevice   = pStat->st_dev;
+    pObjInfo->Attr.u.Unix.INodeId         = pStat->st_ino;
+#ifdef HAVE_STAT_FLAGS
+    pObjInfo->Attr.u.Unix.fFlags          = pStat->st_flags;
+#else
+    pObjInfo->Attr.u.Unix.fFlags          = 0;
+#endif
+#ifdef HAVE_STAT_GEN
+    pObjInfo->Attr.u.Unix.GenerationId    = pStat->st_gen;
+#else
+    pObjInfo->Attr.u.Unix.GenerationId    = 0;
+#endif
+    pObjInfo->Attr.u.Unix.Device          = pStat->st_rdev;
+}
+
diff --git a/src/VBox/Runtime/r3/posix/fs3-posix.cpp b/src/VBox/Runtime/r3/posix/fs3-posix.cpp
new file mode 100644
index 00000000..368ae8e3
--- /dev/null
+++ b/src/VBox/Runtime/r3/posix/fs3-posix.cpp
@@ -0,0 +1,94 @@
+/* $Id: fs3-posix.cpp $ */
+/** @file
+ * IPRT - File System Helpers, POSIX, Part 3.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#include <iprt/fs.h>
+#include "internal/iprt.h"
+
+#include <iprt/assert.h>
+#include <iprt/string.h>
+#include "internal/fs.h"
+
+#include <sys/time.h>
+#include <grp.h>
+#include <pwd.h>
+
+
+/**
+ * Fills in the RTFSOBJATTRADD_UNIX_OWNER additional attributes.
+ *
+ * The user name is looked up with getpwuid_r(); if the lookup fails or finds
+ * nothing, the name is left as an empty string.
+ *
+ * @param   pObjInfo        The object info to fill add attrs for.
+ * @param   uid             The user id.
+ */
+void rtFsObjInfoAttrSetUnixOwner(PRTFSOBJINFO pObjInfo, RTUID uid)
+{
+    /* Start with the id and an empty name. */
+    pObjInfo->Attr.enmAdditional         = RTFSOBJATTRADD_UNIX_OWNER;
+    pObjInfo->Attr.u.UnixOwner.szName[0] = '\0';
+    pObjInfo->Attr.u.UnixOwner.uid       = uid;
+
+    /* Resolve the name, using a stack buffer for the string data. */
+    struct passwd  PwdEntry;
+    struct passwd *pResult;
+    char           szScratch[_4K];
+    if (   getpwuid_r(uid, &PwdEntry, szScratch, sizeof(szScratch), &pResult) == 0
+        && pResult != NULL)
+        RTStrCopy(pObjInfo->Attr.u.UnixOwner.szName, sizeof(pObjInfo->Attr.u.UnixOwner.szName), pResult->pw_name);
+}
+
+
+/**
+ * Fills in the RTFSOBJATTRADD_UNIX_GROUP additional attributes.
+ *
+ * The group name is looked up with getgrgid_r(); if the lookup fails or finds
+ * nothing, the name is left as an empty string.
+ *
+ * @param   pObjInfo        The object info to fill add attrs for.
+ * @param   gid             The group id.
+ */
+void rtFsObjInfoAttrSetUnixGroup(PRTFSOBJINFO pObjInfo, RTUID gid)
+{
+    /* Start with the id and an empty name. */
+    pObjInfo->Attr.enmAdditional         = RTFSOBJATTRADD_UNIX_GROUP;
+    pObjInfo->Attr.u.UnixGroup.szName[0] = '\0';
+    pObjInfo->Attr.u.UnixGroup.gid       = gid;
+
+    /* Resolve the name, using a stack buffer for the string data. */
+    struct group  GrpEntry;
+    struct group *pResult;
+    char          szScratch[_4K];
+    if (   getgrgid_r(gid, &GrpEntry, szScratch, sizeof(szScratch), &pResult) == 0
+        && pResult != NULL)
+        RTStrCopy(pObjInfo->Attr.u.UnixGroup.szName, sizeof(pObjInfo->Attr.u.UnixGroup.szName), pResult->gr_name);
+}
+
diff --git a/src/VBox/Runtime/r3/posix/ldrNative-posix.cpp b/src/VBox/Runtime/r3/posix/ldrNative-posix.cpp
new file mode 100644
index 00000000..9b33a0ff
--- /dev/null
+++ b/src/VBox/Runtime/r3/posix/ldrNative-posix.cpp
@@ -0,0 +1,207 @@
+/* $Id: ldrNative-posix.cpp $ */
+/** @file
+ * IPRT - Binary Image Loader, POSIX native.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP RTLOGGROUP_LDR
+#include <dlfcn.h>
+
+#include <iprt/ldr.h>
+#include <iprt/assert.h>
+#include <iprt/path.h>
+#include <iprt/alloca.h>
+#include <iprt/string.h>
+#include <iprt/err.h>
+#include <iprt/log.h>
+#include "internal/ldr.h"
+
+
+/*********************************************************************************************************************************
+* Global Variables *
+*********************************************************************************************************************************/
+#if defined(RT_OS_OS2) || defined(RT_OS_WINDOWS)
+static const char g_szSuff[] = ".DLL";
+#elif defined(RT_OS_L4)
+static const char g_szSuff[] = ".s.so";
+#elif defined(RT_OS_DARWIN)
+static const char g_szSuff[] = ".dylib";
+#else
+static const char g_szSuff[] = ".so";
+#endif
+
+
+/**
+ * Loads a native module with dlopen().
+ *
+ * The platform's default library suffix is appended when the filename has
+ * none and RTLDRLOAD_FLAGS_NO_SUFFIX is not given.
+ *
+ * @returns VINF_SUCCESS on success, VERR_FILE_NOT_FOUND if dlopen fails
+ *          (whatever the reason), VERR_NO_MEMORY if the temporary name
+ *          buffer could not be obtained.
+ * @param   pszFilename     The module to load.
+ * @param   phHandle        Where to return the dlopen handle on success.
+ * @param   fFlags          RTLDRLOAD_FLAGS_XXX; RTLDRLOAD_FLAGS_GLOBAL selects
+ *                          RTLD_GLOBAL, otherwise RTLD_LOCAL is used.
+ * @param   pErrInfo        Where to return the dlerror() text on failure.
+ */
+DECLHIDDEN(int) rtldrNativeLoad(const char *pszFilename, uintptr_t *phHandle, uint32_t fFlags, PRTERRINFO pErrInfo)
+{
+    /*
+     * Do we need to add an extension?
+     */
+    if (!RTPathHasSuffix(pszFilename) && !(fFlags & RTLDRLOAD_FLAGS_NO_SUFFIX))
+    {
+        size_t cch = strlen(pszFilename);
+        char *psz = (char *)alloca(cch + sizeof(g_szSuff));
+        if (!psz)
+            return RTErrInfoSet(pErrInfo, VERR_NO_MEMORY, "alloca failed");
+        memcpy(psz, pszFilename, cch);
+        memcpy(psz + cch, g_szSuff, sizeof(g_szSuff)); /* sizeof includes the terminator */
+        pszFilename = psz;
+    }
+
+    /*
+     * Attempt load.
+     */
+    bool const fGlobal = (fFlags & RTLDRLOAD_FLAGS_GLOBAL) != 0;
+    int fFlagsNative = RTLD_NOW;
+    if (fGlobal)
+        fFlagsNative |= RTLD_GLOBAL;
+    else
+        fFlagsNative |= RTLD_LOCAL;
+    void *pvMod = dlopen(pszFilename, fFlagsNative);
+    if (pvMod)
+    {
+        *phHandle = (uintptr_t)pvMod;
+        return VINF_SUCCESS;
+    }
+
+    /* Report the binding mode that was actually passed to dlopen. */
+    const char *pszDlError = dlerror();
+    RTErrInfoSet(pErrInfo, VERR_FILE_NOT_FOUND, pszDlError);
+    LogRel(("rtldrNativeLoad: dlopen('%s', RTLD_NOW | %s) failed: %s\n",
+            pszFilename, fGlobal ? "RTLD_GLOBAL" : "RTLD_LOCAL", pszDlError));
+    return VERR_FILE_NOT_FOUND;
+}
+
+
+/**
+ * Resolves a symbol in a native module using dlsym().
+ *
+ * @returns VINF_SUCCESS if dlsym returned a non-NULL address,
+ *          VERR_SYMBOL_NOT_FOUND otherwise.
+ * @param   pMod        The native module instance.
+ * @param   pszSymbol   The symbol name.
+ * @param   ppvValue    Where to return the dlsym result (NULL on failure).
+ */
+DECLCALLBACK(int) rtldrNativeGetSymbol(PRTLDRMODINTERNAL pMod, const char *pszSymbol, void **ppvValue)
+{
+    PRTLDRMODNATIVE pModNative = (PRTLDRMODNATIVE)pMod;
+#ifdef RT_OS_OS2
+    /* Prefix the symbol with an underscore (assuming __cdecl/gcc-default). */
+    size_t const cchSymbol   = strlen(pszSymbol);
+    char        *pszPrefixed = (char *)alloca(cchSymbol + 2);
+    pszPrefixed[0] = '_';
+    memcpy(pszPrefixed + 1, pszSymbol, cchSymbol + 1);
+    pszSymbol = pszPrefixed;
+#endif
+    void * const pvSym = dlsym((void *)pModNative->hNative, pszSymbol);
+    *ppvValue = pvSym;
+    return pvSym ? VINF_SUCCESS : VERR_SYMBOL_NOT_FOUND;
+}
+
+
+/**
+ * Closes a native module, unloading it with dlclose() unless the module is
+ * flagged RTLDRLOAD_FLAGS_NO_UNLOAD.
+ *
+ * @returns VINF_SUCCESS if the module was unloaded or unloading was skipped,
+ *          VERR_GENERAL_FAILURE if dlclose failed.
+ * @param   pMod        The native module instance.
+ */
+DECLCALLBACK(int) rtldrNativeClose(PRTLDRMODINTERNAL pMod)
+{
+    PRTLDRMODNATIVE pModNative = (PRTLDRMODNATIVE)pMod;
+#ifdef __SANITIZE_ADDRESS__
+    /* If we are compiled with enabled address sanitizer (gcc/llvm), don't
+     * unload the module to prevent <unknown module> in the stack trace */
+    pModNative->fFlags |= RTLDRLOAD_FLAGS_NO_UNLOAD;
+#endif
+    if (   (pModNative->fFlags & RTLDRLOAD_FLAGS_NO_UNLOAD)
+        || !dlclose((void *)pModNative->hNative))
+    {
+        /* Clear the handle so it cannot be used after closing. */
+        pModNative->hNative = (uintptr_t)0;
+        return VINF_SUCCESS;
+    }
+    /* Log under this function's own name and hand %p a real pointer. */
+    Log(("rtldrNativeClose: dlclose(%p) failed: %s\n", (void *)pModNative->hNative, dlerror()));
+    return VERR_GENERAL_FAILURE;
+}
+
+
+/**
+ * Loads a system library given by bare filename via RTLdrLoadEx, optionally
+ * probing versioned ".so.MAJOR" names.
+ *
+ * @returns Status code of the last RTLdrLoadEx attempt.
+ * @param pszFilename The library name; asserted to contain no '/'.
+ * @param pszExt Suffix to append to the name, or NULL for none.
+ * @param fFlags RTLDRLOAD_FLAGS_XXX. The SO_VER begin/end fields give the
+ * range of major versions to probe; they are masked out before
+ * the flags are passed to RTLdrLoadEx.
+ * @param phLdrMod Passed on to RTLdrLoadEx to receive the module handle.
+ */
+DECLHIDDEN(int) rtldrNativeLoadSystem(const char *pszFilename, const char *pszExt, uint32_t fFlags, PRTLDRMOD phLdrMod)
+{
+ /*
+ * For the present we ASSUME that we can trust dlopen to load what we want
+ * when not specifying a path. There seems to be very little we can do to
+ * restrict the places dlopen will search for library without doing
+ * auditing (linux) or something like that.
+ */
+ Assert(strchr(pszFilename, '/') == NULL);
+
+ /* The flags for RTLdrLoadEx, without the version range fields. */
+ uint32_t const fFlags2 = fFlags & ~(RTLDRLOAD_FLAGS_SO_VER_BEGIN_MASK | RTLDRLOAD_FLAGS_SO_VER_END_MASK);
+
+ /*
+ * If no suffix is given and we haven't got any RTLDRLOAD_FLAGS_SO_VER_ range to work
+ * with, we can call RTLdrLoadEx directly.
+ */
+ if (!pszExt)
+ {
+ /* On Darwin, OS/2 and Windows the 'if' below is compiled out, so the
+ return is unconditional there and the pszExt assignment is never reached. */
+#if !defined(RT_OS_DARWIN) && !defined(RT_OS_OS2) && !defined(RT_OS_WINDOWS)
+ if ( (fFlags & RTLDRLOAD_FLAGS_SO_VER_BEGIN_MASK) >> RTLDRLOAD_FLAGS_SO_VER_BEGIN_SHIFT
+ == (fFlags & RTLDRLOAD_FLAGS_SO_VER_END_MASK) >> RTLDRLOAD_FLAGS_SO_VER_END_SHIFT)
+#endif
+ return RTLdrLoadEx(pszFilename, phLdrMod, fFlags2, NULL);
+ pszExt = "";
+ }
+
+ /*
+ * Combine filename and suffix and then do the loading.
+ */
+ size_t const cchFilename = strlen(pszFilename);
+ size_t const cchSuffix = strlen(pszExt);
+ /* The extra 16 + 1 bytes leave room for the ".MAJOR" text formatted further down. */
+ char *pszTmp = (char *)alloca(cchFilename + cchSuffix + 16 + 1);
+ memcpy(pszTmp, pszFilename, cchFilename);
+ memcpy(&pszTmp[cchFilename], pszExt, cchSuffix);
+ pszTmp[cchFilename + cchSuffix] = '\0';
+
+ int rc = RTLdrLoadEx(pszTmp, phLdrMod, fFlags2, NULL);
+
+#if !defined(RT_OS_DARWIN) && !defined(RT_OS_OS2) && !defined(RT_OS_WINDOWS)
+ /*
+ * If the plain name failed to load and it ends in ".so" (no version given),
+ * do a .so.MAJOR search according to the range in fFlags.
+ */
+ if (RT_FAILURE(rc) && !(fFlags & RTLDRLOAD_FLAGS_NO_SUFFIX))
+ {
+ const char *pszActualSuff = RTPathSuffix(pszTmp);
+ if (pszActualSuff && strcmp(pszActualSuff, ".so") == 0)
+ {
+ int32_t const iBegin = (fFlags & RTLDRLOAD_FLAGS_SO_VER_BEGIN_MASK) >> RTLDRLOAD_FLAGS_SO_VER_BEGIN_SHIFT;
+ int32_t const iEnd = (fFlags & RTLDRLOAD_FLAGS_SO_VER_END_MASK) >> RTLDRLOAD_FLAGS_SO_VER_END_SHIFT;
+ /* Step towards iEnd in either direction; iEnd itself is not tried. */
+ int32_t const iIncr = iBegin <= iEnd ? 1 : -1;
+ for (int32_t iMajorVer = iBegin; iMajorVer != iEnd; iMajorVer += iIncr)
+ {
+ /* Overwrite whatever follows the suffix with the next ".MAJOR". */
+ RTStrPrintf(&pszTmp[cchFilename + cchSuffix], 16 + 1, ".%d", iMajorVer);
+ rc = RTLdrLoadEx(pszTmp, phLdrMod, fFlags2, NULL);
+ if (RT_SUCCESS(rc))
+ break;
+ }
+ }
+ }
+#endif
+
+ return rc;
+}
+
diff --git a/src/VBox/Runtime/r3/posix/localipc-posix.cpp b/src/VBox/Runtime/r3/posix/localipc-posix.cpp
new file mode 100644
index 00000000..131d78cf
--- /dev/null
+++ b/src/VBox/Runtime/r3/posix/localipc-posix.cpp
@@ -0,0 +1,1172 @@
+/* $Id: localipc-posix.cpp $ */
+/** @file
+ * IPRT - Local IPC Server & Client, Posix.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP RTLOGGROUP_LOCALIPC
+#include "internal/iprt.h"
+#include <iprt/localipc.h>
+
+#include <iprt/asm.h>
+#include <iprt/assert.h>
+#include <iprt/ctype.h>
+#include <iprt/critsect.h>
+#include <iprt/err.h>
+#include <iprt/mem.h>
+#include <iprt/log.h>
+#include <iprt/poll.h>
+#include <iprt/socket.h>
+#include <iprt/string.h>
+#include <iprt/time.h>
+#include <iprt/path.h>
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#ifndef RT_OS_OS2
+# include <sys/poll.h>
+#endif
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#ifndef AF_LOCAL
+# define AF_LOCAL AF_UNIX
+#endif
+
+#include "internal/magics.h"
+#include "internal/path.h"
+#include "internal/socket.h"
+
+
+/*********************************************************************************************************************************
+* Structures and Typedefs *
+*********************************************************************************************************************************/
+/**
+ * Local IPC service instance, POSIX.
+ *
+ * Allocated and initialized by RTLocalIpcServerCreate and freed by
+ * rtLocalIpcServerDtor when the last reference is released.
+ */
+typedef struct RTLOCALIPCSERVERINT
+{
+ /** The magic (RTLOCALIPCSERVER_MAGIC); inverted when the instance is
+ * destroyed so that stale handles fail the magic check. */
+ uint32_t u32Magic;
+ /** The creation flags, as passed to RTLocalIpcServerCreate. */
+ uint32_t fFlags;
+ /** Critical section protecting the structure. */
+ RTCRITSECT CritSect;
+ /** The number of references to the instance. Starts at 1; the instance
+ * is destroyed when it drops to zero. */
+ uint32_t volatile cRefs;
+ /** Indicates that there is a pending cancel request. Set by
+ * rtLocalIpcServerCancel and checked by the listen loop. */
+ bool volatile fCancelled;
+ /** The server socket. */
+ RTSOCKET hSocket;
+ /** Thread currently listening for clients, NIL_RTTHREAD if none. This is
+ * the thread that gets poked on cancel. */
+ RTTHREAD hListenThread;
+ /** The name we bound the server to (native charset encoding). The path
+ * is unlinked again by the destructor. */
+ struct sockaddr_un Name;
+} RTLOCALIPCSERVERINT;
+/** Pointer to a local IPC server instance (POSIX). */
+typedef RTLOCALIPCSERVERINT *PRTLOCALIPCSERVERINT;
+
+
+/**
+ * Local IPC session instance, POSIX.
+ *
+ * Created either by RTLocalIpcServerListen (server side) or by
+ * RTLocalIpcSessionConnect (client side); freed by rtLocalIpcSessionDtor.
+ */
+typedef struct RTLOCALIPCSESSIONINT
+{
+ /** The magic (RTLOCALIPCSESSION_MAGIC); inverted by the destructor. */
+ uint32_t u32Magic;
+ /** Critical section protecting the structure. */
+ RTCRITSECT CritSect;
+ /** The number of references to the instance. Starts at 1; the instance
+ * is destroyed when it drops to zero. */
+ uint32_t volatile cRefs;
+ /** Indicates that there is a pending cancel request. */
+ bool volatile fCancelled;
+ /** Set if this is the server side, clear if the client. */
+ bool fServerSide;
+ /** The client socket. */
+ RTSOCKET hSocket;
+ /** Thread currently doing write related activities, NIL_RTTHREAD if none. */
+ RTTHREAD hWriteThread;
+ /** Thread currently doing read related activities, NIL_RTTHREAD if none. */
+ RTTHREAD hReadThread;
+} RTLOCALIPCSESSIONINT;
+/** Pointer to a local IPC session instance (POSIX). */
+typedef RTLOCALIPCSESSIONINT *PRTLOCALIPCSESSIONINT;
+
+
+/** Local IPC name prefix for portable names. rtLocalIpcPosixConstructName
+ * prepends it to the caller supplied name unless a native name is requested. */
+#define RTLOCALIPC_POSIX_NAME_PREFIX "/tmp/.iprt-localipc-"
+
+
+/**
+ * Checks that a caller supplied IPC name is acceptable.
+ *
+ * Native names only need to be validly encoded.  Portable names must in
+ * addition consist solely of 7-bit, non-control characters and may not
+ * contain any slashes (forward or backward).
+ *
+ * @returns VINF_SUCCESS if the name is fine, VERR_INVALID_POINTER or
+ *          VERR_INVALID_NAME for bad input, or the RTStrValidateEncoding
+ *          status for a badly encoded native name.
+ * @param   pszName     The name to validate.
+ * @param   fNative     Whether it's a native name or a portable name.
+ */
+static int rtLocalIpcPosixValidateName(const char *pszName, bool fNative)
+{
+    AssertPtrReturn(pszName, VERR_INVALID_POINTER);
+    AssertReturn(*pszName, VERR_INVALID_NAME);
+
+    /* Native names: the encoding is all we care about. */
+    if (fNative)
+    {
+        int rc = RTStrValidateEncoding(pszName);
+        AssertRCReturn(rc, rc);
+        return VINF_SUCCESS;
+    }
+
+    /* Portable names: walk the string and reject anything problematic. */
+    for (const char *pszCur = pszName; *pszCur != '\0'; pszCur++)
+    {
+        char ch = *pszCur;
+        AssertReturn(!RT_C_IS_CNTRL(ch), VERR_INVALID_NAME);
+        AssertReturn((unsigned)ch < 0x80, VERR_INVALID_NAME);
+        AssertReturn(ch != '\\', VERR_INVALID_NAME);
+        AssertReturn(ch != '/', VERR_INVALID_NAME);
+    }
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Builds the unix domain socket address for a (validated) IPC name.
+ *
+ * The name is converted to the native path encoding.  Portable names get
+ * RTLOCALIPC_POSIX_NAME_PREFIX put in front of them, native names are used
+ * as they are.
+ *
+ * @returns IPRT status code, VERR_FILENAME_TOO_LONG if the resulting path
+ *          (including terminator) does not fit into sun_path.
+ * @param   pAddr       The address structure to construct the name in.
+ * @param   pcbAddr     Where to return the address size.
+ * @param   pszName     The user specified name (valid).
+ * @param   fNative     Whether it's a native name or a portable name.
+ */
+static int rtLocalIpcPosixConstructName(struct sockaddr_un *pAddr, uint8_t *pcbAddr, const char *pszName, bool fNative)
+{
+    const char *pszNativeName;
+    int rc = rtPathToNative(&pszNativeName, pszName, NULL /*pszBasePath not support*/);
+    if (RT_FAILURE(rc))
+        return rc;
+
+    /* Work out how much room prefix + name + terminator will take. */
+    size_t const cchPrefix     = fNative ? 0 : sizeof(RTLOCALIPC_POSIX_NAME_PREFIX) - 1;
+    size_t const cchNativeName = strlen(pszNativeName);
+    size_t const cbFull        = cchPrefix + cchNativeName + 1;
+    if (cbFull > sizeof(pAddr->sun_path))
+        rc = VERR_FILENAME_TOO_LONG;
+    else
+    {
+        RT_ZERO(*pAddr);
+        pAddr->sun_family = AF_LOCAL;
+#ifdef RT_OS_OS2 /* Size must be exactly right on OS/2. */
+        *pcbAddr = sizeof(*pAddr);
+#else
+        *pcbAddr = RT_UOFFSETOF(struct sockaddr_un, sun_path) + (uint8_t)cbFull;
+#endif
+#ifdef HAVE_SUN_LEN_MEMBER
+        pAddr->sun_len = *pcbAddr;
+#endif
+
+        /* Optional prefix first, then the name including its terminator. */
+        if (cchPrefix)
+            memcpy(pAddr->sun_path, RTLOCALIPC_POSIX_NAME_PREFIX, cchPrefix);
+        memcpy(&pAddr->sun_path[cchPrefix], pszNativeName, cchNativeName + 1);
+    }
+
+    rtPathFreeNative(pszNativeName, pszName);
+    return rc;
+}
+
+
+
+/**
+ * Worker for RTLocalIpcServerCreate that names, binds and starts listening
+ * on the already created server socket.
+ *
+ * If the address is in use, the existing file system entry is unlinked and
+ * the bind is retried once.  Should listening fail after a successful bind,
+ * the socket file is removed again.
+ *
+ * @returns IPRT status code.
+ * @param   pThis       The server instance (hSocket must be valid).
+ * @param   pszName     The user specified name (valid).
+ * @param   fNative     Whether it's a native name or a portable name.
+ */
+static int rtLocalIpcServerBindAndListen(PRTLOCALIPCSERVERINT pThis, const char *pszName, bool fNative)
+{
+    uint8_t cbAddr;
+    int rc = rtLocalIpcPosixConstructName(&pThis->Name, &cbAddr, pszName, fNative);
+    if (RT_FAILURE(rc))
+        return rc;
+
+    rc = rtSocketBindRawAddr(pThis->hSocket, &pThis->Name, cbAddr);
+    if (rc == VERR_NET_ADDRESS_IN_USE)
+    {
+        unlink(pThis->Name.sun_path);
+        rc = rtSocketBindRawAddr(pThis->hSocket, &pThis->Name, cbAddr);
+    }
+    if (RT_FAILURE(rc))
+        return rc;
+
+    rc = rtSocketListen(pThis->hSocket, 16);
+    if (RT_FAILURE(rc))
+        unlink(pThis->Name.sun_path);
+    return rc;
+}
+
+
+RTDECL(int) RTLocalIpcServerCreate(PRTLOCALIPCSERVER phServer, const char *pszName, uint32_t fFlags)
+{
+    /*
+     * Check the input.
+     */
+    AssertPtrReturn(phServer, VERR_INVALID_POINTER);
+    *phServer = NIL_RTLOCALIPCSERVER;
+    AssertReturn(!(fFlags & ~RTLOCALIPC_FLAGS_VALID_MASK), VERR_INVALID_FLAGS);
+
+    bool const fNativeName = RT_BOOL(fFlags & RTLOCALIPC_FLAGS_NATIVE_NAME);
+    int rc = rtLocalIpcPosixValidateName(pszName, fNativeName);
+    if (RT_SUCCESS(rc))
+    {
+        /*
+         * Set up the instance data.
+         */
+        PRTLOCALIPCSERVERINT pThis = (PRTLOCALIPCSERVERINT)RTMemAllocZ(sizeof(*pThis));
+        if (!pThis)
+            rc = VERR_NO_MEMORY;
+        else
+        {
+            pThis->u32Magic      = RTLOCALIPCSERVER_MAGIC;
+            pThis->fFlags        = fFlags;
+            pThis->cRefs         = 1;
+            pThis->fCancelled    = false;
+            pThis->hListenThread = NIL_RTTHREAD;
+            rc = RTCritSectInit(&pThis->CritSect);
+            if (RT_SUCCESS(rc))
+            {
+                /*
+                 * Get a local (unix) stream socket, then bind and listen.
+                 */
+                rc = rtSocketCreate(&pThis->hSocket, AF_LOCAL, SOCK_STREAM, 0 /*iProtocol*/, false /*fInheritable*/);
+                if (RT_SUCCESS(rc))
+                {
+                    signal(SIGPIPE, SIG_IGN); /* Required on solaris, at least. */
+
+                    rc = rtLocalIpcServerBindAndListen(pThis, pszName, fNativeName);
+                    if (RT_SUCCESS(rc))
+                    {
+                        LogFlow(("RTLocalIpcServerCreate: Created %p (%s)\n", pThis, pThis->Name.sun_path));
+                        *phServer = pThis;
+                        return VINF_SUCCESS;
+                    }
+
+                    /* Unwind in reverse order of construction. */
+                    RTSocketRelease(pThis->hSocket);
+                }
+                RTCritSectDelete(&pThis->CritSect);
+            }
+            RTMemFree(pThis);
+        }
+    }
+    Log(("RTLocalIpcServerCreate: failed, rc=%Rrc\n", rc));
+    return rc;
+}
+
+
+/**
+ * Grants a group read/write access to the server's socket file.
+ *
+ * Changes the group ownership of the bound socket path to @a gid (leaving
+ * the owner alone) and then sets the mode to read+write for owner and group.
+ *
+ * @returns VINF_SUCCESS, VERR_INVALID_HANDLE, VERR_INVALID_STATE if no path
+ *          is recorded, or the IPRT status converted from the errno value of
+ *          the failing chown/chmod call.
+ * @param   hServer     The server handle.
+ * @param   gid         The group to grant access to.
+ */
+RTDECL(int) RTLocalIpcServerGrantGroupAccess(RTLOCALIPCSERVER hServer, RTGID gid)
+{
+    PRTLOCALIPCSERVERINT pThis = (PRTLOCALIPCSERVERINT)hServer;
+    AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+    AssertReturn(pThis->u32Magic == RTLOCALIPCSERVER_MAGIC, VERR_INVALID_HANDLE);
+    AssertReturn(pThis->Name.sun_path[0] != '\0', VERR_INVALID_STATE);
+
+    if (chown(pThis->Name.sun_path, (uid_t)-1, gid) != 0)
+    {
+        /* Grab errno right away, the logging below may change it. */
+        int const iErrNo = errno;
+        LogRel(("RTLocalIpcServerGrantGroupAccess: cannot change IPC socket %s group ownership to %RTgid: errno=%d\n",
+                pThis->Name.sun_path, gid, iErrNo));
+        return RTErrConvertFromErrno(iErrNo);
+    }
+
+    if (chmod(pThis->Name.sun_path, S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP) != 0)
+    {
+        /* Grab errno right away, the logging below may change it. */
+        int const iErrNo = errno;
+        LogRel(("RTLocalIpcServerGrantGroupAccess: cannot grant IPC socket %s write permission to group %RTgid: errno=%d\n",
+                pThis->Name.sun_path, gid, iErrNo));
+        return RTErrConvertFromErrno(iErrNo);
+    }
+
+    LogRel2(("RTLocalIpcServerGrantGroupAccess: IPC socket %s access has been granted to group %RTgid\n",
+             pThis->Name.sun_path, gid));
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Sets the access mode of the server's socket file.
+ *
+ * Only the bits covered by RTFS_UNIX_ALL_ACCESS_PERMS are passed to chmod.
+ *
+ * @returns VINF_SUCCESS, VERR_INVALID_HANDLE, VERR_INVALID_STATE if no path
+ *          is recorded, or the status converted from errno if chmod fails.
+ * @param   hServer     The server handle.
+ * @param   fMode       The new file mode.
+ */
+RTDECL(int) RTLocalIpcServerSetAccessMode(RTLOCALIPCSERVER hServer, RTFMODE fMode)
+{
+    PRTLOCALIPCSERVERINT pThis = (PRTLOCALIPCSERVERINT)hServer;
+    AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+    AssertReturn(pThis->u32Magic == RTLOCALIPCSERVER_MAGIC, VERR_INVALID_HANDLE);
+    AssertReturn(pThis->Name.sun_path[0] != '\0', VERR_INVALID_STATE);
+
+    int const rcPosix = chmod(pThis->Name.sun_path, fMode & RTFS_UNIX_ALL_ACCESS_PERMS);
+    return rcPosix == 0 ? VINF_SUCCESS : RTErrConvertFromErrno(errno);
+}
+
+
+/**
+ * Adds a reference to the server instance (atomic increment of cRefs).
+ *
+ * @param   pThis       The server instance.
+ */
+DECLINLINE(void) rtLocalIpcServerRetain(PRTLOCALIPCSERVERINT pThis)
+{
+    uint32_t const cRefs = ASMAtomicIncU32(&pThis->cRefs);
+    Assert(cRefs < UINT32_MAX / 2 && cRefs);
+    RT_NOREF_PV(cRefs); /* only used by the assertion */
+}
+
+
+/**
+ * Destroys the server instance.
+ *
+ * Invalidates the magic, drops the socket reference, deletes the critical
+ * section, unlinks the socket path and frees the instance memory.
+ *
+ * @returns VINF_OBJECT_DESTROYED
+ * @param   pThis       The server instance.
+ */
+static int rtLocalIpcServerDtor(PRTLOCALIPCSERVERINT pThis)
+{
+    pThis->u32Magic = ~RTLOCALIPCSERVER_MAGIC;
+
+    uint32_t const cSocketRefs = RTSocketRelease(pThis->hSocket);
+    if (cSocketRefs != 0)
+        Log(("rtLocalIpcServerDtor: Socket still has references (impossible?)\n"));
+    else
+        Log(("rtLocalIpcServerDtor: Released socket\n"));
+
+    RTCritSectDelete(&pThis->CritSect);
+    unlink(pThis->Name.sun_path);
+    RTMemFree(pThis);
+    return VINF_OBJECT_DESTROYED;
+}
+
+
+/**
+ * Drops a reference to the server instance, destroying it when the count
+ * reaches zero.
+ *
+ * @returns VINF_SUCCESS if only release, VINF_OBJECT_DESTROYED if destroyed.
+ * @param   pThis       The server instance.
+ */
+DECLINLINE(int) rtLocalIpcServerRelease(PRTLOCALIPCSERVERINT pThis)
+{
+    uint32_t const cRefs = ASMAtomicDecU32(&pThis->cRefs);
+    Assert(cRefs < UINT32_MAX / 2);
+    return cRefs == 0 ? rtLocalIpcServerDtor(pThis) : VINF_SUCCESS;
+}
+
+
+/**
+ * Worker shared by RTLocalIpcServerDestroy and RTLocalIpcServerCancel.
+ *
+ * Raises the cancel flag under the critical section and pokes the thread
+ * registered as listener, if there is one.
+ *
+ * @returns VINF_SUCCESS
+ * @param   pThis       The server instance.
+ */
+static int rtLocalIpcServerCancel(PRTLOCALIPCSERVERINT pThis)
+{
+    RTCritSectEnter(&pThis->CritSect);
+
+    pThis->fCancelled = true;
+    Log(("rtLocalIpcServerCancel:\n"));
+
+    RTTHREAD const hListener = pThis->hListenThread;
+    if (hListener != NIL_RTTHREAD)
+        RTThreadPoke(hListener);
+
+    RTCritSectLeave(&pThis->CritSect);
+    return VINF_SUCCESS;
+}
+
+
+
+/**
+ * Destroys a server handle.
+ *
+ * NIL handles are quietly accepted.  The magic is atomically invalidated so
+ * only one caller can destroy a given handle; any listener is cancelled and
+ * the caller's reference is released.
+ *
+ * @returns VINF_SUCCESS or VINF_OBJECT_DESTROYED on success,
+ *          VERR_INVALID_HANDLE for a bad handle, VERR_WRONG_ORDER if the
+ *          magic was changed by someone else in the meantime.
+ * @param   hServer     The server handle, NIL_RTLOCALIPCSERVER is ignored.
+ */
+RTDECL(int) RTLocalIpcServerDestroy(RTLOCALIPCSERVER hServer)
+{
+    /* Destroying nothing is fine. */
+    if (hServer == NIL_RTLOCALIPCSERVER)
+        return VINF_SUCCESS;
+
+    PRTLOCALIPCSERVERINT pThis = (PRTLOCALIPCSERVERINT)hServer;
+    AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+    AssertReturn(pThis->u32Magic == RTLOCALIPCSERVER_MAGIC, VERR_INVALID_HANDLE);
+
+    /* Take the handle out of circulation before doing anything else. */
+    bool const fInvalidated = ASMAtomicCmpXchgU32(&pThis->u32Magic, ~RTLOCALIPCSERVER_MAGIC, RTLOCALIPCSERVER_MAGIC);
+    AssertReturn(fInvalidated, VERR_WRONG_ORDER);
+
+    /* Wake up any listener and give up the caller's reference. */
+    rtLocalIpcServerCancel(pThis);
+    int const rc = rtLocalIpcServerRelease(pThis);
+    return rc;
+}
+
+
+/**
+ * Cancels the current or any future listen call on the server.
+ *
+ * @returns VINF_SUCCESS, or VERR_INVALID_HANDLE for a bad handle.
+ * @param   hServer     The server handle.
+ */
+RTDECL(int) RTLocalIpcServerCancel(RTLOCALIPCSERVER hServer)
+{
+    PRTLOCALIPCSERVERINT pThis = (PRTLOCALIPCSERVERINT)hServer;
+    AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+    AssertReturn(pThis->u32Magic == RTLOCALIPCSERVER_MAGIC, VERR_INVALID_HANDLE);
+
+    /* Hold a reference across the cancel so the instance stays around. */
+    rtLocalIpcServerRetain(pThis);
+    rtLocalIpcServerCancel(pThis);
+    rtLocalIpcServerRelease(pThis);
+
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Waits for a client to connect and creates a session for it.
+ *
+ * Only one thread can listen at a time.  The accept call is retried when it
+ * is interrupted (VERR_INTERRUPTED / VERR_TRY_AGAIN) unless a cancel request
+ * has been made in the meantime.
+ *
+ * @returns IPRT status code.
+ * @retval  VERR_CANCELLED if a cancel request is pending.
+ * @retval  VERR_RESOURCE_BUSY if another thread is already listening.
+ * @retval  VERR_NO_MEMORY if the session structure could not be allocated;
+ *          the accepted connection is dropped in that case.
+ * @param   hServer             The server handle.
+ * @param   phClientSession     Where to return the new client session on
+ *                              success.
+ */
+RTDECL(int) RTLocalIpcServerListen(RTLOCALIPCSERVER hServer, PRTLOCALIPCSESSION phClientSession)
+{
+    /*
+     * Validate input.
+     */
+    PRTLOCALIPCSERVERINT pThis = (PRTLOCALIPCSERVERINT)hServer;
+    AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+    AssertReturn(pThis->u32Magic == RTLOCALIPCSERVER_MAGIC, VERR_INVALID_HANDLE);
+    AssertPtrReturn(phClientSession, VERR_INVALID_POINTER);
+
+    /*
+     * Begin listening.
+     */
+    rtLocalIpcServerRetain(pThis);
+    int rc = RTCritSectEnter(&pThis->CritSect);
+    if (RT_SUCCESS(rc))
+    {
+        if (pThis->hListenThread == NIL_RTTHREAD)
+        {
+            pThis->hListenThread = RTThreadSelf();
+
+            /*
+             * The listening retry loop.  The critical section is owned at
+             * the top of each iteration and whenever we break out (except
+             * for the critsect failure paths).
+             */
+            for (;;)
+            {
+                if (!pThis->fCancelled)
+                {
+                    /* Don't hold the lock while waiting in accept. */
+                    rc = RTCritSectLeave(&pThis->CritSect);
+                    AssertRCBreak(rc);
+
+                    struct sockaddr_un Addr;
+                    size_t cbAddr = sizeof(Addr);
+                    RTSOCKET hClient;
+                    Log(("RTLocalIpcServerListen: Calling rtSocketAccept...\n"));
+                    rc = rtSocketAccept(pThis->hSocket, &hClient, (struct sockaddr *)&Addr, &cbAddr);
+                    Log(("RTLocalIpcServerListen: rtSocketAccept returns %Rrc.\n", rc));
+
+                    int rc2 = RTCritSectEnter(&pThis->CritSect);
+                    AssertRCBreakStmt(rc2, rc = RT_SUCCESS(rc) ? rc2 : rc);
+
+                    if (RT_SUCCESS(rc))
+                    {
+                        /*
+                         * Create a client session.
+                         */
+                        PRTLOCALIPCSESSIONINT pSession = (PRTLOCALIPCSESSIONINT)RTMemAllocZ(sizeof(*pSession));
+                        if (pSession)
+                        {
+                            pSession->u32Magic = RTLOCALIPCSESSION_MAGIC;
+                            pSession->cRefs = 1;
+                            pSession->fCancelled = false;
+                            pSession->fServerSide = true;
+                            pSession->hSocket = hClient;
+                            pSession->hReadThread = NIL_RTTHREAD;
+                            pSession->hWriteThread = NIL_RTTHREAD;
+                            rc = RTCritSectInit(&pSession->CritSect);
+                            if (RT_SUCCESS(rc))
+                            {
+                                Log(("RTLocalIpcServerListen: Returning new client session: %p\n", pSession));
+                                *phClientSession = pSession;
+                                break;
+                            }
+
+                            RTMemFree(pSession);
+                        }
+                        else
+                            rc = VERR_NO_MEMORY;
+
+                        /*
+                         * Could not wrap the connection in a session.  Drop
+                         * our reference to the accepted socket so it isn't
+                         * leaked and report the failure rather than going
+                         * around again and overwriting the status.
+                         */
+                        RTSocketRelease(hClient);
+                        break;
+                    }
+
+                    /* Accept failed: only interruptions are retried. */
+                    if (   rc != VERR_INTERRUPTED
+                        && rc != VERR_TRY_AGAIN)
+                        break;
+                }
+                else
+                {
+                    rc = VERR_CANCELLED;
+                    break;
+                }
+            }
+
+            pThis->hListenThread = NIL_RTTHREAD;
+        }
+        else
+        {
+            AssertFailed();
+            rc = VERR_RESOURCE_BUSY;
+        }
+        int rc2 = RTCritSectLeave(&pThis->CritSect);
+        AssertStmt(RT_SUCCESS(rc2), rc = RT_SUCCESS(rc) ? rc2 : rc);
+    }
+    rtLocalIpcServerRelease(pThis);
+
+    Log(("RTLocalIpcServerListen: returns %Rrc\n", rc));
+    return rc;
+}
+
+
+/**
+ * Connects to a local IPC server and returns a client side session.
+ *
+ * @returns IPRT status code.  On failure *phSession is left as
+ *          NIL_RTLOCALIPCSESSION and everything allocated is cleaned up.
+ * @param   phSession   Where to return the session handle.
+ * @param   pszName     The server name.
+ * @param   fFlags      Flags, must be within RTLOCALIPC_C_FLAGS_VALID_MASK.
+ */
+RTDECL(int) RTLocalIpcSessionConnect(PRTLOCALIPCSESSION phSession, const char *pszName, uint32_t fFlags)
+{
+    /*
+     * Check the input.
+     */
+    AssertPtrReturn(phSession, VERR_INVALID_POINTER);
+    *phSession = NIL_RTLOCALIPCSESSION;
+
+    AssertReturn(!(fFlags & ~RTLOCALIPC_C_FLAGS_VALID_MASK), VERR_INVALID_FLAGS);
+
+    bool const fNativeName = RT_BOOL(fFlags & RTLOCALIPC_C_FLAGS_NATIVE_NAME);
+    int rc = rtLocalIpcPosixValidateName(pszName, fNativeName);
+    if (RT_SUCCESS(rc))
+    {
+        /*
+         * Set up the instance data.
+         */
+        PRTLOCALIPCSESSIONINT pThis = (PRTLOCALIPCSESSIONINT)RTMemAllocZ(sizeof(*pThis));
+        if (!pThis)
+            rc = VERR_NO_MEMORY;
+        else
+        {
+            pThis->u32Magic     = RTLOCALIPCSESSION_MAGIC;
+            pThis->cRefs        = 1;
+            pThis->fCancelled   = false;
+            pThis->fServerSide  = false;
+            pThis->hSocket      = NIL_RTSOCKET;
+            pThis->hReadThread  = NIL_RTTHREAD;
+            pThis->hWriteThread = NIL_RTTHREAD;
+            rc = RTCritSectInit(&pThis->CritSect);
+            if (RT_SUCCESS(rc))
+            {
+                /*
+                 * Get a local (unix) stream socket and connect it to the
+                 * address derived from the name.
+                 */
+                rc = rtSocketCreate(&pThis->hSocket, AF_LOCAL, SOCK_STREAM, 0 /*iProtocol*/, false /*fInheritable*/);
+                if (RT_SUCCESS(rc))
+                {
+                    signal(SIGPIPE, SIG_IGN); /* Required on solaris, at least. */
+
+                    struct sockaddr_un Addr;
+                    uint8_t cbAddr;
+                    rc = rtLocalIpcPosixConstructName(&Addr, &cbAddr, pszName, fNativeName);
+                    if (RT_SUCCESS(rc))
+                        rc = rtSocketConnectRaw(pThis->hSocket, &Addr, cbAddr);
+                    if (RT_SUCCESS(rc))
+                    {
+                        *phSession = pThis;
+                        Log(("RTLocalIpcSessionConnect: Returns new session %p\n", pThis));
+                        return VINF_SUCCESS;
+                    }
+
+                    /* Unwind in reverse order of construction. */
+                    RTSocketRelease(pThis->hSocket);
+                }
+                RTCritSectDelete(&pThis->CritSect);
+            }
+            RTMemFree(pThis);
+        }
+    }
+    Log(("RTLocalIpcSessionConnect: returns %Rrc\n", rc));
+    return rc;
+}
+
+
+/**
+ * Adds a reference to the session instance (atomic increment of cRefs).
+ *
+ * @param   pThis       The session instance.
+ */
+DECLINLINE(void) rtLocalIpcSessionRetain(PRTLOCALIPCSESSIONINT pThis)
+{
+    uint32_t const cRefs = ASMAtomicIncU32(&pThis->cRefs);
+    Assert(cRefs < UINT32_MAX / 2 && cRefs);
+    RT_NOREF_PV(cRefs); /* only used by the assertion */
+}
+
+
+/**
+ * Retains a reference to the session.
+ *
+ * @returns The new reference count, UINT32_MAX if the handle is invalid.
+ * @param   hSession    The session handle.
+ */
+RTDECL(uint32_t) RTLocalIpcSessionRetain(RTLOCALIPCSESSION hSession)
+{
+    PRTLOCALIPCSESSIONINT pThis = (PRTLOCALIPCSESSIONINT)hSession;
+    AssertPtrReturn(pThis, UINT32_MAX);
+    AssertReturn(pThis->u32Magic == RTLOCALIPCSESSION_MAGIC, UINT32_MAX);
+
+    uint32_t const cRefs = ASMAtomicIncU32(&pThis->cRefs);
+    Assert(cRefs < UINT32_MAX / 2 && cRefs);
+
+    return cRefs;
+}
+
+
+/**
+ * Destroys the session instance.
+ *
+ * Invalidates the magic, drops the socket reference, deletes the critical
+ * section and frees the instance memory.
+ *
+ * @returns VINF_OBJECT_DESTROYED
+ * @param   pThis       The session instance.
+ */
+static int rtLocalIpcSessionDtor(PRTLOCALIPCSESSIONINT pThis)
+{
+    pThis->u32Magic = ~RTLOCALIPCSESSION_MAGIC;
+
+    uint32_t const cSocketRefs = RTSocketRelease(pThis->hSocket);
+    if (cSocketRefs != 0)
+        Log(("rtLocalIpcSessionDtor: Socket still has references (impossible?)\n"));
+    else
+        Log(("rtLocalIpcSessionDtor: Released socket\n"));
+
+    RTCritSectDelete(&pThis->CritSect);
+    RTMemFree(pThis);
+    return VINF_OBJECT_DESTROYED;
+}
+
+
+/**
+ * Drops a reference to the session instance, destroying it when the count
+ * reaches zero.
+ *
+ * @returns VINF_SUCCESS or VINF_OBJECT_DESTROYED as appropriate.
+ * @param   pThis       The session instance.
+ */
+DECLINLINE(int) rtLocalIpcSessionRelease(PRTLOCALIPCSESSIONINT pThis)
+{
+    uint32_t const cRefs = ASMAtomicDecU32(&pThis->cRefs);
+    Assert(cRefs < UINT32_MAX / 2);
+    if (cRefs != 0)
+    {
+        Log(("rtLocalIpcSessionRelease: %u refs left\n", cRefs));
+        return VINF_SUCCESS;
+    }
+    return rtLocalIpcSessionDtor(pThis);
+}
+
+
+/**
+ * Releases a reference to the session, destroying it when the last
+ * reference is gone.
+ *
+ * @returns The new reference count (0 for a NIL handle or when destroyed),
+ *          UINT32_MAX if the handle is invalid.
+ * @param   hSession    The session handle, NIL_RTLOCALIPCSESSION is ignored.
+ */
+RTDECL(uint32_t) RTLocalIpcSessionRelease(RTLOCALIPCSESSION hSession)
+{
+    /* Nothing to do for NIL. */
+    if (hSession == NIL_RTLOCALIPCSESSION)
+        return 0;
+
+    PRTLOCALIPCSESSIONINT pThis = (PRTLOCALIPCSESSIONINT)hSession;
+    AssertPtrReturn(pThis, UINT32_MAX);
+    AssertReturn(pThis->u32Magic == RTLOCALIPCSESSION_MAGIC, UINT32_MAX);
+
+    uint32_t const cRefs = ASMAtomicDecU32(&pThis->cRefs);
+    Assert(cRefs < UINT32_MAX / 2);
+    if (cRefs == 0)
+        rtLocalIpcSessionDtor(pThis);
+    else
+        Log(("RTLocalIpcSessionRelease: %u refs left\n", cRefs));
+    return cRefs;
+}
+
+
+/**
+ * Worker shared by RTLocalIpcSessionClose and RTLocalIpcSessionCancel.
+ *
+ * Raises the cancel flag under the critical section and pokes whichever
+ * threads are registered as reader and writer.
+ *
+ * @returns VINF_SUCCESS
+ * @param   pThis       The session instance.
+ */
+static int rtLocalIpcSessionCancel(PRTLOCALIPCSESSIONINT pThis)
+{
+    RTCritSectEnter(&pThis->CritSect);
+
+    pThis->fCancelled = true;
+    Log(("rtLocalIpcSessionCancel:\n"));
+
+    RTTHREAD const hReader = pThis->hReadThread;
+    if (hReader != NIL_RTTHREAD)
+        RTThreadPoke(hReader);
+
+    RTTHREAD const hWriter = pThis->hWriteThread;
+    if (hWriter != NIL_RTTHREAD)
+        RTThreadPoke(hWriter);
+
+    RTCritSectLeave(&pThis->CritSect);
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Closes the session: cancels any pending I/O and releases the caller's
+ * reference.
+ *
+ * @returns VINF_SUCCESS or VINF_OBJECT_DESTROYED on success,
+ *          VERR_INVALID_HANDLE for a bad handle.
+ * @param   hSession    The session handle, NIL_RTLOCALIPCSESSION is ignored.
+ */
+RTDECL(int) RTLocalIpcSessionClose(RTLOCALIPCSESSION hSession)
+{
+    /* Closing nothing is fine. */
+    if (hSession == NIL_RTLOCALIPCSESSION)
+        return VINF_SUCCESS;
+
+    PRTLOCALIPCSESSIONINT pThis = hSession;
+    AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+    AssertReturn(pThis->u32Magic == RTLOCALIPCSESSION_MAGIC, VERR_INVALID_HANDLE);
+
+    Log(("RTLocalIpcSessionClose:\n"));
+
+    /* Wake up threads busy with the session, then drop the caller's reference. */
+    rtLocalIpcSessionCancel(pThis);
+    int const rc = rtLocalIpcSessionRelease(pThis);
+    return rc;
+}
+
+
+/**
+ * Cancels current and future read, write and wait calls on the session.
+ *
+ * @returns VINF_SUCCESS, or VERR_INVALID_HANDLE for a bad handle.
+ * @param   hSession    The session handle.
+ */
+RTDECL(int) RTLocalIpcSessionCancel(RTLOCALIPCSESSION hSession)
+{
+    PRTLOCALIPCSESSIONINT pThis = hSession;
+    AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+    AssertReturn(pThis->u32Magic == RTLOCALIPCSESSION_MAGIC, VERR_INVALID_HANDLE);
+
+    /* Hold a reference across the cancel so the instance stays around. */
+    rtLocalIpcSessionRetain(pThis);
+    rtLocalIpcSessionCancel(pThis);
+    rtLocalIpcSessionRelease(pThis);
+
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Checks if the socket has a HUP condition after reading zero bytes.
+ *
+ * A hang-up is only reported when the connection looks broken (POLLHUP or
+ * POLLERR from poll, or EPIPE / ECONNRESET from a zero byte send on OS/2
+ * and Solaris) and a non-blocking peek finds nothing left to read.
+ *
+ * @returns true if HUP, false if no.
+ * @param   pThis       The IPC session handle.
+ */
+static bool rtLocalIpcPosixHasHup(PRTLOCALIPCSESSIONINT pThis)
+{
+    int fdNative = RTSocketToNative(pThis->hSocket);
+
+#if !defined(RT_OS_OS2) && !defined(RT_OS_SOLARIS)
+    /* Zero timeout poll asking for hang-up and error conditions only. */
+    struct pollfd PollFd;
+    RT_ZERO(PollFd);
+    PollFd.fd     = fdNative;
+    PollFd.events = POLLHUP | POLLERR;
+    int const cReady = poll(&PollFd, 1, 0);
+    if (   cReady <= 0
+        || !(PollFd.revents & (POLLHUP | POLLERR)))
+        return false;
+#else /* RT_OS_OS2 || RT_OS_SOLARIS */
+    /*
+     * OS/2: No native poll, do zero byte send to check for EPIPE.
+     * Solaris: We don't get POLLHUP.
+     */
+    uint8_t bDummy;
+    ssize_t const rcSend = send(fdNative, &bDummy, 0, 0);
+    bool const fBroken = rcSend < 0 && (errno == EPIPE || errno == ECONNRESET);
+    if (!fBroken)
+        return false;
+#endif /* RT_OS_OS2 || RT_OS_SOLARIS */
+
+    /*
+     * The connection is broken.  Make sure no bytes slipped in between the
+     * caller's recv and the disconnect before calling it a hang-up.
+     */
+    uint8_t bPeek;
+    ssize_t const cbPeeked = recv(fdNative, &bPeek, 1, MSG_DONTWAIT | MSG_PEEK);
+    return cbPeeked <= 0;
+}
+
+
+/**
+ * Reads data from the session socket via RTSocketRead.
+ *
+ * The calling thread is registered as the session's reader for the duration
+ * of the call so a cancel request can poke it.  Interrupted reads
+ * (VERR_INTERRUPTED / VERR_TRY_AGAIN) are retried unless cancelled.
+ *
+ * @returns IPRT status code.
+ * @retval  VERR_CANCELLED if a cancel request is pending.
+ * @retval  VERR_BROKEN_PIPE if the connection was reset or shut down, or if
+ *          zero bytes were read and the socket shows a hang-up.
+ * @retval  VERR_RESOURCE_BUSY if another thread is currently registered as
+ *          the reader; nothing is read in that case.
+ * @param   hSession    The session handle.
+ * @param   pvBuf       Where to store the data.
+ * @param   cbToRead    How much to read.
+ * @param   pcbRead     Passed on to RTSocketRead; when given and set to zero
+ *                      on success, the socket is checked for a hang-up.
+ */
+RTDECL(int) RTLocalIpcSessionRead(RTLOCALIPCSESSION hSession, void *pvBuf, size_t cbToRead, size_t *pcbRead)
+{
+    /*
+     * Validate input.
+     */
+    PRTLOCALIPCSESSIONINT pThis = hSession;
+    AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+    AssertReturn(pThis->u32Magic == RTLOCALIPCSESSION_MAGIC, VERR_INVALID_HANDLE);
+
+    /*
+     * Do the job.
+     */
+    rtLocalIpcSessionRetain(pThis);
+
+    int rc = RTCritSectEnter(&pThis->CritSect);
+    if (RT_SUCCESS(rc))
+    {
+        if (pThis->hReadThread == NIL_RTTHREAD)
+        {
+            pThis->hReadThread = RTThreadSelf();
+
+            for (;;)
+            {
+                if (!pThis->fCancelled)
+                {
+                    /* Don't hold the lock while reading. */
+                    rc = RTCritSectLeave(&pThis->CritSect);
+                    AssertRCBreak(rc);
+
+                    rc = RTSocketRead(pThis->hSocket, pvBuf, cbToRead, pcbRead);
+
+                    /* Detect broken pipe. */
+                    if (rc == VINF_SUCCESS)
+                    {
+                        if (!pcbRead || *pcbRead)
+                        { /* likely */ }
+                        else if (rtLocalIpcPosixHasHup(pThis))
+                            rc = VERR_BROKEN_PIPE;
+                    }
+                    else if (rc == VERR_NET_CONNECTION_RESET_BY_PEER || rc == VERR_NET_SHUTDOWN)
+                        rc = VERR_BROKEN_PIPE;
+
+                    int rc2 = RTCritSectEnter(&pThis->CritSect);
+                    AssertRCBreakStmt(rc2, rc = RT_SUCCESS(rc) ? rc2 : rc);
+
+                    if (   rc == VERR_INTERRUPTED
+                        || rc == VERR_TRY_AGAIN)
+                        continue;
+                }
+                else
+                    rc = VERR_CANCELLED;
+                break;
+            }
+
+            pThis->hReadThread = NIL_RTTHREAD;
+        }
+        else
+            rc = VERR_RESOURCE_BUSY; /* Another reader is active; don't claim success without reading. */
+        int rc2 = RTCritSectLeave(&pThis->CritSect);
+        AssertStmt(RT_SUCCESS(rc2), rc = RT_SUCCESS(rc) ? rc2 : rc);
+    }
+
+    rtLocalIpcSessionRelease(pThis);
+    return rc;
+}
+
+
+/**
+ * Reads data from the session socket via RTSocketReadNB.
+ *
+ * Unlike RTLocalIpcSessionRead, the critical section is kept while the read
+ * call is made.  The call is retried if it returns VERR_INTERRUPTED.
+ *
+ * @returns IPRT status code.
+ * @retval  VERR_CANCELLED if a cancel request is pending.
+ * @retval  VERR_BROKEN_PIPE if the connection was reset or shut down, or if
+ *          zero bytes were read and the socket shows a hang-up.
+ * @retval  VERR_RESOURCE_BUSY if another thread is currently registered as
+ *          the reader; nothing is read in that case.
+ * @param   hSession    The session handle.
+ * @param   pvBuf       Where to store the data.
+ * @param   cbToRead    How much to read.
+ * @param   pcbRead     Passed on to RTSocketReadNB; when given and set to
+ *                      zero on success, the socket is checked for a hang-up.
+ */
+RTDECL(int) RTLocalIpcSessionReadNB(RTLOCALIPCSESSION hSession, void *pvBuf, size_t cbToRead, size_t *pcbRead)
+{
+    /*
+     * Validate input.
+     */
+    PRTLOCALIPCSESSIONINT pThis = hSession;
+    AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+    AssertReturn(pThis->u32Magic == RTLOCALIPCSESSION_MAGIC, VERR_INVALID_HANDLE);
+
+    /*
+     * Do the job.
+     */
+    rtLocalIpcSessionRetain(pThis);
+
+    int rc = RTCritSectEnter(&pThis->CritSect);
+    if (RT_SUCCESS(rc))
+    {
+        if (pThis->hReadThread == NIL_RTTHREAD)
+        {
+            pThis->hReadThread = RTThreadSelf(); /* not really required, but whatever. */
+
+            for (;;)
+            {
+                if (!pThis->fCancelled)
+                {
+                    rc = RTSocketReadNB(pThis->hSocket, pvBuf, cbToRead, pcbRead);
+
+                    /* Detect broken pipe. */
+                    if (rc == VINF_SUCCESS)
+                    {
+                        if (!pcbRead || *pcbRead)
+                        { /* likely */ }
+                        else if (rtLocalIpcPosixHasHup(pThis))
+                            rc = VERR_BROKEN_PIPE;
+                    }
+                    else if (rc == VERR_NET_CONNECTION_RESET_BY_PEER || rc == VERR_NET_SHUTDOWN)
+                        rc = VERR_BROKEN_PIPE;
+
+                    if (rc == VERR_INTERRUPTED)
+                        continue;
+                }
+                else
+                    rc = VERR_CANCELLED;
+                break;
+            }
+
+            pThis->hReadThread = NIL_RTTHREAD;
+        }
+        else
+            rc = VERR_RESOURCE_BUSY; /* Another reader is active; don't claim success without reading. */
+        int rc2 = RTCritSectLeave(&pThis->CritSect);
+        AssertStmt(RT_SUCCESS(rc2), rc = RT_SUCCESS(rc) ? rc2 : rc);
+    }
+
+    rtLocalIpcSessionRelease(pThis);
+    return rc;
+}
+
+
+/**
+ * Writes data to the session socket via RTSocketWrite.
+ *
+ * The calling thread is registered as the session's writer for the duration
+ * of the call so a cancel request can poke it.  Interrupted writes
+ * (VERR_INTERRUPTED / VERR_TRY_AGAIN) are retried unless cancelled.
+ *
+ * @returns IPRT status code.
+ * @retval  VERR_CANCELLED if a cancel request is pending.
+ * @retval  VERR_RESOURCE_BUSY if another thread is currently registered as
+ *          the writer; nothing is written in that case.
+ * @param   hSession    The session handle.
+ * @param   pvBuf       The data to write.
+ * @param   cbToWrite   How much to write.
+ */
+RTDECL(int) RTLocalIpcSessionWrite(RTLOCALIPCSESSION hSession, const void *pvBuf, size_t cbToWrite)
+{
+    /*
+     * Validate input.
+     */
+    PRTLOCALIPCSESSIONINT pThis = hSession;
+    AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+    AssertReturn(pThis->u32Magic == RTLOCALIPCSESSION_MAGIC, VERR_INVALID_HANDLE);
+
+    /*
+     * Do the job.
+     */
+    rtLocalIpcSessionRetain(pThis);
+
+    int rc = RTCritSectEnter(&pThis->CritSect);
+    if (RT_SUCCESS(rc))
+    {
+        if (pThis->hWriteThread == NIL_RTTHREAD)
+        {
+            pThis->hWriteThread = RTThreadSelf();
+
+            for (;;)
+            {
+                if (!pThis->fCancelled)
+                {
+                    /* Don't hold the lock while writing. */
+                    rc = RTCritSectLeave(&pThis->CritSect);
+                    AssertRCBreak(rc);
+
+                    rc = RTSocketWrite(pThis->hSocket, pvBuf, cbToWrite);
+
+                    int rc2 = RTCritSectEnter(&pThis->CritSect);
+                    AssertRCBreakStmt(rc2, rc = RT_SUCCESS(rc) ? rc2 : rc);
+
+                    if (   rc == VERR_INTERRUPTED
+                        || rc == VERR_TRY_AGAIN)
+                        continue;
+                }
+                else
+                    rc = VERR_CANCELLED;
+                break;
+            }
+
+            pThis->hWriteThread = NIL_RTTHREAD;
+        }
+        else
+            rc = VERR_RESOURCE_BUSY; /* Another writer is active; don't claim success without writing. */
+        int rc2 = RTCritSectLeave(&pThis->CritSect);
+        AssertStmt(RT_SUCCESS(rc2), rc = RT_SUCCESS(rc) ? rc2 : rc);
+    }
+
+    rtLocalIpcSessionRelease(pThis);
+    return rc;
+}
+
+
+/**
+ * Flushes the session; on POSIX this only checks that no write is active.
+ *
+ * @returns IPRT status code, VERR_RESOURCE_BUSY if a thread is currently
+ *          registered as writer.
+ * @param   hSession    The session handle.
+ */
+RTDECL(int) RTLocalIpcSessionFlush(RTLOCALIPCSESSION hSession)
+{
+    PRTLOCALIPCSESSIONINT pThis = hSession;
+    AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+    AssertReturn(pThis->u32Magic == RTLOCALIPCSESSION_MAGIC, VERR_INVALID_HANDLE);
+
+    /*
+     * There is nothing to flush: apparently a write doesn't return until
+     * the result has been read, at least according to the reply to a
+     * 2003-04-08 LKML posting titled "fsync() on unix domain sockets?".
+     *
+     * For conformity we still report it if a write is in progress.
+     */
+    rtLocalIpcSessionRetain(pThis);
+
+    int rc = RTCritSectEnter(&pThis->CritSect);
+    if (RT_SUCCESS(rc))
+    {
+        bool const fWriterActive = pThis->hWriteThread != NIL_RTTHREAD;
+        rc = RTCritSectLeave(&pThis->CritSect);
+        if (fWriterActive && RT_SUCCESS(rc))
+            rc = VERR_RESOURCE_BUSY;
+    }
+
+    rtLocalIpcSessionRelease(pThis);
+    return rc;
+}
+
+
+/**
+ * Waits for data to become available for reading on the session.
+ *
+ * The calling thread is registered as the session's reader while waiting.
+ * When the wait is interrupted (VERR_INTERRUPTED / VERR_TRY_AGAIN) the
+ * remaining timeout is recalculated and the wait is retried.
+ *
+ * @returns IPRT status code.
+ * @retval  VINF_SUCCESS if there is data to read.
+ * @retval  VERR_TIMEOUT if nothing happened within @a cMillies.
+ * @retval  VERR_CANCELLED if a cancel request is pending.
+ * @retval  VERR_BROKEN_PIPE if an error/hang-up condition was detected and
+ *          there is no data left to read.
+ * @retval  VERR_RESOURCE_BUSY if another thread is currently registered as
+ *          the reader; no waiting is done in that case.
+ * @param   hSession    The session handle.
+ * @param   cMillies    How long to wait in milliseconds, RT_INDEFINITE_WAIT
+ *                      for no timeout.
+ */
+RTDECL(int) RTLocalIpcSessionWaitForData(RTLOCALIPCSESSION hSession, uint32_t cMillies)
+{
+    /*
+     * Validate input.
+     */
+    PRTLOCALIPCSESSIONINT pThis = hSession;
+    AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+    AssertReturn(pThis->u32Magic == RTLOCALIPCSESSION_MAGIC, VERR_INVALID_HANDLE);
+
+    /*
+     * Do the job.
+     */
+    rtLocalIpcSessionRetain(pThis);
+
+    int rc = RTCritSectEnter(&pThis->CritSect);
+    if (RT_SUCCESS(rc))
+    {
+        if (pThis->hReadThread == NIL_RTTHREAD)
+        {
+            pThis->hReadThread = RTThreadSelf();
+            uint64_t const msStart = RTTimeMilliTS();
+            RTMSINTERVAL const cMsOriginalTimeout = cMillies;
+
+            for (;;)
+            {
+                if (!pThis->fCancelled)
+                {
+                    /* Don't hold the lock while waiting. */
+                    rc = RTCritSectLeave(&pThis->CritSect);
+                    AssertRCBreak(rc);
+
+                    uint32_t fEvents = 0;
+#ifdef RT_OS_OS2
+                    /* This doesn't give us any error condition on hangup, so use HUP check. */
+                    Log(("RTLocalIpcSessionWaitForData: Calling RTSocketSelectOneEx...\n"));
+                    rc = RTSocketSelectOneEx(pThis->hSocket, RTPOLL_EVT_READ | RTPOLL_EVT_ERROR, &fEvents, cMillies);
+                    Log(("RTLocalIpcSessionWaitForData: RTSocketSelectOneEx returns %Rrc, fEvents=%#x\n", rc, fEvents));
+                    if (RT_SUCCESS(rc) && fEvents == RTPOLL_EVT_READ && rtLocalIpcPosixHasHup(pThis))
+                        rc = VERR_BROKEN_PIPE;
+#else
+/** @todo RTSocketPoll? */
+                    /* POLLHUP will be set on hangup. */
+                    struct pollfd PollFd;
+                    RT_ZERO(PollFd);
+                    PollFd.fd = RTSocketToNative(pThis->hSocket);
+                    PollFd.events = POLLHUP | POLLERR | POLLIN;
+                    Log(("RTLocalIpcSessionWaitForData: Calling poll...\n"));
+                    int cFds = poll(&PollFd, 1, cMillies == RT_INDEFINITE_WAIT ? -1 : (int)cMillies);
+                    if (cFds >= 1)
+                    {
+                        /* Linux & Darwin set both POLLIN and POLLHUP when the pipe is
+                           broken and there is no more data to read.  Google hints at
+                           NetBSD returning more sane values (POLLIN till no more data,
+                           then POLLHUP).  Solaris OTOH, doesn't ever seem to return
+                           POLLHUP. */
+                        fEvents = RTPOLL_EVT_READ;
+                        if (   (PollFd.revents & (POLLHUP | POLLERR))
+                            && !(PollFd.revents & POLLIN))
+                            fEvents = RTPOLL_EVT_ERROR;
+# if defined(RT_OS_SOLARIS)
+                        else if (PollFd.revents & POLLIN)
+# else
+                        else if ((PollFd.revents & (POLLIN | POLLHUP)) == (POLLIN | POLLHUP))
+# endif
+                        {
+                            /* Check if there is actually data available. */
+                            uint8_t bPeek;
+                            ssize_t rcRecv = recv(PollFd.fd, &bPeek, 1, MSG_DONTWAIT | MSG_PEEK);
+                            if (rcRecv <= 0)
+                                fEvents = RTPOLL_EVT_ERROR;
+                        }
+                        rc = VINF_SUCCESS;
+                    }
+                    /* Must look at the poll result here, not at rc: rc is the
+                       status of the critsect leave above and is always success
+                       at this point, which would turn every poll failure
+                       (EINTR included) into a timeout. */
+                    else if (cFds == 0)
+                        rc = VERR_TIMEOUT;
+                    else
+                        rc = RTErrConvertFromErrno(errno);
+                    Log(("RTLocalIpcSessionWaitForData: poll returns %u (rc=%d), revents=%#x\n", cFds, rc, PollFd.revents));
+#endif
+
+                    int rc2 = RTCritSectEnter(&pThis->CritSect);
+                    AssertRCBreakStmt(rc2, rc = RT_SUCCESS(rc) ? rc2 : rc);
+
+                    if (RT_SUCCESS(rc))
+                    {
+                        if (pThis->fCancelled)
+                            rc = VERR_CANCELLED;
+                        else if (fEvents & RTPOLL_EVT_ERROR)
+                            rc = VERR_BROKEN_PIPE;
+                    }
+                    else if (   rc == VERR_INTERRUPTED
+                             || rc == VERR_TRY_AGAIN)
+                    {
+                        /* Recalc cMillies. */
+                        if (cMsOriginalTimeout != RT_INDEFINITE_WAIT)
+                        {
+                            uint64_t cMsElapsed = RTTimeMilliTS() - msStart;
+                            cMillies = cMsElapsed >= cMsOriginalTimeout ? 0 : cMsOriginalTimeout - (RTMSINTERVAL)cMsElapsed;
+                        }
+                        continue;
+                    }
+                }
+                else
+                    rc = VERR_CANCELLED;
+                break;
+            }
+
+            pThis->hReadThread = NIL_RTTHREAD;
+        }
+        else
+            rc = VERR_RESOURCE_BUSY; /* Another reader is active; don't claim data is available. */
+        int rc2 = RTCritSectLeave(&pThis->CritSect);
+        AssertStmt(RT_SUCCESS(rc2), rc = RT_SUCCESS(rc) ? rc2 : rc);
+    }
+
+    rtLocalIpcSessionRelease(pThis);
+    return rc;
+}
+
+
+/**
+ * Queries the credentials of the peer at the other end of the session
+ * socket.
+ *
+ * Only implemented for Linux (SO_PEERCRED); other hosts get
+ * VERR_NOT_SUPPORTED.
+ *
+ * @returns IPRT status code.
+ * @param   hSession    IPC session handle.
+ * @param   pProcess    Where to return the remote peer's PID (can be NULL).
+ * @param   pUid        Where to return the remote peer's UID (can be NULL).
+ * @param   pGid        Where to return the remote peer's GID (can be NULL).
+ */
+static int rtLocalIpcSessionQueryUcred(RTLOCALIPCSESSION hSession, PRTPROCESS pProcess, PRTUID pUid, PRTGID pGid)
+{
+    PRTLOCALIPCSESSIONINT pThis = hSession;
+    AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+    AssertReturn(pThis->u32Magic == RTLOCALIPCSESSION_MAGIC, VERR_INVALID_HANDLE);
+
+#if defined(RT_OS_LINUX)
+    struct ucred PeerCred = { (pid_t)NIL_RTPROCESS, (uid_t)NIL_RTUID, (gid_t)NIL_RTGID };
+    socklen_t cbPeerCred = sizeof(PeerCred);
+
+    rtLocalIpcSessionRetain(pThis);
+
+    int rc = RTCritSectEnter(&pThis->CritSect);
+    if (RT_SUCCESS(rc))
+    {
+        int const rcPosix = getsockopt(RTSocketToNative(pThis->hSocket), SOL_SOCKET, SO_PEERCRED, &PeerCred, &cbPeerCred);
+        if (rcPosix < 0)
+            rc = RTErrConvertFromErrno(errno);
+        else
+        {
+            /* Hand back only what the caller asked for. */
+            if (pProcess)
+                *pProcess = PeerCred.pid;
+            if (pUid)
+                *pUid = PeerCred.uid;
+            if (pGid)
+                *pGid = PeerCred.gid;
+            rc = VINF_SUCCESS;
+        }
+
+        int rc2 = RTCritSectLeave(&pThis->CritSect);
+        AssertStmt(RT_SUCCESS(rc2), rc = RT_SUCCESS(rc) ? rc2 : rc);
+    }
+
+    rtLocalIpcSessionRelease(pThis);
+
+    return rc;
+
+#else
+    /** @todo Implement on other platforms too (mostly platform specific this).
+     *        Solaris: getpeerucred?  Darwin: LOCALPEERCRED or getpeereid? */
+    RT_NOREF(pProcess, pUid, pGid);
+    return VERR_NOT_SUPPORTED;
+#endif
+}
+
+
+/**
+ * Queries the process ID of the session peer.
+ *
+ * @returns IPRT status code from rtLocalIpcSessionQueryUcred.
+ * @param   hSession    The session handle.
+ * @param   pProcess    Where to return the peer's PID.
+ */
+RTDECL(int) RTLocalIpcSessionQueryProcess(RTLOCALIPCSESSION hSession, PRTPROCESS pProcess)
+{
+    int const rc = rtLocalIpcSessionQueryUcred(hSession, pProcess, NULL /*pUid*/, NULL /*pGid*/);
+    return rc;
+}
+
+
+/**
+ * Queries the user ID of the session peer.
+ *
+ * @returns IPRT status code from rtLocalIpcSessionQueryUcred.
+ * @param   hSession    The session handle.
+ * @param   pUid        Where to return the peer's UID.
+ */
+RTDECL(int) RTLocalIpcSessionQueryUserId(RTLOCALIPCSESSION hSession, PRTUID pUid)
+{
+    int const rc = rtLocalIpcSessionQueryUcred(hSession, NULL /*pProcess*/, pUid, NULL /*pGid*/);
+    return rc;
+}
+
+/**
+ * Queries the group ID of the session peer.
+ *
+ * @returns IPRT status code from rtLocalIpcSessionQueryUcred.
+ * @param   hSession    The session handle.
+ * @param   pGid        Where to return the peer's GID.
+ */
+RTDECL(int) RTLocalIpcSessionQueryGroupId(RTLOCALIPCSESSION hSession, PRTGID pGid)
+{
+    int const rc = rtLocalIpcSessionQueryUcred(hSession, NULL /*pProcess*/, NULL /*pUid*/, pGid);
+    return rc;
+}
+
diff --git a/src/VBox/Runtime/r3/posix/path-posix.cpp b/src/VBox/Runtime/r3/posix/path-posix.cpp
new file mode 100644
index 00000000..04462315
--- /dev/null
+++ b/src/VBox/Runtime/r3/posix/path-posix.cpp
@@ -0,0 +1,418 @@
+/* $Id: path-posix.cpp $ */
+/** @file
+ * IPRT - Path Manipulation, POSIX, Part 1.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP RTLOGGROUP_PATH
+#include <stdlib.h>
+#include <limits.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <pwd.h>
+
+#include <iprt/path.h>
+#include <iprt/env.h>
+#include <iprt/assert.h>
+#include <iprt/mem.h>
+#include <iprt/string.h>
+#include <iprt/err.h>
+#include <iprt/log.h>
+#include "internal/path.h"
+#include "internal/process.h"
+#include "internal/fs.h"
+
+
+
+/**
+ * Resolves a path using realpath() and returns the result in the caller's buffer.
+ *
+ * @returns IPRT status code; a realpath() failure is converted from errno.
+ * @param   pszPath         The path to resolve.
+ * @param   pszRealPath     Where to store the resolved path.
+ * @param   cchRealPath     Size of the pszRealPath buffer, handed to
+ *                          rtPathFromNativeCopy() as the destination size.
+ */
+RTDECL(int) RTPathReal(const char *pszPath, char *pszRealPath, size_t cchRealPath)
+{
+    /*
+     * Convert input.
+     */
+    char const *pszNativePath;
+    int rc = rtPathToNative(&pszNativePath, pszPath, NULL);
+    if (RT_SUCCESS(rc))
+    {
+        /*
+         * On POSIX platforms the API doesn't take a length parameter, which makes it
+         * a little bit more work: resolve into a PATH_MAX sized temporary buffer and
+         * copy/convert from there into the caller's buffer.
+         */
+        char szTmpPath[PATH_MAX + 1];
+        const char *psz = realpath(pszNativePath, szTmpPath);
+        if (psz)
+            rc = rtPathFromNativeCopy(pszRealPath, cchRealPath, szTmpPath, NULL);
+        else
+            rc = RTErrConvertFromErrno(errno);
+        rtPathFreeNative(pszNativePath, pszPath);
+    }
+
+    /* cchRealPath is a size_t, so it must be formatted with the 'z' size modifier. */
+    LogFlow(("RTPathReal(%p:{%s}, %p:{%s}, %zu): returns %Rrc\n", pszPath, pszPath,
+             pszRealPath, RT_SUCCESS(rc) ? pszRealPath : "<failed>", cchRealPath, rc));
+    return rc;
+}
+
+
+/**
+ * Changes the mode of a file system object using chmod().
+ *
+ * @returns IPRT status code.  VERR_INVALID_FMODE if the normalized mode does
+ *          not pass rtFsModeIsValidPermissions().
+ * @param   pszPath     Path to the object; must not be empty.
+ * @param   fMode       The new mode.  It is normalized first and only the
+ *                      RTFS_UNIX_MASK bits are passed to chmod().
+ */
+RTR3DECL(int) RTPathSetMode(const char *pszPath, RTFMODE fMode)
+{
+    AssertPtrReturn(pszPath, VERR_INVALID_POINTER);
+    AssertReturn(*pszPath, VERR_INVALID_PARAMETER);
+
+    /* Bail out early on a mode that isn't valid after normalization. */
+    fMode = rtFsModeNormalize(fMode, pszPath, 0, 0);
+    if (!rtFsModeIsValidPermissions(fMode))
+    {
+        AssertMsgFailed(("Invalid file mode! %RTfmode\n", fMode));
+        return VERR_INVALID_FMODE;
+    }
+
+    /* Convert the path and apply the mode. */
+    char const *pszNativePath;
+    int rc = rtPathToNative(&pszNativePath, pszPath, NULL);
+    if (RT_FAILURE(rc))
+        return rc;
+
+    if (chmod(pszNativePath, fMode & RTFS_UNIX_MASK) != 0)
+        rc = RTErrConvertFromErrno(errno);
+    rtPathFreeNative(pszNativePath, pszPath);
+    return rc;
+}
+
+
+/**
+ * Checks whether two native paths name the one and same file system object.
+ *
+ * Neither path is followed if it is a symbolic link (lstat is used).
+ *
+ * @returns true if device, inode and file type all match; false otherwise,
+ *          including when either lstat() call fails.
+ * @param   pszNativeSrc    The first path, native encoding.
+ * @param   pszNativeDst    The second path, native encoding.
+ */
+static bool rtPathSame(const char *pszNativeSrc, const char *pszNativeDst)
+{
+    struct stat StSrc;
+    struct stat StDst;
+    if (   lstat(pszNativeSrc, &StSrc) != 0
+        || lstat(pszNativeDst, &StDst) != 0)
+        return false;
+
+    Assert(StSrc.st_dev && StDst.st_dev);
+    Assert(StSrc.st_ino && StDst.st_ino);
+    return StSrc.st_dev == StDst.st_dev
+        && StSrc.st_ino == StDst.st_ino
+        && (StSrc.st_mode & S_IFMT) == (StDst.st_mode & S_IFMT);
+}
+
+
+/**
+ * Worker for RTPathRename, RTDirRename, RTFileRename.
+ *
+ * Checks the source (and optionally its type), checks the destination, and
+ * then calls rename().  When replacing is requested and rename() fails with
+ * ENOTDIR or EEXIST, it falls back on unlinking the destination and retrying.
+ *
+ * @returns IPRT status code.
+ * @param pszSrc The source path.
+ * @param pszDst The destination path.
+ * @param fRename The rename flags.
+ * @param fFileType The filetype. We use the RTFMODE filetypes here. If it's 0,
+ * anything goes. If it's RTFS_TYPE_DIRECTORY we'll check that the
+ * source is a directory. If it's RTFS_TYPE_FILE we'll check that it's
+ * not a directory (we are NOT checking whether it's a file).
+ */
+DECLHIDDEN(int) rtPathPosixRename(const char *pszSrc, const char *pszDst, unsigned fRename, RTFMODE fFileType)
+{
+ /*
+ * Convert the paths.
+ */
+ char const *pszNativeSrc;
+ int rc = rtPathToNative(&pszNativeSrc, pszSrc, NULL);
+ if (RT_SUCCESS(rc))
+ {
+ char const *pszNativeDst;
+ rc = rtPathToNative(&pszNativeDst, pszDst, NULL);
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * Check that the source exists and that any type that's specified matches.
+ * We have to check this first to avoid getting erroneous VERR_ALREADY_EXISTS
+ * errors from the next step.
+ *
+ * There are race conditions here (perhaps unlikely ones, but still), but I'm
+ * afraid there is little we can do to fix that.
+ */
+ struct stat SrcStat;
+ if (lstat(pszNativeSrc, &SrcStat))
+ rc = RTErrConvertFromErrno(errno);
+ else if (!fFileType)
+ rc = VINF_SUCCESS;
+ else if (RTFS_IS_DIRECTORY(fFileType))
+ rc = S_ISDIR(SrcStat.st_mode) ? VINF_SUCCESS : VERR_NOT_A_DIRECTORY;
+ else
+ rc = S_ISDIR(SrcStat.st_mode) ? VERR_IS_A_DIRECTORY : VINF_SUCCESS;
+ if (RT_SUCCESS(rc))
+ {
+ bool fSameFile = false;
+
+ /*
+ * Check if the target exists, rename is rather destructive.
+ * We'll have to make sure we don't overwrite the source!
+ * Another race condition btw.
+ */
+ struct stat DstStat;
+ if (lstat(pszNativeDst, &DstStat))
+ /* A missing destination is the normal case, anything else is an error. */
+ rc = errno == ENOENT ? VINF_SUCCESS : RTErrConvertFromErrno(errno);
+ else
+ {
+ Assert(SrcStat.st_dev && DstStat.st_dev);
+ Assert(SrcStat.st_ino && DstStat.st_ino);
+ if ( SrcStat.st_dev == DstStat.st_dev
+ && SrcStat.st_ino == DstStat.st_ino
+ && (SrcStat.st_mode & S_IFMT) == (DstStat.st_mode & S_IFMT))
+ {
+ /*
+ * It's likely that we're talking about the same file here.
+ * We should probably check paths or whatever, but for now this'll have to be enough.
+ */
+ fSameFile = true;
+ }
+ /* An existing destination is only acceptable if it is the source itself, or
+ if it isn't a directory and the caller asked for replacing. */
+ if (fSameFile)
+ rc = VINF_SUCCESS;
+ else if (S_ISDIR(DstStat.st_mode) || !(fRename & RTPATHRENAME_FLAGS_REPLACE))
+ rc = VERR_ALREADY_EXISTS;
+ else
+ rc = VINF_SUCCESS;
+
+ }
+ if (RT_SUCCESS(rc))
+ {
+ if (!rename(pszNativeSrc, pszNativeDst))
+ rc = VINF_SUCCESS;
+ else if ( (fRename & RTPATHRENAME_FLAGS_REPLACE)
+ && (errno == ENOTDIR || errno == EEXIST))
+ {
+ /*
+ * Check that the destination isn't a directory.
+ * Yet another race condition.
+ */
+ if (rtPathSame(pszNativeSrc, pszNativeDst))
+ {
+ rc = VINF_SUCCESS;
+ Log(("rtPathRename('%s', '%s', %#x ,%RTfmode): appears to be the same file... (errno=%d)\n",
+ pszSrc, pszDst, fRename, fFileType, errno));
+ }
+ else
+ {
+ if (lstat(pszNativeDst, &DstStat))
+ rc = errno != ENOENT ? RTErrConvertFromErrno(errno) : VINF_SUCCESS;
+ else if (S_ISDIR(DstStat.st_mode))
+ rc = VERR_ALREADY_EXISTS;
+ else
+ rc = VINF_SUCCESS;
+ if (RT_SUCCESS(rc))
+ {
+ /* Replace by hand: remove the destination, then retry the rename. */
+ if (!unlink(pszNativeDst))
+ {
+ if (!rename(pszNativeSrc, pszNativeDst))
+ rc = VINF_SUCCESS;
+ else
+ {
+ rc = RTErrConvertFromErrno(errno);
+ Log(("rtPathRename('%s', '%s', %#x ,%RTfmode): rename failed rc=%Rrc errno=%d\n",
+ pszSrc, pszDst, fRename, fFileType, rc, errno));
+ }
+ }
+ else
+ {
+ rc = RTErrConvertFromErrno(errno);
+ Log(("rtPathRename('%s', '%s', %#x ,%RTfmode): failed to unlink dst rc=%Rrc errno=%d\n",
+ pszSrc, pszDst, fRename, fFileType, rc, errno));
+ }
+ }
+ else
+ Log(("rtPathRename('%s', '%s', %#x ,%RTfmode): dst !dir check failed rc=%Rrc\n",
+ pszSrc, pszDst, fRename, fFileType, rc));
+ }
+ }
+ else
+ {
+ rc = RTErrConvertFromErrno(errno);
+ if (errno == ENOTDIR)
+ rc = VERR_ALREADY_EXISTS; /* unless somebody is racing us, this is the right interpretation */
+ Log(("rtPathRename('%s', '%s', %#x ,%RTfmode): rename failed rc=%Rrc errno=%d\n",
+ pszSrc, pszDst, fRename, fFileType, rc, errno));
+ }
+ }
+ else
+ Log(("rtPathRename('%s', '%s', %#x ,%RTfmode): destination check failed rc=%Rrc errno=%d\n",
+ pszSrc, pszDst, fRename, fFileType, rc, errno));
+ }
+ else
+ Log(("rtPathRename('%s', '%s', %#x ,%RTfmode): source type check failed rc=%Rrc errno=%d\n",
+ pszSrc, pszDst, fRename, fFileType, rc, errno));
+
+ rtPathFreeNative(pszNativeDst, pszDst);
+ }
+ rtPathFreeNative(pszNativeSrc, pszSrc);
+ }
+ return rc;
+}
+
+
+/**
+ * Renames a file system object.
+ *
+ * Validates the arguments (RTPATHRENAME_FLAGS_REPLACE is the only flag
+ * accepted) and hands the work to rtPathPosixRename() with a file type of 0.
+ */
+RTR3DECL(int) RTPathRename(const char *pszSrc, const char *pszDst, unsigned fRename)
+{
+ /*
+ * Validate input.
+ */
+ AssertPtrReturn(pszSrc, VERR_INVALID_POINTER);
+ AssertPtrReturn(pszDst, VERR_INVALID_POINTER);
+ AssertMsgReturn(*pszSrc, ("%p\n", pszSrc), VERR_INVALID_PARAMETER);
+ AssertMsgReturn(*pszDst, ("%p\n", pszDst), VERR_INVALID_PARAMETER);
+ AssertMsgReturn(!(fRename & ~RTPATHRENAME_FLAGS_REPLACE), ("%#x\n", fRename), VERR_INVALID_PARAMETER);
+
+ /*
+ * Hand it to the worker.
+ */
+ int rc = rtPathPosixRename(pszSrc, pszDst, fRename, 0);
+
+ Log(("RTPathRename(%p:{%s}, %p:{%s}, %#x): returns %Rrc\n", pszSrc, pszSrc, pszDst, pszDst, fRename, rc));
+ return rc;
+}
+
+
+/* Stub: ignores both arguments and always returns VERR_NOT_IMPLEMENTED. */
+RTR3DECL(int) RTPathUnlink(const char *pszPath, uint32_t fUnlink)
+{
+ RT_NOREF_PV(pszPath); RT_NOREF_PV(fUnlink);
+ return VERR_NOT_IMPLEMENTED;
+}
+
+
+/* Checks whether the path exists by calling RTPathExistsEx() with RTPATH_F_FOLLOW_LINK. */
+RTDECL(bool) RTPathExists(const char *pszPath)
+{
+ return RTPathExistsEx(pszPath, RTPATH_F_FOLLOW_LINK);
+}
+
+
+/**
+ * Checks whether a path exists.
+ *
+ * @returns true if stat()/lstat() succeeds on the path, false if it fails or
+ *          the path cannot be converted to the native encoding.
+ * @param   pszPath     The path to check; must not be empty.
+ * @param   fFlags      RTPATH_F_FOLLOW_LINK selects stat(), otherwise lstat()
+ *                      is used.
+ */
+RTDECL(bool) RTPathExistsEx(const char *pszPath, uint32_t fFlags)
+{
+    /*
+     * Validate input.
+     */
+    AssertPtrReturn(pszPath, false);
+    AssertReturn(*pszPath, false);
+    Assert(RTPATH_F_IS_VALID(fFlags, 0));
+
+    /*
+     * Convert the path; a conversion failure counts as "does not exist".
+     */
+    char const *pszNativePath;
+    int rc = rtPathToNative(&pszNativePath, pszPath, NULL);
+    if (RT_FAILURE(rc))
+        return false;
+
+    /*
+     * Probe it with stat() or lstat() depending on the link handling.
+     */
+    struct stat Stat;
+    int const iStatRet = (fFlags & RTPATH_F_FOLLOW_LINK)
+                       ? stat(pszNativePath, &Stat)
+                       : lstat(pszNativePath, &Stat);
+    rtPathFreeNative(pszNativePath, pszPath);
+    return iStatRet == 0;
+}
+
+
+/**
+ * Gets the current working directory.
+ *
+ * @returns IPRT status code.  VERR_FILENAME_TOO_LONG if the directory does
+ *          not fit even in the largest temporary buffer tried.
+ * @param   pszPath     Where to store the path.
+ * @param   cchPath     Size of the pszPath buffer.
+ */
+RTDECL(int) RTPathGetCurrent(char *pszPath, size_t cchPath)
+{
+    /*
+     * First attempt: a stack buffer of reasonable size.
+     */
+    char szStackBuf[RTPATH_MAX];
+    if (getcwd(szStackBuf, sizeof(szStackBuf)))
+        return rtPathFromNativeCopy(pszPath, cchPath, szStackBuf, NULL);
+
+    /* Only ERANGE (buffer too small) is worth retrying. */
+    int iSavedErrno = errno;
+    if (iSavedErrno != ERANGE)
+        return RTErrConvertFromErrno(iSavedErrno);
+
+    /*
+     * Retry with heap buffers of 1x, 2x, 3x and 4x RTPATH_BIG_MAX bytes.
+     */
+    for (size_t cbHeapBuf = RTPATH_BIG_MAX; cbHeapBuf <= RTPATH_BIG_MAX * 4; cbHeapBuf += RTPATH_BIG_MAX)
+    {
+        char *pszHeapBuf = (char *)RTMemTmpAlloc(cbHeapBuf);
+        if (!pszHeapBuf)
+            return VERR_NO_TMP_MEMORY;
+
+        if (getcwd(pszHeapBuf, cbHeapBuf))
+        {
+            int rc = rtPathFromNativeCopy(pszPath, cchPath, pszHeapBuf, NULL);
+            RTMemTmpFree(pszHeapBuf);
+            return rc;
+        }
+
+        /* Save errno before freeing, the free might clobber it. */
+        iSavedErrno = errno;
+        RTMemTmpFree(pszHeapBuf);
+        if (iSavedErrno != ERANGE)
+            return RTErrConvertFromErrno(iSavedErrno);
+    }
+    return VERR_FILENAME_TOO_LONG;
+}
+
+
+/**
+ * Changes the current working directory using chdir().
+ *
+ * @returns IPRT status code; a chdir() failure is converted from errno.
+ * @param   pszPath     The new working directory; must not be empty.
+ */
+RTDECL(int) RTPathSetCurrent(const char *pszPath)
+{
+    /*
+     * Validate input.
+     */
+    AssertPtrReturn(pszPath, VERR_INVALID_POINTER);
+    AssertReturn(*pszPath, VERR_INVALID_PARAMETER);
+
+    /*
+     * Convert the path, bailing out if that fails.
+     */
+    char const *pszNativePath;
+    int rc = rtPathToNative(&pszNativePath, pszPath, NULL);
+    if (RT_FAILURE(rc))
+        return rc;
+
+    /*
+     * Change the directory.
+     */
+    if (chdir(pszNativePath) != 0)
+        rc = RTErrConvertFromErrno(errno);
+    rtPathFreeNative(pszNativePath, pszPath);
+    return rc;
+}
+
diff --git a/src/VBox/Runtime/r3/posix/path2-posix.cpp b/src/VBox/Runtime/r3/posix/path2-posix.cpp
new file mode 100644
index 00000000..9af9512f
--- /dev/null
+++ b/src/VBox/Runtime/r3/posix/path2-posix.cpp
@@ -0,0 +1,316 @@
+/* $Id: path2-posix.cpp $ */
+/** @file
+ * IPRT - Path Manipulation, POSIX, Part 2 - RTPathQueryInfo.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP RTLOGGROUP_PATH
+#include <stdlib.h>
+#include <limits.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <stdio.h>
+#include <sys/types.h>
+
+#include <iprt/path.h>
+#include <iprt/env.h>
+#include <iprt/assert.h>
+#include <iprt/string.h>
+#include <iprt/err.h>
+#include <iprt/log.h>
+#include "internal/path.h"
+#include "internal/process.h"
+#include "internal/fs.h"
+
+
+/* Convenience wrapper: calls RTPathQueryInfoEx() with RTPATH_F_ON_LINK as the flags. */
+RTR3DECL(int) RTPathQueryInfo(const char *pszPath, PRTFSOBJINFO pObjInfo, RTFSOBJATTRADD enmAdditionalAttribs)
+{
+ return RTPathQueryInfoEx(pszPath, pObjInfo, enmAdditionalAttribs, RTPATH_F_ON_LINK);
+}
+
+
+/**
+ * Queries information about a file system object using stat() or lstat().
+ *
+ * @returns IPRT status code; a stat failure is converted from errno.
+ * @param   pszPath                 Path to the object; must not be empty.
+ * @param   pObjInfo                Where to return the object information.
+ * @param   enmAdditionalAttribs    Which set of additional attributes to fill
+ *                                  into pObjInfo->Attr.
+ * @param   fFlags                  RTPATH_F_FOLLOW_LINK selects stat(),
+ *                                  otherwise lstat() is used.
+ */
+RTR3DECL(int) RTPathQueryInfoEx(const char *pszPath, PRTFSOBJINFO pObjInfo, RTFSOBJATTRADD enmAdditionalAttribs, uint32_t fFlags)
+{
+    /*
+     * Validate input.
+     */
+    AssertPtrReturn(pszPath, VERR_INVALID_POINTER);
+    AssertReturn(*pszPath, VERR_INVALID_PARAMETER);
+    AssertPtrReturn(pObjInfo, VERR_INVALID_POINTER);
+    AssertMsgReturn(   enmAdditionalAttribs >= RTFSOBJATTRADD_NOTHING
+                    && enmAdditionalAttribs <= RTFSOBJATTRADD_LAST,
+                    ("Invalid enmAdditionalAttribs=%d\n", enmAdditionalAttribs),
+                    VERR_INVALID_PARAMETER);
+    AssertMsgReturn(RTPATH_F_IS_VALID(fFlags, 0), ("%#x\n", fFlags), VERR_INVALID_PARAMETER);
+
+    /*
+     * Convert the filename.
+     */
+    char const *pszNativePath;
+    int rc = rtPathToNative(&pszNativePath, pszPath, NULL);
+    if (RT_SUCCESS(rc))
+    {
+        struct stat Stat;
+        if (fFlags & RTPATH_F_FOLLOW_LINK)
+            rc = stat(pszNativePath, &Stat);
+        else
+            rc = lstat(pszNativePath, &Stat); /** @todo who doesn't have lstat again? */
+        if (!rc)
+        {
+            rtFsConvertStatToObjInfo(pObjInfo, &Stat, pszPath, 0);
+            switch (enmAdditionalAttribs)
+            {
+                case RTFSOBJATTRADD_NOTHING:
+                case RTFSOBJATTRADD_UNIX:
+                    Assert(pObjInfo->Attr.enmAdditional == RTFSOBJATTRADD_UNIX);
+                    break;
+
+                case RTFSOBJATTRADD_UNIX_OWNER:
+                    rtFsObjInfoAttrSetUnixOwner(pObjInfo, Stat.st_uid);
+                    break;
+
+                case RTFSOBJATTRADD_UNIX_GROUP:
+                    rtFsObjInfoAttrSetUnixGroup(pObjInfo, Stat.st_gid);
+                    break;
+
+                case RTFSOBJATTRADD_EASIZE:
+                    /** @todo Use SGI extended attribute interface to query EA info. */
+                    pObjInfo->Attr.enmAdditional = RTFSOBJATTRADD_EASIZE;
+                    pObjInfo->Attr.u.EASize.cb   = 0;
+                    break;
+
+                default:
+                    AssertMsgFailed(("Impossible!\n"));
+                    /* Don't return from here: the native path must still be freed below. */
+                    rc = VERR_INTERNAL_ERROR;
+                    break;
+            }
+        }
+        else
+            rc = RTErrConvertFromErrno(errno);
+        rtPathFreeNative(pszNativePath, pszPath);
+    }
+
+    LogFlow(("RTPathQueryInfoEx(%p:{%s}, pObjInfo=%p, %d): returns %Rrc\n",
+             pszPath, pszPath, pObjInfo, enmAdditionalAttribs, rc));
+    return rc;
+}
+
+
+/* Convenience wrapper: calls RTPathSetTimesEx() with RTPATH_F_ON_LINK as the flags. */
+RTR3DECL(int) RTPathSetTimes(const char *pszPath, PCRTTIMESPEC pAccessTime, PCRTTIMESPEC pModificationTime,
+ PCRTTIMESPEC pChangeTime, PCRTTIMESPEC pBirthTime)
+{
+ return RTPathSetTimesEx(pszPath, pAccessTime, pModificationTime, pChangeTime, pBirthTime, RTPATH_F_ON_LINK);
+}
+
+
+/**
+ * Sets the access and/or modification time of a file system object.
+ *
+ * Only pAccessTime and pModificationTime are applied (via utimes/lutimes);
+ * pChangeTime and pBirthTime are merely pointer-checked and logged here.
+ * When only one of the two applied times is given, the other one is read
+ * back from the object first so that it is written unchanged.
+ *
+ * @returns IPRT status code.
+ * @param pszPath Path to the object; must not be empty.
+ * @param pAccessTime New access time, NULL to keep the current one.
+ * @param pModificationTime New modification time, NULL to keep the current one.
+ * @param pChangeTime Not applied by this implementation (may be NULL).
+ * @param pBirthTime Not applied by this implementation (may be NULL).
+ * @param fFlags RTPATH_F_FOLLOW_LINK selects utimes(); otherwise the
+ * link itself is targeted where the platform allows it.
+ */
+RTR3DECL(int) RTPathSetTimesEx(const char *pszPath, PCRTTIMESPEC pAccessTime, PCRTTIMESPEC pModificationTime,
+ PCRTTIMESPEC pChangeTime, PCRTTIMESPEC pBirthTime, uint32_t fFlags)
+{
+ /*
+ * Validate input.
+ */
+ AssertPtrReturn(pszPath, VERR_INVALID_POINTER);
+ AssertReturn(*pszPath, VERR_INVALID_PARAMETER);
+ AssertPtrNullReturn(pAccessTime, VERR_INVALID_POINTER);
+ AssertPtrNullReturn(pModificationTime, VERR_INVALID_POINTER);
+ AssertPtrNullReturn(pChangeTime, VERR_INVALID_POINTER);
+ AssertPtrNullReturn(pBirthTime, VERR_INVALID_POINTER);
+ AssertMsgReturn(RTPATH_F_IS_VALID(fFlags, 0), ("%#x\n", fFlags), VERR_INVALID_PARAMETER);
+
+ /*
+ * Convert the paths.
+ */
+ char const *pszNativePath;
+ int rc = rtPathToNative(&pszNativePath, pszPath, NULL);
+ if (RT_SUCCESS(rc))
+ {
+ RTFSOBJINFO ObjInfo;
+
+ /*
+ * If it's a no-op, we'll only verify the existence of the file.
+ */
+ if (!pAccessTime && !pModificationTime)
+ rc = RTPathQueryInfoEx(pszPath, &ObjInfo, RTFSOBJATTRADD_NOTHING, fFlags);
+ else
+ {
+ /*
+ * Convert the input to timeval, getting the missing one if necessary,
+ * and call the API which does the change.
+ */
+ struct timeval aTimevals[2];
+ if (pAccessTime && pModificationTime)
+ {
+ RTTimeSpecGetTimeval(pAccessTime, &aTimevals[0]);
+ RTTimeSpecGetTimeval(pModificationTime, &aTimevals[1]);
+ }
+ else
+ {
+ /* Exactly one time was given: fetch the current value of the other one. */
+ rc = RTPathQueryInfoEx(pszPath, &ObjInfo, RTFSOBJATTRADD_UNIX, fFlags);
+ if (RT_SUCCESS(rc))
+ {
+ RTTimeSpecGetTimeval(pAccessTime ? pAccessTime : &ObjInfo.AccessTime, &aTimevals[0]);
+ RTTimeSpecGetTimeval(pModificationTime ? pModificationTime : &ObjInfo.ModificationTime, &aTimevals[1]);
+ }
+ else
+ Log(("RTPathSetTimes('%s',%p,%p,,): RTPathQueryInfo failed with %Rrc\n",
+ pszPath, pAccessTime, pModificationTime, rc));
+ }
+ if (RT_SUCCESS(rc))
+ {
+ if (fFlags & RTPATH_F_FOLLOW_LINK)
+ {
+ if (utimes(pszNativePath, aTimevals))
+ rc = RTErrConvertFromErrno(errno);
+ }
+#if (defined(RT_OS_DARWIN) && MAC_OS_X_VERSION_MIN_REQUIRED >= 1050) \
+ || defined(RT_OS_FREEBSD) \
+ || defined(RT_OS_LINUX) \
+ || defined(RT_OS_OS2) /** @todo who really has lutimes? */
+ else
+ {
+ if (lutimes(pszNativePath, aTimevals))
+ {
+ /* If lutimes is not supported (e.g. linux < 2.6.22), try fall back on utimes: */
+ if (errno != ENOSYS)
+ rc = RTErrConvertFromErrno(errno);
+ else
+ {
+ /* ObjInfo was only filled in above when one of the times was missing. */
+ if (pAccessTime && pModificationTime)
+ rc = RTPathQueryInfoEx(pszPath, &ObjInfo, RTFSOBJATTRADD_UNIX, fFlags);
+ if (RT_SUCCESS(rc) && !RTFS_IS_SYMLINK(ObjInfo.Attr.fMode))
+ {
+ if (utimes(pszNativePath, aTimevals))
+ rc = RTErrConvertFromErrno(errno);
+ }
+ else
+ rc = VERR_NOT_SUPPORTED;
+ }
+ }
+ }
+#else
+ else
+ {
+ /* No lutimes here: refuse symbolic links, use utimes for everything else. */
+ if (pAccessTime && pModificationTime)
+ rc = RTPathQueryInfoEx(pszPath, &ObjInfo, RTFSOBJATTRADD_UNIX, fFlags);
+ if (RT_SUCCESS(rc) && RTFS_IS_SYMLINK(ObjInfo.Attr.fMode))
+ rc = VERR_NS_SYMLINK_SET_TIME;
+ else if (RT_SUCCESS(rc))
+ {
+ if (utimes(pszNativePath, aTimevals))
+ rc = RTErrConvertFromErrno(errno);
+ }
+ }
+#endif
+ if (RT_FAILURE(rc))
+ Log(("RTPathSetTimes('%s',%p,%p,,): failed with %Rrc and errno=%d\n",
+ pszPath, pAccessTime, pModificationTime, rc, errno));
+ }
+ }
+ rtPathFreeNative(pszNativePath, pszPath);
+ }
+
+ LogFlow(("RTPathSetTimes(%p:{%s}, %p:{%RDtimespec}, %p:{%RDtimespec}, %p:{%RDtimespec}, %p:{%RDtimespec}): return %Rrc\n",
+ pszPath, pszPath, pAccessTime, pAccessTime, pModificationTime, pModificationTime,
+ pChangeTime, pChangeTime, pBirthTime, pBirthTime, rc));
+ return rc;
+}
+
+
+/* Convenience wrapper: calls RTPathSetOwnerEx() with RTPATH_F_ON_LINK as the flags. */
+RTR3DECL(int) RTPathSetOwner(const char *pszPath, uint32_t uid, uint32_t gid)
+{
+ return RTPathSetOwnerEx(pszPath, uid, gid, RTPATH_F_ON_LINK);
+}
+
+
+/**
+ * Changes the owner and/or group of a file system object.
+ *
+ * @returns IPRT status code; a chown/lchown failure is converted from errno.
+ * @param   pszPath     Path to the object; must not be empty.
+ * @param   uid         The new owner, NIL_RTUID is translated to (uid_t)-1.
+ * @param   gid         The new group, NIL_RTGID is translated to (gid_t)-1.
+ * @param   fFlags      RTPATH_F_FOLLOW_LINK selects chown(), otherwise
+ *                      lchown() is used.
+ */
+RTR3DECL(int) RTPathSetOwnerEx(const char *pszPath, uint32_t uid, uint32_t gid, uint32_t fFlags)
+{
+    /*
+     * Validate input.
+     */
+    AssertPtrReturn(pszPath, VERR_INVALID_POINTER);
+    AssertReturn(*pszPath, VERR_INVALID_PARAMETER);
+    AssertMsgReturn(RTPATH_F_IS_VALID(fFlags, 0), ("%#x\n", fFlags), VERR_INVALID_PARAMETER);
+
+    /* Translate to the native types and make sure nothing got truncated on the way. */
+    uid_t uidNative = uid != NIL_RTUID ? (uid_t)uid : (uid_t)-1;
+    AssertReturn(uid == uidNative, VERR_INVALID_PARAMETER);
+    gid_t gidNative = gid != NIL_RTGID ? (gid_t)gid : (gid_t)-1;
+    AssertReturn(gid == gidNative, VERR_INVALID_PARAMETER);
+
+    /*
+     * Convert the path.
+     */
+    char const *pszNativePath;
+    int rc = rtPathToNative(&pszNativePath, pszPath, NULL);
+    if (RT_SUCCESS(rc))
+    {
+        if (fFlags & RTPATH_F_FOLLOW_LINK)
+        {
+            if (chown(pszNativePath, uidNative, gidNative))
+                rc = RTErrConvertFromErrno(errno);
+        }
+#if 1
+        else
+        {
+            if (lchown(pszNativePath, uidNative, gidNative))
+                rc = RTErrConvertFromErrno(errno);
+        }
+#else
+        else
+        {
+            RTFSOBJINFO ObjInfo;
+            rc = RTPathQueryInfoEx(pszPath, &ObjInfo, RTFSOBJATTRADD_UNIX, fFlags);
+            if (RT_SUCCESS(rc) && RTFS_IS_SYMLINK(ObjInfo.Attr.fMode))
+                rc = VERR_NS_SYMLINK_CHANGE_OWNER;
+            else if (RT_SUCCESS(rc))
+            {
+                if (lchown(pszNativePath, uidNative, gidNative))
+                    rc = RTErrConvertFromErrno(errno);
+            }
+        }
+#endif
+        if (RT_FAILURE(rc))
+            Log(("RTPathSetOwnerEx('%s',%d,%d): failed with %Rrc and errno=%d\n",
+                 pszPath, uid, gid, rc, errno));
+
+        rtPathFreeNative(pszNativePath, pszPath);
+    }
+
+    LogFlow(("RTPathSetOwnerEx(%p:{%s}, uid=%d, gid=%d): return %Rrc\n",
+             pszPath, pszPath, uid, gid, rc));
+    return rc;
+}
+
diff --git a/src/VBox/Runtime/r3/posix/pathhost-posix.cpp b/src/VBox/Runtime/r3/posix/pathhost-posix.cpp
new file mode 100644
index 00000000..b0c4fe46
--- /dev/null
+++ b/src/VBox/Runtime/r3/posix/pathhost-posix.cpp
@@ -0,0 +1,294 @@
+/* $Id: pathhost-posix.cpp $ */
+/** @file
+ * IPRT - Path Conversions, POSIX.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP RTLOGGROUP_PATH
+#include "internal/iprt.h"
+#include "internal/path.h"
+#include "internal/string.h"
+#include "internal/thread.h"
+
+#include <iprt/env.h>
+#include <iprt/err.h>
+#include <iprt/string.h>
+#include <iprt/once.h>
+
+
+/*********************************************************************************************************************************
+* Global Variables *
+*********************************************************************************************************************************/
+/** Initialize once object. */
+static RTONCE g_OnceInitPathConv = RTONCE_INITIALIZER;
+/** If set, then we can pass UTF-8 thru directly. */
+static bool g_fPassthruUtf8 = false;
+/** The UTF-8 to FS iconv cache entry. */
+static RTSTRICONV g_enmUtf8ToFsIdx = RTSTRICONV_UTF8_TO_LOCALE;
+/** The FS to UTF-8 iconv cache entry. */
+static RTSTRICONV g_enmFsToUtf8Idx = RTSTRICONV_LOCALE_TO_UTF8;
+/** The codeset we're using. */
+static char g_szFsCodeset[32];
+
+
+/**
+ * Case insensitive string compare against a string that is supplied in both
+ * its upper case and its lower case spelling.
+ *
+ * A character of pszStr1 matches when it equals the character at the same
+ * position of either spelling.
+ *
+ * @returns 0 if equal, -1 or 1 on the first mismatch (decided by comparing
+ *          against the upper case spelling).
+ * @param   pszStr1     The string to compare against pszLower and pszUpper.
+ * @param   pszUpper    The upper case edition of the 2nd string.
+ * @param   pszLower    The lower case edition of the 2nd string.
+ */
+static int rtPathStrICmp(const char *pszStr1, const char *pszUpper, const char *pszLower)
+{
+    Assert(strlen(pszLower) == strlen(pszUpper));
+    for (size_t off = 0; ; off++)
+    {
+        char const chStr   = pszStr1[off];
+        char const chUpper = pszUpper[off];
+        char const chLower = pszLower[off];
+        if (chStr != chUpper && chStr != chLower)
+            return chStr < chUpper ? -1 : 1;
+        if (chStr == '\0')
+            return 0;
+    }
+}
+
+/**
+ * Checks whether a codeset name is one of those we treat as UTF-8 compatible,
+ * i.e. the default locales, the 7-bit ASCII aliases and the UTF-8 aliases.
+ *
+ * @returns true if we can do UTF-8 passthru, false if not (NULL included).
+ * @param   pszCodeset  The codeset in question.
+ */
+static bool rtPathConvInitIsUtf8(const char *pszCodeset)
+{
+    /*
+     * The known names, each spelled in upper and lower case so that
+     * rtPathStrICmp can be used instead of RTStrICmp at this point.
+     */
+    static struct
+    {
+        const char *pszUpper;
+        const char *pszLower;
+    } const s_aUtf8Compatible[] =
+    {
+        /* The default locale. */
+        { "C",                  "c" },
+        { "POSIX",              "posix" },
+        /* 7-bit ASCII. */
+        { "ANSI_X3.4-1968",     "ansi_x3.4-1968" },
+        { "ANSI_X3.4-1986",     "ansi_x3.4-1986" },
+        { "US-ASCII",           "us-ascii" },
+        { "ISO646-US",          "iso646-us" },
+        { "ISO_646.IRV:1991",   "iso_646.irv:1991" },
+        { "ISO-IR-6",           "iso-ir-6" },
+        { "IBM367",             "ibm367" },
+        /* UTF-8 */
+        { "UTF-8",              "utf-8" },
+        { "UTF8",               "utf8" },
+        { "ISO-10646/UTF-8",    "iso-10646/utf-8" },
+        { "ISO-10646/UTF8",     "iso-10646/utf8" }
+    };
+
+    /* Paranoia: no codeset, no match.  Otherwise scan until the first hit. */
+    bool fMatch = false;
+    if (pszCodeset)
+    {
+        size_t idx = 0;
+        while (!fMatch && idx < RT_ELEMENTS(s_aUtf8Compatible))
+        {
+            fMatch = rtPathStrICmp(pszCodeset, s_aUtf8Compatible[idx].pszUpper, s_aUtf8Compatible[idx].pszLower) == 0;
+            idx++;
+        }
+    }
+    return fMatch;
+}
+
+
+/**
+ * Init once for the path conversion code.
+ *
+ * Picks the file system codeset, either from the RTPATH_CODESET_ENV_VAR
+ * environment variable or from the locale, and sets up the globals used by
+ * the path conversion functions accordingly.
+ *
+ * @returns IPRT status code.
+ * @param pvUser Unused.
+ */
+static DECLCALLBACK(int32_t) rtPathConvInitOnce(void *pvUser)
+{
+ /*
+ * Read the environment variable, no mercy on misconfigs here except that
+ * empty values are quietly ignored. (We use a temp buffer for stripping.)
+ */
+ char *pszEnvValue = NULL;
+ char szEnvValue[sizeof(g_szFsCodeset)];
+ int rc = RTEnvGetEx(RTENV_DEFAULT, RTPATH_CODESET_ENV_VAR, szEnvValue, sizeof(szEnvValue), NULL);
+ if (rc != VERR_ENV_VAR_NOT_FOUND && RT_FAILURE(rc))
+ return rc;
+ if (RT_SUCCESS(rc))
+ pszEnvValue = RTStrStrip(szEnvValue);
+
+ if (pszEnvValue && *pszEnvValue)
+ {
+ /* Explicit override via the environment. */
+ g_fPassthruUtf8 = rtPathConvInitIsUtf8(pszEnvValue);
+ g_enmFsToUtf8Idx = RTSTRICONV_FS_TO_UTF8;
+ g_enmUtf8ToFsIdx = RTSTRICONV_UTF8_TO_FS;
+ /* Cannot overflow: szEnvValue has the same size as g_szFsCodeset. */
+ strcpy(g_szFsCodeset, pszEnvValue);
+ }
+ else
+ {
+ /* No override: go by the locale codeset. A NULL codeset is treated like one that is too long. */
+ const char *pszCodeset = rtStrGetLocaleCodeset();
+ size_t cchCodeset = pszCodeset ? strlen(pszCodeset) : sizeof(g_szFsCodeset);
+ if (cchCodeset >= sizeof(g_szFsCodeset))
+ /* This shouldn't happen, but we'll manage. */
+ g_szFsCodeset[0] = '\0';
+ else
+ {
+ memcpy(g_szFsCodeset, pszCodeset, cchCodeset + 1);
+ pszCodeset = g_szFsCodeset;
+ }
+ g_fPassthruUtf8 = rtPathConvInitIsUtf8(pszCodeset);
+ g_enmFsToUtf8Idx = RTSTRICONV_LOCALE_TO_UTF8;
+ g_enmUtf8ToFsIdx = RTSTRICONV_UTF8_TO_LOCALE;
+ }
+
+ NOREF(pvUser);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Converts an IPRT (UTF-8) path to the native file system encoding.
+ *
+ * When UTF-8 can be passed through, or the path is empty, the input pointer
+ * itself is returned; otherwise rtStrConvert() produces the converted string.
+ * Hand the result to rtPathFreeNative() when done.
+ *
+ * @returns IPRT status code.
+ * @param   ppszNativePath  Where to return the native path; set to NULL on
+ *                          entry.
+ * @param   pszPath         The path to convert.
+ * @param   pszBasePath     Unused.
+ */
+int rtPathToNative(char const **ppszNativePath, const char *pszPath, const char *pszBasePath)
+{
+    NOREF(pszBasePath); /* We don't query the FS for codeset preferences. */
+    *ppszNativePath = NULL;
+
+    int rc = RTOnce(&g_OnceInitPathConv, rtPathConvInitOnce, NULL);
+    if (RT_FAILURE(rc))
+        return rc;
+
+    /* A real conversion is only needed for non-empty paths in a non-UTF-8 codeset. */
+    if (!g_fPassthruUtf8 && *pszPath)
+        return rtStrConvert(pszPath, strlen(pszPath), "UTF-8",
+                            (char **)ppszNativePath, 0, g_szFsCodeset,
+                            2, g_enmUtf8ToFsIdx);
+
+    *ppszNativePath = pszPath;
+    return rc;
+}
+
+
+/**
+ * Frees a native path, unless it is NULL or is the very same pointer as the
+ * path it was made from.
+ *
+ * @param   pszNativePath   The native path to free.
+ * @param   pszPath         The path it was made from.
+ */
+void rtPathFreeNative(char const *pszNativePath, const char *pszPath)
+{
+    if (!pszNativePath || pszNativePath == pszPath)
+        return;
+    RTStrFree((char *)pszNativePath);
+}
+
+
+/**
+ * Converts a native path to an IPRT (UTF-8) path in a newly allocated string.
+ *
+ * @returns IPRT status code, VERR_NO_STR_MEMORY if the copy cannot be allocated.
+ * @param ppszPath Where to return the path; set to NULL on entry.
+ * @param pszNativePath The native path to convert.
+ * @param pszBasePath Unused.
+ */
+int rtPathFromNative(const char **ppszPath, const char *pszNativePath, const char *pszBasePath)
+{
+ *ppszPath = NULL;
+
+ int rc = RTOnce(&g_OnceInitPathConv, rtPathConvInitOnce, NULL);
+ if (RT_SUCCESS(rc))
+ {
+ if (g_fPassthruUtf8 || !*pszNativePath)
+ {
+ /* No conversion needed: get the length (presumably validating the encoding
+ as well - confirm against rtUtf8Length) and duplicate the string. */
+ size_t cCpsIgnored;
+ size_t cchNativePath;
+ rc = rtUtf8Length(pszNativePath, RTSTR_MAX, &cCpsIgnored, &cchNativePath);
+ if (RT_SUCCESS(rc))
+ {
+ char *pszPath;
+ *ppszPath = pszPath = RTStrAlloc(cchNativePath + 1);
+ if (pszPath)
+ memcpy(pszPath, pszNativePath, cchNativePath + 1);
+ else
+ rc = VERR_NO_STR_MEMORY;
+ }
+ }
+ else
+ rc = rtStrConvert(pszNativePath, strlen(pszNativePath), g_szFsCodeset,
+ (char **)ppszPath, 0, "UTF-8",
+ 2, g_enmFsToUtf8Idx);
+ }
+ NOREF(pszBasePath); /* We don't query the FS for codeset preferences. */
+ return rc;
+}
+
+
+/**
+ * Frees an IPRT path, unless it is NULL or is the very same pointer as the
+ * native path it was made from.
+ *
+ * @param   pszPath         The IPRT path to free.
+ * @param   pszNativePath   The native path it was made from.
+ */
+void rtPathFreeIprt(const char *pszPath, const char *pszNativePath)
+{
+    if (!pszPath || pszPath == pszNativePath)
+        return;
+    RTStrFree((char *)pszPath);
+}
+
+
+/**
+ * Converts a native path to an IPRT (UTF-8) path, storing it in a caller
+ * supplied buffer.
+ *
+ * @returns IPRT status code.
+ * @param pszPath The output buffer.
+ * @param cbPath The size of the output buffer.
+ * @param pszNativePath The native path to convert.
+ * @param pszBasePath Unused.
+ */
+int rtPathFromNativeCopy(char *pszPath, size_t cbPath, const char *pszNativePath, const char *pszBasePath)
+{
+ int rc = RTOnce(&g_OnceInitPathConv, rtPathConvInitOnce, NULL);
+ if (RT_SUCCESS(rc))
+ {
+ /* Plain copy when no conversion is needed, otherwise convert into the buffer. */
+ if (g_fPassthruUtf8 || !*pszNativePath)
+ rc = RTStrCopy(pszPath, cbPath, pszNativePath);
+ else if (cbPath)
+ rc = rtStrConvert(pszNativePath, strlen(pszNativePath), g_szFsCodeset,
+ &pszPath, cbPath, "UTF-8",
+ 2, g_enmFsToUtf8Idx);
+ else
+ /* A zero sized buffer is never handed to rtStrConvert. */
+ rc = VERR_BUFFER_OVERFLOW;
+ }
+
+ NOREF(pszBasePath); /* We don't query the FS for codeset preferences. */
+ return rc;
+}
+
+
+/**
+ * Converts a native path to an IPRT (UTF-8) path, returning it via
+ * RTStrDupEx() or rtStrConvert() depending on whether conversion is needed.
+ *
+ * @returns IPRT status code.
+ * @param   ppszPath        Where to return the path.
+ * @param   pszNativePath   The native path to convert.
+ * @param   pszBasePath     Unused.
+ */
+int rtPathFromNativeDup(char **ppszPath, const char *pszNativePath, const char *pszBasePath)
+{
+    NOREF(pszBasePath); /* We don't query the FS for codeset preferences. */
+
+    int rc = RTOnce(&g_OnceInitPathConv, rtPathConvInitOnce, NULL);
+    if (RT_FAILURE(rc))
+        return rc;
+
+    /* A real conversion is only needed for non-empty paths in a non-UTF-8 codeset. */
+    if (!g_fPassthruUtf8 && *pszNativePath)
+        return rtStrConvert(pszNativePath, strlen(pszNativePath), g_szFsCodeset,
+                            ppszPath, 0, "UTF-8",
+                            2, g_enmFsToUtf8Idx);
+
+    return RTStrDupEx(ppszPath, pszNativePath);
+}
+
diff --git a/src/VBox/Runtime/r3/posix/pipe-posix.cpp b/src/VBox/Runtime/r3/posix/pipe-posix.cpp
new file mode 100644
index 00000000..dc673bd3
--- /dev/null
+++ b/src/VBox/Runtime/r3/posix/pipe-posix.cpp
@@ -0,0 +1,754 @@
+/* $Id: pipe-posix.cpp $ */
+/** @file
+ * IPRT - Anonymous Pipes, POSIX Implementation.
+ */
+
+/*
+ * Copyright (C) 2010-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#include <iprt/pipe.h>
+#include "internal/iprt.h"
+
+#include <iprt/asm.h>
+#include <iprt/assert.h>
+#include <iprt/err.h>
+#include <iprt/mem.h>
+#include <iprt/poll.h>
+#include <iprt/string.h>
+#include <iprt/thread.h>
+#include "internal/magics.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <sys/poll.h>
+#include <sys/stat.h>
+#include <signal.h>
+#ifdef RT_OS_LINUX
+# include <sys/syscall.h>
+#endif
+#ifdef RT_OS_SOLARIS
+# include <sys/filio.h>
+#endif
+
+#include "internal/pipe.h"
+
+
+/*********************************************************************************************************************************
+* Structures and Typedefs *
+*********************************************************************************************************************************/
+typedef struct RTPIPEINTERNAL /* Instance data behind an RTPIPE handle in this POSIX implementation. */
+{
+ /** Magic value (RTPIPE_MAGIC).  RTPipeCloseEx atomically replaces it with
+ * ~RTPIPE_MAGIC before tearing the instance down. */
+ uint32_t u32Magic;
+ /** The file descriptor.  RTPipeCloseEx sets it to -1. */
+ int fd;
+ /** Set if this is the read end, clear if it's the write end. */
+ bool fRead;
+ /** RTPipeFromNative: Leave it open on RTPipeClose. */
+ bool fLeaveOpen;
+ /** Atomically operated state variable.
+ *
+ * - Bits 0 thru 29 - Users of the new mode.
+ * - Bit 30 - The pipe mode, set indicates blocking.
+ * - Bit 31 - Set when we're switching the mode.
+ *
+ * The user count is incremented by rtPipeTryBlocking / rtPipeTryNonBlocking
+ * and decremented again by the read/write functions once the I/O is done.
+ */
+ uint32_t volatile u32State;
+} RTPIPEINTERNAL;
+
+
+/*********************************************************************************************************************************
+* Defined Constants And Macros *
+*********************************************************************************************************************************/
+/** @name RTPIPEINTERNAL::u32State defines
+ * @{ */
+#define RTPIPE_POSIX_BLOCKING UINT32_C(0x40000000) /**< Bit 30: set when the pipe is in blocking mode. */
+#define RTPIPE_POSIX_SWITCHING UINT32_C(0x80000000) /**< Bit 31: set while the mode is being switched. */
+#define RTPIPE_POSIX_SWITCHING_BIT 31 /**< Bit number of RTPIPE_POSIX_SWITCHING, as passed to ASMAtomicBitClear. */
+#define RTPIPE_POSIX_USERS_MASK UINT32_C(0x3fffffff) /**< Bits 0 thru 29: the user count. */
+/** @} */
+
+
+
+/**
+ * Wrapper for calling pipe2() or pipe().
+ *
+ * When pipe2() is used the returned handles are marked close-on-exec from the
+ * start and therefore do not risk racing process creation calls on other
+ * threads.
+ *
+ * @returns See pipe().
+ * @param   paFds               See pipe().
+ * @param   piNewPipeSyscall    Where to cache which call we should use.  -1 if
+ *                              pipe(), 1 if pipe2(), 0 if not yet decided.
+ */
+static int my_pipe_wrapper(int *paFds, int *piNewPipeSyscall)
+{
+#if defined(RT_OS_LINUX) && defined(__NR_pipe2) && defined(O_CLOEXEC)
+    if (*piNewPipeSyscall >= 0)
+    {
+        /* pipe2() has not been ruled out yet, so give it a go. */
+        long const rcSys = syscall(__NR_pipe2, paFds, O_CLOEXEC);
+        if (rcSys >= 0)
+        {
+            if (*piNewPipeSyscall == 0)
+                *piNewPipeSyscall = 1;
+            return (int)rcSys;
+        }
+
+        /* It failed; remember to go straight for pipe() from now on. */
+        *piNewPipeSyscall = -1;
+    }
+#else
+    /* No pipe2() in this build; record that plain pipe() is what we use. */
+    if (*piNewPipeSyscall >= 0)
+        *piNewPipeSyscall = -1;
+#endif
+
+    return pipe(paFds);
+}
+
+
+/**
+ * Creates an anonymous pipe and returns a read and a write handle for it.
+ *
+ * The close-on-exec flag of each descriptor is adjusted according to the
+ * RTPIPE_C_INHERIT_READ / RTPIPE_C_INHERIT_WRITE flags, and SIGPIPE is set to
+ * be ignored before returning successfully.
+ *
+ * @returns IPRT status code.
+ * @param   phPipeRead      Where to return the read end handle.
+ * @param   phPipeWrite     Where to return the write end handle.
+ * @param   fFlags          RTPIPE_C_XXX flags.
+ */
+RTDECL(int) RTPipeCreate(PRTPIPE phPipeRead, PRTPIPE phPipeWrite, uint32_t fFlags)
+{
+    AssertPtrReturn(phPipeRead, VERR_INVALID_POINTER);
+    AssertPtrReturn(phPipeWrite, VERR_INVALID_POINTER);
+    AssertReturn(!(fFlags & ~RTPIPE_C_VALID_MASK), VERR_INVALID_PARAMETER);
+
+    /*
+     * Create the pipe.
+     */
+    static int s_iNewPipeSyscall = 0;
+    int        aFds[2]           = {-1, -1};
+    if (my_pipe_wrapper(aFds, &s_iNewPipeSyscall) != 0)
+        return RTErrConvertFromErrno(errno);
+
+    /*
+     * Clear/set the close-on-exec flag as required.  Index 0 is the read end,
+     * index 1 the write end.  A descriptor only needs touching when the wanted
+     * inheritability differs from how it was created: created close-on-exec
+     * but should be inherited, or created inheritable but should not be.
+     */
+    bool const     fCreatedCloExec = s_iNewPipeSyscall > 0;
+    int const      fFdFlagsToSet   = fCreatedCloExec ? 0 : FD_CLOEXEC;
+    uint32_t const afInherit[2]    = { RTPIPE_C_INHERIT_READ, RTPIPE_C_INHERIT_WRITE };
+    int            rc              = VINF_SUCCESS;
+    for (unsigned iEnd = 0; iEnd < 2; iEnd++)
+    {
+        bool const fInherit = RT_BOOL(fFlags & afInherit[iEnd]);
+        if (fInherit == fCreatedCloExec)
+        {
+            if (fcntl(aFds[iEnd], F_SETFD, fFdFlagsToSet))
+                rc = RTErrConvertFromErrno(errno);
+        }
+    }
+
+    if (RT_SUCCESS(rc))
+    {
+        /*
+         * Create the two handles.
+         */
+        rc = VERR_NO_MEMORY;
+        RTPIPEINTERNAL *pThisR = (RTPIPEINTERNAL *)RTMemAlloc(sizeof(RTPIPEINTERNAL));
+        if (pThisR)
+        {
+            RTPIPEINTERNAL *pThisW = (RTPIPEINTERNAL *)RTMemAlloc(sizeof(RTPIPEINTERNAL));
+            if (pThisW)
+            {
+                /* The read end. */
+                pThisR->u32Magic   = RTPIPE_MAGIC;
+                pThisR->fd         = aFds[0];
+                pThisR->fRead      = true;
+                pThisR->fLeaveOpen = false;
+                pThisR->u32State   = RTPIPE_POSIX_BLOCKING;
+
+                /* The write end. */
+                pThisW->u32Magic   = RTPIPE_MAGIC;
+                pThisW->fd         = aFds[1];
+                pThisW->fRead      = false;
+                pThisW->fLeaveOpen = false;
+                pThisW->u32State   = RTPIPE_POSIX_BLOCKING;
+
+                *phPipeRead  = pThisR;
+                *phPipeWrite = pThisW;
+
+                /*
+                 * Before we leave, make sure to shut up SIGPIPE.
+                 */
+                signal(SIGPIPE, SIG_IGN);
+                return VINF_SUCCESS;
+            }
+
+            RTMemFree(pThisR);
+        }
+    }
+
+    /* Failed: don't leak the descriptors. */
+    close(aFds[0]);
+    close(aFds[1]);
+    return rc;
+}
+
+
+/**
+ * Closes one end of a pipe, optionally leaving the native descriptor open.
+ *
+ * @returns IPRT status code.  NIL_RTPIPE is quietly accepted.
+ * @param   hPipe       The pipe handle to destroy.
+ * @param   fLeaveOpen  If set, the file descriptor is not closed.  It is also
+ *                      left open when the handle itself has fLeaveOpen set.
+ */
+RTDECL(int) RTPipeCloseEx(RTPIPE hPipe, bool fLeaveOpen)
+{
+    RTPIPEINTERNAL *pThis = hPipe;
+    if (pThis == NIL_RTPIPE)
+        return VINF_SUCCESS;
+    AssertPtrReturn(pThis, VERR_INVALID_PARAMETER);
+    AssertReturn(pThis->u32Magic == RTPIPE_MAGIC, VERR_INVALID_HANDLE);
+
+    /*
+     * Invalidate the magic first; only the caller that wins the exchange
+     * carries on with the cleanup.
+     */
+    AssertReturn(ASMAtomicCmpXchgU32(&pThis->u32Magic, ~RTPIPE_MAGIC, RTPIPE_MAGIC), VERR_INVALID_HANDLE);
+
+    /* Detach the descriptor from the instance and close it unless told not to. */
+    int const fdNative = pThis->fd;
+    pThis->fd = -1;
+    bool const fKeepOpen = fLeaveOpen || pThis->fLeaveOpen;
+    if (!fKeepOpen)
+        close(fdNative);
+
+    /* Nobody should still be using the handle; complain and give them a moment if so. */
+    uint32_t const cUsers = ASMAtomicReadU32(&pThis->u32State) & RTPIPE_POSIX_USERS_MASK;
+    if (cUsers != 0)
+    {
+        AssertFailed();
+        RTThreadSleep(1);
+    }
+
+    RTMemFree(pThis);
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Closes one end of a pipe; shorthand for RTPipeCloseEx without the caller
+ * requesting that the descriptor be left open.
+ *
+ * @returns See RTPipeCloseEx.
+ * @param   hPipe   The pipe handle to close.
+ */
+RTDECL(int) RTPipeClose(RTPIPE hPipe)
+{
+    bool const fLeaveOpen = false;
+    return RTPipeCloseEx(hPipe, fLeaveOpen);
+}
+
+
+/**
+ * Wraps an existing native file descriptor in an RTPIPE handle.
+ *
+ * The descriptor must be a FIFO or a socket whose access mode matches the
+ * requested direction (or is read-write).  Its close-on-exec flag is set or
+ * cleared according to RTPIPE_N_INHERIT, and SIGPIPE is set to be ignored.
+ *
+ * @returns IPRT status code.
+ * @param   phPipe          Where to return the new handle.
+ * @param   hNativePipe     The native descriptor.
+ * @param   fFlags          RTPIPE_N_XXX flags; exactly one of RTPIPE_N_READ
+ *                          and RTPIPE_N_WRITE must be given.
+ */
+RTDECL(int) RTPipeFromNative(PRTPIPE phPipe, RTHCINTPTR hNativePipe, uint32_t fFlags)
+{
+    AssertPtrReturn(phPipe, VERR_INVALID_POINTER);
+    AssertReturn(!(fFlags & ~RTPIPE_N_VALID_MASK_FN), VERR_INVALID_PARAMETER);
+    AssertReturn(!!(fFlags & RTPIPE_N_READ) != !!(fFlags & RTPIPE_N_WRITE), VERR_INVALID_PARAMETER);
+
+    /*
+     * Get and validate the pipe handle info: object type first, then the
+     * access mode from the status flags.
+     */
+    int hNative = (int)hNativePipe;
+    struct stat st;
+    AssertReturn(fstat(hNative, &st) == 0, RTErrConvertFromErrno(errno));
+    AssertMsgReturn(S_ISFIFO(st.st_mode) || S_ISSOCK(st.st_mode), ("%#x (%o)\n", st.st_mode, st.st_mode), VERR_INVALID_HANDLE);
+
+    int fFd = fcntl(hNative, F_GETFL, 0);
+    AssertReturn(fFd != -1, VERR_INVALID_HANDLE);
+    AssertMsgReturn(   (fFd & O_ACCMODE) == (fFlags & RTPIPE_N_READ ? O_RDONLY : O_WRONLY)
+                    || (fFd & O_ACCMODE) == O_RDWR /* Solaris creates bi-directional pipes. */
+                    , ("%#x\n", fFd), VERR_INVALID_HANDLE);
+
+    /*
+     * Create the handle.  The initial mode bit mirrors the descriptor's
+     * current O_NONBLOCK setting.
+     */
+    RTPIPEINTERNAL *pThis = (RTPIPEINTERNAL *)RTMemAlloc(sizeof(RTPIPEINTERNAL));
+    if (!pThis)
+        return VERR_NO_MEMORY;
+
+    pThis->u32Magic   = RTPIPE_MAGIC;
+    pThis->fd         = hNative;
+    pThis->fRead      = RT_BOOL(fFlags & RTPIPE_N_READ);
+    pThis->fLeaveOpen = RT_BOOL(fFlags & RTPIPE_N_LEAVE_OPEN);
+    pThis->u32State   = fFd & O_NONBLOCK ? 0 : RTPIPE_POSIX_BLOCKING;
+
+    /*
+     * Fix up inheritability; bail out and release the instance if that fails.
+     */
+    if (fcntl(hNative, F_SETFD, fFlags & RTPIPE_N_INHERIT ? 0 : FD_CLOEXEC) != 0)
+    {
+        int rc = RTErrConvertFromErrno(errno);
+        RTMemFree(pThis);
+        return rc;
+    }
+
+    /* Shut up SIGPIPE and we're done. */
+    signal(SIGPIPE, SIG_IGN);
+    *phPipe = pThis;
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Gets the native file descriptor of a pipe handle.
+ *
+ * @returns The file descriptor, -1 if the handle is invalid.
+ * @param   hPipe   The pipe handle.
+ */
+RTDECL(RTHCINTPTR) RTPipeToNative(RTPIPE hPipe)
+{
+    RTPIPEINTERNAL *pThis = hPipe;
+    AssertPtrReturn(pThis, -1);
+    AssertReturn(pThis->u32Magic == RTPIPE_MAGIC, -1);
+
+    RTHCINTPTR const hNative = pThis->fd;
+    return hNative;
+}
+
+
+/**
+ * Prepare blocking mode.
+ *
+ * Registers the caller as one more user in RTPIPEINTERNAL::u32State.  If the
+ * pipe is currently in non-blocking mode and has no users, the state is
+ * flipped to blocking and O_NONBLOCK is cleared on the descriptor via fcntl().
+ * The callers in this file drop the user reference again with
+ * ASMAtomicDecU32 once their I/O is done.
+ *
+ * @returns VINF_SUCCESS, or the errno of a failing fcntl() call converted by
+ *          RTErrConvertFromErrno.
+ * @retval VERR_WRONG_ORDER if the pipe is in non-blocking mode and has users.
+ * @retval VERR_INTERNAL_ERROR_4 if the user count has reached half the
+ *         RTPIPE_POSIX_USERS_MASK range.
+ *
+ * @param pThis The pipe handle.
+ */
+static int rtPipeTryBlocking(RTPIPEINTERNAL *pThis)
+{
+ /*
+ * Update the state.
+ *
+ * Lock-free: compute the new state from a snapshot and retry whenever the
+ * compare-and-exchange finds the state changed underneath us.
+ */
+ for (;;)
+ {
+ uint32_t u32State = ASMAtomicReadU32(&pThis->u32State);
+ uint32_t const u32StateOld = u32State;
+ uint32_t const cUsers = (u32State & RTPIPE_POSIX_USERS_MASK);
+
+ if (u32State & RTPIPE_POSIX_BLOCKING) /* Blocking bit already set: just add a user. */
+ {
+ AssertReturn(cUsers < RTPIPE_POSIX_USERS_MASK / 2, VERR_INTERNAL_ERROR_4);
+ u32State &= ~RTPIPE_POSIX_USERS_MASK;
+ u32State |= cUsers + 1;
+ if (ASMAtomicCmpXchgU32(&pThis->u32State, u32State, u32StateOld))
+ {
+ if (u32State & RTPIPE_POSIX_SWITCHING)
+ break; /* The switching bit is still set: run the fcntl code below too. */
+ return VINF_SUCCESS;
+ }
+ }
+ else if (cUsers == 0) /* Non-blocking without users: claim the mode switch. */
+ {
+ u32State = 1 | RTPIPE_POSIX_SWITCHING | RTPIPE_POSIX_BLOCKING;
+ if (ASMAtomicCmpXchgU32(&pThis->u32State, u32State, u32StateOld))
+ break;
+ }
+ else
+ return VERR_WRONG_ORDER;
+ ASMNopPause(); /* Only reached when the compare-and-exchange failed; try again. */
+ }
+
+ /*
+ * Do the switching.
+ *
+ * O_NONBLOCK is cleared unless it already is clear, and the switching bit
+ * is dropped on success.
+ */
+ int fFlags = fcntl(pThis->fd, F_GETFL, 0);
+ if (fFlags != -1)
+ {
+ if ( !(fFlags & O_NONBLOCK)
+ || fcntl(pThis->fd, F_SETFL, fFlags & ~O_NONBLOCK) != -1)
+ {
+ ASMAtomicBitClear(&pThis->u32State, RTPIPE_POSIX_SWITCHING_BIT);
+ return VINF_SUCCESS;
+ }
+ }
+
+ ASMAtomicDecU32(&pThis->u32State); /* Drop our user reference.  NOTE(review): the switching and blocking bits are left as set above on this path - confirm that is intended. */
+ return RTErrConvertFromErrno(errno);
+}
+
+
+/**
+ * Prepare non-blocking mode.
+ *
+ * Registers the caller as one more user in RTPIPEINTERNAL::u32State.  If the
+ * pipe is currently in blocking mode and has no users, the state is flipped
+ * to non-blocking and O_NONBLOCK is set on the descriptor via fcntl().  The
+ * callers in this file drop the user reference again with ASMAtomicDecU32
+ * once their I/O is done.
+ *
+ * @returns VINF_SUCCESS, or the errno of a failing fcntl() call converted by
+ *          RTErrConvertFromErrno.
+ * @retval VERR_WRONG_ORDER if the pipe is in blocking mode and has users.
+ * @retval VERR_INTERNAL_ERROR_4 if the user count has reached half the
+ *         RTPIPE_POSIX_USERS_MASK range.
+ *
+ * @param pThis The pipe handle.
+ */
+static int rtPipeTryNonBlocking(RTPIPEINTERNAL *pThis)
+{
+ /*
+ * Update the state.
+ *
+ * Lock-free: compute the new state from a snapshot and retry whenever the
+ * compare-and-exchange finds the state changed underneath us.
+ */
+ for (;;)
+ {
+ uint32_t u32State = ASMAtomicReadU32(&pThis->u32State);
+ uint32_t const u32StateOld = u32State;
+ uint32_t const cUsers = (u32State & RTPIPE_POSIX_USERS_MASK);
+
+ if (!(u32State & RTPIPE_POSIX_BLOCKING)) /* Blocking bit already clear: just add a user. */
+ {
+ AssertReturn(cUsers < RTPIPE_POSIX_USERS_MASK / 2, VERR_INTERNAL_ERROR_4);
+ u32State &= ~RTPIPE_POSIX_USERS_MASK;
+ u32State |= cUsers + 1;
+ if (ASMAtomicCmpXchgU32(&pThis->u32State, u32State, u32StateOld))
+ {
+ if (u32State & RTPIPE_POSIX_SWITCHING)
+ break; /* The switching bit is still set: run the fcntl code below too. */
+ return VINF_SUCCESS;
+ }
+ }
+ else if (cUsers == 0) /* Blocking without users: claim the mode switch. */
+ {
+ u32State = 1 | RTPIPE_POSIX_SWITCHING;
+ if (ASMAtomicCmpXchgU32(&pThis->u32State, u32State, u32StateOld))
+ break;
+ }
+ else
+ return VERR_WRONG_ORDER;
+ ASMNopPause(); /* Only reached when the compare-and-exchange failed; try again. */
+ }
+
+ /*
+ * Do the switching.
+ *
+ * O_NONBLOCK is set unless it already is set, and the switching bit is
+ * dropped on success.
+ */
+ int fFlags = fcntl(pThis->fd, F_GETFL, 0);
+ if (fFlags != -1)
+ {
+ if ( (fFlags & O_NONBLOCK)
+ || fcntl(pThis->fd, F_SETFL, fFlags | O_NONBLOCK) != -1)
+ {
+ ASMAtomicBitClear(&pThis->u32State, RTPIPE_POSIX_SWITCHING_BIT);
+ return VINF_SUCCESS;
+ }
+ }
+
+ ASMAtomicDecU32(&pThis->u32State); /* Drop our user reference.  NOTE(review): the switching bit is left set on this path - confirm that is intended. */
+ return RTErrConvertFromErrno(errno);
+}
+
+
+/**
+ * Checks if the read pipe has a HUP condition.
+ *
+ * Done with a zero-timeout poll() for POLLHUP, so it never waits.
+ *
+ * @returns true if HUP, false if no.
+ * @param   pThis   The pipe handle (read).
+ */
+static bool rtPipePosixHasHup(RTPIPEINTERNAL *pThis)
+{
+    Assert(pThis->fRead);
+
+    struct pollfd PollFd;
+    RT_ZERO(PollFd);
+    PollFd.fd     = pThis->fd;
+    PollFd.events = POLLHUP;
+
+    /* No descriptor reported (or poll failed): no HUP. */
+    if (poll(&PollFd, 1, 0) < 1)
+        return false;
+    return (PollFd.revents & POLLHUP) != 0;
+}
+
+
+/**
+ * Non-blocking read from the read end of a pipe.
+ *
+ * @returns IPRT status code.
+ * @retval  VINF_TRY_AGAIN if read() failed with EAGAIN; *pcbRead is set to 0.
+ * @retval  VERR_BROKEN_PIPE if nothing was read although bytes were requested
+ *          and the pipe reports a hang-up.
+ * @param   hPipe       The pipe handle (read end).
+ * @param   pvBuf       Where to put the bytes.
+ * @param   cbToRead    Maximum number of bytes to read.
+ * @param   pcbRead     Where to return the number of bytes read.
+ */
+RTDECL(int) RTPipeRead(RTPIPE hPipe, void *pvBuf, size_t cbToRead, size_t *pcbRead)
+{
+    RTPIPEINTERNAL *pThis = hPipe;
+    AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+    AssertReturn(pThis->u32Magic == RTPIPE_MAGIC, VERR_INVALID_HANDLE);
+    AssertReturn(pThis->fRead, VERR_ACCESS_DENIED);
+    AssertPtr(pcbRead);
+    AssertPtr(pvBuf);
+
+    int rc = rtPipeTryNonBlocking(pThis);
+    if (RT_FAILURE(rc))
+        return rc;
+
+    ssize_t const cbActual = read(pThis->fd, pvBuf, RT_MIN(cbToRead, SSIZE_MAX));
+    if (cbActual < 0)
+    {
+        if (errno == EAGAIN)
+        {
+            /* Nothing available right now. */
+            *pcbRead = 0;
+            rc = VINF_TRY_AGAIN;
+        }
+        else
+            rc = RTErrConvertFromErrno(errno);
+    }
+    /* A zero byte read of a non-empty request is only an error if the pipe reports HUP. */
+    else if (cbActual || !cbToRead || !rtPipePosixHasHup(pThis))
+        *pcbRead = cbActual;
+    else
+        rc = VERR_BROKEN_PIPE;
+
+    /* Release the user reference taken by rtPipeTryNonBlocking. */
+    ASMAtomicDecU32(&pThis->u32State);
+    return rc;
+}
+
+
+/**
+ * Blocking read from the read end of a pipe; keeps reading until the request
+ * is satisfied or an error / hang-up is encountered.
+ *
+ * @returns IPRT status code.  When @a pcbRead is given and at least one byte
+ *          was read, a failure other than VERR_INVALID_POINTER is turned into
+ *          VINF_SUCCESS.
+ * @param   hPipe       The pipe handle (read end).
+ * @param   pvBuf       Where to put the bytes.
+ * @param   cbToRead    Number of bytes to read.
+ * @param   pcbRead     Where to return the number of bytes read.  Optional.
+ */
+RTDECL(int) RTPipeReadBlocking(RTPIPE hPipe, void *pvBuf, size_t cbToRead, size_t *pcbRead)
+{
+    RTPIPEINTERNAL *pThis = hPipe;
+    AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+    AssertReturn(pThis->u32Magic == RTPIPE_MAGIC, VERR_INVALID_HANDLE);
+    AssertReturn(pThis->fRead, VERR_ACCESS_DENIED);
+    AssertPtr(pvBuf);
+
+    int rc = rtPipeTryBlocking(pThis);
+    if (RT_FAILURE(rc))
+        return rc;
+
+    size_t cbTotalRead = 0;
+    char  *pchDst      = (char *)pvBuf;
+    while (cbToRead > 0)
+    {
+        ssize_t const cbChunk = read(pThis->fd, pchDst, RT_MIN(cbToRead, SSIZE_MAX));
+        if (cbChunk < 0)
+        {
+            rc = RTErrConvertFromErrno(errno);
+            break;
+        }
+        if (cbChunk == 0 && rtPipePosixHasHup(pThis))
+        {
+            rc = VERR_BROKEN_PIPE;
+            break;
+        }
+
+        /* advance */
+        pchDst      += cbChunk;
+        cbTotalRead += cbChunk;
+        cbToRead    -= cbChunk;
+    }
+
+    if (pcbRead)
+    {
+        /* Report partial progress as success. */
+        *pcbRead = cbTotalRead;
+        if (   RT_FAILURE(rc)
+            && cbTotalRead
+            && rc != VERR_INVALID_POINTER)
+            rc = VINF_SUCCESS;
+    }
+
+    /* Release the user reference taken by rtPipeTryBlocking. */
+    ASMAtomicDecU32(&pThis->u32State);
+    return rc;
+}
+
+
+/**
+ * Non-blocking write to the write end of a pipe.
+ *
+ * @returns IPRT status code.
+ * @retval  VINF_TRY_AGAIN if write() failed with EAGAIN; *pcbWritten is set
+ *          to 0.
+ * @param   hPipe       The pipe handle (write end).
+ * @param   pvBuf       The bytes to write.
+ * @param   cbToWrite   Number of bytes to write.  Zero is handled without
+ *                      calling write().
+ * @param   pcbWritten  Where to return the number of bytes written.
+ */
+RTDECL(int) RTPipeWrite(RTPIPE hPipe, const void *pvBuf, size_t cbToWrite, size_t *pcbWritten)
+{
+    RTPIPEINTERNAL *pThis = hPipe;
+    AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+    AssertReturn(pThis->u32Magic == RTPIPE_MAGIC, VERR_INVALID_HANDLE);
+    AssertReturn(!pThis->fRead, VERR_ACCESS_DENIED);
+    AssertPtr(pcbWritten);
+    AssertPtr(pvBuf);
+
+    int rc = rtPipeTryNonBlocking(pThis);
+    if (RT_FAILURE(rc))
+        return rc;
+
+    if (!cbToWrite)
+        *pcbWritten = 0;
+    else
+    {
+        ssize_t const cbActual = write(pThis->fd, pvBuf, RT_MIN(cbToWrite, SSIZE_MAX));
+        if (cbActual >= 0)
+            *pcbWritten = cbActual;
+        else if (errno == EAGAIN)
+        {
+            /* The pipe is full right now. */
+            *pcbWritten = 0;
+            rc = VINF_TRY_AGAIN;
+        }
+        else
+            rc = RTErrConvertFromErrno(errno);
+    }
+
+    /* Release the user reference taken by rtPipeTryNonBlocking. */
+    ASMAtomicDecU32(&pThis->u32State);
+    return rc;
+}
+
+
+/**
+ * Blocking write to the write end of a pipe; keeps writing until everything
+ * has been written or write() fails.
+ *
+ * @returns IPRT status code.  When @a pcbWritten is given and at least one
+ *          byte was written, a failure other than VERR_INVALID_POINTER is
+ *          turned into VINF_SUCCESS.
+ * @param   hPipe       The pipe handle (write end).
+ * @param   pvBuf       The bytes to write.
+ * @param   cbToWrite   Number of bytes to write.
+ * @param   pcbWritten  Where to return the number of bytes written.  Optional.
+ */
+RTDECL(int) RTPipeWriteBlocking(RTPIPE hPipe, const void *pvBuf, size_t cbToWrite, size_t *pcbWritten)
+{
+    RTPIPEINTERNAL *pThis = hPipe;
+    AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+    AssertReturn(pThis->u32Magic == RTPIPE_MAGIC, VERR_INVALID_HANDLE);
+    AssertReturn(!pThis->fRead, VERR_ACCESS_DENIED);
+    AssertPtr(pvBuf);
+    AssertPtrNull(pcbWritten);
+
+    int rc = rtPipeTryBlocking(pThis);
+    if (RT_FAILURE(rc))
+        return rc;
+
+    size_t      cbTotalWritten = 0;
+    char const *pchSrc         = (char const *)pvBuf;
+    while (cbToWrite > 0)
+    {
+        ssize_t const cbChunk = write(pThis->fd, pchSrc, RT_MIN(cbToWrite, SSIZE_MAX));
+        if (cbChunk < 0)
+        {
+            rc = RTErrConvertFromErrno(errno);
+            break;
+        }
+
+        /* advance */
+        pchSrc         += cbChunk;
+        cbTotalWritten += cbChunk;
+        cbToWrite      -= cbChunk;
+    }
+
+    if (pcbWritten)
+    {
+        /* Report partial progress as success. */
+        *pcbWritten = cbTotalWritten;
+        if (   RT_FAILURE(rc)
+            && cbTotalWritten
+            && rc != VERR_INVALID_POINTER)
+            rc = VINF_SUCCESS;
+    }
+
+    /* Release the user reference taken by rtPipeTryBlocking. */
+    ASMAtomicDecU32(&pThis->u32State);
+    return rc;
+}
+
+
+/**
+ * Flushes the write end of a pipe using fsync().
+ *
+ * @returns IPRT status code.
+ * @retval  VERR_NOT_SUPPORTED if fsync() fails with EINVAL or ENOTSUP.
+ * @param   hPipe   The pipe handle (write end).
+ */
+RTDECL(int) RTPipeFlush(RTPIPE hPipe)
+{
+    RTPIPEINTERNAL *pThis = hPipe;
+    AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+    AssertReturn(pThis->u32Magic == RTPIPE_MAGIC, VERR_INVALID_HANDLE);
+    AssertReturn(!pThis->fRead, VERR_ACCESS_DENIED);
+
+    if (fsync(pThis->fd) == 0)
+        return VINF_SUCCESS;
+
+    /* EINVAL and ENOTSUP are reported as "not supported" rather than converted. */
+    int const iErrNo = errno;
+    if (iErrNo == EINVAL || iErrNo == ENOTSUP)
+        return VERR_NOT_SUPPORTED;
+    return RTErrConvertFromErrno(iErrNo);
+}
+
+
+/**
+ * Waits for the pipe to become ready for I/O (readable for the read end,
+ * writable for the write end) or to signal hang-up / error.
+ *
+ * @returns IPRT status code.
+ * @retval  VERR_TIMEOUT if poll() reported no events.
+ * @param   hPipe       The pipe handle.
+ * @param   cMillies    How long to wait in milliseconds.  RT_INDEFINITE_WAIT
+ *                      and anything from INT_MAX up means wait forever.
+ */
+RTDECL(int) RTPipeSelectOne(RTPIPE hPipe, RTMSINTERVAL cMillies)
+{
+    RTPIPEINTERNAL *pThis = hPipe;
+    AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+    AssertReturn(pThis->u32Magic == RTPIPE_MAGIC, VERR_INVALID_HANDLE);
+
+    /* Hang-up and error are always of interest, the rest depends on the direction. */
+    struct pollfd PollFd;
+    RT_ZERO(PollFd);
+    PollFd.fd     = pThis->fd;
+    PollFd.events = POLLHUP | POLLERR;
+    PollFd.events |= pThis->fRead ? POLLIN | POLLPRI : POLLOUT;
+
+    /* Translate the timeout; poll() takes an int with -1 meaning forever. */
+    int cMsTimeout = -1;
+    if (   cMillies != RT_INDEFINITE_WAIT
+        && cMillies < INT_MAX /* lazy bird */)
+        cMsTimeout = cMillies;
+
+    int const cReady = poll(&PollFd, 1, cMsTimeout);
+    if (cReady == -1)
+        return RTErrConvertFromErrno(errno);
+    if (cReady > 0)
+        return VINF_SUCCESS;
+    return VERR_TIMEOUT;
+}
+
+
+/**
+ * Queries the number of bytes that can be read from the pipe right now, using
+ * the FIONREAD ioctl.
+ *
+ * @returns IPRT status code.
+ * @retval  VERR_NOT_SUPPORTED if the ioctl fails with ENOTTY.
+ * @param   hPipe           The pipe handle (read end).
+ * @param   pcbReadable     Where to return the byte count.
+ */
+RTDECL(int) RTPipeQueryReadable(RTPIPE hPipe, size_t *pcbReadable)
+{
+    RTPIPEINTERNAL *pThis = hPipe;
+    AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+    AssertReturn(pThis->u32Magic == RTPIPE_MAGIC, VERR_INVALID_HANDLE);
+    AssertReturn(pThis->fRead, VERR_PIPE_NOT_READ);
+    AssertPtrReturn(pcbReadable, VERR_INVALID_POINTER);
+
+    int cb = 0;
+    if (ioctl(pThis->fd, FIONREAD, &cb) == -1)
+    {
+        int const iErrNo = errno;
+        return iErrNo == ENOTTY ? VERR_NOT_SUPPORTED : RTErrConvertFromErrno(iErrNo);
+    }
+
+    /* Negative counts are clamped to zero. */
+    AssertStmt(cb >= 0, cb = 0);
+    *pcbReadable = cb;
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Queries file system object information for a pipe.
+ *
+ * The structure is first filled by rtPipeFakeQueryInfo and then refined with
+ * what the OS can tell us: the number of readable bytes (read end), the free
+ * space where FIONSPACE is available (write end), and the block size and
+ * inode details from fstat().  These refinements are best effort; failing to
+ * obtain them does not fail the call.
+ *
+ * @returns VINF_SUCCESS, VERR_INVALID_HANDLE if @a hPipe is not a valid pipe
+ *          handle.
+ * @param   hPipe       The pipe handle.
+ * @param   pObjInfo    Where to return the object information.
+ * @param   enmAddAttr  Which additional attributes the caller wants.
+ */
+RTDECL(int) RTPipeQueryInfo(RTPIPE hPipe, PRTFSOBJINFO pObjInfo, RTFSOBJATTRADD enmAddAttr)
+{
+    /* Fail on bad handles rather than returning success with pObjInfo untouched. */
+    RTPIPEINTERNAL *pThis = hPipe;
+    AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+    AssertReturn(pThis->u32Magic == RTPIPE_MAGIC, VERR_INVALID_HANDLE);
+
+    rtPipeFakeQueryInfo(pObjInfo, enmAddAttr, pThis->fRead);
+
+    if (pThis->fRead)
+    {
+        int cb = 0;
+        int rc = ioctl(pThis->fd, FIONREAD, &cb);
+        if (rc >= 0)
+            pObjInfo->cbObject = cb;
+    }
+#ifdef FIONSPACE
+    else
+    {
+        int cb = 0;
+        int rc = ioctl(pThis->fd, FIONSPACE, &cb);
+        if (rc >= 0)
+            pObjInfo->cbObject = cb;
+    }
+#endif
+
+    /** @todo Check this out on linux, solaris and darwin... (Currently going by a
+     *        FreeBSD manpage.) */
+    struct stat St;
+    if (fstat(pThis->fd, &St) == 0) /* fstat() returns 0 on success; only then is St valid. */
+    {
+        pObjInfo->cbAllocated = St.st_blksize;
+        if (   enmAddAttr == RTFSOBJATTRADD_NOTHING
+            || enmAddAttr == RTFSOBJATTRADD_UNIX)
+        {
+            pObjInfo->Attr.enmAdditional = RTFSOBJATTRADD_UNIX;
+            pObjInfo->Attr.u.Unix.INodeId = St.st_ino;
+            pObjInfo->Attr.u.Unix.INodeIdDevice = St.st_dev;
+        }
+    }
+    /** @todo error handling? */
+
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Returns the native descriptor to use when polling a pipe.
+ *
+ * @returns IPRT status code.
+ * @retval  VERR_INVALID_PARAMETER if read events are requested on a write end
+ *          or write events on a read end.
+ * @param   hPipe       The pipe handle.
+ * @param   fEvents     The RTPOLL_EVT_XXX events the caller is interested in.
+ * @param   phNative    Where to return the file descriptor.
+ */
+int rtPipePollGetHandle(RTPIPE hPipe, uint32_t fEvents, PRTHCINTPTR phNative)
+{
+    RTPIPEINTERNAL *pThis = hPipe;
+    AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+    AssertReturn(pThis->u32Magic == RTPIPE_MAGIC, VERR_INVALID_HANDLE);
+
+    /* The requested events must match the direction of this end of the pipe. */
+    AssertReturn(!(fEvents & RTPOLL_EVT_READ) || pThis->fRead, VERR_INVALID_PARAMETER);
+    AssertReturn(!(fEvents & RTPOLL_EVT_WRITE) || !pThis->fRead, VERR_INVALID_PARAMETER);
+
+    RTHCINTPTR const hNativeFd = pThis->fd;
+    *phNative = hNativeFd;
+    return VINF_SUCCESS;
+}
+
diff --git a/src/VBox/Runtime/r3/posix/process-creation-posix.cpp b/src/VBox/Runtime/r3/posix/process-creation-posix.cpp
new file mode 100644
index 00000000..6cf7be73
--- /dev/null
+++ b/src/VBox/Runtime/r3/posix/process-creation-posix.cpp
@@ -0,0 +1,2408 @@
+/* $Id: process-creation-posix.cpp $ */
+/** @file
+ * IPRT - Process Creation, POSIX.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP RTLOGGROUP_PROCESS
+#include <iprt/cdefs.h>
+#ifdef RT_OS_LINUX
+# define IPRT_WITH_DYNAMIC_CRYPT_R
+#endif
+#if (defined(RT_OS_LINUX) || defined(RT_OS_OS2)) && !defined(_GNU_SOURCE)
+# define _GNU_SOURCE
+#endif
+#if defined(RT_OS_LINUX) && !defined(_XOPEN_SOURCE)
+# define _XOPEN_SOURCE 700 /* for newlocale */
+#endif
+
+#ifdef RT_OS_OS2
+# define crypt unistd_crypt
+# define setkey unistd_setkey
+# define encrypt unistd_encrypt
+# include <unistd.h>
+# undef crypt
+# undef setkey
+# undef encrypt
+#else
+# include <unistd.h>
+#endif
+#include <stdlib.h>
+#include <errno.h>
+#include <langinfo.h>
+#include <locale.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <grp.h>
+#include <pwd.h>
+#if defined(RT_OS_LINUX) || defined(RT_OS_OS2) || defined(RT_OS_SOLARIS)
+# include <crypt.h>
+#endif
+#if defined(RT_OS_LINUX) || defined(RT_OS_SOLARIS)
+# include <shadow.h>
+#endif
+#if defined(RT_OS_DARWIN)
+# include <xlocale.h> /* for newlocale() */
+#endif
+
+#if defined(RT_OS_LINUX) || defined(RT_OS_OS2)
+/* While Solaris has posix_spawn() of course we don't want to use it as
+ * we need to have the child in a different process contract, no matter
+ * whether it is started detached or not. */
+# define HAVE_POSIX_SPAWN 1
+#endif
+#if defined(RT_OS_DARWIN) && defined(MAC_OS_X_VERSION_MIN_REQUIRED)
+# if MAC_OS_X_VERSION_MIN_REQUIRED >= 1050
+# define HAVE_POSIX_SPAWN 1
+# endif
+#endif
+#ifdef HAVE_POSIX_SPAWN
+# include <spawn.h>
+#endif
+
+#if !defined(IPRT_USE_PAM) \
+ && !defined(IPRT_WITHOUT_PAM) \
+ && ( defined(RT_OS_DARWIN) || defined(RT_OS_FREEBSD) || defined(RT_OS_LINUX) || defined(RT_OS_NETBSD) || defined(RT_OS_OPENBSD) || defined(RT_OS_SOLARIS) )
+# define IPRT_USE_PAM
+#endif
+#ifdef IPRT_USE_PAM
+# include <security/pam_appl.h>
+# include <stdlib.h>
+# include <dlfcn.h>
+# include <iprt/asm.h>
+#endif
+
+#ifdef RT_OS_SOLARIS
+# include <limits.h>
+# include <sys/ctfs.h>
+# include <sys/contract/process.h>
+# include <libcontract.h>
+#endif
+
+#ifndef RT_OS_SOLARIS
+# include <paths.h>
+#else
+# define _PATH_MAILDIR "/var/mail"
+# define _PATH_DEFPATH "/usr/bin:/bin"
+# define _PATH_STDPATH "/sbin:/usr/sbin:/bin:/usr/bin"
+#endif
+#ifndef _PATH_BSHELL
+# define _PATH_BSHELL "/bin/sh"
+#endif
+
+
+#include <iprt/process.h>
+#include "internal/iprt.h"
+
+#include <iprt/alloca.h>
+#include <iprt/assert.h>
+#include <iprt/ctype.h>
+#include <iprt/env.h>
+#include <iprt/err.h>
+#include <iprt/file.h>
+#if defined(IPRT_WITH_DYNAMIC_CRYPT_R) || defined(IPRT_USE_PAM)
+# include <iprt/ldr.h>
+#endif
+#include <iprt/log.h>
+#include <iprt/path.h>
+#include <iprt/pipe.h>
+#include <iprt/socket.h>
+#include <iprt/string.h>
+#include <iprt/mem.h>
+#include "internal/process.h"
+#include "internal/path.h"
+#include "internal/string.h"
+
+
+/*********************************************************************************************************************************
+* Defined Constants And Macros *
+*********************************************************************************************************************************/
+#ifdef IPRT_USE_PAM
+/*
+ * The PAM library names and version ranges to try.
+ */
+# ifdef RT_OS_DARWIN
+# include <mach-o/dyld.h>
+/** @note libpam.2.dylib was introduced with 10.6.x (OpenPAM); we use
+ * libpam.dylib as that's a symlink to the latest and greatest. */
+# define IPRT_LIBPAM_FILE_1 "libpam.dylib"
+# define IPRT_LIBPAM_FILE_1_FIRST_VER 0
+# define IPRT_LIBPAM_FILE_1_END_VER 0
+# define IPRT_LIBPAM_FILE_2 "libpam.2.dylib"
+# define IPRT_LIBPAM_FILE_2_FIRST_VER 0
+# define IPRT_LIBPAM_FILE_2_END_VER 0
+# define IPRT_LIBPAM_FILE_3 "libpam.1.dylib"
+# define IPRT_LIBPAM_FILE_3_FIRST_VER 0
+# define IPRT_LIBPAM_FILE_3_END_VER 0
+# elif RT_OS_LINUX
+# define IPRT_LIBPAM_FILE_1 "libpam.so.0"
+# define IPRT_LIBPAM_FILE_1_FIRST_VER 0
+# define IPRT_LIBPAM_FILE_1_END_VER 0
+# define IPRT_LIBPAM_FILE_2 "libpam.so"
+# define IPRT_LIBPAM_FILE_2_FIRST_VER 16
+# define IPRT_LIBPAM_FILE_2_END_VER 1
+# else
+# define IPRT_LIBPAM_FILE_1 "libpam.so"
+# define IPRT_LIBPAM_FILE_1_FIRST_VER 16
+# define IPRT_LIBPAM_FILE_1_END_VER 0
+# endif
+#endif
+
+
+/*********************************************************************************************************************************
+* Structures and Typedefs *
+*********************************************************************************************************************************/
+#ifdef IPRT_USE_PAM
+/** For passing info between rtCheckCredentials and rtPamConv. */
+typedef struct RTPROCPAMARGS
+{
+ const char *pszUser; /**< The user name; rtPamConv answers PAM_PROMPT_ECHO_ON prompts with a copy of it. */
+ const char *pszPassword; /**< The password; rtPamConv answers PAM_PROMPT_ECHO_OFF prompts with a copy of it. */
+} RTPROCPAMARGS;
+/** Pointer to rtPamConv argument package. */
+typedef RTPROCPAMARGS *PRTPROCPAMARGS;
+#endif
+
+
+/*********************************************************************************************************************************
+* Global Variables *
+*********************************************************************************************************************************/
+/** Environment dump begin marker used with CSH. */
+static const char g_szEnvMarkerBegin[] = "IPRT_EnvEnvEnv_Begin_EnvEnvEnv";
+/** Environment dump end marker used with CSH. */
+static const char g_szEnvMarkerEnd[] = "IPRT_EnvEnvEnv_End_EnvEnvEnv";
+
+
+/*********************************************************************************************************************************
+* Internal Functions *
+*********************************************************************************************************************************/
+static int rtProcPosixCreateInner(const char *pszExec, const char * const *papszArgs, RTENV hEnv, RTENV hEnvToUse,
+ uint32_t fFlags, const char *pszAsUser, uid_t uid, gid_t gid,
+ unsigned cRedirFds, int *paRedirFds, PRTPROCESS phProcess);
+
+
+#ifdef IPRT_USE_PAM
+/**
+ * Worker for rtCheckCredentials that feeds password and maybe username to PAM.
+ *
+ * PAM_PROMPT_ECHO_OFF prompts are answered with a copy of the password,
+ * PAM_PROMPT_ECHO_ON prompts with a copy of the user name, and every other
+ * message style gets no response string.  The response array comes from
+ * calloc() and the strings from strdup().
+ *
+ * @returns PAM status.
+ * @param   cMessages       Number of messages.
+ * @param   papMessages     Message vector.
+ * @param   ppaResponses    Where to put our responses.
+ * @param   pvAppData       Pointer to RTPROCPAMARGS.
+ */
+#if defined(RT_OS_SOLARIS)
+static int rtPamConv(int cMessages, struct pam_message **papMessages, struct pam_response **ppaResponses, void *pvAppData)
+#else
+static int rtPamConv(int cMessages, const struct pam_message **papMessages, struct pam_response **ppaResponses, void *pvAppData)
+#endif
+{
+    LogFlow(("rtPamConv: cMessages=%d\n", cMessages));
+    PRTPROCPAMARGS pArgs = (PRTPROCPAMARGS)pvAppData;
+    AssertPtrReturn(pArgs, PAM_CONV_ERR);
+
+    struct pam_response *paResponses = (struct pam_response *)calloc(cMessages, sizeof(paResponses[0]));
+    AssertReturn(paResponses, PAM_CONV_ERR);
+
+    for (int iMsg = 0; iMsg < cMessages; iMsg++)
+    {
+        LogFlow(("rtPamConv: #%d: msg_style=%d msg=%s\n", iMsg, papMessages[iMsg]->msg_style, papMessages[iMsg]->msg));
+
+        struct pam_response *pResp = &paResponses[iMsg];
+        pResp->resp_retcode = 0;
+        switch (papMessages[iMsg]->msg_style)
+        {
+            case PAM_PROMPT_ECHO_OFF:
+                pResp->resp = strdup(pArgs->pszPassword);
+                break;
+            case PAM_PROMPT_ECHO_ON:
+                pResp->resp = strdup(pArgs->pszUser);
+                break;
+            default:
+                /* Informational and other messages get no answer. */
+                pResp->resp = NULL;
+                continue;
+        }
+
+        if (pResp->resp == NULL)
+        {
+            /* strdup failed: release the answers produced so far and give up. */
+            for (int iFree = 0; iFree < iMsg; iFree++)
+                free(paResponses[iFree].resp);
+            free(paResponses);
+            LogFlow(("rtPamConv: out of memory\n"));
+            return PAM_CONV_ERR;
+        }
+    }
+
+    *ppaResponses = paResponses;
+    return PAM_SUCCESS;
+}
+
+
+/**
+ * Common PAM driver for rtCheckCredentials and the case where pszAsUser is NULL
+ * but RTPROC_FLAGS_PROFILE is set.
+ *
+ * @returns IPRT status code.
+ * @param pszPamService The PAM service to use for the run.
+ * @param pszUser The user.
+ * @param pszPassword The password.
+ * @param ppapszEnv Where to return PAM environment variables, NULL is
+ * fine if no variables to return. Call
+ * rtProcPosixFreePamEnv to free. Optional, so NULL
+ * can be passed in.
+ * @param pfMayFallBack Where to return whether a fallback to crypt is
+ * acceptable or if the failure result is due to
+ * authentication failing. Optional.
+ */
+static int rtProcPosixAuthenticateUsingPam(const char *pszPamService, const char *pszUser, const char *pszPassword,
+ char ***ppapszEnv, bool *pfMayFallBack)
+{
+ if (pfMayFallBack)
+ *pfMayFallBack = true;
+
+ /*
+ * Dynamically load pam the first time we go thru here.
+ */
+ static int (*s_pfnPamStart)(const char *, const char *, struct pam_conv *, pam_handle_t **);
+ static int (*s_pfnPamAuthenticate)(pam_handle_t *, int);
+ static int (*s_pfnPamAcctMgmt)(pam_handle_t *, int);
+ static int (*s_pfnPamSetItem)(pam_handle_t *, int, const void *);
+ static int (*s_pfnPamSetCred)(pam_handle_t *, int);
+ static char ** (*s_pfnPamGetEnvList)(pam_handle_t *);
+ static int (*s_pfnPamOpenSession)(pam_handle_t *, int);
+ static int (*s_pfnPamCloseSession)(pam_handle_t *, int);
+ static int (*s_pfnPamEnd)(pam_handle_t *, int);
+ if ( s_pfnPamStart == NULL
+ || s_pfnPamAuthenticate == NULL
+ || s_pfnPamAcctMgmt == NULL
+ || s_pfnPamSetItem == NULL
+ || s_pfnPamEnd == NULL)
+ {
+ RTLDRMOD hModPam = NIL_RTLDRMOD;
+ const char *pszLast;
+ int rc = RTLdrLoadSystemEx(pszLast = IPRT_LIBPAM_FILE_1, RTLDRLOAD_FLAGS_GLOBAL | RTLDRLOAD_FLAGS_NO_UNLOAD
+ | RTLDRLOAD_FLAGS_SO_VER_RANGE(IPRT_LIBPAM_FILE_1_FIRST_VER, IPRT_LIBPAM_FILE_1_END_VER),
+ &hModPam);
+# ifdef IPRT_LIBPAM_FILE_2
+ if (RT_FAILURE(rc))
+ rc = RTLdrLoadSystemEx(pszLast = IPRT_LIBPAM_FILE_2, RTLDRLOAD_FLAGS_GLOBAL | RTLDRLOAD_FLAGS_NO_UNLOAD
+ | RTLDRLOAD_FLAGS_SO_VER_RANGE(IPRT_LIBPAM_FILE_2_FIRST_VER, IPRT_LIBPAM_FILE_2_END_VER),
+ &hModPam);
+# endif
+# ifdef IPRT_LIBPAM_FILE_3
+ if (RT_FAILURE(rc))
+ rc = RTLdrLoadSystemEx(pszLast = IPRT_LIBPAM_FILE_3, RTLDRLOAD_FLAGS_GLOBAL | RTLDRLOAD_FLAGS_NO_UNLOAD
+ | RTLDRLOAD_FLAGS_SO_VER_RANGE(IPRT_LIBPAM_FILE_3_FIRST_VER, IPRT_LIBPAM_FILE_3_END_VER),
+ &hModPam);
+# endif
+ if (RT_FAILURE(rc))
+ {
+ LogRelMax(10, ("failed to load %s: %Rrc\n", pszLast, rc));
+ return VERR_AUTHENTICATION_FAILURE;
+ }
+
+ *(uintptr_t *)&s_pfnPamStart = (uintptr_t)RTLdrGetFunction(hModPam, "pam_start");
+ *(uintptr_t *)&s_pfnPamAuthenticate = (uintptr_t)RTLdrGetFunction(hModPam, "pam_authenticate");
+ *(uintptr_t *)&s_pfnPamAcctMgmt = (uintptr_t)RTLdrGetFunction(hModPam, "pam_acct_mgmt");
+ *(uintptr_t *)&s_pfnPamSetItem = (uintptr_t)RTLdrGetFunction(hModPam, "pam_set_item");
+ *(uintptr_t *)&s_pfnPamSetCred = (uintptr_t)RTLdrGetFunction(hModPam, "pam_setcred");
+ *(uintptr_t *)&s_pfnPamGetEnvList = (uintptr_t)RTLdrGetFunction(hModPam, "pam_getenvlist");
+ *(uintptr_t *)&s_pfnPamOpenSession = (uintptr_t)RTLdrGetFunction(hModPam, "pam_open_session");
+ *(uintptr_t *)&s_pfnPamCloseSession = (uintptr_t)RTLdrGetFunction(hModPam, "pam_close_session");
+ *(uintptr_t *)&s_pfnPamEnd = (uintptr_t)RTLdrGetFunction(hModPam, "pam_end");
+ ASMCompilerBarrier();
+
+ RTLdrClose(hModPam);
+
+ if ( s_pfnPamStart == NULL
+ || s_pfnPamAuthenticate == NULL
+ || s_pfnPamAcctMgmt == NULL
+ || s_pfnPamSetItem == NULL
+ || s_pfnPamEnd == NULL)
+ {
+ LogRelMax(10, ("failed to resolve symbols: %p %p %p %p %p\n",
+ s_pfnPamStart, s_pfnPamAuthenticate, s_pfnPamAcctMgmt, s_pfnPamSetItem, s_pfnPamEnd));
+ return VERR_AUTHENTICATION_FAILURE;
+ }
+ }
+
+# define pam_start s_pfnPamStart
+# define pam_authenticate s_pfnPamAuthenticate
+# define pam_acct_mgmt s_pfnPamAcctMgmt
+# define pam_set_item s_pfnPamSetItem
+# define pam_setcred s_pfnPamSetCred
+# define pam_getenvlist s_pfnPamGetEnvList
+# define pam_open_session s_pfnPamOpenSession
+# define pam_close_session s_pfnPamCloseSession
+# define pam_end s_pfnPamEnd
+
+ /*
+ * Do the PAM stuff.
+ */
+ pam_handle_t *hPam = NULL;
+ RTPROCPAMARGS PamConvArgs = { pszUser, pszPassword };
+ struct pam_conv PamConversation;
+ RT_ZERO(PamConversation);
+ PamConversation.appdata_ptr = &PamConvArgs;
+ PamConversation.conv = rtPamConv;
+ int rc = pam_start(pszPamService, pszUser, &PamConversation, &hPam);
+ if (rc == PAM_SUCCESS)
+ {
+ rc = pam_set_item(hPam, PAM_RUSER, pszUser);
+ LogRel2(("rtProcPosixAuthenticateUsingPam(%s): pam_setitem/PAM_RUSER: %s\n", pszPamService, pszUser));
+ if (rc == PAM_SUCCESS)
+ {
+ /*
+ * Secure TTY fun ahead (for pam_securetty).
+ *
+ * We need to set PAM_TTY (if available) to make PAM stacks work which
+ * require a secure TTY via pam_securetty (Debian 10 + 11, for example). This
+ * is typically an issue when launching as 'root'. See @bugref{10225}.
+ *
+ * Note! We only can try (or better: guess) to a certain amount, as it really
+ * depends on the distribution or Administrator which has set up the
+ * system which (and how) things are allowed (see /etc/securetty).
+ *
+ * Note! We don't acctually try or guess anything about the distro like
+ * suggested by the above note, we just try determine the TTY of
+ * the _parent_ process and hope for the best. (bird)
+ */
+ char szTTY[64];
+ int rc2 = RTEnvGetEx(RTENV_DEFAULT, "DISPLAY", szTTY, sizeof(szTTY), NULL);
+ if (RT_FAILURE(rc2))
+ {
+ /* Virtual terminal hint given? */
+ static char const s_szPrefix[] = "tty";
+ memcpy(szTTY, s_szPrefix, sizeof(s_szPrefix));
+ rc2 = RTEnvGetEx(RTENV_DEFAULT, "XDG_VTNR", &szTTY[sizeof(s_szPrefix) - 1], sizeof(s_szPrefix) - 1, NULL);
+ }
+
+ /** @todo Should we - distinguished from the login service - also set the hostname as PAM_TTY?
+ * The pam_access and pam_systemd talk about this. Similarly, SSH and cron use "ssh" and "cron" for PAM_TTY
+ * (see PAM_TTY_KLUDGE). */
+#ifdef IPRT_WITH_PAM_TTY_KLUDGE
+ if (RT_FAILURE(rc2))
+ if (!RTStrICmp(pszPamService, "access")) /* Access management needed? */
+ {
+ int err = gethostname(szTTY, sizeof(szTTY));
+ if (err == 0)
+ rc2 = VINF_SUCCESS;
+ }
+#endif
+ /* As a last resort, try stdin's TTY name instead (if any). */
+ if (RT_FAILURE(rc2))
+ {
+ rc2 = ttyname_r(0 /*stdin*/, szTTY, sizeof(szTTY));
+ if (rc2 != 0)
+ rc2 = RTErrConvertFromErrno(rc2);
+ }
+
+ LogRel2(("rtProcPosixAuthenticateUsingPam(%s): pam_setitem/PAM_TTY: %s, rc2=%Rrc\n", pszPamService, szTTY, rc2));
+ if (szTTY[0] == '\0')
+ LogRel2(("rtProcPosixAuthenticateUsingPam(%s): Hint: Looks like running as a non-interactive user (no TTY/PTY).\n"
+ "Authentication requiring a secure terminal might fail.\n", pszPamService));
+
+ if ( RT_SUCCESS(rc2)
+ && szTTY[0] != '\0') /* Only try using PAM_TTY if we have something to set. */
+ rc = pam_set_item(hPam, PAM_TTY, szTTY);
+
+ if (rc == PAM_SUCCESS)
+ {
+ /* From this point on we don't allow falling back to other auth methods. */
+ if (pfMayFallBack)
+ *pfMayFallBack = false;
+
+ rc = pam_authenticate(hPam, 0);
+ if (rc == PAM_SUCCESS)
+ {
+ rc = pam_acct_mgmt(hPam, 0);
+ if ( rc == PAM_SUCCESS
+ || rc == PAM_AUTHINFO_UNAVAIL /*??*/)
+ {
+ if ( ppapszEnv
+ && s_pfnPamGetEnvList
+ && s_pfnPamSetCred)
+ {
+ /* pam_env.so creates the environment when pam_setcred is called,. */
+ int rcSetCred = pam_setcred(hPam, PAM_ESTABLISH_CRED | PAM_SILENT);
+ /** @todo check pam_setcred status code? */
+
+ /* Unless it does it during session opening (Ubuntu 21.10). This
+ unfortunately means we might mount user dir and other crap: */
+ /** @todo do session handling properly */
+ int rcOpenSession = PAM_ABORT;
+ if ( s_pfnPamOpenSession
+ && s_pfnPamCloseSession)
+ rcOpenSession = pam_open_session(hPam, PAM_SILENT);
+
+ *ppapszEnv = pam_getenvlist(hPam);
+ LogFlowFunc(("pam_getenvlist -> %p ([0]=%p); rcSetCred=%d rcOpenSession=%d\n",
+ *ppapszEnv, *ppapszEnv ? **ppapszEnv : NULL, rcSetCred, rcOpenSession)); RT_NOREF(rcSetCred);
+
+ if (rcOpenSession == PAM_SUCCESS)
+ pam_close_session(hPam, PAM_SILENT);
+ pam_setcred(hPam, PAM_DELETE_CRED);
+ }
+
+ pam_end(hPam, PAM_SUCCESS);
+ LogFlowFunc(("pam auth (for %s) successful\n", pszPamService));
+ return VINF_SUCCESS;
+ }
+ LogFunc(("pam_acct_mgmt -> %d\n", rc));
+ }
+ else
+ LogFunc(("pam_authenticate -> %d\n", rc));
+ }
+ else
+ LogFunc(("pam_setitem/PAM_TTY -> %d\n", rc));
+ }
+ else
+ LogFunc(("pam_set_item/PAM_RUSER -> %d\n", rc));
+ pam_end(hPam, rc);
+ }
+ else
+ LogFunc(("pam_start(%s) -> %d\n", pszPamService, rc));
+
+ LogRel2(("rtProcPosixAuthenticateUsingPam(%s): Failed authenticating user '%s' with %d\n", pszPamService, pszUser, rc));
+ return VERR_AUTHENTICATION_FAILURE;
+}
+
+
+/**
+ * Checks whether a PAM service configuration file exists.
+ *
+ * Looks for a file named @a pszService in /etc/pam.d/ (PAM_CONFIG_D) and then
+ * in /usr/lib/pam.d/ (PAM_CONFIG_DIST_D).  PAM_CONFIG_DIST2_D is not searched.
+ *
+ * @returns true if the file exists in one of the directories, false if not.
+ * @param   pszService  The PAM service name, i.e. the file name to look for.
+ */
+ static bool rtProcPosixPamServiceExists(const char *pszService)
+ {
+     static const char * const s_apszPamDirs[] =
+     {
+         "/etc/pam.d/",      /* PAM_CONFIG_D */
+         "/usr/lib/pam.d/",  /* PAM_CONFIG_DIST_D */
+     };
+
+     char szServiceFile[256];
+     for (size_t iDir = 0; iDir < RT_ELEMENTS(s_apszPamDirs); iDir++)
+     {
+         int rcJoin = RTPathJoin(szServiceFile, sizeof(szServiceFile), s_apszPamDirs[iDir], pszService);
+         AssertRC(rcJoin);
+         if (RTFileExists(szServiceFile))
+             return true;
+     }
+
+     return false;
+ }
+
+#endif /* IPRT_USE_PAM */
+
+
+#if defined(IPRT_WITH_DYNAMIC_CRYPT_R)
+/** Function pointer type matching crypt_r(). */
+ typedef char *(*PFNCRYPTR)(const char *, const char *, struct crypt_data *);
+
+/**
+ * Calls crypt_r() through a lazily resolved function pointer.
+ *
+ * Fedora 30+ wants to use libxcrypt rather than the glibc libcrypt.  The two
+ * libraries have different crypt_data sizes and layouts, so the symbol is
+ * looked up at runtime and the caller allocates a 256KB data block to be on
+ * the safe side.
+ *
+ * The lookup tries "libcrypt.so" first and "libxcrypt.so" second; a successful
+ * lookup is remembered in a static variable for subsequent calls.
+ *
+ * @returns What crypt_r() returns, NULL if crypt_r() could not be located.
+ * @param   pszKey      First crypt_r() argument (the key).
+ * @param   pszSalt     Second crypt_r() argument (the salt).
+ * @param   pData       Third crypt_r() argument (work data, see above).
+ */
+ static char *rtProcDynamicCryptR(const char *pszKey, const char *pszSalt, struct crypt_data *pData)
+ {
+     static PFNCRYPTR volatile s_pfnCryptR = NULL;
+
+     PFNCRYPTR pfnResolved = s_pfnCryptR;
+     if (!pfnResolved)
+     {
+         pfnResolved = (PFNCRYPTR)(uintptr_t)RTLdrGetSystemSymbolEx("libcrypt.so", "crypt_r", RTLDRLOAD_FLAGS_SO_VER_RANGE(1, 6));
+         if (!pfnResolved)
+             pfnResolved = (PFNCRYPTR)(uintptr_t)RTLdrGetSystemSymbolEx("libxcrypt.so", "crypt_r", RTLDRLOAD_FLAGS_SO_VER_RANGE(1, 32));
+         if (!pfnResolved)
+         {
+             LogRel(("IPRT/RTProc: Unable to locate crypt_r!\n"));
+             return NULL;
+         }
+
+         /* Remember it for next time. */
+         s_pfnCryptR = pfnResolved;
+     }
+
+     return pfnResolved(pszKey, pszSalt, pData);
+ }
+#endif /* IPRT_WITH_DYNAMIC_CRYPT_R */
+
+
+/**
+ * Frees an environment list as returned by rtCheckCredentials.
+ *
+ * Every string and finally the array itself is released with free().
+ *
+ * @param   papszEnv    NULL terminated array of strings.  NULL is ignored.
+ */
+ static void rtProcPosixFreePamEnv(char **papszEnv)
+ {
+     if (!papszEnv)
+         return;
+
+     char **ppszCur = papszEnv;
+     while (*ppszCur != NULL)
+         free(*ppszCur++);
+     free(papszEnv);
+ }
+
+
+/**
+ * Check the credentials and return the gid/uid of user.
+ *
+ * The user is resolved with getpwnam_r() first.  When built with IPRT_USE_PAM,
+ * PAM authentication is tried next and the crypt based check further down is
+ * only reached if PAM reports that falling back is permitted.
+ *
+ * @note    @a pUid and @a pGid are written as soon as the user has been
+ *          resolved, i.e. also when the password check fails afterwards.
+ *
+ * @returns IPRT status code, VERR_AUTHENTICATION_FAILURE if the user cannot be
+ *          resolved or the password does not match.
+ * @param   pszUser     The username.
+ * @param   pszPasswd   The password to authenticate with.  The crypt based
+ *                      check treats NULL like an empty password.
+ * @param   pGid        Where to store the GID of the user.
+ * @param   pUid        Where to store the UID of the user.
+ * @param   ppapszEnv   Where to return PAM environment variables, NULL is fine
+ *                      if no variables to return.  Call rtProcPosixFreePamEnv to
+ *                      free.  Optional, so NULL can be passed in.
+ */
+ static int rtCheckCredentials(const char *pszUser, const char *pszPasswd, gid_t *pGid, uid_t *pUid, char ***ppapszEnv)
+ {
+     Log(("rtCheckCredentials: pszUser=%s\n", pszUser));
+     int rc;
+
+     if (ppapszEnv)
+         *ppapszEnv = NULL;
+
+     /*
+      * Resolve user to UID and GID.
+      */
+     char            achBuf[_4K];
+     struct passwd   Pw;
+     struct passwd  *pPw;
+     if (getpwnam_r(pszUser, &Pw, achBuf, sizeof(achBuf), &pPw) != 0)
+         return VERR_AUTHENTICATION_FAILURE;
+     if (!pPw)
+         return VERR_AUTHENTICATION_FAILURE;
+
+     *pUid = pPw->pw_uid;
+     *pGid = pPw->pw_gid;
+
+#ifdef IPRT_USE_PAM
+     /*
+      * Try authenticate using PAM, and falling back on crypto if allowed.
+      */
+     const char *pszService = "iprt-as-user";
+     if (!rtProcPosixPamServiceExists("iprt-as-user"))
+# ifdef IPRT_PAM_NATIVE_SERVICE_NAME_AS_USER
+         pszService = IPRT_PAM_NATIVE_SERVICE_NAME_AS_USER;
+# else
+         pszService = "login";
+# endif
+     bool fMayFallBack = false;
+     rc = rtProcPosixAuthenticateUsingPam(pszService, pszUser, pszPasswd, ppapszEnv, &fMayFallBack);
+     if (RT_SUCCESS(rc) || !fMayFallBack)
+     {
+         RTMemWipeThoroughly(achBuf, sizeof(achBuf), 3);
+         return rc;
+     }
+#endif
+
+#if !defined(IPRT_USE_PAM) || defined(RT_OS_LINUX) || defined(RT_OS_SOLARIS) || defined(RT_OS_OS2)
+# if defined(RT_OS_LINUX) || defined(RT_OS_SOLARIS)
+     /*
+      * Ditto for /etc/shadow and replace pw_passwd from above if we can access it:
+      *
+      * Note! On FreeBSD and OS/2 the root user will open /etc/shadow above, so
+      *       this getspnam_r step is not necessary.
+      */
+     struct spwd  ShwPwd;
+     char         achBuf2[_4K];
+#  if defined(RT_OS_LINUX)
+     struct spwd *pShwPwd = NULL;
+     if (getspnam_r(pszUser, &ShwPwd, achBuf2, sizeof(achBuf2), &pShwPwd) != 0)
+         pShwPwd = NULL;
+#  else
+     struct spwd *pShwPwd = getspnam_r(pszUser, &ShwPwd, achBuf2, sizeof(achBuf2));
+#  endif
+     if (pShwPwd != NULL)
+         pPw->pw_passwd = pShwPwd->sp_pwdp;
+# endif
+
+     /*
+      * Encrypt the passed in password and see if it matches.
+      */
+# if defined(RT_OS_LINUX)
+     /* Default fCorrect=true if no password specified. In that case, pPw->pw_passwd
+        must be NULL (no password set for this user). Fail if a password is specified
+        but the user does not have one assigned. */
+     rc = !pszPasswd || !*pszPasswd ? VINF_SUCCESS : VERR_AUTHENTICATION_FAILURE;
+     if (pPw->pw_passwd && *pPw->pw_passwd)
+# endif
+     {
+         /* Never hand a NULL key to crypt()/crypt_r(); a missing password is hashed
+            as the empty string instead. */
+         const char * const pszKey = pszPasswd ? pszPasswd : "";
+# if defined(RT_OS_LINUX) || defined(RT_OS_OS2)
+#  ifdef IPRT_WITH_DYNAMIC_CRYPT_R
+         size_t const       cbCryptData = RT_MAX(sizeof(struct crypt_data) * 2, _256K);
+#  else
+         size_t const       cbCryptData = sizeof(struct crypt_data);
+#  endif
+         struct crypt_data *pCryptData  = (struct crypt_data *)RTMemTmpAllocZ(cbCryptData);
+         if (pCryptData)
+         {
+#  ifdef IPRT_WITH_DYNAMIC_CRYPT_R
+             char *pszEncPasswd = rtProcDynamicCryptR(pszKey, pPw->pw_passwd, pCryptData);
+#  else
+             char *pszEncPasswd = crypt_r(pszKey, pPw->pw_passwd, pCryptData);
+#  endif
+             rc = pszEncPasswd && !strcmp(pszEncPasswd, pPw->pw_passwd) ? VINF_SUCCESS : VERR_AUTHENTICATION_FAILURE;
+             RTMemWipeThoroughly(pCryptData, cbCryptData, 3);
+             RTMemTmpFree(pCryptData);
+         }
+         else
+             rc = VERR_NO_TMP_MEMORY;
+# else
+         /* crypt() returns NULL on failure, so check before comparing. */
+         char *pszEncPasswd = crypt(pszKey, pPw->pw_passwd);
+         rc = pszEncPasswd && strcmp(pszEncPasswd, pPw->pw_passwd) == 0 ? VINF_SUCCESS : VERR_AUTHENTICATION_FAILURE;
+# endif
+     }
+
+     /*
+      * Return GID and UID on success.  Always wipe stack buffers.
+      */
+     if (RT_SUCCESS(rc))
+     {
+         *pGid = pPw->pw_gid;
+         *pUid = pPw->pw_uid;
+     }
+# if defined(RT_OS_LINUX) || defined(RT_OS_SOLARIS)
+     RTMemWipeThoroughly(achBuf2, sizeof(achBuf2), 3);
+# endif
+#endif
+     RTMemWipeThoroughly(achBuf, sizeof(achBuf), 3);
+     return rc;
+ }
+
+#ifdef RT_OS_SOLARIS
+
+/**
+ * Opens the process contract template, configures it and makes it the active
+ * template for the process.
+ *
+ * @todo the error reporting of the Solaris process contract code could be
+ * a lot better, but essentially it is not meant to run into errors after
+ * the debugging phase.
+ *
+ * @returns The template file descriptor on success, -1 on failure.
+ */
+ static int rtSolarisContractPreFork(void)
+ {
+     int const fdTemplate = open64(CTFS_ROOT "/process/template", O_RDWR);
+     if (fdTemplate < 0)
+         return -1;
+
+     /* Set template parameters and event sets, then make this the active
+        template for the process.  The first failing step aborts the rest. */
+     if (   ct_pr_tmpl_set_param(fdTemplate, CT_PR_PGRPONLY) != 0
+         || ct_pr_tmpl_set_fatal(fdTemplate, CT_PR_EV_HWERR) != 0
+         || ct_tmpl_set_critical(fdTemplate, 0) != 0
+         || ct_tmpl_set_informative(fdTemplate, CT_PR_EV_HWERR) != 0
+         || ct_tmpl_activate(fdTemplate) != 0)
+     {
+         close(fdTemplate);
+         return -1;
+     }
+
+     return fdTemplate;
+ }
+
+/**
+ * Child side cleanup for the template from rtSolarisContractPreFork.
+ *
+ * @param   templateFd  The template file descriptor, -1 if there is none (in
+ *                      which case nothing is done).
+ */
+ static void rtSolarisContractPostForkChild(int templateFd)
+ {
+     if (templateFd != -1)
+     {
+         /* Clear the active template. */
+         ct_tmpl_clear(templateFd);
+         close(templateFd);
+     }
+ }
+
+/**
+ * Parent side cleanup for the template from rtSolarisContractPreFork.
+ *
+ * Clears and closes the template, then looks up the latest process contract
+ * and abandons it.  All failures are silently ignored.
+ *
+ * @param   templateFd  The template file descriptor, -1 if there is none (in
+ *                      which case nothing is done).
+ * @param   pid         The fork() result; values <= 0 skip the abandon step.
+ */
+ static void rtSolarisContractPostForkParent(int templateFd, pid_t pid)
+ {
+     if (templateFd == -1)
+         return;
+
+     /* Clear the active template. */
+     int cleared = ct_tmpl_clear(templateFd);
+     close(templateFd);
+
+     /* If the clearing failed or the fork failed there's nothing more to do. */
+     if (cleared || pid <= 0)
+         return;
+
+     /* Look up the contract which was created by this thread. */
+     int statFd = open64(CTFS_ROOT "/process/latest", O_RDONLY);
+     if (statFd == -1)
+         return;
+     ct_stathdl_t statHdl;
+     if (ct_status_read(statFd, CTD_COMMON, &statHdl))
+     {
+         close(statFd);
+         return;
+     }
+     ctid_t ctId = ct_status_get_id(statHdl);
+     ct_status_free(statHdl);
+     close(statFd);
+     if (ctId < 0)
+         return;
+
+     /* Abandon this contract we just created. */
+     char ctlPath[PATH_MAX];
+     size_t len = snprintf(ctlPath, sizeof(ctlPath),
+                           CTFS_ROOT "/process/%ld/ctl", (long)ctId);
+     if (len >= sizeof(ctlPath))
+         return;
+     int ctlFd = open64(ctlPath, O_WRONLY);
+     if (ctlFd == -1) /* Check the descriptor just opened, not the already closed statFd. */
+         return;
+     if (ct_ctl_abandon(ctlFd) < 0)
+     {
+         close(ctlFd);
+         return;
+     }
+     close(ctlFd);
+ }
+
+#endif /* RT_OS_SOLARIS */
+
+
+/**
+ * Creates a child process; convenience wrapper around RTProcCreateEx.
+ *
+ * Forwards the arguments to RTProcCreateEx and passes NULL for the three
+ * standard handles, the user name, the password and the extra data.
+ *
+ * @returns The status code returned by RTProcCreateEx.
+ * @param   pszExec     Passed on unchanged to RTProcCreateEx.
+ * @param   papszArgs   Passed on unchanged to RTProcCreateEx.
+ * @param   Env         Passed on unchanged to RTProcCreateEx.
+ * @param   fFlags      Passed on unchanged to RTProcCreateEx.
+ * @param   pProcess    Passed on unchanged to RTProcCreateEx.
+ */
+ RTR3DECL(int) RTProcCreate(const char *pszExec, const char * const *papszArgs, RTENV Env, unsigned fFlags, PRTPROCESS pProcess)
+ {
+     int const rcCreate = RTProcCreateEx(pszExec, papszArgs, Env, fFlags,
+                                         NULL, NULL, NULL,   /* standard handles */
+                                         NULL /*pszAsUser*/,
+                                         NULL /* pszPassword*/,
+                                         NULL /*pvExtraData*/,
+                                         pProcess);
+     return rcCreate;
+ }
+
+
+/**
+ * Adjust the profile environment after forking the child process and changing
+ * the UID.
+ *
+ * On Darwin this sets TMPDIR in @a hEnvToUse to the value confstr() reports
+ * for _CS_DARWIN_USER_TEMP_DIR, unless @a fFlags contains
+ * RTPROC_FLAGS_ENV_CHANGE_RECORD and @a hEnv has no TMPDIR entry.  On other
+ * systems nothing is done.
+ *
+ * @returns IPRT status code.
+ * @retval  VERR_BUFFER_OVERFLOW if the confstr() result is empty or does not
+ *          fit the buffer (Darwin only).
+ * @param   hEnvToUse   The environment we're going to use with execve.
+ * @param   fFlags      The process creation flags.
+ * @param   hEnv        The environment passed in by the user.
+ */
+ static int rtProcPosixAdjustProfileEnvFromChild(RTENV hEnvToUse, uint32_t fFlags, RTENV hEnv)
+ {
+#ifdef RT_OS_DARWIN
+     /* Leave TMPDIR alone when given a change record which does not touch it. */
+     if (   (fFlags & RTPROC_FLAGS_ENV_CHANGE_RECORD)
+         && !RTEnvExistEx(hEnv, "TMPDIR"))
+         return VINF_SUCCESS;
+
+     char         szUserTmpDir[RTPATH_MAX];
+     size_t const cbUserTmpDir = confstr(_CS_DARWIN_USER_TEMP_DIR, szUserTmpDir, sizeof(szUserTmpDir));
+     if (cbUserTmpDir == 0 || cbUserTmpDir >= sizeof(szUserTmpDir))
+         return VERR_BUFFER_OVERFLOW;
+
+     char *pszUtf8TmpDir;
+     int rc = RTStrCurrentCPToUtf8(&pszUtf8TmpDir, szUserTmpDir);
+     if (RT_SUCCESS(rc))
+     {
+         rc = RTEnvSetEx(hEnvToUse, "TMPDIR", pszUtf8TmpDir);
+         RTStrFree(pszUtf8TmpDir);
+     }
+     return rc;
+#else
+     RT_NOREF_PV(hEnvToUse); RT_NOREF_PV(fFlags); RT_NOREF_PV(hEnv);
+     return VINF_SUCCESS;
+#endif
+ }
+
+
+/**
+ * Removes quoting and escape sequences in place and looks for stop characters.
+ *
+ * Stop characters are only recognized outside quotes.  Inside single quotes
+ * the backslash is copied literally; inside double quotes it only escapes
+ * backslash, dollar, backtick, double quote and newline.
+ *
+ * @returns Where to continue scanning in @a pszString.  This points to the
+ *          next character after the stop character, but for the zero terminator
+ *          it points to the terminator character.
+ * @param   pszString           The string to undo quoting and escaping for.
+ *                              This is both input and output as the work is
+ *                              done in place.
+ * @param   pfStoppedOnEqual    Where to return whether we stopped on a plain
+ *                              equal character or not.  If this is NULL, the
+ *                              equal character is not a stop character and
+ *                              only newline and the zero terminator are.
+ */
+ static char *rtProcPosixProfileEnvUnquoteAndUnescapeString(char *pszString, bool *pfStoppedOnEqual)
+ {
+     if (pfStoppedOnEqual)
+         *pfStoppedOnEqual = false;
+
+     enum { kPlain, kSingleQ, kDoubleQ } enmState = kPlain;
+     char *pszOut = pszString;
+     for (;;)
+     {
+         char const chCur = *pszString++;
+
+         /* The terminator always stops us, whatever the quoting state. */
+         if (chCur == '\0')
+         {
+             Assert(enmState == kPlain);
+             *pszOut = '\0';
+             return pszString - 1;
+         }
+
+         /* Unquoted stop characters. */
+         if (enmState == kPlain)
+         {
+             if (chCur == '\n')
+             {
+                 *pszOut = '\0';
+                 return pszString;
+             }
+             if (chCur == '=' && pfStoppedOnEqual)
+             {
+                 *pszOut = '\0';
+                 *pfStoppedOnEqual = true;
+                 return pszString;
+             }
+         }
+
+         /* Quote characters toggle their state unless quoted by the other kind. */
+         if (chCur == '"' && enmState != kSingleQ)
+         {
+             enmState = enmState == kPlain ? kDoubleQ : kPlain;
+             continue;
+         }
+         if (chCur == '\'' && enmState != kDoubleQ)
+         {
+             enmState = enmState == kPlain ? kSingleQ : kPlain;
+             continue;
+         }
+
+         /* Escape sequences: emit the escaped character instead of the backslash. */
+         if (chCur == '\\' && enmState != kSingleQ)
+         {
+             char const chNext = *pszString;
+             if (   chNext != '\0'
+                 && (enmState != kDoubleQ || strchr("\\$`\"\n", chNext) != NULL))
+             {
+                 *pszOut++ = chNext;
+                 pszString++;
+                 continue;
+             }
+         }
+
+         /* Everything else is copied as is. */
+         *pszOut++ = chCur;
+     }
+ }
+
+
+/**
+ * Worker for rtProcPosixProfileEnvRunAndHarvest that parses the environment
+ * dump and loads it into hEnvToUse.
+ *
+ * @note    This isn't entirely correct should any of the profile setup scripts
+ *          unset any of the environment variables in the basic initial
+ *          environment, but since that's unlikely and it's very convenient to
+ *          have something half sensible as a basis if we don't grok the dump
+ *          entirely and would skip central stuff like PATH or HOME.
+ *
+ * @returns IPRT status code.
+ * @retval  -VERR_PARSE_ERROR (positive, e.g. warning) if we run into trouble.
+ * @retval  -VERR_INVALID_UTF8_ENCODING (positive, e.g. warning) if there are
+ *          invalid UTF-8 in the environment.  This isn't unlikely if the
+ *          profile doesn't use UTF-8.  This is unfortunately not something we
+ *          can guess too accurately up front, so we don't do any guessing and
+ *          hope everyone is sensible and use UTF-8.
+ *
+ * @param   hEnvToUse       The basic environment to extend with what we manage
+ *                          to parse here.
+ * @param   pszEnvDump      The environment dump to parse.  Nominally in Bourne
+ *                          shell 'export -p' format.  Modified in place.
+ * @param   fWithMarkers    Whether there are markers around the dump (C shell,
+ *                          tmux) or not.
+ */
+ static int rtProcPosixProfileEnvHarvest(RTENV hEnvToUse, char *pszEnvDump, bool fWithMarkers)
+ {
+     LogRel3(("**** pszEnvDump start ****\n%s**** pszEnvDump end ****\n", pszEnvDump));
+     if (!LogIs3Enabled())
+         LogFunc(("**** pszEnvDump start ****\n%s**** pszEnvDump end ****\n", pszEnvDump));
+
+     /*
+      * Clip dump at markers if we're using them (C shell).
+      */
+     if (fWithMarkers)
+     {
+         char *pszStart = strstr(pszEnvDump, g_szEnvMarkerBegin);
+         AssertReturn(pszStart, -VERR_PARSE_ERROR);
+         pszStart += sizeof(g_szEnvMarkerBegin) - 1;
+         if (*pszStart == '\n')
+             pszStart++;
+         pszEnvDump = pszStart;
+
+         char *pszEnd = strstr(pszStart, g_szEnvMarkerEnd);
+         AssertReturn(pszEnd, -VERR_PARSE_ERROR);
+         *pszEnd = '\0';
+     }
+
+     /*
+      * Since we're using /bin/sh -c "export -p" for all the dumping, we should
+      * always get lines on the format:
+      *      export VAR1="Value 1"
+      *      export VAR2=Value2
+      *
+      * However, just in case something goes wrong, like bash doesn't think it
+      * needs to be posixly correct, try deal with the alternative where
+      * "declare -x " replaces the "export".
+      */
+     const char *pszPrefix;
+     if (   strncmp(pszEnvDump, RT_STR_TUPLE("export")) == 0
+         && RT_C_IS_BLANK(pszEnvDump[6]))
+         pszPrefix = "export ";
+     else if (   strncmp(pszEnvDump, RT_STR_TUPLE("declare")) == 0
+              && RT_C_IS_BLANK(pszEnvDump[7])
+              && pszEnvDump[8] == '-')
+         pszPrefix = "declare -x "; /* We only need to care about the non-array, non-function lines. */
+     else
+         AssertFailedReturn(-VERR_PARSE_ERROR);
+     size_t const cchPrefix = strlen(pszPrefix);
+
+     /*
+      * Process the lines, ignoring stuff which we don't grok.
+      * The shell should quote problematic characters.  Bash double quotes stuff
+      * by default, whereas almquist's shell does it as needed and only the value
+      * side.
+      */
+     int rc = VINF_SUCCESS;
+     while (pszEnvDump && *pszEnvDump != '\0')
+     {
+         /*
+          * Skip the prefixing command.
+          */
+         if (   cchPrefix == 0
+             || strncmp(pszEnvDump, pszPrefix, cchPrefix) == 0)
+         {
+             pszEnvDump += cchPrefix;
+             while (RT_C_IS_BLANK(*pszEnvDump))
+                 pszEnvDump++;
+         }
+         else
+         {
+             /* Oops, must find our bearings for some reason...  Resume at the
+                start of the next line.  We must step past the newline itself,
+                otherwise the next iteration finds the very same newline again
+                and we never terminate. */
+             char *pszEol = strchr(pszEnvDump, '\n');
+             pszEnvDump = pszEol ? pszEol + 1 : NULL;
+             rc = -VERR_PARSE_ERROR;
+             continue;
+         }
+
+         /*
+          * Parse out the variable name using typical bourne shell escaping
+          * and quoting rules.
+          */
+         /** @todo We should throw away lines that aren't properly quoted, now we
+          *        just continue and use what we found. */
+         const char *pszVar               = pszEnvDump;
+         bool        fStoppedOnPlainEqual = false;
+         pszEnvDump = rtProcPosixProfileEnvUnquoteAndUnescapeString(pszEnvDump, &fStoppedOnPlainEqual);
+         const char *pszValue             = pszEnvDump;
+         if (fStoppedOnPlainEqual)
+             pszEnvDump = rtProcPosixProfileEnvUnquoteAndUnescapeString(pszEnvDump, NULL /*pfStoppedOnPlainEqual*/);
+         else
+             pszValue = "";
+
+         /*
+          * Add them if valid UTF-8, otherwise we simply drop them for now.
+          * The whole codeset stuff goes seriously wonky here as the environment
+          * we're harvesting probably contains its own LC_CTYPE or LANG variables,
+          * so ignore the problem for now.
+          */
+         if (   RTStrIsValidEncoding(pszVar)
+             && RTStrIsValidEncoding(pszValue))
+         {
+             int rc2 = RTEnvSetEx(hEnvToUse, pszVar, pszValue);
+             AssertRCReturn(rc2, rc2);
+         }
+         else if (rc == VINF_SUCCESS)
+             rc = -VERR_INVALID_UTF8_ENCODING;
+     }
+
+     return rc;
+ }
+
+
+/**
+ * Runs the user's shell in login mode with some environment dumping logic and
+ * harvests the dump, putting it into hEnvToUse.
+ *
+ * This is a bit hairy, esp. with regards to codesets.
+ *
+ * The user's own shell is only used when it is executable, lives in /bin,
+ * /usr/bin or /usr/local/bin and has a name we recognize; otherwise the dump
+ * is produced by running _PATH_BSHELL as a login shell.
+ *
+ * @returns IPRT status code.  Not all error statuses will be returned and the
+ *          caller should just continue with whatever is in hEnvToUse.
+ *
+ * @param   hEnvToUse   On input this is the basic user environment, on success
+ *                      it is fleshed out with stuff from the login shell dump.
+ * @param   pszAsUser   The user name for the profile.
+ * @param   uid         The UID corresponding to @a pszAsUser, ~0 if current user.
+ * @param   gid         The GID corresponding to @a pszAsUser, ~0 if current user.
+ * @param   pszShell    The login shell.  This is a writable string to avoid
+ *                      needing to make a copy of it when examining the path
+ *                      part, instead we make a temporary change to it which is
+ *                      always reverted before returning.
+ */
+ static int rtProcPosixProfileEnvRunAndHarvest(RTENV hEnvToUse, const char *pszAsUser, uid_t uid, gid_t gid, char *pszShell)
+ {
+     LogFlowFunc(("pszAsUser=%s uid=%u gid=%u pszShell=%s; hEnvToUse contains %u variables on entry\n",
+                  pszAsUser, uid, gid, pszShell, RTEnvCountEx(hEnvToUse) ));
+
+     /*
+      * The three standard handles should be pointed to /dev/null, the 3rd handle
+      * is used to dump the environment.
+      */
+     RTPIPE hPipeR, hPipeW;
+     int rc = RTPipeCreate(&hPipeR, &hPipeW, 0);
+     if (RT_SUCCESS(rc))
+     {
+         RTFILE hFileNull;
+         rc = RTFileOpenBitBucket(&hFileNull, RTFILE_O_READWRITE);
+         if (RT_SUCCESS(rc))
+         {
+             int aRedirFds[4];
+             aRedirFds[0] = aRedirFds[1] = aRedirFds[2] = RTFileToNative(hFileNull);
+             aRedirFds[3] = RTPipeToNative(hPipeW);
+
+             /*
+              * Allocate a buffer for receiving the environment dump.
+              *
+              * This is fixed sized for simplicity and safety (creative user script
+              * shouldn't be allowed to exhaust our memory or such, after all we're
+              * most likely running with root privileges in this code path).
+              */
+             size_t const cbEnvDump  = _64K;
+             char * const pszEnvDump = (char *)RTMemTmpAllocZ(cbEnvDump);
+             if (pszEnvDump)
+             {
+                 /*
+                  * Our default approach is using /bin/sh:
+                  */
+                 const char *pszExec = _PATH_BSHELL;
+                 const char *apszArgs[8];
+                 apszArgs[0] = "-sh";        /* First arg must start with a dash for login shells. */
+                 apszArgs[1] = "-c";
+                 apszArgs[2] = "POSIXLY_CORRECT=1;export -p >&3";
+                 apszArgs[3] = NULL;
+
+                 /*
+                  * But see if we can trust the shell to be a real usable shell.
+                  * This would be great as different shells typically have different profile
+                  * setup files and we'll end up with the wrong environment if we use a
+                  * different shell.
+                  *
+                  * The executable check is done on the full path (access() returns zero on
+                  * success), and we require a directory part so the temporary termination
+                  * of the path below never writes in front of the string.
+                  */
+                 char        szDashShell[32];
+                 char        szExportArg[128];
+                 bool        fWithMarkers = false;
+                 const char *pszShellNm   = RTPathFilename(pszShell);
+                 if (   pszShellNm
+                     && pszShellNm > pszShell
+                     && access(pszShell, X_OK) == 0)
+                 {
+                     /*
+                      * First the check that it's a known bin directory:
+                      */
+                     size_t const cchShellPath = pszShellNm - pszShell;
+                     char const   chSaved      = pszShell[cchShellPath - 1];
+                     pszShell[cchShellPath - 1] = '\0';
+                     if (   RTPathCompare(pszShell, "/bin") == 0
+                         || RTPathCompare(pszShell, "/usr/bin") == 0
+                         || RTPathCompare(pszShell, "/usr/local/bin") == 0)
+                     {
+                         /*
+                          * Then see if we recognize the shell name.
+                          */
+                         RTStrCopy(&szDashShell[1], sizeof(szDashShell) - 1, pszShellNm);
+                         szDashShell[0] = '-';
+                         if (   strcmp(pszShellNm, "bash") == 0
+                             || strcmp(pszShellNm, "ksh") == 0
+                             || strcmp(pszShellNm, "ksh93") == 0
+                             || strcmp(pszShellNm, "zsh") == 0
+                             || strcmp(pszShellNm, "fish") == 0)
+                         {
+                             pszExec     = pszShell;
+                             apszArgs[0] = szDashShell;
+
+                             /* Use /bin/sh for doing the environment dumping so we get the same kind
+                                of output from everyone and can limit our parsing + testing efforts. */
+                             RTStrPrintf(szExportArg, sizeof(szExportArg),
+                                         "%s -c 'POSIXLY_CORRECT=1;export -p >&3'", _PATH_BSHELL);
+                             apszArgs[2] = szExportArg;
+                         }
+                         /* C shell is very annoying in that it closes fd 3 without regard to what
+                            we might have put there, so we must use stdout here but with markers so
+                            we can find the dump.
+                            Seems tmux have similar issues as it doesn't work above, but works fine here. */
+                         else if (   strcmp(pszShellNm, "csh") == 0
+                                  || strcmp(pszShellNm, "tcsh") == 0
+                                  || strcmp(pszShellNm, "tmux") == 0)
+                         {
+                             pszExec     = pszShell;
+                             apszArgs[0] = szDashShell;
+
+                             fWithMarkers = true;
+                             size_t cch = RTStrPrintf(szExportArg, sizeof(szExportArg),
+                                                      "%s -c 'set -e;POSIXLY_CORRECT=1;echo %s;export -p;echo %s'",
+                                                      _PATH_BSHELL, g_szEnvMarkerBegin, g_szEnvMarkerEnd);
+                             Assert(cch < sizeof(szExportArg) - 1); RT_NOREF(cch);
+                             apszArgs[2] = szExportArg;
+
+                             aRedirFds[1] = aRedirFds[3];
+                             aRedirFds[3] = -1;
+                         }
+                     }
+                     pszShell[cchShellPath - 1] = chSaved;
+                 }
+
+                 /*
+                  * Create the process and wait for the output.
+                  */
+                 LogFunc(("Executing '%s': '%s', '%s', '%s'\n", pszExec, apszArgs[0], apszArgs[1], apszArgs[2]));
+                 RTPROCESS hProcess = NIL_RTPROCESS;
+                 rc = rtProcPosixCreateInner(pszExec, apszArgs, hEnvToUse, hEnvToUse, 0 /*fFlags*/,
+                                             pszAsUser, uid, gid, RT_ELEMENTS(aRedirFds), aRedirFds, &hProcess);
+                 if (RT_SUCCESS(rc))
+                 {
+                     /* Close our copy of the write end so the read loop sees the pipe
+                        break once the child is done with it. */
+                     RTPipeClose(hPipeW);
+                     hPipeW = NIL_RTPIPE;
+
+                     size_t         offEnvDump = 0;
+                     uint64_t const msStart    = RTTimeMilliTS();
+                     for (;;)
+                     {
+                         size_t cbRead = 0;
+                         if (offEnvDump < cbEnvDump - 1)
+                         {
+                             rc = RTPipeRead(hPipeR, &pszEnvDump[offEnvDump], cbEnvDump - 1 - offEnvDump, &cbRead);
+                             if (RT_SUCCESS(rc))
+                                 offEnvDump += cbRead;
+                             else
+                             {
+                                 LogFlowFunc(("Breaking out of read loop: %Rrc\n", rc));
+                                 if (rc == VERR_BROKEN_PIPE)
+                                     rc = VINF_SUCCESS;
+                                 break;
+                             }
+                             pszEnvDump[offEnvDump] = '\0';
+                         }
+                         else
+                         {
+                             LogFunc(("Too much data in environment dump, dropping it\n"));
+                             rc = VERR_TOO_MUCH_DATA;
+                             break;
+                         }
+
+                         /* Do the timeout check. */
+                         uint64_t const cMsElapsed = RTTimeMilliTS() - msStart;
+                         if (cMsElapsed >= RT_MS_15SEC)
+                         {
+                             LogFunc(("Timed out after %RU64 ms\n", cMsElapsed));
+                             rc = VERR_TIMEOUT;
+                             break;
+                         }
+
+                         /* If we got no data in above wait for more to become ready. */
+                         if (!cbRead)
+                             RTPipeSelectOne(hPipeR, RT_MS_15SEC - cMsElapsed);
+                     }
+
+                     /*
+                      * Kill the process and wait for it to avoid leaving zombies behind.
+                      */
+                     /** @todo do we check the exit code? */
+                     int rc2 = RTProcWait(hProcess, RTPROCWAIT_FLAGS_NOBLOCK, NULL);
+                     if (RT_SUCCESS(rc2))
+                         LogFlowFunc(("First RTProcWait succeeded\n"));
+                     else
+                     {
+                         LogFunc(("First RTProcWait failed (%Rrc), terminating and doing a blocking wait\n", rc2));
+                         RTProcTerminate(hProcess);
+                         RTProcWait(hProcess, RTPROCWAIT_FLAGS_BLOCK, NULL);
+                     }
+
+                     /*
+                      * Parse the result.
+                      */
+                     if (RT_SUCCESS(rc))
+                         rc = rtProcPosixProfileEnvHarvest(hEnvToUse, pszEnvDump, fWithMarkers);
+                     else
+                     {
+                         LogFunc(("Ignoring rc=%Rrc from the pipe read loop and continues with basic environment\n", rc));
+                         rc = -rc;
+                     }
+                 }
+                 else
+                     LogFunc(("Failed to create process '%s': %Rrc\n", pszExec, rc));
+                 RTMemTmpFree(pszEnvDump);
+             }
+             else
+             {
+                 LogFunc(("Failed to allocate %#zx bytes for the dump\n", cbEnvDump));
+                 rc = VERR_NO_TMP_MEMORY;
+             }
+             RTFileClose(hFileNull);
+         }
+         else
+             LogFunc(("Failed to open /dev/null: %Rrc\n", rc));
+         RTPipeClose(hPipeR);
+         RTPipeClose(hPipeW);
+     }
+     else
+         LogFunc(("Failed to create pipe: %Rrc\n", rc));
+     LogFlowFunc(("returns %Rrc (hEnvToUse contains %u variables now)\n", rc, RTEnvCountEx(hEnvToUse)));
+     return rc;
+ }
+
+
+/**
+ * Create an environment for the given user.
+ *
+ * This starts by creating a very basic environment and then tries to do it
+ * properly by running the user's shell in login mode with some environment
+ * dumping attached.  The latter may fail and we'll ignore that for now and move
+ * ahead with the very basic environment.
+ *
+ * @returns IPRT status code.  On failure any environment created here has
+ *          been destroyed again.
+ * @param   phEnvToUse  Where to return the created environment.
+ * @param   pszAsUser   The user name for the profile.  NULL if the current
+ *                      user.
+ * @param   uid         The UID corresponding to @a pszAsUser, ~0 if NULL.
+ * @param   gid         The GID corresponding to @a pszAsUser, ~0 if NULL.
+ * @param   fFlags      RTPROC_FLAGS_XXX
+ * @param   papszPamEnv Array of environment variables returned by PAM, if
+ *                      it was used for authentication and produced anything.
+ *                      Otherwise NULL.
+ */
+ static int rtProcPosixCreateProfileEnv(PRTENV phEnvToUse, const char *pszAsUser, uid_t uid, gid_t gid,
+                                        uint32_t fFlags, char **papszPamEnv)
+ {
+     /*
+      * Get the passwd entry for the user.
+      */
+     struct passwd   Pwd;
+     struct passwd  *pPwd = NULL;
+     char            achBuf[_4K];
+     int             rc;
+     errno = 0;
+     if (pszAsUser)
+         rc = getpwnam_r(pszAsUser, &Pwd, achBuf, sizeof(achBuf), &pPwd);
+     else
+         rc = getpwuid_r(getuid(), &Pwd, achBuf, sizeof(achBuf), &pPwd);
+     if (rc == 0 && pPwd)
+     {
+         /*
+          * Convert stuff to UTF-8 since the environment is UTF-8.
+          */
+         char *pszDir;
+         rc = RTStrCurrentCPToUtf8(&pszDir, pPwd->pw_dir);
+         if (RT_SUCCESS(rc))
+         {
+#if 0 /* Enable and modify this to test shells other that your login shell. */
+             pPwd->pw_shell = (char *)"/bin/tmux";
+#endif
+             char *pszShell;
+             rc = RTStrCurrentCPToUtf8(&pszShell, pPwd->pw_shell);
+             if (RT_SUCCESS(rc))
+             {
+                 char *pszAsUserFree = NULL;
+                 if (!pszAsUser)
+                 {
+                     rc = RTStrCurrentCPToUtf8(&pszAsUserFree, pPwd->pw_name);
+                     if (RT_SUCCESS(rc))
+                         pszAsUser = pszAsUserFree;
+                 }
+                 if (RT_SUCCESS(rc))
+                 {
+                     /*
+                      * Create and populate the environment.
+                      */
+                     rc = RTEnvCreate(phEnvToUse);
+                     if (RT_SUCCESS(rc))
+                     {
+                         RTENV hEnvToUse = *phEnvToUse;
+                         rc = RTEnvSetEx(hEnvToUse, "HOME", pszDir);
+                         if (RT_SUCCESS(rc))
+                             rc = RTEnvSetEx(hEnvToUse, "SHELL", pszShell);
+                         if (RT_SUCCESS(rc))
+                             rc = RTEnvSetEx(hEnvToUse, "USER", pszAsUser);
+                         if (RT_SUCCESS(rc))
+                             rc = RTEnvSetEx(hEnvToUse, "LOGNAME", pszAsUser);
+                         if (RT_SUCCESS(rc))
+                             rc = RTEnvSetEx(hEnvToUse, "PATH", pPwd->pw_uid == 0 ? _PATH_STDPATH : _PATH_DEFPATH);
+                         char szTmpPath[RTPATH_MAX];
+                         if (RT_SUCCESS(rc))
+                         {
+                             RTStrPrintf(szTmpPath, sizeof(szTmpPath), "%s/%s", _PATH_MAILDIR, pszAsUser);
+                             rc = RTEnvSetEx(hEnvToUse, "MAIL", szTmpPath);
+                         }
+#ifdef RT_OS_DARWIN
+                         if (RT_SUCCESS(rc))
+                         {
+                             /* TMPDIR is some unique per user directory under /var/folders on darwin,
+                                so get the one for the current user.  If we're launching the process as
+                                a different user, rtProcPosixAdjustProfileEnvFromChild will update it
+                                again for the actual child process user (provided we set it here).  See
+                                https://opensource.apple.com/source/Libc/Libc-997.1.1/darwin/_dirhelper.c
+                                for the implementation of this query. */
+                             size_t cbNeeded = confstr(_CS_DARWIN_USER_TEMP_DIR, szTmpPath, sizeof(szTmpPath));
+                             if (cbNeeded > 0 && cbNeeded < sizeof(szTmpPath))
+                             {
+                                 char *pszTmp;
+                                 rc = RTStrCurrentCPToUtf8(&pszTmp, szTmpPath);
+                                 if (RT_SUCCESS(rc))
+                                 {
+                                     rc = RTEnvSetEx(hEnvToUse, "TMPDIR", pszTmp);
+                                     RTStrFree(pszTmp);
+                                 }
+                             }
+                             else
+                                 rc = VERR_BUFFER_OVERFLOW;
+                         }
+#endif
+                         /*
+                          * Add everything from the PAM environment.
+                          */
+                         if (RT_SUCCESS(rc) && papszPamEnv != NULL)
+                             for (size_t i = 0; papszPamEnv[i] != NULL && RT_SUCCESS(rc); i++)
+                             {
+                                 char *pszEnvVar;
+                                 rc = RTStrCurrentCPToUtf8(&pszEnvVar, papszPamEnv[i]);
+                                 if (RT_SUCCESS(rc))
+                                 {
+                                     char *pszValue = strchr(pszEnvVar, '=');
+                                     if (pszValue)
+                                         *pszValue++ = '\0';
+                                     rc = RTEnvSetEx(hEnvToUse, pszEnvVar, pszValue ? pszValue : "");
+                                     RTStrFree(pszEnvVar);
+                                 }
+                                 /* Ignore conversion issue, though LogRel them.  The converted
+                                    string is not valid here, so log the PAM source string.
+                                    Negating the status makes it a warning, which keeps the loop going. */
+                                 else if (rc != VERR_NO_STR_MEMORY && rc != VERR_NO_MEMORY)
+                                 {
+                                     LogRelMax(256, ("RTStrCurrentCPToUtf8(,%.*Rhxs) -> %Rrc\n",
+                                                     (int)strlen(papszPamEnv[i]), papszPamEnv[i], rc));
+                                     rc = -rc;
+                                 }
+                             }
+                         if (RT_SUCCESS(rc))
+                         {
+                             /*
+                              * Now comes the fun part where we need to try run a shell in login mode
+                              * and harvest its final environment to get the proper environment for
+                              * the user.  We ignore some failures here so buggy login scripts and
+                              * other weird stuff won't trip us up too badly.
+                              */
+                             if (!(fFlags & RTPROC_FLAGS_ONLY_BASIC_PROFILE))
+                                 rc = rtProcPosixProfileEnvRunAndHarvest(hEnvToUse, pszAsUser, uid, gid, pszShell);
+                         }
+
+                         if (RT_FAILURE(rc))
+                             RTEnvDestroy(hEnvToUse);
+                     }
+                     RTStrFree(pszAsUserFree);
+                 }
+                 RTStrFree(pszShell);
+             }
+             RTStrFree(pszDir);
+         }
+     }
+     else
+         rc = errno ? RTErrConvertFromErrno(errno) : VERR_ACCESS_DENIED;
+     return rc;
+ }
+
+
+/**
+ * Converts the arguments to the child's LC_CTYPE charset if necessary.
+ *
+ * The child's codeset is taken from our own locale when the child inherits our
+ * environment (RTENV_DEFAULT); otherwise it is guessed from the first non-empty
+ * of LC_ALL, LC_CTYPE and LANG in @a hEnvToUse.  Nothing is converted when the
+ * resulting codeset is UTF-8.
+ *
+ * @returns IPRT status code.
+ * @retval  VERR_PROC_NO_ARG_TRANSLATION if an argument could not be translated
+ *          to the child's codeset.
+ * @retval  VERR_NO_MEMORY if the converted argument array could not be
+ *          allocated.
+ * @param   papszArgs   The arguments (UTF-8), NULL terminated array.
+ * @param   hEnvToUse   The child process environment.
+ * @param   ppapszArgs  Where to return the converted arguments.  Set to
+ *                      @a papszArgs itself when no conversion is needed and on
+ *                      failure.  Otherwise the array entries must be freed by
+ *                      RTStrFree and the array itself by RTMemFree.
+ */
+static int rtProcPosixConvertArgv(const char * const *papszArgs, RTENV hEnvToUse, char ***ppapszArgs)
+{
+    *ppapszArgs = (char **)papszArgs;
+
+    /*
+     * The first thing we need to do here is to try guess the codeset of the
+     * child process and check if it's UTF-8 or not.
+     */
+    const char *pszEncoding;
+    char        szEncoding[512];
+    if (hEnvToUse == RTENV_DEFAULT)
+    {
+        /* Same environment as us, assume setlocale is up to date: */
+        pszEncoding = rtStrGetLocaleCodeset();
+    }
+    else
+    {
+        /*
+         * LC_ALL overrides everything else.  The LC_* environment variables are often set
+         * to the empty string so move on the next variable if that is the case (that's
+         * what setlocale in glibc does).
+         */
+        const char *pszVar;
+        int rc = RTEnvGetEx(hEnvToUse, pszVar = "LC_ALL", szEncoding, sizeof(szEncoding), NULL);
+        if (rc == VERR_ENV_VAR_NOT_FOUND || (RT_SUCCESS(rc) && szEncoding[0] == '\0'))
+            rc = RTEnvGetEx(hEnvToUse, pszVar = "LC_CTYPE", szEncoding, sizeof(szEncoding), NULL);
+        if (rc == VERR_ENV_VAR_NOT_FOUND || (RT_SUCCESS(rc) && szEncoding[0] == '\0'))
+            rc = RTEnvGetEx(hEnvToUse, pszVar = "LANG", szEncoding, sizeof(szEncoding), NULL);
+        if (RT_SUCCESS(rc) && szEncoding[0] != '\0')
+        {
+            /*
+             * LC_ALL can contain a composite locale consisting of the locales of each of the
+             * categories in two different formats depending on the OS. On Solaris, macOS, and
+             * *BSD composite locale names use slash ('/') as the separator and the following
+             * order for the categories:
+             *   LC_CTYPE/LC_NUMERIC/LC_TIME/LC_COLLATE/LC_MONETARY/LC_MESSAGES
+             * e.g.:
+             *   en_US.UTF-8/POSIX/el_GR.UTF-8/el_CY.UTF-8/en_GB.UTF-8/es_ES.UTF-8
+             *
+             * On Solaris there is also a leading slash.
+             *
+             * On Linux and OS/2 the composite locale format is made up of key-value pairs
+             * of category names and locales of the form 'name=value' with each element
+             * separated by a semicolon in the same order as above with following additional
+             * categories included as well:
+             *   LC_PAPER/LC_NAME/LC_ADDRESS/LC_TELEPHONE/LC_MEASUREMENT/LC_IDENTIFICATION
+             * e.g.
+             *   LC_CTYPE=fr_BE;LC_NUMERIC=fr_BE@euro;LC_TIME=fr_BE.utf8;LC_COLLATE=fr_CA;\
+             *   LC_MONETARY=fr_CA.utf8;LC_MESSAGES=fr_CH;LC_PAPER=fr_CH.utf8;LC_NAME=fr_FR;\
+             *   LC_ADDRESS=fr_FR.utf8;LC_TELEPHONE=fr_LU;LC_MEASUREMENT=fr_LU@euro;\
+             *   LC_IDENTIFICATION=fr_LU.utf8
+             */
+            char *pszEncodingStart = szEncoding;
+#if !defined(RT_OS_LINUX) && !defined(RT_OS_OS2)
+            if (*pszEncodingStart == '/')
+                pszEncodingStart++;
+            char *pszSlash = strchr(pszEncodingStart, '/');
+            if (pszSlash)
+                *pszSlash = '\0'; /* This ASSUMES the first one is LC_CTYPE! */
+#else
+            char *pszCType = strstr(pszEncodingStart, "LC_CTYPE=");
+            if (pszCType)
+            {
+                pszEncodingStart = pszCType + sizeof("LC_CTYPE=") - 1;
+
+                char *pszSemiColon = strchr(pszEncodingStart, ';');
+                if (pszSemiColon)
+                    *pszSemiColon = '\0';
+            }
+#endif
+
+            /*
+             * Use newlocale and nl_langinfo_l to determine the default codeset for the locale
+             * specified in the child's environment.  These routines have been around since
+             * ancient days on Linux and for quite a long time on macOS, Solaris, and *BSD but
+             * to ensure their availability check that LC_CTYPE_MASK is defined.
+             *
+             * Note! The macOS nl_langinfo(3)/nl_langinfo_l(3) routines return a pointer to an
+             *       empty string for "short" locale names like en_NZ, it_IT, el_GR, etc. so use
+             *       UTF-8 in those cases as it is the default for short name locales on macOS
+             *       (see also rtStrGetLocaleCodeset).
+             */
+#ifdef LC_CTYPE_MASK
+            locale_t hLocale = newlocale(LC_CTYPE_MASK, pszEncodingStart, (locale_t)0);
+            if (hLocale != (locale_t)0)
+            {
+                const char *pszCodeset = nl_langinfo_l(CODESET, hLocale);
+                Log2Func(("nl_langinfo_l(CODESET, %s=%s) -> %s\n", pszVar, pszEncodingStart, pszCodeset));
+                if (!pszCodeset || *pszCodeset == '\0')
+# ifdef RT_OS_DARWIN
+                    pszEncoding = "UTF-8";
+# else
+                    pszEncoding = "ASCII";
+# endif
+                else
+                {
+                    /* Take a private copy before freelocale(), which may invalidate
+                       the string returned by nl_langinfo_l(). */
+                    rc = RTStrCopy(szEncoding, sizeof(szEncoding), pszCodeset);
+                    AssertRC(rc); /* cannot possibly overflow */
+
+                    /* Only point at szEncoding when it really holds a codeset; doing so
+                       unconditionally would replace the default picked above by the
+                       locale name still sitting in the buffer. */
+                    pszEncoding = szEncoding;
+                }
+
+                freelocale(hLocale);
+            }
+            else
+#endif
+            {
+                /* If there is something that ought to be a character set encoding, try use it: */
+                const char *pszDot = strchr(pszEncodingStart, '.');
+                if (pszDot)
+                    pszDot = RTStrStripL(pszDot + 1);
+                if (pszDot && *pszDot != '\0')
+                {
+                    pszEncoding = pszDot;
+                    Log2Func(("%s=%s -> %s (simple)\n", pszVar, szEncoding, pszEncoding));
+                }
+                else
+                {
+                    /* This is mostly wrong, but I cannot think of anything better now: */
+                    pszEncoding = rtStrGetLocaleCodeset();
+                    LogFunc(("No newlocale or it failed (on '%s=%s', errno=%d), falling back on %s that we're using...\n",
+                             pszVar, pszEncodingStart, errno, pszEncoding));
+                }
+            }
+            RT_NOREF_PV(pszVar);
+        }
+        else
+#ifdef RT_OS_DARWIN /* @bugref{10153}: Darwin defaults to UTF-8. */
+            pszEncoding = "UTF-8";
+#else
+            pszEncoding = "ASCII";
+#endif
+    }
+
+    /*
+     * Do nothing if it's UTF-8.
+     */
+    if (rtStrIsCodesetUtf8(pszEncoding))
+    {
+        LogFlowFunc(("No conversion needed (%s)\n", pszEncoding));
+        return VINF_SUCCESS;
+    }
+
+
+    /*
+     * Do the conversion.
+     */
+    size_t cArgs = 0;
+    while (papszArgs[cArgs] != NULL)
+        cArgs++;
+    LogFunc(("Converting #%zu arguments to %s...\n", cArgs, pszEncoding));
+
+    /* Zero-initialized so the array is always NULL terminated and unset entries are NULL. */
+    char **papszArgsConverted = (char **)RTMemAllocZ(sizeof(papszArgsConverted[0]) * (cArgs + 2));
+    AssertReturn(papszArgsConverted, VERR_NO_MEMORY);
+
+    void *pvConversionCache = NULL;
+    rtStrLocalCacheInit(&pvConversionCache);
+    for (size_t i = 0; i < cArgs; i++)
+    {
+        int rc = rtStrLocalCacheConvert(papszArgs[i], strlen(papszArgs[i]), "UTF-8",
+                                        &papszArgsConverted[i], 0, pszEncoding, &pvConversionCache);
+        if (RT_SUCCESS(rc) && rc != VWRN_NO_TRANSLATION)
+        { /* likely */ }
+        else
+        {
+            LogRelMax(100, ("Failed to convert argument #%zu '%s' to '%s': %Rrc\n", i, papszArgs[i], pszEncoding, rc));
+
+            /* A success status (VWRN_NO_TRANSLATION) presumably means the current entry
+               was produced as well, so release it too.  The array is zero-initialized,
+               so this is harmless should nothing have been stored. */
+            if (RT_SUCCESS(rc))
+                RTStrFree(papszArgsConverted[i]);
+            while (i-- > 0)
+                RTStrFree(papszArgsConverted[i]);
+            RTMemFree(papszArgsConverted);
+            rtStrLocalCacheDelete(&pvConversionCache);
+            return rc == VWRN_NO_TRANSLATION || rc == VERR_NO_TRANSLATION ? VERR_PROC_NO_ARG_TRANSLATION : rc;
+        }
+    }
+
+    rtStrLocalCacheDelete(&pvConversionCache);
+    *ppapszArgs = papszArgsConverted;
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * The result structure for rtPathFindExec/RTPathTraverseList.
+ *
+ * RTProcCreateEx passes a pointer to one of these as the second user argument
+ * when searching the PATH for the executable.
+ * @todo move to common path code?
+ */
+typedef struct RTPATHINTSEARCH
+{
+ /** For EACCES or EPERM errors that we continued on.  RTProcCreateEx reports
+ * this status instead of VERR_FILE_NOT_FOUND when the search finds nothing.
+ * @note Must be initialized to VINF_SUCCESS. */
+ int rcSticky;
+ /** Buffer containing the filename.  rtPathFindExec joins each candidate
+ * (directory + executable name) into it, so after a successful traversal it
+ * holds the executable that was found. */
+ char szFound[RTPATH_MAX];
+} RTPATHINTSEARCH;
+/** Pointer to a rtPathFindExec/RTPathTraverseList result. */
+typedef RTPATHINTSEARCH *PRTPATHINTSEARCH;
+
+
+/**
+ * RTPathTraverseList callback used by RTProcCreateEx to locate the executable.
+ *
+ * Joins the given directory with the executable name and tests the result
+ * for execute access.
+ *
+ * @returns VINF_SUCCESS if the candidate is executable, VERR_TRY_AGAIN if it
+ *          is not (or its path could not be converted to the native codeset),
+ *          or the RTPathJoinEx failure status.
+ * @param   pchPath     The directory from the list (not necessarily terminated).
+ * @param   cchPath     Length of @a pchPath.
+ * @param   pvUser1     The executable name (const char *).
+ * @param   pvUser2     The search result (PRTPATHINTSEARCH).  szFound receives
+ *                      the candidate path and rcSticky is updated on EACCES
+ *                      and EPERM.
+ */
+static DECLCALLBACK(int) rtPathFindExec(char const *pchPath, size_t cchPath, void *pvUser1, void *pvUser2)
+{
+    PRTPATHINTSEARCH const pSearch = (PRTPATHINTSEARCH)pvUser2;
+
+    /* Construct the candidate "<dir>/<exec>" directly in the result buffer. */
+    int rc = RTPathJoinEx(pSearch->szFound, sizeof(pSearch->szFound), pchPath, cchPath, (const char *)pvUser1, RTSTR_MAX,
+                          RTPATH_STR_F_STYLE_HOST);
+    if (RT_FAILURE(rc))
+        return rc;
+
+    /* access() wants the path in the native codeset. */
+    const char *pszNativeCandidate = NULL;
+    rc = rtPathToNative(&pszNativeCandidate, pSearch->szFound, NULL);
+    if (RT_FAILURE(rc))
+    {
+        AssertRCStmt(rc, rc = VERR_TRY_AGAIN /* don't stop on this, whatever it is */);
+        return rc;
+    }
+
+    if (access(pszNativeCandidate, X_OK) == 0)
+        rc = VINF_SUCCESS;
+    else
+    {
+        /* Remember permission problems so the caller can report them if nothing is found. */
+        int const iErrno = errno;
+        if (iErrno == EACCES || iErrno == EPERM)
+            pSearch->rcSticky = RTErrConvertFromErrno(iErrno);
+        rc = VERR_TRY_AGAIN;
+    }
+
+    rtPathFreeNative(pszNativeCandidate, pSearch->szFound);
+    return rc;
+}
+
+
+/**
+ * Creates a child process (POSIX implementation).
+ *
+ * Validates the input, maps the standard handle overrides to file descriptors,
+ * checks the credentials when a user is given, sets up the child environment
+ * when RTPROC_FLAGS_PROFILE or RTPROC_FLAGS_ENV_CHANGE_RECORD apply, locates the
+ * executable (searching PATH if requested), converts the arguments to the
+ * child's codeset and finally hands over to rtProcPosixCreateInner.
+ *
+ * @returns IPRT status code.
+ * @param pszExec The executable to run. Must not be empty. PATH is only
+ * searched when RTPROC_FLAGS_SEARCH_PATH is given, the name has
+ * no path component and the direct access check fails with ENOENT.
+ * @param papszArgs The NULL terminated argument vector. Converted to the
+ * child's codeset unless RTPROC_FLAGS_UTF8_ARGV is given.
+ * @param hEnv The environment, RTENV_DEFAULT or a change record (see
+ * RTPROC_FLAGS_ENV_CHANGE_RECORD). Must not be NIL_RTENV.
+ * @param fFlags RTPROC_FLAGS_XXX.
+ * @param phStdIn Optional standard input override (file, pipe or socket handle).
+ * @param phStdOut Optional standard output override, same kinds as @a phStdIn.
+ * @param phStdErr Optional standard error override, same kinds as @a phStdIn.
+ * @param pszAsUser Optional user to run the process as. Must not be empty.
+ * @param pszPassword Optional password, requires @a pszAsUser.
+ * @param pvExtraData Must be NULL unless RTPROC_FLAGS_DESIRED_SESSION_ID is given.
+ * @param phProcess Where to return the process identifier. Must be NULL when
+ * RTPROC_FLAGS_DETACHED is given.
+ */
+RTR3DECL(int) RTProcCreateEx(const char *pszExec, const char * const *papszArgs, RTENV hEnv, uint32_t fFlags,
+ PCRTHANDLE phStdIn, PCRTHANDLE phStdOut, PCRTHANDLE phStdErr, const char *pszAsUser,
+ const char *pszPassword, void *pvExtraData, PRTPROCESS phProcess)
+{
+ int rc;
+ LogFlow(("RTProcCreateEx: pszExec=%s pszAsUser=%s fFlags=%#x phStdIn=%p phStdOut=%p phStdErr=%p\n",
+ pszExec, pszAsUser, fFlags, phStdIn, phStdOut, phStdErr));
+
+ /*
+ * Input validation
+ */
+ AssertPtrReturn(pszExec, VERR_INVALID_POINTER);
+ AssertReturn(*pszExec, VERR_INVALID_PARAMETER);
+ AssertReturn(!(fFlags & ~RTPROC_FLAGS_VALID_MASK), VERR_INVALID_PARAMETER);
+ AssertReturn(!(fFlags & RTPROC_FLAGS_DETACHED) || !phProcess, VERR_INVALID_PARAMETER);
+ AssertReturn(hEnv != NIL_RTENV, VERR_INVALID_PARAMETER);
+ AssertPtrReturn(papszArgs, VERR_INVALID_PARAMETER);
+ AssertPtrNullReturn(pszAsUser, VERR_INVALID_POINTER);
+ AssertReturn(!pszAsUser || *pszAsUser, VERR_INVALID_PARAMETER);
+ AssertReturn(!pszPassword || pszAsUser, VERR_INVALID_PARAMETER);
+ AssertPtrNullReturn(pszPassword, VERR_INVALID_POINTER);
+#if defined(RT_OS_OS2)
+ if (fFlags & RTPROC_FLAGS_DETACHED)
+ return VERR_PROC_DETACH_NOT_SUPPORTED;
+#endif
+ AssertReturn(pvExtraData == NULL || (fFlags & RTPROC_FLAGS_DESIRED_SESSION_ID), VERR_INVALID_PARAMETER);
+
+ /*
+ * Get the file descriptors for the handles we've been passed.
+ *
+ * aStdFds encoding: -1 = leave the descriptor alone (inherit), -2 = close
+ * it in the child, otherwise the descriptor to duplicate onto 0, 1 or 2.
+ */
+ PCRTHANDLE paHandles[3] = { phStdIn, phStdOut, phStdErr };
+ int aStdFds[3] = { -1, -1, -1 };
+ for (int i = 0; i < 3; i++)
+ {
+ if (paHandles[i])
+ {
+ AssertPtrReturn(paHandles[i], VERR_INVALID_POINTER);
+ switch (paHandles[i]->enmType)
+ {
+ case RTHANDLETYPE_FILE:
+ aStdFds[i] = paHandles[i]->u.hFile != NIL_RTFILE
+ ? (int)RTFileToNative(paHandles[i]->u.hFile)
+ : -2 /* close it */;
+ break;
+
+ case RTHANDLETYPE_PIPE:
+ aStdFds[i] = paHandles[i]->u.hPipe != NIL_RTPIPE
+ ? (int)RTPipeToNative(paHandles[i]->u.hPipe)
+ : -2 /* close it */;
+ break;
+
+ case RTHANDLETYPE_SOCKET:
+ aStdFds[i] = paHandles[i]->u.hSocket != NIL_RTSOCKET
+ ? (int)RTSocketToNative(paHandles[i]->u.hSocket)
+ : -2 /* close it */;
+ break;
+
+ default:
+ AssertMsgFailedReturn(("%d: %d\n", i, paHandles[i]->enmType), VERR_INVALID_PARAMETER);
+ }
+ /** @todo check the close-on-execness of these handles? */
+ }
+ }
+
+ /* Redirecting a standard descriptor onto itself is the same as inheriting it. */
+ for (int i = 0; i < 3; i++)
+ if (aStdFds[i] == i)
+ aStdFds[i] = -1;
+ LogFlowFunc(("aStdFds={%d, %d, %d}\n", aStdFds[0], aStdFds[1], aStdFds[2]));
+
+ /* A source descriptor numbered below its target standard descriptor is not supported. */
+ for (int i = 0; i < 3; i++)
+ AssertMsgReturn(aStdFds[i] < 0 || aStdFds[i] > i,
+ ("%i := %i not possible because we're lazy\n", i, aStdFds[i]),
+ VERR_NOT_SUPPORTED);
+
+ /*
+ * Validate the credentials if a user is specified.
+ */
+ bool const fNeedLoginEnv = (fFlags & RTPROC_FLAGS_PROFILE)
+ && ((fFlags & RTPROC_FLAGS_ENV_CHANGE_RECORD) || hEnv == RTENV_DEFAULT);
+ uid_t uid = ~(uid_t)0;
+ gid_t gid = ~(gid_t)0;
+ char **papszPamEnv = NULL;
+ if (pszAsUser)
+ {
+ rc = rtCheckCredentials(pszAsUser, pszPassword, &gid, &uid, fNeedLoginEnv ? &papszPamEnv : NULL);
+ if (RT_FAILURE(rc))
+ return rc;
+ }
+#ifdef IPRT_USE_PAM
+ /*
+ * User unchanged, but if PROFILE is requested we must try get the PAM
+ * environment variables.
+ *
+ * For this to work, we'll need a special PAM service profile which doesn't
+ * actually do any authentication, only concerns itself with the environment
+ * setup. gdm-launch-environment is such one, and we use it if we haven't
+ * got an IPRT specific one there.
+ */
+ else if (fNeedLoginEnv)
+ {
+ const char *pszService;
+ if (rtProcPosixPamServiceExists("iprt-environment"))
+ pszService = "iprt-environment";
+# ifdef IPRT_PAM_NATIVE_SERVICE_NAME_ENVIRONMENT
+ else if (rtProcPosixPamServiceExists(IPRT_PAM_NATIVE_SERVICE_NAME_ENVIRONMENT))
+ pszService = IPRT_PAM_NATIVE_SERVICE_NAME_ENVIRONMENT;
+# endif
+ else if (rtProcPosixPamServiceExists("gdm-launch-environment"))
+ pszService = "gdm-launch-environment";
+ else
+ pszService = NULL;
+ if (pszService)
+ {
+ /* The status of this attempt is not checked; rc is assigned anew further down. */
+ char szLoginName[512];
+ rc = getlogin_r(szLoginName, sizeof(szLoginName));
+ if (rc == 0)
+ rc = rtProcPosixAuthenticateUsingPam(pszService, szLoginName, "xxx", &papszPamEnv, NULL);
+ }
+ }
+#endif
+
+ /*
+ * Create the child environment if either RTPROC_FLAGS_PROFILE or
+ * RTPROC_FLAGS_ENV_CHANGE_RECORD are in effect.
+ */
+ RTENV hEnvToUse = hEnv;
+ if ( (fFlags & (RTPROC_FLAGS_ENV_CHANGE_RECORD | RTPROC_FLAGS_PROFILE))
+ && ( (fFlags & RTPROC_FLAGS_ENV_CHANGE_RECORD)
+ || hEnv == RTENV_DEFAULT) )
+ {
+ if (fFlags & RTPROC_FLAGS_PROFILE)
+ rc = rtProcPosixCreateProfileEnv(&hEnvToUse, pszAsUser, uid, gid, fFlags, papszPamEnv);
+ else
+ rc = RTEnvClone(&hEnvToUse, RTENV_DEFAULT);
+ /* The PAM environment is no longer needed, whether the above succeeded or not. */
+ rtProcPosixFreePamEnv(papszPamEnv);
+ papszPamEnv = NULL;
+ if (RT_FAILURE(rc))
+ return rc;
+
+ /* hEnv is a change record in this case, apply it on top of the base environment. */
+ if ((fFlags & RTPROC_FLAGS_ENV_CHANGE_RECORD) && hEnv != RTENV_DEFAULT)
+ {
+ rc = RTEnvApplyChanges(hEnvToUse, hEnv);
+ if (RT_FAILURE(rc))
+ {
+ RTEnvDestroy(hEnvToUse);
+ return rc;
+ }
+ }
+ }
+ Assert(papszPamEnv == NULL);
+
+ /*
+ * Check for execute access to the file, searching the PATH if needed.
+ */
+ const char *pszNativeExec = NULL;
+ rc = rtPathToNative(&pszNativeExec, pszExec, NULL);
+ if (RT_SUCCESS(rc))
+ {
+ if (access(pszNativeExec, X_OK) == 0)
+ rc = VINF_SUCCESS;
+ else
+ {
+ /* rc temporarily holds the raw errno value here. */
+ rc = errno;
+ rtPathFreeNative(pszNativeExec, pszExec);
+
+ if ( !(fFlags & RTPROC_FLAGS_SEARCH_PATH)
+ || rc != ENOENT
+ || RTPathHavePath(pszExec) )
+ rc = RTErrConvertFromErrno(rc);
+ else
+ {
+ /* Search the PATH for it: */
+ char *pszPath = RTEnvDupEx(hEnvToUse, "PATH");
+ if (pszPath)
+ {
+ PRTPATHINTSEARCH pResult = (PRTPATHINTSEARCH)alloca(sizeof(*pResult));
+ pResult->rcSticky = VINF_SUCCESS;
+ rc = RTPathTraverseList(pszPath, ':', rtPathFindExec, (void *)pszExec, pResult);
+ RTStrFree(pszPath);
+ if (RT_SUCCESS(rc))
+ {
+ /* Found it. Now, convert to native path: */
+ pszExec = pResult->szFound;
+ rc = rtPathToNative(&pszNativeExec, pszExec, NULL);
+ }
+ else
+ /* Nothing found: prefer a remembered EACCES/EPERM status over plain "not found". */
+ rc = rc != VERR_END_OF_STRING ? rc
+ : pResult->rcSticky == VINF_SUCCESS ? VERR_FILE_NOT_FOUND : pResult->rcSticky;
+ }
+ else
+ rc = VERR_NO_STR_MEMORY;
+ }
+ }
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * Convert arguments to child codeset if necessary.
+ */
+ char **papszArgsConverted = (char **)papszArgs;
+ if (!(fFlags & RTPROC_FLAGS_UTF8_ARGV))
+ rc = rtProcPosixConvertArgv(papszArgs, hEnvToUse, &papszArgsConverted);
+ if (RT_SUCCESS(rc))
+ {
+ /*
+ * The rest of the process creation is reused internally by rtProcPosixCreateProfileEnv.
+ */
+ rc = rtProcPosixCreateInner(pszNativeExec, papszArgsConverted, hEnv, hEnvToUse, fFlags, pszAsUser, uid, gid,
+ RT_ELEMENTS(aStdFds), aStdFds, phProcess);
+
+ }
+
+ /* Free the translated argv copy, if needed. */
+ if (papszArgsConverted != (char **)papszArgs)
+ {
+ for (size_t i = 0; papszArgsConverted[i] != NULL; i++)
+ RTStrFree(papszArgsConverted[i]);
+ RTMemFree(papszArgsConverted);
+ }
+ rtPathFreeNative(pszNativeExec, pszExec);
+ }
+ }
+ /* Only destroy the environment if we created it above. */
+ if (hEnvToUse != hEnv)
+ RTEnvDestroy(hEnvToUse);
+ return rc;
+}
+
+
+/**
+ * The inner 2nd half of RTProcCreateEx.
+ *
+ * This is also used by rtProcPosixCreateProfileEnv().
+ *
+ * For RTPROC_FLAGS_DETACHED (not on OS/2) a temporary process is forked first;
+ * it does the actual spawning and then terminates via _Exit(), while the
+ * caller waits for it and maps a non-zero exit status to
+ * VERR_PROCESS_NOT_FOUND.
+ *
+ * @returns IPRT status code.
+ * @param   pszNativeExec   The executable to run (absolute path, X_OK).
+ *                          Native path.
+ * @param   papszArgs       The arguments.  Caller has done codeset conversions.
+ * @param   hEnv            The original environment request, needed for
+ *                          adjustments if starting as different user.
+ * @param   hEnvToUse       The environment we should use.
+ * @param   fFlags          The process creation flags, RTPROC_FLAGS_XXX.
+ * @param   pszAsUser       The user to start the process as, if requested.
+ * @param   uid             The UID corresponding to @a pszAsUser, ~0 if NULL.
+ * @param   gid             The GID corresponding to @a pszAsUser, ~0 if NULL.
+ * @param   cRedirFds       Number of redirection file descriptors.
+ * @param   paRedirFds      Pointer to redirection file descriptors.  Entries
+ *                          containing -1 are not modified (inherit from parent),
+ *                          -2 indicates that the descriptor should be closed in the
+ *                          child.
+ * @param   phProcess       Where to return the process ID on success.  Optional.
+ *                          Set to zero for detached processes.
+ */
+static int rtProcPosixCreateInner(const char *pszNativeExec, const char * const *papszArgs, RTENV hEnv, RTENV hEnvToUse,
+                                  uint32_t fFlags, const char *pszAsUser, uid_t uid, gid_t gid,
+                                  unsigned cRedirFds, int *paRedirFds, PRTPROCESS phProcess)
+{
+    /*
+     * Get the environment block.
+     */
+    const char * const *papszEnv = RTEnvGetExecEnvP(hEnvToUse);
+    AssertPtrReturn(papszEnv, VERR_INVALID_HANDLE);
+
+    /*
+     * Optimize the redirections.
+     */
+    while (cRedirFds > 0 && paRedirFds[cRedirFds - 1] == -1)
+        cRedirFds--;
+
+    /*
+     * Child PID.
+     */
+    pid_t pid = -1;
+
+    /*
+     * Take care of detaching the process.
+     *
+     * HACK ALERT! Put the process into a new process group with pgid = pid
+     * to make sure it differs from that of the parent process to ensure that
+     * the IPRT waitpid call doesn't race anyone (read XPCOM) doing group wide
+     * waits. setsid() includes the setpgid() functionality.
+     * 2010-10-11 XPCOM no longer waits for anything, but it cannot hurt.
+     */
+#ifndef RT_OS_OS2
+    if (fFlags & RTPROC_FLAGS_DETACHED)
+    {
+# ifdef RT_OS_SOLARIS
+        int templateFd = -1;
+        if (!(fFlags & RTPROC_FLAGS_SAME_CONTRACT))
+        {
+            templateFd = rtSolarisContractPreFork();
+            if (templateFd == -1)
+                return VERR_OPEN_FAILED;
+        }
+# endif /* RT_OS_SOLARIS */
+        pid = fork();
+        if (!pid)
+        {
+# ifdef RT_OS_SOLARIS
+            if (!(fFlags & RTPROC_FLAGS_SAME_CONTRACT))
+                rtSolarisContractPostForkChild(templateFd);
+# endif
+            setsid(); /* see comment above */
+
+            pid = -1;
+            /* Child falls through to the actual spawn code below. */
+        }
+        else
+        {
+# ifdef RT_OS_SOLARIS
+            if (!(fFlags & RTPROC_FLAGS_SAME_CONTRACT))
+                rtSolarisContractPostForkParent(templateFd, pid);
+# endif
+            if (pid > 0)
+            {
+                /* Must wait for the temporary process to avoid a zombie. */
+                int status = 0;
+                pid_t pidChild = 0;
+
+                /* Restart if we get interrupted. */
+                do
+                {
+                    pidChild = waitpid(pid, &status, 0);
+                } while (   pidChild == -1
+                         && errno == EINTR);
+
+                /* Assume that something wasn't found. No detailed info. */
+                if (status)
+                    return VERR_PROCESS_NOT_FOUND;
+                if (phProcess)
+                    *phProcess = 0;
+                return VINF_SUCCESS;
+            }
+            return RTErrConvertFromErrno(errno);
+        }
+    }
+#endif
+
+    /*
+     * Spawn the child.
+     *
+     * Any spawn code MUST not execute any atexit functions if it is for a
+     * detached process. It would lead to running the atexit functions which
+     * make only sense for the parent. libORBit e.g. gets confused by multiple
+     * execution. Remember, there was only a fork() so far, and until exec()
+     * is successfully run there is nothing which would prevent doing anything
+     * silly with the (duplicated) file descriptors.
+     */
+    int rc;
+#ifdef HAVE_POSIX_SPAWN
+    /** @todo OS/2: implement DETACHED (BACKGROUND stuff), see VbglR3Daemonize.  */
+    if (   uid == ~(uid_t)0
+        && gid == ~(gid_t)0)
+    {
+        /* Spawn attributes. */
+        posix_spawnattr_t Attr;
+        rc = posix_spawnattr_init(&Attr);
+        if (!rc)
+        {
+            /* Indicate that process group and signal mask are to be changed,
+               and that the child should use default signal actions. */
+            rc = posix_spawnattr_setflags(&Attr, POSIX_SPAWN_SETPGROUP | POSIX_SPAWN_SETSIGMASK | POSIX_SPAWN_SETSIGDEF);
+            Assert(rc == 0);
+
+            /* The child starts in its own process group. */
+            if (!rc)
+            {
+                rc = posix_spawnattr_setpgroup(&Attr, 0 /* pg == child pid */);
+                Assert(rc == 0);
+            }
+
+            /* Unmask all signals. */
+            if (!rc)
+            {
+                sigset_t SigMask;
+                sigemptyset(&SigMask);
+                rc = posix_spawnattr_setsigmask(&Attr, &SigMask); Assert(rc == 0);
+            }
+
+            /* File changes. */
+            posix_spawn_file_actions_t  FileActions;
+            posix_spawn_file_actions_t *pFileActions = NULL;
+            if (!rc && cRedirFds > 0)
+            {
+                rc = posix_spawn_file_actions_init(&FileActions);
+                if (!rc)
+                {
+                    pFileActions = &FileActions;
+                    for (unsigned i = 0; i < cRedirFds; i++)
+                    {
+                        int fd = paRedirFds[i];
+                        if (fd == -2)
+                            rc = posix_spawn_file_actions_addclose(&FileActions, i);
+                        else if (fd >= 0 && fd != (int)i)
+                        {
+                            rc = posix_spawn_file_actions_adddup2(&FileActions, fd, i);
+                            if (!rc)
+                            {
+                                /* Only close the source once no later redirection needs it. */
+                                for (unsigned j = i + 1; j < cRedirFds; j++)
+                                    if (paRedirFds[j] == fd)
+                                    {
+                                        fd = -1;
+                                        break;
+                                    }
+                                if (fd >= 0)
+                                    rc = posix_spawn_file_actions_addclose(&FileActions, fd);
+                            }
+                        }
+                        if (rc)
+                            break;
+                    }
+                }
+            }
+
+            if (!rc)
+                rc = posix_spawn(&pid, pszNativeExec, pFileActions, &Attr, (char * const *)papszArgs,
+                                 (char * const *)papszEnv);
+
+            /* cleanup */
+            int rc2 = posix_spawnattr_destroy(&Attr); Assert(rc2 == 0); NOREF(rc2);
+            if (pFileActions)
+            {
+                rc2 = posix_spawn_file_actions_destroy(pFileActions);
+                Assert(rc2 == 0);
+            }
+
+            /* return on success.*/
+            if (!rc)
+            {
+                /* For a detached process this happens in the temp process, so
+                 * it's not worth doing anything as this process must exit. */
+                if (fFlags & RTPROC_FLAGS_DETACHED)
+                    _Exit(0);
+                if (phProcess)
+                    *phProcess = pid;
+                return VINF_SUCCESS;
+            }
+        }
+        /* For a detached process this happens in the temp process, so
+         * it's not worth doing anything as this process must exit. */
+        if (fFlags & RTPROC_FLAGS_DETACHED)
+            _Exit(124);
+
+        /* The posix_spawn family returns an errno value on failure, so hand that
+           to the caller instead of falling through to VERR_NOT_IMPLEMENTED. */
+        return RTErrConvertFromErrno(rc);
+    }
+    else
+#endif
+    {
+#ifdef RT_OS_SOLARIS
+        int templateFd = -1;
+        if (!(fFlags & RTPROC_FLAGS_SAME_CONTRACT))
+        {
+            templateFd = rtSolarisContractPreFork();
+            if (templateFd == -1)
+                return VERR_OPEN_FAILED;
+        }
+#endif /* RT_OS_SOLARIS */
+        pid = fork();
+        if (!pid)
+        {
+#ifdef RT_OS_SOLARIS
+            if (!(fFlags & RTPROC_FLAGS_SAME_CONTRACT))
+                rtSolarisContractPostForkChild(templateFd);
+#endif /* RT_OS_SOLARIS */
+            if (!(fFlags & RTPROC_FLAGS_DETACHED))
+                setpgid(0, 0); /* see comment above */
+
+            /*
+             * Change group and user if requested.
+             */
+#if 1 /** @todo This needs more work, see suplib/hardening. */
+            if (pszAsUser)
+            {
+                int ret = initgroups(pszAsUser, gid);
+                if (ret)
+                {
+                    if (fFlags & RTPROC_FLAGS_DETACHED)
+                        _Exit(126);
+                    else
+                        exit(126);
+                }
+            }
+            if (gid != ~(gid_t)0)
+            {
+                if (setgid(gid))
+                {
+                    if (fFlags & RTPROC_FLAGS_DETACHED)
+                        _Exit(126);
+                    else
+                        exit(126);
+                }
+            }
+
+            if (uid != ~(uid_t)0)
+            {
+                if (setuid(uid))
+                {
+                    if (fFlags & RTPROC_FLAGS_DETACHED)
+                        _Exit(126);
+                    else
+                        exit(126);
+                }
+            }
+#endif
+
+            /*
+             * Some final profile environment tweaks, if running as user.
+             */
+            if (   (fFlags & RTPROC_FLAGS_PROFILE)
+                && pszAsUser
+                && (   (fFlags & RTPROC_FLAGS_ENV_CHANGE_RECORD)
+                    || hEnv == RTENV_DEFAULT) )
+            {
+                rc = rtProcPosixAdjustProfileEnvFromChild(hEnvToUse, fFlags, hEnv);
+                /* Fetch the block again, it is taken after the adjustments. */
+                papszEnv = RTEnvGetExecEnvP(hEnvToUse);
+                if (RT_FAILURE(rc) || !papszEnv)
+                {
+                    if (fFlags & RTPROC_FLAGS_DETACHED)
+                        _Exit(126);
+                    else
+                        exit(126);
+                }
+            }
+
+            /*
+             * Unset the signal mask.
+             */
+            sigset_t SigMask;
+            sigemptyset(&SigMask);
+            rc = sigprocmask(SIG_SETMASK, &SigMask, NULL);
+            Assert(rc == 0);
+
+            /*
+             * Apply changes to the standard file descriptor and stuff.
+             */
+            for (unsigned i = 0; i < cRedirFds; i++)
+            {
+                int fd = paRedirFds[i];
+                if (fd == -2)
+                    close(i);
+                else if (fd >= 0)
+                {
+                    int rc2 = dup2(fd, i);
+                    if (rc2 != (int)i)
+                    {
+                        if (fFlags & RTPROC_FLAGS_DETACHED)
+                            _Exit(125);
+                        else
+                            exit(125);
+                    }
+                    /* Only close the source once no later redirection needs it. */
+                    for (unsigned j = i + 1; j < cRedirFds; j++)
+                        if (paRedirFds[j] == fd)
+                        {
+                            fd = -1;
+                            break;
+                        }
+                    if (fd >= 0)
+                        close(fd);
+                }
+            }
+
+            /*
+             * Finally, execute the requested program.
+             */
+            rc = execve(pszNativeExec, (char * const *)papszArgs, (char * const *)papszEnv);
+            if (errno == ENOEXEC)
+            {
+                /* This can happen when trying to start a shell script without the magic #!/bin/sh */
+                RTAssertMsg2Weak("Cannot execute this binary format!\n");
+            }
+            else
+                RTAssertMsg2Weak("execve returns %d errno=%d (%s)\n", rc, errno, pszNativeExec);
+            RTAssertReleasePanic();
+            if (fFlags & RTPROC_FLAGS_DETACHED)
+                _Exit(127);
+            else
+                exit(127);
+        }
+#ifdef RT_OS_SOLARIS
+        if (!(fFlags & RTPROC_FLAGS_SAME_CONTRACT))
+            rtSolarisContractPostForkParent(templateFd, pid);
+#endif /* RT_OS_SOLARIS */
+        if (pid > 0)
+        {
+            /* For a detached process this happens in the temp process, so
+             * it's not worth doing anything as this process must exit. */
+            if (fFlags & RTPROC_FLAGS_DETACHED)
+                _Exit(0);
+            if (phProcess)
+                *phProcess = pid;
+            return VINF_SUCCESS;
+        }
+        /* For a detached process this happens in the temp process, so
+         * it's not worth doing anything as this process must exit. */
+        if (fFlags & RTPROC_FLAGS_DETACHED)
+            _Exit(124);
+        return RTErrConvertFromErrno(errno);
+    }
+
+    /* Not reached: both branches above return or terminate the process. */
+    return VERR_NOT_IMPLEMENTED;
+}
+
+
+/**
+ * Daemonizes the calling process using two consecutive fork() calls.
+ *
+ * On success only the final (second generation) child returns from this
+ * function; the original process and the intermediate child both exit(0).
+ *
+ * @returns IPRT status code.
+ * @param   fNoChDir    If set, the current directory is left unchanged,
+ *                      otherwise it is changed to "/".
+ * @param   fNoClose    If set, stdin, stdout and stderr are left alone,
+ *                      otherwise they are redirected to /dev/null.
+ * @param   pszPidfile  Optional path of a pidfile.  It is created exclusively
+ *                      (fails if it already exists) and receives the PID of
+ *                      the final daemon process.
+ */
+RTR3DECL(int) RTProcDaemonizeUsingFork(bool fNoChDir, bool fNoClose, const char *pszPidfile)
+{
+    /*
+     * Fork the child process in a new session and quit the parent.
+     *
+     * - fork once and create a new session (setsid). This will detach us
+     *   from the controlling tty meaning that we won't receive the SIGHUP
+     *   (or any other signal) sent to that session.
+     * - The SIGHUP signal is ignored because the session/parent may throw
+     *   us one before we get to the setsid.
+     * - When the parent exit(0) we will become an orphan and re-parented to
+     *   the init process.
+     * - Because of the sometimes unexpected semantics of assigning the
+     *   controlling tty automagically when a session leader first opens a tty,
+     *   we will fork() once more to get rid of the session leadership role.
+     */
+
+    /* We start off by opening the pidfile, so that we can fail straight away
+     * if it already exists. */
+    int fdPidfile = -1;
+    if (pszPidfile != NULL)
+    {
+        /* @note the exclusive create is not guaranteed on all file
+         * systems (e.g. NFSv2) */
+        if ((fdPidfile = open(pszPidfile, O_RDWR | O_CREAT | O_EXCL, 0644)) == -1)
+            return RTErrConvertFromErrno(errno);
+    }
+
+    /* Ignore SIGHUP straight away. */
+    struct sigaction OldSigAct;
+    struct sigaction SigAct;
+    memset(&SigAct, 0, sizeof(SigAct));
+    SigAct.sa_handler = SIG_IGN;
+    int rcSigAct = sigaction(SIGHUP, &SigAct, &OldSigAct);
+
+    /* First fork, to become independent process. */
+    pid_t pid = fork();
+    if (pid == -1)
+    {
+        /* Save errno first, the cleanup calls below may change it. */
+        int const iSavedErrno = errno;
+        /* We stay in the caller's process, so put its SIGHUP disposition back. */
+        if (rcSigAct != -1)
+            sigaction(SIGHUP, &OldSigAct, NULL);
+        if (fdPidfile != -1)
+            close(fdPidfile);
+        return RTErrConvertFromErrno(iSavedErrno);
+    }
+    if (pid != 0)
+    {
+        /* Parent exits, no longer necessary. The child gets reparented
+         * to the init process. */
+        exit(0);
+    }
+
+    /* Create new session, fix up the standard file descriptors and the
+     * current working directory. */
+    /** @todo r=klaus the webservice uses this function and assumes that the
+     * contract id of the daemon is the same as that of the original process.
+     * Whenever this code is changed this must still remain possible. */
+    pid_t newpgid = setsid();
+    int SavedErrno = errno;
+    if (rcSigAct != -1)
+        sigaction(SIGHUP, &OldSigAct, NULL);
+    if (newpgid == -1)
+    {
+        if (fdPidfile != -1)
+            close(fdPidfile);
+        return RTErrConvertFromErrno(SavedErrno);
+    }
+
+    if (!fNoClose)
+    {
+        /* Open stdin(0), stdout(1) and stderr(2) as /dev/null. */
+        int fd = open("/dev/null", O_RDWR);
+        if (fd == -1) /* paranoia */
+        {
+            close(STDIN_FILENO);
+            close(STDOUT_FILENO);
+            close(STDERR_FILENO);
+            fd = open("/dev/null", O_RDWR);
+        }
+        if (fd != -1)
+        {
+            dup2(fd, STDIN_FILENO);
+            dup2(fd, STDOUT_FILENO);
+            dup2(fd, STDERR_FILENO);
+            if (fd > 2)
+                close(fd);
+        }
+    }
+
+    if (!fNoChDir)
+    {
+        int rcIgnored = chdir("/");
+        NOREF(rcIgnored);
+    }
+
+    /* Second fork to lose session leader status. */
+    pid = fork();
+    if (pid == -1)
+    {
+        /* Save errno first, close() may change it. */
+        int const iSavedErrno = errno;
+        if (fdPidfile != -1)
+            close(fdPidfile);
+        return RTErrConvertFromErrno(iSavedErrno);
+    }
+
+    if (pid != 0)
+    {
+        /* Write the pid file, this is done in the parent, before exiting. */
+        if (fdPidfile != -1)
+        {
+            char szBuf[256];
+            size_t cbPid = RTStrPrintf(szBuf, sizeof(szBuf), "%d\n", pid);
+            ssize_t cbIgnored = write(fdPidfile, szBuf, cbPid); NOREF(cbIgnored);
+            close(fdPidfile);
+        }
+        exit(0);
+    }
+
+    if (fdPidfile != -1)
+        close(fdPidfile);
+
+    return VINF_SUCCESS;
+}
+
diff --git a/src/VBox/Runtime/r3/posix/process-posix.cpp b/src/VBox/Runtime/r3/posix/process-posix.cpp
new file mode 100644
index 00000000..b0074f1c
--- /dev/null
+++ b/src/VBox/Runtime/r3/posix/process-posix.cpp
@@ -0,0 +1,279 @@
+/* $Id: process-posix.cpp $ */
+/** @file
+ * IPRT - Process, POSIX.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP RTLOGGROUP_PROCESS
+#include <unistd.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <signal.h>
+#include <pwd.h>
+
+#include <iprt/process.h>
+#include "internal/iprt.h"
+
+#include <iprt/assert.h>
+#include <iprt/env.h>
+#include <iprt/err.h>
+#include <iprt/file.h>
+#include <iprt/pipe.h>
+#include <iprt/socket.h>
+#include <iprt/string.h>
+#include <iprt/mem.h>
+#include "internal/process.h"
+
+
+/**
+ * Waits for a process, restarting the wait whenever it gets interrupted.
+ *
+ * @returns Whatever RTProcWaitNoResume() returns, except that
+ *          VERR_INTERRUPTED is never passed up but triggers another wait.
+ * @param   Process      The process to wait for.
+ * @param   fFlags       Wait flags, handed to RTProcWaitNoResume() as-is.
+ * @param   pProcStatus  Handed to RTProcWaitNoResume() as-is.
+ */
+RTR3DECL(int) RTProcWait(RTPROCESS Process, unsigned fFlags, PRTPROCSTATUS pProcStatus)
+{
+    for (;;)
+    {
+        int const rcWait = RTProcWaitNoResume(Process, fFlags, pProcStatus);
+        if (rcWait != VERR_INTERRUPTED)
+            return rcWait;
+    }
+}
+
+
+/**
+ * Waits for a child process without restarting the wait on interruption.
+ *
+ * @returns IPRT status code.
+ * @retval  VINF_SUCCESS if waitpid() returned a process ID; @a pProcStatus
+ *          is filled in when given.
+ * @retval  VERR_PROCESS_RUNNING if waitpid() returned zero (non-blocking wait).
+ * @retval  VERR_PROCESS_NOT_FOUND if waitpid() failed with ECHILD.
+ * @retval  VERR_INVALID_PARAMETER if @a Process is not positive or @a fFlags
+ *          contains unknown bits.
+ * @param   Process      The process to wait for, must be greater than zero.
+ * @param   fFlags       RTPROCWAIT_FLAGS_BLOCK or RTPROCWAIT_FLAGS_NOBLOCK.
+ * @param   pProcStatus  Where to store the exit reason and status.  Optional.
+ */
+RTR3DECL(int) RTProcWaitNoResume(RTPROCESS Process, unsigned fFlags, PRTPROCSTATUS pProcStatus)
+{
+    /* Zero and negative process identifiers are rejected up front. */
+    if (Process <= 0)
+    {
+        AssertMsgFailed(("Invalid Process=%d\n", Process));
+        return VERR_INVALID_PARAMETER;
+    }
+
+    /* Only the two blocking mode flags are known here. */
+    unsigned const fKnownFlags = RTPROCWAIT_FLAGS_NOBLOCK | RTPROCWAIT_FLAGS_BLOCK;
+    if (fFlags & ~fKnownFlags)
+    {
+        AssertMsgFailed(("Invalid flags %#x\n", fFlags));
+        return VERR_INVALID_PARAMETER;
+    }
+
+    /* Do the wait, polling if the caller asked for it. */
+    int       iStatus  = 0;
+    int const fOptions = fFlags & RTPROCWAIT_FLAGS_NOBLOCK ? WNOHANG : 0;
+    int const rcWait   = waitpid(Process, &iStatus, fOptions);
+
+    /* Failure: ECHILD gets a dedicated status, the rest is converted. */
+    if (rcWait < 0)
+    {
+        int const iErr = errno;
+        return iErr == ECHILD ? VERR_PROCESS_NOT_FOUND : RTErrConvertFromErrno(iErr);
+    }
+
+    /* Zero means the child has not changed state yet. */
+    if (rcWait == 0)
+    {
+        Assert(fFlags & RTPROCWAIT_FLAGS_NOBLOCK);
+        return VERR_PROCESS_RUNNING;
+    }
+
+    /* Success: translate the wait status if the caller wants it. */
+    if (pProcStatus)
+    {
+        if (WIFEXITED(iStatus))
+        {
+            pProcStatus->enmReason = RTPROCEXITREASON_NORMAL;
+            pProcStatus->iStatus   = WEXITSTATUS(iStatus);
+        }
+        else if (WIFSIGNALED(iStatus))
+        {
+            pProcStatus->enmReason = RTPROCEXITREASON_SIGNAL;
+            pProcStatus->iStatus   = WTERMSIG(iStatus);
+        }
+        else
+        {
+            /* Neither exited nor signalled: hand back the raw status. */
+            Assert(!WIFSTOPPED(iStatus));
+            pProcStatus->enmReason = RTPROCEXITREASON_ABEND;
+            pProcStatus->iStatus   = iStatus;
+        }
+    }
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Terminates a process by sending it SIGKILL.
+ *
+ * @returns VINF_SUCCESS if @a Process is NIL_RTPROCESS or kill() succeeded,
+ *          otherwise the IPRT status converted from errno.
+ * @param   Process     The process to kill.  NIL_RTPROCESS is quietly ignored.
+ */
+RTR3DECL(int) RTProcTerminate(RTPROCESS Process)
+{
+    int rc = VINF_SUCCESS;
+    if (   Process != NIL_RTPROCESS
+        && kill(Process, SIGKILL) != 0)
+        rc = RTErrConvertFromErrno(errno);
+    return rc;
+}
+
+
+/**
+ * Gets the affinity mask of the calling process.
+ *
+ * @returns Always 1 for now, see the todo below.
+ */
+RTR3DECL(uint64_t) RTProcGetAffinityMask(void)
+{
+    /// @todo Not implemented, a constant mask with only bit 0 set is returned.
+    uint64_t const fMask = 1;
+    return fMask;
+}
+
+
+/**
+ * Queries the parent of a process.
+ *
+ * @returns VINF_SUCCESS for the calling process, VERR_NOT_SUPPORTED for any
+ *          other process handle.
+ * @param   hProcess    The process to query; only RTProcSelf() is supported.
+ * @param   phParent    Where to return the getppid() result on success.
+ */
+RTR3DECL(int) RTProcQueryParent(RTPROCESS hProcess, PRTPROCESS phParent)
+{
+    if (hProcess != RTProcSelf())
+        return VERR_NOT_SUPPORTED;
+
+    *phParent = getppid();
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Queries the user name the calling process runs under.
+ *
+ * The name is looked up in the password database using the effective user ID
+ * and converted from the current codeset to UTF-8.
+ *
+ * @returns IPRT status code.
+ * @retval  VERR_BUFFER_OVERFLOW if @a cbUser is too small for the name and its
+ *          terminator; the required size is stored in @a pcbUser if given.
+ * @retval  VERR_NOT_FOUND if the password database has no entry for the
+ *          effective user ID.
+ * @retval  VERR_NOT_SUPPORTED if @a hProcess is neither NIL_RTPROCESS nor the
+ *          calling process.
+ * @retval  VERR_NO_TMP_MEMORY if the lookup buffer could not be allocated.
+ * @param   hProcess    NIL_RTPROCESS or the handle of the calling process.
+ * @param   pszUser     Where to store the UTF-8 user name.  May be NULL when
+ *                      @a cbUser is zero and only the size is wanted.
+ * @param   cbUser      Size of the @a pszUser buffer in bytes.
+ * @param   pcbUser     Where to return the size of the name including the
+ *                      terminator.  Optional unless @a pszUser is NULL.
+ */
+RTR3DECL(int) RTProcQueryUsername(RTPROCESS hProcess, char *pszUser, size_t cbUser, size_t *pcbUser)
+{
+    AssertReturn(   (pszUser && cbUser > 0)
+                 || (!pszUser && !cbUser), VERR_INVALID_PARAMETER);
+    AssertReturn(pcbUser || pszUser, VERR_INVALID_PARAMETER);
+
+    int rc;
+    if (   hProcess == NIL_RTPROCESS
+        || hProcess == RTProcSelf())
+    {
+        /*
+         * Figure a good buffer estimate.
+         */
+        int32_t cbPwdMax = sysconf(_SC_GETPW_R_SIZE_MAX);
+        if (cbPwdMax <= _1K)
+            cbPwdMax = _1K;
+        else
+            AssertStmt(cbPwdMax <= 32*_1M, cbPwdMax = 32*_1M);
+        char *pchBuf = (char *)RTMemTmpAllocZ(cbPwdMax);
+        if (pchBuf)
+        {
+            /*
+             * Get the password file entry.
+             *
+             * getpwuid_r() returns zero and leaves the result pointer NULL
+             * when there simply is no entry for the UID, so both the return
+             * code and the pointer must be checked before touching pPwd.
+             */
+            struct passwd  Pwd;
+            struct passwd *pPwd = NULL;
+            rc = getpwuid_r(geteuid(), &Pwd, pchBuf, cbPwdMax, &pPwd);
+            if (!rc && pPwd)
+            {
+                /*
+                 * Convert the name to UTF-8, assuming that we're getting it in the local codeset.
+                 */
+                /** @todo This isn't exactly optimal... the current codeset/page conversion
+                 *        stuff never was.  Should optimize that for UTF-8 and ASCII one day.
+                 *        And also optimize for avoiding heap. */
+                char *pszTmp = NULL;
+                rc = RTStrCurrentCPToUtf8(&pszTmp, pPwd->pw_name);
+                if (RT_SUCCESS(rc))
+                {
+                    size_t cbTmp = strlen(pszTmp) + 1;
+                    if (pcbUser)
+                        *pcbUser = cbTmp;
+                    if (cbTmp <= cbUser)
+                    {
+                        memcpy(pszUser, pszTmp, cbTmp);
+                        rc = VINF_SUCCESS;
+                    }
+                    else
+                        rc = VERR_BUFFER_OVERFLOW;
+                    RTStrFree(pszTmp);
+                }
+            }
+            else if (!rc)
+                rc = VERR_NOT_FOUND; /* No password database entry for this UID. */
+            else
+                rc = RTErrConvertFromErrno(rc);
+            RTMemTmpFree(pchBuf); /* Pairs with RTMemTmpAllocZ above. */
+        }
+        else
+            rc = VERR_NO_TMP_MEMORY;
+    }
+    else
+        rc = VERR_NOT_SUPPORTED;
+    return rc;
+}
+
+
+/**
+ * Queries the user name the calling process runs under, returning it in a
+ * freshly allocated string.
+ *
+ * The name is looked up in the password database using the effective user ID
+ * and converted from the current codeset to UTF-8.
+ *
+ * @returns IPRT status code.
+ * @retval  VERR_NOT_FOUND if the password database has no entry for the
+ *          effective user ID.
+ * @retval  VERR_NOT_SUPPORTED if @a hProcess is neither NIL_RTPROCESS nor the
+ *          calling process.
+ * @retval  VERR_NO_TMP_MEMORY if the lookup buffer could not be allocated.
+ * @param   hProcess    NIL_RTPROCESS or the handle of the calling process.
+ * @param   ppszUser    Where to return the string produced by
+ *                      RTStrCurrentCPToUtf8() on success.
+ */
+RTR3DECL(int) RTProcQueryUsernameA(RTPROCESS hProcess, char **ppszUser)
+{
+    AssertPtrReturn(ppszUser, VERR_INVALID_POINTER);
+
+    int rc;
+    if (   hProcess == NIL_RTPROCESS
+        || hProcess == RTProcSelf())
+    {
+        /*
+         * Figure a good buffer estimate.
+         */
+        int32_t cbPwdMax = sysconf(_SC_GETPW_R_SIZE_MAX);
+        if (cbPwdMax <= _1K)
+            cbPwdMax = _1K;
+        else
+            AssertStmt(cbPwdMax <= 32*_1M, cbPwdMax = 32*_1M);
+        char *pchBuf = (char *)RTMemTmpAllocZ(cbPwdMax);
+        if (pchBuf)
+        {
+            /*
+             * Get the password file entry.
+             *
+             * getpwuid_r() returns zero and leaves the result pointer NULL
+             * when there simply is no entry for the UID, so both the return
+             * code and the pointer must be checked before touching pPwd.
+             */
+            struct passwd  Pwd;
+            struct passwd *pPwd = NULL;
+            rc = getpwuid_r(geteuid(), &Pwd, pchBuf, cbPwdMax, &pPwd);
+            if (!rc && pPwd)
+            {
+                /*
+                 * Convert the name to UTF-8, assuming that we're getting it in the local codeset.
+                 */
+                rc = RTStrCurrentCPToUtf8(ppszUser, pPwd->pw_name);
+            }
+            else if (!rc)
+                rc = VERR_NOT_FOUND; /* No password database entry for this UID. */
+            else
+                rc = RTErrConvertFromErrno(rc);
+            RTMemTmpFree(pchBuf); /* Pairs with RTMemTmpAllocZ above. */
+        }
+        else
+            rc = VERR_NO_TMP_MEMORY;
+    }
+    else
+        rc = VERR_NOT_SUPPORTED;
+    return rc;
+}
+
diff --git a/src/VBox/Runtime/r3/posix/rand-posix.cpp b/src/VBox/Runtime/r3/posix/rand-posix.cpp
new file mode 100644
index 00000000..191e3db8
--- /dev/null
+++ b/src/VBox/Runtime/r3/posix/rand-posix.cpp
@@ -0,0 +1,148 @@
+/* $Id: rand-posix.cpp $ */
+/** @file
+ * IPRT - Random Numbers and Byte Streams, POSIX.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/ioctl.h>
+#include <fcntl.h>
+#ifdef _MSC_VER
+# include <io.h>
+# include <stdio.h>
+#else
+# include <unistd.h>
+# include <sys/time.h>
+#endif
+
+#include <iprt/rand.h>
+#include <iprt/mem.h>
+#include <iprt/errcore.h>
+#include <iprt/assert.h>
+#include "internal/rand.h"
+#include "internal/magics.h"
+
+
+
+/**
+ * @copydoc RTRANDINT::pfnGetBytes
+ *
+ * Reads from the device file descriptor, retrying short and failed reads a
+ * bounded number of times before giving up with a release assertion.
+ */
+static DECLCALLBACK(void) rtRandAdvPosixGetBytes(PRTRANDINT pThis, uint8_t *pb, size_t cb)
+{
+    /* The common case: everything arrives in one go. */
+    ssize_t cbRead = read(pThis->u.File.hFile, pb, cb);
+    if ((size_t)cbRead == cb)
+        return;
+
+    /* S10 has been observed returning 1040 bytes at a time from /dev/urandom,
+       which means we need to do more than 256 rounds to reach 668171 bytes if
+       that's what is demanded by the caller (like tstRTMemWipe.cpp). */
+    ssize_t cTries = RT_MAX(256, cb / 64);
+    for (;;)
+    {
+        /* Skip past whatever the previous read delivered. */
+        if (cbRead > 0)
+        {
+            pb += cbRead;
+            cb -= cbRead;
+        }
+
+        cbRead = read(pThis->u.File.hFile, pb, cb);
+        if ((size_t)cbRead == cb)
+            break;
+        if (cTries-- <= 0)
+            break;
+    }
+    AssertReleaseMsg((size_t)cbRead == cb, ("%zu != %zu, cTries=%zd errno=%d\n", cbRead, cb, cTries, errno));
+}
+
+
+/**
+ * @copydoc RTRANDINT::pfnDestroy
+ *
+ * Invalidates the instance, frees it and closes the device file descriptor.
+ */
+static DECLCALLBACK(int) rtRandAdvPosixDestroy(PRTRANDINT pThis)
+{
+    /* Grab the descriptor before the instance memory goes away. */
+    int const fdToClose = pThis->u.File.hFile;
+
+    pThis->u32Magic     = ~RTRANDINT_MAGIC;
+    pThis->u.File.hFile = -1;
+    RTMemFree(pThis);
+
+    close(fdToClose);
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Creates a random number generator instance backed by a device file.
+ *
+ * @returns IPRT status code.
+ * @retval  VERR_NO_MEMORY if the instance structure could not be allocated.
+ * @param   phRand      Where to return the new handle on success.
+ * @param   pszDev      Path of the device to open read-only.
+ */
+static int rtRandAdvPosixCreateSystem(PRTRAND phRand, const char *pszDev) RT_NO_THROW_DEF
+{
+    /*
+     * Open the device first, nothing to clean up if that fails.
+     */
+    int fd = open(pszDev, O_RDONLY);
+    if (fd < 0)
+        return RTErrConvertFromErrno(errno);
+
+    /*
+     * Mark the descriptor close-on-exec, then set up the handle structure.
+     */
+    int rc;
+    if (fcntl(fd, F_SETFD, FD_CLOEXEC) == -1)
+        rc = RTErrConvertFromErrno(errno);
+    else
+    {
+        PRTRANDINT pThis = (PRTRANDINT)RTMemAlloc(sizeof(*pThis));
+        if (!pThis)
+            rc = VERR_NO_MEMORY;
+        else
+        {
+            pThis->u32Magic        = RTRANDINT_MAGIC;
+            pThis->pfnGetBytes     = rtRandAdvPosixGetBytes;
+            pThis->pfnGetU32       = rtRandAdvSynthesizeU32FromBytes;
+            pThis->pfnGetU64       = rtRandAdvSynthesizeU64FromBytes;
+            pThis->pfnSeed         = rtRandAdvStubSeed;
+            pThis->pfnSaveState    = rtRandAdvStubSaveState;
+            pThis->pfnRestoreState = rtRandAdvStubRestoreState;
+            pThis->pfnDestroy      = rtRandAdvPosixDestroy;
+            pThis->u.File.hFile    = fd;
+
+            *phRand = pThis;
+            return VINF_SUCCESS;
+        }
+    }
+
+    /* Failure: the descriptor is ours to close. */
+    close(fd);
+    return rc;
+}
+
+
+/**
+ * Creates a system random number generator reading from /dev/urandom.
+ *
+ * @returns IPRT status code from rtRandAdvPosixCreateSystem().
+ * @param   phRand      Where to return the new handle on success.
+ */
+RTDECL(int) RTRandAdvCreateSystemFaster(PRTRAND phRand) RT_NO_THROW_DEF
+{
+    const char * const pszDevice = "/dev/urandom";
+    return rtRandAdvPosixCreateSystem(phRand, pszDevice);
+}
+
+
+/**
+ * Creates a system random number generator reading from /dev/random.
+ *
+ * @returns IPRT status code from rtRandAdvPosixCreateSystem().
+ * @param   phRand      Where to return the new handle on success.
+ */
+RTDECL(int) RTRandAdvCreateSystemTruer(PRTRAND phRand) RT_NO_THROW_DEF
+{
+    const char * const pszDevice = "/dev/random";
+    return rtRandAdvPosixCreateSystem(phRand, pszDevice);
+}
+
diff --git a/src/VBox/Runtime/r3/posix/rtmempage-exec-mmap-heap-posix.cpp b/src/VBox/Runtime/r3/posix/rtmempage-exec-mmap-heap-posix.cpp
new file mode 100644
index 00000000..2bb60178
--- /dev/null
+++ b/src/VBox/Runtime/r3/posix/rtmempage-exec-mmap-heap-posix.cpp
@@ -0,0 +1,797 @@
+/* $Id: rtmempage-exec-mmap-heap-posix.cpp $ */
+/** @file
+ * IPRT - RTMemPage*, POSIX with heap.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#include "internal/iprt.h"
+#include <iprt/mem.h>
+
+#include <iprt/asm.h>
+#include <iprt/assert.h>
+#include <iprt/avl.h>
+#include <iprt/critsect.h>
+#include <iprt/errcore.h>
+#include <iprt/once.h>
+#include <iprt/param.h>
+#include <iprt/string.h>
+#include "internal/mem.h"
+#include "../alloc-ef.h"
+
+#include <stdlib.h>
+#include <errno.h>
+#include <sys/mman.h>
+#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
+# define MAP_ANONYMOUS MAP_ANON
+#endif
+
+
+/*********************************************************************************************************************************
+* Defined Constants And Macros *
+*********************************************************************************************************************************/
+/** Threshold (in bytes, checked against the page aligned request size) at
+ * which we switch to simply calling mmap/munmap instead of using the heap. */
+#define RTMEMPAGEPOSIX_MMAP_THRESHOLD _128K
+/** The size of a heap block (power of two) - in bytes. */
+#define RTMEMPAGEPOSIX_BLOCK_SIZE _2M
+AssertCompile(RTMEMPAGEPOSIX_BLOCK_SIZE == (RTMEMPAGEPOSIX_BLOCK_SIZE / PAGE_SIZE) * PAGE_SIZE);
+/** The number of pages per heap block. */
+#define RTMEMPAGEPOSIX_BLOCK_PAGE_COUNT (RTMEMPAGEPOSIX_BLOCK_SIZE / PAGE_SIZE)
+
+
+/*********************************************************************************************************************************
+* Structures and Typedefs *
+*********************************************************************************************************************************/
+/** Pointer to a page heap block. */
+typedef struct RTHEAPPAGEBLOCK *PRTHEAPPAGEBLOCK;
+
+/**
+ * A simple page heap.
+ *
+ * The heap consists of fixed size blocks (RTHEAPPAGEBLOCK) kept in an AVL
+ * tree; all bookkeeping is done under CritSect.
+ */
+typedef struct RTHEAPPAGE
+{
+ /** Magic number (RTHEAPPAGE_MAGIC). */
+ uint32_t u32Magic;
+ /** The number of pages in the heap (in BlockTree). */
+ uint32_t cHeapPages;
+ /** The number of currently free pages. */
+ uint32_t cFreePages;
+ /** Number of successful allocation calls. */
+ uint32_t cAllocCalls;
+ /** Number of successful free calls. */
+ uint32_t cFreeCalls;
+ /** The free call number at which we last tried to minimize the heap. */
+ uint32_t uLastMinimizeCall;
+ /** Tree of heap blocks. */
+ AVLRPVTREE BlockTree;
+ /** Allocation hint no 1 (updated when freeing). */
+ PRTHEAPPAGEBLOCK pHint1;
+ /** Allocation hint no 2 (updated when allocating). */
+ PRTHEAPPAGEBLOCK pHint2;
+ /** Critical section protecting the heap. */
+ RTCRITSECT CritSect;
+ /** Set if the memory must be allocated with execute access. */
+ bool fExec;
+} RTHEAPPAGE;
+#define RTHEAPPAGE_MAGIC UINT32_C(0xfeedface) /**< Value of RTHEAPPAGE::u32Magic for an initialized heap. */
+/** Pointer to a page heap. */
+typedef RTHEAPPAGE *PRTHEAPPAGE;
+
+
+/**
+ * Describes a page heap block.
+ *
+ * Each block tracks RTMEMPAGEPOSIX_BLOCK_PAGE_COUNT pages using two bitmaps
+ * with one bit per page.
+ */
+typedef struct RTHEAPPAGEBLOCK
+{
+ /** The AVL tree node core (void pointer range). */
+ AVLRPVNODECORE Core;
+ /** Allocation bitmap. Set bits mark allocated pages. */
+ uint32_t bmAlloc[RTMEMPAGEPOSIX_BLOCK_PAGE_COUNT / 32];
+ /** Allocation boundary bitmap. Set bits mark the start of
+ * allocations. */
+ uint32_t bmFirst[RTMEMPAGEPOSIX_BLOCK_PAGE_COUNT / 32];
+ /** The number of free pages. */
+ uint32_t cFreePages;
+ /** Pointer back to the heap. */
+ PRTHEAPPAGE pHeap;
+} RTHEAPPAGEBLOCK;
+
+
+/**
+ * Argument package for rtHeapPageAllocCallback.
+ */
+typedef struct RTHEAPPAGEALLOCARGS
+{
+ /** The number of pages to allocate. */
+ size_t cPages;
+ /** The allocation address. Initialized to NULL by the caller, non-null on
+ * success. */
+ void *pvAlloc;
+ /** RTMEMPAGEALLOC_F_XXX. */
+ uint32_t fFlags;
+} RTHEAPPAGEALLOCARGS;
+
+
+/*********************************************************************************************************************************
+* Global Variables *
+*********************************************************************************************************************************/
+/** Run-once state for rtMemPagePosixInitOnce, which initializes both heaps
+ * below. */
+static RTONCE g_MemPagePosixInitOnce = RTONCE_INITIALIZER;
+/** The page heap (no execute access). */
+static RTHEAPPAGE g_MemPagePosixHeap;
+/** The exec page heap. */
+static RTHEAPPAGE g_MemExecPosixHeap;
+
+
+#ifdef RT_OS_OS2
+/*
+ * A quick mmap/munmap mockup for avoid duplicating lots of good code.
+ */
+# define INCL_BASE
+# include <os2.h>
+# undef MAP_PRIVATE
+# define MAP_PRIVATE 0
+# undef MAP_ANONYMOUS
+# define MAP_ANONYMOUS 0
+# undef MAP_FAILED
+# define MAP_FAILED (void *)-1
+# undef mmap
+# define mmap iprt_mmap
+# undef munmap
+# define munmap iprt_munmap
+
+/**
+ * OS/2 stand-in for mmap() built on DosAllocMem.
+ *
+ * Only the size and the protection are honoured; the address hint, the file
+ * descriptor and the offset are ignored.
+ *
+ * @returns Pointer to the committed memory, MAP_FAILED with errno set to
+ *          ENOMEM on failure.
+ */
+static void *mmap(void *pvWhere, size_t cb, int fProt, int fFlags, int fd, off_t off)
+{
+    NOREF(pvWhere); NOREF(fd); NOREF(off);
+
+    /* Translate the POSIX protection bits into OS/2 allocation flags. */
+    ULONG fAlloc = OBJ_ANY | PAG_COMMIT;
+    fAlloc |= (fProt & PROT_EXEC)  ? PAG_EXECUTE : 0;
+    fAlloc |= (fProt & PROT_READ)  ? PAG_READ    : 0;
+    fAlloc |= (fProt & PROT_WRITE) ? PAG_WRITE   : 0;
+
+    void *pv = NULL;
+    if (DosAllocMem(&pv, cb, fAlloc) != NO_ERROR)
+    {
+        errno = ENOMEM;
+        return MAP_FAILED;
+    }
+    return pv;
+}
+
+/**
+ * OS/2 stand-in for munmap() built on DosFreeMem.
+ *
+ * @returns 0 on success, -1 with errno set to EINVAL on failure.
+ * @param   pv      The memory object to free.
+ * @param   cb      Not used by this mockup.
+ */
+static int munmap(void *pv, size_t cb)
+{
+    if (DosFreeMem(pv) == NO_ERROR)
+        return 0;
+
+    errno = EINVAL;
+    return -1;
+}
+
+#endif
+
+/**
+ * Initializes the heap.
+ *
+ * All bookkeeping members, including the two allocation hints, are reset so
+ * the structure does not have to be zero-initialized by the caller.
+ *
+ * @returns IPRT status code (from RTCritSectInitEx).
+ * @param   pHeap   The page heap to initialize.
+ * @param   fExec   Whether the heap memory should be marked as
+ *                  executable or not.
+ */
+int RTHeapPageInit(PRTHEAPPAGE pHeap, bool fExec)
+{
+    int rc = RTCritSectInitEx(&pHeap->CritSect,
+                              RTCRITSECT_FLAGS_NO_LOCK_VAL | RTCRITSECT_FLAGS_NO_NESTING | RTCRITSECT_FLAGS_BOOTSTRAP_HACK,
+                              NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE, NULL);
+    if (RT_SUCCESS(rc))
+    {
+        pHeap->cHeapPages           = 0;
+        pHeap->cFreePages           = 0;
+        pHeap->cAllocCalls          = 0;
+        pHeap->cFreeCalls           = 0;
+        pHeap->uLastMinimizeCall    = 0;
+        pHeap->BlockTree            = NULL;
+        /* The hints are dereferenced by the allocator when non-NULL, so they
+           must never be left holding stale stack or heap content. */
+        pHeap->pHint1               = NULL;
+        pHeap->pHint2               = NULL;
+        pHeap->fExec                = fExec;
+        /* Set the magic last: it marks the heap as usable. */
+        pHeap->u32Magic             = RTHEAPPAGE_MAGIC;
+    }
+    return rc;
+}
+
+
+/**
+ * Deletes the heap and all the memory it tracks.
+ *
+ * @note Not implemented yet: nothing is freed and the heap is left untouched.
+ *
+ * @returns IPRT status code, currently always VERR_NOT_IMPLEMENTED.
+ * @param pHeap The page heap to delete (currently unused).
+ */
+int RTHeapPageDelete(PRTHEAPPAGE pHeap)
+{
+ NOREF(pHeap);
+ return VERR_NOT_IMPLEMENTED;
+}
+
+
+/**
+ * Applies flags to an allocation.
+ *
+ * The locking and no-dump requests are advisory: a failing mlock or madvise
+ * call is only asserted on, nothing is reported back to the caller. In
+ * OS/2 builds only RTMEMPAGEALLOC_F_ZERO is acted upon, and the no-dump
+ * request is compiled out wherever MADV_DONTDUMP is not defined.
+ *
+ * @param pv The allocation.
+ * @param cb The size of the allocation (page aligned).
+ * @param fFlags RTMEMPAGEALLOC_F_XXX.
+ */
+DECLINLINE(void) rtMemPagePosixApplyFlags(void *pv, size_t cb, uint32_t fFlags)
+{
+#ifndef RT_OS_OS2
+ if (fFlags & RTMEMPAGEALLOC_F_ADVISE_LOCKED)
+ {
+ int rc = mlock(pv, cb);
+# ifndef RT_OS_SOLARIS /* mlock(3C) on Solaris requires the priv_lock_memory privilege */
+ AssertMsg(rc == 0, ("mlock %p LB %#zx -> %d errno=%d\n", pv, cb, rc, errno));
+# endif
+ NOREF(rc); /* rc is otherwise unused when the assertion is compiled out. */
+ }
+
+# ifdef MADV_DONTDUMP
+ if (fFlags & RTMEMPAGEALLOC_F_ADVISE_NO_DUMP)
+ {
+ int rc = madvise(pv, cb, MADV_DONTDUMP);
+ AssertMsg(rc == 0, ("madvice %p LB %#zx MADV_DONTDUMP -> %d errno=%d\n", pv, cb, rc, errno));
+ NOREF(rc);
+ }
+# endif
+#endif
+
+ if (fFlags & RTMEMPAGEALLOC_F_ZERO) /* Zeroing is done last and on all platforms. */
+ RT_BZERO(pv, cb);
+}
+
+
+/**
+ * Completes a successful allocation from a block, sparing
+ * rtHeapPageAllocFromBlock some gotos.
+ *
+ * The caller has already marked the pages in the allocation bitmap; this
+ * records the allocation start, updates the free counters, the allocation
+ * hint and the call statistics, and finally applies the allocation flags.
+ *
+ * @returns VINF_SUCCESS.
+ * @param   pBlock  The block.
+ * @param   iPage   The page to start allocating at.
+ * @param   cPages  The number of pages.
+ * @param   fFlags  RTMEMPAGEALLOC_F_XXX.
+ * @param   ppv     Where to return the allocation address.
+ */
+DECLINLINE(int) rtHeapPageAllocFromBlockSuccess(PRTHEAPPAGEBLOCK pBlock, uint32_t iPage, size_t cPages, uint32_t fFlags, void **ppv)
+{
+    PRTHEAPPAGE const pOwner = pBlock->pHeap;
+
+    /* Mark the allocation start and account for the pages. */
+    ASMBitSet(&pBlock->bmFirst[0], iPage);
+    pBlock->cFreePages -= cPages;
+    pOwner->cFreePages -= cPages;
+    pOwner->cAllocCalls++;
+
+    /* Let the second hint follow the block with the most free pages. */
+    if (   !pOwner->pHint2
+        || pOwner->pHint2->cFreePages < pBlock->cFreePages)
+        pOwner->pHint2 = pBlock;
+
+    /* Work out the address and hand it back. */
+    void * const pvAlloc = (uint8_t *)pBlock->Core.Key + (iPage << PAGE_SHIFT);
+    *ppv = pvAlloc;
+
+    if (fFlags)
+        rtMemPagePosixApplyFlags(pvAlloc, cPages << PAGE_SHIFT, fFlags);
+
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Checks if a page range is free in the specified block.
+ *
+ * The range is scanned from its last page towards its first.
+ *
+ * @returns @c true if the range is free, @c false if not.
+ * @param   pBlock  The block.
+ * @param   iFirst  The first page to check.
+ * @param   cPages  The number of pages to check.
+ */
+DECLINLINE(bool) rtHeapPageIsPageRangeFree(PRTHEAPPAGEBLOCK pBlock, uint32_t iFirst, uint32_t cPages)
+{
+    for (uint32_t iPage = iFirst + cPages; iPage > iFirst; )
+    {
+        iPage--;
+        if (ASMBitTest(&pBlock->bmAlloc[0], iPage))
+            return false;
+        /* A free page must never be flagged as the start of an allocation. */
+        Assert(!ASMBitTest(&pBlock->bmFirst[0], iPage));
+    }
+    return true;
+}
+
+
+/**
+ * Tries to allocate a chunk of pages from a heap block.
+ *
+ * The search starts at the first clear bit of the allocation bitmap. When the
+ * pages following a candidate are not all free, the scan skips to the next set
+ * bit and from there to the next clear bit, which becomes the new candidate.
+ * The ASMBitFirst/Next helpers are expected to return a negative value when
+ * nothing is found (see iprt/asm.h), which ends the search.
+ *
+ * @retval VINF_SUCCESS on success.
+ * @retval VERR_NO_MEMORY if the allocation failed.
+ * @param pBlock The block to allocate from.
+ * @param cPages The size of the allocation (in pages).
+ * @param fFlags RTMEMPAGEALLOC_F_XXX.
+ * @param ppv Where to return the allocation address on success.
+ */
+DECLINLINE(int) rtHeapPageAllocFromBlock(PRTHEAPPAGEBLOCK pBlock, size_t cPages, uint32_t fFlags, void **ppv)
+{
+ if (pBlock->cFreePages >= cPages) /* Cheap reject: not enough free pages in total. */
+ {
+ int iPage = ASMBitFirstClear(&pBlock->bmAlloc[0], RTMEMPAGEPOSIX_BLOCK_PAGE_COUNT);
+ Assert(iPage >= 0);
+
+ /* special case: single page - the first clear bit is all we need. */
+ if (cPages == 1)
+ {
+ ASMBitSet(&pBlock->bmAlloc[0], iPage);
+ return rtHeapPageAllocFromBlockSuccess(pBlock, iPage, cPages, fFlags, ppv);
+ }
+
+ while ( iPage >= 0
+ && (unsigned)iPage <= RTMEMPAGEPOSIX_BLOCK_PAGE_COUNT - cPages) /* the range must fit inside the block */
+ {
+ if (rtHeapPageIsPageRangeFree(pBlock, iPage + 1, cPages - 1)) /* iPage itself is already known to be clear */
+ {
+ ASMBitSetRange(&pBlock->bmAlloc[0], iPage, iPage + cPages);
+ return rtHeapPageAllocFromBlockSuccess(pBlock, iPage, cPages, fFlags, ppv);
+ }
+
+ /* next: skip to the following set bit, then to the clear bit after it. */
+ iPage = ASMBitNextSet(&pBlock->bmAlloc[0], RTMEMPAGEPOSIX_BLOCK_PAGE_COUNT, iPage);
+ if (iPage < 0 || iPage >= RTMEMPAGEPOSIX_BLOCK_PAGE_COUNT - 1)
+ break;
+ iPage = ASMBitNextClear(&pBlock->bmAlloc[0], RTMEMPAGEPOSIX_BLOCK_PAGE_COUNT, iPage);
+ }
+ }
+
+ return VERR_NO_MEMORY;
+}
+
+
+/**
+ * RTAvlrPVDoWithAll callback that tries to allocate from the visited block.
+ *
+ * @returns 0 to continue the enum (allocation failed in this block),
+ *          non-zero to quit it (allocation succeeded).
+ * @param   pNode   The node.
+ * @param   pvUser  Pointer to the RTHEAPPAGEALLOCARGS package.
+ */
+static DECLCALLBACK(int) rtHeapPageAllocCallback(PAVLRPVNODECORE pNode, void *pvUser)
+{
+    RTHEAPPAGEALLOCARGS * const pAllocArgs = (RTHEAPPAGEALLOCARGS *)pvUser;
+    PRTHEAPPAGEBLOCK const      pCandidate = RT_FROM_MEMBER(pNode, RTHEAPPAGEBLOCK, Core);
+
+    int const rcAlloc = rtHeapPageAllocFromBlock(pCandidate, pAllocArgs->cPages, pAllocArgs->fFlags, &pAllocArgs->pvAlloc);
+    if (RT_SUCCESS(rcAlloc))
+        return 1;
+    return 0;
+}
+
+
+/**
+ * Worker for RTHeapPageAlloc.
+ *
+ * Tries the two hint blocks, then every block in the tree, and finally
+ * expands the heap with a newly mapped block.
+ *
+ * @note    The heap lock is temporarily released while a new block is being
+ *          mapped and its descriptor allocated.  It is always held again
+ *          when this function returns.
+ *
+ * @returns IPRT status code
+ * @param   pHeap   The heap - locked.
+ * @param   cPages  The page count.
+ * @param   pszTag  The tag (unused).
+ * @param   fFlags  RTMEMPAGEALLOC_F_XXX.
+ * @param   ppv     Where to return the address of the allocation
+ *                  on success.
+ */
+static int rtHeapPageAllocLocked(PRTHEAPPAGE pHeap, size_t cPages, const char *pszTag, uint32_t fFlags, void **ppv)
+{
+    int rc;
+    NOREF(pszTag);
+
+    /*
+     * Use the hints first.
+     */
+    if (pHeap->pHint1)
+    {
+        rc = rtHeapPageAllocFromBlock(pHeap->pHint1, cPages, fFlags, ppv);
+        if (rc != VERR_NO_MEMORY)
+            return rc;
+    }
+    if (pHeap->pHint2)
+    {
+        rc = rtHeapPageAllocFromBlock(pHeap->pHint2, cPages, fFlags, ppv);
+        if (rc != VERR_NO_MEMORY)
+            return rc;
+    }
+
+    /*
+     * Search the heap for a block with enough free space.
+     *
+     * N.B. This search algorithm is not optimal at all. What (hopefully) saves
+     *      it are the two hints above.
+     */
+    if (pHeap->cFreePages >= cPages)
+    {
+        RTHEAPPAGEALLOCARGS Args;
+        Args.cPages  = cPages;
+        Args.pvAlloc = NULL;
+        Args.fFlags  = fFlags;
+        RTAvlrPVDoWithAll(&pHeap->BlockTree, true /*fFromLeft*/, rtHeapPageAllocCallback, &Args);
+        if (Args.pvAlloc)
+        {
+            *ppv = Args.pvAlloc;
+            return VINF_SUCCESS;
+        }
+    }
+
+    /*
+     * Didn't find anything, so expand the heap with a new block.
+     * The lock is dropped while calling into the system.
+     */
+    RTCritSectLeave(&pHeap->CritSect);
+    void *pvPages;
+    pvPages = mmap(NULL, RTMEMPAGEPOSIX_BLOCK_SIZE,
+                   PROT_READ | PROT_WRITE | (pHeap->fExec ? PROT_EXEC : 0),
+                   MAP_PRIVATE | MAP_ANONYMOUS,
+                   -1, 0);
+    if (pvPages == MAP_FAILED)
+    {
+        /* Capture errno before re-taking the lock, as entering the critical
+           section may end up in calls that modify it. */
+        int const iSavedErrno = errno;
+        RTCritSectEnter(&pHeap->CritSect);
+        return RTErrConvertFromErrno(iSavedErrno);
+    }
+    /** @todo Eliminate this rtMemBaseAlloc dependency! */
+    PRTHEAPPAGEBLOCK pBlock;
+#ifdef RTALLOC_REPLACE_MALLOC
+    if (g_pfnOrgMalloc)
+        pBlock = (PRTHEAPPAGEBLOCK)g_pfnOrgMalloc(sizeof(*pBlock));
+    else
+#endif
+        pBlock = (PRTHEAPPAGEBLOCK)rtMemBaseAlloc(sizeof(*pBlock));
+    if (!pBlock)
+    {
+        munmap(pvPages, RTMEMPAGEPOSIX_BLOCK_SIZE);
+        RTCritSectEnter(&pHeap->CritSect);
+        return VERR_NO_MEMORY;
+    }
+
+    RT_ZERO(*pBlock);
+    pBlock->Core.Key     = pvPages;
+    pBlock->Core.KeyLast = (uint8_t *)pvPages + RTMEMPAGEPOSIX_BLOCK_SIZE - 1;
+    pBlock->cFreePages   = RTMEMPAGEPOSIX_BLOCK_PAGE_COUNT;
+    pBlock->pHeap        = pHeap;
+
+    /* Back under the lock before touching the tree and the counters. */
+    RTCritSectEnter(&pHeap->CritSect);
+
+    bool fRc = RTAvlrPVInsert(&pHeap->BlockTree, &pBlock->Core); Assert(fRc); NOREF(fRc);
+    pHeap->cFreePages += RTMEMPAGEPOSIX_BLOCK_PAGE_COUNT;
+    pHeap->cHeapPages += RTMEMPAGEPOSIX_BLOCK_PAGE_COUNT;
+
+    /*
+     * Grab memory from the new block (cannot fail).
+     */
+    rc = rtHeapPageAllocFromBlock(pBlock, cPages, fFlags, ppv);
+    Assert(rc == VINF_SUCCESS);
+
+    return rc;
+}
+
+
+/**
+ * Allocates one or more pages off the heap.
+ *
+ * @returns IPRT status code.
+ * @retval  VERR_INVALID_HANDLE if @a pHeap is not a valid, initialized heap.
+ * @retval  VERR_OUT_OF_RANGE if @a cPages exceeds what a single heap block
+ *          can hold (RTMEMPAGEPOSIX_BLOCK_PAGE_COUNT).
+ * @param   pHeap   The page heap.
+ * @param   cPages  The number of pages to allocate.
+ * @param   pszTag  The allocation tag.
+ * @param   fFlags  RTMEMPAGEALLOC_F_XXX.
+ * @param   ppv     Where to return the pointer to the pages.  Set to NULL
+ *                  on entry.
+ */
+int RTHeapPageAlloc(PRTHEAPPAGE pHeap, size_t cPages, const char *pszTag, uint32_t fFlags, void **ppv)
+{
+    /*
+     * Validate input.
+     *
+     * An allocation never spans blocks, so the limit is the number of pages
+     * in one block.  Checking against the block size in bytes would let
+     * oversized requests through, and each of those would map a fresh block
+     * that still cannot satisfy the request.
+     */
+    AssertPtr(ppv);
+    *ppv = NULL;
+    AssertPtrReturn(pHeap, VERR_INVALID_HANDLE);
+    AssertReturn(pHeap->u32Magic == RTHEAPPAGE_MAGIC, VERR_INVALID_HANDLE);
+    AssertMsgReturn(cPages <= RTMEMPAGEPOSIX_BLOCK_PAGE_COUNT, ("%#zx\n", cPages), VERR_OUT_OF_RANGE);
+
+    /*
+     * Grab the lock and call a worker with many returns.
+     */
+    int rc = RTCritSectEnter(&pHeap->CritSect);
+    if (RT_SUCCESS(rc))
+    {
+        rc = rtHeapPageAllocLocked(pHeap, cPages, pszTag, fFlags, ppv);
+        RTCritSectLeave(&pHeap->CritSect);
+    }
+
+    return rc;
+}
+
+
+/**
+ * RTAvlrPVDoWithAll callback that looks for a completely unused block.
+ *
+ * @returns 0 to continue the enum, non-zero to quit it (block found).
+ * @param   pNode   The node.
+ * @param   pvUser  Pointer to a block pointer variable.  Receives the
+ *                  address of the block to be freed.
+ */
+static DECLCALLBACK(int) rtHeapPageFindUnusedBlockCallback(PAVLRPVNODECORE pNode, void *pvUser)
+{
+    PRTHEAPPAGEBLOCK const pCandidate = RT_FROM_MEMBER(pNode, RTHEAPPAGEBLOCK, Core);
+
+    /* Keep going unless every page of the block is free. */
+    if (pCandidate->cFreePages != RTMEMPAGEPOSIX_BLOCK_PAGE_COUNT)
+        return 0;
+
+    PRTHEAPPAGEBLOCK *ppUnused = (PRTHEAPPAGEBLOCK *)pvUser;
+    *ppUnused = pCandidate;
+    return 1;
+}
+
+
+/**
+ * Frees one or more pages previously allocated off the heap.
+ *
+ * After a successful free the heap may be shrunk by unmapping blocks that
+ * are entirely unused; the heap lock is temporarily released while doing so.
+ *
+ * @returns IPRT status code.
+ * @retval  VINF_SUCCESS on success or if @a pv is NULL.
+ * @retval  VERR_INVALID_HANDLE if @a pHeap is not a valid, initialized heap.
+ * @retval  VERR_INVALID_POINTER if @a pv does not belong to any heap block
+ *          or the range does not describe exactly one allocation.
+ * @param   pHeap   The page heap.
+ * @param   pv      Pointer to what RTHeapPageAlloc returned.
+ * @param   cPages  The number of pages that was allocated.
+ */
+int RTHeapPageFree(PRTHEAPPAGE pHeap, void *pv, size_t cPages)
+{
+    /*
+     * Validate input.
+     */
+    if (!pv)
+        return VINF_SUCCESS;
+    AssertPtrReturn(pHeap, VERR_INVALID_HANDLE);
+    AssertReturn(pHeap->u32Magic == RTHEAPPAGE_MAGIC, VERR_INVALID_HANDLE);
+
+    /*
+     * Grab the lock and look up the page.
+     */
+    int rc = RTCritSectEnter(&pHeap->CritSect);
+    if (RT_SUCCESS(rc))
+    {
+        PRTHEAPPAGEBLOCK pBlock = (PRTHEAPPAGEBLOCK)RTAvlrPVRangeGet(&pHeap->BlockTree, pv);
+        if (pBlock)
+        {
+            /*
+             * Validate the specified address range.
+             */
+            uint32_t const iPage = (uint32_t)(((uintptr_t)pv - (uintptr_t)pBlock->Core.Key) >> PAGE_SHIFT);
+            /* Check the range is within the block. */
+            bool fOk = iPage + cPages <= RTMEMPAGEPOSIX_BLOCK_PAGE_COUNT;
+            /* Check that it's the start of an allocation. */
+            fOk = fOk && ASMBitTest(&pBlock->bmFirst[0], iPage);
+            /* Check that the range ends at an allocation boundary. */
+            fOk = fOk && (   iPage + cPages == RTMEMPAGEPOSIX_BLOCK_PAGE_COUNT
+                          || ASMBitTest(&pBlock->bmFirst[0], iPage + cPages)
+                          || !ASMBitTest(&pBlock->bmAlloc[0], iPage + cPages));
+            /* Check the other pages, the last page of the range included.
+               Leaving the last page out would let a range that ends with the
+               first page of a following allocation pass as valid. */
+            uint32_t const iLastPage = iPage + cPages - 1;
+            for (uint32_t i = iPage + 1; i <= iLastPage && fOk; i++)
+                fOk = ASMBitTest(&pBlock->bmAlloc[0], i)
+                   && !ASMBitTest(&pBlock->bmFirst[0], i);
+            if (fOk)
+            {
+                /*
+                 * Free the memory.
+                 */
+                ASMBitClearRange(&pBlock->bmAlloc[0], iPage, iPage + cPages);
+                ASMBitClear(&pBlock->bmFirst[0], iPage);
+                pBlock->cFreePages += cPages;
+                pHeap->cFreePages  += cPages;
+                pHeap->cFreeCalls++;
+                if (!pHeap->pHint1 || pHeap->pHint1->cFreePages < pBlock->cFreePages)
+                    pHeap->pHint1 = pBlock;
+
+                /** @todo Add bitmaps for tracking madvice and mlock so we can undo those. */
+
+                /*
+                 * Shrink the heap. Not very efficient because of the AVL tree.
+                 */
+                if (   pHeap->cFreePages >= RTMEMPAGEPOSIX_BLOCK_PAGE_COUNT * 3
+                    && pHeap->cFreePages >= pHeap->cHeapPages / 2 /* 50% free */
+                    && pHeap->cFreeCalls - pHeap->uLastMinimizeCall > RTMEMPAGEPOSIX_BLOCK_PAGE_COUNT
+                   )
+                {
+                    uint32_t cFreePageTarget = pHeap->cHeapPages / 4; /* 25% free */
+                    while (pHeap->cFreePages > cFreePageTarget)
+                    {
+                        pHeap->uLastMinimizeCall = pHeap->cFreeCalls;
+
+                        /* Find a block without any allocations in it. */
+                        pBlock = NULL;
+                        RTAvlrPVDoWithAll(&pHeap->BlockTree, false /*fFromLeft*/,
+                                          rtHeapPageFindUnusedBlockCallback, &pBlock);
+                        if (!pBlock)
+                            break;
+
+                        /* Unlink it and drop the hints, which may point to it. */
+                        void *pv2 = RTAvlrPVRemove(&pHeap->BlockTree, pBlock->Core.Key); Assert(pv2); NOREF(pv2);
+                        pHeap->cHeapPages -= RTMEMPAGEPOSIX_BLOCK_PAGE_COUNT;
+                        pHeap->cFreePages -= RTMEMPAGEPOSIX_BLOCK_PAGE_COUNT;
+                        pHeap->pHint1      = NULL;
+                        pHeap->pHint2      = NULL;
+                        RTCritSectLeave(&pHeap->CritSect);
+
+                        /* The block is private to us now; release it without holding the lock. */
+                        munmap(pBlock->Core.Key, RTMEMPAGEPOSIX_BLOCK_SIZE);
+                        pBlock->Core.Key = pBlock->Core.KeyLast = NULL;
+                        pBlock->cFreePages = 0;
+#ifdef RTALLOC_REPLACE_MALLOC
+                        if (g_pfnOrgFree)
+                            g_pfnOrgFree(pBlock);
+                        else
+#endif
+                            rtMemBaseFree(pBlock);
+
+                        RTCritSectEnter(&pHeap->CritSect);
+                    }
+                }
+            }
+            else
+                rc = VERR_INVALID_POINTER;
+        }
+        else
+            rc = VERR_INVALID_POINTER;
+
+        RTCritSectLeave(&pHeap->CritSect);
+    }
+
+    return rc;
+}
+
+
+/**
+ * RTOnce callback that initializes the regular and the exec page heaps.
+ *
+ * @returns IPRT status code
+ * @param   pvUser  Unused.
+ */
+static DECLCALLBACK(int) rtMemPagePosixInitOnce(void *pvUser)
+{
+    NOREF(pvUser);
+
+    int rc = RTHeapPageInit(&g_MemPagePosixHeap, false /*fExec*/);
+    if (RT_FAILURE(rc))
+        return rc;
+
+    rc = RTHeapPageInit(&g_MemExecPosixHeap, true /*fExec*/);
+    if (RT_FAILURE(rc))
+        RTHeapPageDelete(&g_MemPagePosixHeap); /* Undo the first heap. */
+    return rc;
+}
+
+
+/**
+ * Allocates memory from the specified heap.
+ *
+ * The size is rounded up to a whole number of pages.  Requests at or above
+ * RTMEMPAGEPOSIX_MMAP_THRESHOLD go straight to mmap, smaller ones are
+ * served by the page heap (which is initialized on first use).
+ *
+ * @returns Address of the allocated memory, NULL on failure.
+ * @param   cb      The number of bytes to allocate.
+ * @param   pszTag  The tag.
+ * @param   fFlags  RTMEMPAGEALLOC_F_XXX.
+ * @param   pHeap   The heap to use.
+ */
+static void *rtMemPagePosixAlloc(size_t cb, const char *pszTag, uint32_t fFlags, PRTHEAPPAGE pHeap)
+{
+    /*
+     * Validate & adjust the input.
+     */
+    Assert(cb > 0);
+    NOREF(pszTag);
+    cb = RT_ALIGN_Z(cb, PAGE_SIZE);
+
+    /*
+     * Relatively large allocations bypass the heap and use mmap directly.
+     */
+    if (cb >= RTMEMPAGEPOSIX_MMAP_THRESHOLD)
+    {
+        void *pv = mmap(NULL, cb,
+                        PROT_READ | PROT_WRITE | (pHeap == &g_MemExecPosixHeap ? PROT_EXEC : 0),
+                        MAP_PRIVATE | MAP_ANONYMOUS,
+                        -1, 0);
+        if (pv == MAP_FAILED)
+            return NULL;
+        AssertPtr(pv);
+
+        if (fFlags)
+            rtMemPagePosixApplyFlags(pv, cb, fFlags);
+        return pv;
+    }
+
+    /*
+     * Everything else comes from the page heap.
+     */
+    void *pv;
+    int rc = RTOnce(&g_MemPagePosixInitOnce, rtMemPagePosixInitOnce, NULL);
+    if (RT_SUCCESS(rc))
+        rc = RTHeapPageAlloc(pHeap, cb >> PAGE_SHIFT, pszTag, fFlags, &pv);
+    if (RT_FAILURE(rc))
+        return NULL;
+    return pv;
+}
+
+
+/**
+ * Free memory allocated by rtMemPagePosixAlloc.
+ *
+ * The size decides where the memory goes back to, so it must match the size
+ * given when allocating: at or above RTMEMPAGEPOSIX_MMAP_THRESHOLD (after
+ * page rounding) it is unmapped, otherwise it is returned to the heap.
+ *
+ * @param   pv      The address of the memory to free.  NULL is ignored.
+ * @param   cb      The size.
+ * @param   pHeap   The heap.
+ */
+static void rtMemPagePosixFree(void *pv, size_t cb, PRTHEAPPAGE pHeap)
+{
+    /*
+     * Validate & adjust the input.
+     */
+    if (!pv)
+        return;
+    AssertPtr(pv);
+    Assert(cb > 0);
+    Assert(!((uintptr_t)pv & PAGE_OFFSET_MASK));
+    cb = RT_ALIGN_Z(cb, PAGE_SIZE);
+
+    /*
+     * Relatively large allocations were mapped directly, so unmap them.
+     */
+    if (cb >= RTMEMPAGEPOSIX_MMAP_THRESHOLD)
+    {
+        int rc = munmap(pv, cb);
+        AssertMsg(rc == 0, ("rc=%d pv=%p cb=%#zx\n", rc, pv, cb)); NOREF(rc);
+        return;
+    }
+
+    /*
+     * The rest goes back to the page heap.
+     */
+    int rc = RTHeapPageFree(pHeap, pv, cb >> PAGE_SHIFT);
+    AssertRC(rc);
+}
+
+
+
+
+
+/**
+ * Allocates memory in whole pages from the regular page heap, no flags set.
+ *
+ * @returns Pointer to the memory, NULL on failure.
+ * @param   cb      The number of bytes to allocate.
+ * @param   pszTag  The allocation tag.
+ */
+RTDECL(void *) RTMemPageAllocTag(size_t cb, const char *pszTag) RT_NO_THROW_DEF
+{
+    uint32_t const fNoFlags = 0;
+    return rtMemPagePosixAlloc(cb, pszTag, fNoFlags, &g_MemPagePosixHeap);
+}
+
+
+/**
+ * Allocates zeroed memory in whole pages from the regular page heap.
+ *
+ * @returns Pointer to the memory, NULL on failure.
+ * @param   cb      The number of bytes to allocate.
+ * @param   pszTag  The allocation tag.
+ */
+RTDECL(void *) RTMemPageAllocZTag(size_t cb, const char *pszTag) RT_NO_THROW_DEF
+{
+    uint32_t const fZeroIt = RTMEMPAGEALLOC_F_ZERO;
+    return rtMemPagePosixAlloc(cb, pszTag, fZeroIt, &g_MemPagePosixHeap);
+}
+
+
+/**
+ * Allocates memory in whole pages from the regular page heap with
+ * caller-specified flags.
+ *
+ * @returns Pointer to the memory, NULL on failure or if @a fFlags contains
+ *          bits outside RTMEMPAGEALLOC_F_VALID_MASK.
+ * @param   cb      The number of bytes to allocate.
+ * @param   fFlags  RTMEMPAGEALLOC_F_XXX.
+ * @param   pszTag  The allocation tag.
+ */
+RTDECL(void *) RTMemPageAllocExTag(size_t cb, uint32_t fFlags, const char *pszTag) RT_NO_THROW_DEF
+{
+    AssertReturn(!(fFlags & ~RTMEMPAGEALLOC_F_VALID_MASK), NULL);
+
+    void * const pvAlloc = rtMemPagePosixAlloc(cb, pszTag, fFlags, &g_MemPagePosixHeap);
+    return pvAlloc;
+}
+
+
+/**
+ * Frees memory allocated by one of the RTMemPageAlloc*Tag functions.
+ *
+ * @param   pv      The memory to free.  NULL is ignored.
+ * @param   cb      The size that was passed when allocating.
+ */
+RTDECL(void) RTMemPageFree(void *pv, size_t cb) RT_NO_THROW_DEF
+{
+    rtMemPagePosixFree(pv, cb, &g_MemPagePosixHeap);
+}
+
diff --git a/src/VBox/Runtime/r3/posix/rtmempage-exec-mmap-posix.cpp b/src/VBox/Runtime/r3/posix/rtmempage-exec-mmap-posix.cpp
new file mode 100644
index 00000000..abe285a2
--- /dev/null
+++ b/src/VBox/Runtime/r3/posix/rtmempage-exec-mmap-posix.cpp
@@ -0,0 +1,182 @@
+/* $Id: rtmempage-exec-mmap-posix.cpp $ */
+/** @file
+ * IPRT - RTMemPage*, POSIX with mmap only.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#include "internal/iprt.h"
+#include <iprt/mem.h>
+
+#include <iprt/asm.h>
+#include <iprt/assert.h>
+#include <iprt/errcore.h>
+#include <iprt/param.h>
+#include <iprt/string.h>
+
+#include <stdlib.h>
+#include <errno.h>
+#include <sys/mman.h>
+#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
+# define MAP_ANONYMOUS MAP_ANON
+#endif
+
+
+/**
+ * Applies flags to an allocation.
+ *
+ * @param pv The allocation.
+ * @param cb The size of the allocation (page aligned).
+ * @param fFlags RTMEMPAGEALLOC_F_XXX.
+ */
+DECLINLINE(void) rtMemPagePosixApplyFlags(void *pv, size_t cb, uint32_t fFlags)
+{
+#ifndef RT_OS_OS2
+ if (fFlags & RTMEMPAGEALLOC_F_ADVISE_LOCKED)
+ {
+ int rc = mlock(pv, cb);
+# ifndef RT_OS_SOLARIS /* mlock(3C) on Solaris requires the priv_lock_memory privilege */
+ AssertMsg(rc == 0, ("mlock %p LB %#zx -> %d errno=%d\n", pv, cb, rc, errno));
+# endif
+ NOREF(rc);
+ }
+
+# ifdef MADV_DONTDUMP
+ if (fFlags & RTMEMPAGEALLOC_F_ADVISE_NO_DUMP)
+ {
+ int rc = madvise(pv, cb, MADV_DONTDUMP);
+ AssertMsg(rc == 0, ("madvice %p LB %#zx MADV_DONTDUMP -> %d errno=%d\n", pv, cb, rc, errno));
+ NOREF(rc);
+ }
+# endif
+#endif
+
+ if (fFlags & RTMEMPAGEALLOC_F_ZERO)
+ RT_BZERO(pv, cb);
+}
+
+
+/**
+ * Allocates memory from the specified heap.
+ *
+ * @returns Address of the allocated memory.
+ * @param cb The number of bytes to allocate.
+ * @param pszTag The tag.
+ * @param fFlags RTMEMPAGEALLOC_F_XXX.
+ * @param fProtExec PROT_EXEC or 0.
+ */
+static void *rtMemPagePosixAlloc(size_t cb, const char *pszTag, uint32_t fFlags, int fProtExec)
+{
+ /*
+ * Validate & adjust the input.
+ */
+ Assert(cb > 0);
+ NOREF(pszTag);
+ cb = RT_ALIGN_Z(cb, PAGE_SIZE);
+
+ /*
+ * Do the allocation.
+ */
+ void *pv = mmap(NULL, cb,
+ PROT_READ | PROT_WRITE | fProtExec,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (pv != MAP_FAILED)
+ {
+ AssertPtr(pv);
+
+ if (fFlags)
+ rtMemPagePosixApplyFlags(pv, cb, fFlags);
+ }
+ else
+ pv = NULL;
+
+ return pv;
+}
+
+
+/**
+ * Free memory allocated by rtMemPagePosixAlloc.
+ *
+ * @param pv The address of the memory to free.
+ * @param cb The size.
+ */
+static void rtMemPagePosixFree(void *pv, size_t cb)
+{
+ /*
+ * Validate & adjust the input.
+ */
+ if (!pv)
+ return;
+ AssertPtr(pv);
+ Assert(cb > 0);
+ Assert(!((uintptr_t)pv & PAGE_OFFSET_MASK));
+ cb = RT_ALIGN_Z(cb, PAGE_SIZE);
+
+ /*
+ * Free the memory.
+ */
+ int rc = munmap(pv, cb);
+ AssertMsg(rc == 0, ("rc=%d pv=%p cb=%#zx\n", rc, pv, cb)); NOREF(rc);
+}
+
+
+
+
+
+ RTDECL(void *) RTMemPageAllocTag(size_t cb, const char *pszTag) RT_NO_THROW_DEF
+ {
+     /* Plain allocation: no RTMEMPAGEALLOC_F_XXX flags and no PROT_EXEC. */
+     void * const pvPages = rtMemPagePosixAlloc(cb, pszTag, 0 /*fFlags*/, 0 /*fProtExec*/);
+     return pvPages;
+ }
+
+
+ RTDECL(void *) RTMemPageAllocZTag(size_t cb, const char *pszTag) RT_NO_THROW_DEF
+ {
+     /* Same as RTMemPageAllocTag, but requesting the RTMEMPAGEALLOC_F_ZERO treatment. */
+     void * const pvPages = rtMemPagePosixAlloc(cb, pszTag, RTMEMPAGEALLOC_F_ZERO, 0 /*fProtExec*/);
+     return pvPages;
+ }
+
+
+ RTDECL(void *) RTMemPageAllocExTag(size_t cb, uint32_t fFlags, const char *pszTag) RT_NO_THROW_DEF
+ {
+     /* Reject flag bits outside RTMEMPAGEALLOC_F_VALID_MASK before allocating. */
+     AssertReturn(!(fFlags & ~RTMEMPAGEALLOC_F_VALID_MASK), NULL);
+
+     void * const pvPages = rtMemPagePosixAlloc(cb, pszTag, fFlags, 0 /*fProtExec*/);
+     return pvPages;
+ }
+
+
+ RTDECL(void) RTMemPageFree(void *pv, size_t cb) RT_NO_THROW_DEF
+ {
+     /* Forward to the munmap based worker. */
+     rtMemPagePosixFree(pv, cb);
+ }
+
diff --git a/src/VBox/Runtime/r3/posix/sched-posix.cpp b/src/VBox/Runtime/r3/posix/sched-posix.cpp
new file mode 100644
index 00000000..b30b9c67
--- /dev/null
+++ b/src/VBox/Runtime/r3/posix/sched-posix.cpp
@@ -0,0 +1,849 @@
+/* $Id: sched-posix.cpp $ */
+/** @file
+ * IPRT - Scheduling, POSIX.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+/*
+ * !WARNING!
+ *
+ * When talking about lowering and raising priority, we do *NOT* refer to
+ * the common direction priority values take on unix systems (lower means
+ * higher). So, when we raise the priority of a linux thread the nice
+ * value will decrease, and when we lower the priority the nice value
+ * will increase. Confusing, right?
+ *
+ * !WARNING!
+ */
+
+
+
+/** @def THREAD_LOGGING
+ * Be very careful with enabling this, it may cause deadlocks when combined
+ * with the 'thread' logging prefix.
+ */
+#ifdef DOXYGEN_RUNNING
+#define THREAD_LOGGING
+#endif
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP RTLOGGROUP_THREAD
+#include <errno.h>
+#include <pthread.h>
+#include <sched.h>
+#include <unistd.h>
+#include <sys/resource.h>
+
+#include <iprt/thread.h>
+#include <iprt/process.h>
+#include <iprt/semaphore.h>
+#include <iprt/string.h>
+#include <iprt/assert.h>
+#include <iprt/log.h>
+#include <iprt/err.h>
+#include "internal/sched.h"
+#include "internal/thread.h"
+
+
+/*********************************************************************************************************************************
+* Structures and Typedefs *
+*********************************************************************************************************************************/
+
+ /**
+  * Scheduler attributes for one thread type.
+  *
+  * The tables in this file hold one entry per RTTHREADTYPE value, indexed by
+  * the thread type itself.
+  */
+ typedef struct PROCPRIORITYTYPE
+ {
+ /** The thread type.  Included for sanity: the code asserts that it equals
+  *  the index of the entry within its table. */
+ RTTHREADTYPE enmType;
+ /** The thread priority or nice delta - depends on which priority type. */
+ int iPriority;
+ } PROCPRIORITYTYPE;
+
+
+/**
+ * Configuration of one priority.
+ */
+typedef struct
+{
+ /** The priority. */
+ RTPROCPRIORITY enmPriority;
+ /** The name of this priority. */
+ const char *pszName;
+ /** The process nice value. */
+ int iNice;
+ /** The delta applied to the iPriority value. */
+ int iDelta;
+ /** Array scheduler attributes corresponding to each of the thread types. */
+ const PROCPRIORITYTYPE *paTypes;
+} PROCPRIORITY;
+
+
+/**
+ * Saved priority settings
+ */
+typedef struct
+{
+ /** Process priority. */
+ int iPriority;
+ /** Process level. */
+ struct sched_param SchedParam;
+ /** Process level. */
+ int iPolicy;
+ /** pthread level. */
+ struct sched_param PthreadSchedParam;
+ /** pthread level. */
+ int iPthreadPolicy;
+} SAVEDPRIORITY, *PSAVEDPRIORITY;
+
+
+/*********************************************************************************************************************************
+* Global Variables *
+*********************************************************************************************************************************/
+/**
+ * Thread level priorities based on a 0..31 priority range
+ * as specified as the minimum for SCHED_RR/FIFO. FreeBSD
+ * seems to be using this (needs more research to be
+ * certain).
+ */
+static const PROCPRIORITYTYPE g_aTypesThread[RTTHREADTYPE_END] =
+{
+ { RTTHREADTYPE_INVALID, -999999999 },
+ { RTTHREADTYPE_INFREQUENT_POLLER, 5 },
+ { RTTHREADTYPE_MAIN_HEAVY_WORKER, 12 },
+ { RTTHREADTYPE_EMULATION, 14 },
+ { RTTHREADTYPE_DEFAULT, 15 },
+ { RTTHREADTYPE_GUI, 16 },
+ { RTTHREADTYPE_MAIN_WORKER, 18 },
+ { RTTHREADTYPE_VRDP_IO, 24 },
+ { RTTHREADTYPE_DEBUGGER, 28 },
+ { RTTHREADTYPE_MSG_PUMP, 29 },
+ { RTTHREADTYPE_IO, 30 },
+ { RTTHREADTYPE_TIMER, 31 }
+};
+
+ /**
+  * Flat variant of g_aTypesThread: every valid thread type gets the same
+  * thread level priority (15).
+  */
+ static const PROCPRIORITYTYPE g_aTypesThreadFlat[RTTHREADTYPE_END] =
+ {
+ { RTTHREADTYPE_INVALID, ~0 },
+ { RTTHREADTYPE_INFREQUENT_POLLER, 15 },
+ { RTTHREADTYPE_MAIN_HEAVY_WORKER, 15 },
+ { RTTHREADTYPE_EMULATION, 15 },
+ { RTTHREADTYPE_DEFAULT, 15 },
+ { RTTHREADTYPE_GUI, 15 },
+ { RTTHREADTYPE_MAIN_WORKER, 15 },
+ { RTTHREADTYPE_VRDP_IO, 15 },
+ { RTTHREADTYPE_DEBUGGER, 15 },
+ { RTTHREADTYPE_MSG_PUMP, 15 },
+ { RTTHREADTYPE_IO, 15 },
+ { RTTHREADTYPE_TIMER, 15 }
+ };
+
+/**
+ * Process and thread level priority, full access at thread level.
+ */
+static const PROCPRIORITY g_aProcessAndThread[] =
+{
+ { RTPROCPRIORITY_FLAT, "Flat", 0, 0, g_aTypesThreadFlat },
+ { RTPROCPRIORITY_LOW, "Low", 9, 0, g_aTypesThread },
+ { RTPROCPRIORITY_LOW, "Low", 11, 0, g_aTypesThread },
+ { RTPROCPRIORITY_LOW, "Low", 15, 0, g_aTypesThread },
+ { RTPROCPRIORITY_LOW, "Low", 17, 0, g_aTypesThread },
+ { RTPROCPRIORITY_LOW, "Low", 19, 0, g_aTypesThread },
+ { RTPROCPRIORITY_LOW, "Low", 7, 0, g_aTypesThread },
+ { RTPROCPRIORITY_LOW, "Low", 5, 0, g_aTypesThread },
+ { RTPROCPRIORITY_LOW, "Low", 3, 0, g_aTypesThread },
+ { RTPROCPRIORITY_LOW, "Low", 1, 0, g_aTypesThread },
+ { RTPROCPRIORITY_NORMAL, "Normal", 0, 0, g_aTypesThread },
+ { RTPROCPRIORITY_NORMAL, "Normal", 0, 0, g_aTypesThreadFlat },
+ { RTPROCPRIORITY_HIGH, "High", -9, 0, g_aTypesThread },
+ { RTPROCPRIORITY_HIGH, "High", -7, 0, g_aTypesThread },
+ { RTPROCPRIORITY_HIGH, "High", -5, 0, g_aTypesThread },
+ { RTPROCPRIORITY_HIGH, "High", -3, 0, g_aTypesThread },
+ { RTPROCPRIORITY_HIGH, "High", -1, 0, g_aTypesThread },
+ { RTPROCPRIORITY_HIGH, "High", -9, 0, g_aTypesThreadFlat },
+ { RTPROCPRIORITY_HIGH, "High", -1, 0, g_aTypesThreadFlat }
+};
+
+/**
+ * Deltas for a process in which we are not restricted
+ * to only be lowering the priority.
+ */
+static const PROCPRIORITYTYPE g_aTypesUnixFree[RTTHREADTYPE_END] =
+{
+ { RTTHREADTYPE_INVALID, -999999999 },
+ { RTTHREADTYPE_INFREQUENT_POLLER, +3 },
+ { RTTHREADTYPE_MAIN_HEAVY_WORKER, +2 },
+ { RTTHREADTYPE_EMULATION, +1 },
+ { RTTHREADTYPE_DEFAULT, 0 },
+ { RTTHREADTYPE_GUI, 0 },
+ { RTTHREADTYPE_MAIN_WORKER, 0 },
+ { RTTHREADTYPE_VRDP_IO, -1 },
+ { RTTHREADTYPE_DEBUGGER, -1 },
+ { RTTHREADTYPE_MSG_PUMP, -2 },
+ { RTTHREADTYPE_IO, -3 },
+ { RTTHREADTYPE_TIMER, -4 }
+};
+
+/**
+ * Deltas for a process in which we are restricted
+ * to only be lowering the priority.
+ */
+static const PROCPRIORITYTYPE g_aTypesUnixRestricted[RTTHREADTYPE_END] =
+{
+ { RTTHREADTYPE_INVALID, -999999999 },
+ { RTTHREADTYPE_INFREQUENT_POLLER, +3 },
+ { RTTHREADTYPE_MAIN_HEAVY_WORKER, +2 },
+ { RTTHREADTYPE_EMULATION, +1 },
+ { RTTHREADTYPE_DEFAULT, 0 },
+ { RTTHREADTYPE_GUI, 0 },
+ { RTTHREADTYPE_MAIN_WORKER, 0 },
+ { RTTHREADTYPE_VRDP_IO, 0 },
+ { RTTHREADTYPE_DEBUGGER, 0 },
+ { RTTHREADTYPE_MSG_PUMP, 0 },
+ { RTTHREADTYPE_IO, 0 },
+ { RTTHREADTYPE_TIMER, 0 }
+};
+
+/**
+ * Deltas for a process in which we are restricted
+ * to only be lowering the priority.
+ */
+static const PROCPRIORITYTYPE g_aTypesUnixFlat[RTTHREADTYPE_END] =
+{
+ { RTTHREADTYPE_INVALID, -999999999 },
+ { RTTHREADTYPE_INFREQUENT_POLLER, 0 },
+ { RTTHREADTYPE_MAIN_HEAVY_WORKER, 0 },
+ { RTTHREADTYPE_EMULATION, 0 },
+ { RTTHREADTYPE_DEFAULT, 0 },
+ { RTTHREADTYPE_GUI, 0 },
+ { RTTHREADTYPE_MAIN_WORKER, 0 },
+ { RTTHREADTYPE_VRDP_IO, 0 },
+ { RTTHREADTYPE_DEBUGGER, 0 },
+ { RTTHREADTYPE_MSG_PUMP, 0 },
+ { RTTHREADTYPE_IO, 0 },
+ { RTTHREADTYPE_TIMER, 0 }
+};
+
+/**
+ * Process and thread level priority, full access at thread level.
+ */
+static const PROCPRIORITY g_aUnixConfigs[] =
+{
+ { RTPROCPRIORITY_FLAT, "Flat", 0, 0, g_aTypesUnixFlat },
+ { RTPROCPRIORITY_LOW, "Low", 9, 9, g_aTypesUnixFree },
+ { RTPROCPRIORITY_LOW, "Low", 9, 9, g_aTypesUnixFlat },
+ { RTPROCPRIORITY_LOW, "Low", 15, 15, g_aTypesUnixFree },
+ { RTPROCPRIORITY_LOW, "Low", 15, 15, g_aTypesUnixFlat },
+ { RTPROCPRIORITY_LOW, "Low", 17, 17, g_aTypesUnixFree },
+ { RTPROCPRIORITY_LOW, "Low", 17, 17, g_aTypesUnixFlat },
+ { RTPROCPRIORITY_LOW, "Low", 19, 19, g_aTypesUnixFlat },
+ { RTPROCPRIORITY_LOW, "Low", 9, 9, g_aTypesUnixRestricted },
+ { RTPROCPRIORITY_LOW, "Low", 15, 15, g_aTypesUnixRestricted },
+ { RTPROCPRIORITY_LOW, "Low", 17, 17, g_aTypesUnixRestricted },
+ { RTPROCPRIORITY_NORMAL, "Normal", 0, 0, g_aTypesUnixFree },
+ { RTPROCPRIORITY_NORMAL, "Normal", 0, 0, g_aTypesUnixRestricted },
+ { RTPROCPRIORITY_NORMAL, "Normal", 0, 0, g_aTypesUnixFlat },
+ { RTPROCPRIORITY_HIGH, "High", -9, -9, g_aTypesUnixFree },
+ { RTPROCPRIORITY_HIGH, "High", -7, -7, g_aTypesUnixFree },
+ { RTPROCPRIORITY_HIGH, "High", -5, -5, g_aTypesUnixFree },
+ { RTPROCPRIORITY_HIGH, "High", -3, -3, g_aTypesUnixFree },
+ { RTPROCPRIORITY_HIGH, "High", -1, -1, g_aTypesUnixFree },
+ { RTPROCPRIORITY_HIGH, "High", -9, -9, g_aTypesUnixRestricted },
+ { RTPROCPRIORITY_HIGH, "High", -7, -7, g_aTypesUnixRestricted },
+ { RTPROCPRIORITY_HIGH, "High", -5, -5, g_aTypesUnixRestricted },
+ { RTPROCPRIORITY_HIGH, "High", -3, -3, g_aTypesUnixRestricted },
+ { RTPROCPRIORITY_HIGH, "High", -1, -1, g_aTypesUnixRestricted },
+ { RTPROCPRIORITY_HIGH, "High", -9, -9, g_aTypesUnixFlat },
+ { RTPROCPRIORITY_HIGH, "High", -7, -7, g_aTypesUnixFlat },
+ { RTPROCPRIORITY_HIGH, "High", -5, -5, g_aTypesUnixFlat },
+ { RTPROCPRIORITY_HIGH, "High", -3, -3, g_aTypesUnixFlat },
+ { RTPROCPRIORITY_HIGH, "High", -1, -1, g_aTypesUnixFlat }
+};
+
+/**
+ * The dynamic default priority configuration.
+ *
+ * This will be recalulated at runtime depending on what the
+ * system allow us to do and what the current priority is.
+ */
+static PROCPRIORITY g_aDefaultPriority =
+{
+ RTPROCPRIORITY_LOW, "Default", 0, 0, g_aTypesUnixRestricted
+};
+
+ /** Pointer to the current priority configuration.
+  * Starts out as the default configuration and is updated by
+  * rtProcNativeSetPriority. */
+ static const PROCPRIORITY *g_pProcessPriority = &g_aDefaultPriority;
+
+
+ /** Set to what kind of scheduling priority support the host
+  * OS seems to be offering. Determined at runtime by the prober thread.
+  */
+ static enum
+ {
+ /** Not probed yet. */
+ OSPRIOSUP_UNDETERMINED = 0,
+ /** An excellent combination of process and thread level
+  * I.e. setpriority() works on process level, one has to be supervisor
+  * to raise priority as is the custom in unix. While pthread_setschedparam()
+  * works on thread level and we can raise the priority just like we want.
+  *
+  * I think this is what FreeBSD offers. (It is certainly analogous to what
+  * NT offers if you wondered.) Linux on the other hand doesn't provide this
+  * for processes with SCHED_OTHER policy, and I'm not sure if we want to
+  * play around with using the real-time SCHED_RR and SCHED_FIFO which would
+  * require special privileges anyway.
+  */
+ OSPRIOSUP_PROCESS_AND_THREAD_LEVEL,
+ /** A rough thread level priority only.
+  * setpriority() is the only real game in town, and it works on thread level.
+  */
+ OSPRIOSUP_THREAD_LEVEL
+ } volatile g_enmOsPrioSup = OSPRIOSUP_UNDETERMINED;
+
+ /** Set if we figure we have nice capability, meaning we can use setpriority
+  * to raise the priority. Determined by the prober thread, which tries to
+  * lower the nice value and checks whether it changed. */
+ static bool g_fCanNice = false;
+
+
+/*********************************************************************************************************************************
+* Internal Functions *
+*********************************************************************************************************************************/
+
+
+/**
+ * Saves all the scheduling attributes we can think of.
+ */
+static void rtSchedNativeSave(PSAVEDPRIORITY pSave)
+{
+ memset(pSave, 0xff, sizeof(*pSave));
+
+ errno = 0;
+ pSave->iPriority = getpriority(PRIO_PROCESS, 0 /* current process */);
+ Assert(errno == 0);
+
+ errno = 0;
+ sched_getparam(0 /* current process */, &pSave->SchedParam);
+ Assert(errno == 0);
+
+ errno = 0;
+ pSave->iPolicy = sched_getscheduler(0 /* current process */);
+ Assert(errno == 0);
+
+ int rc = pthread_getschedparam(pthread_self(), &pSave->iPthreadPolicy, &pSave->PthreadSchedParam);
+ Assert(rc == 0); NOREF(rc);
+}
+
+
+/**
+ * Restores scheduling attributes.
+ * Most of this won't work right, but anyway...
+ */
+static void rtSchedNativeRestore(PSAVEDPRIORITY pSave)
+{
+ setpriority(PRIO_PROCESS, 0, pSave->iPriority);
+ sched_setscheduler(0, pSave->iPolicy, &pSave->SchedParam);
+ sched_setparam(0, &pSave->SchedParam);
+ pthread_setschedparam(pthread_self(), pSave->iPthreadPolicy, &pSave->PthreadSchedParam);
+}
+
+
+/**
+ * Starts a worker thread and wait for it to complete.
+ * We cannot use RTThreadCreate since we're already owner of the RW lock.
+ */
+static int rtSchedCreateThread(void *(*pfnThread)(void *pvArg), void *pvArg)
+{
+ /*
+ * Setup thread attributes.
+ */
+ pthread_attr_t ThreadAttr;
+ int rc = pthread_attr_init(&ThreadAttr);
+ if (!rc)
+ {
+ rc = pthread_attr_setdetachstate(&ThreadAttr, PTHREAD_CREATE_JOINABLE);
+ if (!rc)
+ {
+ rc = pthread_attr_setstacksize(&ThreadAttr, 128*1024);
+ if (!rc)
+ {
+ /*
+ * Create the thread.
+ */
+ pthread_t Thread;
+ rc = pthread_create(&Thread, &ThreadAttr, pfnThread, pvArg);
+ if (!rc)
+ {
+ pthread_attr_destroy(&ThreadAttr);
+ /*
+ * Wait for the thread to finish.
+ */
+ void *pvRet = (void *)-1;
+ do
+ {
+ rc = pthread_join(Thread, &pvRet);
+ } while (rc == EINTR);
+ if (rc)
+ return RTErrConvertFromErrno(rc);
+ return (int)(uintptr_t)pvRet;
+ }
+ }
+ }
+ pthread_attr_destroy(&ThreadAttr);
+ }
+ return RTErrConvertFromErrno(rc);
+}
+
+
+ /**
+  * Logs the current priority configuration and the per thread type values.
+  * Does nothing unless THREAD_LOGGING is defined.
+  */
+ static void rtSchedDumpPriority(void)
+ {
+ #ifdef THREAD_LOGGING
+     const PROCPRIORITY     *pCfg    = g_pProcessPriority;
+     const PROCPRIORITYTYPE *paTypes = pCfg->paTypes;
+
+     Log(("Priority: g_fCanNice=%d g_enmOsPrioSup=%d\n", g_fCanNice, g_enmOsPrioSup));
+     Log(("Priority: enmPriority=%d \"%s\" iNice=%d iDelta=%d\n",
+          pCfg->enmPriority, pCfg->pszName, pCfg->iNice, pCfg->iDelta));
+     Log(("Priority: %2d INFREQUENT_POLLER = %d\n", RTTHREADTYPE_INFREQUENT_POLLER, paTypes[RTTHREADTYPE_INFREQUENT_POLLER].iPriority));
+     Log(("Priority: %2d MAIN_HEAVY_WORKER = %d\n", RTTHREADTYPE_MAIN_HEAVY_WORKER, paTypes[RTTHREADTYPE_MAIN_HEAVY_WORKER].iPriority));
+     Log(("Priority: %2d EMULATION = %d\n", RTTHREADTYPE_EMULATION, paTypes[RTTHREADTYPE_EMULATION].iPriority));
+     Log(("Priority: %2d DEFAULT = %d\n", RTTHREADTYPE_DEFAULT, paTypes[RTTHREADTYPE_DEFAULT].iPriority));
+     Log(("Priority: %2d GUI = %d\n", RTTHREADTYPE_GUI, paTypes[RTTHREADTYPE_GUI].iPriority));
+     Log(("Priority: %2d MAIN_WORKER = %d\n", RTTHREADTYPE_MAIN_WORKER, paTypes[RTTHREADTYPE_MAIN_WORKER].iPriority));
+     Log(("Priority: %2d VRDP_IO = %d\n", RTTHREADTYPE_VRDP_IO, paTypes[RTTHREADTYPE_VRDP_IO].iPriority));
+     Log(("Priority: %2d DEBUGGER = %d\n", RTTHREADTYPE_DEBUGGER, paTypes[RTTHREADTYPE_DEBUGGER].iPriority));
+     Log(("Priority: %2d MSG_PUMP = %d\n", RTTHREADTYPE_MSG_PUMP, paTypes[RTTHREADTYPE_MSG_PUMP].iPriority));
+     Log(("Priority: %2d IO = %d\n", RTTHREADTYPE_IO, paTypes[RTTHREADTYPE_IO].iPriority));
+     Log(("Priority: %2d TIMER = %d\n", RTTHREADTYPE_TIMER, paTypes[RTTHREADTYPE_TIMER].iPriority));
+ #endif
+ }
+
+
+/**
+ * The prober thread.
+ * We don't want to mess with the priority of the calling thread.
+ *
+ * @remark This is pretty presumptive stuff, but if it works on Linux and
+ * FreeBSD it does what I want.
+ */
+static void *rtSchedNativeProberThread(void *pvUser)
+{
+ SAVEDPRIORITY SavedPriority;
+ rtSchedNativeSave(&SavedPriority);
+
+ /*
+ * Let's first try and see what we get on a thread level.
+ */
+ int iMax = sched_get_priority_max(SavedPriority.iPthreadPolicy);
+ int iMin = sched_get_priority_min(SavedPriority.iPthreadPolicy);
+ if (iMax - iMin >= 32)
+ {
+ pthread_t Self = pthread_self();
+ int i = iMin;
+ while (i <= iMax)
+ {
+ struct sched_param SchedParam = SavedPriority.PthreadSchedParam;
+ SchedParam.sched_priority = i;
+ if (pthread_setschedparam(Self, SavedPriority.iPthreadPolicy, &SchedParam))
+ break;
+ i++;
+ }
+ if (i == iMax)
+ g_enmOsPrioSup = OSPRIOSUP_PROCESS_AND_THREAD_LEVEL;
+ }
+
+ /*
+ * Ok, we didn't have the good stuff, so let's fall back on the unix stuff.
+ */
+ if (g_enmOsPrioSup == OSPRIOSUP_UNDETERMINED)
+ g_enmOsPrioSup = OSPRIOSUP_THREAD_LEVEL;
+
+ /*
+ * Check if we can get higher priority (typically only root can do this).
+ * (Won't work right if our priority is -19 to start with, but what the heck.)
+ *
+ * We assume that the unix priority is -19 to 19. I know there are defines
+ * for this, but I don't remember which and if I'm awake enough to make sense
+ * of them from any SuS spec.
+ */
+ int iStart = getpriority(PRIO_PROCESS, 0);
+ int i = iStart;
+ while (i-- > -19)
+ {
+ if (setpriority(PRIO_PROCESS, 0, i))
+ break;
+ }
+ if (getpriority(PRIO_PROCESS, 0) != iStart)
+ g_fCanNice = true;
+ else
+ g_fCanNice = false;
+
+ /* done */
+ rtSchedNativeRestore(&SavedPriority);
+ RT_NOREF(pvUser);
+ return (void *)VINF_SUCCESS;
+}
+
+
+/**
+ * Calculate the scheduling properties for all the threads in the default
+ * process priority, assuming the current thread have the type enmType.
+ *
+ * @returns iprt status code.
+ * @param enmType The thread type to be assumed for the current thread.
+ */
+DECLHIDDEN(int) rtSchedNativeCalcDefaultPriority(RTTHREADTYPE enmType)
+{
+ Assert(enmType > RTTHREADTYPE_INVALID && enmType < RTTHREADTYPE_END);
+
+ /*
+ * First figure out what's supported by the OS.
+ */
+ if (g_enmOsPrioSup == OSPRIOSUP_UNDETERMINED)
+ {
+ int iPriority = getpriority(PRIO_PROCESS, 0);
+ int rc = rtSchedCreateThread(rtSchedNativeProberThread, NULL);
+ if (RT_FAILURE(rc))
+ return rc;
+ if (g_enmOsPrioSup == OSPRIOSUP_UNDETERMINED)
+ g_enmOsPrioSup = OSPRIOSUP_THREAD_LEVEL;
+ Assert(getpriority(PRIO_PROCESS, 0) == iPriority); NOREF(iPriority);
+ }
+
+ /*
+ * Now let's see what we can do...
+ */
+ int iPriority = getpriority(PRIO_PROCESS, 0);
+ switch (g_enmOsPrioSup)
+ {
+ case OSPRIOSUP_PROCESS_AND_THREAD_LEVEL:
+ {
+ g_aDefaultPriority.iNice = iPriority;
+ g_aDefaultPriority.iDelta = 0;
+ g_aDefaultPriority.paTypes = g_aTypesThread;
+ Assert(enmType == g_aDefaultPriority.paTypes[enmType].enmType);
+ break;
+ }
+
+ case OSPRIOSUP_THREAD_LEVEL:
+ {
+ if (g_fCanNice)
+ g_aDefaultPriority.paTypes = g_aTypesUnixFree;
+ else
+ g_aDefaultPriority.paTypes = g_aTypesUnixRestricted;
+ Assert(enmType == g_aDefaultPriority.paTypes[enmType].enmType);
+ g_aDefaultPriority.iNice = iPriority - g_aDefaultPriority.paTypes[enmType].iPriority;
+ g_aDefaultPriority.iDelta = g_aDefaultPriority.iNice;
+ break;
+ }
+
+ default:
+ AssertFailed();
+ break;
+ }
+ rtSchedDumpPriority();
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * The validator thread.
+ * We don't want to mess with the priority of the calling thread.
+ *
+ * @remark This is pretty presumptive stuff, but if it works on Linux and
+ * FreeBSD it does what I want.
+ */
+static void *rtSchedNativeValidatorThread(void *pvUser)
+{
+ const PROCPRIORITY *pCfg = (const PROCPRIORITY *)pvUser;
+ SAVEDPRIORITY SavedPriority;
+ rtSchedNativeSave(&SavedPriority);
+
+ int rc = VINF_SUCCESS;
+ switch (g_enmOsPrioSup)
+ {
+ /*
+ * Try set the specified process priority and then try
+ * out all the thread priorities which are used.
+ */
+ case OSPRIOSUP_PROCESS_AND_THREAD_LEVEL:
+ {
+ if (!setpriority(PRIO_PROCESS, 0, pCfg->iNice))
+ {
+ int iMin = sched_get_priority_min(SavedPriority.iPolicy);
+ pthread_t Self = pthread_self();
+ for (int i = RTTHREADTYPE_INVALID + 1; i < RTTHREADTYPE_END; i++)
+ {
+ struct sched_param SchedParam = SavedPriority.PthreadSchedParam;
+ SchedParam.sched_priority = pCfg->paTypes[i].iPriority
+ + pCfg->iDelta + iMin;
+ rc = pthread_setschedparam(Self, SavedPriority.iPthreadPolicy, &SchedParam);
+ if (rc)
+ {
+ rc = RTErrConvertFromErrno(rc);
+ break;
+ }
+ }
+ }
+ else
+ rc = RTErrConvertFromErrno(errno);
+ break;
+ }
+
+ /*
+ * Try out the priorities from the top and down.
+ */
+ case OSPRIOSUP_THREAD_LEVEL:
+ {
+ int i = RTTHREADTYPE_END;
+ while (--i > RTTHREADTYPE_INVALID)
+ {
+ int iPriority = pCfg->paTypes[i].iPriority + pCfg->iDelta;
+ if (setpriority(PRIO_PROCESS, 0, iPriority))
+ {
+ rc = RTErrConvertFromErrno(errno);
+ break;
+ }
+ }
+ break;
+ }
+
+ default:
+ AssertFailed();
+ break;
+ }
+
+ /* done */
+ rtSchedNativeRestore(&SavedPriority);
+ return (void *)(intptr_t)rc;
+}
+
+
+ /**
+  * Validates and activates the configuration for a process priority.
+  *
+  * For RTPROCPRIORITY_DEFAULT the default configuration is selected without
+  * validation.  Otherwise the table matching the detected OS support level is
+  * searched, and each candidate is tried out on a validator thread until one
+  * works.
+  *
+  * @returns IPRT status code.  VERR_FILE_NOT_FOUND if no table entry exists
+  *          for the priority, otherwise the status of the first candidate
+  *          that failed validation if none of them worked.
+  * @param   enmPriority     The process priority to switch to.
+  */
+ DECLHIDDEN(int) rtProcNativeSetPriority(RTPROCPRIORITY enmPriority)
+ {
+     Assert(enmPriority > RTPROCPRIORITY_INVALID && enmPriority < RTPROCPRIORITY_LAST);
+
+ #ifdef RTTHREAD_POSIX_WITH_CREATE_PRIORITY_PROXY
+     /*
+      * Make sure the proxy creation thread is started so we don't 'lose' our
+      * initial priority if it's lowered.
+      */
+     rtThreadPosixPriorityProxyStart();
+ #endif
+
+     /*
+      * Nothing to validate for the default priority (assuming no external renice).
+      */
+     int rc = VINF_SUCCESS;
+     if (enmPriority == RTPROCPRIORITY_DEFAULT)
+         g_pProcessPriority = &g_aDefaultPriority;
+     else
+     {
+         /*
+          * Select the array to search.
+          */
+         const PROCPRIORITY *pa;
+         unsigned            c;
+         switch (g_enmOsPrioSup)
+         {
+             case OSPRIOSUP_PROCESS_AND_THREAD_LEVEL:
+                 pa = g_aProcessAndThread;
+                 c  = RT_ELEMENTS(g_aProcessAndThread);
+                 break;
+             case OSPRIOSUP_THREAD_LEVEL:
+                 pa = g_aUnixConfigs;
+                 c  = RT_ELEMENTS(g_aUnixConfigs);
+                 break;
+             default:
+                 pa = NULL;
+                 c  = 0;
+                 break;
+         }
+
+         /*
+          * Search the array.
+          */
+         rc = VERR_FILE_NOT_FOUND;
+         unsigned i;
+         for (i = 0; i < c; i++)
+         {
+             if (pa[i].enmPriority != enmPriority)
+                 continue;
+
+             /*
+              * Validate it.
+              */
+             int iPriority = getpriority(PRIO_PROCESS, 0);
+             int rc3 = rtSchedCreateThread(rtSchedNativeValidatorThread, (void *)&pa[i]);
+             Assert(getpriority(PRIO_PROCESS, 0) == iPriority); NOREF(iPriority);
+             if (RT_SUCCESS(rc3))
+             {
+                 /* Found a usable configuration; i indexes it below. */
+                 rc = VINF_SUCCESS;
+                 break;
+             }
+
+             /* Remember the first validation failure; it is more telling than
+                VERR_FILE_NOT_FOUND should every candidate fail. */
+             if (rc == VERR_FILE_NOT_FOUND)
+                 rc = rc3;
+         }
+
+         /*
+          * Did we get lucky?
+          * If so update process priority and globals.
+          */
+         if (RT_SUCCESS(rc))
+         {
+             switch (g_enmOsPrioSup)
+             {
+                 case OSPRIOSUP_PROCESS_AND_THREAD_LEVEL:
+                     if (setpriority(PRIO_PROCESS, 0, pa[i].iNice))
+                     {
+                         rc = RTErrConvertFromErrno(errno);
+                         AssertMsgFailed(("setpriority(,,%d) -> errno=%d rc=%Rrc\n", pa[i].iNice, errno, rc));
+                     }
+                     break;
+
+                 default:
+                     break;
+             }
+
+             if (RT_SUCCESS(rc))
+                 g_pProcessPriority = &pa[i];
+         }
+     }
+
+ #ifdef THREAD_LOGGING
+     LogFlow(("rtProcNativeSetPriority: returns %Rrc enmPriority=%d\n", rc, enmPriority));
+     rtSchedDumpPriority();
+ #endif
+     return rc;
+ }
+
+
+/**
+ * Worker for rtThreadNativeSetPriority/OSPRIOSUP_PROCESS_AND_THREAD_LEVEL
+ * that's either called on the priority proxy thread or directly if no proxy.
+ */
+static DECLCALLBACK(int) rtThreadPosixSetPriorityOnProcAndThrdCallback(PRTTHREADINT pThread, RTTHREADTYPE enmType)
+{
+ struct sched_param SchedParam = {-9999999};
+ int iPolicy = -7777777;
+ int rc = pthread_getschedparam((pthread_t)pThread->Core.Key, &iPolicy, &SchedParam);
+ if (!rc)
+ {
+ SchedParam.sched_priority = g_pProcessPriority->paTypes[enmType].iPriority
+ + g_pProcessPriority->iDelta
+ + sched_get_priority_min(iPolicy);
+
+ rc = pthread_setschedparam((pthread_t)pThread->Core.Key, iPolicy, &SchedParam);
+ if (!rc)
+ {
+#ifdef THREAD_LOGGING
+ Log(("rtThreadNativeSetPriority: Thread=%p enmType=%d iPolicy=%d sched_priority=%d pid=%d\n",
+ pThread->Core.Key, enmType, iPolicy, SchedParam.sched_priority, getpid()));
+#endif
+ return VINF_SUCCESS;
+ }
+ }
+
+ int rcNative = rc;
+ rc = RTErrConvertFromErrno(rc);
+ AssertMsgFailed(("pthread_[gs]etschedparam(%p, %d, {%d}) -> rcNative=%d rc=%Rrc\n",
+ (void *)pThread->Core.Key, iPolicy, SchedParam.sched_priority, rcNative, rc)); NOREF(rcNative);
+ return rc;
+}
+
+
+ /**
+  * Applies the scheduling attributes of a thread type to a thread, using the
+  * current process priority configuration (g_pProcessPriority).
+  *
+  * @returns IPRT status code.  VERR_NOT_SUPPORTED (after asserting) if only
+  *          rough thread level support is available and @a pThread is not the
+  *          calling thread.  A failing setpriority() in that mode is
+  *          deliberately reported as VINF_SUCCESS.
+  * @param   pThread     The thread to change.
+  * @param   enmType     The thread type whose table entry is applied.
+  */
+ DECLHIDDEN(int) rtThreadNativeSetPriority(PRTTHREADINT pThread, RTTHREADTYPE enmType)
+ {
+ Assert(enmType > RTTHREADTYPE_INVALID && enmType < RTTHREADTYPE_END);
+ Assert(enmType == g_pProcessPriority->paTypes[enmType].enmType);
+
+ int rc = VINF_SUCCESS;
+ switch (g_enmOsPrioSup)
+ {
+ case OSPRIOSUP_PROCESS_AND_THREAD_LEVEL:
+ {
+ /* Go via the priority proxy thread when it is compiled in and starts,
+    otherwise call the worker directly (note the dangling else). */
+ #ifdef RTTHREAD_POSIX_WITH_CREATE_PRIORITY_PROXY
+ if (rtThreadPosixPriorityProxyStart())
+ rc = rtThreadPosixPriorityProxyCall(pThread, (PFNRT)rtThreadPosixSetPriorityOnProcAndThrdCallback,
+ 2, pThread, enmType);
+ else
+ #endif
+ rc = rtThreadPosixSetPriorityOnProcAndThrdCallback(pThread, enmType);
+ break;
+ }
+
+ case OSPRIOSUP_THREAD_LEVEL:
+ {
+ /* No cross platform way of getting the 'who' parameter value for
+ arbitrary threads, so this is restricted to the calling thread only. */
+ AssertReturn((pthread_t)pThread->Core.Key == pthread_self(), VERR_NOT_SUPPORTED);
+
+ /* The nice value is the table delta for the type plus the configuration delta. */
+ int iPriority = g_pProcessPriority->paTypes[enmType].iPriority + g_pProcessPriority->iDelta;
+ if (!setpriority(PRIO_PROCESS, 0, iPriority))
+ {
+ AssertMsg(iPriority == getpriority(PRIO_PROCESS, 0), ("iPriority=%d getpriority()=%d\n", iPriority, getpriority(PRIO_PROCESS, 0)));
+ #ifdef THREAD_LOGGING
+ Log(("rtThreadNativeSetPriority: Thread=%p enmType=%d iPriority=%d pid=%d\n", pThread->Core.Key, enmType, iPriority, getpid()));
+ #endif
+ }
+ else
+ {
+ #if 0
+ rc = RTErrConvertFromErrno(errno);
+ AssertMsgFailed(("setpriority(,, %d) -> errno=%d rc=%Rrc\n", iPriority, errno, rc));
+ #else
+ /** @todo
+ * Just keep quiet about failures now - we'll fail here because we're not
+ * allowed to raise our own priority. This is a problem when starting the
+ * threads with higher priority from EMT (i.e. most threads it starts).
+ * This is apparently inherited from the parent in some cases and not
+ * in other cases. I guess this would come down to which kind of pthread
+ * implementation is actually in use, and how many sensible patches which
+ * are installed.
+ * I need to find a system where this problem shows up in order to come up
+ * with a proper fix. There's an pthread_create attribute for not inheriting
+ * scheduler stuff I think...
+ */
+ rc = VINF_SUCCESS;
+ #endif
+ }
+ break;
+ }
+
+ /*
+ * Any thread created before we determine the default config, remains unchanged!
+ * The prober thread above is one of those.
+ */
+ default:
+ break;
+ }
+
+ return rc;
+ }
+
diff --git a/src/VBox/Runtime/r3/posix/semevent-posix.cpp b/src/VBox/Runtime/r3/posix/semevent-posix.cpp
new file mode 100644
index 00000000..96d01bac
--- /dev/null
+++ b/src/VBox/Runtime/r3/posix/semevent-posix.cpp
@@ -0,0 +1,654 @@
+/* $Id: semevent-posix.cpp $ */
+/** @file
+ * IPRT - Event Semaphore, POSIX.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#include <iprt/semaphore.h>
+#include "internal/iprt.h"
+
+#include <iprt/asm.h>
+#include <iprt/assert.h>
+#include <iprt/err.h>
+#include <iprt/mem.h>
+#include <iprt/lockvalidator.h>
+#include <iprt/time.h>
+
+#include "internal/mem.h"
+#include "internal/strict.h"
+
+#include <errno.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <sched.h>
+
+#include "semwait.h"
+
+
+/*********************************************************************************************************************************
+* Structures and Typedefs *
+*********************************************************************************************************************************/
+
+/** Internal representation of the POSIX implementation of an Event semaphore.
+ * The POSIX implementation uses a mutex and a condition variable to implement
+ * the automatic reset event semaphore semantics: a successful wait flips the
+ * state back to not-signaled before returning.
+ */
+struct RTSEMEVENTINTERNAL
+{
+ /** pthread condition the waiters block on. */
+ pthread_cond_t Cond;
+ /** pthread mutex which protects the condition and the event state. */
+ pthread_mutex_t Mutex;
+ /** The state of the semaphore, one of the EVENT_STATE_XXX values.
+ * This is operated while owning mutex and using atomic updating.  The one
+ * exception is RTSemEventDestroy, which marks the object uninitialized
+ * without taking the mutex. */
+ volatile uint32_t u32State;
+ /** Number of waiters.
+ * The wait workers use it to decide whether to yield before sleeping. */
+ volatile uint32_t cWaiters;
+#ifdef RTSEMEVENT_STRICT
+ /** Signallers (lock validator record, strict builds only). */
+ RTLOCKVALRECSHRD Signallers;
+ /** Indicates that lock validation should be performed.
+ * Set by RTSemEventSetSignaller and RTSemEventAddSignaller. */
+ bool volatile fEverHadSignallers;
+#endif
+ /** The creation flags (RTSEMEVENT_FLAGS_XXX). */
+ uint32_t fFlags;
+ /** Set if we're using the monotonic clock for the condition variable. */
+ bool fMonotonicClock;
+};
+
+/** The values of the u32State variable in a RTSEMEVENTINTERNAL.
+ * The two live states are non-zero patterns, so a zeroed structure never
+ * passes the state check done by the API entry points.
+ * @{ */
+/** The object isn't initialized (also written by RTSemEventDestroy). */
+#define EVENT_STATE_UNINITIALIZED 0
+/** The semaphore is signaled. */
+#define EVENT_STATE_SIGNALED 0xff00ff00
+/** The semaphore is not signaled. */
+#define EVENT_STATE_NOT_SIGNALED 0x00ff00ff
+/** @} */
+
+
+/**
+ * Creates an event semaphore with default settings.
+ *
+ * Thin wrapper around RTSemEventCreateEx passing no flags, the NIL lock
+ * validator class and no name format.
+ *
+ * @returns Whatever RTSemEventCreateEx returns.
+ * @param   phEventSem  Where to store the handle of the new semaphore.
+ */
+RTDECL(int) RTSemEventCreate(PRTSEMEVENT phEventSem)
+{
+    int const rcCreate = RTSemEventCreateEx(phEventSem, 0 /*fFlags*/, NIL_RTLOCKVALCLASS, NULL);
+    return rcCreate;
+}
+
+
+/**
+ * Creates an event semaphore: allocates the structure, initializes the
+ * condition variable and the mutex, and leaves it in the not-signaled state.
+ *
+ * @returns VINF_SUCCESS on success, VERR_INVALID_PARAMETER on unknown flags,
+ *          VERR_NO_MEMORY if the allocation fails, otherwise the converted
+ *          pthread error.
+ * @param   phEventSem  Where to store the handle (only written on success).
+ * @param   fFlags      RTSEMEVENT_FLAGS_NO_LOCK_VAL and/or
+ *                      RTSEMEVENT_FLAGS_BOOTSTRAP_HACK.
+ * @param   hClass      Lock validator class (strict builds only).
+ * @param   pszNameFmt  Name format string, NULL for a generated
+ *                      "RTSemEvent-%u" name (strict builds only).
+ * @param   ...         Format arguments.
+ */
+RTDECL(int) RTSemEventCreateEx(PRTSEMEVENT phEventSem, uint32_t fFlags, RTLOCKVALCLASS hClass, const char *pszNameFmt, ...)
+{
+ AssertReturn(!(fFlags & ~(RTSEMEVENT_FLAGS_NO_LOCK_VAL | RTSEMEVENT_FLAGS_BOOTSTRAP_HACK)), VERR_INVALID_PARAMETER);
+ Assert(!(fFlags & RTSEMEVENT_FLAGS_BOOTSTRAP_HACK) || (fFlags & RTSEMEVENT_FLAGS_NO_LOCK_VAL));
+
+ /*
+ * Allocate semaphore handle.
+ * The bootstrap hack uses the base allocator; RTSemEventDestroy picks the
+ * matching free routine from the saved flags.
+ */
+ int rc;
+ struct RTSEMEVENTINTERNAL *pThis;
+ if (!(fFlags & RTSEMEVENT_FLAGS_BOOTSTRAP_HACK))
+ pThis = (struct RTSEMEVENTINTERNAL *)RTMemAlloc(sizeof(*pThis));
+ else
+ pThis = (struct RTSEMEVENTINTERNAL *)rtMemBaseAlloc(sizeof(*pThis));
+ if (pThis)
+ {
+ /*
+ * Create the condition variable.
+ */
+ pthread_condattr_t CondAttr;
+ rc = pthread_condattr_init(&CondAttr);
+ if (!rc)
+ {
+#if defined(CLOCK_MONOTONIC) && defined(IPRT_HAVE_PTHREAD_CONDATTR_SETCLOCK)
+ /* ASSUMES RTTimeSystemNanoTS() == RTTimeNanoTS() == clock_gettime(CLOCK_MONOTONIC). */
+ /* A failure here is not fatal: it is only recorded in fMonotonicClock and
+ rc is overwritten by pthread_cond_init below. */
+ rc = pthread_condattr_setclock(&CondAttr, CLOCK_MONOTONIC);
+ pThis->fMonotonicClock = rc == 0;
+#else
+ pThis->fMonotonicClock = false;
+#endif
+ rc = pthread_cond_init(&pThis->Cond, &CondAttr);
+ if (!rc)
+ {
+ /*
+ * Create the mutex protecting the condition and the state.
+ */
+ rc = pthread_mutex_init(&pThis->Mutex, NULL);
+ if (!rc)
+ {
+ /* The attribute object is no longer needed once the condition exists. */
+ pthread_condattr_destroy(&CondAttr);
+
+ ASMAtomicWriteU32(&pThis->u32State, EVENT_STATE_NOT_SIGNALED);
+ ASMAtomicWriteU32(&pThis->cWaiters, 0);
+ pThis->fFlags = fFlags;
+#ifdef RTSEMEVENT_STRICT
+ if (!pszNameFmt)
+ {
+ /* Anonymous semaphores get a running number as name. */
+ static uint32_t volatile s_iSemEventAnon = 0;
+ RTLockValidatorRecSharedInit(&pThis->Signallers, hClass, RTLOCKVAL_SUB_CLASS_ANY, pThis,
+ true /*fSignaller*/, !(fFlags & RTSEMEVENT_FLAGS_NO_LOCK_VAL),
+ "RTSemEvent-%u", ASMAtomicIncU32(&s_iSemEventAnon) - 1);
+ }
+ else
+ {
+ va_list va;
+ va_start(va, pszNameFmt);
+ RTLockValidatorRecSharedInitV(&pThis->Signallers, hClass, RTLOCKVAL_SUB_CLASS_ANY, pThis,
+ true /*fSignaller*/, !(fFlags & RTSEMEVENT_FLAGS_NO_LOCK_VAL),
+ pszNameFmt, va);
+ va_end(va);
+ }
+ pThis->fEverHadSignallers = false;
+#else
+ RT_NOREF_PV(hClass); RT_NOREF_PV(pszNameFmt);
+#endif
+
+ *phEventSem = pThis;
+ return VINF_SUCCESS;
+ }
+ /* Failure: unwind in reverse order of construction. */
+ pthread_cond_destroy(&pThis->Cond);
+ }
+ pthread_condattr_destroy(&CondAttr);
+ }
+
+ /* rc holds a pthread error number at this point. */
+ rc = RTErrConvertFromErrno(rc);
+ if (!(fFlags & RTSEMEVENT_FLAGS_BOOTSTRAP_HACK))
+ RTMemFree(pThis);
+ else
+ rtMemBaseFree(pThis);
+ }
+ else
+ rc = VERR_NO_MEMORY;
+
+ return rc;
+}
+
+
+/**
+ * Destroys an event semaphore, kicking out any waiters first.
+ *
+ * @returns VINF_SUCCESS on success or for a NIL handle, VERR_INVALID_HANDLE
+ *          if the handle fails validation, otherwise the converted pthread
+ *          error.  On a pthread failure the structure is not freed and its
+ *          state stays EVENT_STATE_UNINITIALIZED.
+ * @param   hEventSem   The semaphore to destroy, NIL_RTSEMEVENT is ignored.
+ */
+RTDECL(int) RTSemEventDestroy(RTSEMEVENT hEventSem)
+{
+ /*
+ * Validate handle.
+ */
+ struct RTSEMEVENTINTERNAL *pThis = hEventSem;
+ if (pThis == NIL_RTSEMEVENT)
+ return VINF_SUCCESS;
+ AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+ uint32_t u32 = pThis->u32State;
+ AssertReturn(u32 == EVENT_STATE_NOT_SIGNALED || u32 == EVENT_STATE_SIGNALED, VERR_INVALID_HANDLE);
+
+ /*
+ * Abort all waiters forcing them to return failure.
+ * The state is marked uninitialized (without taking the mutex) so woken
+ * waiters see it and bail out; up to 30 attempts, 1ms apart.
+ */
+ int rc;
+ for (int i = 30; i > 0; i--)
+ {
+ ASMAtomicWriteU32(&pThis->u32State, EVENT_STATE_UNINITIALIZED);
+ rc = pthread_cond_destroy(&pThis->Cond);
+ if (rc != EBUSY)
+ break;
+ /* Still busy: wake everybody and give them time to leave. */
+ pthread_cond_broadcast(&pThis->Cond);
+ usleep(1000);
+ }
+ if (rc)
+ {
+ AssertMsgFailed(("Failed to destroy event sem %p, rc=%d.\n", pThis, rc));
+ return RTErrConvertFromErrno(rc);
+ }
+
+ /*
+ * Destroy the mutex.
+ * If it's busy we'll wait a bit to give the threads a chance to be scheduled.
+ */
+ for (int i = 30; i > 0; i--)
+ {
+ rc = pthread_mutex_destroy(&pThis->Mutex);
+ if (rc != EBUSY)
+ break;
+ usleep(1000);
+ }
+ if (rc)
+ {
+ AssertMsgFailed(("Failed to destroy event sem %p, rc=%d. (mutex)\n", pThis, rc));
+ return RTErrConvertFromErrno(rc);
+ }
+
+ /*
+ * Free the semaphore memory and be gone.
+ * The free routine must match the allocator chosen by RTSemEventCreateEx.
+ */
+#ifdef RTSEMEVENT_STRICT
+ RTLockValidatorRecSharedDelete(&pThis->Signallers);
+#endif
+ if (!(pThis->fFlags & RTSEMEVENT_FLAGS_BOOTSTRAP_HACK))
+ RTMemFree(pThis);
+ else
+ rtMemBaseFree(pThis);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Signals the event semaphore, waking one waiter.
+ *
+ * If the semaphore is already signaled the condition variable is signalled
+ * once more without changing the state.
+ *
+ * @returns VINF_SUCCESS on success, VERR_INVALID_HANDLE if the handle fails
+ *          validation, VERR_SEM_DESTROYED if the semaphore was destroyed
+ *          while we were acquiring the mutex, a lock validator status in
+ *          strict builds, otherwise the converted pthread error.
+ * @param   hEventSem   The semaphore to signal.
+ */
+RTDECL(int) RTSemEventSignal(RTSEMEVENT hEventSem)
+{
+    /*
+     * Validate input.
+     */
+    struct RTSEMEVENTINTERNAL *pThis = hEventSem;
+    AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+    uint32_t u32 = pThis->u32State;
+    AssertReturn(u32 == EVENT_STATE_NOT_SIGNALED || u32 == EVENT_STATE_SIGNALED, VERR_INVALID_HANDLE);
+
+#ifdef RTSEMEVENT_STRICT
+    if (pThis->fEverHadSignallers)
+    {
+        int rc9 = RTLockValidatorRecSharedCheckSignaller(&pThis->Signallers, NIL_RTTHREAD);
+        if (RT_FAILURE(rc9))
+            return rc9;
+    }
+#endif
+
+    /*
+     * Lock the mutex semaphore.
+     */
+    int rcPosix = pthread_mutex_lock(&pThis->Mutex);
+    if (rcPosix)
+    {
+        AssertMsgFailed(("Failed to lock event sem %p, rc=%d.\n", hEventSem, rcPosix));
+        return RTErrConvertFromErrno(rcPosix);
+    }
+
+    /*
+     * Check the state.
+     *
+     * pthread error numbers (rcPosix) and IPRT status codes (rc) are kept in
+     * separate variables so that VERR_SEM_DESTROYED is never fed to
+     * RTErrConvertFromErrno.
+     */
+    int rc = VINF_SUCCESS;
+    if (pThis->u32State == EVENT_STATE_NOT_SIGNALED)
+    {
+        ASMAtomicWriteU32(&pThis->u32State, EVENT_STATE_SIGNALED);
+        rcPosix = pthread_cond_signal(&pThis->Cond);
+        AssertMsg(!rcPosix, ("Failed to signal event sem %p, rc=%d.\n", hEventSem, rcPosix));
+    }
+    else if (pThis->u32State == EVENT_STATE_SIGNALED)
+    {
+        rcPosix = pthread_cond_signal(&pThis->Cond); /* give'm another kick... */
+        AssertMsg(!rcPosix, ("Failed to signal event sem %p, rc=%d. (2)\n", hEventSem, rcPosix));
+    }
+    else
+        rc = VERR_SEM_DESTROYED;
+
+    /*
+     * Release the mutex and return.  A signalling failure takes precedence
+     * over an unlock failure.
+     */
+    int rcPosix2 = pthread_mutex_unlock(&pThis->Mutex);
+    AssertMsg(!rcPosix2, ("Failed to unlock event sem %p, rc=%d.\n", hEventSem, rcPosix2));
+    if (rcPosix)
+        return RTErrConvertFromErrno(rcPosix);
+    if (RT_FAILURE(rc))
+        return rc;
+    if (rcPosix2)
+        return RTErrConvertFromErrno(rcPosix2);
+
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Polls the event semaphore without blocking (the timeout had already
+ * expired when the wait was requested).
+ *
+ * Atomically consumes a pending signal, if there is one, while holding the
+ * mutex.
+ *
+ * @returns VINF_SUCCESS if a signal was consumed, VERR_TIMEOUT if the
+ *          semaphore wasn't signaled, VERR_SEM_DESTROYED if it was destroyed,
+ *          or the converted pthread error if locking failed.
+ * @param   pThis   The semaphore.
+ */
+DECLINLINE(int) rtSemEventPosixWaitPoll(struct RTSEMEVENTINTERNAL *pThis)
+{
+    int rc = pthread_mutex_lock(&pThis->Mutex);
+    AssertMsgReturn(!rc, ("Failed to lock event sem %p, rc=%d.\n", pThis, rc), RTErrConvertFromErrno(rc));
+
+    /* Signaled -> not signaled; remember what the state was before. */
+    uint32_t uPrevState;
+    bool const fConsumed = ASMAtomicCmpXchgExU32(&pThis->u32State, EVENT_STATE_NOT_SIGNALED, EVENT_STATE_SIGNALED,
+                                                 &uPrevState);
+
+    rc = pthread_mutex_unlock(&pThis->Mutex);
+    AssertMsg(!rc, ("Failed to unlock event sem %p, rc=%d.\n", pThis, rc)); NOREF(rc);
+
+    if (fConsumed)
+        return VINF_SUCCESS;
+    if (uPrevState == EVENT_STATE_UNINITIALIZED)
+        return VERR_SEM_DESTROYED;
+    return VERR_TIMEOUT;
+}
+
+
+/**
+ * Performs an indefinite wait on the event.
+ *
+ * @returns VINF_SUCCESS once a signal has been consumed, VERR_SEM_DESTROYED
+ *          if the semaphore is destroyed while waiting, a lock validator
+ *          status in strict builds, otherwise the converted pthread error.
+ * @param   pThis       The semaphore.
+ * @param   fFlags      The wait flags; not used here (see the todo below).
+ * @param   pSrcPos     The source position, only used in strict builds.
+ */
+static int rtSemEventPosixWaitIndefinite(struct RTSEMEVENTINTERNAL *pThis, uint32_t fFlags, PCRTLOCKVALSRCPOS pSrcPos)
+{
+ RT_NOREF_PV(pSrcPos);
+
+ /* for fairness, yield before going to sleep. */
+ if ( ASMAtomicIncU32(&pThis->cWaiters) > 1
+ && pThis->u32State == EVENT_STATE_SIGNALED)
+ sched_yield();
+
+ /* take mutex */
+ int rc = pthread_mutex_lock(&pThis->Mutex);
+ if (rc)
+ {
+ ASMAtomicDecU32(&pThis->cWaiters);
+ AssertMsgFailed(("Failed to lock event sem %p, rc=%d.\n", pThis, rc));
+ return RTErrConvertFromErrno(rc);
+ }
+
+ /* Loop until the state leaves NOT_SIGNALED; the state is re-checked after
+ every wakeup. */
+ for (;;)
+ {
+ /* check state. */
+ if (pThis->u32State == EVENT_STATE_SIGNALED)
+ {
+ /* Consume the signal (automatic reset). */
+ ASMAtomicWriteU32(&pThis->u32State, EVENT_STATE_NOT_SIGNALED);
+ ASMAtomicDecU32(&pThis->cWaiters);
+ rc = pthread_mutex_unlock(&pThis->Mutex);
+ AssertMsg(!rc, ("Failed to unlock event sem %p, rc=%d.\n", pThis, rc)); NOREF(rc);
+ return VINF_SUCCESS;
+ }
+ if (pThis->u32State == EVENT_STATE_UNINITIALIZED)
+ {
+ /* NOTE(review): cWaiters is not decremented on this path, unlike the other exits. */
+ rc = pthread_mutex_unlock(&pThis->Mutex);
+ AssertMsg(!rc, ("Failed to unlock event sem %p, rc=%d.\n", pThis, rc)); NOREF(rc);
+ return VERR_SEM_DESTROYED;
+ }
+
+ /* wait */
+#ifdef RTSEMEVENT_STRICT
+ RTTHREAD hThreadSelf = !(pThis->fFlags & RTSEMEVENT_FLAGS_BOOTSTRAP_HACK)
+ ? RTThreadSelfAutoAdopt()
+ : RTThreadSelf();
+ if (pThis->fEverHadSignallers)
+ {
+ rc = RTLockValidatorRecSharedCheckBlocking(&pThis->Signallers, hThreadSelf, pSrcPos, false,
+ RT_INDEFINITE_WAIT, RTTHREADSTATE_EVENT, true);
+ if (RT_FAILURE(rc))
+ {
+ ASMAtomicDecU32(&pThis->cWaiters);
+ pthread_mutex_unlock(&pThis->Mutex);
+ return rc;
+ }
+ }
+#else
+ RTTHREAD hThreadSelf = RTThreadSelf();
+#endif
+ RTThreadBlocking(hThreadSelf, RTTHREADSTATE_EVENT, true);
+ RT_NOREF_PV(fFlags); /** @todo interruptible wait is not implementable... */
+ rc = pthread_cond_wait(&pThis->Cond, &pThis->Mutex);
+ RTThreadUnblocked(hThreadSelf, RTTHREADSTATE_EVENT);
+ if (rc)
+ {
+ AssertMsgFailed(("Failed to wait on event sem %p, rc=%d.\n", pThis, rc));
+ ASMAtomicDecU32(&pThis->cWaiters);
+ int rc2 = pthread_mutex_unlock(&pThis->Mutex);
+ AssertMsg(!rc2, ("Failed to unlock event sem %p, rc=%d.\n", pThis, rc2)); NOREF(rc2);
+ return RTErrConvertFromErrno(rc);
+ }
+ }
+}
+
+
+/**
+ * Performs a timed wait on the event.
+ *
+ * @returns VINF_SUCCESS once a signal has been consumed, VERR_SEM_DESTROYED
+ *          if the semaphore is destroyed while waiting, a lock validator
+ *          status in strict builds, otherwise the converted pthread error
+ *          (ETIMEDOUT when the deadline passes; EINTR only when
+ *          RTSEMWAIT_FLAGS_NORESUME is given).
+ * @param   pThis       The semaphore.
+ * @param   fFlags      The wait flags (RTSEMWAIT_FLAGS_XXX).
+ * @param   uTimeout    The timeout, interpreted according to @a fFlags.
+ * @param   pSrcPos     The source position, only used in strict builds.
+ */
+static int rtSemEventPosixWaitTimed(struct RTSEMEVENTINTERNAL *pThis, uint32_t fFlags, uint64_t uTimeout,
+                                    PCRTLOCKVALSRCPOS pSrcPos)
+{
+    /*
+     * Convert the timeout specification to absolute and relative deadlines,
+     * diverting polling and infinite waits to the appropriate workers.
+     */
+    struct timespec AbsDeadline = { 0, 0 };
+    uint64_t const  cNsRelativeDeadline = rtSemPosixCalcDeadline(fFlags, uTimeout, pThis->fMonotonicClock, &AbsDeadline);
+    if (cNsRelativeDeadline == 0)
+        return rtSemEventPosixWaitPoll(pThis);
+    if (cNsRelativeDeadline == UINT64_MAX)
+        return rtSemEventPosixWaitIndefinite(pThis, fFlags, pSrcPos);
+
+    /*
+     * Now to the business of waiting...
+     */
+
+    /* for fairness, yield before going to sleep. */
+    if (ASMAtomicIncU32(&pThis->cWaiters) > 1)
+        sched_yield();
+
+    /* take mutex */
+    int rc = pthread_mutex_lock(&pThis->Mutex);
+    if (rc)
+    {
+        ASMAtomicDecU32(&pThis->cWaiters);
+        AssertMsg(rc == ETIMEDOUT, ("Failed to lock event sem %p, rc=%d.\n", pThis, rc));
+        return RTErrConvertFromErrno(rc);
+    }
+
+    for (;;)
+    {
+        /* check state. */
+        uint32_t const u32State = pThis->u32State;
+        if (u32State != EVENT_STATE_NOT_SIGNALED)
+        {
+            if (u32State == EVENT_STATE_SIGNALED)
+            {
+                /* Consume the signal (automatic reset). */
+                ASMAtomicWriteU32(&pThis->u32State, EVENT_STATE_NOT_SIGNALED);
+                ASMAtomicDecU32(&pThis->cWaiters);
+                rc = VINF_SUCCESS;
+            }
+            else
+            {
+                Assert(u32State == EVENT_STATE_UNINITIALIZED);
+                rc = VERR_SEM_DESTROYED;
+            }
+            int rc2 = pthread_mutex_unlock(&pThis->Mutex);
+            AssertMsg(!rc2, ("Failed to unlock event sem %p, rc2=%d.\n", pThis, rc2)); RT_NOREF(rc2);
+            return rc;
+        }
+
+        /* wait */
+#ifdef RTSEMEVENT_STRICT
+        RTTHREAD hThreadSelf = !(pThis->fFlags & RTSEMEVENT_FLAGS_BOOTSTRAP_HACK)
+                             ? RTThreadSelfAutoAdopt()
+                             : RTThreadSelf();
+        if (pThis->fEverHadSignallers)
+        {
+            rc = RTLockValidatorRecSharedCheckBlocking(&pThis->Signallers, hThreadSelf, pSrcPos, false,
+                                                       (cNsRelativeDeadline + RT_NS_1MS - 1) / RT_NS_1MS,
+                                                       RTTHREADSTATE_EVENT, true);
+            if (RT_FAILURE(rc))
+            {
+                ASMAtomicDecU32(&pThis->cWaiters);
+                pthread_mutex_unlock(&pThis->Mutex);
+                return rc;
+            }
+        }
+#else
+        RTTHREAD hThreadSelf = RTThreadSelf();
+#endif
+        RTThreadBlocking(hThreadSelf, RTTHREADSTATE_EVENT, true);
+        rc = pthread_cond_timedwait(&pThis->Cond, &pThis->Mutex, &AbsDeadline);
+        RTThreadUnblocked(hThreadSelf, RTTHREADSTATE_EVENT);
+
+        /*
+         * According to SuS this function shall not return EINTR, but linux man
+         * page might have said differently at some point...
+         *
+         * An interruption only ends the wait when the caller asked for
+         * RTSEMWAIT_FLAGS_NORESUME; otherwise we go around again and wait
+         * against the same absolute deadline.  Any other error ends the wait.
+         */
+        if (   rc != 0
+            && (   rc != EINTR
+                || (fFlags & RTSEMWAIT_FLAGS_NORESUME)))
+        {
+            AssertMsg(rc == ETIMEDOUT || rc == EINTR, ("Failed to wait on event sem %p, rc=%d.\n", pThis, rc));
+            ASMAtomicDecU32(&pThis->cWaiters);
+            int rc2 = pthread_mutex_unlock(&pThis->Mutex);
+            AssertMsg(!rc2, ("Failed to unlock event sem %p, rc2=%d.\n", pThis, rc2)); NOREF(rc2);
+            return RTErrConvertFromErrno(rc);
+        }
+    } /* for (;;) */
+}
+
+
+/**
+ * Internal wait worker function.
+ *
+ * Validates the handle and the flags, then hands the request to the timed or
+ * the indefinite wait worker.
+ *
+ * @returns VERR_INVALID_HANDLE or VERR_INVALID_PARAMETER on bad input,
+ *          otherwise whatever the selected worker returns.
+ * @param   hEventSem   The semaphore handle.
+ * @param   fFlags      The wait flags (RTSEMWAIT_FLAGS_XXX).
+ * @param   uTimeout    The timeout, ignored for indefinite waits.
+ * @param   pSrcPos     The source position, can be NULL.
+ */
+DECLINLINE(int) rtSemEventPosixWait(RTSEMEVENT hEventSem, uint32_t fFlags, uint64_t uTimeout, PCRTLOCKVALSRCPOS pSrcPos)
+{
+    /* Input validation. */
+    struct RTSEMEVENTINTERNAL *pThis = hEventSem;
+    AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+    uint32_t u32 = pThis->u32State;
+    AssertReturn(u32 == EVENT_STATE_NOT_SIGNALED || u32 == EVENT_STATE_SIGNALED, VERR_INVALID_HANDLE);
+    AssertReturn(RTSEMWAIT_FLAGS_ARE_VALID(fFlags), VERR_INVALID_PARAMETER);
+
+    /* Anything that isn't explicitly indefinite goes through the timed worker. */
+    if (!(fFlags & RTSEMWAIT_FLAGS_INDEFINITE))
+        return rtSemEventPosixWaitTimed(pThis, fFlags, uTimeout, pSrcPos);
+    return rtSemEventPosixWaitIndefinite(pThis, fFlags, pSrcPos);
+}
+
+
+/**
+ * Waits for the event semaphore to be signaled, resuming if interrupted.
+ *
+ * @returns Whatever rtSemEventPosixWait returns; VERR_INTERRUPTED is not
+ *          expected (asserted).
+ * @param   hEventSem   The semaphore to wait on.
+ * @param   cMillies    Timeout in milliseconds, RT_INDEFINITE_WAIT to wait
+ *                      forever.
+ */
+RTDECL(int) RTSemEventWait(RTSEMEVENT hEventSem, RTMSINTERVAL cMillies)
+{
+    /* Translate the millisecond interval into wait flags and a timeout. */
+    bool const     fForever = cMillies == RT_INDEFINITE_WAIT;
+    uint32_t const fWait    = fForever
+                            ? RTSEMWAIT_FLAGS_RESUME | RTSEMWAIT_FLAGS_INDEFINITE
+                            : RTSEMWAIT_FLAGS_RESUME | RTSEMWAIT_FLAGS_RELATIVE | RTSEMWAIT_FLAGS_MILLISECS;
+    uint64_t const uTimeout = fForever ? 0 : cMillies;
+
+    /* Only strict builds record the source position. */
+#ifdef RTSEMEVENT_STRICT
+    RTLOCKVALSRCPOS SrcPos = RTLOCKVALSRCPOS_INIT_NORMAL_API();
+    PCRTLOCKVALSRCPOS const pSrcPos = &SrcPos;
+#else
+    PCRTLOCKVALSRCPOS const pSrcPos = NULL;
+#endif
+
+    int rc = rtSemEventPosixWait(hEventSem, fWait, uTimeout, pSrcPos);
+    Assert(rc != VERR_INTERRUPTED);
+    return rc;
+}
+
+
+/**
+ * Waits for the event semaphore to be signaled, requesting the no-resume
+ * variant of the wait.
+ *
+ * @returns Whatever rtSemEventPosixWait returns; VERR_INTERRUPTED is not
+ *          expected from this implementation (asserted).
+ * @param   hEventSem   The semaphore to wait on.
+ * @param   cMillies    Timeout in milliseconds, RT_INDEFINITE_WAIT to wait
+ *                      forever.
+ */
+RTDECL(int) RTSemEventWaitNoResume(RTSEMEVENT hEventSem, RTMSINTERVAL cMillies)
+{
+    /* Translate the millisecond interval into wait flags and a timeout. */
+    bool const     fForever = cMillies == RT_INDEFINITE_WAIT;
+    uint32_t const fWait    = fForever
+                            ? RTSEMWAIT_FLAGS_NORESUME | RTSEMWAIT_FLAGS_INDEFINITE
+                            : RTSEMWAIT_FLAGS_NORESUME | RTSEMWAIT_FLAGS_RELATIVE | RTSEMWAIT_FLAGS_MILLISECS;
+    uint64_t const uTimeout = fForever ? 0 : cMillies;
+
+    /* Only strict builds record the source position. */
+#ifdef RTSEMEVENT_STRICT
+    RTLOCKVALSRCPOS SrcPos = RTLOCKVALSRCPOS_INIT_NORMAL_API();
+    PCRTLOCKVALSRCPOS const pSrcPos = &SrcPos;
+#else
+    PCRTLOCKVALSRCPOS const pSrcPos = NULL;
+#endif
+
+    int rc = rtSemEventPosixWait(hEventSem, fWait, uTimeout, pSrcPos);
+    Assert(rc != VERR_INTERRUPTED);
+    return rc;
+}
+
+
+/**
+ * Waits on the event semaphore with caller supplied wait flags.
+ *
+ * @returns Whatever rtSemEventPosixWait returns.
+ * @param   hEventSem   The semaphore to wait on.
+ * @param   fFlags      The wait flags (RTSEMWAIT_FLAGS_XXX).
+ * @param   uTimeout    The timeout, interpreted according to @a fFlags.
+ */
+RTDECL(int) RTSemEventWaitEx(RTSEMEVENT hEventSem, uint32_t fFlags, uint64_t uTimeout)
+{
+    /* Only strict builds record the source position. */
+#ifdef RTSEMEVENT_STRICT
+    RTLOCKVALSRCPOS SrcPos = RTLOCKVALSRCPOS_INIT_NORMAL_API();
+    PCRTLOCKVALSRCPOS const pSrcPos = &SrcPos;
+#else
+    PCRTLOCKVALSRCPOS const pSrcPos = NULL;
+#endif
+    return rtSemEventPosixWait(hEventSem, fFlags, uTimeout, pSrcPos);
+}
+
+
+/**
+ * Debug variant of RTSemEventWaitEx which always passes a source position to
+ * the wait worker.
+ *
+ * @returns Whatever rtSemEventPosixWait returns.
+ * @param   hEventSem   The semaphore to wait on.
+ * @param   fFlags      The wait flags (RTSEMWAIT_FLAGS_XXX).
+ * @param   uTimeout    The timeout, interpreted according to @a fFlags.
+ * @param   uId         Caller supplied identifier; presumably consumed by
+ *                      RTLOCKVALSRCPOS_INIT_DEBUG_API together with the
+ *                      RT_SRC_POS_DECL arguments - confirm in lockvalidator.h.
+ */
+RTDECL(int) RTSemEventWaitExDebug(RTSEMEVENT hEventSem, uint32_t fFlags, uint64_t uTimeout,
+                                  RTHCUINTPTR uId, RT_SRC_POS_DECL)
+{
+    RTLOCKVALSRCPOS SrcPos = RTLOCKVALSRCPOS_INIT_DEBUG_API();
+    int const rcWait = rtSemEventPosixWait(hEventSem, fFlags, uTimeout, &SrcPos);
+    return rcWait;
+}
+
+
+/**
+ * Gets the timeout resolution of the event semaphore wait functions.
+ *
+ * @returns Always 1 in this implementation.
+ */
+RTDECL(uint32_t) RTSemEventGetResolution(void)
+{
+    /** @todo we have 1ns parameter resolution, but we need to check each host
+     *        what the actual resolution might be once the parameter makes it to the
+     *        kernel and is processed there. */
+    uint32_t const cNsResolution = 1;
+    return cNsResolution;
+}
+
+
+/**
+ * Resets the signaller record to the given thread and enables signaller
+ * validation for the semaphore.  Does nothing in non-strict builds.
+ *
+ * @param   hEventSem   The event semaphore.
+ * @param   hThread     The thread handed to the lock validator as owner.
+ */
+RTDECL(void) RTSemEventSetSignaller(RTSEMEVENT hEventSem, RTTHREAD hThread)
+{
+#ifndef RTSEMEVENT_STRICT
+    RT_NOREF_PV(hEventSem); RT_NOREF_PV(hThread);
+#else
+    /* Validate the handle before touching the validator record. */
+    struct RTSEMEVENTINTERNAL *pThis = hEventSem;
+    AssertPtrReturnVoid(pThis);
+    uint32_t u32 = pThis->u32State;
+    AssertReturnVoid(u32 == EVENT_STATE_NOT_SIGNALED || u32 == EVENT_STATE_SIGNALED);
+
+    /* From now on signalling and blocking get checked. */
+    ASMAtomicWriteBool(&pThis->fEverHadSignallers, true);
+    RTLockValidatorRecSharedResetOwner(&pThis->Signallers, hThread, NULL);
+#endif
+}
+
+
+/**
+ * Adds a thread to the signaller record and enables signaller validation for
+ * the semaphore.  Does nothing in non-strict builds.
+ *
+ * @param   hEventSem   The event semaphore.
+ * @param   hThread     The thread handed to the lock validator as owner.
+ */
+RTDECL(void) RTSemEventAddSignaller(RTSEMEVENT hEventSem, RTTHREAD hThread)
+{
+#ifndef RTSEMEVENT_STRICT
+    RT_NOREF_PV(hEventSem); RT_NOREF_PV(hThread);
+#else
+    /* Validate the handle before touching the validator record. */
+    struct RTSEMEVENTINTERNAL *pThis = hEventSem;
+    AssertPtrReturnVoid(pThis);
+    uint32_t u32 = pThis->u32State;
+    AssertReturnVoid(u32 == EVENT_STATE_NOT_SIGNALED || u32 == EVENT_STATE_SIGNALED);
+
+    /* From now on signalling and blocking get checked. */
+    ASMAtomicWriteBool(&pThis->fEverHadSignallers, true);
+    RTLockValidatorRecSharedAddOwner(&pThis->Signallers, hThread, NULL);
+#endif
+}
+
+
+/**
+ * Removes a thread from the signaller record.  Does nothing in non-strict
+ * builds.
+ *
+ * @param   hEventSem   The event semaphore.
+ * @param   hThread     The thread to remove from the record.
+ */
+RTDECL(void) RTSemEventRemoveSignaller(RTSEMEVENT hEventSem, RTTHREAD hThread)
+{
+#ifndef RTSEMEVENT_STRICT
+    RT_NOREF_PV(hEventSem); RT_NOREF_PV(hThread);
+#else
+    /* Validate the handle before touching the validator record. */
+    struct RTSEMEVENTINTERNAL *pThis = hEventSem;
+    AssertPtrReturnVoid(pThis);
+    uint32_t u32 = pThis->u32State;
+    AssertReturnVoid(u32 == EVENT_STATE_NOT_SIGNALED || u32 == EVENT_STATE_SIGNALED);
+
+    RTLockValidatorRecSharedRemoveOwner(&pThis->Signallers, hThread);
+#endif
+}
+
diff --git a/src/VBox/Runtime/r3/posix/semeventmulti-posix.cpp b/src/VBox/Runtime/r3/posix/semeventmulti-posix.cpp
new file mode 100644
index 00000000..52171b4a
--- /dev/null
+++ b/src/VBox/Runtime/r3/posix/semeventmulti-posix.cpp
@@ -0,0 +1,613 @@
+/* $Id: semeventmulti-posix.cpp $ */
+/** @file
+ * IPRT - Multiple Release Event Semaphore, POSIX.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#include <iprt/semaphore.h>
+#include "internal/iprt.h"
+
+#include <iprt/asm.h>
+#include <iprt/assert.h>
+#include <iprt/err.h>
+#include <iprt/lockvalidator.h>
+#include <iprt/mem.h>
+#include <iprt/time.h>
+
+#include "internal/strict.h"
+
+#include <errno.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <sys/time.h>
+
+#include "semwait.h"
+
+
+/*********************************************************************************************************************************
+* Structures and Typedefs *
+*********************************************************************************************************************************/
+/** Posix internal representation of a multiple release event semaphore.
+ * The POSIX implementation uses a mutex and a condition variable.  Signalling
+ * broadcasts to all waiters and the state stays signaled until
+ * RTSemEventMultiReset is called (waiting does not reset it). */
+struct RTSEMEVENTMULTIINTERNAL
+{
+ /** pthread condition the waiters block on. */
+ pthread_cond_t Cond;
+ /** pthread mutex which protects the condition and the event state. */
+ pthread_mutex_t Mutex;
+ /** The state of the semaphore, one of the EVENTMULTI_STATE_XXX values.
+ * This is operated while owning mutex and using atomic updating.  The one
+ * exception is RTSemEventMultiDestroy, which marks the object
+ * uninitialized without taking the mutex. */
+ volatile uint32_t u32State;
+ /** Number of waiters. */
+ volatile uint32_t cWaiters;
+#ifdef RTSEMEVENTMULTI_STRICT
+ /** Signallers (lock validator record, strict builds only). */
+ RTLOCKVALRECSHRD Signallers;
+ /** Indicates that lock validation should be performed. */
+ bool volatile fEverHadSignallers;
+#endif
+ /** Set if we're using the monotonic clock for the condition variable. */
+ bool fMonotonicClock;
+};
+
+/** The values of the u32State variable in RTSEMEVENTMULTIINTERNAL.
+ * The two live states are non-zero patterns, so a zeroed structure never
+ * passes the state check done by the API entry points.
+ * @{ */
+/** The object isn't initialized (also written by RTSemEventMultiDestroy). */
+#define EVENTMULTI_STATE_UNINITIALIZED 0
+/** The semaphore is signaled. */
+#define EVENTMULTI_STATE_SIGNALED 0xff00ff00
+/** The semaphore is not signaled. */
+#define EVENTMULTI_STATE_NOT_SIGNALED 0x00ff00ff
+/** @} */
+
+
+
+/**
+ * Creates a multiple release event semaphore with default settings.
+ *
+ * Thin wrapper around RTSemEventMultiCreateEx passing no flags, the NIL lock
+ * validator class and no name format.
+ *
+ * @returns Whatever RTSemEventMultiCreateEx returns.
+ * @param   phEventMultiSem     Where to store the handle of the new semaphore.
+ */
+RTDECL(int) RTSemEventMultiCreate(PRTSEMEVENTMULTI phEventMultiSem)
+{
+    int const rcCreate = RTSemEventMultiCreateEx(phEventMultiSem, 0 /*fFlags*/, NIL_RTLOCKVALCLASS, NULL);
+    return rcCreate;
+}
+
+
+/**
+ * Creates a multiple release event semaphore: allocates the structure,
+ * initializes the condition variable and the mutex, and leaves it in the
+ * not-signaled state.
+ *
+ * @returns VINF_SUCCESS on success, VERR_INVALID_PARAMETER on unknown flags,
+ *          VERR_NO_MEMORY if the allocation fails, otherwise the converted
+ *          pthread error.
+ * @param   phEventMultiSem     Where to store the handle (only written on
+ *                              success).
+ * @param   fFlags              RTSEMEVENTMULTI_FLAGS_NO_LOCK_VAL or zero.
+ * @param   hClass              Lock validator class (strict builds only).
+ * @param   pszNameFmt          Name format string, NULL for a generated
+ *                              "RTSemEventMulti-%u" name (strict builds only).
+ * @param   ...                 Format arguments.
+ */
+RTDECL(int) RTSemEventMultiCreateEx(PRTSEMEVENTMULTI phEventMultiSem, uint32_t fFlags, RTLOCKVALCLASS hClass,
+ const char *pszNameFmt, ...)
+{
+ AssertReturn(!(fFlags & ~RTSEMEVENTMULTI_FLAGS_NO_LOCK_VAL), VERR_INVALID_PARAMETER);
+
+ /*
+ * Allocate semaphore handle.
+ */
+ int rc;
+ struct RTSEMEVENTMULTIINTERNAL *pThis = (struct RTSEMEVENTMULTIINTERNAL *)RTMemAlloc(sizeof(struct RTSEMEVENTMULTIINTERNAL));
+ if (pThis)
+ {
+ /*
+ * Create the condition variable.
+ */
+ pthread_condattr_t CondAttr;
+ rc = pthread_condattr_init(&CondAttr);
+ if (!rc)
+ {
+#if defined(CLOCK_MONOTONIC) && defined(IPRT_HAVE_PTHREAD_CONDATTR_SETCLOCK)
+ /* ASSUMES RTTimeSystemNanoTS() == RTTimeNanoTS() == clock_gettime(CLOCK_MONOTONIC). */
+ /* A failure here is not fatal: it is only recorded in fMonotonicClock and
+ rc is overwritten by pthread_cond_init below. */
+ rc = pthread_condattr_setclock(&CondAttr, CLOCK_MONOTONIC);
+ pThis->fMonotonicClock = rc == 0;
+#else
+ pThis->fMonotonicClock = false;
+#endif
+ rc = pthread_cond_init(&pThis->Cond, &CondAttr);
+ if (!rc)
+ {
+ /*
+ * Create the mutex protecting the condition and the state.
+ */
+ rc = pthread_mutex_init(&pThis->Mutex, NULL);
+ if (!rc)
+ {
+ /* The attribute object is no longer needed once the condition exists. */
+ pthread_condattr_destroy(&CondAttr);
+
+ ASMAtomicWriteU32(&pThis->u32State, EVENTMULTI_STATE_NOT_SIGNALED);
+ ASMAtomicWriteU32(&pThis->cWaiters, 0);
+#ifdef RTSEMEVENTMULTI_STRICT
+ if (!pszNameFmt)
+ {
+ /* Anonymous semaphores get a running number as name. */
+ static uint32_t volatile s_iSemEventMultiAnon = 0;
+ RTLockValidatorRecSharedInit(&pThis->Signallers, hClass, RTLOCKVAL_SUB_CLASS_ANY, pThis,
+ true /*fSignaller*/, !(fFlags & RTSEMEVENTMULTI_FLAGS_NO_LOCK_VAL),
+ "RTSemEventMulti-%u", ASMAtomicIncU32(&s_iSemEventMultiAnon) - 1);
+ }
+ else
+ {
+ va_list va;
+ va_start(va, pszNameFmt);
+ RTLockValidatorRecSharedInitV(&pThis->Signallers, hClass, RTLOCKVAL_SUB_CLASS_ANY, pThis,
+ true /*fSignaller*/, !(fFlags & RTSEMEVENTMULTI_FLAGS_NO_LOCK_VAL),
+ pszNameFmt, va);
+ va_end(va);
+ }
+ pThis->fEverHadSignallers = false;
+#else
+ RT_NOREF_PV(hClass); RT_NOREF_PV(pszNameFmt);
+#endif
+
+ *phEventMultiSem = pThis;
+ return VINF_SUCCESS;
+ }
+
+ /* Failure: unwind in reverse order of construction. */
+ pthread_cond_destroy(&pThis->Cond);
+ }
+ pthread_condattr_destroy(&CondAttr);
+ }
+ /* rc holds a pthread error number at this point. */
+ rc = RTErrConvertFromErrno(rc);
+ RTMemFree(pThis);
+ }
+ else
+ rc = VERR_NO_MEMORY;
+
+ return rc;
+
+}
+
+
+/**
+ * Destroys a multiple release event semaphore, kicking out any waiters first.
+ *
+ * @returns VINF_SUCCESS on success or for a NIL handle, VERR_INVALID_HANDLE
+ *          if the handle fails validation, otherwise the converted pthread
+ *          error.  On a pthread failure the structure is not freed and its
+ *          state stays EVENTMULTI_STATE_UNINITIALIZED.
+ * @param   hEventMultiSem  The semaphore to destroy, NIL_RTSEMEVENTMULTI is
+ *                          ignored.
+ */
+RTDECL(int) RTSemEventMultiDestroy(RTSEMEVENTMULTI hEventMultiSem)
+{
+ /*
+ * Validate handle.
+ */
+ struct RTSEMEVENTMULTIINTERNAL *pThis = hEventMultiSem;
+ if (pThis == NIL_RTSEMEVENTMULTI)
+ return VINF_SUCCESS;
+ AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+ uint32_t u32 = pThis->u32State;
+ AssertReturn(u32 == EVENTMULTI_STATE_NOT_SIGNALED || u32 == EVENTMULTI_STATE_SIGNALED, VERR_INVALID_HANDLE);
+
+ /*
+ * Abort all waiters forcing them to return failure.
+ * The state is marked uninitialized (without taking the mutex) so woken
+ * waiters see it and bail out; up to 30 attempts, 1ms apart.
+ */
+ int rc;
+ for (int i = 30; i > 0; i--)
+ {
+ ASMAtomicXchgU32(&pThis->u32State, EVENTMULTI_STATE_UNINITIALIZED);
+ rc = pthread_cond_destroy(&pThis->Cond);
+ if (rc != EBUSY)
+ break;
+ /* Still busy: wake everybody and give them time to leave. */
+ pthread_cond_broadcast(&pThis->Cond);
+ usleep(1000);
+ }
+ if (rc)
+ {
+ AssertMsgFailed(("Failed to destroy event sem %p, rc=%d.\n", hEventMultiSem, rc));
+ return RTErrConvertFromErrno(rc);
+ }
+
+ /*
+ * Destroy the mutex.
+ * If it's busy we'll wait a bit to give the threads a chance to be scheduled.
+ */
+ for (int i = 30; i > 0; i--)
+ {
+ rc = pthread_mutex_destroy(&pThis->Mutex);
+ if (rc != EBUSY)
+ break;
+ usleep(1000);
+ }
+ if (rc)
+ {
+ AssertMsgFailed(("Failed to destroy event sem %p, rc=%d. (mutex)\n", hEventMultiSem, rc));
+ return RTErrConvertFromErrno(rc);
+ }
+
+ /*
+ * Free the semaphore memory and be gone.
+ */
+#ifdef RTSEMEVENTMULTI_STRICT
+ RTLockValidatorRecSharedDelete(&pThis->Signallers);
+#endif
+ RTMemFree(pThis);
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Signals the multiple release event semaphore, waking all waiters.
+ *
+ * If the semaphore is already signaled the condition variable is broadcast
+ * once more without changing the state.
+ *
+ * @returns VINF_SUCCESS on success, VERR_INVALID_HANDLE if the handle fails
+ *          validation, VERR_SEM_DESTROYED if the semaphore was destroyed
+ *          while we were acquiring the mutex, a lock validator status in
+ *          strict builds, otherwise the converted pthread error.
+ * @param   hEventMultiSem  The semaphore to signal.
+ */
+RTDECL(int) RTSemEventMultiSignal(RTSEMEVENTMULTI hEventMultiSem)
+{
+    /*
+     * Validate input.
+     */
+    struct RTSEMEVENTMULTIINTERNAL *pThis = hEventMultiSem;
+    AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+    uint32_t u32 = pThis->u32State;
+    AssertReturn(u32 == EVENTMULTI_STATE_NOT_SIGNALED || u32 == EVENTMULTI_STATE_SIGNALED, VERR_INVALID_HANDLE);
+
+#ifdef RTSEMEVENTMULTI_STRICT
+    if (pThis->fEverHadSignallers)
+    {
+        int rc9 = RTLockValidatorRecSharedCheckSignaller(&pThis->Signallers, NIL_RTTHREAD);
+        if (RT_FAILURE(rc9))
+            return rc9;
+    }
+#endif
+
+    /*
+     * Lock the mutex semaphore.
+     */
+    int rcPosix = pthread_mutex_lock(&pThis->Mutex);
+    if (rcPosix)
+    {
+        AssertMsgFailed(("Failed to lock event sem %p, rc=%d.\n", hEventMultiSem, rcPosix));
+        return RTErrConvertFromErrno(rcPosix);
+    }
+
+    /*
+     * Check the state.
+     *
+     * pthread error numbers (rcPosix) and IPRT status codes (rc) are kept in
+     * separate variables so that VERR_SEM_DESTROYED is never fed to
+     * RTErrConvertFromErrno.
+     */
+    int rc = VINF_SUCCESS;
+    if (pThis->u32State == EVENTMULTI_STATE_NOT_SIGNALED)
+    {
+        ASMAtomicXchgU32(&pThis->u32State, EVENTMULTI_STATE_SIGNALED);
+        rcPosix = pthread_cond_broadcast(&pThis->Cond);
+        AssertMsg(!rcPosix, ("Failed to signal event sem %p, rc=%d.\n", hEventMultiSem, rcPosix));
+    }
+    else if (pThis->u32State == EVENTMULTI_STATE_SIGNALED)
+    {
+        rcPosix = pthread_cond_broadcast(&pThis->Cond); /* give'm another kick... */
+        AssertMsg(!rcPosix, ("Failed to signal event sem %p, rc=%d. (2)\n", hEventMultiSem, rcPosix));
+    }
+    else
+        rc = VERR_SEM_DESTROYED;
+
+    /*
+     * Release the mutex and return.  A signalling failure takes precedence
+     * over an unlock failure.
+     */
+    int rcPosix2 = pthread_mutex_unlock(&pThis->Mutex);
+    AssertMsg(!rcPosix2, ("Failed to unlock event sem %p, rc=%d.\n", hEventMultiSem, rcPosix2));
+    if (rcPosix)
+        return RTErrConvertFromErrno(rcPosix);
+    if (RT_FAILURE(rc))
+        return rc;
+    if (rcPosix2)
+        return RTErrConvertFromErrno(rcPosix2);
+
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Resets the multiple release event semaphore to the not-signaled state.
+ *
+ * @returns VINF_SUCCESS if the semaphore is now (or already was) not
+ *          signaled, VERR_INVALID_HANDLE if the handle fails validation,
+ *          VERR_SEM_DESTROYED if it was destroyed, otherwise the converted
+ *          pthread error from locking or unlocking.
+ * @param   hEventMultiSem  The semaphore to reset.
+ */
+RTDECL(int) RTSemEventMultiReset(RTSEMEVENTMULTI hEventMultiSem)
+{
+    /* Input validation. */
+    struct RTSEMEVENTMULTIINTERNAL *pThis = hEventMultiSem;
+    AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+    uint32_t u32 = pThis->u32State;
+    AssertReturn(u32 == EVENTMULTI_STATE_NOT_SIGNALED || u32 == EVENTMULTI_STATE_SIGNALED, VERR_INVALID_HANDLE);
+
+    /* Enter the critical section. */
+    int rcLock = pthread_mutex_lock(&pThis->Mutex);
+    if (RT_UNLIKELY(rcLock))
+    {
+        AssertMsgFailed(("Failed to lock event multi sem %p, rc=%d.\n", hEventMultiSem, rcLock));
+        return RTErrConvertFromErrno(rcLock);
+    }
+
+    /* Flip signaled -> not signaled; anything that is neither means destroyed. */
+    int rcRet = VINF_SUCCESS;
+    if (pThis->u32State != EVENTMULTI_STATE_SIGNALED)
+    {
+        if (pThis->u32State != EVENTMULTI_STATE_NOT_SIGNALED)
+            rcRet = VERR_SEM_DESTROYED;
+    }
+    else
+        ASMAtomicXchgU32(&pThis->u32State, EVENTMULTI_STATE_NOT_SIGNALED);
+
+    /* Leave the critical section; an unlock failure overrides the result. */
+    int rcUnlock = pthread_mutex_unlock(&pThis->Mutex);
+    if (RT_UNLIKELY(rcUnlock))
+    {
+        AssertMsgFailed(("Failed to unlock event multi sem %p, rc=%d.\n", hEventMultiSem, rcUnlock));
+        return RTErrConvertFromErrno(rcUnlock);
+    }
+
+    return rcRet;
+}
+
+
+/**
+ * Polls the multiple release event semaphore without blocking (the timeout
+ * had already expired when the wait was requested).
+ *
+ * The state is only sampled under the mutex, never modified.
+ *
+ * @returns VINF_SUCCESS if signaled, VERR_TIMEOUT if not signaled,
+ *          VERR_SEM_DESTROYED if destroyed, or the converted pthread error
+ *          if locking failed.
+ * @param   pThis   The semaphore.
+ */
+DECLINLINE(int) rtSemEventMultiPosixWaitPoll(struct RTSEMEVENTMULTIINTERNAL *pThis)
+{
+    int rc = pthread_mutex_lock(&pThis->Mutex);
+    AssertMsgReturn(!rc, ("Failed to lock event multi sem %p, rc=%d.\n", pThis, rc), RTErrConvertFromErrno(rc));
+
+    uint32_t const uSnapshot = pThis->u32State;
+
+    rc = pthread_mutex_unlock(&pThis->Mutex);
+    AssertMsg(!rc, ("Failed to unlock event multi sem %p, rc=%d.\n", pThis, rc)); NOREF(rc);
+
+    if (uSnapshot == EVENTMULTI_STATE_SIGNALED)
+        return VINF_SUCCESS;
+    if (uSnapshot == EVENTMULTI_STATE_UNINITIALIZED)
+        return VERR_SEM_DESTROYED;
+    return VERR_TIMEOUT;
+}
+
+
+
+/**
+ * Implements the indefinite wait.
+ *
+ * Blocks on the condition variable until the state leaves NOT_SIGNALED.  The
+ * state is left untouched on success, so all waiters get released.
+ *
+ * @returns See RTSemEventMultiWaitEx.
+ * @param pThis The semaphore.
+ * @param fFlags See RTSemEventMultiWaitEx.  Not used here (see the todo below).
+ * @param pSrcPos The source position, can be NULL.
+ */
+static int rtSemEventMultiPosixWaitIndefinite(struct RTSEMEVENTMULTIINTERNAL *pThis, uint32_t fFlags, PCRTLOCKVALSRCPOS pSrcPos)
+{
+ /* take mutex */
+ int rc = pthread_mutex_lock(&pThis->Mutex);
+ AssertMsgReturn(!rc, ("Failed to lock event multi sem %p, rc=%d.\n", pThis, rc), RTErrConvertFromErrno(rc));
+ /* The waiter count is only bumped once we own the mutex. */
+ ASMAtomicIncU32(&pThis->cWaiters);
+
+ for (;;)
+ {
+ /* check state. */
+ uint32_t const u32State = pThis->u32State;
+ if (u32State != EVENTMULTI_STATE_NOT_SIGNALED)
+ {
+ /* Either signaled or destroyed; in both cases we're done waiting. */
+ ASMAtomicDecU32(&pThis->cWaiters);
+ rc = pthread_mutex_unlock(&pThis->Mutex);
+ AssertMsg(!rc, ("Failed to unlock event multi sem %p, rc=%d.\n", pThis, rc));
+ return u32State == EVENTMULTI_STATE_SIGNALED
+ ? VINF_SUCCESS
+ : VERR_SEM_DESTROYED;
+ }
+
+ /* wait */
+#ifdef RTSEMEVENTMULTI_STRICT
+ RTTHREAD hThreadSelf = RTThreadSelfAutoAdopt();
+ if (pThis->fEverHadSignallers)
+ {
+ rc = RTLockValidatorRecSharedCheckBlocking(&pThis->Signallers, hThreadSelf, pSrcPos, false,
+ RT_INDEFINITE_WAIT, RTTHREADSTATE_EVENT_MULTI, true);
+ if (RT_FAILURE(rc))
+ {
+ ASMAtomicDecU32(&pThis->cWaiters);
+ pthread_mutex_unlock(&pThis->Mutex);
+ return rc;
+ }
+ }
+#else
+ RTTHREAD hThreadSelf = RTThreadSelf();
+ RT_NOREF_PV(pSrcPos);
+#endif
+ RTThreadBlocking(hThreadSelf, RTTHREADSTATE_EVENT_MULTI, true);
+ /** @todo interruptible wait is not implementable... */ NOREF(fFlags);
+ rc = pthread_cond_wait(&pThis->Cond, &pThis->Mutex);
+ RTThreadUnblocked(hThreadSelf, RTTHREADSTATE_EVENT_MULTI);
+ if (RT_UNLIKELY(rc))
+ {
+ AssertMsgFailed(("Failed to wait on event multi sem %p, rc=%d.\n", pThis, rc));
+ ASMAtomicDecU32(&pThis->cWaiters);
+ int rc2 = pthread_mutex_unlock(&pThis->Mutex);
+ AssertMsg(!rc2, ("Failed to unlock event multi sem %p, rc=%d.\n", pThis, rc2)); NOREF(rc2);
+ return RTErrConvertFromErrno(rc);
+ }
+ }
+}
+
+
+/**
+ * Implements the timed wait.
+ *
+ * Computes the deadline via rtSemPosixCalcDeadline, hands polling (zero
+ * relative deadline) and infinite (UINT64_MAX) requests over to the dedicated
+ * workers, and otherwise waits on the condition variable against the absolute
+ * deadline until the state changes, the wait fails or it times out.
+ *
+ * @returns See RTSemEventMultiWaitEx
+ * @param pThis The semaphore.
+ * @param fFlags See RTSemEventMultiWaitEx.
+ * @param uTimeout See RTSemEventMultiWaitEx.
+ * @param pSrcPos The source position, can be NULL.
+ */
+static int rtSemEventMultiPosixWaitTimed(struct RTSEMEVENTMULTIINTERNAL *pThis, uint32_t fFlags, uint64_t uTimeout,
+ PCRTLOCKVALSRCPOS pSrcPos)
+{
+ /*
+ * Convert the timeout specification to absolute and relative deadlines,
+ * diverting polling and infinite waits to the appropriate workers.
+ */
+ struct timespec AbsDeadline = { 0, 0 };
+ uint64_t const cNsRelativeDeadline = rtSemPosixCalcDeadline(fFlags, uTimeout, pThis->fMonotonicClock, &AbsDeadline);
+ if (cNsRelativeDeadline == 0)
+ return rtSemEventMultiPosixWaitPoll(pThis);
+ if (cNsRelativeDeadline == UINT64_MAX)
+ return rtSemEventMultiPosixWaitIndefinite(pThis, fFlags, pSrcPos);
+
+ /*
+ * To business!
+ */
+ /* take mutex */
+ int rc = pthread_mutex_lock(&pThis->Mutex);
+ AssertMsgReturn(rc == 0, ("rc=%d pThis=%p\n", rc, pThis), RTErrConvertFromErrno(rc)); NOREF(rc);
+ ASMAtomicIncU32(&pThis->cWaiters);
+
+ for (;;)
+ {
+ /* check state. (Re-evaluated after every wake-up from the condition variable.) */
+ uint32_t const u32State = pThis->u32State;
+ if (u32State != EVENTMULTI_STATE_NOT_SIGNALED)
+ {
+ ASMAtomicDecU32(&pThis->cWaiters);
+ rc = pthread_mutex_unlock(&pThis->Mutex);
+ AssertMsg(!rc, ("Failed to unlock event multi sem %p, rc=%d.\n", pThis, rc));
+ /* Anything but 'signalled' is reported as a destroyed semaphore. */
+ return u32State == EVENTMULTI_STATE_SIGNALED
+ ? VINF_SUCCESS
+ : VERR_SEM_DESTROYED;
+ }
+
+ /* wait */
+#ifdef RTSEMEVENTMULTI_STRICT
+ RTTHREAD hThreadSelf = RTThreadSelfAutoAdopt();
+ /* The lock validator is only consulted once a signaller has been registered. */
+ if (pThis->fEverHadSignallers)
+ {
+ /* NOTE(review): uTimeout is rounded up to milliseconds as if it were relative nanoseconds,
+ whatever unit/absolute flags the caller passed - confirm this is intended. */
+ rc = RTLockValidatorRecSharedCheckBlocking(&pThis->Signallers, hThreadSelf, pSrcPos, false,
+ (uTimeout + RT_NS_1MS - 1)/ RT_NS_1MS, RTTHREADSTATE_EVENT_MULTI, true);
+ if (RT_FAILURE(rc))
+ {
+ /* Undo the waiter registration and drop the mutex before bailing out. */
+ ASMAtomicDecU32(&pThis->cWaiters);
+ pthread_mutex_unlock(&pThis->Mutex);
+ return rc;
+ }
+ }
+#else
+ RTTHREAD hThreadSelf = RTThreadSelf();
+#endif
+ /* Bracket the native wait with the thread state bookkeeping calls.  The same
+ absolute deadline is reused on every iteration. */
+ RTThreadBlocking(hThreadSelf, RTTHREADSTATE_EVENT_MULTI, true);
+ rc = pthread_cond_timedwait(&pThis->Cond, &pThis->Mutex, &AbsDeadline);
+ RTThreadUnblocked(hThreadSelf, RTTHREADSTATE_EVENT_MULTI);
+
+ /* According to SuS this function shall not return EINTR, but linux man page might have said differently at some point... */
+ /* Keep looping on success and on EINTR, unless the caller asked not to resume interrupted waits. */
+ if ( rc != 0
+ && ( rc != EINTR
+ || (fFlags & RTSEMWAIT_FLAGS_NORESUME)) )
+ {
+ AssertMsg(rc == ETIMEDOUT, ("Failed to wait on event multi sem %p, rc=%d.\n", pThis, rc));
+ ASMAtomicDecU32(&pThis->cWaiters);
+ int rc2 = pthread_mutex_unlock(&pThis->Mutex);
+ AssertMsg(!rc2, ("Failed to unlock event multi sem %p, rc=%d.\n", pThis, rc2)); NOREF(rc2);
+ return RTErrConvertFromErrno(rc);
+ }
+ }
+}
+
+
+/**
+ * Common worker for the RTSemEventMultiWaitEx variants.
+ *
+ * Validates the handle and flags, short-cuts the already-signalled case via a
+ * poll, and otherwise dispatches to the indefinite or timed wait worker.
+ *
+ * @returns VERR_INVALID_HANDLE or VERR_INVALID_PARAMETER on bad input,
+ *          otherwise the status of the worker that handled the request.
+ * @param   hEventMultiSem  The semaphore handle.
+ * @param   fFlags          RTSEMWAIT_FLAGS_XXX.
+ * @param   uTimeout        The timeout, interpreted according to @a fFlags.
+ * @param   pSrcPos         The source position, can be NULL.
+ */
+DECLINLINE(int) rtSemEventMultiPosixWait(RTSEMEVENTMULTI hEventMultiSem, uint32_t fFlags, uint64_t uTimeout,
+                                         PCRTLOCKVALSRCPOS pSrcPos)
+{
+    /* Input validation: handle pointer, live state, wait flags. */
+    struct RTSEMEVENTMULTIINTERNAL *pThis = hEventMultiSem;
+    AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+    uint32_t u32 = pThis->u32State;
+    AssertReturn(u32 == EVENTMULTI_STATE_NOT_SIGNALED || u32 == EVENTMULTI_STATE_SIGNALED, VERR_INVALID_HANDLE);
+    AssertReturn(RTSEMWAIT_FLAGS_ARE_VALID(fFlags), VERR_INVALID_PARAMETER);
+
+    /* Fast path: if it looks signalled, confirm with a poll and only fall
+       through to a real wait when the poll reports a timeout. */
+    if (ASMAtomicUoReadU32(&pThis->u32State) == EVENTMULTI_STATE_SIGNALED)
+    {
+        int const rcPoll = rtSemEventMultiPosixWaitPoll(pThis);
+        if (RT_LIKELY(rcPoll != VERR_TIMEOUT))
+            return rcPoll;
+    }
+
+    /* Slow path: pick the worker matching the kind of wait requested. */
+    return (fFlags & RTSEMWAIT_FLAGS_INDEFINITE)
+         ? rtSemEventMultiPosixWaitIndefinite(pThis, fFlags, pSrcPos)
+         : rtSemEventMultiPosixWaitTimed(pThis, fFlags, uTimeout, pSrcPos);
+}
+
+
+#undef RTSemEventMultiWaitEx
+/**
+ * Waits on a multiple release event semaphore.
+ *
+ * Thin wrapper around rtSemEventMultiPosixWait.  In strict builds the source
+ * position of the caller is recorded and handed to the worker, otherwise NULL
+ * is passed.
+ *
+ * @returns Status of rtSemEventMultiPosixWait.
+ * @param   hEventMultiSem  The semaphore handle.
+ * @param   fFlags          RTSEMWAIT_FLAGS_XXX.
+ * @param   uTimeout        The timeout, interpreted according to @a fFlags.
+ */
+RTDECL(int) RTSemEventMultiWaitEx(RTSEMEVENTMULTI hEventMultiSem, uint32_t fFlags, uint64_t uTimeout)
+{
+    /* Use the multi-event strictness switch, like every other function working
+       on this semaphore type, rather than the one for plain event semaphores. */
+#ifndef RTSEMEVENTMULTI_STRICT
+    return rtSemEventMultiPosixWait(hEventMultiSem, fFlags, uTimeout, NULL);
+#else
+    RTLOCKVALSRCPOS SrcPos = RTLOCKVALSRCPOS_INIT_NORMAL_API();
+    return rtSemEventMultiPosixWait(hEventMultiSem, fFlags, uTimeout, &SrcPos);
+#endif
+}
+
+
+/**
+ * Debug variant of RTSemEventMultiWaitEx.
+ *
+ * Always forwards a source position record, initialised with
+ * RTLOCKVALSRCPOS_INIT_DEBUG_API(), to rtSemEventMultiPosixWait.
+ *
+ * @returns Status of rtSemEventMultiPosixWait.
+ * @param   hEventMultiSem  The semaphore handle.
+ * @param   fFlags          RTSEMWAIT_FLAGS_XXX.
+ * @param   uTimeout        The timeout, interpreted according to @a fFlags.
+ * @param   uId             Caller supplied identifier.
+ */
+RTDECL(int) RTSemEventMultiWaitExDebug(RTSEMEVENTMULTI hEventMultiSem, uint32_t fFlags, uint64_t uTimeout,
+                                       RTHCUINTPTR uId, RT_SRC_POS_DECL)
+{
+    RTLOCKVALSRCPOS CallerPos = RTLOCKVALSRCPOS_INIT_DEBUG_API();
+    int const rcWait = rtSemEventMultiPosixWait(hEventMultiSem, fFlags, uTimeout, &CallerPos);
+    return rcWait;
+}
+
+
+/**
+ * Makes @a hThread the sole registered signaller of the semaphore.
+ *
+ * Only does something in strict builds; otherwise both arguments are ignored.
+ *
+ * @param   hEventMultiSem  The semaphore handle.
+ * @param   hThread         The thread handed to the lock validator as owner.
+ */
+RTDECL(void) RTSemEventMultiSetSignaller(RTSEMEVENTMULTI hEventMultiSem, RTTHREAD hThread)
+{
+#ifndef RTSEMEVENTMULTI_STRICT
+    RT_NOREF_PV(hEventMultiSem); RT_NOREF_PV(hThread);
+#else
+    /* Validate the handle and make sure the semaphore is in a live state. */
+    struct RTSEMEVENTMULTIINTERNAL *pThis = hEventMultiSem;
+    AssertPtrReturnVoid(pThis);
+    uint32_t const u32 = pThis->u32State;
+    AssertReturnVoid(u32 == EVENTMULTI_STATE_NOT_SIGNALED || u32 == EVENTMULTI_STATE_SIGNALED);
+
+    /* Remember that signallers are in use, then reset the owner record. */
+    ASMAtomicWriteBool(&pThis->fEverHadSignallers, true);
+    RTLockValidatorRecSharedResetOwner(&pThis->Signallers, hThread, NULL);
+#endif
+}
+
+
+/**
+ * Registers @a hThread as an additional signaller of the semaphore.
+ *
+ * Only does something in strict builds; otherwise both arguments are ignored.
+ *
+ * @param   hEventMultiSem  The semaphore handle.
+ * @param   hThread         The thread to add to the lock validator record.
+ */
+RTDECL(void) RTSemEventMultiAddSignaller(RTSEMEVENTMULTI hEventMultiSem, RTTHREAD hThread)
+{
+#ifndef RTSEMEVENTMULTI_STRICT
+    RT_NOREF_PV(hEventMultiSem); RT_NOREF_PV(hThread);
+#else
+    /* Validate the handle and make sure the semaphore is in a live state. */
+    struct RTSEMEVENTMULTIINTERNAL *pThis = hEventMultiSem;
+    AssertPtrReturnVoid(pThis);
+    uint32_t const u32 = pThis->u32State;
+    AssertReturnVoid(u32 == EVENTMULTI_STATE_NOT_SIGNALED || u32 == EVENTMULTI_STATE_SIGNALED);
+
+    /* Remember that signallers are in use, then add the new owner. */
+    ASMAtomicWriteBool(&pThis->fEverHadSignallers, true);
+    RTLockValidatorRecSharedAddOwner(&pThis->Signallers, hThread, NULL);
+#endif
+}
+
+
+/**
+ * Removes @a hThread from the signallers registered with the semaphore.
+ *
+ * Only does something in strict builds; otherwise both arguments are ignored.
+ *
+ * @param   hEventMultiSem  The semaphore handle.
+ * @param   hThread         The thread to remove from the lock validator record.
+ */
+RTDECL(void) RTSemEventMultiRemoveSignaller(RTSEMEVENTMULTI hEventMultiSem, RTTHREAD hThread)
+{
+#ifndef RTSEMEVENTMULTI_STRICT
+    RT_NOREF_PV(hEventMultiSem); RT_NOREF_PV(hThread);
+#else
+    /* Validate the handle and make sure the semaphore is in a live state. */
+    struct RTSEMEVENTMULTIINTERNAL *pThis = hEventMultiSem;
+    AssertPtrReturnVoid(pThis);
+    uint32_t const u32 = pThis->u32State;
+    AssertReturnVoid(u32 == EVENTMULTI_STATE_NOT_SIGNALED || u32 == EVENTMULTI_STATE_SIGNALED);
+
+    RTLockValidatorRecSharedRemoveOwner(&pThis->Signallers, hThread);
+#endif
+}
+
diff --git a/src/VBox/Runtime/r3/posix/semmutex-posix.cpp b/src/VBox/Runtime/r3/posix/semmutex-posix.cpp
new file mode 100644
index 00000000..f0df48ef
--- /dev/null
+++ b/src/VBox/Runtime/r3/posix/semmutex-posix.cpp
@@ -0,0 +1,467 @@
+/* $Id: semmutex-posix.cpp $ */
+/** @file
+ * IPRT - Mutex Semaphore, POSIX.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#include <iprt/semaphore.h>
+#include "internal/iprt.h"
+
+#include <iprt/alloc.h>
+#include <iprt/asm.h>
+#include <iprt/assert.h>
+#include <iprt/err.h>
+#include <iprt/lockvalidator.h>
+#include <iprt/thread.h>
+#include "internal/magics.h"
+#include "internal/strict.h"
+
+#include <errno.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <sys/time.h>
+
+
+/*********************************************************************************************************************************
+* Structures and Typedefs *
+*********************************************************************************************************************************/
+/**
+ * Posix internal representation of a Mutex semaphore.
+ *
+ * Recursion is tracked by this structure (Owner + cNesting): a nested request
+ * by the owner only bumps the nesting count and does not touch the pthread
+ * mutex again.
+ */
+struct RTSEMMUTEXINTERNAL
+{
+ /** pthread mutex. */
+ pthread_mutex_t Mutex;
+ /** The native thread owning the mutex, (pthread_t)-1 when nobody owns it. */
+ volatile pthread_t Owner;
+ /** Nesting count: 0 when not owned, 1 after the first successful request,
+ * incremented for each nested request by the owner.  Set to UINT32_MAX when
+ * the semaphore is destroyed. */
+ volatile uint32_t cNesting;
+ /** Magic value (RTSEMMUTEX_MAGIC), RTSEMMUTEX_MAGIC_DEAD after destruction. */
+ uint32_t u32Magic;
+#ifdef RTSEMMUTEX_STRICT
+ /** Lock validator record associated with this mutex. */
+ RTLOCKVALRECEXCL ValidatorRec;
+#endif
+};
+
+#if defined(RT_OS_DARWIN) || defined(RT_OS_NETBSD)
+/**
+ * This function is a crude approximation of pthread_mutex_timedlock.
+ *
+ * The mutex is tried once up front.  While it stays busy, the function sleeps
+ * in slices of at most one second and retries after each slice until the
+ * relative timeout is used up.  Time left over from an interrupted sleep is
+ * put back into the remaining timeout.
+ *
+ * @returns 0 when the mutex was acquired, ETIMEDOUT when the timeout ran out
+ *          with the mutex still busy, otherwise the (non-EBUSY) error returned
+ *          by pthread_mutex_trylock.
+ * @param   mutex       The mutex to lock.
+ * @param   cMillies    The relative timeout in milliseconds.
+ */
+int rtSemFallbackPthreadMutexTimedlock(pthread_mutex_t *mutex, RTMSINTERVAL cMillies)
+{
+    struct timespec ts;
+    int rc;
+
+    rc = pthread_mutex_trylock(mutex);
+    if (rc != EBUSY)
+        return rc;
+
+    ts.tv_sec = cMillies / 1000;
+    ts.tv_nsec = (cMillies % 1000) * 1000000;
+
+    while (ts.tv_sec > 0 || ts.tv_nsec > 0)
+    {
+        struct timespec delta;
+        /* nanosleep only fills in the remaining time when the sleep gets
+           interrupted, so start from zero instead of reading stack garbage. */
+        struct timespec remaining = { 0, 0 };
+
+        if (ts.tv_sec > 0)
+        {
+            delta.tv_sec = 1;
+            delta.tv_nsec = 0;
+            ts.tv_sec--;
+        }
+        else
+        {
+            delta.tv_sec = 0;
+            delta.tv_nsec = ts.tv_nsec;
+            ts.tv_nsec = 0;
+        }
+
+        /* Capture the interruption status right away, before any other call
+           gets a chance to modify errno. */
+        bool const fInterrupted = nanosleep(&delta, &remaining) != 0 && errno == EINTR;
+
+        rc = pthread_mutex_trylock(mutex);
+        if (rc != EBUSY)
+            return rc;
+
+        /* Give back whatever part of the slice was not actually slept. */
+        if (RT_UNLIKELY(fInterrupted && (remaining.tv_nsec > 0 || remaining.tv_sec > 0)))
+        {
+            ts.tv_sec += remaining.tv_sec;
+            ts.tv_nsec += remaining.tv_nsec;
+            if (ts.tv_nsec >= 1000000000)
+            {
+                ts.tv_nsec -= 1000000000;
+                ts.tv_sec++;
+            }
+        }
+    }
+
+    return ETIMEDOUT;
+}
+#endif
+
+
+#undef RTSemMutexCreate
+/**
+ * Creates a mutex semaphore with default settings: no flags, no lock
+ * validator class or sub-class and no name format.
+ *
+ * @returns Status of RTSemMutexCreateEx.
+ * @param   phMutexSem  Where to store the handle of the new mutex.
+ */
+RTDECL(int) RTSemMutexCreate(PRTSEMMUTEX phMutexSem)
+{
+    int const rcCreate = RTSemMutexCreateEx(phMutexSem, 0 /*fFlags*/, NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE, NULL);
+    return rcCreate;
+}
+
+
+/**
+ * Creates a mutex semaphore.
+ *
+ * @returns VINF_SUCCESS on success, VERR_INVALID_PARAMETER on unknown flags,
+ *          VERR_NO_MEMORY if the handle cannot be allocated, or an IPRT status
+ *          converted from the errno returned by pthread_mutex_init.
+ * @param   phMutexSem  Where to store the handle of the new mutex.  Only
+ *                      written on success.
+ * @param   fFlags      Only RTSEMMUTEX_FLAGS_NO_LOCK_VAL is accepted.
+ * @param   hClass      Lock validator class (strict builds only).
+ * @param   uSubClass   Lock validator sub-class (strict builds only).
+ * @param   pszNameFmt  Name format string for the lock validator record, NULL
+ *                      for a generated "RTSemMutex-N" name (strict builds only).
+ * @param   ...         Format arguments.
+ */
+RTDECL(int) RTSemMutexCreateEx(PRTSEMMUTEX phMutexSem, uint32_t fFlags,
+                               RTLOCKVALCLASS hClass, uint32_t uSubClass, const char *pszNameFmt, ...)
+{
+    AssertReturn(!(fFlags & ~RTSEMMUTEX_FLAGS_NO_LOCK_VAL), VERR_INVALID_PARAMETER);
+
+    /*
+     * Allocate semaphore handle.
+     */
+    int rc;
+    struct RTSEMMUTEXINTERNAL *pThis = (struct RTSEMMUTEXINTERNAL *)RTMemAlloc(sizeof(struct RTSEMMUTEXINTERNAL));
+    if (pThis)
+    {
+        /*
+         * Create the semaphore.
+         */
+        rc = pthread_mutex_init(&pThis->Mutex, NULL);
+        if (!rc)
+        {
+            pThis->Owner = (pthread_t)-1;
+            pThis->cNesting = 0;
+            pThis->u32Magic = RTSEMMUTEX_MAGIC;
+#ifdef RTSEMMUTEX_STRICT
+            if (!pszNameFmt)
+            {
+                static uint32_t volatile s_iMutexAnon = 0;
+                RTLockValidatorRecExclInit(&pThis->ValidatorRec, hClass, uSubClass, pThis,
+                                           !(fFlags & RTSEMMUTEX_FLAGS_NO_LOCK_VAL),
+                                           "RTSemMutex-%u", ASMAtomicIncU32(&s_iMutexAnon) - 1);
+            }
+            else
+            {
+                va_list va;
+                va_start(va, pszNameFmt);
+                RTLockValidatorRecExclInitV(&pThis->ValidatorRec, hClass, uSubClass, pThis,
+                                            !(fFlags & RTSEMMUTEX_FLAGS_NO_LOCK_VAL), pszNameFmt, va);
+                va_end(va);
+            }
+#else
+            RT_NOREF_PV(hClass); RT_NOREF_PV(uSubClass); RT_NOREF_PV(pszNameFmt);
+#endif
+
+            *phMutexSem = pThis;
+            return VINF_SUCCESS;
+        }
+
+        /* pthread_mutex_init hands back a positive errno value; returning it
+           unconverted would look like an (informational) success status to
+           IPRT callers even though no handle was produced. */
+        rc = RTErrConvertFromErrno(rc);
+        RTMemFree(pThis);
+    }
+    else
+        rc = VERR_NO_MEMORY;
+
+    return rc;
+}
+
+
+/**
+ * Destroys a mutex semaphore.
+ *
+ * @returns VINF_SUCCESS on success or when passed NIL_RTSEMMUTEX,
+ *          VERR_INVALID_HANDLE for a bad handle, or a status converted from
+ *          the errno returned by pthread_mutex_destroy (in which case the
+ *          semaphore is left untouched).
+ * @param   hMutexSem   The mutex to destroy.  NIL_RTSEMMUTEX is quietly ignored.
+ */
+RTDECL(int) RTSemMutexDestroy(RTSEMMUTEX hMutexSem)
+{
+    /*
+     * Validate input.
+     */
+    if (hMutexSem == NIL_RTSEMMUTEX)
+        return VINF_SUCCESS;
+    struct RTSEMMUTEXINTERNAL *pThis = hMutexSem;
+    AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+    AssertReturn(pThis->u32Magic == RTSEMMUTEX_MAGIC, VERR_INVALID_HANDLE);
+
+    /*
+     * Try destroy it.
+     */
+    int rc = pthread_mutex_destroy(&pThis->Mutex);
+    if (rc)
+    {
+        AssertMsgFailed(("Failed to destroy mutex sem %p, rc=%d.\n", hMutexSem, rc));
+        return RTErrConvertFromErrno(rc);
+    }
+
+    /*
+     * Invalidate the structure and free the memory.
+     */
+    ASMAtomicWriteU32(&pThis->u32Magic, RTSEMMUTEX_MAGIC_DEAD);
+    pThis->Owner = (pthread_t)-1;
+    pThis->cNesting = UINT32_MAX;
+#ifdef RTSEMMUTEX_STRICT
+    RTLockValidatorRecExclDelete(&pThis->ValidatorRec);
+#endif
+    /* The handle was obtained with RTMemAlloc, so release it with the matching
+       RTMemFree rather than the temporary-memory variant. */
+    RTMemFree(pThis);
+
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Changes the lock validator sub-class of the mutex.
+ *
+ * @returns In strict builds the value returned by
+ *          RTLockValidatorRecExclSetSubClass, or RTLOCKVAL_SUB_CLASS_INVALID
+ *          for a bad handle.  Always RTLOCKVAL_SUB_CLASS_INVALID in non-strict
+ *          builds.
+ * @param   hMutexSem   The mutex handle.
+ * @param   uSubClass   The new sub-class.
+ */
+RTDECL(uint32_t) RTSemMutexSetSubClass(RTSEMMUTEX hMutexSem, uint32_t uSubClass)
+{
+#ifndef RTSEMMUTEX_STRICT
+    RT_NOREF_PV(hMutexSem); RT_NOREF_PV(uSubClass);
+    return RTLOCKVAL_SUB_CLASS_INVALID;
+#else
+    /* Check the handle before touching the validator record. */
+    RTSEMMUTEXINTERNAL *pThis = hMutexSem;
+    AssertPtrReturn(pThis, RTLOCKVAL_SUB_CLASS_INVALID);
+    AssertReturn(pThis->u32Magic == RTSEMMUTEX_MAGIC, RTLOCKVAL_SUB_CLASS_INVALID);
+
+    return RTLockValidatorRecExclSetSubClass(&pThis->ValidatorRec, uSubClass);
+#endif
+}
+
+
+/**
+ * Common worker for the RTSemMutexRequest variants.
+ *
+ * A request by the current owner only bumps the nesting count.  Otherwise the
+ * pthread mutex is taken, either without a time limit (RT_INDEFINITE_WAIT) or
+ * with a deadline derived from @a cMillies, and ownership is recorded.
+ *
+ * @returns VINF_SUCCESS on success, VERR_INVALID_HANDLE for a bad handle, a
+ *          status converted from the errno of the failing pthread call, or (in
+ *          strict builds) a lock validator failure status.
+ * @param hMutexSem The mutex handle.
+ * @param cMillies Timeout in milliseconds, RT_INDEFINITE_WAIT for no timeout.
+ * @param pSrcPos The source position, only used in strict builds.
+ */
+DECL_FORCE_INLINE(int) rtSemMutexRequest(RTSEMMUTEX hMutexSem, RTMSINTERVAL cMillies, PCRTLOCKVALSRCPOS pSrcPos)
+{
+ /*
+ * Validate input.
+ */
+ struct RTSEMMUTEXINTERNAL *pThis = hMutexSem;
+ AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+ AssertReturn(pThis->u32Magic == RTSEMMUTEX_MAGIC, VERR_INVALID_HANDLE);
+
+ /*
+ * Check if nested request.
+ */
+ pthread_t Self = pthread_self();
+ if ( pThis->Owner == Self
+ && pThis->cNesting > 0)
+ {
+#ifdef RTSEMMUTEX_STRICT
+ int rc9 = RTLockValidatorRecExclRecursion(&pThis->ValidatorRec, pSrcPos);
+ if (RT_FAILURE(rc9))
+ return rc9;
+#endif
+ /* The pthread mutex is already held by us, so only the count changes. */
+ ASMAtomicIncU32(&pThis->cNesting);
+ return VINF_SUCCESS;
+ }
+
+ /*
+ * Lock it.
+ */
+ /* The blocking bookkeeping is skipped for cMillies == 0; hThreadSelf then stays NIL_RTTHREAD. */
+ RTTHREAD hThreadSelf = NIL_RTTHREAD;
+ if (cMillies != 0)
+ {
+#ifdef RTSEMMUTEX_STRICT
+ hThreadSelf = RTThreadSelfAutoAdopt();
+ int rc9 = RTLockValidatorRecExclCheckOrderAndBlocking(&pThis->ValidatorRec, hThreadSelf, pSrcPos, true,
+ cMillies, RTTHREADSTATE_MUTEX, true);
+ if (RT_FAILURE(rc9))
+ return rc9;
+#else
+ hThreadSelf = RTThreadSelf();
+ RTThreadBlocking(hThreadSelf, RTTHREADSTATE_MUTEX, true);
+ RT_NOREF_PV(pSrcPos);
+#endif
+ }
+
+ if (cMillies == RT_INDEFINITE_WAIT)
+ {
+ /* take mutex */
+ int rc = pthread_mutex_lock(&pThis->Mutex);
+ RTThreadUnblocked(hThreadSelf, RTTHREADSTATE_MUTEX);
+ if (rc)
+ {
+ AssertMsgFailed(("Failed to lock mutex sem %p, rc=%d.\n", hMutexSem, rc)); NOREF(rc);
+ return RTErrConvertFromErrno(rc);
+ }
+ }
+ else
+ {
+ int rc;
+#if !defined(RT_OS_DARWIN) && !defined(RT_OS_NETBSD)
+ /* Build the absolute deadline: current wall-clock time plus cMillies. */
+ struct timespec ts = {0,0};
+# if defined(RT_OS_HAIKU)
+ struct timeval tv = {0,0};
+ gettimeofday(&tv, NULL);
+ ts.tv_sec = tv.tv_sec;
+ ts.tv_nsec = tv.tv_usec * 1000;
+# else
+ clock_gettime(CLOCK_REALTIME, &ts);
+# endif
+ if (cMillies != 0)
+ {
+ ts.tv_nsec += (cMillies % 1000) * 1000000;
+ ts.tv_sec += cMillies / 1000;
+ /* Carry nanosecond overflow into the seconds field. */
+ if (ts.tv_nsec >= 1000000000)
+ {
+ ts.tv_nsec -= 1000000000;
+ ts.tv_sec++;
+ }
+ }
+
+ /* take mutex */
+ rc = pthread_mutex_timedlock(&pThis->Mutex, &ts);
+#else
+ /*
+ * When there's no pthread_mutex_timedlock() use a crude sleep
+ * and retry approximation. Since the sleep interval is
+ * relative, we don't need to convert to the absolute time
+ * here only to convert back to relative in the fallback
+ * function.
+ */
+ rc = rtSemFallbackPthreadMutexTimedlock(&pThis->Mutex, cMillies);
+#endif
+ RTThreadUnblocked(hThreadSelf, RTTHREADSTATE_MUTEX);
+ if (rc)
+ {
+ /* A timeout is the only failure that does not trip the assertion. */
+ AssertMsg(rc == ETIMEDOUT, ("Failed to lock mutex sem %p, rc=%d.\n", hMutexSem, rc)); NOREF(rc);
+ return RTErrConvertFromErrno(rc);
+ }
+ }
+
+ /*
+ * Set the owner and nesting.
+ */
+ pThis->Owner = Self;
+ ASMAtomicWriteU32(&pThis->cNesting, 1);
+#ifdef RTSEMMUTEX_STRICT
+ RTLockValidatorRecExclSetOwner(&pThis->ValidatorRec, hThreadSelf, pSrcPos, true);
+#endif
+
+ return VINF_SUCCESS;
+}
+
+
+#undef RTSemMutexRequest
+/**
+ * Requests ownership of the mutex.
+ *
+ * Thin wrapper around rtSemMutexRequest; strict builds record the source
+ * position of the caller, other builds pass NULL.
+ *
+ * @returns Status of rtSemMutexRequest.
+ * @param   hMutexSem   The mutex handle.
+ * @param   cMillies    Timeout in milliseconds.
+ */
+RTDECL(int) RTSemMutexRequest(RTSEMMUTEX hMutexSem, RTMSINTERVAL cMillies)
+{
+#ifdef RTSEMMUTEX_STRICT
+    RTLOCKVALSRCPOS CallerPos = RTLOCKVALSRCPOS_INIT_NORMAL_API();
+    return rtSemMutexRequest(hMutexSem, cMillies, &CallerPos);
+#else
+    return rtSemMutexRequest(hMutexSem, cMillies, NULL);
+#endif
+}
+
+
+/**
+ * Debug variant of RTSemMutexRequest.
+ *
+ * Always forwards a source position record, initialised with
+ * RTLOCKVALSRCPOS_INIT_DEBUG_API(), to rtSemMutexRequest.
+ *
+ * @returns Status of rtSemMutexRequest.
+ * @param   hMutexSem   The mutex handle.
+ * @param   cMillies    Timeout in milliseconds.
+ * @param   uId         Caller supplied identifier.
+ */
+RTDECL(int) RTSemMutexRequestDebug(RTSEMMUTEX hMutexSem, RTMSINTERVAL cMillies, RTHCUINTPTR uId, RT_SRC_POS_DECL)
+{
+    RTLOCKVALSRCPOS CallerPos = RTLOCKVALSRCPOS_INIT_DEBUG_API();
+    int const rcRequest = rtSemMutexRequest(hMutexSem, cMillies, &CallerPos);
+    return rcRequest;
+}
+
+
+#undef RTSemMutexRequestNoResume
+/**
+ * Requests ownership of the mutex, no-resume flavour.
+ *
+ * Implemented exactly like RTSemMutexRequest since the wait functions used by
+ * the worker do not return EINTR.
+ *
+ * @returns Status of rtSemMutexRequest.
+ * @param   hMutexSem   The mutex handle.
+ * @param   cMillies    Timeout in milliseconds.
+ */
+RTDECL(int) RTSemMutexRequestNoResume(RTSEMMUTEX hMutexSem, RTMSINTERVAL cMillies)
+{
+    /* (EINTR isn't returned by the wait functions we're using.) */
+#ifdef RTSEMMUTEX_STRICT
+    RTLOCKVALSRCPOS CallerPos = RTLOCKVALSRCPOS_INIT_NORMAL_API();
+    return rtSemMutexRequest(hMutexSem, cMillies, &CallerPos);
+#else
+    return rtSemMutexRequest(hMutexSem, cMillies, NULL);
+#endif
+}
+
+
+/**
+ * Debug variant of RTSemMutexRequestNoResume.
+ *
+ * Always forwards a source position record, initialised with
+ * RTLOCKVALSRCPOS_INIT_DEBUG_API(), to rtSemMutexRequest.
+ *
+ * @returns Status of rtSemMutexRequest.
+ * @param   hMutexSem   The mutex handle.
+ * @param   cMillies    Timeout in milliseconds.
+ * @param   uId         Caller supplied identifier.
+ */
+RTDECL(int) RTSemMutexRequestNoResumeDebug(RTSEMMUTEX hMutexSem, RTMSINTERVAL cMillies, RTHCUINTPTR uId, RT_SRC_POS_DECL)
+{
+    RTLOCKVALSRCPOS CallerPos = RTLOCKVALSRCPOS_INIT_DEBUG_API();
+    int const rcRequest = rtSemMutexRequest(hMutexSem, cMillies, &CallerPos);
+    return rcRequest;
+}
+
+
+/**
+ * Releases one level of ownership of the mutex.
+ *
+ * A nested release only decrements the nesting count; the outermost release
+ * clears the ownership state and unlocks the pthread mutex.
+ *
+ * @returns VINF_SUCCESS on success, VERR_INVALID_HANDLE for a bad handle,
+ *          VERR_NOT_OWNER if the calling thread does not own the mutex, a
+ *          status converted from the errno of pthread_mutex_unlock, or (in
+ *          strict builds) a lock validator failure status.
+ * @param   hMutexSem   The mutex handle.
+ */
+RTDECL(int) RTSemMutexRelease(RTSEMMUTEX hMutexSem)
+{
+    /* Handle validation. */
+    struct RTSEMMUTEXINTERNAL *pThis = hMutexSem;
+    AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+    AssertReturn(pThis->u32Magic == RTSEMMUTEX_MAGIC, VERR_INVALID_HANDLE);
+
+#ifdef RTSEMMUTEX_STRICT
+    /* Let the lock validator have its say first. */
+    int rcLockVal = RTLockValidatorRecExclReleaseOwner(&pThis->ValidatorRec, pThis->cNesting == 1);
+    if (RT_FAILURE(rcLockVal))
+        return rcLockVal;
+#endif
+
+    /* Only the owner, holding at least one nesting level, may release. */
+    pthread_t hNativeSelf = pthread_self();
+    if (RT_UNLIKELY(!(   pThis->Owner == hNativeSelf
+                      && pThis->cNesting != 0)))
+    {
+        AssertMsgFailed(("Not owner of mutex %p!! Self=%08x Owner=%08x cNesting=%d\n",
+                         pThis, hNativeSelf, pThis->Owner, pThis->cNesting));
+        return VERR_NOT_OWNER;
+    }
+
+    /* Inner release: pop one nesting level and keep the pthread mutex. */
+    if (pThis->cNesting > 1)
+    {
+        ASMAtomicDecU32(&pThis->cNesting);
+        return VINF_SUCCESS;
+    }
+
+    /* Outermost release (cNesting == 1): reset the ownership state before
+       letting go of the pthread mutex. */
+    pThis->Owner = (pthread_t)-1;
+    ASMAtomicWriteU32(&pThis->cNesting, 0);
+
+    int rc = pthread_mutex_unlock(&pThis->Mutex);
+    if (RT_LIKELY(!rc))
+        return VINF_SUCCESS;
+
+    AssertMsgFailed(("Failed to unlock mutex sem %p, rc=%d.\n", hMutexSem, rc)); NOREF(rc);
+    return RTErrConvertFromErrno(rc);
+}
+
+
+/**
+ * Checks whether the mutex currently has an owner.
+ *
+ * @returns true if the owner field holds something other than the
+ *          "no owner" value (pthread_t)-1, false otherwise or if the handle
+ *          is invalid.
+ * @param   hMutexSem   The mutex handle.
+ */
+RTDECL(bool) RTSemMutexIsOwned(RTSEMMUTEX hMutexSem)
+{
+    /* Handle validation. */
+    RTSEMMUTEXINTERNAL *pThis = hMutexSem;
+    AssertPtrReturn(pThis, false);
+    AssertReturn(pThis->u32Magic == RTSEMMUTEX_MAGIC, false);
+
+    pthread_t const hCurOwner = pThis->Owner;
+    return !(hCurOwner == (pthread_t)-1);
+}
+
diff --git a/src/VBox/Runtime/r3/posix/semrw-posix.cpp b/src/VBox/Runtime/r3/posix/semrw-posix.cpp
new file mode 100644
index 00000000..ab657566
--- /dev/null
+++ b/src/VBox/Runtime/r3/posix/semrw-posix.cpp
@@ -0,0 +1,741 @@
+/* $Id: semrw-posix.cpp $ */
+/** @file
+ * IPRT - Read-Write Semaphore, POSIX.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#include <iprt/semaphore.h>
+#include "internal/iprt.h"
+
+#include <iprt/asm.h>
+#include <iprt/assert.h>
+#include <iprt/err.h>
+#include <iprt/lockvalidator.h>
+#include <iprt/mem.h>
+#include <iprt/thread.h>
+
+#include <errno.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <sys/time.h>
+
+#include "internal/magics.h"
+#include "internal/strict.h"
+
+
+/*********************************************************************************************************************************
+* Defined Constants And Macros *
+*********************************************************************************************************************************/
+/** @todo move this to r3/posix/something.h. */
+/*
+ * Helpers for atomically reading and writing a pthread_t variable.
+ *
+ * ATOMIC_GET_PTHREAD_T(ppvVar, pThread) loads the variable at ppvVar into
+ * *pThread, ATOMIC_SET_PTHREAD_T(ppvVar, pThread) stores its second argument
+ * into the variable at ppvVar.
+ */
+#ifdef RT_OS_SOLARIS
+/* Solaris: use the size-generic atomic accessors. */
+# define ATOMIC_GET_PTHREAD_T(ppvVar, pThread) ASMAtomicReadSize(ppvVar, pThread)
+# define ATOMIC_SET_PTHREAD_T(ppvVar, pThread) ASMAtomicWriteSize(ppvVar, pThread)
+#else
+/* Elsewhere: pthread_t must be pointer sized (checked at compile time) so the
+ * pointer flavoured atomic accessors can be used via casts. */
+AssertCompileSize(pthread_t, sizeof(void *));
+# define ATOMIC_GET_PTHREAD_T(ppvVar, pThread) do { *(pThread) = (pthread_t)ASMAtomicReadPtr((void * volatile *)ppvVar); } while (0)
+# define ATOMIC_SET_PTHREAD_T(ppvVar, pThread) ASMAtomicWritePtr((void * volatile *)ppvVar, (void *)pThread)
+#endif
+
+
+/*********************************************************************************************************************************
+* Structures and Typedefs *
+*********************************************************************************************************************************/
+/**
+ * Posix internal representation of a read-write semaphore.
+ *
+ * The pthread rwlock does the actual locking; the counters and the writer
+ * field track recursion by the write owner and the number of readers.
+ */
+struct RTSEMRWINTERNAL
+{
+ /** The usual magic. (RTSEMRW_MAGIC) */
+ uint32_t u32Magic;
+ /** The number of readers.
+ * Incremented after a successful read lock and decremented on read release.
+ * (For preventing screwing up the lock on linux). */
+ uint32_t volatile cReaders;
+ /** Number of write recursions. */
+ uint32_t cWrites;
+ /** Number of read recursions by the writer. */
+ uint32_t cWriterReads;
+ /** The write owner of the lock, (pthread_t)-1 when there is none. */
+ volatile pthread_t Writer;
+ /** pthread rwlock. */
+ pthread_rwlock_t RWLock;
+#ifdef RTSEMRW_STRICT
+ /** The validator record for the writer. */
+ RTLOCKVALRECEXCL ValidatorWrite;
+ /** The validator record for the readers. */
+ RTLOCKVALRECSHRD ValidatorRead;
+#endif
+};
+
+
+
+#undef RTSemRWCreate
+/**
+ * Creates a read-write semaphore with default settings: no flags, no lock
+ * validator class or sub-class, and "RTSemRW" as the name format.
+ *
+ * @returns Status of RTSemRWCreateEx.
+ * @param   phRWSem     Where to store the handle of the new semaphore.
+ */
+RTDECL(int) RTSemRWCreate(PRTSEMRW phRWSem)
+{
+    int const rcCreate = RTSemRWCreateEx(phRWSem, 0 /*fFlags*/, NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE, "RTSemRW");
+    return rcCreate;
+}
+
+
+/**
+ * Creates a read-write semaphore.
+ *
+ * @returns VINF_SUCCESS on success, VERR_INVALID_PARAMETER on unknown flags,
+ *          VERR_NO_MEMORY if the handle cannot be allocated, or a status
+ *          converted from the errno returned by pthread_rwlock_init.
+ * @param   phRWSem     Where to store the handle of the new semaphore.  Only
+ *                      written on success.
+ * @param   fFlags      Only RTSEMRW_FLAGS_NO_LOCK_VAL is accepted.
+ * @param   hClass      Lock validator class (strict builds only).
+ * @param   uSubClass   Lock validator sub-class (strict builds only).
+ * @param   pszNameFmt  Name format string for the lock validator records, NULL
+ *                      for a generated "RTSemRW-N" name (strict builds only).
+ * @param   ...         Format arguments.
+ */
+RTDECL(int) RTSemRWCreateEx(PRTSEMRW phRWSem, uint32_t fFlags,
+                            RTLOCKVALCLASS hClass, uint32_t uSubClass, const char *pszNameFmt, ...)
+{
+    AssertReturn(!(fFlags & ~RTSEMRW_FLAGS_NO_LOCK_VAL), VERR_INVALID_PARAMETER);
+
+    /* Get memory for the handle. */
+    struct RTSEMRWINTERNAL *pThis = (struct RTSEMRWINTERNAL *)RTMemAlloc(sizeof(*pThis));
+    if (!pThis)
+        return VERR_NO_MEMORY;
+
+    /* Set up the native lock, backing out if that fails. */
+    int rc = pthread_rwlock_init(&pThis->RWLock, NULL);
+    if (rc)
+    {
+        rc = RTErrConvertFromErrno(rc);
+        RTMemFree(pThis);
+        return rc;
+    }
+
+    /* Initialise the bookkeeping: nobody reads, nobody writes. */
+    pThis->u32Magic = RTSEMRW_MAGIC;
+    pThis->cReaders = 0;
+    pThis->cWrites = 0;
+    pThis->cWriterReads = 0;
+    pThis->Writer = (pthread_t)-1;
+#ifdef RTSEMRW_STRICT
+    bool const fLVEnabled = !(fFlags & RTSEMRW_FLAGS_NO_LOCK_VAL);
+    if (pszNameFmt)
+    {
+        /* The argument list is walked twice, once per validator record. */
+        va_list va;
+        va_start(va, pszNameFmt);
+        RTLockValidatorRecExclInitV(&pThis->ValidatorWrite, hClass, uSubClass, pThis,
+                                    fLVEnabled, pszNameFmt, va);
+        va_end(va);
+        va_start(va, pszNameFmt);
+        RTLockValidatorRecSharedInitV(&pThis->ValidatorRead, hClass, uSubClass, pThis,
+                                      false /*fSignaller*/, fLVEnabled, pszNameFmt, va);
+        va_end(va);
+    }
+    else
+    {
+        /* Anonymous semaphore: both records share one generated sequence number. */
+        static uint32_t volatile s_iSemRWAnon = 0;
+        uint32_t iAnon = ASMAtomicIncU32(&s_iSemRWAnon) - 1;
+        RTLockValidatorRecExclInit(&pThis->ValidatorWrite, hClass, uSubClass, pThis,
+                                   fLVEnabled, "RTSemRW-%u", iAnon);
+        RTLockValidatorRecSharedInit(&pThis->ValidatorRead, hClass, uSubClass, pThis,
+                                     false /*fSignaller*/, fLVEnabled, "RTSemRW-%u", iAnon);
+    }
+    RTLockValidatorRecMakeSiblings(&pThis->ValidatorWrite.Core, &pThis->ValidatorRead.Core);
+#else
+    RT_NOREF_PV(hClass); RT_NOREF_PV(uSubClass); RT_NOREF_PV(pszNameFmt);
+#endif
+    *phRWSem = pThis;
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Destroys a read-write semaphore.
+ *
+ * The magic is atomically swapped to its inverted value before the native
+ * lock is destroyed, and restored again should that destruction fail.
+ *
+ * @returns VINF_SUCCESS on success or when passed NIL_RTSEMRW,
+ *          VERR_INVALID_HANDLE for a bad handle or when the magic swap fails,
+ *          or a status converted from the errno returned by
+ *          pthread_rwlock_destroy.
+ * @param   hRWSem      The semaphore to destroy.  NIL_RTSEMRW is quietly ignored.
+ */
+RTDECL(int) RTSemRWDestroy(RTSEMRW hRWSem)
+{
+    /* Handle validation; a nil handle is not an error. */
+    struct RTSEMRWINTERNAL *pThis = hRWSem;
+    if (pThis == NIL_RTSEMRW)
+        return VINF_SUCCESS;
+    AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+    AssertMsgReturn(pThis->u32Magic == RTSEMRW_MAGIC,
+                    ("pThis=%p u32Magic=%#x\n", pThis, pThis->u32Magic),
+                    VERR_INVALID_HANDLE);
+    /* The semaphore is expected to be completely idle at this point. */
+    Assert(pThis->Writer == (pthread_t)-1);
+    Assert(!pThis->cReaders);
+    Assert(!pThis->cWrites);
+    Assert(!pThis->cWriterReads);
+
+    /* Invalidate the magic first, then take down the native lock. */
+    AssertReturn(ASMAtomicCmpXchgU32(&pThis->u32Magic, ~RTSEMRW_MAGIC, RTSEMRW_MAGIC), VERR_INVALID_HANDLE);
+    int rc = pthread_rwlock_destroy(&pThis->RWLock);
+    if (rc)
+    {
+        /* Destruction failed: make the handle usable again and report. */
+        ASMAtomicWriteU32(&pThis->u32Magic, RTSEMRW_MAGIC);
+        AssertMsgFailed(("Failed to destroy read-write sem %p, rc=%d.\n", hRWSem, rc));
+        return RTErrConvertFromErrno(rc);
+    }
+
+#ifdef RTSEMRW_STRICT
+    RTLockValidatorRecSharedDelete(&pThis->ValidatorRead);
+    RTLockValidatorRecExclDelete(&pThis->ValidatorWrite);
+#endif
+    RTMemFree(pThis);
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Changes the lock validator sub-class of the read-write semaphore.
+ *
+ * In strict builds both the read and the write validator records are updated.
+ *
+ * @returns In strict builds the value returned by
+ *          RTLockValidatorRecExclSetSubClass for the write record, or
+ *          RTLOCKVAL_SUB_CLASS_INVALID for a bad handle.  Always
+ *          RTLOCKVAL_SUB_CLASS_INVALID in non-strict builds.
+ * @param   hRWSem      The semaphore handle.
+ * @param   uSubClass   The new sub-class.
+ */
+RTDECL(uint32_t) RTSemRWSetSubClass(RTSEMRW hRWSem, uint32_t uSubClass)
+{
+#ifndef RTSEMRW_STRICT
+    RT_NOREF_PV(hRWSem); RT_NOREF_PV(uSubClass);
+    return RTLOCKVAL_SUB_CLASS_INVALID;
+#else
+    /* Check the handle before touching the validator records. */
+    struct RTSEMRWINTERNAL *pThis = hRWSem;
+    AssertPtrReturn(pThis, RTLOCKVAL_SUB_CLASS_INVALID);
+    AssertReturn(pThis->u32Magic == RTSEMRW_MAGIC, RTLOCKVAL_SUB_CLASS_INVALID);
+
+    /* Readers first, the writer record supplies the return value. */
+    RTLockValidatorRecSharedSetSubClass(&pThis->ValidatorRead, uSubClass);
+    return RTLockValidatorRecExclSetSubClass(&pThis->ValidatorWrite, uSubClass);
+#endif
+}
+
+
+/**
+ * Common worker for the RTSemRWRequestRead variants.
+ *
+ * A read request by the current write owner is treated as recursion and only
+ * bumps cWriterReads.  Otherwise the pthread rwlock is read-locked, either
+ * without a time limit (RT_INDEFINITE_WAIT) or with a deadline derived from
+ * @a cMillies, and the reader is accounted for.
+ *
+ * @returns VINF_SUCCESS on success, VERR_INVALID_HANDLE for a bad handle,
+ *          VERR_NOT_IMPLEMENTED for timed and polling requests on Darwin, a
+ *          status converted from the errno of the failing pthread call, or (in
+ *          strict builds) a lock validator failure status.
+ * @param   hRWSem      The semaphore handle.
+ * @param   cMillies    Timeout in milliseconds, RT_INDEFINITE_WAIT for no timeout.
+ * @param   pSrcPos     The source position, only used in strict builds.
+ */
+DECL_FORCE_INLINE(int) rtSemRWRequestRead(RTSEMRW hRWSem, RTMSINTERVAL cMillies, PCRTLOCKVALSRCPOS pSrcPos)
+{
+    /*
+     * Validate input.
+     */
+    struct RTSEMRWINTERNAL *pThis = hRWSem;
+    AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+    AssertMsgReturn(pThis->u32Magic == RTSEMRW_MAGIC,
+                    ("pThis=%p u32Magic=%#x\n", pThis, pThis->u32Magic),
+                    VERR_INVALID_HANDLE);
+
+    /*
+     * Check if it's the writer (implement write+read recursion).
+     */
+    pthread_t Self = pthread_self();
+    pthread_t Writer;
+    ATOMIC_GET_PTHREAD_T(&pThis->Writer, &Writer);
+    if (Writer == Self)
+    {
+#ifdef RTSEMRW_STRICT
+        int rc9 = RTLockValidatorRecExclRecursionMixed(&pThis->ValidatorWrite, &pThis->ValidatorRead.Core, pSrcPos);
+        if (RT_FAILURE(rc9))
+            return rc9;
+#endif
+        Assert(pThis->cWriterReads < INT32_MAX);
+        pThis->cWriterReads++;
+        return VINF_SUCCESS;
+    }
+
+    /*
+     * Try lock it.
+     */
+    /* The blocking bookkeeping is skipped for cMillies == 0; hThreadSelf then
+       stays NIL_RTTHREAD. */
+    RTTHREAD hThreadSelf = NIL_RTTHREAD;
+    if (cMillies > 0)
+    {
+#ifdef RTSEMRW_STRICT
+        hThreadSelf = RTThreadSelfAutoAdopt();
+        int rc9 = RTLockValidatorRecSharedCheckOrderAndBlocking(&pThis->ValidatorRead, hThreadSelf, pSrcPos, true,
+                                                                cMillies, RTTHREADSTATE_RW_READ, true);
+        if (RT_FAILURE(rc9))
+            return rc9;
+#else
+        hThreadSelf = RTThreadSelf();
+        RTThreadBlocking(hThreadSelf, RTTHREADSTATE_RW_READ, true);
+        RT_NOREF_PV(pSrcPos);
+#endif
+    }
+
+    if (cMillies == RT_INDEFINITE_WAIT)
+    {
+        /* take rwlock */
+        int rc = pthread_rwlock_rdlock(&pThis->RWLock);
+        RTThreadUnblocked(hThreadSelf, RTTHREADSTATE_RW_READ);
+        if (rc)
+        {
+            AssertMsgFailed(("Failed read lock read-write sem %p, rc=%d.\n", hRWSem, rc));
+            return RTErrConvertFromErrno(rc);
+        }
+    }
+    else
+    {
+#ifdef RT_OS_DARWIN
+        /* The thread may have been marked as blocking above; undo that before
+           bailing out, like every other exit path does, so it is not left in
+           the RW_READ blocking state while it is in fact running. */
+        RTThreadUnblocked(hThreadSelf, RTTHREADSTATE_RW_READ);
+        AssertMsgFailed(("Not implemented on Darwin yet because of incomplete pthreads API."));
+        return VERR_NOT_IMPLEMENTED;
+
+#else /* !RT_OS_DARWIN */
+        /*
+         * Get current time and calc end of wait time.
+         */
+        struct timespec ts = {0,0};
+        clock_gettime(CLOCK_REALTIME, &ts);
+        if (cMillies != 0)
+        {
+            ts.tv_nsec += (cMillies % 1000) * 1000000;
+            ts.tv_sec += cMillies / 1000;
+            /* Carry nanosecond overflow into the seconds field. */
+            if (ts.tv_nsec >= 1000000000)
+            {
+                ts.tv_nsec -= 1000000000;
+                ts.tv_sec++;
+            }
+        }
+
+        /* take rwlock */
+        int rc = pthread_rwlock_timedrdlock(&pThis->RWLock, &ts);
+        RTThreadUnblocked(hThreadSelf, RTTHREADSTATE_RW_READ);
+        if (rc)
+        {
+            /* A timeout is the only failure that does not trip the assertion. */
+            AssertMsg(rc == ETIMEDOUT, ("Failed read lock read-write sem %p, rc=%d.\n", hRWSem, rc));
+            return RTErrConvertFromErrno(rc);
+        }
+#endif /* !RT_OS_DARWIN */
+    }
+
+    /* Got the lock: account for the new reader. */
+    ASMAtomicIncU32(&pThis->cReaders);
+#ifdef RTSEMRW_STRICT
+    RTLockValidatorRecSharedAddOwner(&pThis->ValidatorRead, hThreadSelf, pSrcPos);
+#endif
+    return VINF_SUCCESS;
+}
+
+
+#undef RTSemRWRequestRead
+/**
+ * Requests read access to the read-write semaphore.
+ *
+ * Thin wrapper around rtSemRWRequestRead; strict builds record the source
+ * position of the caller, other builds pass NULL.
+ *
+ * @returns Status of rtSemRWRequestRead.
+ * @param   hRWSem      The semaphore handle.
+ * @param   cMillies    Timeout in milliseconds.
+ */
+RTDECL(int) RTSemRWRequestRead(RTSEMRW hRWSem, RTMSINTERVAL cMillies)
+{
+#ifdef RTSEMRW_STRICT
+    RTLOCKVALSRCPOS CallerPos = RTLOCKVALSRCPOS_INIT_NORMAL_API();
+    return rtSemRWRequestRead(hRWSem, cMillies, &CallerPos);
+#else
+    return rtSemRWRequestRead(hRWSem, cMillies, NULL);
+#endif
+}
+
+
+/**
+ * Debug variant of RTSemRWRequestRead.
+ *
+ * Always forwards a source position record, initialised with
+ * RTLOCKVALSRCPOS_INIT_DEBUG_API(), to rtSemRWRequestRead.
+ *
+ * @returns Status of rtSemRWRequestRead.
+ * @param   hRWSem      The semaphore handle.
+ * @param   cMillies    Timeout in milliseconds.
+ * @param   uId         Caller supplied identifier.
+ */
+RTDECL(int) RTSemRWRequestReadDebug(RTSEMRW hRWSem, RTMSINTERVAL cMillies, RTHCUINTPTR uId, RT_SRC_POS_DECL)
+{
+    RTLOCKVALSRCPOS CallerPos = RTLOCKVALSRCPOS_INIT_DEBUG_API();
+    int const rcRequest = rtSemRWRequestRead(hRWSem, cMillies, &CallerPos);
+    return rcRequest;
+}
+
+
+#undef RTSemRWRequestReadNoResume
+/**
+ * Requests read access to the read-write semaphore, no-resume flavour.
+ *
+ * Implemented exactly like RTSemRWRequestRead since the wait functions used
+ * by the worker do not return EINTR.
+ *
+ * @returns Status of rtSemRWRequestRead.
+ * @param   hRWSem      The semaphore handle.
+ * @param   cMillies    Timeout in milliseconds.
+ */
+RTDECL(int) RTSemRWRequestReadNoResume(RTSEMRW hRWSem, RTMSINTERVAL cMillies)
+{
+    /* EINTR isn't returned by the wait functions we're using. */
+#ifdef RTSEMRW_STRICT
+    RTLOCKVALSRCPOS CallerPos = RTLOCKVALSRCPOS_INIT_NORMAL_API();
+    return rtSemRWRequestRead(hRWSem, cMillies, &CallerPos);
+#else
+    return rtSemRWRequestRead(hRWSem, cMillies, NULL);
+#endif
+}
+
+
+/**
+ * Debug variant of RTSemRWRequestReadNoResume.
+ *
+ * Always forwards a source position record, initialised with
+ * RTLOCKVALSRCPOS_INIT_DEBUG_API(), to rtSemRWRequestRead.
+ *
+ * @returns Status of rtSemRWRequestRead.
+ * @param   hRWSem      The semaphore handle.
+ * @param   cMillies    Timeout in milliseconds.
+ * @param   uId         Caller supplied identifier.
+ */
+RTDECL(int) RTSemRWRequestReadNoResumeDebug(RTSEMRW hRWSem, RTMSINTERVAL cMillies, RTHCUINTPTR uId, RT_SRC_POS_DECL)
+{
+    RTLOCKVALSRCPOS CallerPos = RTLOCKVALSRCPOS_INIT_DEBUG_API();
+    int const rcRequest = rtSemRWRequestRead(hRWSem, cMillies, &CallerPos);
+    return rcRequest;
+}
+
+
+/**
+ * Releases read access to the read-write semaphore.
+ *
+ * When called by the current write owner this undoes one write+read recursion
+ * (cWriterReads).  Otherwise the reader count is dropped and the pthread
+ * rwlock unlocked; the count is restored if the unlock fails.
+ *
+ * @returns VINF_SUCCESS on success, VERR_INVALID_HANDLE for a bad handle,
+ *          VERR_NOT_OWNER if there is nothing to release (writer without read
+ *          recursions; on Linux also a zero reader count), a status converted
+ *          from the errno of pthread_rwlock_unlock, or (in strict builds) a
+ *          lock validator failure status.
+ * @param   hRWSem      The semaphore handle.
+ */
+RTDECL(int) RTSemRWReleaseRead(RTSEMRW hRWSem)
+{
+    /* Handle validation. */
+    struct RTSEMRWINTERNAL *pThis = hRWSem;
+    AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+    AssertMsgReturn(pThis->u32Magic == RTSEMRW_MAGIC,
+                    ("pThis=%p u32Magic=%#x\n", pThis, pThis->u32Magic),
+                    VERR_INVALID_HANDLE);
+
+    /* The write owner releasing a recursive read? */
+    pthread_t hNativeSelf = pthread_self();
+    pthread_t hNativeWriter;
+    ATOMIC_GET_PTHREAD_T(&pThis->Writer, &hNativeWriter);
+    if (hNativeWriter == hNativeSelf)
+    {
+        AssertMsgReturn(pThis->cWriterReads > 0, ("pThis=%p\n", pThis), VERR_NOT_OWNER);
+#ifdef RTSEMRW_STRICT
+        int rcLockVal = RTLockValidatorRecExclUnwindMixed(&pThis->ValidatorWrite, &pThis->ValidatorRead.Core);
+        if (RT_FAILURE(rcLockVal))
+            return rcLockVal;
+#endif
+        pThis->cWriterReads--;
+        return VINF_SUCCESS;
+    }
+
+    /* Ordinary reader: validator first, then the native unlock. */
+#ifdef RTSEMRW_STRICT
+    int rcLockVal = RTLockValidatorRecSharedCheckAndRelease(&pThis->ValidatorRead, RTThreadSelf());
+    if (RT_FAILURE(rcLockVal))
+        return rcLockVal;
+#endif
+#ifdef RT_OS_LINUX /* glibc (at least 2.8) may screw up when unlocking a lock we don't own. */
+    if (!ASMAtomicReadU32(&pThis->cReaders))
+    {
+        AssertMsgFailed(("Not owner of %p\n", pThis));
+        return VERR_NOT_OWNER;
+    }
+#endif
+    ASMAtomicDecU32(&pThis->cReaders);
+    int rc = pthread_rwlock_unlock(&pThis->RWLock);
+    if (!rc)
+        return VINF_SUCCESS;
+
+    /* The unlock failed, so we are still counted as a reader. */
+    ASMAtomicIncU32(&pThis->cReaders);
+    AssertMsgFailed(("Failed read unlock read-write sem %p, rc=%d.\n", hRWSem, rc));
+    return RTErrConvertFromErrno(rc);
+}
+
+
+DECL_FORCE_INLINE(int) rtSemRWRequestWrite(RTSEMRW hRWSem, RTMSINTERVAL cMillies, PCRTLOCKVALSRCPOS pSrcPos)
+{
+ /*
+ * Validate input.
+ */
+ struct RTSEMRWINTERNAL *pThis = hRWSem;
+ AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+ AssertMsgReturn(pThis->u32Magic == RTSEMRW_MAGIC,
+ ("pThis=%p u32Magic=%#x\n", pThis, pThis->u32Magic),
+ VERR_INVALID_HANDLE);
+
+ /*
+ * Recursion? The current writer only bumps its write count.
+ */
+ pthread_t Self = pthread_self();
+ pthread_t Writer;
+ ATOMIC_GET_PTHREAD_T(&pThis->Writer, &Writer);
+ if (Writer == Self)
+ {
+#ifdef RTSEMRW_STRICT
+ int rc9 = RTLockValidatorRecExclRecursion(&pThis->ValidatorWrite, pSrcPos);
+ if (RT_FAILURE(rc9))
+ return rc9;
+#endif
+ Assert(pThis->cWrites < INT32_MAX);
+ pThis->cWrites++;
+ return VINF_SUCCESS;
+ }
+
+ /*
+ * Try lock it (the blocking state is only set up for a non-zero timeout).
+ */
+ RTTHREAD hThreadSelf = NIL_RTTHREAD;
+ if (cMillies)
+ {
+#ifdef RTSEMRW_STRICT
+ hThreadSelf = RTThreadSelfAutoAdopt();
+ int rc9 = RTLockValidatorRecExclCheckOrderAndBlocking(&pThis->ValidatorWrite, hThreadSelf, pSrcPos, true,
+ cMillies, RTTHREADSTATE_RW_WRITE, true);
+ if (RT_FAILURE(rc9))
+ return rc9;
+#else
+ hThreadSelf = RTThreadSelf();
+ RTThreadBlocking(hThreadSelf, RTTHREADSTATE_RW_WRITE, true);
+ RT_NOREF_PV(pSrcPos);
+#endif
+ }
+
+ if (cMillies == RT_INDEFINITE_WAIT)
+ {
+ /* take rwlock, waiting without a deadline */
+ int rc = pthread_rwlock_wrlock(&pThis->RWLock);
+ RTThreadUnblocked(hThreadSelf, RTTHREADSTATE_RW_WRITE);
+ if (rc)
+ {
+ AssertMsgFailed(("Failed write lock read-write sem %p, rc=%d.\n", hRWSem, rc));
+ return RTErrConvertFromErrno(rc);
+ }
+ }
+ else
+ {
+#ifdef RT_OS_DARWIN
+ AssertMsgFailed(("Not implemented on Darwin yet because of incomplete pthreads API."));
+ return VERR_NOT_IMPLEMENTED;
+#else /* !RT_OS_DARWIN */
+ /*
+ * Get current time and calc end of wait time.
+ */
+ struct timespec ts = {0,0};
+ clock_gettime(CLOCK_REALTIME, &ts);
+ if (cMillies != 0)
+ {
+ ts.tv_nsec += (cMillies % 1000) * 1000000;
+ ts.tv_sec += cMillies / 1000;
+ if (ts.tv_nsec >= 1000000000)
+ {
+ ts.tv_nsec -= 1000000000;
+ ts.tv_sec++;
+ }
+ }
+
+ /* take rwlock, giving up at the absolute deadline in ts */
+ int rc = pthread_rwlock_timedwrlock(&pThis->RWLock, &ts);
+ RTThreadUnblocked(hThreadSelf, RTTHREADSTATE_RW_WRITE);
+ if (rc)
+ {
+ AssertMsg(rc == ETIMEDOUT, ("Failed write lock read-write sem %p, rc=%d.\n", hRWSem, rc));
+ return RTErrConvertFromErrno(rc);
+ }
+#endif /* !RT_OS_DARWIN */
+ }
+
+ ATOMIC_SET_PTHREAD_T(&pThis->Writer, Self);
+ pThis->cWrites = 1;
+ Assert(!pThis->cReaders);
+#ifdef RTSEMRW_STRICT
+ RTLockValidatorRecExclSetOwner(&pThis->ValidatorWrite, hThreadSelf, pSrcPos, true);
+#endif
+ return VINF_SUCCESS;
+}
+
+
+#undef RTSemRWRequestWrite
+RTDECL(int) RTSemRWRequestWrite(RTSEMRW hRWSem, RTMSINTERVAL cMillies)
+{
+#ifdef RTSEMRW_STRICT
+ RTLOCKVALSRCPOS CallerPos = RTLOCKVALSRCPOS_INIT_NORMAL_API();
+ return rtSemRWRequestWrite(hRWSem, cMillies, &CallerPos);
+#else /* !RTSEMRW_STRICT: no source position is passed along. */
+ return rtSemRWRequestWrite(hRWSem, cMillies, NULL);
+#endif
+}
+
+
+RTDECL(int) RTSemRWRequestWriteDebug(RTSEMRW hRWSem, RTMSINTERVAL cMillies, RTHCUINTPTR uId, RT_SRC_POS_DECL)
+{
+ RTLOCKVALSRCPOS SrcPos = RTLOCKVALSRCPOS_INIT_DEBUG_API();
+ return rtSemRWRequestWrite(hRWSem, cMillies, &SrcPos);
+}
+
+
+#undef RTSemRWRequestWriteNoResume
+RTDECL(int) RTSemRWRequestWriteNoResume(RTSEMRW hRWSem, RTMSINTERVAL cMillies)
+{
+ /* The wait functions we're using never return EINTR. */
+#ifdef RTSEMRW_STRICT
+ RTLOCKVALSRCPOS CallerPos = RTLOCKVALSRCPOS_INIT_NORMAL_API();
+ return rtSemRWRequestWrite(hRWSem, cMillies, &CallerPos);
+#else /* !RTSEMRW_STRICT: no source position is passed along. */
+ return rtSemRWRequestWrite(hRWSem, cMillies, NULL);
+#endif
+}
+
+
+RTDECL(int) RTSemRWRequestWriteNoResumeDebug(RTSEMRW hRWSem, RTMSINTERVAL cMillies, RTHCUINTPTR uId, RT_SRC_POS_DECL)
+{
+ /* EINTR isn't returned by the wait functions we're using. */
+ RTLOCKVALSRCPOS SrcPos = RTLOCKVALSRCPOS_INIT_DEBUG_API();
+ return rtSemRWRequestWrite(hRWSem, cMillies, &SrcPos);
+}
+
+
+RTDECL(int) RTSemRWReleaseWrite(RTSEMRW hRWSem)
+{
+ /*
+ * Validate input.
+ */
+ struct RTSEMRWINTERNAL *pThis = hRWSem;
+ AssertPtrReturn(pThis, VERR_INVALID_HANDLE);
+ AssertMsgReturn(pThis->u32Magic == RTSEMRW_MAGIC,
+ ("pThis=%p u32Magic=%#x\n", pThis, pThis->u32Magic),
+ VERR_INVALID_HANDLE);
+
+ /*
+ * Verify ownership and implement recursion.
+ */
+ pthread_t Self = pthread_self();
+ pthread_t Writer;
+ ATOMIC_GET_PTHREAD_T(&pThis->Writer, &Writer);
+ AssertMsgReturn(Writer == Self, ("pThis=%p\n", pThis), VERR_NOT_OWNER);
+ AssertReturn(pThis->cWriterReads == 0 || pThis->cWrites > 1, VERR_WRONG_ORDER);
+
+ if (pThis->cWrites > 1)
+ {
+#ifdef RTSEMRW_STRICT
+ int rc9 = RTLockValidatorRecExclUnwind(&pThis->ValidatorWrite);
+ if (RT_FAILURE(rc9))
+ return rc9;
+#endif
+ pThis->cWrites--;
+ return VINF_SUCCESS;
+ }
+
+ /*
+ * Try unlock it.
+ */
+#ifdef RTSEMRW_STRICT
+ int rc9 = RTLockValidatorRecExclReleaseOwner(&pThis->ValidatorWrite, true);
+ if (RT_FAILURE(rc9))
+ return rc9;
+#endif
+
+ pThis->cWrites--;
+ ATOMIC_SET_PTHREAD_T(&pThis->Writer, (pthread_t)-1);
+ int rc = pthread_rwlock_unlock(&pThis->RWLock);
+ if (rc)
+ {
+ AssertMsgFailed(("Failed write unlock read-write sem %p, rc=%d.\n", hRWSem, rc));
+ return RTErrConvertFromErrno(rc);
+ }
+
+ return VINF_SUCCESS;
+}
+
+
+RTDECL(bool) RTSemRWIsWriteOwner(RTSEMRW hRWSem)
+{
+ /*
+ * Validate input.
+ */
+ struct RTSEMRWINTERNAL *pThis = hRWSem;
+ AssertPtrReturn(pThis, false);
+ AssertMsgReturn(pThis->u32Magic == RTSEMRW_MAGIC,
+ ("pThis=%p u32Magic=%#x\n", pThis, pThis->u32Magic),
+ false);
+
+ /*
+ * Check ownership.
+ */
+ pthread_t Self = pthread_self();
+ pthread_t Writer;
+ ATOMIC_GET_PTHREAD_T(&pThis->Writer, &Writer);
+ return Writer == Self;
+}
+
+
+RTDECL(bool) RTSemRWIsReadOwner(RTSEMRW hRWSem, bool fWannaHear)
+{
+ /*
+ * Validate handle.
+ */
+ struct RTSEMRWINTERNAL *pThis = hRWSem;
+ AssertPtrReturn(pThis, false);
+ AssertReturn(pThis->u32Magic == RTSEMRW_MAGIC, false);
+
+ /*
+ * Check write ownership. The writer is also a valid reader.
+ */
+ pthread_t Self = pthread_self();
+ pthread_t Writer;
+ ATOMIC_GET_PTHREAD_T(&pThis->Writer, &Writer);
+ if (Writer == Self)
+ return true;
+ if (Writer != (pthread_t)-1)
+ return false;
+
+ /*
+ * If there are no readers, we cannot be one of them, can we?
+ */
+ if (ASMAtomicReadU32(&pThis->cReaders) == 0)
+ return false;
+
+#ifdef RTSEMRW_STRICT
+ /*
+ * Ask the lock validator.
+ */
+ NOREF(fWannaHear);
+ return RTLockValidatorRecSharedIsOwner(&pThis->ValidatorRead, NIL_RTTHREAD);
+#else
+ /*
+ * Without the lock validator, just tell the caller what it wants to hear.
+ */
+ return fWannaHear;
+#endif
+}
+RT_EXPORT_SYMBOL(RTSemRWIsReadOwner);
+
+
+RTDECL(uint32_t) RTSemRWGetWriteRecursion(RTSEMRW hRWSem)
+{
+ /*
+ * Validate input.
+ */
+ struct RTSEMRWINTERNAL *pThis = hRWSem;
+ AssertPtrReturn(pThis, 0);
+ AssertMsgReturn(pThis->u32Magic == RTSEMRW_MAGIC,
+ ("pThis=%p u32Magic=%#x\n", pThis, pThis->u32Magic),
+ 0);
+
+ /*
+ * Return the requested data.
+ */
+ return pThis->cWrites;
+}
+
+
+RTDECL(uint32_t) RTSemRWGetWriterReadRecursion(RTSEMRW hRWSem)
+{
+ /*
+ * Validate input.
+ */
+ struct RTSEMRWINTERNAL *pThis = hRWSem;
+ AssertPtrReturn(pThis, 0);
+ AssertMsgReturn(pThis->u32Magic == RTSEMRW_MAGIC,
+ ("pThis=%p u32Magic=%#x\n", pThis, pThis->u32Magic),
+ 0);
+
+ /*
+ * Return the requested data.
+ */
+ return pThis->cWriterReads;
+}
+
+
+RTDECL(uint32_t) RTSemRWGetReadCount(RTSEMRW hRWSem)
+{
+ /*
+ * Validate input.
+ */
+ struct RTSEMRWINTERNAL *pThis = hRWSem;
+ AssertPtrReturn(pThis, 0);
+ AssertMsgReturn(pThis->u32Magic == RTSEMRW_MAGIC,
+ ("pThis=%p u32Magic=%#x\n", pThis, pThis->u32Magic),
+ 0);
+
+ /*
+ * Return the requested data.
+ */
+ return pThis->cReaders;
+}
+
diff --git a/src/VBox/Runtime/r3/posix/semwait.h b/src/VBox/Runtime/r3/posix/semwait.h
new file mode 100644
index 00000000..ade8c4a7
--- /dev/null
+++ b/src/VBox/Runtime/r3/posix/semwait.h
@@ -0,0 +1,162 @@
+/* $Id: semwait.h $ */
+/** @file
+ * IPRT - Common semaphore wait code.
+ */
+
+/*
+ * Copyright (C) 2021-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+#ifndef IPRT_INCLUDED_SRC_r3_posix_semwait_h
+#define IPRT_INCLUDED_SRC_r3_posix_semwait_h
+#ifndef RT_WITHOUT_PRAGMA_ONCE
+# pragma once
+#endif
+
+
+/** @def IPRT_HAVE_PTHREAD_CONDATTR_SETCLOCK
+ * Set if the platform implements pthread_condattr_setclock().
+ * Enables the use of the monotonic clock for waiting on condition variables. */
+#ifndef IPRT_HAVE_PTHREAD_CONDATTR_SETCLOCK
+/* Linux detection */
+# if defined(RT_OS_LINUX) && defined(__USE_XOPEN2K)
+# include <features.h>
+# if __GLIBC_PREREQ(2,6) /** @todo figure the exact version where this was added */
+# define IPRT_HAVE_PTHREAD_CONDATTR_SETCLOCK
+# endif
+# endif
+/** @todo check other platforms */
+#endif
+
+
+/**
+ * Converts an extended wait timeout specification to an absolute timespec and a
+ * relative nanosecond count.
+ *
+ * @note This does not check for RTSEMWAIT_FLAGS_INDEFINITE, caller should've
+ * done that already.
+ *
+ * @returns The relative wait in nanoseconds. 0 for a poll call, UINT64_MAX for
+ * an effectively indefinite wait.
+ * @param fFlags RTSEMWAIT_FLAGS_XXX.
+ * @param uTimeout The timeout.
+ * @param fMonotonicClock Whether the timeout is in monotonic (true) or real
+ * (false) time.
+ * @param pAbsDeadline Where to return the absolute deadline (only valid when neither 0 nor UINT64_MAX is returned).
+ */
+DECLINLINE(uint64_t) rtSemPosixCalcDeadline(uint32_t fFlags, uint64_t uTimeout, bool fMonotonicClock,
+ struct timespec *pAbsDeadline)
+{
+ Assert(!(fFlags & RTSEMWAIT_FLAGS_INDEFINITE));
+
+ /*
+ * Convert uTimeout to a relative value in nanoseconds.
+ */
+ if (fFlags & RTSEMWAIT_FLAGS_MILLISECS)
+ {
+ if (uTimeout < UINT64_MAX / RT_NS_1MS)
+ uTimeout = uTimeout * RT_NS_1MS;
+ else
+ return UINT64_MAX;
+ }
+ else if (uTimeout == UINT64_MAX) /* unofficial way of indicating an indefinite wait */
+ return UINT64_MAX;
+
+ /*
+ * Make uTimeout relative and check for polling (zero timeout) calls.
+ */
+ uint64_t uAbsTimeout = uTimeout;
+ if (fFlags & RTSEMWAIT_FLAGS_ABSOLUTE)
+ {
+ uint64_t const u64Now = RTTimeSystemNanoTS();
+ if (uTimeout > u64Now)
+ uTimeout -= u64Now;
+ else
+ return 0;
+ }
+ else if (uTimeout == 0)
+ return 0;
+
+ /*
+ * Calculate the deadline according to the clock we're using.
+ */
+ if (!fMonotonicClock)
+ {
+#if defined(RT_OS_DARWIN) || defined(RT_OS_HAIKU)
+ struct timeval tv = {0,0};
+ gettimeofday(&tv, NULL);
+ pAbsDeadline->tv_sec = tv.tv_sec;
+ pAbsDeadline->tv_nsec = tv.tv_usec * 1000;
+#else
+ clock_gettime(CLOCK_REALTIME, pAbsDeadline);
+#endif
+ struct timespec TsAdd;
+ TsAdd.tv_nsec = uTimeout % RT_NS_1SEC;
+ TsAdd.tv_sec = uTimeout / RT_NS_1SEC;
+
+ /* Check for 32-bit tv_sec overflows (current time plus the seconds being added): */
+ if ( sizeof(pAbsDeadline->tv_sec) < sizeof(uint64_t)
+ && ( uTimeout >= (uint64_t)RT_NS_1SEC * UINT32_MAX
+ || (uint64_t)pAbsDeadline->tv_sec + uTimeout / RT_NS_1SEC >= UINT32_MAX) )
+ return UINT64_MAX;
+
+ pAbsDeadline->tv_sec += TsAdd.tv_sec;
+ pAbsDeadline->tv_nsec += TsAdd.tv_nsec;
+ if ((uint32_t)pAbsDeadline->tv_nsec >= RT_NS_1SEC)
+ {
+ pAbsDeadline->tv_nsec -= RT_NS_1SEC;
+ pAbsDeadline->tv_sec++;
+ }
+ }
+ else
+ {
+ /* ASSUMES RTTimeSystemNanoTS() == RTTimeNanoTS() == clock_gettime(CLOCK_MONOTONIC). */
+ if (fFlags & RTSEMWAIT_FLAGS_RELATIVE)
+ {
+ uint64_t const nsNow = RTTimeSystemNanoTS();
+ uAbsTimeout += nsNow;
+ if (uAbsTimeout < nsNow)
+ return UINT64_MAX;
+ }
+
+ /* Check for 32-bit tv_sec overflows: */
+ if ( sizeof(pAbsDeadline->tv_sec) < sizeof(uint64_t)
+ && uAbsTimeout >= (uint64_t)RT_NS_1SEC * UINT32_MAX)
+ return UINT64_MAX;
+
+ pAbsDeadline->tv_nsec = uAbsTimeout % RT_NS_1SEC;
+ pAbsDeadline->tv_sec = uAbsTimeout / RT_NS_1SEC;
+ }
+
+ return uTimeout;
+}
+
+#endif /* !IPRT_INCLUDED_SRC_r3_posix_semwait_h */
+
diff --git a/src/VBox/Runtime/r3/posix/serialport-posix.cpp b/src/VBox/Runtime/r3/posix/serialport-posix.cpp
new file mode 100644
index 00000000..0afbb676
--- /dev/null
+++ b/src/VBox/Runtime/r3/posix/serialport-posix.cpp
@@ -0,0 +1,1269 @@
+/* $Id: serialport-posix.cpp $ */
+/** @file
+ * IPRT - Serial Port API, POSIX Implementation.
+ */
+
+/*
+ * Copyright (C) 2017-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#include <iprt/serialport.h>
+#include "internal/iprt.h"
+
+#include <iprt/asm.h>
+#include <iprt/assert.h>
+#include <iprt/cdefs.h>
+#include <iprt/err.h>
+#include <iprt/mem.h>
+#include <iprt/string.h>
+#include <iprt/thread.h>
+#include <iprt/time.h>
+#include "internal/magics.h"
+
+#include <errno.h>
+#ifdef RT_OS_SOLARIS
+# include <sys/termios.h>
+#else
+# include <termios.h>
+#endif
+#include <sys/types.h>
+#include <fcntl.h>
+#include <string.h>
+#include <unistd.h>
+#ifdef RT_OS_DARWIN
+# include <sys/poll.h>
+#else
+# include <sys/poll.h>
+#endif
+#include <sys/ioctl.h>
+#include <pthread.h>
+
+#ifdef RT_OS_LINUX
+/*
+ * TIOCM_LOOP is not defined in the above header files for some reason but in asm/termios.h.
+ * But inclusion of this file however leads to compilation errors because of redefinition of some
+ * structs. That's why it is defined here until a better solution is found.
+ */
+# ifndef TIOCM_LOOP
+# define TIOCM_LOOP 0x8000
+# endif
+/* For linux custom baudrate code we also need serial_struct */
+# include <linux/serial.h>
+#endif /* linux */
+
+/** Define fallback if not supported. */
+#if !defined(CMSPAR)
+# define CMSPAR 0
+#endif
+
+
+/*********************************************************************************************************************************
+* Structures and Typedefs *
+*********************************************************************************************************************************/
+
+/**
+ * Internal serial port state.
+ */
+typedef struct RTSERIALPORTINTERNAL
+{
+ /** Magic value (RTSERIALPORT_MAGIC). */
+ uint32_t u32Magic;
+ /** Flags given while opening the serial port. */
+ uint32_t fOpenFlags;
+ /** The file descriptor of the serial port. */
+ int iFd;
+ /** The status line monitor thread if enabled. */
+ RTTHREAD hMonThrd;
+ /** Flag whether the monitoring thread should shutdown. */
+ volatile bool fMonThrdShutdown;
+ /** Reading end of wakeup pipe. */
+ int iFdPipeR;
+ /** Writing end of wakeup pipe. */
+ int iFdPipeW;
+ /** Event pending mask. */
+ volatile uint32_t fEvtsPending;
+ /** Flag whether we are in blocking or non blocking mode. */
+ bool fBlocking;
+ /** The current active config (we assume no one changes this behind our back). */
+ struct termios PortCfg;
+ /** Flag whether a custom baud rate was chosen (for hosts supporting this.). */
+ bool fBaudrateCust;
+ /** The custom baud rate. */
+ uint32_t uBaudRateCust;
+} RTSERIALPORTINTERNAL;
+/** Pointer to the internal serial port state. */
+typedef RTSERIALPORTINTERNAL *PRTSERIALPORTINTERNAL;
+
+
+/**
+ * Baud rate conversion table descriptor.
+ */
+typedef struct RTSERIALPORTBRATECONVDESC
+{
+ /** The platform independent baud rate used by the RTSerialPort* API. */
+ uint32_t uBaudRateCfg;
+ /** The speed identifier used in the termios structure. */
+ speed_t iSpeedTermios;
+} RTSERIALPORTBRATECONVDESC;
+/** Pointer to a baud rate conversion table descriptor. */
+typedef RTSERIALPORTBRATECONVDESC *PRTSERIALPORTBRATECONVDESC;
+/** Pointer to a const baud rate conversion table descriptor. */
+typedef const RTSERIALPORTBRATECONVDESC *PCRTSERIALPORTBRATECONVDESC;
+
+
+/*********************************************************************************************************************************
+* Defined Constants And Macros *
+*********************************************************************************************************************************/
+
+/** The event poller was woken up due to an external interrupt. */
+#define RTSERIALPORT_WAKEUP_PIPE_REASON_INTERRUPT 0x0
+/** The event poller was woken up due to a change in the monitored status lines. */
+#define RTSERIALPORT_WAKEUP_PIPE_REASON_STS_LINE_CHANGED 0x1
+/** The monitor thread encountered repeating errors querying the status lines and terminated. */
+#define RTSERIALPORT_WAKEUP_PIPE_REASON_STS_LINE_MONITOR_FAILED 0x2
+
+
+/*********************************************************************************************************************************
+* Global variables *
+*********************************************************************************************************************************/
+
+/** The baud rate conversion table. */
+static const RTSERIALPORTBRATECONVDESC s_rtSerialPortBaudrateConv[] =
+{
+ { 50, B50 },
+ { 75, B75 },
+ { 110, B110 },
+ { 134, B134 },
+ { 150, B150 },
+ { 200, B200 },
+ { 300, B300 },
+ { 600, B600 },
+ { 1200, B1200 },
+ { 1800, B1800 },
+ { 2400, B2400 },
+ { 4800, B4800 },
+ { 9600, B9600 },
+ { 19200, B19200 },
+ { 38400, B38400 },
+ { 57600, B57600 },
+ { 115200, B115200 }
+};
+
+
+
+/*********************************************************************************************************************************
+* Internal Functions *
+*********************************************************************************************************************************/
+
+/**
+ * Converts the given termios speed identifier to the baud rate used in the API.
+ *
+ * @returns Baud rate or 0 if not a standard baud rate
+ */
+DECLINLINE(uint32_t) rtSerialPortGetBaudrateFromTermiosSpeed(speed_t enmSpeed)
+{
+ for (unsigned idx = 0; idx < RT_ELEMENTS(s_rtSerialPortBaudrateConv); idx++)
+ {
+ PCRTSERIALPORTBRATECONVDESC const pDesc = &s_rtSerialPortBaudrateConv[idx];
+ if (pDesc->iSpeedTermios == enmSpeed)
+ return pDesc->uBaudRateCfg;
+ }
+ return 0; /* Not one of the standard speeds in the table. */
+}
+
+
+/**
+ * Converts the given baud rate to proper termios speed identifier.
+ *
+ * @returns Speed identifier if available or B0 if no matching speed for the baud rate
+ * could be found.
+ * @param uBaudRate The baud rate to convert.
+ * @param pfBaudrateCust Where to store the flag whether a custom baudrate was selected.
+ */
+DECLINLINE(speed_t) rtSerialPortGetTermiosSpeedFromBaudrate(uint32_t uBaudRate, bool *pfBaudrateCust)
+{
+ /* A rate found in the table maps directly to a termios speed identifier. */
+ *pfBaudrateCust = false;
+ for (unsigned idx = 0; idx < RT_ELEMENTS(s_rtSerialPortBaudrateConv); idx++)
+ {
+ PCRTSERIALPORTBRATECONVDESC const pDesc = &s_rtSerialPortBaudrateConv[idx];
+ if (pDesc->uBaudRateCfg == uBaudRate)
+ return pDesc->iSpeedTermios;
+ }
+#ifndef RT_OS_LINUX
+ return B0;
+#else
+ *pfBaudrateCust = true; /* Rate not in the table: flag it as custom. */
+ return B38400;
+#endif
+}
+
+
+/**
+ * Tries to set the default config on the given serial port.
+ *
+ * @returns IPRT status code.
+ * @param pThis The internal serial port instance data.
+ */
+static int rtSerialPortSetDefaultCfg(PRTSERIALPORTINTERNAL pThis)
+{
+ pThis->fBaudrateCust = false;
+ pThis->uBaudRateCust = 0;
+ pThis->PortCfg.c_iflag = INPCK; /* Input parity checking. */
+ cfsetispeed(&pThis->PortCfg, B9600);
+ cfsetospeed(&pThis->PortCfg, B9600);
+ pThis->PortCfg.c_cflag |= CS8 | CLOCAL; /* 8 data bits, ignore modem control lines. */
+ if (pThis->fOpenFlags & RTSERIALPORT_OPEN_F_READ)
+ pThis->PortCfg.c_cflag |= CREAD; /* Enable receiver. */
+
+ /* Set to raw input mode. */
+ pThis->PortCfg.c_lflag &= ~(ICANON | ECHO | ECHOE | ECHONL | ECHOK | ISIG | IEXTEN);
+ pThis->PortCfg.c_cc[VMIN] = 0; /* Achieve non-blocking behavior. */
+ pThis->PortCfg.c_cc[VTIME] = 0;
+
+ int rc = VINF_SUCCESS;
+ int rcPsx = tcflush(pThis->iFd, TCIOFLUSH);
+ if (!rcPsx)
+ {
+ rcPsx = tcsetattr(pThis->iFd, TCSANOW, &pThis->PortCfg);
+ if (rcPsx == -1)
+ rc = RTErrConvertFromErrno(errno);
+
+ if (RT_SUCCESS(rc))
+ {
+#ifdef RT_OS_LINUX
+ if (pThis->fOpenFlags & RTSERIALPORT_OPEN_F_ENABLE_LOOPBACK)
+ {
+ int fTiocmSet = TIOCM_LOOP;
+ rcPsx = ioctl(pThis->iFd, TIOCMBIS, &fTiocmSet);
+ if (rcPsx == -1)
+ rc = RTErrConvertFromErrno(errno);
+ }
+ else
+ {
+ /* Make sure it is clear. */
+ int fTiocmClear = TIOCM_LOOP;
+ rcPsx = ioctl(pThis->iFd, TIOCMBIC, &fTiocmClear);
+ if (rcPsx == -1 && errno != EINVAL) /* Pseudo terminals don't support loopback mode so ignore an error here. */
+ rc = RTErrConvertFromErrno(errno);
+ }
+#else
+ if (pThis->fOpenFlags & RTSERIALPORT_OPEN_F_ENABLE_LOOPBACK)
+ return VERR_NOT_SUPPORTED;
+#endif
+ }
+ }
+ else
+ rc = RTErrConvertFromErrno(errno);
+
+ return rc;
+}
+
+
+/**
+ * Converts the given serial port config to the appropriate termios counterpart.
+ *
+ * @returns IPRT status code.
+ * @param pThis The internal serial port instance data.
+ * @param pCfg Pointer to the serial port config descriptor.
+ * @param pTermios Pointer to the termios structure to fill.
+ * @param pfBaudrateCust Where to store the flag whether a custom baudrate was selected.
+ * @param pErrInfo Additional error to be set when the conversion fails.
+ */
+static int rtSerialPortCfg2Termios(PRTSERIALPORTINTERNAL pThis, PCRTSERIALPORTCFG pCfg, struct termios *pTermios,
+ bool *pfBaudrateCust, PRTERRINFO pErrInfo)
+{
+ RT_NOREF(pErrInfo); /** @todo Make use of the error info. */
+ speed_t enmSpeed = rtSerialPortGetTermiosSpeedFromBaudrate(pCfg->uBaudRate, pfBaudrateCust);
+ if (enmSpeed != B0)
+ {
+ tcflag_t const fCFlagMask = (CS5 | CS6 | CS7 | CS8 | CSTOPB | PARENB | PARODD | CMSPAR);
+ tcflag_t fCFlagNew = CLOCAL;
+
+ switch (pCfg->enmDataBitCount)
+ {
+ case RTSERIALPORTDATABITS_5BITS:
+ fCFlagNew |= CS5;
+ break;
+ case RTSERIALPORTDATABITS_6BITS:
+ fCFlagNew |= CS6;
+ break;
+ case RTSERIALPORTDATABITS_7BITS:
+ fCFlagNew |= CS7;
+ break;
+ case RTSERIALPORTDATABITS_8BITS:
+ fCFlagNew |= CS8;
+ break;
+ default:
+ AssertFailed();
+ return VERR_INVALID_PARAMETER;
+ }
+
+ switch (pCfg->enmParity)
+ {
+ case RTSERIALPORTPARITY_NONE:
+ break;
+ case RTSERIALPORTPARITY_EVEN:
+ fCFlagNew |= PARENB;
+ break;
+ case RTSERIALPORTPARITY_ODD:
+ fCFlagNew |= PARENB | PARODD;
+ break;
+#if CMSPAR != 0
+ case RTSERIALPORTPARITY_MARK:
+ fCFlagNew |= PARENB | CMSPAR | PARODD;
+ break;
+ case RTSERIALPORTPARITY_SPACE:
+ fCFlagNew |= PARENB | CMSPAR;
+ break;
+#else
+ case RTSERIALPORTPARITY_MARK:
+ case RTSERIALPORTPARITY_SPACE:
+ return VERR_NOT_SUPPORTED;
+#endif
+ default:
+ AssertFailed();
+ return VERR_INVALID_PARAMETER;
+ }
+
+ switch (pCfg->enmStopBitCount)
+ {
+ case RTSERIALPORTSTOPBITS_ONE:
+ break;
+ case RTSERIALPORTSTOPBITS_ONEPOINTFIVE:
+ if (pCfg->enmDataBitCount == RTSERIALPORTDATABITS_5BITS)
+ fCFlagNew |= CSTOPB;
+ else
+ return VERR_NOT_SUPPORTED;
+ break;
+ case RTSERIALPORTSTOPBITS_TWO:
+ if (pCfg->enmDataBitCount != RTSERIALPORTDATABITS_5BITS)
+ fCFlagNew |= CSTOPB;
+ else
+ return VERR_NOT_SUPPORTED;
+ break;
+ default:
+ AssertFailed();
+ return VERR_INVALID_PARAMETER;
+ }
+
+ /* Assign new flags. */
+ if (pThis->fOpenFlags & RTSERIALPORT_OPEN_F_READ)
+ pTermios->c_cflag |= CREAD; /* Enable receiver. */
+ pTermios->c_cflag = (pTermios->c_cflag & ~fCFlagMask) | fCFlagNew;
+ pTermios->c_lflag &= ~(ICANON | ECHO | ECHOE | ECHONL | ECHOK | ISIG | IEXTEN);
+ pTermios->c_iflag = INPCK; /* Input parity checking. */
+ pTermios->c_cc[VMIN] = 0; /* Achieve non-blocking behavior. */
+ pTermios->c_cc[VTIME] = 0;
+ cfsetispeed(pTermios, enmSpeed);
+ cfsetospeed(pTermios, enmSpeed);
+ }
+ else
+ return VERR_SERIALPORT_INVALID_BAUDRATE;
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Converts the given termios structure to an appropriate serial port config.
+ *
+ * @returns IPRT status code.
+ * @param pThis The internal serial port instance data.
+ * @param pTermios The termios structure to convert.
+ * @param pCfg The serial port config to fill in.
+ */
+static int rtSerialPortTermios2Cfg(PRTSERIALPORTINTERNAL pThis, struct termios *pTermios, PRTSERIALPORTCFG pCfg)
+{
+ int rc = VINF_SUCCESS;
+ bool f5DataBits = false;
+ speed_t enmSpeedIn = cfgetispeed(pTermios);
+ Assert(enmSpeedIn == cfgetospeed(pTermios)); /* Should always be the same. */
+
+ if (!pThis->fBaudrateCust)
+ {
+ pCfg->uBaudRate = rtSerialPortGetBaudrateFromTermiosSpeed(enmSpeedIn);
+ if (!pCfg->uBaudRate)
+ rc = VERR_SERIALPORT_INVALID_BAUDRATE;
+ }
+ else
+ pCfg->uBaudRate = pThis->uBaudRateCust;
+
+ switch (pTermios->c_cflag & CSIZE)
+ {
+ case CS5:
+ pCfg->enmDataBitCount = RTSERIALPORTDATABITS_5BITS;
+ f5DataBits = true;
+ break;
+ case CS6:
+ pCfg->enmDataBitCount = RTSERIALPORTDATABITS_6BITS;
+ break;
+ case CS7:
+ pCfg->enmDataBitCount = RTSERIALPORTDATABITS_7BITS;
+ break;
+ case CS8:
+ pCfg->enmDataBitCount = RTSERIALPORTDATABITS_8BITS;
+ break;
+ default:
+ AssertFailed(); /* Should not happen. */
+ pCfg->enmDataBitCount = RTSERIALPORTDATABITS_INVALID;
+ rc = RT_FAILURE(rc) ? rc : VERR_INVALID_PARAMETER;
+ }
+
+ /* Convert parity. */
+ if (pTermios->c_cflag & PARENB)
+ {
+ /*
+ * CMSPAR is not supported on all systems, especially OS X. Configuring
+ * mark/space parity is not possible there and we start from a known config
+ * when opening the serial port, so only check the bit where it exists.
+ */
+#if CMSPAR != 0
+ bool fCmsParSet = RT_BOOL(pTermios->c_cflag & CMSPAR);
+#else
+ bool fCmsParSet = false;
+#endif
+ if (pTermios->c_cflag & PARODD)
+ pCfg->enmParity = fCmsParSet ? RTSERIALPORTPARITY_MARK : RTSERIALPORTPARITY_ODD;
+ else
+ pCfg->enmParity = fCmsParSet ? RTSERIALPORTPARITY_SPACE: RTSERIALPORTPARITY_EVEN;
+ }
+ else
+ pCfg->enmParity = RTSERIALPORTPARITY_NONE;
+
+ /*
+ * 1.5 stop bits are used with a data count of 5 bits when a UART derived from the 8250
+ * is used.
+ */
+ if (pTermios->c_cflag & CSTOPB)
+ pCfg->enmStopBitCount = f5DataBits ? RTSERIALPORTSTOPBITS_ONEPOINTFIVE : RTSERIALPORTSTOPBITS_TWO;
+ else
+ pCfg->enmStopBitCount = RTSERIALPORTSTOPBITS_ONE;
+
+ return rc;
+}
+
+
+/**
+ * Wakes up any thread polling for a serial port event with the given reason.
+ *
+ * @returns IPRT status code.
+ * @param pThis The internal serial port instance data.
+ * @param bWakeupReason The wakeup reason to pass to the event poller.
+ */
+DECLINLINE(int) rtSerialPortWakeupEvtPoller(PRTSERIALPORTINTERNAL pThis, uint8_t bWakeupReason)
+{
+ int rcPsx = write(pThis->iFdPipeW, &bWakeupReason, 1);
+ if (rcPsx != 1)
+ return RTErrConvertFromErrno(errno);
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * The status line monitor thread worker.
+ *
+ * @returns IPRT status code.
+ * @param hThreadSelf Thread handle to this thread.
+ * @param pvUser User argument.
+ */
+static DECLCALLBACK(int) rtSerialPortStsLineMonitorThrd(RTTHREAD hThreadSelf, void *pvUser)
+{
+ RT_NOREF(hThreadSelf);
+ PRTSERIALPORTINTERNAL pThis = (PRTSERIALPORTINTERNAL)pvUser;
+ unsigned long const fStsLinesChk = TIOCM_CAR | TIOCM_RNG | TIOCM_DSR | TIOCM_CTS;
+ int rc = VINF_SUCCESS;
+ uint32_t fStsLinesOld = 0;
+ uint32_t cStsLineGetErrors = 0;
+#ifdef RT_OS_LINUX
+ bool fPoll = false;
+#endif
+
+ RTThreadUserSignal(hThreadSelf);
+
+ int rcPsx = ioctl(pThis->iFd, TIOCMGET, &fStsLinesOld);
+ if (rcPsx == -1)
+ {
+ ASMAtomicXchgBool(&pThis->fMonThrdShutdown, true);
+ return RTErrConvertFromErrno(errno);
+ }
+
+ while ( !pThis->fMonThrdShutdown
+ && RT_SUCCESS(rc))
+ {
+# ifdef RT_OS_LINUX
+ /*
+ * Wait for status line change.
+ *
+ * XXX In Linux, if a thread calls tcsetattr while the monitor thread is
+ * waiting in ioctl for a modem status change then 8250.c wrongly disables
+ * modem irqs and so the monitor thread never gets released. The workaround
+ * is to send a signal after each tcsetattr.
+ *
+ * TIOCMIWAIT doesn't work for the DSR line with TIOCM_DSR set
+ * (see http://lxr.linux.no/#linux+v4.7/drivers/usb/class/cdc-acm.c#L949)
+ * However as it is possible to query the line state we will not just clear
+ * the TIOCM_DSR bit from the lines to check but resort to the polling
+ * approach just like on other hosts.
+ */
+ if (!fPoll)
+ {
+ rcPsx = ioctl(pThis->iFd, TIOCMIWAIT, fStsLinesChk);
+ if (!rcPsx)
+ {
+ rc = rtSerialPortWakeupEvtPoller(pThis, RTSERIALPORT_WAKEUP_PIPE_REASON_STS_LINE_CHANGED);
+ if (RT_FAILURE(rc))
+ break;
+ }
+ else if (rcPsx == -1 && errno != EINTR)
+ fPoll = true;
+ }
+ else
+#endif
+ {
+ uint32_t fStsLines = 0;
+ rcPsx = ioctl(pThis->iFd, TIOCMGET, &fStsLines);
+ if (!rcPsx)
+ {
+ cStsLineGetErrors = 0; /* Reset the error counter once we had one successful query. */
+
+ if (((fStsLines ^ fStsLinesOld) & fStsLinesChk))
+ {
+ rc = rtSerialPortWakeupEvtPoller(pThis, RTSERIALPORT_WAKEUP_PIPE_REASON_STS_LINE_CHANGED);
+ if (RT_FAILURE(rc))
+ break;
+
+ fStsLinesOld = fStsLines;
+ }
+ else /* No change, sleep for a bit. */
+ RTThreadSleep(100 /*ms*/);
+ }
+ else if (rcPsx == -1 && errno != EINTR)
+ {
+ /*
+ * If querying the status line fails too often we have to shut down the
+ * thread and notify the user of the serial port.
+ */
+ if (cStsLineGetErrors++ >= 10)
+ {
+ rc = RTErrConvertFromErrno(errno);
+ rtSerialPortWakeupEvtPoller(pThis, RTSERIALPORT_WAKEUP_PIPE_REASON_STS_LINE_MONITOR_FAILED);
+ break;
+ }
+
+ RTThreadSleep(100 /*ms*/);
+ }
+ }
+ }
+
+ ASMAtomicXchgBool(&pThis->fMonThrdShutdown, true);
+ return rc;
+}
+
+
+/**
+ * Creates the status line monitoring thread.
+ *
+ * The descriptor is first probed with TIOCMGET; the thread is only created
+ * when that query succeeds.
+ *
+ * @returns IPRT status code.
+ * @retval VERR_NOT_SUPPORTED if the TIOCMGET probe fails with ENOTTY or EINVAL.
+ * @param pThis The internal serial port instance data.
+ */
+static int rtSerialPortMonitorThreadCreate(PRTSERIALPORTINTERNAL pThis)
+{
+ int rc = VINF_SUCCESS;
+
+ /*
+ * Check whether querying the status lines is supported at all, pseudo terminals
+ * don't support it so an error is returned in that case.
+ */
+ uint32_t fStsLines = 0;
+ int rcPsx = ioctl(pThis->iFd, TIOCMGET, &fStsLines);
+ if (!rcPsx)
+ {
+ /* Clear the shutdown flag before the thread starts; it is re-checked below. */
+ pThis->fMonThrdShutdown = false;
+ rc = RTThreadCreate(&pThis->hMonThrd, rtSerialPortStsLineMonitorThrd, pThis, 0 /*cbStack*/,
+ RTTHREADTYPE_IO, RTTHREADFLAGS_WAITABLE, "IPRT-SerPortMon");
+ if (RT_SUCCESS(rc))
+ {
+ /* Wait for the thread to start up. */
+ rc = RTThreadUserWait(pThis->hMonThrd, 20*RT_MS_1SEC);
+ /* Startup counts as failed when the wait timed out or the shutdown flag is already set again. */
+ if ( rc == VERR_TIMEOUT
+ || pThis->fMonThrdShutdown)
+ {
+ /* Startup failed, try to reap the thread. */
+ int rcThrd;
+ rc = RTThreadWait(pThis->hMonThrd, 20*RT_MS_1SEC, &rcThrd);
+ /* Report the status stored by RTThreadWait if the wait worked, a generic error otherwise. */
+ if (RT_SUCCESS(rc))
+ rc = rcThrd;
+ else
+ rc = VERR_INTERNAL_ERROR;
+ /* The thread is lost otherwise. */
+ }
+ }
+ }
+ else if (errno == ENOTTY || errno == EINVAL)
+ rc = VERR_NOT_SUPPORTED;
+ else
+ rc = RTErrConvertFromErrno(errno);
+
+ return rc;
+}
+
+
+/**
+ * Stops the status line monitor thread and waits for it to terminate.
+ *
+ * @param   pThis   The internal serial port instance data.
+ */
+static void rtSerialPortMonitorThreadShutdown(PRTSERIALPORTINTERNAL pThis)
+{
+    /* Raise the shutdown flag; the thread only needs a poke if the flag was not set before. */
+    if (!ASMAtomicXchgBool(&pThis->fMonThrdShutdown, true))
+    {
+        int rc = RTThreadPoke(pThis->hMonThrd);
+        AssertRC(rc);
+    }
+
+    /* Wait up to 20 seconds for the thread and sanity check both status codes. */
+    int rcThrd = VINF_SUCCESS;
+    int rc     = RTThreadWait(pThis->hMonThrd, 20*RT_MS_1SEC, &rcThrd);
+    AssertRC(rc);
+    AssertRC(rcThrd);
+}
+
+
+/**
+ * Slow path of rtSerialPortSwitchBlockingMode: toggles O_NONBLOCK on the
+ * serial port descriptor and records the new mode in the instance.
+ *
+ * @returns IPRT status code.
+ * @param   pThis       The internal serial port instance data.
+ * @param   fBlocking   The desired mode of operation.
+ * @remarks Do not call directly.
+ */
+static int rtSerialPortSwitchBlockingModeSlow(PRTSERIALPORTINTERNAL pThis, bool fBlocking)
+{
+    int const fFlagsOld = fcntl(pThis->iFd, F_GETFL, 0);
+    if (fFlagsOld == -1)
+        return RTErrConvertFromErrno(errno);
+
+    /* Blocking mode means O_NONBLOCK cleared, non-blocking mode means it is set. */
+    int const fFlagsNew = fBlocking ? (fFlagsOld & ~O_NONBLOCK) : (fFlagsOld | O_NONBLOCK);
+    if (fcntl(pThis->iFd, F_SETFL, fFlagsNew) == -1)
+        return RTErrConvertFromErrno(errno);
+
+    /* Only remember the mode once the descriptor really was switched. */
+    pThis->fBlocking = fBlocking;
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Makes sure the serial port is in the requested blocking mode, switching
+ * only when the cached mode differs.
+ *
+ * @returns IPRT status code.
+ * @param   pThis       The internal serial port instance data.
+ * @param   fBlocking   The desired mode of operation.
+ */
+DECLINLINE(int) rtSerialPortSwitchBlockingMode(PRTSERIALPORTINTERNAL pThis, bool fBlocking)
+{
+    /* Fast path: the cached mode already matches, nothing to do. */
+    return pThis->fBlocking == fBlocking
+         ? VINF_SUCCESS
+         : rtSerialPortSwitchBlockingModeSlow(pThis, fBlocking);
+}
+
+
+/**
+ * Opens the serial port at the given address and returns a handle to it.
+ *
+ * The device is opened with O_NOCTTY | O_NONBLOCK, a wakeup pipe for the event
+ * API is created with both ends marked close-on-exec, rtSerialPortSetDefaultCfg()
+ * is called and, if RTSERIALPORT_OPEN_F_SUPPORT_STATUS_LINE_MONITORING is given,
+ * the status line monitor thread is created. On any failure everything acquired
+ * so far is released again and *phSerialPort is left untouched.
+ *
+ * @returns IPRT status code.
+ * @retval VERR_NO_MEMORY if the instance data could not be allocated.
+ * @param phSerialPort Where to store the handle on success.
+ * @param pszPortAddress The path passed to open(); must not be empty.
+ * @param fFlags Combination of RTSERIALPORT_OPEN_F_XXX; at least one of
+ * RTSERIALPORT_OPEN_F_READ and RTSERIALPORT_OPEN_F_WRITE is required.
+ */
+RTDECL(int) RTSerialPortOpen(PRTSERIALPORT phSerialPort, const char *pszPortAddress, uint32_t fFlags)
+{
+ AssertPtrReturn(phSerialPort, VERR_INVALID_POINTER);
+ AssertPtrReturn(pszPortAddress, VERR_INVALID_POINTER);
+ AssertReturn(*pszPortAddress != '\0', VERR_INVALID_PARAMETER);
+ AssertReturn(!(fFlags & ~RTSERIALPORT_OPEN_F_VALID_MASK), VERR_INVALID_PARAMETER);
+ AssertReturn((fFlags & RTSERIALPORT_OPEN_F_READ) || (fFlags & RTSERIALPORT_OPEN_F_WRITE),
+ VERR_INVALID_PARAMETER);
+
+ int rc = VINF_SUCCESS;
+ PRTSERIALPORTINTERNAL pThis = (PRTSERIALPORTINTERNAL)RTMemAllocZ(sizeof(*pThis));
+ if (pThis)
+ {
+ int fPsxFlags = O_NOCTTY | O_NONBLOCK;
+
+ /* Translate the read/write open flags into the POSIX access mode. */
+ if ((fFlags & RTSERIALPORT_OPEN_F_READ) && !(fFlags & RTSERIALPORT_OPEN_F_WRITE))
+ fPsxFlags |= O_RDONLY;
+ else if (!(fFlags & RTSERIALPORT_OPEN_F_READ) && (fFlags & RTSERIALPORT_OPEN_F_WRITE))
+ fPsxFlags |= O_WRONLY;
+ else
+ fPsxFlags |= O_RDWR;
+
+ pThis->u32Magic = RTSERIALPORT_MAGIC;
+ pThis->fOpenFlags = fFlags;
+ pThis->fEvtsPending = 0;
+ pThis->iFd = open(pszPortAddress, fPsxFlags);
+ /* The descriptor was opened with O_NONBLOCK, so the cached mode starts out as non-blocking. */
+ pThis->fBlocking = false;
+ if (pThis->iFd != -1)
+ {
+ /* Create wakeup pipe for the event API. */
+ int aPipeFds[2];
+ int rcPsx = pipe(&aPipeFds[0]);
+ if (!rcPsx)
+ {
+ /* Make the pipes close on exec. */
+ pThis->iFdPipeR = aPipeFds[0];
+ pThis->iFdPipeW = aPipeFds[1];
+
+ if (fcntl(pThis->iFdPipeR, F_SETFD, FD_CLOEXEC))
+ rc = RTErrConvertFromErrno(errno);
+
+ if ( RT_SUCCESS(rc)
+ && fcntl(pThis->iFdPipeW, F_SETFD, FD_CLOEXEC))
+ rc = RTErrConvertFromErrno(errno);
+
+ if (RT_SUCCESS(rc))
+ {
+ /* Apply the default configuration, then start status line monitoring if requested. */
+ rc = rtSerialPortSetDefaultCfg(pThis);
+ if ( RT_SUCCESS(rc)
+ && (fFlags & RTSERIALPORT_OPEN_F_SUPPORT_STATUS_LINE_MONITORING))
+ rc = rtSerialPortMonitorThreadCreate(pThis);
+
+ if (RT_SUCCESS(rc))
+ {
+ *phSerialPort = pThis;
+ return VINF_SUCCESS;
+ }
+ }
+
+ /* Failure: undo in reverse order of acquisition (pipe, device descriptor, instance). */
+ close(pThis->iFdPipeR);
+ close(pThis->iFdPipeW);
+ }
+ else
+ rc = RTErrConvertFromErrno(errno);
+
+ close(pThis->iFd);
+ }
+ else
+ rc = RTErrConvertFromErrno(errno);
+
+ RTMemFree(pThis);
+ }
+ else
+ rc = VERR_NO_MEMORY;
+
+ return rc;
+}
+
+
+/**
+ * Closes a serial port handle and releases everything belonging to it.
+ *
+ * @returns IPRT status code; VINF_SUCCESS for NIL_RTSERIALPORT.
+ * @param   hSerialPort     The serial port handle, NIL_RTSERIALPORT is quietly ignored.
+ */
+RTDECL(int) RTSerialPortClose(RTSERIALPORT hSerialPort)
+{
+    PRTSERIALPORTINTERNAL pThis = hSerialPort;
+    if (pThis != NIL_RTSERIALPORT)
+    {
+        AssertPtrReturn(pThis, VERR_INVALID_PARAMETER);
+        AssertReturn(pThis->u32Magic == RTSERIALPORT_MAGIC, VERR_INVALID_HANDLE);
+
+        /*
+         * Invalidate the magic atomically so that only one caller gets to do
+         * the cleanup, then tear things down.
+         */
+        AssertReturn(ASMAtomicCmpXchgU32(&pThis->u32Magic, RTSERIALPORT_MAGIC_DEAD, RTSERIALPORT_MAGIC), VERR_INVALID_HANDLE);
+
+        /* The monitor thread only exists when monitoring was requested at open time. */
+        if (pThis->fOpenFlags & RTSERIALPORT_OPEN_F_SUPPORT_STATUS_LINE_MONITORING)
+        {
+            rtSerialPortMonitorThreadShutdown(pThis);
+        }
+
+        close(pThis->iFd);
+        close(pThis->iFdPipeR);
+        close(pThis->iFdPipeW);
+        RTMemFree(pThis);
+    }
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Returns the native file descriptor backing the serial port handle.
+ *
+ * @returns The file descriptor, -1 if the handle is invalid.
+ * @param   hSerialPort     The serial port handle.
+ */
+RTDECL(RTHCINTPTR) RTSerialPortToNative(RTSERIALPORT hSerialPort)
+{
+    PRTSERIALPORTINTERNAL pThis = hSerialPort;
+    AssertPtrReturn(pThis, -1);
+    AssertReturn(pThis->u32Magic == RTSERIALPORT_MAGIC, -1);
+
+    RTHCINTPTR const hNative = pThis->iFd;
+    return hNative;
+}
+
+
+/**
+ * Reads from the serial port in blocking mode.
+ *
+ * @returns IPRT status code.
+ * @retval  VERR_DEV_IO_ERROR if read() returns 0.
+ * @param   hSerialPort     The serial port handle.
+ * @param   pvBuf           Where to store the data read.
+ * @param   cbToRead        Number of bytes to read, must not be 0.
+ * @param   pcbRead         Where to store the number of bytes read by the first
+ *                          read() call. If NULL the function keeps reading until
+ *                          the whole buffer is filled.
+ */
+RTDECL(int) RTSerialPortRead(RTSERIALPORT hSerialPort, void *pvBuf, size_t cbToRead, size_t *pcbRead)
+{
+    PRTSERIALPORTINTERNAL pThis = hSerialPort;
+    AssertPtrReturn(pThis, VERR_INVALID_PARAMETER);
+    AssertReturn(pThis->u32Magic == RTSERIALPORT_MAGIC, VERR_INVALID_HANDLE);
+    AssertPtrReturn(pvBuf, VERR_INVALID_POINTER);
+    AssertReturn(cbToRead > 0, VERR_INVALID_PARAMETER);
+
+    int rc = rtSerialPortSwitchBlockingMode(pThis, true);
+    if (!RT_SUCCESS(rc))
+        return rc;
+
+    /* First attempt decides between error, device gone and data. */
+    ssize_t cbDone = read(pThis->iFd, pvBuf, cbToRead);
+    if (cbDone < 0)
+        return RTErrConvertFromErrno(errno);
+    if (cbDone == 0)
+        return VERR_DEV_IO_ERROR;
+
+    /* The caller can deal with a partial read. */
+    if (pcbRead)
+    {
+        *pcbRead = cbDone;
+        return rc;
+    }
+
+    /* The caller expects the complete buffer to be filled. */
+    while ((ssize_t)cbToRead > cbDone)
+    {
+        ssize_t cbChunk = read(pThis->iFd, (uint8_t *)pvBuf + cbDone, cbToRead - cbDone);
+        if (cbChunk < 0)
+            return RTErrConvertFromErrno(errno);
+        if (cbChunk == 0)
+            return VERR_DEV_IO_ERROR;
+        cbDone += cbChunk;
+    }
+
+    return rc;
+}
+
+
+/**
+ * Reads from the serial port without blocking.
+ *
+ * @returns IPRT status code.
+ * @retval  VINF_TRY_AGAIN if read() failed with EAGAIN / EWOULDBLOCK.
+ * @retval  VERR_DEV_IO_ERROR if read() returns 0.
+ * @param   hSerialPort     The serial port handle.
+ * @param   pvBuf           Where to store the data read.
+ * @param   cbToRead        Size of the buffer, must not be 0.
+ * @param   pcbRead         Where to store the number of bytes read; set to 0
+ *                          unless data was read.
+ */
+RTDECL(int) RTSerialPortReadNB(RTSERIALPORT hSerialPort, void *pvBuf, size_t cbToRead, size_t *pcbRead)
+{
+    PRTSERIALPORTINTERNAL pThis = hSerialPort;
+    AssertPtrReturn(pThis, VERR_INVALID_PARAMETER);
+    AssertReturn(pThis->u32Magic == RTSERIALPORT_MAGIC, VERR_INVALID_HANDLE);
+    AssertPtrReturn(pvBuf, VERR_INVALID_POINTER);
+    AssertReturn(cbToRead > 0, VERR_INVALID_PARAMETER);
+    AssertPtrReturn(pcbRead, VERR_INVALID_POINTER);
+
+    *pcbRead = 0;
+
+    int rc = rtSerialPortSwitchBlockingMode(pThis, false);
+    if (!RT_SUCCESS(rc))
+        return rc;
+
+    ssize_t cbThisRead = read(pThis->iFd, pvBuf, cbToRead);
+    if (cbThisRead > 0)
+    {
+        /*
+         * With break detection enabled at open time the data would have to be
+         * scanned for the BREAK condition marker encoded in the stream.
+         */
+        if (pThis->fOpenFlags & RTSERIALPORT_OPEN_F_DETECT_BREAK_CONDITION)
+        { /** @todo */ }
+
+        *pcbRead = cbThisRead;
+        return rc;
+    }
+
+    if (cbThisRead == 0)
+        return VERR_DEV_IO_ERROR;
+
+    /* No data available right now is not an error for the non-blocking variant. */
+    if (   errno == EAGAIN
+#ifdef EWOULDBLOCK
+# if EWOULDBLOCK != EAGAIN
+        || errno == EWOULDBLOCK
+# endif
+#endif
+       )
+        return VINF_TRY_AGAIN;
+
+    return RTErrConvertFromErrno(errno);
+}
+
+
+/**
+ * Writes to the serial port in blocking mode.
+ *
+ * @returns IPRT status code.
+ * @retval  VERR_DEV_IO_ERROR if write() returns 0.
+ * @param   hSerialPort     The serial port handle.
+ * @param   pvBuf           The data to write.
+ * @param   cbToWrite       Number of bytes to write, must not be 0.
+ * @param   pcbWritten      Where to store the number of bytes written by the
+ *                          first write() call. If NULL the function keeps
+ *                          writing until everything was written.
+ */
+RTDECL(int) RTSerialPortWrite(RTSERIALPORT hSerialPort, const void *pvBuf, size_t cbToWrite, size_t *pcbWritten)
+{
+    PRTSERIALPORTINTERNAL pThis = hSerialPort;
+    AssertPtrReturn(pThis, VERR_INVALID_PARAMETER);
+    AssertReturn(pThis->u32Magic == RTSERIALPORT_MAGIC, VERR_INVALID_HANDLE);
+    AssertPtrReturn(pvBuf, VERR_INVALID_POINTER);
+    AssertReturn(cbToWrite > 0, VERR_INVALID_PARAMETER);
+
+    int rc = rtSerialPortSwitchBlockingMode(pThis, true);
+    if (!RT_SUCCESS(rc))
+        return rc;
+
+    /* First attempt decides between error, device gone and progress. */
+    ssize_t cbDone = write(pThis->iFd, pvBuf, cbToWrite);
+    if (cbDone < 0)
+        return RTErrConvertFromErrno(errno);
+    if (cbDone == 0)
+        return VERR_DEV_IO_ERROR;
+
+    /* The caller can deal with a partial write. */
+    if (pcbWritten)
+    {
+        *pcbWritten = cbDone;
+        return rc;
+    }
+
+    /* The caller expects everything to be written. */
+    while ((ssize_t)cbToWrite > cbDone)
+    {
+        ssize_t cbChunk = write(pThis->iFd, (const uint8_t *)pvBuf + cbDone, cbToWrite - cbDone);
+        if (cbChunk < 0)
+            return RTErrConvertFromErrno(errno);
+        if (cbChunk == 0)
+            return VERR_DEV_IO_ERROR;
+        cbDone += cbChunk;
+    }
+
+    return rc;
+}
+
+
+/**
+ * Writes to the serial port without blocking.
+ *
+ * @returns IPRT status code.
+ * @retval  VINF_TRY_AGAIN if write() failed with EAGAIN / EWOULDBLOCK.
+ * @retval  VERR_DEV_IO_ERROR if write() returns 0.
+ * @param   hSerialPort     The serial port handle.
+ * @param   pvBuf           The data to write.
+ * @param   cbToWrite       Number of bytes to write, must not be 0.
+ * @param   pcbWritten      Where to store the number of bytes written; set to 0
+ *                          unless data was written.
+ */
+RTDECL(int) RTSerialPortWriteNB(RTSERIALPORT hSerialPort, const void *pvBuf, size_t cbToWrite, size_t *pcbWritten)
+{
+    PRTSERIALPORTINTERNAL pThis = hSerialPort;
+    AssertPtrReturn(pThis, VERR_INVALID_PARAMETER);
+    AssertReturn(pThis->u32Magic == RTSERIALPORT_MAGIC, VERR_INVALID_HANDLE);
+    AssertPtrReturn(pvBuf, VERR_INVALID_POINTER);
+    AssertReturn(cbToWrite > 0, VERR_INVALID_PARAMETER);
+    AssertPtrReturn(pcbWritten, VERR_INVALID_POINTER);
+
+    *pcbWritten = 0;
+
+    int rc = rtSerialPortSwitchBlockingMode(pThis, false);
+    if (!RT_SUCCESS(rc))
+        return rc;
+
+    ssize_t cbThisWrite = write(pThis->iFd, pvBuf, cbToWrite);
+    if (cbThisWrite > 0)
+    {
+        *pcbWritten = cbThisWrite;
+        return rc;
+    }
+
+    if (cbThisWrite == 0)
+        return VERR_DEV_IO_ERROR;
+
+    /* A full output queue is not an error for the non-blocking variant. */
+    if (   errno == EAGAIN
+#ifdef EWOULDBLOCK
+# if EWOULDBLOCK != EAGAIN
+        || errno == EWOULDBLOCK
+# endif
+#endif
+       )
+        return VINF_TRY_AGAIN;
+
+    return RTErrConvertFromErrno(errno);
+}
+
+
+/**
+ * Queries the current configuration by converting the termios settings cached
+ * in the instance data.
+ *
+ * @returns IPRT status code from rtSerialPortTermios2Cfg().
+ * @param   hSerialPort     The serial port handle.
+ * @param   pCfg            Where to store the configuration.
+ */
+RTDECL(int) RTSerialPortCfgQueryCurrent(RTSERIALPORT hSerialPort, PRTSERIALPORTCFG pCfg)
+{
+    PRTSERIALPORTINTERNAL pThis = hSerialPort;
+    AssertPtrReturn(pThis, VERR_INVALID_PARAMETER);
+    AssertReturn(pThis->u32Magic == RTSERIALPORT_MAGIC, VERR_INVALID_HANDLE);
+
+    int const rcConvert = rtSerialPortTermios2Cfg(pThis, &pThis->PortCfg, pCfg);
+    return rcConvert;
+}
+
+
+/**
+ * Applies a new configuration to the serial port.
+ *
+ * The configuration is converted to termios form, pending I/O is flushed, a
+ * custom baud rate divisor is programmed where required (Linux only) and the
+ * new attributes are set with tcsetattr(). The cached termios copy is only
+ * updated when that succeeds.
+ *
+ * @returns IPRT status code.
+ * @param   hSerialPort     The serial port handle.
+ * @param   pCfg            The configuration to apply.
+ * @param   pErrInfo        Passed on to rtSerialPortCfg2Termios().
+ */
+RTDECL(int) RTSerialPortCfgSet(RTSERIALPORT hSerialPort, PCRTSERIALPORTCFG pCfg, PRTERRINFO pErrInfo)
+{
+    PRTSERIALPORTINTERNAL pThis = hSerialPort;
+    AssertPtrReturn(pThis, VERR_INVALID_PARAMETER);
+    AssertReturn(pThis->u32Magic == RTSERIALPORT_MAGIC, VERR_INVALID_HANDLE);
+
+    struct termios PortCfgNew; RT_ZERO(PortCfgNew);
+    bool fBaudrateCust = false;
+    int rc = rtSerialPortCfg2Termios(pThis, pCfg, &PortCfgNew, &fBaudrateCust, pErrInfo);
+    if (RT_SUCCESS(rc))
+    {
+        int rcPsx = tcflush(pThis->iFd, TCIOFLUSH);
+        if (!rcPsx)
+        {
+#ifdef RT_OS_LINUX
+            if (fBaudrateCust)
+            {
+                /* Program the divisor for the non-standard baud rate. */
+                struct serial_struct SerLnx;
+                rcPsx = ioctl(pThis->iFd, TIOCGSERIAL, &SerLnx);
+                if (!rcPsx)
+                {
+                    SerLnx.custom_divisor = SerLnx.baud_base / pCfg->uBaudRate;
+                    if (!SerLnx.custom_divisor)
+                        SerLnx.custom_divisor = 1;
+                    SerLnx.flags &= ~ASYNC_SPD_MASK;
+                    SerLnx.flags |= ASYNC_SPD_CUST;
+                    rcPsx = ioctl(pThis->iFd, TIOCSSERIAL, &SerLnx);
+                }
+            }
+#else /* !RT_OS_LINUX */
+            /*
+             * Hosts not supporting custom baud rates should already fail in
+             * rtSerialPortCfg2Termios(), so only a custom baud rate request
+             * getting this far is unexpected; standard rates must not assert.
+             */
+            if (fBaudrateCust)
+                AssertMsgFailed(("Should not get here!\n"));
+#endif /* !RT_OS_LINUX */
+            pThis->fBaudrateCust = fBaudrateCust;
+            pThis->uBaudRateCust = pCfg->uBaudRate;
+
+            /* Skip tcsetattr() if programming the custom divisor failed above. */
+            if (!rcPsx)
+                rcPsx = tcsetattr(pThis->iFd, TCSANOW, &PortCfgNew);
+            if (rcPsx == -1)
+                rc = RTErrConvertFromErrno(errno);
+            else
+                memcpy(&pThis->PortCfg, &PortCfgNew, sizeof(struct termios));
+
+#ifdef RT_OS_LINUX
+            /*
+             * XXX In Linux, if a thread calls tcsetattr while the monitor thread is
+             * waiting in ioctl for a modem status change then 8250.c wrongly disables
+             * modem irqs and so the monitor thread never gets released. The workaround
+             * is to send a signal after each tcsetattr.
+             */
+            if (pThis->fOpenFlags & RTSERIALPORT_OPEN_F_SUPPORT_STATUS_LINE_MONITORING)
+                RTThreadPoke(pThis->hMonThrd);
+#endif
+        }
+        else
+            rc = RTErrConvertFromErrno(errno);
+    }
+
+    return rc;
+}
+
+
+/**
+ * Waits for one of the requested events on the serial port.
+ *
+ * Events left pending by earlier calls are returned first without polling.
+ * Otherwise the serial port descriptor and the wakeup pipe are polled. A zero
+ * timeout performs a single non-blocking poll() so that events which are
+ * already ready are still reported.
+ *
+ * @returns IPRT status code.
+ * @retval  VERR_TIMEOUT if nothing happened within the timeout.
+ * @retval  VERR_INTERRUPTED if woken up through the wakeup pipe with the
+ *          interrupt reason.
+ * @retval  VERR_DEV_IO_ERROR if poll() reports POLLERR for the serial port.
+ * @param   hSerialPort     The serial port handle.
+ * @param   fEvtMask        The events to wait for, RTSERIALPORT_EVT_F_XXX.
+ *                          RTSERIALPORT_EVT_F_STATUS_LINE_MONITOR_FAILED is
+ *                          always added.
+ * @param   pfEvtsRecv      Where to store the events received.
+ * @param   msTimeout       Timeout in milliseconds, RT_INDEFINITE_WAIT to wait
+ *                          forever.
+ */
+RTDECL(int) RTSerialPortEvtPoll(RTSERIALPORT hSerialPort, uint32_t fEvtMask, uint32_t *pfEvtsRecv,
+                                RTMSINTERVAL msTimeout)
+{
+    PRTSERIALPORTINTERNAL pThis = hSerialPort;
+    AssertPtrReturn(pThis, VERR_INVALID_PARAMETER);
+    AssertReturn(pThis->u32Magic == RTSERIALPORT_MAGIC, VERR_INVALID_HANDLE);
+    AssertReturn(!(fEvtMask & ~RTSERIALPORT_EVT_F_VALID_MASK), VERR_INVALID_PARAMETER);
+    AssertPtrReturn(pfEvtsRecv, VERR_INVALID_POINTER);
+
+    *pfEvtsRecv = 0;
+
+    fEvtMask |= RTSERIALPORT_EVT_F_STATUS_LINE_MONITOR_FAILED; /* This will be reported always, no matter what the caller wants. */
+
+    /* Return early if there are events pending from previous calls which weren't fetched yet. */
+    for (;;)
+    {
+        uint32_t fEvtsPending = ASMAtomicReadU32(&pThis->fEvtsPending);
+        if (fEvtsPending & fEvtMask)
+        {
+            *pfEvtsRecv = fEvtsPending & fEvtMask;
+            /* Write back, repeat the whole procedure if someone else raced us. */
+            if (ASMAtomicCmpXchgU32(&pThis->fEvtsPending, fEvtsPending & ~fEvtMask, fEvtsPending))
+                return VINF_SUCCESS;
+        }
+        else
+            break;
+    }
+
+    int rc = rtSerialPortSwitchBlockingMode(pThis, false);
+    if (RT_SUCCESS(rc))
+    {
+        struct pollfd aPollFds[2]; RT_ZERO(aPollFds);
+        aPollFds[0].fd      = pThis->iFd;
+        aPollFds[0].events  = POLLERR | POLLHUP;
+        aPollFds[0].revents = 0;
+        if (   (pThis->fOpenFlags & RTSERIALPORT_OPEN_F_READ)
+            && (fEvtMask & RTSERIALPORT_EVT_F_DATA_RX))
+            aPollFds[0].events |= POLLIN;
+        if (   (pThis->fOpenFlags & RTSERIALPORT_OPEN_F_WRITE)
+            && (fEvtMask & RTSERIALPORT_EVT_F_DATA_TX))
+            aPollFds[0].events |= POLLOUT;
+
+        aPollFds[1].fd      = pThis->iFdPipeR;
+        aPollFds[1].events  = POLLIN | POLLERR | POLLHUP;
+        aPollFds[1].revents = 0;
+
+        int rcPsx = 0;
+        int msTimeoutLeft = msTimeout == RT_INDEFINITE_WAIT ? -1 : (int)msTimeout;
+        /*
+         * Always poll at least once: with a zero timeout poll() returns
+         * immediately, which is exactly the non-blocking check the caller
+         * asked for.
+         */
+        do
+        {
+            uint64_t tsPollStart = RTTimeMilliTS();
+
+            rcPsx = poll(&aPollFds[0], RT_ELEMENTS(aPollFds), msTimeoutLeft);
+            if (rcPsx != -1 || errno != EINTR)
+                break;
+            /* Restart when getting interrupted, with the elapsed time taken off a finite timeout. */
+            if (msTimeoutLeft > -1)
+            {
+                uint64_t tsPollEnd  = RTTimeMilliTS();
+                uint64_t tsPollSpan = tsPollEnd - tsPollStart;
+                msTimeoutLeft -= RT_MIN(tsPollSpan, (uint32_t)msTimeoutLeft);
+            }
+        } while (msTimeoutLeft != 0);
+
+        uint32_t fEvtsPending = 0;
+        if (rcPsx < 0 && errno != EINTR)
+            rc = RTErrConvertFromErrno(errno);
+        else if (rcPsx > 0)
+        {
+            if (aPollFds[0].revents != 0)
+            {
+                if (aPollFds[0].revents & POLLERR)
+                    rc = VERR_DEV_IO_ERROR;
+                else
+                {
+                    fEvtsPending |= (aPollFds[0].revents & POLLIN)  ? RTSERIALPORT_EVT_F_DATA_RX : 0;
+                    fEvtsPending |= (aPollFds[0].revents & POLLOUT) ? RTSERIALPORT_EVT_F_DATA_TX : 0;
+                    /** @todo BREAK condition detection. */
+                }
+            }
+
+            if (aPollFds[1].revents != 0)
+            {
+                AssertReturn(!(aPollFds[1].revents & (POLLHUP | POLLERR | POLLNVAL)), VERR_INTERNAL_ERROR);
+                Assert(aPollFds[1].revents & POLLIN);
+
+                /* One byte per wakeup, the value tells why we were woken up. */
+                uint8_t bWakeupReason = 0;
+                ssize_t cbRead = read(pThis->iFdPipeR, &bWakeupReason, 1);
+                if (cbRead == 1)
+                {
+                    switch (bWakeupReason)
+                    {
+                        case RTSERIALPORT_WAKEUP_PIPE_REASON_INTERRUPT:
+                            rc = VERR_INTERRUPTED;
+                            break;
+                        case RTSERIALPORT_WAKEUP_PIPE_REASON_STS_LINE_CHANGED:
+                            fEvtsPending |= RTSERIALPORT_EVT_F_STATUS_LINE_CHANGED;
+                            break;
+                        case RTSERIALPORT_WAKEUP_PIPE_REASON_STS_LINE_MONITOR_FAILED:
+                            fEvtsPending |= RTSERIALPORT_EVT_F_STATUS_LINE_MONITOR_FAILED;
+                            break;
+                        default:
+                            AssertFailed();
+                            rc = VERR_INTERNAL_ERROR;
+                    }
+                }
+                else
+                    rc = VERR_INTERNAL_ERROR;
+            }
+        }
+        else
+            rc = VERR_TIMEOUT;
+
+        /* Hand out what the caller asked for and keep the rest for later calls. */
+        *pfEvtsRecv = fEvtsPending & fEvtMask;
+        fEvtsPending &= ~fEvtMask;
+        ASMAtomicOrU32(&pThis->fEvtsPending, fEvtsPending);
+    }
+
+    return rc;
+}
+
+
+/**
+ * Interrupts a thread waiting in RTSerialPortEvtPoll() by sending the interrupt
+ * reason through the wakeup pipe.
+ *
+ * @returns IPRT status code from rtSerialPortWakeupEvtPoller().
+ * @param   hSerialPort     The serial port handle.
+ */
+RTDECL(int) RTSerialPortEvtPollInterrupt(RTSERIALPORT hSerialPort)
+{
+    PRTSERIALPORTINTERNAL pThis = hSerialPort;
+    AssertPtrReturn(pThis, VERR_INVALID_PARAMETER);
+    AssertReturn(pThis->u32Magic == RTSERIALPORT_MAGIC, VERR_INVALID_HANDLE);
+
+    int const rcWakeup = rtSerialPortWakeupEvtPoller(pThis, RTSERIALPORT_WAKEUP_PIPE_REASON_INTERRUPT);
+    return rcWakeup;
+}
+
+
+/**
+ * Sets or clears the BREAK condition on the serial line.
+ *
+ * @returns IPRT status code.
+ * @param   hSerialPort     The serial port handle.
+ * @param   fSet            true issues TIOCSBRK, false issues TIOCCBRK.
+ */
+RTDECL(int) RTSerialPortChgBreakCondition(RTSERIALPORT hSerialPort, bool fSet)
+{
+    PRTSERIALPORTINTERNAL pThis = hSerialPort;
+    AssertPtrReturn(pThis, VERR_INVALID_PARAMETER);
+    AssertReturn(pThis->u32Magic == RTSERIALPORT_MAGIC, VERR_INVALID_HANDLE);
+
+    if (ioctl(pThis->iFd, fSet ? TIOCSBRK : TIOCCBRK) == -1)
+        return RTErrConvertFromErrno(errno);
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Changes the RTS and DTR status lines.
+ *
+ * The lines in fSet are raised first (TIOCMBIS), then the lines in fClear are
+ * dropped (TIOCMBIC). A failure of either ioctl is reported to the caller;
+ * the clear step is skipped when the set step did not succeed.
+ *
+ * @returns IPRT status code.
+ * @param   hSerialPort     The serial port handle.
+ * @param   fClear          Lines to clear, RTSERIALPORT_CHG_STS_LINES_F_XXX.
+ * @param   fSet            Lines to set, RTSERIALPORT_CHG_STS_LINES_F_XXX.
+ */
+RTDECL(int) RTSerialPortChgStatusLines(RTSERIALPORT hSerialPort, uint32_t fClear, uint32_t fSet)
+{
+    PRTSERIALPORTINTERNAL pThis = hSerialPort;
+    AssertPtrReturn(pThis, VERR_INVALID_PARAMETER);
+    AssertReturn(pThis->u32Magic == RTSERIALPORT_MAGIC, VERR_INVALID_HANDLE);
+
+    int rc          = VINF_SUCCESS;
+    int fTiocmSet   = 0;
+    int fTiocmClear = 0;
+
+    if (fClear & RTSERIALPORT_CHG_STS_LINES_F_RTS)
+        fTiocmClear |= TIOCM_RTS;
+    if (fClear & RTSERIALPORT_CHG_STS_LINES_F_DTR)
+        fTiocmClear |= TIOCM_DTR;
+
+    if (fSet & RTSERIALPORT_CHG_STS_LINES_F_RTS)
+        fTiocmSet |= TIOCM_RTS;
+    if (fSet & RTSERIALPORT_CHG_STS_LINES_F_DTR)
+        fTiocmSet |= TIOCM_DTR;
+
+    int rcPsx = ioctl(pThis->iFd, TIOCMBIS, &fTiocmSet);
+    if (!rcPsx)
+        rcPsx = ioctl(pThis->iFd, TIOCMBIC, &fTiocmClear);
+    /* Either ioctl failing is an error; errno belongs to the call that failed last. */
+    if (rcPsx == -1)
+        rc = RTErrConvertFromErrno(errno);
+
+    return rc;
+}
+
+
+/**
+ * Queries the current state of the DCD, RI, DSR and CTS status lines.
+ *
+ * A successful query also clears the pending status-line-changed event.
+ *
+ * @returns IPRT status code.
+ * @param   hSerialPort     The serial port handle.
+ * @param   pfStsLines      Where to store the RTSERIALPORT_STS_LINE_XXX mask;
+ *                          set to 0 on failure.
+ */
+RTDECL(int) RTSerialPortQueryStatusLines(RTSERIALPORT hSerialPort, uint32_t *pfStsLines)
+{
+    PRTSERIALPORTINTERNAL pThis = hSerialPort;
+    AssertPtrReturn(pThis, VERR_INVALID_PARAMETER);
+    AssertReturn(pThis->u32Magic == RTSERIALPORT_MAGIC, VERR_INVALID_HANDLE);
+    AssertPtrReturn(pfStsLines, VERR_INVALID_POINTER);
+
+    *pfStsLines = 0;
+
+    int fStsLines = 0;
+    if (ioctl(pThis->iFd, TIOCMGET, &fStsLines) != 0)
+        return RTErrConvertFromErrno(errno);
+
+    /* This resets the status line event pending flag, retrying until the update sticks. */
+    uint32_t fEvtsOld;
+    do
+        fEvtsOld = ASMAtomicReadU32(&pThis->fEvtsPending);
+    while (!ASMAtomicCmpXchgU32(&pThis->fEvtsPending, fEvtsOld & ~RTSERIALPORT_EVT_F_STATUS_LINE_CHANGED, fEvtsOld));
+
+    /* Translate the TIOCM_XXX bits into the IPRT representation. */
+    uint32_t fLines = 0;
+    if (fStsLines & TIOCM_CAR)
+        fLines |= RTSERIALPORT_STS_LINE_DCD;
+    if (fStsLines & TIOCM_RNG)
+        fLines |= RTSERIALPORT_STS_LINE_RI;
+    if (fStsLines & TIOCM_DSR)
+        fLines |= RTSERIALPORT_STS_LINE_DSR;
+    if (fStsLines & TIOCM_CTS)
+        fLines |= RTSERIALPORT_STS_LINE_CTS;
+    *pfStsLines = fLines;
+
+    return VINF_SUCCESS;
+}
+
diff --git a/src/VBox/Runtime/r3/posix/shmem-posix.cpp b/src/VBox/Runtime/r3/posix/shmem-posix.cpp
new file mode 100644
index 00000000..5e21f26b
--- /dev/null
+++ b/src/VBox/Runtime/r3/posix/shmem-posix.cpp
@@ -0,0 +1,419 @@
+/* $Id: shmem-posix.cpp $ */
+/** @file
+ * IPRT - Named shared memory object, POSIX Implementation.
+ */
+
+/*
+ * Copyright (C) 2018-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#include <iprt/shmem.h>
+#include "internal/iprt.h"
+
+#include <iprt/asm.h>
+#include <iprt/assert.h>
+#include <iprt/cdefs.h>
+#include <iprt/err.h>
+#include <iprt/mem.h>
+#include <iprt/string.h>
+#include "internal/magics.h"
+
+#include <errno.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <limits.h>
+
+/* Workaround on systems which do not provide this. */
+#ifndef NAME_MAX
+# define NAME_MAX 255
+#endif
+
+
+/*********************************************************************************************************************************
+* Structures and Typedefs *
+*********************************************************************************************************************************/
+
+/**
+ * Shared memory object mapping descriptor.
+ *
+ * One descriptor tracks one mmap()'ed region of the shared memory object;
+ * identical map requests share a descriptor through the reference counter.
+ */
+typedef struct RTSHMEMMAPPINGDESC
+{
+ /** Number of references held to this mapping, 0 if the descriptor is free. */
+ volatile uint32_t cMappings;
+ /** Pointer to the region mapping (the address returned by mmap()). */
+ void *pvMapping;
+ /** Start offset of the region within the shared memory object, in bytes. */
+ size_t offRegion;
+ /** Size of the region in bytes. */
+ size_t cbRegion;
+ /** Access flags for this region (RTSHMEM_MAP_F_XXX). */
+ uint32_t fFlags;
+} RTSHMEMMAPPINGDESC;
+/** Pointer to a shared memory object mapping descriptor. */
+typedef RTSHMEMMAPPINGDESC *PRTSHMEMMAPPINGDESC;
+/** Pointer to a constant shared memory object mapping descriptor. */
+typedef const RTSHMEMMAPPINGDESC *PCRTSHMEMMAPPINGDESC;
+
+
+/**
+ * Internal shared memory object state.
+ *
+ * Allocated as one block by RTShMemOpen(): the structure, then the mapping
+ * descriptor array, then the object name.
+ */
+typedef struct RTSHMEMINT
+{
+ /** Magic value (RTSHMEM_MAGIC). */
+ uint32_t u32Magic;
+ /** File descriptor for the underlying shared memory object. */
+ int iFdShm;
+ /** Pointer to the shared memory object name (with the leading '/'), located
+ * right behind the mapping descriptor array in the same allocation. */
+ char *pszName;
+ /** Flag whether this instance created the named shared memory object; if set
+ * RTShMemClose() unlinks the name. */
+ bool fCreate;
+ /** Overall number of mappings active for this shared memory object. */
+ volatile uint32_t cMappings;
+ /** Maximum number of mapping descriptors allocated. */
+ uint32_t cMappingDescsMax;
+ /** Number of mapping descriptors used. */
+ volatile uint32_t cMappingDescsUsed;
+ /** Array of mapping descriptors - variable in size, cMappingDescsMax entries
+ * are actually allocated. */
+ RTSHMEMMAPPINGDESC aMappingDescs[1];
+} RTSHMEMINT;
+/** Pointer to the internal shared memory object state. */
+typedef RTSHMEMINT *PRTSHMEMINT;
+
+
+
+/**
+ * Returns a mapping descriptor matching the given region properties or NULL if none was found.
+ *
+ * Only descriptors which are in use (reference count above zero) are
+ * considered. A free descriptor keeps the properties of its last mapping (or
+ * all zeros if never used) and must not be mistaken for a live mapping.
+ *
+ * @returns Pointer to the matching mapping descriptor or NULL if not found.
+ * @param   pThis       Pointer to the shared memory object instance.
+ * @param   offRegion   Offset into the shared memory object to start mapping at.
+ * @param   cbRegion    Size of the region to map.
+ * @param   fFlags      Desired properties of the mapped region, combination of RTSHMEM_MAP_F_* defines.
+ */
+DECLINLINE(PRTSHMEMMAPPINGDESC) rtShMemMappingDescFindByProp(PRTSHMEMINT pThis, size_t offRegion, size_t cbRegion, uint32_t fFlags)
+{
+    for (uint32_t i = 0; i < pThis->cMappingDescsMax; i++)
+    {
+        PRTSHMEMMAPPINGDESC pDesc = &pThis->aMappingDescs[i];
+        if (   pDesc->cMappings != 0 /* skip free descriptors carrying stale properties */
+            && pDesc->offRegion == offRegion
+            && pDesc->cbRegion  == cbRegion
+            && pDesc->fFlags    == fFlags)
+            return pDesc;
+    }
+
+    return NULL;
+}
+
+
+/**
+ * Creates or opens a named shared memory object.
+ *
+ * The instance data, the mapping descriptor array and the name (prefixed with
+ * a '/') are allocated as a single zeroed block.
+ *
+ * @returns IPRT status code.
+ * @retval  VERR_NO_MEMORY if the instance data could not be allocated.
+ * @param   phShMem         Where to store the handle on success.
+ * @param   pszName         Name of the object without the leading slash; must
+ *                          not be empty.
+ * @param   fFlags          Combination of RTSHMEM_O_F_XXX.
+ * @param   cbMax           If not zero, the object is resized to this many
+ *                          bytes via RTShMemSetSize().
+ * @param   cMappingsHint   Number of mapping descriptors to allocate, 0 picks
+ *                          the default of 5; must be below 64.
+ */
+RTDECL(int) RTShMemOpen(PRTSHMEM phShMem, const char *pszName, uint32_t fFlags, size_t cbMax, uint32_t cMappingsHint)
+{
+    AssertPtrReturn(phShMem, VERR_INVALID_PARAMETER);
+    AssertPtrReturn(pszName, VERR_INVALID_PARAMETER);
+    AssertReturn(!(fFlags & ~RTSHMEM_O_F_VALID_MASK), VERR_INVALID_PARAMETER);
+    AssertReturn(cMappingsHint < 64, VERR_OUT_OF_RANGE);
+
+    size_t cchName = strlen(pszName);
+    AssertReturn(cchName, VERR_INVALID_PARAMETER);
+    AssertReturn(cchName < NAME_MAX - 1, VERR_INVALID_PARAMETER); /* account for the / we add later on. */
+    cMappingsHint = cMappingsHint == 0 ? 5 : cMappingsHint;
+    int rc = VINF_SUCCESS;
+    PRTSHMEMINT pThis = (PRTSHMEMINT)RTMemAllocZ(RT_UOFFSETOF_DYN(RTSHMEMINT, aMappingDescs[cMappingsHint]) + cchName + 2); /* '/' + terminator. */
+    if (RT_LIKELY(pThis))
+    {
+        pThis->u32Magic            = RTSHMEM_MAGIC;
+        pThis->pszName             = (char *)&pThis->aMappingDescs[cMappingsHint];
+        /*pThis->fCreate           = false; */
+        /*pThis->cMappings         = 0; */
+        pThis->cMappingDescsMax    = cMappingsHint;
+        /*pThis->cMappingDescsUsed = 0; */
+        /* The terminator is already in place because the block was zero-allocated. */
+        pThis->pszName[0]          = '/';
+        memcpy(&pThis->pszName[1], pszName, cchName);
+        int fShmFlags = 0;
+        if (fFlags & RTSHMEM_O_F_CREATE)
+        {
+            fShmFlags |= O_CREAT;
+            pThis->fCreate = true;
+        }
+        if ((fFlags & RTSHMEM_O_F_CREATE_EXCL) == RTSHMEM_O_F_CREATE_EXCL)
+            fShmFlags |= O_EXCL;
+        if (   (fFlags & RTSHMEM_O_F_READWRITE) == RTSHMEM_O_F_READWRITE
+            || (fFlags & RTSHMEM_O_F_WRITE))
+            fShmFlags |= O_RDWR;
+        else
+            fShmFlags |= O_RDONLY;
+        if (fFlags & RTSHMEM_O_F_TRUNCATE)
+            fShmFlags |= O_TRUNC;
+        pThis->iFdShm = shm_open(pThis->pszName, fShmFlags , 0600);
+        /* shm_open() signals failure with -1 only; descriptor 0 is a valid result. */
+        if (pThis->iFdShm != -1)
+        {
+            if (cbMax)
+                rc = RTShMemSetSize(pThis, cbMax);
+            if (RT_SUCCESS(rc))
+            {
+                *phShMem = pThis;
+                return VINF_SUCCESS;
+            }
+
+            close(pThis->iFdShm);
+        }
+        else
+            rc = RTErrConvertFromErrno(errno);
+
+        RTMemFree(pThis);
+    }
+    else
+        rc = VERR_NO_MEMORY;
+
+    return rc;
+}
+
+
+/**
+ * Closes a shared memory object handle.
+ *
+ * The name is unlinked as well if this instance was opened with the create
+ * flag. The handle stays valid when close() fails.
+ *
+ * @returns IPRT status code.
+ * @retval  VERR_INVALID_STATE if there are still active mappings.
+ * @param   hShMem      The shared memory object handle.
+ */
+RTDECL(int) RTShMemClose(RTSHMEM hShMem)
+{
+    PRTSHMEMINT pThis = hShMem;
+    AssertPtrReturn(pThis, VERR_INVALID_PARAMETER);
+    AssertReturn(pThis->u32Magic == RTSHMEM_MAGIC, VERR_INVALID_HANDLE);
+    AssertReturn(!pThis->cMappings, VERR_INVALID_STATE);
+
+    if (close(pThis->iFdShm) != 0)
+        return RTErrConvertFromErrno(errno);
+
+    if (pThis->fCreate)
+        shm_unlink(pThis->pszName); /* Ignore any error here. */
+
+    /* Invalidate the handle before handing the memory back. */
+    pThis->u32Magic = RTSHMEM_MAGIC_DEAD;
+    RTMemFree(pThis);
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Deletes (unlinks) the named shared memory object.
+ *
+ * @returns IPRT status code.
+ * @param   pszName     Name of the object without the leading slash; must not
+ *                      be empty.
+ */
+RTDECL(int) RTShMemDelete(const char *pszName)
+{
+    AssertPtrReturn(pszName, VERR_INVALID_POINTER);
+
+    size_t cchName = strlen(pszName);
+    AssertReturn(cchName, VERR_INVALID_PARAMETER);
+    AssertReturn(cchName < NAME_MAX - 1, VERR_INVALID_PARAMETER); /* account for the / we add later on. */
+
+    char *pszShmName = NULL;
+    int rc = RTStrAllocEx(&pszShmName, cchName + 2); /* '/' + terminator */
+    if (!RT_SUCCESS(rc))
+        return rc;
+
+    /* Build "/<name>"; the copy includes the terminator of the source string. */
+    pszShmName[0] = '/';
+    memcpy(&pszShmName[1], pszName, cchName + 1);
+
+    if (shm_unlink(pszShmName) != 0)
+        rc = RTErrConvertFromErrno(errno);
+
+    RTStrFree(pszShmName);
+    return rc;
+}
+
+
+/**
+ * Returns the number of mappings currently active for the shared memory object.
+ *
+ * @returns Mapping count, 0 if the handle is invalid.
+ * @param   hShMem      The shared memory object handle.
+ */
+RTDECL(uint32_t) RTShMemRefCount(RTSHMEM hShMem)
+{
+    PRTSHMEMINT pThis = hShMem;
+    AssertPtrReturn(pThis, 0);
+    AssertReturn(pThis->u32Magic == RTSHMEM_MAGIC, 0);
+
+    uint32_t const cActive = pThis->cMappings;
+    return cActive;
+}
+
+
+/**
+ * Sets the size of the shared memory object using ftruncate().
+ *
+ * @returns IPRT status code.
+ * @retval  VERR_INVALID_STATE if there are active mappings.
+ * @param   hShMem      The shared memory object handle.
+ * @param   cbMem       The new size in bytes.
+ */
+RTDECL(int) RTShMemSetSize(RTSHMEM hShMem, size_t cbMem)
+{
+    PRTSHMEMINT pThis = hShMem;
+    AssertPtrReturn(pThis, VERR_INVALID_PARAMETER);
+    AssertReturn(pThis->u32Magic == RTSHMEM_MAGIC, VERR_INVALID_HANDLE);
+    AssertReturn(!pThis->cMappings, VERR_INVALID_STATE);
+
+    if (ftruncate(pThis->iFdShm, (off_t)cbMem) != 0)
+        return RTErrConvertFromErrno(errno);
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Queries the current size of the shared memory object using fstat().
+ *
+ * @returns IPRT status code.
+ * @param   hShMem      The shared memory object handle.
+ * @param   pcbMem      Where to store the size in bytes on success.
+ */
+RTDECL(int) RTShMemQuerySize(RTSHMEM hShMem, size_t *pcbMem)
+{
+    PRTSHMEMINT pThis = hShMem;
+    AssertPtrReturn(pThis, VERR_INVALID_PARAMETER);
+    AssertReturn(pThis->u32Magic == RTSHMEM_MAGIC, VERR_INVALID_HANDLE);
+    AssertPtrReturn(pcbMem, VERR_INVALID_PARAMETER);
+
+    struct stat ObjInfo;
+    if (fstat(pThis->iFdShm, &ObjInfo) != 0)
+        return RTErrConvertFromErrno(errno);
+
+    *pcbMem = ObjInfo.st_size;
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Maps a region of the shared memory object into the address space.
+ *
+ * An existing mapping with identical offset, size and flags is reused by
+ * taking another reference on its descriptor; otherwise a free descriptor is
+ * claimed and the region is mmap()'ed.
+ *
+ * @returns IPRT status code.
+ * @retval  VERR_SHMEM_MAXIMUM_MAPPINGS_REACHED if no free descriptor is left.
+ * @param   hShMem      The shared memory object handle.
+ * @param   offRegion   Offset into the shared memory object to start mapping at.
+ * @param   cbRegion    Size of the region to map.
+ * @param   fFlags      Combination of RTSHMEM_MAP_F_XXX.
+ * @param   ppv         Where to store the start address of the mapping on success.
+ */
+RTDECL(int) RTShMemMapRegion(RTSHMEM hShMem, size_t offRegion, size_t cbRegion, uint32_t fFlags, void **ppv)
+{
+    PRTSHMEMINT pThis = hShMem;
+    AssertPtrReturn(pThis, VERR_INVALID_PARAMETER);
+    AssertReturn(pThis->u32Magic == RTSHMEM_MAGIC, VERR_INVALID_HANDLE);
+    AssertPtrReturn(ppv, VERR_INVALID_PARAMETER);
+    AssertReturn(!(fFlags & ~RTSHMEM_MAP_F_VALID_MASK), VERR_INVALID_PARAMETER);
+
+    /* Try to find a mapping with compatible parameters first. */
+    PRTSHMEMMAPPINGDESC pMappingDesc = NULL;
+    for (uint32_t iTry = 0; iTry < 10; iTry++)
+    {
+        PRTSHMEMMAPPINGDESC pCandidate = rtShMemMappingDescFindByProp(pThis, offRegion, cbRegion, fFlags);
+        if (!pCandidate)
+            break;
+
+        /* Increase the mapping count and check that the region is still accessible by us. */
+        if (   ASMAtomicIncU32(&pCandidate->cMappings) > 1
+            && pCandidate->offRegion == offRegion
+            && pCandidate->cbRegion  == cbRegion
+            && pCandidate->fFlags    == fFlags)
+        {
+            pMappingDesc = pCandidate;
+            break;
+        }
+
+        /*
+         * Mapping was freed (or reused for something else) in between. Give back
+         * the reference just taken, otherwise a free descriptor would look like a
+         * live one on the next round and its stale address would be handed out.
+         */
+        ASMAtomicDecU32(&pCandidate->cMappings);
+    }
+
+    int rc = VINF_SUCCESS;
+    if (!pMappingDesc)
+    {
+        /* Find an empty region descriptor and map the region. */
+        for (uint32_t i = 0; i < pThis->cMappingDescsMax && !pMappingDesc; i++)
+        {
+            if (!pThis->aMappingDescs[i].cMappings)
+            {
+                pMappingDesc = &pThis->aMappingDescs[i];
+
+                /* Try to grab this one. */
+                if (ASMAtomicIncU32(&pMappingDesc->cMappings) == 1)
+                    break;
+
+                /* Somebody raced us, drop reference and continue. */
+                ASMAtomicDecU32(&pMappingDesc->cMappings);
+                pMappingDesc = NULL;
+            }
+        }
+
+        if (RT_LIKELY(pMappingDesc))
+        {
+            /* Try to map it. */
+            int fMmapFlags = 0;
+            int fProt = 0;
+            if (fFlags & RTSHMEM_MAP_F_READ)
+                fProt |= PROT_READ;
+            if (fFlags & RTSHMEM_MAP_F_WRITE)
+                fProt |= PROT_WRITE;
+            if (fFlags & RTSHMEM_MAP_F_EXEC)
+                fProt |= PROT_EXEC;
+            if (fFlags & RTSHMEM_MAP_F_COW)
+                fMmapFlags |= MAP_PRIVATE;
+            else
+                fMmapFlags |= MAP_SHARED;
+
+            void *pv = mmap(NULL, cbRegion, fProt, fMmapFlags, pThis->iFdShm, (off_t)offRegion);
+            if (pv != MAP_FAILED)
+            {
+                pMappingDesc->pvMapping = pv;
+                pMappingDesc->offRegion = offRegion;
+                pMappingDesc->cbRegion  = cbRegion;
+                pMappingDesc->fFlags    = fFlags;
+                /* Balances the decrement done when the last reference is unmapped. */
+                ASMAtomicIncU32(&pThis->cMappingDescsUsed);
+            }
+            else
+            {
+                rc = RTErrConvertFromErrno(errno);
+                ASMAtomicDecU32(&pMappingDesc->cMappings);
+            }
+        }
+        else
+            rc = VERR_SHMEM_MAXIMUM_MAPPINGS_REACHED;
+    }
+
+    if (RT_SUCCESS(rc))
+    {
+        *ppv = pMappingDesc->pvMapping;
+        ASMAtomicIncU32(&pThis->cMappings);
+    }
+
+    return rc;
+}
+
+
+/**
+ * Releases one reference to a region mapped with RTShMemMapRegion().
+ *
+ * The region is munmap()'ed when the last reference to its descriptor is
+ * dropped. Every successful call lowers the overall mapping count of the
+ * handle by one, mirroring the increment done by each successful map call.
+ *
+ * @returns IPRT status code.
+ * @retval  VERR_INVALID_PARAMETER if pv does not belong to an active mapping.
+ * @param   hShMem      The shared memory object handle.
+ * @param   pv          Start address of the mapping as returned by RTShMemMapRegion().
+ */
+RTDECL(int) RTShMemUnmapRegion(RTSHMEM hShMem, void *pv)
+{
+    PRTSHMEMINT pThis = hShMem;
+    AssertPtrReturn(pThis, VERR_INVALID_PARAMETER);
+    AssertReturn(pThis->u32Magic == RTSHMEM_MAGIC, VERR_INVALID_HANDLE);
+    AssertPtrReturn(pv, VERR_INVALID_PARAMETER);
+
+    /*
+     * Find the mapping descriptor by the given region address. Free descriptors
+     * keep their old address, so they are skipped to stop a repeated unmap of
+     * the same address from underflowing the reference counter.
+     */
+    PRTSHMEMMAPPINGDESC pMappingDesc = NULL;
+    for (uint32_t i = 0; i < pThis->cMappingDescsMax && !pMappingDesc; i++)
+    {
+        if (   pThis->aMappingDescs[i].pvMapping == pv
+            && pThis->aMappingDescs[i].cMappings)
+        {
+            pMappingDesc = &pThis->aMappingDescs[i];
+            break;
+        }
+    }
+
+    AssertPtrReturn(pMappingDesc, VERR_INVALID_PARAMETER);
+
+    int rc = VINF_SUCCESS;
+    /* Fetch the size before dropping the reference, the descriptor may be reused afterwards. */
+    size_t cbRegion = pMappingDesc->cbRegion;
+    if (!ASMAtomicDecU32(&pMappingDesc->cMappings))
+    {
+        /* Last mapping of this region was unmapped, so do the real unmapping now. */
+        if (munmap(pv, cbRegion))
+        {
+            /* Convert errno first, then restore the reference so the caller can retry. */
+            rc = RTErrConvertFromErrno(errno);
+            ASMAtomicIncU32(&pMappingDesc->cMappings);
+        }
+        else
+            ASMAtomicDecU32(&pThis->cMappingDescsUsed);
+    }
+
+    /* Each successful unmap undoes exactly one successful map call. */
+    if (RT_SUCCESS(rc))
+        ASMAtomicDecU32(&pThis->cMappings);
+
+    return rc;
+}
+
diff --git a/src/VBox/Runtime/r3/posix/symlink-posix.cpp b/src/VBox/Runtime/r3/posix/symlink-posix.cpp
new file mode 100644
index 00000000..e43f5b36
--- /dev/null
+++ b/src/VBox/Runtime/r3/posix/symlink-posix.cpp
@@ -0,0 +1,247 @@
+/* $Id: symlink-posix.cpp $ */
+/** @file
+ * IPRT - Symbolic Links, POSIX.
+ */
+
+/*
+ * Copyright (C) 2010-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP RTLOGGROUP_SYMLINK
+
+#include <errno.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include <iprt/symlink.h>
+
+#include <iprt/assert.h>
+#include <iprt/err.h>
+#include <iprt/log.h>
+#include <iprt/mem.h>
+#include <iprt/string.h>
+#include "internal/path.h"
+
+
+
+RTDECL(bool) RTSymlinkExists(const char *pszSymlink)
+{
+    /* Convert to the native path form; a failed conversion is reported as "no link". */
+    bool        fIsLink = false;
+    const char *pszNative;
+    if (RT_SUCCESS(rtPathToNative(&pszNative, pszSymlink, NULL)))
+    {
+        /* lstat() inspects the link itself rather than what it points to. */
+        struct stat StatBuf;
+        if (lstat(pszNative, &StatBuf) == 0)
+            fIsLink = S_ISLNK(StatBuf.st_mode);
+        rtPathFreeNative(pszNative, pszSymlink);
+    }
+
+    LogFlow(("RTSymlinkExists(%p={%s}): returns %RTbool\n", pszSymlink, pszSymlink, fIsLink));
+    return fIsLink;
+}
+
+
+RTDECL(bool) RTSymlinkIsDangling(const char *pszSymlink)
+{
+    bool        fDangling = false;
+    const char *pszNative;
+    if (RT_SUCCESS(rtPathToNative(&pszNative, pszSymlink, NULL)))
+    {
+        /* Step 1: the path itself must be a symbolic link (lstat does not follow it). */
+        struct stat StatBuf;
+        if (   lstat(pszNative, &StatBuf) == 0
+            && S_ISLNK(StatBuf.st_mode))
+        {
+            /* Step 2: following the link (stat) must fail with one of the "target unreachable" errors. */
+            errno = 0;
+            if (stat(pszNative, &StatBuf) != 0)
+                fDangling = errno == ENOENT
+                         || errno == ENOTDIR
+                         || errno == ELOOP;
+        }
+
+        rtPathFreeNative(pszNative, pszSymlink);
+    }
+
+    LogFlow(("RTSymlinkIsDangling(%p={%s}): returns %RTbool\n", pszSymlink, pszSymlink, fDangling));
+    return fDangling;
+}
+
+
+RTDECL(int) RTSymlinkCreate(const char *pszSymlink, const char *pszTarget, RTSYMLINKTYPE enmType, uint32_t fCreate)
+{
+    RT_NOREF_PV(fCreate);
+
+    /*
+     * Reject out-of-range link types and bad string pointers up front.
+     */
+    AssertReturn(enmType > RTSYMLINKTYPE_INVALID && enmType < RTSYMLINKTYPE_END, VERR_INVALID_PARAMETER);
+    AssertPtrReturn(pszSymlink, VERR_INVALID_POINTER);
+    AssertPtrReturn(pszTarget, VERR_INVALID_POINTER);
+
+    /*
+     * Convert both paths; each successful conversion is paired with a rtPathFreeNative call below.
+     */
+    const char *pszLinkNative;
+    int rc = rtPathToNative(&pszLinkNative, pszSymlink, NULL);
+    if (RT_SUCCESS(rc))
+    {
+        const char *pszTargetNative;
+        rc = rtPathToNative(&pszTargetNative, pszTarget, NULL);
+        if (RT_SUCCESS(rc))
+        {
+            /*
+             * symlink(2) takes the target first and the new link name second.
+             * On failure errno is translated into an IPRT status code.
+             */
+            rc = symlink(pszTargetNative, pszLinkNative) == 0
+               ? VINF_SUCCESS
+               : RTErrConvertFromErrno(errno);
+
+            rtPathFreeNative(pszTargetNative, pszTarget);
+        }
+        rtPathFreeNative(pszLinkNative, pszSymlink);
+    }
+
+    LogFlow(("RTSymlinkCreate(%p={%s}, %p={%s}, %d, %#x): returns %Rrc\n", pszSymlink, pszSymlink, pszTarget, pszTarget, enmType, fCreate, rc));
+    return rc;
+}
+
+
+RTDECL(int) RTSymlinkDelete(const char *pszSymlink, uint32_t fDelete)
+{
+    RT_NOREF_PV(fDelete);
+
+    char const *pszNativeSymlink;
+    int rc = rtPathToNative(&pszNativeSymlink, pszSymlink, NULL);
+    if (RT_SUCCESS(rc))
+    {
+        struct stat s; /* lstat() so that the link itself is examined, not its target. */
+        if (!lstat(pszNativeSymlink, &s))
+        {
+            if (S_ISLNK(s.st_mode))
+            {
+                if (unlink(pszNativeSymlink) == 0)
+                    rc = VINF_SUCCESS;
+                else
+                    rc = RTErrConvertFromErrno(errno);
+            }
+            else
+                rc = VERR_NOT_SYMLINK; /* Refuse to remove anything that is not a symbolic link. */
+        }
+        else
+            rc = RTErrConvertFromErrno(errno);
+        rtPathFreeNative(pszNativeSymlink, pszSymlink);
+    }
+
+    LogFlow(("RTSymlinkDelete(%p={%s}, %#x): returns %Rrc\n", pszSymlink, pszSymlink, fDelete, rc));
+    return rc;
+}
+
+
+RTDECL(int) RTSymlinkRead(const char *pszSymlink, char *pszTarget, size_t cbTarget, uint32_t fRead)
+{
+    RT_NOREF_PV(fRead);
+    /* Let RTSymlinkReadA produce the target string, then copy it into the caller's buffer. */
+    char *pszHeapTarget;
+    int   rc = RTSymlinkReadA(pszSymlink, &pszHeapTarget);
+    if (RT_SUCCESS(rc))
+    {
+        rc = RTStrCopy(pszTarget, cbTarget, pszHeapTarget);
+        RTStrFree(pszHeapTarget);
+    }
+    LogFlow(("RTSymlinkRead(%p={%s}): returns %Rrc\n", pszSymlink, pszSymlink, rc));
+    return rc;
+}
+
+
+RTDECL(int) RTSymlinkReadA(const char *pszSymlink, char **ppszTarget) /* Reads the link target; on success *ppszTarget is filled in by rtPathFromNativeDup. */
+{
+ AssertPtr(ppszTarget);
+ char const *pszNativeSymlink;
+ int rc = rtPathToNative(&pszNativeSymlink, pszSymlink, NULL);
+ if (RT_SUCCESS(rc))
+ {
+ /* Guess the initial buffer size: the lstat size aligned to 64 via RT_ALIGN_Z (minimum 64), or 1024 if lstat fails. */
+ ssize_t cbBuf;
+ struct stat s;
+ if (!lstat(pszNativeSymlink, &s))
+ cbBuf = RT_MAX(RT_ALIGN_Z(s.st_size, 64), 64);
+ else
+ cbBuf = 1024;
+
+ /* Read loop that grows the buffer. Each pass frees the previous buffer and allocates a new one of cbBuf bytes. */
+ char *pszBuf = NULL;
+ for (;;)
+ {
+ RTMemTmpFree(pszBuf);
+ pszBuf = (char *)RTMemTmpAlloc(cbBuf);
+ if (pszBuf)
+ {
+ ssize_t cbReturned = readlink(pszNativeSymlink, pszBuf, cbBuf);
+ if (cbReturned >= cbBuf) /* A completely filled buffer may be truncated and has no room for the terminator. */
+ {
+ /* Increase the buffer size and try again */
+ cbBuf *= 2;
+ continue;
+ }
+
+ if (cbReturned > 0)
+ {
+ pszBuf[cbReturned] = '\0'; /* readlink() does not terminate the string itself. */
+ rc = rtPathFromNativeDup(ppszTarget, pszBuf, pszSymlink);
+ }
+ else if (errno == EINVAL)
+ rc = VERR_NOT_SYMLINK;
+ else
+ rc = RTErrConvertFromErrno(errno);
+ }
+ else
+ rc = VERR_NO_TMP_MEMORY;
+ break;
+ } /* for loop */
+
+ RTMemTmpFree(pszBuf);
+ rtPathFreeNative(pszNativeSymlink, pszSymlink);
+ }
+
+ if (RT_SUCCESS(rc))
+ LogFlow(("RTSymlinkReadA(%p={%s},%p): returns %Rrc *ppszTarget=%p:{%s}\n", pszSymlink, pszSymlink, ppszTarget, rc, *ppszTarget, *ppszTarget));
+ else
+ LogFlow(("RTSymlinkReadA(%p={%s},%p): returns %Rrc\n", pszSymlink, pszSymlink, ppszTarget, rc));
+ return rc;
+}
+
diff --git a/src/VBox/Runtime/r3/posix/thread-posix.cpp b/src/VBox/Runtime/r3/posix/thread-posix.cpp
new file mode 100644
index 00000000..cd621ce0
--- /dev/null
+++ b/src/VBox/Runtime/r3/posix/thread-posix.cpp
@@ -0,0 +1,780 @@
+/* $Id: thread-posix.cpp $ */
+/** @file
+ * IPRT - Threads, POSIX.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP RTLOGGROUP_THREAD
+#include <errno.h>
+#include <limits.h>
+#include <pthread.h>
+#include <signal.h>
+#include <stdlib.h>
+#if defined(RT_OS_LINUX)
+# include <unistd.h>
+# include <sys/syscall.h>
+#endif
+#if defined(RT_OS_SOLARIS)
+# include <sched.h>
+# include <sys/resource.h>
+#endif
+#if defined(RT_OS_DARWIN)
+# include <mach/thread_act.h>
+# include <mach/thread_info.h>
+# include <mach/host_info.h>
+# include <mach/mach_init.h>
+# include <mach/mach_host.h>
+#endif
+#if defined(RT_OS_DARWIN) /*|| defined(RT_OS_FREEBSD) - later */ \
+ || (defined(RT_OS_LINUX) && !defined(IN_RT_STATIC) /* static + dlsym = trouble */) \
+ || defined(IPRT_MAY_HAVE_PTHREAD_SET_NAME_NP)
+# define IPRT_MAY_HAVE_PTHREAD_SET_NAME_NP
+# include <dlfcn.h>
+#endif
+#if defined(RT_OS_HAIKU)
+# include <OS.h>
+#endif
+#if defined(RT_OS_DARWIN)
+# define sigprocmask pthread_sigmask /* On xnu sigprocmask works on the process, not the calling thread as elsewhere. */
+#endif
+
+#include <iprt/thread.h>
+#include <iprt/log.h>
+#include <iprt/assert.h>
+#include <iprt/asm.h>
+#include <iprt/err.h>
+#include <iprt/initterm.h>
+#include <iprt/string.h>
+#include <iprt/semaphore.h>
+#include <iprt/list.h>
+#include <iprt/once.h>
+#include <iprt/critsect.h>
+#include <iprt/req.h>
+#include "internal/thread.h"
+
+
+/*********************************************************************************************************************************
+* Defined Constants And Macros *
+*********************************************************************************************************************************/
+/*#ifndef IN_GUEST - shouldn't need to exclude this now with the non-obtrusive init option. */
+/** Includes RTThreadPoke. */
+# define RTTHREAD_POSIX_WITH_POKE
+/*#endif*/
+
+
+/*********************************************************************************************************************************
+* Global Variables *
+*********************************************************************************************************************************/
+/** The pthread key in which we store the pointer to our own PRTTHREAD structure.
+ * @note There is no defined NIL value here, nor can we really assume this is an
+ * integer. However, zero is a valid key on Linux, so we get into trouble
+ * if we accidentally use it uninitialized.
+ *
+ * So, we ASSUME it's an integer value and the valid range is in approx 0
+ * to PTHREAD_KEYS_MAX. Solaris has at least one negative value (-1)
+ * defined. Thus, we go for 16 MAX values below zero and keep our fingers
+ * crossed that it will always be an invalid key value everywhere...
+ *
+ * See also NIL_RTTLS, which is -1.
+ */
+static pthread_key_t g_SelfKey = (pthread_key_t)(-PTHREAD_KEYS_MAX * 16);
+#ifdef RTTHREAD_POSIX_WITH_POKE
+/** The signal we use for poking threads.
+ * This is set to -1 if no available signal was found. */
+static int volatile g_iSigPokeThread = -1;
+#endif
+
+#ifdef IPRT_MAY_HAVE_PTHREAD_SET_NAME_NP
+# if defined(RT_OS_DARWIN)
+/**
+ * The Mac OS X (10.6 and later) variant of pthread_setname_np.
+ *
+ * @returns errno.h
+ * @param pszName The new thread name.
+ */
+typedef int (*PFNPTHREADSETNAME)(const char *pszName);
+# else
+/**
+ * The variant of pthread_setname_np most other unix-like systems implement.
+ *
+ * @returns errno.h
+ * @param hThread The thread.
+ * @param pszName The new thread name.
+ */
+typedef int (*PFNPTHREADSETNAME)(pthread_t hThread, const char *pszName);
+# endif
+
+/** Pointer to pthread_setname_np if found. */
+static PFNPTHREADSETNAME g_pfnThreadSetName = NULL;
+#endif /* IPRT_MAY_HAVE_PTHREAD_SET_NAME_NP */
+
+#ifdef RTTHREAD_POSIX_WITH_CREATE_PRIORITY_PROXY
+/** Atomic indicator of whether the priority proxy thread has been (attempted) started.
+ *
+ * The priority proxy thread is started under these circumstances:
+ * - RTThreadCreate
+ * - RTThreadSetType
+ * - RTProcSetPriority
+ *
+ * Which means that we'll be single threaded when this is modified.
+ *
+ * Special values:
+ * - VERR_TRY_AGAIN: Not yet started.
+ * - VERR_WRONG_ORDER: Starting.
+ * - VINF_SUCCESS: Started successfully.
+ * - VERR_PROCESS_NOT_FOUND: Stopping or stopped
+ * - Other error status if failed to start.
+ *
+ * @note We could potentially optimize this by only starting it when we lower the
+ * priority of ourselves, the process, or a newly created thread. But
+ * that would mean we would need to take multi-threading into account, so
+ * let's not do that for now.
+ */
+static int32_t volatile g_rcPriorityProxyThreadStart = VERR_TRY_AGAIN;
+/** The IPRT thread handle for the priority proxy. */
+static RTTHREAD g_hRTThreadPosixPriorityProxyThread = NIL_RTTHREAD;
+/** The priority proxy queue. */
+static RTREQQUEUE g_hRTThreadPosixPriorityProxyQueue = NIL_RTREQQUEUE;
+#endif /* RTTHREAD_POSIX_WITH_CREATE_PRIORITY_PROXY */
+
+
+/*********************************************************************************************************************************
+* Internal Functions *
+*********************************************************************************************************************************/
+static void *rtThreadNativeMain(void *pvArgs);
+static void rtThreadKeyDestruct(void *pvValue);
+#ifdef RTTHREAD_POSIX_WITH_POKE
+static void rtThreadPosixPokeSignal(int iSignal);
+#endif
+
+
+#ifdef RTTHREAD_POSIX_WITH_POKE
+/**
+ * Tries to register the dummy signal handler for RTThreadPoke; g_iSigPokeThread ends up as the chosen signal or -1.
+ */
+static void rtThreadPosixSelectPokeSignal(void)
+{
+ /*
+ * Note! Avoid SIGRTMIN thru SIGRTMIN+2 because of LinuxThreads.
+ */
+# if !defined(RT_OS_LINUX) && !defined(RT_OS_SOLARIS) /* glibc defines SIGRTMAX to __libc_current_sigrtmax() and Solaris libc defines it relying on _sysconf(), causing compiler to deploy serialization here. */
+ static
+# endif
+ const int s_aiSigCandidates[] =
+ {
+# ifdef SIGRTMAX
+ SIGRTMAX-3,
+ SIGRTMAX-2,
+ SIGRTMAX-1,
+# endif
+# ifndef RT_OS_SOLARIS
+ SIGUSR2,
+# endif
+ SIGWINCH
+ };
+
+ g_iSigPokeThread = -1;
+ if (!RTR3InitIsUnobtrusive()) /* In unobtrusive mode no handler is installed and the signal stays -1. */
+ {
+ for (unsigned iSig = 0; iSig < RT_ELEMENTS(s_aiSigCandidates); iSig++)
+ {
+ struct sigaction SigActOld;
+ if (!sigaction(s_aiSigCandidates[iSig], NULL, &SigActOld)) /* Query only: fetch the current disposition. */
+ {
+ if ( SigActOld.sa_handler == SIG_DFL
+ || SigActOld.sa_handler == rtThreadPosixPokeSignal) /* Only take signals that are unclaimed or already ours. */
+ {
+ struct sigaction SigAct;
+ RT_ZERO(SigAct);
+ SigAct.sa_handler = rtThreadPosixPokeSignal;
+ SigAct.sa_flags = 0; /* no SA_RESTART! */
+ sigfillset(&SigAct.sa_mask);
+
+ /* ASSUMES no sigaction race... (lazy bird) */
+ if (!sigaction(s_aiSigCandidates[iSig], &SigAct, NULL))
+ {
+ g_iSigPokeThread = s_aiSigCandidates[iSig]; /* The first candidate that installs successfully wins. */
+ break;
+ }
+ AssertMsgFailed(("rc=%Rrc errno=%d\n", RTErrConvertFromErrno(errno), errno));
+ }
+ }
+ else
+ AssertMsgFailed(("rc=%Rrc errno=%d\n", RTErrConvertFromErrno(errno), errno));
+ }
+ }
+}
+#endif /* RTTHREAD_POSIX_WITH_POKE */
+
+
+DECLHIDDEN(int) rtThreadNativeInit(void) /* Creates the self TLS key, then does the optional poke signal and thread naming setup. */
+{
+ /*
+ * Allocate the TLS (key in posix terms) where we store the pointer to
+ * a thread's RTTHREADINT structure.
+ */
+ int rc = pthread_key_create(&g_SelfKey, rtThreadKeyDestruct);
+ if (rc)
+ return VERR_NO_TLS_FOR_SELF;
+
+#ifdef RTTHREAD_POSIX_WITH_POKE
+ rtThreadPosixSelectPokeSignal();
+#endif
+
+#ifdef IPRT_MAY_HAVE_PTHREAD_SET_NAME_NP
+ if (RT_SUCCESS(rc)) /* rc is zero at this point, since a non-zero value returned above. */
+ g_pfnThreadSetName = (PFNPTHREADSETNAME)(uintptr_t)dlsym(RTLD_DEFAULT, "pthread_setname_np"); /* NULL if the symbol is not found. */
+#endif
+ return rc;
+}
+
+static void rtThreadPosixBlockSignals(PRTTHREADINT pThread) /* Sets up the calling thread's signal mask; pThread may be NULL. */
+{
+ /*
+ * Mask all signals, including the poke one, if requested.
+ */
+ if ( pThread
+ && (pThread->fFlags & RTTHREADFLAGS_NO_SIGNALS))
+ {
+ sigset_t SigSet;
+ sigfillset(&SigSet);
+ sigdelset(&SigSet, SIGILL); /* On the m1 we end up spinning on UDF ... */
+ sigdelset(&SigSet, SIGTRAP); /* ... and BRK instruction if these signals are masked. */
+ sigdelset(&SigSet, SIGFPE); /* Just adding the rest here to be on the safe side. */
+ sigdelset(&SigSet, SIGBUS);
+ sigdelset(&SigSet, SIGSEGV);
+ int rc = sigprocmask(SIG_BLOCK, &SigSet, NULL);
+ AssertMsg(rc == 0, ("rc=%Rrc errno=%d\n", RTErrConvertFromErrno(errno), errno)); RT_NOREF(rc);
+ }
+ /*
+ * Block SIGALRM - required for timer-posix.cpp.
+ * This is done to limit harm done by OSes which don't do special SIGALRM scheduling.
+ * It will not help much if someone creates threads directly using pthread_create. :/
+ */
+ else if (!RTR3InitIsUnobtrusive())
+ {
+ sigset_t SigSet;
+ sigemptyset(&SigSet);
+ sigaddset(&SigSet, SIGALRM);
+ sigprocmask(SIG_BLOCK, &SigSet, NULL); /* The result is deliberately not checked here. */
+ }
+
+#ifdef RTTHREAD_POSIX_WITH_POKE
+ /*
+ * bird 2020-10-28: Not entirely sure why we do this, but it makes sure the signal works
+ * on the new thread. Probably some pre-NPTL linux reasons.
+ */
+ if (g_iSigPokeThread != -1)
+ {
+# if 1 /* siginterrupt() is typically implemented as two sigaction calls, this should be faster and w/o deprecations: */
+ struct sigaction SigActOld;
+ RT_ZERO(SigActOld);
+
+ struct sigaction SigAct;
+ RT_ZERO(SigAct);
+ SigAct.sa_handler = rtThreadPosixPokeSignal;
+ SigAct.sa_flags = 0; /* no SA_RESTART! */
+ sigfillset(&SigAct.sa_mask);
+
+ int rc = sigaction(g_iSigPokeThread, &SigAct, &SigActOld);
+ AssertMsg(rc == 0, ("rc=%Rrc errno=%d\n", RTErrConvertFromErrno(errno), errno)); RT_NOREF(rc);
+ AssertMsg(rc || SigActOld.sa_handler == rtThreadPosixPokeSignal, ("%p\n", SigActOld.sa_handler)); /* The previous handler is expected to be ours already. */
+# else
+ siginterrupt(g_iSigPokeThread, 1);
+# endif
+ }
+#endif
+}
+
+DECLHIDDEN(void) rtThreadNativeReInitObtrusive(void) /* Redoes the poke signal selection and the signal blocking; asserts that obtrusive mode is active. */
+{
+#ifdef RTTHREAD_POSIX_WITH_POKE
+ Assert(!RTR3InitIsUnobtrusive());
+ rtThreadPosixSelectPokeSignal();
+#endif
+ rtThreadPosixBlockSignals(NULL); /* NULL means there are no thread flags to consult. */
+}
+
+
+/**
+ * TLS key destructor, registered via pthread_key_create in rtThreadNativeInit.
+ * @param pvValue The key value. PRTTHREAD in our case.
+ */
+static void rtThreadKeyDestruct(void *pvValue)
+{
+    PRTTHREADINT const pSelf = (PRTTHREADINT)pvValue;
+
+    /* Only threads flagged as alien are dealt with here. */
+    if (!(pSelf->fIntFlags & RTTHREADINT_FLAGS_ALIEN))
+        return;
+
+    /* Put the pointer back into the TLS slot around the termination call, then clear it again. */
+    pthread_setspecific(g_SelfKey, pSelf);
+    rtThreadTerminate(pSelf, 0);
+    pthread_setspecific(g_SelfKey, NULL);
+}
+
+
+#ifdef RTTHREAD_POSIX_WITH_POKE
+/**
+ * Dummy signal handler for the poke signal; it only asserts that it got the expected signal.
+ *
+ * @param iSignal The signal number.
+ */
+static void rtThreadPosixPokeSignal(int iSignal)
+{
+ Assert(iSignal == g_iSigPokeThread);
+ NOREF(iSignal);
+}
+#endif
+
+
+/**
+ * Adopts a thread; called right after the thread structure was allocated.
+ *
+ * @returns VINF_SUCCESS, or VERR_FAILED_TO_SET_SELF_TLS if the TLS entry cannot be set.
+ * @param pThread Pointer to the thread structure.
+ */
+DECLHIDDEN(int) rtThreadNativeAdopt(PRTTHREADINT pThread)
+{
+    /* Apply the signal mask setup before publishing the self pointer. */
+    rtThreadPosixBlockSignals(pThread);
+
+    if (pthread_setspecific(g_SelfKey, pThread) != 0)
+        return VERR_FAILED_TO_SET_SELF_TLS;
+    return VINF_SUCCESS;
+}
+
+
+DECLHIDDEN(void) rtThreadNativeDestroy(PRTTHREADINT pThread)
+{
+    if ((PRTTHREADINT)pthread_getspecific(g_SelfKey) == pThread) /* Is the calling thread the one being destroyed? */
+        pthread_setspecific(g_SelfKey, NULL);
+}
+
+
+/**
+ * pthread start routine: sets up signals, the self TLS entry and the thread name, then runs rtThreadMain.
+ */
+static void *rtThreadNativeMain(void *pvArgs)
+{
+ PRTTHREADINT pThread = (PRTTHREADINT)pvArgs;
+ pthread_t Self = pthread_self();
+#if !defined(RT_OS_SOLARIS) /* On Solaris sizeof(pthread_t) = 4 and sizeof(NIL_RTNATIVETHREAD) = 8 */
+ Assert((uintptr_t)Self != NIL_RTNATIVETHREAD);
+#endif
+ Assert(Self == (pthread_t)(RTNATIVETHREAD)Self); /* The native handle must survive the round trip through RTNATIVETHREAD. */
+
+#if defined(RT_OS_LINUX)
+ /*
+ * Set the TID.
+ */
+ pThread->tid = syscall(__NR_gettid);
+ ASMMemoryFence();
+#endif
+
+ rtThreadPosixBlockSignals(pThread);
+
+ /*
+ * Set the TLS entry and, if possible, the thread name.
+ */
+ int rc = pthread_setspecific(g_SelfKey, pThread);
+ AssertReleaseMsg(!rc, ("failed to set self TLS. rc=%d thread '%s'\n", rc, pThread->szName));
+
+#ifdef IPRT_MAY_HAVE_PTHREAD_SET_NAME_NP
+ if (g_pfnThreadSetName)
+# ifdef RT_OS_DARWIN
+ g_pfnThreadSetName(pThread->szName);
+# else
+ g_pfnThreadSetName(Self, pThread->szName);
+# endif
+#endif
+
+ /*
+ * Call common main.
+ */
+ rc = rtThreadMain(pThread, (uintptr_t)Self, &pThread->szName[0]);
+
+ pthread_setspecific(g_SelfKey, NULL); /* Clear the TLS entry before the thread exits. */
+ pthread_exit((void *)(intptr_t)rc);
+ return (void *)(intptr_t)rc; /* Same value as handed to pthread_exit above. */
+}
+
+#ifdef RTTHREAD_POSIX_WITH_CREATE_PRIORITY_PROXY
+
+/**
+ * @callback_method_impl{FNRTTHREAD,
+ * Priority proxy thread that services g_hRTThreadPosixPriorityProxyQueue.}
+ */
+static DECLCALLBACK(int) rtThreadPosixPriorityProxyThread(PRTTHREADINT, void *)
+{
+ for (;;)
+ {
+ RTREQQUEUE hReqQueue = g_hRTThreadPosixPriorityProxyQueue; /* Re-read every round; rtThreadStopProxyThread resets it to NIL. */
+ if (hReqQueue != NIL_RTREQQUEUE)
+ RTReqQueueProcess(hReqQueue, RT_INDEFINITE_WAIT);
+ else
+ break;
+
+ int32_t rc = ASMAtomicUoReadS32(&g_rcPriorityProxyThreadStart);
+ if (rc != VINF_SUCCESS && rc != VERR_WRONG_ORDER) /* Any state other than started/starting ends the loop. */
+ break;
+ }
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Just returns a non-success status code to force the thread to re-evaluate
+ * the global shutdown variable.
+ */
+static DECLCALLBACK(int) rtThreadPosixPriorityProxyStopper(void)
+{
+ return VERR_CANCELLED;
+}
+
+
+/**
+ * An atexit() callback that stops the proxy creation/priority thread.
+ */
+static void rtThreadStopProxyThread(void)
+{
+ /*
+ * Signal to the thread that it's time to shut down.
+ */
+ int32_t rc = ASMAtomicXchgS32(&g_rcPriorityProxyThreadStart, VERR_PROCESS_NOT_FOUND);
+ if (RT_SUCCESS(rc)) /* The previous state was a success status, i.e. the thread had been started. */
+ {
+ /*
+ * Grab the associated handles.
+ */
+ RTTHREAD hThread = g_hRTThreadPosixPriorityProxyThread;
+ RTREQQUEUE hQueue = g_hRTThreadPosixPriorityProxyQueue;
+ g_hRTThreadPosixPriorityProxyQueue = NIL_RTREQQUEUE;
+ g_hRTThreadPosixPriorityProxyThread = NIL_RTTHREAD;
+ ASMCompilerBarrier(); /* paranoia */
+
+ AssertReturnVoid(hThread != NIL_RTTHREAD);
+ AssertReturnVoid(hQueue != NIL_RTREQQUEUE);
+
+ /*
+ * Kick the thread so it gets out of any pending RTReqQueueProcess call ASAP.
+ */
+ rc = RTReqQueueCallEx(hQueue, NULL, 0 /*cMillies*/, RTREQFLAGS_IPRT_STATUS | RTREQFLAGS_NO_WAIT,
+ (PFNRT)rtThreadPosixPriorityProxyStopper, 0);
+
+ /*
+ * Wait for the thread to complete.
+ */
+ rc = RTThreadWait(hThread, RT_SUCCESS(rc) ? RT_MS_1SEC * 5 : 32, NULL); /* Shorter wait if the kick could not be queued. */
+ if (RT_SUCCESS(rc))
+ RTReqQueueDestroy(hQueue);
+ /* else: just leak the stuff, we're exiting, so nobody cares... */
+ }
+}
+
+
+/**
+ * Ensure that the priority proxy thread has been started.
+ *
+ * Since we will always start a proxy thread when asked to create a thread,
+ * there is no need for serialization here.
+ *
+ * @retval true if started
+ * @retval false if it failed to start (caller must handle this scenario).
+ */
+DECLHIDDEN(bool) rtThreadPosixPriorityProxyStart(void)
+{
+    /*
+     * Read the result.
+     */
+    int rc = ASMAtomicUoReadS32(&g_rcPriorityProxyThreadStart);
+    if (rc != VERR_TRY_AGAIN)
+        return RT_SUCCESS(rc);
+
+    /* If this triggers then there is a very unexpected race somewhere.  It
+       should be harmless though. */
+    AssertReturn(ASMAtomicCmpXchgS32(&g_rcPriorityProxyThreadStart, VERR_WRONG_ORDER, VERR_TRY_AGAIN), false);
+
+    /*
+     * Not yet started, so do that.
+     */
+    rc = RTReqQueueCreate(&g_hRTThreadPosixPriorityProxyQueue);
+    if (RT_SUCCESS(rc))
+    {
+        rc = RTThreadCreate(&g_hRTThreadPosixPriorityProxyThread, rtThreadPosixPriorityProxyThread, NULL, 0 /*cbStack*/,
+                            RTTHREADTYPE_DEFAULT, RTTHREADFLAGS_WAITABLE, "RTThrdPP");
+        if (RT_SUCCESS(rc))
+        {
+            ASMAtomicWriteS32(&g_rcPriorityProxyThreadStart, VINF_SUCCESS);
+            atexit(rtThreadStopProxyThread);
+            return true;
+        }
+        RTReqQueueDestroy(g_hRTThreadPosixPriorityProxyQueue); /* No thread will service it, so release the queue instead of leaking it. */
+        g_hRTThreadPosixPriorityProxyQueue = NIL_RTREQQUEUE;
+    }
+    ASMAtomicWriteS32(&g_rcPriorityProxyThreadStart, rc != VERR_WRONG_ORDER ? rc : VERR_PROCESS_NOT_FOUND);
+    return false;
+}
+
+
+/**
+ * Calls @a pfnFunction from the priority proxy thread.
+ *
+ * Caller must have called rtThreadPosixPriorityProxyStart() to check that the
+ * priority proxy thread is running.
+ *
+ * @returns The RTReqQueueCallV status, or VINF_SUCCESS when the call is skipped.
+ * @param pTargetThread The target thread, NULL if not applicable. This is
+ *                      so we can skip calls pertaining to the priority
+ *                      proxy thread itself.
+ * @param pfnFunction The function to call. Must return IPRT status code.
+ * @param cArgs Number of arguments (see also RTReqQueueCall).
+ * @param ... Arguments (see also RTReqQueueCall).
+ */
+DECLHIDDEN(int) rtThreadPosixPriorityProxyCall(PRTTHREADINT pTargetThread, PFNRT pfnFunction, int cArgs, ...)
+{
+    int rc;
+    if (   !pTargetThread
+        || pTargetThread->pfnThread != rtThreadPosixPriorityProxyThread)
+    {
+        va_list va;
+        va_start(va, cArgs);
+        PRTREQ pReq = NULL; /* Keeps the release below well defined if the call fails before setting it. */
+        rc = RTReqQueueCallV(g_hRTThreadPosixPriorityProxyQueue, &pReq, RT_INDEFINITE_WAIT, RTREQFLAGS_IPRT_STATUS,
+                             pfnFunction, cArgs, va);
+        va_end(va);
+        RTReqRelease(pReq);
+    }
+    else
+        rc = VINF_SUCCESS; /* Calls targeting the proxy thread itself are skipped. */
+    return rc;
+}
+
+#endif /* RTTHREAD_POSIX_WITH_CREATE_PRIORITY_PROXY */
+
+/**
+ * Worker for rtThreadNativeCreate that's either called on the priority proxy
+ * thread or directly on the calling thread depending on the proxy state.
+ */
+static DECLCALLBACK(int) rtThreadNativeInternalCreate(PRTTHREADINT pThread, PRTNATIVETHREAD pNativeThread)
+{
+ /*
+ * Set the default stack size.
+ */
+ if (!pThread->cbStack)
+ pThread->cbStack = 512*1024;
+
+#ifdef RT_OS_LINUX
+ pThread->tid = -1; /* Placeholder; rtThreadNativeMain stores the real TID on the new thread. */
+#endif
+
+ /*
+ * Setup thread attributes.
+ */
+ pthread_attr_t ThreadAttr;
+ int rc = pthread_attr_init(&ThreadAttr);
+ if (!rc)
+ {
+ rc = pthread_attr_setdetachstate(&ThreadAttr, PTHREAD_CREATE_DETACHED);
+ if (!rc)
+ {
+ rc = pthread_attr_setstacksize(&ThreadAttr, pThread->cbStack);
+ if (!rc)
+ {
+ /*
+ * Create the thread.
+ */
+ pthread_t ThreadId;
+ rc = pthread_create(&ThreadId, &ThreadAttr, rtThreadNativeMain, pThread);
+ if (!rc)
+ {
+ pthread_attr_destroy(&ThreadAttr);
+ *pNativeThread = (uintptr_t)ThreadId;
+ return VINF_SUCCESS;
+ }
+ }
+ }
+ pthread_attr_destroy(&ThreadAttr); /* Failure path: the attribute object is released here instead. */
+ }
+ return RTErrConvertFromErrno(rc); /* The pthread calls above return the error number directly. */
+}
+
+
+DECLHIDDEN(int) rtThreadNativeCreate(PRTTHREADINT pThread, PRTNATIVETHREAD pNativeThread)
+{
+#ifdef RTTHREAD_POSIX_WITH_CREATE_PRIORITY_PROXY
+    /*
+     * If we have a priority proxy thread, use it.  Make sure to ignore the
+     * starting of the proxy thread itself.
+     */
+    if (   pThread->pfnThread != rtThreadPosixPriorityProxyThread
+        && rtThreadPosixPriorityProxyStart())
+    {
+        PRTREQ pReq = NULL; /* Keeps the release below well defined if the call fails before setting it. */
+        int rc = RTReqQueueCall(g_hRTThreadPosixPriorityProxyQueue, &pReq, RT_INDEFINITE_WAIT,
+                                (PFNRT)rtThreadNativeInternalCreate, 2, pThread, pNativeThread);
+        RTReqRelease(pReq);
+        return rc;
+    }
+
+    /*
+     * Fall back on creating it directly without regard to priority proxying.
+     */
+#endif
+    return rtThreadNativeInternalCreate(pThread, pNativeThread);
+}
+
+
+RTDECL(RTTHREAD) RTThreadSelf(void)
+{
+ /** @todo import alien threads? */
+#if defined(RT_OS_DARWIN)
+ /* On darwin, there seems to be no input checking with pthread_getspecific.
+ So, we must prevent using g_SelfKey before rtThreadNativeInit has run,
+ otherwise we might crash or start working with total garbage pointer
+ values here (see _os_tsd_get_direct in xnu/libsyscall/os/tsd.h).
+
+ Now, since the init value is a "negative" one, we just have to check
+ that it's positive or zero before calling the API. */
+ if (RT_LIKELY((intptr_t)g_SelfKey >= 0))
+ return (PRTTHREADINT)pthread_getspecific(g_SelfKey);
+ return NIL_RTTHREAD;
+#else
+ return (PRTTHREADINT)pthread_getspecific(g_SelfKey);
+#endif
+}
+
+
+#ifdef RTTHREAD_POSIX_WITH_POKE
+
+RTDECL(int) RTThreadPoke(RTTHREAD hThread)
+{
+    /* A thread cannot poke itself, and the handle must resolve to a thread structure. */
+    AssertReturn(hThread != RTThreadSelf(), VERR_INVALID_PARAMETER);
+    PRTTHREADINT pTarget = rtThreadGet(hThread);
+    AssertReturn(pTarget, VERR_INVALID_HANDLE);
+
+    int rc = VERR_NOT_SUPPORTED;
+    if (g_iSigPokeThread != -1)
+    {
+        /* pthread_kill hands back the error number as its return value. */
+        pthread_t const hNative = (pthread_t)(uintptr_t)pTarget->Core.Key;
+        rc = RTErrConvertFromErrno(pthread_kill(hNative, g_iSigPokeThread));
+    }
+
+    rtThreadRelease(pTarget);
+    return rc;
+}
+
+
+RTDECL(int) RTThreadControlPokeSignal(RTTHREAD hThread, bool fEnable) /* Unblocks (fEnable) or blocks the poke signal for the calling thread. */
+{
+ AssertReturn(hThread == RTThreadSelf() && hThread != NIL_RTTHREAD, VERR_INVALID_PARAMETER); /* Only the calling thread's own handle is accepted. */
+ int rc;
+ if (g_iSigPokeThread != -1)
+ {
+ sigset_t SigSet;
+ sigemptyset(&SigSet);
+ sigaddset(&SigSet, g_iSigPokeThread);
+
+ int rc2 = sigprocmask(fEnable ? SIG_UNBLOCK : SIG_BLOCK, &SigSet, NULL);
+ if (rc2 == 0)
+ rc = VINF_SUCCESS;
+ else
+ {
+ rc = RTErrConvertFromErrno(errno);
+ AssertMsgFailed(("rc=%Rrc errno=%d (rc2=%d)\n", rc, errno, rc2));
+ }
+ }
+ else
+ rc = VERR_NOT_SUPPORTED; /* No poke signal was selected. */
+ return rc;
+}
+
+
+#endif
+
+/** Returns the calling thread's kernel and user CPU time in milliseconds. @todo move this into platform specific files. */
+RTR3DECL(int) RTThreadGetExecutionTimeMilli(uint64_t *pKernelTime, uint64_t *pUserTime)
+{
+#if defined(RT_OS_SOLARIS)
+    struct rusage ts;
+    int rc = getrusage(RUSAGE_LWP, &ts);
+    if (rc)
+        return RTErrConvertFromErrno(errno); /* getrusage returns -1 and reports the cause in errno. */
+
+    *pKernelTime = ts.ru_stime.tv_sec * 1000 + ts.ru_stime.tv_usec / 1000;
+    *pUserTime   = ts.ru_utime.tv_sec * 1000 + ts.ru_utime.tv_usec / 1000;
+    return VINF_SUCCESS;
+
+#elif defined(RT_OS_LINUX) || defined(RT_OS_FREEBSD)
+    /* on Linux, getrusage(RUSAGE_THREAD, ...) is available since 2.6.26 */
+    struct timespec ts;
+    int rc = clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts);
+    if (rc)
+        return RTErrConvertFromErrno(errno); /* clock_gettime returns -1 and reports the cause in errno. */
+
+    *pKernelTime = 0; /* This clock gives one combined figure, which is reported as user time. */
+    *pUserTime   = (uint64_t)ts.tv_sec * 1000 + ts.tv_nsec / 1000000;
+    return VINF_SUCCESS;
+
+#elif defined(RT_OS_DARWIN)
+    thread_basic_info ThreadInfo; /* NOTE(review): confirm whether the mach_thread_self() port below needs mach_port_deallocate(). */
+    mach_msg_type_number_t Count = THREAD_BASIC_INFO_COUNT;
+    kern_return_t krc = thread_info(mach_thread_self(), THREAD_BASIC_INFO, (thread_info_t)&ThreadInfo, &Count);
+    AssertReturn(krc == KERN_SUCCESS, RTErrConvertFromDarwinKern(krc));
+
+    *pKernelTime = ThreadInfo.system_time.seconds * 1000 + ThreadInfo.system_time.microseconds / 1000;
+    *pUserTime   = ThreadInfo.user_time.seconds * 1000 + ThreadInfo.user_time.microseconds / 1000;
+
+    return VINF_SUCCESS;
+#elif defined(RT_OS_HAIKU)
+    thread_info ThreadInfo;
+    status_t status = get_thread_info(find_thread(NULL), &ThreadInfo);
+    AssertReturn(status == B_OK, RTErrConvertFromErrno(status));
+
+    *pKernelTime = ThreadInfo.kernel_time / 1000;
+    *pUserTime   = ThreadInfo.user_time / 1000;
+
+    return VINF_SUCCESS;
+#else
+    return VERR_NOT_IMPLEMENTED;
+#endif
+}
+
diff --git a/src/VBox/Runtime/r3/posix/thread2-posix.cpp b/src/VBox/Runtime/r3/posix/thread2-posix.cpp
new file mode 100644
index 00000000..6eaee22e
--- /dev/null
+++ b/src/VBox/Runtime/r3/posix/thread2-posix.cpp
@@ -0,0 +1,133 @@
+/* $Id: thread2-posix.cpp $ */
+/** @file
+ * IPRT - Threads part 2, POSIX.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP RTLOGGROUP_THREAD
+#include <errno.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <sched.h>
+
+#include <iprt/thread.h>
+#include <iprt/log.h>
+#include <iprt/asm.h>
+#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
+# include <iprt/asm-amd64-x86.h>
+#endif
+#include <iprt/errcore.h>
+#include "internal/thread.h"
+
+
+/**
+ * Gets the native handle of the calling thread.
+ *
+ * @returns The pthread_self() value cast to RTNATIVETHREAD.
+ */
+RTDECL(RTNATIVETHREAD) RTThreadNativeSelf(void)
+{
+    pthread_t const hSelf = pthread_self();
+    return (RTNATIVETHREAD)hSelf;
+}
+
+
+/**
+ * Puts the calling thread to sleep, logging entry and exit.
+ *
+ * A zero interval turns into a plain sched_yield(); anything else is handed to
+ * nanosleep().
+ *
+ * @returns VINF_SUCCESS if the yield/sleep call succeeded, otherwise the IPRT
+ *          status converted from errno.
+ * @param   cMillies    How long to sleep, in milliseconds.
+ */
+RTDECL(int) RTThreadSleep(RTMSINTERVAL cMillies)
+{
+    LogFlow(("RTThreadSleep: cMillies=%d\n", cMillies));
+
+    int iFailed;
+    if (cMillies == 0)
+        iFailed = sched_yield();
+    else
+    {
+        struct timespec tsRemaining = {0,0};
+        struct timespec tsRequest;
+        tsRequest.tv_sec  = cMillies / 1000;
+        tsRequest.tv_nsec = (cMillies % 1000) * 1000000;
+        iFailed = nanosleep(&tsRequest, &tsRemaining);
+    }
+
+    /* errno is only consulted when the call above reported a failure. */
+    int rc = VINF_SUCCESS;
+    if (iFailed)
+        rc = RTErrConvertFromErrno(errno);
+    LogFlow(("RTThreadSleep: returning %Rrc (cMillies=%d)\n", rc, cMillies));
+    return rc;
+}
+
+
+/**
+ * Same as RTThreadSleep, minus the logging.
+ *
+ * @returns VINF_SUCCESS if the yield/sleep call succeeded, otherwise the IPRT
+ *          status converted from errno.
+ * @param   cMillies    How long to sleep, in milliseconds; 0 just yields.
+ */
+RTDECL(int) RTThreadSleepNoLog(RTMSINTERVAL cMillies)
+{
+    int iFailed;
+    if (cMillies != 0)
+    {
+        struct timespec tsRemaining = {0,0};
+        struct timespec tsRequest;
+        tsRequest.tv_sec  = cMillies / 1000;
+        tsRequest.tv_nsec = (cMillies % 1000) * 1000000;
+        iFailed = nanosleep(&tsRequest, &tsRemaining);
+    }
+    else
+        iFailed = sched_yield();
+
+    return iFailed ? RTErrConvertFromErrno(errno) : VINF_SUCCESS;
+}
+
+
+/**
+ * Yields the CPU via sched_yield().
+ *
+ * On x86/AMD64 the TSC is sampled around the call and the function reports
+ * true only when more than 1500 ticks went by; other architectures always
+ * report true.
+ *
+ * @returns The yielded/not-yielded guess described above.
+ */
+RTDECL(bool) RTThreadYield(void)
+{
+#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
+    uint64_t const uTscBefore = ASMReadTSC();
+    sched_yield();
+    uint64_t const cTicks = ASMReadTSC() - uTscBefore;
+
+    bool fYielded = cTicks > 1500;
+    LogFlow(("RTThreadYield: returning %d (%llu ticks)\n", fYielded, cTicks));
+    return fYielded;
+#else
+    sched_yield();
+    return true; /* PORTME: Add heuristics for determining whether the cpus was yielded. */
+#endif
+}
+
diff --git a/src/VBox/Runtime/r3/posix/time-posix.cpp b/src/VBox/Runtime/r3/posix/time-posix.cpp
new file mode 100644
index 00000000..3c028b73
--- /dev/null
+++ b/src/VBox/Runtime/r3/posix/time-posix.cpp
@@ -0,0 +1,99 @@
+/* $Id: time-posix.cpp $ */
+/** @file
+ * IPRT - Time, POSIX.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP RTLOGGROUP_TIME
+#define RTTIME_INCL_TIMEVAL
+#include <sys/time.h>
+#include <time.h>
+
+#include <iprt/time.h>
+#include "internal/time.h"
+
+
+/**
+ * Reads a nanosecond timestamp straight from the system.
+ *
+ * CLOCK_MONOTONIC is preferred where it is compiled in; the first time it
+ * fails the function permanently switches over to gettimeofday().
+ *
+ * @returns nanosecond timestamp.
+ */
+DECLINLINE(uint64_t) rtTimeGetSystemNanoTS(void)
+{
+#if defined(CLOCK_MONOTONIC) && !defined(RT_OS_L4) && !defined(RT_OS_OS2)
+    /* Monotonic clock first, until it lets us down once. */
+    static bool s_fMonoClock = true;
+    if (s_fMonoClock)
+    {
+        struct timespec MonoNow;
+        if (clock_gettime(CLOCK_MONOTONIC, &MonoNow) == 0)
+            return (uint64_t)MonoNow.tv_sec * RT_NS_1SEC_64 + MonoNow.tv_nsec;
+        s_fMonoClock = false;
+    }
+#endif
+
+    /* Wall clock fallback, microsecond resolution. */
+    struct timeval WallNow;
+    gettimeofday(&WallNow, NULL);
+    uint64_t const cNsFromSecs  = (uint64_t)WallNow.tv_sec * RT_NS_1SEC_64;
+    uint64_t const cNsFromUsecs = (uint64_t)(WallNow.tv_usec * RT_NS_1US);
+    return cNsFromSecs + cNsFromUsecs;
+}
+
+
+/**
+ * Reads the system clock and returns it as a nanosecond timestamp.
+ *
+ * Unlike RTTimeNanoTS this goes directly to the system APIs and does not do
+ * any resolution or performance optimizations.
+ *
+ * @returns nanosecond timestamp.
+ */
+RTDECL(uint64_t) RTTimeSystemNanoTS(void)
+{
+    uint64_t const cNsNow = rtTimeGetSystemNanoTS();
+    return cNsNow;
+}
+
+
+/**
+ * Reads the system clock and returns it as a millisecond timestamp.
+ *
+ * The value is the system nanosecond timestamp divided down to milliseconds;
+ * it is taken directly from the system APIs without any resolution or
+ * performance optimizations.
+ *
+ * @returns millisecond timestamp.
+ */
+RTDECL(uint64_t) RTTimeSystemMilliTS(void)
+{
+    uint64_t const cNsNow = rtTimeGetSystemNanoTS();
+    return cNsNow / RT_NS_1MS;
+}
+
diff --git a/src/VBox/Runtime/r3/posix/timelocal-posix.cpp b/src/VBox/Runtime/r3/posix/timelocal-posix.cpp
new file mode 100644
index 00000000..40e2fc16
--- /dev/null
+++ b/src/VBox/Runtime/r3/posix/timelocal-posix.cpp
@@ -0,0 +1,215 @@
+/* $Id $ */
+/** @file
+ * IPRT - Local Time, Posix.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP RTLOGGROUP_TIME
+#define RTTIME_INCL_TIMEVAL
+#include <iprt/types.h>
+#include <iprt/assert.h>
+
+#include <sys/time.h>
+#include <time.h>
+
+#include <iprt/time.h>
+
+
+/**
+ * This tries to find the UTC offset for a given timespec.
+ *
+ * It does probably not take into account changes in daylight
+ * saving over the years or similar stuff.
+ *
+ * The offset is derived by exploding the same second count with both
+ * localtime_r() and gmtime_r() and subtracting the time-of-day values,
+ * correcting by 24 hours when the two fall on different days.
+ *
+ * @returns UTC offset in nanoseconds.
+ * @param pTime The time.
+ * @param fCurrentTime Whether the input is current time or not.
+ * This is to avoid infinite recursion on errors in the fallback path:
+ * when false, a failure retries once with RTTimeNow(); when true,
+ * a failure yields 0.
+ */
+static int64_t rtTimeLocalUTCOffset(PCRTTIMESPEC pTime, bool fCurrentTime)
+{
+ RTTIMESPEC Fallback;
+
+ /*
+ * Convert to time_t.
+ * The round-trip comparison catches values that do not fit into time_t.
+ */
+ int64_t i64UnixTime = RTTimeSpecGetSeconds(pTime);
+ time_t UnixTime = i64UnixTime;
+ if (UnixTime != i64UnixTime)
+ return fCurrentTime ? 0 : rtTimeLocalUTCOffset(RTTimeNow(&Fallback), true);
+
+ /*
+ * Explode it as both local and UTC time.
+ * Note that a zero tm_year from localtime_r is treated as a failure too.
+ */
+ struct tm TmLocal;
+ if ( !localtime_r(&UnixTime, &TmLocal)
+ || !TmLocal.tm_year)
+ return fCurrentTime ? 0 : rtTimeLocalUTCOffset(RTTimeNow(&Fallback), true);
+ struct tm TmUtc;
+ if (!gmtime_r(&UnixTime, &TmUtc))
+ return fCurrentTime ? 0 : rtTimeLocalUTCOffset(RTTimeNow(&Fallback), true);
+
+ /*
+ * Calc the difference (if any).
+ * We ASSUME that the difference is less than 24 hours.
+ */
+ if ( TmLocal.tm_hour == TmUtc.tm_hour
+ && TmLocal.tm_min == TmUtc.tm_min
+ && TmLocal.tm_sec == TmUtc.tm_sec
+ && TmLocal.tm_mday == TmUtc.tm_mday)
+ return 0;
+
+ int cLocalSecs = TmLocal.tm_hour * 3600
+ + TmLocal.tm_min * 60
+ + TmLocal.tm_sec;
+ int cUtcSecs = TmUtc.tm_hour * 3600
+ + TmUtc.tm_min * 60
+ + TmUtc.tm_sec;
+ if (TmLocal.tm_mday != TmUtc.tm_mday)
+ {
+ /*
+ * Must add 24 hours to the value that is ahead of the other.
+ *
+ * To determine which is ahead was busted for a long long time (bugref:9078),
+ * so here are some examples and two different approaches.
+ *
+ * TmLocal TmUtc => Add 24:00 to => Diff
+ * 2007-04-02 01:00 2007-04-01 23:00 => TmLocal => +02:00
+ * 2007-04-01 01:00 2007-03-31 23:00 => TmLocal => +02:00
+ * 2007-03-31 01:00 2007-03-30 23:00 => TmLocal => +02:00
+ *
+ * 2007-04-01 23:00 2007-04-02 01:00 => TmUtc => -02:00
+ * 2007-03-31 23:00 2007-04-01 01:00 => TmUtc => -02:00
+ * 2007-03-30 23:00 2007-03-31 01:00 => TmUtc => -02:00
+ *
+ */
+#if 0
+ /* Using day of month turned out to be a little complicated. */
+ if ( ( TmLocal.tm_mday > TmUtc.tm_mday
+ && (TmUtc.tm_mday != 1 || TmLocal.tm_mday < 28) )
+ || (TmLocal.tm_mday == 1 && TmUtc.tm_mday >= 28) )
+ {
+ cLocalSecs += 24*60*60;
+ Assert( TmLocal.tm_yday - TmUtc.tm_yday == 1
+ || (TmLocal.tm_yday == 0 && TmUtc.tm_yday >= 364 && TmLocal.tm_year == TmUtc.tm_year + 1));
+ }
+ else
+ {
+ cUtcSecs += 24*60*60;
+ Assert( TmUtc.tm_yday - TmLocal.tm_yday == 1
+ || (TmUtc.tm_yday == 0 && TmLocal.tm_yday >= 364 && TmUtc.tm_year == TmLocal.tm_year + 1));
+ }
+#else
+ /* Using day of year and year is simpler: local is ahead when it is in a
+ later year, or in the same year on a later day. */
+ if ( ( TmLocal.tm_year == TmUtc.tm_year
+ && TmLocal.tm_yday > TmUtc.tm_yday)
+ || TmLocal.tm_year > TmUtc.tm_year)
+ {
+ cLocalSecs += 24*60*60;
+ Assert( TmLocal.tm_yday - TmUtc.tm_yday == 1
+ || (TmLocal.tm_yday == 0 && TmUtc.tm_yday >= 364 && TmLocal.tm_year == TmUtc.tm_year + 1));
+ }
+ else
+ {
+ cUtcSecs += 24*60*60;
+ Assert( TmUtc.tm_yday - TmLocal.tm_yday == 1
+ || (TmUtc.tm_yday == 0 && TmLocal.tm_yday >= 364 && TmUtc.tm_year == TmLocal.tm_year + 1));
+ }
+#endif
+ }
+
+ return (cLocalSecs - cUtcSecs) * INT64_C(1000000000); /* seconds -> nanoseconds, done in 64-bit */
+}
+
+
+/**
+ * Returns how far local time currently is from UTC.
+ *
+ * @code
+ *    RTTIMESPEC LocalTime;
+ *    RTTimeSpecAddNano(RTTimeNow(&LocalTime), RTTimeLocalDeltaNano());
+ * @endcode
+ *
+ * @returns The delta between UTC and local time in nanoseconds.
+ */
+RTDECL(int64_t) RTTimeLocalDeltaNano(void)
+{
+    RTTIMESPEC   Now;
+    PCRTTIMESPEC pNow = RTTimeNow(&Now);
+    return rtTimeLocalUTCOffset(pNow, true /* current time, skip fallback */);
+}
+
+
+/**
+ * Gets the delta between UTC and local time at the given time.
+ *
+ * If the offset cannot be determined for the given time, the helper falls
+ * back on the offset for the current time.
+ *
+ * @code
+ * RTTIMESPEC LocalTime;
+ * RTTimeNow(&LocalTime);
+ * RTTimeSpecAddNano(&LocalTime, RTTimeLocalDeltaNanoFor(&LocalTime));
+ * @endcode
+ *
+ * @param pTimeSpec The time spec giving the time to get the delta for.
+ * @returns Returns the nanosecond delta between UTC and local time.
+ */
+RTDECL(int64_t) RTTimeLocalDeltaNanoFor(PCRTTIMESPEC pTimeSpec)
+{
+ AssertPtr(pTimeSpec);
+ return rtTimeLocalUTCOffset(pTimeSpec, false /* not necessarily current time, allow fallback */);
+}
+
+
+/**
+ * Explodes a time spec to the localized timezone.
+ *
+ * The UTC offset for the given time is added to a copy of the time spec, the
+ * result is exploded, and the exploded time is tagged as local time with the
+ * offset (in minutes) recorded in offUTC.
+ *
+ * @returns pTime.
+ * @param   pTime       Where to store the exploded time.
+ * @param   pTimeSpec   The time spec to exploded. (UTC)
+ */
+RTDECL(PRTTIME) RTTimeLocalExplode(PRTTIME pTime, PCRTTIMESPEC pTimeSpec)
+{
+    RTTIMESPEC LocalTime = *pTimeSpec;
+    /* The caller's time is arbitrary, not "now", so let the helper fall back on
+       the current offset when this particular time cannot be converted - the
+       same way RTTimeLocalDeltaNanoFor does it. */
+    int64_t cNsUtcOffset = rtTimeLocalUTCOffset(&LocalTime, false /* not necessarily current time, allow fallback */);
+    RTTimeSpecAddNano(&LocalTime, cNsUtcOffset);
+    pTime = RTTimeExplode(pTime, &LocalTime);
+    if (pTime)
+    {
+        pTime->fFlags = (pTime->fFlags & ~RTTIME_FLAGS_TYPE_MASK) | RTTIME_FLAGS_TYPE_LOCAL;
+        pTime->offUTC = cNsUtcOffset / RT_NS_1MIN;
+    }
+    return pTime;
+}
+
diff --git a/src/VBox/Runtime/r3/posix/timer-posix.cpp b/src/VBox/Runtime/r3/posix/timer-posix.cpp
new file mode 100644
index 00000000..59c905b9
--- /dev/null
+++ b/src/VBox/Runtime/r3/posix/timer-posix.cpp
@@ -0,0 +1,847 @@
+/* $Id: timer-posix.cpp $ */
+/** @file
+ * IPRT - Timer, POSIX.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Defined Constants And Macros *
+*********************************************************************************************************************************/
+/** Enables the use of POSIX RT timers. */
+#ifndef RT_OS_SOLARIS /* Solaris 10 doesn't have SIGEV_THREAD */
+# define IPRT_WITH_POSIX_TIMERS
+#endif /* !RT_OS_SOLARIS */
+
+/** @def RT_TIMER_SIGNAL
+ * The signal number that the timers use.
+ * We currently use SIGALRM for both setitimer and posix real time timers
+ * out of simplicity, but we might want change this later for the posix ones. */
+#ifdef IPRT_WITH_POSIX_TIMERS
+# define RT_TIMER_SIGNAL SIGALRM
+#else
+# define RT_TIMER_SIGNAL SIGALRM
+#endif
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP RTLOGGROUP_TIMER
+#include <iprt/timer.h>
+#include <iprt/alloc.h>
+#include <iprt/assert.h>
+#include <iprt/thread.h>
+#include <iprt/log.h>
+#include <iprt/asm.h>
+#include <iprt/semaphore.h>
+#include <iprt/string.h>
+#include <iprt/once.h>
+#include <iprt/err.h>
+#include <iprt/initterm.h>
+#include <iprt/critsect.h>
+#include "internal/magics.h"
+
+#include <unistd.h>
+#include <sys/fcntl.h>
+#include <sys/ioctl.h>
+#ifdef RT_OS_LINUX
+# include <linux/rtc.h>
+#endif
+#include <sys/time.h>
+#include <signal.h>
+#include <errno.h>
+#include <pthread.h>
+#if defined(RT_OS_DARWIN)
+# define sigprocmask pthread_sigmask /* On xnu sigprocmask works on the process, not the calling thread as elsewhere. */
+#endif
+
+
+/*********************************************************************************************************************************
+* Global Variables *
+*********************************************************************************************************************************/
+#ifdef IPRT_WITH_POSIX_TIMERS
+/** Run-once state used to initialize g_TimerCritSect on the first
+ * RTTimerCreateEx call. */
+static RTONCE g_TimerOnce = RTONCE_INITIALIZER;
+/** Global critsect that serializes timer creation and destruction.
+ * This is lazily created on the first RTTimerCreateEx call and will not be
+ * freed up (I'm afraid). */
+static RTCRITSECT g_TimerCritSect;
+/**
+ * Global counter of RTTimer instances. The signal thread is
+ * started when it changes from 0 to 1. The signal thread
+ * terminates when it becomes 0 again (its wait loop runs only while
+ * the counter is non-zero).
+ */
+static uint32_t volatile g_cTimerInstances;
+/** The signal handling thread, shared by all timers. */
+static RTTHREAD g_TimerThread;
+#endif /* IPRT_WITH_POSIX_TIMERS */
+
+
+/*********************************************************************************************************************************
+* Structures and Typedefs *
+*********************************************************************************************************************************/
+/**
+ * The internal representation of a timer handle.
+ *
+ * The layout depends on IPRT_WITH_POSIX_TIMERS: the setitimer variant carries
+ * its own thread, event semaphore, first-shot interval and status, while the
+ * POSIX timer variant only adds the native timer handle.
+ */
+typedef struct RTTIMER
+{
+ /** Magic.
+ * This is RTTIMER_MAGIC, but changes to something else before the timer
+ * is destroyed to indicate clearly that the thread should exit. */
+ uint32_t volatile u32Magic;
+ /** Flag indicating the timer is suspended. */
+ uint8_t volatile fSuspended;
+ /** Flag indicating that the timer has been destroyed. */
+ uint8_t volatile fDestroyed;
+#ifndef IPRT_WITH_POSIX_TIMERS /** @todo We have to take the signals on a dedicated timer thread as
+ * we (might) have code assuming that signals don't screw around
+ * on existing threads. (It would be sufficient to have one thread
+ * per signal of course since the signal will be masked while it's
+ * running, however, it may just cause more complications than it's
+ * worth - sigwait/sigwaitinfo work atomically anyway...)
+ * Also, must block the signal in the thread main procedure too. */
+ /** The timer thread. */
+ RTTHREAD Thread;
+ /** Event semaphore on which the thread is blocked. */
+ RTSEMEVENT Event;
+#endif /* !IPRT_WITH_POSIX_TIMERS */
+ /** User argument. */
+ void *pvUser;
+ /** Callback. */
+ PFNRTTIMER pfnTimer;
+ /** The timer interval. 0 if one-shot. */
+ uint64_t u64NanoInterval;
+#ifndef IPRT_WITH_POSIX_TIMERS
+ /** The first shot interval. 0 if ASAP. */
+ uint64_t volatile u64NanoFirst;
+#endif /* !IPRT_WITH_POSIX_TIMERS */
+ /** The current timer tick. */
+ uint64_t volatile iTick;
+#ifndef IPRT_WITH_POSIX_TIMERS
+ /** The error/status of the timer.
+ * Initially -1, set to 0 when the timer has been successfully started, and
+ * to errno on failure in starting the timer. */
+ int volatile iError;
+#else /* IPRT_WITH_POSIX_TIMERS */
+ timer_t NativeTimer; /**< The native POSIX timer handle, filled in by timer_create(). */
+#endif /* IPRT_WITH_POSIX_TIMERS */
+
+} RTTIMER;
+
+
+
+#ifdef IPRT_WITH_POSIX_TIMERS
+
+/**
+ * One-time initializer run through RTOnce: sets up g_TimerCritSect.
+ *
+ * @returns Status code of RTCritSectInit.
+ * @param   pvUser      Unused (NULL).
+ */
+static DECLCALLBACK(int) rtTimerOnce(void *pvUser)
+{
+    int const rcInit = RTCritSectInit(&g_TimerCritSect);
+    NOREF(pvUser);
+    return rcInit;
+}
+#endif
+
+
+/**
+ * Do-nothing signal handler.
+ *
+ * It only exists so the timer signal has a handler installed; the signal
+ * number is deliberately not looked at.
+ *
+ * @param   iSignal     The signal number (ignored).
+ */
+static void rttimerSignalIgnore(int iSignal)
+{
+    NOREF(iSignal);
+    //AssertBreakpoint();
+}
+
+
+/**
+ * RT_TIMER_SIGNAL wait thread.
+ *
+ * Installs a do-nothing handler for RT_TIMER_SIGNAL, masks most other signals
+ * and then waits synchronously for the timer signal, invoking the timer
+ * callback for each one it accepts.
+ *
+ * Without IPRT_WITH_POSIX_TIMERS there is one such thread per timer (pvArg is
+ * the timer) and the thread arms setitimer() itself. With it, a single thread
+ * serves all timers and keeps running while g_cTimerInstances is non-zero; the
+ * timer is taken from the signal's sival_ptr.
+ *
+ * @returns VINF_SUCCESS, or the converted errno if sigprocmask fails.
+ * @param hThreadSelf The thread handle, used for RTThreadUserSignal.
+ * @param pvArg The timer (PRTTIMER) in the setitimer build, unused otherwise.
+ */
+static DECLCALLBACK(int) rttimerThread(RTTHREAD hThreadSelf, void *pvArg)
+{
+ NOREF(hThreadSelf); NOREF(pvArg);
+#ifndef IPRT_WITH_POSIX_TIMERS
+ PRTTIMER pTimer = (PRTTIMER)pvArg;
+ RTTIMER Timer = *pTimer; /* NOTE(review): this copy is not referenced in the code visible here. */
+ Assert(pTimer->u32Magic == RTTIMER_MAGIC);
+#endif /* !IPRT_WITH_POSIX_TIMERS */
+
+ /*
+ * Install signal handler.
+ * If installing it with SA_RESTART fails, retry once without that flag.
+ */
+ struct sigaction SigAct;
+ memset(&SigAct, 0, sizeof(SigAct));
+ SigAct.sa_flags = SA_RESTART;
+ sigemptyset(&SigAct.sa_mask);
+ SigAct.sa_handler = rttimerSignalIgnore;
+ if (sigaction(RT_TIMER_SIGNAL, &SigAct, NULL))
+ {
+ SigAct.sa_flags &= ~SA_RESTART;
+ if (sigaction(RT_TIMER_SIGNAL, &SigAct, NULL))
+ AssertMsgFailed(("sigaction failed, errno=%d\n", errno));
+ }
+
+ /*
+ * Mask most signals except those which might be used by the pthread implementation (linux).
+ */
+ sigset_t SigSet;
+ sigfillset(&SigSet);
+ sigdelset(&SigSet, SIGTERM);
+ sigdelset(&SigSet, SIGHUP);
+ sigdelset(&SigSet, SIGINT);
+ sigdelset(&SigSet, SIGABRT);
+ sigdelset(&SigSet, SIGKILL);
+#ifdef SIGRTMIN
+ for (int iSig = SIGRTMIN; iSig < SIGRTMAX; iSig++) /* NOTE(review): exclusive bound, SIGRTMAX itself stays masked. */
+ sigdelset(&SigSet, iSig);
+#endif
+ if (sigprocmask(SIG_SETMASK, &SigSet, NULL))
+ {
+#ifdef IPRT_WITH_POSIX_TIMERS
+ int rc = RTErrConvertFromErrno(errno);
+#else
+ int rc = pTimer->iError = RTErrConvertFromErrno(errno);
+#endif
+ AssertMsgFailed(("sigprocmask -> errno=%d\n", errno));
+ return rc;
+ }
+
+ /*
+ * The work loop.
+ * Signal the creator (which waits in RTThreadUserWait) that setup is done.
+ */
+ RTThreadUserSignal(hThreadSelf);
+
+#ifndef IPRT_WITH_POSIX_TIMERS
+ while ( !pTimer->fDestroyed
+ && pTimer->u32Magic == RTTIMER_MAGIC)
+ {
+ /*
+ * Wait for a start or destroy event.
+ */
+ if (pTimer->fSuspended)
+ {
+ int rc = RTSemEventWait(pTimer->Event, RT_INDEFINITE_WAIT);
+ if (RT_FAILURE(rc) && rc != VERR_INTERRUPTED)
+ {
+ AssertRC(rc);
+ if (pTimer->fDestroyed)
+ continue;
+ RTThreadSleep(1000); /* Don't cause trouble! */
+ }
+ if ( pTimer->fSuspended
+ || pTimer->fDestroyed)
+ continue;
+ }
+
+ /*
+ * Start the timer.
+ *
+ * For some SunOS (/SysV?) threading compatibility Linux will only
+ * deliver the RT_TIMER_SIGNAL to the thread calling setitimer(). Therefore
+ * we have to call it here.
+ *
+ * It turns out this might not always be the case, see RT_TIMER_SIGNAL killing
+ * processes on RH 2.4.21.
+ *
+ * Both intervals are clamped to at least 1000 ns (one microsecond, the
+ * itimerval resolution); a zero first interval is armed as 10 us.
+ */
+ struct itimerval TimerVal;
+ if (pTimer->u64NanoFirst)
+ {
+ uint64_t u64 = RT_MAX(1000, pTimer->u64NanoFirst);
+ TimerVal.it_value.tv_sec = u64 / 1000000000;
+ TimerVal.it_value.tv_usec = (u64 % 1000000000) / 1000;
+ }
+ else
+ {
+ TimerVal.it_value.tv_sec = 0;
+ TimerVal.it_value.tv_usec = 10;
+ }
+ if (pTimer->u64NanoInterval)
+ {
+ uint64_t u64 = RT_MAX(1000, pTimer->u64NanoInterval);
+ TimerVal.it_interval.tv_sec = u64 / 1000000000;
+ TimerVal.it_interval.tv_usec = (u64 % 1000000000) / 1000;
+ }
+ else
+ {
+ TimerVal.it_interval.tv_sec = 0;
+ TimerVal.it_interval.tv_usec = 0;
+ }
+
+ if (setitimer(ITIMER_REAL, &TimerVal, NULL))
+ {
+ ASMAtomicXchgU8(&pTimer->fSuspended, true);
+ pTimer->iError = RTErrConvertFromErrno(errno);
+ RTThreadUserSignal(hThreadSelf);
+ continue; /* back to suspended mode. */
+ }
+ pTimer->iError = 0;
+ RTThreadUserSignal(hThreadSelf);
+
+ /*
+ * Timer Service Loop.
+ * From here on SigSet only holds RT_TIMER_SIGNAL and is what we wait on.
+ */
+ sigemptyset(&SigSet);
+ sigaddset(&SigSet, RT_TIMER_SIGNAL);
+ do
+ {
+ siginfo_t SigInfo;
+ RT_ZERO(SigInfo);
+#ifdef RT_OS_DARWIN
+ if (RT_LIKELY(sigwait(&SigSet, &SigInfo.si_signo) >= 0))
+ {
+#else
+ if (RT_LIKELY(sigwaitinfo(&SigSet, &SigInfo) >= 0))
+ {
+ if (RT_LIKELY(SigInfo.si_signo == RT_TIMER_SIGNAL))
+#endif
+ {
+ if (RT_UNLIKELY( pTimer->fSuspended
+ || pTimer->fDestroyed
+ || pTimer->u32Magic != RTTIMER_MAGIC))
+ break;
+
+ pTimer->pfnTimer(pTimer, pTimer->pvUser, ++pTimer->iTick);
+
+ /* auto suspend one-shot timers. */
+ if (RT_UNLIKELY(!pTimer->u64NanoInterval))
+ {
+ ASMAtomicWriteU8(&pTimer->fSuspended, true);
+ break;
+ }
+ }
+ }
+ else if (errno != EINTR)
+ AssertMsgFailed(("sigwaitinfo -> errno=%d\n", errno));
+ } while (RT_LIKELY( !pTimer->fSuspended
+ && !pTimer->fDestroyed
+ && pTimer->u32Magic == RTTIMER_MAGIC));
+
+ /*
+ * Disable the timer.
+ */
+ struct itimerval TimerVal2 = {{0,0}, {0,0}};
+ if (setitimer(ITIMER_REAL, &TimerVal2, NULL))
+ AssertMsgFailed(("setitimer(ITIMER_REAL,&{0}, NULL) failed, errno=%d\n", errno));
+
+ /*
+ * ACK any pending suspend request.
+ */
+ if (!pTimer->fDestroyed)
+ {
+ pTimer->iError = 0;
+ RTThreadUserSignal(hThreadSelf);
+ }
+ }
+
+ /*
+ * Exit.
+ */
+ pTimer->iError = 0;
+ RTThreadUserSignal(hThreadSelf);
+
+#else /* IPRT_WITH_POSIX_TIMERS */
+
+ sigemptyset(&SigSet);
+ sigaddset(&SigSet, RT_TIMER_SIGNAL);
+ while (g_cTimerInstances)
+ {
+ siginfo_t SigInfo;
+ RT_ZERO(SigInfo);
+ if (RT_LIKELY(sigwaitinfo(&SigSet, &SigInfo) >= 0))
+ {
+ LogFlow(("rttimerThread: signo=%d pTimer=%p\n", SigInfo.si_signo, SigInfo.si_value.sival_ptr));
+ if (RT_LIKELY( SigInfo.si_signo == RT_TIMER_SIGNAL
+ && SigInfo.si_code == SI_TIMER)) /* The SI_TIMER check is *essential* because of the pthread_kill. */
+ {
+ PRTTIMER pTimer = (PRTTIMER)SigInfo.si_value.sival_ptr; /* set as sigev_value when the timer was created */
+ AssertPtr(pTimer);
+ if (RT_UNLIKELY( !RT_VALID_PTR(pTimer)
+ || ASMAtomicUoReadU8(&pTimer->fSuspended)
+ || ASMAtomicUoReadU8(&pTimer->fDestroyed)
+ || pTimer->u32Magic != RTTIMER_MAGIC))
+ continue;
+
+ pTimer->pfnTimer(pTimer, pTimer->pvUser, ++pTimer->iTick);
+
+ /* auto suspend one-shot timers. */
+ if (RT_UNLIKELY(!pTimer->u64NanoInterval))
+ ASMAtomicWriteU8(&pTimer->fSuspended, true);
+ }
+ }
+ }
+#endif /* IPRT_WITH_POSIX_TIMERS */
+
+ return VINF_SUCCESS;
+}
+
+
+/**
+ * Creates a timer; it is created suspended.
+ *
+ * In the setitimer build (no IPRT_WITH_POSIX_TIMERS) only one timer can exist
+ * per process and it gets its own thread.  In the POSIX timer build each timer
+ * gets a native timer delivering RT_TIMER_SIGNAL, and the first timer also
+ * starts the shared signal handling thread.
+ *
+ * RT_TIMER_SIGNAL is blocked in the calling thread as a side effect.
+ *
+ * @returns VINF_SUCCESS on success.
+ * @retval  VERR_NOT_SUPPORTED if RTTIMER_FLAGS_CPU_SPECIFIC is requested or
+ *          IPRT was initialized in unobtrusive mode.
+ * @retval  VERR_TIMER_BUSY if the process timer is already in use (setitimer
+ *          build only).
+ * @retval  VERR_NO_MEMORY if the timer structure could not be allocated.
+ * @param   ppTimer             Where to store the timer handle on success.
+ * @param   u64NanoInterval     The timer interval in nanoseconds, 0 for one-shot.
+ * @param   fFlags              RTTIMER_FLAGS_XXX.
+ * @param   pfnTimer            The callback to invoke on each tick.
+ * @param   pvUser              User argument passed to the callback.
+ */
+RTDECL(int) RTTimerCreateEx(PRTTIMER *ppTimer, uint64_t u64NanoInterval, uint32_t fFlags, PFNRTTIMER pfnTimer, void *pvUser)
+{
+    /*
+     * We don't support the fancy MP features.
+     */
+    if (fFlags & RTTIMER_FLAGS_CPU_SPECIFIC)
+        return VERR_NOT_SUPPORTED;
+
+    /*
+     * We need the signal masks to be set correctly, which they won't be in
+     * unobtrusive mode.
+     */
+    if (RTR3InitIsUnobtrusive())
+        return VERR_NOT_SUPPORTED;
+
+#ifndef IPRT_WITH_POSIX_TIMERS
+    /*
+     * Check if timer is busy.
+     */
+    struct itimerval TimerVal;
+    if (getitimer(ITIMER_REAL, &TimerVal))
+    {
+        AssertMsgFailed(("getitimer() -> errno=%d\n", errno));
+        return VERR_NOT_IMPLEMENTED;
+    }
+    if (    TimerVal.it_value.tv_usec
+        ||  TimerVal.it_value.tv_sec
+        ||  TimerVal.it_interval.tv_usec
+        ||  TimerVal.it_interval.tv_sec)
+    {
+        AssertMsgFailed(("A timer is running. System limit is one timer per process!\n"));
+        return VERR_TIMER_BUSY;
+    }
+#endif /* !IPRT_WITH_POSIX_TIMERS */
+
+    /*
+     * Block RT_TIMER_SIGNAL from calling thread.
+     */
+    sigset_t SigSet;
+    sigemptyset(&SigSet);
+    sigaddset(&SigSet, RT_TIMER_SIGNAL);
+    sigprocmask(SIG_BLOCK, &SigSet, NULL);
+
+#ifndef IPRT_WITH_POSIX_TIMERS /** @todo combine more of the setitimer/timer_create code. setitimer could also use the global thread. */
+    /** @todo Move this RTC hack else where... */
+    static bool fDoneRTC;
+    if (!fDoneRTC)
+    {
+        fDoneRTC = true;
+        /* check resolution. */
+        TimerVal.it_interval.tv_sec = 0;
+        TimerVal.it_interval.tv_usec = 1000;
+        TimerVal.it_value = TimerVal.it_interval;
+        if (    setitimer(ITIMER_REAL, &TimerVal, NULL)
+            ||  getitimer(ITIMER_REAL, &TimerVal)
+            ||  TimerVal.it_interval.tv_usec > 1000)
+        {
+            /*
+             * Try open /dev/rtc to set the irq rate to 1024 and
+             * turn periodic
+             */
+            Log(("RTTimerCreate: interval={%ld,%ld} trying to adjust /dev/rtc!\n", TimerVal.it_interval.tv_sec, TimerVal.it_interval.tv_usec));
+# ifdef RT_OS_LINUX
+            int fh = open("/dev/rtc", O_RDONLY);
+            if (fh >= 0)
+            {
+                if (    ioctl(fh, RTC_IRQP_SET, 1024) < 0
+                    ||  ioctl(fh, RTC_PIE_ON, 0) < 0)
+                    Log(("RTTimerCreate: couldn't configure rtc! errno=%d\n", errno));
+                ioctl(fh, F_SETFL, O_ASYNC);
+                ioctl(fh, F_SETOWN, getpid());
+                /* not so sure if closing it is a good idea... */
+                //close(fh);
+            }
+            else
+                Log(("RTTimerCreate: couldn't configure rtc! open failed with errno=%d\n", errno));
+# endif
+        }
+        /* disable it */
+        TimerVal.it_interval.tv_sec = 0;
+        TimerVal.it_interval.tv_usec = 0;
+        TimerVal.it_value = TimerVal.it_interval;
+        setitimer(ITIMER_REAL, &TimerVal, NULL);
+    }
+
+    /*
+     * Create a new timer.
+     */
+    int rc;
+    PRTTIMER pTimer = (PRTTIMER)RTMemAlloc(sizeof(*pTimer));
+    if (pTimer)
+    {
+        pTimer->u32Magic    = RTTIMER_MAGIC;
+        pTimer->fSuspended  = true;
+        pTimer->fDestroyed  = false;
+        pTimer->Thread      = NIL_RTTHREAD;
+        pTimer->Event       = NIL_RTSEMEVENT;
+        pTimer->pfnTimer    = pfnTimer;
+        pTimer->pvUser      = pvUser;
+        pTimer->u64NanoInterval = u64NanoInterval;
+        pTimer->u64NanoFirst = 0;
+        pTimer->iTick       = 0;
+        pTimer->iError      = 0;
+        rc = RTSemEventCreate(&pTimer->Event);
+        AssertRC(rc);
+        if (RT_SUCCESS(rc))
+        {
+            rc = RTThreadCreate(&pTimer->Thread, rttimerThread, pTimer, 0, RTTHREADTYPE_TIMER, RTTHREADFLAGS_WAITABLE, "Timer");
+            AssertRC(rc);
+            if (RT_SUCCESS(rc))
+            {
+                /*
+                 * Wait for the timer thread to initialize it self.
+                 * This might take a little while...
+                 */
+                rc = RTThreadUserWait(pTimer->Thread, 45*1000);
+                AssertRC(rc);
+                if (RT_SUCCESS(rc))
+                {
+                    rc = RTThreadUserReset(pTimer->Thread); AssertRC(rc);
+                    rc = pTimer->iError;
+                    AssertRC(rc);
+                    if (RT_SUCCESS(rc))
+                    {
+                        RTThreadYield(); /* <-- Horrible hack to make tstTimer work. (linux 2.6.12) */
+                        *ppTimer = pTimer;
+                        return VINF_SUCCESS;
+                    }
+                }
+
+                /* bail out */
+                ASMAtomicXchgU8(&pTimer->fDestroyed, true);
+                ASMAtomicXchgU32(&pTimer->u32Magic, ~RTTIMER_MAGIC);
+                RTThreadWait(pTimer->Thread, 45*1000, NULL);
+            }
+            RTSemEventDestroy(pTimer->Event);
+            pTimer->Event = NIL_RTSEMEVENT;
+        }
+        RTMemFree(pTimer);
+    }
+    else
+        rc = VERR_NO_MEMORY;
+
+#else /* IPRT_WITH_POSIX_TIMERS */
+
+    /*
+     * Do the global init first.
+     */
+    int rc = RTOnce(&g_TimerOnce, rtTimerOnce, NULL);
+    if (RT_FAILURE(rc))
+        return rc;
+
+    /*
+     * Create a new timer structure.
+     */
+    LogFlow(("RTTimerCreateEx: u64NanoInterval=%llu fFlags=%lu\n", u64NanoInterval, fFlags));
+    PRTTIMER pTimer = (PRTTIMER)RTMemAlloc(sizeof(*pTimer));
+    if (pTimer)
+    {
+        /* Initialize timer structure. */
+        pTimer->u32Magic    = RTTIMER_MAGIC;
+        pTimer->fSuspended  = true;
+        pTimer->fDestroyed  = false;
+        pTimer->pfnTimer    = pfnTimer;
+        pTimer->pvUser      = pvUser;
+        pTimer->u64NanoInterval = u64NanoInterval;
+        pTimer->iTick       = 0;
+
+        /*
+         * Create a timer that deliver RT_TIMER_SIGNAL upon timer expiration.
+         * The sigevent is zeroed first so no uninitialized member is passed on.
+         */
+        struct sigevent SigEvt;
+        RT_ZERO(SigEvt);
+        SigEvt.sigev_notify = SIGEV_SIGNAL;
+        SigEvt.sigev_signo  = RT_TIMER_SIGNAL;
+        SigEvt.sigev_value.sival_ptr = pTimer; /* sigev_value gets copied to siginfo. */
+        /* timer_create() returns -1 and sets errno on failure, so it is errno
+           (not the return value) that must be converted below. */
+        int err = timer_create(CLOCK_REALTIME, &SigEvt, &pTimer->NativeTimer) == 0 ? 0 : errno;
+        if (!err)
+        {
+            /*
+             * Increment the timer count, do this behind the critsect to avoid races.
+             */
+            RTCritSectEnter(&g_TimerCritSect);
+
+            if (ASMAtomicIncU32(&g_cTimerInstances) != 1)
+            {
+                Assert(g_cTimerInstances > 1);
+                RTCritSectLeave(&g_TimerCritSect);
+
+                LogFlow(("RTTimerCreateEx: rc=%Rrc pTimer=%p (thread already running)\n", rc, pTimer));
+                *ppTimer = pTimer;
+                return VINF_SUCCESS;
+            }
+
+            /*
+             * Create the signal handling thread. It will wait for the signal
+             * and execute the timer functions.
+             */
+            rc = RTThreadCreate(&g_TimerThread, rttimerThread, NULL, 0, RTTHREADTYPE_TIMER, RTTHREADFLAGS_WAITABLE, "Timer");
+            if (RT_SUCCESS(rc))
+            {
+                rc = RTThreadUserWait(g_TimerThread, 45*1000); /* this better not fail... */
+                if (RT_SUCCESS(rc))
+                {
+                    RTCritSectLeave(&g_TimerCritSect);
+
+                    LogFlow(("RTTimerCreateEx: rc=%Rrc pTimer=%p (thread started)\n", rc, pTimer));
+                    *ppTimer = pTimer;
+                    return VINF_SUCCESS;
+                }
+                /* darn, what do we do here? */
+            }
+
+            /* bail out */
+            ASMAtomicDecU32(&g_cTimerInstances);
+            Assert(!g_cTimerInstances);
+
+            RTCritSectLeave(&g_TimerCritSect);
+
+            timer_delete(pTimer->NativeTimer);
+        }
+        else
+        {
+            rc = RTErrConvertFromErrno(err);
+            Log(("RTTimerCreateEx: err=%d (%Rrc)\n", err, rc));
+        }
+
+        RTMemFree(pTimer);
+    }
+    else
+        rc = VERR_NO_MEMORY;
+
+#endif /* IPRT_WITH_POSIX_TIMERS */
+    return rc;
+}
+
+
+/**
+ * Destroys a timer.
+ *
+ * Marks the timer as destroyed and invalidates its magic.  When built with
+ * IPRT_WITH_POSIX_TIMERS a running native timer is disarmed, the global
+ * instance count is decremented and the shared timer thread is only stopped
+ * when that count reaches zero.  Otherwise the per-timer thread is signalled
+ * and waited for.  Finally the native timer or event semaphore is released.
+ *
+ * @returns IPRT status code.  VINF_SUCCESS is returned for a NULL handle.
+ *          VERR_INVALID_POINTER / VERR_INVALID_MAGIC are returned for bad
+ *          handles and VERR_INTERNAL_ERROR when called on the timer thread.
+ *          The timer structure is only freed when the final status is a
+ *          success status.
+ * @param pTimer The timer to destroy.  NULL is quietly ignored.
+ */
+RTR3DECL(int) RTTimerDestroy(PRTTIMER pTimer)
+{
+ LogFlow(("RTTimerDestroy: pTimer=%p\n", pTimer));
+
+ /*
+ * Validate input.
+ */
+ /* NULL is ok. */
+ if (!pTimer)
+ return VINF_SUCCESS;
+ int rc = VINF_SUCCESS;
+ AssertPtrReturn(pTimer, VERR_INVALID_POINTER);
+ AssertReturn(pTimer->u32Magic == RTTIMER_MAGIC, VERR_INVALID_MAGIC);
+ /* The code below waits for the timer thread, so it must not be the caller. */
+#ifdef IPRT_WITH_POSIX_TIMERS
+ AssertReturn(g_TimerThread != RTThreadSelf(), VERR_INTERNAL_ERROR);
+#else
+ AssertReturn(pTimer->Thread != RTThreadSelf(), VERR_INTERNAL_ERROR);
+#endif
+
+ /*
+ * Mark the timer as destroyed.
+ */
+ ASMAtomicWriteU8(&pTimer->fDestroyed, true);
+ ASMAtomicWriteU32(&pTimer->u32Magic, ~RTTIMER_MAGIC);
+
+#ifdef IPRT_WITH_POSIX_TIMERS
+ /*
+ * Suspend the timer if it's running.
+ */
+ if (!pTimer->fSuspended)
+ {
+ /* An all-zero itimerspec is the same setting RTTimerStop uses to stop the timer. */
+ struct itimerspec TimerSpec;
+ TimerSpec.it_value.tv_sec = 0;
+ TimerSpec.it_value.tv_nsec = 0;
+ TimerSpec.it_interval.tv_sec = 0;
+ TimerSpec.it_interval.tv_nsec = 0;
+ int err = timer_settime(pTimer->NativeTimer, 0, &TimerSpec, NULL); NOREF(err);
+ AssertMsg(!err, ("%d / %d\n", err, errno));
+ }
+#endif
+
+ /*
+ * Poke the thread and wait for it to finish.
+ * This is only done for the last timer when using posix timers.
+ */
+#ifdef IPRT_WITH_POSIX_TIMERS
+ RTTHREAD Thread = NIL_RTTHREAD;
+ RTCritSectEnter(&g_TimerCritSect);
+ /* Take over the shared thread handle only when this was the last timer instance. */
+ if (ASMAtomicDecU32(&g_cTimerInstances) == 0)
+ {
+ Thread = g_TimerThread;
+ g_TimerThread = NIL_RTTHREAD;
+ }
+ RTCritSectLeave(&g_TimerCritSect);
+#else /* IPRT_WITH_POSIX_TIMERS */
+ RTTHREAD Thread = pTimer->Thread;
+ rc = RTSemEventSignal(pTimer->Event);
+ AssertRC(rc);
+#endif /* IPRT_WITH_POSIX_TIMERS */
+ if (Thread != NIL_RTTHREAD)
+ {
+ /* Signal it so it gets out of the sigwait if it's stuck there... */
+ pthread_kill((pthread_t)RTThreadGetNative(Thread), RT_TIMER_SIGNAL);
+
+ /*
+ * Wait for the thread to complete.
+ */
+ rc = RTThreadWait(Thread, 30 * 1000, NULL);
+ AssertRC(rc);
+ }
+
+
+ /*
+ * Free up the resources associated with the timer.
+ */
+#ifdef IPRT_WITH_POSIX_TIMERS
+ timer_delete(pTimer->NativeTimer);
+#else
+ RTSemEventDestroy(pTimer->Event);
+ pTimer->Event = NIL_RTSEMEVENT;
+#endif /* !IPRT_WITH_POSIX_TIMERS */
+ /* The structure is deliberately kept when rc is a failure status (e.g. the thread wait failed). */
+ if (RT_SUCCESS(rc))
+ RTMemFree(pTimer);
+ return rc;
+}
+
+
+/**
+ * Starts a suspended timer.
+ *
+ * @returns IPRT status code.
+ * @retval  VERR_INVALID_POINTER if pTimer is not a valid pointer.
+ * @retval  VERR_INVALID_MAGIC if pTimer does not carry RTTIMER_MAGIC.
+ * @retval  VERR_INTERNAL_ERROR if called on the timer's own thread (only
+ *          checked when IPRT_WITH_POSIX_TIMERS is not defined).
+ * @retval  VERR_TIMER_ACTIVE if the timer is not currently suspended.
+ *
+ * @param   pTimer      The timer to start.
+ * @param   u64First    Delay until the first tick, in nanoseconds.  In the
+ *                      POSIX timer build a value of zero is replaced by 10ns
+ *                      because a zero it_value would disable the timer.
+ */
+RTDECL(int) RTTimerStart(PRTTIMER pTimer, uint64_t u64First)
+{
+    /*
+     * Validate input.
+     */
+    AssertPtrReturn(pTimer, VERR_INVALID_POINTER);
+    AssertReturn(pTimer->u32Magic == RTTIMER_MAGIC, VERR_INVALID_MAGIC);
+#ifndef IPRT_WITH_POSIX_TIMERS
+    AssertReturn(pTimer->Thread != RTThreadSelf(), VERR_INTERNAL_ERROR);
+#endif
+
+    /*
+     * Already running?  The exchange claims the suspended -> running
+     * transition atomically; the old value tells us whether we got it.
+     */
+    if (!ASMAtomicXchgU8(&pTimer->fSuspended, false))
+        return VERR_TIMER_ACTIVE;
+    LogFlow(("RTTimerStart: pTimer=%p u64First=%llu u64NanoInterval=%llu\n", pTimer, u64First, pTimer->u64NanoInterval));
+
+#ifndef IPRT_WITH_POSIX_TIMERS
+    /*
+     * Tell the thread to start servicing the timer.
+     * Wait for it to ACK the request to avoid reset races.
+     */
+    RTThreadUserReset(pTimer->Thread);
+    ASMAtomicUoWriteU64(&pTimer->u64NanoFirst, u64First);
+    ASMAtomicUoWriteU64(&pTimer->iTick, 0);
+    ASMAtomicWriteU8(&pTimer->fSuspended, false);
+    int rc = RTSemEventSignal(pTimer->Event);
+    if (RT_SUCCESS(rc))
+    {
+        rc = RTThreadUserWait(pTimer->Thread, 45*1000);
+        AssertRC(rc);
+        RTThreadUserReset(pTimer->Thread);
+    }
+    else
+        AssertRC(rc);
+
+#else /* IPRT_WITH_POSIX_TIMERS */
+    /*
+     * Start the timer.
+     */
+    struct itimerspec TimerSpec;
+    TimerSpec.it_value.tv_sec     = u64First / 1000000000; /* nanosec => sec */
+    TimerSpec.it_value.tv_nsec    = u64First ? u64First % 1000000000 : 10; /* 0 means disable, replace it with 10. */
+    TimerSpec.it_interval.tv_sec  = pTimer->u64NanoInterval / 1000000000;
+    TimerSpec.it_interval.tv_nsec = pTimer->u64NanoInterval % 1000000000;
+    int err = timer_settime(pTimer->NativeTimer, 0, &TimerSpec, NULL);
+    int rc = err == 0 ? VINF_SUCCESS : RTErrConvertFromErrno(errno);
+#endif /* IPRT_WITH_POSIX_TIMERS */
+
+    /*
+     * Starting failed: roll the state back to suspended.  Leaving the flag
+     * cleared would make every later RTTimerStart call bail out with
+     * VERR_TIMER_ACTIVE even though the timer never got going.
+     */
+    if (RT_FAILURE(rc))
+        ASMAtomicXchgU8(&pTimer->fSuspended, true);
+    return rc;
+}
+
+
+/**
+ * Stops (suspends) a running timer.
+ *
+ * @returns IPRT status code.  VERR_TIMER_SUSPENDED is returned when the timer
+ *          was already suspended, VERR_INVALID_POINTER / VERR_INVALID_MAGIC
+ *          for bad handles.
+ * @param pTimer The timer to stop.
+ */
+RTDECL(int) RTTimerStop(PRTTIMER pTimer)
+{
+ /*
+ * Validate input.
+ */
+ AssertPtrReturn(pTimer, VERR_INVALID_POINTER);
+ AssertReturn(pTimer->u32Magic == RTTIMER_MAGIC, VERR_INVALID_MAGIC);
+
+ /*
+ * Already suspended?
+ */
+ if (ASMAtomicXchgU8(&pTimer->fSuspended, true))
+ return VERR_TIMER_SUSPENDED;
+ LogFlow(("RTTimerStop: pTimer=%p\n", pTimer));
+
+#ifndef IPRT_WITH_POSIX_TIMERS
+ /*
+ * Tell the thread to stop servicing the timer.
+ */
+ RTThreadUserReset(pTimer->Thread);
+ /* Note: fSuspended was already set by the exchange above; this repeats the store. */
+ ASMAtomicXchgU8(&pTimer->fSuspended, true);
+ int rc = VINF_SUCCESS;
+ /* Only signal and wait when called from another thread than the timer thread itself. */
+ if (RTThreadSelf() != pTimer->Thread)
+ {
+ pthread_kill((pthread_t)RTThreadGetNative(pTimer->Thread), RT_TIMER_SIGNAL);
+ rc = RTThreadUserWait(pTimer->Thread, 45*1000);
+ AssertRC(rc);
+ RTThreadUserReset(pTimer->Thread);
+ }
+
+#else /* IPRT_WITH_POSIX_TIMERS */
+ /*
+ * Stop the timer.
+ */
+ /* All fields zero: clears both the pending expiration and the reload interval. */
+ struct itimerspec TimerSpec;
+ TimerSpec.it_value.tv_sec = 0;
+ TimerSpec.it_value.tv_nsec = 0;
+ TimerSpec.it_interval.tv_sec = 0;
+ TimerSpec.it_interval.tv_nsec = 0;
+ int err = timer_settime(pTimer->NativeTimer, 0, &TimerSpec, NULL);
+ int rc = err == 0 ? VINF_SUCCESS : RTErrConvertFromErrno(errno);
+#endif /* IPRT_WITH_POSIX_TIMERS */
+
+ return rc;
+}
+
+
+/**
+ * Changes the interval of a timer.
+ *
+ * This implementation validates the handle and then always refuses the
+ * request.
+ *
+ * @returns VERR_NOT_SUPPORTED for a valid handle, VERR_INVALID_POINTER or
+ *          VERR_INVALID_MAGIC otherwise.
+ * @param   pTimer          The timer handle.
+ * @param   u64NanoInterval The requested interval (ignored).
+ */
+RTDECL(int) RTTimerChangeInterval(PRTTIMER pTimer, uint64_t u64NanoInterval)
+{
+    /* Handle validation comes first so bad handles get their usual status codes. */
+    AssertPtrReturn(pTimer, VERR_INVALID_POINTER);
+    AssertReturn(pTimer->u32Magic == RTTIMER_MAGIC, VERR_INVALID_MAGIC);
+
+    /* The interval itself is never looked at. */
+    NOREF(u64NanoInterval);
+
+    int const rcUnsupported = VERR_NOT_SUPPORTED;
+    return rcUnsupported;
+}
+
diff --git a/src/VBox/Runtime/r3/posix/tls-posix.cpp b/src/VBox/Runtime/r3/posix/tls-posix.cpp
new file mode 100644
index 00000000..22ef28ab
--- /dev/null
+++ b/src/VBox/Runtime/r3/posix/tls-posix.cpp
@@ -0,0 +1,119 @@
+/* $Id: tls-posix.cpp $ */
+/** @file
+ * IPRT - Thread Local Storage (TLS), POSIX.
+ */
+
+/*
+ * Copyright (C) 2008-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#define LOG_GROUP RTLOGGROUP_THREAD
+#include <errno.h>
+#include <pthread.h>
+
+#include <iprt/thread.h>
+#include <iprt/log.h>
+#include <iprt/assert.h>
+#include <iprt/errcore.h>
+
+
+AssertCompile(sizeof(pthread_key_t) <= sizeof(RTTLS));
+
+
+/**
+ * Allocates a TLS entry without a destructor.
+ *
+ * @returns The new TLS index, NIL_RTTLS if pthread_key_create fails.
+ */
+RTR3DECL(RTTLS) RTTlsAlloc(void)
+{
+    pthread_key_t hKey = (pthread_key_t)NIL_RTTLS;
+    if (pthread_key_create(&hKey, NULL) != 0)
+        return NIL_RTTLS;
+
+    /* A successfully created key must not collide with the NIL value. */
+    Assert(hKey != (pthread_key_t)NIL_RTTLS);
+    return hKey;
+}
+
+
+/**
+ * Allocates a TLS entry with an optional destructor.
+ *
+ * @returns VINF_SUCCESS on success, otherwise the pthread_key_create error
+ *          converted by RTErrConvertFromErrno.
+ * @param   piTls           Where to store the new TLS index on success.
+ * @param   pfnDestructor   Destructor handed to pthread_key_create.
+ */
+RTR3DECL(int) RTTlsAllocEx(PRTTLS piTls, PFNRTTLSDTOR pfnDestructor)
+{
+    pthread_key_t hKey = (pthread_key_t)NIL_RTTLS;
+
+    /* NOTE(review): the explicit cast is only applied for 32-bit x86 GCC builds;
+       presumably the function pointer types differ there - confirm. */
+#if defined(__GNUC__) && defined(RT_ARCH_X86)
+    int const iErr = pthread_key_create(&hKey, (void (*)(void*))pfnDestructor);
+#else
+    int const iErr = pthread_key_create(&hKey, pfnDestructor);
+#endif
+    if (iErr != 0)
+        return RTErrConvertFromErrno(iErr);
+
+    *piTls = hKey;
+    Assert((pthread_key_t)*piTls == hKey);  /* no truncation when stored as RTTLS */
+    Assert(*piTls != NIL_RTTLS);
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Frees a TLS entry.
+ *
+ * @returns VINF_SUCCESS on success or when iTls is NIL_RTTLS, otherwise the
+ *          pthread_key_delete error converted by RTErrConvertFromErrno.
+ * @param   iTls    The TLS index to free.  NIL_RTTLS is quietly ignored.
+ */
+RTR3DECL(int) RTTlsFree(RTTLS iTls)
+{
+    int iErr = 0;
+    if (iTls != NIL_RTTLS)
+        iErr = pthread_key_delete(iTls);
+
+    if (iErr != 0)
+        return RTErrConvertFromErrno(iErr);
+    return VINF_SUCCESS;
+}
+
+
+/**
+ * Gets the calling thread's value for a TLS entry.
+ *
+ * @returns Whatever pthread_getspecific returns for the key.
+ * @param   iTls    The TLS index.
+ */
+RTR3DECL(void *) RTTlsGet(RTTLS iTls)
+{
+    void *pvValue = pthread_getspecific(iTls);
+    return pvValue;
+}
+
+
+/**
+ * Gets the calling thread's value for a TLS entry, with index validation.
+ *
+ * @returns VINF_SUCCESS, or VERR_INVALID_PARAMETER if iTls is NIL_RTTLS (in
+ *          which case *ppvValue is left untouched).
+ * @param   iTls        The TLS index.
+ * @param   ppvValue    Where to store the value.
+ */
+RTR3DECL(int) RTTlsGetEx(RTTLS iTls, void **ppvValue)
+{
+    if (iTls != NIL_RTTLS)
+    {
+        *ppvValue = pthread_getspecific(iTls);
+        return VINF_SUCCESS;
+    }
+    return VERR_INVALID_PARAMETER;
+}
+
+
+/**
+ * Sets the calling thread's value for a TLS entry.
+ *
+ * @returns VINF_SUCCESS on success, otherwise the pthread_setspecific error
+ *          converted by RTErrConvertFromErrno.
+ * @param   iTls    The TLS index.
+ * @param   pvValue The value to associate with the index.
+ */
+RTR3DECL(int) RTTlsSet(RTTLS iTls, void *pvValue)
+{
+    int const iErr = pthread_setspecific(iTls, pvValue);
+    if (iErr == 0)
+        return VINF_SUCCESS;
+    return RTErrConvertFromErrno(iErr);
+}
+
diff --git a/src/VBox/Runtime/r3/posix/utf8-posix.cpp b/src/VBox/Runtime/r3/posix/utf8-posix.cpp
new file mode 100644
index 00000000..5f4c947e
--- /dev/null
+++ b/src/VBox/Runtime/r3/posix/utf8-posix.cpp
@@ -0,0 +1,709 @@
+/* $Id: utf8-posix.cpp $ */
+/** @file
+ * IPRT - UTF-8 helpers, POSIX.
+ */
+
+/*
+ * Copyright (C) 2006-2023 Oracle and/or its affiliates.
+ *
+ * This file is part of VirtualBox base platform packages, as
+ * available from https://www.virtualbox.org.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, in version 3 of the
+ * License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses>.
+ *
+ * The contents of this file may alternatively be used under the terms
+ * of the Common Development and Distribution License Version 1.0
+ * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
+ * in the VirtualBox distribution, in which case the provisions of the
+ * CDDL are applicable instead of those of the GPL.
+ *
+ * You may elect to license modified versions of this file under the
+ * terms and conditions of either the GPL or the CDDL or both.
+ *
+ * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
+ */
+
+
+/*********************************************************************************************************************************
+* Header Files *
+*********************************************************************************************************************************/
+#include <iprt/string.h>
+#include "internal/iprt.h"
+
+#include <iprt/alloc.h>
+#include <iprt/assert.h>
+#include <iprt/ctype.h>
+#include <iprt/err.h>
+#include <iprt/string.h>
+
+#include <errno.h>
+#include <locale.h>
+#ifdef RT_OS_DARWIN
+# include <stdlib.h>
+#endif
+
+/* iconv prototype changed with 165+ (thanks to PSARC/2010/160 Bugster 7037400) */
+#if defined(RT_OS_SOLARIS)
+# if !defined(_XPG6)
+# define IPRT_XPG6_TMP_DEF
+# define _XPG6
+# endif
+# if defined(__USE_LEGACY_PROTOTYPES__)
+# define IPRT_LEGACY_PROTO_TMP_DEF
+# undef __USE_LEGACY_PROTOTYPES__
+# endif
+#endif /* RT_OS_SOLARIS */
+
+# include <iconv.h>
+
+#if defined(RT_OS_SOLARIS)
+# if defined(IPRT_XPG6_TMP_DEF)
+# undef _XPG6
+# undef IPRT_XPG6_TMP_DEF
+# endif
+# if defined(IPRT_LEGACY_PROTO_TMP_DEF)
+# define __USE_LEGACY_PROTOTYPES__
+# undef IPRT_LEGACY_PROTO_TMP_DEF
+# endif
+#endif /* RT_OS_SOLARIS */
+
+#include <wctype.h>
+
+#include <langinfo.h>
+
+#include "internal/alignmentchecks.h"
+#include "internal/string.h"
+#ifdef RT_WITH_ICONV_CACHE
+# include "internal/thread.h"
+AssertCompile(sizeof(iconv_t) <= sizeof(void *));
+#endif
+
+
+/* There are different opinions about the constness of the input buffer. */
+#if defined(RT_OS_LINUX) || defined(RT_OS_HAIKU) || defined(RT_OS_SOLARIS) \
+ || (defined(RT_OS_DARWIN) && defined(_DARWIN_FEATURE_UNIX_CONFORMANCE))
+# define NON_CONST_ICONV_INPUT
+#endif
+#ifdef RT_OS_FREEBSD
+# include <sys/param.h>
+# if __FreeBSD_version >= 1002000 /* Changed around 10.2.2 (https://svnweb.freebsd.org/base?view=revision&revision=281550) */
+# define NON_CONST_ICONV_INPUT
+# else
+# error __FreeBSD_version__
+# endif
+#endif
+#ifdef RT_OS_NETBSD
+/* iconv constness was changed on 2019-10-24, shortly after 9.99.17 */
+# include <sys/param.h>
+# if __NetBSD_Prereq__(9,99,18)
+# define NON_CONST_ICONV_INPUT
+# endif
+#endif
+
+
+/**
+ * Gets the codeset of the current locale (LC_CTYPE).
+ *
+ * On Darwin the result is forced to "UTF-8" when none of the LC_ALL, LC_CTYPE
+ * and LANG environment variables is set to a non-empty value; that decision is
+ * made on first use and cached in a static variable.  In all other cases the
+ * value of nl_langinfo(CODESET) is returned.
+ *
+ * @returns Pointer to read-only string with the codeset name.
+ */
+DECLHIDDEN(const char *) rtStrGetLocaleCodeset(void)
+{
+#ifdef RT_OS_DARWIN
+ /*
+ * @bugref{10153}: If no locale specified in the environment (typically the
+ * case when launched via Finder, LaunchPad or similar) default to UTF-8.
+ */
+ /* Tri-state cache: -1 = not yet determined, 0 = ask nl_langinfo, 1 = default to UTF-8. */
+ static int8_t volatile s_fIsUtf8 = -1;
+ int8_t fIsUtf8 = s_fIsUtf8;
+ if (fIsUtf8)
+ {
+ if (fIsUtf8 == true)
+ return "UTF-8";
+
+ /* Initialize: */
+ fIsUtf8 = true;
+ static const char * const s_papszVariables[] = { "LC_ALL", "LC_CTYPE", "LANG" };
+ for (size_t i = 0; i < RT_ELEMENTS(s_papszVariables); i++)
+ {
+ const char *pszValue = getenv(s_papszVariables[i]);
+ /* Any non-empty locale variable means the environment does specify a locale. */
+ if (pszValue && *pszValue)
+ {
+ fIsUtf8 = false;
+ break;
+ }
+ }
+ /* Publish the decision so later calls skip the environment scan. */
+ s_fIsUtf8 = fIsUtf8;
+ if (fIsUtf8 == true)
+ return "UTF-8";
+ }
+#endif
+ return nl_langinfo(CODESET);
+}
+
+
+/**
+ * Tells whether the codeset of the current locale (LC_CTYPE) is UTF-8.
+ *
+ * @returns true if rtStrGetLocaleCodeset() names UTF-8, false otherwise.
+ */
+DECLHIDDEN(bool) rtStrIsLocaleCodesetUtf8(void)
+{
+    const char * const pszLocaleCodeset = rtStrGetLocaleCodeset();
+    return rtStrIsCodesetUtf8(pszLocaleCodeset);
+}
+
+
+/**
+ * Checks whether a codeset name designates UTF-8.
+ *
+ * Accepted forms: optional leading whitespace, an optional "ISO-10646/" or
+ * "iso-10646/" prefix, "utf" in any letter case, an optional dash, the digit
+ * '8', optional trailing whitespace and then either the end of the string or
+ * an '@' modifier.
+ *
+ * @returns true if UTF-8, false if not (including for NULL).
+ * @param   pszCodeset      Codeset to test.  NULL is allowed.
+ */
+DECLHIDDEN(bool) rtStrIsCodesetUtf8(const char *pszCodeset)
+{
+    if (!pszCodeset)
+        return false;
+
+    /* Leading blanks are tolerated. */
+    while (RT_C_IS_SPACE(*pszCodeset))
+        pszCodeset++;
+
+    /* iconv accepts an 'ISO-10646/' prefix; step over it when present. */
+    if (   strncmp(pszCodeset, RT_STR_TUPLE("ISO-10646/")) == 0
+        || strncmp(pszCodeset, RT_STR_TUPLE("iso-10646/")) == 0)
+        pszCodeset += sizeof("ISO-10646/") - 1;
+
+    /* 'utf', any case.  Checked one character at a time so that we never
+       read beyond the terminator of a shorter string. */
+    if (pszCodeset[0] != 'u' && pszCodeset[0] != 'U')
+        return false;
+    if (pszCodeset[1] != 't' && pszCodeset[1] != 'T')
+        return false;
+    if (pszCodeset[2] != 'f' && pszCodeset[2] != 'F')
+        return false;
+    pszCodeset += 3;
+
+    /* The dash is optional. */
+    if (*pszCodeset == '-')
+        pszCodeset++;
+
+    /* The '8'. */
+    if (*pszCodeset != '8')
+        return false;
+
+    /* Step over the '8' and any blanks following it. */
+    do
+        pszCodeset++;
+    while (RT_C_IS_SPACE(*pszCodeset));
+
+    /* Modifiers are ignored (e.g. "[be_BY.]utf8@latin"). */
+    return !*pszCodeset || *pszCodeset == '@';
+}
+
+
+
+#ifdef RT_WITH_ICONV_CACHE
+
+/**
+ * Initializes the per-thread iconv handle cache.
+ *
+ * Every slot is set to the invalid handle value ((iconv_t)-1) so that the
+ * conversion code opens handles on first use.
+ *
+ * @param   pThread     The thread in question.
+ */
+DECLHIDDEN(void) rtStrIconvCacheInit(PRTTHREADINT pThread)
+{
+    size_t const cSlots = RT_ELEMENTS(pThread->ahIconvs);
+    for (size_t iSlot = 0; iSlot < cSlots; iSlot++)
+    {
+        pThread->ahIconvs[iSlot] = (iconv_t)-1;
+    }
+}
+
+/**
+ * Tears down the per-thread iconv handle cache.
+ *
+ * Each slot is reset to the invalid handle value before the handle it held
+ * (if any) is closed.
+ *
+ * @param   pThread     The thread in question.
+ */
+DECLHIDDEN(void) rtStrIconvCacheDestroy(PRTTHREADINT pThread)
+{
+    size_t const cSlots = RT_ELEMENTS(pThread->ahIconvs);
+    for (size_t iSlot = 0; iSlot < cSlots; iSlot++)
+    {
+        iconv_t const hCached = (iconv_t)pThread->ahIconvs[iSlot];
+        pThread->ahIconvs[iSlot] = (iconv_t)-1;
+        if (hCached == (iconv_t)-1)
+            continue; /* slot was never used */
+        iconv_close(hCached);
+    }
+}
+
+
+/**
+ * Converts a string from one charset to another, using a cached iconv handle.
+ *
+ * The handle in @a *phIconv is opened on demand and left open on success and
+ * on output buffer overflow.  On any other conversion error it is closed and
+ * the cache entry reset to (iconv_t)-1.
+ *
+ * @returns iprt status code.
+ * @retval VINF_SUCCESS on success.
+ * @retval VWRN_NO_TRANSLATION if iconv reported a non-zero conversion count.
+ * @retval VERR_NO_TMP_MEMORY if the output buffer could not be allocated.
+ * @retval VERR_BUFFER_OVERFLOW if the caller supplied buffer is too small.
+ * @retval VERR_NO_TRANSLATION on all other failures.
+ * @param pvInput Pointer to input string.
+ * @param cbInput Size (in bytes) of input string. Excludes any terminators.
+ * @param pszInputCS Codeset of the input string.
+ * @param ppvOutput Pointer to pointer to output buffer if cbOutput > 0.
+ * If cbOutput is 0 this is where the pointer to the allocated
+ * buffer is stored.
+ * @param cbOutput Size of the passed in buffer.
+ * @param pszOutputCS Codeset of the output string.
+ * @param cFactor Input vs. output size factor.
+ * @param phIconv Pointer to the cache entry.
+ */
+static int rtstrConvertCached(const void *pvInput, size_t cbInput, const char *pszInputCS,
+ void **ppvOutput, size_t cbOutput, const char *pszOutputCS,
+ unsigned cFactor, iconv_t *phIconv)
+{
+ /*
+ * Allocate buffer
+ */
+ bool fUcs2Term;
+ void *pvOutput;
+ size_t cbOutput2;
+ if (!cbOutput)
+ {
+ /* Initial guess; room for a two byte terminator is always added on top. */
+ cbOutput2 = cbInput * cFactor;
+ pvOutput = RTMemTmpAlloc(cbOutput2 + sizeof(RTUTF16));
+ if (!pvOutput)
+ return VERR_NO_TMP_MEMORY;
+ fUcs2Term = true;
+ }
+ else
+ {
+ pvOutput = *ppvOutput;
+ fUcs2Term = !strcmp(pszOutputCS, "UCS-2")
+ || !strcmp(pszOutputCS, "UTF-16")
+ || !strcmp(pszOutputCS, "ucs-2")
+ || !strcmp(pszOutputCS, "utf-16");
+ /* Reserve space for the terminator; the comparison below catches unsigned wrap-around. */
+ cbOutput2 = cbOutput - (fUcs2Term ? sizeof(RTUTF16) : 1);
+ if (cbOutput2 > cbOutput)
+ return VERR_BUFFER_OVERFLOW;
+ }
+
+ /*
+ * Use a loop here to retry with bigger buffers.
+ */
+ for (unsigned cTries = 10; cTries > 0; cTries--)
+ {
+ /*
+ * Create conversion object if necessary.
+ */
+ iconv_t hIconv = (iconv_t)*phIconv;
+ if (hIconv == (iconv_t)-1)
+ {
+#if defined(RT_OS_SOLARIS) || defined(RT_OS_NETBSD) || /* @bugref{10153}: Default to UTF-8: */ defined(RT_OS_DARWIN)
+ /* Some systems don't grok empty codeset strings, so help them find the current codeset. */
+ if (!*pszInputCS)
+ pszInputCS = rtStrGetLocaleCodeset();
+ if (!*pszOutputCS)
+ pszOutputCS = rtStrGetLocaleCodeset();
+#endif
+ IPRT_ALIGNMENT_CHECKS_DISABLE(); /* glibc causes trouble */
+ *phIconv = hIconv = iconv_open(pszOutputCS, pszInputCS);
+ IPRT_ALIGNMENT_CHECKS_ENABLE();
+ }
+ if (hIconv != (iconv_t)-1)
+ {
+ /*
+ * Do the conversion.
+ */
+ size_t cbInLeft = cbInput;
+ size_t cbOutLeft = cbOutput2;
+ const void *pvInputLeft = pvInput;
+ void *pvOutputLeft = pvOutput;
+ size_t cchNonRev;
+#ifdef NON_CONST_ICONV_INPUT
+ cchNonRev = iconv(hIconv, (char **)&pvInputLeft, &cbInLeft, (char **)&pvOutputLeft, &cbOutLeft);
+#else
+ cchNonRev = iconv(hIconv, (const char **)&pvInputLeft, &cbInLeft, (char **)&pvOutputLeft, &cbOutLeft);
+#endif
+ if (cchNonRev != (size_t)-1)
+ {
+ if (!cbInLeft)
+ {
+ /*
+ * We're done, just add the terminator and return.
+ * (Two terminators to support UCS-2 output, too.)
+ */
+ ((char *)pvOutputLeft)[0] = '\0';
+ if (fUcs2Term)
+ ((char *)pvOutputLeft)[1] = '\0';
+ *ppvOutput = pvOutput;
+ if (cchNonRev == 0)
+ return VINF_SUCCESS;
+ return VWRN_NO_TRANSLATION;
+ }
+ /* iconv succeeded but left input unconsumed: treat it like an output overflow. */
+ errno = E2BIG;
+ }
+
+ /*
+ * If we failed because of output buffer space we'll
+ * increase the output buffer size and retry.
+ */
+ if (errno == E2BIG)
+ {
+ if (!cbOutput)
+ {
+ RTMemTmpFree(pvOutput);
+ cbOutput2 *= 2;
+ pvOutput = RTMemTmpAlloc(cbOutput2 + sizeof(RTUTF16));
+ if (!pvOutput)
+ return VERR_NO_TMP_MEMORY;
+ continue;
+ }
+ return VERR_BUFFER_OVERFLOW;
+ }
+
+ /*
+ * Close the handle on all other errors to make sure we won't carry
+ * any bad state with us.
+ */
+ *phIconv = (iconv_t)-1;
+ iconv_close(hIconv);
+ }
+ break;
+ }
+
+ /* failure */
+ if (!cbOutput)
+ RTMemTmpFree(pvOutput);
+ return VERR_NO_TRANSLATION;
+}
+
+#endif /* RT_WITH_ICONV_CACHE */
+
+/**
+ * Converts a string from one charset to another without using the handle cache.
+ *
+ * An iconv handle is opened and closed again for every conversion attempt.
+ *
+ * @returns IPRT status code.
+ * @retval  VINF_SUCCESS on success.
+ * @retval  VWRN_NO_TRANSLATION if iconv reported a non-zero conversion count.
+ * @retval  VERR_NO_TMP_MEMORY if the output buffer could not be allocated.
+ * @retval  VERR_BUFFER_OVERFLOW if the caller supplied buffer is too small.
+ * @retval  VERR_NO_TRANSLATION on all other failures.
+ *
+ * @param   pvInput         Pointer to input string.
+ * @param   cbInput         Size (in bytes) of input string. Excludes any terminators.
+ * @param   pszInputCS      Codeset of the input string.
+ * @param   ppvOutput       Pointer to pointer to output buffer if cbOutput > 0.
+ *                          If cbOutput is 0 this is where the pointer to the allocated
+ *                          buffer is stored.
+ * @param   cbOutput        Size of the passed in buffer.
+ * @param   pszOutputCS     Codeset of the output string.
+ * @param   cFactor         Input vs. output size factor.
+ */
+static int rtStrConvertUncached(const void *pvInput, size_t cbInput, const char *pszInputCS,
+                                void **ppvOutput, size_t cbOutput, const char *pszOutputCS,
+                                unsigned cFactor)
+{
+    /*
+     * Allocate buffer
+     */
+    bool    fUcs2Term;
+    void   *pvOutput;
+    size_t  cbOutput2;
+    if (!cbOutput)
+    {
+        /* Initial guess; room for a two byte terminator is always added on top. */
+        cbOutput2 = cbInput * cFactor;
+        pvOutput = RTMemTmpAlloc(cbOutput2 + sizeof(RTUTF16));
+        if (!pvOutput)
+            return VERR_NO_TMP_MEMORY;
+        fUcs2Term = true;
+    }
+    else
+    {
+        pvOutput = *ppvOutput;
+        /* Recognize the same 16-bit codeset names as the cached variant so both
+           paths reserve and write a two byte terminator for them. */
+        fUcs2Term = !strcmp(pszOutputCS, "UCS-2")
+                 || !strcmp(pszOutputCS, "UTF-16")
+                 || !strcmp(pszOutputCS, "ucs-2")
+                 || !strcmp(pszOutputCS, "utf-16");
+        /* Reserve space for the terminator; the comparison catches unsigned wrap-around. */
+        cbOutput2 = cbOutput - (fUcs2Term ? sizeof(RTUTF16) : 1);
+        if (cbOutput2 > cbOutput)
+            return VERR_BUFFER_OVERFLOW;
+    }
+
+    /*
+     * Use a loop here to retry with bigger buffers.
+     */
+    for (unsigned cTries = 10; cTries > 0; cTries--)
+    {
+        /*
+         * Create conversion object.
+         */
+#if defined(RT_OS_SOLARIS) || defined(RT_OS_NETBSD) || /* @bugref{10153}: Default to UTF-8: */ defined(RT_OS_DARWIN)
+        /* Some systems don't grok empty codeset strings, so help them find the current codeset. */
+        if (!*pszInputCS)
+            pszInputCS = rtStrGetLocaleCodeset();
+        if (!*pszOutputCS)
+            pszOutputCS = rtStrGetLocaleCodeset();
+#endif
+        IPRT_ALIGNMENT_CHECKS_DISABLE(); /* glibc causes trouble */
+        iconv_t icHandle = iconv_open(pszOutputCS, pszInputCS);
+        IPRT_ALIGNMENT_CHECKS_ENABLE();
+        if (icHandle != (iconv_t)-1)
+        {
+            /*
+             * Do the conversion.
+             */
+            size_t      cbInLeft = cbInput;
+            size_t      cbOutLeft = cbOutput2;
+            const void *pvInputLeft = pvInput;
+            void       *pvOutputLeft = pvOutput;
+            size_t      cchNonRev;
+#ifdef NON_CONST_ICONV_INPUT
+            cchNonRev = iconv(icHandle, (char **)&pvInputLeft, &cbInLeft, (char **)&pvOutputLeft, &cbOutLeft);
+#else
+            cchNonRev = iconv(icHandle, (const char **)&pvInputLeft, &cbInLeft, (char **)&pvOutputLeft, &cbOutLeft);
+#endif
+            if (cchNonRev != (size_t)-1)
+            {
+                if (!cbInLeft)
+                {
+                    /*
+                     * We're done, just add the terminator and return.
+                     * (Two terminators to support UCS-2 output, too.)
+                     */
+                    iconv_close(icHandle);
+                    ((char *)pvOutputLeft)[0] = '\0';
+                    if (fUcs2Term)
+                        ((char *)pvOutputLeft)[1] = '\0';
+                    *ppvOutput = pvOutput;
+                    if (cchNonRev == 0)
+                        return VINF_SUCCESS;
+                    return VWRN_NO_TRANSLATION;
+                }
+                /* iconv succeeded but left input unconsumed: treat it like an output overflow. */
+                errno = E2BIG;
+            }
+
+            /* Capture the conversion error before closing the handle, since
+               iconv_close() is allowed to modify errno. */
+            int const iSavedErrno = errno;
+            iconv_close(icHandle);
+
+            /*
+             * If we failed because of output buffer space we'll
+             * increase the output buffer size and retry.
+             */
+            if (iSavedErrno == E2BIG)
+            {
+                if (!cbOutput)
+                {
+                    RTMemTmpFree(pvOutput);
+                    cbOutput2 *= 2;
+                    pvOutput = RTMemTmpAlloc(cbOutput2 + sizeof(RTUTF16));
+                    if (!pvOutput)
+                        return VERR_NO_TMP_MEMORY;
+                    continue;
+                }
+                return VERR_BUFFER_OVERFLOW;
+            }
+        }
+        break;
+    }
+
+    /* failure */
+    if (!cbOutput)
+        RTMemTmpFree(pvOutput);
+    return VERR_NO_TRANSLATION;
+}
+
+
+/**
+ * Dispatches a conversion to rtstrConvertCached or rtStrConvertUncached.
+ *
+ * With RT_WITH_ICONV_CACHE the calling thread's own handle cache is used
+ * unless the thread structure cannot be obtained or the thread is flagged
+ * ALIEN without also being flagged MAIN.  In every other case, and always
+ * without RT_WITH_ICONV_CACHE, the uncached conversion is used.
+ *
+ * @returns IPRT status code.
+ *
+ * @param   pchInput        Pointer to input string.
+ * @param   cchInput        Size (in bytes) of input string.  Excludes any
+ *                          terminators.
+ * @param   pszInputCS      Codeset of the input string.
+ * @param   ppszOutput      Pointer to pointer to output buffer if cbOutput > 0.
+ *                          If cbOutput is 0 this is where the pointer to the
+ *                          allocated buffer is stored.
+ * @param   cbOutput        Size of the passed in buffer.
+ * @param   pszOutputCS     Codeset of the output string.
+ * @param   cFactor         Input vs. output size factor.
+ * @param   enmCacheIdx     The iconv cache index.
+ */
+DECLINLINE(int) rtStrConvertWrapper(const char *pchInput, size_t cchInput, const char *pszInputCS,
+                                    char **ppszOutput, size_t cbOutput, const char *pszOutputCS,
+                                    unsigned cFactor, RTSTRICONV enmCacheIdx)
+{
+#ifdef RT_WITH_ICONV_CACHE
+    RTTHREAD const hSelf   = RTThreadSelf();
+    PRTTHREADINT   pThread = hSelf != NIL_RTTHREAD ? rtThreadGet(hSelf) : NULL;
+    if (pThread)
+    {
+        bool const fUseCache = (pThread->fIntFlags & (RTTHREADINT_FLAGS_ALIEN | RTTHREADINT_FLAGS_MAIN))
+                            != RTTHREADINT_FLAGS_ALIEN;
+        int rc = VINF_SUCCESS;
+        if (fUseCache)
+            rc = rtstrConvertCached(pchInput, cchInput, pszInputCS,
+                                    (void **)ppszOutput, cbOutput, pszOutputCS,
+                                    cFactor, (iconv_t *)&pThread->ahIconvs[enmCacheIdx]);
+
+        /* The reference taken by rtThreadGet is dropped on both paths. */
+        rtThreadRelease(pThread);
+        if (fUseCache)
+            return rc;
+    }
+#endif
+    return rtStrConvertUncached(pchInput, cchInput, pszInputCS,
+                                (void **)ppszOutput, cbOutput, pszOutputCS,
+                                cFactor);
+}
+
+
+/**
+ * Internal API for use by the path conversion code.
+ *
+ * Asserts that the cache index is in range and hands the request on to
+ * rtStrConvertWrapper.
+ *
+ * @returns IPRT status code.
+ *
+ * @param   pchInput        Pointer to input string.
+ * @param   cchInput        Size (in bytes) of input string.  Excludes any
+ *                          terminators.
+ * @param   pszInputCS      Codeset of the input string.
+ * @param   ppszOutput      Pointer to pointer to output buffer if cbOutput > 0.
+ *                          If cbOutput is 0 this is where the pointer to the
+ *                          allocated buffer is stored.
+ * @param   cbOutput        Size of the passed in buffer.
+ * @param   pszOutputCS     Codeset of the output string.
+ * @param   cFactor         Input vs. output size factor.
+ * @param   enmCacheIdx     The iconv cache index.
+ */
+DECLHIDDEN(int) rtStrConvert(const char *pchInput, size_t cchInput, const char *pszInputCS,
+                             char **ppszOutput, size_t cbOutput, const char *pszOutputCS,
+                             unsigned cFactor, RTSTRICONV enmCacheIdx)
+{
+    Assert(enmCacheIdx >= 0 && enmCacheIdx < RTSTRICONV_END);
+
+    int const rc = rtStrConvertWrapper(pchInput, cchInput, pszInputCS,
+                                       ppszOutput, cbOutput, pszOutputCS,
+                                       cFactor, enmCacheIdx);
+    return rc;
+}
+
+
+/**
+ * Prepares a local conversion cache for use with rtStrLocalCacheConvert.
+ *
+ * The cache is a single slot which is set to the invalid iconv handle value.
+ * Call rtStrLocalCacheDelete when done.
+ *
+ * @param   ppvTmpCache     The cache slot to initialize.
+ */
+DECLHIDDEN(void) rtStrLocalCacheInit(void **ppvTmpCache)
+{
+    iconv_t const hInvalid = (iconv_t)-1;
+    *ppvTmpCache = hInvalid;
+}
+
+
+/**
+ * Cleans up a local conversion cache.
+ *
+ * With RT_WITH_ICONV_CACHE a handle stored in the slot is closed; the slot is
+ * reset to the invalid handle value in either configuration.
+ *
+ * @param   ppvTmpCache     The cache slot to clean up.
+ */
+DECLHIDDEN(void) rtStrLocalCacheDelete(void **ppvTmpCache)
+{
+#ifdef RT_WITH_ICONV_CACHE
+    iconv_t const hCached = (iconv_t)*ppvTmpCache;
+    bool const    fOpen   = hCached != (iconv_t)-1;
+    if (fOpen)
+    {
+        iconv_close(hCached);
+    }
+#endif
+    *ppvTmpCache = (iconv_t)-1;
+}
+
+
+/**
+ * Internal API for use by the process creation conversion code.
+ *
+ * Converts with a size factor of 1, going through the caller's local handle
+ * cache when RT_WITH_ICONV_CACHE is defined and a cache was supplied, and
+ * through the uncached conversion otherwise.
+ *
+ * @returns IPRT status code.
+ *
+ * @param   pchInput        Pointer to input string.
+ * @param   cchInput        Size (in bytes) of input string.  Excludes any
+ *                          terminators.
+ * @param   pszInputCS      Codeset of the input string.
+ * @param   ppszOutput      Pointer to pointer to output buffer if cbOutput > 0.
+ *                          If cbOutput is 0 this is where the pointer to the
+ *                          allocated buffer is stored.
+ * @param   cbOutput        Size of the passed in buffer.
+ * @param   pszOutputCS     Codeset of the output string.
+ * @param   ppvTmpCache     Pointer to local temporary cache.  Must be
+ *                          initialized by calling rtStrLocalCacheInit and
+ *                          cleaned up afterwards by rtStrLocalCacheDelete.
+ *                          Optional.
+ */
+DECLHIDDEN(int) rtStrLocalCacheConvert(const char *pchInput, size_t cchInput, const char *pszInputCS,
+                                       char **ppszOutput, size_t cbOutput, const char *pszOutputCS,
+                                       void **ppvTmpCache)
+{
+    void ** const ppvOutput = (void **)ppszOutput;
+
+#ifdef RT_WITH_ICONV_CACHE
+    if (ppvTmpCache)
+    {
+        iconv_t * const phIconv = (iconv_t *)ppvTmpCache;
+        return rtstrConvertCached(pchInput, cchInput, pszInputCS, ppvOutput, cbOutput, pszOutputCS,
+                                  1 /*cFactor*/, phIconv);
+    }
+#else
+    RT_NOREF(ppvTmpCache);
+#endif
+
+    return rtStrConvertUncached(pchInput, cchInput, pszInputCS, ppvOutput, cbOutput, pszOutputCS, 1 /*cFactor*/);
+}
+
+
+/**
+ * Converts a UTF-8 string to the current codepage, allocating the result.
+ *
+ * @returns IPRT status code; VERR_NO_TMP_MEMORY if the buffer for an empty
+ *          result cannot be allocated.
+ * @param   ppszString  Where to store the result.  Set to NULL first.
+ * @param   pszString   The zero terminated UTF-8 input.
+ * @param   pszTag      Allocation tag, used for the empty string allocation.
+ */
+RTR3DECL(int) RTStrUtf8ToCurrentCPTag(char **ppszString, const char *pszString, const char *pszTag)
+{
+    Assert(ppszString);
+    Assert(pszString);
+    *ppszString = NULL;
+
+    /*
+     * Non-empty input goes through iconv, starting out with an output
+     * buffer of the same size as the UTF-8 input.
+     */
+    size_t const cchInput = strlen(pszString);
+    if (cchInput > 0)
+        return rtStrConvertWrapper(pszString, cchInput, "UTF-8", ppszString, 0, "", 1, RTSTRICONV_UTF8_TO_LOCALE);
+
+    /*
+     * Empty input: hand back a freshly allocated empty string.
+     */
+    *ppszString = (char *)RTMemTmpAllocZTag(sizeof(char), pszTag);
+    return *ppszString ? VINF_SUCCESS : VERR_NO_TMP_MEMORY;
+}
+
+
+/**
+ * Converts at most @a cchString bytes of a UTF-8 string to the current
+ * codepage, allocating the result.
+ *
+ * @returns IPRT status code; VERR_NO_TMP_MEMORY if the buffer for an empty
+ *          result cannot be allocated.
+ * @param   ppszString  Where to store the result.  Set to NULL first.
+ * @param   pszString   The UTF-8 input.
+ * @param   cchString   Maximum number of input bytes to consider; the actual
+ *                      length is determined with RTStrNLen.
+ * @param   pszTag      Allocation tag, used for the empty string allocation.
+ */
+RTR3DECL(int) RTStrUtf8ToCurrentCPExTag(char **ppszString, const char *pszString, size_t cchString, const char *pszTag)
+{
+    Assert(ppszString);
+    Assert(pszString);
+    *ppszString = NULL;
+
+    /*
+     * Non-empty input goes through iconv, starting out with an output
+     * buffer of the same size as the UTF-8 input.
+     */
+    size_t const cchInput = RTStrNLen(pszString, cchString);
+    if (cchInput >= 1)
+        return rtStrConvertWrapper(pszString, cchInput, "UTF-8", ppszString, 0, "", 1, RTSTRICONV_UTF8_TO_LOCALE);
+
+    /*
+     * Empty input: hand back a freshly allocated empty string.
+     */
+    *ppszString = (char *)RTMemTmpAllocZTag(sizeof(char), pszTag);
+    return *ppszString ? VINF_SUCCESS : VERR_NO_TMP_MEMORY;
+}
+
+
+/**
+ * Converts a string in the current codepage to UTF-8, allocating the result.
+ *
+ * @returns IPRT status code; VERR_NO_TMP_MEMORY if the buffer for an empty
+ *          result cannot be allocated.
+ * @param   ppszString  Where to store the result.  Set to NULL first.
+ * @param   pszString   The zero terminated input in the current codepage.
+ * @param   pszTag      Allocation tag, used for the empty string allocation.
+ */
+RTR3DECL(int) RTStrCurrentCPToUtf8Tag(char **ppszString, const char *pszString, const char *pszTag)
+{
+    Assert(ppszString);
+    Assert(pszString);
+    *ppszString = NULL;
+
+    /*
+     * Non-empty input goes through iconv, starting out with an output
+     * buffer of twice the native length.
+     */
+    size_t const cchInput = strlen(pszString);
+    if (cchInput > 0)
+        return rtStrConvertWrapper(pszString, cchInput, "", ppszString, 0, "UTF-8", 2, RTSTRICONV_LOCALE_TO_UTF8);
+
+    /*
+     * Empty input: hand back a freshly allocated empty string.
+     */
+    *ppszString = (char *)RTMemTmpAllocZTag(sizeof(char), pszTag);
+    return *ppszString ? VINF_SUCCESS : VERR_NO_TMP_MEMORY;
+}
+
+
+/**
+ * Converts a string in the console codepage to UTF-8.
+ *
+ * This implementation simply forwards to RTStrCurrentCPToUtf8Tag.
+ *
+ * @returns IPRT status code from RTStrCurrentCPToUtf8Tag.
+ * @param   ppszString  Where to store the result.
+ * @param   pszString   The zero terminated input string.
+ * @param   pszTag      Allocation tag.
+ */
+RTR3DECL(int) RTStrConsoleCPToUtf8Tag(char **ppszString, const char *pszString, const char *pszTag)
+{
+    int const rc = RTStrCurrentCPToUtf8Tag(ppszString, pszString, pszTag);
+    return rc;
+}