/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/* This source code was derived from Chromium code, and as such is also subject
 * to the [Chromium license](ipc/chromium/src/LICENSE). */

#include "SharedMemoryPlatform.h"

// NOTE(review): the targets of the angle-bracket includes in this file
// were lost; they are restored here from the APIs used below (errno,
// open/fcntl, mmap/shm_open, file modes, close/dup/ftruncate/sysconf,
// Capsicum, RUNNING_ON_VALGRIND). Confirm against the upstream file.
#include <errno.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

#ifdef XP_LINUX
#  include "base/linux_memfd_defs.h"
#endif
#ifdef MOZ_WIDGET_GTK
#  include "mozilla/WidgetUtilsGtk.h"
#endif

#ifdef __FreeBSD__
#  include <sys/capsicum.h>
#endif

#ifdef MOZ_VALGRIND
#  include <valgrind/valgrind.h>
#endif

#include "base/eintr_wrapper.h"
#include "base/string_util.h"
#include "mozilla/Atomics.h"
#include "mozilla/Maybe.h"
#include "mozilla/ProfilerThreadSleep.h"
#include "mozilla/UniquePtrExtensions.h"
#include "prenv.h"
#include "nsXULAppAPI.h"  // for XRE_IsParentProcess

namespace mozilla::ipc::shared_memory {

// memfd_create is a nonstandard interface for creating anonymous
// shared memory accessible as a file descriptor but not tied to any
// filesystem.  It first appeared in Linux 3.17, and was adopted by
// FreeBSD in version 13.

#if !defined(HAVE_MEMFD_CREATE) && defined(XP_LINUX) && \
    defined(SYS_memfd_create)

// Older libc versions (e.g., glibc before 2.27) don't have the
// wrapper, but we can supply our own; see `linux_memfd_defs.h`.
//
// Forwards `aName` and `aFlags` straight to the raw system call and
// returns its result (a new fd, or -1 with errno set).
static int memfd_create(const char* aName, unsigned int aFlags) {
  // syscall() returns long; the result is an fd or -1, so it fits in int.
  return static_cast<int>(syscall(SYS_memfd_create, aName, aFlags));
}

#  define HAVE_MEMFD_CREATE 1
#endif

// memfd supports having "seals" applied to the file, to prevent
// various types of changes (which apply to all fds referencing the
// file).  Unfortunately, we can't rely on F_SEAL_WRITE to implement
// Freeze(); see the comments in Platform::Freeze() below.
//
// Instead, to prevent a child process from regaining write access to
// a read-only copy, the OS must also provide a way to remove write
// permissions at the file descriptor level.  This next section
// attempts to accomplish that.

#ifdef HAVE_MEMFD_CREATE
#  ifdef XP_LINUX
#    define USE_MEMFD_CREATE 1

// To create a read-only duplicate of an fd, we can use procfs; the
// same operation could restore write access, but sandboxing prevents
// child processes from accessing /proc.
//
// (Note: if this ever changes to not use /proc, also reconsider how
// and if HaveMemfd should check whether this works.)
//
// Returns the new read-only descriptor, or a negative value (with
// errno set by open()) on failure.  Parent process only.
static int DupReadOnly(int aFd) {
  MOZ_DIAGNOSTIC_ASSERT(XRE_IsParentProcess());

  // Re-open the same file through its /proc/self/fd entry, this time
  // requesting read-only access.
  const std::string procPath = StringPrintf("/proc/self/fd/%d", aFd);

  // procfs opens probably won't EINTR, but checking for it can't hurt
  const int readOnlyFd =
      HANDLE_EINTR(open(procPath.c_str(), O_RDONLY | O_CLOEXEC));
  return readOnlyFd;
}

#  elif defined(__FreeBSD__)
#    define USE_MEMFD_CREATE 1

// FreeBSD's Capsicum framework allows irrevocably restricting the
// operations permitted on a file descriptor.
//
// Returns a duplicate of `aFd` limited to CAP_FSTAT and CAP_MMAP_R,
// or -1 on failure with errno describing the failing call.
static int DupReadOnly(int aFd) {
  const int limitedFd = dup(aFd);
  if (limitedFd < 0) {
    return -1;
  }

  cap_rights_t allowed;
  cap_rights_init(&allowed, CAP_FSTAT, CAP_MMAP_R);
  if (cap_rights_limit(limitedFd, &allowed) >= 0) {
    return limitedFd;
  }

  // Limiting failed: drop the duplicate, but make sure the caller sees
  // the errno from cap_rights_limit rather than one from close().
  const int savedErrno = errno;
  close(limitedFd);
  errno = savedErrno;
  return -1;
}

#  else  // unhandled OS
#    warning "OS has memfd_create but no DupReadOnly implementation"
#  endif  // OS selection
#endif    // HAVE_MEMFD_CREATE

// Runtime detection for memfd support.  Returns `Nothing()` if not
// supported, or `Some(flags)` if supported, where `flags` contains
// flags like `MFD_CLOEXEC` that should be passed to all calls.
static Maybe HaveMemfd() { #ifdef USE_MEMFD_CREATE static const Maybe kHave = []() -> Maybe { unsigned flags = MFD_CLOEXEC | MFD_ALLOW_SEALING; # ifdef MFD_NOEXEC_SEAL flags |= MFD_NOEXEC_SEAL; # endif mozilla::UniqueFileHandle fd(memfd_create("mozilla-ipc-test", flags)); # ifdef MFD_NOEXEC_SEAL if (!fd && errno == EINVAL) { flags &= ~MFD_NOEXEC_SEAL; fd.reset(memfd_create("mozilla-ipc-test", flags)); } # endif if (!fd) { MOZ_ASSERT(errno == ENOSYS); return Nothing(); } // Verify that DupReadOnly works; on Linux it's known to fail if: // // * SELinux assigns the memfd a type for which this process's // domain doesn't have "open" permission; this is always the // case on Android but could occur on desktop as well // // * /proc (used by the DupReadOnly implementation) isn't mounted, // which is a configuration that the Tor Browser project is // interested in as a way to reduce fingerprinting risk // // Sandboxed processes on Linux also can't use it if sandboxing // has already been started, but that's expected. It should be // safe for sandboxed child processes to use memfd even if an // unsandboxed process couldn't freeze them, because freezing // isn't allowed (or meaningful) for memory created by another // process. if (XRE_IsParentProcess()) { mozilla::UniqueFileHandle rofd(DupReadOnly(fd.get())); if (!rofd) { MOZ_LOG_FMT(gSharedMemoryLog, LogLevel::Warning, "read-only dup failed ({}); not using memfd", strerror(errno)); return Nothing(); } } return Some(flags); }(); return kHave; #else return Nothing(); #endif // USE_MEMFD_CREATE } bool AppendPosixShmPrefix(std::string* aStr, pid_t aPid) { if (HaveMemfd()) { return false; } *aStr += '/'; #ifdef MOZ_WIDGET_GTK // The Snap package environment doesn't provide a private /dev/shm // (it's used for communication with services like PulseAudio); // instead AppArmor is used to restrict access to it. 
Anything with // this prefix is allowed: if (const char* snap = mozilla::widget::GetSnapInstanceName()) { StringAppendF(aStr, "snap.%s.", snap); } #endif // XP_LINUX // Hopefully the "implementation defined" name length limit is long // enough for this. StringAppendF(aStr, "org.mozilla.ipc.%d.", static_cast(aPid)); return true; } // `freezable` is a pointer because `Maybe` nor `std::optional` can store a // reference. If there is no freezable, pass as nullptr. static Maybe CreateImpl(size_t aSize, PlatformHandle* aFreezable) { MOZ_ASSERT(aSize > 0); MOZ_DIAGNOSTIC_ASSERT( !aFreezable || XRE_IsParentProcess(), "Child processes may not create freezable shared memory"); mozilla::UniqueFileHandle fd; mozilla::UniqueFileHandle frozen_fd; #ifdef USE_MEMFD_CREATE if (auto flags = HaveMemfd()) { fd.reset(memfd_create("mozilla-ipc", *flags)); if (!fd) { // In general it's too late to fall back here -- in a sandboxed // child process, shm_open is already blocked. And it shouldn't // be necessary. MOZ_LOG_FMT(gSharedMemoryLog, LogLevel::Warning, "failed to create memfd: {}", strerror(errno)); return Nothing(); } if (aFreezable) { frozen_fd.reset(DupReadOnly(fd.get())); if (!frozen_fd) { MOZ_LOG_FMT(gSharedMemoryLog, LogLevel::Warning, "failed to create read-only memfd: {}", strerror(errno)); return Nothing(); } } } #endif if (!fd) { // Generic Unix: shm_open + shm_unlink do { // The names don't need to be unique, but it saves time if they // usually are. static mozilla::Atomic sNameCounter; std::string name; CHECK(AppendPosixShmPrefix(&name, getpid())); StringAppendF(&name, "%zu", sNameCounter++); // O_EXCL means the names being predictable shouldn't be a problem. 
fd.reset(HANDLE_EINTR( shm_open(name.c_str(), O_RDWR | O_CREAT | O_EXCL, 0600))); if (fd) { if (aFreezable) { frozen_fd.reset(HANDLE_EINTR(shm_open(name.c_str(), O_RDONLY, 0400))); if (!frozen_fd) { int open_err = errno; shm_unlink(name.c_str()); DLOG(FATAL) << "failed to re-open freezable shm: " << strerror(open_err); return Nothing(); } } if (shm_unlink(name.c_str()) != 0) { // This shouldn't happen, but if it does: assume the file is // in fact leaked, and bail out now while it's still 0-length. DLOG(FATAL) << "failed to unlink shm: " << strerror(errno); return Nothing(); } } } while (!fd && errno == EEXIST); } if (!fd) { MOZ_LOG_FMT(gSharedMemoryLog, LogLevel::Warning, "failed to open shm: {}", strerror(errno)); return Nothing(); } mozilla::Maybe fallocateError; #if defined(HAVE_POSIX_FALLOCATE) // Using posix_fallocate will ensure that there's actually space for this // file. Otherwise we end up with a sparse file that can give SIGBUS if we // run out of space while writing to it. (This doesn't apply to memfd.) if (!HaveMemfd()) { int rv; // Avoid repeated interruptions of posix_fallocate by the profiler's // SIGPROF sampling signal. Indicating "thread sleep" here means we'll // get up to one interruption but not more. See bug 1658847 for more. // This has to be scoped outside the HANDLE_RV_EINTR retry loop. { AUTO_PROFILER_THREAD_SLEEP; rv = HANDLE_RV_EINTR( posix_fallocate(fd.get(), 0, static_cast(aSize))); } // Some filesystems have trouble with posix_fallocate. For now, we must // fallback ftruncate and accept the allocation failures like we do // without posix_fallocate. 
// See https://bugzilla.mozilla.org/show_bug.cgi?id=1618914 if (rv != 0 && rv != EOPNOTSUPP && rv != EINVAL && rv != ENODEV) { MOZ_LOG_FMT(gSharedMemoryLog, LogLevel::Warning, "fallocate failed to set shm size: {}", strerror(rv)); return Nothing(); } fallocateError = mozilla::Some(rv); } #endif // If posix_fallocate isn't supported / relevant for this type of // file (either failed with an expected error, or wasn't attempted), // then set the size with ftruncate: if (fallocateError != mozilla::Some(0)) { int rv = HANDLE_EINTR(ftruncate(fd.get(), static_cast(aSize))); if (rv != 0) { int ftruncate_errno = errno; if (fallocateError) { MOZ_LOG_FMT(gSharedMemoryLog, LogLevel::Warning, "fallocate failed to set shm size: {}", strerror(*fallocateError)); } MOZ_LOG_FMT(gSharedMemoryLog, LogLevel::Warning, "fallocate failed to set shm size: {}", strerror(ftruncate_errno)); return Nothing(); } } if (aFreezable) { *aFreezable = std::move(frozen_fd); } return Some(std::move(fd)); } bool UsingPosixShm() { return !HaveMemfd(); } bool Platform::Create(MutableHandle& aHandle, size_t aSize) { if (auto ph = CreateImpl(aSize, nullptr)) { aHandle.mHandle = std::move(*ph); aHandle.SetSize(aSize); return true; } return false; } bool Platform::CreateFreezable(FreezableHandle& aHandle, size_t aSize) { if (auto ph = CreateImpl(aSize, &aHandle.mFrozenFile)) { aHandle.mHandle = std::move(*ph); aHandle.SetSize(aSize); return true; } return false; } PlatformHandle Platform::CloneHandle(const PlatformHandle& aHandle) { const int new_fd = dup(aHandle.get()); if (new_fd < 0) { MOZ_LOG_FMT(gSharedMemoryLog, LogLevel::Warning, "failed to duplicate file descriptor: {}", strerror(errno)); return nullptr; } return mozilla::UniqueFileHandle(new_fd); } bool Platform::Freeze(FreezableHandle& aHandle) { #ifdef USE_MEMFD_CREATE # ifdef MOZ_VALGRIND // Valgrind allows memfd_create but doesn't understand F_ADD_SEALS. 
static const bool haveSeals = RUNNING_ON_VALGRIND == 0; # else static const bool haveSeals = true; # endif static const bool useSeals = !PR_GetEnv("MOZ_SHM_NO_SEALS"); if (HaveMemfd() && haveSeals && useSeals) { // Seals are added to the file as defense-in-depth. The primary // method of access control is creating a read-only fd (using // procfs in this case) and requiring that sandboxes processes not // have access to /proc/self/fd to regain write permission; this // is the same as with shm_open. // // Unfortunately, F_SEAL_WRITE is unreliable: if the process // forked while there was a writeable mapping, it will inherit a // copy of the mapping, which causes the seal to fail. // // (Also, in the future we may want to split this into separate // classes for mappings and shared memory handles, which would // complicate identifying the case where `F_SEAL_WRITE` would be // possible even in the absence of races with fork.) // // However, Linux 5.1 added F_SEAL_FUTURE_WRITE, which prevents // write operations afterwards, but existing writeable mappings // are unaffected (similar to ashmem protection semantics). const int seals = F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL; int sealError = EINVAL; # ifdef F_SEAL_FUTURE_WRITE sealError = fcntl(aHandle.mHandle.get(), F_ADD_SEALS, seals | F_SEAL_FUTURE_WRITE) == 0 ? 0 : errno; # endif // F_SEAL_FUTURE_WRITE if (sealError == EINVAL) { sealError = fcntl(aHandle.mHandle.get(), F_ADD_SEALS, seals) == 0 ? 
0 : errno; } if (sealError != 0) { MOZ_LOG_FMT(gSharedMemoryLog, LogLevel::Warning, "failed to seal memfd: {}", strerror(errno)); return false; } } #else // !USE_MEMFD_CREATE MOZ_ASSERT(!HaveMemfd()); #endif MOZ_ASSERT(aHandle.mFrozenFile); MOZ_ASSERT(aHandle.mHandle); aHandle.mHandle = std::move(aHandle.mFrozenFile); MOZ_ASSERT(aHandle.mHandle); return true; } Maybe Platform::Map(const HandleBase& aHandle, uint64_t aOffset, size_t aSize, void* aFixedAddress, bool aReadOnly) { // Don't use MAP_FIXED when a fixed_address was specified, since that can // replace pages that are alread mapped at that address. void* mem = mmap(aFixedAddress, aSize, PROT_READ | (aReadOnly ? 0 : PROT_WRITE), MAP_SHARED, aHandle.mHandle.get(), aOffset); if (mem == MAP_FAILED) { MOZ_LOG_FMT(gSharedMemoryLog, LogLevel::Warning, "call to mmap failed: {}", strerror(errno)); return Nothing(); } if (aFixedAddress && mem != aFixedAddress) { DebugOnly munmap_succeeded = munmap(mem, aSize) == 0; MOZ_ASSERT(munmap_succeeded, "call to munmap failed"); return Nothing(); } return Some(mem); } void Platform::Unmap(void* aMemory, size_t aSize) { munmap(aMemory, aSize); } bool Platform::Protect(char* aAddr, size_t aSize, Access aAccess) { int flags = PROT_NONE; if (aAccess & AccessRead) flags |= PROT_READ; if (aAccess & AccessWrite) flags |= PROT_WRITE; return 0 == mprotect(aAddr, aSize, flags); } void* Platform::FindFreeAddressSpace(size_t aSize) { #ifndef __FreeBSD__ constexpr int flags = MAP_ANONYMOUS | MAP_NORESERVE | MAP_PRIVATE; #else constexpr int flags = MAP_ANONYMOUS | MAP_PRIVATE; #endif void* memory = mmap(nullptr, aSize, PROT_NONE, flags, -1, 0); if (memory == MAP_FAILED) { return nullptr; } munmap(memory, aSize); return memory; } size_t Platform::PageSize() { return sysconf(_SC_PAGESIZE); } size_t Platform::AllocationGranularity() { return PageSize(); } bool Platform::IsSafeToMap(const PlatformHandle&) { return true; } } // namespace mozilla::ipc::shared_memory