Diffstat:
 src/rocksdb/port/port_posix.cc | 300 +++++++++++++++++++++++++++++++
 1 file changed, 300 insertions(+), 0 deletions(-)
diff --git a/src/rocksdb/port/port_posix.cc b/src/rocksdb/port/port_posix.cc
new file mode 100644
index 000000000..3872293b8
--- /dev/null
+++ b/src/rocksdb/port/port_posix.cc
@@ -0,0 +1,300 @@
+// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
+// This source code is licensed under both the GPLv2 (found in the
+// COPYING file in the root directory) and Apache 2.0 License
+// (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#if !defined(OS_WIN)
+
+#include "port/port_posix.h"
+
+#include <assert.h>
+#if defined(__i386__) || defined(__x86_64__)
+#include <cpuid.h>
+#endif
+#include <errno.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/resource.h>
+#include <sys/time.h>
+#include <unistd.h>
+
+#include <cstdlib>
+#include <fstream>
+#include <string>
+
+#include "util/string_util.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+// We want to give users the opportunity to default all mutexes to adaptive
+// if not specified otherwise. This enables a quick way to conduct various
+// performance-related experiments.
+//
+// NB! Support for adaptive mutexes is turned on by defining
+// ROCKSDB_PTHREAD_ADAPTIVE_MUTEX during compilation. If you use the RocksDB
+// build environment then this happens automatically; otherwise it's up to
+// the consumer to define the identifier.
+#ifdef ROCKSDB_DEFAULT_TO_ADAPTIVE_MUTEX
+extern const bool kDefaultToAdaptiveMutex = true;
+#else
+extern const bool kDefaultToAdaptiveMutex = false;
+#endif
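+
+// For example (an illustrative build invocation, not part of this file):
+// compiling with
+//   -DROCKSDB_PTHREAD_ADAPTIVE_MUTEX -DROCKSDB_DEFAULT_TO_ADAPTIVE_MUTEX
+// enables the adaptive code path in Mutex below and sets
+// kDefaultToAdaptiveMutex to true, which is presumably consumed elsewhere as
+// the default value of the Mutex(bool adaptive) constructor argument.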
+
+namespace port {
+
+static int PthreadCall(const char* label, int result) {
+ if (result != 0 && result != ETIMEDOUT && result != EBUSY) {
+ fprintf(stderr, "pthread %s: %s\n", label, errnoStr(result).c_str());
+ abort();
+ }
+ return result;
+}
+
+Mutex::Mutex(bool adaptive) {
+ (void)adaptive;
+#ifdef ROCKSDB_PTHREAD_ADAPTIVE_MUTEX
+ if (!adaptive) {
+ PthreadCall("init mutex", pthread_mutex_init(&mu_, nullptr));
+ } else {
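+    // PTHREAD_MUTEX_ADAPTIVE_NP is a glibc extension: the lock spins briefly
+    // in user space before falling back to sleeping in the kernel.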
+ pthread_mutexattr_t mutex_attr;
+ PthreadCall("init mutex attr", pthread_mutexattr_init(&mutex_attr));
+ PthreadCall("set mutex attr", pthread_mutexattr_settype(
+ &mutex_attr, PTHREAD_MUTEX_ADAPTIVE_NP));
+ PthreadCall("init mutex", pthread_mutex_init(&mu_, &mutex_attr));
+ PthreadCall("destroy mutex attr", pthread_mutexattr_destroy(&mutex_attr));
+ }
+#else
+ PthreadCall("init mutex", pthread_mutex_init(&mu_, nullptr));
+#endif // ROCKSDB_PTHREAD_ADAPTIVE_MUTEX
+}
+
+Mutex::~Mutex() { PthreadCall("destroy mutex", pthread_mutex_destroy(&mu_)); }
+
+void Mutex::Lock() {
+ PthreadCall("lock", pthread_mutex_lock(&mu_));
+#ifndef NDEBUG
+ locked_ = true;
+#endif
+}
+
+void Mutex::Unlock() {
+#ifndef NDEBUG
+ locked_ = false;
+#endif
+ PthreadCall("unlock", pthread_mutex_unlock(&mu_));
+}
+
+bool Mutex::TryLock() {
+ bool ret = PthreadCall("trylock", pthread_mutex_trylock(&mu_)) == 0;
+#ifndef NDEBUG
+ if (ret) {
+ locked_ = true;
+ }
+#endif
+ return ret;
+}
+
+void Mutex::AssertHeld() {
+#ifndef NDEBUG
+ assert(locked_);
+#endif
+}
+
+CondVar::CondVar(Mutex* mu) : mu_(mu) {
+ PthreadCall("init cv", pthread_cond_init(&cv_, nullptr));
+}
+
+CondVar::~CondVar() { PthreadCall("destroy cv", pthread_cond_destroy(&cv_)); }
+
+void CondVar::Wait() {
+#ifndef NDEBUG
+ mu_->locked_ = false;
+#endif
+ PthreadCall("wait", pthread_cond_wait(&cv_, &mu_->mu_));
+#ifndef NDEBUG
+ mu_->locked_ = true;
+#endif
+}
+
+bool CondVar::TimedWait(uint64_t abs_time_us) {
+ struct timespec ts;
+ ts.tv_sec = static_cast<time_t>(abs_time_us / 1000000);
+ ts.tv_nsec = static_cast<suseconds_t>((abs_time_us % 1000000) * 1000);
+
+#ifndef NDEBUG
+ mu_->locked_ = false;
+#endif
+ int err = pthread_cond_timedwait(&cv_, &mu_->mu_, &ts);
+#ifndef NDEBUG
+ mu_->locked_ = true;
+#endif
+ if (err == ETIMEDOUT) {
+ return true;
+ }
+ if (err != 0) {
+ PthreadCall("timedwait", err);
+ }
+ return false;
+}
+
+void CondVar::Signal() { PthreadCall("signal", pthread_cond_signal(&cv_)); }
+
+void CondVar::SignalAll() {
+ PthreadCall("broadcast", pthread_cond_broadcast(&cv_));
+}
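+
+// A minimal usage sketch (illustrative only; `ready` and the deadline
+// arithmetic are hypothetical, not part of this file). CondVar::TimedWait()
+// takes an absolute deadline in microseconds since the epoch and returns
+// true on timeout:
+//
+//   port::Mutex mu;
+//   port::CondVar cv(&mu);
+//   bool ready = false;
+//
+//   void WaitUpToOneSecond() {
+//     uint64_t deadline_us =
+//         static_cast<uint64_t>(time(nullptr)) * 1000000ULL + 1000000ULL;
+//     mu.Lock();
+//     while (!ready && !cv.TimedWait(deadline_us)) {
+//       // spurious wakeup or signal without the condition becoming true
+//     }
+//     mu.Unlock();
+//   }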
+
+RWMutex::RWMutex() {
+ PthreadCall("init mutex", pthread_rwlock_init(&mu_, nullptr));
+}
+
+RWMutex::~RWMutex() {
+ PthreadCall("destroy mutex", pthread_rwlock_destroy(&mu_));
+}
+
+void RWMutex::ReadLock() {
+ PthreadCall("read lock", pthread_rwlock_rdlock(&mu_));
+}
+
+void RWMutex::WriteLock() {
+ PthreadCall("write lock", pthread_rwlock_wrlock(&mu_));
+}
+
+void RWMutex::ReadUnlock() {
+ PthreadCall("read unlock", pthread_rwlock_unlock(&mu_));
+}
+
+void RWMutex::WriteUnlock() {
+ PthreadCall("write unlock", pthread_rwlock_unlock(&mu_));
+}
+
+int PhysicalCoreID() {
+#if defined(ROCKSDB_SCHED_GETCPU_PRESENT) && defined(__x86_64__) && \
+ (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 22))
+  // sched_getcpu uses the vDSO getcpu() call since glibc 2.22. I believe
+  // Linux offers vDSO support only on x86_64. This is the fastest/preferred
+  // method if available.
+ int cpuno = sched_getcpu();
+ if (cpuno < 0) {
+ return -1;
+ }
+ return cpuno;
+#elif defined(__x86_64__) || defined(__i386__)
+ // clang/gcc both provide cpuid.h, which defines __get_cpuid(), for x86_64 and
+ // i386.
+ unsigned eax, ebx = 0, ecx, edx;
+ if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
+ return -1;
+ }
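+  // Bits 31..24 of EBX from CPUID leaf 1 hold the initial APIC ID, used here
+  // as the core identifier.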
+ return ebx >> 24;
+#else
+ // give up, the caller can generate a random number or something.
+ return -1;
+#endif
+}
+
+void InitOnce(OnceType* once, void (*initializer)()) {
+ PthreadCall("once", pthread_once(once, initializer));
+}
+
+void Crash(const std::string& srcfile, int srcline) {
+ fprintf(stdout, "Crashing at %s:%d\n", srcfile.c_str(), srcline);
+ fflush(stdout);
+ kill(getpid(), SIGTERM);
+}
+
+int GetMaxOpenFiles() {
+#if defined(RLIMIT_NOFILE)
+ struct rlimit no_files_limit;
+ if (getrlimit(RLIMIT_NOFILE, &no_files_limit) != 0) {
+ return -1;
+ }
+ // protect against overflow
+ if (static_cast<uintmax_t>(no_files_limit.rlim_cur) >=
+ static_cast<uintmax_t>(std::numeric_limits<int>::max())) {
+ return std::numeric_limits<int>::max();
+ }
+ return static_cast<int>(no_files_limit.rlim_cur);
+#endif
+ return -1;
+}
+
+void* cacheline_aligned_alloc(size_t size) {
+#if __GNUC__ < 5 && defined(__SANITIZE_ADDRESS__)
+ return malloc(size);
+#elif (_POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600 || defined(__APPLE__))
+ void* m;
+ errno = posix_memalign(&m, CACHE_LINE_SIZE, size);
+ return errno ? nullptr : m;
+#else
+ return malloc(size);
+#endif
+}
+
+void cacheline_aligned_free(void* memblock) { free(memblock); }
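+
+// A minimal usage sketch (illustrative): memory from cacheline_aligned_alloc()
+// is released with cacheline_aligned_free(), which forwards to free() and is
+// therefore valid for both the posix_memalign() and the malloc() fallback
+// paths:
+//
+//   void* buf = port::cacheline_aligned_alloc(1024);
+//   if (buf != nullptr) {
+//     // ... use buf ...
+//     port::cacheline_aligned_free(buf);
+//   }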
+
+static size_t GetPageSize() {
+#if defined(OS_LINUX) || defined(_SC_PAGESIZE)
+ long v = sysconf(_SC_PAGESIZE);
+ if (v >= 1024) {
+ return static_cast<size_t>(v);
+ }
+#endif
+  // Default: assume 4 KB pages.
+ return 4U * 1024U;
+}
+
+const size_t kPageSize = GetPageSize();
+
+void SetCpuPriority(ThreadId id, CpuPriority priority) {
+#ifdef OS_LINUX
+ sched_param param;
+ param.sched_priority = 0;
+ switch (priority) {
+ case CpuPriority::kHigh:
+ sched_setscheduler(id, SCHED_OTHER, &param);
+ setpriority(PRIO_PROCESS, id, -20);
+ break;
+ case CpuPriority::kNormal:
+ sched_setscheduler(id, SCHED_OTHER, &param);
+ setpriority(PRIO_PROCESS, id, 0);
+ break;
+ case CpuPriority::kLow:
+ sched_setscheduler(id, SCHED_OTHER, &param);
+ setpriority(PRIO_PROCESS, id, 19);
+ break;
+ case CpuPriority::kIdle:
+ sched_setscheduler(id, SCHED_IDLE, &param);
+ break;
+ default:
+ assert(false);
+ }
+#else
+ (void)id;
+ (void)priority;
+#endif
+}
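+
+// A minimal usage sketch (illustrative; assumes the ThreadId value refers to
+// a live thread, and note that raising priority, e.g. kHigh's nice value of
+// -20, typically requires elevated privileges on Linux):
+//
+//   port::SetCpuPriority(thread_id, port::CpuPriority::kLow);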
+
+int64_t GetProcessID() { return getpid(); }
+
+bool GenerateRfcUuid(std::string* output) {
+ output->clear();
+ std::ifstream f("/proc/sys/kernel/random/uuid");
+ std::getline(f, /*&*/ *output);
+ if (output->size() == 36) {
+ return true;
+ } else {
+ output->clear();
+ return false;
+ }
+}
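+
+// A minimal usage sketch (illustrative): /proc/sys/kernel/random/uuid exists
+// only on Linux, so callers should be prepared for failure:
+//
+//   std::string uuid;
+//   if (!port::GenerateRfcUuid(&uuid)) {
+//     // fall back to another source of unique ids (hypothetical)
+//   }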
+
+} // namespace port
+} // namespace ROCKSDB_NAMESPACE
+
+#endif