diff options
Diffstat (limited to '')
-rw-r--r-- | src/common/compat.cc | 567 |
1 files changed, 567 insertions, 0 deletions
diff --git a/src/common/compat.cc b/src/common/compat.cc new file mode 100644 index 000000000..82b57ad94 --- /dev/null +++ b/src/common/compat.cc @@ -0,0 +1,567 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2011 New Dream Network + * Copyright (C) 2018 Red Hat, Inc. + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#include <cstdio> + +#include <errno.h> +#include <fcntl.h> +#include <stdint.h> +#include <stdio.h> +#include "acconfig.h" +#ifdef HAVE_MEMSET_S +# define __STDC_WANT_LIB_EXT1__ 1 +#endif +#include <string.h> +#include <thread> +#ifndef _WIN32 +#include <sys/mount.h> +#else +#include <stdlib.h> +#endif +#include <sys/param.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> +#if defined(__linux__) +#include <sys/vfs.h> +#endif + +#include "include/compat.h" +#include "include/sock_compat.h" +#include "common/safe_io.h" + +// The type-value for a ZFS FS in fstatfs. +#define FS_ZFS_TYPE 0xde + +// On FreeBSD, ZFS fallocate always fails since it is considered impossible to +// reserve space on a COW filesystem. posix_fallocate() returns EINVAL +// Linux in this case already emulates the reservation in glibc +// In which case it is allocated manually, and still that is not a real guarantee +// that a full buffer is allocated on disk, since it could be compressed. +// To prevent this the written buffer needs to be loaded with random data. +int manual_fallocate(int fd, off_t offset, off_t len) { + int r = lseek(fd, offset, SEEK_SET); + if (r == -1) + return errno; + char data[1024*128]; + // TODO: compressing filesystems would require random data + // FIPS zeroization audit 20191115: this memset is not security related. + memset(data, 0x42, sizeof(data)); + for (off_t off = 0; off < len; off += sizeof(data)) { + if (off + static_cast<off_t>(sizeof(data)) > len) + r = safe_write(fd, data, len - off); + else + r = safe_write(fd, data, sizeof(data)); + if (r == -1) { + return errno; + } + } + return 0; +} + +int on_zfs(int basedir_fd) { + #ifndef _WIN32 + struct statfs basefs; + (void)fstatfs(basedir_fd, &basefs); + return (basefs.f_type == FS_ZFS_TYPE); + #else + return 0; + #endif +} + +int ceph_posix_fallocate(int fd, off_t offset, off_t len) { + // Return 0 if oke, otherwise errno > 0 + +#ifdef HAVE_POSIX_FALLOCATE + if (on_zfs(fd)) { + return manual_fallocate(fd, offset, len); + } else { + return posix_fallocate(fd, offset, len); + } +#elif defined(__APPLE__) + fstore_t store; + store.fst_flags = F_ALLOCATECONTIG; + store.fst_posmode = F_PEOFPOSMODE; + store.fst_offset = offset; + store.fst_length = len; + + int ret = fcntl(fd, F_PREALLOCATE, &store); + if (ret == -1) { + ret = errno; + } + return ret; +#else + return manual_fallocate(fd, offset, len); +#endif +} + +int pipe_cloexec(int pipefd[2], int flags) +{ +#if defined(HAVE_PIPE2) + return pipe2(pipefd, O_CLOEXEC | flags); +#else + if (pipe(pipefd) == -1) + return -1; + + #ifndef _WIN32 + /* + * The old-fashioned, race-condition prone way that we have to fall + * back on if pipe2 does not exist. + */ + if (fcntl(pipefd[0], F_SETFD, FD_CLOEXEC) < 0) { + goto fail; + } + + if (fcntl(pipefd[1], F_SETFD, FD_CLOEXEC) < 0) { + goto fail; + } + #endif + + return 0; +fail: + int save_errno = errno; + VOID_TEMP_FAILURE_RETRY(close(pipefd[0])); + VOID_TEMP_FAILURE_RETRY(close(pipefd[1])); + return (errno = save_errno, -1); +#endif +} + + +int socket_cloexec(int domain, int type, int protocol) +{ +#ifdef SOCK_CLOEXEC + return socket(domain, type|SOCK_CLOEXEC, protocol); +#else + int fd = socket(domain, type, protocol); + if (fd == -1) + return -1; + + #ifndef _WIN32 + if (fcntl(fd, F_SETFD, FD_CLOEXEC) < 0) + goto fail; + #endif + + return fd; +fail: + int save_errno = errno; + VOID_TEMP_FAILURE_RETRY(close(fd)); + return (errno = save_errno, -1); +#endif +} + +int socketpair_cloexec(int domain, int type, int protocol, int sv[2]) +{ +#ifdef SOCK_CLOEXEC + return socketpair(domain, type|SOCK_CLOEXEC, protocol, sv); +#elif _WIN32 + /* TODO */ + return -ENOTSUP; +#else + int rc = socketpair(domain, type, protocol, sv); + if (rc == -1) + return -1; + + #ifndef _WIN32 + if (fcntl(sv[0], F_SETFD, FD_CLOEXEC) < 0) + goto fail; + + if (fcntl(sv[1], F_SETFD, FD_CLOEXEC) < 0) + goto fail; + #endif + + return 0; +fail: + int save_errno = errno; + VOID_TEMP_FAILURE_RETRY(close(sv[0])); + VOID_TEMP_FAILURE_RETRY(close(sv[1])); + return (errno = save_errno, -1); +#endif +} + +int accept_cloexec(int sockfd, struct sockaddr* addr, socklen_t* addrlen) +{ +#ifdef HAVE_ACCEPT4 + return accept4(sockfd, addr, addrlen, SOCK_CLOEXEC); +#else + int fd = accept(sockfd, addr, addrlen); + if (fd == -1) + return -1; + + #ifndef _WIN32 + if (fcntl(fd, F_SETFD, FD_CLOEXEC) < 0) + goto fail; + #endif + + return fd; +fail: + int save_errno = errno; + VOID_TEMP_FAILURE_RETRY(close(fd)); + return (errno = save_errno, -1); +#endif +} + +#if defined(__FreeBSD__) +int sched_setaffinity(pid_t pid, size_t cpusetsize, + cpu_set_t *mask) +{ + return 0; +} +#endif + +char *ceph_strerror_r(int errnum, char *buf, size_t buflen) +{ +#ifdef _WIN32 + strerror_s(buf, buflen, errnum); + return buf; +#elif defined(STRERROR_R_CHAR_P) + return strerror_r(errnum, buf, buflen); +#else + if (strerror_r(errnum, buf, buflen)) { + snprintf(buf, buflen, "Unknown error %d", errnum); + } + return buf; +#endif +} + +int ceph_memzero_s(void *dest, size_t destsz, size_t count) { +#ifdef HAVE_MEMSET_S + return memset_s(dest, destsz, 0, count); +#elif defined(_WIN32) + SecureZeroMemory(dest, count); +#else + explicit_bzero(dest, count); +#endif + return 0; +} + +#ifdef _WIN32 + +#include <iomanip> +#include <ctime> + +// chown is not available on Windows. Plus, changing file owners is not +// a common practice on Windows. +int chown(const char *path, uid_t owner, gid_t group) { + return 0; +} + +int fchown(int fd, uid_t owner, gid_t group) { + return 0; +} + +int lchown(const char *path, uid_t owner, gid_t group) { + return 0; +} + +int posix_memalign(void **memptr, size_t alignment, size_t size) { + *memptr = _aligned_malloc(size, alignment); + return *memptr ? 0 : errno; +} + +char *strptime(const char *s, const char *format, struct tm *tm) { + std::istringstream input(s); + input.imbue(std::locale(setlocale(LC_ALL, nullptr))); + input >> std::get_time(tm, format); + if (input.fail()) { + return nullptr; + } + return (char*)(s + input.tellg()); +} + +int pipe(int pipefd[2]) { + // We'll use the same pipe size as Linux (64kb). + return _pipe(pipefd, 0x10000, O_NOINHERIT); +} + +// lrand48 is not available on Windows. We'll generate a pseudo-random +// value in the 0 - 2^31 range by calling rand twice. +long int lrand48(void) { + long int val; + val = (long int) rand(); + val <<= 16; + val += (long int) rand(); + return val; +} + +int random() { + return rand(); +} + +int fsync(int fd) { + HANDLE handle = (HANDLE*)_get_osfhandle(fd); + if (handle == INVALID_HANDLE_VALUE) + return -1; + if (!FlushFileBuffers(handle)) + return -1; + return 0; +} + +ssize_t pwrite(int fd, const void *buf, size_t count, off_t offset) { + DWORD bytes_written = 0; + + HANDLE handle = (HANDLE*)_get_osfhandle(fd); + if (handle == INVALID_HANDLE_VALUE) + return -1; + + OVERLAPPED overlapped = { 0 }; + ULARGE_INTEGER offsetUnion; + offsetUnion.QuadPart = offset; + + overlapped.Offset = offsetUnion.LowPart; + overlapped.OffsetHigh = offsetUnion.HighPart; + + if (!WriteFile(handle, buf, count, &bytes_written, &overlapped)) + // we may consider mapping error codes, although that may + // not be exhaustive. + return -1; + + return bytes_written; +} + +ssize_t pread(int fd, void *buf, size_t count, off_t offset) { + DWORD bytes_read = 0; + + HANDLE handle = (HANDLE*)_get_osfhandle(fd); + if (handle == INVALID_HANDLE_VALUE) + return -1; + + OVERLAPPED overlapped = { 0 }; + ULARGE_INTEGER offsetUnion; + offsetUnion.QuadPart = offset; + + overlapped.Offset = offsetUnion.LowPart; + overlapped.OffsetHigh = offsetUnion.HighPart; + + if (!ReadFile(handle, buf, count, &bytes_read, &overlapped)) { + if (GetLastError() != ERROR_HANDLE_EOF) + return -1; + } + + return bytes_read; +} + +ssize_t preadv(int fd, const struct iovec *iov, int iov_cnt) { + ssize_t read = 0; + + for (int i = 0; i < iov_cnt; i++) { + int r = ::read(fd, iov[i].iov_base, iov[i].iov_len); + if (r < 0) + return r; + read += r; + if (r < iov[i].iov_len) + break; + } + + return read; +} + +ssize_t writev(int fd, const struct iovec *iov, int iov_cnt) { + ssize_t written = 0; + + for (int i = 0; i < iov_cnt; i++) { + int r = ::write(fd, iov[i].iov_base, iov[i].iov_len); + if (r < 0) + return r; + written += r; + if (r < iov[i].iov_len) + break; + } + + return written; +} + +int &alloc_tls() { + static __thread int tlsvar; + tlsvar++; + return tlsvar; +} + +void apply_tls_workaround() { + // Workaround for the following Mingw bugs: + // https://sourceforge.net/p/mingw-w64/bugs/727/ + // https://sourceforge.net/p/mingw-w64/bugs/527/ + // https://sourceforge.net/p/mingw-w64/bugs/445/ + // https://gcc.gnu.org/bugzilla/attachment.cgi?id=41382 + pthread_key_t key; + pthread_key_create(&key, nullptr); + // Use a TLS slot for emutls + alloc_tls(); + // Free up a slot that can now be used for c++ destructors + pthread_key_delete(key); +} + +CEPH_CONSTRUCTOR(ceph_windows_init) { + // This will run at startup time before invoking main(). + WSADATA wsaData; + int error; + + #ifdef __MINGW32__ + apply_tls_workaround(); + #endif + + error = WSAStartup(MAKEWORD(2, 2), &wsaData); + if (error != 0) { + fprintf(stderr, "WSAStartup failed: %d", WSAGetLastError()); + exit(error); + } +} + +int _win_socketpair(int socks[2]) +{ + union { + struct sockaddr_in inaddr; + struct sockaddr addr; + } a; + SOCKET listener; + int e; + socklen_t addrlen = sizeof(a.inaddr); + int reuse = 1; + + if (socks == 0) { + WSASetLastError(WSAEINVAL); + return -1; + } + + listener = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); + if (listener == INVALID_SOCKET) { + return -1; + } + + memset(&a, 0, sizeof(a)); + a.inaddr.sin_family = AF_INET; + a.inaddr.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + a.inaddr.sin_port = 0; + + socks[0] = socks[1] = -1; + SOCKET s[2] = { INVALID_SOCKET, INVALID_SOCKET }; + + do { + if (setsockopt(listener, SOL_SOCKET, SO_REUSEADDR, + (char*) &reuse, (socklen_t) sizeof(reuse)) == -1) + break; + if (bind(listener, &a.addr, sizeof(a.inaddr)) == SOCKET_ERROR) + break; + if (getsockname(listener, &a.addr, &addrlen) == SOCKET_ERROR) + break; + if (listen(listener, 1) == SOCKET_ERROR) + break; + s[0] = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); + if (s[0] == INVALID_SOCKET) + break; + if (connect(s[0], &a.addr, sizeof(a.inaddr)) == SOCKET_ERROR) + break; + s[1] = accept(listener, NULL, NULL); + if (s[1] == INVALID_SOCKET) + break; + + closesocket(listener); + + // The Windows socket API is mostly compatible with the Berkeley + // API, with a few exceptions. The Windows socket functions use + // SOCKET instead of int. The issue is that on x64 systems, + // SOCKET uses 64b while int uses 32b. There's been much debate + // whether casting a Windows socket to an int is safe or not. + // Worth noting that Windows kernel objects use 32b. For now, + // we're just adding a check. + // + // Ideally, we should update ceph to use the right type but this + // can be quite difficult, especially considering that there are + // a significant number of functions that accept both sockets and + // file descriptors. + if (s[0] >> 32 || s[1] >> 32) { + WSASetLastError(WSAENAMETOOLONG); + break; + } + + socks[0] = s[0]; + socks[1] = s[1]; + + return 0; + + } while (0); + + e = WSAGetLastError(); + closesocket(listener); + closesocket(s[0]); + closesocket(s[1]); + WSASetLastError(e); + return -1; +} + +int win_socketpair(int socks[2]) { + int r = 0; + for (int i = 0; i < 15; i++) { + r = _win_socketpair(socks); + if (r && WSAGetLastError() == WSAEADDRINUSE) { + sleep(2); + continue; + } + else { + break; + } + } + return r; +} + +unsigned get_page_size() { + SYSTEM_INFO system_info; + GetSystemInfo(&system_info); + return system_info.dwPageSize; +} + +int setenv(const char *name, const char *value, int overwrite) { + if (!overwrite && getenv(name)) { + return 0; + } + return _putenv_s(name, value); +} + +ssize_t get_self_exe_path(char* path, int buff_length) { + return GetModuleFileName(NULL, path, buff_length - 1); +} + +int geteuid() +{ + return 0; +} + +int getegid() +{ + return 0; +} + +int getuid() +{ + return 0; +} + +int getgid() +{ + return 0; +} + +#else + +unsigned get_page_size() { + return sysconf(_SC_PAGESIZE); +} + +ssize_t get_self_exe_path(char* path, int buff_length) { + return readlink("/proc/self/exe", path, + sizeof(buff_length) - 1); +} + +#endif /* _WIN32 */ |