diff options
Diffstat (limited to 'src/include/port')
46 files changed, 4323 insertions, 0 deletions
diff --git a/src/include/port/aix.h b/src/include/port/aix.h new file mode 100644 index 0000000..5b1159c --- /dev/null +++ b/src/include/port/aix.h @@ -0,0 +1,14 @@ +/* + * src/include/port/aix.h + */ +#define CLASS_CONFLICT +#define DISABLE_XOPEN_NLS + +/* + * "IBM XL C/C++ for AIX, V12.1" miscompiles, for 32-bit, some inline + * expansions of ginCompareItemPointers() "long long" arithmetic. To take + * advantage of inlining, build a 64-bit PostgreSQL. + */ +#if defined(__ILP32__) && defined(__IBMC__) +#define PG_FORCE_DISABLE_INLINE +#endif diff --git a/src/include/port/atomics.h b/src/include/port/atomics.h new file mode 100644 index 0000000..bbff945 --- /dev/null +++ b/src/include/port/atomics.h @@ -0,0 +1,519 @@ +/*------------------------------------------------------------------------- + * + * atomics.h + * Atomic operations. + * + * Hardware and compiler dependent functions for manipulating memory + * atomically and dealing with cache coherency. Used to implement locking + * facilities and lockless algorithms/data structures. + * + * To bring up postgres on a platform/compiler at the very least + * implementations for the following operations should be provided: + * * pg_compiler_barrier(), pg_write_barrier(), pg_read_barrier() + * * pg_atomic_compare_exchange_u32(), pg_atomic_fetch_add_u32() + * * pg_atomic_test_set_flag(), pg_atomic_init_flag(), pg_atomic_clear_flag() + * * PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY should be defined if appropriate. + * + * There exist generic, hardware independent, implementations for several + * compilers which might be sufficient, although possibly not optimal, for a + * new platform. If no such generic implementation is available spinlocks (or + * even OS provided semaphores) will be used to implement the API. + * + * Implement _u64 atomics if and only if your platform can use them + * efficiently (and obviously correctly). + * + * Use higher level functionality (lwlocks, spinlocks, heavyweight locks) + * whenever possible. Writing correct code using these facilities is hard. + * + * For an introduction to using memory barriers within the PostgreSQL backend, + * see src/backend/storage/lmgr/README.barrier + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/port/atomics.h + * + *------------------------------------------------------------------------- + */ +#ifndef ATOMICS_H +#define ATOMICS_H + +#ifdef FRONTEND +#error "atomics.h may not be included from frontend code" +#endif + +#define INSIDE_ATOMICS_H + +#include <limits.h> + +/* + * First a set of architecture specific files is included. + * + * These files can provide the full set of atomics or can do pretty much + * nothing if all the compilers commonly used on these platforms provide + * usable generics. + * + * Don't add an inline assembly of the actual atomic operations if all the + * common implementations of your platform provide intrinsics. Intrinsics are + * much easier to understand and potentially support more architectures. + * + * It will often make sense to define memory barrier semantics here, since + * e.g. generic compiler intrinsics for x86 memory barriers can't know that + * postgres doesn't need x86 read/write barriers do anything more than a + * compiler barrier. + * + */ +#if defined(__arm__) || defined(__arm) || defined(__aarch64__) +#include "port/atomics/arch-arm.h" +#elif defined(__i386__) || defined(__i386) || defined(__x86_64__) +#include "port/atomics/arch-x86.h" +#elif defined(__ppc__) || defined(__powerpc__) || defined(__ppc64__) || defined(__powerpc64__) +#include "port/atomics/arch-ppc.h" +#elif defined(__hppa) || defined(__hppa__) +#include "port/atomics/arch-hppa.h" +#endif + +/* + * Compiler specific, but architecture independent implementations. + * + * Provide architecture independent implementations of the atomic + * facilities. At the very least compiler barriers should be provided, but a + * full implementation of + * * pg_compiler_barrier(), pg_write_barrier(), pg_read_barrier() + * * pg_atomic_compare_exchange_u32(), pg_atomic_fetch_add_u32() + * using compiler intrinsics are a good idea. + */ +/* + * gcc or compatible, including clang and icc. Exclude xlc. The ppc64le "IBM + * XL C/C++ for Linux, V13.1.2" emulates gcc, but __sync_lock_test_and_set() + * of one-byte types elicits SIGSEGV. That bug was gone by V13.1.5 (2016-12). + */ +#if (defined(__GNUC__) || defined(__INTEL_COMPILER)) && !(defined(__IBMC__) || defined(__IBMCPP__)) +#include "port/atomics/generic-gcc.h" +#elif defined(_MSC_VER) +#include "port/atomics/generic-msvc.h" +#elif defined(__SUNPRO_C) && !defined(__GNUC__) +#include "port/atomics/generic-sunpro.h" +#else +/* + * Unsupported compiler, we'll likely use slower fallbacks... At least + * compiler barriers should really be provided. + */ +#endif + +/* + * Provide a full fallback of the pg_*_barrier(), pg_atomic**_flag and + * pg_atomic_* APIs for platforms without sufficient spinlock and/or atomics + * support. In the case of spinlock backed atomics the emulation is expected + * to be efficient, although less so than native atomics support. + */ +#include "port/atomics/fallback.h" + +/* + * Provide additional operations using supported infrastructure. These are + * expected to be efficient if the underlying atomic operations are efficient. + */ +#include "port/atomics/generic.h" + + +/* + * pg_compiler_barrier - prevent the compiler from moving code across + * + * A compiler barrier need not (and preferably should not) emit any actual + * machine code, but must act as an optimization fence: the compiler must not + * reorder loads or stores to main memory around the barrier. However, the + * CPU may still reorder loads or stores at runtime, if the architecture's + * memory model permits this. + */ +#define pg_compiler_barrier() pg_compiler_barrier_impl() + +/* + * pg_memory_barrier - prevent the CPU from reordering memory access + * + * A memory barrier must act as a compiler barrier, and in addition must + * guarantee that all loads and stores issued prior to the barrier are + * completed before any loads or stores issued after the barrier. Unless + * loads and stores are totally ordered (which is not the case on most + * architectures) this requires issuing some sort of memory fencing + * instruction. + */ +#define pg_memory_barrier() pg_memory_barrier_impl() + +/* + * pg_(read|write)_barrier - prevent the CPU from reordering memory access + * + * A read barrier must act as a compiler barrier, and in addition must + * guarantee that any loads issued prior to the barrier are completed before + * any loads issued after the barrier. Similarly, a write barrier acts + * as a compiler barrier, and also orders stores. Read and write barriers + * are thus weaker than a full memory barrier, but stronger than a compiler + * barrier. In practice, on machines with strong memory ordering, read and + * write barriers may require nothing more than a compiler barrier. + */ +#define pg_read_barrier() pg_read_barrier_impl() +#define pg_write_barrier() pg_write_barrier_impl() + +/* + * Spinloop delay - Allow CPU to relax in busy loops + */ +#define pg_spin_delay() pg_spin_delay_impl() + +/* + * pg_atomic_init_flag - initialize atomic flag. + * + * No barrier semantics. + */ +static inline void +pg_atomic_init_flag(volatile pg_atomic_flag *ptr) +{ + pg_atomic_init_flag_impl(ptr); +} + +/* + * pg_atomic_test_set_flag - TAS() + * + * Returns true if the flag has successfully been set, false otherwise. + * + * Acquire (including read barrier) semantics. + */ +static inline bool +pg_atomic_test_set_flag(volatile pg_atomic_flag *ptr) +{ + return pg_atomic_test_set_flag_impl(ptr); +} + +/* + * pg_atomic_unlocked_test_flag - Check if the lock is free + * + * Returns true if the flag currently is not set, false otherwise. + * + * No barrier semantics. + */ +static inline bool +pg_atomic_unlocked_test_flag(volatile pg_atomic_flag *ptr) +{ + return pg_atomic_unlocked_test_flag_impl(ptr); +} + +/* + * pg_atomic_clear_flag - release lock set by TAS() + * + * Release (including write barrier) semantics. + */ +static inline void +pg_atomic_clear_flag(volatile pg_atomic_flag *ptr) +{ + pg_atomic_clear_flag_impl(ptr); +} + + +/* + * pg_atomic_init_u32 - initialize atomic variable + * + * Has to be done before any concurrent usage.. + * + * No barrier semantics. + */ +static inline void +pg_atomic_init_u32(volatile pg_atomic_uint32 *ptr, uint32 val) +{ + AssertPointerAlignment(ptr, 4); + + pg_atomic_init_u32_impl(ptr, val); +} + +/* + * pg_atomic_read_u32 - unlocked read from atomic variable. + * + * The read is guaranteed to return a value as it has been written by this or + * another process at some point in the past. There's however no cache + * coherency interaction guaranteeing the value hasn't since been written to + * again. + * + * No barrier semantics. + */ +static inline uint32 +pg_atomic_read_u32(volatile pg_atomic_uint32 *ptr) +{ + AssertPointerAlignment(ptr, 4); + return pg_atomic_read_u32_impl(ptr); +} + +/* + * pg_atomic_write_u32 - write to atomic variable. + * + * The write is guaranteed to succeed as a whole, i.e. it's not possible to + * observe a partial write for any reader. Note that this correctly interacts + * with pg_atomic_compare_exchange_u32, in contrast to + * pg_atomic_unlocked_write_u32(). + * + * No barrier semantics. + */ +static inline void +pg_atomic_write_u32(volatile pg_atomic_uint32 *ptr, uint32 val) +{ + AssertPointerAlignment(ptr, 4); + + pg_atomic_write_u32_impl(ptr, val); +} + +/* + * pg_atomic_unlocked_write_u32 - unlocked write to atomic variable. + * + * The write is guaranteed to succeed as a whole, i.e. it's not possible to + * observe a partial write for any reader. But note that writing this way is + * not guaranteed to correctly interact with read-modify-write operations like + * pg_atomic_compare_exchange_u32. This should only be used in cases where + * minor performance regressions due to atomics emulation are unacceptable. + * + * No barrier semantics. + */ +static inline void +pg_atomic_unlocked_write_u32(volatile pg_atomic_uint32 *ptr, uint32 val) +{ + AssertPointerAlignment(ptr, 4); + + pg_atomic_unlocked_write_u32_impl(ptr, val); +} + +/* + * pg_atomic_exchange_u32 - exchange newval with current value + * + * Returns the old value of 'ptr' before the swap. + * + * Full barrier semantics. + */ +static inline uint32 +pg_atomic_exchange_u32(volatile pg_atomic_uint32 *ptr, uint32 newval) +{ + AssertPointerAlignment(ptr, 4); + + return pg_atomic_exchange_u32_impl(ptr, newval); +} + +/* + * pg_atomic_compare_exchange_u32 - CAS operation + * + * Atomically compare the current value of ptr with *expected and store newval + * iff ptr and *expected have the same value. The current value of *ptr will + * always be stored in *expected. + * + * Return true if values have been exchanged, false otherwise. + * + * Full barrier semantics. + */ +static inline bool +pg_atomic_compare_exchange_u32(volatile pg_atomic_uint32 *ptr, + uint32 *expected, uint32 newval) +{ + AssertPointerAlignment(ptr, 4); + AssertPointerAlignment(expected, 4); + + return pg_atomic_compare_exchange_u32_impl(ptr, expected, newval); +} + +/* + * pg_atomic_fetch_add_u32 - atomically add to variable + * + * Returns the value of ptr before the arithmetic operation. + * + * Full barrier semantics. + */ +static inline uint32 +pg_atomic_fetch_add_u32(volatile pg_atomic_uint32 *ptr, int32 add_) +{ + AssertPointerAlignment(ptr, 4); + return pg_atomic_fetch_add_u32_impl(ptr, add_); +} + +/* + * pg_atomic_fetch_sub_u32 - atomically subtract from variable + * + * Returns the value of ptr before the arithmetic operation. Note that sub_ + * may not be INT_MIN due to platform limitations. + * + * Full barrier semantics. + */ +static inline uint32 +pg_atomic_fetch_sub_u32(volatile pg_atomic_uint32 *ptr, int32 sub_) +{ + AssertPointerAlignment(ptr, 4); + Assert(sub_ != INT_MIN); + return pg_atomic_fetch_sub_u32_impl(ptr, sub_); +} + +/* + * pg_atomic_fetch_and_u32 - atomically bit-and and_ with variable + * + * Returns the value of ptr before the arithmetic operation. + * + * Full barrier semantics. + */ +static inline uint32 +pg_atomic_fetch_and_u32(volatile pg_atomic_uint32 *ptr, uint32 and_) +{ + AssertPointerAlignment(ptr, 4); + return pg_atomic_fetch_and_u32_impl(ptr, and_); +} + +/* + * pg_atomic_fetch_or_u32 - atomically bit-or or_ with variable + * + * Returns the value of ptr before the arithmetic operation. + * + * Full barrier semantics. + */ +static inline uint32 +pg_atomic_fetch_or_u32(volatile pg_atomic_uint32 *ptr, uint32 or_) +{ + AssertPointerAlignment(ptr, 4); + return pg_atomic_fetch_or_u32_impl(ptr, or_); +} + +/* + * pg_atomic_add_fetch_u32 - atomically add to variable + * + * Returns the value of ptr after the arithmetic operation. + * + * Full barrier semantics. + */ +static inline uint32 +pg_atomic_add_fetch_u32(volatile pg_atomic_uint32 *ptr, int32 add_) +{ + AssertPointerAlignment(ptr, 4); + return pg_atomic_add_fetch_u32_impl(ptr, add_); +} + +/* + * pg_atomic_sub_fetch_u32 - atomically subtract from variable + * + * Returns the value of ptr after the arithmetic operation. Note that sub_ may + * not be INT_MIN due to platform limitations. + * + * Full barrier semantics. + */ +static inline uint32 +pg_atomic_sub_fetch_u32(volatile pg_atomic_uint32 *ptr, int32 sub_) +{ + AssertPointerAlignment(ptr, 4); + Assert(sub_ != INT_MIN); + return pg_atomic_sub_fetch_u32_impl(ptr, sub_); +} + +/* ---- + * The 64 bit operations have the same semantics as their 32bit counterparts + * if they are available. Check the corresponding 32bit function for + * documentation. + * ---- + */ +static inline void +pg_atomic_init_u64(volatile pg_atomic_uint64 *ptr, uint64 val) +{ + /* + * Can't necessarily enforce alignment - and don't need it - when using + * the spinlock based fallback implementation. Therefore only assert when + * not using it. + */ +#ifndef PG_HAVE_ATOMIC_U64_SIMULATION + AssertPointerAlignment(ptr, 8); +#endif + pg_atomic_init_u64_impl(ptr, val); +} + +static inline uint64 +pg_atomic_read_u64(volatile pg_atomic_uint64 *ptr) +{ +#ifndef PG_HAVE_ATOMIC_U64_SIMULATION + AssertPointerAlignment(ptr, 8); +#endif + return pg_atomic_read_u64_impl(ptr); +} + +static inline void +pg_atomic_write_u64(volatile pg_atomic_uint64 *ptr, uint64 val) +{ +#ifndef PG_HAVE_ATOMIC_U64_SIMULATION + AssertPointerAlignment(ptr, 8); +#endif + pg_atomic_write_u64_impl(ptr, val); +} + +static inline uint64 +pg_atomic_exchange_u64(volatile pg_atomic_uint64 *ptr, uint64 newval) +{ +#ifndef PG_HAVE_ATOMIC_U64_SIMULATION + AssertPointerAlignment(ptr, 8); +#endif + return pg_atomic_exchange_u64_impl(ptr, newval); +} + +static inline bool +pg_atomic_compare_exchange_u64(volatile pg_atomic_uint64 *ptr, + uint64 *expected, uint64 newval) +{ +#ifndef PG_HAVE_ATOMIC_U64_SIMULATION + AssertPointerAlignment(ptr, 8); + AssertPointerAlignment(expected, 8); +#endif + return pg_atomic_compare_exchange_u64_impl(ptr, expected, newval); +} + +static inline uint64 +pg_atomic_fetch_add_u64(volatile pg_atomic_uint64 *ptr, int64 add_) +{ +#ifndef PG_HAVE_ATOMIC_U64_SIMULATION + AssertPointerAlignment(ptr, 8); +#endif + return pg_atomic_fetch_add_u64_impl(ptr, add_); +} + +static inline uint64 +pg_atomic_fetch_sub_u64(volatile pg_atomic_uint64 *ptr, int64 sub_) +{ +#ifndef PG_HAVE_ATOMIC_U64_SIMULATION + AssertPointerAlignment(ptr, 8); +#endif + Assert(sub_ != PG_INT64_MIN); + return pg_atomic_fetch_sub_u64_impl(ptr, sub_); +} + +static inline uint64 +pg_atomic_fetch_and_u64(volatile pg_atomic_uint64 *ptr, uint64 and_) +{ +#ifndef PG_HAVE_ATOMIC_U64_SIMULATION + AssertPointerAlignment(ptr, 8); +#endif + return pg_atomic_fetch_and_u64_impl(ptr, and_); +} + +static inline uint64 +pg_atomic_fetch_or_u64(volatile pg_atomic_uint64 *ptr, uint64 or_) +{ +#ifndef PG_HAVE_ATOMIC_U64_SIMULATION + AssertPointerAlignment(ptr, 8); +#endif + return pg_atomic_fetch_or_u64_impl(ptr, or_); +} + +static inline uint64 +pg_atomic_add_fetch_u64(volatile pg_atomic_uint64 *ptr, int64 add_) +{ +#ifndef PG_HAVE_ATOMIC_U64_SIMULATION + AssertPointerAlignment(ptr, 8); +#endif + return pg_atomic_add_fetch_u64_impl(ptr, add_); +} + +static inline uint64 +pg_atomic_sub_fetch_u64(volatile pg_atomic_uint64 *ptr, int64 sub_) +{ +#ifndef PG_HAVE_ATOMIC_U64_SIMULATION + AssertPointerAlignment(ptr, 8); +#endif + Assert(sub_ != PG_INT64_MIN); + return pg_atomic_sub_fetch_u64_impl(ptr, sub_); +} + +#undef INSIDE_ATOMICS_H + +#endif /* ATOMICS_H */ diff --git a/src/include/port/atomics/arch-arm.h b/src/include/port/atomics/arch-arm.h new file mode 100644 index 0000000..c90bf58 --- /dev/null +++ b/src/include/port/atomics/arch-arm.h @@ -0,0 +1,32 @@ +/*------------------------------------------------------------------------- + * + * arch-arm.h + * Atomic operations considerations specific to ARM + * + * Portions Copyright (c) 2013-2023, PostgreSQL Global Development Group + * + * NOTES: + * + * src/include/port/atomics/arch-arm.h + * + *------------------------------------------------------------------------- + */ + +/* intentionally no include guards, should only be included by atomics.h */ +#ifndef INSIDE_ATOMICS_H +#error "should be included via atomics.h" +#endif + +/* + * 64 bit atomics on ARM32 are implemented using kernel fallbacks and thus + * might be slow, so disable entirely. On ARM64 that problem doesn't exist. + */ +#if !defined(__aarch64__) +#define PG_DISABLE_64_BIT_ATOMICS +#else +/* + * Architecture Reference Manual for ARMv8 states aligned read/write to/from + * general purpose register is atomic. + */ +#define PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY +#endif /* __aarch64__ */ diff --git a/src/include/port/atomics/arch-hppa.h b/src/include/port/atomics/arch-hppa.h new file mode 100644 index 0000000..4c89fbf --- /dev/null +++ b/src/include/port/atomics/arch-hppa.h @@ -0,0 +1,17 @@ +/*------------------------------------------------------------------------- + * + * arch-hppa.h + * Atomic operations considerations specific to HPPA + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * NOTES: + * + * src/include/port/atomics/arch-hppa.h + * + *------------------------------------------------------------------------- + */ + +/* HPPA doesn't do either read or write reordering */ +#define pg_memory_barrier_impl() pg_compiler_barrier_impl() diff --git a/src/include/port/atomics/arch-ppc.h b/src/include/port/atomics/arch-ppc.h new file mode 100644 index 0000000..d992d4c --- /dev/null +++ b/src/include/port/atomics/arch-ppc.h @@ -0,0 +1,254 @@ +/*------------------------------------------------------------------------- + * + * arch-ppc.h + * Atomic operations considerations specific to PowerPC + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * NOTES: + * + * src/include/port/atomics/arch-ppc.h + * + *------------------------------------------------------------------------- + */ + +#if defined(__GNUC__) + +/* + * lwsync orders loads with respect to each other, and similarly with stores. + * But a load can be performed before a subsequent store, so sync must be used + * for a full memory barrier. + */ +#define pg_memory_barrier_impl() __asm__ __volatile__ ("sync" : : : "memory") +#define pg_read_barrier_impl() __asm__ __volatile__ ("lwsync" : : : "memory") +#define pg_write_barrier_impl() __asm__ __volatile__ ("lwsync" : : : "memory") +#endif + +#define PG_HAVE_ATOMIC_U32_SUPPORT +typedef struct pg_atomic_uint32 +{ + volatile uint32 value; +} pg_atomic_uint32; + +/* 64bit atomics are only supported in 64bit mode */ +#if SIZEOF_VOID_P >= 8 +#define PG_HAVE_ATOMIC_U64_SUPPORT +typedef struct pg_atomic_uint64 +{ + volatile uint64 value pg_attribute_aligned(8); +} pg_atomic_uint64; + +#endif + +/* + * This mimics gcc __atomic_compare_exchange_n(..., __ATOMIC_SEQ_CST), but + * code generation differs at the end. __atomic_compare_exchange_n(): + * 100: isync + * 104: mfcr r3 + * 108: rlwinm r3,r3,3,31,31 + * 10c: bne 120 <.eb+0x10> + * 110: clrldi r3,r3,63 + * 114: addi r1,r1,112 + * 118: blr + * 11c: nop + * 120: clrldi r3,r3,63 + * 124: stw r9,0(r4) + * 128: addi r1,r1,112 + * 12c: blr + * + * This: + * f0: isync + * f4: mfcr r9 + * f8: rldicl. r3,r9,35,63 + * fc: bne 104 <.eb> + * 100: stw r10,0(r4) + * 104: addi r1,r1,112 + * 108: blr + * + * This implementation may or may not have materially different performance. + * It's not exploiting the fact that cr0 still holds the relevant comparison + * bits, set during the __asm__. One could fix that by moving more code into + * the __asm__. (That would remove the freedom to eliminate dead stores when + * the caller ignores "expected", but few callers do.) + * + * Recognizing constant "newval" would be superfluous, because there's no + * immediate-operand version of stwcx. + */ +#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32 +static inline bool +pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr, + uint32 *expected, uint32 newval) +{ + uint32 found; + uint32 condition_register; + bool ret; + +#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P + if (__builtin_constant_p(*expected) && + (int32) *expected <= PG_INT16_MAX && + (int32) *expected >= PG_INT16_MIN) + __asm__ __volatile__( + " sync \n" + " lwarx %0,0,%5,1 \n" + " cmpwi %0,%3 \n" + " bne $+12 \n" /* branch to lwsync */ + " stwcx. %4,0,%5 \n" + " bne $-16 \n" /* branch to lwarx */ + " lwsync \n" + " mfcr %1 \n" +: "=&r"(found), "=r"(condition_register), "+m"(ptr->value) +: "i"(*expected), "r"(newval), "r"(&ptr->value) +: "memory", "cc"); + else +#endif + __asm__ __volatile__( + " sync \n" + " lwarx %0,0,%5,1 \n" + " cmpw %0,%3 \n" + " bne $+12 \n" /* branch to lwsync */ + " stwcx. %4,0,%5 \n" + " bne $-16 \n" /* branch to lwarx */ + " lwsync \n" + " mfcr %1 \n" +: "=&r"(found), "=r"(condition_register), "+m"(ptr->value) +: "r"(*expected), "r"(newval), "r"(&ptr->value) +: "memory", "cc"); + + ret = (condition_register >> 29) & 1; /* test eq bit of cr0 */ + if (!ret) + *expected = found; + return ret; +} + +/* + * This mirrors gcc __sync_fetch_and_add(). + * + * Like tas(), use constraint "=&b" to avoid allocating r0. + */ +#define PG_HAVE_ATOMIC_FETCH_ADD_U32 +static inline uint32 +pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_) +{ + uint32 _t; + uint32 res; + +#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P + if (__builtin_constant_p(add_) && + add_ <= PG_INT16_MAX && add_ >= PG_INT16_MIN) + __asm__ __volatile__( + " sync \n" + " lwarx %1,0,%4,1 \n" + " addi %0,%1,%3 \n" + " stwcx. %0,0,%4 \n" + " bne $-12 \n" /* branch to lwarx */ + " lwsync \n" +: "=&r"(_t), "=&b"(res), "+m"(ptr->value) +: "i"(add_), "r"(&ptr->value) +: "memory", "cc"); + else +#endif + __asm__ __volatile__( + " sync \n" + " lwarx %1,0,%4,1 \n" + " add %0,%1,%3 \n" + " stwcx. %0,0,%4 \n" + " bne $-12 \n" /* branch to lwarx */ + " lwsync \n" +: "=&r"(_t), "=&r"(res), "+m"(ptr->value) +: "r"(add_), "r"(&ptr->value) +: "memory", "cc"); + + return res; +} + +#ifdef PG_HAVE_ATOMIC_U64_SUPPORT + +#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64 +static inline bool +pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr, + uint64 *expected, uint64 newval) +{ + uint64 found; + uint32 condition_register; + bool ret; + + /* Like u32, but s/lwarx/ldarx/; s/stwcx/stdcx/; s/cmpw/cmpd/ */ +#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P + if (__builtin_constant_p(*expected) && + (int64) *expected <= PG_INT16_MAX && + (int64) *expected >= PG_INT16_MIN) + __asm__ __volatile__( + " sync \n" + " ldarx %0,0,%5,1 \n" + " cmpdi %0,%3 \n" + " bne $+12 \n" /* branch to lwsync */ + " stdcx. %4,0,%5 \n" + " bne $-16 \n" /* branch to ldarx */ + " lwsync \n" + " mfcr %1 \n" +: "=&r"(found), "=r"(condition_register), "+m"(ptr->value) +: "i"(*expected), "r"(newval), "r"(&ptr->value) +: "memory", "cc"); + else +#endif + __asm__ __volatile__( + " sync \n" + " ldarx %0,0,%5,1 \n" + " cmpd %0,%3 \n" + " bne $+12 \n" /* branch to lwsync */ + " stdcx. %4,0,%5 \n" + " bne $-16 \n" /* branch to ldarx */ + " lwsync \n" + " mfcr %1 \n" +: "=&r"(found), "=r"(condition_register), "+m"(ptr->value) +: "r"(*expected), "r"(newval), "r"(&ptr->value) +: "memory", "cc"); + + ret = (condition_register >> 29) & 1; /* test eq bit of cr0 */ + if (!ret) + *expected = found; + return ret; +} + +#define PG_HAVE_ATOMIC_FETCH_ADD_U64 +static inline uint64 +pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_) +{ + uint64 _t; + uint64 res; + + /* Like u32, but s/lwarx/ldarx/; s/stwcx/stdcx/ */ +#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P + if (__builtin_constant_p(add_) && + add_ <= PG_INT16_MAX && add_ >= PG_INT16_MIN) + __asm__ __volatile__( + " sync \n" + " ldarx %1,0,%4,1 \n" + " addi %0,%1,%3 \n" + " stdcx. %0,0,%4 \n" + " bne $-12 \n" /* branch to ldarx */ + " lwsync \n" +: "=&r"(_t), "=&b"(res), "+m"(ptr->value) +: "i"(add_), "r"(&ptr->value) +: "memory", "cc"); + else +#endif + __asm__ __volatile__( + " sync \n" + " ldarx %1,0,%4,1 \n" + " add %0,%1,%3 \n" + " stdcx. %0,0,%4 \n" + " bne $-12 \n" /* branch to ldarx */ + " lwsync \n" +: "=&r"(_t), "=&r"(res), "+m"(ptr->value) +: "r"(add_), "r"(&ptr->value) +: "memory", "cc"); + + return res; +} + +#endif /* PG_HAVE_ATOMIC_U64_SUPPORT */ + +/* per architecture manual doubleword accesses have single copy atomicity */ +#define PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY diff --git a/src/include/port/atomics/arch-x86.h b/src/include/port/atomics/arch-x86.h new file mode 100644 index 0000000..bb84b9b --- /dev/null +++ b/src/include/port/atomics/arch-x86.h @@ -0,0 +1,252 @@ +/*------------------------------------------------------------------------- + * + * arch-x86.h + * Atomic operations considerations specific to intel x86 + * + * Note that we actually require a 486 upwards because the 386 doesn't have + * support for xadd and cmpxchg. Given that the 386 isn't supported anywhere + * anymore that's not much of a restriction luckily. + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * NOTES: + * + * src/include/port/atomics/arch-x86.h + * + *------------------------------------------------------------------------- + */ + +/* + * Both 32 and 64 bit x86 do not allow loads to be reordered with other loads, + * or stores to be reordered with other stores, but a load can be performed + * before a subsequent store. + * + * Technically, some x86-ish chips support uncached memory access and/or + * special instructions that are weakly ordered. In those cases we'd need + * the read and write barriers to be lfence and sfence. But since we don't + * do those things, a compiler barrier should be enough. + * + * "lock; addl" has worked for longer than "mfence". It's also rumored to be + * faster in many scenarios. + */ + +#if defined(__GNUC__) || defined(__INTEL_COMPILER) +#if defined(__i386__) || defined(__i386) +#define pg_memory_barrier_impl() \ + __asm__ __volatile__ ("lock; addl $0,0(%%esp)" : : : "memory", "cc") +#elif defined(__x86_64__) +#define pg_memory_barrier_impl() \ + __asm__ __volatile__ ("lock; addl $0,0(%%rsp)" : : : "memory", "cc") +#endif +#endif /* defined(__GNUC__) || defined(__INTEL_COMPILER) */ + +#define pg_read_barrier_impl() pg_compiler_barrier_impl() +#define pg_write_barrier_impl() pg_compiler_barrier_impl() + +/* + * Provide implementation for atomics using inline assembly on x86 gcc. It's + * nice to support older gcc's and the compare/exchange implementation here is + * actually more efficient than the * __sync variant. + */ +#if defined(HAVE_ATOMICS) + +#if defined(__GNUC__) || defined(__INTEL_COMPILER) + +#define PG_HAVE_ATOMIC_FLAG_SUPPORT +typedef struct pg_atomic_flag +{ + volatile char value; +} pg_atomic_flag; + +#define PG_HAVE_ATOMIC_U32_SUPPORT +typedef struct pg_atomic_uint32 +{ + volatile uint32 value; +} pg_atomic_uint32; + +/* + * It's too complicated to write inline asm for 64bit types on 32bit and the + * 486 can't do it anyway. + */ +#ifdef __x86_64__ +#define PG_HAVE_ATOMIC_U64_SUPPORT +typedef struct pg_atomic_uint64 +{ + /* alignment guaranteed due to being on a 64bit platform */ + volatile uint64 value; +} pg_atomic_uint64; +#endif /* __x86_64__ */ + +#endif /* defined(__GNUC__) || defined(__INTEL_COMPILER) */ + +#endif /* defined(HAVE_ATOMICS) */ + +#if !defined(PG_HAVE_SPIN_DELAY) +/* + * This sequence is equivalent to the PAUSE instruction ("rep" is + * ignored by old IA32 processors if the following instruction is + * not a string operation); the IA-32 Architecture Software + * Developer's Manual, Vol. 3, Section 7.7.2 describes why using + * PAUSE in the inner loop of a spin lock is necessary for good + * performance: + * + * The PAUSE instruction improves the performance of IA-32 + * processors supporting Hyper-Threading Technology when + * executing spin-wait loops and other routines where one + * thread is accessing a shared lock or semaphore in a tight + * polling loop. When executing a spin-wait loop, the + * processor can suffer a severe performance penalty when + * exiting the loop because it detects a possible memory order + * violation and flushes the core processor's pipeline. The + * PAUSE instruction provides a hint to the processor that the + * code sequence is a spin-wait loop. The processor uses this + * hint to avoid the memory order violation and prevent the + * pipeline flush. In addition, the PAUSE instruction + * de-pipelines the spin-wait loop to prevent it from + * consuming execution resources excessively. + */ +#if defined(__GNUC__) || defined(__INTEL_COMPILER) +#define PG_HAVE_SPIN_DELAY +static __inline__ void +pg_spin_delay_impl(void) +{ + __asm__ __volatile__(" rep; nop \n"); +} +#elif defined(_MSC_VER) && defined(__x86_64__) +#define PG_HAVE_SPIN_DELAY +static __forceinline void +pg_spin_delay_impl(void) +{ + _mm_pause(); +} +#elif defined(_MSC_VER) +#define PG_HAVE_SPIN_DELAY +static __forceinline void +pg_spin_delay_impl(void) +{ + /* See comment for gcc code. Same code, MASM syntax */ + __asm rep nop; +} +#endif +#endif /* !defined(PG_HAVE_SPIN_DELAY) */ + + +#if defined(HAVE_ATOMICS) + +#if defined(__GNUC__) || defined(__INTEL_COMPILER) + +#define PG_HAVE_ATOMIC_TEST_SET_FLAG +static inline bool +pg_atomic_test_set_flag_impl(volatile pg_atomic_flag *ptr) +{ + char _res = 1; + + __asm__ __volatile__( + " lock \n" + " xchgb %0,%1 \n" +: "+q"(_res), "+m"(ptr->value) +: +: "memory"); + return _res == 0; +} + +#define PG_HAVE_ATOMIC_CLEAR_FLAG +static inline void +pg_atomic_clear_flag_impl(volatile pg_atomic_flag *ptr) +{ + /* + * On a TSO architecture like x86 it's sufficient to use a compiler + * barrier to achieve release semantics. + */ + __asm__ __volatile__("" ::: "memory"); + ptr->value = 0; +} + +#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32 +static inline bool +pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr, + uint32 *expected, uint32 newval) +{ + char ret; + + /* + * Perform cmpxchg and use the zero flag which it implicitly sets when + * equal to measure the success. + */ + __asm__ __volatile__( + " lock \n" + " cmpxchgl %4,%5 \n" + " setz %2 \n" +: "=a" (*expected), "=m"(ptr->value), "=q" (ret) +: "a" (*expected), "r" (newval), "m"(ptr->value) +: "memory", "cc"); + return (bool) ret; +} + +#define PG_HAVE_ATOMIC_FETCH_ADD_U32 +static inline uint32 +pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_) +{ + uint32 res; + __asm__ __volatile__( + " lock \n" + " xaddl %0,%1 \n" +: "=q"(res), "=m"(ptr->value) +: "0" (add_), "m"(ptr->value) +: "memory", "cc"); + return res; +} + +#ifdef __x86_64__ + +#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64 +static inline bool +pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr, + uint64 *expected, uint64 newval) +{ + char ret; + + /* + * Perform cmpxchg and use the zero flag which it implicitly sets when + * equal to measure the success. + */ + __asm__ __volatile__( + " lock \n" + " cmpxchgq %4,%5 \n" + " setz %2 \n" +: "=a" (*expected), "=m"(ptr->value), "=q" (ret) +: "a" (*expected), "r" (newval), "m"(ptr->value) +: "memory", "cc"); + return (bool) ret; +} + +#define PG_HAVE_ATOMIC_FETCH_ADD_U64 +static inline uint64 +pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_) +{ + uint64 res; + __asm__ __volatile__( + " lock \n" + " xaddq %0,%1 \n" +: "=q"(res), "=m"(ptr->value) +: "0" (add_), "m"(ptr->value) +: "memory", "cc"); + return res; +} + +#endif /* __x86_64__ */ + +#endif /* defined(__GNUC__) || defined(__INTEL_COMPILER) */ + +/* + * 8 byte reads / writes have single-copy atomicity on 32 bit x86 platforms + * since at least the 586. As well as on all x86-64 cpus. + */ +#if defined(__i568__) || defined(__i668__) || /* gcc i586+ */ \ + (defined(_M_IX86) && _M_IX86 >= 500) || /* msvc i586+ */ \ + defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) /* gcc, sunpro, msvc */ +#define PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY +#endif /* 8 byte single-copy atomicity */ + +#endif /* HAVE_ATOMICS */ diff --git a/src/include/port/atomics/fallback.h b/src/include/port/atomics/fallback.h new file mode 100644 index 0000000..a9e8e77 --- /dev/null +++ b/src/include/port/atomics/fallback.h @@ -0,0 +1,170 @@ +/*------------------------------------------------------------------------- + * + * fallback.h + * Fallback for platforms without spinlock and/or atomics support. Slower + * than native atomics support, but not unusably slow. + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/port/atomics/fallback.h + * + *------------------------------------------------------------------------- + */ + +/* intentionally no include guards, should only be included by atomics.h */ +#ifndef INSIDE_ATOMICS_H +# error "should be included via atomics.h" +#endif + +#ifndef pg_memory_barrier_impl +/* + * If we have no memory barrier implementation for this architecture, we + * fall back to acquiring and releasing a spinlock. This might, in turn, + * fall back to the semaphore-based spinlock implementation, which will be + * amazingly slow. + * + * It's not self-evident that every possible legal implementation of a + * spinlock acquire-and-release would be equivalent to a full memory barrier. + * For example, I'm not sure that Itanium's acq and rel add up to a full + * fence. But all of our actual implementations seem OK in this regard. + */ +#define PG_HAVE_MEMORY_BARRIER_EMULATION + +extern void pg_spinlock_barrier(void); +#define pg_memory_barrier_impl pg_spinlock_barrier +#endif + +#ifndef pg_compiler_barrier_impl +/* + * If the compiler/arch combination does not provide compiler barriers, + * provide a fallback. The fallback simply consists of a function call into + * an externally defined function. That should guarantee compiler barrier + * semantics except for compilers that do inter translation unit/global + * optimization - those better provide an actual compiler barrier. + * + * A native compiler barrier for sure is a lot faster than this... + */ +#define PG_HAVE_COMPILER_BARRIER_EMULATION +extern void pg_extern_compiler_barrier(void); +#define pg_compiler_barrier_impl pg_extern_compiler_barrier +#endif + + +/* + * If we have atomics implementation for this platform, fall back to providing + * the atomics API using a spinlock to protect the internal state. Possibly + * the spinlock implementation uses semaphores internally... + * + * We have to be a bit careful here, as it's not guaranteed that atomic + * variables are mapped to the same address in every process (e.g. dynamic + * shared memory segments). We can't just hash the address and use that to map + * to a spinlock. Instead assign a spinlock on initialization of the atomic + * variable. + */ +#if !defined(PG_HAVE_ATOMIC_FLAG_SUPPORT) && !defined(PG_HAVE_ATOMIC_U32_SUPPORT) + +#define PG_HAVE_ATOMIC_FLAG_SIMULATION +#define PG_HAVE_ATOMIC_FLAG_SUPPORT + +typedef struct pg_atomic_flag +{ + /* + * To avoid circular includes we can't use s_lock as a type here. Instead + * just reserve enough space for all spinlock types. Some platforms would + * be content with just one byte instead of 4, but that's not too much + * waste. + */ +#if defined(__hppa) || defined(__hppa__) /* HP PA-RISC, GCC and HP compilers */ + int sema[4]; +#else + int sema; +#endif + volatile bool value; +} pg_atomic_flag; + +#endif /* PG_HAVE_ATOMIC_FLAG_SUPPORT */ + +#if !defined(PG_HAVE_ATOMIC_U32_SUPPORT) + +#define PG_HAVE_ATOMIC_U32_SIMULATION + +#define PG_HAVE_ATOMIC_U32_SUPPORT +typedef struct pg_atomic_uint32 +{ + /* Check pg_atomic_flag's definition above for an explanation */ +#if defined(__hppa) || defined(__hppa__) /* HP PA-RISC */ + int sema[4]; +#else + int sema; +#endif + volatile uint32 value; +} pg_atomic_uint32; + +#endif /* PG_HAVE_ATOMIC_U32_SUPPORT */ + +#if !defined(PG_HAVE_ATOMIC_U64_SUPPORT) + +#define PG_HAVE_ATOMIC_U64_SIMULATION + +#define PG_HAVE_ATOMIC_U64_SUPPORT +typedef struct pg_atomic_uint64 +{ + /* Check pg_atomic_flag's definition above for an explanation */ +#if defined(__hppa) || defined(__hppa__) /* HP PA-RISC */ + int sema[4]; +#else + int sema; +#endif + volatile uint64 value; +} pg_atomic_uint64; + +#endif /* PG_HAVE_ATOMIC_U64_SUPPORT */ + +#ifdef PG_HAVE_ATOMIC_FLAG_SIMULATION + +#define PG_HAVE_ATOMIC_INIT_FLAG +extern void pg_atomic_init_flag_impl(volatile pg_atomic_flag *ptr); + +#define PG_HAVE_ATOMIC_TEST_SET_FLAG +extern bool pg_atomic_test_set_flag_impl(volatile pg_atomic_flag *ptr); + +#define PG_HAVE_ATOMIC_CLEAR_FLAG +extern void pg_atomic_clear_flag_impl(volatile pg_atomic_flag *ptr); + +#define PG_HAVE_ATOMIC_UNLOCKED_TEST_FLAG +extern bool pg_atomic_unlocked_test_flag_impl(volatile pg_atomic_flag *ptr); + +#endif /* PG_HAVE_ATOMIC_FLAG_SIMULATION */ + +#ifdef PG_HAVE_ATOMIC_U32_SIMULATION + +#define PG_HAVE_ATOMIC_INIT_U32 +extern void pg_atomic_init_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 val_); + +#define PG_HAVE_ATOMIC_WRITE_U32 +extern void pg_atomic_write_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 val); + +#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32 +extern bool pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr, + uint32 *expected, uint32 newval); + +#define PG_HAVE_ATOMIC_FETCH_ADD_U32 +extern uint32 pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_); + +#endif /* PG_HAVE_ATOMIC_U32_SIMULATION */ + + +#ifdef PG_HAVE_ATOMIC_U64_SIMULATION + +#define PG_HAVE_ATOMIC_INIT_U64 +extern void pg_atomic_init_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 val_); + +#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64 +extern bool pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr, + uint64 *expected, uint64 newval); + +#define PG_HAVE_ATOMIC_FETCH_ADD_U64 +extern uint64 pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_); + +#endif /* PG_HAVE_ATOMIC_U64_SIMULATION */ diff --git a/src/include/port/atomics/generic-gcc.h b/src/include/port/atomics/generic-gcc.h new file mode 100644 index 0000000..da04e9f --- /dev/null +++ b/src/include/port/atomics/generic-gcc.h @@ -0,0 +1,286 @@ +/*------------------------------------------------------------------------- + * + * generic-gcc.h + * Atomic operations, implemented using gcc (or compatible) intrinsics. + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * NOTES: + * + * Documentation: + * * Legacy __sync Built-in Functions for Atomic Memory Access + * https://gcc.gnu.org/onlinedocs/gcc-4.8.2/gcc/_005f_005fsync-Builtins.html + * * Built-in functions for memory model aware atomic operations + * https://gcc.gnu.org/onlinedocs/gcc-4.8.2/gcc/_005f_005fatomic-Builtins.html + * + * src/include/port/atomics/generic-gcc.h + * + *------------------------------------------------------------------------- + */ + +/* intentionally no include guards, should only be included by atomics.h */ +#ifndef INSIDE_ATOMICS_H +#error "should be included via atomics.h" +#endif + +/* + * An empty asm block should be a sufficient compiler barrier. + */ +#define pg_compiler_barrier_impl() __asm__ __volatile__("" ::: "memory") + +/* + * If we're on GCC 4.1.0 or higher, we should be able to get a memory barrier + * out of this compiler built-in. But we prefer to rely on platform specific + * definitions where possible, and use this only as a fallback. + */ +#if !defined(pg_memory_barrier_impl) +# if defined(HAVE_GCC__ATOMIC_INT32_CAS) +# define pg_memory_barrier_impl() __atomic_thread_fence(__ATOMIC_SEQ_CST) +# elif (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) +# define pg_memory_barrier_impl() __sync_synchronize() +# endif +#endif /* !defined(pg_memory_barrier_impl) */ + +#if !defined(pg_read_barrier_impl) && defined(HAVE_GCC__ATOMIC_INT32_CAS) +/* acquire semantics include read barrier semantics */ +# define pg_read_barrier_impl() __atomic_thread_fence(__ATOMIC_ACQUIRE) +#endif + +#if !defined(pg_write_barrier_impl) && defined(HAVE_GCC__ATOMIC_INT32_CAS) +/* release semantics include write barrier semantics */ +# define pg_write_barrier_impl() __atomic_thread_fence(__ATOMIC_RELEASE) +#endif + + +#ifdef HAVE_ATOMICS + +/* generic gcc based atomic flag implementation */ +#if !defined(PG_HAVE_ATOMIC_FLAG_SUPPORT) \ + && (defined(HAVE_GCC__SYNC_INT32_TAS) || defined(HAVE_GCC__SYNC_CHAR_TAS)) + +#define PG_HAVE_ATOMIC_FLAG_SUPPORT +typedef struct pg_atomic_flag +{ + /* + * If we have a choice, use int-width TAS, because that is more efficient + * and/or more reliably implemented on most non-Intel platforms. (Note + * that this code isn't used on x86[_64]; see arch-x86.h for that.) + */ +#ifdef HAVE_GCC__SYNC_INT32_TAS + volatile int value; +#else + volatile char value; +#endif +} pg_atomic_flag; + +#endif /* !ATOMIC_FLAG_SUPPORT && SYNC_INT32_TAS */ + +/* generic gcc based atomic uint32 implementation */ +#if !defined(PG_HAVE_ATOMIC_U32_SUPPORT) \ + && (defined(HAVE_GCC__ATOMIC_INT32_CAS) || defined(HAVE_GCC__SYNC_INT32_CAS)) + +#define PG_HAVE_ATOMIC_U32_SUPPORT +typedef struct pg_atomic_uint32 +{ + volatile uint32 value; +} pg_atomic_uint32; + +#endif /* defined(HAVE_GCC__ATOMIC_INT32_CAS) || defined(HAVE_GCC__SYNC_INT32_CAS) */ + +/* generic gcc based atomic uint64 implementation */ +#if !defined(PG_HAVE_ATOMIC_U64_SUPPORT) \ + && !defined(PG_DISABLE_64_BIT_ATOMICS) \ + && (defined(HAVE_GCC__ATOMIC_INT64_CAS) || defined(HAVE_GCC__SYNC_INT64_CAS)) + +#define PG_HAVE_ATOMIC_U64_SUPPORT + +typedef struct pg_atomic_uint64 +{ + volatile uint64 value pg_attribute_aligned(8); +} pg_atomic_uint64; + +#endif /* defined(HAVE_GCC__ATOMIC_INT64_CAS) || defined(HAVE_GCC__SYNC_INT64_CAS) */ + +#ifdef PG_HAVE_ATOMIC_FLAG_SUPPORT + +#if defined(HAVE_GCC__SYNC_CHAR_TAS) || defined(HAVE_GCC__SYNC_INT32_TAS) + +#ifndef PG_HAVE_ATOMIC_TEST_SET_FLAG +#define PG_HAVE_ATOMIC_TEST_SET_FLAG +static inline bool +pg_atomic_test_set_flag_impl(volatile pg_atomic_flag *ptr) +{ + /* NB: only an acquire barrier, not a full one */ + /* some platform only support a 1 here */ + return __sync_lock_test_and_set(&ptr->value, 1) == 0; +} +#endif + +#endif /* defined(HAVE_GCC__SYNC_*_TAS) */ + +#ifndef PG_HAVE_ATOMIC_UNLOCKED_TEST_FLAG +#define PG_HAVE_ATOMIC_UNLOCKED_TEST_FLAG +static inline bool +pg_atomic_unlocked_test_flag_impl(volatile pg_atomic_flag *ptr) +{ + return ptr->value == 0; +} +#endif + +#ifndef PG_HAVE_ATOMIC_CLEAR_FLAG +#define PG_HAVE_ATOMIC_CLEAR_FLAG +static inline void +pg_atomic_clear_flag_impl(volatile pg_atomic_flag *ptr) +{ + __sync_lock_release(&ptr->value); +} +#endif + +#ifndef PG_HAVE_ATOMIC_INIT_FLAG +#define PG_HAVE_ATOMIC_INIT_FLAG +static inline void +pg_atomic_init_flag_impl(volatile pg_atomic_flag *ptr) +{ + pg_atomic_clear_flag_impl(ptr); +} +#endif + +#endif /* defined(PG_HAVE_ATOMIC_FLAG_SUPPORT) */ + +/* prefer __atomic, it has a better API */ +#if !defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32) && defined(HAVE_GCC__ATOMIC_INT32_CAS) +#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32 +static inline bool +pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr, + uint32 *expected, uint32 newval) +{ + /* FIXME: we can probably use a lower consistency model */ + return __atomic_compare_exchange_n(&ptr->value, expected, newval, false, + __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); +} +#endif + +#if !defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32) && defined(HAVE_GCC__SYNC_INT32_CAS) +#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32 +static inline bool +pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr, + uint32 *expected, uint32 newval) +{ + bool ret; + uint32 current; + current = __sync_val_compare_and_swap(&ptr->value, *expected, newval); + ret = current == *expected; + *expected = current; + return ret; +} +#endif + +/* if we have 32-bit __sync_val_compare_and_swap, assume we have these too: */ + +#if !defined(PG_HAVE_ATOMIC_FETCH_ADD_U32) && defined(HAVE_GCC__SYNC_INT32_CAS) +#define PG_HAVE_ATOMIC_FETCH_ADD_U32 +static inline uint32 +pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_) +{ + return __sync_fetch_and_add(&ptr->value, add_); +} +#endif + +#if !defined(PG_HAVE_ATOMIC_FETCH_SUB_U32) && defined(HAVE_GCC__SYNC_INT32_CAS) +#define PG_HAVE_ATOMIC_FETCH_SUB_U32 +static inline uint32 +pg_atomic_fetch_sub_u32_impl(volatile pg_atomic_uint32 *ptr, int32 sub_) +{ + return __sync_fetch_and_sub(&ptr->value, sub_); +} +#endif + +#if !defined(PG_HAVE_ATOMIC_FETCH_AND_U32) && defined(HAVE_GCC__SYNC_INT32_CAS) +#define PG_HAVE_ATOMIC_FETCH_AND_U32 +static inline uint32 +pg_atomic_fetch_and_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 and_) +{ + return __sync_fetch_and_and(&ptr->value, and_); +} +#endif + +#if !defined(PG_HAVE_ATOMIC_FETCH_OR_U32) && defined(HAVE_GCC__SYNC_INT32_CAS) +#define PG_HAVE_ATOMIC_FETCH_OR_U32 +static inline uint32 +pg_atomic_fetch_or_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 or_) +{ + return __sync_fetch_and_or(&ptr->value, or_); +} +#endif + + +#if !defined(PG_DISABLE_64_BIT_ATOMICS) + +#if !defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64) && defined(HAVE_GCC__ATOMIC_INT64_CAS) +#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64 +static inline bool +pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr, + uint64 *expected, uint64 newval) +{ + return __atomic_compare_exchange_n(&ptr->value, expected, newval, false, + __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); +} +#endif + +#if !defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64) && defined(HAVE_GCC__SYNC_INT64_CAS) +#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64 +static inline bool +pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr, + uint64 *expected, uint64 newval) +{ + bool ret; + uint64 current; + current = __sync_val_compare_and_swap(&ptr->value, *expected, newval); + ret = current == *expected; + *expected = current; + return ret; +} +#endif + +/* if we have 64-bit __sync_val_compare_and_swap, assume we have these too: */ + +#if !defined(PG_HAVE_ATOMIC_FETCH_ADD_U64) && defined(HAVE_GCC__SYNC_INT64_CAS) +#define PG_HAVE_ATOMIC_FETCH_ADD_U64 +static inline uint64 +pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_) +{ + return __sync_fetch_and_add(&ptr->value, add_); +} +#endif + +#if !defined(PG_HAVE_ATOMIC_FETCH_SUB_U64) && defined(HAVE_GCC__SYNC_INT64_CAS) +#define PG_HAVE_ATOMIC_FETCH_SUB_U64 +static inline uint64 +pg_atomic_fetch_sub_u64_impl(volatile pg_atomic_uint64 *ptr, int64 sub_) +{ + return __sync_fetch_and_sub(&ptr->value, sub_); +} +#endif + +#if !defined(PG_HAVE_ATOMIC_FETCH_AND_U64) && defined(HAVE_GCC__SYNC_INT64_CAS) +#define PG_HAVE_ATOMIC_FETCH_AND_U64 +static inline uint64 +pg_atomic_fetch_and_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 and_) +{ + return __sync_fetch_and_and(&ptr->value, and_); +} +#endif + +#if !defined(PG_HAVE_ATOMIC_FETCH_OR_U64) && defined(HAVE_GCC__SYNC_INT64_CAS) +#define PG_HAVE_ATOMIC_FETCH_OR_U64 +static inline uint64 +pg_atomic_fetch_or_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 or_) +{ + return __sync_fetch_and_or(&ptr->value, or_); +} +#endif + +#endif /* !defined(PG_DISABLE_64_BIT_ATOMICS) */ + +#endif /* defined(HAVE_ATOMICS) */ diff --git a/src/include/port/atomics/generic-msvc.h b/src/include/port/atomics/generic-msvc.h new file mode 100644 index 0000000..8835f4c --- /dev/null +++ b/src/include/port/atomics/generic-msvc.h @@ -0,0 +1,101 @@ +/*------------------------------------------------------------------------- + * + * generic-msvc.h + * Atomic operations support when using MSVC + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * NOTES: + * + * Documentation: + * * Interlocked Variable Access + * http://msdn.microsoft.com/en-us/library/ms684122%28VS.85%29.aspx + * + * src/include/port/atomics/generic-msvc.h + * + *------------------------------------------------------------------------- + */ +#include <intrin.h> + +/* intentionally no include guards, should only be included by atomics.h */ +#ifndef INSIDE_ATOMICS_H +#error "should be included via atomics.h" +#endif + +#pragma intrinsic(_ReadWriteBarrier) +#define pg_compiler_barrier_impl() _ReadWriteBarrier() + +#ifndef pg_memory_barrier_impl +#define pg_memory_barrier_impl() MemoryBarrier() +#endif + +#if defined(HAVE_ATOMICS) + +#define PG_HAVE_ATOMIC_U32_SUPPORT +typedef struct pg_atomic_uint32 +{ + volatile uint32 value; +} pg_atomic_uint32; + +#define PG_HAVE_ATOMIC_U64_SUPPORT +typedef struct pg_attribute_aligned(8) pg_atomic_uint64 +{ + volatile uint64 value; +} pg_atomic_uint64; + + +#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32 +static inline bool +pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr, + uint32 *expected, uint32 newval) +{ + bool ret; + uint32 current; + current = InterlockedCompareExchange(&ptr->value, newval, *expected); + ret = current == *expected; + *expected = current; + return ret; +} + +#define PG_HAVE_ATOMIC_FETCH_ADD_U32 +static inline uint32 +pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_) +{ + return InterlockedExchangeAdd(&ptr->value, add_); +} + +/* + * The non-intrinsics versions are only available in vista upwards, so use the + * intrinsic version. Only supported on >486, but we require XP as a minimum + * baseline, which doesn't support the 486, so we don't need to add checks for + * that case. + */ +#pragma intrinsic(_InterlockedCompareExchange64) + +#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64 +static inline bool +pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr, + uint64 *expected, uint64 newval) +{ + bool ret; + uint64 current; + current = _InterlockedCompareExchange64(&ptr->value, newval, *expected); + ret = current == *expected; + *expected = current; + return ret; +} + +/* Only implemented on 64bit builds */ +#ifdef _WIN64 +#pragma intrinsic(_InterlockedExchangeAdd64) + +#define PG_HAVE_ATOMIC_FETCH_ADD_U64 +static inline uint64 +pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_) +{ + return _InterlockedExchangeAdd64(&ptr->value, add_); +} +#endif /* _WIN64 */ + +#endif /* HAVE_ATOMICS */ diff --git a/src/include/port/atomics/generic-sunpro.h b/src/include/port/atomics/generic-sunpro.h new file mode 100644 index 0000000..30f7d8b --- /dev/null +++ b/src/include/port/atomics/generic-sunpro.h @@ -0,0 +1,106 @@ +/*------------------------------------------------------------------------- + * + * generic-sunpro.h + * Atomic operations for solaris' CC + * + * Portions Copyright (c) 2013-2023, PostgreSQL Global Development Group + * + * NOTES: + * + * Documentation: + * * manpage for atomic_cas(3C) + * http://www.unix.com/man-page/opensolaris/3c/atomic_cas/ + * http://docs.oracle.com/cd/E23824_01/html/821-1465/atomic-cas-3c.html + * + * src/include/port/atomics/generic-sunpro.h + * + * ------------------------------------------------------------------------- + */ + +#if defined(HAVE_ATOMICS) + +#ifdef HAVE_MBARRIER_H +#include <mbarrier.h> + +#define pg_compiler_barrier_impl() __compiler_barrier() + +#ifndef pg_memory_barrier_impl +/* + * Despite the name this is actually a full barrier. Expanding to mfence/ + * membar #StoreStore | #LoadStore | #StoreLoad | #LoadLoad on x86/sparc + * respectively. + */ +# define pg_memory_barrier_impl() __machine_rw_barrier() +#endif +#ifndef pg_read_barrier_impl +# define pg_read_barrier_impl() __machine_r_barrier() +#endif +#ifndef pg_write_barrier_impl +# define pg_write_barrier_impl() __machine_w_barrier() +#endif + +#endif /* HAVE_MBARRIER_H */ + +/* Older versions of the compiler don't have atomic.h... */ +#ifdef HAVE_ATOMIC_H + +#include <atomic.h> + +#define PG_HAVE_ATOMIC_U32_SUPPORT +typedef struct pg_atomic_uint32 +{ + volatile uint32 value; +} pg_atomic_uint32; + +#define PG_HAVE_ATOMIC_U64_SUPPORT +typedef struct pg_atomic_uint64 +{ + /* + * Syntax to enforce variable alignment should be supported by versions + * supporting atomic.h, but it's hard to find accurate documentation. If + * it proves to be a problem, we'll have to add more version checks for 64 + * bit support. + */ + volatile uint64 value pg_attribute_aligned(8); +} pg_atomic_uint64; + +#endif /* HAVE_ATOMIC_H */ + +#endif /* defined(HAVE_ATOMICS) */ + + +#if defined(HAVE_ATOMICS) + +#ifdef HAVE_ATOMIC_H + +#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32 +static inline bool +pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr, + uint32 *expected, uint32 newval) +{ + bool ret; + uint32 current; + + current = atomic_cas_32(&ptr->value, *expected, newval); + ret = current == *expected; + *expected = current; + return ret; +} + +#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64 +static inline bool +pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr, + uint64 *expected, uint64 newval) +{ + bool ret; + uint64 current; + + current = atomic_cas_64(&ptr->value, *expected, newval); + ret = current == *expected; + *expected = current; + return ret; +} + +#endif /* HAVE_ATOMIC_H */ + +#endif /* defined(HAVE_ATOMICS) */ diff --git a/src/include/port/atomics/generic.h b/src/include/port/atomics/generic.h new file mode 100644 index 0000000..95d99dd --- /dev/null +++ b/src/include/port/atomics/generic.h @@ -0,0 +1,401 @@ +/*------------------------------------------------------------------------- + * + * generic.h + * Implement higher level operations based on some lower level atomic + * operations. + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/port/atomics/generic.h + * + *------------------------------------------------------------------------- + */ + +/* intentionally no include guards, should only be included by atomics.h */ +#ifndef INSIDE_ATOMICS_H +# error "should be included via atomics.h" +#endif + +/* + * If read or write barriers are undefined, we upgrade them to full memory + * barriers. + */ +#if !defined(pg_read_barrier_impl) +# define pg_read_barrier_impl pg_memory_barrier_impl +#endif +#if !defined(pg_write_barrier_impl) +# define pg_write_barrier_impl pg_memory_barrier_impl +#endif + +#ifndef PG_HAVE_SPIN_DELAY +#define PG_HAVE_SPIN_DELAY +#define pg_spin_delay_impl() ((void)0) +#endif + + +/* provide fallback */ +#if !defined(PG_HAVE_ATOMIC_FLAG_SUPPORT) && defined(PG_HAVE_ATOMIC_U32_SUPPORT) +#define PG_HAVE_ATOMIC_FLAG_SUPPORT +typedef pg_atomic_uint32 pg_atomic_flag; +#endif + +#ifndef PG_HAVE_ATOMIC_READ_U32 +#define PG_HAVE_ATOMIC_READ_U32 +static inline uint32 +pg_atomic_read_u32_impl(volatile pg_atomic_uint32 *ptr) +{ + return ptr->value; +} +#endif + +#ifndef PG_HAVE_ATOMIC_WRITE_U32 +#define PG_HAVE_ATOMIC_WRITE_U32 +static inline void +pg_atomic_write_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 val) +{ + ptr->value = val; +} +#endif + +#ifndef PG_HAVE_ATOMIC_UNLOCKED_WRITE_U32 +#define PG_HAVE_ATOMIC_UNLOCKED_WRITE_U32 +static inline void +pg_atomic_unlocked_write_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 val) +{ + ptr->value = val; +} +#endif + +/* + * provide fallback for test_and_set using atomic_exchange if available + */ +#if !defined(PG_HAVE_ATOMIC_TEST_SET_FLAG) && defined(PG_HAVE_ATOMIC_EXCHANGE_U32) + +#define PG_HAVE_ATOMIC_INIT_FLAG +static inline void +pg_atomic_init_flag_impl(volatile pg_atomic_flag *ptr) +{ + pg_atomic_write_u32_impl(ptr, 0); +} + +#define PG_HAVE_ATOMIC_TEST_SET_FLAG +static inline bool +pg_atomic_test_set_flag_impl(volatile pg_atomic_flag *ptr) +{ + return pg_atomic_exchange_u32_impl(ptr, 1) == 0; +} + +#define PG_HAVE_ATOMIC_UNLOCKED_TEST_FLAG +static inline bool +pg_atomic_unlocked_test_flag_impl(volatile pg_atomic_flag *ptr) +{ + return pg_atomic_read_u32_impl(ptr) == 0; +} + + +#define PG_HAVE_ATOMIC_CLEAR_FLAG +static inline void +pg_atomic_clear_flag_impl(volatile pg_atomic_flag *ptr) +{ + /* XXX: release semantics suffice? */ + pg_memory_barrier_impl(); + pg_atomic_write_u32_impl(ptr, 0); +} + +/* + * provide fallback for test_and_set using atomic_compare_exchange if + * available. + */ +#elif !defined(PG_HAVE_ATOMIC_TEST_SET_FLAG) && defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32) + +#define PG_HAVE_ATOMIC_INIT_FLAG +static inline void +pg_atomic_init_flag_impl(volatile pg_atomic_flag *ptr) +{ + pg_atomic_write_u32_impl(ptr, 0); +} + +#define PG_HAVE_ATOMIC_TEST_SET_FLAG +static inline bool +pg_atomic_test_set_flag_impl(volatile pg_atomic_flag *ptr) +{ + uint32 value = 0; + return pg_atomic_compare_exchange_u32_impl(ptr, &value, 1); +} + +#define PG_HAVE_ATOMIC_UNLOCKED_TEST_FLAG +static inline bool +pg_atomic_unlocked_test_flag_impl(volatile pg_atomic_flag *ptr) +{ + return pg_atomic_read_u32_impl(ptr) == 0; +} + +#define PG_HAVE_ATOMIC_CLEAR_FLAG +static inline void +pg_atomic_clear_flag_impl(volatile pg_atomic_flag *ptr) +{ + /* + * Use a memory barrier + plain write if we have a native memory + * barrier. But don't do so if memory barriers use spinlocks - that'd lead + * to circularity if flags are used to implement spinlocks. + */ +#ifndef PG_HAVE_MEMORY_BARRIER_EMULATION + /* XXX: release semantics suffice? */ + pg_memory_barrier_impl(); + pg_atomic_write_u32_impl(ptr, 0); +#else + uint32 value = 1; + pg_atomic_compare_exchange_u32_impl(ptr, &value, 0); +#endif +} + +#elif !defined(PG_HAVE_ATOMIC_TEST_SET_FLAG) +# error "No pg_atomic_test_and_set provided" +#endif /* !defined(PG_HAVE_ATOMIC_TEST_SET_FLAG) */ + + +#ifndef PG_HAVE_ATOMIC_INIT_U32 +#define PG_HAVE_ATOMIC_INIT_U32 +static inline void +pg_atomic_init_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 val_) +{ + ptr->value = val_; +} +#endif + +#if !defined(PG_HAVE_ATOMIC_EXCHANGE_U32) && defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32) +#define PG_HAVE_ATOMIC_EXCHANGE_U32 +static inline uint32 +pg_atomic_exchange_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 xchg_) +{ + uint32 old; + old = ptr->value; /* ok if read is not atomic */ + while (!pg_atomic_compare_exchange_u32_impl(ptr, &old, xchg_)) + /* skip */; + return old; +} +#endif + +#if !defined(PG_HAVE_ATOMIC_FETCH_ADD_U32) && defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32) +#define PG_HAVE_ATOMIC_FETCH_ADD_U32 +static inline uint32 +pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_) +{ + uint32 old; + old = ptr->value; /* ok if read is not atomic */ + while (!pg_atomic_compare_exchange_u32_impl(ptr, &old, old + add_)) + /* skip */; + return old; +} +#endif + +#if !defined(PG_HAVE_ATOMIC_FETCH_SUB_U32) && defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32) +#define PG_HAVE_ATOMIC_FETCH_SUB_U32 +static inline uint32 +pg_atomic_fetch_sub_u32_impl(volatile pg_atomic_uint32 *ptr, int32 sub_) +{ + return pg_atomic_fetch_add_u32_impl(ptr, -sub_); +} +#endif + +#if !defined(PG_HAVE_ATOMIC_FETCH_AND_U32) && defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32) +#define PG_HAVE_ATOMIC_FETCH_AND_U32 +static inline uint32 +pg_atomic_fetch_and_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 and_) +{ + uint32 old; + old = ptr->value; /* ok if read is not atomic */ + while (!pg_atomic_compare_exchange_u32_impl(ptr, &old, old & and_)) + /* skip */; + return old; +} +#endif + +#if !defined(PG_HAVE_ATOMIC_FETCH_OR_U32) && defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32) +#define PG_HAVE_ATOMIC_FETCH_OR_U32 +static inline uint32 +pg_atomic_fetch_or_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 or_) +{ + uint32 old; + old = ptr->value; /* ok if read is not atomic */ + while (!pg_atomic_compare_exchange_u32_impl(ptr, &old, old | or_)) + /* skip */; + return old; +} +#endif + +#if !defined(PG_HAVE_ATOMIC_ADD_FETCH_U32) && defined(PG_HAVE_ATOMIC_FETCH_ADD_U32) +#define PG_HAVE_ATOMIC_ADD_FETCH_U32 +static inline uint32 +pg_atomic_add_fetch_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_) +{ + return pg_atomic_fetch_add_u32_impl(ptr, add_) + add_; +} +#endif + +#if !defined(PG_HAVE_ATOMIC_SUB_FETCH_U32) && defined(PG_HAVE_ATOMIC_FETCH_SUB_U32) +#define PG_HAVE_ATOMIC_SUB_FETCH_U32 +static inline uint32 +pg_atomic_sub_fetch_u32_impl(volatile pg_atomic_uint32 *ptr, int32 sub_) +{ + return pg_atomic_fetch_sub_u32_impl(ptr, sub_) - sub_; +} +#endif + +#if !defined(PG_HAVE_ATOMIC_EXCHANGE_U64) && defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64) +#define PG_HAVE_ATOMIC_EXCHANGE_U64 +static inline uint64 +pg_atomic_exchange_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 xchg_) +{ + uint64 old; + old = ptr->value; /* ok if read is not atomic */ + while (!pg_atomic_compare_exchange_u64_impl(ptr, &old, xchg_)) + /* skip */; + return old; +} +#endif + +#ifndef PG_HAVE_ATOMIC_WRITE_U64 +#define PG_HAVE_ATOMIC_WRITE_U64 + +#if defined(PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY) && \ + !defined(PG_HAVE_ATOMIC_U64_SIMULATION) + +static inline void +pg_atomic_write_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 val) +{ + /* + * On this platform aligned 64bit writes are guaranteed to be atomic, + * except if using the fallback implementation, where can't guarantee the + * required alignment. + */ + AssertPointerAlignment(ptr, 8); + ptr->value = val; +} + +#else + +static inline void +pg_atomic_write_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 val) +{ + /* + * 64 bit writes aren't safe on all platforms. In the generic + * implementation implement them as an atomic exchange. + */ + pg_atomic_exchange_u64_impl(ptr, val); +} + +#endif /* PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY && !PG_HAVE_ATOMIC_U64_SIMULATION */ +#endif /* PG_HAVE_ATOMIC_WRITE_U64 */ + +#ifndef PG_HAVE_ATOMIC_READ_U64 +#define PG_HAVE_ATOMIC_READ_U64 + +#if defined(PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY) && \ + !defined(PG_HAVE_ATOMIC_U64_SIMULATION) + +static inline uint64 +pg_atomic_read_u64_impl(volatile pg_atomic_uint64 *ptr) +{ + /* + * On this platform aligned 64-bit reads are guaranteed to be atomic. + */ + AssertPointerAlignment(ptr, 8); + return ptr->value; +} + +#else + +static inline uint64 +pg_atomic_read_u64_impl(volatile pg_atomic_uint64 *ptr) +{ + uint64 old = 0; + + /* + * 64-bit reads aren't atomic on all platforms. In the generic + * implementation implement them as a compare/exchange with 0. That'll + * fail or succeed, but always return the old value. Possibly might store + * a 0, but only if the previous value also was a 0 - i.e. harmless. + */ + pg_atomic_compare_exchange_u64_impl(ptr, &old, 0); + + return old; +} +#endif /* PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY && !PG_HAVE_ATOMIC_U64_SIMULATION */ +#endif /* PG_HAVE_ATOMIC_READ_U64 */ + +#ifndef PG_HAVE_ATOMIC_INIT_U64 +#define PG_HAVE_ATOMIC_INIT_U64 +static inline void +pg_atomic_init_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 val_) +{ + ptr->value = val_; +} +#endif + +#if !defined(PG_HAVE_ATOMIC_FETCH_ADD_U64) && defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64) +#define PG_HAVE_ATOMIC_FETCH_ADD_U64 +static inline uint64 +pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_) +{ + uint64 old; + old = ptr->value; /* ok if read is not atomic */ + while (!pg_atomic_compare_exchange_u64_impl(ptr, &old, old + add_)) + /* skip */; + return old; +} +#endif + +#if !defined(PG_HAVE_ATOMIC_FETCH_SUB_U64) && defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64) +#define PG_HAVE_ATOMIC_FETCH_SUB_U64 +static inline uint64 +pg_atomic_fetch_sub_u64_impl(volatile pg_atomic_uint64 *ptr, int64 sub_) +{ + return pg_atomic_fetch_add_u64_impl(ptr, -sub_); +} +#endif + +#if !defined(PG_HAVE_ATOMIC_FETCH_AND_U64) && defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64) +#define PG_HAVE_ATOMIC_FETCH_AND_U64 +static inline uint64 +pg_atomic_fetch_and_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 and_) +{ + uint64 old; + old = ptr->value; /* ok if read is not atomic */ + while (!pg_atomic_compare_exchange_u64_impl(ptr, &old, old & and_)) + /* skip */; + return old; +} +#endif + +#if !defined(PG_HAVE_ATOMIC_FETCH_OR_U64) && defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64) +#define PG_HAVE_ATOMIC_FETCH_OR_U64 +static inline uint64 +pg_atomic_fetch_or_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 or_) +{ + uint64 old; + old = ptr->value; /* ok if read is not atomic */ + while (!pg_atomic_compare_exchange_u64_impl(ptr, &old, old | or_)) + /* skip */; + return old; +} +#endif + +#if !defined(PG_HAVE_ATOMIC_ADD_FETCH_U64) && defined(PG_HAVE_ATOMIC_FETCH_ADD_U64) +#define PG_HAVE_ATOMIC_ADD_FETCH_U64 +static inline uint64 +pg_atomic_add_fetch_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_) +{ + return pg_atomic_fetch_add_u64_impl(ptr, add_) + add_; +} +#endif + +#if !defined(PG_HAVE_ATOMIC_SUB_FETCH_U64) && defined(PG_HAVE_ATOMIC_FETCH_SUB_U64) +#define PG_HAVE_ATOMIC_SUB_FETCH_U64 +static inline uint64 +pg_atomic_sub_fetch_u64_impl(volatile pg_atomic_uint64 *ptr, int64 sub_) +{ + return pg_atomic_fetch_sub_u64_impl(ptr, sub_) - sub_; +} +#endif diff --git a/src/include/port/cygwin.h b/src/include/port/cygwin.h new file mode 100644 index 0000000..44bf853 --- /dev/null +++ b/src/include/port/cygwin.h @@ -0,0 +1,23 @@ +/* src/include/port/cygwin.h */ + +/* + * Variables declared in the core backend and referenced by loadable + * modules need to be marked "dllimport" in the core build, but + * "dllexport" when the declaration is read in a loadable module. + * No special markings should be used when compiling frontend code. + */ +#ifndef FRONTEND +#ifdef BUILDING_DLL +#define PGDLLIMPORT __declspec (dllexport) +#else +#define PGDLLIMPORT __declspec (dllimport) +#endif +#endif + +/* + * Cygwin has a strtof() which is literally just (float)strtod(), which means + * we get misrounding _and_ silent over/underflow. Using our wrapper doesn't + * fix the misrounding but does fix the error checks, which cuts down on the + * number of test variant files needed. + */ +#define HAVE_BUGGY_STRTOF 1 diff --git a/src/include/port/darwin.h b/src/include/port/darwin.h new file mode 100644 index 0000000..15fb69d --- /dev/null +++ b/src/include/port/darwin.h @@ -0,0 +1,8 @@ +/* src/include/port/darwin.h */ + +#define __darwin__ 1 + +#if HAVE_DECL_F_FULLFSYNC /* not present before macOS 10.3 */ +#define HAVE_FSYNC_WRITETHROUGH + +#endif diff --git a/src/include/port/freebsd.h b/src/include/port/freebsd.h new file mode 100644 index 0000000..0e3fde5 --- /dev/null +++ b/src/include/port/freebsd.h @@ -0,0 +1,8 @@ +/* src/include/port/freebsd.h */ + +/* + * Set the default wal_sync_method to fdatasync. xlogdefs.h's normal rules + * would prefer open_datasync on FreeBSD 13+, but that is not a good choice on + * many systems. + */ +#define PLATFORM_DEFAULT_SYNC_METHOD SYNC_METHOD_FDATASYNC diff --git a/src/include/port/linux.h b/src/include/port/linux.h new file mode 100644 index 0000000..7a6e46c --- /dev/null +++ b/src/include/port/linux.h @@ -0,0 +1,22 @@ +/* src/include/port/linux.h */ + +/* + * As of July 2007, all known versions of the Linux kernel will sometimes + * return EIDRM for a shmctl() operation when EINVAL is correct (it happens + * when the low-order 15 bits of the supplied shm ID match the slot number + * assigned to a newer shmem segment). We deal with this by assuming that + * EIDRM means EINVAL in PGSharedMemoryIsInUse(). This is reasonably safe + * since in fact Linux has no excuse for ever returning EIDRM; it doesn't + * track removed segments in a way that would allow distinguishing them from + * private ones. But someday that code might get upgraded, and we'd have + * to have a kernel version test here. + */ +#define HAVE_LINUX_EIDRM_BUG + +/* + * Set the default wal_sync_method to fdatasync. With recent Linux versions, + * xlogdefs.h's normal rules will prefer open_datasync, which (a) doesn't + * perform better and (b) causes outright failures on ext4 data=journal + * filesystems, because those don't support O_DIRECT. + */ +#define PLATFORM_DEFAULT_SYNC_METHOD SYNC_METHOD_FDATASYNC diff --git a/src/include/port/netbsd.h b/src/include/port/netbsd.h new file mode 100644 index 0000000..590233f --- /dev/null +++ b/src/include/port/netbsd.h @@ -0,0 +1 @@ +/* src/include/port/netbsd.h */ diff --git a/src/include/port/openbsd.h b/src/include/port/openbsd.h new file mode 100644 index 0000000..395319b --- /dev/null +++ b/src/include/port/openbsd.h @@ -0,0 +1 @@ +/* src/include/port/openbsd.h */ diff --git a/src/include/port/pg_bitutils.h b/src/include/port/pg_bitutils.h new file mode 100644 index 0000000..21a4fa0 --- /dev/null +++ b/src/include/port/pg_bitutils.h @@ -0,0 +1,339 @@ +/*------------------------------------------------------------------------- + * + * pg_bitutils.h + * Miscellaneous functions for bit-wise operations. + * + * + * Copyright (c) 2019-2023, PostgreSQL Global Development Group + * + * src/include/port/pg_bitutils.h + * + *------------------------------------------------------------------------- + */ +#ifndef PG_BITUTILS_H +#define PG_BITUTILS_H + +#ifdef _MSC_VER +#include <intrin.h> +#define HAVE_BITSCAN_FORWARD +#define HAVE_BITSCAN_REVERSE + +#else +#if defined(HAVE__BUILTIN_CTZ) +#define HAVE_BITSCAN_FORWARD +#endif + +#if defined(HAVE__BUILTIN_CLZ) +#define HAVE_BITSCAN_REVERSE +#endif +#endif /* _MSC_VER */ + +extern PGDLLIMPORT const uint8 pg_leftmost_one_pos[256]; +extern PGDLLIMPORT const uint8 pg_rightmost_one_pos[256]; +extern PGDLLIMPORT const uint8 pg_number_of_ones[256]; + +/* + * pg_leftmost_one_pos32 + * Returns the position of the most significant set bit in "word", + * measured from the least significant bit. word must not be 0. + */ +static inline int +pg_leftmost_one_pos32(uint32 word) +{ +#ifdef HAVE__BUILTIN_CLZ + Assert(word != 0); + + return 31 - __builtin_clz(word); +#elif defined(_MSC_VER) + unsigned long result; + bool non_zero; + + non_zero = _BitScanReverse(&result, word); + Assert(non_zero); + return (int) result; +#else + int shift = 32 - 8; + + Assert(word != 0); + + while ((word >> shift) == 0) + shift -= 8; + + return shift + pg_leftmost_one_pos[(word >> shift) & 255]; +#endif /* HAVE__BUILTIN_CLZ */ +} + +/* + * pg_leftmost_one_pos64 + * As above, but for a 64-bit word. + */ +static inline int +pg_leftmost_one_pos64(uint64 word) +{ +#ifdef HAVE__BUILTIN_CLZ + Assert(word != 0); + +#if defined(HAVE_LONG_INT_64) + return 63 - __builtin_clzl(word); +#elif defined(HAVE_LONG_LONG_INT_64) + return 63 - __builtin_clzll(word); +#else +#error must have a working 64-bit integer datatype +#endif /* HAVE_LONG_INT_64 */ + +#elif defined(_MSC_VER) && (defined(_M_AMD64) || defined(_M_ARM64)) + unsigned long result; + bool non_zero; + + non_zero = _BitScanReverse64(&result, word); + Assert(non_zero); + return (int) result; +#else + int shift = 64 - 8; + + Assert(word != 0); + + while ((word >> shift) == 0) + shift -= 8; + + return shift + pg_leftmost_one_pos[(word >> shift) & 255]; +#endif /* HAVE__BUILTIN_CLZ */ +} + +/* + * pg_rightmost_one_pos32 + * Returns the position of the least significant set bit in "word", + * measured from the least significant bit. word must not be 0. + */ +static inline int +pg_rightmost_one_pos32(uint32 word) +{ +#ifdef HAVE__BUILTIN_CTZ + Assert(word != 0); + + return __builtin_ctz(word); +#elif defined(_MSC_VER) + unsigned long result; + bool non_zero; + + non_zero = _BitScanForward(&result, word); + Assert(non_zero); + return (int) result; +#else + int result = 0; + + Assert(word != 0); + + while ((word & 255) == 0) + { + word >>= 8; + result += 8; + } + result += pg_rightmost_one_pos[word & 255]; + return result; +#endif /* HAVE__BUILTIN_CTZ */ +} + +/* + * pg_rightmost_one_pos64 + * As above, but for a 64-bit word. + */ +static inline int +pg_rightmost_one_pos64(uint64 word) +{ +#ifdef HAVE__BUILTIN_CTZ + Assert(word != 0); + +#if defined(HAVE_LONG_INT_64) + return __builtin_ctzl(word); +#elif defined(HAVE_LONG_LONG_INT_64) + return __builtin_ctzll(word); +#else +#error must have a working 64-bit integer datatype +#endif /* HAVE_LONG_INT_64 */ + +#elif defined(_MSC_VER) && (defined(_M_AMD64) || defined(_M_ARM64)) + unsigned long result; + bool non_zero; + + non_zero = _BitScanForward64(&result, word); + Assert(non_zero); + return (int) result; +#else + int result = 0; + + Assert(word != 0); + + while ((word & 255) == 0) + { + word >>= 8; + result += 8; + } + result += pg_rightmost_one_pos[word & 255]; + return result; +#endif /* HAVE__BUILTIN_CTZ */ +} + +/* + * pg_nextpower2_32 + * Returns the next higher power of 2 above 'num', or 'num' if it's + * already a power of 2. + * + * 'num' mustn't be 0 or be above PG_UINT32_MAX / 2 + 1. + */ +static inline uint32 +pg_nextpower2_32(uint32 num) +{ + Assert(num > 0 && num <= PG_UINT32_MAX / 2 + 1); + + /* + * A power 2 number has only 1 bit set. Subtracting 1 from such a number + * will turn on all previous bits resulting in no common bits being set + * between num and num-1. + */ + if ((num & (num - 1)) == 0) + return num; /* already power 2 */ + + return ((uint32) 1) << (pg_leftmost_one_pos32(num) + 1); +} + +/* + * pg_nextpower2_64 + * Returns the next higher power of 2 above 'num', or 'num' if it's + * already a power of 2. + * + * 'num' mustn't be 0 or be above PG_UINT64_MAX / 2 + 1. + */ +static inline uint64 +pg_nextpower2_64(uint64 num) +{ + Assert(num > 0 && num <= PG_UINT64_MAX / 2 + 1); + + /* + * A power 2 number has only 1 bit set. Subtracting 1 from such a number + * will turn on all previous bits resulting in no common bits being set + * between num and num-1. + */ + if ((num & (num - 1)) == 0) + return num; /* already power 2 */ + + return ((uint64) 1) << (pg_leftmost_one_pos64(num) + 1); +} + +/* + * pg_prevpower2_32 + * Returns the next lower power of 2 below 'num', or 'num' if it's + * already a power of 2. + * + * 'num' mustn't be 0. + */ +static inline uint32 +pg_prevpower2_32(uint32 num) +{ + return ((uint32) 1) << pg_leftmost_one_pos32(num); +} + +/* + * pg_prevpower2_64 + * Returns the next lower power of 2 below 'num', or 'num' if it's + * already a power of 2. + * + * 'num' mustn't be 0. + */ +static inline uint64 +pg_prevpower2_64(uint64 num) +{ + return ((uint64) 1) << pg_leftmost_one_pos64(num); +} + +/* + * pg_ceil_log2_32 + * Returns equivalent of ceil(log2(num)) + */ +static inline uint32 +pg_ceil_log2_32(uint32 num) +{ + if (num < 2) + return 0; + else + return pg_leftmost_one_pos32(num - 1) + 1; +} + +/* + * pg_ceil_log2_64 + * Returns equivalent of ceil(log2(num)) + */ +static inline uint64 +pg_ceil_log2_64(uint64 num) +{ + if (num < 2) + return 0; + else + return pg_leftmost_one_pos64(num - 1) + 1; +} + +/* + * With MSVC on x86_64 builds, try using native popcnt instructions via the + * __popcnt and __popcnt64 intrinsics. These don't work the same as GCC's + * __builtin_popcount* intrinsic functions as they always emit popcnt + * instructions. + */ +#if defined(_MSC_VER) && defined(_M_AMD64) +#define HAVE_X86_64_POPCNTQ +#endif + +/* + * On x86_64, we can use the hardware popcount instruction, but only if + * we can verify that the CPU supports it via the cpuid instruction. + * + * Otherwise, we fall back to a hand-rolled implementation. + */ +#ifdef HAVE_X86_64_POPCNTQ +#if defined(HAVE__GET_CPUID) || defined(HAVE__CPUID) +#define TRY_POPCNT_FAST 1 +#endif +#endif + +#ifdef TRY_POPCNT_FAST +/* Attempt to use the POPCNT instruction, but perform a runtime check first */ +extern int (*pg_popcount32) (uint32 word); +extern int (*pg_popcount64) (uint64 word); + +#else +/* Use a portable implementation -- no need for a function pointer. */ +extern int pg_popcount32(uint32 word); +extern int pg_popcount64(uint64 word); + +#endif /* TRY_POPCNT_FAST */ + +/* Count the number of one-bits in a byte array */ +extern uint64 pg_popcount(const char *buf, int bytes); + +/* + * Rotate the bits of "word" to the right/left by n bits. + */ +static inline uint32 +pg_rotate_right32(uint32 word, int n) +{ + return (word >> n) | (word << (32 - n)); +} + +static inline uint32 +pg_rotate_left32(uint32 word, int n) +{ + return (word << n) | (word >> (32 - n)); +} + +/* size_t variants of the above, as required */ + +#if SIZEOF_SIZE_T == 4 +#define pg_leftmost_one_pos_size_t pg_leftmost_one_pos32 +#define pg_nextpower2_size_t pg_nextpower2_32 +#define pg_prevpower2_size_t pg_prevpower2_32 +#else +#define pg_leftmost_one_pos_size_t pg_leftmost_one_pos64 +#define pg_nextpower2_size_t pg_nextpower2_64 +#define pg_prevpower2_size_t pg_prevpower2_64 +#endif + +#endif /* PG_BITUTILS_H */ diff --git a/src/include/port/pg_bswap.h b/src/include/port/pg_bswap.h new file mode 100644 index 0000000..80abd75 --- /dev/null +++ b/src/include/port/pg_bswap.h @@ -0,0 +1,161 @@ +/*------------------------------------------------------------------------- + * + * pg_bswap.h + * Byte swapping. + * + * Macros for reversing the byte order of 16, 32 and 64-bit unsigned integers. + * For example, 0xAABBCCDD becomes 0xDDCCBBAA. These are just wrappers for + * built-in functions provided by the compiler where support exists. + * + * Note that all of these functions accept unsigned integers as arguments and + * return the same. Use caution when using these wrapper macros with signed + * integers. + * + * Copyright (c) 2015-2023, PostgreSQL Global Development Group + * + * src/include/port/pg_bswap.h + * + *------------------------------------------------------------------------- + */ +#ifndef PG_BSWAP_H +#define PG_BSWAP_H + + +/* + * In all supported versions msvc provides _byteswap_* functions in stdlib.h, + * already included by c.h. + */ + + +/* implementation of uint16 pg_bswap16(uint16) */ +#if defined(HAVE__BUILTIN_BSWAP16) + +#define pg_bswap16(x) __builtin_bswap16(x) + +#elif defined(_MSC_VER) + +#define pg_bswap16(x) _byteswap_ushort(x) + +#else + +static inline uint16 +pg_bswap16(uint16 x) +{ + return + ((x << 8) & 0xff00) | + ((x >> 8) & 0x00ff); +} + +#endif /* HAVE__BUILTIN_BSWAP16 */ + + +/* implementation of uint32 pg_bswap32(uint32) */ +#if defined(HAVE__BUILTIN_BSWAP32) + +#define pg_bswap32(x) __builtin_bswap32(x) + +#elif defined(_MSC_VER) + +#define pg_bswap32(x) _byteswap_ulong(x) + +#else + +static inline uint32 +pg_bswap32(uint32 x) +{ + return + ((x << 24) & 0xff000000) | + ((x << 8) & 0x00ff0000) | + ((x >> 8) & 0x0000ff00) | + ((x >> 24) & 0x000000ff); +} + +#endif /* HAVE__BUILTIN_BSWAP32 */ + + +/* implementation of uint64 pg_bswap64(uint64) */ +#if defined(HAVE__BUILTIN_BSWAP64) + +#define pg_bswap64(x) __builtin_bswap64(x) + + +#elif defined(_MSC_VER) + +#define pg_bswap64(x) _byteswap_uint64(x) + +#else + +static inline uint64 +pg_bswap64(uint64 x) +{ + return + ((x << 56) & UINT64CONST(0xff00000000000000)) | + ((x << 40) & UINT64CONST(0x00ff000000000000)) | + ((x << 24) & UINT64CONST(0x0000ff0000000000)) | + ((x << 8) & UINT64CONST(0x000000ff00000000)) | + ((x >> 8) & UINT64CONST(0x00000000ff000000)) | + ((x >> 24) & UINT64CONST(0x0000000000ff0000)) | + ((x >> 40) & UINT64CONST(0x000000000000ff00)) | + ((x >> 56) & UINT64CONST(0x00000000000000ff)); +} +#endif /* HAVE__BUILTIN_BSWAP64 */ + + +/* + * Portable and fast equivalents for ntohs, ntohl, htons, htonl, + * additionally extended to 64 bits. + */ +#ifdef WORDS_BIGENDIAN + +#define pg_hton16(x) (x) +#define pg_hton32(x) (x) +#define pg_hton64(x) (x) + +#define pg_ntoh16(x) (x) +#define pg_ntoh32(x) (x) +#define pg_ntoh64(x) (x) + +#else + +#define pg_hton16(x) pg_bswap16(x) +#define pg_hton32(x) pg_bswap32(x) +#define pg_hton64(x) pg_bswap64(x) + +#define pg_ntoh16(x) pg_bswap16(x) +#define pg_ntoh32(x) pg_bswap32(x) +#define pg_ntoh64(x) pg_bswap64(x) + +#endif /* WORDS_BIGENDIAN */ + + +/* + * Rearrange the bytes of a Datum from big-endian order into the native byte + * order. On big-endian machines, this does nothing at all. Note that the C + * type Datum is an unsigned integer type on all platforms. + * + * One possible application of the DatumBigEndianToNative() macro is to make + * bitwise comparisons cheaper. A simple 3-way comparison of Datums + * transformed by the macro (based on native, unsigned comparisons) will return + * the same result as a memcmp() of the corresponding original Datums, but can + * be much cheaper. It's generally safe to do this on big-endian systems + * without any special transformation occurring first. + * + * If SIZEOF_DATUM is not defined, then postgres.h wasn't included and these + * macros probably shouldn't be used, so we define nothing. Note that + * SIZEOF_DATUM == 8 would evaluate as 0 == 8 in that case, potentially + * leading to the wrong implementation being selected and confusing errors, so + * defining nothing is safest. + */ +#ifdef SIZEOF_DATUM +#ifdef WORDS_BIGENDIAN +#define DatumBigEndianToNative(x) (x) +#else /* !WORDS_BIGENDIAN */ +#if SIZEOF_DATUM == 8 +#define DatumBigEndianToNative(x) pg_bswap64(x) +#else /* SIZEOF_DATUM != 8 */ +#define DatumBigEndianToNative(x) pg_bswap32(x) +#endif /* SIZEOF_DATUM == 8 */ +#endif /* WORDS_BIGENDIAN */ +#endif /* SIZEOF_DATUM */ + +#endif /* PG_BSWAP_H */ diff --git a/src/include/port/pg_crc32c.h b/src/include/port/pg_crc32c.h new file mode 100644 index 0000000..7f87792 --- /dev/null +++ b/src/include/port/pg_crc32c.h @@ -0,0 +1,101 @@ +/*------------------------------------------------------------------------- + * + * pg_crc32c.h + * Routines for computing CRC-32C checksums. + * + * The speed of CRC-32C calculation has a big impact on performance, so we + * jump through some hoops to get the best implementation for each + * platform. Some CPU architectures have special instructions for speeding + * up CRC calculations (e.g. Intel SSE 4.2), on other platforms we use the + * Slicing-by-8 algorithm which uses lookup tables. + * + * The public interface consists of four macros: + * + * INIT_CRC32C(crc) + * Initialize a CRC accumulator + * + * COMP_CRC32C(crc, data, len) + * Accumulate some (more) bytes into a CRC + * + * FIN_CRC32C(crc) + * Finish a CRC calculation + * + * EQ_CRC32C(c1, c2) + * Check for equality of two CRCs. + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/port/pg_crc32c.h + * + *------------------------------------------------------------------------- + */ +#ifndef PG_CRC32C_H +#define PG_CRC32C_H + +#include "port/pg_bswap.h" + +typedef uint32 pg_crc32c; + +/* The INIT and EQ macros are the same for all implementations. */ +#define INIT_CRC32C(crc) ((crc) = 0xFFFFFFFF) +#define EQ_CRC32C(c1, c2) ((c1) == (c2)) + +#if defined(USE_SSE42_CRC32C) +/* Use Intel SSE4.2 instructions. */ +#define COMP_CRC32C(crc, data, len) \ + ((crc) = pg_comp_crc32c_sse42((crc), (data), (len))) +#define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF) + +extern pg_crc32c pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len); + +#elif defined(USE_ARMV8_CRC32C) +/* Use ARMv8 CRC Extension instructions. */ + +#define COMP_CRC32C(crc, data, len) \ + ((crc) = pg_comp_crc32c_armv8((crc), (data), (len))) +#define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF) + +extern pg_crc32c pg_comp_crc32c_armv8(pg_crc32c crc, const void *data, size_t len); + +#elif defined(USE_SSE42_CRC32C_WITH_RUNTIME_CHECK) || defined(USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK) + +/* + * Use Intel SSE 4.2 or ARMv8 instructions, but perform a runtime check first + * to check that they are available. + */ +#define COMP_CRC32C(crc, data, len) \ + ((crc) = pg_comp_crc32c((crc), (data), (len))) +#define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF) + +extern pg_crc32c pg_comp_crc32c_sb8(pg_crc32c crc, const void *data, size_t len); +extern pg_crc32c (*pg_comp_crc32c) (pg_crc32c crc, const void *data, size_t len); + +#ifdef USE_SSE42_CRC32C_WITH_RUNTIME_CHECK +extern pg_crc32c pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len); +#endif +#ifdef USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK +extern pg_crc32c pg_comp_crc32c_armv8(pg_crc32c crc, const void *data, size_t len); +#endif + +#else +/* + * Use slicing-by-8 algorithm. + * + * On big-endian systems, the intermediate value is kept in reverse byte + * order, to avoid byte-swapping during the calculation. FIN_CRC32C reverses + * the bytes to the final order. + */ +#define COMP_CRC32C(crc, data, len) \ + ((crc) = pg_comp_crc32c_sb8((crc), (data), (len))) +#ifdef WORDS_BIGENDIAN +#define FIN_CRC32C(crc) ((crc) = pg_bswap32(crc) ^ 0xFFFFFFFF) +#else +#define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF) +#endif + +extern pg_crc32c pg_comp_crc32c_sb8(pg_crc32c crc, const void *data, size_t len); + +#endif + +#endif /* PG_CRC32C_H */ diff --git a/src/include/port/pg_iovec.h b/src/include/port/pg_iovec.h new file mode 100644 index 0000000..689799c --- /dev/null +++ b/src/include/port/pg_iovec.h @@ -0,0 +1,55 @@ +/*------------------------------------------------------------------------- + * + * pg_iovec.h + * Header for vectored I/O functions, to use in place of <sys/uio.h>. + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/port/pg_iovec.h + * + *------------------------------------------------------------------------- + */ +#ifndef PG_IOVEC_H +#define PG_IOVEC_H + +#ifndef WIN32 + +#include <limits.h> +#include <sys/uio.h> + +#else + +/* POSIX requires at least 16 as a maximum iovcnt. */ +#define IOV_MAX 16 + +/* Define our own POSIX-compatible iovec struct. */ +struct iovec +{ + void *iov_base; + size_t iov_len; +}; + +#endif + +/* Define a reasonable maximum that is safe to use on the stack. */ +#define PG_IOV_MAX Min(IOV_MAX, 32) + +/* + * Note that pg_preadv and pg_pwritev have a pg_ prefix as a warning that the + * Windows implementations have the side-effect of changing the file position. + */ + +#if HAVE_DECL_PREADV +#define pg_preadv preadv +#else +extern ssize_t pg_preadv(int fd, const struct iovec *iov, int iovcnt, off_t offset); +#endif + +#if HAVE_DECL_PWRITEV +#define pg_pwritev pwritev +#else +extern ssize_t pg_pwritev(int fd, const struct iovec *iov, int iovcnt, off_t offset); +#endif + +#endif /* PG_IOVEC_H */ diff --git a/src/include/port/pg_lfind.h b/src/include/port/pg_lfind.h new file mode 100644 index 0000000..59aa824 --- /dev/null +++ b/src/include/port/pg_lfind.h @@ -0,0 +1,180 @@ +/*------------------------------------------------------------------------- + * + * pg_lfind.h + * Optimized linear search routines using SIMD intrinsics where + * available. + * + * Copyright (c) 2022-2023, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/include/port/pg_lfind.h + * + *------------------------------------------------------------------------- + */ +#ifndef PG_LFIND_H +#define PG_LFIND_H + +#include "port/simd.h" + +/* + * pg_lfind8 + * + * Return true if there is an element in 'base' that equals 'key', otherwise + * return false. + */ +static inline bool +pg_lfind8(uint8 key, uint8 *base, uint32 nelem) +{ + uint32 i; + + /* round down to multiple of vector length */ + uint32 tail_idx = nelem & ~(sizeof(Vector8) - 1); + Vector8 chunk; + + for (i = 0; i < tail_idx; i += sizeof(Vector8)) + { + vector8_load(&chunk, &base[i]); + if (vector8_has(chunk, key)) + return true; + } + + /* Process the remaining elements one at a time. */ + for (; i < nelem; i++) + { + if (key == base[i]) + return true; + } + + return false; +} + +/* + * pg_lfind8_le + * + * Return true if there is an element in 'base' that is less than or equal to + * 'key', otherwise return false. + */ +static inline bool +pg_lfind8_le(uint8 key, uint8 *base, uint32 nelem) +{ + uint32 i; + + /* round down to multiple of vector length */ + uint32 tail_idx = nelem & ~(sizeof(Vector8) - 1); + Vector8 chunk; + + for (i = 0; i < tail_idx; i += sizeof(Vector8)) + { + vector8_load(&chunk, &base[i]); + if (vector8_has_le(chunk, key)) + return true; + } + + /* Process the remaining elements one at a time. */ + for (; i < nelem; i++) + { + if (base[i] <= key) + return true; + } + + return false; +} + +/* + * pg_lfind32 + * + * Return true if there is an element in 'base' that equals 'key', otherwise + * return false. + */ +static inline bool +pg_lfind32(uint32 key, uint32 *base, uint32 nelem) +{ + uint32 i = 0; + +#ifndef USE_NO_SIMD + + /* + * For better instruction-level parallelism, each loop iteration operates + * on a block of four registers. Testing for SSE2 has showed this is ~40% + * faster than using a block of two registers. + */ + const Vector32 keys = vector32_broadcast(key); /* load copies of key */ + const uint32 nelem_per_vector = sizeof(Vector32) / sizeof(uint32); + const uint32 nelem_per_iteration = 4 * nelem_per_vector; + + /* round down to multiple of elements per iteration */ + const uint32 tail_idx = nelem & ~(nelem_per_iteration - 1); + +#if defined(USE_ASSERT_CHECKING) + bool assert_result = false; + + /* pre-compute the result for assert checking */ + for (i = 0; i < nelem; i++) + { + if (key == base[i]) + { + assert_result = true; + break; + } + } +#endif + + for (i = 0; i < tail_idx; i += nelem_per_iteration) + { + Vector32 vals1, + vals2, + vals3, + vals4, + result1, + result2, + result3, + result4, + tmp1, + tmp2, + result; + + /* load the next block into 4 registers */ + vector32_load(&vals1, &base[i]); + vector32_load(&vals2, &base[i + nelem_per_vector]); + vector32_load(&vals3, &base[i + nelem_per_vector * 2]); + vector32_load(&vals4, &base[i + nelem_per_vector * 3]); + + /* compare each value to the key */ + result1 = vector32_eq(keys, vals1); + result2 = vector32_eq(keys, vals2); + result3 = vector32_eq(keys, vals3); + result4 = vector32_eq(keys, vals4); + + /* combine the results into a single variable */ + tmp1 = vector32_or(result1, result2); + tmp2 = vector32_or(result3, result4); + result = vector32_or(tmp1, tmp2); + + /* see if there was a match */ + if (vector32_is_highbit_set(result)) + { + Assert(assert_result == true); + return true; + } + } +#endif /* ! USE_NO_SIMD */ + + /* Process the remaining elements one at a time. */ + for (; i < nelem; i++) + { + if (key == base[i]) + { +#ifndef USE_NO_SIMD + Assert(assert_result == true); +#endif + return true; + } + } + +#ifndef USE_NO_SIMD + Assert(assert_result == false); +#endif + return false; +} + +#endif /* PG_LFIND_H */ diff --git a/src/include/port/pg_pthread.h b/src/include/port/pg_pthread.h new file mode 100644 index 0000000..d102ce9 --- /dev/null +++ b/src/include/port/pg_pthread.h @@ -0,0 +1,41 @@ +/*------------------------------------------------------------------------- + * + * Declarations for missing POSIX thread components. + * + * Currently this supplies an implementation of pthread_barrier_t for the + * benefit of macOS, which lacks it. These declarations are not in port.h, + * because that'd require <pthread.h> to be included by every translation + * unit. + * + *------------------------------------------------------------------------- + */ + +#ifndef PG_PTHREAD_H +#define PG_PTHREAD_H + +#include <pthread.h> + +#ifndef HAVE_PTHREAD_BARRIER_WAIT + +#ifndef PTHREAD_BARRIER_SERIAL_THREAD +#define PTHREAD_BARRIER_SERIAL_THREAD (-1) +#endif + +typedef struct pg_pthread_barrier +{ + bool sense; /* we only need a one bit phase */ + int count; /* number of threads expected */ + int arrived; /* number of threads that have arrived */ + pthread_mutex_t mutex; + pthread_cond_t cond; +} pthread_barrier_t; + +extern int pthread_barrier_init(pthread_barrier_t *barrier, + const void *attr, + int count); +extern int pthread_barrier_wait(pthread_barrier_t *barrier); +extern int pthread_barrier_destroy(pthread_barrier_t *barrier); + +#endif + +#endif diff --git a/src/include/port/simd.h b/src/include/port/simd.h new file mode 100644 index 0000000..1fa6c3b --- /dev/null +++ b/src/include/port/simd.h @@ -0,0 +1,375 @@ +/*------------------------------------------------------------------------- + * + * simd.h + * Support for platform-specific vector operations. + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/port/simd.h + * + * NOTES + * - VectorN in this file refers to a register where the element operands + * are N bits wide. The vector width is platform-specific, so users that care + * about that will need to inspect "sizeof(VectorN)". + * + *------------------------------------------------------------------------- + */ +#ifndef SIMD_H +#define SIMD_H + +#if (defined(__x86_64__) || defined(_M_AMD64)) +/* + * SSE2 instructions are part of the spec for the 64-bit x86 ISA. We assume + * that compilers targeting this architecture understand SSE2 intrinsics. + * + * We use emmintrin.h rather than the comprehensive header immintrin.h in + * order to exclude extensions beyond SSE2. This is because MSVC, at least, + * will allow the use of intrinsics that haven't been enabled at compile + * time. + */ +#include <emmintrin.h> +#define USE_SSE2 +typedef __m128i Vector8; +typedef __m128i Vector32; + +#elif defined(__aarch64__) && defined(__ARM_NEON) +/* + * We use the Neon instructions if the compiler provides access to them (as + * indicated by __ARM_NEON) and we are on aarch64. While Neon support is + * technically optional for aarch64, it appears that all available 64-bit + * hardware does have it. Neon exists in some 32-bit hardware too, but we + * could not realistically use it there without a run-time check, which seems + * not worth the trouble for now. + */ +#include <arm_neon.h> +#define USE_NEON +typedef uint8x16_t Vector8; +typedef uint32x4_t Vector32; + +#else +/* + * If no SIMD instructions are available, we can in some cases emulate vector + * operations using bitwise operations on unsigned integers. Note that many + * of the functions in this file presently do not have non-SIMD + * implementations. In particular, none of the functions involving Vector32 + * are implemented without SIMD since it's likely not worthwhile to represent + * two 32-bit integers using a uint64. + */ +#define USE_NO_SIMD +typedef uint64 Vector8; +#endif + +/* load/store operations */ +static inline void vector8_load(Vector8 *v, const uint8 *s); +#ifndef USE_NO_SIMD +static inline void vector32_load(Vector32 *v, const uint32 *s); +#endif + +/* assignment operations */ +static inline Vector8 vector8_broadcast(const uint8 c); +#ifndef USE_NO_SIMD +static inline Vector32 vector32_broadcast(const uint32 c); +#endif + +/* element-wise comparisons to a scalar */ +static inline bool vector8_has(const Vector8 v, const uint8 c); +static inline bool vector8_has_zero(const Vector8 v); +static inline bool vector8_has_le(const Vector8 v, const uint8 c); +static inline bool vector8_is_highbit_set(const Vector8 v); +#ifndef USE_NO_SIMD +static inline bool vector32_is_highbit_set(const Vector32 v); +#endif + +/* arithmetic operations */ +static inline Vector8 vector8_or(const Vector8 v1, const Vector8 v2); +#ifndef USE_NO_SIMD +static inline Vector32 vector32_or(const Vector32 v1, const Vector32 v2); +static inline Vector8 vector8_ssub(const Vector8 v1, const Vector8 v2); +#endif + +/* + * comparisons between vectors + * + * Note: These return a vector rather than boolean, which is why we don't + * have non-SIMD implementations. + */ +#ifndef USE_NO_SIMD +static inline Vector8 vector8_eq(const Vector8 v1, const Vector8 v2); +static inline Vector32 vector32_eq(const Vector32 v1, const Vector32 v2); +#endif + +/* + * Load a chunk of memory into the given vector. + */ +static inline void +vector8_load(Vector8 *v, const uint8 *s) +{ +#if defined(USE_SSE2) + *v = _mm_loadu_si128((const __m128i *) s); +#elif defined(USE_NEON) + *v = vld1q_u8(s); +#else + memcpy(v, s, sizeof(Vector8)); +#endif +} + +#ifndef USE_NO_SIMD +static inline void +vector32_load(Vector32 *v, const uint32 *s) +{ +#ifdef USE_SSE2 + *v = _mm_loadu_si128((const __m128i *) s); +#elif defined(USE_NEON) + *v = vld1q_u32(s); +#endif +} +#endif /* ! USE_NO_SIMD */ + +/* + * Create a vector with all elements set to the same value. + */ +static inline Vector8 +vector8_broadcast(const uint8 c) +{ +#if defined(USE_SSE2) + return _mm_set1_epi8(c); +#elif defined(USE_NEON) + return vdupq_n_u8(c); +#else + return ~UINT64CONST(0) / 0xFF * c; +#endif +} + +#ifndef USE_NO_SIMD +static inline Vector32 +vector32_broadcast(const uint32 c) +{ +#ifdef USE_SSE2 + return _mm_set1_epi32(c); +#elif defined(USE_NEON) + return vdupq_n_u32(c); +#endif +} +#endif /* ! USE_NO_SIMD */ + +/* + * Return true if any elements in the vector are equal to the given scalar. + */ +static inline bool +vector8_has(const Vector8 v, const uint8 c) +{ + bool result; + + /* pre-compute the result for assert checking */ +#ifdef USE_ASSERT_CHECKING + bool assert_result = false; + + for (Size i = 0; i < sizeof(Vector8); i++) + { + if (((const uint8 *) &v)[i] == c) + { + assert_result = true; + break; + } + } +#endif /* USE_ASSERT_CHECKING */ + +#if defined(USE_NO_SIMD) + /* any bytes in v equal to c will evaluate to zero via XOR */ + result = vector8_has_zero(v ^ vector8_broadcast(c)); +#else + result = vector8_is_highbit_set(vector8_eq(v, vector8_broadcast(c))); +#endif + + Assert(assert_result == result); + return result; +} + +/* + * Convenience function equivalent to vector8_has(v, 0) + */ +static inline bool +vector8_has_zero(const Vector8 v) +{ +#if defined(USE_NO_SIMD) + /* + * We cannot call vector8_has() here, because that would lead to a + * circular definition. + */ + return vector8_has_le(v, 0); +#else + return vector8_has(v, 0); +#endif +} + +/* + * Return true if any elements in the vector are less than or equal to the + * given scalar. + */ +static inline bool +vector8_has_le(const Vector8 v, const uint8 c) +{ + bool result = false; + + /* pre-compute the result for assert checking */ +#ifdef USE_ASSERT_CHECKING + bool assert_result = false; + + for (Size i = 0; i < sizeof(Vector8); i++) + { + if (((const uint8 *) &v)[i] <= c) + { + assert_result = true; + break; + } + } +#endif /* USE_ASSERT_CHECKING */ + +#if defined(USE_NO_SIMD) + + /* + * To find bytes <= c, we can use bitwise operations to find bytes < c+1, + * but it only works if c+1 <= 128 and if the highest bit in v is not set. + * Adapted from + * https://graphics.stanford.edu/~seander/bithacks.html#HasLessInWord + */ + if ((int64) v >= 0 && c < 0x80) + result = (v - vector8_broadcast(c + 1)) & ~v & vector8_broadcast(0x80); + else + { + /* one byte at a time */ + for (Size i = 0; i < sizeof(Vector8); i++) + { + if (((const uint8 *) &v)[i] <= c) + { + result = true; + break; + } + } + } +#else + + /* + * Use saturating subtraction to find bytes <= c, which will present as + * NUL bytes. This approach is a workaround for the lack of unsigned + * comparison instructions on some architectures. + */ + result = vector8_has_zero(vector8_ssub(v, vector8_broadcast(c))); +#endif + + Assert(assert_result == result); + return result; +} + +/* + * Return true if the high bit of any element is set + */ +static inline bool +vector8_is_highbit_set(const Vector8 v) +{ +#ifdef USE_SSE2 + return _mm_movemask_epi8(v) != 0; +#elif defined(USE_NEON) + return vmaxvq_u8(v) > 0x7F; +#else + return v & vector8_broadcast(0x80); +#endif +} + +/* + * Exactly like vector8_is_highbit_set except for the input type, so it + * looks at each byte separately. + * + * XXX x86 uses the same underlying type for 8-bit, 16-bit, and 32-bit + * integer elements, but Arm does not, hence the need for a separate + * function. We could instead adopt the behavior of Arm's vmaxvq_u32(), i.e. + * check each 32-bit element, but that would require an additional mask + * operation on x86. + */ +#ifndef USE_NO_SIMD +static inline bool +vector32_is_highbit_set(const Vector32 v) +{ +#if defined(USE_NEON) + return vector8_is_highbit_set((Vector8) v); +#else + return vector8_is_highbit_set(v); +#endif +} +#endif /* ! USE_NO_SIMD */ + +/* + * Return the bitwise OR of the inputs + */ +static inline Vector8 +vector8_or(const Vector8 v1, const Vector8 v2) +{ +#ifdef USE_SSE2 + return _mm_or_si128(v1, v2); +#elif defined(USE_NEON) + return vorrq_u8(v1, v2); +#else + return v1 | v2; +#endif +} + +#ifndef USE_NO_SIMD +static inline Vector32 +vector32_or(const Vector32 v1, const Vector32 v2) +{ +#ifdef USE_SSE2 + return _mm_or_si128(v1, v2); +#elif defined(USE_NEON) + return vorrq_u32(v1, v2); +#endif +} +#endif /* ! USE_NO_SIMD */ + +/* + * Return the result of subtracting the respective elements of the input + * vectors using saturation (i.e., if the operation would yield a value less + * than zero, zero is returned instead). For more information on saturation + * arithmetic, see https://en.wikipedia.org/wiki/Saturation_arithmetic + */ +#ifndef USE_NO_SIMD +static inline Vector8 +vector8_ssub(const Vector8 v1, const Vector8 v2) +{ +#ifdef USE_SSE2 + return _mm_subs_epu8(v1, v2); +#elif defined(USE_NEON) + return vqsubq_u8(v1, v2); +#endif +} +#endif /* ! USE_NO_SIMD */ + +/* + * Return a vector with all bits set in each lane where the corresponding + * lanes in the inputs are equal. + */ +#ifndef USE_NO_SIMD +static inline Vector8 +vector8_eq(const Vector8 v1, const Vector8 v2) +{ +#ifdef USE_SSE2 + return _mm_cmpeq_epi8(v1, v2); +#elif defined(USE_NEON) + return vceqq_u8(v1, v2); +#endif +} +#endif /* ! USE_NO_SIMD */ + +#ifndef USE_NO_SIMD +static inline Vector32 +vector32_eq(const Vector32 v1, const Vector32 v2) +{ +#ifdef USE_SSE2 + return _mm_cmpeq_epi32(v1, v2); +#elif defined(USE_NEON) + return vceqq_u32(v1, v2); +#endif +} +#endif /* ! USE_NO_SIMD */ + +#endif /* SIMD_H */ diff --git a/src/include/port/solaris.h b/src/include/port/solaris.h new file mode 100644 index 0000000..e63a3bd --- /dev/null +++ b/src/include/port/solaris.h @@ -0,0 +1,26 @@ +/* src/include/port/solaris.h */ + +/* + * Sort this out for all operating systems some time. The __xxx + * symbols are defined on both GCC and Solaris CC, although GCC + * doesn't document them. The __xxx__ symbols are only on GCC. + */ +#if defined(__i386) && !defined(__i386__) +#define __i386__ +#endif + +#if defined(__amd64) && !defined(__amd64__) +#define __amd64__ +#endif + +#if defined(__x86_64) && !defined(__x86_64__) +#define __x86_64__ +#endif + +#if defined(__sparc) && !defined(__sparc__) +#define __sparc__ +#endif + +#if defined(__i386__) +#include <sys/isa_defs.h> +#endif diff --git a/src/include/port/win32.h b/src/include/port/win32.h new file mode 100644 index 0000000..d6c13d0 --- /dev/null +++ b/src/include/port/win32.h @@ -0,0 +1,59 @@ +/* src/include/port/win32.h */ + +/* + * We always rely on the WIN32 macro being set by our build system, + * but _WIN32 is the compiler pre-defined macro. So make sure we define + * WIN32 whenever _WIN32 is set, to facilitate standalone building. + */ +#if defined(_WIN32) && !defined(WIN32) +#define WIN32 +#endif + +/* + * Make sure _WIN32_WINNT has the minimum required value. + * Leave a higher value in place. The minimum requirement is Windows 10. + */ +#ifdef _WIN32_WINNT +#undef _WIN32_WINNT +#endif + +#define _WIN32_WINNT 0x0A00 + +/* + * We need to prevent <crtdefs.h> from defining a symbol conflicting with + * our errcode() function. Since it's likely to get included by standard + * system headers, pre-emptively include it now. + */ +#if defined(_MSC_VER) || defined(HAVE_CRTDEFS_H) +#define errcode __msvc_errcode +#include <crtdefs.h> +#undef errcode +#endif + +/* + * defines for dynamic linking on Win32 platform + */ + +/* + * Variables declared in the core backend and referenced by loadable + * modules need to be marked "dllimport" in the core build, but + * "dllexport" when the declaration is read in a loadable module. + * No special markings should be used when compiling frontend code. + */ +#ifndef FRONTEND +#ifdef BUILDING_DLL +#define PGDLLIMPORT __declspec (dllexport) +#else +#define PGDLLIMPORT __declspec (dllimport) +#endif +#endif + +/* + * Functions exported by a loadable module must be marked "dllexport". + * + * While mingw would otherwise fall back to + * __attribute__((visibility("default"))), that appears to only work as long + * as no symbols are declared with __declspec(dllexport). But we can end up + * with some, e.g. plpython's Py_Init. + */ +#define PGDLLEXPORT __declspec (dllexport) diff --git a/src/include/port/win32/arpa/inet.h b/src/include/port/win32/arpa/inet.h new file mode 100644 index 0000000..ad18031 --- /dev/null +++ b/src/include/port/win32/arpa/inet.h @@ -0,0 +1,3 @@ +/* src/include/port/win32/arpa/inet.h */ + +#include <sys/socket.h> diff --git a/src/include/port/win32/dlfcn.h b/src/include/port/win32/dlfcn.h new file mode 100644 index 0000000..b6e43c0 --- /dev/null +++ b/src/include/port/win32/dlfcn.h @@ -0,0 +1 @@ +/* src/include/port/win32/dlfcn.h */ diff --git a/src/include/port/win32/grp.h b/src/include/port/win32/grp.h new file mode 100644 index 0000000..8b4f213 --- /dev/null +++ b/src/include/port/win32/grp.h @@ -0,0 +1 @@ +/* src/include/port/win32/grp.h */ diff --git a/src/include/port/win32/netdb.h b/src/include/port/win32/netdb.h new file mode 100644 index 0000000..9ed13e4 --- /dev/null +++ b/src/include/port/win32/netdb.h @@ -0,0 +1,7 @@ +/* src/include/port/win32/netdb.h */ +#ifndef WIN32_NETDB_H +#define WIN32_NETDB_H + +#include <ws2tcpip.h> + +#endif diff --git a/src/include/port/win32/netinet/in.h b/src/include/port/win32/netinet/in.h new file mode 100644 index 0000000..a4e22f8 --- /dev/null +++ b/src/include/port/win32/netinet/in.h @@ -0,0 +1,3 @@ +/* src/include/port/win32/netinet/in.h */ + +#include <sys/socket.h> diff --git a/src/include/port/win32/netinet/tcp.h b/src/include/port/win32/netinet/tcp.h new file mode 100644 index 0000000..1d377b6 --- /dev/null +++ b/src/include/port/win32/netinet/tcp.h @@ -0,0 +1,7 @@ +/* src/include/port/win32/netinet/tcp.h */ +#ifndef WIN32_NETINET_TCP_H +#define WIN32_NETINET_TCP_H + +#include <sys/socket.h> + +#endif diff --git a/src/include/port/win32/pwd.h b/src/include/port/win32/pwd.h new file mode 100644 index 0000000..b8c7178 --- /dev/null +++ b/src/include/port/win32/pwd.h @@ -0,0 +1,3 @@ +/* + * src/include/port/win32/pwd.h + */ diff --git a/src/include/port/win32/sys/resource.h b/src/include/port/win32/sys/resource.h new file mode 100644 index 0000000..a14feeb --- /dev/null +++ b/src/include/port/win32/sys/resource.h @@ -0,0 +1,20 @@ +/* + * Replacement for <sys/resource.h> for Windows. + */ +#ifndef WIN32_SYS_RESOURCE_H +#define WIN32_SYS_RESOURCE_H + +#include <sys/time.h> /* for struct timeval */ + +#define RUSAGE_SELF 0 +#define RUSAGE_CHILDREN (-1) + +struct rusage +{ + struct timeval ru_utime; /* user time used */ + struct timeval ru_stime; /* system time used */ +}; + +extern int getrusage(int who, struct rusage *rusage); + +#endif /* WIN32_SYS_RESOURCE_H */ diff --git a/src/include/port/win32/sys/select.h b/src/include/port/win32/sys/select.h new file mode 100644 index 0000000..f8a877a --- /dev/null +++ b/src/include/port/win32/sys/select.h @@ -0,0 +1,3 @@ +/* + * src/include/port/win32/sys/select.h + */ diff --git a/src/include/port/win32/sys/socket.h b/src/include/port/win32/sys/socket.h new file mode 100644 index 0000000..0c32c0f --- /dev/null +++ b/src/include/port/win32/sys/socket.h @@ -0,0 +1,26 @@ +/* + * src/include/port/win32/sys/socket.h + */ +#ifndef WIN32_SYS_SOCKET_H +#define WIN32_SYS_SOCKET_H + +/* + * Unfortunately, <wingdi.h> of VC++ also defines ERROR. + * To avoid the conflict, we include <windows.h> here and undefine ERROR + * immediately. + * + * Note: Don't include <wingdi.h> directly. It causes compile errors. + */ +#include <winsock2.h> +#include <ws2tcpip.h> +#include <windows.h> + +#undef ERROR +#undef small + +/* Restore old ERROR value */ +#ifdef PGERROR +#define ERROR PGERROR +#endif + +#endif /* WIN32_SYS_SOCKET_H */ diff --git a/src/include/port/win32/sys/un.h b/src/include/port/win32/sys/un.h new file mode 100644 index 0000000..4fc13a2 --- /dev/null +++ b/src/include/port/win32/sys/un.h @@ -0,0 +1,17 @@ +/* + * src/include/port/win32/sys/un.h + */ +#ifndef WIN32_SYS_UN_H +#define WIN32_SYS_UN_H + +/* + * Windows defines this structure in <afunix.h>, but not all tool chains have + * the header yet, so we define it here for now. + */ +struct sockaddr_un +{ + unsigned short sun_family; + char sun_path[108]; +}; + +#endif diff --git a/src/include/port/win32/sys/wait.h b/src/include/port/win32/sys/wait.h new file mode 100644 index 0000000..eaeb566 --- /dev/null +++ b/src/include/port/win32/sys/wait.h @@ -0,0 +1,3 @@ +/* + * src/include/port/win32/sys/wait.h + */ diff --git a/src/include/port/win32_msvc/dirent.h b/src/include/port/win32_msvc/dirent.h new file mode 100644 index 0000000..62799db --- /dev/null +++ b/src/include/port/win32_msvc/dirent.h @@ -0,0 +1,34 @@ +/* + * Headers for port/dirent.c, win32 native implementation of dirent functions + * + * src/include/port/win32_msvc/dirent.h + */ + +#ifndef _WIN32VC_DIRENT_H +#define _WIN32VC_DIRENT_H +struct dirent +{ + long d_ino; + unsigned short d_reclen; + unsigned char d_type; + unsigned short d_namlen; + char d_name[MAX_PATH]; +}; + +typedef struct DIR DIR; + +DIR *opendir(const char *); +struct dirent *readdir(DIR *); +int closedir(DIR *); + +/* File types for 'd_type'. */ +#define DT_UNKNOWN 0 +#define DT_FIFO 1 +#define DT_CHR 2 +#define DT_DIR 4 +#define DT_BLK 6 +#define DT_REG 8 +#define DT_LNK 10 +#define DT_SOCK 12 +#define DT_WHT 14 +#endif diff --git a/src/include/port/win32_msvc/sys/file.h b/src/include/port/win32_msvc/sys/file.h new file mode 100644 index 0000000..76be3e7 --- /dev/null +++ b/src/include/port/win32_msvc/sys/file.h @@ -0,0 +1 @@ +/* src/include/port/win32_msvc/sys/file.h */ diff --git a/src/include/port/win32_msvc/sys/param.h b/src/include/port/win32_msvc/sys/param.h new file mode 100644 index 0000000..160df3b --- /dev/null +++ b/src/include/port/win32_msvc/sys/param.h @@ -0,0 +1 @@ +/* src/include/port/win32_msvc/sys/param.h */ diff --git a/src/include/port/win32_msvc/sys/time.h b/src/include/port/win32_msvc/sys/time.h new file mode 100644 index 0000000..9d943ec --- /dev/null +++ b/src/include/port/win32_msvc/sys/time.h @@ -0,0 +1 @@ +/* src/include/port/win32_msvc/sys/time.h */ diff --git a/src/include/port/win32_msvc/unistd.h b/src/include/port/win32_msvc/unistd.h new file mode 100644 index 0000000..b7795ba --- /dev/null +++ b/src/include/port/win32_msvc/unistd.h @@ -0,0 +1,9 @@ +/* src/include/port/win32_msvc/unistd.h */ + +/* + * MSVC does not define these, nor does _fileno(stdin) etc reliably work + * (returns -1 if stdin/out/err are closed). + */ +#define STDIN_FILENO 0 +#define STDOUT_FILENO 1 +#define STDERR_FILENO 2 diff --git a/src/include/port/win32_msvc/utime.h b/src/include/port/win32_msvc/utime.h new file mode 100644 index 0000000..c78e79c --- /dev/null +++ b/src/include/port/win32_msvc/utime.h @@ -0,0 +1,3 @@ +/* src/include/port/win32_msvc/utime.h */ + +#include <sys/utime.h> /* for non-unicode version */ diff --git a/src/include/port/win32_port.h b/src/include/port/win32_port.h new file mode 100644 index 0000000..b957d5c --- /dev/null +++ b/src/include/port/win32_port.h @@ -0,0 +1,594 @@ +/*------------------------------------------------------------------------- + * + * win32_port.h + * Windows-specific compatibility stuff. + * + * Note this is read in MinGW as well as native Windows builds, + * but not in Cygwin builds. + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/port/win32_port.h + * + *------------------------------------------------------------------------- + */ +#ifndef PG_WIN32_PORT_H +#define PG_WIN32_PORT_H + +/* + * Always build with SSPI support. Keep it as a #define in case + * we want a switch to disable it sometime in the future. + */ +#define ENABLE_SSPI 1 + +/* undefine and redefine after #include */ +#undef mkdir + +#undef ERROR + +/* + * VS2013 and later issue warnings about using the old Winsock API, + * which we don't really want to hear about. + */ +#ifdef _MSC_VER +#define _WINSOCK_DEPRECATED_NO_WARNINGS +#endif + +/* + * The MinGW64 headers choke if this is already defined - they + * define it themselves. + */ +#if !defined(__MINGW64_VERSION_MAJOR) || defined(_MSC_VER) +#define _WINSOCKAPI_ +#endif + +/* + * windows.h includes a lot of other headers, slowing down compilation + * significantly. WIN32_LEAN_AND_MEAN reduces that a bit. It'd be better to + * remove the include of windows.h (as well as indirect inclusions of it) from + * such a central place, but until then... + * + * To be able to include ntstatus.h tell windows.h to not declare NTSTATUS by + * temporarily defining UMDF_USING_NTSTATUS, otherwise we'll get warning about + * macro redefinitions, as windows.h also defines NTSTATUS (yuck). That in + * turn requires including ntstatus.h, winternl.h to get common symbols. + */ +#define WIN32_LEAN_AND_MEAN +#define UMDF_USING_NTSTATUS + +#include <winsock2.h> +#include <ws2tcpip.h> +#include <windows.h> +#include <ntstatus.h> +#include <winternl.h> + +#undef small +#include <process.h> +#include <signal.h> +#include <direct.h> +#undef near + +/* needed before sys/stat hacking below: */ +#define fstat microsoft_native_fstat +#define stat microsoft_native_stat +#include <sys/stat.h> +#undef fstat +#undef stat + +/* Must be here to avoid conflicting with prototype in windows.h */ +#define mkdir(a,b) mkdir(a) + +#define ftruncate(a,b) chsize(a,b) + +/* Windows doesn't have fsync() as such, use _commit() */ +#define fsync(fd) _commit(fd) + +/* + * For historical reasons, we allow setting wal_sync_method to + * fsync_writethrough on Windows, even though it's really identical to fsync + * (both code paths wind up at _commit()). + */ +#define HAVE_FSYNC_WRITETHROUGH +#define FSYNC_WRITETHROUGH_IS_FSYNC + +#define USES_WINSOCK + +/* + * IPC defines + */ +#undef HAVE_UNION_SEMUN +#define HAVE_UNION_SEMUN 1 + +#define IPC_RMID 256 +#define IPC_CREAT 512 +#define IPC_EXCL 1024 +#define IPC_PRIVATE 234564 +#define IPC_NOWAIT 2048 +#define IPC_STAT 4096 + +#define EACCESS 2048 +#ifndef EIDRM +#define EIDRM 4096 +#endif + +#define SETALL 8192 +#define GETNCNT 16384 +#define GETVAL 65536 +#define SETVAL 131072 +#define GETPID 262144 + + +/* + * Signal stuff + * + * For WIN32, there is no wait() call so there are no wait() macros + * to interpret the return value of system(). Instead, system() + * return values < 0x100 are used for exit() termination, and higher + * values are used to indicate non-exit() termination, which is + * similar to a unix-style signal exit (think SIGSEGV == + * STATUS_ACCESS_VIOLATION). Return values are broken up into groups: + * + * https://docs.microsoft.com/en-us/windows-hardware/drivers/kernel/using-ntstatus-values + * + * NT_SUCCESS 0 - 0x3FFFFFFF + * NT_INFORMATION 0x40000000 - 0x7FFFFFFF + * NT_WARNING 0x80000000 - 0xBFFFFFFF + * NT_ERROR 0xC0000000 - 0xFFFFFFFF + * + * Effectively, we don't care on the severity of the return value from + * system(), we just need to know if it was because of exit() or generated + * by the system, and it seems values >= 0x100 are system-generated. + * See this URL for a list of WIN32 STATUS_* values: + * + * Wine (URL used in our error messages) - + * http://source.winehq.org/source/include/ntstatus.h + * Descriptions - + * https://docs.microsoft.com/en-us/openspecs/windows_protocols/ms-erref/596a1078-e883-4972-9bbc-49e60bebca55 + * + * The comprehensive exception list is included in ntstatus.h from the + * Windows Driver Kit (WDK). A subset of the list is also included in + * winnt.h from the Windows SDK. Defining WIN32_NO_STATUS before including + * windows.h helps to avoid any conflicts. + * + * Some day we might want to print descriptions for the most common + * exceptions, rather than printing an include file name. We could use + * RtlNtStatusToDosError() and pass to FormatMessage(), which can print + * the text of error values, but MinGW does not support + * RtlNtStatusToDosError(). + */ +#define WIFEXITED(w) (((w) & 0XFFFFFF00) == 0) +#define WIFSIGNALED(w) (!WIFEXITED(w)) +#define WEXITSTATUS(w) (w) +#define WTERMSIG(w) (w) + +#define sigmask(sig) ( 1 << ((sig)-1) ) + +/* Signal function return values */ +#undef SIG_DFL +#undef SIG_ERR +#undef SIG_IGN +#define SIG_DFL ((pqsigfunc)0) +#define SIG_ERR ((pqsigfunc)-1) +#define SIG_IGN ((pqsigfunc)1) + +/* Some extra signals */ +#define SIGHUP 1 +#define SIGQUIT 3 +#define SIGTRAP 5 +#define SIGABRT 22 /* Set to match W32 value -- not UNIX value */ +#define SIGKILL 9 +#define SIGPIPE 13 +#define SIGALRM 14 +#define SIGSTOP 17 +#define SIGTSTP 18 +#define SIGCONT 19 +#define SIGCHLD 20 +#define SIGWINCH 28 +#define SIGUSR1 30 +#define SIGUSR2 31 + +/* MinGW has gettimeofday(), but MSVC doesn't */ +#ifdef _MSC_VER +/* Last parameter not used */ +extern int gettimeofday(struct timeval *tp, void *tzp); +#endif + +/* for setitimer in backend/port/win32/timer.c */ +#define ITIMER_REAL 0 +struct itimerval +{ + struct timeval it_interval; + struct timeval it_value; +}; + +int setitimer(int which, const struct itimerval *value, struct itimerval *ovalue); + +/* Convenience wrapper for GetFileType() */ +extern DWORD pgwin32_get_file_type(HANDLE hFile); + +/* + * WIN32 does not provide 64-bit off_t, but does provide the functions operating + * with 64-bit offsets. Also, fseek() might not give an error for unseekable + * streams, so harden that function with our version. + */ +#define pgoff_t __int64 + +#ifdef _MSC_VER +extern int _pgfseeko64(FILE *stream, pgoff_t offset, int origin); +extern pgoff_t _pgftello64(FILE *stream); +#define fseeko(stream, offset, origin) _pgfseeko64(stream, offset, origin) +#define ftello(stream) _pgftello64(stream) +#else +#ifndef fseeko +#define fseeko(stream, offset, origin) fseeko64(stream, offset, origin) +#endif +#ifndef ftello +#define ftello(stream) ftello64(stream) +#endif +#endif + +/* + * Win32 also doesn't have symlinks, but we can emulate them with + * junction points on newer Win32 versions. + * + * Cygwin has its own symlinks which work on Win95/98/ME where + * junction points don't, so use those instead. We have no way of + * knowing what type of system Cygwin binaries will be run on. + * Note: Some CYGWIN includes might #define WIN32. + */ +extern int pgsymlink(const char *oldpath, const char *newpath); +extern int pgreadlink(const char *path, char *buf, size_t size); + +#define symlink(oldpath, newpath) pgsymlink(oldpath, newpath) +#define readlink(path, buf, size) pgreadlink(path, buf, size) + +/* + * Supplement to <sys/types.h>. + * + * Perl already has typedefs for uid_t and gid_t. + */ +#ifndef PLPERL_HAVE_UID_GID +typedef int uid_t; +typedef int gid_t; +#endif +typedef long key_t; + +#ifdef _MSC_VER +typedef int pid_t; +#endif + +/* + * Supplement to <sys/stat.h>. + * + * We must pull in sys/stat.h before this part, else our overrides lose. + * + * stat() is not guaranteed to set the st_size field on win32, so we + * redefine it to our own implementation. See src/port/win32stat.c. + * + * The struct stat is 32 bit in MSVC, so we redefine it as a copy of + * struct __stat64. This also fixes the struct size for MINGW builds. + */ +struct stat /* This should match struct __stat64 */ +{ + _dev_t st_dev; + _ino_t st_ino; + unsigned short st_mode; + short st_nlink; + short st_uid; + short st_gid; + _dev_t st_rdev; + __int64 st_size; + __time64_t st_atime; + __time64_t st_mtime; + __time64_t st_ctime; +}; + +extern int _pgfstat64(int fileno, struct stat *buf); +extern int _pgstat64(const char *name, struct stat *buf); +extern int _pglstat64(const char *name, struct stat *buf); + +#define fstat(fileno, sb) _pgfstat64(fileno, sb) +#define stat(path, sb) _pgstat64(path, sb) +#define lstat(path, sb) _pglstat64(path, sb) + +/* These macros are not provided by older MinGW, nor by MSVC */ +#ifndef S_IRUSR +#define S_IRUSR _S_IREAD +#endif +#ifndef S_IWUSR +#define S_IWUSR _S_IWRITE +#endif +#ifndef S_IXUSR +#define S_IXUSR _S_IEXEC +#endif +#ifndef S_IRWXU +#define S_IRWXU (S_IRUSR | S_IWUSR | S_IXUSR) +#endif +#ifndef S_IRGRP +#define S_IRGRP 0 +#endif +#ifndef S_IWGRP +#define S_IWGRP 0 +#endif +#ifndef S_IXGRP +#define S_IXGRP 0 +#endif +#ifndef S_IRWXG +#define S_IRWXG 0 +#endif +#ifndef S_IROTH +#define S_IROTH 0 +#endif +#ifndef S_IWOTH +#define S_IWOTH 0 +#endif +#ifndef S_IXOTH +#define S_IXOTH 0 +#endif +#ifndef S_IRWXO +#define S_IRWXO 0 +#endif +#ifndef S_ISDIR +#define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR) +#endif +#ifndef S_ISREG +#define S_ISREG(m) (((m) & S_IFMT) == S_IFREG) +#endif + +/* + * In order for lstat() to be able to report junction points as symlinks, we + * need to hijack a bit in st_mode, since neither MSVC nor MinGW provides + * S_ISLNK and there aren't any spare bits. We'll steal the one for character + * devices, because we don't otherwise make use of those. + */ +#ifdef S_ISLNK +#error "S_ISLNK is already defined" +#endif +#ifdef S_IFLNK +#error "S_IFLNK is already defined" +#endif +#define S_IFLNK S_IFCHR +#define S_ISLNK(m) (((m) & S_IFLNK) == S_IFLNK) + +/* + * Supplement to <fcntl.h>. + * This is the same value as _O_NOINHERIT in the MS header file. This is + * to ensure that we don't collide with a future definition. It means + * we cannot use _O_NOINHERIT ourselves. + */ +#define O_DSYNC 0x0080 + +/* + * Our open() replacement does not create inheritable handles, so it is safe to + * ignore O_CLOEXEC. (If we were using Windows' own open(), it might be + * necessary to convert this to _O_NOINHERIT.) + */ +#define O_CLOEXEC 0 + +/* + * Supplement to <errno.h>. + * + * We redefine network-related Berkeley error symbols as the corresponding WSA + * constants. This allows strerror.c to recognize them as being in the Winsock + * error code range and pass them off to win32_socket_strerror(), since + * Windows' version of plain strerror() won't cope. Note that this will break + * if these names are used for anything else besides Windows Sockets errors. + * See TranslateSocketError() when changing this list. + */ +#undef EAGAIN +#define EAGAIN WSAEWOULDBLOCK +#undef EINTR +#define EINTR WSAEINTR +#undef EMSGSIZE +#define EMSGSIZE WSAEMSGSIZE +#undef EAFNOSUPPORT +#define EAFNOSUPPORT WSAEAFNOSUPPORT +#undef EWOULDBLOCK +#define EWOULDBLOCK WSAEWOULDBLOCK +#undef ECONNABORTED +#define ECONNABORTED WSAECONNABORTED +#undef ECONNRESET +#define ECONNRESET WSAECONNRESET +#undef EINPROGRESS +#define EINPROGRESS WSAEINPROGRESS +#undef EISCONN +#define EISCONN WSAEISCONN +#undef ENOBUFS +#define ENOBUFS WSAENOBUFS +#undef EPROTONOSUPPORT +#define EPROTONOSUPPORT WSAEPROTONOSUPPORT +#undef ECONNREFUSED +#define ECONNREFUSED WSAECONNREFUSED +#undef ENOTSOCK +#define ENOTSOCK WSAENOTSOCK +#undef EOPNOTSUPP +#define EOPNOTSUPP WSAEOPNOTSUPP +#undef EADDRINUSE +#define EADDRINUSE WSAEADDRINUSE +#undef EADDRNOTAVAIL +#define EADDRNOTAVAIL WSAEADDRNOTAVAIL +#undef EHOSTDOWN +#define EHOSTDOWN WSAEHOSTDOWN +#undef EHOSTUNREACH +#define EHOSTUNREACH WSAEHOSTUNREACH +#undef ENETDOWN +#define ENETDOWN WSAENETDOWN +#undef ENETRESET +#define ENETRESET WSAENETRESET +#undef ENETUNREACH +#define ENETUNREACH WSAENETUNREACH +#undef ENOTCONN +#define ENOTCONN WSAENOTCONN +#undef ETIMEDOUT +#define ETIMEDOUT WSAETIMEDOUT + +/* + * Locale stuff. + * + * Extended locale functions with gratuitous underscore prefixes. + * (These APIs are nevertheless fully documented by Microsoft.) + */ +#define locale_t _locale_t +#define tolower_l _tolower_l +#define toupper_l _toupper_l +#define towlower_l _towlower_l +#define towupper_l _towupper_l +#define isdigit_l _isdigit_l +#define iswdigit_l _iswdigit_l +#define isalpha_l _isalpha_l +#define iswalpha_l _iswalpha_l +#define isalnum_l _isalnum_l +#define iswalnum_l _iswalnum_l +#define isupper_l _isupper_l +#define iswupper_l _iswupper_l +#define islower_l _islower_l +#define iswlower_l _iswlower_l +#define isgraph_l _isgraph_l +#define iswgraph_l _iswgraph_l +#define isprint_l _isprint_l +#define iswprint_l _iswprint_l +#define ispunct_l _ispunct_l +#define iswpunct_l _iswpunct_l +#define isspace_l _isspace_l +#define iswspace_l _iswspace_l +#define strcoll_l _strcoll_l +#define strxfrm_l _strxfrm_l +#define wcscoll_l _wcscoll_l +#define wcstombs_l _wcstombs_l +#define mbstowcs_l _mbstowcs_l + +/* + * Versions of libintl >= 0.18? try to replace setlocale() with a macro + * to their own versions. Remove the macro, if it exists, because it + * ends up calling the wrong version when the backend and libintl use + * different versions of msvcrt. + */ +#if defined(setlocale) +#undef setlocale +#endif + +/* + * Define our own wrapper macro around setlocale() to work around bugs in + * Windows' native setlocale() function. + */ +extern char *pgwin32_setlocale(int category, const char *locale); + +#define setlocale(a,b) pgwin32_setlocale(a,b) + + +/* In backend/port/win32/signal.c */ +extern PGDLLIMPORT volatile int pg_signal_queue; +extern PGDLLIMPORT int pg_signal_mask; +extern PGDLLIMPORT HANDLE pgwin32_signal_event; +extern PGDLLIMPORT HANDLE pgwin32_initial_signal_pipe; + +#define UNBLOCKED_SIGNAL_QUEUE() (pg_signal_queue & ~pg_signal_mask) +#define PG_SIGNAL_COUNT 32 + +extern void pgwin32_signal_initialize(void); +extern HANDLE pgwin32_create_signal_listener(pid_t pid); +extern void pgwin32_dispatch_queued_signals(void); +extern void pg_queue_signal(int signum); + +/* In src/port/kill.c */ +#define kill(pid,sig) pgkill(pid,sig) +extern int pgkill(int pid, int sig); + +/* In backend/port/win32/socket.c */ +#ifndef FRONTEND +#define socket(af, type, protocol) pgwin32_socket(af, type, protocol) +#define bind(s, addr, addrlen) pgwin32_bind(s, addr, addrlen) +#define listen(s, backlog) pgwin32_listen(s, backlog) +#define accept(s, addr, addrlen) pgwin32_accept(s, addr, addrlen) +#define connect(s, name, namelen) pgwin32_connect(s, name, namelen) +#define select(n, r, w, e, timeout) pgwin32_select(n, r, w, e, timeout) +#define recv(s, buf, len, flags) pgwin32_recv(s, buf, len, flags) +#define send(s, buf, len, flags) pgwin32_send(s, buf, len, flags) + +extern SOCKET pgwin32_socket(int af, int type, int protocol); +extern int pgwin32_bind(SOCKET s, struct sockaddr *addr, int addrlen); +extern int pgwin32_listen(SOCKET s, int backlog); +extern SOCKET pgwin32_accept(SOCKET s, struct sockaddr *addr, int *addrlen); +extern int pgwin32_connect(SOCKET s, const struct sockaddr *name, int namelen); +extern int pgwin32_select(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, const struct timeval *timeout); +extern int pgwin32_recv(SOCKET s, char *buf, int len, int flags); +extern int pgwin32_send(SOCKET s, const void *buf, int len, int flags); +extern int pgwin32_waitforsinglesocket(SOCKET s, int what, int timeout); + +extern PGDLLIMPORT int pgwin32_noblock; + +#endif /* FRONTEND */ + +/* in backend/port/win32_shmem.c */ +extern int pgwin32_ReserveSharedMemoryRegion(HANDLE); + +/* in backend/port/win32/crashdump.c */ +extern void pgwin32_install_crashdump_handler(void); + +/* in port/win32dlopen.c */ +extern void *dlopen(const char *file, int mode); +extern void *dlsym(void *handle, const char *symbol); +extern int dlclose(void *handle); +extern char *dlerror(void); + +#define RTLD_NOW 1 +#define RTLD_GLOBAL 0 + +/* in port/win32error.c */ +extern void _dosmaperr(unsigned long); + +/* in port/win32env.c */ +extern int pgwin32_putenv(const char *); +extern int pgwin32_setenv(const char *name, const char *value, int overwrite); +extern int pgwin32_unsetenv(const char *name); + +#define putenv(x) pgwin32_putenv(x) +#define setenv(x,y,z) pgwin32_setenv(x,y,z) +#define unsetenv(x) pgwin32_unsetenv(x) + +/* in port/win32security.c */ +extern int pgwin32_is_service(void); +extern int pgwin32_is_admin(void); + +/* Windows security token manipulation (in src/common/exec.c) */ +extern BOOL AddUserToTokenDacl(HANDLE hToken); + +/* Things that exist in MinGW headers, but need to be added to MSVC */ +#ifdef _MSC_VER + +#ifndef _WIN64 +typedef long ssize_t; +#else +typedef __int64 ssize_t; +#endif + +typedef unsigned short mode_t; + +#define F_OK 0 +#define W_OK 2 +#define R_OK 4 + +#endif /* _MSC_VER */ + +#if defined(__MINGW32__) || defined(__MINGW64__) +/* + * Mingw claims to have a strtof, and my reading of its source code suggests + * that it ought to work (and not need this hack), but the regression test + * results disagree with me; whether this is a version issue or not is not + * clear. However, using our wrapper (and the misrounded-input variant file, + * already required for supporting ancient systems) can't make things any + * worse, except for a tiny performance loss when reading zeros. + * + * See also cygwin.h for another instance of this. + */ +#define HAVE_BUGGY_STRTOF 1 +#endif + +/* in port/win32pread.c */ +extern ssize_t pg_pread(int fd, void *buf, size_t nbyte, off_t offset); + +/* in port/win32pwrite.c */ +extern ssize_t pg_pwrite(int fd, const void *buf, size_t nbyte, off_t offset); + +#endif /* PG_WIN32_PORT_H */ diff --git a/src/include/port/win32ntdll.h b/src/include/port/win32ntdll.h new file mode 100644 index 0000000..1ce9360 --- /dev/null +++ b/src/include/port/win32ntdll.h @@ -0,0 +1,34 @@ +/*------------------------------------------------------------------------- + * + * win32ntdll.h + * Dynamically loaded Windows NT functions. + * + * Portions Copyright (c) 2021-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/port/win32ntdll.h + * + *------------------------------------------------------------------------- + */ + +#ifndef WIN32NTDLL_H +#define WIN32NTDLL_H + +#include <ntstatus.h> +#include <winternl.h> + +#ifndef FLUSH_FLAGS_FILE_DATA_SYNC_ONLY +#define FLUSH_FLAGS_FILE_DATA_SYNC_ONLY 0x4 +#endif + +typedef NTSTATUS (__stdcall * RtlGetLastNtStatus_t) (void); +typedef ULONG (__stdcall * RtlNtStatusToDosError_t) (NTSTATUS); +typedef NTSTATUS (__stdcall * NtFlushBuffersFileEx_t) (HANDLE, ULONG, PVOID, ULONG, PIO_STATUS_BLOCK); + +extern PGDLLIMPORT RtlGetLastNtStatus_t pg_RtlGetLastNtStatus; +extern PGDLLIMPORT RtlNtStatusToDosError_t pg_RtlNtStatusToDosError; +extern PGDLLIMPORT NtFlushBuffersFileEx_t pg_NtFlushBuffersFileEx; + +extern int initialize_ntdll(void); + +#endif /* WIN32NTDLL_H */ |