diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-16 19:46:48 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-16 19:46:48 +0000 |
commit | 311bcfc6b3acdd6fd152798c7f287ddf74fa2a98 (patch) | |
tree | 0ec307299b1dada3701e42f4ca6eda57d708261e /src/include/port/atomics | |
parent | Initial commit. (diff) | |
download | postgresql-15-upstream.tar.xz postgresql-15-upstream.zip |
Adding upstream version 15.4.upstream/15.4upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/include/port/atomics')
-rw-r--r-- | src/include/port/atomics/arch-arm.h | 32 | ||||
-rw-r--r-- | src/include/port/atomics/arch-hppa.h | 17 | ||||
-rw-r--r-- | src/include/port/atomics/arch-ia64.h | 29 | ||||
-rw-r--r-- | src/include/port/atomics/arch-ppc.h | 254 | ||||
-rw-r--r-- | src/include/port/atomics/arch-x86.h | 252 | ||||
-rw-r--r-- | src/include/port/atomics/fallback.h | 170 | ||||
-rw-r--r-- | src/include/port/atomics/generic-acc.h | 106 | ||||
-rw-r--r-- | src/include/port/atomics/generic-gcc.h | 286 | ||||
-rw-r--r-- | src/include/port/atomics/generic-msvc.h | 101 | ||||
-rw-r--r-- | src/include/port/atomics/generic-sunpro.h | 106 | ||||
-rw-r--r-- | src/include/port/atomics/generic.h | 401 |
11 files changed, 1754 insertions, 0 deletions
diff --git a/src/include/port/atomics/arch-arm.h b/src/include/port/atomics/arch-arm.h new file mode 100644 index 0000000..9fe8f1b --- /dev/null +++ b/src/include/port/atomics/arch-arm.h @@ -0,0 +1,32 @@ +/*------------------------------------------------------------------------- + * + * arch-arm.h + * Atomic operations considerations specific to ARM + * + * Portions Copyright (c) 2013-2022, PostgreSQL Global Development Group + * + * NOTES: + * + * src/include/port/atomics/arch-arm.h + * + *------------------------------------------------------------------------- + */ + +/* intentionally no include guards, should only be included by atomics.h */ +#ifndef INSIDE_ATOMICS_H +#error "should be included via atomics.h" +#endif + +/* + * 64 bit atomics on ARM32 are implemented using kernel fallbacks and thus + * might be slow, so disable entirely. On ARM64 that problem doesn't exist. + */ +#if !defined(__aarch64__) && !defined(__aarch64) +#define PG_DISABLE_64_BIT_ATOMICS +#else +/* + * Architecture Reference Manual for ARMv8 states aligned read/write to/from + * general purpose register is atomic. + */ +#define PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY +#endif /* __aarch64__ || __aarch64 */ diff --git a/src/include/port/atomics/arch-hppa.h b/src/include/port/atomics/arch-hppa.h new file mode 100644 index 0000000..7a7bb6e --- /dev/null +++ b/src/include/port/atomics/arch-hppa.h @@ -0,0 +1,17 @@ +/*------------------------------------------------------------------------- + * + * arch-hppa.h + * Atomic operations considerations specific to HPPA + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * NOTES: + * + * src/include/port/atomics/arch-hppa.h + * + *------------------------------------------------------------------------- + */ + +/* HPPA doesn't do either read or write reordering */ +#define pg_memory_barrier_impl() pg_compiler_barrier_impl() diff --git a/src/include/port/atomics/arch-ia64.h b/src/include/port/atomics/arch-ia64.h new file mode 100644 index 0000000..771bac1 --- /dev/null +++ b/src/include/port/atomics/arch-ia64.h @@ -0,0 +1,29 @@ +/*------------------------------------------------------------------------- + * + * arch-ia64.h + * Atomic operations considerations specific to intel itanium + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * NOTES: + * + * src/include/port/atomics/arch-ia64.h + * + *------------------------------------------------------------------------- + */ + +/* + * Itanium is weakly ordered, so read and write barriers require a full + * fence. + */ +#if defined(__INTEL_COMPILER) +# define pg_memory_barrier_impl() __mf() +#elif defined(__GNUC__) +# define pg_memory_barrier_impl() __asm__ __volatile__ ("mf" : : : "memory") +#elif defined(__hpux) +# define pg_memory_barrier_impl() _Asm_mf() +#endif + +/* per architecture manual doubleword accesses have single copy atomicity */ +#define PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY diff --git a/src/include/port/atomics/arch-ppc.h b/src/include/port/atomics/arch-ppc.h new file mode 100644 index 0000000..eb64513 --- /dev/null +++ b/src/include/port/atomics/arch-ppc.h @@ -0,0 +1,254 @@ +/*------------------------------------------------------------------------- + * + * arch-ppc.h + * Atomic operations considerations specific to PowerPC + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * NOTES: + * + * src/include/port/atomics/arch-ppc.h + * + *------------------------------------------------------------------------- + */ + +#if defined(__GNUC__) + +/* + * lwsync orders loads with respect to each other, and similarly with stores. + * But a load can be performed before a subsequent store, so sync must be used + * for a full memory barrier. + */ +#define pg_memory_barrier_impl() __asm__ __volatile__ ("sync" : : : "memory") +#define pg_read_barrier_impl() __asm__ __volatile__ ("lwsync" : : : "memory") +#define pg_write_barrier_impl() __asm__ __volatile__ ("lwsync" : : : "memory") +#endif + +#define PG_HAVE_ATOMIC_U32_SUPPORT +typedef struct pg_atomic_uint32 +{ + volatile uint32 value; +} pg_atomic_uint32; + +/* 64bit atomics are only supported in 64bit mode */ +#if SIZEOF_VOID_P >= 8 +#define PG_HAVE_ATOMIC_U64_SUPPORT +typedef struct pg_atomic_uint64 +{ + volatile uint64 value pg_attribute_aligned(8); +} pg_atomic_uint64; + +#endif + +/* + * This mimics gcc __atomic_compare_exchange_n(..., __ATOMIC_SEQ_CST), but + * code generation differs at the end. __atomic_compare_exchange_n(): + * 100: isync + * 104: mfcr r3 + * 108: rlwinm r3,r3,3,31,31 + * 10c: bne 120 <.eb+0x10> + * 110: clrldi r3,r3,63 + * 114: addi r1,r1,112 + * 118: blr + * 11c: nop + * 120: clrldi r3,r3,63 + * 124: stw r9,0(r4) + * 128: addi r1,r1,112 + * 12c: blr + * + * This: + * f0: isync + * f4: mfcr r9 + * f8: rldicl. r3,r9,35,63 + * fc: bne 104 <.eb> + * 100: stw r10,0(r4) + * 104: addi r1,r1,112 + * 108: blr + * + * This implementation may or may not have materially different performance. + * It's not exploiting the fact that cr0 still holds the relevant comparison + * bits, set during the __asm__. One could fix that by moving more code into + * the __asm__. (That would remove the freedom to eliminate dead stores when + * the caller ignores "expected", but few callers do.) + * + * Recognizing constant "newval" would be superfluous, because there's no + * immediate-operand version of stwcx. + */ +#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32 +static inline bool +pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr, + uint32 *expected, uint32 newval) +{ + uint32 found; + uint32 condition_register; + bool ret; + +#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P + if (__builtin_constant_p(*expected) && + (int32) *expected <= PG_INT16_MAX && + (int32) *expected >= PG_INT16_MIN) + __asm__ __volatile__( + " sync \n" + " lwarx %0,0,%5 \n" + " cmpwi %0,%3 \n" + " bne $+12 \n" /* branch to isync */ + " stwcx. %4,0,%5 \n" + " bne $-16 \n" /* branch to lwarx */ + " isync \n" + " mfcr %1 \n" +: "=&r"(found), "=r"(condition_register), "+m"(ptr->value) +: "i"(*expected), "r"(newval), "r"(&ptr->value) +: "memory", "cc"); + else +#endif + __asm__ __volatile__( + " sync \n" + " lwarx %0,0,%5 \n" + " cmpw %0,%3 \n" + " bne $+12 \n" /* branch to isync */ + " stwcx. %4,0,%5 \n" + " bne $-16 \n" /* branch to lwarx */ + " isync \n" + " mfcr %1 \n" +: "=&r"(found), "=r"(condition_register), "+m"(ptr->value) +: "r"(*expected), "r"(newval), "r"(&ptr->value) +: "memory", "cc"); + + ret = (condition_register >> 29) & 1; /* test eq bit of cr0 */ + if (!ret) + *expected = found; + return ret; +} + +/* + * This mirrors gcc __sync_fetch_and_add(). + * + * Like tas(), use constraint "=&b" to avoid allocating r0. + */ +#define PG_HAVE_ATOMIC_FETCH_ADD_U32 +static inline uint32 +pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_) +{ + uint32 _t; + uint32 res; + +#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P + if (__builtin_constant_p(add_) && + add_ <= PG_INT16_MAX && add_ >= PG_INT16_MIN) + __asm__ __volatile__( + " sync \n" + " lwarx %1,0,%4 \n" + " addi %0,%1,%3 \n" + " stwcx. %0,0,%4 \n" + " bne $-12 \n" /* branch to lwarx */ + " isync \n" +: "=&r"(_t), "=&b"(res), "+m"(ptr->value) +: "i"(add_), "r"(&ptr->value) +: "memory", "cc"); + else +#endif + __asm__ __volatile__( + " sync \n" + " lwarx %1,0,%4 \n" + " add %0,%1,%3 \n" + " stwcx. %0,0,%4 \n" + " bne $-12 \n" /* branch to lwarx */ + " isync \n" +: "=&r"(_t), "=&r"(res), "+m"(ptr->value) +: "r"(add_), "r"(&ptr->value) +: "memory", "cc"); + + return res; +} + +#ifdef PG_HAVE_ATOMIC_U64_SUPPORT + +#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64 +static inline bool +pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr, + uint64 *expected, uint64 newval) +{ + uint64 found; + uint32 condition_register; + bool ret; + + /* Like u32, but s/lwarx/ldarx/; s/stwcx/stdcx/; s/cmpw/cmpd/ */ +#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P + if (__builtin_constant_p(*expected) && + (int64) *expected <= PG_INT16_MAX && + (int64) *expected >= PG_INT16_MIN) + __asm__ __volatile__( + " sync \n" + " ldarx %0,0,%5 \n" + " cmpdi %0,%3 \n" + " bne $+12 \n" /* branch to isync */ + " stdcx. %4,0,%5 \n" + " bne $-16 \n" /* branch to ldarx */ + " isync \n" + " mfcr %1 \n" +: "=&r"(found), "=r"(condition_register), "+m"(ptr->value) +: "i"(*expected), "r"(newval), "r"(&ptr->value) +: "memory", "cc"); + else +#endif + __asm__ __volatile__( + " sync \n" + " ldarx %0,0,%5 \n" + " cmpd %0,%3 \n" + " bne $+12 \n" /* branch to isync */ + " stdcx. %4,0,%5 \n" + " bne $-16 \n" /* branch to ldarx */ + " isync \n" + " mfcr %1 \n" +: "=&r"(found), "=r"(condition_register), "+m"(ptr->value) +: "r"(*expected), "r"(newval), "r"(&ptr->value) +: "memory", "cc"); + + ret = (condition_register >> 29) & 1; /* test eq bit of cr0 */ + if (!ret) + *expected = found; + return ret; +} + +#define PG_HAVE_ATOMIC_FETCH_ADD_U64 +static inline uint64 +pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_) +{ + uint64 _t; + uint64 res; + + /* Like u32, but s/lwarx/ldarx/; s/stwcx/stdcx/ */ +#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P + if (__builtin_constant_p(add_) && + add_ <= PG_INT16_MAX && add_ >= PG_INT16_MIN) + __asm__ __volatile__( + " sync \n" + " ldarx %1,0,%4 \n" + " addi %0,%1,%3 \n" + " stdcx. %0,0,%4 \n" + " bne $-12 \n" /* branch to ldarx */ + " isync \n" +: "=&r"(_t), "=&b"(res), "+m"(ptr->value) +: "i"(add_), "r"(&ptr->value) +: "memory", "cc"); + else +#endif + __asm__ __volatile__( + " sync \n" + " ldarx %1,0,%4 \n" + " add %0,%1,%3 \n" + " stdcx. %0,0,%4 \n" + " bne $-12 \n" /* branch to ldarx */ + " isync \n" +: "=&r"(_t), "=&r"(res), "+m"(ptr->value) +: "r"(add_), "r"(&ptr->value) +: "memory", "cc"); + + return res; +} + +#endif /* PG_HAVE_ATOMIC_U64_SUPPORT */ + +/* per architecture manual doubleword accesses have single copy atomicity */ +#define PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY diff --git a/src/include/port/atomics/arch-x86.h b/src/include/port/atomics/arch-x86.h new file mode 100644 index 0000000..cef1ba7 --- /dev/null +++ b/src/include/port/atomics/arch-x86.h @@ -0,0 +1,252 @@ +/*------------------------------------------------------------------------- + * + * arch-x86.h + * Atomic operations considerations specific to intel x86 + * + * Note that we actually require a 486 upwards because the 386 doesn't have + * support for xadd and cmpxchg. Given that the 386 isn't supported anywhere + * anymore that's not much of a restriction luckily. + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * NOTES: + * + * src/include/port/atomics/arch-x86.h + * + *------------------------------------------------------------------------- + */ + +/* + * Both 32 and 64 bit x86 do not allow loads to be reordered with other loads, + * or stores to be reordered with other stores, but a load can be performed + * before a subsequent store. + * + * Technically, some x86-ish chips support uncached memory access and/or + * special instructions that are weakly ordered. In those cases we'd need + * the read and write barriers to be lfence and sfence. But since we don't + * do those things, a compiler barrier should be enough. + * + * "lock; addl" has worked for longer than "mfence". It's also rumored to be + * faster in many scenarios. + */ + +#if defined(__GNUC__) || defined(__INTEL_COMPILER) +#if defined(__i386__) || defined(__i386) +#define pg_memory_barrier_impl() \ + __asm__ __volatile__ ("lock; addl $0,0(%%esp)" : : : "memory", "cc") +#elif defined(__x86_64__) +#define pg_memory_barrier_impl() \ + __asm__ __volatile__ ("lock; addl $0,0(%%rsp)" : : : "memory", "cc") +#endif +#endif /* defined(__GNUC__) || defined(__INTEL_COMPILER) */ + +#define pg_read_barrier_impl() pg_compiler_barrier_impl() +#define pg_write_barrier_impl() pg_compiler_barrier_impl() + +/* + * Provide implementation for atomics using inline assembly on x86 gcc. It's + * nice to support older gcc's and the compare/exchange implementation here is + * actually more efficient than the * __sync variant. + */ +#if defined(HAVE_ATOMICS) + +#if defined(__GNUC__) || defined(__INTEL_COMPILER) + +#define PG_HAVE_ATOMIC_FLAG_SUPPORT +typedef struct pg_atomic_flag +{ + volatile char value; +} pg_atomic_flag; + +#define PG_HAVE_ATOMIC_U32_SUPPORT +typedef struct pg_atomic_uint32 +{ + volatile uint32 value; +} pg_atomic_uint32; + +/* + * It's too complicated to write inline asm for 64bit types on 32bit and the + * 486 can't do it anyway. + */ +#ifdef __x86_64__ +#define PG_HAVE_ATOMIC_U64_SUPPORT +typedef struct pg_atomic_uint64 +{ + /* alignment guaranteed due to being on a 64bit platform */ + volatile uint64 value; +} pg_atomic_uint64; +#endif /* __x86_64__ */ + +#endif /* defined(__GNUC__) || defined(__INTEL_COMPILER) */ + +#endif /* defined(HAVE_ATOMICS) */ + +#if !defined(PG_HAVE_SPIN_DELAY) +/* + * This sequence is equivalent to the PAUSE instruction ("rep" is + * ignored by old IA32 processors if the following instruction is + * not a string operation); the IA-32 Architecture Software + * Developer's Manual, Vol. 3, Section 7.7.2 describes why using + * PAUSE in the inner loop of a spin lock is necessary for good + * performance: + * + * The PAUSE instruction improves the performance of IA-32 + * processors supporting Hyper-Threading Technology when + * executing spin-wait loops and other routines where one + * thread is accessing a shared lock or semaphore in a tight + * polling loop. When executing a spin-wait loop, the + * processor can suffer a severe performance penalty when + * exiting the loop because it detects a possible memory order + * violation and flushes the core processor's pipeline. The + * PAUSE instruction provides a hint to the processor that the + * code sequence is a spin-wait loop. The processor uses this + * hint to avoid the memory order violation and prevent the + * pipeline flush. In addition, the PAUSE instruction + * de-pipelines the spin-wait loop to prevent it from + * consuming execution resources excessively. + */ +#if defined(__GNUC__) || defined(__INTEL_COMPILER) +#define PG_HAVE_SPIN_DELAY +static __inline__ void +pg_spin_delay_impl(void) +{ + __asm__ __volatile__(" rep; nop \n"); +} +#elif defined(_MSC_VER) && defined(__x86_64__) +#define PG_HAVE_SPIN_DELAY +static __forceinline void +pg_spin_delay_impl(void) +{ + _mm_pause(); +} +#elif defined(_MSC_VER) +#define PG_HAVE_SPIN_DELAY +static __forceinline void +pg_spin_delay_impl(void) +{ + /* See comment for gcc code. Same code, MASM syntax */ + __asm rep nop; +} +#endif +#endif /* !defined(PG_HAVE_SPIN_DELAY) */ + + +#if defined(HAVE_ATOMICS) + +#if defined(__GNUC__) || defined(__INTEL_COMPILER) + +#define PG_HAVE_ATOMIC_TEST_SET_FLAG +static inline bool +pg_atomic_test_set_flag_impl(volatile pg_atomic_flag *ptr) +{ + register char _res = 1; + + __asm__ __volatile__( + " lock \n" + " xchgb %0,%1 \n" +: "+q"(_res), "+m"(ptr->value) +: +: "memory"); + return _res == 0; +} + +#define PG_HAVE_ATOMIC_CLEAR_FLAG +static inline void +pg_atomic_clear_flag_impl(volatile pg_atomic_flag *ptr) +{ + /* + * On a TSO architecture like x86 it's sufficient to use a compiler + * barrier to achieve release semantics. + */ + __asm__ __volatile__("" ::: "memory"); + ptr->value = 0; +} + +#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32 +static inline bool +pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr, + uint32 *expected, uint32 newval) +{ + char ret; + + /* + * Perform cmpxchg and use the zero flag which it implicitly sets when + * equal to measure the success. + */ + __asm__ __volatile__( + " lock \n" + " cmpxchgl %4,%5 \n" + " setz %2 \n" +: "=a" (*expected), "=m"(ptr->value), "=q" (ret) +: "a" (*expected), "r" (newval), "m"(ptr->value) +: "memory", "cc"); + return (bool) ret; +} + +#define PG_HAVE_ATOMIC_FETCH_ADD_U32 +static inline uint32 +pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_) +{ + uint32 res; + __asm__ __volatile__( + " lock \n" + " xaddl %0,%1 \n" +: "=q"(res), "=m"(ptr->value) +: "0" (add_), "m"(ptr->value) +: "memory", "cc"); + return res; +} + +#ifdef __x86_64__ + +#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64 +static inline bool +pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr, + uint64 *expected, uint64 newval) +{ + char ret; + + /* + * Perform cmpxchg and use the zero flag which it implicitly sets when + * equal to measure the success. + */ + __asm__ __volatile__( + " lock \n" + " cmpxchgq %4,%5 \n" + " setz %2 \n" +: "=a" (*expected), "=m"(ptr->value), "=q" (ret) +: "a" (*expected), "r" (newval), "m"(ptr->value) +: "memory", "cc"); + return (bool) ret; +} + +#define PG_HAVE_ATOMIC_FETCH_ADD_U64 +static inline uint64 +pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_) +{ + uint64 res; + __asm__ __volatile__( + " lock \n" + " xaddq %0,%1 \n" +: "=q"(res), "=m"(ptr->value) +: "0" (add_), "m"(ptr->value) +: "memory", "cc"); + return res; +} + +#endif /* __x86_64__ */ + +#endif /* defined(__GNUC__) || defined(__INTEL_COMPILER) */ + +/* + * 8 byte reads / writes have single-copy atomicity on 32 bit x86 platforms + * since at least the 586. As well as on all x86-64 cpus. + */ +#if defined(__i568__) || defined(__i668__) || /* gcc i586+ */ \ + (defined(_M_IX86) && _M_IX86 >= 500) || /* msvc i586+ */ \ + defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) /* gcc, sunpro, msvc */ +#define PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY +#endif /* 8 byte single-copy atomicity */ + +#endif /* HAVE_ATOMICS */ diff --git a/src/include/port/atomics/fallback.h b/src/include/port/atomics/fallback.h new file mode 100644 index 0000000..1cdef5f --- /dev/null +++ b/src/include/port/atomics/fallback.h @@ -0,0 +1,170 @@ +/*------------------------------------------------------------------------- + * + * fallback.h + * Fallback for platforms without spinlock and/or atomics support. Slower + * than native atomics support, but not unusably slow. + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/port/atomics/fallback.h + * + *------------------------------------------------------------------------- + */ + +/* intentionally no include guards, should only be included by atomics.h */ +#ifndef INSIDE_ATOMICS_H +# error "should be included via atomics.h" +#endif + +#ifndef pg_memory_barrier_impl +/* + * If we have no memory barrier implementation for this architecture, we + * fall back to acquiring and releasing a spinlock. This might, in turn, + * fall back to the semaphore-based spinlock implementation, which will be + * amazingly slow. + * + * It's not self-evident that every possible legal implementation of a + * spinlock acquire-and-release would be equivalent to a full memory barrier. + * For example, I'm not sure that Itanium's acq and rel add up to a full + * fence. But all of our actual implementations seem OK in this regard. + */ +#define PG_HAVE_MEMORY_BARRIER_EMULATION + +extern void pg_spinlock_barrier(void); +#define pg_memory_barrier_impl pg_spinlock_barrier +#endif + +#ifndef pg_compiler_barrier_impl +/* + * If the compiler/arch combination does not provide compiler barriers, + * provide a fallback. The fallback simply consists of a function call into + * an externally defined function. That should guarantee compiler barrier + * semantics except for compilers that do inter translation unit/global + * optimization - those better provide an actual compiler barrier. + * + * A native compiler barrier for sure is a lot faster than this... + */ +#define PG_HAVE_COMPILER_BARRIER_EMULATION +extern void pg_extern_compiler_barrier(void); +#define pg_compiler_barrier_impl pg_extern_compiler_barrier +#endif + + +/* + * If we have atomics implementation for this platform, fall back to providing + * the atomics API using a spinlock to protect the internal state. Possibly + * the spinlock implementation uses semaphores internally... + * + * We have to be a bit careful here, as it's not guaranteed that atomic + * variables are mapped to the same address in every process (e.g. dynamic + * shared memory segments). We can't just hash the address and use that to map + * to a spinlock. Instead assign a spinlock on initialization of the atomic + * variable. + */ +#if !defined(PG_HAVE_ATOMIC_FLAG_SUPPORT) && !defined(PG_HAVE_ATOMIC_U32_SUPPORT) + +#define PG_HAVE_ATOMIC_FLAG_SIMULATION +#define PG_HAVE_ATOMIC_FLAG_SUPPORT + +typedef struct pg_atomic_flag +{ + /* + * To avoid circular includes we can't use s_lock as a type here. Instead + * just reserve enough space for all spinlock types. Some platforms would + * be content with just one byte instead of 4, but that's not too much + * waste. + */ +#if defined(__hppa) || defined(__hppa__) /* HP PA-RISC, GCC and HP compilers */ + int sema[4]; +#else + int sema; +#endif + volatile bool value; +} pg_atomic_flag; + +#endif /* PG_HAVE_ATOMIC_FLAG_SUPPORT */ + +#if !defined(PG_HAVE_ATOMIC_U32_SUPPORT) + +#define PG_HAVE_ATOMIC_U32_SIMULATION + +#define PG_HAVE_ATOMIC_U32_SUPPORT +typedef struct pg_atomic_uint32 +{ + /* Check pg_atomic_flag's definition above for an explanation */ +#if defined(__hppa) || defined(__hppa__) /* HP PA-RISC, GCC and HP compilers */ + int sema[4]; +#else + int sema; +#endif + volatile uint32 value; +} pg_atomic_uint32; + +#endif /* PG_HAVE_ATOMIC_U32_SUPPORT */ + +#if !defined(PG_HAVE_ATOMIC_U64_SUPPORT) + +#define PG_HAVE_ATOMIC_U64_SIMULATION + +#define PG_HAVE_ATOMIC_U64_SUPPORT +typedef struct pg_atomic_uint64 +{ + /* Check pg_atomic_flag's definition above for an explanation */ +#if defined(__hppa) || defined(__hppa__) /* HP PA-RISC, GCC and HP compilers */ + int sema[4]; +#else + int sema; +#endif + volatile uint64 value; +} pg_atomic_uint64; + +#endif /* PG_HAVE_ATOMIC_U64_SUPPORT */ + +#ifdef PG_HAVE_ATOMIC_FLAG_SIMULATION + +#define PG_HAVE_ATOMIC_INIT_FLAG +extern void pg_atomic_init_flag_impl(volatile pg_atomic_flag *ptr); + +#define PG_HAVE_ATOMIC_TEST_SET_FLAG +extern bool pg_atomic_test_set_flag_impl(volatile pg_atomic_flag *ptr); + +#define PG_HAVE_ATOMIC_CLEAR_FLAG +extern void pg_atomic_clear_flag_impl(volatile pg_atomic_flag *ptr); + +#define PG_HAVE_ATOMIC_UNLOCKED_TEST_FLAG +extern bool pg_atomic_unlocked_test_flag_impl(volatile pg_atomic_flag *ptr); + +#endif /* PG_HAVE_ATOMIC_FLAG_SIMULATION */ + +#ifdef PG_HAVE_ATOMIC_U32_SIMULATION + +#define PG_HAVE_ATOMIC_INIT_U32 +extern void pg_atomic_init_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 val_); + +#define PG_HAVE_ATOMIC_WRITE_U32 +extern void pg_atomic_write_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 val); + +#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32 +extern bool pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr, + uint32 *expected, uint32 newval); + +#define PG_HAVE_ATOMIC_FETCH_ADD_U32 +extern uint32 pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_); + +#endif /* PG_HAVE_ATOMIC_U32_SIMULATION */ + + +#ifdef PG_HAVE_ATOMIC_U64_SIMULATION + +#define PG_HAVE_ATOMIC_INIT_U64 +extern void pg_atomic_init_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 val_); + +#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64 +extern bool pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr, + uint64 *expected, uint64 newval); + +#define PG_HAVE_ATOMIC_FETCH_ADD_U64 +extern uint64 pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_); + +#endif /* PG_HAVE_ATOMIC_U64_SIMULATION */ diff --git a/src/include/port/atomics/generic-acc.h b/src/include/port/atomics/generic-acc.h new file mode 100644 index 0000000..842b2de --- /dev/null +++ b/src/include/port/atomics/generic-acc.h @@ -0,0 +1,106 @@ +/*------------------------------------------------------------------------- + * + * generic-acc.h + * Atomic operations support when using HPs acc on HPUX + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * NOTES: + * + * Documentation: + * * inline assembly for Itanium-based HP-UX: + * http://h21007.www2.hp.com/portal/download/files/unprot/Itanium/inline_assem_ERS.pdf + * * Implementing Spinlocks on the Intel (R) Itanium (R) Architecture and PA-RISC + * http://h21007.www2.hp.com/portal/download/files/unprot/itanium/spinlocks.pdf + * + * Itanium only supports a small set of numbers (6, -8, -4, -1, 1, 4, 8, 16) + * for atomic add/sub, so we just implement everything but compare_exchange + * via the compare_exchange fallbacks in atomics/generic.h. + * + * src/include/port/atomics/generic-acc.h + * + * ------------------------------------------------------------------------- + */ + +#include <machine/sys/inline.h> + +#define pg_compiler_barrier_impl() _Asm_sched_fence() + +#if defined(HAVE_ATOMICS) + +/* IA64 always has 32/64 bit atomics */ + +#define PG_HAVE_ATOMIC_U32_SUPPORT +typedef struct pg_atomic_uint32 +{ + volatile uint32 value; +} pg_atomic_uint32; + +#define PG_HAVE_ATOMIC_U64_SUPPORT +typedef struct pg_atomic_uint64 +{ + /* + * Alignment is guaranteed to be 64bit. Search for "Well-behaved + * application restrictions" => "Data alignment and data sharing" on HP's + * website. Unfortunately the URL doesn't seem to stable enough to + * include. + */ + volatile uint64 value; +} pg_atomic_uint64; + + +#define MINOR_FENCE (_Asm_fence) (_UP_CALL_FENCE | _UP_SYS_FENCE | \ + _DOWN_CALL_FENCE | _DOWN_SYS_FENCE ) + +#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32 +static inline bool +pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr, + uint32 *expected, uint32 newval) +{ + bool ret; + uint32 current; + + _Asm_mov_to_ar(_AREG_CCV, *expected, MINOR_FENCE); + /* + * We want a barrier, not just release/acquire semantics. + */ + _Asm_mf(); + /* + * Notes: + * _DOWN_MEM_FENCE | _UP_MEM_FENCE prevents reordering by the compiler + */ + current = _Asm_cmpxchg(_SZ_W, /* word */ + _SEM_REL, + &ptr->value, + newval, _LDHINT_NONE, + _DOWN_MEM_FENCE | _UP_MEM_FENCE); + ret = current == *expected; + *expected = current; + return ret; +} + + +#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64 +static inline bool +pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr, + uint64 *expected, uint64 newval) +{ + bool ret; + uint64 current; + + _Asm_mov_to_ar(_AREG_CCV, *expected, MINOR_FENCE); + _Asm_mf(); + current = _Asm_cmpxchg(_SZ_D, /* doubleword */ + _SEM_REL, + &ptr->value, + newval, _LDHINT_NONE, + _DOWN_MEM_FENCE | _UP_MEM_FENCE); + ret = current == *expected; + *expected = current; + return ret; +} + +#undef MINOR_FENCE + +#endif /* defined(HAVE_ATOMICS) */ diff --git a/src/include/port/atomics/generic-gcc.h b/src/include/port/atomics/generic-gcc.h new file mode 100644 index 0000000..6c566af --- /dev/null +++ b/src/include/port/atomics/generic-gcc.h @@ -0,0 +1,286 @@ +/*------------------------------------------------------------------------- + * + * generic-gcc.h + * Atomic operations, implemented using gcc (or compatible) intrinsics. + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * NOTES: + * + * Documentation: + * * Legacy __sync Built-in Functions for Atomic Memory Access + * https://gcc.gnu.org/onlinedocs/gcc-4.8.2/gcc/_005f_005fsync-Builtins.html + * * Built-in functions for memory model aware atomic operations + * https://gcc.gnu.org/onlinedocs/gcc-4.8.2/gcc/_005f_005fatomic-Builtins.html + * + * src/include/port/atomics/generic-gcc.h + * + *------------------------------------------------------------------------- + */ + +/* intentionally no include guards, should only be included by atomics.h */ +#ifndef INSIDE_ATOMICS_H +#error "should be included via atomics.h" +#endif + +/* + * An empty asm block should be a sufficient compiler barrier. + */ +#define pg_compiler_barrier_impl() __asm__ __volatile__("" ::: "memory") + +/* + * If we're on GCC 4.1.0 or higher, we should be able to get a memory barrier + * out of this compiler built-in. But we prefer to rely on platform specific + * definitions where possible, and use this only as a fallback. + */ +#if !defined(pg_memory_barrier_impl) +# if defined(HAVE_GCC__ATOMIC_INT32_CAS) +# define pg_memory_barrier_impl() __atomic_thread_fence(__ATOMIC_SEQ_CST) +# elif (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) +# define pg_memory_barrier_impl() __sync_synchronize() +# endif +#endif /* !defined(pg_memory_barrier_impl) */ + +#if !defined(pg_read_barrier_impl) && defined(HAVE_GCC__ATOMIC_INT32_CAS) +/* acquire semantics include read barrier semantics */ +# define pg_read_barrier_impl() __atomic_thread_fence(__ATOMIC_ACQUIRE) +#endif + +#if !defined(pg_write_barrier_impl) && defined(HAVE_GCC__ATOMIC_INT32_CAS) +/* release semantics include write barrier semantics */ +# define pg_write_barrier_impl() __atomic_thread_fence(__ATOMIC_RELEASE) +#endif + + +#ifdef HAVE_ATOMICS + +/* generic gcc based atomic flag implementation */ +#if !defined(PG_HAVE_ATOMIC_FLAG_SUPPORT) \ + && (defined(HAVE_GCC__SYNC_INT32_TAS) || defined(HAVE_GCC__SYNC_CHAR_TAS)) + +#define PG_HAVE_ATOMIC_FLAG_SUPPORT +typedef struct pg_atomic_flag +{ + /* + * If we have a choice, use int-width TAS, because that is more efficient + * and/or more reliably implemented on most non-Intel platforms. (Note + * that this code isn't used on x86[_64]; see arch-x86.h for that.) + */ +#ifdef HAVE_GCC__SYNC_INT32_TAS + volatile int value; +#else + volatile char value; +#endif +} pg_atomic_flag; + +#endif /* !ATOMIC_FLAG_SUPPORT && SYNC_INT32_TAS */ + +/* generic gcc based atomic uint32 implementation */ +#if !defined(PG_HAVE_ATOMIC_U32_SUPPORT) \ + && (defined(HAVE_GCC__ATOMIC_INT32_CAS) || defined(HAVE_GCC__SYNC_INT32_CAS)) + +#define PG_HAVE_ATOMIC_U32_SUPPORT +typedef struct pg_atomic_uint32 +{ + volatile uint32 value; +} pg_atomic_uint32; + +#endif /* defined(HAVE_GCC__ATOMIC_INT32_CAS) || defined(HAVE_GCC__SYNC_INT32_CAS) */ + +/* generic gcc based atomic uint64 implementation */ +#if !defined(PG_HAVE_ATOMIC_U64_SUPPORT) \ + && !defined(PG_DISABLE_64_BIT_ATOMICS) \ + && (defined(HAVE_GCC__ATOMIC_INT64_CAS) || defined(HAVE_GCC__SYNC_INT64_CAS)) + +#define PG_HAVE_ATOMIC_U64_SUPPORT + +typedef struct pg_atomic_uint64 +{ + volatile uint64 value pg_attribute_aligned(8); +} pg_atomic_uint64; + +#endif /* defined(HAVE_GCC__ATOMIC_INT64_CAS) || defined(HAVE_GCC__SYNC_INT64_CAS) */ + +#ifdef PG_HAVE_ATOMIC_FLAG_SUPPORT + +#if defined(HAVE_GCC__SYNC_CHAR_TAS) || defined(HAVE_GCC__SYNC_INT32_TAS) + +#ifndef PG_HAVE_ATOMIC_TEST_SET_FLAG +#define PG_HAVE_ATOMIC_TEST_SET_FLAG +static inline bool +pg_atomic_test_set_flag_impl(volatile pg_atomic_flag *ptr) +{ + /* NB: only an acquire barrier, not a full one */ + /* some platform only support a 1 here */ + return __sync_lock_test_and_set(&ptr->value, 1) == 0; +} +#endif + +#endif /* defined(HAVE_GCC__SYNC_*_TAS) */ + +#ifndef PG_HAVE_ATOMIC_UNLOCKED_TEST_FLAG +#define PG_HAVE_ATOMIC_UNLOCKED_TEST_FLAG +static inline bool +pg_atomic_unlocked_test_flag_impl(volatile pg_atomic_flag *ptr) +{ + return ptr->value == 0; +} +#endif + +#ifndef PG_HAVE_ATOMIC_CLEAR_FLAG +#define PG_HAVE_ATOMIC_CLEAR_FLAG +static inline void +pg_atomic_clear_flag_impl(volatile pg_atomic_flag *ptr) +{ + __sync_lock_release(&ptr->value); +} +#endif + +#ifndef PG_HAVE_ATOMIC_INIT_FLAG +#define PG_HAVE_ATOMIC_INIT_FLAG +static inline void +pg_atomic_init_flag_impl(volatile pg_atomic_flag *ptr) +{ + pg_atomic_clear_flag_impl(ptr); +} +#endif + +#endif /* defined(PG_HAVE_ATOMIC_FLAG_SUPPORT) */ + +/* prefer __atomic, it has a better API */ +#if !defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32) && defined(HAVE_GCC__ATOMIC_INT32_CAS) +#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32 +static inline bool +pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr, + uint32 *expected, uint32 newval) +{ + /* FIXME: we can probably use a lower consistency model */ + return __atomic_compare_exchange_n(&ptr->value, expected, newval, false, + __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); +} +#endif + +#if !defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32) && defined(HAVE_GCC__SYNC_INT32_CAS) +#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32 +static inline bool +pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr, + uint32 *expected, uint32 newval) +{ + bool ret; + uint32 current; + current = __sync_val_compare_and_swap(&ptr->value, *expected, newval); + ret = current == *expected; + *expected = current; + return ret; +} +#endif + +/* if we have 32-bit __sync_val_compare_and_swap, assume we have these too: */ + +#if !defined(PG_HAVE_ATOMIC_FETCH_ADD_U32) && defined(HAVE_GCC__SYNC_INT32_CAS) +#define PG_HAVE_ATOMIC_FETCH_ADD_U32 +static inline uint32 +pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_) +{ + return __sync_fetch_and_add(&ptr->value, add_); +} +#endif + +#if !defined(PG_HAVE_ATOMIC_FETCH_SUB_U32) && defined(HAVE_GCC__SYNC_INT32_CAS) +#define PG_HAVE_ATOMIC_FETCH_SUB_U32 +static inline uint32 +pg_atomic_fetch_sub_u32_impl(volatile pg_atomic_uint32 *ptr, int32 sub_) +{ + return __sync_fetch_and_sub(&ptr->value, sub_); +} +#endif + +#if !defined(PG_HAVE_ATOMIC_FETCH_AND_U32) && defined(HAVE_GCC__SYNC_INT32_CAS) +#define PG_HAVE_ATOMIC_FETCH_AND_U32 +static inline uint32 +pg_atomic_fetch_and_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 and_) +{ + return __sync_fetch_and_and(&ptr->value, and_); +} +#endif + +#if !defined(PG_HAVE_ATOMIC_FETCH_OR_U32) && defined(HAVE_GCC__SYNC_INT32_CAS) +#define PG_HAVE_ATOMIC_FETCH_OR_U32 +static inline uint32 +pg_atomic_fetch_or_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 or_) +{ + return __sync_fetch_and_or(&ptr->value, or_); +} +#endif + + +#if !defined(PG_DISABLE_64_BIT_ATOMICS) + +#if !defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64) && defined(HAVE_GCC__ATOMIC_INT64_CAS) +#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64 +static inline bool +pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr, + uint64 *expected, uint64 newval) +{ + return __atomic_compare_exchange_n(&ptr->value, expected, newval, false, + __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); +} +#endif + +#if !defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64) && defined(HAVE_GCC__SYNC_INT64_CAS) +#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64 +static inline bool +pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr, + uint64 *expected, uint64 newval) +{ + bool ret; + uint64 current; + current = __sync_val_compare_and_swap(&ptr->value, *expected, newval); + ret = current == *expected; + *expected = current; + return ret; +} +#endif + +/* if we have 64-bit __sync_val_compare_and_swap, assume we have these too: */ + +#if !defined(PG_HAVE_ATOMIC_FETCH_ADD_U64) && defined(HAVE_GCC__SYNC_INT64_CAS) +#define PG_HAVE_ATOMIC_FETCH_ADD_U64 +static inline uint64 +pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_) +{ + return __sync_fetch_and_add(&ptr->value, add_); +} +#endif + +#if !defined(PG_HAVE_ATOMIC_FETCH_SUB_U64) && defined(HAVE_GCC__SYNC_INT64_CAS) +#define PG_HAVE_ATOMIC_FETCH_SUB_U64 +static inline uint64 +pg_atomic_fetch_sub_u64_impl(volatile pg_atomic_uint64 *ptr, int64 sub_) +{ + return __sync_fetch_and_sub(&ptr->value, sub_); +} +#endif + +#if !defined(PG_HAVE_ATOMIC_FETCH_AND_U64) && defined(HAVE_GCC__SYNC_INT64_CAS) +#define PG_HAVE_ATOMIC_FETCH_AND_U64 +static inline uint64 +pg_atomic_fetch_and_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 and_) +{ + return __sync_fetch_and_and(&ptr->value, and_); +} +#endif + +#if !defined(PG_HAVE_ATOMIC_FETCH_OR_U64) && defined(HAVE_GCC__SYNC_INT64_CAS) +#define PG_HAVE_ATOMIC_FETCH_OR_U64 +static inline uint64 +pg_atomic_fetch_or_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 or_) +{ + return __sync_fetch_and_or(&ptr->value, or_); +} +#endif + +#endif /* !defined(PG_DISABLE_64_BIT_ATOMICS) */ + +#endif /* defined(HAVE_ATOMICS) */ diff --git a/src/include/port/atomics/generic-msvc.h b/src/include/port/atomics/generic-msvc.h new file mode 100644 index 0000000..6294162 --- /dev/null +++ b/src/include/port/atomics/generic-msvc.h @@ -0,0 +1,101 @@ +/*------------------------------------------------------------------------- + * + * generic-msvc.h + * Atomic operations support when using MSVC + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * NOTES: + * + * Documentation: + * * Interlocked Variable Access + * http://msdn.microsoft.com/en-us/library/ms684122%28VS.85%29.aspx + * + * src/include/port/atomics/generic-msvc.h + * + *------------------------------------------------------------------------- + */ +#include <intrin.h> + +/* intentionally no include guards, should only be included by atomics.h */ +#ifndef INSIDE_ATOMICS_H +#error "should be included via atomics.h" +#endif + +#pragma intrinsic(_ReadWriteBarrier) +#define pg_compiler_barrier_impl() _ReadWriteBarrier() + +#ifndef pg_memory_barrier_impl +#define pg_memory_barrier_impl() MemoryBarrier() +#endif + +#if defined(HAVE_ATOMICS) + +#define PG_HAVE_ATOMIC_U32_SUPPORT +typedef struct pg_atomic_uint32 +{ + volatile uint32 value; +} pg_atomic_uint32; + +#define PG_HAVE_ATOMIC_U64_SUPPORT +typedef struct __declspec(align(8)) pg_atomic_uint64 +{ + volatile uint64 value; +} pg_atomic_uint64; + + +#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32 +static inline bool +pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr, + uint32 *expected, uint32 newval) +{ + bool ret; + uint32 current; + current = InterlockedCompareExchange(&ptr->value, newval, *expected); + ret = current == *expected; + *expected = current; + return ret; +} + +#define PG_HAVE_ATOMIC_FETCH_ADD_U32 +static inline uint32 +pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_) +{ + return InterlockedExchangeAdd(&ptr->value, add_); +} + +/* + * The non-intrinsics versions are only available in vista upwards, so use the + * intrinsic version. Only supported on >486, but we require XP as a minimum + * baseline, which doesn't support the 486, so we don't need to add checks for + * that case. + */ +#pragma intrinsic(_InterlockedCompareExchange64) + +#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64 +static inline bool +pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr, + uint64 *expected, uint64 newval) +{ + bool ret; + uint64 current; + current = _InterlockedCompareExchange64(&ptr->value, newval, *expected); + ret = current == *expected; + *expected = current; + return ret; +} + +/* Only implemented on itanium and 64bit builds */ +#ifdef _WIN64 +#pragma intrinsic(_InterlockedExchangeAdd64) + +#define PG_HAVE_ATOMIC_FETCH_ADD_U64 +static inline uint64 +pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_) +{ + return _InterlockedExchangeAdd64(&ptr->value, add_); +} +#endif /* _WIN64 */ + +#endif /* HAVE_ATOMICS */ diff --git a/src/include/port/atomics/generic-sunpro.h b/src/include/port/atomics/generic-sunpro.h new file mode 100644 index 0000000..da1f8f1 --- /dev/null +++ b/src/include/port/atomics/generic-sunpro.h @@ -0,0 +1,106 @@ +/*------------------------------------------------------------------------- + * + * generic-sunpro.h + * Atomic operations for solaris' CC + * + * Portions Copyright (c) 2013-2022, PostgreSQL Global Development Group + * + * NOTES: + * + * Documentation: + * * manpage for atomic_cas(3C) + * http://www.unix.com/man-page/opensolaris/3c/atomic_cas/ + * http://docs.oracle.com/cd/E23824_01/html/821-1465/atomic-cas-3c.html + * + * src/include/port/atomics/generic-sunpro.h + * + * ------------------------------------------------------------------------- + */ + +#if defined(HAVE_ATOMICS) + +#ifdef HAVE_MBARRIER_H +#include <mbarrier.h> + +#define pg_compiler_barrier_impl() __compiler_barrier() + +#ifndef pg_memory_barrier_impl +/* + * Despite the name this is actually a full barrier. Expanding to mfence/ + * membar #StoreStore | #LoadStore | #StoreLoad | #LoadLoad on x86/sparc + * respectively. + */ +# define pg_memory_barrier_impl() __machine_rw_barrier() +#endif +#ifndef pg_read_barrier_impl +# define pg_read_barrier_impl() __machine_r_barrier() +#endif +#ifndef pg_write_barrier_impl +# define pg_write_barrier_impl() __machine_w_barrier() +#endif + +#endif /* HAVE_MBARRIER_H */ + +/* Older versions of the compiler don't have atomic.h... */ +#ifdef HAVE_ATOMIC_H + +#include <atomic.h> + +#define PG_HAVE_ATOMIC_U32_SUPPORT +typedef struct pg_atomic_uint32 +{ + volatile uint32 value; +} pg_atomic_uint32; + +#define PG_HAVE_ATOMIC_U64_SUPPORT +typedef struct pg_atomic_uint64 +{ + /* + * Syntax to enforce variable alignment should be supported by versions + * supporting atomic.h, but it's hard to find accurate documentation. If + * it proves to be a problem, we'll have to add more version checks for 64 + * bit support. + */ + volatile uint64 value pg_attribute_aligned(8); +} pg_atomic_uint64; + +#endif /* HAVE_ATOMIC_H */ + +#endif /* defined(HAVE_ATOMICS) */ + + +#if defined(HAVE_ATOMICS) + +#ifdef HAVE_ATOMIC_H + +#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32 +static inline bool +pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr, + uint32 *expected, uint32 newval) +{ + bool ret; + uint32 current; + + current = atomic_cas_32(&ptr->value, *expected, newval); + ret = current == *expected; + *expected = current; + return ret; +} + +#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64 +static inline bool +pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr, + uint64 *expected, uint64 newval) +{ + bool ret; + uint64 current; + + current = atomic_cas_64(&ptr->value, *expected, newval); + ret = current == *expected; + *expected = current; + return ret; +} + +#endif /* HAVE_ATOMIC_H */ + +#endif /* defined(HAVE_ATOMICS) */ diff --git a/src/include/port/atomics/generic.h b/src/include/port/atomics/generic.h new file mode 100644 index 0000000..a1f2456 --- /dev/null +++ b/src/include/port/atomics/generic.h @@ -0,0 +1,401 @@ +/*------------------------------------------------------------------------- + * + * generic.h + * Implement higher level operations based on some lower level atomic + * operations. + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/port/atomics/generic.h + * + *------------------------------------------------------------------------- + */ + +/* intentionally no include guards, should only be included by atomics.h */ +#ifndef INSIDE_ATOMICS_H +# error "should be included via atomics.h" +#endif + +/* + * If read or write barriers are undefined, we upgrade them to full memory + * barriers. + */ +#if !defined(pg_read_barrier_impl) +# define pg_read_barrier_impl pg_memory_barrier_impl +#endif +#if !defined(pg_write_barrier_impl) +# define pg_write_barrier_impl pg_memory_barrier_impl +#endif + +#ifndef PG_HAVE_SPIN_DELAY +#define PG_HAVE_SPIN_DELAY +#define pg_spin_delay_impl() ((void)0) +#endif + + +/* provide fallback */ +#if !defined(PG_HAVE_ATOMIC_FLAG_SUPPORT) && defined(PG_HAVE_ATOMIC_U32_SUPPORT) +#define PG_HAVE_ATOMIC_FLAG_SUPPORT +typedef pg_atomic_uint32 pg_atomic_flag; +#endif + +#ifndef PG_HAVE_ATOMIC_READ_U32 +#define PG_HAVE_ATOMIC_READ_U32 +static inline uint32 +pg_atomic_read_u32_impl(volatile pg_atomic_uint32 *ptr) +{ + return ptr->value; +} +#endif + +#ifndef PG_HAVE_ATOMIC_WRITE_U32 +#define PG_HAVE_ATOMIC_WRITE_U32 +static inline void +pg_atomic_write_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 val) +{ + ptr->value = val; +} +#endif + +#ifndef PG_HAVE_ATOMIC_UNLOCKED_WRITE_U32 +#define PG_HAVE_ATOMIC_UNLOCKED_WRITE_U32 +static inline void +pg_atomic_unlocked_write_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 val) +{ + ptr->value = val; +} +#endif + +/* + * provide fallback for test_and_set using atomic_exchange if available + */ +#if !defined(PG_HAVE_ATOMIC_TEST_SET_FLAG) && defined(PG_HAVE_ATOMIC_EXCHANGE_U32) + +#define PG_HAVE_ATOMIC_INIT_FLAG +static inline void +pg_atomic_init_flag_impl(volatile pg_atomic_flag *ptr) +{ + pg_atomic_write_u32_impl(ptr, 0); +} + +#define PG_HAVE_ATOMIC_TEST_SET_FLAG +static inline bool +pg_atomic_test_set_flag_impl(volatile pg_atomic_flag *ptr) +{ + return pg_atomic_exchange_u32_impl(ptr, &value, 1) == 0; +} + +#define PG_HAVE_ATOMIC_UNLOCKED_TEST_FLAG +static inline bool +pg_atomic_unlocked_test_flag_impl(volatile pg_atomic_flag *ptr) +{ + return pg_atomic_read_u32_impl(ptr) == 0; +} + + +#define PG_HAVE_ATOMIC_CLEAR_FLAG +static inline void +pg_atomic_clear_flag_impl(volatile pg_atomic_flag *ptr) +{ + /* XXX: release semantics suffice? */ + pg_memory_barrier_impl(); + pg_atomic_write_u32_impl(ptr, 0); +} + +/* + * provide fallback for test_and_set using atomic_compare_exchange if + * available. + */ +#elif !defined(PG_HAVE_ATOMIC_TEST_SET_FLAG) && defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32) + +#define PG_HAVE_ATOMIC_INIT_FLAG +static inline void +pg_atomic_init_flag_impl(volatile pg_atomic_flag *ptr) +{ + pg_atomic_write_u32_impl(ptr, 0); +} + +#define PG_HAVE_ATOMIC_TEST_SET_FLAG +static inline bool +pg_atomic_test_set_flag_impl(volatile pg_atomic_flag *ptr) +{ + uint32 value = 0; + return pg_atomic_compare_exchange_u32_impl(ptr, &value, 1); +} + +#define PG_HAVE_ATOMIC_UNLOCKED_TEST_FLAG +static inline bool +pg_atomic_unlocked_test_flag_impl(volatile pg_atomic_flag *ptr) +{ + return pg_atomic_read_u32_impl(ptr) == 0; +} + +#define PG_HAVE_ATOMIC_CLEAR_FLAG +static inline void +pg_atomic_clear_flag_impl(volatile pg_atomic_flag *ptr) +{ + /* + * Use a memory barrier + plain write if we have a native memory + * barrier. But don't do so if memory barriers use spinlocks - that'd lead + * to circularity if flags are used to implement spinlocks. + */ +#ifndef PG_HAVE_MEMORY_BARRIER_EMULATION + /* XXX: release semantics suffice? */ + pg_memory_barrier_impl(); + pg_atomic_write_u32_impl(ptr, 0); +#else + uint32 value = 1; + pg_atomic_compare_exchange_u32_impl(ptr, &value, 0); +#endif +} + +#elif !defined(PG_HAVE_ATOMIC_TEST_SET_FLAG) +# error "No pg_atomic_test_and_set provided" +#endif /* !defined(PG_HAVE_ATOMIC_TEST_SET_FLAG) */ + + +#ifndef PG_HAVE_ATOMIC_INIT_U32 +#define PG_HAVE_ATOMIC_INIT_U32 +static inline void +pg_atomic_init_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 val_) +{ + ptr->value = val_; +} +#endif + +#if !defined(PG_HAVE_ATOMIC_EXCHANGE_U32) && defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32) +#define PG_HAVE_ATOMIC_EXCHANGE_U32 +static inline uint32 +pg_atomic_exchange_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 xchg_) +{ + uint32 old; + old = ptr->value; /* ok if read is not atomic */ + while (!pg_atomic_compare_exchange_u32_impl(ptr, &old, xchg_)) + /* skip */; + return old; +} +#endif + +#if !defined(PG_HAVE_ATOMIC_FETCH_ADD_U32) && defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32) +#define PG_HAVE_ATOMIC_FETCH_ADD_U32 +static inline uint32 +pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_) +{ + uint32 old; + old = ptr->value; /* ok if read is not atomic */ + while (!pg_atomic_compare_exchange_u32_impl(ptr, &old, old + add_)) + /* skip */; + return old; +} +#endif + +#if !defined(PG_HAVE_ATOMIC_FETCH_SUB_U32) && defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32) +#define PG_HAVE_ATOMIC_FETCH_SUB_U32 +static inline uint32 +pg_atomic_fetch_sub_u32_impl(volatile pg_atomic_uint32 *ptr, int32 sub_) +{ + return pg_atomic_fetch_add_u32_impl(ptr, -sub_); +} +#endif + +#if !defined(PG_HAVE_ATOMIC_FETCH_AND_U32) && defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32) +#define PG_HAVE_ATOMIC_FETCH_AND_U32 +static inline uint32 +pg_atomic_fetch_and_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 and_) +{ + uint32 old; + old = ptr->value; /* ok if read is not atomic */ + while (!pg_atomic_compare_exchange_u32_impl(ptr, &old, old & and_)) + /* skip */; + return old; +} +#endif + +#if !defined(PG_HAVE_ATOMIC_FETCH_OR_U32) && defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32) +#define PG_HAVE_ATOMIC_FETCH_OR_U32 +static inline uint32 +pg_atomic_fetch_or_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 or_) +{ + uint32 old; + old = ptr->value; /* ok if read is not atomic */ + while (!pg_atomic_compare_exchange_u32_impl(ptr, &old, old | or_)) + /* skip */; + return old; +} +#endif + +#if !defined(PG_HAVE_ATOMIC_ADD_FETCH_U32) && defined(PG_HAVE_ATOMIC_FETCH_ADD_U32) +#define PG_HAVE_ATOMIC_ADD_FETCH_U32 +static inline uint32 +pg_atomic_add_fetch_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_) +{ + return pg_atomic_fetch_add_u32_impl(ptr, add_) + add_; +} +#endif + +#if !defined(PG_HAVE_ATOMIC_SUB_FETCH_U32) && defined(PG_HAVE_ATOMIC_FETCH_SUB_U32) +#define PG_HAVE_ATOMIC_SUB_FETCH_U32 +static inline uint32 +pg_atomic_sub_fetch_u32_impl(volatile pg_atomic_uint32 *ptr, int32 sub_) +{ + return pg_atomic_fetch_sub_u32_impl(ptr, sub_) - sub_; +} +#endif + +#if !defined(PG_HAVE_ATOMIC_EXCHANGE_U64) && defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64) +#define PG_HAVE_ATOMIC_EXCHANGE_U64 +static inline uint64 +pg_atomic_exchange_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 xchg_) +{ + uint64 old; + old = ptr->value; /* ok if read is not atomic */ + while (!pg_atomic_compare_exchange_u64_impl(ptr, &old, xchg_)) + /* skip */; + return old; +} +#endif + +#ifndef PG_HAVE_ATOMIC_WRITE_U64 +#define PG_HAVE_ATOMIC_WRITE_U64 + +#if defined(PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY) && \ + !defined(PG_HAVE_ATOMIC_U64_SIMULATION) + +static inline void +pg_atomic_write_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 val) +{ + /* + * On this platform aligned 64bit writes are guaranteed to be atomic, + * except if using the fallback implementation, where can't guarantee the + * required alignment. + */ + AssertPointerAlignment(ptr, 8); + ptr->value = val; +} + +#else + +static inline void +pg_atomic_write_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 val) +{ + /* + * 64 bit writes aren't safe on all platforms. In the generic + * implementation implement them as an atomic exchange. + */ + pg_atomic_exchange_u64_impl(ptr, val); +} + +#endif /* PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY && !PG_HAVE_ATOMIC_U64_SIMULATION */ +#endif /* PG_HAVE_ATOMIC_WRITE_U64 */ + +#ifndef PG_HAVE_ATOMIC_READ_U64 +#define PG_HAVE_ATOMIC_READ_U64 + +#if defined(PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY) && \ + !defined(PG_HAVE_ATOMIC_U64_SIMULATION) + +static inline uint64 +pg_atomic_read_u64_impl(volatile pg_atomic_uint64 *ptr) +{ + /* + * On this platform aligned 64-bit reads are guaranteed to be atomic. + */ + AssertPointerAlignment(ptr, 8); + return ptr->value; +} + +#else + +static inline uint64 +pg_atomic_read_u64_impl(volatile pg_atomic_uint64 *ptr) +{ + uint64 old = 0; + + /* + * 64-bit reads aren't atomic on all platforms. In the generic + * implementation implement them as a compare/exchange with 0. That'll + * fail or succeed, but always return the old value. Possibly might store + * a 0, but only if the previous value also was a 0 - i.e. harmless. + */ + pg_atomic_compare_exchange_u64_impl(ptr, &old, 0); + + return old; +} +#endif /* PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY && !PG_HAVE_ATOMIC_U64_SIMULATION */ +#endif /* PG_HAVE_ATOMIC_READ_U64 */ + +#ifndef PG_HAVE_ATOMIC_INIT_U64 +#define PG_HAVE_ATOMIC_INIT_U64 +static inline void +pg_atomic_init_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 val_) +{ + ptr->value = val_; +} +#endif + +#if !defined(PG_HAVE_ATOMIC_FETCH_ADD_U64) && defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64) +#define PG_HAVE_ATOMIC_FETCH_ADD_U64 +static inline uint64 +pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_) +{ + uint64 old; + old = ptr->value; /* ok if read is not atomic */ + while (!pg_atomic_compare_exchange_u64_impl(ptr, &old, old + add_)) + /* skip */; + return old; +} +#endif + +#if !defined(PG_HAVE_ATOMIC_FETCH_SUB_U64) && defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64) +#define PG_HAVE_ATOMIC_FETCH_SUB_U64 +static inline uint64 +pg_atomic_fetch_sub_u64_impl(volatile pg_atomic_uint64 *ptr, int64 sub_) +{ + return pg_atomic_fetch_add_u64_impl(ptr, -sub_); +} +#endif + +#if !defined(PG_HAVE_ATOMIC_FETCH_AND_U64) && defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64) +#define PG_HAVE_ATOMIC_FETCH_AND_U64 +static inline uint64 +pg_atomic_fetch_and_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 and_) +{ + uint64 old; + old = ptr->value; /* ok if read is not atomic */ + while (!pg_atomic_compare_exchange_u64_impl(ptr, &old, old & and_)) + /* skip */; + return old; +} +#endif + +#if !defined(PG_HAVE_ATOMIC_FETCH_OR_U64) && defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64) +#define PG_HAVE_ATOMIC_FETCH_OR_U64 +static inline uint64 +pg_atomic_fetch_or_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 or_) +{ + uint64 old; + old = ptr->value; /* ok if read is not atomic */ + while (!pg_atomic_compare_exchange_u64_impl(ptr, &old, old | or_)) + /* skip */; + return old; +} +#endif + +#if !defined(PG_HAVE_ATOMIC_ADD_FETCH_U64) && defined(PG_HAVE_ATOMIC_FETCH_ADD_U64) +#define PG_HAVE_ATOMIC_ADD_FETCH_U64 +static inline uint64 +pg_atomic_add_fetch_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_) +{ + return pg_atomic_fetch_add_u64_impl(ptr, add_) + add_; +} +#endif + +#if !defined(PG_HAVE_ATOMIC_SUB_FETCH_U64) && defined(PG_HAVE_ATOMIC_FETCH_SUB_U64) +#define PG_HAVE_ATOMIC_SUB_FETCH_U64 +static inline uint64 +pg_atomic_sub_fetch_u64_impl(volatile pg_atomic_uint64 *ptr, int64 sub_) +{ + return pg_atomic_fetch_sub_u64_impl(ptr, sub_) - sub_; +} +#endif |