Diffstat (limited to 'src/include/port/atomics/arch-ppc.h')
-rw-r--r--  src/include/port/atomics/arch-ppc.h | 254 ++++++++++++++++++++++++
1 file changed, 254 insertions, 0 deletions
diff --git a/src/include/port/atomics/arch-ppc.h b/src/include/port/atomics/arch-ppc.h
new file mode 100644
index 0000000..0814296
--- /dev/null
+++ b/src/include/port/atomics/arch-ppc.h
@@ -0,0 +1,254 @@
+/*-------------------------------------------------------------------------
+ *
+ * arch-ppc.h
+ *	  Atomic operations considerations specific to PowerPC
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * NOTES:
+ *
+ * src/include/port/atomics/arch-ppc.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#if defined(__GNUC__)
+
+/*
+ * lwsync orders loads with respect to each other, and similarly with stores.
+ * But a load can be performed before a subsequent store, so sync must be used
+ * for a full memory barrier.
+ */
+#define pg_memory_barrier_impl()	__asm__ __volatile__ ("sync" : : : "memory")
+#define pg_read_barrier_impl()		__asm__ __volatile__ ("lwsync" : : : "memory")
+#define pg_write_barrier_impl()	__asm__ __volatile__ ("lwsync" : : : "memory")
+#endif
+
+#define PG_HAVE_ATOMIC_U32_SUPPORT
+typedef struct pg_atomic_uint32
+{
+	volatile uint32 value;
+} pg_atomic_uint32;
+
+/* 64bit atomics are only supported in 64bit mode */
+#if SIZEOF_VOID_P >= 8
+#define PG_HAVE_ATOMIC_U64_SUPPORT
+typedef struct pg_atomic_uint64
+{
+	volatile uint64 value pg_attribute_aligned(8);
+} pg_atomic_uint64;
+
+#endif
+
+/*
+ * This mimics gcc __atomic_compare_exchange_n(..., __ATOMIC_SEQ_CST), but
+ * code generation differs at the end.  __atomic_compare_exchange_n():
+ *  100:	isync
+ *  104:	mfcr    r3
+ *  108:	rlwinm  r3,r3,3,31,31
+ *  10c:	bne     120 <.eb+0x10>
+ *  110:	clrldi  r3,r3,63
+ *  114:	addi    r1,r1,112
+ *  118:	blr
+ *  11c:	nop
+ *  120:	clrldi  r3,r3,63
+ *  124:	stw     r9,0(r4)
+ *  128:	addi    r1,r1,112
+ *  12c:	blr
+ *
+ * This:
+ *   f0:	isync
+ *   f4:	mfcr    r9
+ *   f8:	rldicl. r3,r9,35,63
+ *   fc:	bne     104 <.eb>
+ *  100:	stw     r10,0(r4)
+ *  104:	addi    r1,r1,112
+ *  108:	blr
+ *
+ * This implementation may or may not have materially different performance.
+ * It's not exploiting the fact that cr0 still holds the relevant comparison
+ * bits, set during the __asm__.  One could fix that by moving more code into
+ * the __asm__.  (That would remove the freedom to eliminate dead stores when
+ * the caller ignores "expected", but few callers do.)
+ *
+ * Recognizing constant "newval" would be superfluous, because there's no
+ * immediate-operand version of stwcx.
+ */
+#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32
+static inline bool
+pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
+									uint32 *expected, uint32 newval)
+{
+	uint32		found;
+	uint32		condition_register;
+	bool		ret;
+
+#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
+	if (__builtin_constant_p(*expected) &&
+		(int32) *expected <= PG_INT16_MAX &&
+		(int32) *expected >= PG_INT16_MIN)
+		__asm__ __volatile__(
+			"	sync				\n"
+			"	lwarx   %0,0,%5		\n"
+			"	cmpwi   %0,%3		\n"
+			"	bne     $+12		\n" /* branch to isync */
+			"	stwcx.  %4,0,%5		\n"
+			"	bne     $-16		\n" /* branch to lwarx */
+			"	isync				\n"
+			"	mfcr    %1			\n"
+:			"=&r"(found), "=r"(condition_register), "+m"(ptr->value)
+:			"i"(*expected), "r"(newval), "r"(&ptr->value)
+:			"memory", "cc");
+	else
+#endif
+		__asm__ __volatile__(
+			"	sync				\n"
+			"	lwarx   %0,0,%5		\n"
+			"	cmpw    %0,%3		\n"
+			"	bne     $+12		\n" /* branch to isync */
+			"	stwcx.  %4,0,%5		\n"
+			"	bne     $-16		\n" /* branch to lwarx */
+			"	isync				\n"
+			"	mfcr    %1			\n"
+:			"=&r"(found), "=r"(condition_register), "+m"(ptr->value)
+:			"r"(*expected), "r"(newval), "r"(&ptr->value)
+:			"memory", "cc");
+
+	ret = (condition_register >> 29) & 1;	/* test eq bit of cr0 */
+	if (!ret)
+		*expected = found;
+	return ret;
+}
+
+/*
+ * This mirrors gcc __sync_fetch_and_add().
+ *
+ * Like tas(), use constraint "=&b" to avoid allocating r0.
+ */
+#define PG_HAVE_ATOMIC_FETCH_ADD_U32
+static inline uint32
+pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_)
+{
+	uint32		_t;
+	uint32		res;
+
+#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
+	if (__builtin_constant_p(add_) &&
+		add_ <= PG_INT16_MAX && add_ >= PG_INT16_MIN)
+		__asm__ __volatile__(
+			"	sync				\n"
+			"	lwarx   %1,0,%4		\n"
+			"	addi    %0,%1,%3	\n"
+			"	stwcx.  %0,0,%4		\n"
+			"	bne     $-12		\n" /* branch to lwarx */
+			"	isync				\n"
+:			"=&r"(_t), "=&b"(res), "+m"(ptr->value)
+:			"i"(add_), "r"(&ptr->value)
+:			"memory", "cc");
	else
+#endif
+		__asm__ __volatile__(
+			"	sync				\n"
+			"	lwarx   %1,0,%4		\n"
+			"	add     %0,%1,%3	\n"
+			"	stwcx.  %0,0,%4		\n"
+			"	bne     $-12		\n" /* branch to lwarx */
+			"	isync				\n"
+:			"=&r"(_t), "=&r"(res), "+m"(ptr->value)
+:			"r"(add_), "r"(&ptr->value)
+:			"memory", "cc");
+
+	return res;
+}
+
+#ifdef PG_HAVE_ATOMIC_U64_SUPPORT
+
+#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64
+static inline bool
+pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
+									uint64 *expected, uint64 newval)
+{
+	uint64		found;
+	uint32		condition_register;
+	bool		ret;
+
+	/* Like u32, but s/lwarx/ldarx/; s/stwcx/stdcx/; s/cmpw/cmpd/ */
+#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
+	if (__builtin_constant_p(*expected) &&
+		(int64) *expected <= PG_INT16_MAX &&
+		(int64) *expected >= PG_INT16_MIN)
+		__asm__ __volatile__(
+			"	sync				\n"
+			"	ldarx   %0,0,%5		\n"
+			"	cmpdi   %0,%3		\n"
+			"	bne     $+12		\n" /* branch to isync */
+			"	stdcx.  %4,0,%5		\n"
+			"	bne     $-16		\n" /* branch to ldarx */
+			"	isync				\n"
+			"	mfcr    %1			\n"
+:			"=&r"(found), "=r"(condition_register), "+m"(ptr->value)
+:			"i"(*expected), "r"(newval), "r"(&ptr->value)
+:			"memory", "cc");
+	else
+#endif
+		__asm__ __volatile__(
+			"	sync				\n"
+			"	ldarx   %0,0,%5		\n"
+			"	cmpd    %0,%3		\n"
+			"	bne     $+12		\n" /* branch to isync */
+			"	stdcx.  %4,0,%5		\n"
+			"	bne     $-16		\n" /* branch to ldarx */
+			"	isync				\n"
+			"	mfcr    %1			\n"
+:			"=&r"(found), "=r"(condition_register), "+m"(ptr->value)
+:			"r"(*expected), "r"(newval), "r"(&ptr->value)
+:			"memory", "cc");
+
+	ret = (condition_register >> 29) & 1;	/* test eq bit of cr0 */
+	if (!ret)
+		*expected = found;
+	return ret;
+}
+
+#define PG_HAVE_ATOMIC_FETCH_ADD_U64
+static inline uint64
+pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
+{
+	uint64		_t;
+	uint64		res;
+
+	/* Like u32, but s/lwarx/ldarx/; s/stwcx/stdcx/ */
+#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
+	if (__builtin_constant_p(add_) &&
+		add_ <= PG_INT16_MAX && add_ >= PG_INT16_MIN)
+		__asm__ __volatile__(
+			"	sync				\n"
+			"	ldarx   %1,0,%4		\n"
+			"	addi    %0,%1,%3	\n"
+			"	stdcx.  %0,0,%4		\n"
+			"	bne     $-12		\n" /* branch to ldarx */
+			"	isync				\n"
+:			"=&r"(_t), "=&b"(res), "+m"(ptr->value)
+:			"i"(add_), "r"(&ptr->value)
+:			"memory", "cc");
+	else
+#endif
+		__asm__ __volatile__(
+			"	sync				\n"
+			"	ldarx   %1,0,%4		\n"
+			"	add     %0,%1,%3	\n"
+			"	stdcx.  %0,0,%4		\n"
+			"	bne     $-12		\n" /* branch to ldarx */
+			"	isync				\n"
+:			"=&r"(_t), "=&r"(res), "+m"(ptr->value)
+:			"r"(add_), "r"(&ptr->value)
+:			"memory", "cc");
+
+	return res;
+}
+
+#endif /* PG_HAVE_ATOMIC_U64_SUPPORT */
+
+/* per architecture manual doubleword accesses have single copy atomicity */
+#define PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY
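A note on the barrier comment near the top of the patch: lwsync orders load/load, load/store, and store/store pairs, but it permits a store to be reordered past a later load, which is exactly the case a full barrier must forbid. A minimal sketch of that store/load case, assuming the pg_memory_barrier_impl() macro defined above (the flags and try_enter() are hypothetical, not from the patch):

static volatile int my_flag = 0;	/* set by this thread */
static volatile int other_flag = 0;	/* set symmetrically by the peer */

static bool
try_enter(void)
{
	my_flag = 1;

	/*
	 * Store-then-load: with only lwsync, the load of other_flag could be
	 * satisfied before the store to my_flag becomes visible, so both
	 * threads could read 0 and both could enter; "sync" rules that out.
	 */
	pg_memory_barrier_impl();

	return other_flag == 0;
}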
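On the compare-exchange contract itself: a failed exchange writes the value actually found into *expected, which is what makes the usual CAS retry loop work without a separate reload. A hedged sketch of that idiom (the helper is hypothetical; ordinary callers would go through the pg_atomic_compare_exchange_u32() wrapper in atomics.h rather than the _impl function):

/* Atomically raise ptr's value to at least newval. */
static inline void
atomic_store_max_u32(volatile pg_atomic_uint32 *ptr, uint32 newval)
{
	uint32		expected = ptr->value;	/* unsynchronized first guess */

	while (expected < newval)
	{
		/*
		 * On failure, expected has been refreshed with the value actually
		 * found, so the loop re-tests against fresh data before retrying.
		 */
		if (pg_atomic_compare_exchange_u32_impl(ptr, &expected, newval))
			break;
	}
}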
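The long comment in the patch measures the hand-written asm against gcc's __atomic_compare_exchange_n(..., __ATOMIC_SEQ_CST). For reference, the builtin form being mimicked would look roughly like this (a sketch, not code from the patch):

static inline bool
cas_u32_builtin(volatile pg_atomic_uint32 *ptr,
				uint32 *expected, uint32 newval)
{
	/*
	 * weak = false: like the asm's stwcx. retry loop, this form does not
	 * fail spuriously, and on failure *expected receives the found value.
	 */
	return __atomic_compare_exchange_n(&ptr->value, expected, newval,
									   false,
									   __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
}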
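Similarly, pg_atomic_fetch_add_u32_impl() is documented as mirroring __sync_fetch_and_add(), which subsumes both the immediate (addi) and register (add) arms; the builtin equivalent is simply (same caveat: a sketch, with a hypothetical name):

static inline uint32
fetch_add_u32_builtin(volatile pg_atomic_uint32 *ptr, int32 add_)
{
	/* Full-barrier semantics, like the sync ... isync bracketing above;
	 * returns the value the counter held before the addition. */
	return __sync_fetch_and_add(&ptr->value, add_);
}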
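Finally, the closing PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY define records the architecture-manual guarantee that aligned doubleword accesses are single-copy atomic, so generic code may read and write a pg_atomic_uint64 with plain loads and stores rather than a CAS. Roughly, under that assumption (a sketch of what the guarantee permits, not code from this file):

static inline uint64
read_u64_sketch(volatile pg_atomic_uint64 *ptr)
{
	/* A single aligned 8-byte load cannot observe a torn value. */
	return ptr->value;
}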