Diffstat (limited to 'src/include/port/atomics/arch-ppc.h')
-rw-r--r--  src/include/port/atomics/arch-ppc.h  254
1 file changed, 254 insertions, 0 deletions
diff --git a/src/include/port/atomics/arch-ppc.h b/src/include/port/atomics/arch-ppc.h
new file mode 100644
index 0000000..0814296
--- /dev/null
+++ b/src/include/port/atomics/arch-ppc.h
@@ -0,0 +1,254 @@
+/*-------------------------------------------------------------------------
+ *
+ * arch-ppc.h
+ * Atomic operations considerations specific to PowerPC
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * NOTES:
+ *
+ * src/include/port/atomics/arch-ppc.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#if defined(__GNUC__)
+
+/*
+ * lwsync orders loads with respect to each other, and similarly with stores.
+ * But a load can be performed before a subsequent store, so sync must be used
+ * for a full memory barrier.
+ */
+#define pg_memory_barrier_impl() __asm__ __volatile__ ("sync" : : : "memory")
+#define pg_read_barrier_impl() __asm__ __volatile__ ("lwsync" : : : "memory")
+#define pg_write_barrier_impl() __asm__ __volatile__ ("lwsync" : : : "memory")
+#endif
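
Why the full barrier must be "sync": in a Dekker-style handshake each side stores its own flag and then loads the other's, and only a full barrier keeps the load from being performed ahead of the store. A minimal sketch using the macros above (flag_a and flag_b are illustrative names, not part of this patch):

    volatile uint32 flag_a = 0, flag_b = 0;

    /* CPU A */                         /* CPU B */
    flag_a = 1;                         flag_b = 1;
    pg_memory_barrier_impl();           pg_memory_barrier_impl();
    if (flag_b == 0)                    if (flag_a == 0)
        /* A may enter */ ;             /* B may enter */ ;

With only lwsync in place of sync, both loads could be satisfied before the stores became visible, letting both sides enter.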
+
+#define PG_HAVE_ATOMIC_U32_SUPPORT
+typedef struct pg_atomic_uint32
+{
+ volatile uint32 value;
+} pg_atomic_uint32;
+
+/* 64bit atomics are only supported in 64bit mode */
+#if SIZEOF_VOID_P >= 8
+#define PG_HAVE_ATOMIC_U64_SUPPORT
+typedef struct pg_atomic_uint64
+{
+ volatile uint64 value pg_attribute_aligned(8);
+} pg_atomic_uint64;
+
+#endif
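
The pg_attribute_aligned(8) matters because ldarx/stdcx. require a naturally aligned operand; without it, a uint64 member could land on a 4-byte boundary under 32-bit-style struct layout. A compile-time sanity sketch, assuming a C11 compiler (not part of the patch):

    #include <assert.h>

    static_assert(sizeof(pg_atomic_uint32) == 4, "plain uint32 payload");
    #ifdef PG_HAVE_ATOMIC_U64_SUPPORT
    static_assert(_Alignof(pg_atomic_uint64) == 8,
                  "ldarx/stdcx. need natural alignment");
    #endif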
+
+/*
+ * This mimics gcc __atomic_compare_exchange_n(..., __ATOMIC_SEQ_CST), but
+ * code generation differs at the end. __atomic_compare_exchange_n():
+ * 100: isync
+ * 104: mfcr r3
+ * 108: rlwinm r3,r3,3,31,31
+ * 10c: bne 120 <.eb+0x10>
+ * 110: clrldi r3,r3,63
+ * 114: addi r1,r1,112
+ * 118: blr
+ * 11c: nop
+ * 120: clrldi r3,r3,63
+ * 124: stw r9,0(r4)
+ * 128: addi r1,r1,112
+ * 12c: blr
+ *
+ * This:
+ * f0: isync
+ * f4: mfcr r9
+ * f8: rldicl. r3,r9,35,63
+ * fc: bne 104 <.eb>
+ * 100: stw r10,0(r4)
+ * 104: addi r1,r1,112
+ * 108: blr
+ *
+ * This implementation may or may not have materially different performance.
+ * It's not exploiting the fact that cr0 still holds the relevant comparison
+ * bits, set during the __asm__. One could fix that by moving more code into
+ * the __asm__. (That would remove the freedom to eliminate dead stores when
+ * the caller ignores "expected", but few callers do.)
+ *
+ * Recognizing constant "newval" would be superfluous, because there's no
+ * immediate-operand version of stwcx.
+ */
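
For reference, the builtin whose code generation is compared above would be invoked roughly as below; a sketch only, since this file deliberately hand-codes the sequence instead (cas_via_builtin is an illustrative name):

    static inline bool
    cas_via_builtin(volatile uint32 *p, uint32 *expected, uint32 newval)
    {
        /* seq_cst success and failure orderings, strong (non-weak) variant */
        return __atomic_compare_exchange_n(p, expected, newval, false,
                                           __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
    }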
+#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32
+static inline bool
+pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
+ uint32 *expected, uint32 newval)
+{
+ uint32 found;
+ uint32 condition_register;
+ bool ret;
+
+#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
+ if (__builtin_constant_p(*expected) &&
+ (int32) *expected <= PG_INT16_MAX &&
+ (int32) *expected >= PG_INT16_MIN)
+ __asm__ __volatile__(
+ " sync \n"
+ " lwarx %0,0,%5 \n"
+ " cmpwi %0,%3 \n"
+ " bne $+12 \n" /* branch to isync */
+ " stwcx. %4,0,%5 \n"
+ " bne $-16 \n" /* branch to lwarx */
+ " isync \n"
+ " mfcr %1 \n"
+: "=&r"(found), "=r"(condition_register), "+m"(ptr->value)
+: "i"(*expected), "r"(newval), "r"(&ptr->value)
+: "memory", "cc");
+ else
+#endif
+ __asm__ __volatile__(
+ " sync \n"
+ " lwarx %0,0,%5 \n"
+ " cmpw %0,%3 \n"
+ " bne $+12 \n" /* branch to isync */
+ " stwcx. %4,0,%5 \n"
+ " bne $-16 \n" /* branch to lwarx */
+ " isync \n"
+ " mfcr %1 \n"
+: "=&r"(found), "=r"(condition_register), "+m"(ptr->value)
+: "r"(*expected), "r"(newval), "r"(&ptr->value)
+: "memory", "cc");
+
+ ret = (condition_register >> 29) & 1; /* eq bit of cr0: CR bit 2, i.e. bit 29 from the LSB */
+ if (!ret)
+ *expected = found;
+ return ret;
+}
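
Typical use is a retry loop: on failure the primitive writes the value it found into *expected, so the caller recomputes and retries. A hedged sketch building fetch-OR from this CAS (fetch_or_u32 and set_bits are illustrative names):

    static inline uint32
    fetch_or_u32(volatile pg_atomic_uint32 *v, uint32 set_bits)
    {
        uint32 old = v->value;      /* unsynchronized first guess is fine */

        /* on failure, "old" is refreshed with the value actually found */
        while (!pg_atomic_compare_exchange_u32_impl(v, &old, old | set_bits))
            ;
        return old;                 /* value before our OR took effect */
    }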
+
+/*
+ * This mirrors gcc __sync_fetch_and_add().
+ *
+ * Like tas(), use constraint "=&b" to avoid allocating r0: addi reads
+ * RA=r0 as the constant 0, not as a register.
+ */
+#define PG_HAVE_ATOMIC_FETCH_ADD_U32
+static inline uint32
+pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_)
+{
+ uint32 _t;
+ uint32 res;
+
+#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
+ if (__builtin_constant_p(add_) &&
+ add_ <= PG_INT16_MAX && add_ >= PG_INT16_MIN)
+ __asm__ __volatile__(
+ " sync \n"
+ " lwarx %1,0,%4 \n"
+ " addi %0,%1,%3 \n"
+ " stwcx. %0,0,%4 \n"
+ " bne $-12 \n" /* branch to lwarx */
+ " isync \n"
+: "=&r"(_t), "=&b"(res), "+m"(ptr->value)
+: "i"(add_), "r"(&ptr->value)
+: "memory", "cc");
+ else
+#endif
+ __asm__ __volatile__(
+ " sync \n"
+ " lwarx %1,0,%4 \n"
+ " add %0,%1,%3 \n"
+ " stwcx. %0,0,%4 \n"
+ " bne $-12 \n" /* branch to lwarx */
+ " isync \n"
+: "=&r"(_t), "=&r"(res), "+m"(ptr->value)
+: "r"(add_), "r"(&ptr->value)
+: "memory", "cc");
+
+ return res;
+}
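
A usage sketch: with a literal addend such as 1, __builtin_constant_p() fires and the value fits in addi's signed 16-bit immediate, so the "i"-constraint branch is taken (assuming HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P is defined; nrequests is an illustrative name):

    static pg_atomic_uint32 nrequests = {0};

    uint32 prev = pg_atomic_fetch_add_u32_impl(&nrequests, 1);
    /* prev holds the counter's value before the increment */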
+
+#ifdef PG_HAVE_ATOMIC_U64_SUPPORT
+
+#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64
+static inline bool
+pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
+ uint64 *expected, uint64 newval)
+{
+ uint64 found;
+ uint32 condition_register;
+ bool ret;
+
+ /* Like u32, but s/lwarx/ldarx/; s/stwcx/stdcx/; s/cmpw/cmpd/ */
+#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
+ if (__builtin_constant_p(*expected) &&
+ (int64) *expected <= PG_INT16_MAX &&
+ (int64) *expected >= PG_INT16_MIN)
+ __asm__ __volatile__(
+ " sync \n"
+ " ldarx %0,0,%5 \n"
+ " cmpdi %0,%3 \n"
+ " bne $+12 \n" /* branch to isync */
+ " stdcx. %4,0,%5 \n"
+ " bne $-16 \n" /* branch to ldarx */
+ " isync \n"
+ " mfcr %1 \n"
+: "=&r"(found), "=r"(condition_register), "+m"(ptr->value)
+: "i"(*expected), "r"(newval), "r"(&ptr->value)
+: "memory", "cc");
+ else
+#endif
+ __asm__ __volatile__(
+ " sync \n"
+ " ldarx %0,0,%5 \n"
+ " cmpd %0,%3 \n"
+ " bne $+12 \n" /* branch to isync */
+ " stdcx. %4,0,%5 \n"
+ " bne $-16 \n" /* branch to ldarx */
+ " isync \n"
+ " mfcr %1 \n"
+: "=&r"(found), "=r"(condition_register), "+m"(ptr->value)
+: "r"(*expected), "r"(newval), "r"(&ptr->value)
+: "memory", "cc");
+
+ ret = (condition_register >> 29) & 1; /* eq bit of cr0: CR bit 2, i.e. bit 29 from the LSB */
+ if (!ret)
+ *expected = found;
+ return ret;
+}
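
The same retry pattern applies to the 64-bit variant. A hedged sketch that advances a running maximum, relying on CAS failure to refresh the locally observed value (advance_max_u64 and candidate are illustrative names):

    static inline void
    advance_max_u64(volatile pg_atomic_uint64 *seen, uint64 candidate)
    {
        uint64 cur = seen->value;

        /* failed CAS refreshes "cur"; loop re-tests against candidate */
        while (cur < candidate &&
               !pg_atomic_compare_exchange_u64_impl(seen, &cur, candidate))
            ;
    }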
+
+#define PG_HAVE_ATOMIC_FETCH_ADD_U64
+static inline uint64
+pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
+{
+ uint64 _t;
+ uint64 res;
+
+ /* Like u32, but s/lwarx/ldarx/; s/stwcx/stdcx/ */
+#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
+ if (__builtin_constant_p(add_) &&
+ add_ <= PG_INT16_MAX && add_ >= PG_INT16_MIN)
+ __asm__ __volatile__(
+ " sync \n"
+ " ldarx %1,0,%4 \n"
+ " addi %0,%1,%3 \n"
+ " stdcx. %0,0,%4 \n"
+ " bne $-12 \n" /* branch to ldarx */
+ " isync \n"
+: "=&r"(_t), "=&b"(res), "+m"(ptr->value)
+: "i"(add_), "r"(&ptr->value)
+: "memory", "cc");
+ else
+#endif
+ __asm__ __volatile__(
+ " sync \n"
+ " ldarx %1,0,%4 \n"
+ " add %0,%1,%3 \n"
+ " stdcx. %0,0,%4 \n"
+ " bne $-12 \n" /* branch to ldarx */
+ " isync \n"
+: "=&r"(_t), "=&r"(res), "+m"(ptr->value)
+: "r"(add_), "r"(&ptr->value)
+: "memory", "cc");
+
+ return res;
+}
+
+#endif /* PG_HAVE_ATOMIC_U64_SUPPORT */
+
+/* Per the architecture manual, doubleword accesses have single-copy atomicity. */
+#define PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY
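
What that define buys: a 64-bit read can be a plain doubleword load, with no CAS loop to paper over torn values, and the generic fallbacks can exploit this. A sketch only, under the single-copy-atomicity assumption above (read_u64_relaxed is an illustrative name):

    static inline uint64
    read_u64_relaxed(volatile pg_atomic_uint64 *ptr)
    {
        /* a single ld: single-copy atomicity rules out half-updated values */
        return ptr->value;
    }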