12 files changed, 2272 insertions, 0 deletions
diff --git a/src/include/port/atomics.h b/src/include/port/atomics.h
new file mode 100644
index 0000000..4956ec5
--- /dev/null
+++ b/src/include/port/atomics.h
@@ -0,0 +1,524 @@
+/*-------------------------------------------------------------------------
+ *
+ * atomics.h
+ *	  Atomic operations.
+ *
+ * Hardware and compiler dependent functions for manipulating memory
+ * atomically and dealing with cache coherency. Used to implement locking
+ * facilities and lockless algorithms/data structures.
+ *
+ * To bring up postgres on a platform/compiler at the very least
+ * implementations for the following operations should be provided:
+ * * pg_compiler_barrier(), pg_write_barrier(), pg_read_barrier()
+ * * pg_atomic_compare_exchange_u32(), pg_atomic_fetch_add_u32()
+ * * pg_atomic_test_set_flag(), pg_atomic_init_flag(), pg_atomic_clear_flag()
+ * * PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY should be defined if appropriate.
+ *
+ * There exist generic, hardware independent, implementations for several
+ * compilers which might be sufficient, although possibly not optimal, for a
+ * new platform. If no such generic implementation is available spinlocks (or
+ * even OS provided semaphores) will be used to implement the API.
+ *
+ * Implement _u64 atomics if and only if your platform can use them
+ * efficiently (and obviously correctly).
+ *
+ * Use higher level functionality (lwlocks, spinlocks, heavyweight locks)
+ * whenever possible. Writing correct code using these facilities is hard.
+ *
+ * For an introduction to using memory barriers within the PostgreSQL backend,
+ * see src/backend/storage/lmgr/README.barrier
+ *
+ * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/port/atomics.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef ATOMICS_H
+#define ATOMICS_H
+
+#ifdef FRONTEND
+#error "atomics.h may not be included from frontend code"
+#endif
+
+#define INSIDE_ATOMICS_H
+
+#include <limits.h>
+
+/*
+ * First a set of architecture specific files is included.
+ *
+ * These files can provide the full set of atomics or can do pretty much
+ * nothing if all the compilers commonly used on these platforms provide
+ * usable generics.
+ *
+ * Don't add an inline assembly of the actual atomic operations if all the
+ * common implementations of your platform provide intrinsics. Intrinsics are
+ * much easier to understand and potentially support more architectures.
+ *
+ * It will often make sense to define memory barrier semantics here, since
+ * e.g. generic compiler intrinsics for x86 memory barriers can't know that
+ * postgres doesn't need x86 read/write barriers do anything more than a
+ * compiler barrier.
+ *
+ */
+#if defined(__arm__) || defined(__arm) || \
+	defined(__aarch64__) || defined(__aarch64)
+#include "port/atomics/arch-arm.h"
+#elif defined(__i386__) || defined(__i386) || defined(__x86_64__)
+#include "port/atomics/arch-x86.h"
+#elif defined(__ia64__) || defined(__ia64)
+#include "port/atomics/arch-ia64.h"
+#elif defined(__ppc__) || defined(__powerpc__) || defined(__ppc64__) || defined(__powerpc64__)
+#include "port/atomics/arch-ppc.h"
+#elif defined(__hppa) || defined(__hppa__)
+#include "port/atomics/arch-hppa.h"
+#endif
+
+/*
+ * Compiler specific, but architecture independent implementations.
+ *
+ * Provide architecture independent implementations of the atomic
+ * facilities. At the very least compiler barriers should be provided, but a
+ * full implementation of
+ * * pg_compiler_barrier(), pg_write_barrier(), pg_read_barrier()
+ * * pg_atomic_compare_exchange_u32(), pg_atomic_fetch_add_u32()
+ * using compiler intrinsics are a good idea.
+ */
+/*
+ * gcc or compatible, including clang and icc.  Exclude xlc.  The ppc64le "IBM
+ * XL C/C++ for Linux, V13.1.2" emulates gcc, but __sync_lock_test_and_set()
+ * of one-byte types elicits SIGSEGV.  That bug was gone by V13.1.5 (2016-12).
+ */
+#if (defined(__GNUC__) || defined(__INTEL_COMPILER)) && !(defined(__IBMC__) || defined(__IBMCPP__))
+#include "port/atomics/generic-gcc.h"
+#elif defined(_MSC_VER)
+#include "port/atomics/generic-msvc.h"
+#elif defined(__hpux) && defined(__ia64) && !defined(__GNUC__)
+#include "port/atomics/generic-acc.h"
+#elif defined(__SUNPRO_C) && !defined(__GNUC__)
+#include "port/atomics/generic-sunpro.h"
+#else
+/*
+ * Unsupported compiler, we'll likely use slower fallbacks... At least
+ * compiler barriers should really be provided.
+ */
+#endif
+
+/*
+ * Provide a full fallback of the pg_*_barrier(), pg_atomic**_flag and
+ * pg_atomic_* APIs for platforms without sufficient spinlock and/or atomics
+ * support. In the case of spinlock backed atomics the emulation is expected
+ * to be efficient, although less so than native atomics support.
+ */
+#include "port/atomics/fallback.h"
+
+/*
+ * Provide additional operations using supported infrastructure. These are
+ * expected to be efficient if the underlying atomic operations are efficient.
+ */
+#include "port/atomics/generic.h"
+
+
+/*
+ * pg_compiler_barrier - prevent the compiler from moving code across
+ *
+ * A compiler barrier need not (and preferably should not) emit any actual
+ * machine code, but must act as an optimization fence: the compiler must not
+ * reorder loads or stores to main memory around the barrier.  However, the
+ * CPU may still reorder loads or stores at runtime, if the architecture's
+ * memory model permits this.
+ */
+#define pg_compiler_barrier()	pg_compiler_barrier_impl()
+
+/*
+ * pg_memory_barrier - prevent the CPU from reordering memory access
+ *
+ * A memory barrier must act as a compiler barrier, and in addition must
+ * guarantee that all loads and stores issued prior to the barrier are
+ * completed before any loads or stores issued after the barrier.  Unless
+ * loads and stores are totally ordered (which is not the case on most
+ * architectures) this requires issuing some sort of memory fencing
+ * instruction.
+ */
+#define pg_memory_barrier() pg_memory_barrier_impl()
+
+/*
+ * pg_(read|write)_barrier - prevent the CPU from reordering memory access
+ *
+ * A read barrier must act as a compiler barrier, and in addition must
+ * guarantee that any loads issued prior to the barrier are completed before
+ * any loads issued after the barrier.  Similarly, a write barrier acts
+ * as a compiler barrier, and also orders stores.  Read and write barriers
+ * are thus weaker than a full memory barrier, but stronger than a compiler
+ * barrier.  In practice, on machines with strong memory ordering, read and
+ * write barriers may require nothing more than a compiler barrier.
+ */
+#define pg_read_barrier()	pg_read_barrier_impl()
+#define pg_write_barrier()	pg_write_barrier_impl()
+
+/*
+ * Spinloop delay - Allow CPU to relax in busy loops
+ */
+#define pg_spin_delay() pg_spin_delay_impl()
+
+/*
+ * pg_atomic_init_flag - initialize atomic flag.
+ *
+ * No barrier semantics.
+ */
+static inline void
+pg_atomic_init_flag(volatile pg_atomic_flag *ptr)
+{
+	pg_atomic_init_flag_impl(ptr);
+}
+
+/*
+ * pg_atomic_test_set_flag - TAS()
+ *
+ * Returns true if the flag has successfully been set, false otherwise.
+ *
+ * Acquire (including read barrier) semantics.
+ */
+static inline bool
+pg_atomic_test_set_flag(volatile pg_atomic_flag *ptr)
+{
+	return pg_atomic_test_set_flag_impl(ptr);
+}
+
+/*
+ * pg_atomic_unlocked_test_flag - Check if the lock is free
+ *
+ * Returns true if the flag currently is not set, false otherwise.
+ *
+ * No barrier semantics.
+ */
+static inline bool
+pg_atomic_unlocked_test_flag(volatile pg_atomic_flag *ptr)
+{
+	return pg_atomic_unlocked_test_flag_impl(ptr);
+}
+
+/*
+ * pg_atomic_clear_flag - release lock set by TAS()
+ *
+ * Release (including write barrier) semantics.
+ */
+static inline void
+pg_atomic_clear_flag(volatile pg_atomic_flag *ptr)
+{
+	pg_atomic_clear_flag_impl(ptr);
+}
+
+
+/*
+ * pg_atomic_init_u32 - initialize atomic variable
+ *
+ * Has to be done before any concurrent usage..
+ *
+ * No barrier semantics.
+ */
+static inline void
+pg_atomic_init_u32(volatile pg_atomic_uint32 *ptr, uint32 val)
+{
+	AssertPointerAlignment(ptr, 4);
+
+	pg_atomic_init_u32_impl(ptr, val);
+}
+
+/*
+ * pg_atomic_read_u32 - unlocked read from atomic variable.
+ *
+ * The read is guaranteed to return a value as it has been written by this or
+ * another process at some point in the past. There's however no cache
+ * coherency interaction guaranteeing the value hasn't since been written to
+ * again.
+ *
+ * No barrier semantics.
+ */
+static inline uint32
+pg_atomic_read_u32(volatile pg_atomic_uint32 *ptr)
+{
+	AssertPointerAlignment(ptr, 4);
+	return pg_atomic_read_u32_impl(ptr);
+}
+
+/*
+ * pg_atomic_write_u32 - write to atomic variable.
+ *
+ * The write is guaranteed to succeed as a whole, i.e. it's not possible to
+ * observe a partial write for any reader.  Note that this correctly interacts
+ * with pg_atomic_compare_exchange_u32, in contrast to
+ * pg_atomic_unlocked_write_u32().
+ *
+ * No barrier semantics.
+ */
+static inline void
+pg_atomic_write_u32(volatile pg_atomic_uint32 *ptr, uint32 val)
+{
+	AssertPointerAlignment(ptr, 4);
+
+	pg_atomic_write_u32_impl(ptr, val);
+}
+
+/*
+ * pg_atomic_unlocked_write_u32 - unlocked write to atomic variable.
+ *
+ * The write is guaranteed to succeed as a whole, i.e. it's not possible to
+ * observe a partial write for any reader.  But note that writing this way is
+ * not guaranteed to correctly interact with read-modify-write operations like
+ * pg_atomic_compare_exchange_u32.  This should only be used in cases where
+ * minor performance regressions due to atomics emulation are unacceptable.
+ *
+ * No barrier semantics.
+ */
+static inline void
+pg_atomic_unlocked_write_u32(volatile pg_atomic_uint32 *ptr, uint32 val)
+{
+	AssertPointerAlignment(ptr, 4);
+
+	pg_atomic_unlocked_write_u32_impl(ptr, val);
+}
+
+/*
+ * pg_atomic_exchange_u32 - exchange newval with current value
+ *
+ * Returns the old value of 'ptr' before the swap.
+ *
+ * Full barrier semantics.
+ */
+static inline uint32
+pg_atomic_exchange_u32(volatile pg_atomic_uint32 *ptr, uint32 newval)
+{
+	AssertPointerAlignment(ptr, 4);
+
+	return pg_atomic_exchange_u32_impl(ptr, newval);
+}
+
+/*
+ * pg_atomic_compare_exchange_u32 - CAS operation
+ *
+ * Atomically compare the current value of ptr with *expected and store newval
+ * iff ptr and *expected have the same value. The current value of *ptr will
+ * always be stored in *expected.
+ *
+ * Return true if values have been exchanged, false otherwise.
+ *
+ * Full barrier semantics.
+ */
+static inline bool
+pg_atomic_compare_exchange_u32(volatile pg_atomic_uint32 *ptr,
+							   uint32 *expected, uint32 newval)
+{
+	AssertPointerAlignment(ptr, 4);
+	AssertPointerAlignment(expected, 4);
+
+	return pg_atomic_compare_exchange_u32_impl(ptr, expected, newval);
+}
+
+/*
+ * pg_atomic_fetch_add_u32 - atomically add to variable
+ *
+ * Returns the value of ptr before the arithmetic operation.
+ *
+ * Full barrier semantics.
+ */
+static inline uint32
+pg_atomic_fetch_add_u32(volatile pg_atomic_uint32 *ptr, int32 add_)
+{
+	AssertPointerAlignment(ptr, 4);
+	return pg_atomic_fetch_add_u32_impl(ptr, add_);
+}
+
+/*
+ * pg_atomic_fetch_sub_u32 - atomically subtract from variable
+ *
+ * Returns the value of ptr before the arithmetic operation. Note that sub_
+ * may not be INT_MIN due to platform limitations.
+ *
+ * Full barrier semantics.
+ */
+static inline uint32
+pg_atomic_fetch_sub_u32(volatile pg_atomic_uint32 *ptr, int32 sub_)
+{
+	AssertPointerAlignment(ptr, 4);
+	Assert(sub_ != INT_MIN);
+	return pg_atomic_fetch_sub_u32_impl(ptr, sub_);
+}
+
+/*
+ * pg_atomic_fetch_and_u32 - atomically bit-and and_ with variable
+ *
+ * Returns the value of ptr before the arithmetic operation.
+ *
+ * Full barrier semantics.
+ */
+static inline uint32
+pg_atomic_fetch_and_u32(volatile pg_atomic_uint32 *ptr, uint32 and_)
+{
+	AssertPointerAlignment(ptr, 4);
+	return pg_atomic_fetch_and_u32_impl(ptr, and_);
+}
+
+/*
+ * pg_atomic_fetch_or_u32 - atomically bit-or or_ with variable
+ *
+ * Returns the value of ptr before the arithmetic operation.
+ *
+ * Full barrier semantics.
+ */
+static inline uint32
+pg_atomic_fetch_or_u32(volatile pg_atomic_uint32 *ptr, uint32 or_)
+{
+	AssertPointerAlignment(ptr, 4);
+	return pg_atomic_fetch_or_u32_impl(ptr, or_);
+}
+
+/*
+ * pg_atomic_add_fetch_u32 - atomically add to variable
+ *
+ * Returns the value of ptr after the arithmetic operation.
+ *
+ * Full barrier semantics.
+ */
+static inline uint32
+pg_atomic_add_fetch_u32(volatile pg_atomic_uint32 *ptr, int32 add_)
+{
+	AssertPointerAlignment(ptr, 4);
+	return pg_atomic_add_fetch_u32_impl(ptr, add_);
+}
+
+/*
+ * pg_atomic_sub_fetch_u32 - atomically subtract from variable
+ *
+ * Returns the value of ptr after the arithmetic operation. Note that sub_ may
+ * not be INT_MIN due to platform limitations.
+ *
+ * Full barrier semantics.
+ */
+static inline uint32
+pg_atomic_sub_fetch_u32(volatile pg_atomic_uint32 *ptr, int32 sub_)
+{
+	AssertPointerAlignment(ptr, 4);
+	Assert(sub_ != INT_MIN);
+	return pg_atomic_sub_fetch_u32_impl(ptr, sub_);
+}
+
+/* ----
+ * The 64 bit operations have the same semantics as their 32bit counterparts
+ * if they are available. Check the corresponding 32bit function for
+ * documentation.
+ * ----
+ */
+static inline void
+pg_atomic_init_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
+{
+	/*
+	 * Can't necessarily enforce alignment - and don't need it - when using
+	 * the spinlock based fallback implementation. Therefore only assert when
+	 * not using it.
+	 */
+#ifndef PG_HAVE_ATOMIC_U64_SIMULATION
+	AssertPointerAlignment(ptr, 8);
+#endif
+	pg_atomic_init_u64_impl(ptr, val);
+}
+
+static inline uint64
+pg_atomic_read_u64(volatile pg_atomic_uint64 *ptr)
+{
+#ifndef PG_HAVE_ATOMIC_U64_SIMULATION
+	AssertPointerAlignment(ptr, 8);
+#endif
+	return pg_atomic_read_u64_impl(ptr);
+}
+
+static inline void
+pg_atomic_write_u64(volatile pg_atomic_uint64 *ptr, uint64 val)
+{
+#ifndef PG_HAVE_ATOMIC_U64_SIMULATION
+	AssertPointerAlignment(ptr, 8);
+#endif
+	pg_atomic_write_u64_impl(ptr, val);
+}
+
+static inline uint64
+pg_atomic_exchange_u64(volatile pg_atomic_uint64 *ptr, uint64 newval)
+{
+#ifndef PG_HAVE_ATOMIC_U64_SIMULATION
+	AssertPointerAlignment(ptr, 8);
+#endif
+	return pg_atomic_exchange_u64_impl(ptr, newval);
+}
+
+static inline bool
+pg_atomic_compare_exchange_u64(volatile pg_atomic_uint64 *ptr,
+							   uint64 *expected, uint64 newval)
+{
+#ifndef PG_HAVE_ATOMIC_U64_SIMULATION
+	AssertPointerAlignment(ptr, 8);
+	AssertPointerAlignment(expected, 8);
+#endif
+	return pg_atomic_compare_exchange_u64_impl(ptr, expected, newval);
+}
+
+static inline uint64
+pg_atomic_fetch_add_u64(volatile pg_atomic_uint64 *ptr, int64 add_)
+{
+#ifndef PG_HAVE_ATOMIC_U64_SIMULATION
+	AssertPointerAlignment(ptr, 8);
+#endif
+	return pg_atomic_fetch_add_u64_impl(ptr, add_);
+}
+
+static inline uint64
+pg_atomic_fetch_sub_u64(volatile pg_atomic_uint64 *ptr, int64 sub_)
+{
+#ifndef PG_HAVE_ATOMIC_U64_SIMULATION
+	AssertPointerAlignment(ptr, 8);
+#endif
+	Assert(sub_ != PG_INT64_MIN);
+	return pg_atomic_fetch_sub_u64_impl(ptr, sub_);
+}
+
+static inline uint64
+pg_atomic_fetch_and_u64(volatile pg_atomic_uint64 *ptr, uint64 and_)
+{
+#ifndef PG_HAVE_ATOMIC_U64_SIMULATION
+	AssertPointerAlignment(ptr, 8);
+#endif
+	return pg_atomic_fetch_and_u64_impl(ptr, and_);
+}
+
+static inline uint64
+pg_atomic_fetch_or_u64(volatile pg_atomic_uint64 *ptr, uint64 or_)
+{
+#ifndef PG_HAVE_ATOMIC_U64_SIMULATION
+	AssertPointerAlignment(ptr, 8);
+#endif
+	return pg_atomic_fetch_or_u64_impl(ptr, or_);
+}
+
+static inline uint64
+pg_atomic_add_fetch_u64(volatile pg_atomic_uint64 *ptr, int64 add_)
+{
+#ifndef PG_HAVE_ATOMIC_U64_SIMULATION
+	AssertPointerAlignment(ptr, 8);
+#endif
+	return pg_atomic_add_fetch_u64_impl(ptr, add_);
+}
+
+static inline uint64
+pg_atomic_sub_fetch_u64(volatile pg_atomic_uint64 *ptr, int64 sub_)
+{
+#ifndef PG_HAVE_ATOMIC_U64_SIMULATION
+	AssertPointerAlignment(ptr, 8);
+#endif
+	Assert(sub_ != PG_INT64_MIN);
+	return pg_atomic_sub_fetch_u64_impl(ptr, sub_);
+}
+
+#undef INSIDE_ATOMICS_H
+
+#endif							/* ATOMICS_H */
diff --git a/src/include/port/atomics/arch-arm.h b/src/include/port/atomics/arch-arm.h
new file mode 100644
index 0000000..6b925a7
--- /dev/null
+++ b/src/include/port/atomics/arch-arm.h
@@ -0,0 +1,26 @@
+/*-------------------------------------------------------------------------
+ *
+ * arch-arm.h
+ *	  Atomic operations considerations specific to ARM
+ *
+ * Portions Copyright (c) 2013-2020, PostgreSQL Global Development Group
+ *
+ * NOTES:
+ *
+ * src/include/port/atomics/arch-arm.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/* intentionally no include guards, should only be included by atomics.h */
+#ifndef INSIDE_ATOMICS_H
+#error "should be included via atomics.h"
+#endif
+
+/*
+ * 64 bit atomics on ARM32 are implemented using kernel fallbacks and thus
+ * might be slow, so disable entirely. On ARM64 that problem doesn't exist.
+ */
+#if !defined(__aarch64__) && !defined(__aarch64)
+#define PG_DISABLE_64_BIT_ATOMICS
+#endif  /* __aarch64__ || __aarch64 */
diff --git a/src/include/port/atomics/arch-hppa.h b/src/include/port/atomics/arch-hppa.h
new file mode 100644
index 0000000..a581b3f
--- /dev/null
+++ b/src/include/port/atomics/arch-hppa.h
@@ -0,0 +1,17 @@
+/*-------------------------------------------------------------------------
+ *
+ * arch-hppa.h
+ *	  Atomic operations considerations specific to HPPA
+ *
+ * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * NOTES:
+ *
+ * src/include/port/atomics/arch-hppa.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/* HPPA doesn't do either read or write reordering */
+#define pg_memory_barrier_impl()		pg_compiler_barrier_impl()
diff --git a/src/include/port/atomics/arch-ia64.h b/src/include/port/atomics/arch-ia64.h
new file mode 100644
index 0000000..9feb153
--- /dev/null
+++ b/src/include/port/atomics/arch-ia64.h
@@ -0,0 +1,29 @@
+/*-------------------------------------------------------------------------
+ *
+ * arch-ia64.h
+ *	  Atomic operations considerations specific to intel itanium
+ *
+ * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * NOTES:
+ *
+ * src/include/port/atomics/arch-ia64.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/*
+ * Itanium is weakly ordered, so read and write barriers require a full
+ * fence.
+ */
+#if defined(__INTEL_COMPILER)
+#	define pg_memory_barrier_impl()		__mf()
+#elif defined(__GNUC__)
+#	define pg_memory_barrier_impl()		__asm__ __volatile__ ("mf" : : : "memory")
+#elif defined(__hpux)
+#	define pg_memory_barrier_impl()		_Asm_mf()
+#endif
+
+/* per architecture manual doubleword accesses have single copy atomicity */
+#define PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY
diff --git a/src/include/port/atomics/arch-ppc.h b/src/include/port/atomics/arch-ppc.h
new file mode 100644
index 0000000..a82ae38
--- /dev/null
+++ b/src/include/port/atomics/arch-ppc.h
@@ -0,0 +1,254 @@
+/*-------------------------------------------------------------------------
+ *
+ * arch-ppc.h
+ *	  Atomic operations considerations specific to PowerPC
+ *
+ * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * NOTES:
+ *
+ * src/include/port/atomics/arch-ppc.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#if defined(__GNUC__)
+
+/*
+ * lwsync orders loads with respect to each other, and similarly with stores.
+ * But a load can be performed before a subsequent store, so sync must be used
+ * for a full memory barrier.
+ */
+#define pg_memory_barrier_impl()	__asm__ __volatile__ ("sync" : : : "memory")
+#define pg_read_barrier_impl()		__asm__ __volatile__ ("lwsync" : : : "memory")
+#define pg_write_barrier_impl()		__asm__ __volatile__ ("lwsync" : : : "memory")
+#endif
+
+#define PG_HAVE_ATOMIC_U32_SUPPORT
+typedef struct pg_atomic_uint32
+{
+	volatile uint32 value;
+} pg_atomic_uint32;
+
+/* 64bit atomics are only supported in 64bit mode */
+#if SIZEOF_VOID_P >= 8
+#define PG_HAVE_ATOMIC_U64_SUPPORT
+typedef struct pg_atomic_uint64
+{
+	volatile uint64 value pg_attribute_aligned(8);
+} pg_atomic_uint64;
+
+#endif
+
+/*
+ * This mimics gcc __atomic_compare_exchange_n(..., __ATOMIC_SEQ_CST), but
+ * code generation differs at the end.  __atomic_compare_exchange_n():
+ *  100:	isync
+ *  104:	mfcr    r3
+ *  108:	rlwinm  r3,r3,3,31,31
+ *  10c:	bne     120 <.eb+0x10>
+ *  110:	clrldi  r3,r3,63
+ *  114:	addi    r1,r1,112
+ *  118:	blr
+ *  11c:	nop
+ *  120:	clrldi  r3,r3,63
+ *  124:	stw     r9,0(r4)
+ *  128:	addi    r1,r1,112
+ *  12c:	blr
+ *
+ * This:
+ *   f0:	isync
+ *   f4:	mfcr    r9
+ *   f8:	rldicl. r3,r9,35,63
+ *   fc:	bne     104 <.eb>
+ *  100:	stw     r10,0(r4)
+ *  104:	addi    r1,r1,112
+ *  108:	blr
+ *
+ * This implementation may or may not have materially different performance.
+ * It's not exploiting the fact that cr0 still holds the relevant comparison
+ * bits, set during the __asm__.  One could fix that by moving more code into
+ * the __asm__.  (That would remove the freedom to eliminate dead stores when
+ * the caller ignores "expected", but few callers do.)
+ *
+ * Recognizing constant "newval" would be superfluous, because there's no
+ * immediate-operand version of stwcx.
+ */
+#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32
+static inline bool
+pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
+									uint32 *expected, uint32 newval)
+{
+	uint32 found;
+	uint32 condition_register;
+	bool ret;
+
+#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
+	if (__builtin_constant_p(*expected) &&
+		(int32) *expected <= PG_INT16_MAX &&
+		(int32) *expected >= PG_INT16_MIN)
+		__asm__ __volatile__(
+			"	sync				\n"
+			"	lwarx   %0,0,%5		\n"
+			"	cmpwi   %0,%3		\n"
+			"	bne     $+12		\n"		/* branch to isync */
+			"	stwcx.  %4,0,%5		\n"
+			"	bne     $-16		\n"		/* branch to lwarx */
+			"	isync				\n"
+			"	mfcr    %1          \n"
+:			"=&r"(found), "=r"(condition_register), "+m"(ptr->value)
+:			"i"(*expected), "r"(newval), "r"(&ptr->value)
+:			"memory", "cc");
+	else
+#endif
+		__asm__ __volatile__(
+			"	sync				\n"
+			"	lwarx   %0,0,%5		\n"
+			"	cmpw    %0,%3		\n"
+			"	bne     $+12		\n"		/* branch to isync */
+			"	stwcx.  %4,0,%5		\n"
+			"	bne     $-16		\n"		/* branch to lwarx */
+			"	isync				\n"
+			"	mfcr    %1          \n"
+:			"=&r"(found), "=r"(condition_register), "+m"(ptr->value)
+:			"r"(*expected), "r"(newval), "r"(&ptr->value)
+:			"memory", "cc");
+
+	ret = (condition_register >> 29) & 1;	/* test eq bit of cr0 */
+	if (!ret)
+		*expected = found;
+	return ret;
+}
+
+/*
+ * This mirrors gcc __sync_fetch_and_add().
+ *
+ * Like tas(), use constraint "=&b" to avoid allocating r0.
+ */
+#define PG_HAVE_ATOMIC_FETCH_ADD_U32
+static inline uint32
+pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_)
+{
+	uint32 _t;
+	uint32 res;
+
+#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
+	if (__builtin_constant_p(add_) &&
+		add_ <= PG_INT16_MAX && add_ >= PG_INT16_MIN)
+		__asm__ __volatile__(
+			"	sync				\n"
+			"	lwarx   %1,0,%4		\n"
+			"	addi    %0,%1,%3	\n"
+			"	stwcx.  %0,0,%4		\n"
+			"	bne     $-12		\n"		/* branch to lwarx */
+			"	isync				\n"
+:			"=&r"(_t), "=&b"(res), "+m"(ptr->value)
+:			"i"(add_), "r"(&ptr->value)
+:			"memory", "cc");
+	else
+#endif
+		__asm__ __volatile__(
+			"	sync				\n"
+			"	lwarx   %1,0,%4		\n"
+			"	add     %0,%1,%3	\n"
+			"	stwcx.  %0,0,%4		\n"
+			"	bne     $-12		\n"		/* branch to lwarx */
+			"	isync				\n"
+:			"=&r"(_t), "=&r"(res), "+m"(ptr->value)
+:			"r"(add_), "r"(&ptr->value)
+:			"memory", "cc");
+
+	return res;
+}
+
+#ifdef PG_HAVE_ATOMIC_U64_SUPPORT
+
+#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64
+static inline bool
+pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
+									uint64 *expected, uint64 newval)
+{
+	uint64 found;
+	uint32 condition_register;
+	bool ret;
+
+	/* Like u32, but s/lwarx/ldarx/; s/stwcx/stdcx/; s/cmpw/cmpd/ */
+#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
+	if (__builtin_constant_p(*expected) &&
+		(int64) *expected <= PG_INT16_MAX &&
+		(int64) *expected >= PG_INT16_MIN)
+		__asm__ __volatile__(
+			"	sync				\n"
+			"	ldarx   %0,0,%5		\n"
+			"	cmpdi   %0,%3		\n"
+			"	bne     $+12		\n"		/* branch to isync */
+			"	stdcx.  %4,0,%5		\n"
+			"	bne     $-16		\n"		/* branch to ldarx */
+			"	isync				\n"
+			"	mfcr    %1          \n"
+:			"=&r"(found), "=r"(condition_register), "+m"(ptr->value)
+:			"i"(*expected), "r"(newval), "r"(&ptr->value)
+:			"memory", "cc");
+	else
+#endif
+		__asm__ __volatile__(
+			"	sync				\n"
+			"	ldarx   %0,0,%5		\n"
+			"	cmpd    %0,%3		\n"
+			"	bne     $+12		\n"		/* branch to isync */
+			"	stdcx.  %4,0,%5		\n"
+			"	bne     $-16		\n"		/* branch to ldarx */
+			"	isync				\n"
+			"	mfcr    %1          \n"
+:			"=&r"(found), "=r"(condition_register), "+m"(ptr->value)
+:			"r"(*expected), "r"(newval), "r"(&ptr->value)
+:			"memory", "cc");
+
+	ret = (condition_register >> 29) & 1;	/* test eq bit of cr0 */
+	if (!ret)
+		*expected = found;
+	return ret;
+}
+
+#define PG_HAVE_ATOMIC_FETCH_ADD_U64
+static inline uint64
+pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
+{
+	uint64 _t;
+	uint64 res;
+
+	/* Like u32, but s/lwarx/ldarx/; s/stwcx/stdcx/ */
+#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P
+	if (__builtin_constant_p(add_) &&
+		add_ <= PG_INT16_MAX && add_ >= PG_INT16_MIN)
+		__asm__ __volatile__(
+			"	sync				\n"
+			"	ldarx   %1,0,%4		\n"
+			"	addi    %0,%1,%3	\n"
+			"	stdcx.  %0,0,%4		\n"
+			"	bne     $-12		\n"		/* branch to ldarx */
+			"	isync				\n"
+:			"=&r"(_t), "=&b"(res), "+m"(ptr->value)
+:			"i"(add_), "r"(&ptr->value)
+:			"memory", "cc");
+	else
+#endif
+		__asm__ __volatile__(
+			"	sync				\n"
+			"	ldarx   %1,0,%4		\n"
+			"	add     %0,%1,%3	\n"
+			"	stdcx.  %0,0,%4		\n"
+			"	bne     $-12		\n"		/* branch to ldarx */
+			"	isync				\n"
+:			"=&r"(_t), "=&r"(res), "+m"(ptr->value)
+:			"r"(add_), "r"(&ptr->value)
+:			"memory", "cc");
+
+	return res;
+}
+
+#endif /* PG_HAVE_ATOMIC_U64_SUPPORT */
+
+/* per architecture manual doubleword accesses have single copy atomicity */
+#define PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY
diff --git a/src/include/port/atomics/arch-x86.h b/src/include/port/atomics/arch-x86.h
new file mode 100644
index 0000000..13cc4f5
--- /dev/null
+++ b/src/include/port/atomics/arch-x86.h
@@ -0,0 +1,252 @@
+/*-------------------------------------------------------------------------
+ *
+ * arch-x86.h
+ *	  Atomic operations considerations specific to intel x86
+ *
+ * Note that we actually require a 486 upwards because the 386 doesn't have
+ * support for xadd and cmpxchg. Given that the 386 isn't supported anywhere
+ * anymore that's not much of a restriction luckily.
+ *
+ * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * NOTES:
+ *
+ * src/include/port/atomics/arch-x86.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/*
+ * Both 32 and 64 bit x86 do not allow loads to be reordered with other loads,
+ * or stores to be reordered with other stores, but a load can be performed
+ * before a subsequent store.
+ *
+ * Technically, some x86-ish chips support uncached memory access and/or
+ * special instructions that are weakly ordered.  In those cases we'd need
+ * the read and write barriers to be lfence and sfence.  But since we don't
+ * do those things, a compiler barrier should be enough.
+ *
+ * "lock; addl" has worked for longer than "mfence". It's also rumored to be
+ * faster in many scenarios.
+ */
+
+#if defined(__GNUC__) || defined(__INTEL_COMPILER)
+#if defined(__i386__) || defined(__i386)
+#define pg_memory_barrier_impl()		\
+	__asm__ __volatile__ ("lock; addl $0,0(%%esp)" : : : "memory", "cc")
+#elif defined(__x86_64__)
+#define pg_memory_barrier_impl()		\
+	__asm__ __volatile__ ("lock; addl $0,0(%%rsp)" : : : "memory", "cc")
+#endif
+#endif /* defined(__GNUC__) || defined(__INTEL_COMPILER) */
+
+#define pg_read_barrier_impl()		pg_compiler_barrier_impl()
+#define pg_write_barrier_impl()		pg_compiler_barrier_impl()
+
+/*
+ * Provide implementation for atomics using inline assembly on x86 gcc. It's
+ * nice to support older gcc's and the compare/exchange implementation here is
+ * actually more efficient than the * __sync variant.
+ */
+#if defined(HAVE_ATOMICS)
+
+#if defined(__GNUC__) || defined(__INTEL_COMPILER)
+
+#define PG_HAVE_ATOMIC_FLAG_SUPPORT
+typedef struct pg_atomic_flag
+{
+	volatile char value;
+} pg_atomic_flag;
+
+#define PG_HAVE_ATOMIC_U32_SUPPORT
+typedef struct pg_atomic_uint32
+{
+	volatile uint32 value;
+} pg_atomic_uint32;
+
+/*
+ * It's too complicated to write inline asm for 64bit types on 32bit and the
+ * 486 can't do it anyway.
+ */
+#ifdef __x86_64__
+#define PG_HAVE_ATOMIC_U64_SUPPORT
+typedef struct pg_atomic_uint64
+{
+	/* alignment guaranteed due to being on a 64bit platform */
+	volatile uint64 value;
+} pg_atomic_uint64;
+#endif	/* __x86_64__ */
+
+#endif /* defined(__GNUC__) || defined(__INTEL_COMPILER) */
+
+#endif /* defined(HAVE_ATOMICS) */
+
+#if !defined(PG_HAVE_SPIN_DELAY)
+/*
+ * This sequence is equivalent to the PAUSE instruction ("rep" is
+ * ignored by old IA32 processors if the following instruction is
+ * not a string operation); the IA-32 Architecture Software
+ * Developer's Manual, Vol. 3, Section 7.7.2 describes why using
+ * PAUSE in the inner loop of a spin lock is necessary for good
+ * performance:
+ *
+ *     The PAUSE instruction improves the performance of IA-32
+ *     processors supporting Hyper-Threading Technology when
+ *     executing spin-wait loops and other routines where one
+ *     thread is accessing a shared lock or semaphore in a tight
+ *     polling loop. When executing a spin-wait loop, the
+ *     processor can suffer a severe performance penalty when
+ *     exiting the loop because it detects a possible memory order
+ *     violation and flushes the core processor's pipeline. The
+ *     PAUSE instruction provides a hint to the processor that the
+ *     code sequence is a spin-wait loop. The processor uses this
+ *     hint to avoid the memory order violation and prevent the
+ *     pipeline flush. In addition, the PAUSE instruction
+ *     de-pipelines the spin-wait loop to prevent it from
+ *     consuming execution resources excessively.
+ */
+#if defined(__GNUC__) || defined(__INTEL_COMPILER)
+#define PG_HAVE_SPIN_DELAY
+static __inline__ void
+pg_spin_delay_impl(void)
+{
+	__asm__ __volatile__(" rep; nop			\n");
+}
+#elif defined(_MSC_VER) && defined(__x86_64__)
+#define PG_HAVE_SPIN_DELAY
+static __forceinline void
+pg_spin_delay_impl(void)
+{
+	_mm_pause();
+}
+#elif defined(_MSC_VER)
+#define PG_HAVE_SPIN_DELAY
+static __forceinline void
+pg_spin_delay_impl(void)
+{
+	/* See comment for gcc code. Same code, MASM syntax */
+	__asm rep nop;
+}
+#endif
+#endif /* !defined(PG_HAVE_SPIN_DELAY) */
+
+
+#if defined(HAVE_ATOMICS)
+
+#if defined(__GNUC__) || defined(__INTEL_COMPILER)
+
+#define PG_HAVE_ATOMIC_TEST_SET_FLAG
+static inline bool
+pg_atomic_test_set_flag_impl(volatile pg_atomic_flag *ptr)
+{
+	register char _res = 1;
+
+	__asm__ __volatile__(
+		"	lock			\n"
+		"	xchgb	%0,%1	\n"
+:		"+q"(_res), "+m"(ptr->value)
+:
+:		"memory");
+	return _res == 0;
+}
+
+#define PG_HAVE_ATOMIC_CLEAR_FLAG
+static inline void
+pg_atomic_clear_flag_impl(volatile pg_atomic_flag *ptr)
+{
+	/*
+	 * On a TSO architecture like x86 it's sufficient to use a compiler
+	 * barrier to achieve release semantics.
+	 */
+	__asm__ __volatile__("" ::: "memory");
+	ptr->value = 0;
+}
+
+#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32
+static inline bool
+pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
+									uint32 *expected, uint32 newval)
+{
+	char	ret;
+
+	/*
+	 * Perform cmpxchg and use the zero flag which it implicitly sets when
+	 * equal to measure the success.
+	 */
+	__asm__ __volatile__(
+		"	lock				\n"
+		"	cmpxchgl	%4,%5	\n"
+		"   setz		%2		\n"
+:		"=a" (*expected), "=m"(ptr->value), "=q" (ret)
+:		"a" (*expected), "r" (newval), "m"(ptr->value)
+:		"memory", "cc");
+	return (bool) ret;
+}
+
+#define PG_HAVE_ATOMIC_FETCH_ADD_U32
+static inline uint32
+pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_)
+{
+	uint32 res;
+	__asm__ __volatile__(
+		"	lock				\n"
+		"	xaddl	%0,%1		\n"
+:		"=q"(res), "=m"(ptr->value)
+:		"0" (add_), "m"(ptr->value)
+:		"memory", "cc");
+	return res;
+}
+
+#ifdef __x86_64__
+
+#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64
+static inline bool
+pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
+									uint64 *expected, uint64 newval)
+{
+	char	ret;
+
+	/*
+	 * Perform cmpxchg and use the zero flag which it implicitly sets when
+	 * equal to measure the success.
+	 */
+	__asm__ __volatile__(
+		"	lock				\n"
+		"	cmpxchgq	%4,%5	\n"
+		"   setz		%2		\n"
+:		"=a" (*expected), "=m"(ptr->value), "=q" (ret)
+:		"a" (*expected), "r" (newval), "m"(ptr->value)
+:		"memory", "cc");
+	return (bool) ret;
+}
+
+#define PG_HAVE_ATOMIC_FETCH_ADD_U64
+static inline uint64
+pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
+{
+	uint64 res;
+	__asm__ __volatile__(
+		"	lock				\n"
+		"	xaddq	%0,%1		\n"
+:		"=q"(res), "=m"(ptr->value)
+:		"0" (add_), "m"(ptr->value)
+:		"memory", "cc");
+	return res;
+}
+
+#endif /* __x86_64__ */
+
+#endif /* defined(__GNUC__) || defined(__INTEL_COMPILER) */
+
+/*
+ * 8 byte reads / writes have single-copy atomicity on 32 bit x86 platforms
+ * since at least the 586. As well as on all x86-64 cpus.
+ */
+#if defined(__i568__) || defined(__i668__) || /* gcc i586+ */  \
+	(defined(_M_IX86) && _M_IX86 >= 500) || /* msvc i586+ */ \
+	defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) /* gcc, sunpro, msvc */
+#define PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY
+#endif /* 8 byte single-copy atomicity */
+
+#endif /* HAVE_ATOMICS */
diff --git a/src/include/port/atomics/fallback.h b/src/include/port/atomics/fallback.h
new file mode 100644
index 0000000..e3849c8
--- /dev/null
+++ b/src/include/port/atomics/fallback.h
@@ -0,0 +1,170 @@
+/*-------------------------------------------------------------------------
+ *
+ * fallback.h
+ *    Fallback for platforms without spinlock and/or atomics support. Slower
+ *    than native atomics support, but not unusably slow.
+ *
+ * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/port/atomics/fallback.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/* intentionally no include guards, should only be included by atomics.h */
+#ifndef INSIDE_ATOMICS_H
+#	error "should be included via atomics.h"
+#endif
+
+#ifndef pg_memory_barrier_impl
+/*
+ * If we have no memory barrier implementation for this architecture, we
+ * fall back to acquiring and releasing a spinlock.  This might, in turn,
+ * fall back to the semaphore-based spinlock implementation, which will be
+ * amazingly slow.
+ *
+ * It's not self-evident that every possible legal implementation of a
+ * spinlock acquire-and-release would be equivalent to a full memory barrier.
+ * For example, I'm not sure that Itanium's acq and rel add up to a full
+ * fence.  But all of our actual implementations seem OK in this regard.
+ */
+#define PG_HAVE_MEMORY_BARRIER_EMULATION
+
+extern void pg_spinlock_barrier(void);
+#define pg_memory_barrier_impl pg_spinlock_barrier
+#endif
+
+#ifndef pg_compiler_barrier_impl
+/*
+ * If the compiler/arch combination does not provide compiler barriers,
+ * provide a fallback.  The fallback simply consists of a function call into
+ * an externally defined function.  That should guarantee compiler barrier
+ * semantics except for compilers that do inter translation unit/global
+ * optimization - those better provide an actual compiler barrier.
+ *
+ * A native compiler barrier for sure is a lot faster than this...
+ */
+#define PG_HAVE_COMPILER_BARRIER_EMULATION
+extern void pg_extern_compiler_barrier(void);
+#define pg_compiler_barrier_impl pg_extern_compiler_barrier
+#endif
+
+
+/*
+ * If we have atomics implementation for this platform, fall back to providing
+ * the atomics API using a spinlock to protect the internal state. Possibly
+ * the spinlock implementation uses semaphores internally...
+ *
+ * We have to be a bit careful here, as it's not guaranteed that atomic
+ * variables are mapped to the same address in every process (e.g. dynamic
+ * shared memory segments). We can't just hash the address and use that to map
+ * to a spinlock. Instead assign a spinlock on initialization of the atomic
+ * variable.
+ */
+#if !defined(PG_HAVE_ATOMIC_FLAG_SUPPORT) && !defined(PG_HAVE_ATOMIC_U32_SUPPORT)
+
+#define PG_HAVE_ATOMIC_FLAG_SIMULATION
+#define PG_HAVE_ATOMIC_FLAG_SUPPORT
+
+typedef struct pg_atomic_flag
+{
+	/*
+	 * To avoid circular includes we can't use s_lock as a type here. Instead
+	 * just reserve enough space for all spinlock types. Some platforms would
+	 * be content with just one byte instead of 4, but that's not too much
+	 * waste.
+	 */
+#if defined(__hppa) || defined(__hppa__)	/* HP PA-RISC, GCC and HP compilers */
+	int			sema[4];
+#else
+	int			sema;
+#endif
+	volatile bool value;
+} pg_atomic_flag;
+
+#endif /* PG_HAVE_ATOMIC_FLAG_SUPPORT */
+
+#if !defined(PG_HAVE_ATOMIC_U32_SUPPORT)
+
+#define PG_HAVE_ATOMIC_U32_SIMULATION
+
+#define PG_HAVE_ATOMIC_U32_SUPPORT
+typedef struct pg_atomic_uint32
+{
+	/* Check pg_atomic_flag's definition above for an explanation */
+#if defined(__hppa) || defined(__hppa__)	/* HP PA-RISC, GCC and HP compilers */
+	int			sema[4];
+#else
+	int			sema;
+#endif
+	volatile uint32 value;
+} pg_atomic_uint32;
+
+#endif /* PG_HAVE_ATOMIC_U32_SUPPORT */
+
+#if !defined(PG_HAVE_ATOMIC_U64_SUPPORT)
+
+#define PG_HAVE_ATOMIC_U64_SIMULATION
+
+#define PG_HAVE_ATOMIC_U64_SUPPORT
+typedef struct pg_atomic_uint64
+{
+	/* Check pg_atomic_flag's definition above for an explanation */
+#if defined(__hppa) || defined(__hppa__)	/* HP PA-RISC, GCC and HP compilers */
+	int			sema[4];
+#else
+	int			sema;
+#endif
+	volatile uint64 value;
+} pg_atomic_uint64;
+
+#endif /* PG_HAVE_ATOMIC_U64_SUPPORT */
+
+#ifdef PG_HAVE_ATOMIC_FLAG_SIMULATION
+
+#define PG_HAVE_ATOMIC_INIT_FLAG
+extern void pg_atomic_init_flag_impl(volatile pg_atomic_flag *ptr);
+
+#define PG_HAVE_ATOMIC_TEST_SET_FLAG
+extern bool pg_atomic_test_set_flag_impl(volatile pg_atomic_flag *ptr);
+
+#define PG_HAVE_ATOMIC_CLEAR_FLAG
+extern void pg_atomic_clear_flag_impl(volatile pg_atomic_flag *ptr);
+
+#define PG_HAVE_ATOMIC_UNLOCKED_TEST_FLAG
+extern bool pg_atomic_unlocked_test_flag_impl(volatile pg_atomic_flag *ptr);
+
+#endif /* PG_HAVE_ATOMIC_FLAG_SIMULATION */
+
+#ifdef PG_HAVE_ATOMIC_U32_SIMULATION
+
+#define PG_HAVE_ATOMIC_INIT_U32
+extern void pg_atomic_init_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 val_);
+
+#define PG_HAVE_ATOMIC_WRITE_U32
+extern void pg_atomic_write_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 val);
+
+#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32
+extern bool pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
+												uint32 *expected, uint32 newval);
+
+#define PG_HAVE_ATOMIC_FETCH_ADD_U32
+extern uint32 pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_);
+
+#endif /* PG_HAVE_ATOMIC_U32_SIMULATION */
+
+
+#ifdef PG_HAVE_ATOMIC_U64_SIMULATION
+
+#define PG_HAVE_ATOMIC_INIT_U64
+extern void pg_atomic_init_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 val_);
+
+#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64
+extern bool pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
+												uint64 *expected, uint64 newval);
+
+#define PG_HAVE_ATOMIC_FETCH_ADD_U64
+extern uint64 pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_);
+
+#endif /* PG_HAVE_ATOMIC_U64_SIMULATION */
diff --git a/src/include/port/atomics/generic-acc.h b/src/include/port/atomics/generic-acc.h
new file mode 100644
index 0000000..195b8f4
--- /dev/null
+++ b/src/include/port/atomics/generic-acc.h
@@ -0,0 +1,106 @@
+/*-------------------------------------------------------------------------
+ *
+ * generic-acc.h
+ *	  Atomic operations support when using HPs acc on HPUX
+ *
+ * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * NOTES:
+ *
+ * Documentation:
+ * * inline assembly for Itanium-based HP-UX:
+ *   http://h21007.www2.hp.com/portal/download/files/unprot/Itanium/inline_assem_ERS.pdf
+ * * Implementing Spinlocks on the Intel (R) Itanium (R) Architecture and PA-RISC
+ *   http://h21007.www2.hp.com/portal/download/files/unprot/itanium/spinlocks.pdf
+ *
+ * Itanium only supports a small set of numbers (6, -8, -4, -1, 1, 4, 8, 16)
+ * for atomic add/sub, so we just implement everything but compare_exchange
+ * via the compare_exchange fallbacks in atomics/generic.h.
+ *
+ * src/include/port/atomics/generic-acc.h
+ *
+ * -------------------------------------------------------------------------
+ */
+
+#include <machine/sys/inline.h>
+
+#define pg_compiler_barrier_impl()	_Asm_sched_fence()
+
+#if defined(HAVE_ATOMICS)
+
+/* IA64 always has 32/64 bit atomics */
+
+#define PG_HAVE_ATOMIC_U32_SUPPORT
+typedef struct pg_atomic_uint32
+{
+	volatile uint32 value;
+} pg_atomic_uint32;
+
+#define PG_HAVE_ATOMIC_U64_SUPPORT
+typedef struct pg_atomic_uint64
+{
+	/*
+	 * Alignment is guaranteed to be 64bit. Search for "Well-behaved
+	 * application restrictions" => "Data alignment and data sharing" on HP's
+	 * website. Unfortunately the URL doesn't seem to stable enough to
+	 * include.
+	 */
+	volatile uint64 value;
+} pg_atomic_uint64;
+
+
+#define MINOR_FENCE (_Asm_fence) (_UP_CALL_FENCE | _UP_SYS_FENCE | \
+								 _DOWN_CALL_FENCE | _DOWN_SYS_FENCE )
+
+#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32
+static inline bool
+pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
+									uint32 *expected, uint32 newval)
+{
+	bool	ret;
+	uint32	current;
+
+	_Asm_mov_to_ar(_AREG_CCV, *expected, MINOR_FENCE);
+	/*
+	 * We want a barrier, not just release/acquire semantics.
+	 */
+	_Asm_mf();
+	/*
+	 * Notes:
+	 * _DOWN_MEM_FENCE | _UP_MEM_FENCE prevents reordering by the compiler
+	 */
+	current =  _Asm_cmpxchg(_SZ_W, /* word */
+							_SEM_REL,
+							&ptr->value,
+							newval, _LDHINT_NONE,
+							_DOWN_MEM_FENCE | _UP_MEM_FENCE);
+	ret = current == *expected;
+	*expected = current;
+	return ret;
+}
+
+
+#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64
+static inline bool
+pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
+									uint64 *expected, uint64 newval)
+{
+	bool	ret;
+	uint64	current;
+
+	_Asm_mov_to_ar(_AREG_CCV, *expected, MINOR_FENCE);
+	_Asm_mf();
+	current =  _Asm_cmpxchg(_SZ_D, /* doubleword */
+							_SEM_REL,
+							&ptr->value,
+							newval, _LDHINT_NONE,
+							_DOWN_MEM_FENCE | _UP_MEM_FENCE);
+	ret = current == *expected;
+	*expected = current;
+	return ret;
+}
+
+#undef MINOR_FENCE
+
+#endif /* defined(HAVE_ATOMICS) */
diff --git a/src/include/port/atomics/generic-gcc.h b/src/include/port/atomics/generic-gcc.h
new file mode 100644
index 0000000..2d84305
--- /dev/null
+++ b/src/include/port/atomics/generic-gcc.h
@@ -0,0 +1,286 @@
+/*-------------------------------------------------------------------------
+ *
+ * generic-gcc.h
+ *	  Atomic operations, implemented using gcc (or compatible) intrinsics.
+ *
+ * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * NOTES:
+ *
+ * Documentation:
+ * * Legacy __sync Built-in Functions for Atomic Memory Access
+ *   http://gcc.gnu.org/onlinedocs/gcc-4.8.2/gcc/_005f_005fsync-Builtins.html
+ * * Built-in functions for memory model aware atomic operations
+ *   http://gcc.gnu.org/onlinedocs/gcc-4.8.2/gcc/_005f_005fatomic-Builtins.html
+ *
+ * src/include/port/atomics/generic-gcc.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/* intentionally no include guards, should only be included by atomics.h */
+#ifndef INSIDE_ATOMICS_H
+#error "should be included via atomics.h"
+#endif
+
+/*
+ * An empty asm block should be a sufficient compiler barrier.
+ */
+#define pg_compiler_barrier_impl()	__asm__ __volatile__("" ::: "memory")
+
+/*
+ * If we're on GCC 4.1.0 or higher, we should be able to get a memory barrier
+ * out of this compiler built-in.  But we prefer to rely on platform specific
+ * definitions where possible, and use this only as a fallback.
+ */
+#if !defined(pg_memory_barrier_impl)
+#	if defined(HAVE_GCC__ATOMIC_INT32_CAS)
+#		define pg_memory_barrier_impl()		__atomic_thread_fence(__ATOMIC_SEQ_CST)
+#	elif (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1))
+#		define pg_memory_barrier_impl()		__sync_synchronize()
+#	endif
+#endif /* !defined(pg_memory_barrier_impl) */
+
+#if !defined(pg_read_barrier_impl) && defined(HAVE_GCC__ATOMIC_INT32_CAS)
+/* acquire semantics include read barrier semantics */
+#		define pg_read_barrier_impl()		__atomic_thread_fence(__ATOMIC_ACQUIRE)
+#endif
+
+#if !defined(pg_write_barrier_impl) && defined(HAVE_GCC__ATOMIC_INT32_CAS)
+/* release semantics include write barrier semantics */
+#		define pg_write_barrier_impl()		__atomic_thread_fence(__ATOMIC_RELEASE)
+#endif
+
+
+#ifdef HAVE_ATOMICS
+
+/* generic gcc based atomic flag implementation */
+#if !defined(PG_HAVE_ATOMIC_FLAG_SUPPORT) \
+	&& (defined(HAVE_GCC__SYNC_INT32_TAS) || defined(HAVE_GCC__SYNC_CHAR_TAS))
+
+#define PG_HAVE_ATOMIC_FLAG_SUPPORT
+typedef struct pg_atomic_flag
+{
+	/*
+	 * If we have a choice, use int-width TAS, because that is more efficient
+	 * and/or more reliably implemented on most non-Intel platforms.  (Note
+	 * that this code isn't used on x86[_64]; see arch-x86.h for that.)
+	 */
+#ifdef HAVE_GCC__SYNC_INT32_TAS
+	volatile int value;
+#else
+	volatile char value;
+#endif
+} pg_atomic_flag;
+
+#endif /* !ATOMIC_FLAG_SUPPORT && SYNC_INT32_TAS */
+
+/* generic gcc based atomic uint32 implementation */
+#if !defined(PG_HAVE_ATOMIC_U32_SUPPORT) \
+	&& (defined(HAVE_GCC__ATOMIC_INT32_CAS) || defined(HAVE_GCC__SYNC_INT32_CAS))
+
+#define PG_HAVE_ATOMIC_U32_SUPPORT
+typedef struct pg_atomic_uint32
+{
+	volatile uint32 value;
+} pg_atomic_uint32;
+
+#endif /* defined(HAVE_GCC__ATOMIC_INT32_CAS) || defined(HAVE_GCC__SYNC_INT32_CAS) */
+
+/* generic gcc based atomic uint64 implementation */
+#if !defined(PG_HAVE_ATOMIC_U64_SUPPORT) \
+	&& !defined(PG_DISABLE_64_BIT_ATOMICS) \
+	&& (defined(HAVE_GCC__ATOMIC_INT64_CAS) || defined(HAVE_GCC__SYNC_INT64_CAS))
+
+#define PG_HAVE_ATOMIC_U64_SUPPORT
+
+typedef struct pg_atomic_uint64
+{
+	volatile uint64 value pg_attribute_aligned(8);
+} pg_atomic_uint64;
+
+#endif /* defined(HAVE_GCC__ATOMIC_INT64_CAS) || defined(HAVE_GCC__SYNC_INT64_CAS) */
+
+#ifdef PG_HAVE_ATOMIC_FLAG_SUPPORT
+
+#if defined(HAVE_GCC__SYNC_CHAR_TAS) || defined(HAVE_GCC__SYNC_INT32_TAS)
+
+#ifndef PG_HAVE_ATOMIC_TEST_SET_FLAG
+#define PG_HAVE_ATOMIC_TEST_SET_FLAG
+static inline bool
+pg_atomic_test_set_flag_impl(volatile pg_atomic_flag *ptr)
+{
+	/* NB: only an acquire barrier, not a full one */
+	/* some platform only support a 1 here */
+	return __sync_lock_test_and_set(&ptr->value, 1) == 0;
+}
+#endif
+
+#endif /* defined(HAVE_GCC__SYNC_*_TAS) */
+
+#ifndef PG_HAVE_ATOMIC_UNLOCKED_TEST_FLAG
+#define PG_HAVE_ATOMIC_UNLOCKED_TEST_FLAG
+static inline bool
+pg_atomic_unlocked_test_flag_impl(volatile pg_atomic_flag *ptr)
+{
+	return ptr->value == 0;
+}
+#endif
+
+#ifndef PG_HAVE_ATOMIC_CLEAR_FLAG
+#define PG_HAVE_ATOMIC_CLEAR_FLAG
+static inline void
+pg_atomic_clear_flag_impl(volatile pg_atomic_flag *ptr)
+{
+	__sync_lock_release(&ptr->value);
+}
+#endif
+
+#ifndef PG_HAVE_ATOMIC_INIT_FLAG
+#define PG_HAVE_ATOMIC_INIT_FLAG
+static inline void
+pg_atomic_init_flag_impl(volatile pg_atomic_flag *ptr)
+{
+	pg_atomic_clear_flag_impl(ptr);
+}
+#endif
+
+#endif /* defined(PG_HAVE_ATOMIC_FLAG_SUPPORT) */
+
+/* prefer __atomic, it has a better API */
+#if !defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32) && defined(HAVE_GCC__ATOMIC_INT32_CAS)
+#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32
+static inline bool
+pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
+									uint32 *expected, uint32 newval)
+{
+	/* FIXME: we can probably use a lower consistency model */
+	return __atomic_compare_exchange_n(&ptr->value, expected, newval, false,
+									   __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
+}
+#endif
+
+#if !defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32) && defined(HAVE_GCC__SYNC_INT32_CAS)
+#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32
+static inline bool
+pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
+									uint32 *expected, uint32 newval)
+{
+	bool	ret;
+	uint32	current;
+	current = __sync_val_compare_and_swap(&ptr->value, *expected, newval);
+	ret = current == *expected;
+	*expected = current;
+	return ret;
+}
+#endif
+
+/* if we have 32-bit __sync_val_compare_and_swap, assume we have these too: */
+
+#if !defined(PG_HAVE_ATOMIC_FETCH_ADD_U32) && defined(HAVE_GCC__SYNC_INT32_CAS)
+#define PG_HAVE_ATOMIC_FETCH_ADD_U32
+static inline uint32
+pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_)
+{
+	return __sync_fetch_and_add(&ptr->value, add_);
+}
+#endif
+
+#if !defined(PG_HAVE_ATOMIC_FETCH_SUB_U32) && defined(HAVE_GCC__SYNC_INT32_CAS)
+#define PG_HAVE_ATOMIC_FETCH_SUB_U32
+static inline uint32
+pg_atomic_fetch_sub_u32_impl(volatile pg_atomic_uint32 *ptr, int32 sub_)
+{
+	return __sync_fetch_and_sub(&ptr->value, sub_);
+}
+#endif
+
+#if !defined(PG_HAVE_ATOMIC_FETCH_AND_U32) && defined(HAVE_GCC__SYNC_INT32_CAS)
+#define PG_HAVE_ATOMIC_FETCH_AND_U32
+static inline uint32
+pg_atomic_fetch_and_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 and_)
+{
+	return __sync_fetch_and_and(&ptr->value, and_);
+}
+#endif
+
+#if !defined(PG_HAVE_ATOMIC_FETCH_OR_U32) && defined(HAVE_GCC__SYNC_INT32_CAS)
+#define PG_HAVE_ATOMIC_FETCH_OR_U32
+static inline uint32
+pg_atomic_fetch_or_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 or_)
+{
+	return __sync_fetch_and_or(&ptr->value, or_);
+}
+#endif
+
+
+#if !defined(PG_DISABLE_64_BIT_ATOMICS)
+
+#if !defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64) && defined(HAVE_GCC__ATOMIC_INT64_CAS)
+#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64
+static inline bool
+pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
+									uint64 *expected, uint64 newval)
+{
+	return __atomic_compare_exchange_n(&ptr->value, expected, newval, false,
+									   __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
+}
+#endif
+
+#if !defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64) && defined(HAVE_GCC__SYNC_INT64_CAS)
+#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64
+static inline bool
+pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
+									uint64 *expected, uint64 newval)
+{
+	bool	ret;
+	uint64	current;
+	current = __sync_val_compare_and_swap(&ptr->value, *expected, newval);
+	ret = current == *expected;
+	*expected = current;
+	return ret;
+}
+#endif
+
+/* if we have 64-bit __sync_val_compare_and_swap, assume we have these too: */
+
+#if !defined(PG_HAVE_ATOMIC_FETCH_ADD_U64) && defined(HAVE_GCC__SYNC_INT64_CAS)
+#define PG_HAVE_ATOMIC_FETCH_ADD_U64
+static inline uint64
+pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
+{
+	return __sync_fetch_and_add(&ptr->value, add_);
+}
+#endif
+
+#if !defined(PG_HAVE_ATOMIC_FETCH_SUB_U64) && defined(HAVE_GCC__SYNC_INT64_CAS)
+#define PG_HAVE_ATOMIC_FETCH_SUB_U64
+static inline uint64
+pg_atomic_fetch_sub_u64_impl(volatile pg_atomic_uint64 *ptr, int64 sub_)
+{
+	return __sync_fetch_and_sub(&ptr->value, sub_);
+}
+#endif
+
+#if !defined(PG_HAVE_ATOMIC_FETCH_AND_U64) && defined(HAVE_GCC__SYNC_INT64_CAS)
+#define PG_HAVE_ATOMIC_FETCH_AND_U64
+static inline uint64
+pg_atomic_fetch_and_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 and_)
+{
+	return __sync_fetch_and_and(&ptr->value, and_);
+}
+#endif
+
+#if !defined(PG_HAVE_ATOMIC_FETCH_OR_U64) && defined(HAVE_GCC__SYNC_INT64_CAS)
+#define PG_HAVE_ATOMIC_FETCH_OR_U64
+static inline uint64
+pg_atomic_fetch_or_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 or_)
+{
+	return __sync_fetch_and_or(&ptr->value, or_);
+}
+#endif
+
+#endif /* !defined(PG_DISABLE_64_BIT_ATOMICS) */
+
+#endif /* defined(HAVE_ATOMICS) */
diff --git a/src/include/port/atomics/generic-msvc.h b/src/include/port/atomics/generic-msvc.h
new file mode 100644
index 0000000..c74c609
--- /dev/null
+++ b/src/include/port/atomics/generic-msvc.h
@@ -0,0 +1,101 @@
+/*-------------------------------------------------------------------------
+ *
+ * generic-msvc.h
+ *	  Atomic operations support when using MSVC
+ *
+ * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * NOTES:
+ *
+ * Documentation:
+ * * Interlocked Variable Access
+ *   http://msdn.microsoft.com/en-us/library/ms684122%28VS.85%29.aspx
+ *
+ * src/include/port/atomics/generic-msvc.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#include <intrin.h>
+
+/* intentionally no include guards, should only be included by atomics.h */
+#ifndef INSIDE_ATOMICS_H
+#error "should be included via atomics.h"
+#endif
+
+#pragma intrinsic(_ReadWriteBarrier)
+#define pg_compiler_barrier_impl()	_ReadWriteBarrier()
+
+#ifndef pg_memory_barrier_impl
+#define pg_memory_barrier_impl()	MemoryBarrier()
+#endif
+
+#if defined(HAVE_ATOMICS)
+
+#define PG_HAVE_ATOMIC_U32_SUPPORT
+typedef struct pg_atomic_uint32
+{
+	volatile uint32 value;
+} pg_atomic_uint32;
+
+#define PG_HAVE_ATOMIC_U64_SUPPORT
+typedef struct __declspec(align(8)) pg_atomic_uint64
+{
+	volatile uint64 value;
+} pg_atomic_uint64;
+
+
+#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32
+static inline bool
+pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
+									uint32 *expected, uint32 newval)
+{
+	bool	ret;
+	uint32	current;
+	current = InterlockedCompareExchange(&ptr->value, newval, *expected);
+	ret = current == *expected;
+	*expected = current;
+	return ret;
+}
+
+#define PG_HAVE_ATOMIC_FETCH_ADD_U32
+static inline uint32
+pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_)
+{
+	return InterlockedExchangeAdd(&ptr->value, add_);
+}
+
+/*
+ * The non-intrinsics versions are only available in vista upwards, so use the
+ * intrinsic version. Only supported on >486, but we require XP as a minimum
+ * baseline, which doesn't support the 486, so we don't need to add checks for
+ * that case.
+ */
+#pragma intrinsic(_InterlockedCompareExchange64)
+
+#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64
+static inline bool
+pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
+									uint64 *expected, uint64 newval)
+{
+	bool	ret;
+	uint64	current;
+	current = _InterlockedCompareExchange64(&ptr->value, newval, *expected);
+	ret = current == *expected;
+	*expected = current;
+	return ret;
+}
+
+/* Only implemented on itanium and 64bit builds */
+#ifdef _WIN64
+#pragma intrinsic(_InterlockedExchangeAdd64)
+
+#define PG_HAVE_ATOMIC_FETCH_ADD_U64
+static inline uint64
+pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
+{
+	return _InterlockedExchangeAdd64(&ptr->value, add_);
+}
+#endif /* _WIN64 */
+
+#endif /* HAVE_ATOMICS */
diff --git a/src/include/port/atomics/generic-sunpro.h b/src/include/port/atomics/generic-sunpro.h
new file mode 100644
index 0000000..9aaeb73
--- /dev/null
+++ b/src/include/port/atomics/generic-sunpro.h
@@ -0,0 +1,106 @@
+/*-------------------------------------------------------------------------
+ *
+ * generic-sunpro.h
+ *	  Atomic operations for solaris' CC
+ *
+ * Portions Copyright (c) 2013-2020, PostgreSQL Global Development Group
+ *
+ * NOTES:
+ *
+ * Documentation:
+ * * manpage for atomic_cas(3C)
+ *   http://www.unix.com/man-page/opensolaris/3c/atomic_cas/
+ *   http://docs.oracle.com/cd/E23824_01/html/821-1465/atomic-cas-3c.html
+ *
+ * src/include/port/atomics/generic-sunpro.h
+ *
+ * -------------------------------------------------------------------------
+ */
+
+#if defined(HAVE_ATOMICS)
+
+#ifdef HAVE_MBARRIER_H
+#include <mbarrier.h>
+
+#define pg_compiler_barrier_impl()	__compiler_barrier()
+
+#ifndef pg_memory_barrier_impl
+/*
+ * Despite the name this is actually a full barrier. Expanding to mfence/
+ * membar #StoreStore | #LoadStore | #StoreLoad | #LoadLoad on x86/sparc
+ * respectively.
+ */
+#	define pg_memory_barrier_impl()		__machine_rw_barrier()
+#endif
+#ifndef pg_read_barrier_impl
+#	define pg_read_barrier_impl()		__machine_r_barrier()
+#endif
+#ifndef pg_write_barrier_impl
+#	define pg_write_barrier_impl()		__machine_w_barrier()
+#endif
+
+#endif /* HAVE_MBARRIER_H */
+
+/* Older versions of the compiler don't have atomic.h... */
+#ifdef HAVE_ATOMIC_H
+
+#include <atomic.h>
+
+#define PG_HAVE_ATOMIC_U32_SUPPORT
+typedef struct pg_atomic_uint32
+{
+	volatile uint32 value;
+} pg_atomic_uint32;
+
+#define PG_HAVE_ATOMIC_U64_SUPPORT
+typedef struct pg_atomic_uint64
+{
+	/*
+	 * Syntax to enforce variable alignment should be supported by versions
+	 * supporting atomic.h, but it's hard to find accurate documentation. If
+	 * it proves to be a problem, we'll have to add more version checks for 64
+	 * bit support.
+	 */
+	volatile uint64 value pg_attribute_aligned(8);
+} pg_atomic_uint64;
+
+#endif /* HAVE_ATOMIC_H */
+
+#endif /* defined(HAVE_ATOMICS) */
+
+
+#if defined(HAVE_ATOMICS)
+
+#ifdef HAVE_ATOMIC_H
+
+#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32
+static inline bool
+pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
+									uint32 *expected, uint32 newval)
+{
+	bool	ret;
+	uint32	current;
+
+	current = atomic_cas_32(&ptr->value, *expected, newval);
+	ret = current == *expected;
+	*expected = current;
+	return ret;
+}
+
+#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64
+static inline bool
+pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr,
+									uint64 *expected, uint64 newval)
+{
+	bool	ret;
+	uint64	current;
+
+	current = atomic_cas_64(&ptr->value, *expected, newval);
+	ret = current == *expected;
+	*expected = current;
+	return ret;
+}
+
+#endif /* HAVE_ATOMIC_H */
+
+#endif /* defined(HAVE_ATOMICS) */
diff --git a/src/include/port/atomics/generic.h b/src/include/port/atomics/generic.h
new file mode 100644
index 0000000..d60a0d9
--- /dev/null
+++ b/src/include/port/atomics/generic.h
@@ -0,0 +1,401 @@
+/*-------------------------------------------------------------------------
+ *
+ * generic.h
+ *	  Implement higher level operations based on some lower level atomic
+ *	  operations.
+ *
+ * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/port/atomics/generic.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/* intentionally no include guards, should only be included by atomics.h */
+#ifndef INSIDE_ATOMICS_H
+#	error "should be included via atomics.h"
+#endif
+
+/*
+ * If read or write barriers are undefined, we upgrade them to full memory
+ * barriers.
+ */
+#if !defined(pg_read_barrier_impl)
+#	define pg_read_barrier_impl pg_memory_barrier_impl
+#endif
+#if !defined(pg_write_barrier_impl)
+#	define pg_write_barrier_impl pg_memory_barrier_impl
+#endif
+
+#ifndef PG_HAVE_SPIN_DELAY
+#define PG_HAVE_SPIN_DELAY
+#define pg_spin_delay_impl()	((void)0)
+#endif
+
+
+/* provide fallback */
+#if !defined(PG_HAVE_ATOMIC_FLAG_SUPPORT) && defined(PG_HAVE_ATOMIC_U32_SUPPORT)
+#define PG_HAVE_ATOMIC_FLAG_SUPPORT
+typedef pg_atomic_uint32 pg_atomic_flag;
+#endif
+
+#ifndef PG_HAVE_ATOMIC_READ_U32
+#define PG_HAVE_ATOMIC_READ_U32
+static inline uint32
+pg_atomic_read_u32_impl(volatile pg_atomic_uint32 *ptr)
+{
+	return ptr->value;
+}
+#endif
+
+#ifndef PG_HAVE_ATOMIC_WRITE_U32
+#define PG_HAVE_ATOMIC_WRITE_U32
+static inline void
+pg_atomic_write_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 val)
+{
+	ptr->value = val;
+}
+#endif
+
+#ifndef PG_HAVE_ATOMIC_UNLOCKED_WRITE_U32
+#define PG_HAVE_ATOMIC_UNLOCKED_WRITE_U32
+static inline void
+pg_atomic_unlocked_write_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 val)
+{
+	ptr->value = val;
+}
+#endif
+
+/*
+ * provide fallback for test_and_set using atomic_exchange if available
+ */
+#if !defined(PG_HAVE_ATOMIC_TEST_SET_FLAG) && defined(PG_HAVE_ATOMIC_EXCHANGE_U32)
+
+#define PG_HAVE_ATOMIC_INIT_FLAG
+static inline void
+pg_atomic_init_flag_impl(volatile pg_atomic_flag *ptr)
+{
+	pg_atomic_write_u32_impl(ptr, 0);
+}
+
+#define PG_HAVE_ATOMIC_TEST_SET_FLAG
+static inline bool
+pg_atomic_test_set_flag_impl(volatile pg_atomic_flag *ptr)
+{
+	return pg_atomic_exchange_u32_impl(ptr, &value, 1) == 0;
+}
+
+#define PG_HAVE_ATOMIC_UNLOCKED_TEST_FLAG
+static inline bool
+pg_atomic_unlocked_test_flag_impl(volatile pg_atomic_flag *ptr)
+{
+	return pg_atomic_read_u32_impl(ptr) == 0;
+}
+
+
+#define PG_HAVE_ATOMIC_CLEAR_FLAG
+static inline void
+pg_atomic_clear_flag_impl(volatile pg_atomic_flag *ptr)
+{
+	/* XXX: release semantics suffice? */
+	pg_memory_barrier_impl();
+	pg_atomic_write_u32_impl(ptr, 0);
+}
+
+/*
+ * provide fallback for test_and_set using atomic_compare_exchange if
+ * available.
+ */
+#elif !defined(PG_HAVE_ATOMIC_TEST_SET_FLAG) && defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32)
+
+#define PG_HAVE_ATOMIC_INIT_FLAG
+static inline void
+pg_atomic_init_flag_impl(volatile pg_atomic_flag *ptr)
+{
+	pg_atomic_write_u32_impl(ptr, 0);
+}
+
+#define PG_HAVE_ATOMIC_TEST_SET_FLAG
+static inline bool
+pg_atomic_test_set_flag_impl(volatile pg_atomic_flag *ptr)
+{
+	uint32 value = 0;
+	return pg_atomic_compare_exchange_u32_impl(ptr, &value, 1);
+}
+
+#define PG_HAVE_ATOMIC_UNLOCKED_TEST_FLAG
+static inline bool
+pg_atomic_unlocked_test_flag_impl(volatile pg_atomic_flag *ptr)
+{
+	return pg_atomic_read_u32_impl(ptr) == 0;
+}
+
+#define PG_HAVE_ATOMIC_CLEAR_FLAG
+static inline void
+pg_atomic_clear_flag_impl(volatile pg_atomic_flag *ptr)
+{
+	/*
+	 * Use a memory barrier + plain write if we have a native memory
+	 * barrier. But don't do so if memory barriers use spinlocks - that'd lead
+	 * to circularity if flags are used to implement spinlocks.
+	 */
+#ifndef PG_HAVE_MEMORY_BARRIER_EMULATION
+	/* XXX: release semantics suffice? */
+	pg_memory_barrier_impl();
+	pg_atomic_write_u32_impl(ptr, 0);
+#else
+	uint32 value = 1;
+	pg_atomic_compare_exchange_u32_impl(ptr, &value, 0);
+#endif
+}
+
+#elif !defined(PG_HAVE_ATOMIC_TEST_SET_FLAG)
+#	error "No pg_atomic_test_and_set provided"
+#endif /* !defined(PG_HAVE_ATOMIC_TEST_SET_FLAG) */
+
+
+#ifndef PG_HAVE_ATOMIC_INIT_U32
+#define PG_HAVE_ATOMIC_INIT_U32
+static inline void
+pg_atomic_init_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 val_)
+{
+	ptr->value = val_;
+}
+#endif
+
+#if !defined(PG_HAVE_ATOMIC_EXCHANGE_U32) && defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32)
+#define PG_HAVE_ATOMIC_EXCHANGE_U32
+static inline uint32
+pg_atomic_exchange_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 xchg_)
+{
+	uint32 old;
+	old = ptr->value;			/* ok if read is not atomic */
+	while (!pg_atomic_compare_exchange_u32_impl(ptr, &old, xchg_))
+		/* skip */;
+	return old;
+}
+#endif
+
+#if !defined(PG_HAVE_ATOMIC_FETCH_ADD_U32) && defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32)
+#define PG_HAVE_ATOMIC_FETCH_ADD_U32
+static inline uint32
+pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_)
+{
+	uint32 old;
+	old = ptr->value;			/* ok if read is not atomic */
+	while (!pg_atomic_compare_exchange_u32_impl(ptr, &old, old + add_))
+		/* skip */;
+	return old;
+}
+#endif
+
+#if !defined(PG_HAVE_ATOMIC_FETCH_SUB_U32) && defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32)
+#define PG_HAVE_ATOMIC_FETCH_SUB_U32
+static inline uint32
+pg_atomic_fetch_sub_u32_impl(volatile pg_atomic_uint32 *ptr, int32 sub_)
+{
+	return pg_atomic_fetch_add_u32_impl(ptr, -sub_);
+}
+#endif
+
+#if !defined(PG_HAVE_ATOMIC_FETCH_AND_U32) && defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32)
+#define PG_HAVE_ATOMIC_FETCH_AND_U32
+static inline uint32
+pg_atomic_fetch_and_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 and_)
+{
+	uint32 old;
+	old = ptr->value;			/* ok if read is not atomic */
+	while (!pg_atomic_compare_exchange_u32_impl(ptr, &old, old & and_))
+		/* skip */;
+	return old;
+}
+#endif
+
+#if !defined(PG_HAVE_ATOMIC_FETCH_OR_U32) && defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32)
+#define PG_HAVE_ATOMIC_FETCH_OR_U32
+static inline uint32
+pg_atomic_fetch_or_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 or_)
+{
+	uint32 old;
+	old = ptr->value;			/* ok if read is not atomic */
+	while (!pg_atomic_compare_exchange_u32_impl(ptr, &old, old | or_))
+		/* skip */;
+	return old;
+}
+#endif
+
+#if !defined(PG_HAVE_ATOMIC_ADD_FETCH_U32) && defined(PG_HAVE_ATOMIC_FETCH_ADD_U32)
+#define PG_HAVE_ATOMIC_ADD_FETCH_U32
+static inline uint32
+pg_atomic_add_fetch_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_)
+{
+	return pg_atomic_fetch_add_u32_impl(ptr, add_) + add_;
+}
+#endif
+
+#if !defined(PG_HAVE_ATOMIC_SUB_FETCH_U32) && defined(PG_HAVE_ATOMIC_FETCH_SUB_U32)
+#define PG_HAVE_ATOMIC_SUB_FETCH_U32
+static inline uint32
+pg_atomic_sub_fetch_u32_impl(volatile pg_atomic_uint32 *ptr, int32 sub_)
+{
+	return pg_atomic_fetch_sub_u32_impl(ptr, sub_) - sub_;
+}
+#endif
+
+#if !defined(PG_HAVE_ATOMIC_EXCHANGE_U64) && defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64)
+#define PG_HAVE_ATOMIC_EXCHANGE_U64
+static inline uint64
+pg_atomic_exchange_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 xchg_)
+{
+	uint64 old;
+	old = ptr->value;			/* ok if read is not atomic */
+	while (!pg_atomic_compare_exchange_u64_impl(ptr, &old, xchg_))
+		/* skip */;
+	return old;
+}
+#endif
+
+#ifndef PG_HAVE_ATOMIC_WRITE_U64
+#define PG_HAVE_ATOMIC_WRITE_U64
+
+#if defined(PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY) && \
+	!defined(PG_HAVE_ATOMIC_U64_SIMULATION)
+
+static inline void
+pg_atomic_write_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 val)
+{
+	/*
+	 * On this platform aligned 64bit writes are guaranteed to be atomic,
+	 * except if using the fallback implementation, where can't guarantee the
+	 * required alignment.
+	 */
+	AssertPointerAlignment(ptr, 8);
+	ptr->value = val;
+}
+
+#else
+
+static inline void
+pg_atomic_write_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 val)
+{
+	/*
+	 * 64 bit writes aren't safe on all platforms. In the generic
+	 * implementation implement them as an atomic exchange.
+	 */
+	pg_atomic_exchange_u64_impl(ptr, val);
+}
+
+#endif /* PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY && !PG_HAVE_ATOMIC_U64_SIMULATION */
+#endif /* PG_HAVE_ATOMIC_WRITE_U64 */
+
+#ifndef PG_HAVE_ATOMIC_READ_U64
+#define PG_HAVE_ATOMIC_READ_U64
+
+#if defined(PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY) && \
+	!defined(PG_HAVE_ATOMIC_U64_SIMULATION)
+
+static inline uint64
+pg_atomic_read_u64_impl(volatile pg_atomic_uint64 *ptr)
+{
+	/*
+	 * On this platform aligned 64-bit reads are guaranteed to be atomic.
+	 */
+	AssertPointerAlignment(ptr, 8);
+	return ptr->value;
+}
+
+#else
+
+static inline uint64
+pg_atomic_read_u64_impl(volatile pg_atomic_uint64 *ptr)
+{
+	uint64 old = 0;
+
+	/*
+	 * 64-bit reads aren't atomic on all platforms. In the generic
+	 * implementation implement them as a compare/exchange with 0. That'll
+	 * fail or succeed, but always return the old value. Possibly might store
+	 * a 0, but only if the previous value also was a 0 - i.e. harmless.
+	 */
+	pg_atomic_compare_exchange_u64_impl(ptr, &old, 0);
+
+	return old;
+}
+#endif /* PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY && !PG_HAVE_ATOMIC_U64_SIMULATION */
+#endif /* PG_HAVE_ATOMIC_READ_U64 */
+
+#ifndef PG_HAVE_ATOMIC_INIT_U64
+#define PG_HAVE_ATOMIC_INIT_U64
+static inline void
+pg_atomic_init_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 val_)
+{
+	ptr->value = val_;
+}
+#endif
+
+#if !defined(PG_HAVE_ATOMIC_FETCH_ADD_U64) && defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64)
+#define PG_HAVE_ATOMIC_FETCH_ADD_U64
+static inline uint64
+pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
+{
+	uint64 old;
+	old = ptr->value;			/* ok if read is not atomic */
+	while (!pg_atomic_compare_exchange_u64_impl(ptr, &old, old + add_))
+		/* skip */;
+	return old;
+}
+#endif
+
+#if !defined(PG_HAVE_ATOMIC_FETCH_SUB_U64) && defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64)
+#define PG_HAVE_ATOMIC_FETCH_SUB_U64
+static inline uint64
+pg_atomic_fetch_sub_u64_impl(volatile pg_atomic_uint64 *ptr, int64 sub_)
+{
+	return pg_atomic_fetch_add_u64_impl(ptr, -sub_);
+}
+#endif
+
+#if !defined(PG_HAVE_ATOMIC_FETCH_AND_U64) && defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64)
+#define PG_HAVE_ATOMIC_FETCH_AND_U64
+static inline uint64
+pg_atomic_fetch_and_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 and_)
+{
+	uint64 old;
+	old = ptr->value;			/* ok if read is not atomic */
+	while (!pg_atomic_compare_exchange_u64_impl(ptr, &old, old & and_))
+		/* skip */;
+	return old;
+}
+#endif
+
+#if !defined(PG_HAVE_ATOMIC_FETCH_OR_U64) && defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64)
+#define PG_HAVE_ATOMIC_FETCH_OR_U64
+static inline uint64
+pg_atomic_fetch_or_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 or_)
+{
+	uint64 old;
+	old = ptr->value;			/* ok if read is not atomic */
+	while (!pg_atomic_compare_exchange_u64_impl(ptr, &old, old | or_))
+		/* skip */;
+	return old;
+}
+#endif
+
+#if !defined(PG_HAVE_ATOMIC_ADD_FETCH_U64) && defined(PG_HAVE_ATOMIC_FETCH_ADD_U64)
+#define PG_HAVE_ATOMIC_ADD_FETCH_U64
+static inline uint64
+pg_atomic_add_fetch_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_)
+{
+	return pg_atomic_fetch_add_u64_impl(ptr, add_) + add_;
+}
+#endif
+
+#if !defined(PG_HAVE_ATOMIC_SUB_FETCH_U64) && defined(PG_HAVE_ATOMIC_FETCH_SUB_U64)
+#define PG_HAVE_ATOMIC_SUB_FETCH_U64
+static inline uint64
+pg_atomic_sub_fetch_u64_impl(volatile pg_atomic_uint64 *ptr, int64 sub_)
+{
+	return pg_atomic_fetch_sub_u64_impl(ptr, sub_) - sub_;
+}
+#endif