| author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-13 13:44:03 +0000 |
|---|---|---|
| committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-13 13:44:03 +0000 |
| commit | 293913568e6a7a86fd1479e1cff8e2ecb58d6568 (patch) | |
| tree | fc3b469a3ec5ab71b36ea97cc7aaddb838423a0c /src/include/port | |
| parent | Initial commit. (diff) | |
| download | postgresql-16-293913568e6a7a86fd1479e1cff8e2ecb58d6568.tar.xz postgresql-16-293913568e6a7a86fd1479e1cff8e2ecb58d6568.zip | |
Adding upstream version 16.2. (upstream/16.2)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/include/port')
46 files changed, 4323 insertions, 0 deletions
diff --git a/src/include/port/aix.h b/src/include/port/aix.h new file mode 100644 index 0000000..5b1159c --- /dev/null +++ b/src/include/port/aix.h @@ -0,0 +1,14 @@ +/* + * src/include/port/aix.h + */ +#define CLASS_CONFLICT +#define DISABLE_XOPEN_NLS + +/* + * "IBM XL C/C++ for AIX, V12.1" miscompiles, for 32-bit, some inline + * expansions of ginCompareItemPointers() "long long" arithmetic. To take + * advantage of inlining, build a 64-bit PostgreSQL. + */ +#if defined(__ILP32__) && defined(__IBMC__) +#define PG_FORCE_DISABLE_INLINE +#endif diff --git a/src/include/port/atomics.h b/src/include/port/atomics.h new file mode 100644 index 0000000..bbff945 --- /dev/null +++ b/src/include/port/atomics.h @@ -0,0 +1,519 @@ +/*------------------------------------------------------------------------- + * + * atomics.h + * Atomic operations. + * + * Hardware and compiler dependent functions for manipulating memory + * atomically and dealing with cache coherency. Used to implement locking + * facilities and lockless algorithms/data structures. + * + * To bring up postgres on a platform/compiler at the very least + * implementations for the following operations should be provided: + * * pg_compiler_barrier(), pg_write_barrier(), pg_read_barrier() + * * pg_atomic_compare_exchange_u32(), pg_atomic_fetch_add_u32() + * * pg_atomic_test_set_flag(), pg_atomic_init_flag(), pg_atomic_clear_flag() + * * PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY should be defined if appropriate. + * + * There exist generic, hardware independent, implementations for several + * compilers which might be sufficient, although possibly not optimal, for a + * new platform. If no such generic implementation is available spinlocks (or + * even OS provided semaphores) will be used to implement the API. + * + * Implement _u64 atomics if and only if your platform can use them + * efficiently (and obviously correctly). + * + * Use higher level functionality (lwlocks, spinlocks, heavyweight locks) + * whenever possible. Writing correct code using these facilities is hard. + * + * For an introduction to using memory barriers within the PostgreSQL backend, + * see src/backend/storage/lmgr/README.barrier + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/port/atomics.h + * + *------------------------------------------------------------------------- + */ +#ifndef ATOMICS_H +#define ATOMICS_H + +#ifdef FRONTEND +#error "atomics.h may not be included from frontend code" +#endif + +#define INSIDE_ATOMICS_H + +#include <limits.h> + +/* + * First a set of architecture specific files is included. + * + * These files can provide the full set of atomics or can do pretty much + * nothing if all the compilers commonly used on these platforms provide + * usable generics. + * + * Don't add an inline assembly of the actual atomic operations if all the + * common implementations of your platform provide intrinsics. Intrinsics are + * much easier to understand and potentially support more architectures. + * + * It will often make sense to define memory barrier semantics here, since + * e.g. generic compiler intrinsics for x86 memory barriers can't know that + * postgres doesn't need x86 read/write barriers do anything more than a + * compiler barrier. 
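Illustrative sketch, not part of this commit: for a port whose compilers are already covered by the generic gcc implementation included further down, the architecture header often only needs to pin down barrier semantics. The hypothetical macros below reuse the same __atomic_thread_fence() builtins that generic-gcc.h itself falls back to.

/* hypothetical arch-newport.h, assuming a gcc-compatible compiler */
#define pg_memory_barrier_impl()  __atomic_thread_fence(__ATOMIC_SEQ_CST)
#define pg_read_barrier_impl()    __atomic_thread_fence(__ATOMIC_ACQUIRE)
#define pg_write_barrier_impl()   __atomic_thread_fence(__ATOMIC_RELEASE)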
+ * + */ +#if defined(__arm__) || defined(__arm) || defined(__aarch64__) +#include "port/atomics/arch-arm.h" +#elif defined(__i386__) || defined(__i386) || defined(__x86_64__) +#include "port/atomics/arch-x86.h" +#elif defined(__ppc__) || defined(__powerpc__) || defined(__ppc64__) || defined(__powerpc64__) +#include "port/atomics/arch-ppc.h" +#elif defined(__hppa) || defined(__hppa__) +#include "port/atomics/arch-hppa.h" +#endif + +/* + * Compiler specific, but architecture independent implementations. + * + * Provide architecture independent implementations of the atomic + * facilities. At the very least compiler barriers should be provided, but a + * full implementation of + * * pg_compiler_barrier(), pg_write_barrier(), pg_read_barrier() + * * pg_atomic_compare_exchange_u32(), pg_atomic_fetch_add_u32() + * using compiler intrinsics are a good idea. + */ +/* + * gcc or compatible, including clang and icc. Exclude xlc. The ppc64le "IBM + * XL C/C++ for Linux, V13.1.2" emulates gcc, but __sync_lock_test_and_set() + * of one-byte types elicits SIGSEGV. That bug was gone by V13.1.5 (2016-12). + */ +#if (defined(__GNUC__) || defined(__INTEL_COMPILER)) && !(defined(__IBMC__) || defined(__IBMCPP__)) +#include "port/atomics/generic-gcc.h" +#elif defined(_MSC_VER) +#include "port/atomics/generic-msvc.h" +#elif defined(__SUNPRO_C) && !defined(__GNUC__) +#include "port/atomics/generic-sunpro.h" +#else +/* + * Unsupported compiler, we'll likely use slower fallbacks... At least + * compiler barriers should really be provided. + */ +#endif + +/* + * Provide a full fallback of the pg_*_barrier(), pg_atomic**_flag and + * pg_atomic_* APIs for platforms without sufficient spinlock and/or atomics + * support. In the case of spinlock backed atomics the emulation is expected + * to be efficient, although less so than native atomics support. + */ +#include "port/atomics/fallback.h" + +/* + * Provide additional operations using supported infrastructure. These are + * expected to be efficient if the underlying atomic operations are efficient. + */ +#include "port/atomics/generic.h" + + +/* + * pg_compiler_barrier - prevent the compiler from moving code across + * + * A compiler barrier need not (and preferably should not) emit any actual + * machine code, but must act as an optimization fence: the compiler must not + * reorder loads or stores to main memory around the barrier. However, the + * CPU may still reorder loads or stores at runtime, if the architecture's + * memory model permits this. + */ +#define pg_compiler_barrier() pg_compiler_barrier_impl() + +/* + * pg_memory_barrier - prevent the CPU from reordering memory access + * + * A memory barrier must act as a compiler barrier, and in addition must + * guarantee that all loads and stores issued prior to the barrier are + * completed before any loads or stores issued after the barrier. Unless + * loads and stores are totally ordered (which is not the case on most + * architectures) this requires issuing some sort of memory fencing + * instruction. + */ +#define pg_memory_barrier() pg_memory_barrier_impl() + +/* + * pg_(read|write)_barrier - prevent the CPU from reordering memory access + * + * A read barrier must act as a compiler barrier, and in addition must + * guarantee that any loads issued prior to the barrier are completed before + * any loads issued after the barrier. Similarly, a write barrier acts + * as a compiler barrier, and also orders stores. 
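As a usage sketch (assumed names, not part of the diff), the classic message-passing pattern pairs pg_write_barrier() on the producing side with pg_read_barrier() on the consuming side, exactly as described here:

static int payload;           /* ordinary shared data */
static volatile int ready;    /* publication flag */

void
producer(void)
{
    payload = 42;             /* write the data ... */
    pg_write_barrier();       /* ... and order it before the flag store */
    ready = 1;
}

int
consumer(void)
{
    if (ready)
    {
        pg_read_barrier();    /* order the data load after the flag load */
        return payload;
    }
    return -1;                /* not published yet */
}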
Read and write barriers + * are thus weaker than a full memory barrier, but stronger than a compiler + * barrier. In practice, on machines with strong memory ordering, read and + * write barriers may require nothing more than a compiler barrier. + */ +#define pg_read_barrier() pg_read_barrier_impl() +#define pg_write_barrier() pg_write_barrier_impl() + +/* + * Spinloop delay - Allow CPU to relax in busy loops + */ +#define pg_spin_delay() pg_spin_delay_impl() + +/* + * pg_atomic_init_flag - initialize atomic flag. + * + * No barrier semantics. + */ +static inline void +pg_atomic_init_flag(volatile pg_atomic_flag *ptr) +{ + pg_atomic_init_flag_impl(ptr); +} + +/* + * pg_atomic_test_set_flag - TAS() + * + * Returns true if the flag has successfully been set, false otherwise. + * + * Acquire (including read barrier) semantics. + */ +static inline bool +pg_atomic_test_set_flag(volatile pg_atomic_flag *ptr) +{ + return pg_atomic_test_set_flag_impl(ptr); +} + +/* + * pg_atomic_unlocked_test_flag - Check if the lock is free + * + * Returns true if the flag currently is not set, false otherwise. + * + * No barrier semantics. + */ +static inline bool +pg_atomic_unlocked_test_flag(volatile pg_atomic_flag *ptr) +{ + return pg_atomic_unlocked_test_flag_impl(ptr); +} + +/* + * pg_atomic_clear_flag - release lock set by TAS() + * + * Release (including write barrier) semantics. + */ +static inline void +pg_atomic_clear_flag(volatile pg_atomic_flag *ptr) +{ + pg_atomic_clear_flag_impl(ptr); +} + + +/* + * pg_atomic_init_u32 - initialize atomic variable + * + * Has to be done before any concurrent usage.. + * + * No barrier semantics. + */ +static inline void +pg_atomic_init_u32(volatile pg_atomic_uint32 *ptr, uint32 val) +{ + AssertPointerAlignment(ptr, 4); + + pg_atomic_init_u32_impl(ptr, val); +} + +/* + * pg_atomic_read_u32 - unlocked read from atomic variable. + * + * The read is guaranteed to return a value as it has been written by this or + * another process at some point in the past. There's however no cache + * coherency interaction guaranteeing the value hasn't since been written to + * again. + * + * No barrier semantics. + */ +static inline uint32 +pg_atomic_read_u32(volatile pg_atomic_uint32 *ptr) +{ + AssertPointerAlignment(ptr, 4); + return pg_atomic_read_u32_impl(ptr); +} + +/* + * pg_atomic_write_u32 - write to atomic variable. + * + * The write is guaranteed to succeed as a whole, i.e. it's not possible to + * observe a partial write for any reader. Note that this correctly interacts + * with pg_atomic_compare_exchange_u32, in contrast to + * pg_atomic_unlocked_write_u32(). + * + * No barrier semantics. + */ +static inline void +pg_atomic_write_u32(volatile pg_atomic_uint32 *ptr, uint32 val) +{ + AssertPointerAlignment(ptr, 4); + + pg_atomic_write_u32_impl(ptr, val); +} + +/* + * pg_atomic_unlocked_write_u32 - unlocked write to atomic variable. + * + * The write is guaranteed to succeed as a whole, i.e. it's not possible to + * observe a partial write for any reader. But note that writing this way is + * not guaranteed to correctly interact with read-modify-write operations like + * pg_atomic_compare_exchange_u32. This should only be used in cases where + * minor performance regressions due to atomics emulation are unacceptable. + * + * No barrier semantics. 
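A minimal usage sketch of the flag API above (hypothetical names): pg_atomic_flag is exactly the TAS/clear building block for spinlock-like constructs, with pg_spin_delay() used while busy-waiting.

static pg_atomic_flag fast_lock;       /* assumed shared variable */

void
fast_lock_init(void)
{
    pg_atomic_init_flag(&fast_lock);   /* before any concurrent use */
}

void
fast_lock_acquire(void)
{
    while (!pg_atomic_test_set_flag(&fast_lock))   /* acquire semantics */
        pg_spin_delay();                           /* relax while waiting */
}

void
fast_lock_release(void)
{
    pg_atomic_clear_flag(&fast_lock);              /* release semantics */
}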
+ */ +static inline void +pg_atomic_unlocked_write_u32(volatile pg_atomic_uint32 *ptr, uint32 val) +{ + AssertPointerAlignment(ptr, 4); + + pg_atomic_unlocked_write_u32_impl(ptr, val); +} + +/* + * pg_atomic_exchange_u32 - exchange newval with current value + * + * Returns the old value of 'ptr' before the swap. + * + * Full barrier semantics. + */ +static inline uint32 +pg_atomic_exchange_u32(volatile pg_atomic_uint32 *ptr, uint32 newval) +{ + AssertPointerAlignment(ptr, 4); + + return pg_atomic_exchange_u32_impl(ptr, newval); +} + +/* + * pg_atomic_compare_exchange_u32 - CAS operation + * + * Atomically compare the current value of ptr with *expected and store newval + * iff ptr and *expected have the same value. The current value of *ptr will + * always be stored in *expected. + * + * Return true if values have been exchanged, false otherwise. + * + * Full barrier semantics. + */ +static inline bool +pg_atomic_compare_exchange_u32(volatile pg_atomic_uint32 *ptr, + uint32 *expected, uint32 newval) +{ + AssertPointerAlignment(ptr, 4); + AssertPointerAlignment(expected, 4); + + return pg_atomic_compare_exchange_u32_impl(ptr, expected, newval); +} + +/* + * pg_atomic_fetch_add_u32 - atomically add to variable + * + * Returns the value of ptr before the arithmetic operation. + * + * Full barrier semantics. + */ +static inline uint32 +pg_atomic_fetch_add_u32(volatile pg_atomic_uint32 *ptr, int32 add_) +{ + AssertPointerAlignment(ptr, 4); + return pg_atomic_fetch_add_u32_impl(ptr, add_); +} + +/* + * pg_atomic_fetch_sub_u32 - atomically subtract from variable + * + * Returns the value of ptr before the arithmetic operation. Note that sub_ + * may not be INT_MIN due to platform limitations. + * + * Full barrier semantics. + */ +static inline uint32 +pg_atomic_fetch_sub_u32(volatile pg_atomic_uint32 *ptr, int32 sub_) +{ + AssertPointerAlignment(ptr, 4); + Assert(sub_ != INT_MIN); + return pg_atomic_fetch_sub_u32_impl(ptr, sub_); +} + +/* + * pg_atomic_fetch_and_u32 - atomically bit-and and_ with variable + * + * Returns the value of ptr before the arithmetic operation. + * + * Full barrier semantics. + */ +static inline uint32 +pg_atomic_fetch_and_u32(volatile pg_atomic_uint32 *ptr, uint32 and_) +{ + AssertPointerAlignment(ptr, 4); + return pg_atomic_fetch_and_u32_impl(ptr, and_); +} + +/* + * pg_atomic_fetch_or_u32 - atomically bit-or or_ with variable + * + * Returns the value of ptr before the arithmetic operation. + * + * Full barrier semantics. + */ +static inline uint32 +pg_atomic_fetch_or_u32(volatile pg_atomic_uint32 *ptr, uint32 or_) +{ + AssertPointerAlignment(ptr, 4); + return pg_atomic_fetch_or_u32_impl(ptr, or_); +} + +/* + * pg_atomic_add_fetch_u32 - atomically add to variable + * + * Returns the value of ptr after the arithmetic operation. + * + * Full barrier semantics. + */ +static inline uint32 +pg_atomic_add_fetch_u32(volatile pg_atomic_uint32 *ptr, int32 add_) +{ + AssertPointerAlignment(ptr, 4); + return pg_atomic_add_fetch_u32_impl(ptr, add_); +} + +/* + * pg_atomic_sub_fetch_u32 - atomically subtract from variable + * + * Returns the value of ptr after the arithmetic operation. Note that sub_ may + * not be INT_MIN due to platform limitations. + * + * Full barrier semantics. 
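The compare-and-exchange convention above (on failure, *expected is refreshed with the value actually found) leads to a compact retry-loop idiom. A hypothetical "store the maximum" helper, purely as an illustration:

static void
atomic_fetch_max_u32(volatile pg_atomic_uint32 *ptr, uint32 val)
{
    uint32 old = pg_atomic_read_u32(ptr);

    /* retry until the stored value is already >= val or our CAS wins */
    while (old < val &&
           !pg_atomic_compare_exchange_u32(ptr, &old, val))
        ;
}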
+ */ +static inline uint32 +pg_atomic_sub_fetch_u32(volatile pg_atomic_uint32 *ptr, int32 sub_) +{ + AssertPointerAlignment(ptr, 4); + Assert(sub_ != INT_MIN); + return pg_atomic_sub_fetch_u32_impl(ptr, sub_); +} + +/* ---- + * The 64 bit operations have the same semantics as their 32bit counterparts + * if they are available. Check the corresponding 32bit function for + * documentation. + * ---- + */ +static inline void +pg_atomic_init_u64(volatile pg_atomic_uint64 *ptr, uint64 val) +{ + /* + * Can't necessarily enforce alignment - and don't need it - when using + * the spinlock based fallback implementation. Therefore only assert when + * not using it. + */ +#ifndef PG_HAVE_ATOMIC_U64_SIMULATION + AssertPointerAlignment(ptr, 8); +#endif + pg_atomic_init_u64_impl(ptr, val); +} + +static inline uint64 +pg_atomic_read_u64(volatile pg_atomic_uint64 *ptr) +{ +#ifndef PG_HAVE_ATOMIC_U64_SIMULATION + AssertPointerAlignment(ptr, 8); +#endif + return pg_atomic_read_u64_impl(ptr); +} + +static inline void +pg_atomic_write_u64(volatile pg_atomic_uint64 *ptr, uint64 val) +{ +#ifndef PG_HAVE_ATOMIC_U64_SIMULATION + AssertPointerAlignment(ptr, 8); +#endif + pg_atomic_write_u64_impl(ptr, val); +} + +static inline uint64 +pg_atomic_exchange_u64(volatile pg_atomic_uint64 *ptr, uint64 newval) +{ +#ifndef PG_HAVE_ATOMIC_U64_SIMULATION + AssertPointerAlignment(ptr, 8); +#endif + return pg_atomic_exchange_u64_impl(ptr, newval); +} + +static inline bool +pg_atomic_compare_exchange_u64(volatile pg_atomic_uint64 *ptr, + uint64 *expected, uint64 newval) +{ +#ifndef PG_HAVE_ATOMIC_U64_SIMULATION + AssertPointerAlignment(ptr, 8); + AssertPointerAlignment(expected, 8); +#endif + return pg_atomic_compare_exchange_u64_impl(ptr, expected, newval); +} + +static inline uint64 +pg_atomic_fetch_add_u64(volatile pg_atomic_uint64 *ptr, int64 add_) +{ +#ifndef PG_HAVE_ATOMIC_U64_SIMULATION + AssertPointerAlignment(ptr, 8); +#endif + return pg_atomic_fetch_add_u64_impl(ptr, add_); +} + +static inline uint64 +pg_atomic_fetch_sub_u64(volatile pg_atomic_uint64 *ptr, int64 sub_) +{ +#ifndef PG_HAVE_ATOMIC_U64_SIMULATION + AssertPointerAlignment(ptr, 8); +#endif + Assert(sub_ != PG_INT64_MIN); + return pg_atomic_fetch_sub_u64_impl(ptr, sub_); +} + +static inline uint64 +pg_atomic_fetch_and_u64(volatile pg_atomic_uint64 *ptr, uint64 and_) +{ +#ifndef PG_HAVE_ATOMIC_U64_SIMULATION + AssertPointerAlignment(ptr, 8); +#endif + return pg_atomic_fetch_and_u64_impl(ptr, and_); +} + +static inline uint64 +pg_atomic_fetch_or_u64(volatile pg_atomic_uint64 *ptr, uint64 or_) +{ +#ifndef PG_HAVE_ATOMIC_U64_SIMULATION + AssertPointerAlignment(ptr, 8); +#endif + return pg_atomic_fetch_or_u64_impl(ptr, or_); +} + +static inline uint64 +pg_atomic_add_fetch_u64(volatile pg_atomic_uint64 *ptr, int64 add_) +{ +#ifndef PG_HAVE_ATOMIC_U64_SIMULATION + AssertPointerAlignment(ptr, 8); +#endif + return pg_atomic_add_fetch_u64_impl(ptr, add_); +} + +static inline uint64 +pg_atomic_sub_fetch_u64(volatile pg_atomic_uint64 *ptr, int64 sub_) +{ +#ifndef PG_HAVE_ATOMIC_U64_SIMULATION + AssertPointerAlignment(ptr, 8); +#endif + Assert(sub_ != PG_INT64_MIN); + return pg_atomic_sub_fetch_u64_impl(ptr, sub_); +} + +#undef INSIDE_ATOMICS_H + +#endif /* ATOMICS_H */ diff --git a/src/include/port/atomics/arch-arm.h b/src/include/port/atomics/arch-arm.h new file mode 100644 index 0000000..c90bf58 --- /dev/null +++ b/src/include/port/atomics/arch-arm.h @@ -0,0 +1,32 @@ +/*------------------------------------------------------------------------- + * + * arch-arm.h + * 
Atomic operations considerations specific to ARM + * + * Portions Copyright (c) 2013-2023, PostgreSQL Global Development Group + * + * NOTES: + * + * src/include/port/atomics/arch-arm.h + * + *------------------------------------------------------------------------- + */ + +/* intentionally no include guards, should only be included by atomics.h */ +#ifndef INSIDE_ATOMICS_H +#error "should be included via atomics.h" +#endif + +/* + * 64 bit atomics on ARM32 are implemented using kernel fallbacks and thus + * might be slow, so disable entirely. On ARM64 that problem doesn't exist. + */ +#if !defined(__aarch64__) +#define PG_DISABLE_64_BIT_ATOMICS +#else +/* + * Architecture Reference Manual for ARMv8 states aligned read/write to/from + * general purpose register is atomic. + */ +#define PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY +#endif /* __aarch64__ */ diff --git a/src/include/port/atomics/arch-hppa.h b/src/include/port/atomics/arch-hppa.h new file mode 100644 index 0000000..4c89fbf --- /dev/null +++ b/src/include/port/atomics/arch-hppa.h @@ -0,0 +1,17 @@ +/*------------------------------------------------------------------------- + * + * arch-hppa.h + * Atomic operations considerations specific to HPPA + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * NOTES: + * + * src/include/port/atomics/arch-hppa.h + * + *------------------------------------------------------------------------- + */ + +/* HPPA doesn't do either read or write reordering */ +#define pg_memory_barrier_impl() pg_compiler_barrier_impl() diff --git a/src/include/port/atomics/arch-ppc.h b/src/include/port/atomics/arch-ppc.h new file mode 100644 index 0000000..d992d4c --- /dev/null +++ b/src/include/port/atomics/arch-ppc.h @@ -0,0 +1,254 @@ +/*------------------------------------------------------------------------- + * + * arch-ppc.h + * Atomic operations considerations specific to PowerPC + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * NOTES: + * + * src/include/port/atomics/arch-ppc.h + * + *------------------------------------------------------------------------- + */ + +#if defined(__GNUC__) + +/* + * lwsync orders loads with respect to each other, and similarly with stores. + * But a load can be performed before a subsequent store, so sync must be used + * for a full memory barrier. + */ +#define pg_memory_barrier_impl() __asm__ __volatile__ ("sync" : : : "memory") +#define pg_read_barrier_impl() __asm__ __volatile__ ("lwsync" : : : "memory") +#define pg_write_barrier_impl() __asm__ __volatile__ ("lwsync" : : : "memory") +#endif + +#define PG_HAVE_ATOMIC_U32_SUPPORT +typedef struct pg_atomic_uint32 +{ + volatile uint32 value; +} pg_atomic_uint32; + +/* 64bit atomics are only supported in 64bit mode */ +#if SIZEOF_VOID_P >= 8 +#define PG_HAVE_ATOMIC_U64_SUPPORT +typedef struct pg_atomic_uint64 +{ + volatile uint64 value pg_attribute_aligned(8); +} pg_atomic_uint64; + +#endif + +/* + * This mimics gcc __atomic_compare_exchange_n(..., __ATOMIC_SEQ_CST), but + * code generation differs at the end. 
__atomic_compare_exchange_n(): + * 100: isync + * 104: mfcr r3 + * 108: rlwinm r3,r3,3,31,31 + * 10c: bne 120 <.eb+0x10> + * 110: clrldi r3,r3,63 + * 114: addi r1,r1,112 + * 118: blr + * 11c: nop + * 120: clrldi r3,r3,63 + * 124: stw r9,0(r4) + * 128: addi r1,r1,112 + * 12c: blr + * + * This: + * f0: isync + * f4: mfcr r9 + * f8: rldicl. r3,r9,35,63 + * fc: bne 104 <.eb> + * 100: stw r10,0(r4) + * 104: addi r1,r1,112 + * 108: blr + * + * This implementation may or may not have materially different performance. + * It's not exploiting the fact that cr0 still holds the relevant comparison + * bits, set during the __asm__. One could fix that by moving more code into + * the __asm__. (That would remove the freedom to eliminate dead stores when + * the caller ignores "expected", but few callers do.) + * + * Recognizing constant "newval" would be superfluous, because there's no + * immediate-operand version of stwcx. + */ +#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32 +static inline bool +pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr, + uint32 *expected, uint32 newval) +{ + uint32 found; + uint32 condition_register; + bool ret; + +#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P + if (__builtin_constant_p(*expected) && + (int32) *expected <= PG_INT16_MAX && + (int32) *expected >= PG_INT16_MIN) + __asm__ __volatile__( + " sync \n" + " lwarx %0,0,%5,1 \n" + " cmpwi %0,%3 \n" + " bne $+12 \n" /* branch to lwsync */ + " stwcx. %4,0,%5 \n" + " bne $-16 \n" /* branch to lwarx */ + " lwsync \n" + " mfcr %1 \n" +: "=&r"(found), "=r"(condition_register), "+m"(ptr->value) +: "i"(*expected), "r"(newval), "r"(&ptr->value) +: "memory", "cc"); + else +#endif + __asm__ __volatile__( + " sync \n" + " lwarx %0,0,%5,1 \n" + " cmpw %0,%3 \n" + " bne $+12 \n" /* branch to lwsync */ + " stwcx. %4,0,%5 \n" + " bne $-16 \n" /* branch to lwarx */ + " lwsync \n" + " mfcr %1 \n" +: "=&r"(found), "=r"(condition_register), "+m"(ptr->value) +: "r"(*expected), "r"(newval), "r"(&ptr->value) +: "memory", "cc"); + + ret = (condition_register >> 29) & 1; /* test eq bit of cr0 */ + if (!ret) + *expected = found; + return ret; +} + +/* + * This mirrors gcc __sync_fetch_and_add(). + * + * Like tas(), use constraint "=&b" to avoid allocating r0. + */ +#define PG_HAVE_ATOMIC_FETCH_ADD_U32 +static inline uint32 +pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_) +{ + uint32 _t; + uint32 res; + +#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P + if (__builtin_constant_p(add_) && + add_ <= PG_INT16_MAX && add_ >= PG_INT16_MIN) + __asm__ __volatile__( + " sync \n" + " lwarx %1,0,%4,1 \n" + " addi %0,%1,%3 \n" + " stwcx. %0,0,%4 \n" + " bne $-12 \n" /* branch to lwarx */ + " lwsync \n" +: "=&r"(_t), "=&b"(res), "+m"(ptr->value) +: "i"(add_), "r"(&ptr->value) +: "memory", "cc"); + else +#endif + __asm__ __volatile__( + " sync \n" + " lwarx %1,0,%4,1 \n" + " add %0,%1,%3 \n" + " stwcx. 
%0,0,%4 \n" + " bne $-12 \n" /* branch to lwarx */ + " lwsync \n" +: "=&r"(_t), "=&r"(res), "+m"(ptr->value) +: "r"(add_), "r"(&ptr->value) +: "memory", "cc"); + + return res; +} + +#ifdef PG_HAVE_ATOMIC_U64_SUPPORT + +#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64 +static inline bool +pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr, + uint64 *expected, uint64 newval) +{ + uint64 found; + uint32 condition_register; + bool ret; + + /* Like u32, but s/lwarx/ldarx/; s/stwcx/stdcx/; s/cmpw/cmpd/ */ +#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P + if (__builtin_constant_p(*expected) && + (int64) *expected <= PG_INT16_MAX && + (int64) *expected >= PG_INT16_MIN) + __asm__ __volatile__( + " sync \n" + " ldarx %0,0,%5,1 \n" + " cmpdi %0,%3 \n" + " bne $+12 \n" /* branch to lwsync */ + " stdcx. %4,0,%5 \n" + " bne $-16 \n" /* branch to ldarx */ + " lwsync \n" + " mfcr %1 \n" +: "=&r"(found), "=r"(condition_register), "+m"(ptr->value) +: "i"(*expected), "r"(newval), "r"(&ptr->value) +: "memory", "cc"); + else +#endif + __asm__ __volatile__( + " sync \n" + " ldarx %0,0,%5,1 \n" + " cmpd %0,%3 \n" + " bne $+12 \n" /* branch to lwsync */ + " stdcx. %4,0,%5 \n" + " bne $-16 \n" /* branch to ldarx */ + " lwsync \n" + " mfcr %1 \n" +: "=&r"(found), "=r"(condition_register), "+m"(ptr->value) +: "r"(*expected), "r"(newval), "r"(&ptr->value) +: "memory", "cc"); + + ret = (condition_register >> 29) & 1; /* test eq bit of cr0 */ + if (!ret) + *expected = found; + return ret; +} + +#define PG_HAVE_ATOMIC_FETCH_ADD_U64 +static inline uint64 +pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_) +{ + uint64 _t; + uint64 res; + + /* Like u32, but s/lwarx/ldarx/; s/stwcx/stdcx/ */ +#ifdef HAVE_I_CONSTRAINT__BUILTIN_CONSTANT_P + if (__builtin_constant_p(add_) && + add_ <= PG_INT16_MAX && add_ >= PG_INT16_MIN) + __asm__ __volatile__( + " sync \n" + " ldarx %1,0,%4,1 \n" + " addi %0,%1,%3 \n" + " stdcx. %0,0,%4 \n" + " bne $-12 \n" /* branch to ldarx */ + " lwsync \n" +: "=&r"(_t), "=&b"(res), "+m"(ptr->value) +: "i"(add_), "r"(&ptr->value) +: "memory", "cc"); + else +#endif + __asm__ __volatile__( + " sync \n" + " ldarx %1,0,%4,1 \n" + " add %0,%1,%3 \n" + " stdcx. %0,0,%4 \n" + " bne $-12 \n" /* branch to ldarx */ + " lwsync \n" +: "=&r"(_t), "=&r"(res), "+m"(ptr->value) +: "r"(add_), "r"(&ptr->value) +: "memory", "cc"); + + return res; +} + +#endif /* PG_HAVE_ATOMIC_U64_SUPPORT */ + +/* per architecture manual doubleword accesses have single copy atomicity */ +#define PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY diff --git a/src/include/port/atomics/arch-x86.h b/src/include/port/atomics/arch-x86.h new file mode 100644 index 0000000..bb84b9b --- /dev/null +++ b/src/include/port/atomics/arch-x86.h @@ -0,0 +1,252 @@ +/*------------------------------------------------------------------------- + * + * arch-x86.h + * Atomic operations considerations specific to intel x86 + * + * Note that we actually require a 486 upwards because the 386 doesn't have + * support for xadd and cmpxchg. Given that the 386 isn't supported anywhere + * anymore that's not much of a restriction luckily. 
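A quick usage sketch (hypothetical counter, not part of the diff) of the 64-bit API declared in atomics.h, which these per-architecture files ultimately back:

static pg_atomic_uint64 bytes_written;

void
stats_init(void)
{
    pg_atomic_init_u64(&bytes_written, 0);      /* before any concurrent use */
}

void
stats_count(int64 n)
{
    pg_atomic_fetch_add_u64(&bytes_written, n); /* full barrier semantics */
}

uint64
stats_read(void)
{
    return pg_atomic_read_u64(&bytes_written);  /* unlocked, no barrier */
}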
+ * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * NOTES: + * + * src/include/port/atomics/arch-x86.h + * + *------------------------------------------------------------------------- + */ + +/* + * Both 32 and 64 bit x86 do not allow loads to be reordered with other loads, + * or stores to be reordered with other stores, but a load can be performed + * before a subsequent store. + * + * Technically, some x86-ish chips support uncached memory access and/or + * special instructions that are weakly ordered. In those cases we'd need + * the read and write barriers to be lfence and sfence. But since we don't + * do those things, a compiler barrier should be enough. + * + * "lock; addl" has worked for longer than "mfence". It's also rumored to be + * faster in many scenarios. + */ + +#if defined(__GNUC__) || defined(__INTEL_COMPILER) +#if defined(__i386__) || defined(__i386) +#define pg_memory_barrier_impl() \ + __asm__ __volatile__ ("lock; addl $0,0(%%esp)" : : : "memory", "cc") +#elif defined(__x86_64__) +#define pg_memory_barrier_impl() \ + __asm__ __volatile__ ("lock; addl $0,0(%%rsp)" : : : "memory", "cc") +#endif +#endif /* defined(__GNUC__) || defined(__INTEL_COMPILER) */ + +#define pg_read_barrier_impl() pg_compiler_barrier_impl() +#define pg_write_barrier_impl() pg_compiler_barrier_impl() + +/* + * Provide implementation for atomics using inline assembly on x86 gcc. It's + * nice to support older gcc's and the compare/exchange implementation here is + * actually more efficient than the * __sync variant. + */ +#if defined(HAVE_ATOMICS) + +#if defined(__GNUC__) || defined(__INTEL_COMPILER) + +#define PG_HAVE_ATOMIC_FLAG_SUPPORT +typedef struct pg_atomic_flag +{ + volatile char value; +} pg_atomic_flag; + +#define PG_HAVE_ATOMIC_U32_SUPPORT +typedef struct pg_atomic_uint32 +{ + volatile uint32 value; +} pg_atomic_uint32; + +/* + * It's too complicated to write inline asm for 64bit types on 32bit and the + * 486 can't do it anyway. + */ +#ifdef __x86_64__ +#define PG_HAVE_ATOMIC_U64_SUPPORT +typedef struct pg_atomic_uint64 +{ + /* alignment guaranteed due to being on a 64bit platform */ + volatile uint64 value; +} pg_atomic_uint64; +#endif /* __x86_64__ */ + +#endif /* defined(__GNUC__) || defined(__INTEL_COMPILER) */ + +#endif /* defined(HAVE_ATOMICS) */ + +#if !defined(PG_HAVE_SPIN_DELAY) +/* + * This sequence is equivalent to the PAUSE instruction ("rep" is + * ignored by old IA32 processors if the following instruction is + * not a string operation); the IA-32 Architecture Software + * Developer's Manual, Vol. 3, Section 7.7.2 describes why using + * PAUSE in the inner loop of a spin lock is necessary for good + * performance: + * + * The PAUSE instruction improves the performance of IA-32 + * processors supporting Hyper-Threading Technology when + * executing spin-wait loops and other routines where one + * thread is accessing a shared lock or semaphore in a tight + * polling loop. When executing a spin-wait loop, the + * processor can suffer a severe performance penalty when + * exiting the loop because it detects a possible memory order + * violation and flushes the core processor's pipeline. The + * PAUSE instruction provides a hint to the processor that the + * code sequence is a spin-wait loop. The processor uses this + * hint to avoid the memory order violation and prevent the + * pipeline flush. 
In addition, the PAUSE instruction + * de-pipelines the spin-wait loop to prevent it from + * consuming execution resources excessively. + */ +#if defined(__GNUC__) || defined(__INTEL_COMPILER) +#define PG_HAVE_SPIN_DELAY +static __inline__ void +pg_spin_delay_impl(void) +{ + __asm__ __volatile__(" rep; nop \n"); +} +#elif defined(_MSC_VER) && defined(__x86_64__) +#define PG_HAVE_SPIN_DELAY +static __forceinline void +pg_spin_delay_impl(void) +{ + _mm_pause(); +} +#elif defined(_MSC_VER) +#define PG_HAVE_SPIN_DELAY +static __forceinline void +pg_spin_delay_impl(void) +{ + /* See comment for gcc code. Same code, MASM syntax */ + __asm rep nop; +} +#endif +#endif /* !defined(PG_HAVE_SPIN_DELAY) */ + + +#if defined(HAVE_ATOMICS) + +#if defined(__GNUC__) || defined(__INTEL_COMPILER) + +#define PG_HAVE_ATOMIC_TEST_SET_FLAG +static inline bool +pg_atomic_test_set_flag_impl(volatile pg_atomic_flag *ptr) +{ + char _res = 1; + + __asm__ __volatile__( + " lock \n" + " xchgb %0,%1 \n" +: "+q"(_res), "+m"(ptr->value) +: +: "memory"); + return _res == 0; +} + +#define PG_HAVE_ATOMIC_CLEAR_FLAG +static inline void +pg_atomic_clear_flag_impl(volatile pg_atomic_flag *ptr) +{ + /* + * On a TSO architecture like x86 it's sufficient to use a compiler + * barrier to achieve release semantics. + */ + __asm__ __volatile__("" ::: "memory"); + ptr->value = 0; +} + +#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32 +static inline bool +pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr, + uint32 *expected, uint32 newval) +{ + char ret; + + /* + * Perform cmpxchg and use the zero flag which it implicitly sets when + * equal to measure the success. + */ + __asm__ __volatile__( + " lock \n" + " cmpxchgl %4,%5 \n" + " setz %2 \n" +: "=a" (*expected), "=m"(ptr->value), "=q" (ret) +: "a" (*expected), "r" (newval), "m"(ptr->value) +: "memory", "cc"); + return (bool) ret; +} + +#define PG_HAVE_ATOMIC_FETCH_ADD_U32 +static inline uint32 +pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_) +{ + uint32 res; + __asm__ __volatile__( + " lock \n" + " xaddl %0,%1 \n" +: "=q"(res), "=m"(ptr->value) +: "0" (add_), "m"(ptr->value) +: "memory", "cc"); + return res; +} + +#ifdef __x86_64__ + +#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64 +static inline bool +pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr, + uint64 *expected, uint64 newval) +{ + char ret; + + /* + * Perform cmpxchg and use the zero flag which it implicitly sets when + * equal to measure the success. + */ + __asm__ __volatile__( + " lock \n" + " cmpxchgq %4,%5 \n" + " setz %2 \n" +: "=a" (*expected), "=m"(ptr->value), "=q" (ret) +: "a" (*expected), "r" (newval), "m"(ptr->value) +: "memory", "cc"); + return (bool) ret; +} + +#define PG_HAVE_ATOMIC_FETCH_ADD_U64 +static inline uint64 +pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_) +{ + uint64 res; + __asm__ __volatile__( + " lock \n" + " xaddq %0,%1 \n" +: "=q"(res), "=m"(ptr->value) +: "0" (add_), "m"(ptr->value) +: "memory", "cc"); + return res; +} + +#endif /* __x86_64__ */ + +#endif /* defined(__GNUC__) || defined(__INTEL_COMPILER) */ + +/* + * 8 byte reads / writes have single-copy atomicity on 32 bit x86 platforms + * since at least the 586. As well as on all x86-64 cpus. 
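A hypothetical busy-wait loop showing where pg_spin_delay() (the PAUSE hint discussed above) and a read barrier fit; the names are illustrative only:

static void
wait_for_flag(volatile pg_atomic_uint32 *ptr)
{
    while (pg_atomic_read_u32(ptr) == 0)
        pg_spin_delay();        /* "rep; nop" / PAUSE on x86 */

    pg_read_barrier();          /* order later loads after seeing the flag */
}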
+ */ +#if defined(__i568__) || defined(__i668__) || /* gcc i586+ */ \ + (defined(_M_IX86) && _M_IX86 >= 500) || /* msvc i586+ */ \ + defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) /* gcc, sunpro, msvc */ +#define PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY +#endif /* 8 byte single-copy atomicity */ + +#endif /* HAVE_ATOMICS */ diff --git a/src/include/port/atomics/fallback.h b/src/include/port/atomics/fallback.h new file mode 100644 index 0000000..a9e8e77 --- /dev/null +++ b/src/include/port/atomics/fallback.h @@ -0,0 +1,170 @@ +/*------------------------------------------------------------------------- + * + * fallback.h + * Fallback for platforms without spinlock and/or atomics support. Slower + * than native atomics support, but not unusably slow. + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/port/atomics/fallback.h + * + *------------------------------------------------------------------------- + */ + +/* intentionally no include guards, should only be included by atomics.h */ +#ifndef INSIDE_ATOMICS_H +# error "should be included via atomics.h" +#endif + +#ifndef pg_memory_barrier_impl +/* + * If we have no memory barrier implementation for this architecture, we + * fall back to acquiring and releasing a spinlock. This might, in turn, + * fall back to the semaphore-based spinlock implementation, which will be + * amazingly slow. + * + * It's not self-evident that every possible legal implementation of a + * spinlock acquire-and-release would be equivalent to a full memory barrier. + * For example, I'm not sure that Itanium's acq and rel add up to a full + * fence. But all of our actual implementations seem OK in this regard. + */ +#define PG_HAVE_MEMORY_BARRIER_EMULATION + +extern void pg_spinlock_barrier(void); +#define pg_memory_barrier_impl pg_spinlock_barrier +#endif + +#ifndef pg_compiler_barrier_impl +/* + * If the compiler/arch combination does not provide compiler barriers, + * provide a fallback. The fallback simply consists of a function call into + * an externally defined function. That should guarantee compiler barrier + * semantics except for compilers that do inter translation unit/global + * optimization - those better provide an actual compiler barrier. + * + * A native compiler barrier for sure is a lot faster than this... + */ +#define PG_HAVE_COMPILER_BARRIER_EMULATION +extern void pg_extern_compiler_barrier(void); +#define pg_compiler_barrier_impl pg_extern_compiler_barrier +#endif + + +/* + * If we have atomics implementation for this platform, fall back to providing + * the atomics API using a spinlock to protect the internal state. Possibly + * the spinlock implementation uses semaphores internally... + * + * We have to be a bit careful here, as it's not guaranteed that atomic + * variables are mapped to the same address in every process (e.g. dynamic + * shared memory segments). We can't just hash the address and use that to map + * to a spinlock. Instead assign a spinlock on initialization of the atomic + * variable. + */ +#if !defined(PG_HAVE_ATOMIC_FLAG_SUPPORT) && !defined(PG_HAVE_ATOMIC_U32_SUPPORT) + +#define PG_HAVE_ATOMIC_FLAG_SIMULATION +#define PG_HAVE_ATOMIC_FLAG_SUPPORT + +typedef struct pg_atomic_flag +{ + /* + * To avoid circular includes we can't use s_lock as a type here. Instead + * just reserve enough space for all spinlock types. 
Some platforms would + * be content with just one byte instead of 4, but that's not too much + * waste. + */ +#if defined(__hppa) || defined(__hppa__) /* HP PA-RISC, GCC and HP compilers */ + int sema[4]; +#else + int sema; +#endif + volatile bool value; +} pg_atomic_flag; + +#endif /* PG_HAVE_ATOMIC_FLAG_SUPPORT */ + +#if !defined(PG_HAVE_ATOMIC_U32_SUPPORT) + +#define PG_HAVE_ATOMIC_U32_SIMULATION + +#define PG_HAVE_ATOMIC_U32_SUPPORT +typedef struct pg_atomic_uint32 +{ + /* Check pg_atomic_flag's definition above for an explanation */ +#if defined(__hppa) || defined(__hppa__) /* HP PA-RISC */ + int sema[4]; +#else + int sema; +#endif + volatile uint32 value; +} pg_atomic_uint32; + +#endif /* PG_HAVE_ATOMIC_U32_SUPPORT */ + +#if !defined(PG_HAVE_ATOMIC_U64_SUPPORT) + +#define PG_HAVE_ATOMIC_U64_SIMULATION + +#define PG_HAVE_ATOMIC_U64_SUPPORT +typedef struct pg_atomic_uint64 +{ + /* Check pg_atomic_flag's definition above for an explanation */ +#if defined(__hppa) || defined(__hppa__) /* HP PA-RISC */ + int sema[4]; +#else + int sema; +#endif + volatile uint64 value; +} pg_atomic_uint64; + +#endif /* PG_HAVE_ATOMIC_U64_SUPPORT */ + +#ifdef PG_HAVE_ATOMIC_FLAG_SIMULATION + +#define PG_HAVE_ATOMIC_INIT_FLAG +extern void pg_atomic_init_flag_impl(volatile pg_atomic_flag *ptr); + +#define PG_HAVE_ATOMIC_TEST_SET_FLAG +extern bool pg_atomic_test_set_flag_impl(volatile pg_atomic_flag *ptr); + +#define PG_HAVE_ATOMIC_CLEAR_FLAG +extern void pg_atomic_clear_flag_impl(volatile pg_atomic_flag *ptr); + +#define PG_HAVE_ATOMIC_UNLOCKED_TEST_FLAG +extern bool pg_atomic_unlocked_test_flag_impl(volatile pg_atomic_flag *ptr); + +#endif /* PG_HAVE_ATOMIC_FLAG_SIMULATION */ + +#ifdef PG_HAVE_ATOMIC_U32_SIMULATION + +#define PG_HAVE_ATOMIC_INIT_U32 +extern void pg_atomic_init_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 val_); + +#define PG_HAVE_ATOMIC_WRITE_U32 +extern void pg_atomic_write_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 val); + +#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32 +extern bool pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr, + uint32 *expected, uint32 newval); + +#define PG_HAVE_ATOMIC_FETCH_ADD_U32 +extern uint32 pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_); + +#endif /* PG_HAVE_ATOMIC_U32_SIMULATION */ + + +#ifdef PG_HAVE_ATOMIC_U64_SIMULATION + +#define PG_HAVE_ATOMIC_INIT_U64 +extern void pg_atomic_init_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 val_); + +#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64 +extern bool pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr, + uint64 *expected, uint64 newval); + +#define PG_HAVE_ATOMIC_FETCH_ADD_U64 +extern uint64 pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_); + +#endif /* PG_HAVE_ATOMIC_U64_SIMULATION */ diff --git a/src/include/port/atomics/generic-gcc.h b/src/include/port/atomics/generic-gcc.h new file mode 100644 index 0000000..da04e9f --- /dev/null +++ b/src/include/port/atomics/generic-gcc.h @@ -0,0 +1,286 @@ +/*------------------------------------------------------------------------- + * + * generic-gcc.h + * Atomic operations, implemented using gcc (or compatible) intrinsics. 
+ * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * NOTES: + * + * Documentation: + * * Legacy __sync Built-in Functions for Atomic Memory Access + * https://gcc.gnu.org/onlinedocs/gcc-4.8.2/gcc/_005f_005fsync-Builtins.html + * * Built-in functions for memory model aware atomic operations + * https://gcc.gnu.org/onlinedocs/gcc-4.8.2/gcc/_005f_005fatomic-Builtins.html + * + * src/include/port/atomics/generic-gcc.h + * + *------------------------------------------------------------------------- + */ + +/* intentionally no include guards, should only be included by atomics.h */ +#ifndef INSIDE_ATOMICS_H +#error "should be included via atomics.h" +#endif + +/* + * An empty asm block should be a sufficient compiler barrier. + */ +#define pg_compiler_barrier_impl() __asm__ __volatile__("" ::: "memory") + +/* + * If we're on GCC 4.1.0 or higher, we should be able to get a memory barrier + * out of this compiler built-in. But we prefer to rely on platform specific + * definitions where possible, and use this only as a fallback. + */ +#if !defined(pg_memory_barrier_impl) +# if defined(HAVE_GCC__ATOMIC_INT32_CAS) +# define pg_memory_barrier_impl() __atomic_thread_fence(__ATOMIC_SEQ_CST) +# elif (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) +# define pg_memory_barrier_impl() __sync_synchronize() +# endif +#endif /* !defined(pg_memory_barrier_impl) */ + +#if !defined(pg_read_barrier_impl) && defined(HAVE_GCC__ATOMIC_INT32_CAS) +/* acquire semantics include read barrier semantics */ +# define pg_read_barrier_impl() __atomic_thread_fence(__ATOMIC_ACQUIRE) +#endif + +#if !defined(pg_write_barrier_impl) && defined(HAVE_GCC__ATOMIC_INT32_CAS) +/* release semantics include write barrier semantics */ +# define pg_write_barrier_impl() __atomic_thread_fence(__ATOMIC_RELEASE) +#endif + + +#ifdef HAVE_ATOMICS + +/* generic gcc based atomic flag implementation */ +#if !defined(PG_HAVE_ATOMIC_FLAG_SUPPORT) \ + && (defined(HAVE_GCC__SYNC_INT32_TAS) || defined(HAVE_GCC__SYNC_CHAR_TAS)) + +#define PG_HAVE_ATOMIC_FLAG_SUPPORT +typedef struct pg_atomic_flag +{ + /* + * If we have a choice, use int-width TAS, because that is more efficient + * and/or more reliably implemented on most non-Intel platforms. (Note + * that this code isn't used on x86[_64]; see arch-x86.h for that.) 
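The HAVE_GCC__ATOMIC_INT32_CAS / HAVE_GCC__SYNC_INT32_CAS macros tested below are set by configure probes; roughly, the __atomic probe just checks that a call like the following compiles and links (an illustrative reconstruction, not the literal configure test):

static int
probe_gcc_atomic_int32_cas(void)
{
    int  val = 0;
    int  expected = 0;

    /* weak = 0, sequentially consistent on both success and failure */
    return __atomic_compare_exchange_n(&val, &expected, 1, 0,
                                       __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
}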
+ */ +#ifdef HAVE_GCC__SYNC_INT32_TAS + volatile int value; +#else + volatile char value; +#endif +} pg_atomic_flag; + +#endif /* !ATOMIC_FLAG_SUPPORT && SYNC_INT32_TAS */ + +/* generic gcc based atomic uint32 implementation */ +#if !defined(PG_HAVE_ATOMIC_U32_SUPPORT) \ + && (defined(HAVE_GCC__ATOMIC_INT32_CAS) || defined(HAVE_GCC__SYNC_INT32_CAS)) + +#define PG_HAVE_ATOMIC_U32_SUPPORT +typedef struct pg_atomic_uint32 +{ + volatile uint32 value; +} pg_atomic_uint32; + +#endif /* defined(HAVE_GCC__ATOMIC_INT32_CAS) || defined(HAVE_GCC__SYNC_INT32_CAS) */ + +/* generic gcc based atomic uint64 implementation */ +#if !defined(PG_HAVE_ATOMIC_U64_SUPPORT) \ + && !defined(PG_DISABLE_64_BIT_ATOMICS) \ + && (defined(HAVE_GCC__ATOMIC_INT64_CAS) || defined(HAVE_GCC__SYNC_INT64_CAS)) + +#define PG_HAVE_ATOMIC_U64_SUPPORT + +typedef struct pg_atomic_uint64 +{ + volatile uint64 value pg_attribute_aligned(8); +} pg_atomic_uint64; + +#endif /* defined(HAVE_GCC__ATOMIC_INT64_CAS) || defined(HAVE_GCC__SYNC_INT64_CAS) */ + +#ifdef PG_HAVE_ATOMIC_FLAG_SUPPORT + +#if defined(HAVE_GCC__SYNC_CHAR_TAS) || defined(HAVE_GCC__SYNC_INT32_TAS) + +#ifndef PG_HAVE_ATOMIC_TEST_SET_FLAG +#define PG_HAVE_ATOMIC_TEST_SET_FLAG +static inline bool +pg_atomic_test_set_flag_impl(volatile pg_atomic_flag *ptr) +{ + /* NB: only an acquire barrier, not a full one */ + /* some platform only support a 1 here */ + return __sync_lock_test_and_set(&ptr->value, 1) == 0; +} +#endif + +#endif /* defined(HAVE_GCC__SYNC_*_TAS) */ + +#ifndef PG_HAVE_ATOMIC_UNLOCKED_TEST_FLAG +#define PG_HAVE_ATOMIC_UNLOCKED_TEST_FLAG +static inline bool +pg_atomic_unlocked_test_flag_impl(volatile pg_atomic_flag *ptr) +{ + return ptr->value == 0; +} +#endif + +#ifndef PG_HAVE_ATOMIC_CLEAR_FLAG +#define PG_HAVE_ATOMIC_CLEAR_FLAG +static inline void +pg_atomic_clear_flag_impl(volatile pg_atomic_flag *ptr) +{ + __sync_lock_release(&ptr->value); +} +#endif + +#ifndef PG_HAVE_ATOMIC_INIT_FLAG +#define PG_HAVE_ATOMIC_INIT_FLAG +static inline void +pg_atomic_init_flag_impl(volatile pg_atomic_flag *ptr) +{ + pg_atomic_clear_flag_impl(ptr); +} +#endif + +#endif /* defined(PG_HAVE_ATOMIC_FLAG_SUPPORT) */ + +/* prefer __atomic, it has a better API */ +#if !defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32) && defined(HAVE_GCC__ATOMIC_INT32_CAS) +#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32 +static inline bool +pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr, + uint32 *expected, uint32 newval) +{ + /* FIXME: we can probably use a lower consistency model */ + return __atomic_compare_exchange_n(&ptr->value, expected, newval, false, + __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); +} +#endif + +#if !defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32) && defined(HAVE_GCC__SYNC_INT32_CAS) +#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32 +static inline bool +pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr, + uint32 *expected, uint32 newval) +{ + bool ret; + uint32 current; + current = __sync_val_compare_and_swap(&ptr->value, *expected, newval); + ret = current == *expected; + *expected = current; + return ret; +} +#endif + +/* if we have 32-bit __sync_val_compare_and_swap, assume we have these too: */ + +#if !defined(PG_HAVE_ATOMIC_FETCH_ADD_U32) && defined(HAVE_GCC__SYNC_INT32_CAS) +#define PG_HAVE_ATOMIC_FETCH_ADD_U32 +static inline uint32 +pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_) +{ + return __sync_fetch_and_add(&ptr->value, add_); +} +#endif + +#if !defined(PG_HAVE_ATOMIC_FETCH_SUB_U32) && defined(HAVE_GCC__SYNC_INT32_CAS) +#define 
PG_HAVE_ATOMIC_FETCH_SUB_U32 +static inline uint32 +pg_atomic_fetch_sub_u32_impl(volatile pg_atomic_uint32 *ptr, int32 sub_) +{ + return __sync_fetch_and_sub(&ptr->value, sub_); +} +#endif + +#if !defined(PG_HAVE_ATOMIC_FETCH_AND_U32) && defined(HAVE_GCC__SYNC_INT32_CAS) +#define PG_HAVE_ATOMIC_FETCH_AND_U32 +static inline uint32 +pg_atomic_fetch_and_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 and_) +{ + return __sync_fetch_and_and(&ptr->value, and_); +} +#endif + +#if !defined(PG_HAVE_ATOMIC_FETCH_OR_U32) && defined(HAVE_GCC__SYNC_INT32_CAS) +#define PG_HAVE_ATOMIC_FETCH_OR_U32 +static inline uint32 +pg_atomic_fetch_or_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 or_) +{ + return __sync_fetch_and_or(&ptr->value, or_); +} +#endif + + +#if !defined(PG_DISABLE_64_BIT_ATOMICS) + +#if !defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64) && defined(HAVE_GCC__ATOMIC_INT64_CAS) +#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64 +static inline bool +pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr, + uint64 *expected, uint64 newval) +{ + return __atomic_compare_exchange_n(&ptr->value, expected, newval, false, + __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); +} +#endif + +#if !defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64) && defined(HAVE_GCC__SYNC_INT64_CAS) +#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64 +static inline bool +pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr, + uint64 *expected, uint64 newval) +{ + bool ret; + uint64 current; + current = __sync_val_compare_and_swap(&ptr->value, *expected, newval); + ret = current == *expected; + *expected = current; + return ret; +} +#endif + +/* if we have 64-bit __sync_val_compare_and_swap, assume we have these too: */ + +#if !defined(PG_HAVE_ATOMIC_FETCH_ADD_U64) && defined(HAVE_GCC__SYNC_INT64_CAS) +#define PG_HAVE_ATOMIC_FETCH_ADD_U64 +static inline uint64 +pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_) +{ + return __sync_fetch_and_add(&ptr->value, add_); +} +#endif + +#if !defined(PG_HAVE_ATOMIC_FETCH_SUB_U64) && defined(HAVE_GCC__SYNC_INT64_CAS) +#define PG_HAVE_ATOMIC_FETCH_SUB_U64 +static inline uint64 +pg_atomic_fetch_sub_u64_impl(volatile pg_atomic_uint64 *ptr, int64 sub_) +{ + return __sync_fetch_and_sub(&ptr->value, sub_); +} +#endif + +#if !defined(PG_HAVE_ATOMIC_FETCH_AND_U64) && defined(HAVE_GCC__SYNC_INT64_CAS) +#define PG_HAVE_ATOMIC_FETCH_AND_U64 +static inline uint64 +pg_atomic_fetch_and_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 and_) +{ + return __sync_fetch_and_and(&ptr->value, and_); +} +#endif + +#if !defined(PG_HAVE_ATOMIC_FETCH_OR_U64) && defined(HAVE_GCC__SYNC_INT64_CAS) +#define PG_HAVE_ATOMIC_FETCH_OR_U64 +static inline uint64 +pg_atomic_fetch_or_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 or_) +{ + return __sync_fetch_and_or(&ptr->value, or_); +} +#endif + +#endif /* !defined(PG_DISABLE_64_BIT_ATOMICS) */ + +#endif /* defined(HAVE_ATOMICS) */ diff --git a/src/include/port/atomics/generic-msvc.h b/src/include/port/atomics/generic-msvc.h new file mode 100644 index 0000000..8835f4c --- /dev/null +++ b/src/include/port/atomics/generic-msvc.h @@ -0,0 +1,101 @@ +/*------------------------------------------------------------------------- + * + * generic-msvc.h + * Atomic operations support when using MSVC + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * NOTES: + * + * Documentation: + * * Interlocked Variable Access + * 
http://msdn.microsoft.com/en-us/library/ms684122%28VS.85%29.aspx + * + * src/include/port/atomics/generic-msvc.h + * + *------------------------------------------------------------------------- + */ +#include <intrin.h> + +/* intentionally no include guards, should only be included by atomics.h */ +#ifndef INSIDE_ATOMICS_H +#error "should be included via atomics.h" +#endif + +#pragma intrinsic(_ReadWriteBarrier) +#define pg_compiler_barrier_impl() _ReadWriteBarrier() + +#ifndef pg_memory_barrier_impl +#define pg_memory_barrier_impl() MemoryBarrier() +#endif + +#if defined(HAVE_ATOMICS) + +#define PG_HAVE_ATOMIC_U32_SUPPORT +typedef struct pg_atomic_uint32 +{ + volatile uint32 value; +} pg_atomic_uint32; + +#define PG_HAVE_ATOMIC_U64_SUPPORT +typedef struct pg_attribute_aligned(8) pg_atomic_uint64 +{ + volatile uint64 value; +} pg_atomic_uint64; + + +#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32 +static inline bool +pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr, + uint32 *expected, uint32 newval) +{ + bool ret; + uint32 current; + current = InterlockedCompareExchange(&ptr->value, newval, *expected); + ret = current == *expected; + *expected = current; + return ret; +} + +#define PG_HAVE_ATOMIC_FETCH_ADD_U32 +static inline uint32 +pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_) +{ + return InterlockedExchangeAdd(&ptr->value, add_); +} + +/* + * The non-intrinsics versions are only available in vista upwards, so use the + * intrinsic version. Only supported on >486, but we require XP as a minimum + * baseline, which doesn't support the 486, so we don't need to add checks for + * that case. + */ +#pragma intrinsic(_InterlockedCompareExchange64) + +#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64 +static inline bool +pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr, + uint64 *expected, uint64 newval) +{ + bool ret; + uint64 current; + current = _InterlockedCompareExchange64(&ptr->value, newval, *expected); + ret = current == *expected; + *expected = current; + return ret; +} + +/* Only implemented on 64bit builds */ +#ifdef _WIN64 +#pragma intrinsic(_InterlockedExchangeAdd64) + +#define PG_HAVE_ATOMIC_FETCH_ADD_U64 +static inline uint64 +pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_) +{ + return _InterlockedExchangeAdd64(&ptr->value, add_); +} +#endif /* _WIN64 */ + +#endif /* HAVE_ATOMICS */ diff --git a/src/include/port/atomics/generic-sunpro.h b/src/include/port/atomics/generic-sunpro.h new file mode 100644 index 0000000..30f7d8b --- /dev/null +++ b/src/include/port/atomics/generic-sunpro.h @@ -0,0 +1,106 @@ +/*------------------------------------------------------------------------- + * + * generic-sunpro.h + * Atomic operations for solaris' CC + * + * Portions Copyright (c) 2013-2023, PostgreSQL Global Development Group + * + * NOTES: + * + * Documentation: + * * manpage for atomic_cas(3C) + * http://www.unix.com/man-page/opensolaris/3c/atomic_cas/ + * http://docs.oracle.com/cd/E23824_01/html/821-1465/atomic-cas-3c.html + * + * src/include/port/atomics/generic-sunpro.h + * + * ------------------------------------------------------------------------- + */ + +#if defined(HAVE_ATOMICS) + +#ifdef HAVE_MBARRIER_H +#include <mbarrier.h> + +#define pg_compiler_barrier_impl() __compiler_barrier() + +#ifndef pg_memory_barrier_impl +/* + * Despite the name this is actually a full barrier. Expanding to mfence/ + * membar #StoreStore | #LoadStore | #StoreLoad | #LoadLoad on x86/sparc + * respectively. 
+ */ +# define pg_memory_barrier_impl() __machine_rw_barrier() +#endif +#ifndef pg_read_barrier_impl +# define pg_read_barrier_impl() __machine_r_barrier() +#endif +#ifndef pg_write_barrier_impl +# define pg_write_barrier_impl() __machine_w_barrier() +#endif + +#endif /* HAVE_MBARRIER_H */ + +/* Older versions of the compiler don't have atomic.h... */ +#ifdef HAVE_ATOMIC_H + +#include <atomic.h> + +#define PG_HAVE_ATOMIC_U32_SUPPORT +typedef struct pg_atomic_uint32 +{ + volatile uint32 value; +} pg_atomic_uint32; + +#define PG_HAVE_ATOMIC_U64_SUPPORT +typedef struct pg_atomic_uint64 +{ + /* + * Syntax to enforce variable alignment should be supported by versions + * supporting atomic.h, but it's hard to find accurate documentation. If + * it proves to be a problem, we'll have to add more version checks for 64 + * bit support. + */ + volatile uint64 value pg_attribute_aligned(8); +} pg_atomic_uint64; + +#endif /* HAVE_ATOMIC_H */ + +#endif /* defined(HAVE_ATOMICS) */ + + +#if defined(HAVE_ATOMICS) + +#ifdef HAVE_ATOMIC_H + +#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32 +static inline bool +pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr, + uint32 *expected, uint32 newval) +{ + bool ret; + uint32 current; + + current = atomic_cas_32(&ptr->value, *expected, newval); + ret = current == *expected; + *expected = current; + return ret; +} + +#define PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64 +static inline bool +pg_atomic_compare_exchange_u64_impl(volatile pg_atomic_uint64 *ptr, + uint64 *expected, uint64 newval) +{ + bool ret; + uint64 current; + + current = atomic_cas_64(&ptr->value, *expected, newval); + ret = current == *expected; + *expected = current; + return ret; +} + +#endif /* HAVE_ATOMIC_H */ + +#endif /* defined(HAVE_ATOMICS) */ diff --git a/src/include/port/atomics/generic.h b/src/include/port/atomics/generic.h new file mode 100644 index 0000000..95d99dd --- /dev/null +++ b/src/include/port/atomics/generic.h @@ -0,0 +1,401 @@ +/*------------------------------------------------------------------------- + * + * generic.h + * Implement higher level operations based on some lower level atomic + * operations. + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/port/atomics/generic.h + * + *------------------------------------------------------------------------- + */ + +/* intentionally no include guards, should only be included by atomics.h */ +#ifndef INSIDE_ATOMICS_H +# error "should be included via atomics.h" +#endif + +/* + * If read or write barriers are undefined, we upgrade them to full memory + * barriers. 
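Pulling the pieces together, a hypothetical reference count built on the add_fetch/sub_fetch operations from atomics.h; on platforms that only provide fetch_add, generic.h (below) derives these automatically:

typedef struct SharedItem
{
    pg_atomic_uint32 refcount;
    /* ... payload ... */
} SharedItem;

void
shared_item_retain(SharedItem *item)
{
    pg_atomic_add_fetch_u32(&item->refcount, 1);
}

bool
shared_item_release(SharedItem *item)
{
    /* add_fetch/sub_fetch return the new value; zero means last reference */
    return pg_atomic_sub_fetch_u32(&item->refcount, 1) == 0;
}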
+ */ +#if !defined(pg_read_barrier_impl) +# define pg_read_barrier_impl pg_memory_barrier_impl +#endif +#if !defined(pg_write_barrier_impl) +# define pg_write_barrier_impl pg_memory_barrier_impl +#endif + +#ifndef PG_HAVE_SPIN_DELAY +#define PG_HAVE_SPIN_DELAY +#define pg_spin_delay_impl() ((void)0) +#endif + + +/* provide fallback */ +#if !defined(PG_HAVE_ATOMIC_FLAG_SUPPORT) && defined(PG_HAVE_ATOMIC_U32_SUPPORT) +#define PG_HAVE_ATOMIC_FLAG_SUPPORT +typedef pg_atomic_uint32 pg_atomic_flag; +#endif + +#ifndef PG_HAVE_ATOMIC_READ_U32 +#define PG_HAVE_ATOMIC_READ_U32 +static inline uint32 +pg_atomic_read_u32_impl(volatile pg_atomic_uint32 *ptr) +{ + return ptr->value; +} +#endif + +#ifndef PG_HAVE_ATOMIC_WRITE_U32 +#define PG_HAVE_ATOMIC_WRITE_U32 +static inline void +pg_atomic_write_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 val) +{ + ptr->value = val; +} +#endif + +#ifndef PG_HAVE_ATOMIC_UNLOCKED_WRITE_U32 +#define PG_HAVE_ATOMIC_UNLOCKED_WRITE_U32 +static inline void +pg_atomic_unlocked_write_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 val) +{ + ptr->value = val; +} +#endif + +/* + * provide fallback for test_and_set using atomic_exchange if available + */ +#if !defined(PG_HAVE_ATOMIC_TEST_SET_FLAG) && defined(PG_HAVE_ATOMIC_EXCHANGE_U32) + +#define PG_HAVE_ATOMIC_INIT_FLAG +static inline void +pg_atomic_init_flag_impl(volatile pg_atomic_flag *ptr) +{ + pg_atomic_write_u32_impl(ptr, 0); +} + +#define PG_HAVE_ATOMIC_TEST_SET_FLAG +static inline bool +pg_atomic_test_set_flag_impl(volatile pg_atomic_flag *ptr) +{ + return pg_atomic_exchange_u32_impl(ptr, 1) == 0; +} + +#define PG_HAVE_ATOMIC_UNLOCKED_TEST_FLAG +static inline bool +pg_atomic_unlocked_test_flag_impl(volatile pg_atomic_flag *ptr) +{ + return pg_atomic_read_u32_impl(ptr) == 0; +} + + +#define PG_HAVE_ATOMIC_CLEAR_FLAG +static inline void +pg_atomic_clear_flag_impl(volatile pg_atomic_flag *ptr) +{ + /* XXX: release semantics suffice? */ + pg_memory_barrier_impl(); + pg_atomic_write_u32_impl(ptr, 0); +} + +/* + * provide fallback for test_and_set using atomic_compare_exchange if + * available. + */ +#elif !defined(PG_HAVE_ATOMIC_TEST_SET_FLAG) && defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32) + +#define PG_HAVE_ATOMIC_INIT_FLAG +static inline void +pg_atomic_init_flag_impl(volatile pg_atomic_flag *ptr) +{ + pg_atomic_write_u32_impl(ptr, 0); +} + +#define PG_HAVE_ATOMIC_TEST_SET_FLAG +static inline bool +pg_atomic_test_set_flag_impl(volatile pg_atomic_flag *ptr) +{ + uint32 value = 0; + return pg_atomic_compare_exchange_u32_impl(ptr, &value, 1); +} + +#define PG_HAVE_ATOMIC_UNLOCKED_TEST_FLAG +static inline bool +pg_atomic_unlocked_test_flag_impl(volatile pg_atomic_flag *ptr) +{ + return pg_atomic_read_u32_impl(ptr) == 0; +} + +#define PG_HAVE_ATOMIC_CLEAR_FLAG +static inline void +pg_atomic_clear_flag_impl(volatile pg_atomic_flag *ptr) +{ + /* + * Use a memory barrier + plain write if we have a native memory + * barrier. But don't do so if memory barriers use spinlocks - that'd lead + * to circularity if flags are used to implement spinlocks. + */ +#ifndef PG_HAVE_MEMORY_BARRIER_EMULATION + /* XXX: release semantics suffice? 
*/ + pg_memory_barrier_impl(); + pg_atomic_write_u32_impl(ptr, 0); +#else + uint32 value = 1; + pg_atomic_compare_exchange_u32_impl(ptr, &value, 0); +#endif +} + +#elif !defined(PG_HAVE_ATOMIC_TEST_SET_FLAG) +# error "No pg_atomic_test_and_set provided" +#endif /* !defined(PG_HAVE_ATOMIC_TEST_SET_FLAG) */ + + +#ifndef PG_HAVE_ATOMIC_INIT_U32 +#define PG_HAVE_ATOMIC_INIT_U32 +static inline void +pg_atomic_init_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 val_) +{ + ptr->value = val_; +} +#endif + +#if !defined(PG_HAVE_ATOMIC_EXCHANGE_U32) && defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32) +#define PG_HAVE_ATOMIC_EXCHANGE_U32 +static inline uint32 +pg_atomic_exchange_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 xchg_) +{ + uint32 old; + old = ptr->value; /* ok if read is not atomic */ + while (!pg_atomic_compare_exchange_u32_impl(ptr, &old, xchg_)) + /* skip */; + return old; +} +#endif + +#if !defined(PG_HAVE_ATOMIC_FETCH_ADD_U32) && defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32) +#define PG_HAVE_ATOMIC_FETCH_ADD_U32 +static inline uint32 +pg_atomic_fetch_add_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_) +{ + uint32 old; + old = ptr->value; /* ok if read is not atomic */ + while (!pg_atomic_compare_exchange_u32_impl(ptr, &old, old + add_)) + /* skip */; + return old; +} +#endif + +#if !defined(PG_HAVE_ATOMIC_FETCH_SUB_U32) && defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32) +#define PG_HAVE_ATOMIC_FETCH_SUB_U32 +static inline uint32 +pg_atomic_fetch_sub_u32_impl(volatile pg_atomic_uint32 *ptr, int32 sub_) +{ + return pg_atomic_fetch_add_u32_impl(ptr, -sub_); +} +#endif + +#if !defined(PG_HAVE_ATOMIC_FETCH_AND_U32) && defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32) +#define PG_HAVE_ATOMIC_FETCH_AND_U32 +static inline uint32 +pg_atomic_fetch_and_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 and_) +{ + uint32 old; + old = ptr->value; /* ok if read is not atomic */ + while (!pg_atomic_compare_exchange_u32_impl(ptr, &old, old & and_)) + /* skip */; + return old; +} +#endif + +#if !defined(PG_HAVE_ATOMIC_FETCH_OR_U32) && defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U32) +#define PG_HAVE_ATOMIC_FETCH_OR_U32 +static inline uint32 +pg_atomic_fetch_or_u32_impl(volatile pg_atomic_uint32 *ptr, uint32 or_) +{ + uint32 old; + old = ptr->value; /* ok if read is not atomic */ + while (!pg_atomic_compare_exchange_u32_impl(ptr, &old, old | or_)) + /* skip */; + return old; +} +#endif + +#if !defined(PG_HAVE_ATOMIC_ADD_FETCH_U32) && defined(PG_HAVE_ATOMIC_FETCH_ADD_U32) +#define PG_HAVE_ATOMIC_ADD_FETCH_U32 +static inline uint32 +pg_atomic_add_fetch_u32_impl(volatile pg_atomic_uint32 *ptr, int32 add_) +{ + return pg_atomic_fetch_add_u32_impl(ptr, add_) + add_; +} +#endif + +#if !defined(PG_HAVE_ATOMIC_SUB_FETCH_U32) && defined(PG_HAVE_ATOMIC_FETCH_SUB_U32) +#define PG_HAVE_ATOMIC_SUB_FETCH_U32 +static inline uint32 +pg_atomic_sub_fetch_u32_impl(volatile pg_atomic_uint32 *ptr, int32 sub_) +{ + return pg_atomic_fetch_sub_u32_impl(ptr, sub_) - sub_; +} +#endif + +#if !defined(PG_HAVE_ATOMIC_EXCHANGE_U64) && defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64) +#define PG_HAVE_ATOMIC_EXCHANGE_U64 +static inline uint64 +pg_atomic_exchange_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 xchg_) +{ + uint64 old; + old = ptr->value; /* ok if read is not atomic */ + while (!pg_atomic_compare_exchange_u64_impl(ptr, &old, xchg_)) + /* skip */; + return old; +} +#endif + +#ifndef PG_HAVE_ATOMIC_WRITE_U64 +#define PG_HAVE_ATOMIC_WRITE_U64 + +#if defined(PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY) && \ + 
!defined(PG_HAVE_ATOMIC_U64_SIMULATION) + +static inline void +pg_atomic_write_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 val) +{ + /* + * On this platform aligned 64bit writes are guaranteed to be atomic, + * except if using the fallback implementation, where can't guarantee the + * required alignment. + */ + AssertPointerAlignment(ptr, 8); + ptr->value = val; +} + +#else + +static inline void +pg_atomic_write_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 val) +{ + /* + * 64 bit writes aren't safe on all platforms. In the generic + * implementation implement them as an atomic exchange. + */ + pg_atomic_exchange_u64_impl(ptr, val); +} + +#endif /* PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY && !PG_HAVE_ATOMIC_U64_SIMULATION */ +#endif /* PG_HAVE_ATOMIC_WRITE_U64 */ + +#ifndef PG_HAVE_ATOMIC_READ_U64 +#define PG_HAVE_ATOMIC_READ_U64 + +#if defined(PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY) && \ + !defined(PG_HAVE_ATOMIC_U64_SIMULATION) + +static inline uint64 +pg_atomic_read_u64_impl(volatile pg_atomic_uint64 *ptr) +{ + /* + * On this platform aligned 64-bit reads are guaranteed to be atomic. + */ + AssertPointerAlignment(ptr, 8); + return ptr->value; +} + +#else + +static inline uint64 +pg_atomic_read_u64_impl(volatile pg_atomic_uint64 *ptr) +{ + uint64 old = 0; + + /* + * 64-bit reads aren't atomic on all platforms. In the generic + * implementation implement them as a compare/exchange with 0. That'll + * fail or succeed, but always return the old value. Possibly might store + * a 0, but only if the previous value also was a 0 - i.e. harmless. + */ + pg_atomic_compare_exchange_u64_impl(ptr, &old, 0); + + return old; +} +#endif /* PG_HAVE_8BYTE_SINGLE_COPY_ATOMICITY && !PG_HAVE_ATOMIC_U64_SIMULATION */ +#endif /* PG_HAVE_ATOMIC_READ_U64 */ + +#ifndef PG_HAVE_ATOMIC_INIT_U64 +#define PG_HAVE_ATOMIC_INIT_U64 +static inline void +pg_atomic_init_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 val_) +{ + ptr->value = val_; +} +#endif + +#if !defined(PG_HAVE_ATOMIC_FETCH_ADD_U64) && defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64) +#define PG_HAVE_ATOMIC_FETCH_ADD_U64 +static inline uint64 +pg_atomic_fetch_add_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_) +{ + uint64 old; + old = ptr->value; /* ok if read is not atomic */ + while (!pg_atomic_compare_exchange_u64_impl(ptr, &old, old + add_)) + /* skip */; + return old; +} +#endif + +#if !defined(PG_HAVE_ATOMIC_FETCH_SUB_U64) && defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64) +#define PG_HAVE_ATOMIC_FETCH_SUB_U64 +static inline uint64 +pg_atomic_fetch_sub_u64_impl(volatile pg_atomic_uint64 *ptr, int64 sub_) +{ + return pg_atomic_fetch_add_u64_impl(ptr, -sub_); +} +#endif + +#if !defined(PG_HAVE_ATOMIC_FETCH_AND_U64) && defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64) +#define PG_HAVE_ATOMIC_FETCH_AND_U64 +static inline uint64 +pg_atomic_fetch_and_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 and_) +{ + uint64 old; + old = ptr->value; /* ok if read is not atomic */ + while (!pg_atomic_compare_exchange_u64_impl(ptr, &old, old & and_)) + /* skip */; + return old; +} +#endif + +#if !defined(PG_HAVE_ATOMIC_FETCH_OR_U64) && defined(PG_HAVE_ATOMIC_COMPARE_EXCHANGE_U64) +#define PG_HAVE_ATOMIC_FETCH_OR_U64 +static inline uint64 +pg_atomic_fetch_or_u64_impl(volatile pg_atomic_uint64 *ptr, uint64 or_) +{ + uint64 old; + old = ptr->value; /* ok if read is not atomic */ + while (!pg_atomic_compare_exchange_u64_impl(ptr, &old, old | or_)) + /* skip */; + return old; +} +#endif + +#if !defined(PG_HAVE_ATOMIC_ADD_FETCH_U64) && defined(PG_HAVE_ATOMIC_FETCH_ADD_U64) 
+#define PG_HAVE_ATOMIC_ADD_FETCH_U64 +static inline uint64 +pg_atomic_add_fetch_u64_impl(volatile pg_atomic_uint64 *ptr, int64 add_) +{ + return pg_atomic_fetch_add_u64_impl(ptr, add_) + add_; +} +#endif + +#if !defined(PG_HAVE_ATOMIC_SUB_FETCH_U64) && defined(PG_HAVE_ATOMIC_FETCH_SUB_U64) +#define PG_HAVE_ATOMIC_SUB_FETCH_U64 +static inline uint64 +pg_atomic_sub_fetch_u64_impl(volatile pg_atomic_uint64 *ptr, int64 sub_) +{ + return pg_atomic_fetch_sub_u64_impl(ptr, sub_) - sub_; +} +#endif diff --git a/src/include/port/cygwin.h b/src/include/port/cygwin.h new file mode 100644 index 0000000..44bf853 --- /dev/null +++ b/src/include/port/cygwin.h @@ -0,0 +1,23 @@ +/* src/include/port/cygwin.h */ + +/* + * Variables declared in the core backend and referenced by loadable + * modules need to be marked "dllimport" in the core build, but + * "dllexport" when the declaration is read in a loadable module. + * No special markings should be used when compiling frontend code. + */ +#ifndef FRONTEND +#ifdef BUILDING_DLL +#define PGDLLIMPORT __declspec (dllexport) +#else +#define PGDLLIMPORT __declspec (dllimport) +#endif +#endif + +/* + * Cygwin has a strtof() which is literally just (float)strtod(), which means + * we get misrounding _and_ silent over/underflow. Using our wrapper doesn't + * fix the misrounding but does fix the error checks, which cuts down on the + * number of test variant files needed. + */ +#define HAVE_BUGGY_STRTOF 1 diff --git a/src/include/port/darwin.h b/src/include/port/darwin.h new file mode 100644 index 0000000..15fb69d --- /dev/null +++ b/src/include/port/darwin.h @@ -0,0 +1,8 @@ +/* src/include/port/darwin.h */ + +#define __darwin__ 1 + +#if HAVE_DECL_F_FULLFSYNC /* not present before macOS 10.3 */ +#define HAVE_FSYNC_WRITETHROUGH + +#endif diff --git a/src/include/port/freebsd.h b/src/include/port/freebsd.h new file mode 100644 index 0000000..0e3fde5 --- /dev/null +++ b/src/include/port/freebsd.h @@ -0,0 +1,8 @@ +/* src/include/port/freebsd.h */ + +/* + * Set the default wal_sync_method to fdatasync. xlogdefs.h's normal rules + * would prefer open_datasync on FreeBSD 13+, but that is not a good choice on + * many systems. + */ +#define PLATFORM_DEFAULT_SYNC_METHOD SYNC_METHOD_FDATASYNC diff --git a/src/include/port/linux.h b/src/include/port/linux.h new file mode 100644 index 0000000..7a6e46c --- /dev/null +++ b/src/include/port/linux.h @@ -0,0 +1,22 @@ +/* src/include/port/linux.h */ + +/* + * As of July 2007, all known versions of the Linux kernel will sometimes + * return EIDRM for a shmctl() operation when EINVAL is correct (it happens + * when the low-order 15 bits of the supplied shm ID match the slot number + * assigned to a newer shmem segment). We deal with this by assuming that + * EIDRM means EINVAL in PGSharedMemoryIsInUse(). This is reasonably safe + * since in fact Linux has no excuse for ever returning EIDRM; it doesn't + * track removed segments in a way that would allow distinguishing them from + * private ones. But someday that code might get upgraded, and we'd have + * to have a kernel version test here. + */ +#define HAVE_LINUX_EIDRM_BUG + +/* + * Set the default wal_sync_method to fdatasync. With recent Linux versions, + * xlogdefs.h's normal rules will prefer open_datasync, which (a) doesn't + * perform better and (b) causes outright failures on ext4 data=journal + * filesystems, because those don't support O_DIRECT. 
+ */ +#define PLATFORM_DEFAULT_SYNC_METHOD SYNC_METHOD_FDATASYNC diff --git a/src/include/port/netbsd.h b/src/include/port/netbsd.h new file mode 100644 index 0000000..590233f --- /dev/null +++ b/src/include/port/netbsd.h @@ -0,0 +1 @@ +/* src/include/port/netbsd.h */ diff --git a/src/include/port/openbsd.h b/src/include/port/openbsd.h new file mode 100644 index 0000000..395319b --- /dev/null +++ b/src/include/port/openbsd.h @@ -0,0 +1 @@ +/* src/include/port/openbsd.h */ diff --git a/src/include/port/pg_bitutils.h b/src/include/port/pg_bitutils.h new file mode 100644 index 0000000..21a4fa0 --- /dev/null +++ b/src/include/port/pg_bitutils.h @@ -0,0 +1,339 @@ +/*------------------------------------------------------------------------- + * + * pg_bitutils.h + * Miscellaneous functions for bit-wise operations. + * + * + * Copyright (c) 2019-2023, PostgreSQL Global Development Group + * + * src/include/port/pg_bitutils.h + * + *------------------------------------------------------------------------- + */ +#ifndef PG_BITUTILS_H +#define PG_BITUTILS_H + +#ifdef _MSC_VER +#include <intrin.h> +#define HAVE_BITSCAN_FORWARD +#define HAVE_BITSCAN_REVERSE + +#else +#if defined(HAVE__BUILTIN_CTZ) +#define HAVE_BITSCAN_FORWARD +#endif + +#if defined(HAVE__BUILTIN_CLZ) +#define HAVE_BITSCAN_REVERSE +#endif +#endif /* _MSC_VER */ + +extern PGDLLIMPORT const uint8 pg_leftmost_one_pos[256]; +extern PGDLLIMPORT const uint8 pg_rightmost_one_pos[256]; +extern PGDLLIMPORT const uint8 pg_number_of_ones[256]; + +/* + * pg_leftmost_one_pos32 + * Returns the position of the most significant set bit in "word", + * measured from the least significant bit. word must not be 0. + */ +static inline int +pg_leftmost_one_pos32(uint32 word) +{ +#ifdef HAVE__BUILTIN_CLZ + Assert(word != 0); + + return 31 - __builtin_clz(word); +#elif defined(_MSC_VER) + unsigned long result; + bool non_zero; + + non_zero = _BitScanReverse(&result, word); + Assert(non_zero); + return (int) result; +#else + int shift = 32 - 8; + + Assert(word != 0); + + while ((word >> shift) == 0) + shift -= 8; + + return shift + pg_leftmost_one_pos[(word >> shift) & 255]; +#endif /* HAVE__BUILTIN_CLZ */ +} + +/* + * pg_leftmost_one_pos64 + * As above, but for a 64-bit word. + */ +static inline int +pg_leftmost_one_pos64(uint64 word) +{ +#ifdef HAVE__BUILTIN_CLZ + Assert(word != 0); + +#if defined(HAVE_LONG_INT_64) + return 63 - __builtin_clzl(word); +#elif defined(HAVE_LONG_LONG_INT_64) + return 63 - __builtin_clzll(word); +#else +#error must have a working 64-bit integer datatype +#endif /* HAVE_LONG_INT_64 */ + +#elif defined(_MSC_VER) && (defined(_M_AMD64) || defined(_M_ARM64)) + unsigned long result; + bool non_zero; + + non_zero = _BitScanReverse64(&result, word); + Assert(non_zero); + return (int) result; +#else + int shift = 64 - 8; + + Assert(word != 0); + + while ((word >> shift) == 0) + shift -= 8; + + return shift + pg_leftmost_one_pos[(word >> shift) & 255]; +#endif /* HAVE__BUILTIN_CLZ */ +} + +/* + * pg_rightmost_one_pos32 + * Returns the position of the least significant set bit in "word", + * measured from the least significant bit. word must not be 0. 
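+ *
+ * For example, pg_rightmost_one_pos32(0x18) returns 3, since the lowest
+ * set bit of binary 11000 is bit 3.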
+ */ +static inline int +pg_rightmost_one_pos32(uint32 word) +{ +#ifdef HAVE__BUILTIN_CTZ + Assert(word != 0); + + return __builtin_ctz(word); +#elif defined(_MSC_VER) + unsigned long result; + bool non_zero; + + non_zero = _BitScanForward(&result, word); + Assert(non_zero); + return (int) result; +#else + int result = 0; + + Assert(word != 0); + + while ((word & 255) == 0) + { + word >>= 8; + result += 8; + } + result += pg_rightmost_one_pos[word & 255]; + return result; +#endif /* HAVE__BUILTIN_CTZ */ +} + +/* + * pg_rightmost_one_pos64 + * As above, but for a 64-bit word. + */ +static inline int +pg_rightmost_one_pos64(uint64 word) +{ +#ifdef HAVE__BUILTIN_CTZ + Assert(word != 0); + +#if defined(HAVE_LONG_INT_64) + return __builtin_ctzl(word); +#elif defined(HAVE_LONG_LONG_INT_64) + return __builtin_ctzll(word); +#else +#error must have a working 64-bit integer datatype +#endif /* HAVE_LONG_INT_64 */ + +#elif defined(_MSC_VER) && (defined(_M_AMD64) || defined(_M_ARM64)) + unsigned long result; + bool non_zero; + + non_zero = _BitScanForward64(&result, word); + Assert(non_zero); + return (int) result; +#else + int result = 0; + + Assert(word != 0); + + while ((word & 255) == 0) + { + word >>= 8; + result += 8; + } + result += pg_rightmost_one_pos[word & 255]; + return result; +#endif /* HAVE__BUILTIN_CTZ */ +} + +/* + * pg_nextpower2_32 + * Returns the next higher power of 2 above 'num', or 'num' if it's + * already a power of 2. + * + * 'num' mustn't be 0 or be above PG_UINT32_MAX / 2 + 1. + */ +static inline uint32 +pg_nextpower2_32(uint32 num) +{ + Assert(num > 0 && num <= PG_UINT32_MAX / 2 + 1); + + /* + * A power 2 number has only 1 bit set. Subtracting 1 from such a number + * will turn on all previous bits resulting in no common bits being set + * between num and num-1. + */ + if ((num & (num - 1)) == 0) + return num; /* already power 2 */ + + return ((uint32) 1) << (pg_leftmost_one_pos32(num) + 1); +} + +/* + * pg_nextpower2_64 + * Returns the next higher power of 2 above 'num', or 'num' if it's + * already a power of 2. + * + * 'num' mustn't be 0 or be above PG_UINT64_MAX / 2 + 1. + */ +static inline uint64 +pg_nextpower2_64(uint64 num) +{ + Assert(num > 0 && num <= PG_UINT64_MAX / 2 + 1); + + /* + * A power 2 number has only 1 bit set. Subtracting 1 from such a number + * will turn on all previous bits resulting in no common bits being set + * between num and num-1. + */ + if ((num & (num - 1)) == 0) + return num; /* already power 2 */ + + return ((uint64) 1) << (pg_leftmost_one_pos64(num) + 1); +} + +/* + * pg_prevpower2_32 + * Returns the next lower power of 2 below 'num', or 'num' if it's + * already a power of 2. + * + * 'num' mustn't be 0. + */ +static inline uint32 +pg_prevpower2_32(uint32 num) +{ + return ((uint32) 1) << pg_leftmost_one_pos32(num); +} + +/* + * pg_prevpower2_64 + * Returns the next lower power of 2 below 'num', or 'num' if it's + * already a power of 2. + * + * 'num' mustn't be 0. 
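+ *
+ * For example, pg_prevpower2_64(1000) returns 512, while
+ * pg_prevpower2_64(1024) returns 1024 unchanged.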
+ */ +static inline uint64 +pg_prevpower2_64(uint64 num) +{ + return ((uint64) 1) << pg_leftmost_one_pos64(num); +} + +/* + * pg_ceil_log2_32 + * Returns equivalent of ceil(log2(num)) + */ +static inline uint32 +pg_ceil_log2_32(uint32 num) +{ + if (num < 2) + return 0; + else + return pg_leftmost_one_pos32(num - 1) + 1; +} + +/* + * pg_ceil_log2_64 + * Returns equivalent of ceil(log2(num)) + */ +static inline uint64 +pg_ceil_log2_64(uint64 num) +{ + if (num < 2) + return 0; + else + return pg_leftmost_one_pos64(num - 1) + 1; +} + +/* + * With MSVC on x86_64 builds, try using native popcnt instructions via the + * __popcnt and __popcnt64 intrinsics. These don't work the same as GCC's + * __builtin_popcount* intrinsic functions as they always emit popcnt + * instructions. + */ +#if defined(_MSC_VER) && defined(_M_AMD64) +#define HAVE_X86_64_POPCNTQ +#endif + +/* + * On x86_64, we can use the hardware popcount instruction, but only if + * we can verify that the CPU supports it via the cpuid instruction. + * + * Otherwise, we fall back to a hand-rolled implementation. + */ +#ifdef HAVE_X86_64_POPCNTQ +#if defined(HAVE__GET_CPUID) || defined(HAVE__CPUID) +#define TRY_POPCNT_FAST 1 +#endif +#endif + +#ifdef TRY_POPCNT_FAST +/* Attempt to use the POPCNT instruction, but perform a runtime check first */ +extern int (*pg_popcount32) (uint32 word); +extern int (*pg_popcount64) (uint64 word); + +#else +/* Use a portable implementation -- no need for a function pointer. */ +extern int pg_popcount32(uint32 word); +extern int pg_popcount64(uint64 word); + +#endif /* TRY_POPCNT_FAST */ + +/* Count the number of one-bits in a byte array */ +extern uint64 pg_popcount(const char *buf, int bytes); + +/* + * Rotate the bits of "word" to the right/left by n bits. + */ +static inline uint32 +pg_rotate_right32(uint32 word, int n) +{ + return (word >> n) | (word << (32 - n)); +} + +static inline uint32 +pg_rotate_left32(uint32 word, int n) +{ + return (word << n) | (word >> (32 - n)); +} + +/* size_t variants of the above, as required */ + +#if SIZEOF_SIZE_T == 4 +#define pg_leftmost_one_pos_size_t pg_leftmost_one_pos32 +#define pg_nextpower2_size_t pg_nextpower2_32 +#define pg_prevpower2_size_t pg_prevpower2_32 +#else +#define pg_leftmost_one_pos_size_t pg_leftmost_one_pos64 +#define pg_nextpower2_size_t pg_nextpower2_64 +#define pg_prevpower2_size_t pg_prevpower2_64 +#endif + +#endif /* PG_BITUTILS_H */ diff --git a/src/include/port/pg_bswap.h b/src/include/port/pg_bswap.h new file mode 100644 index 0000000..80abd75 --- /dev/null +++ b/src/include/port/pg_bswap.h @@ -0,0 +1,161 @@ +/*------------------------------------------------------------------------- + * + * pg_bswap.h + * Byte swapping. + * + * Macros for reversing the byte order of 16, 32 and 64-bit unsigned integers. + * For example, 0xAABBCCDD becomes 0xDDCCBBAA. These are just wrappers for + * built-in functions provided by the compiler where support exists. + * + * Note that all of these functions accept unsigned integers as arguments and + * return the same. Use caution when using these wrapper macros with signed + * integers. + * + * Copyright (c) 2015-2023, PostgreSQL Global Development Group + * + * src/include/port/pg_bswap.h + * + *------------------------------------------------------------------------- + */ +#ifndef PG_BSWAP_H +#define PG_BSWAP_H + + +/* + * In all supported versions msvc provides _byteswap_* functions in stdlib.h, + * already included by c.h. 
+ */ + + +/* implementation of uint16 pg_bswap16(uint16) */ +#if defined(HAVE__BUILTIN_BSWAP16) + +#define pg_bswap16(x) __builtin_bswap16(x) + +#elif defined(_MSC_VER) + +#define pg_bswap16(x) _byteswap_ushort(x) + +#else + +static inline uint16 +pg_bswap16(uint16 x) +{ + return + ((x << 8) & 0xff00) | + ((x >> 8) & 0x00ff); +} + +#endif /* HAVE__BUILTIN_BSWAP16 */ + + +/* implementation of uint32 pg_bswap32(uint32) */ +#if defined(HAVE__BUILTIN_BSWAP32) + +#define pg_bswap32(x) __builtin_bswap32(x) + +#elif defined(_MSC_VER) + +#define pg_bswap32(x) _byteswap_ulong(x) + +#else + +static inline uint32 +pg_bswap32(uint32 x) +{ + return + ((x << 24) & 0xff000000) | + ((x << 8) & 0x00ff0000) | + ((x >> 8) & 0x0000ff00) | + ((x >> 24) & 0x000000ff); +} + +#endif /* HAVE__BUILTIN_BSWAP32 */ + + +/* implementation of uint64 pg_bswap64(uint64) */ +#if defined(HAVE__BUILTIN_BSWAP64) + +#define pg_bswap64(x) __builtin_bswap64(x) + + +#elif defined(_MSC_VER) + +#define pg_bswap64(x) _byteswap_uint64(x) + +#else + +static inline uint64 +pg_bswap64(uint64 x) +{ + return + ((x << 56) & UINT64CONST(0xff00000000000000)) | + ((x << 40) & UINT64CONST(0x00ff000000000000)) | + ((x << 24) & UINT64CONST(0x0000ff0000000000)) | + ((x << 8) & UINT64CONST(0x000000ff00000000)) | + ((x >> 8) & UINT64CONST(0x00000000ff000000)) | + ((x >> 24) & UINT64CONST(0x0000000000ff0000)) | + ((x >> 40) & UINT64CONST(0x000000000000ff00)) | + ((x >> 56) & UINT64CONST(0x00000000000000ff)); +} +#endif /* HAVE__BUILTIN_BSWAP64 */ + + +/* + * Portable and fast equivalents for ntohs, ntohl, htons, htonl, + * additionally extended to 64 bits. + */ +#ifdef WORDS_BIGENDIAN + +#define pg_hton16(x) (x) +#define pg_hton32(x) (x) +#define pg_hton64(x) (x) + +#define pg_ntoh16(x) (x) +#define pg_ntoh32(x) (x) +#define pg_ntoh64(x) (x) + +#else + +#define pg_hton16(x) pg_bswap16(x) +#define pg_hton32(x) pg_bswap32(x) +#define pg_hton64(x) pg_bswap64(x) + +#define pg_ntoh16(x) pg_bswap16(x) +#define pg_ntoh32(x) pg_bswap32(x) +#define pg_ntoh64(x) pg_bswap64(x) + +#endif /* WORDS_BIGENDIAN */ + + +/* + * Rearrange the bytes of a Datum from big-endian order into the native byte + * order. On big-endian machines, this does nothing at all. Note that the C + * type Datum is an unsigned integer type on all platforms. + * + * One possible application of the DatumBigEndianToNative() macro is to make + * bitwise comparisons cheaper. A simple 3-way comparison of Datums + * transformed by the macro (based on native, unsigned comparisons) will return + * the same result as a memcmp() of the corresponding original Datums, but can + * be much cheaper. It's generally safe to do this on big-endian systems + * without any special transformation occurring first. + * + * If SIZEOF_DATUM is not defined, then postgres.h wasn't included and these + * macros probably shouldn't be used, so we define nothing. Note that + * SIZEOF_DATUM == 8 would evaluate as 0 == 8 in that case, potentially + * leading to the wrong implementation being selected and confusing errors, so + * defining nothing is safest. 
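+ *
+ * As an illustration, on a little-endian machine with SIZEOF_DATUM == 8,
+ * DatumBigEndianToNative(UINT64CONST(0x0102030405060708)) expands to
+ * pg_bswap64() and yields UINT64CONST(0x0807060504030201); on a
+ * big-endian machine the macro is an identity.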
+ */ +#ifdef SIZEOF_DATUM +#ifdef WORDS_BIGENDIAN +#define DatumBigEndianToNative(x) (x) +#else /* !WORDS_BIGENDIAN */ +#if SIZEOF_DATUM == 8 +#define DatumBigEndianToNative(x) pg_bswap64(x) +#else /* SIZEOF_DATUM != 8 */ +#define DatumBigEndianToNative(x) pg_bswap32(x) +#endif /* SIZEOF_DATUM == 8 */ +#endif /* WORDS_BIGENDIAN */ +#endif /* SIZEOF_DATUM */ + +#endif /* PG_BSWAP_H */ diff --git a/src/include/port/pg_crc32c.h b/src/include/port/pg_crc32c.h new file mode 100644 index 0000000..7f87792 --- /dev/null +++ b/src/include/port/pg_crc32c.h @@ -0,0 +1,101 @@ +/*------------------------------------------------------------------------- + * + * pg_crc32c.h + * Routines for computing CRC-32C checksums. + * + * The speed of CRC-32C calculation has a big impact on performance, so we + * jump through some hoops to get the best implementation for each + * platform. Some CPU architectures have special instructions for speeding + * up CRC calculations (e.g. Intel SSE 4.2), on other platforms we use the + * Slicing-by-8 algorithm which uses lookup tables. + * + * The public interface consists of four macros: + * + * INIT_CRC32C(crc) + * Initialize a CRC accumulator + * + * COMP_CRC32C(crc, data, len) + * Accumulate some (more) bytes into a CRC + * + * FIN_CRC32C(crc) + * Finish a CRC calculation + * + * EQ_CRC32C(c1, c2) + * Check for equality of two CRCs. + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/port/pg_crc32c.h + * + *------------------------------------------------------------------------- + */ +#ifndef PG_CRC32C_H +#define PG_CRC32C_H + +#include "port/pg_bswap.h" + +typedef uint32 pg_crc32c; + +/* The INIT and EQ macros are the same for all implementations. */ +#define INIT_CRC32C(crc) ((crc) = 0xFFFFFFFF) +#define EQ_CRC32C(c1, c2) ((c1) == (c2)) + +#if defined(USE_SSE42_CRC32C) +/* Use Intel SSE4.2 instructions. */ +#define COMP_CRC32C(crc, data, len) \ + ((crc) = pg_comp_crc32c_sse42((crc), (data), (len))) +#define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF) + +extern pg_crc32c pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len); + +#elif defined(USE_ARMV8_CRC32C) +/* Use ARMv8 CRC Extension instructions. */ + +#define COMP_CRC32C(crc, data, len) \ + ((crc) = pg_comp_crc32c_armv8((crc), (data), (len))) +#define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF) + +extern pg_crc32c pg_comp_crc32c_armv8(pg_crc32c crc, const void *data, size_t len); + +#elif defined(USE_SSE42_CRC32C_WITH_RUNTIME_CHECK) || defined(USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK) + +/* + * Use Intel SSE 4.2 or ARMv8 instructions, but perform a runtime check first + * to check that they are available. + */ +#define COMP_CRC32C(crc, data, len) \ + ((crc) = pg_comp_crc32c((crc), (data), (len))) +#define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF) + +extern pg_crc32c pg_comp_crc32c_sb8(pg_crc32c crc, const void *data, size_t len); +extern pg_crc32c (*pg_comp_crc32c) (pg_crc32c crc, const void *data, size_t len); + +#ifdef USE_SSE42_CRC32C_WITH_RUNTIME_CHECK +extern pg_crc32c pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len); +#endif +#ifdef USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK +extern pg_crc32c pg_comp_crc32c_armv8(pg_crc32c crc, const void *data, size_t len); +#endif + +#else +/* + * Use slicing-by-8 algorithm. + * + * On big-endian systems, the intermediate value is kept in reverse byte + * order, to avoid byte-swapping during the calculation. 
FIN_CRC32C reverses + * the bytes to the final order. + */ +#define COMP_CRC32C(crc, data, len) \ + ((crc) = pg_comp_crc32c_sb8((crc), (data), (len))) +#ifdef WORDS_BIGENDIAN +#define FIN_CRC32C(crc) ((crc) = pg_bswap32(crc) ^ 0xFFFFFFFF) +#else +#define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF) +#endif + +extern pg_crc32c pg_comp_crc32c_sb8(pg_crc32c crc, const void *data, size_t len); + +#endif + +#endif /* PG_CRC32C_H */ diff --git a/src/include/port/pg_iovec.h b/src/include/port/pg_iovec.h new file mode 100644 index 0000000..689799c --- /dev/null +++ b/src/include/port/pg_iovec.h @@ -0,0 +1,55 @@ +/*------------------------------------------------------------------------- + * + * pg_iovec.h + * Header for vectored I/O functions, to use in place of <sys/uio.h>. + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/port/pg_iovec.h + * + *------------------------------------------------------------------------- + */ +#ifndef PG_IOVEC_H +#define PG_IOVEC_H + +#ifndef WIN32 + +#include <limits.h> +#include <sys/uio.h> + +#else + +/* POSIX requires at least 16 as a maximum iovcnt. */ +#define IOV_MAX 16 + +/* Define our own POSIX-compatible iovec struct. */ +struct iovec +{ + void *iov_base; + size_t iov_len; +}; + +#endif + +/* Define a reasonable maximum that is safe to use on the stack. */ +#define PG_IOV_MAX Min(IOV_MAX, 32) + +/* + * Note that pg_preadv and pg_pwritev have a pg_ prefix as a warning that the + * Windows implementations have the side-effect of changing the file position. + */ + +#if HAVE_DECL_PREADV +#define pg_preadv preadv +#else +extern ssize_t pg_preadv(int fd, const struct iovec *iov, int iovcnt, off_t offset); +#endif + +#if HAVE_DECL_PWRITEV +#define pg_pwritev pwritev +#else +extern ssize_t pg_pwritev(int fd, const struct iovec *iov, int iovcnt, off_t offset); +#endif + +#endif /* PG_IOVEC_H */ diff --git a/src/include/port/pg_lfind.h b/src/include/port/pg_lfind.h new file mode 100644 index 0000000..59aa824 --- /dev/null +++ b/src/include/port/pg_lfind.h @@ -0,0 +1,180 @@ +/*------------------------------------------------------------------------- + * + * pg_lfind.h + * Optimized linear search routines using SIMD intrinsics where + * available. + * + * Copyright (c) 2022-2023, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/include/port/pg_lfind.h + * + *------------------------------------------------------------------------- + */ +#ifndef PG_LFIND_H +#define PG_LFIND_H + +#include "port/simd.h" + +/* + * pg_lfind8 + * + * Return true if there is an element in 'base' that equals 'key', otherwise + * return false. + */ +static inline bool +pg_lfind8(uint8 key, uint8 *base, uint32 nelem) +{ + uint32 i; + + /* round down to multiple of vector length */ + uint32 tail_idx = nelem & ~(sizeof(Vector8) - 1); + Vector8 chunk; + + for (i = 0; i < tail_idx; i += sizeof(Vector8)) + { + vector8_load(&chunk, &base[i]); + if (vector8_has(chunk, key)) + return true; + } + + /* Process the remaining elements one at a time. */ + for (; i < nelem; i++) + { + if (key == base[i]) + return true; + } + + return false; +} + +/* + * pg_lfind8_le + * + * Return true if there is an element in 'base' that is less than or equal to + * 'key', otherwise return false. 
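+ *
+ * For example, with base = {250, 9, 7}, nelem = 3 and key = 8, the
+ * function returns true because 7 <= 8.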
+ */ +static inline bool +pg_lfind8_le(uint8 key, uint8 *base, uint32 nelem) +{ + uint32 i; + + /* round down to multiple of vector length */ + uint32 tail_idx = nelem & ~(sizeof(Vector8) - 1); + Vector8 chunk; + + for (i = 0; i < tail_idx; i += sizeof(Vector8)) + { + vector8_load(&chunk, &base[i]); + if (vector8_has_le(chunk, key)) + return true; + } + + /* Process the remaining elements one at a time. */ + for (; i < nelem; i++) + { + if (base[i] <= key) + return true; + } + + return false; +} + +/* + * pg_lfind32 + * + * Return true if there is an element in 'base' that equals 'key', otherwise + * return false. + */ +static inline bool +pg_lfind32(uint32 key, uint32 *base, uint32 nelem) +{ + uint32 i = 0; + +#ifndef USE_NO_SIMD + + /* + * For better instruction-level parallelism, each loop iteration operates + * on a block of four registers. Testing for SSE2 has showed this is ~40% + * faster than using a block of two registers. + */ + const Vector32 keys = vector32_broadcast(key); /* load copies of key */ + const uint32 nelem_per_vector = sizeof(Vector32) / sizeof(uint32); + const uint32 nelem_per_iteration = 4 * nelem_per_vector; + + /* round down to multiple of elements per iteration */ + const uint32 tail_idx = nelem & ~(nelem_per_iteration - 1); + +#if defined(USE_ASSERT_CHECKING) + bool assert_result = false; + + /* pre-compute the result for assert checking */ + for (i = 0; i < nelem; i++) + { + if (key == base[i]) + { + assert_result = true; + break; + } + } +#endif + + for (i = 0; i < tail_idx; i += nelem_per_iteration) + { + Vector32 vals1, + vals2, + vals3, + vals4, + result1, + result2, + result3, + result4, + tmp1, + tmp2, + result; + + /* load the next block into 4 registers */ + vector32_load(&vals1, &base[i]); + vector32_load(&vals2, &base[i + nelem_per_vector]); + vector32_load(&vals3, &base[i + nelem_per_vector * 2]); + vector32_load(&vals4, &base[i + nelem_per_vector * 3]); + + /* compare each value to the key */ + result1 = vector32_eq(keys, vals1); + result2 = vector32_eq(keys, vals2); + result3 = vector32_eq(keys, vals3); + result4 = vector32_eq(keys, vals4); + + /* combine the results into a single variable */ + tmp1 = vector32_or(result1, result2); + tmp2 = vector32_or(result3, result4); + result = vector32_or(tmp1, tmp2); + + /* see if there was a match */ + if (vector32_is_highbit_set(result)) + { + Assert(assert_result == true); + return true; + } + } +#endif /* ! USE_NO_SIMD */ + + /* Process the remaining elements one at a time. */ + for (; i < nelem; i++) + { + if (key == base[i]) + { +#ifndef USE_NO_SIMD + Assert(assert_result == true); +#endif + return true; + } + } + +#ifndef USE_NO_SIMD + Assert(assert_result == false); +#endif + return false; +} + +#endif /* PG_LFIND_H */ diff --git a/src/include/port/pg_pthread.h b/src/include/port/pg_pthread.h new file mode 100644 index 0000000..d102ce9 --- /dev/null +++ b/src/include/port/pg_pthread.h @@ -0,0 +1,41 @@ +/*------------------------------------------------------------------------- + * + * Declarations for missing POSIX thread components. + * + * Currently this supplies an implementation of pthread_barrier_t for the + * benefit of macOS, which lacks it. These declarations are not in port.h, + * because that'd require <pthread.h> to be included by every translation + * unit. 
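+ * The replacement follows the usual POSIX pattern: pthread_barrier_init()
+ * is called with the number of participating threads, each thread calls
+ * pthread_barrier_wait() at the synchronization point (exactly one caller
+ * receives PTHREAD_BARRIER_SERIAL_THREAD), and pthread_barrier_destroy()
+ * releases the resources once the barrier is no longer needed.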
+ * + *------------------------------------------------------------------------- + */ + +#ifndef PG_PTHREAD_H +#define PG_PTHREAD_H + +#include <pthread.h> + +#ifndef HAVE_PTHREAD_BARRIER_WAIT + +#ifndef PTHREAD_BARRIER_SERIAL_THREAD +#define PTHREAD_BARRIER_SERIAL_THREAD (-1) +#endif + +typedef struct pg_pthread_barrier +{ + bool sense; /* we only need a one bit phase */ + int count; /* number of threads expected */ + int arrived; /* number of threads that have arrived */ + pthread_mutex_t mutex; + pthread_cond_t cond; +} pthread_barrier_t; + +extern int pthread_barrier_init(pthread_barrier_t *barrier, + const void *attr, + int count); +extern int pthread_barrier_wait(pthread_barrier_t *barrier); +extern int pthread_barrier_destroy(pthread_barrier_t *barrier); + +#endif + +#endif diff --git a/src/include/port/simd.h b/src/include/port/simd.h new file mode 100644 index 0000000..1fa6c3b --- /dev/null +++ b/src/include/port/simd.h @@ -0,0 +1,375 @@ +/*------------------------------------------------------------------------- + * + * simd.h + * Support for platform-specific vector operations. + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/port/simd.h + * + * NOTES + * - VectorN in this file refers to a register where the element operands + * are N bits wide. The vector width is platform-specific, so users that care + * about that will need to inspect "sizeof(VectorN)". + * + *------------------------------------------------------------------------- + */ +#ifndef SIMD_H +#define SIMD_H + +#if (defined(__x86_64__) || defined(_M_AMD64)) +/* + * SSE2 instructions are part of the spec for the 64-bit x86 ISA. We assume + * that compilers targeting this architecture understand SSE2 intrinsics. + * + * We use emmintrin.h rather than the comprehensive header immintrin.h in + * order to exclude extensions beyond SSE2. This is because MSVC, at least, + * will allow the use of intrinsics that haven't been enabled at compile + * time. + */ +#include <emmintrin.h> +#define USE_SSE2 +typedef __m128i Vector8; +typedef __m128i Vector32; + +#elif defined(__aarch64__) && defined(__ARM_NEON) +/* + * We use the Neon instructions if the compiler provides access to them (as + * indicated by __ARM_NEON) and we are on aarch64. While Neon support is + * technically optional for aarch64, it appears that all available 64-bit + * hardware does have it. Neon exists in some 32-bit hardware too, but we + * could not realistically use it there without a run-time check, which seems + * not worth the trouble for now. + */ +#include <arm_neon.h> +#define USE_NEON +typedef uint8x16_t Vector8; +typedef uint32x4_t Vector32; + +#else +/* + * If no SIMD instructions are available, we can in some cases emulate vector + * operations using bitwise operations on unsigned integers. Note that many + * of the functions in this file presently do not have non-SIMD + * implementations. In particular, none of the functions involving Vector32 + * are implemented without SIMD since it's likely not worthwhile to represent + * two 32-bit integers using a uint64. 
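+ *
+ * In the fallback case a Vector8 is simply a uint64 treated as eight
+ * one-byte lanes; for example, vector8_broadcast(0x5A) evaluates to
+ * UINT64CONST(0x5A5A5A5A5A5A5A5A).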
+ */ +#define USE_NO_SIMD +typedef uint64 Vector8; +#endif + +/* load/store operations */ +static inline void vector8_load(Vector8 *v, const uint8 *s); +#ifndef USE_NO_SIMD +static inline void vector32_load(Vector32 *v, const uint32 *s); +#endif + +/* assignment operations */ +static inline Vector8 vector8_broadcast(const uint8 c); +#ifndef USE_NO_SIMD +static inline Vector32 vector32_broadcast(const uint32 c); +#endif + +/* element-wise comparisons to a scalar */ +static inline bool vector8_has(const Vector8 v, const uint8 c); +static inline bool vector8_has_zero(const Vector8 v); +static inline bool vector8_has_le(const Vector8 v, const uint8 c); +static inline bool vector8_is_highbit_set(const Vector8 v); +#ifndef USE_NO_SIMD +static inline bool vector32_is_highbit_set(const Vector32 v); +#endif + +/* arithmetic operations */ +static inline Vector8 vector8_or(const Vector8 v1, const Vector8 v2); +#ifndef USE_NO_SIMD +static inline Vector32 vector32_or(const Vector32 v1, const Vector32 v2); +static inline Vector8 vector8_ssub(const Vector8 v1, const Vector8 v2); +#endif + +/* + * comparisons between vectors + * + * Note: These return a vector rather than boolean, which is why we don't + * have non-SIMD implementations. + */ +#ifndef USE_NO_SIMD +static inline Vector8 vector8_eq(const Vector8 v1, const Vector8 v2); +static inline Vector32 vector32_eq(const Vector32 v1, const Vector32 v2); +#endif + +/* + * Load a chunk of memory into the given vector. + */ +static inline void +vector8_load(Vector8 *v, const uint8 *s) +{ +#if defined(USE_SSE2) + *v = _mm_loadu_si128((const __m128i *) s); +#elif defined(USE_NEON) + *v = vld1q_u8(s); +#else + memcpy(v, s, sizeof(Vector8)); +#endif +} + +#ifndef USE_NO_SIMD +static inline void +vector32_load(Vector32 *v, const uint32 *s) +{ +#ifdef USE_SSE2 + *v = _mm_loadu_si128((const __m128i *) s); +#elif defined(USE_NEON) + *v = vld1q_u32(s); +#endif +} +#endif /* ! USE_NO_SIMD */ + +/* + * Create a vector with all elements set to the same value. + */ +static inline Vector8 +vector8_broadcast(const uint8 c) +{ +#if defined(USE_SSE2) + return _mm_set1_epi8(c); +#elif defined(USE_NEON) + return vdupq_n_u8(c); +#else + return ~UINT64CONST(0) / 0xFF * c; +#endif +} + +#ifndef USE_NO_SIMD +static inline Vector32 +vector32_broadcast(const uint32 c) +{ +#ifdef USE_SSE2 + return _mm_set1_epi32(c); +#elif defined(USE_NEON) + return vdupq_n_u32(c); +#endif +} +#endif /* ! USE_NO_SIMD */ + +/* + * Return true if any elements in the vector are equal to the given scalar. + */ +static inline bool +vector8_has(const Vector8 v, const uint8 c) +{ + bool result; + + /* pre-compute the result for assert checking */ +#ifdef USE_ASSERT_CHECKING + bool assert_result = false; + + for (Size i = 0; i < sizeof(Vector8); i++) + { + if (((const uint8 *) &v)[i] == c) + { + assert_result = true; + break; + } + } +#endif /* USE_ASSERT_CHECKING */ + +#if defined(USE_NO_SIMD) + /* any bytes in v equal to c will evaluate to zero via XOR */ + result = vector8_has_zero(v ^ vector8_broadcast(c)); +#else + result = vector8_is_highbit_set(vector8_eq(v, vector8_broadcast(c))); +#endif + + Assert(assert_result == result); + return result; +} + +/* + * Convenience function equivalent to vector8_has(v, 0) + */ +static inline bool +vector8_has_zero(const Vector8 v) +{ +#if defined(USE_NO_SIMD) + /* + * We cannot call vector8_has() here, because that would lead to a + * circular definition. 
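+ * (In the non-SIMD build, vector8_has() is itself implemented in terms
+ * of vector8_has_zero(), so we go through vector8_has_le() instead.)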
+ */ + return vector8_has_le(v, 0); +#else + return vector8_has(v, 0); +#endif +} + +/* + * Return true if any elements in the vector are less than or equal to the + * given scalar. + */ +static inline bool +vector8_has_le(const Vector8 v, const uint8 c) +{ + bool result = false; + + /* pre-compute the result for assert checking */ +#ifdef USE_ASSERT_CHECKING + bool assert_result = false; + + for (Size i = 0; i < sizeof(Vector8); i++) + { + if (((const uint8 *) &v)[i] <= c) + { + assert_result = true; + break; + } + } +#endif /* USE_ASSERT_CHECKING */ + +#if defined(USE_NO_SIMD) + + /* + * To find bytes <= c, we can use bitwise operations to find bytes < c+1, + * but it only works if c+1 <= 128 and if the highest bit in v is not set. + * Adapted from + * https://graphics.stanford.edu/~seander/bithacks.html#HasLessInWord + */ + if ((int64) v >= 0 && c < 0x80) + result = (v - vector8_broadcast(c + 1)) & ~v & vector8_broadcast(0x80); + else + { + /* one byte at a time */ + for (Size i = 0; i < sizeof(Vector8); i++) + { + if (((const uint8 *) &v)[i] <= c) + { + result = true; + break; + } + } + } +#else + + /* + * Use saturating subtraction to find bytes <= c, which will present as + * NUL bytes. This approach is a workaround for the lack of unsigned + * comparison instructions on some architectures. + */ + result = vector8_has_zero(vector8_ssub(v, vector8_broadcast(c))); +#endif + + Assert(assert_result == result); + return result; +} + +/* + * Return true if the high bit of any element is set + */ +static inline bool +vector8_is_highbit_set(const Vector8 v) +{ +#ifdef USE_SSE2 + return _mm_movemask_epi8(v) != 0; +#elif defined(USE_NEON) + return vmaxvq_u8(v) > 0x7F; +#else + return v & vector8_broadcast(0x80); +#endif +} + +/* + * Exactly like vector8_is_highbit_set except for the input type, so it + * looks at each byte separately. + * + * XXX x86 uses the same underlying type for 8-bit, 16-bit, and 32-bit + * integer elements, but Arm does not, hence the need for a separate + * function. We could instead adopt the behavior of Arm's vmaxvq_u32(), i.e. + * check each 32-bit element, but that would require an additional mask + * operation on x86. + */ +#ifndef USE_NO_SIMD +static inline bool +vector32_is_highbit_set(const Vector32 v) +{ +#if defined(USE_NEON) + return vector8_is_highbit_set((Vector8) v); +#else + return vector8_is_highbit_set(v); +#endif +} +#endif /* ! USE_NO_SIMD */ + +/* + * Return the bitwise OR of the inputs + */ +static inline Vector8 +vector8_or(const Vector8 v1, const Vector8 v2) +{ +#ifdef USE_SSE2 + return _mm_or_si128(v1, v2); +#elif defined(USE_NEON) + return vorrq_u8(v1, v2); +#else + return v1 | v2; +#endif +} + +#ifndef USE_NO_SIMD +static inline Vector32 +vector32_or(const Vector32 v1, const Vector32 v2) +{ +#ifdef USE_SSE2 + return _mm_or_si128(v1, v2); +#elif defined(USE_NEON) + return vorrq_u32(v1, v2); +#endif +} +#endif /* ! USE_NO_SIMD */ + +/* + * Return the result of subtracting the respective elements of the input + * vectors using saturation (i.e., if the operation would yield a value less + * than zero, zero is returned instead). For more information on saturation + * arithmetic, see https://en.wikipedia.org/wiki/Saturation_arithmetic + */ +#ifndef USE_NO_SIMD +static inline Vector8 +vector8_ssub(const Vector8 v1, const Vector8 v2) +{ +#ifdef USE_SSE2 + return _mm_subs_epu8(v1, v2); +#elif defined(USE_NEON) + return vqsubq_u8(v1, v2); +#endif +} +#endif /* ! 
USE_NO_SIMD */ + +/* + * Return a vector with all bits set in each lane where the corresponding + * lanes in the inputs are equal. + */ +#ifndef USE_NO_SIMD +static inline Vector8 +vector8_eq(const Vector8 v1, const Vector8 v2) +{ +#ifdef USE_SSE2 + return _mm_cmpeq_epi8(v1, v2); +#elif defined(USE_NEON) + return vceqq_u8(v1, v2); +#endif +} +#endif /* ! USE_NO_SIMD */ + +#ifndef USE_NO_SIMD +static inline Vector32 +vector32_eq(const Vector32 v1, const Vector32 v2) +{ +#ifdef USE_SSE2 + return _mm_cmpeq_epi32(v1, v2); +#elif defined(USE_NEON) + return vceqq_u32(v1, v2); +#endif +} +#endif /* ! USE_NO_SIMD */ + +#endif /* SIMD_H */ diff --git a/src/include/port/solaris.h b/src/include/port/solaris.h new file mode 100644 index 0000000..e63a3bd --- /dev/null +++ b/src/include/port/solaris.h @@ -0,0 +1,26 @@ +/* src/include/port/solaris.h */ + +/* + * Sort this out for all operating systems some time. The __xxx + * symbols are defined on both GCC and Solaris CC, although GCC + * doesn't document them. The __xxx__ symbols are only on GCC. + */ +#if defined(__i386) && !defined(__i386__) +#define __i386__ +#endif + +#if defined(__amd64) && !defined(__amd64__) +#define __amd64__ +#endif + +#if defined(__x86_64) && !defined(__x86_64__) +#define __x86_64__ +#endif + +#if defined(__sparc) && !defined(__sparc__) +#define __sparc__ +#endif + +#if defined(__i386__) +#include <sys/isa_defs.h> +#endif diff --git a/src/include/port/win32.h b/src/include/port/win32.h new file mode 100644 index 0000000..d6c13d0 --- /dev/null +++ b/src/include/port/win32.h @@ -0,0 +1,59 @@ +/* src/include/port/win32.h */ + +/* + * We always rely on the WIN32 macro being set by our build system, + * but _WIN32 is the compiler pre-defined macro. So make sure we define + * WIN32 whenever _WIN32 is set, to facilitate standalone building. + */ +#if defined(_WIN32) && !defined(WIN32) +#define WIN32 +#endif + +/* + * Make sure _WIN32_WINNT has the minimum required value. + * Leave a higher value in place. The minimum requirement is Windows 10. + */ +#ifdef _WIN32_WINNT +#undef _WIN32_WINNT +#endif + +#define _WIN32_WINNT 0x0A00 + +/* + * We need to prevent <crtdefs.h> from defining a symbol conflicting with + * our errcode() function. Since it's likely to get included by standard + * system headers, pre-emptively include it now. + */ +#if defined(_MSC_VER) || defined(HAVE_CRTDEFS_H) +#define errcode __msvc_errcode +#include <crtdefs.h> +#undef errcode +#endif + +/* + * defines for dynamic linking on Win32 platform + */ + +/* + * Variables declared in the core backend and referenced by loadable + * modules need to be marked "dllimport" in the core build, but + * "dllexport" when the declaration is read in a loadable module. + * No special markings should be used when compiling frontend code. + */ +#ifndef FRONTEND +#ifdef BUILDING_DLL +#define PGDLLIMPORT __declspec (dllexport) +#else +#define PGDLLIMPORT __declspec (dllimport) +#endif +#endif + +/* + * Functions exported by a loadable module must be marked "dllexport". + * + * While mingw would otherwise fall back to + * __attribute__((visibility("default"))), that appears to only work as long + * as no symbols are declared with __declspec(dllexport). But we can end up + * with some, e.g. plpython's Py_Init. 
+ */ +#define PGDLLEXPORT __declspec (dllexport) diff --git a/src/include/port/win32/arpa/inet.h b/src/include/port/win32/arpa/inet.h new file mode 100644 index 0000000..ad18031 --- /dev/null +++ b/src/include/port/win32/arpa/inet.h @@ -0,0 +1,3 @@ +/* src/include/port/win32/arpa/inet.h */ + +#include <sys/socket.h> diff --git a/src/include/port/win32/dlfcn.h b/src/include/port/win32/dlfcn.h new file mode 100644 index 0000000..b6e43c0 --- /dev/null +++ b/src/include/port/win32/dlfcn.h @@ -0,0 +1 @@ +/* src/include/port/win32/dlfcn.h */ diff --git a/src/include/port/win32/grp.h b/src/include/port/win32/grp.h new file mode 100644 index 0000000..8b4f213 --- /dev/null +++ b/src/include/port/win32/grp.h @@ -0,0 +1 @@ +/* src/include/port/win32/grp.h */ diff --git a/src/include/port/win32/netdb.h b/src/include/port/win32/netdb.h new file mode 100644 index 0000000..9ed13e4 --- /dev/null +++ b/src/include/port/win32/netdb.h @@ -0,0 +1,7 @@ +/* src/include/port/win32/netdb.h */ +#ifndef WIN32_NETDB_H +#define WIN32_NETDB_H + +#include <ws2tcpip.h> + +#endif diff --git a/src/include/port/win32/netinet/in.h b/src/include/port/win32/netinet/in.h new file mode 100644 index 0000000..a4e22f8 --- /dev/null +++ b/src/include/port/win32/netinet/in.h @@ -0,0 +1,3 @@ +/* src/include/port/win32/netinet/in.h */ + +#include <sys/socket.h> diff --git a/src/include/port/win32/netinet/tcp.h b/src/include/port/win32/netinet/tcp.h new file mode 100644 index 0000000..1d377b6 --- /dev/null +++ b/src/include/port/win32/netinet/tcp.h @@ -0,0 +1,7 @@ +/* src/include/port/win32/netinet/tcp.h */ +#ifndef WIN32_NETINET_TCP_H +#define WIN32_NETINET_TCP_H + +#include <sys/socket.h> + +#endif diff --git a/src/include/port/win32/pwd.h b/src/include/port/win32/pwd.h new file mode 100644 index 0000000..b8c7178 --- /dev/null +++ b/src/include/port/win32/pwd.h @@ -0,0 +1,3 @@ +/* + * src/include/port/win32/pwd.h + */ diff --git a/src/include/port/win32/sys/resource.h b/src/include/port/win32/sys/resource.h new file mode 100644 index 0000000..a14feeb --- /dev/null +++ b/src/include/port/win32/sys/resource.h @@ -0,0 +1,20 @@ +/* + * Replacement for <sys/resource.h> for Windows. + */ +#ifndef WIN32_SYS_RESOURCE_H +#define WIN32_SYS_RESOURCE_H + +#include <sys/time.h> /* for struct timeval */ + +#define RUSAGE_SELF 0 +#define RUSAGE_CHILDREN (-1) + +struct rusage +{ + struct timeval ru_utime; /* user time used */ + struct timeval ru_stime; /* system time used */ +}; + +extern int getrusage(int who, struct rusage *rusage); + +#endif /* WIN32_SYS_RESOURCE_H */ diff --git a/src/include/port/win32/sys/select.h b/src/include/port/win32/sys/select.h new file mode 100644 index 0000000..f8a877a --- /dev/null +++ b/src/include/port/win32/sys/select.h @@ -0,0 +1,3 @@ +/* + * src/include/port/win32/sys/select.h + */ diff --git a/src/include/port/win32/sys/socket.h b/src/include/port/win32/sys/socket.h new file mode 100644 index 0000000..0c32c0f --- /dev/null +++ b/src/include/port/win32/sys/socket.h @@ -0,0 +1,26 @@ +/* + * src/include/port/win32/sys/socket.h + */ +#ifndef WIN32_SYS_SOCKET_H +#define WIN32_SYS_SOCKET_H + +/* + * Unfortunately, <wingdi.h> of VC++ also defines ERROR. + * To avoid the conflict, we include <windows.h> here and undefine ERROR + * immediately. + * + * Note: Don't include <wingdi.h> directly. It causes compile errors. 
+ */ +#include <winsock2.h> +#include <ws2tcpip.h> +#include <windows.h> + +#undef ERROR +#undef small + +/* Restore old ERROR value */ +#ifdef PGERROR +#define ERROR PGERROR +#endif + +#endif /* WIN32_SYS_SOCKET_H */ diff --git a/src/include/port/win32/sys/un.h b/src/include/port/win32/sys/un.h new file mode 100644 index 0000000..4fc13a2 --- /dev/null +++ b/src/include/port/win32/sys/un.h @@ -0,0 +1,17 @@ +/* + * src/include/port/win32/sys/un.h + */ +#ifndef WIN32_SYS_UN_H +#define WIN32_SYS_UN_H + +/* + * Windows defines this structure in <afunix.h>, but not all tool chains have + * the header yet, so we define it here for now. + */ +struct sockaddr_un +{ + unsigned short sun_family; + char sun_path[108]; +}; + +#endif diff --git a/src/include/port/win32/sys/wait.h b/src/include/port/win32/sys/wait.h new file mode 100644 index 0000000..eaeb566 --- /dev/null +++ b/src/include/port/win32/sys/wait.h @@ -0,0 +1,3 @@ +/* + * src/include/port/win32/sys/wait.h + */ diff --git a/src/include/port/win32_msvc/dirent.h b/src/include/port/win32_msvc/dirent.h new file mode 100644 index 0000000..62799db --- /dev/null +++ b/src/include/port/win32_msvc/dirent.h @@ -0,0 +1,34 @@ +/* + * Headers for port/dirent.c, win32 native implementation of dirent functions + * + * src/include/port/win32_msvc/dirent.h + */ + +#ifndef _WIN32VC_DIRENT_H +#define _WIN32VC_DIRENT_H +struct dirent +{ + long d_ino; + unsigned short d_reclen; + unsigned char d_type; + unsigned short d_namlen; + char d_name[MAX_PATH]; +}; + +typedef struct DIR DIR; + +DIR *opendir(const char *); +struct dirent *readdir(DIR *); +int closedir(DIR *); + +/* File types for 'd_type'. */ +#define DT_UNKNOWN 0 +#define DT_FIFO 1 +#define DT_CHR 2 +#define DT_DIR 4 +#define DT_BLK 6 +#define DT_REG 8 +#define DT_LNK 10 +#define DT_SOCK 12 +#define DT_WHT 14 +#endif diff --git a/src/include/port/win32_msvc/sys/file.h b/src/include/port/win32_msvc/sys/file.h new file mode 100644 index 0000000..76be3e7 --- /dev/null +++ b/src/include/port/win32_msvc/sys/file.h @@ -0,0 +1 @@ +/* src/include/port/win32_msvc/sys/file.h */ diff --git a/src/include/port/win32_msvc/sys/param.h b/src/include/port/win32_msvc/sys/param.h new file mode 100644 index 0000000..160df3b --- /dev/null +++ b/src/include/port/win32_msvc/sys/param.h @@ -0,0 +1 @@ +/* src/include/port/win32_msvc/sys/param.h */ diff --git a/src/include/port/win32_msvc/sys/time.h b/src/include/port/win32_msvc/sys/time.h new file mode 100644 index 0000000..9d943ec --- /dev/null +++ b/src/include/port/win32_msvc/sys/time.h @@ -0,0 +1 @@ +/* src/include/port/win32_msvc/sys/time.h */ diff --git a/src/include/port/win32_msvc/unistd.h b/src/include/port/win32_msvc/unistd.h new file mode 100644 index 0000000..b7795ba --- /dev/null +++ b/src/include/port/win32_msvc/unistd.h @@ -0,0 +1,9 @@ +/* src/include/port/win32_msvc/unistd.h */ + +/* + * MSVC does not define these, nor does _fileno(stdin) etc reliably work + * (returns -1 if stdin/out/err are closed). 
+ */ +#define STDIN_FILENO 0 +#define STDOUT_FILENO 1 +#define STDERR_FILENO 2 diff --git a/src/include/port/win32_msvc/utime.h b/src/include/port/win32_msvc/utime.h new file mode 100644 index 0000000..c78e79c --- /dev/null +++ b/src/include/port/win32_msvc/utime.h @@ -0,0 +1,3 @@ +/* src/include/port/win32_msvc/utime.h */ + +#include <sys/utime.h> /* for non-unicode version */ diff --git a/src/include/port/win32_port.h b/src/include/port/win32_port.h new file mode 100644 index 0000000..b957d5c --- /dev/null +++ b/src/include/port/win32_port.h @@ -0,0 +1,594 @@ +/*------------------------------------------------------------------------- + * + * win32_port.h + * Windows-specific compatibility stuff. + * + * Note this is read in MinGW as well as native Windows builds, + * but not in Cygwin builds. + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/port/win32_port.h + * + *------------------------------------------------------------------------- + */ +#ifndef PG_WIN32_PORT_H +#define PG_WIN32_PORT_H + +/* + * Always build with SSPI support. Keep it as a #define in case + * we want a switch to disable it sometime in the future. + */ +#define ENABLE_SSPI 1 + +/* undefine and redefine after #include */ +#undef mkdir + +#undef ERROR + +/* + * VS2013 and later issue warnings about using the old Winsock API, + * which we don't really want to hear about. + */ +#ifdef _MSC_VER +#define _WINSOCK_DEPRECATED_NO_WARNINGS +#endif + +/* + * The MinGW64 headers choke if this is already defined - they + * define it themselves. + */ +#if !defined(__MINGW64_VERSION_MAJOR) || defined(_MSC_VER) +#define _WINSOCKAPI_ +#endif + +/* + * windows.h includes a lot of other headers, slowing down compilation + * significantly. WIN32_LEAN_AND_MEAN reduces that a bit. It'd be better to + * remove the include of windows.h (as well as indirect inclusions of it) from + * such a central place, but until then... + * + * To be able to include ntstatus.h tell windows.h to not declare NTSTATUS by + * temporarily defining UMDF_USING_NTSTATUS, otherwise we'll get warning about + * macro redefinitions, as windows.h also defines NTSTATUS (yuck). That in + * turn requires including ntstatus.h, winternl.h to get common symbols. + */ +#define WIN32_LEAN_AND_MEAN +#define UMDF_USING_NTSTATUS + +#include <winsock2.h> +#include <ws2tcpip.h> +#include <windows.h> +#include <ntstatus.h> +#include <winternl.h> + +#undef small +#include <process.h> +#include <signal.h> +#include <direct.h> +#undef near + +/* needed before sys/stat hacking below: */ +#define fstat microsoft_native_fstat +#define stat microsoft_native_stat +#include <sys/stat.h> +#undef fstat +#undef stat + +/* Must be here to avoid conflicting with prototype in windows.h */ +#define mkdir(a,b) mkdir(a) + +#define ftruncate(a,b) chsize(a,b) + +/* Windows doesn't have fsync() as such, use _commit() */ +#define fsync(fd) _commit(fd) + +/* + * For historical reasons, we allow setting wal_sync_method to + * fsync_writethrough on Windows, even though it's really identical to fsync + * (both code paths wind up at _commit()). 
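+ * FSYNC_WRITETHROUGH_IS_FSYNC below records exactly that equivalence.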
+ */ +#define HAVE_FSYNC_WRITETHROUGH +#define FSYNC_WRITETHROUGH_IS_FSYNC + +#define USES_WINSOCK + +/* + * IPC defines + */ +#undef HAVE_UNION_SEMUN +#define HAVE_UNION_SEMUN 1 + +#define IPC_RMID 256 +#define IPC_CREAT 512 +#define IPC_EXCL 1024 +#define IPC_PRIVATE 234564 +#define IPC_NOWAIT 2048 +#define IPC_STAT 4096 + +#define EACCESS 2048 +#ifndef EIDRM +#define EIDRM 4096 +#endif + +#define SETALL 8192 +#define GETNCNT 16384 +#define GETVAL 65536 +#define SETVAL 131072 +#define GETPID 262144 + + +/* + * Signal stuff + * + * For WIN32, there is no wait() call so there are no wait() macros + * to interpret the return value of system(). Instead, system() + * return values < 0x100 are used for exit() termination, and higher + * values are used to indicate non-exit() termination, which is + * similar to a unix-style signal exit (think SIGSEGV == + * STATUS_ACCESS_VIOLATION). Return values are broken up into groups: + * + * https://docs.microsoft.com/en-us/windows-hardware/drivers/kernel/using-ntstatus-values + * + * NT_SUCCESS 0 - 0x3FFFFFFF + * NT_INFORMATION 0x40000000 - 0x7FFFFFFF + * NT_WARNING 0x80000000 - 0xBFFFFFFF + * NT_ERROR 0xC0000000 - 0xFFFFFFFF + * + * Effectively, we don't care on the severity of the return value from + * system(), we just need to know if it was because of exit() or generated + * by the system, and it seems values >= 0x100 are system-generated. + * See this URL for a list of WIN32 STATUS_* values: + * + * Wine (URL used in our error messages) - + * http://source.winehq.org/source/include/ntstatus.h + * Descriptions - + * https://docs.microsoft.com/en-us/openspecs/windows_protocols/ms-erref/596a1078-e883-4972-9bbc-49e60bebca55 + * + * The comprehensive exception list is included in ntstatus.h from the + * Windows Driver Kit (WDK). A subset of the list is also included in + * winnt.h from the Windows SDK. Defining WIN32_NO_STATUS before including + * windows.h helps to avoid any conflicts. + * + * Some day we might want to print descriptions for the most common + * exceptions, rather than printing an include file name. We could use + * RtlNtStatusToDosError() and pass to FormatMessage(), which can print + * the text of error values, but MinGW does not support + * RtlNtStatusToDosError(). 
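+ *
+ * As an example of how the macros below behave: a child that terminates
+ * with exit(3) produces w == 3, so WIFEXITED(w) is true and
+ * WEXITSTATUS(w) is 3, whereas a crash with STATUS_ACCESS_VIOLATION
+ * produces w == 0xC0000005, so WIFEXITED(w) is false and WIFSIGNALED(w)
+ * is true.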
+ */ +#define WIFEXITED(w) (((w) & 0XFFFFFF00) == 0) +#define WIFSIGNALED(w) (!WIFEXITED(w)) +#define WEXITSTATUS(w) (w) +#define WTERMSIG(w) (w) + +#define sigmask(sig) ( 1 << ((sig)-1) ) + +/* Signal function return values */ +#undef SIG_DFL +#undef SIG_ERR +#undef SIG_IGN +#define SIG_DFL ((pqsigfunc)0) +#define SIG_ERR ((pqsigfunc)-1) +#define SIG_IGN ((pqsigfunc)1) + +/* Some extra signals */ +#define SIGHUP 1 +#define SIGQUIT 3 +#define SIGTRAP 5 +#define SIGABRT 22 /* Set to match W32 value -- not UNIX value */ +#define SIGKILL 9 +#define SIGPIPE 13 +#define SIGALRM 14 +#define SIGSTOP 17 +#define SIGTSTP 18 +#define SIGCONT 19 +#define SIGCHLD 20 +#define SIGWINCH 28 +#define SIGUSR1 30 +#define SIGUSR2 31 + +/* MinGW has gettimeofday(), but MSVC doesn't */ +#ifdef _MSC_VER +/* Last parameter not used */ +extern int gettimeofday(struct timeval *tp, void *tzp); +#endif + +/* for setitimer in backend/port/win32/timer.c */ +#define ITIMER_REAL 0 +struct itimerval +{ + struct timeval it_interval; + struct timeval it_value; +}; + +int setitimer(int which, const struct itimerval *value, struct itimerval *ovalue); + +/* Convenience wrapper for GetFileType() */ +extern DWORD pgwin32_get_file_type(HANDLE hFile); + +/* + * WIN32 does not provide 64-bit off_t, but does provide the functions operating + * with 64-bit offsets. Also, fseek() might not give an error for unseekable + * streams, so harden that function with our version. + */ +#define pgoff_t __int64 + +#ifdef _MSC_VER +extern int _pgfseeko64(FILE *stream, pgoff_t offset, int origin); +extern pgoff_t _pgftello64(FILE *stream); +#define fseeko(stream, offset, origin) _pgfseeko64(stream, offset, origin) +#define ftello(stream) _pgftello64(stream) +#else +#ifndef fseeko +#define fseeko(stream, offset, origin) fseeko64(stream, offset, origin) +#endif +#ifndef ftello +#define ftello(stream) ftello64(stream) +#endif +#endif + +/* + * Win32 also doesn't have symlinks, but we can emulate them with + * junction points on newer Win32 versions. + * + * Cygwin has its own symlinks which work on Win95/98/ME where + * junction points don't, so use those instead. We have no way of + * knowing what type of system Cygwin binaries will be run on. + * Note: Some CYGWIN includes might #define WIN32. + */ +extern int pgsymlink(const char *oldpath, const char *newpath); +extern int pgreadlink(const char *path, char *buf, size_t size); + +#define symlink(oldpath, newpath) pgsymlink(oldpath, newpath) +#define readlink(path, buf, size) pgreadlink(path, buf, size) + +/* + * Supplement to <sys/types.h>. + * + * Perl already has typedefs for uid_t and gid_t. + */ +#ifndef PLPERL_HAVE_UID_GID +typedef int uid_t; +typedef int gid_t; +#endif +typedef long key_t; + +#ifdef _MSC_VER +typedef int pid_t; +#endif + +/* + * Supplement to <sys/stat.h>. + * + * We must pull in sys/stat.h before this part, else our overrides lose. + * + * stat() is not guaranteed to set the st_size field on win32, so we + * redefine it to our own implementation. See src/port/win32stat.c. + * + * The struct stat is 32 bit in MSVC, so we redefine it as a copy of + * struct __stat64. This also fixes the struct size for MINGW builds. 
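Editorial sketch, not part of the patch: the wait-style macros defined at the top of this hunk let callers keep the familiar Unix idiom for inspecting system()'s result. The function below is illustrative and assumes win32_port.h is in effect:

#include <stdio.h>
#include <stdlib.h>

static void
report_command(const char *cmd)
{
	int		rc = system(cmd);

	if (WIFEXITED(rc))
		printf("\"%s\" exited with status %d\n", cmd, WEXITSTATUS(rc));
	else
		printf("\"%s\" died with exception 0x%X\n",
			   cmd, (unsigned int) WTERMSIG(rc));
}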
+ */ +struct stat /* This should match struct __stat64 */ +{ + _dev_t st_dev; + _ino_t st_ino; + unsigned short st_mode; + short st_nlink; + short st_uid; + short st_gid; + _dev_t st_rdev; + __int64 st_size; + __time64_t st_atime; + __time64_t st_mtime; + __time64_t st_ctime; +}; + +extern int _pgfstat64(int fileno, struct stat *buf); +extern int _pgstat64(const char *name, struct stat *buf); +extern int _pglstat64(const char *name, struct stat *buf); + +#define fstat(fileno, sb) _pgfstat64(fileno, sb) +#define stat(path, sb) _pgstat64(path, sb) +#define lstat(path, sb) _pglstat64(path, sb) + +/* These macros are not provided by older MinGW, nor by MSVC */ +#ifndef S_IRUSR +#define S_IRUSR _S_IREAD +#endif +#ifndef S_IWUSR +#define S_IWUSR _S_IWRITE +#endif +#ifndef S_IXUSR +#define S_IXUSR _S_IEXEC +#endif +#ifndef S_IRWXU +#define S_IRWXU (S_IRUSR | S_IWUSR | S_IXUSR) +#endif +#ifndef S_IRGRP +#define S_IRGRP 0 +#endif +#ifndef S_IWGRP +#define S_IWGRP 0 +#endif +#ifndef S_IXGRP +#define S_IXGRP 0 +#endif +#ifndef S_IRWXG +#define S_IRWXG 0 +#endif +#ifndef S_IROTH +#define S_IROTH 0 +#endif +#ifndef S_IWOTH +#define S_IWOTH 0 +#endif +#ifndef S_IXOTH +#define S_IXOTH 0 +#endif +#ifndef S_IRWXO +#define S_IRWXO 0 +#endif +#ifndef S_ISDIR +#define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR) +#endif +#ifndef S_ISREG +#define S_ISREG(m) (((m) & S_IFMT) == S_IFREG) +#endif + +/* + * In order for lstat() to be able to report junction points as symlinks, we + * need to hijack a bit in st_mode, since neither MSVC nor MinGW provides + * S_ISLNK and there aren't any spare bits. We'll steal the one for character + * devices, because we don't otherwise make use of those. + */ +#ifdef S_ISLNK +#error "S_ISLNK is already defined" +#endif +#ifdef S_IFLNK +#error "S_IFLNK is already defined" +#endif +#define S_IFLNK S_IFCHR +#define S_ISLNK(m) (((m) & S_IFLNK) == S_IFLNK) + +/* + * Supplement to <fcntl.h>. + * This is the same value as _O_NOINHERIT in the MS header file. This is + * to ensure that we don't collide with a future definition. It means + * we cannot use _O_NOINHERIT ourselves. + */ +#define O_DSYNC 0x0080 + +/* + * Our open() replacement does not create inheritable handles, so it is safe to + * ignore O_CLOEXEC. (If we were using Windows' own open(), it might be + * necessary to convert this to _O_NOINHERIT.) + */ +#define O_CLOEXEC 0 + +/* + * Supplement to <errno.h>. + * + * We redefine network-related Berkeley error symbols as the corresponding WSA + * constants. This allows strerror.c to recognize them as being in the Winsock + * error code range and pass them off to win32_socket_strerror(), since + * Windows' version of plain strerror() won't cope. Note that this will break + * if these names are used for anything else besides Windows Sockets errors. + * See TranslateSocketError() when changing this list. 
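Editorial sketch, not part of the patch: with lstat() redirected to _pglstat64() and S_ISLNK() borrowing the character-device bit as described above, a junction point can be detected with the usual POSIX-looking test. The helper below is illustrative and assumes win32_port.h is in effect:

static int
is_windows_symlink(const char *path)
{
	struct stat st;

	if (lstat(path, &st) != 0)
		return 0;				/* unreadable or missing: report "not a link" */
	return S_ISLNK(st.st_mode); /* true for junction points via the hijacked bit */
}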
+ */ +#undef EAGAIN +#define EAGAIN WSAEWOULDBLOCK +#undef EINTR +#define EINTR WSAEINTR +#undef EMSGSIZE +#define EMSGSIZE WSAEMSGSIZE +#undef EAFNOSUPPORT +#define EAFNOSUPPORT WSAEAFNOSUPPORT +#undef EWOULDBLOCK +#define EWOULDBLOCK WSAEWOULDBLOCK +#undef ECONNABORTED +#define ECONNABORTED WSAECONNABORTED +#undef ECONNRESET +#define ECONNRESET WSAECONNRESET +#undef EINPROGRESS +#define EINPROGRESS WSAEINPROGRESS +#undef EISCONN +#define EISCONN WSAEISCONN +#undef ENOBUFS +#define ENOBUFS WSAENOBUFS +#undef EPROTONOSUPPORT +#define EPROTONOSUPPORT WSAEPROTONOSUPPORT +#undef ECONNREFUSED +#define ECONNREFUSED WSAECONNREFUSED +#undef ENOTSOCK +#define ENOTSOCK WSAENOTSOCK +#undef EOPNOTSUPP +#define EOPNOTSUPP WSAEOPNOTSUPP +#undef EADDRINUSE +#define EADDRINUSE WSAEADDRINUSE +#undef EADDRNOTAVAIL +#define EADDRNOTAVAIL WSAEADDRNOTAVAIL +#undef EHOSTDOWN +#define EHOSTDOWN WSAEHOSTDOWN +#undef EHOSTUNREACH +#define EHOSTUNREACH WSAEHOSTUNREACH +#undef ENETDOWN +#define ENETDOWN WSAENETDOWN +#undef ENETRESET +#define ENETRESET WSAENETRESET +#undef ENETUNREACH +#define ENETUNREACH WSAENETUNREACH +#undef ENOTCONN +#define ENOTCONN WSAENOTCONN +#undef ETIMEDOUT +#define ETIMEDOUT WSAETIMEDOUT + +/* + * Locale stuff. + * + * Extended locale functions with gratuitous underscore prefixes. + * (These APIs are nevertheless fully documented by Microsoft.) + */ +#define locale_t _locale_t +#define tolower_l _tolower_l +#define toupper_l _toupper_l +#define towlower_l _towlower_l +#define towupper_l _towupper_l +#define isdigit_l _isdigit_l +#define iswdigit_l _iswdigit_l +#define isalpha_l _isalpha_l +#define iswalpha_l _iswalpha_l +#define isalnum_l _isalnum_l +#define iswalnum_l _iswalnum_l +#define isupper_l _isupper_l +#define iswupper_l _iswupper_l +#define islower_l _islower_l +#define iswlower_l _iswlower_l +#define isgraph_l _isgraph_l +#define iswgraph_l _iswgraph_l +#define isprint_l _isprint_l +#define iswprint_l _iswprint_l +#define ispunct_l _ispunct_l +#define iswpunct_l _iswpunct_l +#define isspace_l _isspace_l +#define iswspace_l _iswspace_l +#define strcoll_l _strcoll_l +#define strxfrm_l _strxfrm_l +#define wcscoll_l _wcscoll_l +#define wcstombs_l _wcstombs_l +#define mbstowcs_l _mbstowcs_l + +/* + * Versions of libintl >= 0.18? try to replace setlocale() with a macro + * to their own versions. Remove the macro, if it exists, because it + * ends up calling the wrong version when the backend and libintl use + * different versions of msvcrt. + */ +#if defined(setlocale) +#undef setlocale +#endif + +/* + * Define our own wrapper macro around setlocale() to work around bugs in + * Windows' native setlocale() function. 
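Editorial sketch, not part of the patch: because the Berkeley names above now expand to the corresponding WSA constants, a test written against errno-style symbols matches whether the value in hand is a translated errno or a raw WSAGetLastError() result. The helper is illustrative only:

static int
is_connection_loss(int err)
{
	/* ECONNRESET etc. are WSAECONNRESET etc. after the redefinitions above */
	return err == ECONNRESET || err == ECONNABORTED || err == ENETRESET;
}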
+ */ +extern char *pgwin32_setlocale(int category, const char *locale); + +#define setlocale(a,b) pgwin32_setlocale(a,b) + + +/* In backend/port/win32/signal.c */ +extern PGDLLIMPORT volatile int pg_signal_queue; +extern PGDLLIMPORT int pg_signal_mask; +extern PGDLLIMPORT HANDLE pgwin32_signal_event; +extern PGDLLIMPORT HANDLE pgwin32_initial_signal_pipe; + +#define UNBLOCKED_SIGNAL_QUEUE() (pg_signal_queue & ~pg_signal_mask) +#define PG_SIGNAL_COUNT 32 + +extern void pgwin32_signal_initialize(void); +extern HANDLE pgwin32_create_signal_listener(pid_t pid); +extern void pgwin32_dispatch_queued_signals(void); +extern void pg_queue_signal(int signum); + +/* In src/port/kill.c */ +#define kill(pid,sig) pgkill(pid,sig) +extern int pgkill(int pid, int sig); + +/* In backend/port/win32/socket.c */ +#ifndef FRONTEND +#define socket(af, type, protocol) pgwin32_socket(af, type, protocol) +#define bind(s, addr, addrlen) pgwin32_bind(s, addr, addrlen) +#define listen(s, backlog) pgwin32_listen(s, backlog) +#define accept(s, addr, addrlen) pgwin32_accept(s, addr, addrlen) +#define connect(s, name, namelen) pgwin32_connect(s, name, namelen) +#define select(n, r, w, e, timeout) pgwin32_select(n, r, w, e, timeout) +#define recv(s, buf, len, flags) pgwin32_recv(s, buf, len, flags) +#define send(s, buf, len, flags) pgwin32_send(s, buf, len, flags) + +extern SOCKET pgwin32_socket(int af, int type, int protocol); +extern int pgwin32_bind(SOCKET s, struct sockaddr *addr, int addrlen); +extern int pgwin32_listen(SOCKET s, int backlog); +extern SOCKET pgwin32_accept(SOCKET s, struct sockaddr *addr, int *addrlen); +extern int pgwin32_connect(SOCKET s, const struct sockaddr *name, int namelen); +extern int pgwin32_select(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, const struct timeval *timeout); +extern int pgwin32_recv(SOCKET s, char *buf, int len, int flags); +extern int pgwin32_send(SOCKET s, const void *buf, int len, int flags); +extern int pgwin32_waitforsinglesocket(SOCKET s, int what, int timeout); + +extern PGDLLIMPORT int pgwin32_noblock; + +#endif /* FRONTEND */ + +/* in backend/port/win32_shmem.c */ +extern int pgwin32_ReserveSharedMemoryRegion(HANDLE); + +/* in backend/port/win32/crashdump.c */ +extern void pgwin32_install_crashdump_handler(void); + +/* in port/win32dlopen.c */ +extern void *dlopen(const char *file, int mode); +extern void *dlsym(void *handle, const char *symbol); +extern int dlclose(void *handle); +extern char *dlerror(void); + +#define RTLD_NOW 1 +#define RTLD_GLOBAL 0 + +/* in port/win32error.c */ +extern void _dosmaperr(unsigned long); + +/* in port/win32env.c */ +extern int pgwin32_putenv(const char *); +extern int pgwin32_setenv(const char *name, const char *value, int overwrite); +extern int pgwin32_unsetenv(const char *name); + +#define putenv(x) pgwin32_putenv(x) +#define setenv(x,y,z) pgwin32_setenv(x,y,z) +#define unsetenv(x) pgwin32_unsetenv(x) + +/* in port/win32security.c */ +extern int pgwin32_is_service(void); +extern int pgwin32_is_admin(void); + +/* Windows security token manipulation (in src/common/exec.c) */ +extern BOOL AddUserToTokenDacl(HANDLE hToken); + +/* Things that exist in MinGW headers, but need to be added to MSVC */ +#ifdef _MSC_VER + +#ifndef _WIN64 +typedef long ssize_t; +#else +typedef __int64 ssize_t; +#endif + +typedef unsigned short mode_t; + +#define F_OK 0 +#define W_OK 2 +#define R_OK 4 + +#endif /* _MSC_VER */ + +#if defined(__MINGW32__) || defined(__MINGW64__) +/* + * Mingw claims to have a strtof, and my reading of its 
source code suggests + * that it ought to work (and not need this hack), but the regression test + * results disagree with me; whether this is a version issue or not is not + * clear. However, using our wrapper (and the misrounded-input variant file, + * already required for supporting ancient systems) can't make things any + * worse, except for a tiny performance loss when reading zeros. + * + * See also cygwin.h for another instance of this. + */ +#define HAVE_BUGGY_STRTOF 1 +#endif + +/* in port/win32pread.c */ +extern ssize_t pg_pread(int fd, void *buf, size_t nbyte, off_t offset); + +/* in port/win32pwrite.c */ +extern ssize_t pg_pwrite(int fd, const void *buf, size_t nbyte, off_t offset); + +#endif /* PG_WIN32_PORT_H */ diff --git a/src/include/port/win32ntdll.h b/src/include/port/win32ntdll.h new file mode 100644 index 0000000..1ce9360 --- /dev/null +++ b/src/include/port/win32ntdll.h @@ -0,0 +1,34 @@ +/*------------------------------------------------------------------------- + * + * win32ntdll.h + * Dynamically loaded Windows NT functions. + * + * Portions Copyright (c) 2021-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/port/win32ntdll.h + * + *------------------------------------------------------------------------- + */ + +#ifndef WIN32NTDLL_H +#define WIN32NTDLL_H + +#include <ntstatus.h> +#include <winternl.h> + +#ifndef FLUSH_FLAGS_FILE_DATA_SYNC_ONLY +#define FLUSH_FLAGS_FILE_DATA_SYNC_ONLY 0x4 +#endif + +typedef NTSTATUS (__stdcall * RtlGetLastNtStatus_t) (void); +typedef ULONG (__stdcall * RtlNtStatusToDosError_t) (NTSTATUS); +typedef NTSTATUS (__stdcall * NtFlushBuffersFileEx_t) (HANDLE, ULONG, PVOID, ULONG, PIO_STATUS_BLOCK); + +extern PGDLLIMPORT RtlGetLastNtStatus_t pg_RtlGetLastNtStatus; +extern PGDLLIMPORT RtlNtStatusToDosError_t pg_RtlNtStatusToDosError; +extern PGDLLIMPORT NtFlushBuffersFileEx_t pg_NtFlushBuffersFileEx; + +extern int initialize_ntdll(void); + +#endif /* WIN32NTDLL_H */ |
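Editorial sketch, not part of the patch: the typedefs in win32ntdll.h are meant for runtime resolution against ntdll.dll. The real lookup is performed by initialize_ntdll(), implemented elsewhere in src/port; the general pattern looks roughly like the code below, which assumes win32ntdll.h is included and uses a hypothetical my_-prefixed variable:

#include <windows.h>

static RtlGetLastNtStatus_t my_RtlGetLastNtStatus;	/* hypothetical */

static int
resolve_ntdll_demo(void)
{
	HMODULE		ntdll = GetModuleHandleA("ntdll.dll");	/* ntdll is always loaded */

	if (ntdll == NULL)
		return -1;
	my_RtlGetLastNtStatus =
		(RtlGetLastNtStatus_t) GetProcAddress(ntdll, "RtlGetLastNtStatus");
	return (my_RtlGetLastNtStatus != NULL) ? 0 : -1;
}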