author     Daniel Baumann <daniel.baumann@progress-linux.org>  2024-05-04 12:17:33 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2024-05-04 12:17:33 +0000
commit     5e45211a64149b3c659b90ff2de6fa982a5a93ed (patch)
tree       739caf8c461053357daa9f162bef34516c7bf452 /src/backend/utils/activity
parent     Initial commit. (diff)
download   postgresql-15-5e45211a64149b3c659b90ff2de6fa982a5a93ed.tar.xz
           postgresql-15-5e45211a64149b3c659b90ff2de6fa982a5a93ed.zip
Adding upstream version 15.5. (tag: upstream/15.5)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/backend/utils/activity')
-rw-r--r--  src/backend/utils/activity/Makefile                 34
-rw-r--r--  src/backend/utils/activity/backend_progress.c      112
-rw-r--r--  src/backend/utils/activity/backend_status.c       1151
-rw-r--r--  src/backend/utils/activity/pgstat.c               1678
-rw-r--r--  src/backend/utils/activity/pgstat_archiver.c       111
-rw-r--r--  src/backend/utils/activity/pgstat_bgwriter.c       110
-rw-r--r--  src/backend/utils/activity/pgstat_checkpointer.c   121
-rw-r--r--  src/backend/utils/activity/pgstat_database.c       437
-rw-r--r--  src/backend/utils/activity/pgstat_function.c       243
-rw-r--r--  src/backend/utils/activity/pgstat_relation.c       938
-rw-r--r--  src/backend/utils/activity/pgstat_replslot.c       224
-rw-r--r--  src/backend/utils/activity/pgstat_shmem.c         1003
-rw-r--r--  src/backend/utils/activity/pgstat_slru.c           248
-rw-r--r--  src/backend/utils/activity/pgstat_subscription.c   110
-rw-r--r--  src/backend/utils/activity/pgstat_wal.c            182
-rw-r--r--  src/backend/utils/activity/pgstat_xact.c           391
-rw-r--r--  src/backend/utils/activity/wait_event.c            749
17 files changed, 7842 insertions(+), 0 deletions(-)
diff --git a/src/backend/utils/activity/Makefile b/src/backend/utils/activity/Makefile new file mode 100644 index 0000000..a2e8507 --- /dev/null +++ b/src/backend/utils/activity/Makefile @@ -0,0 +1,34 @@ +#------------------------------------------------------------------------- +# +# Makefile for backend/utils/activity +# +# Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group +# Portions Copyright (c) 1994, Regents of the University of California +# +# src/backend/utils/activity/Makefile +# +#------------------------------------------------------------------------- + +subdir = src/backend/utils/activity +top_builddir = ../../../.. +include $(top_builddir)/src/Makefile.global + +OBJS = \ + backend_progress.o \ + backend_status.o \ + pgstat.o \ + pgstat_archiver.o \ + pgstat_bgwriter.o \ + pgstat_checkpointer.o \ + pgstat_database.o \ + pgstat_function.o \ + pgstat_relation.o \ + pgstat_replslot.o \ + pgstat_shmem.o \ + pgstat_slru.o \ + pgstat_subscription.o \ + pgstat_wal.o \ + pgstat_xact.o \ + wait_event.o + +include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/utils/activity/backend_progress.c b/src/backend/utils/activity/backend_progress.c new file mode 100644 index 0000000..f291997 --- /dev/null +++ b/src/backend/utils/activity/backend_progress.c @@ -0,0 +1,112 @@ +/* ---------- + * backend_progress.c + * + * Command progress reporting infrastructure. + * + * Copyright (c) 2001-2022, PostgreSQL Global Development Group + * + * src/backend/utils/activity/backend_progress.c + * ---------- + */ +#include "postgres.h" + +#include "port/atomics.h" /* for memory barriers */ +#include "utils/backend_progress.h" +#include "utils/backend_status.h" + + +/*----------- + * pgstat_progress_start_command() - + * + * Set st_progress_command (and st_progress_command_target) in own backend + * entry. Also, zero-initialize st_progress_param array. + *----------- + */ +void +pgstat_progress_start_command(ProgressCommandType cmdtype, Oid relid) +{ + volatile PgBackendStatus *beentry = MyBEEntry; + + if (!beentry || !pgstat_track_activities) + return; + + PGSTAT_BEGIN_WRITE_ACTIVITY(beentry); + beentry->st_progress_command = cmdtype; + beentry->st_progress_command_target = relid; + MemSet(&beentry->st_progress_param, 0, sizeof(beentry->st_progress_param)); + PGSTAT_END_WRITE_ACTIVITY(beentry); +} + +/*----------- + * pgstat_progress_update_param() - + * + * Update index'th member in st_progress_param[] of own backend entry. + *----------- + */ +void +pgstat_progress_update_param(int index, int64 val) +{ + volatile PgBackendStatus *beentry = MyBEEntry; + + Assert(index >= 0 && index < PGSTAT_NUM_PROGRESS_PARAM); + + if (!beentry || !pgstat_track_activities) + return; + + PGSTAT_BEGIN_WRITE_ACTIVITY(beentry); + beentry->st_progress_param[index] = val; + PGSTAT_END_WRITE_ACTIVITY(beentry); +} + +/*----------- + * pgstat_progress_update_multi_param() - + * + * Update multiple members in st_progress_param[] of own backend entry. + * This is atomic; readers won't see intermediate states. 
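+ *
+ * A usage sketch (the indexes and variables here are hypothetical,
+ * for illustration only; real callers pass PROGRESS_* constants from
+ * commands/progress.h):
+ *
+ *     const int   idx[] = {0, 1};
+ *     const int64 val[] = {blocks_done, tuples_done};
+ *
+ *     pgstat_progress_update_multi_param(2, idx, val);
+ *
+ * Both slots become visible to readers together, because the writes
+ * happen inside a single PGSTAT_BEGIN/END_WRITE_ACTIVITY section.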
+ *----------- + */ +void +pgstat_progress_update_multi_param(int nparam, const int *index, + const int64 *val) +{ + volatile PgBackendStatus *beentry = MyBEEntry; + int i; + + if (!beentry || !pgstat_track_activities || nparam == 0) + return; + + PGSTAT_BEGIN_WRITE_ACTIVITY(beentry); + + for (i = 0; i < nparam; ++i) + { + Assert(index[i] >= 0 && index[i] < PGSTAT_NUM_PROGRESS_PARAM); + + beentry->st_progress_param[index[i]] = val[i]; + } + + PGSTAT_END_WRITE_ACTIVITY(beentry); +} + +/*----------- + * pgstat_progress_end_command() - + * + * Reset st_progress_command (and st_progress_command_target) in own backend + * entry. This signals the end of the command. + *----------- + */ +void +pgstat_progress_end_command(void) +{ + volatile PgBackendStatus *beentry = MyBEEntry; + + if (!beentry || !pgstat_track_activities) + return; + + if (beentry->st_progress_command == PROGRESS_COMMAND_INVALID) + return; + + PGSTAT_BEGIN_WRITE_ACTIVITY(beentry); + beentry->st_progress_command = PROGRESS_COMMAND_INVALID; + beentry->st_progress_command_target = InvalidOid; + PGSTAT_END_WRITE_ACTIVITY(beentry); +} diff --git a/src/backend/utils/activity/backend_status.c b/src/backend/utils/activity/backend_status.c new file mode 100644 index 0000000..3ecb15d --- /dev/null +++ b/src/backend/utils/activity/backend_status.c @@ -0,0 +1,1151 @@ +/* ---------- + * backend_status.c + * Backend status reporting infrastructure. + * + * Copyright (c) 2001-2022, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * src/backend/utils/activity/backend_status.c + * ---------- + */ +#include "postgres.h" + +#include "access/xact.h" +#include "libpq/libpq.h" +#include "miscadmin.h" +#include "pg_trace.h" +#include "pgstat.h" +#include "port/atomics.h" /* for memory barriers */ +#include "storage/ipc.h" +#include "storage/proc.h" /* for MyProc */ +#include "storage/sinvaladt.h" +#include "utils/ascii.h" +#include "utils/backend_status.h" +#include "utils/guc.h" /* for application_name */ +#include "utils/memutils.h" + + +/* ---------- + * Total number of backends including auxiliary + * + * We reserve a slot for each possible BackendId, plus one for each + * possible auxiliary process type. (This scheme assumes there is not + * more than one of any auxiliary process type at a time.) MaxBackends + * includes autovacuum workers and background workers as well. 
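+ *
+ * The resulting slot layout, mirrored by pgstat_beinit() below, is:
+ *
+ *     regular backend:    BackendStatusArray[MyBackendId - 1]
+ *     auxiliary process:  BackendStatusArray[MaxBackends + MyAuxProcType]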
+ * ---------- + */ +#define NumBackendStatSlots (MaxBackends + NUM_AUXPROCTYPES) + + +/* ---------- + * GUC parameters + * ---------- + */ +bool pgstat_track_activities = false; +int pgstat_track_activity_query_size = 1024; + + +/* exposed so that backend_progress.c can access it */ +PgBackendStatus *MyBEEntry = NULL; + + +static PgBackendStatus *BackendStatusArray = NULL; +static char *BackendAppnameBuffer = NULL; +static char *BackendClientHostnameBuffer = NULL; +static char *BackendActivityBuffer = NULL; +static Size BackendActivityBufferSize = 0; +#ifdef USE_SSL +static PgBackendSSLStatus *BackendSslStatusBuffer = NULL; +#endif +#ifdef ENABLE_GSS +static PgBackendGSSStatus *BackendGssStatusBuffer = NULL; +#endif + + +/* Status for backends including auxiliary */ +static LocalPgBackendStatus *localBackendStatusTable = NULL; + +/* Total number of backends including auxiliary */ +static int localNumBackends = 0; + +static MemoryContext backendStatusSnapContext; + + +static void pgstat_beshutdown_hook(int code, Datum arg); +static void pgstat_read_current_status(void); +static void pgstat_setup_backend_status_context(void); + + +/* + * Report shared-memory space needed by CreateSharedBackendStatus. + */ +Size +BackendStatusShmemSize(void) +{ + Size size; + + /* BackendStatusArray: */ + size = mul_size(sizeof(PgBackendStatus), NumBackendStatSlots); + /* BackendAppnameBuffer: */ + size = add_size(size, + mul_size(NAMEDATALEN, NumBackendStatSlots)); + /* BackendClientHostnameBuffer: */ + size = add_size(size, + mul_size(NAMEDATALEN, NumBackendStatSlots)); + /* BackendActivityBuffer: */ + size = add_size(size, + mul_size(pgstat_track_activity_query_size, NumBackendStatSlots)); +#ifdef USE_SSL + /* BackendSslStatusBuffer: */ + size = add_size(size, + mul_size(sizeof(PgBackendSSLStatus), NumBackendStatSlots)); +#endif +#ifdef ENABLE_GSS + /* BackendGssStatusBuffer: */ + size = add_size(size, + mul_size(sizeof(PgBackendGSSStatus), NumBackendStatSlots)); +#endif + return size; +} + +/* + * Initialize the shared status array and several string buffers + * during postmaster startup. + */ +void +CreateSharedBackendStatus(void) +{ + Size size; + bool found; + int i; + char *buffer; + + /* Create or attach to the shared array */ + size = mul_size(sizeof(PgBackendStatus), NumBackendStatSlots); + BackendStatusArray = (PgBackendStatus *) + ShmemInitStruct("Backend Status Array", size, &found); + + if (!found) + { + /* + * We're the first - initialize. + */ + MemSet(BackendStatusArray, 0, size); + } + + /* Create or attach to the shared appname buffer */ + size = mul_size(NAMEDATALEN, NumBackendStatSlots); + BackendAppnameBuffer = (char *) + ShmemInitStruct("Backend Application Name Buffer", size, &found); + + if (!found) + { + MemSet(BackendAppnameBuffer, 0, size); + + /* Initialize st_appname pointers. */ + buffer = BackendAppnameBuffer; + for (i = 0; i < NumBackendStatSlots; i++) + { + BackendStatusArray[i].st_appname = buffer; + buffer += NAMEDATALEN; + } + } + + /* Create or attach to the shared client hostname buffer */ + size = mul_size(NAMEDATALEN, NumBackendStatSlots); + BackendClientHostnameBuffer = (char *) + ShmemInitStruct("Backend Client Host Name Buffer", size, &found); + + if (!found) + { + MemSet(BackendClientHostnameBuffer, 0, size); + + /* Initialize st_clienthostname pointers. 
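+ * Each slot's pointer is aimed at a fixed NAMEDATALEN-sized chunk of
+ * the shared buffer (BackendClientHostnameBuffer + i * NAMEDATALEN);
+ * this is done once at creation time and never changed afterwards.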
*/ + buffer = BackendClientHostnameBuffer; + for (i = 0; i < NumBackendStatSlots; i++) + { + BackendStatusArray[i].st_clienthostname = buffer; + buffer += NAMEDATALEN; + } + } + + /* Create or attach to the shared activity buffer */ + BackendActivityBufferSize = mul_size(pgstat_track_activity_query_size, + NumBackendStatSlots); + BackendActivityBuffer = (char *) + ShmemInitStruct("Backend Activity Buffer", + BackendActivityBufferSize, + &found); + + if (!found) + { + MemSet(BackendActivityBuffer, 0, BackendActivityBufferSize); + + /* Initialize st_activity pointers. */ + buffer = BackendActivityBuffer; + for (i = 0; i < NumBackendStatSlots; i++) + { + BackendStatusArray[i].st_activity_raw = buffer; + buffer += pgstat_track_activity_query_size; + } + } + +#ifdef USE_SSL + /* Create or attach to the shared SSL status buffer */ + size = mul_size(sizeof(PgBackendSSLStatus), NumBackendStatSlots); + BackendSslStatusBuffer = (PgBackendSSLStatus *) + ShmemInitStruct("Backend SSL Status Buffer", size, &found); + + if (!found) + { + PgBackendSSLStatus *ptr; + + MemSet(BackendSslStatusBuffer, 0, size); + + /* Initialize st_sslstatus pointers. */ + ptr = BackendSslStatusBuffer; + for (i = 0; i < NumBackendStatSlots; i++) + { + BackendStatusArray[i].st_sslstatus = ptr; + ptr++; + } + } +#endif + +#ifdef ENABLE_GSS + /* Create or attach to the shared GSSAPI status buffer */ + size = mul_size(sizeof(PgBackendGSSStatus), NumBackendStatSlots); + BackendGssStatusBuffer = (PgBackendGSSStatus *) + ShmemInitStruct("Backend GSS Status Buffer", size, &found); + + if (!found) + { + PgBackendGSSStatus *ptr; + + MemSet(BackendGssStatusBuffer, 0, size); + + /* Initialize st_gssstatus pointers. */ + ptr = BackendGssStatusBuffer; + for (i = 0; i < NumBackendStatSlots; i++) + { + BackendStatusArray[i].st_gssstatus = ptr; + ptr++; + } + } +#endif +} + +/* + * Initialize pgstats backend activity state, and set up our on-proc-exit + * hook. Called from InitPostgres and AuxiliaryProcessMain. For auxiliary + * process, MyBackendId is invalid. Otherwise, MyBackendId must be set, but we + * must not have started any transaction yet (since the exit hook must run + * after the last transaction exit). + * + * NOTE: MyDatabaseId isn't set yet; so the shutdown hook has to be careful. + */ +void +pgstat_beinit(void) +{ + /* Initialize MyBEEntry */ + if (MyBackendId != InvalidBackendId) + { + Assert(MyBackendId >= 1 && MyBackendId <= MaxBackends); + MyBEEntry = &BackendStatusArray[MyBackendId - 1]; + } + else + { + /* Must be an auxiliary process */ + Assert(MyAuxProcType != NotAnAuxProcess); + + /* + * Assign the MyBEEntry for an auxiliary process. Since it doesn't + * have a BackendId, the slot is statically allocated based on the + * auxiliary process type (MyAuxProcType). Backends use slots indexed + * in the range from 1 to MaxBackends (inclusive), so we use + * MaxBackends + AuxBackendType + 1 as the index of the slot for an + * auxiliary process. + */ + MyBEEntry = &BackendStatusArray[MaxBackends + MyAuxProcType]; + } + + /* Set up a process-exit hook to clean up */ + on_shmem_exit(pgstat_beshutdown_hook, 0); +} + + +/* ---------- + * pgstat_bestart() - + * + * Initialize this backend's entry in the PgBackendStatus array. + * Called from InitPostgres. + * + * Apart from auxiliary processes, MyBackendId, MyDatabaseId, + * session userid, and application_name must be set for a + * backend (hence, this cannot be combined with pgbestat_beinit). 
+ * Note also that we must be inside a transaction if this isn't an aux + * process, as we may need to do encoding conversion on some strings. + * ---------- + */ +void +pgstat_bestart(void) +{ + volatile PgBackendStatus *vbeentry = MyBEEntry; + PgBackendStatus lbeentry; +#ifdef USE_SSL + PgBackendSSLStatus lsslstatus; +#endif +#ifdef ENABLE_GSS + PgBackendGSSStatus lgssstatus; +#endif + + /* pgstats state must be initialized from pgstat_beinit() */ + Assert(vbeentry != NULL); + + /* + * To minimize the time spent modifying the PgBackendStatus entry, and + * avoid risk of errors inside the critical section, we first copy the + * shared-memory struct to a local variable, then modify the data in the + * local variable, then copy the local variable back to shared memory. + * Only the last step has to be inside the critical section. + * + * Most of the data we copy from shared memory is just going to be + * overwritten, but the struct's not so large that it's worth the + * maintenance hassle to copy only the needful fields. + */ + memcpy(&lbeentry, + unvolatize(PgBackendStatus *, vbeentry), + sizeof(PgBackendStatus)); + + /* These structs can just start from zeroes each time, though */ +#ifdef USE_SSL + memset(&lsslstatus, 0, sizeof(lsslstatus)); +#endif +#ifdef ENABLE_GSS + memset(&lgssstatus, 0, sizeof(lgssstatus)); +#endif + + /* + * Now fill in all the fields of lbeentry, except for strings that are + * out-of-line data. Those have to be handled separately, below. + */ + lbeentry.st_procpid = MyProcPid; + lbeentry.st_backendType = MyBackendType; + lbeentry.st_proc_start_timestamp = MyStartTimestamp; + lbeentry.st_activity_start_timestamp = 0; + lbeentry.st_state_start_timestamp = 0; + lbeentry.st_xact_start_timestamp = 0; + lbeentry.st_databaseid = MyDatabaseId; + + /* We have userid for client-backends, wal-sender and bgworker processes */ + if (lbeentry.st_backendType == B_BACKEND + || lbeentry.st_backendType == B_WAL_SENDER + || lbeentry.st_backendType == B_BG_WORKER) + lbeentry.st_userid = GetSessionUserId(); + else + lbeentry.st_userid = InvalidOid; + + /* + * We may not have a MyProcPort (eg, if this is the autovacuum process). + * If so, use all-zeroes client address, which is dealt with specially in + * pg_stat_get_backend_client_addr and pg_stat_get_backend_client_port. 
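+ * (Both of those functions return NULL when they see such an
+ * all-zeroes address.)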
+ */ + if (MyProcPort) + memcpy(&lbeentry.st_clientaddr, &MyProcPort->raddr, + sizeof(lbeentry.st_clientaddr)); + else + MemSet(&lbeentry.st_clientaddr, 0, sizeof(lbeentry.st_clientaddr)); + +#ifdef USE_SSL + if (MyProcPort && MyProcPort->ssl_in_use) + { + lbeentry.st_ssl = true; + lsslstatus.ssl_bits = be_tls_get_cipher_bits(MyProcPort); + strlcpy(lsslstatus.ssl_version, be_tls_get_version(MyProcPort), NAMEDATALEN); + strlcpy(lsslstatus.ssl_cipher, be_tls_get_cipher(MyProcPort), NAMEDATALEN); + be_tls_get_peer_subject_name(MyProcPort, lsslstatus.ssl_client_dn, NAMEDATALEN); + be_tls_get_peer_serial(MyProcPort, lsslstatus.ssl_client_serial, NAMEDATALEN); + be_tls_get_peer_issuer_name(MyProcPort, lsslstatus.ssl_issuer_dn, NAMEDATALEN); + } + else + { + lbeentry.st_ssl = false; + } +#else + lbeentry.st_ssl = false; +#endif + +#ifdef ENABLE_GSS + if (MyProcPort && MyProcPort->gss != NULL) + { + const char *princ = be_gssapi_get_princ(MyProcPort); + + lbeentry.st_gss = true; + lgssstatus.gss_auth = be_gssapi_get_auth(MyProcPort); + lgssstatus.gss_enc = be_gssapi_get_enc(MyProcPort); + if (princ) + strlcpy(lgssstatus.gss_princ, princ, NAMEDATALEN); + } + else + { + lbeentry.st_gss = false; + } +#else + lbeentry.st_gss = false; +#endif + + lbeentry.st_state = STATE_UNDEFINED; + lbeentry.st_progress_command = PROGRESS_COMMAND_INVALID; + lbeentry.st_progress_command_target = InvalidOid; + lbeentry.st_query_id = UINT64CONST(0); + + /* + * we don't zero st_progress_param here to save cycles; nobody should + * examine it until st_progress_command has been set to something other + * than PROGRESS_COMMAND_INVALID + */ + + /* + * We're ready to enter the critical section that fills the shared-memory + * status entry. We follow the protocol of bumping st_changecount before + * and after; and make sure it's even afterwards. We use a volatile + * pointer here to ensure the compiler doesn't try to get cute. + */ + PGSTAT_BEGIN_WRITE_ACTIVITY(vbeentry); + + /* make sure we'll memcpy the same st_changecount back */ + lbeentry.st_changecount = vbeentry->st_changecount; + + memcpy(unvolatize(PgBackendStatus *, vbeentry), + &lbeentry, + sizeof(PgBackendStatus)); + + /* + * We can write the out-of-line strings and structs using the pointers + * that are in lbeentry; this saves some de-volatilizing messiness. + */ + lbeentry.st_appname[0] = '\0'; + if (MyProcPort && MyProcPort->remote_hostname) + strlcpy(lbeentry.st_clienthostname, MyProcPort->remote_hostname, + NAMEDATALEN); + else + lbeentry.st_clienthostname[0] = '\0'; + lbeentry.st_activity_raw[0] = '\0'; + /* Also make sure the last byte in each string area is always 0 */ + lbeentry.st_appname[NAMEDATALEN - 1] = '\0'; + lbeentry.st_clienthostname[NAMEDATALEN - 1] = '\0'; + lbeentry.st_activity_raw[pgstat_track_activity_query_size - 1] = '\0'; + +#ifdef USE_SSL + memcpy(lbeentry.st_sslstatus, &lsslstatus, sizeof(PgBackendSSLStatus)); +#endif +#ifdef ENABLE_GSS + memcpy(lbeentry.st_gssstatus, &lgssstatus, sizeof(PgBackendGSSStatus)); +#endif + + PGSTAT_END_WRITE_ACTIVITY(vbeentry); + + /* Update app name to current GUC setting */ + if (application_name) + pgstat_report_appname(application_name); +} + +/* + * Clear out our entry in the PgBackendStatus array. + */ +static void +pgstat_beshutdown_hook(int code, Datum arg) +{ + volatile PgBackendStatus *beentry = MyBEEntry; + + /* + * Clear my status entry, following the protocol of bumping st_changecount + * before and after. We use a volatile pointer here to ensure the + * compiler doesn't try to get cute. 
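+ *
+ * In outline, the PGSTAT_BEGIN/END_WRITE_ACTIVITY macros implement
+ * (see backend_status.h for the real definitions, which also wrap the
+ * update in a critical section):
+ *
+ *     beentry->st_changecount++;      now odd: entry is unstable
+ *     pg_write_barrier();
+ *     ... modify the entry ...
+ *     pg_write_barrier();
+ *     beentry->st_changecount++;      even again: entry is stable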
+ */ + PGSTAT_BEGIN_WRITE_ACTIVITY(beentry); + + beentry->st_procpid = 0; /* mark invalid */ + + PGSTAT_END_WRITE_ACTIVITY(beentry); + + /* so that functions can check if backend_status.c is up via MyBEEntry */ + MyBEEntry = NULL; +} + +/* + * Discard any data collected in the current transaction. Any subsequent + * request will cause new snapshots to be read. + * + * This is also invoked during transaction commit or abort to discard the + * no-longer-wanted snapshot. + */ +void +pgstat_clear_backend_activity_snapshot(void) +{ + /* Release memory, if any was allocated */ + if (backendStatusSnapContext) + { + MemoryContextDelete(backendStatusSnapContext); + backendStatusSnapContext = NULL; + } + + /* Reset variables */ + localBackendStatusTable = NULL; + localNumBackends = 0; +} + +static void +pgstat_setup_backend_status_context(void) +{ + if (!backendStatusSnapContext) + backendStatusSnapContext = AllocSetContextCreate(TopMemoryContext, + "Backend Status Snapshot", + ALLOCSET_SMALL_SIZES); +} + + +/* ---------- + * pgstat_report_activity() - + * + * Called from tcop/postgres.c to report what the backend is actually doing + * (but note cmd_str can be NULL for certain cases). + * + * All updates of the status entry follow the protocol of bumping + * st_changecount before and after. We use a volatile pointer here to + * ensure the compiler doesn't try to get cute. + * ---------- + */ +void +pgstat_report_activity(BackendState state, const char *cmd_str) +{ + volatile PgBackendStatus *beentry = MyBEEntry; + TimestampTz start_timestamp; + TimestampTz current_timestamp; + int len = 0; + + TRACE_POSTGRESQL_STATEMENT_STATUS(cmd_str); + + if (!beentry) + return; + + if (!pgstat_track_activities) + { + if (beentry->st_state != STATE_DISABLED) + { + volatile PGPROC *proc = MyProc; + + /* + * track_activities is disabled, but we last reported a + * non-disabled state. As our final update, change the state and + * clear fields we will not be updating anymore. + */ + PGSTAT_BEGIN_WRITE_ACTIVITY(beentry); + beentry->st_state = STATE_DISABLED; + beentry->st_state_start_timestamp = 0; + beentry->st_activity_raw[0] = '\0'; + beentry->st_activity_start_timestamp = 0; + /* st_xact_start_timestamp and wait_event_info are also disabled */ + beentry->st_xact_start_timestamp = 0; + beentry->st_query_id = UINT64CONST(0); + proc->wait_event_info = 0; + PGSTAT_END_WRITE_ACTIVITY(beentry); + } + return; + } + + /* + * To minimize the time spent modifying the entry, and avoid risk of + * errors inside the critical section, fetch all the needed data first. + */ + start_timestamp = GetCurrentStatementStartTimestamp(); + if (cmd_str != NULL) + { + /* + * Compute length of to-be-stored string unaware of multi-byte + * characters. For speed reasons that'll get corrected on read, rather + * than computed every write. + */ + len = Min(strlen(cmd_str), pgstat_track_activity_query_size - 1); + } + current_timestamp = GetCurrentTimestamp(); + + /* + * If the state has changed from "active" or "idle in transaction", + * calculate the duration. 
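+ * (The resulting microsecond counts feed pg_stat_database's
+ * active_time and idle_in_transaction_time columns, via
+ * pgstat_count_conn_active_time() and pgstat_count_conn_txn_idle_time().)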
+ */ + if ((beentry->st_state == STATE_RUNNING || + beentry->st_state == STATE_FASTPATH || + beentry->st_state == STATE_IDLEINTRANSACTION || + beentry->st_state == STATE_IDLEINTRANSACTION_ABORTED) && + state != beentry->st_state) + { + long secs; + int usecs; + + TimestampDifference(beentry->st_state_start_timestamp, + current_timestamp, + &secs, &usecs); + + if (beentry->st_state == STATE_RUNNING || + beentry->st_state == STATE_FASTPATH) + pgstat_count_conn_active_time((PgStat_Counter) secs * 1000000 + usecs); + else + pgstat_count_conn_txn_idle_time((PgStat_Counter) secs * 1000000 + usecs); + } + + /* + * Now update the status entry + */ + PGSTAT_BEGIN_WRITE_ACTIVITY(beentry); + + beentry->st_state = state; + beentry->st_state_start_timestamp = current_timestamp; + + /* + * If a new query is started, we reset the query identifier as it'll only + * be known after parse analysis, to avoid reporting last query's + * identifier. + */ + if (state == STATE_RUNNING) + beentry->st_query_id = UINT64CONST(0); + + if (cmd_str != NULL) + { + memcpy((char *) beentry->st_activity_raw, cmd_str, len); + beentry->st_activity_raw[len] = '\0'; + beentry->st_activity_start_timestamp = start_timestamp; + } + + PGSTAT_END_WRITE_ACTIVITY(beentry); +} + +/* -------- + * pgstat_report_query_id() - + * + * Called to update top-level query identifier. + * -------- + */ +void +pgstat_report_query_id(uint64 query_id, bool force) +{ + volatile PgBackendStatus *beentry = MyBEEntry; + + /* + * if track_activities is disabled, st_query_id should already have been + * reset + */ + if (!beentry || !pgstat_track_activities) + return; + + /* + * We only report the top-level query identifiers. The stored query_id is + * reset when a backend calls pgstat_report_activity(STATE_RUNNING), or + * with an explicit call to this function using the force flag. If the + * saved query identifier is not zero it means that it's not a top-level + * command, so ignore the one provided unless it's an explicit call to + * reset the identifier. + */ + if (beentry->st_query_id != 0 && !force) + return; + + /* + * Update my status entry, following the protocol of bumping + * st_changecount before and after. We use a volatile pointer here to + * ensure the compiler doesn't try to get cute. + */ + PGSTAT_BEGIN_WRITE_ACTIVITY(beentry); + beentry->st_query_id = query_id; + PGSTAT_END_WRITE_ACTIVITY(beentry); +} + + +/* ---------- + * pgstat_report_appname() - + * + * Called to update our application name. + * ---------- + */ +void +pgstat_report_appname(const char *appname) +{ + volatile PgBackendStatus *beentry = MyBEEntry; + int len; + + if (!beentry) + return; + + /* This should be unnecessary if GUC did its job, but be safe */ + len = pg_mbcliplen(appname, strlen(appname), NAMEDATALEN - 1); + + /* + * Update my status entry, following the protocol of bumping + * st_changecount before and after. We use a volatile pointer here to + * ensure the compiler doesn't try to get cute. + */ + PGSTAT_BEGIN_WRITE_ACTIVITY(beentry); + + memcpy((char *) beentry->st_appname, appname, len); + beentry->st_appname[len] = '\0'; + + PGSTAT_END_WRITE_ACTIVITY(beentry); +} + +/* + * Report current transaction start timestamp as the specified value. + * Zero means there is no active transaction. + */ +void +pgstat_report_xact_timestamp(TimestampTz tstamp) +{ + volatile PgBackendStatus *beentry = MyBEEntry; + + if (!pgstat_track_activities || !beentry) + return; + + /* + * Update my status entry, following the protocol of bumping + * st_changecount before and after. 
We use a volatile pointer here to + * ensure the compiler doesn't try to get cute. + */ + PGSTAT_BEGIN_WRITE_ACTIVITY(beentry); + + beentry->st_xact_start_timestamp = tstamp; + + PGSTAT_END_WRITE_ACTIVITY(beentry); +} + +/* ---------- + * pgstat_read_current_status() - + * + * Copy the current contents of the PgBackendStatus array to local memory, + * if not already done in this transaction. + * ---------- + */ +static void +pgstat_read_current_status(void) +{ + volatile PgBackendStatus *beentry; + LocalPgBackendStatus *localtable; + LocalPgBackendStatus *localentry; + char *localappname, + *localclienthostname, + *localactivity; +#ifdef USE_SSL + PgBackendSSLStatus *localsslstatus; +#endif +#ifdef ENABLE_GSS + PgBackendGSSStatus *localgssstatus; +#endif + int i; + + if (localBackendStatusTable) + return; /* already done */ + + pgstat_setup_backend_status_context(); + + /* + * Allocate storage for local copy of state data. We can presume that + * none of these requests overflow size_t, because we already calculated + * the same values using mul_size during shmem setup. However, with + * probably-silly values of pgstat_track_activity_query_size and + * max_connections, the localactivity buffer could exceed 1GB, so use + * "huge" allocation for that one. + */ + localtable = (LocalPgBackendStatus *) + MemoryContextAlloc(backendStatusSnapContext, + sizeof(LocalPgBackendStatus) * NumBackendStatSlots); + localappname = (char *) + MemoryContextAlloc(backendStatusSnapContext, + NAMEDATALEN * NumBackendStatSlots); + localclienthostname = (char *) + MemoryContextAlloc(backendStatusSnapContext, + NAMEDATALEN * NumBackendStatSlots); + localactivity = (char *) + MemoryContextAllocHuge(backendStatusSnapContext, + (Size) pgstat_track_activity_query_size * + (Size) NumBackendStatSlots); +#ifdef USE_SSL + localsslstatus = (PgBackendSSLStatus *) + MemoryContextAlloc(backendStatusSnapContext, + sizeof(PgBackendSSLStatus) * NumBackendStatSlots); +#endif +#ifdef ENABLE_GSS + localgssstatus = (PgBackendGSSStatus *) + MemoryContextAlloc(backendStatusSnapContext, + sizeof(PgBackendGSSStatus) * NumBackendStatSlots); +#endif + + localNumBackends = 0; + + beentry = BackendStatusArray; + localentry = localtable; + for (i = 1; i <= NumBackendStatSlots; i++) + { + /* + * Follow the protocol of retrying if st_changecount changes while we + * copy the entry, or if it's odd. (The check for odd is needed to + * cover the case where we are able to completely copy the entry while + * the source backend is between increment steps.) We use a volatile + * pointer here to ensure the compiler doesn't try to get cute. + */ + for (;;) + { + int before_changecount; + int after_changecount; + + pgstat_begin_read_activity(beentry, before_changecount); + + localentry->backendStatus.st_procpid = beentry->st_procpid; + /* Skip all the data-copying work if entry is not in use */ + if (localentry->backendStatus.st_procpid > 0) + { + memcpy(&localentry->backendStatus, unvolatize(PgBackendStatus *, beentry), sizeof(PgBackendStatus)); + + /* + * For each PgBackendStatus field that is a pointer, copy the + * pointed-to data, then adjust the local copy of the pointer + * field to point at the local copy of the data. + * + * strcpy is safe even if the string is modified concurrently, + * because there's always a \0 at the end of the buffer. 
+ */ + strcpy(localappname, (char *) beentry->st_appname); + localentry->backendStatus.st_appname = localappname; + strcpy(localclienthostname, (char *) beentry->st_clienthostname); + localentry->backendStatus.st_clienthostname = localclienthostname; + strcpy(localactivity, (char *) beentry->st_activity_raw); + localentry->backendStatus.st_activity_raw = localactivity; +#ifdef USE_SSL + if (beentry->st_ssl) + { + memcpy(localsslstatus, beentry->st_sslstatus, sizeof(PgBackendSSLStatus)); + localentry->backendStatus.st_sslstatus = localsslstatus; + } +#endif +#ifdef ENABLE_GSS + if (beentry->st_gss) + { + memcpy(localgssstatus, beentry->st_gssstatus, sizeof(PgBackendGSSStatus)); + localentry->backendStatus.st_gssstatus = localgssstatus; + } +#endif + } + + pgstat_end_read_activity(beentry, after_changecount); + + if (pgstat_read_activity_complete(before_changecount, + after_changecount)) + break; + + /* Make sure we can break out of loop if stuck... */ + CHECK_FOR_INTERRUPTS(); + } + + beentry++; + /* Only valid entries get included into the local array */ + if (localentry->backendStatus.st_procpid > 0) + { + BackendIdGetTransactionIds(i, + &localentry->backend_xid, + &localentry->backend_xmin); + + localentry++; + localappname += NAMEDATALEN; + localclienthostname += NAMEDATALEN; + localactivity += pgstat_track_activity_query_size; +#ifdef USE_SSL + localsslstatus++; +#endif +#ifdef ENABLE_GSS + localgssstatus++; +#endif + localNumBackends++; + } + } + + /* Set the pointer only after completion of a valid table */ + localBackendStatusTable = localtable; +} + + +/* ---------- + * pgstat_get_backend_current_activity() - + * + * Return a string representing the current activity of the backend with + * the specified PID. This looks directly at the BackendStatusArray, + * and so will provide current information regardless of the age of our + * transaction's snapshot of the status array. + * + * It is the caller's responsibility to invoke this only for backends whose + * state is expected to remain stable while the result is in use. The + * only current use is in deadlock reporting, where we can expect that + * the target backend is blocked on a lock. (There are corner cases + * where the target's wait could get aborted while we are looking at it, + * but the very worst consequence is to return a pointer to a string + * that's been changed, so we won't worry too much.) + * + * Note: return strings for special cases match pg_stat_get_backend_activity. + * ---------- + */ +const char * +pgstat_get_backend_current_activity(int pid, bool checkUser) +{ + PgBackendStatus *beentry; + int i; + + beentry = BackendStatusArray; + for (i = 1; i <= MaxBackends; i++) + { + /* + * Although we expect the target backend's entry to be stable, that + * doesn't imply that anyone else's is. To avoid identifying the + * wrong backend, while we check for a match to the desired PID we + * must follow the protocol of retrying if st_changecount changes + * while we examine the entry, or if it's odd. (This might be + * unnecessary, since fetching or storing an int is almost certainly + * atomic, but let's play it safe.) We use a volatile pointer here to + * ensure the compiler doesn't try to get cute. 
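+ *
+ * Roughly, the read-side macros used below amount to this sketch (see
+ * backend_status.h for the real definitions):
+ *
+ *     before = beentry->st_changecount;    followed by a read barrier
+ *     ... examine the entry ...
+ *     after = beentry->st_changecount;     preceded by a read barrier
+ *
+ * and the read is complete once before == after and before is even.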
+ */ + volatile PgBackendStatus *vbeentry = beentry; + bool found; + + for (;;) + { + int before_changecount; + int after_changecount; + + pgstat_begin_read_activity(vbeentry, before_changecount); + + found = (vbeentry->st_procpid == pid); + + pgstat_end_read_activity(vbeentry, after_changecount); + + if (pgstat_read_activity_complete(before_changecount, + after_changecount)) + break; + + /* Make sure we can break out of loop if stuck... */ + CHECK_FOR_INTERRUPTS(); + } + + if (found) + { + /* Now it is safe to use the non-volatile pointer */ + if (checkUser && !superuser() && beentry->st_userid != GetUserId()) + return "<insufficient privilege>"; + else if (*(beentry->st_activity_raw) == '\0') + return "<command string not enabled>"; + else + { + /* this'll leak a bit of memory, but that seems acceptable */ + return pgstat_clip_activity(beentry->st_activity_raw); + } + } + + beentry++; + } + + /* If we get here, caller is in error ... */ + return "<backend information not available>"; +} + +/* ---------- + * pgstat_get_crashed_backend_activity() - + * + * Return a string representing the current activity of the backend with + * the specified PID. Like the function above, but reads shared memory with + * the expectation that it may be corrupt. On success, copy the string + * into the "buffer" argument and return that pointer. On failure, + * return NULL. + * + * This function is only intended to be used by the postmaster to report the + * query that crashed a backend. In particular, no attempt is made to + * follow the correct concurrency protocol when accessing the + * BackendStatusArray. But that's OK, in the worst case we'll return a + * corrupted message. We also must take care not to trip on ereport(ERROR). + * ---------- + */ +const char * +pgstat_get_crashed_backend_activity(int pid, char *buffer, int buflen) +{ + volatile PgBackendStatus *beentry; + int i; + + beentry = BackendStatusArray; + + /* + * We probably shouldn't get here before shared memory has been set up, + * but be safe. + */ + if (beentry == NULL || BackendActivityBuffer == NULL) + return NULL; + + for (i = 1; i <= MaxBackends; i++) + { + if (beentry->st_procpid == pid) + { + /* Read pointer just once, so it can't change after validation */ + const char *activity = beentry->st_activity_raw; + const char *activity_last; + + /* + * We mustn't access activity string before we verify that it + * falls within the BackendActivityBuffer. To make sure that the + * entire string including its ending is contained within the + * buffer, subtract one activity length from the buffer size. + */ + activity_last = BackendActivityBuffer + BackendActivityBufferSize + - pgstat_track_activity_query_size; + + if (activity < BackendActivityBuffer || + activity > activity_last) + return NULL; + + /* If no string available, no point in a report */ + if (activity[0] == '\0') + return NULL; + + /* + * Copy only ASCII-safe characters so we don't run into encoding + * problems when reporting the message; and be sure not to run off + * the end of memory. As only ASCII characters are reported, it + * doesn't seem necessary to perform multibyte aware clipping. + */ + ascii_safe_strlcpy(buffer, activity, + Min(buflen, pgstat_track_activity_query_size)); + + return buffer; + } + + beentry++; + } + + /* PID not found */ + return NULL; +} + +/* ---------- + * pgstat_get_my_query_id() - + * + * Return current backend's query identifier. 
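+ * (This is the value exposed as pg_stat_activity.query_id; it stays
+ * zero unless compute_query_id is enabled or an extension computes
+ * query identifiers.)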
+ */ +uint64 +pgstat_get_my_query_id(void) +{ + if (!MyBEEntry) + return 0; + + /* + * There's no need for a lock around pgstat_begin_read_activity / + * pgstat_end_read_activity here as it's only called from + * pg_stat_get_activity which is already protected, or from the same + * backend which means that there won't be concurrent writes. + */ + return MyBEEntry->st_query_id; +} + + +/* ---------- + * pgstat_fetch_stat_beentry() - + * + * Support function for the SQL-callable pgstat* functions. Returns + * our local copy of the current-activity entry for one backend. + * + * NB: caller is responsible for a check if the user is permitted to see + * this info (especially the querystring). + * ---------- + */ +PgBackendStatus * +pgstat_fetch_stat_beentry(int beid) +{ + pgstat_read_current_status(); + + if (beid < 1 || beid > localNumBackends) + return NULL; + + return &localBackendStatusTable[beid - 1].backendStatus; +} + + +/* ---------- + * pgstat_fetch_stat_local_beentry() - + * + * Like pgstat_fetch_stat_beentry() but with locally computed additions (like + * xid and xmin values of the backend) + * + * NB: caller is responsible for a check if the user is permitted to see + * this info (especially the querystring). + * ---------- + */ +LocalPgBackendStatus * +pgstat_fetch_stat_local_beentry(int beid) +{ + pgstat_read_current_status(); + + if (beid < 1 || beid > localNumBackends) + return NULL; + + return &localBackendStatusTable[beid - 1]; +} + + +/* ---------- + * pgstat_fetch_stat_numbackends() - + * + * Support function for the SQL-callable pgstat* functions. Returns + * the maximum current backend id. + * ---------- + */ +int +pgstat_fetch_stat_numbackends(void) +{ + pgstat_read_current_status(); + + return localNumBackends; +} + +/* + * Convert a potentially unsafely truncated activity string (see + * PgBackendStatus.st_activity_raw's documentation) into a correctly truncated + * one. + * + * The returned string is allocated in the caller's memory context and may be + * freed. + */ +char * +pgstat_clip_activity(const char *raw_activity) +{ + char *activity; + int rawlen; + int cliplen; + + /* + * Some callers, like pgstat_get_backend_current_activity(), do not + * guarantee that the buffer isn't concurrently modified. We try to take + * care that the buffer is always terminated by a NUL byte regardless, but + * let's still be paranoid about the string's length. In those cases the + * underlying buffer is guaranteed to be pgstat_track_activity_query_size + * large. + */ + activity = pnstrdup(raw_activity, pgstat_track_activity_query_size - 1); + + /* now double-guaranteed to be NUL terminated */ + rawlen = strlen(activity); + + /* + * All supported server-encodings make it possible to determine the length + * of a multi-byte character from its first byte (this is not the case for + * client encodings, see GB18030). As st_activity is always stored using + * server encoding, this allows us to perform multi-byte aware truncation, + * even if the string earlier was truncated in the middle of a multi-byte + * character. + */ + cliplen = pg_mbcliplen(activity, rawlen, + pgstat_track_activity_query_size - 1); + + activity[cliplen] = '\0'; + + return activity; +} diff --git a/src/backend/utils/activity/pgstat.c b/src/backend/utils/activity/pgstat.c new file mode 100644 index 0000000..84d65a7 --- /dev/null +++ b/src/backend/utils/activity/pgstat.c @@ -0,0 +1,1678 @@ +/* ---------- + * pgstat.c + * Infrastructure for the cumulative statistics system. 
+ * + * The cumulative statistics system accumulates statistics for different kinds + * of objects. Some kinds of statistics are collected for a fixed number of + * objects (most commonly 1), e.g., checkpointer statistics. Other kinds of + * statistics are collected for a varying number of objects + * (e.g. relations). See PgStat_KindInfo for a list of currently handled + * statistics. + * + * Statistics are loaded from the filesystem during startup (by the startup + * process), unless preceded by a crash, in which case all stats are + * discarded. They are written out by the checkpointer process just before + * shutting down, except when shutting down in immediate mode. + * + * Fixed-numbered stats are stored in plain (non-dynamic) shared memory. + * + * Statistics for variable-numbered objects are stored in dynamic shared + * memory and can be found via a dshash hashtable. The statistics counters are + * not part of the dshash entry (PgStatShared_HashEntry) directly, but are + * separately allocated (PgStatShared_HashEntry->body). The separate + * allocation allows different kinds of statistics to be stored in the same + * hashtable without wasting space in PgStatShared_HashEntry. + * + * Variable-numbered stats are addressed by PgStat_HashKey while running. It + * is not possible to have statistics for an object that cannot be addressed + * that way at runtime. A wider identifier can be used when serializing to + * disk (used for replication slot stats). + * + * To avoid contention on the shared hashtable, each backend has a + * backend-local hashtable (pgStatEntryRefHash) in front of the shared + * hashtable, containing references (PgStat_EntryRef) to shared hashtable + * entries. The shared hashtable only needs to be accessed when no prior + * reference is found in the local hashtable. Besides pointing to the + * shared hashtable entry (PgStatShared_HashEntry) PgStat_EntryRef also + * contains a pointer to the shared statistics data, as a process-local + * address, to reduce access costs. + * + * The names for structs stored in shared memory are prefixed with + * PgStatShared instead of PgStat. Each stats entry in shared memory is + * protected by a dedicated lwlock. + * + * Most stats updates are first accumulated locally in each process as pending + * entries, then later flushed to shared memory (just after commit, or by + * idle-timeout). This practically eliminates contention on individual stats + * entries. For most kinds of variable-numbered pending stats data is stored + * in PgStat_EntryRef->pending. All entries with pending data are in the + * pgStatPending list. Pending statistics updates are flushed out by + * pgstat_report_stat(). + * + * The behavior of different kinds of statistics is determined by the kind's + * entry in pgstat_kind_infos, see PgStat_KindInfo for details. + * + * The consistency of read accesses to statistics can be configured using the + * stats_fetch_consistency GUC (see config.sgml and monitoring.sgml for the + * settings). When using PGSTAT_FETCH_CONSISTENCY_CACHE or + * PGSTAT_FETCH_CONSISTENCY_SNAPSHOT statistics are stored in + * pgStatLocal.snapshot. + * + * To keep things manageable, stats handling is split across several + * files. 
Infrastructure pieces are in: + * - pgstat.c - this file, to tie it all together + * - pgstat_shmem.c - nearly everything dealing with shared memory, including + * the maintenance of hashtable entries + * - pgstat_xact.c - transactional integration, including the transactional + * creation and dropping of stats entries + * + * Each statistics kind is handled in a dedicated file: + * - pgstat_archiver.c + * - pgstat_bgwriter.c + * - pgstat_checkpointer.c + * - pgstat_database.c + * - pgstat_function.c + * - pgstat_relation.c + * - pgstat_replslot.c + * - pgstat_slru.c + * - pgstat_subscription.c + * - pgstat_wal.c + * + * Whenever possible infrastructure files should not contain code related to + * specific kinds of stats. + * + * + * Copyright (c) 2001-2022, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/utils/activity/pgstat.c + * ---------- + */ +#include "postgres.h" + +#include <unistd.h> + +#include "access/transam.h" +#include "access/xact.h" +#include "lib/dshash.h" +#include "pgstat.h" +#include "port/atomics.h" +#include "storage/fd.h" +#include "storage/ipc.h" +#include "storage/lwlock.h" +#include "storage/pg_shmem.h" +#include "storage/shmem.h" +#include "utils/guc.h" +#include "utils/memutils.h" +#include "utils/pgstat_internal.h" +#include "utils/timestamp.h" + + +/* ---------- + * Timer definitions. + * + * In milliseconds. + * ---------- + */ + +/* minimum interval non-forced stats flushes.*/ +#define PGSTAT_MIN_INTERVAL 1000 +/* how long until to block flushing pending stats updates */ +#define PGSTAT_MAX_INTERVAL 60000 +/* when to call pgstat_report_stat() again, even when idle */ +#define PGSTAT_IDLE_INTERVAL 10000 + +/* ---------- + * Initial size hints for the hash tables used in statistics. + * ---------- + */ + +#define PGSTAT_SNAPSHOT_HASH_SIZE 512 + + +/* hash table for statistics snapshots entry */ +typedef struct PgStat_SnapshotEntry +{ + PgStat_HashKey key; + char status; /* for simplehash use */ + void *data; /* the stats data itself */ +} PgStat_SnapshotEntry; + + +/* ---------- + * Backend-local Hash Table Definitions + * ---------- + */ + +/* for stats snapshot entries */ +#define SH_PREFIX pgstat_snapshot +#define SH_ELEMENT_TYPE PgStat_SnapshotEntry +#define SH_KEY_TYPE PgStat_HashKey +#define SH_KEY key +#define SH_HASH_KEY(tb, key) \ + pgstat_hash_hash_key(&key, sizeof(PgStat_HashKey), NULL) +#define SH_EQUAL(tb, a, b) \ + pgstat_cmp_hash_key(&a, &b, sizeof(PgStat_HashKey), NULL) == 0 +#define SH_SCOPE static inline +#define SH_DEFINE +#define SH_DECLARE +#include "lib/simplehash.h" + + +/* ---------- + * Local function forward declarations + * ---------- + */ + +static void pgstat_write_statsfile(void); +static void pgstat_read_statsfile(void); + +static void pgstat_reset_after_failure(void); + +static bool pgstat_flush_pending_entries(bool nowait); + +static void pgstat_prep_snapshot(void); +static void pgstat_build_snapshot(void); +static void pgstat_build_snapshot_fixed(PgStat_Kind kind); + +static inline bool pgstat_is_kind_valid(int ikind); + + +/* ---------- + * GUC parameters + * ---------- + */ + +bool pgstat_track_counts = false; +int pgstat_fetch_consistency = PGSTAT_FETCH_CONSISTENCY_CACHE; + + +/* ---------- + * state shared with pgstat_*.c + * ---------- + */ + +PgStat_LocalState pgStatLocal; + + +/* ---------- + * Local data + * + * NB: There should be only variables related to stats infrastructure here, + * not for specific kinds of stats. 
+ * ---------- + */ + +/* + * Memory contexts containing the pgStatEntryRefHash table, the + * pgStatSharedRef entries, and pending data respectively. Mostly to make it + * easier to track / attribute memory usage. + */ + +static MemoryContext pgStatPendingContext = NULL; + +/* + * Backend local list of PgStat_EntryRef with unflushed pending stats. + * + * Newly pending entries should only ever be added to the end of the list, + * otherwise pgstat_flush_pending_entries() might not see them immediately. + */ +static dlist_head pgStatPending = DLIST_STATIC_INIT(pgStatPending); + + +/* + * Force the next stats flush to happen regardless of + * PGSTAT_MIN_INTERVAL. Useful in test scripts. + */ +static bool pgStatForceNextFlush = false; + +/* + * Force-clear existing snapshot before next use when stats_fetch_consistency + * is changed. + */ +static bool force_stats_snapshot_clear = false; + + +/* + * For assertions that check pgstat is not used before initialization / after + * shutdown. + */ +#ifdef USE_ASSERT_CHECKING +static bool pgstat_is_initialized = false; +static bool pgstat_is_shutdown = false; +#endif + + +/* + * The different kinds of statistics. + * + * If reasonably possible, handling specific to one kind of stats should go + * through this abstraction, rather than making more of pgstat.c aware. + * + * See comments for struct PgStat_KindInfo for details about the individual + * fields. + * + * XXX: It'd be nicer to define this outside of this file. But there doesn't + * seem to be a great way of doing that, given the split across multiple + * files. + */ +static const PgStat_KindInfo pgstat_kind_infos[PGSTAT_NUM_KINDS] = { + + /* stats kinds for variable-numbered objects */ + + [PGSTAT_KIND_DATABASE] = { + .name = "database", + + .fixed_amount = false, + /* so pg_stat_database entries can be seen in all databases */ + .accessed_across_databases = true, + + .shared_size = sizeof(PgStatShared_Database), + .shared_data_off = offsetof(PgStatShared_Database, stats), + .shared_data_len = sizeof(((PgStatShared_Database *) 0)->stats), + .pending_size = sizeof(PgStat_StatDBEntry), + + .flush_pending_cb = pgstat_database_flush_cb, + .reset_timestamp_cb = pgstat_database_reset_timestamp_cb, + }, + + [PGSTAT_KIND_RELATION] = { + .name = "relation", + + .fixed_amount = false, + + .shared_size = sizeof(PgStatShared_Relation), + .shared_data_off = offsetof(PgStatShared_Relation, stats), + .shared_data_len = sizeof(((PgStatShared_Relation *) 0)->stats), + .pending_size = sizeof(PgStat_TableStatus), + + .flush_pending_cb = pgstat_relation_flush_cb, + .delete_pending_cb = pgstat_relation_delete_pending_cb, + }, + + [PGSTAT_KIND_FUNCTION] = { + .name = "function", + + .fixed_amount = false, + + .shared_size = sizeof(PgStatShared_Function), + .shared_data_off = offsetof(PgStatShared_Function, stats), + .shared_data_len = sizeof(((PgStatShared_Function *) 0)->stats), + .pending_size = sizeof(PgStat_BackendFunctionEntry), + + .flush_pending_cb = pgstat_function_flush_cb, + }, + + [PGSTAT_KIND_REPLSLOT] = { + .name = "replslot", + + .fixed_amount = false, + + .accessed_across_databases = true, + .named_on_disk = true, + + .shared_size = sizeof(PgStatShared_ReplSlot), + .shared_data_off = offsetof(PgStatShared_ReplSlot, stats), + .shared_data_len = sizeof(((PgStatShared_ReplSlot *) 0)->stats), + + .reset_timestamp_cb = pgstat_replslot_reset_timestamp_cb, + .to_serialized_name = pgstat_replslot_to_serialized_name_cb, + .from_serialized_name = pgstat_replslot_from_serialized_name_cb, + }, + + 
[PGSTAT_KIND_SUBSCRIPTION] = { + .name = "subscription", + + .fixed_amount = false, + /* so pg_stat_subscription_stats entries can be seen in all databases */ + .accessed_across_databases = true, + + .shared_size = sizeof(PgStatShared_Subscription), + .shared_data_off = offsetof(PgStatShared_Subscription, stats), + .shared_data_len = sizeof(((PgStatShared_Subscription *) 0)->stats), + .pending_size = sizeof(PgStat_BackendSubEntry), + + .flush_pending_cb = pgstat_subscription_flush_cb, + .reset_timestamp_cb = pgstat_subscription_reset_timestamp_cb, + }, + + + /* stats for fixed-numbered (mostly 1) objects */ + + [PGSTAT_KIND_ARCHIVER] = { + .name = "archiver", + + .fixed_amount = true, + + .reset_all_cb = pgstat_archiver_reset_all_cb, + .snapshot_cb = pgstat_archiver_snapshot_cb, + }, + + [PGSTAT_KIND_BGWRITER] = { + .name = "bgwriter", + + .fixed_amount = true, + + .reset_all_cb = pgstat_bgwriter_reset_all_cb, + .snapshot_cb = pgstat_bgwriter_snapshot_cb, + }, + + [PGSTAT_KIND_CHECKPOINTER] = { + .name = "checkpointer", + + .fixed_amount = true, + + .reset_all_cb = pgstat_checkpointer_reset_all_cb, + .snapshot_cb = pgstat_checkpointer_snapshot_cb, + }, + + [PGSTAT_KIND_SLRU] = { + .name = "slru", + + .fixed_amount = true, + + .reset_all_cb = pgstat_slru_reset_all_cb, + .snapshot_cb = pgstat_slru_snapshot_cb, + }, + + [PGSTAT_KIND_WAL] = { + .name = "wal", + + .fixed_amount = true, + + .reset_all_cb = pgstat_wal_reset_all_cb, + .snapshot_cb = pgstat_wal_snapshot_cb, + }, +}; + + +/* ------------------------------------------------------------ + * Functions managing the state of the stats system for all backends. + * ------------------------------------------------------------ + */ + +/* + * Read on-disk stats into memory at server start. + * + * Should only be called by the startup process or in single user mode. + */ +void +pgstat_restore_stats(void) +{ + pgstat_read_statsfile(); +} + +/* + * Remove the stats file. This is currently used only if WAL recovery is + * needed after a crash. + * + * Should only be called by the startup process or in single user mode. + */ +void +pgstat_discard_stats(void) +{ + int ret; + + /* NB: this needs to be done even in single user mode */ + + ret = unlink(PGSTAT_STAT_PERMANENT_FILENAME); + if (ret != 0) + { + if (errno == ENOENT) + elog(DEBUG2, + "didn't need to unlink permanent stats file \"%s\" - didn't exist", + PGSTAT_STAT_PERMANENT_FILENAME); + else + ereport(LOG, + (errcode_for_file_access(), + errmsg("could not unlink permanent statistics file \"%s\": %m", + PGSTAT_STAT_PERMANENT_FILENAME))); + } + else + { + ereport(DEBUG2, + (errcode_for_file_access(), + errmsg_internal("unlinked permanent statistics file \"%s\"", + PGSTAT_STAT_PERMANENT_FILENAME))); + } + + /* + * Reset stats contents. This will set reset timestamps of fixed-numbered + * stats to the current time (no variable stats exist). + */ + pgstat_reset_after_failure(); +} + +/* + * pgstat_before_server_shutdown() needs to be called by exactly one process + * during regular server shutdowns. Otherwise all stats will be lost. + * + * We currently only write out stats for proc_exit(0). We might want to change + * that at some point... But right now pgstat_discard_stats() would be called + * during the start after a disorderly shutdown, anyway. + */ +void +pgstat_before_server_shutdown(int code, Datum arg) +{ + Assert(pgStatLocal.shmem != NULL); + Assert(!pgStatLocal.shmem->is_shutdown); + + /* + * Stats should only be reported after pgstat_initialize() and before + * pgstat_shutdown(). 
This is a convenient point to catch most violations + * of this rule. + */ + Assert(pgstat_is_initialized && !pgstat_is_shutdown); + + /* flush out our own pending changes before writing out */ + pgstat_report_stat(true); + + /* + * Only write out file during normal shutdown. Don't even signal that + * we've shutdown during irregular shutdowns, because the shutdown + * sequence isn't coordinated to ensure this backend shuts down last. + */ + if (code == 0) + { + pgStatLocal.shmem->is_shutdown = true; + pgstat_write_statsfile(); + } +} + + +/* ------------------------------------------------------------ + * Backend initialization / shutdown functions + * ------------------------------------------------------------ + */ + +/* + * Shut down a single backend's statistics reporting at process exit. + * + * Flush out any remaining statistics counts. Without this, operations + * triggered during backend exit (such as temp table deletions) won't be + * counted. + */ +static void +pgstat_shutdown_hook(int code, Datum arg) +{ + Assert(!pgstat_is_shutdown); + Assert(IsUnderPostmaster || !IsPostmasterEnvironment); + + /* + * If we got as far as discovering our own database ID, we can flush out + * what we did so far. Otherwise, we'd be reporting an invalid database + * ID, so forget it. (This means that accesses to pg_database during + * failed backend starts might never get counted.) + */ + if (OidIsValid(MyDatabaseId)) + pgstat_report_disconnect(MyDatabaseId); + + pgstat_report_stat(true); + + /* there shouldn't be any pending changes left */ + Assert(dlist_is_empty(&pgStatPending)); + dlist_init(&pgStatPending); + + pgstat_detach_shmem(); + +#ifdef USE_ASSERT_CHECKING + pgstat_is_shutdown = true; +#endif +} + +/* + * Initialize pgstats state, and set up our on-proc-exit hook. Called from + * BaseInit(). + * + * NOTE: MyDatabaseId isn't set yet; so the shutdown hook has to be careful. + */ +void +pgstat_initialize(void) +{ + Assert(!pgstat_is_initialized); + + pgstat_attach_shmem(); + + pgstat_init_wal(); + + /* Set up a process-exit hook to clean up */ + before_shmem_exit(pgstat_shutdown_hook, 0); + +#ifdef USE_ASSERT_CHECKING + pgstat_is_initialized = true; +#endif +} + + +/* ------------------------------------------------------------ + * Public functions used by backends follow + * ------------------------------------------------------------ + */ + +/* + * Must be called by processes that performs DML: tcop/postgres.c, logical + * receiver processes, SPI worker, etc. to flush pending statistics updates to + * shared memory. + * + * Unless called with 'force', pending stats updates are flushed happen once + * per PGSTAT_MIN_INTERVAL (1000ms). When not forced, stats flushes do not + * block on lock acquisition, except if stats updates have been pending for + * longer than PGSTAT_MAX_INTERVAL (60000ms). + * + * Whenever pending stats updates remain at the end of pgstat_report_stat() a + * suggested idle timeout is returned. Currently this is always + * PGSTAT_IDLE_INTERVAL (10000ms). Callers can use the returned time to set up + * a timeout after which to call pgstat_report_stat(true), but are not + * required to to do so. + * + * Note that this is called only when not within a transaction, so it is fair + * to use transaction stop time as an approximation of current time. 
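+ *
+ * A sketch of the intended caller pattern (illustrative only; the
+ * variable name is hypothetical):
+ *
+ *     idle_timeout = pgstat_report_stat(false);
+ *     if (idle_timeout != 0)
+ *         ... arrange for pgstat_report_stat(true) to run after
+ *         idle_timeout ms of further idleness ...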
+ */ +long +pgstat_report_stat(bool force) +{ + static TimestampTz pending_since = 0; + static TimestampTz last_flush = 0; + bool partial_flush; + TimestampTz now; + bool nowait; + + pgstat_assert_is_up(); + Assert(!IsTransactionOrTransactionBlock()); + + /* "absorb" the forced flush even if there's nothing to flush */ + if (pgStatForceNextFlush) + { + force = true; + pgStatForceNextFlush = false; + } + + /* Don't expend a clock check if nothing to do */ + if (dlist_is_empty(&pgStatPending) && + !have_slrustats && + !pgstat_have_pending_wal()) + { + Assert(pending_since == 0); + return 0; + } + + /* + * There should never be stats to report once stats are shut down. Can't + * assert that before the checks above, as there is an unconditional + * pgstat_report_stat() call in pgstat_shutdown_hook() - which at least + * the process that ran pgstat_before_server_shutdown() will still call. + */ + Assert(!pgStatLocal.shmem->is_shutdown); + + now = GetCurrentTransactionStopTimestamp(); + + if (!force) + { + if (pending_since > 0 && + TimestampDifferenceExceeds(pending_since, now, PGSTAT_MAX_INTERVAL)) + { + /* don't keep pending updates longer than PGSTAT_MAX_INTERVAL */ + force = true; + } + else if (last_flush > 0 && + !TimestampDifferenceExceeds(last_flush, now, PGSTAT_MIN_INTERVAL)) + { + /* don't flush too frequently */ + if (pending_since == 0) + pending_since = now; + + return PGSTAT_IDLE_INTERVAL; + } + } + + pgstat_update_dbstats(now); + + /* don't wait for lock acquisition when !force */ + nowait = !force; + + partial_flush = false; + + /* flush database / relation / function / ... stats */ + partial_flush |= pgstat_flush_pending_entries(nowait); + + /* flush wal stats */ + partial_flush |= pgstat_flush_wal(nowait); + + /* flush SLRU stats */ + partial_flush |= pgstat_slru_flush(nowait); + + last_flush = now; + + /* + * If some of the pending stats could not be flushed due to lock + * contention, let the caller know when to retry. + */ + if (partial_flush) + { + /* force should have prevented us from getting here */ + Assert(!force); + + /* remember since when stats have been pending */ + if (pending_since == 0) + pending_since = now; + + return PGSTAT_IDLE_INTERVAL; + } + + pending_since = 0; + + return 0; +} + +/* + * Force locally pending stats to be flushed during the next + * pgstat_report_stat() call. This is useful for writing tests. + */ +void +pgstat_force_next_flush(void) +{ + pgStatForceNextFlush = true; +} + +/* + * Only for use by pgstat_reset_counters() + */ +static bool +match_db_entries(PgStatShared_HashEntry *entry, Datum match_data) +{ + return entry->key.dboid == DatumGetObjectId(MyDatabaseId); +} + +/* + * Reset counters for our database. + * + * Permission checking for this function is managed through the normal + * GRANT system. + */ +void +pgstat_reset_counters(void) +{ + TimestampTz ts = GetCurrentTimestamp(); + + pgstat_reset_matching_entries(match_db_entries, + ObjectIdGetDatum(MyDatabaseId), + ts); +} + +/* + * Reset a single variable-numbered entry. + * + * If the stats kind is within a database, also reset the database's + * stat_reset_timestamp. + * + * Permission checking for this function is managed through the normal + * GRANT system. 
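+ *
+ * For example (sketch; cf. the SQL-callable wrappers in pgstatfuncs.c),
+ * resetting the counters of a single table boils down to:
+ *
+ *     pgstat_reset(PGSTAT_KIND_RELATION, MyDatabaseId, taboid);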
+ */ +void +pgstat_reset(PgStat_Kind kind, Oid dboid, Oid objoid) +{ + const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind); + TimestampTz ts = GetCurrentTimestamp(); + + /* not needed atm, and doesn't make sense with the current signature */ + Assert(!pgstat_get_kind_info(kind)->fixed_amount); + + /* reset the "single counter" */ + pgstat_reset_entry(kind, dboid, objoid, ts); + + if (!kind_info->accessed_across_databases) + pgstat_reset_database_timestamp(dboid, ts); +} + +/* + * Reset stats for all entries of a kind. + * + * Permission checking for this function is managed through the normal + * GRANT system. + */ +void +pgstat_reset_of_kind(PgStat_Kind kind) +{ + const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind); + TimestampTz ts = GetCurrentTimestamp(); + + if (kind_info->fixed_amount) + kind_info->reset_all_cb(ts); + else + pgstat_reset_entries_of_kind(kind, ts); +} + + +/* ------------------------------------------------------------ + * Fetching of stats + * ------------------------------------------------------------ + */ + +/* + * Discard any data collected in the current transaction. Any subsequent + * request will cause new snapshots to be read. + * + * This is also invoked during transaction commit or abort to discard + * the no-longer-wanted snapshot. Updates of stats_fetch_consistency can + * cause this routine to be called. + */ +void +pgstat_clear_snapshot(void) +{ + pgstat_assert_is_up(); + + memset(&pgStatLocal.snapshot.fixed_valid, 0, + sizeof(pgStatLocal.snapshot.fixed_valid)); + pgStatLocal.snapshot.stats = NULL; + pgStatLocal.snapshot.mode = PGSTAT_FETCH_CONSISTENCY_NONE; + + /* Release memory, if any was allocated */ + if (pgStatLocal.snapshot.context) + { + MemoryContextDelete(pgStatLocal.snapshot.context); + + /* Reset variables */ + pgStatLocal.snapshot.context = NULL; + } + + /* + * Historically the backend_status.c facilities lived in this file, and + * were reset with the same function. For now keep it that way, and + * forward the reset request. + */ + pgstat_clear_backend_activity_snapshot(); + + /* Reset this flag, as it may be possible that a cleanup was forced. */ + force_stats_snapshot_clear = false; +} + +void * +pgstat_fetch_entry(PgStat_Kind kind, Oid dboid, Oid objoid) +{ + PgStat_HashKey key; + PgStat_EntryRef *entry_ref; + void *stats_data; + const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind); + + /* should be called from backends */ + Assert(IsUnderPostmaster || !IsPostmasterEnvironment); + AssertArg(!kind_info->fixed_amount); + + pgstat_prep_snapshot(); + + key.kind = kind; + key.dboid = dboid; + key.objoid = objoid; + + /* if we need to build a full snapshot, do so */ + if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT) + pgstat_build_snapshot(); + + /* if caching is desired, look up in cache */ + if (pgstat_fetch_consistency > PGSTAT_FETCH_CONSISTENCY_NONE) + { + PgStat_SnapshotEntry *entry = NULL; + + entry = pgstat_snapshot_lookup(pgStatLocal.snapshot.stats, key); + + if (entry) + return entry->data; + + /* + * If we built a full snapshot and the key is not in + * pgStatLocal.snapshot.stats, there are no matching stats. 
+ */ + if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT) + return NULL; + } + + pgStatLocal.snapshot.mode = pgstat_fetch_consistency; + + entry_ref = pgstat_get_entry_ref(kind, dboid, objoid, false, NULL); + + if (entry_ref == NULL || entry_ref->shared_entry->dropped) + { + /* create empty entry when using PGSTAT_FETCH_CONSISTENCY_CACHE */ + if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_CACHE) + { + PgStat_SnapshotEntry *entry = NULL; + bool found; + + entry = pgstat_snapshot_insert(pgStatLocal.snapshot.stats, key, &found); + Assert(!found); + entry->data = NULL; + } + return NULL; + } + + /* + * Allocate in caller's context for PGSTAT_FETCH_CONSISTENCY_NONE, + * otherwise we could quickly end up with a fair bit of memory used due to + * repeated accesses. + */ + if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_NONE) + stats_data = palloc(kind_info->shared_data_len); + else + stats_data = MemoryContextAlloc(pgStatLocal.snapshot.context, + kind_info->shared_data_len); + + pgstat_lock_entry_shared(entry_ref, false); + memcpy(stats_data, + pgstat_get_entry_data(kind, entry_ref->shared_stats), + kind_info->shared_data_len); + pgstat_unlock_entry(entry_ref); + + if (pgstat_fetch_consistency > PGSTAT_FETCH_CONSISTENCY_NONE) + { + PgStat_SnapshotEntry *entry = NULL; + bool found; + + entry = pgstat_snapshot_insert(pgStatLocal.snapshot.stats, key, &found); + entry->data = stats_data; + } + + return stats_data; +} + +/* + * If a stats snapshot has been taken, return the timestamp at which that was + * done, and set *have_snapshot to true. Otherwise *have_snapshot is set to + * false. + */ +TimestampTz +pgstat_get_stat_snapshot_timestamp(bool *have_snapshot) +{ + if (force_stats_snapshot_clear) + pgstat_clear_snapshot(); + + if (pgStatLocal.snapshot.mode == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT) + { + *have_snapshot = true; + return pgStatLocal.snapshot.snapshot_timestamp; + } + + *have_snapshot = false; + + return 0; +} + +bool +pgstat_have_entry(PgStat_Kind kind, Oid dboid, Oid objoid) +{ + /* fixed-numbered stats always exist */ + if (pgstat_get_kind_info(kind)->fixed_amount) + return true; + + return pgstat_get_entry_ref(kind, dboid, objoid, false, NULL) != NULL; +} + +/* + * Ensure snapshot for fixed-numbered 'kind' exists. + * + * Typically used by the pgstat_fetch_* functions for a kind of stats, before + * massaging the data into the desired format. 
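+ *
+ * A typical caller is a thin wrapper like pgstat_fetch_stat_archiver() in
+ * pgstat_archiver.c:
+ *
+ *     pgstat_snapshot_fixed(PGSTAT_KIND_ARCHIVER);
+ *     return &pgStatLocal.snapshot.archiver;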
+ */ +void +pgstat_snapshot_fixed(PgStat_Kind kind) +{ + AssertArg(pgstat_is_kind_valid(kind)); + AssertArg(pgstat_get_kind_info(kind)->fixed_amount); + + if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT) + pgstat_build_snapshot(); + else + pgstat_build_snapshot_fixed(kind); + + Assert(pgStatLocal.snapshot.fixed_valid[kind]); +} + +static void +pgstat_prep_snapshot(void) +{ + if (force_stats_snapshot_clear) + pgstat_clear_snapshot(); + + if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_NONE || + pgStatLocal.snapshot.stats != NULL) + return; + + if (!pgStatLocal.snapshot.context) + pgStatLocal.snapshot.context = AllocSetContextCreate(TopMemoryContext, + "PgStat Snapshot", + ALLOCSET_SMALL_SIZES); + + pgStatLocal.snapshot.stats = + pgstat_snapshot_create(pgStatLocal.snapshot.context, + PGSTAT_SNAPSHOT_HASH_SIZE, + NULL); +} + +static void +pgstat_build_snapshot(void) +{ + dshash_seq_status hstat; + PgStatShared_HashEntry *p; + + /* should only be called when we need a snapshot */ + Assert(pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT); + + /* snapshot already built */ + if (pgStatLocal.snapshot.mode == PGSTAT_FETCH_CONSISTENCY_SNAPSHOT) + return; + + pgstat_prep_snapshot(); + + Assert(pgStatLocal.snapshot.stats->members == 0); + + pgStatLocal.snapshot.snapshot_timestamp = GetCurrentTimestamp(); + + /* + * Snapshot all variable stats. + */ + dshash_seq_init(&hstat, pgStatLocal.shared_hash, false); + while ((p = dshash_seq_next(&hstat)) != NULL) + { + PgStat_Kind kind = p->key.kind; + const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind); + bool found; + PgStat_SnapshotEntry *entry; + PgStatShared_Common *stats_data; + + /* + * Check if the stats object should be included in the snapshot. + * Unless the stats kind can be accessed from all databases (e.g., + * database stats themselves), we only include stats for the current + * database or objects not associated with a database (e.g. shared + * relations). + */ + if (p->key.dboid != MyDatabaseId && + p->key.dboid != InvalidOid && + !kind_info->accessed_across_databases) + continue; + + if (p->dropped) + continue; + + Assert(pg_atomic_read_u32(&p->refcount) > 0); + + stats_data = dsa_get_address(pgStatLocal.dsa, p->body); + Assert(stats_data); + + entry = pgstat_snapshot_insert(pgStatLocal.snapshot.stats, p->key, &found); + Assert(!found); + + entry->data = MemoryContextAlloc(pgStatLocal.snapshot.context, + kind_info->shared_size); + /* + * Acquire the LWLock directly instead of using + * pg_stat_lock_entry_shared() which requires a reference. + */ + LWLockAcquire(&stats_data->lock, LW_SHARED); + memcpy(entry->data, + pgstat_get_entry_data(kind, stats_data), + kind_info->shared_size); + LWLockRelease(&stats_data->lock); + } + dshash_seq_term(&hstat); + + /* + * Build snapshot of all fixed-numbered stats. 
+ */ + for (int kind = PGSTAT_KIND_FIRST_VALID; kind <= PGSTAT_KIND_LAST; kind++) + { + const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind); + + if (!kind_info->fixed_amount) + { + Assert(kind_info->snapshot_cb == NULL); + continue; + } + + pgstat_build_snapshot_fixed(kind); + } + + pgStatLocal.snapshot.mode = PGSTAT_FETCH_CONSISTENCY_SNAPSHOT; +} + +static void +pgstat_build_snapshot_fixed(PgStat_Kind kind) +{ + const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind); + + Assert(kind_info->fixed_amount); + Assert(kind_info->snapshot_cb != NULL); + + if (pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_NONE) + { + /* rebuild every time */ + pgStatLocal.snapshot.fixed_valid[kind] = false; + } + else if (pgStatLocal.snapshot.fixed_valid[kind]) + { + /* in snapshot mode we shouldn't get called again */ + Assert(pgstat_fetch_consistency == PGSTAT_FETCH_CONSISTENCY_CACHE); + return; + } + + Assert(!pgStatLocal.snapshot.fixed_valid[kind]); + + kind_info->snapshot_cb(); + + Assert(!pgStatLocal.snapshot.fixed_valid[kind]); + pgStatLocal.snapshot.fixed_valid[kind] = true; +} + + +/* ------------------------------------------------------------ + * Backend-local pending stats infrastructure + * ------------------------------------------------------------ + */ + +/* + * Returns the appropriate PgStat_EntryRef, preparing it to receive pending + * stats if not already done. + * + * If created_entry is non-NULL, it'll be set to true if the entry is newly + * created, false otherwise. + */ +PgStat_EntryRef * +pgstat_prep_pending_entry(PgStat_Kind kind, Oid dboid, Oid objoid, bool *created_entry) +{ + PgStat_EntryRef *entry_ref; + + /* need to be able to flush out */ + Assert(pgstat_get_kind_info(kind)->flush_pending_cb != NULL); + + if (unlikely(!pgStatPendingContext)) + { + pgStatPendingContext = + AllocSetContextCreate(TopMemoryContext, + "PgStat Pending", + ALLOCSET_SMALL_SIZES); + } + + entry_ref = pgstat_get_entry_ref(kind, dboid, objoid, + true, created_entry); + + if (entry_ref->pending == NULL) + { + size_t entrysize = pgstat_get_kind_info(kind)->pending_size; + + Assert(entrysize != (size_t) -1); + + entry_ref->pending = MemoryContextAllocZero(pgStatPendingContext, entrysize); + dlist_push_tail(&pgStatPending, &entry_ref->pending_node); + } + + return entry_ref; +} + +/* + * Return an existing stats entry, or NULL. + * + * This should only be used as a helper for pgstatfuncs.c - outside of + * that it shouldn't be needed. + */ +PgStat_EntryRef * +pgstat_fetch_pending_entry(PgStat_Kind kind, Oid dboid, Oid objoid) +{ + PgStat_EntryRef *entry_ref; + + entry_ref = pgstat_get_entry_ref(kind, dboid, objoid, false, NULL); + + if (entry_ref == NULL || entry_ref->pending == NULL) + return NULL; + + return entry_ref; +} + +void +pgstat_delete_pending_entry(PgStat_EntryRef *entry_ref) +{ + PgStat_Kind kind = entry_ref->shared_entry->key.kind; + const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind); + void *pending_data = entry_ref->pending; + + Assert(pending_data != NULL); + /* !fixed_amount stats should be handled explicitly */ + Assert(!pgstat_get_kind_info(kind)->fixed_amount); + + if (kind_info->delete_pending_cb) + kind_info->delete_pending_cb(entry_ref); + + pfree(pending_data); + entry_ref->pending = NULL; + + dlist_delete(&entry_ref->pending_node); +} + +/* + * Flush out pending stats for database objects (databases, relations, + * functions). 
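+ *
+ * As an illustration of the pending-entry lifecycle: a deadlock is first
+ * counted into a backend-local pending entry (pgstat_report_deadlock() in
+ * pgstat_database.c):
+ *
+ *     dbent = pgstat_prep_database_pending(MyDatabaseId);
+ *     dbent->n_deadlocks++;
+ *
+ * and only reaches shared memory when this function invokes the kind's
+ * flush_pending_cb (pgstat_database_flush_cb() for databases).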
+ */ +static bool +pgstat_flush_pending_entries(bool nowait) +{ + bool have_pending = false; + dlist_node *cur = NULL; + + /* + * Need to be a bit careful iterating over the list of pending entries. + * Processing a pending entry may queue further pending entries to the end + * of the list that we want to process, so a simple iteration won't do. + * Further complicating matters is that we want to delete the current + * entry in each iteration from the list if we flushed successfully. + * + * So we just keep track of the next pointer in each loop iteration. + */ + if (!dlist_is_empty(&pgStatPending)) + cur = dlist_head_node(&pgStatPending); + + while (cur) + { + PgStat_EntryRef *entry_ref = + dlist_container(PgStat_EntryRef, pending_node, cur); + PgStat_HashKey key = entry_ref->shared_entry->key; + PgStat_Kind kind = key.kind; + const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind); + bool did_flush; + dlist_node *next; + + Assert(!kind_info->fixed_amount); + Assert(kind_info->flush_pending_cb != NULL); + + /* flush the stats, if possible */ + did_flush = kind_info->flush_pending_cb(entry_ref, nowait); + + Assert(did_flush || nowait); + + /* determine next entry, before deleting the pending entry */ + if (dlist_has_next(&pgStatPending, cur)) + next = dlist_next_node(&pgStatPending, cur); + else + next = NULL; + + /* if successfully flushed, remove entry */ + if (did_flush) + pgstat_delete_pending_entry(entry_ref); + else + have_pending = true; + + cur = next; + } + + Assert(dlist_is_empty(&pgStatPending) == !have_pending); + + return have_pending; +} + + +/* ------------------------------------------------------------ + * Helper / infrastructure functions + * ------------------------------------------------------------ + */ + +PgStat_Kind +pgstat_get_kind_from_str(char *kind_str) +{ + for (int kind = PGSTAT_KIND_FIRST_VALID; kind <= PGSTAT_KIND_LAST; kind++) + { + if (pg_strcasecmp(kind_str, pgstat_kind_infos[kind].name) == 0) + return kind; + } + + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid statistics kind: \"%s\"", kind_str))); + return PGSTAT_KIND_DATABASE; /* avoid compiler warnings */ +} + +static inline bool +pgstat_is_kind_valid(int ikind) +{ + return ikind >= PGSTAT_KIND_FIRST_VALID && ikind <= PGSTAT_KIND_LAST; +} + +const PgStat_KindInfo * +pgstat_get_kind_info(PgStat_Kind kind) +{ + AssertArg(pgstat_is_kind_valid(kind)); + + return &pgstat_kind_infos[kind]; +} + +/* + * Stats should only be reported after pgstat_initialize() and before + * pgstat_shutdown(). This check is put in a few central places to catch + * violations of this rule more easily. + */ +#ifdef USE_ASSERT_CHECKING +void +pgstat_assert_is_up(void) +{ + Assert(pgstat_is_initialized && !pgstat_is_shutdown); +} +#endif + + +/* ------------------------------------------------------------ + * reading and writing of on-disk stats file + * ------------------------------------------------------------ + */ + +/* helpers for pgstat_write_statsfile() */ +static void +write_chunk(FILE *fpout, void *ptr, size_t len) +{ + int rc; + + rc = fwrite(ptr, len, 1, fpout); + + /* we'll check for errors with ferror once at the end */ + (void) rc; +} + +#define write_chunk_s(fpout, ptr) write_chunk(fpout, ptr, sizeof(*ptr)) + +/* + * This function is called in the last process that is accessing the shared + * stats so locking is not required. 
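+ *
+ * Schematically, the file written below contains (editor's summary of the
+ * code that follows):
+ *
+ *     int32 format_id (PGSTAT_FILE_FORMAT_ID)
+ *     fixed-numbered stats structs: archiver, bgwriter, checkpointer,
+ *         SLRU, WAL
+ *     per variable-numbered entry, either
+ *         'S', PgStat_HashKey, entry data
+ *     or  'N', PgStat_Kind, NameData, entry data
+ *     'E' end-of-file marker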
+ */ +static void +pgstat_write_statsfile(void) +{ + FILE *fpout; + int32 format_id; + const char *tmpfile = PGSTAT_STAT_PERMANENT_TMPFILE; + const char *statfile = PGSTAT_STAT_PERMANENT_FILENAME; + dshash_seq_status hstat; + PgStatShared_HashEntry *ps; + + pgstat_assert_is_up(); + + /* we're shutting down, so it's ok to just override this */ + pgstat_fetch_consistency = PGSTAT_FETCH_CONSISTENCY_NONE; + + elog(DEBUG2, "writing stats file \"%s\"", statfile); + + /* + * Open the statistics temp file to write out the current values. + */ + fpout = AllocateFile(tmpfile, PG_BINARY_W); + if (fpout == NULL) + { + ereport(LOG, + (errcode_for_file_access(), + errmsg("could not open temporary statistics file \"%s\": %m", + tmpfile))); + return; + } + + /* + * Write the file header --- currently just a format ID. + */ + format_id = PGSTAT_FILE_FORMAT_ID; + write_chunk_s(fpout, &format_id); + + /* + * XXX: The following could now be generalized to just iterate over + * pgstat_kind_infos instead of knowing about the different kinds of + * stats. + */ + + /* + * Write archiver stats struct + */ + pgstat_build_snapshot_fixed(PGSTAT_KIND_ARCHIVER); + write_chunk_s(fpout, &pgStatLocal.snapshot.archiver); + + /* + * Write bgwriter stats struct + */ + pgstat_build_snapshot_fixed(PGSTAT_KIND_BGWRITER); + write_chunk_s(fpout, &pgStatLocal.snapshot.bgwriter); + + /* + * Write checkpointer stats struct + */ + pgstat_build_snapshot_fixed(PGSTAT_KIND_CHECKPOINTER); + write_chunk_s(fpout, &pgStatLocal.snapshot.checkpointer); + + /* + * Write SLRU stats struct + */ + pgstat_build_snapshot_fixed(PGSTAT_KIND_SLRU); + write_chunk_s(fpout, &pgStatLocal.snapshot.slru); + + /* + * Write WAL stats struct + */ + pgstat_build_snapshot_fixed(PGSTAT_KIND_WAL); + write_chunk_s(fpout, &pgStatLocal.snapshot.wal); + + /* + * Walk through the stats entries + */ + dshash_seq_init(&hstat, pgStatLocal.shared_hash, false); + while ((ps = dshash_seq_next(&hstat)) != NULL) + { + PgStatShared_Common *shstats; + const PgStat_KindInfo *kind_info = NULL; + + CHECK_FOR_INTERRUPTS(); + + /* we may have some "dropped" entries not yet removed, skip them */ + Assert(!ps->dropped); + if (ps->dropped) + continue; + + shstats = (PgStatShared_Common *) dsa_get_address(pgStatLocal.dsa, ps->body); + + kind_info = pgstat_get_kind_info(ps->key.kind); + + /* if not dropped, the valid-entry refcount should exist */ + Assert(pg_atomic_read_u32(&ps->refcount) > 0); + + if (!kind_info->to_serialized_name) + { + /* normal stats entry, identified by PgStat_HashKey */ + fputc('S', fpout); + write_chunk_s(fpout, &ps->key); + } + else + { + /* stats entry identified by name on disk (e.g. slots) */ + NameData name; + + kind_info->to_serialized_name(&ps->key, shstats, &name); + + fputc('N', fpout); + write_chunk_s(fpout, &ps->key.kind); + write_chunk_s(fpout, &name); + } + + /* Write the entry, except for its header part */ + write_chunk(fpout, + pgstat_get_entry_data(ps->key.kind, shstats), + pgstat_get_entry_len(ps->key.kind)); + } + dshash_seq_term(&hstat); + + /* + * No more output to be done. Close the temp file and replace the old + * pgstat.stat with it. The ferror() check replaces testing for error + * after each individual fputc or fwrite (in write_chunk()) above. 
+ */ + fputc('E', fpout); + + if (ferror(fpout)) + { + ereport(LOG, + (errcode_for_file_access(), + errmsg("could not write temporary statistics file \"%s\": %m", + tmpfile))); + FreeFile(fpout); + unlink(tmpfile); + } + else if (FreeFile(fpout) < 0) + { + ereport(LOG, + (errcode_for_file_access(), + errmsg("could not close temporary statistics file \"%s\": %m", + tmpfile))); + unlink(tmpfile); + } + else if (rename(tmpfile, statfile) < 0) + { + ereport(LOG, + (errcode_for_file_access(), + errmsg("could not rename temporary statistics file \"%s\" to \"%s\": %m", + tmpfile, statfile))); + unlink(tmpfile); + } +} + +/* helpers for pgstat_read_statsfile() */ +static bool +read_chunk(FILE *fpin, void *ptr, size_t len) +{ + return fread(ptr, 1, len, fpin) == len; +} + +#define read_chunk_s(fpin, ptr) read_chunk(fpin, ptr, sizeof(*ptr)) + +/* + * Reads an existing statistics file into the shared stats hash. + * + * This function is called in the only process that is accessing the shared + * stats, so locking is not required. + */ +static void +pgstat_read_statsfile(void) +{ + FILE *fpin; + int32 format_id; + bool found; + const char *statfile = PGSTAT_STAT_PERMANENT_FILENAME; + PgStat_ShmemControl *shmem = pgStatLocal.shmem; + + /* shouldn't be called from postmaster */ + Assert(IsUnderPostmaster || !IsPostmasterEnvironment); + + elog(DEBUG2, "reading stats file \"%s\"", statfile); + + /* + * Try to open the stats file. If it doesn't exist, the backend simply + * returns zero for anything and statistics simply start from scratch + * with empty counters. + * + * ENOENT is a possibility if stats collection was previously disabled or + * the stats file has not yet been written for the first time. Any other + * failure condition is suspicious. + */ + if ((fpin = AllocateFile(statfile, PG_BINARY_R)) == NULL) + { + if (errno != ENOENT) + ereport(LOG, + (errcode_for_file_access(), + errmsg("could not open statistics file \"%s\": %m", + statfile))); + pgstat_reset_after_failure(); + return; + } + + /* + * Verify it's of the expected format. + */ + if (!read_chunk_s(fpin, &format_id) || + format_id != PGSTAT_FILE_FORMAT_ID) + goto error; + + /* + * XXX: The following could now be generalized to just iterate over + * pgstat_kind_infos instead of knowing about the different kinds of + * stats. + */ + + /* + * Read archiver stats struct + */ + if (!read_chunk_s(fpin, &shmem->archiver.stats)) + goto error; + + /* + * Read bgwriter stats struct + */ + if (!read_chunk_s(fpin, &shmem->bgwriter.stats)) + goto error; + + /* + * Read checkpointer stats struct + */ + if (!read_chunk_s(fpin, &shmem->checkpointer.stats)) + goto error; + + /* + * Read SLRU stats struct + */ + if (!read_chunk_s(fpin, &shmem->slru.stats)) + goto error; + + /* + * Read WAL stats struct + */ + if (!read_chunk_s(fpin, &shmem->wal.stats)) + goto error; + + /* + * We found an existing statistics file. Read it and put all the hash + * table entries into place. + */ + for (;;) + { + int t = fgetc(fpin); + + switch (t) + { + case 'S': + case 'N': + { + PgStat_HashKey key; + PgStatShared_HashEntry *p; + PgStatShared_Common *header; + + CHECK_FOR_INTERRUPTS(); + + if (t == 'S') + { + /* normal stats entry, identified by PgStat_HashKey */ + if (!read_chunk_s(fpin, &key)) + goto error; + + if (!pgstat_is_kind_valid(key.kind)) + goto error; + } + else + { + /* stats entry identified by name on disk (e.g. 
slots) */ + const PgStat_KindInfo *kind_info = NULL; + PgStat_Kind kind; + NameData name; + + if (!read_chunk_s(fpin, &kind)) + goto error; + if (!read_chunk_s(fpin, &name)) + goto error; + if (!pgstat_is_kind_valid(kind)) + goto error; + + kind_info = pgstat_get_kind_info(kind); + + if (!kind_info->from_serialized_name) + goto error; + + if (!kind_info->from_serialized_name(&name, &key)) + { + /* skip over data for entry we don't care about */ + if (fseek(fpin, pgstat_get_entry_len(kind), SEEK_CUR) != 0) + goto error; + + continue; + } + + Assert(key.kind == kind); + } + + /* + * This intentionally doesn't use pgstat_get_entry_ref() - + * putting all stats into checkpointer's + * pgStatEntryRefHash would be wasted effort and memory. + */ + p = dshash_find_or_insert(pgStatLocal.shared_hash, &key, &found); + + /* don't allow duplicate entries */ + if (found) + { + dshash_release_lock(pgStatLocal.shared_hash, p); + elog(WARNING, "found duplicate stats entry %d/%u/%u", + key.kind, key.dboid, key.objoid); + goto error; + } + + header = pgstat_init_entry(key.kind, p); + dshash_release_lock(pgStatLocal.shared_hash, p); + + if (!read_chunk(fpin, + pgstat_get_entry_data(key.kind, header), + pgstat_get_entry_len(key.kind))) + goto error; + + break; + } + case 'E': + /* check that 'E' actually signals end of file */ + if (fgetc(fpin) != EOF) + goto error; + + goto done; + + default: + goto error; + } + } + +done: + FreeFile(fpin); + + elog(DEBUG2, "removing permanent stats file \"%s\"", statfile); + unlink(statfile); + + return; + +error: + ereport(LOG, + (errmsg("corrupted statistics file \"%s\"", statfile))); + + pgstat_reset_after_failure(); + + goto done; +} + +/* + * Helper to reset / drop stats after a crash or after restoring stats from + * disk failed, potentially after already loading parts. + */ +static void +pgstat_reset_after_failure(void) +{ + TimestampTz ts = GetCurrentTimestamp(); + + /* reset fixed-numbered stats */ + for (int kind = PGSTAT_KIND_FIRST_VALID; kind <= PGSTAT_KIND_LAST; kind++) + { + const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind); + + if (!kind_info->fixed_amount) + continue; + + kind_info->reset_all_cb(ts); + } + + /* and drop variable-numbered ones */ + pgstat_drop_all_entries(); +} + +/* + * GUC assign_hook for stats_fetch_consistency. + */ +void +assign_stats_fetch_consistency(int newval, void *extra) +{ + /* + * Changing this value in a transaction may cause snapshot state + * inconsistencies, so force a clear of the current snapshot on the next + * snapshot build attempt. + */ + if (pgstat_fetch_consistency != newval) + force_stats_snapshot_clear = true; +} diff --git a/src/backend/utils/activity/pgstat_archiver.c b/src/backend/utils/activity/pgstat_archiver.c new file mode 100644 index 0000000..851726f --- /dev/null +++ b/src/backend/utils/activity/pgstat_archiver.c @@ -0,0 +1,111 @@ +/* ------------------------------------------------------------------------- + * + * pgstat_archiver.c + * Implementation of archiver statistics. + * + * This file contains the implementation of archiver statistics. It is kept + * separate from pgstat.c to enforce the line between the statistics access / + * storage implementation and the details about individual types of + * statistics. 
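+ *
+ * The archiver process reports one event per processed WAL segment; a
+ * successful archival ends up here as (sketch, cf. pgarch.c):
+ *
+ *     pgstat_report_archiver(xlog, false);
+ *
+ * with 'failed' set to true instead when an archival attempt failed.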
+ * + * Copyright (c) 2001-2022, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/utils/activity/pgstat_archiver.c + * ------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "utils/pgstat_internal.h" +#include "utils/timestamp.h" + + +/* + * Report archiver statistics + */ +void +pgstat_report_archiver(const char *xlog, bool failed) +{ + PgStatShared_Archiver *stats_shmem = &pgStatLocal.shmem->archiver; + TimestampTz now = GetCurrentTimestamp(); + + pgstat_begin_changecount_write(&stats_shmem->changecount); + + if (failed) + { + ++stats_shmem->stats.failed_count; + memcpy(&stats_shmem->stats.last_failed_wal, xlog, + sizeof(stats_shmem->stats.last_failed_wal)); + stats_shmem->stats.last_failed_timestamp = now; + } + else + { + ++stats_shmem->stats.archived_count; + memcpy(&stats_shmem->stats.last_archived_wal, xlog, + sizeof(stats_shmem->stats.last_archived_wal)); + stats_shmem->stats.last_archived_timestamp = now; + } + + pgstat_end_changecount_write(&stats_shmem->changecount); +} + +/* + * Support function for the SQL-callable pgstat* functions. Returns + * a pointer to the archiver statistics struct. + */ +PgStat_ArchiverStats * +pgstat_fetch_stat_archiver(void) +{ + pgstat_snapshot_fixed(PGSTAT_KIND_ARCHIVER); + + return &pgStatLocal.snapshot.archiver; +} + +void +pgstat_archiver_reset_all_cb(TimestampTz ts) +{ + PgStatShared_Archiver *stats_shmem = &pgStatLocal.shmem->archiver; + + /* see explanation above PgStatShared_Archiver for the reset protocol */ + LWLockAcquire(&stats_shmem->lock, LW_EXCLUSIVE); + pgstat_copy_changecounted_stats(&stats_shmem->reset_offset, + &stats_shmem->stats, + sizeof(stats_shmem->stats), + &stats_shmem->changecount); + stats_shmem->stats.stat_reset_timestamp = ts; + LWLockRelease(&stats_shmem->lock); +} + +void +pgstat_archiver_snapshot_cb(void) +{ + PgStatShared_Archiver *stats_shmem = &pgStatLocal.shmem->archiver; + PgStat_ArchiverStats *stat_snap = &pgStatLocal.snapshot.archiver; + PgStat_ArchiverStats *reset_offset = &stats_shmem->reset_offset; + PgStat_ArchiverStats reset; + + pgstat_copy_changecounted_stats(stat_snap, + &stats_shmem->stats, + sizeof(stats_shmem->stats), + &stats_shmem->changecount); + + LWLockAcquire(&stats_shmem->lock, LW_SHARED); + memcpy(&reset, reset_offset, sizeof(stats_shmem->stats)); + LWLockRelease(&stats_shmem->lock); + + /* compensate by reset offsets */ + if (stat_snap->archived_count == reset.archived_count) + { + stat_snap->last_archived_wal[0] = 0; + stat_snap->last_archived_timestamp = 0; + } + stat_snap->archived_count -= reset.archived_count; + + if (stat_snap->failed_count == reset.failed_count) + { + stat_snap->last_failed_wal[0] = 0; + stat_snap->last_failed_timestamp = 0; + } + stat_snap->failed_count -= reset.failed_count; +} diff --git a/src/backend/utils/activity/pgstat_bgwriter.c b/src/backend/utils/activity/pgstat_bgwriter.c new file mode 100644 index 0000000..fbb1edc --- /dev/null +++ b/src/backend/utils/activity/pgstat_bgwriter.c @@ -0,0 +1,110 @@ +/* ------------------------------------------------------------------------- + * + * pgstat_bgwriter.c + * Implementation of bgwriter statistics. + * + * This file contains the implementation of bgwriter statistics. It is kept + * separate from pgstat.c to enforce the line between the statistics access / + * storage implementation and the details about individual types of + * statistics. 
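+ *
+ * Like the other fixed-numbered stats, these are protected by a
+ * changecount protocol: writers bracket updates with
+ * pgstat_begin_changecount_write() / pgstat_end_changecount_write(), while
+ * readers go through pgstat_copy_changecounted_stats(), which retries its
+ * copy until the counter is unchanged, roughly (editor's sketch):
+ *
+ *     do {
+ *         before = pgstat_begin_changecount_read(cc);
+ *         memcpy(dst, src, len);
+ *     } while (!pgstat_end_changecount_read(cc, before));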
+ * + * Copyright (c) 2001-2022, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/utils/activity/pgstat_bgwriter.c + * ------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "utils/pgstat_internal.h" + + +PgStat_BgWriterStats PendingBgWriterStats = {0}; + + +/* + * Report bgwriter statistics + */ +void +pgstat_report_bgwriter(void) +{ + PgStatShared_BgWriter *stats_shmem = &pgStatLocal.shmem->bgwriter; + static const PgStat_BgWriterStats all_zeroes; + + Assert(!pgStatLocal.shmem->is_shutdown); + pgstat_assert_is_up(); + + /* + * This function can be called even if nothing at all has happened. In + * this case, avoid unnecessarily modifying the stats entry. + */ + if (memcmp(&PendingBgWriterStats, &all_zeroes, sizeof(all_zeroes)) == 0) + return; + + pgstat_begin_changecount_write(&stats_shmem->changecount); + +#define BGWRITER_ACC(fld) stats_shmem->stats.fld += PendingBgWriterStats.fld + BGWRITER_ACC(buf_written_clean); + BGWRITER_ACC(maxwritten_clean); + BGWRITER_ACC(buf_alloc); +#undef BGWRITER_ACC + + pgstat_end_changecount_write(&stats_shmem->changecount); + + /* + * Clear out the statistics buffer, so it can be re-used. + */ + MemSet(&PendingBgWriterStats, 0, sizeof(PendingBgWriterStats)); +} + +/* + * Support function for the SQL-callable pgstat* functions. Returns + * a pointer to the bgwriter statistics struct. + */ +PgStat_BgWriterStats * +pgstat_fetch_stat_bgwriter(void) +{ + pgstat_snapshot_fixed(PGSTAT_KIND_BGWRITER); + + return &pgStatLocal.snapshot.bgwriter; +} + +void +pgstat_bgwriter_reset_all_cb(TimestampTz ts) +{ + PgStatShared_BgWriter *stats_shmem = &pgStatLocal.shmem->bgwriter; + + /* see explanation above PgStatShared_BgWriter for the reset protocol */ + LWLockAcquire(&stats_shmem->lock, LW_EXCLUSIVE); + pgstat_copy_changecounted_stats(&stats_shmem->reset_offset, + &stats_shmem->stats, + sizeof(stats_shmem->stats), + &stats_shmem->changecount); + stats_shmem->stats.stat_reset_timestamp = ts; + LWLockRelease(&stats_shmem->lock); +} + +void +pgstat_bgwriter_snapshot_cb(void) +{ + PgStatShared_BgWriter *stats_shmem = &pgStatLocal.shmem->bgwriter; + PgStat_BgWriterStats *reset_offset = &stats_shmem->reset_offset; + PgStat_BgWriterStats reset; + + pgstat_copy_changecounted_stats(&pgStatLocal.snapshot.bgwriter, + &stats_shmem->stats, + sizeof(stats_shmem->stats), + &stats_shmem->changecount); + + LWLockAcquire(&stats_shmem->lock, LW_SHARED); + memcpy(&reset, reset_offset, sizeof(stats_shmem->stats)); + LWLockRelease(&stats_shmem->lock); + + /* compensate by reset offsets */ +#define BGWRITER_COMP(fld) pgStatLocal.snapshot.bgwriter.fld -= reset.fld; + BGWRITER_COMP(buf_written_clean); + BGWRITER_COMP(maxwritten_clean); + BGWRITER_COMP(buf_alloc); +#undef BGWRITER_COMP +} diff --git a/src/backend/utils/activity/pgstat_checkpointer.c b/src/backend/utils/activity/pgstat_checkpointer.c new file mode 100644 index 0000000..af8d513 --- /dev/null +++ b/src/backend/utils/activity/pgstat_checkpointer.c @@ -0,0 +1,121 @@ +/* ------------------------------------------------------------------------- + * + * pgstat_checkpointer.c + * Implementation of checkpoint statistics. + * + * This file contains the implementation of checkpoint statistics. It is kept + * separate from pgstat.c to enforce the line between the statistics access / + * storage implementation and the details about individual types of + * statistics. 
+ * + * Copyright (c) 2001-2022, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/utils/activity/pgstat_checkpointer.c + * ------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "utils/pgstat_internal.h" + + +PgStat_CheckpointerStats PendingCheckpointerStats = {0}; + + +/* + * Report checkpointer statistics + */ +void +pgstat_report_checkpointer(void) +{ + /* We assume this initializes to zeroes */ + static const PgStat_CheckpointerStats all_zeroes; + PgStatShared_Checkpointer *stats_shmem = &pgStatLocal.shmem->checkpointer; + + Assert(!pgStatLocal.shmem->is_shutdown); + pgstat_assert_is_up(); + + /* + * This function can be called even if nothing at all has happened. In + * this case, avoid unnecessarily modifying the stats entry. + */ + if (memcmp(&PendingCheckpointerStats, &all_zeroes, + sizeof(all_zeroes)) == 0) + return; + + pgstat_begin_changecount_write(&stats_shmem->changecount); + +#define CHECKPOINTER_ACC(fld) stats_shmem->stats.fld += PendingCheckpointerStats.fld + CHECKPOINTER_ACC(timed_checkpoints); + CHECKPOINTER_ACC(requested_checkpoints); + CHECKPOINTER_ACC(checkpoint_write_time); + CHECKPOINTER_ACC(checkpoint_sync_time); + CHECKPOINTER_ACC(buf_written_checkpoints); + CHECKPOINTER_ACC(buf_written_backend); + CHECKPOINTER_ACC(buf_fsync_backend); +#undef CHECKPOINTER_ACC + + pgstat_end_changecount_write(&stats_shmem->changecount); + + /* + * Clear out the statistics buffer, so it can be re-used. + */ + MemSet(&PendingCheckpointerStats, 0, sizeof(PendingCheckpointerStats)); +} + +/* + * pgstat_fetch_stat_checkpointer() - + * + * Support function for the SQL-callable pgstat* functions. Returns + * a pointer to the checkpointer statistics struct. + */ +PgStat_CheckpointerStats * +pgstat_fetch_stat_checkpointer(void) +{ + pgstat_snapshot_fixed(PGSTAT_KIND_CHECKPOINTER); + + return &pgStatLocal.snapshot.checkpointer; +} + +void +pgstat_checkpointer_reset_all_cb(TimestampTz ts) +{ + PgStatShared_Checkpointer *stats_shmem = &pgStatLocal.shmem->checkpointer; + + /* see explanation above PgStatShared_Checkpointer for the reset protocol */ + LWLockAcquire(&stats_shmem->lock, LW_EXCLUSIVE); + pgstat_copy_changecounted_stats(&stats_shmem->reset_offset, + &stats_shmem->stats, + sizeof(stats_shmem->stats), + &stats_shmem->changecount); + LWLockRelease(&stats_shmem->lock); +} + +void +pgstat_checkpointer_snapshot_cb(void) +{ + PgStatShared_Checkpointer *stats_shmem = &pgStatLocal.shmem->checkpointer; + PgStat_CheckpointerStats *reset_offset = &stats_shmem->reset_offset; + PgStat_CheckpointerStats reset; + + pgstat_copy_changecounted_stats(&pgStatLocal.snapshot.checkpointer, + &stats_shmem->stats, + sizeof(stats_shmem->stats), + &stats_shmem->changecount); + + LWLockAcquire(&stats_shmem->lock, LW_SHARED); + memcpy(&reset, reset_offset, sizeof(stats_shmem->stats)); + LWLockRelease(&stats_shmem->lock); + + /* compensate by reset offsets */ +#define CHECKPOINTER_COMP(fld) pgStatLocal.snapshot.checkpointer.fld -= reset.fld; + CHECKPOINTER_COMP(timed_checkpoints); + CHECKPOINTER_COMP(requested_checkpoints); + CHECKPOINTER_COMP(checkpoint_write_time); + CHECKPOINTER_COMP(checkpoint_sync_time); + CHECKPOINTER_COMP(buf_written_checkpoints); + CHECKPOINTER_COMP(buf_written_backend); + CHECKPOINTER_COMP(buf_fsync_backend); +#undef CHECKPOINTER_COMP +} diff --git a/src/backend/utils/activity/pgstat_database.c b/src/backend/utils/activity/pgstat_database.c new file mode 100644 index 0000000..4235fa0 --- /dev/null +++ 
b/src/backend/utils/activity/pgstat_database.c @@ -0,0 +1,437 @@ +/* ------------------------------------------------------------------------- + * + * pgstat_database.c + * Implementation of database statistics. + * + * This file contains the implementation of database statistics. It is kept + * separate from pgstat.c to enforce the line between the statistics access / + * storage implementation and the details about individual types of + * statistics. + * + * Copyright (c) 2001-2022, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/utils/activity/pgstat_database.c + * ------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "utils/pgstat_internal.h" +#include "utils/timestamp.h" +#include "storage/procsignal.h" + + +static bool pgstat_should_report_connstat(void); + + +PgStat_Counter pgStatBlockReadTime = 0; +PgStat_Counter pgStatBlockWriteTime = 0; +PgStat_Counter pgStatActiveTime = 0; +PgStat_Counter pgStatTransactionIdleTime = 0; +SessionEndType pgStatSessionEndCause = DISCONNECT_NORMAL; + + +static int pgStatXactCommit = 0; +static int pgStatXactRollback = 0; +static PgStat_Counter pgLastSessionReportTime = 0; + + +/* + * Remove entry for the database being dropped. + */ +void +pgstat_drop_database(Oid databaseid) +{ + pgstat_drop_transactional(PGSTAT_KIND_DATABASE, databaseid, InvalidOid); +} + +/* + * Called from autovacuum.c to report startup of an autovacuum process. + * We are called before InitPostgres is done, so can't rely on MyDatabaseId; + * the db OID must be passed in, instead. + */ +void +pgstat_report_autovac(Oid dboid) +{ + PgStat_EntryRef *entry_ref; + PgStatShared_Database *dbentry; + + /* can't get here in single user mode */ + Assert(IsUnderPostmaster); + + /* + * End-of-vacuum is reported instantly. Report the start the same way for + * consistency. Vacuum doesn't run frequently and is a long-lasting + * operation so it doesn't matter if we get blocked here a little. + */ + entry_ref = pgstat_get_entry_ref_locked(PGSTAT_KIND_DATABASE, + dboid, InvalidOid, false); + + dbentry = (PgStatShared_Database *) entry_ref->shared_stats; + dbentry->stats.last_autovac_time = GetCurrentTimestamp(); + + pgstat_unlock_entry(entry_ref); +} + +/* + * Report a Hot Standby recovery conflict. + */ +void +pgstat_report_recovery_conflict(int reason) +{ + PgStat_StatDBEntry *dbentry; + + Assert(IsUnderPostmaster); + if (!pgstat_track_counts) + return; + + dbentry = pgstat_prep_database_pending(MyDatabaseId); + + switch (reason) + { + case PROCSIG_RECOVERY_CONFLICT_DATABASE: + + /* + * Since we drop the information about the database as soon as it + * replicates, there is no point in counting these conflicts. + */ + break; + case PROCSIG_RECOVERY_CONFLICT_TABLESPACE: + dbentry->n_conflict_tablespace++; + break; + case PROCSIG_RECOVERY_CONFLICT_LOCK: + dbentry->n_conflict_lock++; + break; + case PROCSIG_RECOVERY_CONFLICT_SNAPSHOT: + dbentry->n_conflict_snapshot++; + break; + case PROCSIG_RECOVERY_CONFLICT_BUFFERPIN: + dbentry->n_conflict_bufferpin++; + break; + case PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK: + dbentry->n_conflict_startup_deadlock++; + break; + } +} + +/* + * Report a detected deadlock. + */ +void +pgstat_report_deadlock(void) +{ + PgStat_StatDBEntry *dbent; + + if (!pgstat_track_counts) + return; + + dbent = pgstat_prep_database_pending(MyDatabaseId); + dbent->n_deadlocks++; +} + +/* + * Report one or more checksum failures. 
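+ *
+ * Callers that detect a single failure in the current database can use the
+ * pgstat_report_checksum_failure() wrapper below, which is equivalent to:
+ *
+ *     pgstat_report_checksum_failures_in_db(MyDatabaseId, 1);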
+ */ +void +pgstat_report_checksum_failures_in_db(Oid dboid, int failurecount) +{ + PgStat_EntryRef *entry_ref; + PgStatShared_Database *sharedent; + + if (!pgstat_track_counts) + return; + + /* + * Update the shared stats directly - checksum failures should never be + * common enough for that to be a problem. + */ + entry_ref = + pgstat_get_entry_ref_locked(PGSTAT_KIND_DATABASE, dboid, InvalidOid, false); + + sharedent = (PgStatShared_Database *) entry_ref->shared_stats; + sharedent->stats.n_checksum_failures += failurecount; + sharedent->stats.last_checksum_failure = GetCurrentTimestamp(); + + pgstat_unlock_entry(entry_ref); +} + +/* + * Report one checksum failure in the current database. + */ +void +pgstat_report_checksum_failure(void) +{ + pgstat_report_checksum_failures_in_db(MyDatabaseId, 1); +} + +/* + * Report creation of temporary file. + */ +void +pgstat_report_tempfile(size_t filesize) +{ + PgStat_StatDBEntry *dbent; + + if (!pgstat_track_counts) + return; + + dbent = pgstat_prep_database_pending(MyDatabaseId); + dbent->n_temp_bytes += filesize; + dbent->n_temp_files++; +} + +/* + * Notify stats system of a new connection. + */ +void +pgstat_report_connect(Oid dboid) +{ + PgStat_StatDBEntry *dbentry; + + if (!pgstat_should_report_connstat()) + return; + + pgLastSessionReportTime = MyStartTimestamp; + + dbentry = pgstat_prep_database_pending(MyDatabaseId); + dbentry->n_sessions++; +} + +/* + * Notify the stats system of a disconnect. + */ +void +pgstat_report_disconnect(Oid dboid) +{ + PgStat_StatDBEntry *dbentry; + + if (!pgstat_should_report_connstat()) + return; + + dbentry = pgstat_prep_database_pending(MyDatabaseId); + + switch (pgStatSessionEndCause) + { + case DISCONNECT_NOT_YET: + case DISCONNECT_NORMAL: + /* we don't collect these */ + break; + case DISCONNECT_CLIENT_EOF: + dbentry->n_sessions_abandoned++; + break; + case DISCONNECT_FATAL: + dbentry->n_sessions_fatal++; + break; + case DISCONNECT_KILLED: + dbentry->n_sessions_killed++; + break; + } +} + +/* + * Support function for the SQL-callable pgstat* functions. Returns + * the collected statistics for one database or NULL. NULL doesn't mean + * that the database doesn't exist, just that there are no statistics, so the + * caller is better off to report ZERO instead. + */ +PgStat_StatDBEntry * +pgstat_fetch_stat_dbentry(Oid dboid) +{ + return (PgStat_StatDBEntry *) + pgstat_fetch_entry(PGSTAT_KIND_DATABASE, dboid, InvalidOid); +} + +void +AtEOXact_PgStat_Database(bool isCommit, bool parallel) +{ + /* Don't count parallel worker transaction stats */ + if (!parallel) + { + /* + * Count transaction commit or abort. (We use counters, not just + * bools, in case the reporting message isn't sent right away.) + */ + if (isCommit) + pgStatXactCommit++; + else + pgStatXactRollback++; + } +} + +/* + * Subroutine for pgstat_report_stat(): Handle xact commit/rollback and I/O + * timings. + */ +void +pgstat_update_dbstats(TimestampTz ts) +{ + PgStat_StatDBEntry *dbentry; + + /* + * If not connected to a database yet, don't attribute time to "shared + * state" (InvalidOid is used to track stats for shared relations, etc.). + */ + if (!OidIsValid(MyDatabaseId)) + return; + + dbentry = pgstat_prep_database_pending(MyDatabaseId); + + /* + * Accumulate xact commit/rollback and I/O timings to stats entry of the + * current database. 
+ */ + dbentry->n_xact_commit += pgStatXactCommit; + dbentry->n_xact_rollback += pgStatXactRollback; + dbentry->n_block_read_time += pgStatBlockReadTime; + dbentry->n_block_write_time += pgStatBlockWriteTime; + + if (pgstat_should_report_connstat()) + { + long secs; + int usecs; + + /* + * pgLastSessionReportTime is initialized to MyStartTimestamp by + * pgstat_report_connect(). + */ + TimestampDifference(pgLastSessionReportTime, ts, &secs, &usecs); + pgLastSessionReportTime = ts; + dbentry->total_session_time += (PgStat_Counter) secs * 1000000 + usecs; + dbentry->total_active_time += pgStatActiveTime; + dbentry->total_idle_in_xact_time += pgStatTransactionIdleTime; + } + + pgStatXactCommit = 0; + pgStatXactRollback = 0; + pgStatBlockReadTime = 0; + pgStatBlockWriteTime = 0; + pgStatActiveTime = 0; + pgStatTransactionIdleTime = 0; +} + +/* + * We report session statistics only for normal backend processes. Parallel + * workers run in parallel, so they don't contribute to session times, even + * though they use CPU time. Walsender processes could be considered here, + * but they have different session characteristics from normal backends (for + * example, they are always "active"), so they would skew session statistics. + */ +static bool +pgstat_should_report_connstat(void) +{ + return MyBackendType == B_BACKEND; +} + +/* + * Find or create a local PgStat_StatDBEntry entry for dboid. + */ +PgStat_StatDBEntry * +pgstat_prep_database_pending(Oid dboid) +{ + PgStat_EntryRef *entry_ref; + + /* + * This should not report stats on database objects before having + * connected to a database. + */ + Assert(!OidIsValid(dboid) || OidIsValid(MyDatabaseId)); + + entry_ref = pgstat_prep_pending_entry(PGSTAT_KIND_DATABASE, dboid, InvalidOid, + NULL); + + return entry_ref->pending; +} + +/* + * Reset the database's reset timestamp, without resetting the contents of the + * database stats. + */ +void +pgstat_reset_database_timestamp(Oid dboid, TimestampTz ts) +{ + PgStat_EntryRef *dbref; + PgStatShared_Database *dbentry; + + dbref = pgstat_get_entry_ref_locked(PGSTAT_KIND_DATABASE, MyDatabaseId, InvalidOid, + false); + + dbentry = (PgStatShared_Database *) dbref->shared_stats; + dbentry->stats.stat_reset_timestamp = ts; + + pgstat_unlock_entry(dbref); +} + +/* + * Flush out pending stats for the entry + * + * If nowait is true, this function returns false if the lock could not be + * acquired immediately; otherwise true is returned. 
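+ *
+ * All flush_pending_cb implementations share this shape (editor's sketch;
+ * compare pgstat_function_flush_cb() in pgstat_function.c):
+ *
+ *     if (!pgstat_lock_entry(entry_ref, nowait))
+ *         return false;    -- lock contended, caller retries later
+ *     ... accumulate entry_ref->pending into entry_ref->shared_stats ...
+ *     pgstat_unlock_entry(entry_ref);
+ *     ... zero out the pending entry ...
+ *     return true;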
+ */ +bool +pgstat_database_flush_cb(PgStat_EntryRef *entry_ref, bool nowait) +{ + PgStatShared_Database *sharedent; + PgStat_StatDBEntry *pendingent; + + pendingent = (PgStat_StatDBEntry *) entry_ref->pending; + sharedent = (PgStatShared_Database *) entry_ref->shared_stats; + + if (!pgstat_lock_entry(entry_ref, nowait)) + return false; + +#define PGSTAT_ACCUM_DBCOUNT(item) \ + (sharedent)->stats.item += (pendingent)->item + + PGSTAT_ACCUM_DBCOUNT(n_xact_commit); + PGSTAT_ACCUM_DBCOUNT(n_xact_rollback); + PGSTAT_ACCUM_DBCOUNT(n_blocks_fetched); + PGSTAT_ACCUM_DBCOUNT(n_blocks_hit); + + PGSTAT_ACCUM_DBCOUNT(n_tuples_returned); + PGSTAT_ACCUM_DBCOUNT(n_tuples_fetched); + PGSTAT_ACCUM_DBCOUNT(n_tuples_inserted); + PGSTAT_ACCUM_DBCOUNT(n_tuples_updated); + PGSTAT_ACCUM_DBCOUNT(n_tuples_deleted); + + /* last_autovac_time is reported immediately */ + Assert(pendingent->last_autovac_time == 0); + + PGSTAT_ACCUM_DBCOUNT(n_conflict_tablespace); + PGSTAT_ACCUM_DBCOUNT(n_conflict_lock); + PGSTAT_ACCUM_DBCOUNT(n_conflict_snapshot); + PGSTAT_ACCUM_DBCOUNT(n_conflict_bufferpin); + PGSTAT_ACCUM_DBCOUNT(n_conflict_startup_deadlock); + + PGSTAT_ACCUM_DBCOUNT(n_temp_bytes); + PGSTAT_ACCUM_DBCOUNT(n_temp_files); + PGSTAT_ACCUM_DBCOUNT(n_deadlocks); + + /* checksum failures are reported immediately */ + Assert(pendingent->n_checksum_failures == 0); + Assert(pendingent->last_checksum_failure == 0); + + PGSTAT_ACCUM_DBCOUNT(n_block_read_time); + PGSTAT_ACCUM_DBCOUNT(n_block_write_time); + + PGSTAT_ACCUM_DBCOUNT(n_sessions); + PGSTAT_ACCUM_DBCOUNT(total_session_time); + PGSTAT_ACCUM_DBCOUNT(total_active_time); + PGSTAT_ACCUM_DBCOUNT(total_idle_in_xact_time); + PGSTAT_ACCUM_DBCOUNT(n_sessions_abandoned); + PGSTAT_ACCUM_DBCOUNT(n_sessions_fatal); + PGSTAT_ACCUM_DBCOUNT(n_sessions_killed); +#undef PGSTAT_ACCUM_DBCOUNT + + pgstat_unlock_entry(entry_ref); + + memset(pendingent, 0, sizeof(*pendingent)); + + return true; +} + +void +pgstat_database_reset_timestamp_cb(PgStatShared_Common *header, TimestampTz ts) +{ + ((PgStatShared_Database *) header)->stats.stat_reset_timestamp = ts; +} diff --git a/src/backend/utils/activity/pgstat_function.c b/src/backend/utils/activity/pgstat_function.c new file mode 100644 index 0000000..427d8c4 --- /dev/null +++ b/src/backend/utils/activity/pgstat_function.c @@ -0,0 +1,243 @@ +/* ------------------------------------------------------------------------- + * + * pgstat_function.c + * Implementation of function statistics. + * + * This file contains the implementation of function statistics. It is kept + * separate from pgstat.c to enforce the line between the statistics access / + * storage implementation and the details about individual types of + * statistics. + * + * Copyright (c) 2001-2022, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/utils/activity/pgstat_function.c + * ------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "fmgr.h" +#include "utils/inval.h" +#include "utils/pgstat_internal.h" +#include "utils/syscache.h" + + +/* ---------- + * GUC parameters + * ---------- + */ +int pgstat_track_functions = TRACK_FUNC_OFF; + + +/* + * Total time charged to functions so far in the current backend. + * We use this to help separate "self" and "other" time charges. + * (We assume this initializes to zero.) + */ +static instr_time total_func_time; + + +/* + * Ensure that stats are dropped if transaction aborts. 
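+ *
+ * Sketch of the DDL pairing (hedged; the exact call sites live in the
+ * function DDL code):
+ *
+ *     pgstat_create_function(proid);   -- at CREATE FUNCTION; undone on abort
+ *     pgstat_drop_function(proid);     -- at DROP FUNCTION; applied at commit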
+ */ +void +pgstat_create_function(Oid proid) +{ + pgstat_create_transactional(PGSTAT_KIND_FUNCTION, + MyDatabaseId, + proid); +} + +/* + * Ensure that stats are dropped if transaction commits. + * + * NB: This is only reliable because pgstat_init_function_usage() does some + * extra work. If other places start emitting function stats they likely need + * similar logic. + */ +void +pgstat_drop_function(Oid proid) +{ + pgstat_drop_transactional(PGSTAT_KIND_FUNCTION, + MyDatabaseId, + proid); +} + +/* + * Initialize function call usage data. + * Called by the executor before invoking a function. + */ +void +pgstat_init_function_usage(FunctionCallInfo fcinfo, + PgStat_FunctionCallUsage *fcu) +{ + PgStat_EntryRef *entry_ref; + PgStat_BackendFunctionEntry *pending; + bool created_entry; + + if (pgstat_track_functions <= fcinfo->flinfo->fn_stats) + { + /* stats not wanted */ + fcu->fs = NULL; + return; + } + + entry_ref = pgstat_prep_pending_entry(PGSTAT_KIND_FUNCTION, + MyDatabaseId, + fcinfo->flinfo->fn_oid, + &created_entry); + + /* + * If no shared entry already exists, check if the function has been + * deleted concurrently. This can go unnoticed until here because + * executing a statement that just calls a function does not trigger + * cache invalidation processing. The reason we care about this case is + * that otherwise we could create a new stats entry for an already dropped + * function (for relations etc. this is not possible because emitting stats + * requires that a lock on the relation has already been acquired). + * + * It's somewhat ugly to have a behavioral difference based on + * track_functions being enabled/disabled. But it seems acceptable, given + * that there are already behavioral differences depending on whether the + * function is in the caches etc. + * + * For correctness it'd be sufficient to set ->dropped to true. However, + * the accepted invalidation will commonly cause "low level" failures in + * PL code, with an OID in the error message, making this harder to + * test... + */ + if (created_entry) + { + AcceptInvalidationMessages(); + if (!SearchSysCacheExists1(PROCOID, ObjectIdGetDatum(fcinfo->flinfo->fn_oid))) + { + pgstat_drop_entry(PGSTAT_KIND_FUNCTION, MyDatabaseId, + fcinfo->flinfo->fn_oid); + ereport(ERROR, errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("function call to dropped function")); + } + } + + pending = entry_ref->pending; + + fcu->fs = &pending->f_counts; + + /* save stats for this function, later used to compensate for recursion */ + fcu->save_f_total_time = pending->f_counts.f_total_time; + + /* save current backend-wide total time */ + fcu->save_total = total_func_time; + + /* get clock time as of function start */ + INSTR_TIME_SET_CURRENT(fcu->f_start); +} + +/* + * Calculate function call usage and update stat counters. + * Called by the executor after invoking a function. + * + * In the case of a set-returning function that runs in value-per-call mode, + * we will see multiple pgstat_init_function_usage/pgstat_end_function_usage + * calls for what the user considers a single call of the function. The + * finalize flag should be TRUE on the last call. + */ +void +pgstat_end_function_usage(PgStat_FunctionCallUsage *fcu, bool finalize) +{ + PgStat_FunctionCounts *fs = fcu->fs; + instr_time f_total; + instr_time f_others; + instr_time f_self; + + /* stats not wanted? 
*/ + if (fs == NULL) + return; + + /* total elapsed time in this function call */ + INSTR_TIME_SET_CURRENT(f_total); + INSTR_TIME_SUBTRACT(f_total, fcu->f_start); + + /* self usage: elapsed minus anything already charged to other calls */ + f_others = total_func_time; + INSTR_TIME_SUBTRACT(f_others, fcu->save_total); + f_self = f_total; + INSTR_TIME_SUBTRACT(f_self, f_others); + + /* update backend-wide total time */ + INSTR_TIME_ADD(total_func_time, f_self); + + /* + * Compute the new f_total_time as the total elapsed time added to the + * pre-call value of f_total_time. This is necessary to avoid + * double-counting any time taken by recursive calls of myself. (We do + * not need any similar kluge for self time, since that already excludes + * any recursive calls.) + */ + INSTR_TIME_ADD(f_total, fcu->save_f_total_time); + + /* update counters in function stats table */ + if (finalize) + fs->f_numcalls++; + fs->f_total_time = f_total; + INSTR_TIME_ADD(fs->f_self_time, f_self); +} + +/* + * Flush out pending stats for the entry + * + * If nowait is true, this function returns false if the lock could not be + * acquired immediately; otherwise true is returned. + */ +bool +pgstat_function_flush_cb(PgStat_EntryRef *entry_ref, bool nowait) +{ + PgStat_BackendFunctionEntry *localent; + PgStatShared_Function *shfuncent; + + localent = (PgStat_BackendFunctionEntry *) entry_ref->pending; + shfuncent = (PgStatShared_Function *) entry_ref->shared_stats; + + /* localent always has non-zero content */ + + if (!pgstat_lock_entry(entry_ref, nowait)) + return false; + + shfuncent->stats.f_numcalls += localent->f_counts.f_numcalls; + shfuncent->stats.f_total_time += + INSTR_TIME_GET_MICROSEC(localent->f_counts.f_total_time); + shfuncent->stats.f_self_time += + INSTR_TIME_GET_MICROSEC(localent->f_counts.f_self_time); + + pgstat_unlock_entry(entry_ref); + + return true; +} + +/* + * Find any existing PgStat_BackendFunctionEntry for the specified function. + * + * If there's no entry, return NULL; don't create a new one. + */ +PgStat_BackendFunctionEntry * +find_funcstat_entry(Oid func_id) +{ + PgStat_EntryRef *entry_ref; + + entry_ref = pgstat_fetch_pending_entry(PGSTAT_KIND_FUNCTION, MyDatabaseId, func_id); + + if (entry_ref) + return entry_ref->pending; + return NULL; +} + +/* + * Support function for the SQL-callable pgstat* functions. Returns + * the collected statistics for one function or NULL. + */ +PgStat_StatFuncEntry * +pgstat_fetch_stat_funcentry(Oid func_id) +{ + return (PgStat_StatFuncEntry *) + pgstat_fetch_entry(PGSTAT_KIND_FUNCTION, MyDatabaseId, func_id); +} diff --git a/src/backend/utils/activity/pgstat_relation.c b/src/backend/utils/activity/pgstat_relation.c new file mode 100644 index 0000000..a846d9f --- /dev/null +++ b/src/backend/utils/activity/pgstat_relation.c @@ -0,0 +1,938 @@ +/* ------------------------------------------------------------------------- + * + * pgstat_relation.c + * Implementation of relation statistics. + * + * This file contains the implementation of relation statistics. It is kept + * separate from pgstat.c to enforce the line between the statistics access / + * storage implementation and the details about individual types of + * statistics. 
+ * + * Copyright (c) 2001-2022, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/utils/activity/pgstat_relation.c + * ------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/twophase_rmgr.h" +#include "access/xact.h" +#include "catalog/partition.h" +#include "postmaster/autovacuum.h" +#include "utils/memutils.h" +#include "utils/pgstat_internal.h" +#include "utils/rel.h" +#include "utils/timestamp.h" + + +/* Record that's written to 2PC state file when pgstat state is persisted */ +typedef struct TwoPhasePgStatRecord +{ + PgStat_Counter tuples_inserted; /* tuples inserted in xact */ + PgStat_Counter tuples_updated; /* tuples updated in xact */ + PgStat_Counter tuples_deleted; /* tuples deleted in xact */ + /* tuples i/u/d prior to truncate/drop */ + PgStat_Counter inserted_pre_truncdrop; + PgStat_Counter updated_pre_truncdrop; + PgStat_Counter deleted_pre_truncdrop; + Oid t_id; /* table's OID */ + bool t_shared; /* is it a shared catalog? */ + bool t_truncdropped; /* was the relation truncated/dropped? */ +} TwoPhasePgStatRecord; + + +static PgStat_TableStatus *pgstat_prep_relation_pending(Oid rel_id, bool isshared); +static void add_tabstat_xact_level(PgStat_TableStatus *pgstat_info, int nest_level); +static void ensure_tabstat_xact_level(PgStat_TableStatus *pgstat_info); +static void save_truncdrop_counters(PgStat_TableXactStatus *trans, bool is_drop); +static void restore_truncdrop_counters(PgStat_TableXactStatus *trans); + + +/* + * Copy stats between relations. This is used for things like REINDEX + * CONCURRENTLY. + */ +void +pgstat_copy_relation_stats(Relation dst, Relation src) +{ + PgStat_StatTabEntry *srcstats; + PgStatShared_Relation *dstshstats; + PgStat_EntryRef *dst_ref; + + srcstats = pgstat_fetch_stat_tabentry_ext(src->rd_rel->relisshared, + RelationGetRelid(src)); + if (!srcstats) + return; + + dst_ref = pgstat_get_entry_ref_locked(PGSTAT_KIND_RELATION, + dst->rd_rel->relisshared ? InvalidOid : MyDatabaseId, + RelationGetRelid(dst), + false); + + dstshstats = (PgStatShared_Relation *) dst_ref->shared_stats; + dstshstats->stats = *srcstats; + + pgstat_unlock_entry(dst_ref); +} + +/* + * Initialize a relcache entry to count access statistics. Called whenever a + * relation is opened. + * + * We assume that a relcache entry's pgstat_info field is zeroed by relcache.c + * when the relcache entry is made; thereafter it is long-lived data. + * + * This does not create a reference to a stats entry in shared memory, nor + * allocate memory for the pending stats. That happens in + * pgstat_assoc_relation(). + */ +void +pgstat_init_relation(Relation rel) +{ + char relkind = rel->rd_rel->relkind; + + /* + * We only count stats for relations with storage and partitioned tables + */ + if (!RELKIND_HAS_STORAGE(relkind) && relkind != RELKIND_PARTITIONED_TABLE) + { + rel->pgstat_enabled = false; + rel->pgstat_info = NULL; + return; + } + + if (!pgstat_track_counts) + { + if (rel->pgstat_info) + pgstat_unlink_relation(rel); + + /* We're not counting at all */ + rel->pgstat_enabled = false; + rel->pgstat_info = NULL; + return; + } + + rel->pgstat_enabled = true; +} + +/* + * Prepare for statistics for this relation to be collected. + * + * This ensures we have a reference to the stats entry before stats can be + * generated. 
That is important because a relation drop in another connection
+ * could otherwise lead to the stats entry being dropped, which then later
+ * would get recreated when flushing stats.
+ *
+ * This is separate from pgstat_init_relation() as it is not uncommon for
+ * relcache entries to be opened without ever getting stats reported.
+ */
+void
+pgstat_assoc_relation(Relation rel)
+{
+ Assert(rel->pgstat_enabled);
+ Assert(rel->pgstat_info == NULL);
+
+ /* Find or make the PgStat_TableStatus entry, and update the link */
+ rel->pgstat_info = pgstat_prep_relation_pending(RelationGetRelid(rel),
+ rel->rd_rel->relisshared);
+
+ /* don't allow linking a stats entry to multiple relcache entries */
+ Assert(rel->pgstat_info->relation == NULL);
+
+ /* mark this relation as the owner */
+ rel->pgstat_info->relation = rel;
+}
+
+/*
+ * Break the mutual link between a relcache entry and pending stats entry.
+ * This must be called whenever one end of the link is removed.
+ */
+void
+pgstat_unlink_relation(Relation rel)
+{
+ /* remove the link to stats info if any */
+ if (rel->pgstat_info == NULL)
+ return;
+
+ /* link sanity check */
+ Assert(rel->pgstat_info->relation == rel);
+ rel->pgstat_info->relation = NULL;
+ rel->pgstat_info = NULL;
+}
+
+/*
+ * Ensure that stats are dropped if transaction aborts.
+ */
+void
+pgstat_create_relation(Relation rel)
+{
+ pgstat_create_transactional(PGSTAT_KIND_RELATION,
+ rel->rd_rel->relisshared ? InvalidOid : MyDatabaseId,
+ RelationGetRelid(rel));
+}
+
+/*
+ * Ensure that stats are dropped if transaction commits.
+ */
+void
+pgstat_drop_relation(Relation rel)
+{
+ int nest_level = GetCurrentTransactionNestLevel();
+ PgStat_TableStatus *pgstat_info;
+
+ pgstat_drop_transactional(PGSTAT_KIND_RELATION,
+ rel->rd_rel->relisshared ? InvalidOid : MyDatabaseId,
+ RelationGetRelid(rel));
+
+ if (!pgstat_should_count_relation(rel))
+ return;
+
+ /*
+ * Transactionally set counters to 0. That ensures that accesses to
+ * pg_stat_xact_all_tables inside the transaction show 0.
+ */
+ pgstat_info = rel->pgstat_info;
+ if (pgstat_info->trans &&
+ pgstat_info->trans->nest_level == nest_level)
+ {
+ save_truncdrop_counters(pgstat_info->trans, true);
+ pgstat_info->trans->tuples_inserted = 0;
+ pgstat_info->trans->tuples_updated = 0;
+ pgstat_info->trans->tuples_deleted = 0;
+ }
+}
+
+/*
+ * Report that the table was just vacuumed.
+ */
+void
+pgstat_report_vacuum(Oid tableoid, bool shared,
+ PgStat_Counter livetuples, PgStat_Counter deadtuples)
+{
+ PgStat_EntryRef *entry_ref;
+ PgStatShared_Relation *shtabentry;
+ PgStat_StatTabEntry *tabentry;
+ Oid dboid = (shared ? InvalidOid : MyDatabaseId);
+ TimestampTz ts;
+
+ if (!pgstat_track_counts)
+ return;
+
+ /* Store the data in the table's hash table entry. */
+ ts = GetCurrentTimestamp();
+
+ /* block acquiring lock for the same reason as pgstat_report_autovac() */
+ entry_ref = pgstat_get_entry_ref_locked(PGSTAT_KIND_RELATION,
+ dboid, tableoid, false);
+
+ shtabentry = (PgStatShared_Relation *) entry_ref->shared_stats;
+ tabentry = &shtabentry->stats;
+
+ tabentry->n_live_tuples = livetuples;
+ tabentry->n_dead_tuples = deadtuples;
+
+ /*
+ * It is quite possible that a non-aggressive VACUUM ended up skipping
+ * various pages; however, we'll zero the insert counter here regardless.
+ * It's currently used only to track when we need to perform an "insert"
+ * autovacuum, which is mainly intended to freeze newly inserted tuples.
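+ * (For reference, from autovacuum rather than this file: an "insert"
+ * autovacuum is triggered roughly when inserts_since_vacuum exceeds
+ * autovacuum_vacuum_insert_threshold +
+ * autovacuum_vacuum_insert_scale_factor * reltuples.)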
+ * Zeroing this may just mean we'll not try to vacuum the table again + * until enough tuples have been inserted to trigger another insert + * autovacuum. An anti-wraparound autovacuum will catch any persistent + * stragglers. + */ + tabentry->inserts_since_vacuum = 0; + + if (IsAutoVacuumWorkerProcess()) + { + tabentry->autovac_vacuum_timestamp = ts; + tabentry->autovac_vacuum_count++; + } + else + { + tabentry->vacuum_timestamp = ts; + tabentry->vacuum_count++; + } + + pgstat_unlock_entry(entry_ref); +} + +/* + * Report that the table was just analyzed. + * + * Caller must provide new live- and dead-tuples estimates, as well as a + * flag indicating whether to reset the changes_since_analyze counter. + */ +void +pgstat_report_analyze(Relation rel, + PgStat_Counter livetuples, PgStat_Counter deadtuples, + bool resetcounter) +{ + PgStat_EntryRef *entry_ref; + PgStatShared_Relation *shtabentry; + PgStat_StatTabEntry *tabentry; + Oid dboid = (rel->rd_rel->relisshared ? InvalidOid : MyDatabaseId); + + if (!pgstat_track_counts) + return; + + /* + * Unlike VACUUM, ANALYZE might be running inside a transaction that has + * already inserted and/or deleted rows in the target table. ANALYZE will + * have counted such rows as live or dead respectively. Because we will + * report our counts of such rows at transaction end, we should subtract + * off these counts from the update we're making now, else they'll be + * double-counted after commit. (This approach also ensures that the + * shared stats entry ends up with the right numbers if we abort instead + * of committing.) + * + * Waste no time on partitioned tables, though. + */ + if (pgstat_should_count_relation(rel) && + rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) + { + PgStat_TableXactStatus *trans; + + for (trans = rel->pgstat_info->trans; trans; trans = trans->upper) + { + livetuples -= trans->tuples_inserted - trans->tuples_deleted; + deadtuples -= trans->tuples_updated + trans->tuples_deleted; + } + /* count stuff inserted by already-aborted subxacts, too */ + deadtuples -= rel->pgstat_info->t_counts.t_delta_dead_tuples; + /* Since ANALYZE's counts are estimates, we could have underflowed */ + livetuples = Max(livetuples, 0); + deadtuples = Max(deadtuples, 0); + } + + /* block acquiring lock for the same reason as pgstat_report_autovac() */ + entry_ref = pgstat_get_entry_ref_locked(PGSTAT_KIND_RELATION, dboid, + RelationGetRelid(rel), + false); + /* can't get dropped while accessed */ + Assert(entry_ref != NULL && entry_ref->shared_stats != NULL); + + shtabentry = (PgStatShared_Relation *) entry_ref->shared_stats; + tabentry = &shtabentry->stats; + + tabentry->n_live_tuples = livetuples; + tabentry->n_dead_tuples = deadtuples; + + /* + * If commanded, reset changes_since_analyze to zero. This forgets any + * changes that were committed while the ANALYZE was in progress, but we + * have no good way to estimate how many of those there were. 
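+ * (For reference, from autovacuum rather than this file:
+ * changes_since_analyze is compared against autovacuum_analyze_threshold +
+ * autovacuum_analyze_scale_factor * reltuples when deciding whether to
+ * launch an ANALYZE.)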
+ */ + if (resetcounter) + tabentry->changes_since_analyze = 0; + + if (IsAutoVacuumWorkerProcess()) + { + tabentry->autovac_analyze_timestamp = GetCurrentTimestamp(); + tabentry->autovac_analyze_count++; + } + else + { + tabentry->analyze_timestamp = GetCurrentTimestamp(); + tabentry->analyze_count++; + } + + pgstat_unlock_entry(entry_ref); +} + +/* + * count a tuple insertion of n tuples + */ +void +pgstat_count_heap_insert(Relation rel, PgStat_Counter n) +{ + if (pgstat_should_count_relation(rel)) + { + PgStat_TableStatus *pgstat_info = rel->pgstat_info; + + ensure_tabstat_xact_level(pgstat_info); + pgstat_info->trans->tuples_inserted += n; + } +} + +/* + * count a tuple update + */ +void +pgstat_count_heap_update(Relation rel, bool hot) +{ + if (pgstat_should_count_relation(rel)) + { + PgStat_TableStatus *pgstat_info = rel->pgstat_info; + + ensure_tabstat_xact_level(pgstat_info); + pgstat_info->trans->tuples_updated++; + + /* t_tuples_hot_updated is nontransactional, so just advance it */ + if (hot) + pgstat_info->t_counts.t_tuples_hot_updated++; + } +} + +/* + * count a tuple deletion + */ +void +pgstat_count_heap_delete(Relation rel) +{ + if (pgstat_should_count_relation(rel)) + { + PgStat_TableStatus *pgstat_info = rel->pgstat_info; + + ensure_tabstat_xact_level(pgstat_info); + pgstat_info->trans->tuples_deleted++; + } +} + +/* + * update tuple counters due to truncate + */ +void +pgstat_count_truncate(Relation rel) +{ + if (pgstat_should_count_relation(rel)) + { + PgStat_TableStatus *pgstat_info = rel->pgstat_info; + + ensure_tabstat_xact_level(pgstat_info); + save_truncdrop_counters(pgstat_info->trans, false); + pgstat_info->trans->tuples_inserted = 0; + pgstat_info->trans->tuples_updated = 0; + pgstat_info->trans->tuples_deleted = 0; + } +} + +/* + * update dead-tuples count + * + * The semantics of this are that we are reporting the nontransactional + * recovery of "delta" dead tuples; so t_delta_dead_tuples decreases + * rather than increasing, and the change goes straight into the per-table + * counter, not into transactional state. + */ +void +pgstat_update_heap_dead_tuples(Relation rel, int delta) +{ + if (pgstat_should_count_relation(rel)) + { + PgStat_TableStatus *pgstat_info = rel->pgstat_info; + + pgstat_info->t_counts.t_delta_dead_tuples -= delta; + } +} + +/* + * Support function for the SQL-callable pgstat* functions. Returns + * the collected statistics for one table or NULL. NULL doesn't mean + * that the table doesn't exist, just that there are no statistics, so the + * caller is better off to report ZERO instead. + */ +PgStat_StatTabEntry * +pgstat_fetch_stat_tabentry(Oid relid) +{ + PgStat_StatTabEntry *tabentry; + + tabentry = pgstat_fetch_stat_tabentry_ext(false, relid); + if (tabentry != NULL) + return tabentry; + + /* + * If we didn't find it, maybe it's a shared table. + */ + tabentry = pgstat_fetch_stat_tabentry_ext(true, relid); + return tabentry; +} + +/* + * More efficient version of pgstat_fetch_stat_tabentry(), allowing to specify + * whether the to-be-accessed table is a shared relation or not. + */ +PgStat_StatTabEntry * +pgstat_fetch_stat_tabentry_ext(bool shared, Oid reloid) +{ + Oid dboid = (shared ? InvalidOid : MyDatabaseId); + + return (PgStat_StatTabEntry *) + pgstat_fetch_entry(PGSTAT_KIND_RELATION, dboid, reloid); +} + +/* + * find any existing PgStat_TableStatus entry for rel + * + * Find any existing PgStat_TableStatus entry for rel_id in the current + * database. If not found, try finding from shared tables. 
+ * + * If no entry found, return NULL, don't create a new one + */ +PgStat_TableStatus * +find_tabstat_entry(Oid rel_id) +{ + PgStat_EntryRef *entry_ref; + + entry_ref = pgstat_fetch_pending_entry(PGSTAT_KIND_RELATION, MyDatabaseId, rel_id); + if (!entry_ref) + entry_ref = pgstat_fetch_pending_entry(PGSTAT_KIND_RELATION, InvalidOid, rel_id); + + if (entry_ref) + return entry_ref->pending; + return NULL; +} + +/* + * Perform relation stats specific end-of-transaction work. Helper for + * AtEOXact_PgStat. + * + * Transfer transactional insert/update counts into the base tabstat entries. + * We don't bother to free any of the transactional state, since it's all in + * TopTransactionContext and will go away anyway. + */ +void +AtEOXact_PgStat_Relations(PgStat_SubXactStatus *xact_state, bool isCommit) +{ + PgStat_TableXactStatus *trans; + + for (trans = xact_state->first; trans != NULL; trans = trans->next) + { + PgStat_TableStatus *tabstat; + + Assert(trans->nest_level == 1); + Assert(trans->upper == NULL); + tabstat = trans->parent; + Assert(tabstat->trans == trans); + /* restore pre-truncate/drop stats (if any) in case of aborted xact */ + if (!isCommit) + restore_truncdrop_counters(trans); + /* count attempted actions regardless of commit/abort */ + tabstat->t_counts.t_tuples_inserted += trans->tuples_inserted; + tabstat->t_counts.t_tuples_updated += trans->tuples_updated; + tabstat->t_counts.t_tuples_deleted += trans->tuples_deleted; + if (isCommit) + { + tabstat->t_counts.t_truncdropped = trans->truncdropped; + if (trans->truncdropped) + { + /* forget live/dead stats seen by backend thus far */ + tabstat->t_counts.t_delta_live_tuples = 0; + tabstat->t_counts.t_delta_dead_tuples = 0; + } + /* insert adds a live tuple, delete removes one */ + tabstat->t_counts.t_delta_live_tuples += + trans->tuples_inserted - trans->tuples_deleted; + /* update and delete each create a dead tuple */ + tabstat->t_counts.t_delta_dead_tuples += + trans->tuples_updated + trans->tuples_deleted; + /* insert, update, delete each count as one change event */ + tabstat->t_counts.t_changed_tuples += + trans->tuples_inserted + trans->tuples_updated + + trans->tuples_deleted; + } + else + { + /* inserted tuples are dead, deleted tuples are unaffected */ + tabstat->t_counts.t_delta_dead_tuples += + trans->tuples_inserted + trans->tuples_updated; + /* an aborted xact generates no changed_tuple events */ + } + tabstat->trans = NULL; + } +} + +/* + * Perform relation stats specific end-of-sub-transaction work. Helper for + * AtEOSubXact_PgStat. + * + * Transfer transactional insert/update counts into the next higher + * subtransaction state. 
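+ * For example, if a subxact at nest level 2 inserts 10 tuples and commits,
+ * those counts are carried into the level-1 state (merging with an existing
+ * level-1 record if there is one); if it aborts instead, the tuples are
+ * counted as dead in the top-level tabstat entry.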
+ */ +void +AtEOSubXact_PgStat_Relations(PgStat_SubXactStatus *xact_state, bool isCommit, int nestDepth) +{ + PgStat_TableXactStatus *trans; + PgStat_TableXactStatus *next_trans; + + for (trans = xact_state->first; trans != NULL; trans = next_trans) + { + PgStat_TableStatus *tabstat; + + next_trans = trans->next; + Assert(trans->nest_level == nestDepth); + tabstat = trans->parent; + Assert(tabstat->trans == trans); + + if (isCommit) + { + if (trans->upper && trans->upper->nest_level == nestDepth - 1) + { + if (trans->truncdropped) + { + /* propagate the truncate/drop status one level up */ + save_truncdrop_counters(trans->upper, false); + /* replace upper xact stats with ours */ + trans->upper->tuples_inserted = trans->tuples_inserted; + trans->upper->tuples_updated = trans->tuples_updated; + trans->upper->tuples_deleted = trans->tuples_deleted; + } + else + { + trans->upper->tuples_inserted += trans->tuples_inserted; + trans->upper->tuples_updated += trans->tuples_updated; + trans->upper->tuples_deleted += trans->tuples_deleted; + } + tabstat->trans = trans->upper; + pfree(trans); + } + else + { + /* + * When there isn't an immediate parent state, we can just + * reuse the record instead of going through a palloc/pfree + * pushup (this works since it's all in TopTransactionContext + * anyway). We have to re-link it into the parent level, + * though, and that might mean pushing a new entry into the + * pgStatXactStack. + */ + PgStat_SubXactStatus *upper_xact_state; + + upper_xact_state = pgstat_get_xact_stack_level(nestDepth - 1); + trans->next = upper_xact_state->first; + upper_xact_state->first = trans; + trans->nest_level = nestDepth - 1; + } + } + else + { + /* + * On abort, update top-level tabstat counts, then forget the + * subtransaction + */ + + /* first restore values obliterated by truncate/drop */ + restore_truncdrop_counters(trans); + /* count attempted actions regardless of commit/abort */ + tabstat->t_counts.t_tuples_inserted += trans->tuples_inserted; + tabstat->t_counts.t_tuples_updated += trans->tuples_updated; + tabstat->t_counts.t_tuples_deleted += trans->tuples_deleted; + /* inserted tuples are dead, deleted tuples are unaffected */ + tabstat->t_counts.t_delta_dead_tuples += + trans->tuples_inserted + trans->tuples_updated; + tabstat->trans = trans->upper; + pfree(trans); + } + } +} + +/* + * Generate 2PC records for all the pending transaction-dependent relation + * stats. + */ +void +AtPrepare_PgStat_Relations(PgStat_SubXactStatus *xact_state) +{ + PgStat_TableXactStatus *trans; + + for (trans = xact_state->first; trans != NULL; trans = trans->next) + { + PgStat_TableStatus *tabstat PG_USED_FOR_ASSERTS_ONLY; + TwoPhasePgStatRecord record; + + Assert(trans->nest_level == 1); + Assert(trans->upper == NULL); + tabstat = trans->parent; + Assert(tabstat->trans == trans); + + record.tuples_inserted = trans->tuples_inserted; + record.tuples_updated = trans->tuples_updated; + record.tuples_deleted = trans->tuples_deleted; + record.inserted_pre_truncdrop = trans->inserted_pre_truncdrop; + record.updated_pre_truncdrop = trans->updated_pre_truncdrop; + record.deleted_pre_truncdrop = trans->deleted_pre_truncdrop; + record.t_id = tabstat->t_id; + record.t_shared = tabstat->t_shared; + record.t_truncdropped = trans->truncdropped; + + RegisterTwoPhaseRecord(TWOPHASE_RM_PGSTAT_ID, 0, + &record, sizeof(TwoPhasePgStatRecord)); + } +} + +/* + * All we need do here is unlink the transaction stats state from the + * nontransactional state. 
The nontransactional action counts will be
+ * reported to the stats system immediately, while the effects on live and
+ * dead tuple counts are preserved in the 2PC state file.
+ *
+ * Note: AtEOXact_PgStat_Relations is not called during PREPARE.
+ */
+void
+PostPrepare_PgStat_Relations(PgStat_SubXactStatus *xact_state)
+{
+ PgStat_TableXactStatus *trans;
+
+ for (trans = xact_state->first; trans != NULL; trans = trans->next)
+ {
+ PgStat_TableStatus *tabstat;
+
+ tabstat = trans->parent;
+ tabstat->trans = NULL;
+ }
+}
+
+/*
+ * 2PC processing routine for COMMIT PREPARED case.
+ *
+ * Load the saved counts into our local pgstats state.
+ */
+void
+pgstat_twophase_postcommit(TransactionId xid, uint16 info,
+ void *recdata, uint32 len)
+{
+ TwoPhasePgStatRecord *rec = (TwoPhasePgStatRecord *) recdata;
+ PgStat_TableStatus *pgstat_info;
+
+ /* Find or create a tabstat entry for the rel */
+ pgstat_info = pgstat_prep_relation_pending(rec->t_id, rec->t_shared);
+
+ /* Same math as in AtEOXact_PgStat, commit case */
+ pgstat_info->t_counts.t_tuples_inserted += rec->tuples_inserted;
+ pgstat_info->t_counts.t_tuples_updated += rec->tuples_updated;
+ pgstat_info->t_counts.t_tuples_deleted += rec->tuples_deleted;
+ pgstat_info->t_counts.t_truncdropped = rec->t_truncdropped;
+ if (rec->t_truncdropped)
+ {
+ /* forget live/dead stats seen by backend thus far */
+ pgstat_info->t_counts.t_delta_live_tuples = 0;
+ pgstat_info->t_counts.t_delta_dead_tuples = 0;
+ }
+ pgstat_info->t_counts.t_delta_live_tuples +=
+ rec->tuples_inserted - rec->tuples_deleted;
+ pgstat_info->t_counts.t_delta_dead_tuples +=
+ rec->tuples_updated + rec->tuples_deleted;
+ pgstat_info->t_counts.t_changed_tuples +=
+ rec->tuples_inserted + rec->tuples_updated +
+ rec->tuples_deleted;
+}
+
+/*
+ * 2PC processing routine for ROLLBACK PREPARED case.
+ *
+ * Load the saved counts into our local pgstats state, but treat them
+ * as aborted.
+ */
+void
+pgstat_twophase_postabort(TransactionId xid, uint16 info,
+ void *recdata, uint32 len)
+{
+ TwoPhasePgStatRecord *rec = (TwoPhasePgStatRecord *) recdata;
+ PgStat_TableStatus *pgstat_info;
+
+ /* Find or create a tabstat entry for the rel */
+ pgstat_info = pgstat_prep_relation_pending(rec->t_id, rec->t_shared);
+
+ /* Same math as in AtEOXact_PgStat, abort case */
+ if (rec->t_truncdropped)
+ {
+ rec->tuples_inserted = rec->inserted_pre_truncdrop;
+ rec->tuples_updated = rec->updated_pre_truncdrop;
+ rec->tuples_deleted = rec->deleted_pre_truncdrop;
+ }
+ pgstat_info->t_counts.t_tuples_inserted += rec->tuples_inserted;
+ pgstat_info->t_counts.t_tuples_updated += rec->tuples_updated;
+ pgstat_info->t_counts.t_tuples_deleted += rec->tuples_deleted;
+ pgstat_info->t_counts.t_delta_dead_tuples +=
+ rec->tuples_inserted + rec->tuples_updated;
+}
+
+/*
+ * Flush out pending stats for the entry
+ *
+ * If nowait is true, this function returns false if the lock could not be
+ * acquired immediately; otherwise true is returned.
+ *
+ * Some of the stats are copied to the corresponding pending database stats
+ * entry when successfully flushing.
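+ * Callers passing nowait = true (such as the pending-stats flush loop) are
+ * expected to keep the entry pending and retry in a later flush cycle when
+ * false is returned.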
+ */ +bool +pgstat_relation_flush_cb(PgStat_EntryRef *entry_ref, bool nowait) +{ + static const PgStat_TableCounts all_zeroes; + Oid dboid; + PgStat_TableStatus *lstats; /* pending stats entry */ + PgStatShared_Relation *shtabstats; + PgStat_StatTabEntry *tabentry; /* table entry of shared stats */ + PgStat_StatDBEntry *dbentry; /* pending database entry */ + + dboid = entry_ref->shared_entry->key.dboid; + lstats = (PgStat_TableStatus *) entry_ref->pending; + shtabstats = (PgStatShared_Relation *) entry_ref->shared_stats; + + /* + * Ignore entries that didn't accumulate any actual counts, such as + * indexes that were opened by the planner but not used. + */ + if (memcmp(&lstats->t_counts, &all_zeroes, + sizeof(PgStat_TableCounts)) == 0) + { + return true; + } + + if (!pgstat_lock_entry(entry_ref, nowait)) + return false; + + /* add the values to the shared entry. */ + tabentry = &shtabstats->stats; + + tabentry->numscans += lstats->t_counts.t_numscans; + tabentry->tuples_returned += lstats->t_counts.t_tuples_returned; + tabentry->tuples_fetched += lstats->t_counts.t_tuples_fetched; + tabentry->tuples_inserted += lstats->t_counts.t_tuples_inserted; + tabentry->tuples_updated += lstats->t_counts.t_tuples_updated; + tabentry->tuples_deleted += lstats->t_counts.t_tuples_deleted; + tabentry->tuples_hot_updated += lstats->t_counts.t_tuples_hot_updated; + + /* + * If table was truncated/dropped, first reset the live/dead counters. + */ + if (lstats->t_counts.t_truncdropped) + { + tabentry->n_live_tuples = 0; + tabentry->n_dead_tuples = 0; + tabentry->inserts_since_vacuum = 0; + } + + tabentry->n_live_tuples += lstats->t_counts.t_delta_live_tuples; + tabentry->n_dead_tuples += lstats->t_counts.t_delta_dead_tuples; + tabentry->changes_since_analyze += lstats->t_counts.t_changed_tuples; + tabentry->inserts_since_vacuum += lstats->t_counts.t_tuples_inserted; + tabentry->blocks_fetched += lstats->t_counts.t_blocks_fetched; + tabentry->blocks_hit += lstats->t_counts.t_blocks_hit; + + /* Clamp n_live_tuples in case of negative delta_live_tuples */ + tabentry->n_live_tuples = Max(tabentry->n_live_tuples, 0); + /* Likewise for n_dead_tuples */ + tabentry->n_dead_tuples = Max(tabentry->n_dead_tuples, 0); + + pgstat_unlock_entry(entry_ref); + + /* The entry was successfully flushed, add the same to database stats */ + dbentry = pgstat_prep_database_pending(dboid); + dbentry->n_tuples_returned += lstats->t_counts.t_tuples_returned; + dbentry->n_tuples_fetched += lstats->t_counts.t_tuples_fetched; + dbentry->n_tuples_inserted += lstats->t_counts.t_tuples_inserted; + dbentry->n_tuples_updated += lstats->t_counts.t_tuples_updated; + dbentry->n_tuples_deleted += lstats->t_counts.t_tuples_deleted; + dbentry->n_blocks_fetched += lstats->t_counts.t_blocks_fetched; + dbentry->n_blocks_hit += lstats->t_counts.t_blocks_hit; + + return true; +} + +void +pgstat_relation_delete_pending_cb(PgStat_EntryRef *entry_ref) +{ + PgStat_TableStatus *pending = (PgStat_TableStatus *) entry_ref->pending; + + if (pending->relation) + pgstat_unlink_relation(pending->relation); +} + +/* + * Find or create a PgStat_TableStatus entry for rel. New entry is created and + * initialized if not exists. + */ +static PgStat_TableStatus * +pgstat_prep_relation_pending(Oid rel_id, bool isshared) +{ + PgStat_EntryRef *entry_ref; + PgStat_TableStatus *pending; + + entry_ref = pgstat_prep_pending_entry(PGSTAT_KIND_RELATION, + isshared ? 
InvalidOid : MyDatabaseId,
+ rel_id, NULL);
+
+ pending = entry_ref->pending;
+ pending->t_id = rel_id;
+ pending->t_shared = isshared;
+
+ return pending;
+}
+
+/*
+ * add a new (sub)transaction state record
+ */
+static void
+add_tabstat_xact_level(PgStat_TableStatus *pgstat_info, int nest_level)
+{
+ PgStat_SubXactStatus *xact_state;
+ PgStat_TableXactStatus *trans;
+
+ /*
+ * If this is the first rel to be modified at the current nest level, we
+ * first have to push a transaction stack entry.
+ */
+ xact_state = pgstat_get_xact_stack_level(nest_level);
+
+ /* Now make a per-table stack entry */
+ trans = (PgStat_TableXactStatus *)
+ MemoryContextAllocZero(TopTransactionContext,
+ sizeof(PgStat_TableXactStatus));
+ trans->nest_level = nest_level;
+ trans->upper = pgstat_info->trans;
+ trans->parent = pgstat_info;
+ trans->next = xact_state->first;
+ xact_state->first = trans;
+ pgstat_info->trans = trans;
+}
+
+/*
+ * Add a new (sub)transaction record if needed.
+ */
+static void
+ensure_tabstat_xact_level(PgStat_TableStatus *pgstat_info)
+{
+ int nest_level = GetCurrentTransactionNestLevel();
+
+ if (pgstat_info->trans == NULL ||
+ pgstat_info->trans->nest_level != nest_level)
+ add_tabstat_xact_level(pgstat_info, nest_level);
+}
+
+/*
+ * Whenever a table is truncated/dropped, we save its i/u/d counters so that
+ * they can be cleared, and if the (sub)xact that executed the truncate/drop
+ * later aborts, the counters can be restored to the saved (pre-truncate/drop)
+ * values.
+ *
+ * Note that for truncate we do this on the first truncate in any particular
+ * subxact level only.
+ */
+static void
+save_truncdrop_counters(PgStat_TableXactStatus *trans, bool is_drop)
+{
+ if (!trans->truncdropped || is_drop)
+ {
+ trans->inserted_pre_truncdrop = trans->tuples_inserted;
+ trans->updated_pre_truncdrop = trans->tuples_updated;
+ trans->deleted_pre_truncdrop = trans->tuples_deleted;
+ trans->truncdropped = true;
+ }
+}
+
+/*
+ * restore counters when a truncate aborts
+ */
+static void
+restore_truncdrop_counters(PgStat_TableXactStatus *trans)
+{
+ if (trans->truncdropped)
+ {
+ trans->tuples_inserted = trans->inserted_pre_truncdrop;
+ trans->tuples_updated = trans->updated_pre_truncdrop;
+ trans->tuples_deleted = trans->deleted_pre_truncdrop;
+ }
+}
diff --git a/src/backend/utils/activity/pgstat_replslot.c b/src/backend/utils/activity/pgstat_replslot.c
new file mode 100644
index 0000000..1a8473b
--- /dev/null
+++ b/src/backend/utils/activity/pgstat_replslot.c
@@ -0,0 +1,224 @@
+/* -------------------------------------------------------------------------
+ *
+ * pgstat_replslot.c
+ * Implementation of replication slot statistics.
+ *
+ * This file contains the implementation of replication slot statistics. It
+ * is kept separate from pgstat.c to enforce the line between the statistics
+ * access / storage implementation and the details about individual types of
+ * statistics.
+ *
+ * Replication slot stats work a bit differently than other variable-numbered
+ * stats. Slots do not have oids (so they can be created on physical
+ * replicas). We use the slot index as object id while running. However, the
+ * slot index can change when restarting. That is addressed by using the
+ * name when (de-)serializing. After a restart it is possible for slots to
+ * have been dropped while shut down, which is addressed by not restoring
+ * stats for slots that cannot be found by name when starting up.
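+ * For example, a slot occupying index 3 while running is serialized under
+ * its name; if after a restart a slot with that name sits at index 1, its
+ * stats are reattached at index 1, and if no slot with that name exists
+ * anymore, the stats are discarded.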
+ *
+ * Copyright (c) 2001-2022, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/backend/utils/activity/pgstat_replslot.c
+ * -------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "replication/slot.h"
+#include "utils/builtins.h" /* for namestrcpy() */
+#include "utils/pgstat_internal.h"
+
+
+static int get_replslot_index(const char *name);
+
+
+/*
+ * Reset counters for a single replication slot.
+ *
+ * Permission checking for this function is managed through the normal
+ * GRANT system.
+ */
+void
+pgstat_reset_replslot(const char *name)
+{
+ ReplicationSlot *slot;
+
+ AssertArg(name != NULL);
+
+ /* Check if a slot exists with the given name. */
+ slot = SearchNamedReplicationSlot(name, true);
+
+ if (!slot)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("replication slot \"%s\" does not exist",
+ name)));
+
+ /*
+ * Nothing to do for physical slots as we collect stats only for logical
+ * slots.
+ */
+ if (SlotIsPhysical(slot))
+ return;
+
+ /* reset this one entry */
+ pgstat_reset(PGSTAT_KIND_REPLSLOT, InvalidOid,
+ ReplicationSlotIndex(slot));
+}
+
+/*
+ * Report replication slot statistics.
+ *
+ * We can rely on the stats for the slot to exist and to belong to this
+ * slot. We can only get here if pgstat_create_replslot() or
+ * pgstat_acquire_replslot() has already been called.
+ */
+void
+pgstat_report_replslot(ReplicationSlot *slot, const PgStat_StatReplSlotEntry *repSlotStat)
+{
+ PgStat_EntryRef *entry_ref;
+ PgStatShared_ReplSlot *shstatent;
+ PgStat_StatReplSlotEntry *statent;
+
+ entry_ref = pgstat_get_entry_ref_locked(PGSTAT_KIND_REPLSLOT, InvalidOid,
+ ReplicationSlotIndex(slot), false);
+ shstatent = (PgStatShared_ReplSlot *) entry_ref->shared_stats;
+ statent = &shstatent->stats;
+
+ /* Update the replication slot statistics */
+#define REPLSLOT_ACC(fld) statent->fld += repSlotStat->fld
+ REPLSLOT_ACC(spill_txns);
+ REPLSLOT_ACC(spill_count);
+ REPLSLOT_ACC(spill_bytes);
+ REPLSLOT_ACC(stream_txns);
+ REPLSLOT_ACC(stream_count);
+ REPLSLOT_ACC(stream_bytes);
+ REPLSLOT_ACC(total_txns);
+ REPLSLOT_ACC(total_bytes);
+#undef REPLSLOT_ACC
+
+ pgstat_unlock_entry(entry_ref);
+}
+
+/*
+ * Report replication slot creation.
+ *
+ * NB: This gets called with ReplicationSlotAllocationLock already held; be
+ * careful about calling back into slot.c.
+ */
+void
+pgstat_create_replslot(ReplicationSlot *slot)
+{
+ PgStat_EntryRef *entry_ref;
+ PgStatShared_ReplSlot *shstatent;
+
+ entry_ref = pgstat_get_entry_ref_locked(PGSTAT_KIND_REPLSLOT, InvalidOid,
+ ReplicationSlotIndex(slot), false);
+ shstatent = (PgStatShared_ReplSlot *) entry_ref->shared_stats;
+
+ /*
+ * NB: need to accept that there might be stats from an older slot, e.g.
+ * if we previously crashed after dropping a slot.
+ */
+ memset(&shstatent->stats, 0, sizeof(shstatent->stats));
+
+ pgstat_unlock_entry(entry_ref);
+}
+
+/*
+ * Report that a replication slot has been acquired.
+ *
+ * This guarantees that a stats entry exists during later
+ * pgstat_report_replslot() calls.
+ *
+ * If we previously crashed, no stats data exists.
But if we did not crash, + * the stats do belong to this slot: + * - the stats cannot belong to a dropped slot, pgstat_drop_replslot() would + * have been called + * - if the slot was removed while shut down, + * pgstat_replslot_from_serialized_name_cb() returning false would have + * caused the stats to be dropped + */ +void +pgstat_acquire_replslot(ReplicationSlot *slot) +{ + pgstat_get_entry_ref(PGSTAT_KIND_REPLSLOT, InvalidOid, + ReplicationSlotIndex(slot), true, NULL); +} + +/* + * Report replication slot drop. + */ +void +pgstat_drop_replslot(ReplicationSlot *slot) +{ + pgstat_drop_entry(PGSTAT_KIND_REPLSLOT, InvalidOid, + ReplicationSlotIndex(slot)); +} + +/* + * Support function for the SQL-callable pgstat* functions. Returns + * a pointer to the replication slot statistics struct. + */ +PgStat_StatReplSlotEntry * +pgstat_fetch_replslot(NameData slotname) +{ + int idx = get_replslot_index(NameStr(slotname)); + + if (idx == -1) + return NULL; + + return (PgStat_StatReplSlotEntry *) + pgstat_fetch_entry(PGSTAT_KIND_REPLSLOT, InvalidOid, idx); +} + +void +pgstat_replslot_to_serialized_name_cb(const PgStat_HashKey *key, const PgStatShared_Common *header, NameData *name) +{ + /* + * This is only called late during shutdown. The set of existing slots + * isn't allowed to change at this point, we can assume that a slot exists + * at the offset. + */ + if (!ReplicationSlotName(key->objoid, name)) + elog(ERROR, "could not find name for replication slot index %u", + key->objoid); +} + +bool +pgstat_replslot_from_serialized_name_cb(const NameData *name, PgStat_HashKey *key) +{ + int idx = get_replslot_index(NameStr(*name)); + + /* slot might have been deleted */ + if (idx == -1) + return false; + + key->kind = PGSTAT_KIND_REPLSLOT; + key->dboid = InvalidOid; + key->objoid = idx; + + return true; +} + +void +pgstat_replslot_reset_timestamp_cb(PgStatShared_Common *header, TimestampTz ts) +{ + ((PgStatShared_ReplSlot *) header)->stats.stat_reset_timestamp = ts; +} + +static int +get_replslot_index(const char *name) +{ + ReplicationSlot *slot; + + AssertArg(name != NULL); + + slot = SearchNamedReplicationSlot(name, true); + + if (!slot) + return -1; + + return ReplicationSlotIndex(slot); +} diff --git a/src/backend/utils/activity/pgstat_shmem.c b/src/backend/utils/activity/pgstat_shmem.c new file mode 100644 index 0000000..9a4f037 --- /dev/null +++ b/src/backend/utils/activity/pgstat_shmem.c @@ -0,0 +1,1003 @@ +/* ------------------------------------------------------------------------- + * + * pgstat_shmem.c + * Storage of stats entries in shared memory + * + * Copyright (c) 2001-2022, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/utils/activity/pgstat_shmem.c + * ------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "pgstat.h" +#include "storage/shmem.h" +#include "utils/memutils.h" +#include "utils/pgstat_internal.h" + + +#define PGSTAT_ENTRY_REF_HASH_SIZE 128 + +/* hash table entry for finding the PgStat_EntryRef for a key */ +typedef struct PgStat_EntryRefHashEntry +{ + PgStat_HashKey key; /* hash key */ + char status; /* for simplehash use */ + PgStat_EntryRef *entry_ref; +} PgStat_EntryRefHashEntry; + + +/* for references to shared statistics entries */ +#define SH_PREFIX pgstat_entry_ref_hash +#define SH_ELEMENT_TYPE PgStat_EntryRefHashEntry +#define SH_KEY_TYPE PgStat_HashKey +#define SH_KEY key +#define SH_HASH_KEY(tb, key) \ + pgstat_hash_hash_key(&key, sizeof(PgStat_HashKey), NULL) +#define 
SH_EQUAL(tb, a, b) \
+ pgstat_cmp_hash_key(&a, &b, sizeof(PgStat_HashKey), NULL) == 0
+#define SH_SCOPE static inline
+#define SH_DEFINE
+#define SH_DECLARE
+#include "lib/simplehash.h"
+
+
+static void pgstat_drop_database_and_contents(Oid dboid);
+
+static void pgstat_free_entry(PgStatShared_HashEntry *shent, dshash_seq_status *hstat);
+
+static void pgstat_release_entry_ref(PgStat_HashKey key, PgStat_EntryRef *entry_ref, bool discard_pending);
+static bool pgstat_need_entry_refs_gc(void);
+static void pgstat_gc_entry_refs(void);
+static void pgstat_release_all_entry_refs(bool discard_pending);
+typedef bool (*ReleaseMatchCB) (PgStat_EntryRefHashEntry *, Datum data);
+static void pgstat_release_matching_entry_refs(bool discard_pending, ReleaseMatchCB match, Datum match_data);
+
+static void pgstat_setup_memcxt(void);
+
+
+/* parameter for the shared hash */
+static const dshash_parameters dsh_params = {
+ sizeof(PgStat_HashKey),
+ sizeof(PgStatShared_HashEntry),
+ pgstat_cmp_hash_key,
+ pgstat_hash_hash_key,
+ LWTRANCHE_PGSTATS_HASH
+};
+
+
+/*
+ * Backend local references to shared stats entries. If there are pending
+ * updates to a stats entry, the PgStat_EntryRef is added to the pgStatPending
+ * list.
+ *
+ * When a stats entry is dropped each backend needs to release its reference
+ * to it before the memory can be released. To trigger that,
+ * pgStatLocal.shmem->gc_request_count is incremented - which each backend
+ * compares to its copy of pgStatSharedRefAge on a regular basis.
+ */
+static pgstat_entry_ref_hash_hash *pgStatEntryRefHash = NULL;
+static int pgStatSharedRefAge = 0; /* cache age of pgStatEntryRefHash */
+
+/*
+ * Memory contexts containing the pgStatEntryRefHash table and the
+ * pgStatSharedRef entries respectively. Kept separate to make it easier to
+ * track / attribute memory usage.
+ */
+static MemoryContext pgStatSharedRefContext = NULL;
+static MemoryContext pgStatEntryRefHashContext = NULL;
+
+
+/* ------------------------------------------------------------
+ * Public functions called from postmaster follow
+ * ------------------------------------------------------------
+ */
+
+/*
+ * The size of the shared memory allocation for stats stored in the shared
+ * stats hash table. This allocation will be done as part of the main shared
+ * memory, rather than dynamic shared memory, allowing it to be initialized in
+ * postmaster.
+ */
+static Size
+pgstat_dsa_init_size(void)
+{
+ Size sz;
+
+ /*
+ * The dshash header / initial buckets array needs to fit into "plain"
+ * shared memory, but it's beneficial to not need dsm segments
+ * immediately. A size of 256kB seems to work well and is not
+ * disproportionate compared to other constant sized shared memory
+ * allocations. NB: To avoid DSMs further, the user can configure
+ * min_dynamic_shared_memory.
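+ * For instance, a hypothetical setting of min_dynamic_shared_memory = 1MB
+ * in postgresql.conf would preallocate shared memory at server start from
+ * which later dsa allocations for stats can be satisfied without creating
+ * new DSM segments.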
+ */ + sz = 256 * 1024; + Assert(dsa_minimum_size() <= sz); + return MAXALIGN(sz); +} + +/* + * Compute shared memory space needed for cumulative statistics + */ +Size +StatsShmemSize(void) +{ + Size sz; + + sz = MAXALIGN(sizeof(PgStat_ShmemControl)); + sz = add_size(sz, pgstat_dsa_init_size()); + + return sz; +} + +/* + * Initialize cumulative statistics system during startup + */ +void +StatsShmemInit(void) +{ + bool found; + Size sz; + + sz = StatsShmemSize(); + pgStatLocal.shmem = (PgStat_ShmemControl *) + ShmemInitStruct("Shared Memory Stats", sz, &found); + + if (!IsUnderPostmaster) + { + dsa_area *dsa; + dshash_table *dsh; + PgStat_ShmemControl *ctl = pgStatLocal.shmem; + char *p = (char *) ctl; + + Assert(!found); + + /* the allocation of pgStatLocal.shmem itself */ + p += MAXALIGN(sizeof(PgStat_ShmemControl)); + + /* + * Create a small dsa allocation in plain shared memory. This is + * required because postmaster cannot use dsm segments. It also + * provides a small efficiency win. + */ + ctl->raw_dsa_area = p; + p += MAXALIGN(pgstat_dsa_init_size()); + dsa = dsa_create_in_place(ctl->raw_dsa_area, + pgstat_dsa_init_size(), + LWTRANCHE_PGSTATS_DSA, 0); + dsa_pin(dsa); + + /* + * To ensure dshash is created in "plain" shared memory, temporarily + * limit size of dsa to the initial size of the dsa. + */ + dsa_set_size_limit(dsa, pgstat_dsa_init_size()); + + /* + * With the limit in place, create the dshash table. XXX: It'd be nice + * if there were dshash_create_in_place(). + */ + dsh = dshash_create(dsa, &dsh_params, 0); + ctl->hash_handle = dshash_get_hash_table_handle(dsh); + + /* lift limit set above */ + dsa_set_size_limit(dsa, -1); + + /* + * Postmaster will never access these again, thus free the local + * dsa/dshash references. + */ + dshash_detach(dsh); + dsa_detach(dsa); + + pg_atomic_init_u64(&ctl->gc_request_count, 1); + + + /* initialize fixed-numbered stats */ + LWLockInitialize(&ctl->archiver.lock, LWTRANCHE_PGSTATS_DATA); + LWLockInitialize(&ctl->bgwriter.lock, LWTRANCHE_PGSTATS_DATA); + LWLockInitialize(&ctl->checkpointer.lock, LWTRANCHE_PGSTATS_DATA); + LWLockInitialize(&ctl->slru.lock, LWTRANCHE_PGSTATS_DATA); + LWLockInitialize(&ctl->wal.lock, LWTRANCHE_PGSTATS_DATA); + } + else + { + Assert(found); + } +} + +void +pgstat_attach_shmem(void) +{ + MemoryContext oldcontext; + + Assert(pgStatLocal.dsa == NULL); + + /* stats shared memory persists for the backend lifetime */ + oldcontext = MemoryContextSwitchTo(TopMemoryContext); + + pgStatLocal.dsa = dsa_attach_in_place(pgStatLocal.shmem->raw_dsa_area, + NULL); + dsa_pin_mapping(pgStatLocal.dsa); + + pgStatLocal.shared_hash = dshash_attach(pgStatLocal.dsa, &dsh_params, + pgStatLocal.shmem->hash_handle, 0); + + MemoryContextSwitchTo(oldcontext); +} + +void +pgstat_detach_shmem(void) +{ + Assert(pgStatLocal.dsa); + + /* we shouldn't leave references to shared stats */ + pgstat_release_all_entry_refs(false); + + dshash_detach(pgStatLocal.shared_hash); + pgStatLocal.shared_hash = NULL; + + dsa_detach(pgStatLocal.dsa); + pgStatLocal.dsa = NULL; +} + + +/* ------------------------------------------------------------ + * Maintenance of shared memory stats entries + * ------------------------------------------------------------ + */ + +PgStatShared_Common * +pgstat_init_entry(PgStat_Kind kind, + PgStatShared_HashEntry *shhashent) +{ + /* Create new stats entry. */ + dsa_pointer chunk; + PgStatShared_Common *shheader; + + /* + * Initialize refcount to 1, marking it as valid / not dropped. 
The entry + * can't be freed before the initialization because it can't be found as + * long as we hold the dshash partition lock. Caller needs to increase + * further if a longer lived reference is needed. + */ + pg_atomic_init_u32(&shhashent->refcount, 1); + shhashent->dropped = false; + + chunk = dsa_allocate0(pgStatLocal.dsa, pgstat_get_kind_info(kind)->shared_size); + shheader = dsa_get_address(pgStatLocal.dsa, chunk); + shheader->magic = 0xdeadbeef; + + /* Link the new entry from the hash entry. */ + shhashent->body = chunk; + + LWLockInitialize(&shheader->lock, LWTRANCHE_PGSTATS_DATA); + + return shheader; +} + +static PgStatShared_Common * +pgstat_reinit_entry(PgStat_Kind kind, PgStatShared_HashEntry *shhashent) +{ + PgStatShared_Common *shheader; + + shheader = dsa_get_address(pgStatLocal.dsa, shhashent->body); + + /* mark as not dropped anymore */ + pg_atomic_fetch_add_u32(&shhashent->refcount, 1); + shhashent->dropped = false; + + /* reinitialize content */ + Assert(shheader->magic == 0xdeadbeef); + memset(pgstat_get_entry_data(kind, shheader), 0, + pgstat_get_entry_len(kind)); + + return shheader; +} + +static void +pgstat_setup_shared_refs(void) +{ + if (likely(pgStatEntryRefHash != NULL)) + return; + + pgStatEntryRefHash = + pgstat_entry_ref_hash_create(pgStatEntryRefHashContext, + PGSTAT_ENTRY_REF_HASH_SIZE, NULL); + pgStatSharedRefAge = pg_atomic_read_u64(&pgStatLocal.shmem->gc_request_count); + Assert(pgStatSharedRefAge != 0); +} + +/* + * Helper function for pgstat_get_entry_ref(). + */ +static void +pgstat_acquire_entry_ref(PgStat_EntryRef *entry_ref, + PgStatShared_HashEntry *shhashent, + PgStatShared_Common *shheader) +{ + Assert(shheader->magic == 0xdeadbeef); + Assert(pg_atomic_read_u32(&shhashent->refcount) > 0); + + pg_atomic_fetch_add_u32(&shhashent->refcount, 1); + + dshash_release_lock(pgStatLocal.shared_hash, shhashent); + + entry_ref->shared_stats = shheader; + entry_ref->shared_entry = shhashent; +} + +/* + * Helper function for pgstat_get_entry_ref(). + */ +static bool +pgstat_get_entry_ref_cached(PgStat_HashKey key, PgStat_EntryRef **entry_ref_p) +{ + bool found; + PgStat_EntryRefHashEntry *cache_entry; + + /* + * We immediately insert a cache entry, because it avoids 1) multiple + * hashtable lookups in case of a cache miss 2) having to deal with + * out-of-memory errors after incrementing PgStatShared_Common->refcount. + */ + + cache_entry = pgstat_entry_ref_hash_insert(pgStatEntryRefHash, key, &found); + + if (!found || !cache_entry->entry_ref) + { + PgStat_EntryRef *entry_ref; + + cache_entry->entry_ref = entry_ref = + MemoryContextAlloc(pgStatSharedRefContext, + sizeof(PgStat_EntryRef)); + entry_ref->shared_stats = NULL; + entry_ref->shared_entry = NULL; + entry_ref->pending = NULL; + + found = false; + } + else if (cache_entry->entry_ref->shared_stats == NULL) + { + Assert(cache_entry->entry_ref->pending == NULL); + found = false; + } + else + { + PgStat_EntryRef *entry_ref PG_USED_FOR_ASSERTS_ONLY; + + entry_ref = cache_entry->entry_ref; + Assert(entry_ref->shared_entry != NULL); + Assert(entry_ref->shared_stats != NULL); + + Assert(entry_ref->shared_stats->magic == 0xdeadbeef); + /* should have at least our reference */ + Assert(pg_atomic_read_u32(&entry_ref->shared_entry->refcount) > 0); + } + + *entry_ref_p = cache_entry->entry_ref; + return found; +} + +/* + * Get a shared stats reference. If create is true, the shared stats object is + * created if it does not exist. 
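+ * With create = false this doubles as a pure lookup; a sketch of a
+ * hypothetical caller:
+ *
+ * entry_ref = pgstat_get_entry_ref(PGSTAT_KIND_FUNCTION, MyDatabaseId,
+ * funcoid, false, NULL);
+ * if (entry_ref == NULL)
+ * ... no stats entry exists for funcoid ...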
+ *
+ * When create is true, and created_entry is non-NULL, it'll be set to true
+ * if the entry is newly created, false otherwise.
+ */
+PgStat_EntryRef *
+pgstat_get_entry_ref(PgStat_Kind kind, Oid dboid, Oid objoid, bool create,
+ bool *created_entry)
+{
+ PgStat_HashKey key = {.kind = kind,.dboid = dboid,.objoid = objoid};
+ PgStatShared_HashEntry *shhashent;
+ PgStatShared_Common *shheader = NULL;
+ PgStat_EntryRef *entry_ref;
+
+ /*
+ * Passing in created_entry only makes sense if we possibly could create
+ * the entry.
+ */
+ AssertArg(create || created_entry == NULL);
+ pgstat_assert_is_up();
+ Assert(pgStatLocal.shared_hash != NULL);
+ Assert(!pgStatLocal.shmem->is_shutdown);
+
+ pgstat_setup_memcxt();
+ pgstat_setup_shared_refs();
+
+ if (created_entry != NULL)
+ *created_entry = false;
+
+ /*
+ * Check if other backends dropped stats that could not be deleted because
+ * somebody held references to them. If so, check this backend's
+ * references. This is not expected to happen often. The location of the
+ * check is a bit random, but this is a relatively frequently called path,
+ * so better than most.
+ */
+ if (pgstat_need_entry_refs_gc())
+ pgstat_gc_entry_refs();
+
+ /*
+ * First check the lookup cache hashtable in local memory. If we find a
+ * match here we can avoid taking locks / causing contention.
+ */
+ if (pgstat_get_entry_ref_cached(key, &entry_ref))
+ return entry_ref;
+
+ Assert(entry_ref != NULL);
+
+ /*
+ * Do a lookup in the hash table first - it's quite likely that the entry
+ * already exists, and that way we only need a shared lock.
+ */
+ shhashent = dshash_find(pgStatLocal.shared_hash, &key, false);
+
+ if (create && !shhashent)
+ {
+ bool shfound;
+
+ /*
+ * It's possible that somebody created the entry since the above
+ * lookup. If so, fall through to the same path we would have taken
+ * if it had already been created before the dshash_find() call.
+ */
+ shhashent = dshash_find_or_insert(pgStatLocal.shared_hash, &key, &shfound);
+ if (!shfound)
+ {
+ shheader = pgstat_init_entry(kind, shhashent);
+ pgstat_acquire_entry_ref(entry_ref, shhashent, shheader);
+
+ if (created_entry != NULL)
+ *created_entry = true;
+
+ return entry_ref;
+ }
+ }
+
+ if (!shhashent)
+ {
+ /*
+ * If we're not creating, delete the reference again. In all
+ * likelihood it's just a stats lookup - no point wasting memory for a
+ * shared ref to nothing...
+ */
+ pgstat_release_entry_ref(key, entry_ref, false);
+
+ return NULL;
+ }
+ else
+ {
+ /*
+ * Can get here either because dshash_find() found a match, or if
+ * dshash_find_or_insert() found a concurrently inserted entry.
+ */
+
+ if (shhashent->dropped && create)
+ {
+ /*
+ * There are legitimate cases where the old stats entry might not
+ * yet have been dropped by the time it's reused. The most obvious
+ * case is replication slot stats, where a new slot can be
+ * created with the same index just after dropping. But oid
+ * wraparound can lead to other cases as well. We just reset the
+ * stats to their plain state.
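+ * (pgstat_reinit_entry() keeps the existing DSA allocation and merely
+ * zeroes the entry's data, so such reuse is cheap.)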
+ */ + shheader = pgstat_reinit_entry(kind, shhashent); + pgstat_acquire_entry_ref(entry_ref, shhashent, shheader); + + if (created_entry != NULL) + *created_entry = true; + + return entry_ref; + } + else if (shhashent->dropped) + { + dshash_release_lock(pgStatLocal.shared_hash, shhashent); + pgstat_release_entry_ref(key, entry_ref, false); + + return NULL; + } + else + { + shheader = dsa_get_address(pgStatLocal.dsa, shhashent->body); + pgstat_acquire_entry_ref(entry_ref, shhashent, shheader); + + return entry_ref; + } + } +} + +static void +pgstat_release_entry_ref(PgStat_HashKey key, PgStat_EntryRef *entry_ref, + bool discard_pending) +{ + if (entry_ref && entry_ref->pending) + { + if (discard_pending) + pgstat_delete_pending_entry(entry_ref); + else + elog(ERROR, "releasing ref with pending data"); + } + + if (entry_ref && entry_ref->shared_stats) + { + Assert(entry_ref->shared_stats->magic == 0xdeadbeef); + Assert(entry_ref->pending == NULL); + + /* + * This can't race with another backend looking up the stats entry and + * increasing the refcount because it is not "legal" to create + * additional references to dropped entries. + */ + if (pg_atomic_fetch_sub_u32(&entry_ref->shared_entry->refcount, 1) == 1) + { + PgStatShared_HashEntry *shent; + + /* + * We're the last referrer to this entry, try to drop the shared + * entry. + */ + + /* only dropped entries can reach a 0 refcount */ + Assert(entry_ref->shared_entry->dropped); + + shent = dshash_find(pgStatLocal.shared_hash, + &entry_ref->shared_entry->key, + true); + if (!shent) + elog(ERROR, "could not find just referenced shared stats entry"); + + Assert(pg_atomic_read_u32(&entry_ref->shared_entry->refcount) == 0); + Assert(entry_ref->shared_entry == shent); + + pgstat_free_entry(shent, NULL); + } + } + + if (!pgstat_entry_ref_hash_delete(pgStatEntryRefHash, key)) + elog(ERROR, "entry ref vanished before deletion"); + + if (entry_ref) + pfree(entry_ref); +} + +bool +pgstat_lock_entry(PgStat_EntryRef *entry_ref, bool nowait) +{ + LWLock *lock = &entry_ref->shared_stats->lock; + + if (nowait) + return LWLockConditionalAcquire(lock, LW_EXCLUSIVE); + + LWLockAcquire(lock, LW_EXCLUSIVE); + return true; +} + +/* + * Separate from pgstat_lock_entry() as most callers will need to lock + * exclusively. + */ +bool +pgstat_lock_entry_shared(PgStat_EntryRef *entry_ref, bool nowait) +{ + LWLock *lock = &entry_ref->shared_stats->lock; + + if (nowait) + return LWLockConditionalAcquire(lock, LW_SHARED); + + LWLockAcquire(lock, LW_SHARED); + return true; +} + +void +pgstat_unlock_entry(PgStat_EntryRef *entry_ref) +{ + LWLockRelease(&entry_ref->shared_stats->lock); +} + +/* + * Helper function to fetch and lock shared stats. 
+ */ +PgStat_EntryRef * +pgstat_get_entry_ref_locked(PgStat_Kind kind, Oid dboid, Oid objoid, + bool nowait) +{ + PgStat_EntryRef *entry_ref; + + /* find shared table stats entry corresponding to the local entry */ + entry_ref = pgstat_get_entry_ref(kind, dboid, objoid, true, NULL); + + /* lock the shared entry to protect the content, skip if failed */ + if (!pgstat_lock_entry(entry_ref, nowait)) + return NULL; + + return entry_ref; +} + +void +pgstat_request_entry_refs_gc(void) +{ + pg_atomic_fetch_add_u64(&pgStatLocal.shmem->gc_request_count, 1); +} + +static bool +pgstat_need_entry_refs_gc(void) +{ + uint64 curage; + + if (!pgStatEntryRefHash) + return false; + + /* should have been initialized when creating pgStatEntryRefHash */ + Assert(pgStatSharedRefAge != 0); + + curage = pg_atomic_read_u64(&pgStatLocal.shmem->gc_request_count); + + return pgStatSharedRefAge != curage; +} + +static void +pgstat_gc_entry_refs(void) +{ + pgstat_entry_ref_hash_iterator i; + PgStat_EntryRefHashEntry *ent; + uint64 curage; + + curage = pg_atomic_read_u64(&pgStatLocal.shmem->gc_request_count); + Assert(curage != 0); + + /* + * Some entries have been dropped. Invalidate cache pointer to them. + */ + pgstat_entry_ref_hash_start_iterate(pgStatEntryRefHash, &i); + while ((ent = pgstat_entry_ref_hash_iterate(pgStatEntryRefHash, &i)) != NULL) + { + PgStat_EntryRef *entry_ref = ent->entry_ref; + + Assert(!entry_ref->shared_stats || + entry_ref->shared_stats->magic == 0xdeadbeef); + + if (!entry_ref->shared_entry->dropped) + continue; + + /* cannot gc shared ref that has pending data */ + if (entry_ref->pending != NULL) + continue; + + pgstat_release_entry_ref(ent->key, entry_ref, false); + } + + pgStatSharedRefAge = curage; +} + +static void +pgstat_release_matching_entry_refs(bool discard_pending, ReleaseMatchCB match, + Datum match_data) +{ + pgstat_entry_ref_hash_iterator i; + PgStat_EntryRefHashEntry *ent; + + if (pgStatEntryRefHash == NULL) + return; + + pgstat_entry_ref_hash_start_iterate(pgStatEntryRefHash, &i); + + while ((ent = pgstat_entry_ref_hash_iterate(pgStatEntryRefHash, &i)) + != NULL) + { + Assert(ent->entry_ref != NULL); + + if (match && !match(ent, match_data)) + continue; + + pgstat_release_entry_ref(ent->key, ent->entry_ref, discard_pending); + } +} + +/* + * Release all local references to shared stats entries. + * + * When a process exits it cannot do so while still holding references onto + * stats entries, otherwise the shared stats entries could never be freed. 
+ */ +static void +pgstat_release_all_entry_refs(bool discard_pending) +{ + if (pgStatEntryRefHash == NULL) + return; + + pgstat_release_matching_entry_refs(discard_pending, NULL, 0); + Assert(pgStatEntryRefHash->members == 0); + pgstat_entry_ref_hash_destroy(pgStatEntryRefHash); + pgStatEntryRefHash = NULL; +} + +static bool +match_db(PgStat_EntryRefHashEntry *ent, Datum match_data) +{ + Oid dboid = DatumGetObjectId(match_data); + + return ent->key.dboid == dboid; +} + +static void +pgstat_release_db_entry_refs(Oid dboid) +{ + pgstat_release_matching_entry_refs( /* discard pending = */ true, + match_db, + ObjectIdGetDatum(dboid)); +} + + +/* ------------------------------------------------------------ + * Dropping and resetting of stats entries + * ------------------------------------------------------------ + */ + +static void +pgstat_free_entry(PgStatShared_HashEntry *shent, dshash_seq_status *hstat) +{ + dsa_pointer pdsa; + + /* + * Fetch dsa pointer before deleting entry - that way we can free the + * memory after releasing the lock. + */ + pdsa = shent->body; + + if (!hstat) + dshash_delete_entry(pgStatLocal.shared_hash, shent); + else + dshash_delete_current(hstat); + + dsa_free(pgStatLocal.dsa, pdsa); +} + +/* + * Helper for both pgstat_drop_database_and_contents() and + * pgstat_drop_entry(). If hstat is non-null delete the shared entry using + * dshash_delete_current(), otherwise use dshash_delete_entry(). In either + * case the entry needs to be already locked. + */ +static bool +pgstat_drop_entry_internal(PgStatShared_HashEntry *shent, + dshash_seq_status *hstat) +{ + Assert(shent->body != InvalidDsaPointer); + + /* should already have released local reference */ + if (pgStatEntryRefHash) + Assert(!pgstat_entry_ref_hash_lookup(pgStatEntryRefHash, shent->key)); + + /* + * Signal that the entry is dropped - this will eventually cause other + * backends to release their references. + */ + if (shent->dropped) + elog(ERROR, "can only drop stats once"); + shent->dropped = true; + + /* release refcount marking entry as not dropped */ + if (pg_atomic_sub_fetch_u32(&shent->refcount, 1) == 0) + { + pgstat_free_entry(shent, hstat); + return true; + } + else + { + if (!hstat) + dshash_release_lock(pgStatLocal.shared_hash, shent); + return false; + } +} + +/* + * Drop stats for the database and all the objects inside that database. + */ +static void +pgstat_drop_database_and_contents(Oid dboid) +{ + dshash_seq_status hstat; + PgStatShared_HashEntry *p; + uint64 not_freed_count = 0; + + Assert(OidIsValid(dboid)); + + Assert(pgStatLocal.shared_hash != NULL); + + /* + * This backend might very well be the only backend holding a reference to + * about-to-be-dropped entries. Ensure that we're not preventing it from + * being cleaned up till later. + * + * Doing this separately from the dshash iteration below avoids having to + * do so while holding a partition lock on the shared hashtable. + */ + pgstat_release_db_entry_refs(dboid); + + /* some of the dshash entries are to be removed, take exclusive lock. */ + dshash_seq_init(&hstat, pgStatLocal.shared_hash, true); + while ((p = dshash_seq_next(&hstat)) != NULL) + { + if (p->dropped) + continue; + + if (p->key.dboid != dboid) + continue; + + if (!pgstat_drop_entry_internal(p, &hstat)) + { + /* + * Even statistics for a dropped database might currently be + * accessed (consider e.g. database stats for pg_stat_database). 
+ */
+			not_freed_count++;
+		}
+	}
+	dshash_seq_term(&hstat);
+
+	/*
+	 * If some of the stats data could not be freed, signal the reference
+	 * holders to garbage collect their cached entry refs
+	 * (pgStatEntryRefHash).
+	 */
+	if (not_freed_count > 0)
+		pgstat_request_entry_refs_gc();
+}
+
+bool
+pgstat_drop_entry(PgStat_Kind kind, Oid dboid, Oid objoid)
+{
+	PgStat_HashKey key = {.kind = kind,.dboid = dboid,.objoid = objoid};
+	PgStatShared_HashEntry *shent;
+	bool		freed = true;
+
+	/* delete local reference */
+	if (pgStatEntryRefHash)
+	{
+		PgStat_EntryRefHashEntry *lohashent =
+		pgstat_entry_ref_hash_lookup(pgStatEntryRefHash, key);
+
+		if (lohashent)
+			pgstat_release_entry_ref(lohashent->key, lohashent->entry_ref,
+									 true);
+	}
+
+	/* mark entry in shared hashtable as deleted, drop if possible */
+	shent = dshash_find(pgStatLocal.shared_hash, &key, true);
+	if (shent)
+	{
+		freed = pgstat_drop_entry_internal(shent, NULL);
+
+		/*
+		 * Database stats contain other stats. Drop those as well when
+		 * dropping the database. XXX: Perhaps this should be done in a
+		 * slightly more principled way? But not obvious what that'd look
+		 * like, and so far this is the only case...
+		 */
+		if (key.kind == PGSTAT_KIND_DATABASE)
+			pgstat_drop_database_and_contents(key.dboid);
+	}
+
+	return freed;
+}
+
+void
+pgstat_drop_all_entries(void)
+{
+	dshash_seq_status hstat;
+	PgStatShared_HashEntry *ps;
+	uint64		not_freed_count = 0;
+
+	dshash_seq_init(&hstat, pgStatLocal.shared_hash, true);
+	while ((ps = dshash_seq_next(&hstat)) != NULL)
+	{
+		if (ps->dropped)
+			continue;
+
+		if (!pgstat_drop_entry_internal(ps, &hstat))
+			not_freed_count++;
+	}
+	dshash_seq_term(&hstat);
+
+	if (not_freed_count > 0)
+		pgstat_request_entry_refs_gc();
+}
+
+static void
+shared_stat_reset_contents(PgStat_Kind kind, PgStatShared_Common *header,
+						   TimestampTz ts)
+{
+	const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
+
+	memset(pgstat_get_entry_data(kind, header), 0,
+		   pgstat_get_entry_len(kind));
+
+	if (kind_info->reset_timestamp_cb)
+		kind_info->reset_timestamp_cb(header, ts);
+}
+
+/*
+ * Reset one variable-numbered stats entry.
+ */
+void
+pgstat_reset_entry(PgStat_Kind kind, Oid dboid, Oid objoid, TimestampTz ts)
+{
+	PgStat_EntryRef *entry_ref;
+
+	Assert(!pgstat_get_kind_info(kind)->fixed_amount);
+
+	entry_ref = pgstat_get_entry_ref(kind, dboid, objoid, false, NULL);
+	if (!entry_ref || entry_ref->shared_entry->dropped)
+		return;
+
+	(void) pgstat_lock_entry(entry_ref, false);
+	shared_stat_reset_contents(kind, entry_ref->shared_stats, ts);
+	pgstat_unlock_entry(entry_ref);
+}
+
+/*
+ * Scan through the shared hashtable of stats, resetting statistics if
+ * approved by the provided do_reset() function.
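+ *
+ * [Editor's note -- illustrative sketch, not part of the upstream commit]
+ * The do_reset() callback receives the shared hash entry plus the caller's
+ * match_data, mirroring match_kind() below.  A hypothetical predicate that
+ * approves only one database's entries:
+ *
+ *		static bool
+ *		example_match_dboid(PgStatShared_HashEntry *p, Datum match_data)
+ *		{
+ *			return p->key.dboid == DatumGetObjectId(match_data);
+ *		}
+ *
+ * invoked as pgstat_reset_matching_entries(example_match_dboid,
+ * ObjectIdGetDatum(dboid), GetCurrentTimestamp()).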
+ */ +void +pgstat_reset_matching_entries(bool (*do_reset) (PgStatShared_HashEntry *, Datum), + Datum match_data, TimestampTz ts) +{ + dshash_seq_status hstat; + PgStatShared_HashEntry *p; + + /* dshash entry is not modified, take shared lock */ + dshash_seq_init(&hstat, pgStatLocal.shared_hash, false); + while ((p = dshash_seq_next(&hstat)) != NULL) + { + PgStatShared_Common *header; + + if (p->dropped) + continue; + + if (!do_reset(p, match_data)) + continue; + + header = dsa_get_address(pgStatLocal.dsa, p->body); + + LWLockAcquire(&header->lock, LW_EXCLUSIVE); + + shared_stat_reset_contents(p->key.kind, header, ts); + + LWLockRelease(&header->lock); + } + dshash_seq_term(&hstat); +} + +static bool +match_kind(PgStatShared_HashEntry *p, Datum match_data) +{ + return p->key.kind == DatumGetInt32(match_data); +} + +void +pgstat_reset_entries_of_kind(PgStat_Kind kind, TimestampTz ts) +{ + pgstat_reset_matching_entries(match_kind, Int32GetDatum(kind), ts); +} + +static void +pgstat_setup_memcxt(void) +{ + if (unlikely(!pgStatSharedRefContext)) + pgStatSharedRefContext = + AllocSetContextCreate(TopMemoryContext, + "PgStat Shared Ref", + ALLOCSET_SMALL_SIZES); + if (unlikely(!pgStatEntryRefHashContext)) + pgStatEntryRefHashContext = + AllocSetContextCreate(TopMemoryContext, + "PgStat Shared Ref Hash", + ALLOCSET_SMALL_SIZES); +} diff --git a/src/backend/utils/activity/pgstat_slru.c b/src/backend/utils/activity/pgstat_slru.c new file mode 100644 index 0000000..28ef736 --- /dev/null +++ b/src/backend/utils/activity/pgstat_slru.c @@ -0,0 +1,248 @@ +/* ------------------------------------------------------------------------- + * + * pgstat_slru.c + * Implementation of SLRU statistics. + * + * This file contains the implementation of SLRU statistics. It is kept + * separate from pgstat.c to enforce the line between the statistics access / + * storage implementation and the details about individual types of + * statistics. + * + * Copyright (c) 2001-2022, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/utils/activity/pgstat_slru.c + * ------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "utils/pgstat_internal.h" +#include "utils/timestamp.h" + + +static inline PgStat_SLRUStats *get_slru_entry(int slru_idx); +static void pgstat_reset_slru_counter_internal(int index, TimestampTz ts); + + +/* + * SLRU statistics counts waiting to be flushed out. We assume this variable + * inits to zeroes. Entries are one-to-one with slru_names[]. Changes of + * SLRU counters are reported within critical sections so we use static memory + * in order to avoid memory allocation. + */ +static PgStat_SLRUStats pending_SLRUStats[SLRU_NUM_ELEMENTS]; +bool have_slrustats = false; + + +/* + * Reset counters for a single SLRU. + * + * Permission checking for this function is managed through the normal + * GRANT system. 
+ */
+void
+pgstat_reset_slru(const char *name)
+{
+	TimestampTz ts = GetCurrentTimestamp();
+
+	AssertArg(name != NULL);
+
+	pgstat_reset_slru_counter_internal(pgstat_get_slru_index(name), ts);
+}
+
+/*
+ * SLRU statistics count accumulation functions --- called from slru.c
+ */
+
+void
+pgstat_count_slru_page_zeroed(int slru_idx)
+{
+	get_slru_entry(slru_idx)->blocks_zeroed += 1;
+}
+
+void
+pgstat_count_slru_page_hit(int slru_idx)
+{
+	get_slru_entry(slru_idx)->blocks_hit += 1;
+}
+
+void
+pgstat_count_slru_page_exists(int slru_idx)
+{
+	get_slru_entry(slru_idx)->blocks_exists += 1;
+}
+
+void
+pgstat_count_slru_page_read(int slru_idx)
+{
+	get_slru_entry(slru_idx)->blocks_read += 1;
+}
+
+void
+pgstat_count_slru_page_written(int slru_idx)
+{
+	get_slru_entry(slru_idx)->blocks_written += 1;
+}
+
+void
+pgstat_count_slru_flush(int slru_idx)
+{
+	get_slru_entry(slru_idx)->flush += 1;
+}
+
+void
+pgstat_count_slru_truncate(int slru_idx)
+{
+	get_slru_entry(slru_idx)->truncate += 1;
+}
+
+/*
+ * Support function for the SQL-callable pgstat* functions. Returns
+ * a pointer to the SLRU statistics struct.
+ */
+PgStat_SLRUStats *
+pgstat_fetch_slru(void)
+{
+	pgstat_snapshot_fixed(PGSTAT_KIND_SLRU);
+
+	return pgStatLocal.snapshot.slru;
+}
+
+/*
+ * Returns the SLRU name for an index. The index may be above
+ * SLRU_NUM_ELEMENTS, in which case this returns NULL. This allows writing
+ * code that does not know the number of entries in advance.
+ */
+const char *
+pgstat_get_slru_name(int slru_idx)
+{
+	if (slru_idx < 0 || slru_idx >= SLRU_NUM_ELEMENTS)
+		return NULL;
+
+	return slru_names[slru_idx];
+}
+
+/*
+ * Determine the index of the entry for an SLRU with a given name. If there's
+ * no exact match, returns the index of the last entry, the "other" entry used
+ * for SLRUs defined in external projects.
+ */
+int
+pgstat_get_slru_index(const char *name)
+{
+	int			i;
+
+	for (i = 0; i < SLRU_NUM_ELEMENTS; i++)
+	{
+		if (strcmp(slru_names[i], name) == 0)
+			return i;
+	}
+
+	/* return index of the last entry (which is the "other" one) */
+	return (SLRU_NUM_ELEMENTS - 1);
+}
+
+/*
+ * Flush out locally pending SLRU stats entries.
+ *
+ * If nowait is true, this function returns true if the lock could not be
+ * acquired. Otherwise returns false (also when there was nothing to flush).
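+ *
+ * [Editor's note -- illustrative sketch, not part of the upstream commit]
+ * This is the nowait contract shared by the flush callbacks in this
+ * commit: "true" means the lock was contended and some data remains
+ * pending.  A non-blocking caller, in the style of pgstat_report_stat(),
+ * would accumulate that signal and retry later:
+ *
+ *		partial_flush |= pgstat_slru_flush(true);	// never sleeps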
+ */ +bool +pgstat_slru_flush(bool nowait) +{ + PgStatShared_SLRU *stats_shmem = &pgStatLocal.shmem->slru; + int i; + + if (!have_slrustats) + return false; + + if (!nowait) + LWLockAcquire(&stats_shmem->lock, LW_EXCLUSIVE); + else if (!LWLockConditionalAcquire(&stats_shmem->lock, LW_EXCLUSIVE)) + return true; + + for (i = 0; i < SLRU_NUM_ELEMENTS; i++) + { + PgStat_SLRUStats *sharedent = &stats_shmem->stats[i]; + PgStat_SLRUStats *pendingent = &pending_SLRUStats[i]; + +#define SLRU_ACC(fld) sharedent->fld += pendingent->fld + SLRU_ACC(blocks_zeroed); + SLRU_ACC(blocks_hit); + SLRU_ACC(blocks_read); + SLRU_ACC(blocks_written); + SLRU_ACC(blocks_exists); + SLRU_ACC(flush); + SLRU_ACC(truncate); +#undef SLRU_ACC + } + + /* done, clear the pending entry */ + MemSet(pending_SLRUStats, 0, sizeof(pending_SLRUStats)); + + LWLockRelease(&stats_shmem->lock); + + have_slrustats = false; + + return false; +} + +void +pgstat_slru_reset_all_cb(TimestampTz ts) +{ + for (int i = 0; i < SLRU_NUM_ELEMENTS; i++) + pgstat_reset_slru_counter_internal(i, ts); +} + +void +pgstat_slru_snapshot_cb(void) +{ + PgStatShared_SLRU *stats_shmem = &pgStatLocal.shmem->slru; + + LWLockAcquire(&stats_shmem->lock, LW_SHARED); + + memcpy(pgStatLocal.snapshot.slru, &stats_shmem->stats, + sizeof(stats_shmem->stats)); + + LWLockRelease(&stats_shmem->lock); +} + +/* + * Returns pointer to entry with counters for given SLRU (based on the name + * stored in SlruCtl as lwlock tranche name). + */ +static inline PgStat_SLRUStats * +get_slru_entry(int slru_idx) +{ + pgstat_assert_is_up(); + + /* + * The postmaster should never register any SLRU statistics counts; if it + * did, the counts would be duplicated into child processes via fork(). + */ + Assert(IsUnderPostmaster || !IsPostmasterEnvironment); + + Assert((slru_idx >= 0) && (slru_idx < SLRU_NUM_ELEMENTS)); + + have_slrustats = true; + + return &pending_SLRUStats[slru_idx]; +} + +static void +pgstat_reset_slru_counter_internal(int index, TimestampTz ts) +{ + PgStatShared_SLRU *stats_shmem = &pgStatLocal.shmem->slru; + + LWLockAcquire(&stats_shmem->lock, LW_EXCLUSIVE); + + memset(&stats_shmem->stats[index], 0, sizeof(PgStat_SLRUStats)); + stats_shmem->stats[index].stat_reset_timestamp = ts; + + LWLockRelease(&stats_shmem->lock); +} diff --git a/src/backend/utils/activity/pgstat_subscription.c b/src/backend/utils/activity/pgstat_subscription.c new file mode 100644 index 0000000..e1072bd --- /dev/null +++ b/src/backend/utils/activity/pgstat_subscription.c @@ -0,0 +1,110 @@ +/* ------------------------------------------------------------------------- + * + * pgstat_subscription.c + * Implementation of subscription statistics. + * + * This file contains the implementation of subscription statistics. It is kept + * separate from pgstat.c to enforce the line between the statistics access / + * storage implementation and the details about individual types of + * statistics. + * + * Copyright (c) 2001-2022, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/utils/activity/pgstat_subscription.c + * ------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "utils/pgstat_internal.h" + + +/* + * Report a subscription error. 
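+ *
+ * [Editor's note -- illustrative sketch, not part of the upstream commit]
+ * Callers are the logical replication workers; e.g. an apply worker's
+ * error handler would count its failure roughly as:
+ *
+ *		pgstat_report_subscription_error(subid, true);	// true = apply,
+ *														// false = tablesync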
+ */
+void
+pgstat_report_subscription_error(Oid subid, bool is_apply_error)
+{
+	PgStat_EntryRef *entry_ref;
+	PgStat_BackendSubEntry *pending;
+
+	entry_ref = pgstat_prep_pending_entry(PGSTAT_KIND_SUBSCRIPTION,
+										  InvalidOid, subid, NULL);
+	pending = entry_ref->pending;
+
+	if (is_apply_error)
+		pending->apply_error_count++;
+	else
+		pending->sync_error_count++;
+}
+
+/*
+ * Report creating the subscription.
+ *
+ * Ensures that stats are dropped if the transaction rolls back.
+ */
+void
+pgstat_create_subscription(Oid subid)
+{
+	pgstat_create_transactional(PGSTAT_KIND_SUBSCRIPTION,
+								InvalidOid, subid);
+}
+
+/*
+ * Report dropping the subscription.
+ *
+ * Ensures that stats are dropped if the transaction commits.
+ */
+void
+pgstat_drop_subscription(Oid subid)
+{
+	pgstat_drop_transactional(PGSTAT_KIND_SUBSCRIPTION,
+							  InvalidOid, subid);
+}
+
+/*
+ * Support function for the SQL-callable pgstat* functions. Returns
+ * the collected statistics for one subscription, or NULL.
+ */
+PgStat_StatSubEntry *
+pgstat_fetch_stat_subscription(Oid subid)
+{
+	return (PgStat_StatSubEntry *)
+		pgstat_fetch_entry(PGSTAT_KIND_SUBSCRIPTION, InvalidOid, subid);
+}
+
+/*
+ * Flush out pending stats for the entry.
+ *
+ * If nowait is true, this function returns false if the lock could not be
+ * acquired immediately; otherwise true is returned.
+ */
+bool
+pgstat_subscription_flush_cb(PgStat_EntryRef *entry_ref, bool nowait)
+{
+	PgStat_BackendSubEntry *localent;
+	PgStatShared_Subscription *shsubent;
+
+	localent = (PgStat_BackendSubEntry *) entry_ref->pending;
+	shsubent = (PgStatShared_Subscription *) entry_ref->shared_stats;
+
+	/* localent always has non-zero content */
+
+	if (!pgstat_lock_entry(entry_ref, nowait))
+		return false;
+
+#define SUB_ACC(fld) shsubent->stats.fld += localent->fld
+	SUB_ACC(apply_error_count);
+	SUB_ACC(sync_error_count);
+#undef SUB_ACC
+
+	pgstat_unlock_entry(entry_ref);
+	return true;
+}
+
+void
+pgstat_subscription_reset_timestamp_cb(PgStatShared_Common *header, TimestampTz ts)
+{
+	((PgStatShared_Subscription *) header)->stats.stat_reset_timestamp = ts;
+}
diff --git a/src/backend/utils/activity/pgstat_wal.c b/src/backend/utils/activity/pgstat_wal.c
new file mode 100644
index 0000000..305a925
--- /dev/null
+++ b/src/backend/utils/activity/pgstat_wal.c
@@ -0,0 +1,182 @@
+/* -------------------------------------------------------------------------
+ *
+ * pgstat_wal.c
+ *	  Implementation of WAL statistics.
+ *
+ * This file contains the implementation of WAL statistics. It is kept
+ * separate from pgstat.c to enforce the line between the statistics access /
+ * storage implementation and the details about individual types of
+ * statistics.
+ *
+ * Copyright (c) 2001-2022, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *	  src/backend/utils/activity/pgstat_wal.c
+ * -------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "utils/pgstat_internal.h"
+#include "executor/instrument.h"
+
+
+PgStat_WalStats PendingWalStats = {0};
+
+/*
+ * WAL usage counters saved from pgWalUsage at the previous call to
+ * pgstat_report_wal(). This is used to calculate how much WAL usage
+ * happens between pgstat_report_wal() calls, by subtracting
+ * the previous counters from the current ones.
+ */
+static WalUsage prevWalUsage;
+
+
+/*
+ * Calculate how much the WAL usage counters have increased and update the
+ * shared statistics.
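+ *
+ * [Editor's note -- illustrative sketch, not part of the upstream commit]
+ * E.g. the walwriter can flush opportunistically in its main loop and
+ * force a flush on shutdown:
+ *
+ *		pgstat_report_wal(false);	// loop: skip if the lock is contended
+ *		pgstat_report_wal(true);	// exit: wait for the lock, flush all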
+ *
+ * Must be called by processes that generate WAL but do not call
+ * pgstat_report_stat(), like the walwriter.
+ *
+ * "force" set to true ensures that the statistics are flushed; note that
+ * this needs to acquire the pgstat shmem LWLock, waiting on it. When
+ * set to false, the statistics may not be flushed if the lock could not
+ * be acquired.
+ */
+void
+pgstat_report_wal(bool force)
+{
+	bool		nowait;
+
+	/* like in pgstat.c, don't wait for lock acquisition when !force */
+	nowait = !force;
+
+	/* flush wal stats */
+	pgstat_flush_wal(nowait);
+}
+
+/*
+ * Support function for the SQL-callable pgstat* functions. Returns
+ * a pointer to the WAL statistics struct.
+ */
+PgStat_WalStats *
+pgstat_fetch_stat_wal(void)
+{
+	pgstat_snapshot_fixed(PGSTAT_KIND_WAL);
+
+	return &pgStatLocal.snapshot.wal;
+}
+
+/*
+ * Calculate how much the WAL usage counters have increased by subtracting
+ * the previous counters from the current ones, and flush the result out to
+ * shared memory.
+ *
+ * If nowait is true, this function returns true if the lock could not be
+ * acquired. Otherwise returns false.
+ */
+bool
+pgstat_flush_wal(bool nowait)
+{
+	PgStatShared_Wal *stats_shmem = &pgStatLocal.shmem->wal;
+	WalUsage	diff = {0};
+
+	Assert(IsUnderPostmaster || !IsPostmasterEnvironment);
+	Assert(pgStatLocal.shmem != NULL &&
+		   !pgStatLocal.shmem->is_shutdown);
+
+	/*
+	 * This function can be called even if nothing at all has happened. Avoid
+	 * taking the lock for nothing in that case.
+	 */
+	if (!pgstat_have_pending_wal())
+		return false;
+
+	/*
+	 * We don't update the WAL usage portion of the local WalStats elsewhere.
+	 * Calculate how much the WAL usage counters have increased by subtracting
+	 * the previous counters from the current ones.
+	 */
+	WalUsageAccumDiff(&diff, &pgWalUsage, &prevWalUsage);
+	PendingWalStats.wal_records = diff.wal_records;
+	PendingWalStats.wal_fpi = diff.wal_fpi;
+	PendingWalStats.wal_bytes = diff.wal_bytes;
+
+	if (!nowait)
+		LWLockAcquire(&stats_shmem->lock, LW_EXCLUSIVE);
+	else if (!LWLockConditionalAcquire(&stats_shmem->lock, LW_EXCLUSIVE))
+		return true;
+
+#define WALSTAT_ACC(fld) stats_shmem->stats.fld += PendingWalStats.fld
+	WALSTAT_ACC(wal_records);
+	WALSTAT_ACC(wal_fpi);
+	WALSTAT_ACC(wal_bytes);
+	WALSTAT_ACC(wal_buffers_full);
+	WALSTAT_ACC(wal_write);
+	WALSTAT_ACC(wal_sync);
+	WALSTAT_ACC(wal_write_time);
+	WALSTAT_ACC(wal_sync_time);
+#undef WALSTAT_ACC
+
+	LWLockRelease(&stats_shmem->lock);
+
+	/*
+	 * Save the current counters for the subsequent calculation of WAL usage.
+	 */
+	prevWalUsage = pgWalUsage;
+
+	/*
+	 * Clear out the statistics buffer, so it can be re-used.
+	 */
+	MemSet(&PendingWalStats, 0, sizeof(PendingWalStats));
+
+	return false;
+}
+
+void
+pgstat_init_wal(void)
+{
+	/*
+	 * Initialize prevWalUsage with pgWalUsage so that pgstat_flush_wal() can
+	 * calculate how much the pgWalUsage counters have increased by
+	 * subtracting prevWalUsage from pgWalUsage.
+	 */
+	prevWalUsage = pgWalUsage;
+}
+
+/*
+ * To determine whether any WAL activity has occurred since last time, not
+ * only the number of generated WAL records but also the numbers of WAL
+ * writes and syncs need to be checked, because even a transaction that
+ * generates no WAL records can write or sync WAL data when flushing data
+ * pages.
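+ *
+ * [Editor's note -- illustrative sketch, not part of the upstream commit]
+ * WalUsageAccumDiff() from executor/instrument.h, used in the flush above,
+ * accumulates (add - sub) field by field into dst; with a zeroed dst it is
+ * plain subtraction:
+ *
+ *		WalUsage	diff = {0};
+ *
+ *		WalUsageAccumDiff(&diff, &pgWalUsage, &prevWalUsage);
+ *		// diff.wal_records > 0 iff records were generated since last flush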
+ */ +bool +pgstat_have_pending_wal(void) +{ + return pgWalUsage.wal_records != prevWalUsage.wal_records || + PendingWalStats.wal_write != 0 || + PendingWalStats.wal_sync != 0; +} + +void +pgstat_wal_reset_all_cb(TimestampTz ts) +{ + PgStatShared_Wal *stats_shmem = &pgStatLocal.shmem->wal; + + LWLockAcquire(&stats_shmem->lock, LW_EXCLUSIVE); + memset(&stats_shmem->stats, 0, sizeof(stats_shmem->stats)); + stats_shmem->stats.stat_reset_timestamp = ts; + LWLockRelease(&stats_shmem->lock); +} + +void +pgstat_wal_snapshot_cb(void) +{ + PgStatShared_Wal *stats_shmem = &pgStatLocal.shmem->wal; + + LWLockAcquire(&stats_shmem->lock, LW_SHARED); + memcpy(&pgStatLocal.snapshot.wal, &stats_shmem->stats, + sizeof(pgStatLocal.snapshot.wal)); + LWLockRelease(&stats_shmem->lock); +} diff --git a/src/backend/utils/activity/pgstat_xact.c b/src/backend/utils/activity/pgstat_xact.c new file mode 100644 index 0000000..d6f660e --- /dev/null +++ b/src/backend/utils/activity/pgstat_xact.c @@ -0,0 +1,391 @@ +/* ------------------------------------------------------------------------- + * + * pgstat_xact.c + * Transactional integration for the cumulative statistics system. + * + * Copyright (c) 2001-2022, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/utils/activity/pgstat_xact.c + * ------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/transam.h" +#include "access/xact.h" +#include "pgstat.h" +#include "utils/memutils.h" +#include "utils/pgstat_internal.h" + + +typedef struct PgStat_PendingDroppedStatsItem +{ + xl_xact_stats_item item; + bool is_create; + dlist_node node; +} PgStat_PendingDroppedStatsItem; + + +static void AtEOXact_PgStat_DroppedStats(PgStat_SubXactStatus *xact_state, bool isCommit); +static void AtEOSubXact_PgStat_DroppedStats(PgStat_SubXactStatus *xact_state, + bool isCommit, int nestDepth); + +static PgStat_SubXactStatus *pgStatXactStack = NULL; + + +/* + * Called from access/transam/xact.c at top-level transaction commit/abort. + */ +void +AtEOXact_PgStat(bool isCommit, bool parallel) +{ + PgStat_SubXactStatus *xact_state; + + AtEOXact_PgStat_Database(isCommit, parallel); + + /* handle transactional stats information */ + xact_state = pgStatXactStack; + if (xact_state != NULL) + { + Assert(xact_state->nest_level == 1); + Assert(xact_state->prev == NULL); + + AtEOXact_PgStat_Relations(xact_state, isCommit); + AtEOXact_PgStat_DroppedStats(xact_state, isCommit); + } + pgStatXactStack = NULL; + + /* Make sure any stats snapshot is thrown away */ + pgstat_clear_snapshot(); +} + +/* + * When committing, drop stats for objects dropped in the transaction. When + * aborting, drop stats for objects created in the transaction. + */ +static void +AtEOXact_PgStat_DroppedStats(PgStat_SubXactStatus *xact_state, bool isCommit) +{ + dlist_mutable_iter iter; + int not_freed_count = 0; + + if (xact_state->pending_drops_count == 0) + { + Assert(dlist_is_empty(&xact_state->pending_drops)); + return; + } + + dlist_foreach_modify(iter, &xact_state->pending_drops) + { + PgStat_PendingDroppedStatsItem *pending = + dlist_container(PgStat_PendingDroppedStatsItem, node, iter.cur); + xl_xact_stats_item *it = &pending->item; + + if (isCommit && !pending->is_create) + { + /* + * Transaction that dropped an object committed. Drop the stats + * too. 
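+			 *
+			 * [Editor's note -- not part of the upstream commit] This
+			 * branch and the one below form a two-case decision table;
+			 * as a single predicate:
+			 *
+			 *		drop = isCommit ? !pending->is_create : pending->is_create;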
+ */ + if (!pgstat_drop_entry(it->kind, it->dboid, it->objoid)) + not_freed_count++; + } + else if (!isCommit && pending->is_create) + { + /* + * Transaction that created an object aborted. Drop the stats + * associated with the object. + */ + if (!pgstat_drop_entry(it->kind, it->dboid, it->objoid)) + not_freed_count++; + } + + dlist_delete(&pending->node); + xact_state->pending_drops_count--; + pfree(pending); + } + + if (not_freed_count > 0) + pgstat_request_entry_refs_gc(); +} + +/* + * Called from access/transam/xact.c at subtransaction commit/abort. + */ +void +AtEOSubXact_PgStat(bool isCommit, int nestDepth) +{ + PgStat_SubXactStatus *xact_state; + + /* merge the sub-transaction's transactional stats into the parent */ + xact_state = pgStatXactStack; + if (xact_state != NULL && + xact_state->nest_level >= nestDepth) + { + /* delink xact_state from stack immediately to simplify reuse case */ + pgStatXactStack = xact_state->prev; + + AtEOSubXact_PgStat_Relations(xact_state, isCommit, nestDepth); + AtEOSubXact_PgStat_DroppedStats(xact_state, isCommit, nestDepth); + + pfree(xact_state); + } +} + +/* + * Like AtEOXact_PgStat_DroppedStats(), but for subtransactions. + */ +static void +AtEOSubXact_PgStat_DroppedStats(PgStat_SubXactStatus *xact_state, + bool isCommit, int nestDepth) +{ + PgStat_SubXactStatus *parent_xact_state; + dlist_mutable_iter iter; + int not_freed_count = 0; + + if (xact_state->pending_drops_count == 0) + return; + + parent_xact_state = pgstat_get_xact_stack_level(nestDepth - 1); + + dlist_foreach_modify(iter, &xact_state->pending_drops) + { + PgStat_PendingDroppedStatsItem *pending = + dlist_container(PgStat_PendingDroppedStatsItem, node, iter.cur); + xl_xact_stats_item *it = &pending->item; + + dlist_delete(&pending->node); + xact_state->pending_drops_count--; + + if (!isCommit && pending->is_create) + { + /* + * Subtransaction creating a new stats object aborted. Drop the + * stats object. + */ + if (!pgstat_drop_entry(it->kind, it->dboid, it->objoid)) + not_freed_count++; + pfree(pending); + } + else if (isCommit) + { + /* + * Subtransaction dropping a stats object committed. Can't yet + * remove the stats object, the surrounding transaction might + * still abort. Pass it on to the parent. + */ + dlist_push_tail(&parent_xact_state->pending_drops, &pending->node); + parent_xact_state->pending_drops_count++; + } + else + { + pfree(pending); + } + } + + Assert(xact_state->pending_drops_count == 0); + if (not_freed_count > 0) + pgstat_request_entry_refs_gc(); +} + +/* + * Save the transactional stats state at 2PC transaction prepare. + */ +void +AtPrepare_PgStat(void) +{ + PgStat_SubXactStatus *xact_state; + + xact_state = pgStatXactStack; + if (xact_state != NULL) + { + Assert(xact_state->nest_level == 1); + Assert(xact_state->prev == NULL); + + AtPrepare_PgStat_Relations(xact_state); + } +} + +/* + * Clean up after successful PREPARE. + * + * Note: AtEOXact_PgStat is not called during PREPARE. + */ +void +PostPrepare_PgStat(void) +{ + PgStat_SubXactStatus *xact_state; + + /* + * We don't bother to free any of the transactional state, since it's all + * in TopTransactionContext and will go away anyway. 
+ */ + xact_state = pgStatXactStack; + if (xact_state != NULL) + { + Assert(xact_state->nest_level == 1); + Assert(xact_state->prev == NULL); + + PostPrepare_PgStat_Relations(xact_state); + } + pgStatXactStack = NULL; + + /* Make sure any stats snapshot is thrown away */ + pgstat_clear_snapshot(); +} + +/* + * Ensure (sub)transaction stack entry for the given nest_level exists, adding + * it if needed. + */ +PgStat_SubXactStatus * +pgstat_get_xact_stack_level(int nest_level) +{ + PgStat_SubXactStatus *xact_state; + + xact_state = pgStatXactStack; + if (xact_state == NULL || xact_state->nest_level != nest_level) + { + xact_state = (PgStat_SubXactStatus *) + MemoryContextAlloc(TopTransactionContext, + sizeof(PgStat_SubXactStatus)); + dlist_init(&xact_state->pending_drops); + xact_state->pending_drops_count = 0; + xact_state->nest_level = nest_level; + xact_state->prev = pgStatXactStack; + xact_state->first = NULL; + pgStatXactStack = xact_state; + } + return xact_state; +} + +/* + * Get stat items that need to be dropped at commit / abort. + * + * When committing, stats for objects that have been dropped in the + * transaction are returned. When aborting, stats for newly created objects are + * returned. + * + * Used by COMMIT / ABORT and 2PC PREPARE processing when building their + * respective WAL records, to ensure stats are dropped in case of a crash / on + * standbys. + * + * The list of items is allocated in CurrentMemoryContext and must be freed by + * the caller (directly or via memory context reset). + */ +int +pgstat_get_transactional_drops(bool isCommit, xl_xact_stats_item **items) +{ + PgStat_SubXactStatus *xact_state = pgStatXactStack; + int nitems = 0; + dlist_iter iter; + + if (xact_state == NULL) + return 0; + + /* + * We expect to be called for subtransaction abort (which logs a WAL + * record), but not for subtransaction commit (which doesn't). + */ + Assert(!isCommit || xact_state->nest_level == 1); + Assert(!isCommit || xact_state->prev == NULL); + + *items = palloc(xact_state->pending_drops_count + * sizeof(xl_xact_stats_item)); + + dlist_foreach(iter, &xact_state->pending_drops) + { + PgStat_PendingDroppedStatsItem *pending = + dlist_container(PgStat_PendingDroppedStatsItem, node, iter.cur); + + if (isCommit && pending->is_create) + continue; + if (!isCommit && !pending->is_create) + continue; + + Assert(nitems < xact_state->pending_drops_count); + (*items)[nitems++] = pending->item; + } + + return nitems; +} + +/* + * Execute scheduled drops post-commit. Called from xact_redo_commit() / + * xact_redo_abort() during recovery, and from FinishPreparedTransaction() + * during normal 2PC COMMIT/ABORT PREPARED processing. 
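+ *
+ * [Editor's note -- illustrative sketch, not part of the upstream commit]
+ * The items replayed here are the ones the record builder collected via
+ * pgstat_get_transactional_drops() above:
+ *
+ *		xl_xact_stats_item *items = NULL;
+ *		int			ndrops = pgstat_get_transactional_drops(isCommit, &items);
+ *
+ *		// ... attach ndrops/items to the commit/abort record; recovery
+ *		// then calls pgstat_execute_transactional_drops(ndrops, items, true)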
+ */ +void +pgstat_execute_transactional_drops(int ndrops, struct xl_xact_stats_item *items, bool is_redo) +{ + int not_freed_count = 0; + + if (ndrops == 0) + return; + + for (int i = 0; i < ndrops; i++) + { + xl_xact_stats_item *it = &items[i]; + + if (!pgstat_drop_entry(it->kind, it->dboid, it->objoid)) + not_freed_count++; + } + + if (not_freed_count > 0) + pgstat_request_entry_refs_gc(); +} + +static void +create_drop_transactional_internal(PgStat_Kind kind, Oid dboid, Oid objoid, bool is_create) +{ + int nest_level = GetCurrentTransactionNestLevel(); + PgStat_SubXactStatus *xact_state; + PgStat_PendingDroppedStatsItem *drop = (PgStat_PendingDroppedStatsItem *) + MemoryContextAlloc(TopTransactionContext, sizeof(PgStat_PendingDroppedStatsItem)); + + xact_state = pgstat_get_xact_stack_level(nest_level); + + drop->is_create = is_create; + drop->item.kind = kind; + drop->item.dboid = dboid; + drop->item.objoid = objoid; + + dlist_push_tail(&xact_state->pending_drops, &drop->node); + xact_state->pending_drops_count++; +} + +/* + * Create a stats entry for a newly created database object in a transactional + * manner. + * + * I.e. if the current (sub-)transaction aborts, the stats entry will also be + * dropped. + */ +void +pgstat_create_transactional(PgStat_Kind kind, Oid dboid, Oid objoid) +{ + if (pgstat_get_entry_ref(kind, dboid, objoid, false, NULL)) + { + ereport(WARNING, + errmsg("resetting existing statistics for kind %s, db=%u, oid=%u", + (pgstat_get_kind_info(kind))->name, dboid, objoid)); + + pgstat_reset(kind, dboid, objoid); + } + + create_drop_transactional_internal(kind, dboid, objoid, /* create */ true); +} + +/* + * Drop a stats entry for a just dropped database object in a transactional + * manner. + * + * I.e. if the current (sub-)transaction aborts, the stats entry will stay + * alive. + */ +void +pgstat_drop_transactional(PgStat_Kind kind, Oid dboid, Oid objoid) +{ + create_drop_transactional_internal(kind, dboid, objoid, /* create */ false); +} diff --git a/src/backend/utils/activity/wait_event.c b/src/backend/utils/activity/wait_event.c new file mode 100644 index 0000000..87c15b9 --- /dev/null +++ b/src/backend/utils/activity/wait_event.c @@ -0,0 +1,749 @@ +/* ---------- + * wait_event.c + * Wait event reporting infrastructure. + * + * Copyright (c) 2001-2022, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * src/backend/utils/activity/wait_event.c + * + * NOTES + * + * To make pgstat_report_wait_start() and pgstat_report_wait_end() as + * lightweight as possible, they do not check if shared memory (MyProc + * specifically, where the wait event is stored) is already available. Instead + * we initially set my_wait_event_info to a process local variable, which then + * is redirected to shared memory using pgstat_set_wait_event_storage(). For + * the same reason pgstat_track_activities is not checked - the check adds + * more work than it saves. 
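+ *
+ * [Editor's note -- illustrative sketch, not part of the upstream commit]
+ * The fast path this buys is the bracketing idiom used around blocking
+ * operations, via the inline helpers declared in utils/wait_event.h:
+ *
+ *		pgstat_report_wait_start(WAIT_EVENT_DATA_FILE_READ);
+ *		// ... blocking read() of a relation file ...
+ *		pgstat_report_wait_end();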
+ * + * ---------- + */ +#include "postgres.h" + +#include "storage/lmgr.h" /* for GetLockNameFromTagType */ +#include "storage/lwlock.h" /* for GetLWLockIdentifier */ +#include "utils/wait_event.h" + + +static const char *pgstat_get_wait_activity(WaitEventActivity w); +static const char *pgstat_get_wait_client(WaitEventClient w); +static const char *pgstat_get_wait_ipc(WaitEventIPC w); +static const char *pgstat_get_wait_timeout(WaitEventTimeout w); +static const char *pgstat_get_wait_io(WaitEventIO w); + + +static uint32 local_my_wait_event_info; +uint32 *my_wait_event_info = &local_my_wait_event_info; + + +/* + * Configure wait event reporting to report wait events to *wait_event_info. + * *wait_event_info needs to be valid until pgstat_reset_wait_event_storage() + * is called. + * + * Expected to be called during backend startup, to point my_wait_event_info + * into shared memory. + */ +void +pgstat_set_wait_event_storage(uint32 *wait_event_info) +{ + my_wait_event_info = wait_event_info; +} + +/* + * Reset wait event storage location. + * + * Expected to be called during backend shutdown, before the location set up + * pgstat_set_wait_event_storage() becomes invalid. + */ +void +pgstat_reset_wait_event_storage(void) +{ + my_wait_event_info = &local_my_wait_event_info; +} + +/* ---------- + * pgstat_get_wait_event_type() - + * + * Return a string representing the current wait event type, backend is + * waiting on. + */ +const char * +pgstat_get_wait_event_type(uint32 wait_event_info) +{ + uint32 classId; + const char *event_type; + + /* report process as not waiting. */ + if (wait_event_info == 0) + return NULL; + + classId = wait_event_info & 0xFF000000; + + switch (classId) + { + case PG_WAIT_LWLOCK: + event_type = "LWLock"; + break; + case PG_WAIT_LOCK: + event_type = "Lock"; + break; + case PG_WAIT_BUFFER_PIN: + event_type = "BufferPin"; + break; + case PG_WAIT_ACTIVITY: + event_type = "Activity"; + break; + case PG_WAIT_CLIENT: + event_type = "Client"; + break; + case PG_WAIT_EXTENSION: + event_type = "Extension"; + break; + case PG_WAIT_IPC: + event_type = "IPC"; + break; + case PG_WAIT_TIMEOUT: + event_type = "Timeout"; + break; + case PG_WAIT_IO: + event_type = "IO"; + break; + default: + event_type = "???"; + break; + } + + return event_type; +} + +/* ---------- + * pgstat_get_wait_event() - + * + * Return a string representing the current wait event, backend is + * waiting on. + */ +const char * +pgstat_get_wait_event(uint32 wait_event_info) +{ + uint32 classId; + uint16 eventId; + const char *event_name; + + /* report process as not waiting. 
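+	 *
+	 * [Editor's note -- not part of the upstream commit] As in
+	 * pgstat_get_wait_event_type() above, wait_event_info packs the class
+	 * into the top byte and the event into the low 16 bits; the middle
+	 * byte is currently unused.  Hence the two masks below:
+	 *
+	 *		classId = wait_event_info & 0xFF000000;
+	 *		eventId = wait_event_info & 0x0000FFFF;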
*/ + if (wait_event_info == 0) + return NULL; + + classId = wait_event_info & 0xFF000000; + eventId = wait_event_info & 0x0000FFFF; + + switch (classId) + { + case PG_WAIT_LWLOCK: + event_name = GetLWLockIdentifier(classId, eventId); + break; + case PG_WAIT_LOCK: + event_name = GetLockNameFromTagType(eventId); + break; + case PG_WAIT_BUFFER_PIN: + event_name = "BufferPin"; + break; + case PG_WAIT_ACTIVITY: + { + WaitEventActivity w = (WaitEventActivity) wait_event_info; + + event_name = pgstat_get_wait_activity(w); + break; + } + case PG_WAIT_CLIENT: + { + WaitEventClient w = (WaitEventClient) wait_event_info; + + event_name = pgstat_get_wait_client(w); + break; + } + case PG_WAIT_EXTENSION: + event_name = "Extension"; + break; + case PG_WAIT_IPC: + { + WaitEventIPC w = (WaitEventIPC) wait_event_info; + + event_name = pgstat_get_wait_ipc(w); + break; + } + case PG_WAIT_TIMEOUT: + { + WaitEventTimeout w = (WaitEventTimeout) wait_event_info; + + event_name = pgstat_get_wait_timeout(w); + break; + } + case PG_WAIT_IO: + { + WaitEventIO w = (WaitEventIO) wait_event_info; + + event_name = pgstat_get_wait_io(w); + break; + } + default: + event_name = "unknown wait event"; + break; + } + + return event_name; +} + +/* ---------- + * pgstat_get_wait_activity() - + * + * Convert WaitEventActivity to string. + * ---------- + */ +static const char * +pgstat_get_wait_activity(WaitEventActivity w) +{ + const char *event_name = "unknown wait event"; + + switch (w) + { + case WAIT_EVENT_ARCHIVER_MAIN: + event_name = "ArchiverMain"; + break; + case WAIT_EVENT_AUTOVACUUM_MAIN: + event_name = "AutoVacuumMain"; + break; + case WAIT_EVENT_BGWRITER_HIBERNATE: + event_name = "BgWriterHibernate"; + break; + case WAIT_EVENT_BGWRITER_MAIN: + event_name = "BgWriterMain"; + break; + case WAIT_EVENT_CHECKPOINTER_MAIN: + event_name = "CheckpointerMain"; + break; + case WAIT_EVENT_LOGICAL_APPLY_MAIN: + event_name = "LogicalApplyMain"; + break; + case WAIT_EVENT_LOGICAL_LAUNCHER_MAIN: + event_name = "LogicalLauncherMain"; + break; + case WAIT_EVENT_RECOVERY_WAL_STREAM: + event_name = "RecoveryWalStream"; + break; + case WAIT_EVENT_SYSLOGGER_MAIN: + event_name = "SysLoggerMain"; + break; + case WAIT_EVENT_WAL_RECEIVER_MAIN: + event_name = "WalReceiverMain"; + break; + case WAIT_EVENT_WAL_SENDER_MAIN: + event_name = "WalSenderMain"; + break; + case WAIT_EVENT_WAL_WRITER_MAIN: + event_name = "WalWriterMain"; + break; + /* no default case, so that compiler will warn */ + } + + return event_name; +} + +/* ---------- + * pgstat_get_wait_client() - + * + * Convert WaitEventClient to string. 
+ * ---------- + */ +static const char * +pgstat_get_wait_client(WaitEventClient w) +{ + const char *event_name = "unknown wait event"; + + switch (w) + { + case WAIT_EVENT_CLIENT_READ: + event_name = "ClientRead"; + break; + case WAIT_EVENT_CLIENT_WRITE: + event_name = "ClientWrite"; + break; + case WAIT_EVENT_GSS_OPEN_SERVER: + event_name = "GSSOpenServer"; + break; + case WAIT_EVENT_LIBPQWALRECEIVER_CONNECT: + event_name = "LibPQWalReceiverConnect"; + break; + case WAIT_EVENT_LIBPQWALRECEIVER_RECEIVE: + event_name = "LibPQWalReceiverReceive"; + break; + case WAIT_EVENT_SSL_OPEN_SERVER: + event_name = "SSLOpenServer"; + break; + case WAIT_EVENT_WAL_SENDER_WAIT_WAL: + event_name = "WalSenderWaitForWAL"; + break; + case WAIT_EVENT_WAL_SENDER_WRITE_DATA: + event_name = "WalSenderWriteData"; + break; + /* no default case, so that compiler will warn */ + } + + return event_name; +} + +/* ---------- + * pgstat_get_wait_ipc() - + * + * Convert WaitEventIPC to string. + * ---------- + */ +static const char * +pgstat_get_wait_ipc(WaitEventIPC w) +{ + const char *event_name = "unknown wait event"; + + switch (w) + { + case WAIT_EVENT_APPEND_READY: + event_name = "AppendReady"; + break; + case WAIT_EVENT_ARCHIVE_CLEANUP_COMMAND: + event_name = "ArchiveCleanupCommand"; + break; + case WAIT_EVENT_ARCHIVE_COMMAND: + event_name = "ArchiveCommand"; + break; + case WAIT_EVENT_BACKEND_TERMINATION: + event_name = "BackendTermination"; + break; + case WAIT_EVENT_BACKUP_WAIT_WAL_ARCHIVE: + event_name = "BackupWaitWalArchive"; + break; + case WAIT_EVENT_BGWORKER_SHUTDOWN: + event_name = "BgWorkerShutdown"; + break; + case WAIT_EVENT_BGWORKER_STARTUP: + event_name = "BgWorkerStartup"; + break; + case WAIT_EVENT_BTREE_PAGE: + event_name = "BtreePage"; + break; + case WAIT_EVENT_BUFFER_IO: + event_name = "BufferIO"; + break; + case WAIT_EVENT_CHECKPOINT_DONE: + event_name = "CheckpointDone"; + break; + case WAIT_EVENT_CHECKPOINT_START: + event_name = "CheckpointStart"; + break; + case WAIT_EVENT_EXECUTE_GATHER: + event_name = "ExecuteGather"; + break; + case WAIT_EVENT_HASH_BATCH_ALLOCATE: + event_name = "HashBatchAllocate"; + break; + case WAIT_EVENT_HASH_BATCH_ELECT: + event_name = "HashBatchElect"; + break; + case WAIT_EVENT_HASH_BATCH_LOAD: + event_name = "HashBatchLoad"; + break; + case WAIT_EVENT_HASH_BUILD_ALLOCATE: + event_name = "HashBuildAllocate"; + break; + case WAIT_EVENT_HASH_BUILD_ELECT: + event_name = "HashBuildElect"; + break; + case WAIT_EVENT_HASH_BUILD_HASH_INNER: + event_name = "HashBuildHashInner"; + break; + case WAIT_EVENT_HASH_BUILD_HASH_OUTER: + event_name = "HashBuildHashOuter"; + break; + case WAIT_EVENT_HASH_GROW_BATCHES_ALLOCATE: + event_name = "HashGrowBatchesAllocate"; + break; + case WAIT_EVENT_HASH_GROW_BATCHES_DECIDE: + event_name = "HashGrowBatchesDecide"; + break; + case WAIT_EVENT_HASH_GROW_BATCHES_ELECT: + event_name = "HashGrowBatchesElect"; + break; + case WAIT_EVENT_HASH_GROW_BATCHES_FINISH: + event_name = "HashGrowBatchesFinish"; + break; + case WAIT_EVENT_HASH_GROW_BATCHES_REPARTITION: + event_name = "HashGrowBatchesRepartition"; + break; + case WAIT_EVENT_HASH_GROW_BUCKETS_ALLOCATE: + event_name = "HashGrowBucketsAllocate"; + break; + case WAIT_EVENT_HASH_GROW_BUCKETS_ELECT: + event_name = "HashGrowBucketsElect"; + break; + case WAIT_EVENT_HASH_GROW_BUCKETS_REINSERT: + event_name = "HashGrowBucketsReinsert"; + break; + case WAIT_EVENT_LOGICAL_SYNC_DATA: + event_name = "LogicalSyncData"; + break; + case WAIT_EVENT_LOGICAL_SYNC_STATE_CHANGE: + event_name = 
"LogicalSyncStateChange"; + break; + case WAIT_EVENT_MQ_INTERNAL: + event_name = "MessageQueueInternal"; + break; + case WAIT_EVENT_MQ_PUT_MESSAGE: + event_name = "MessageQueuePutMessage"; + break; + case WAIT_EVENT_MQ_RECEIVE: + event_name = "MessageQueueReceive"; + break; + case WAIT_EVENT_MQ_SEND: + event_name = "MessageQueueSend"; + break; + case WAIT_EVENT_PARALLEL_BITMAP_SCAN: + event_name = "ParallelBitmapScan"; + break; + case WAIT_EVENT_PARALLEL_CREATE_INDEX_SCAN: + event_name = "ParallelCreateIndexScan"; + break; + case WAIT_EVENT_PARALLEL_FINISH: + event_name = "ParallelFinish"; + break; + case WAIT_EVENT_PROCARRAY_GROUP_UPDATE: + event_name = "ProcArrayGroupUpdate"; + break; + case WAIT_EVENT_PROC_SIGNAL_BARRIER: + event_name = "ProcSignalBarrier"; + break; + case WAIT_EVENT_PROMOTE: + event_name = "Promote"; + break; + case WAIT_EVENT_RECOVERY_CONFLICT_SNAPSHOT: + event_name = "RecoveryConflictSnapshot"; + break; + case WAIT_EVENT_RECOVERY_CONFLICT_TABLESPACE: + event_name = "RecoveryConflictTablespace"; + break; + case WAIT_EVENT_RECOVERY_END_COMMAND: + event_name = "RecoveryEndCommand"; + break; + case WAIT_EVENT_RECOVERY_PAUSE: + event_name = "RecoveryPause"; + break; + case WAIT_EVENT_REPLICATION_ORIGIN_DROP: + event_name = "ReplicationOriginDrop"; + break; + case WAIT_EVENT_REPLICATION_SLOT_DROP: + event_name = "ReplicationSlotDrop"; + break; + case WAIT_EVENT_RESTORE_COMMAND: + event_name = "RestoreCommand"; + break; + case WAIT_EVENT_SAFE_SNAPSHOT: + event_name = "SafeSnapshot"; + break; + case WAIT_EVENT_SYNC_REP: + event_name = "SyncRep"; + break; + case WAIT_EVENT_WAL_RECEIVER_EXIT: + event_name = "WalReceiverExit"; + break; + case WAIT_EVENT_WAL_RECEIVER_WAIT_START: + event_name = "WalReceiverWaitStart"; + break; + case WAIT_EVENT_XACT_GROUP_UPDATE: + event_name = "XactGroupUpdate"; + break; + /* no default case, so that compiler will warn */ + } + + return event_name; +} + +/* ---------- + * pgstat_get_wait_timeout() - + * + * Convert WaitEventTimeout to string. + * ---------- + */ +static const char * +pgstat_get_wait_timeout(WaitEventTimeout w) +{ + const char *event_name = "unknown wait event"; + + switch (w) + { + case WAIT_EVENT_BASE_BACKUP_THROTTLE: + event_name = "BaseBackupThrottle"; + break; + case WAIT_EVENT_CHECKPOINT_WRITE_DELAY: + event_name = "CheckpointWriteDelay"; + break; + case WAIT_EVENT_PG_SLEEP: + event_name = "PgSleep"; + break; + case WAIT_EVENT_RECOVERY_APPLY_DELAY: + event_name = "RecoveryApplyDelay"; + break; + case WAIT_EVENT_RECOVERY_RETRIEVE_RETRY_INTERVAL: + event_name = "RecoveryRetrieveRetryInterval"; + break; + case WAIT_EVENT_REGISTER_SYNC_REQUEST: + event_name = "RegisterSyncRequest"; + break; + case WAIT_EVENT_VACUUM_DELAY: + event_name = "VacuumDelay"; + break; + case WAIT_EVENT_VACUUM_TRUNCATE: + event_name = "VacuumTruncate"; + break; + /* no default case, so that compiler will warn */ + } + + return event_name; +} + +/* ---------- + * pgstat_get_wait_io() - + * + * Convert WaitEventIO to string. 
+ * ---------- + */ +static const char * +pgstat_get_wait_io(WaitEventIO w) +{ + const char *event_name = "unknown wait event"; + + switch (w) + { + case WAIT_EVENT_BASEBACKUP_READ: + event_name = "BaseBackupRead"; + break; + case WAIT_EVENT_BASEBACKUP_SYNC: + event_name = "BaseBackupSync"; + break; + case WAIT_EVENT_BASEBACKUP_WRITE: + event_name = "BaseBackupWrite"; + break; + case WAIT_EVENT_BUFFILE_READ: + event_name = "BufFileRead"; + break; + case WAIT_EVENT_BUFFILE_WRITE: + event_name = "BufFileWrite"; + break; + case WAIT_EVENT_BUFFILE_TRUNCATE: + event_name = "BufFileTruncate"; + break; + case WAIT_EVENT_CONTROL_FILE_READ: + event_name = "ControlFileRead"; + break; + case WAIT_EVENT_CONTROL_FILE_SYNC: + event_name = "ControlFileSync"; + break; + case WAIT_EVENT_CONTROL_FILE_SYNC_UPDATE: + event_name = "ControlFileSyncUpdate"; + break; + case WAIT_EVENT_CONTROL_FILE_WRITE: + event_name = "ControlFileWrite"; + break; + case WAIT_EVENT_CONTROL_FILE_WRITE_UPDATE: + event_name = "ControlFileWriteUpdate"; + break; + case WAIT_EVENT_COPY_FILE_READ: + event_name = "CopyFileRead"; + break; + case WAIT_EVENT_COPY_FILE_WRITE: + event_name = "CopyFileWrite"; + break; + case WAIT_EVENT_DATA_FILE_EXTEND: + event_name = "DataFileExtend"; + break; + case WAIT_EVENT_DATA_FILE_FLUSH: + event_name = "DataFileFlush"; + break; + case WAIT_EVENT_DATA_FILE_IMMEDIATE_SYNC: + event_name = "DataFileImmediateSync"; + break; + case WAIT_EVENT_DATA_FILE_PREFETCH: + event_name = "DataFilePrefetch"; + break; + case WAIT_EVENT_DATA_FILE_READ: + event_name = "DataFileRead"; + break; + case WAIT_EVENT_DATA_FILE_SYNC: + event_name = "DataFileSync"; + break; + case WAIT_EVENT_DATA_FILE_TRUNCATE: + event_name = "DataFileTruncate"; + break; + case WAIT_EVENT_DATA_FILE_WRITE: + event_name = "DataFileWrite"; + break; + case WAIT_EVENT_DSM_FILL_ZERO_WRITE: + event_name = "DSMFillZeroWrite"; + break; + case WAIT_EVENT_LOCK_FILE_ADDTODATADIR_READ: + event_name = "LockFileAddToDataDirRead"; + break; + case WAIT_EVENT_LOCK_FILE_ADDTODATADIR_SYNC: + event_name = "LockFileAddToDataDirSync"; + break; + case WAIT_EVENT_LOCK_FILE_ADDTODATADIR_WRITE: + event_name = "LockFileAddToDataDirWrite"; + break; + case WAIT_EVENT_LOCK_FILE_CREATE_READ: + event_name = "LockFileCreateRead"; + break; + case WAIT_EVENT_LOCK_FILE_CREATE_SYNC: + event_name = "LockFileCreateSync"; + break; + case WAIT_EVENT_LOCK_FILE_CREATE_WRITE: + event_name = "LockFileCreateWrite"; + break; + case WAIT_EVENT_LOCK_FILE_RECHECKDATADIR_READ: + event_name = "LockFileReCheckDataDirRead"; + break; + case WAIT_EVENT_LOGICAL_REWRITE_CHECKPOINT_SYNC: + event_name = "LogicalRewriteCheckpointSync"; + break; + case WAIT_EVENT_LOGICAL_REWRITE_MAPPING_SYNC: + event_name = "LogicalRewriteMappingSync"; + break; + case WAIT_EVENT_LOGICAL_REWRITE_MAPPING_WRITE: + event_name = "LogicalRewriteMappingWrite"; + break; + case WAIT_EVENT_LOGICAL_REWRITE_SYNC: + event_name = "LogicalRewriteSync"; + break; + case WAIT_EVENT_LOGICAL_REWRITE_TRUNCATE: + event_name = "LogicalRewriteTruncate"; + break; + case WAIT_EVENT_LOGICAL_REWRITE_WRITE: + event_name = "LogicalRewriteWrite"; + break; + case WAIT_EVENT_RELATION_MAP_READ: + event_name = "RelationMapRead"; + break; + case WAIT_EVENT_RELATION_MAP_SYNC: + event_name = "RelationMapSync"; + break; + case WAIT_EVENT_RELATION_MAP_WRITE: + event_name = "RelationMapWrite"; + break; + case WAIT_EVENT_REORDER_BUFFER_READ: + event_name = "ReorderBufferRead"; + break; + case WAIT_EVENT_REORDER_BUFFER_WRITE: + event_name = "ReorderBufferWrite"; + 
break;
+		case WAIT_EVENT_REORDER_LOGICAL_MAPPING_READ:
+			event_name = "ReorderLogicalMappingRead";
+			break;
+		case WAIT_EVENT_REPLICATION_SLOT_READ:
+			event_name = "ReplicationSlotRead";
+			break;
+		case WAIT_EVENT_REPLICATION_SLOT_RESTORE_SYNC:
+			event_name = "ReplicationSlotRestoreSync";
+			break;
+		case WAIT_EVENT_REPLICATION_SLOT_SYNC:
+			event_name = "ReplicationSlotSync";
+			break;
+		case WAIT_EVENT_REPLICATION_SLOT_WRITE:
+			event_name = "ReplicationSlotWrite";
+			break;
+		case WAIT_EVENT_SLRU_FLUSH_SYNC:
+			event_name = "SLRUFlushSync";
+			break;
+		case WAIT_EVENT_SLRU_READ:
+			event_name = "SLRURead";
+			break;
+		case WAIT_EVENT_SLRU_SYNC:
+			event_name = "SLRUSync";
+			break;
+		case WAIT_EVENT_SLRU_WRITE:
+			event_name = "SLRUWrite";
+			break;
+		case WAIT_EVENT_SNAPBUILD_READ:
+			event_name = "SnapbuildRead";
+			break;
+		case WAIT_EVENT_SNAPBUILD_SYNC:
+			event_name = "SnapbuildSync";
+			break;
+		case WAIT_EVENT_SNAPBUILD_WRITE:
+			event_name = "SnapbuildWrite";
+			break;
+		case WAIT_EVENT_TIMELINE_HISTORY_FILE_SYNC:
+			event_name = "TimelineHistoryFileSync";
+			break;
+		case WAIT_EVENT_TIMELINE_HISTORY_FILE_WRITE:
+			event_name = "TimelineHistoryFileWrite";
+			break;
+		case WAIT_EVENT_TIMELINE_HISTORY_READ:
+			event_name = "TimelineHistoryRead";
+			break;
+		case WAIT_EVENT_TIMELINE_HISTORY_SYNC:
+			event_name = "TimelineHistorySync";
+			break;
+		case WAIT_EVENT_TIMELINE_HISTORY_WRITE:
+			event_name = "TimelineHistoryWrite";
+			break;
+		case WAIT_EVENT_TWOPHASE_FILE_READ:
+			event_name = "TwophaseFileRead";
+			break;
+		case WAIT_EVENT_TWOPHASE_FILE_SYNC:
+			event_name = "TwophaseFileSync";
+			break;
+		case WAIT_EVENT_TWOPHASE_FILE_WRITE:
+			event_name = "TwophaseFileWrite";
+			break;
+		case WAIT_EVENT_VERSION_FILE_WRITE:
+			event_name = "VersionFileWrite";
+			break;
+		case WAIT_EVENT_WALSENDER_TIMELINE_HISTORY_READ:
+			event_name = "WALSenderTimelineHistoryRead";
+			break;
+		case WAIT_EVENT_WAL_BOOTSTRAP_SYNC:
+			event_name = "WALBootstrapSync";
+			break;
+		case WAIT_EVENT_WAL_BOOTSTRAP_WRITE:
+			event_name = "WALBootstrapWrite";
+			break;
+		case WAIT_EVENT_WAL_COPY_READ:
+			event_name = "WALCopyRead";
+			break;
+		case WAIT_EVENT_WAL_COPY_SYNC:
+			event_name = "WALCopySync";
+			break;
+		case WAIT_EVENT_WAL_COPY_WRITE:
+			event_name = "WALCopyWrite";
+			break;
+		case WAIT_EVENT_WAL_INIT_SYNC:
+			event_name = "WALInitSync";
+			break;
+		case WAIT_EVENT_WAL_INIT_WRITE:
+			event_name = "WALInitWrite";
+			break;
+		case WAIT_EVENT_WAL_READ:
+			event_name = "WALRead";
+			break;
+		case WAIT_EVENT_WAL_SYNC:
+			event_name = "WALSync";
+			break;
+		case WAIT_EVENT_WAL_SYNC_METHOD_ASSIGN:
+			event_name = "WALSyncMethodAssign";
+			break;
+		case WAIT_EVENT_WAL_WRITE:
+			event_name = "WALWrite";
+			break;
+
+			/* no default case, so that compiler will warn */
+	}
+
+	return event_name;
+}